From: Linus Torvalds Date: Thu, 17 Jul 2008 17:38:59 +0000 (-0700) Subject: Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git... X-Git-Tag: firefly_0821_release~19633 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=2b04be7e8ab5756ea36e137dd03c8773d184e67e;hp=2567d71cc7acd99f0a0dd02e17fe17fd7df7b30c;p=firefly-linux-kernel-4.4.55.git Merge branch 'x86-fixes-for-linus' of git://git./linux/kernel/git/tip/linux-2.6-tip * 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: x86: fix asm/e820.h for userspace inclusion x86: fix numaq_tsc_disable x86: fix kernel_physical_mapping_init() for large x86 systems --- diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index 9470ed9afcc0..f27be7d1a49f 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -29,46 +29,46 @@ Description: $ cd /sys/firmware/acpi/interrupts $ grep . * - error:0 - ff_gbl_lock:0 - ff_pmtimer:0 - ff_pwr_btn:0 - ff_rt_clk:0 - ff_slp_btn:0 - gpe00:0 - gpe01:0 - gpe02:0 - gpe03:0 - gpe04:0 - gpe05:0 - gpe06:0 - gpe07:0 - gpe08:0 - gpe09:174 - gpe0A:0 - gpe0B:0 - gpe0C:0 - gpe0D:0 - gpe0E:0 - gpe0F:0 - gpe10:0 - gpe11:60 - gpe12:0 - gpe13:0 - gpe14:0 - gpe15:0 - gpe16:0 - gpe17:0 - gpe18:0 - gpe19:7 - gpe1A:0 - gpe1B:0 - gpe1C:0 - gpe1D:0 - gpe1E:0 - gpe1F:0 - gpe_all:241 - sci:241 + error: 0 + ff_gbl_lock: 0 enable + ff_pmtimer: 0 invalid + ff_pwr_btn: 0 enable + ff_rt_clk: 2 disable + ff_slp_btn: 0 invalid + gpe00: 0 invalid + gpe01: 0 enable + gpe02: 108 enable + gpe03: 0 invalid + gpe04: 0 invalid + gpe05: 0 invalid + gpe06: 0 enable + gpe07: 0 enable + gpe08: 0 invalid + gpe09: 0 invalid + gpe0A: 0 invalid + gpe0B: 0 invalid + gpe0C: 0 invalid + gpe0D: 0 invalid + gpe0E: 0 invalid + gpe0F: 0 invalid + gpe10: 0 invalid + gpe11: 0 invalid + gpe12: 0 invalid + gpe13: 0 invalid + gpe14: 0 invalid + gpe15: 0 invalid + gpe16: 0 invalid + gpe17: 1084 enable + gpe18: 0 enable + gpe19: 0 invalid + gpe1A: 0 invalid + gpe1B: 0 invalid + gpe1C: 0 invalid + gpe1D: 0 invalid + gpe1E: 0 invalid + gpe1F: 0 invalid + gpe_all: 1192 + sci: 1194 sci - The total number of times the ACPI SCI has claimed an interrupt. @@ -89,6 +89,13 @@ Description: error - an interrupt that can't be accounted for above. + invalid: it's either a wakeup GPE or a GPE/Fixed Event that + doesn't have an event handler. + + disable: the GPE/Fixed Event is valid but disabled. + + enable: the GPE/Fixed Event is valid and enabled. + Root has permission to clear any of these counters. Eg. # echo 0 > gpe11 @@ -97,3 +104,43 @@ Description: None of these counters has an effect on the function of the system, they are simply statistics. + + Besides this, user can also write specific strings to these files + to enable/disable/clear ACPI interrupts in user space, which can be + used to debug some ACPI interrupt storm issues. + + Note that only writting to VALID GPE/Fixed Event is allowed, + i.e. user can only change the status of runtime GPE and + Fixed Event with event handler installed. + + Let's take power button fixed event for example, please kill acpid + and other user space applications so that the machine won't shutdown + when pressing the power button. + # cat ff_pwr_btn + 0 + # press the power button for 3 times; + # cat ff_pwr_btn + 3 + # echo disable > ff_pwr_btn + # cat ff_pwr_btn + disable + # press the power button for 3 times; + # cat ff_pwr_btn + disable + # echo enable > ff_pwr_btn + # cat ff_pwr_btn + 4 + /* + * this is because the status bit is set even if the enable bit is cleared, + * and it triggers an ACPI fixed event when the enable bit is set again + */ + # press the power button for 3 times; + # cat ff_pwr_btn + 7 + # echo disable > ff_pwr_btn + # press the power button for 3 times; + # echo clear > ff_pwr_btn /* clear the status bit */ + # echo disable > ff_pwr_btn + # cat ff_pwr_btn + 7 + diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt new file mode 100644 index 000000000000..540e9e7f59c5 --- /dev/null +++ b/Documentation/filesystems/ubifs.txt @@ -0,0 +1,164 @@ +Introduction +============= + +UBIFS file-system stands for UBI File System. UBI stands for "Unsorted +Block Images". UBIFS is a flash file system, which means it is designed +to work with flash devices. It is important to understand, that UBIFS +is completely different to any traditional file-system in Linux, like +Ext2, XFS, JFS, etc. UBIFS represents a separate class of file-systems +which work with MTD devices, not block devices. The other Linux +file-system of this class is JFFS2. + +To make it more clear, here is a small comparison of MTD devices and +block devices. + +1 MTD devices represent flash devices and they consist of eraseblocks of + rather large size, typically about 128KiB. Block devices consist of + small blocks, typically 512 bytes. +2 MTD devices support 3 main operations - read from some offset within an + eraseblock, write to some offset within an eraseblock, and erase a whole + eraseblock. Block devices support 2 main operations - read a whole + block and write a whole block. +3 The whole eraseblock has to be erased before it becomes possible to + re-write its contents. Blocks may be just re-written. +4 Eraseblocks become worn out after some number of erase cycles - + typically 100K-1G for SLC NAND and NOR flashes, and 1K-10K for MLC + NAND flashes. Blocks do not have the wear-out property. +5 Eraseblocks may become bad (only on NAND flashes) and software should + deal with this. Blocks on hard drives typically do not become bad, + because hardware has mechanisms to substitute bad blocks, at least in + modern LBA disks. + +It should be quite obvious why UBIFS is very different to traditional +file-systems. + +UBIFS works on top of UBI. UBI is a separate software layer which may be +found in drivers/mtd/ubi. UBI is basically a volume management and +wear-leveling layer. It provides so called UBI volumes which is a higher +level abstraction than a MTD device. The programming model of UBI devices +is very similar to MTD devices - they still consist of large eraseblocks, +they have read/write/erase operations, but UBI devices are devoid of +limitations like wear and bad blocks (items 4 and 5 in the above list). + +In a sense, UBIFS is a next generation of JFFS2 file-system, but it is +very different and incompatible to JFFS2. The following are the main +differences. + +* JFFS2 works on top of MTD devices, UBIFS depends on UBI and works on + top of UBI volumes. +* JFFS2 does not have on-media index and has to build it while mounting, + which requires full media scan. UBIFS maintains the FS indexing + information on the flash media and does not require full media scan, + so it mounts many times faster than JFFS2. +* JFFS2 is a write-through file-system, while UBIFS supports write-back, + which makes UBIFS much faster on writes. + +Similarly to JFFS2, UBIFS supports on-the-flight compression which makes +it possible to fit quite a lot of data to the flash. + +Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts. +It does not need stuff like ckfs.ext2. UBIFS automatically replays its +journal and recovers from crashes, ensuring that the on-flash data +structures are consistent. + +UBIFS scales logarithmically (most of the data structures it uses are +trees), so the mount time and memory consumption do not linearly depend +on the flash size, like in case of JFFS2. This is because UBIFS +maintains the FS index on the flash media. However, UBIFS depends on +UBI, which scales linearly. So overall UBI/UBIFS stack scales linearly. +Nevertheless, UBI/UBIFS scales considerably better than JFFS2. + +The authors of UBIFS believe, that it is possible to develop UBI2 which +would scale logarithmically as well. UBI2 would support the same API as UBI, +but it would be binary incompatible to UBI. So UBIFS would not need to be +changed to use UBI2 + + +Mount options +============= + +(*) == default. + +norm_unmount (*) commit on unmount; the journal is committed + when the file-system is unmounted so that the + next mount does not have to replay the journal + and it becomes very fast; +fast_unmount do not commit on unmount; this option makes + unmount faster, but the next mount slower + because of the need to replay the journal. + + +Quick usage instructions +======================== + +The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax, +where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is +UBI volume name. + +Mount volume 0 on UBI device 0 to /mnt/ubifs: +$ mount -t ubifs ubi0_0 /mnt/ubifs + +Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume +name): +$ mount -t ubifs ubi0:rootfs /mnt/ubifs + +The following is an example of the kernel boot arguments to attach mtd0 +to UBI and mount volume "rootfs": +ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs + + +Module Parameters for Debugging +=============================== + +When UBIFS has been compiled with debugging enabled, there are 3 module +parameters that are available to control aspects of testing and debugging. +The parameters are unsigned integers where each bit controls an option. +The parameters are: + +debug_msgs Selects which debug messages to display, as follows: + + Message Type Flag value + + General messages 1 + Journal messages 2 + Mount messages 4 + Commit messages 8 + LEB search messages 16 + Budgeting messages 32 + Garbage collection messages 64 + Tree Node Cache (TNC) messages 128 + LEB properties (lprops) messages 256 + Input/output messages 512 + Log messages 1024 + Scan messages 2048 + Recovery messages 4096 + +debug_chks Selects extra checks that UBIFS can do while running: + + Check Flag value + + General checks 1 + Check Tree Node Cache (TNC) 2 + Check indexing tree size 4 + Check orphan area 8 + Check old indexing tree 16 + Check LEB properties (lprops) 32 + Check leaf nodes and inodes 64 + +debug_tsts Selects a mode of testing, as follows: + + Test mode Flag value + + Force in-the-gaps method 2 + Failure mode for recovery testing 4 + +For example, set debug_msgs to 5 to display General messages and Mount +messages. + + +References +========== + +UBIFS documentation and FAQ/HOWTO at the MTD web site: +http://www.linux-mtd.infradead.org/doc/ubifs.html +http://www.linux-mtd.infradead.org/faq/ubifs.html diff --git a/Documentation/ioctl/hdio.txt b/Documentation/ioctl/hdio.txt index c19efdeace2c..91a6ecbae0bb 100644 --- a/Documentation/ioctl/hdio.txt +++ b/Documentation/ioctl/hdio.txt @@ -508,12 +508,13 @@ HDIO_DRIVE_RESET execute a device reset error returns: EACCES Access denied: requires CAP_SYS_ADMIN + ENXIO No such device: phy dead or ctl_addr == 0 + EIO I/O error: reset timed out or hardware error notes: - Abort any current command, prevent anything else from being - queued, execute a reset on the device, and issue BLKRRPART - ioctl on the block device. + Execute a reset on the device as soon as the current IO + operation has completed. Executes an ATAPI soft reset if applicable, otherwise executes an ATA soft reset on the controller. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 312fe77764a4..09ad7450647b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -147,10 +147,14 @@ and is between 256 and 4096 characters. It is defined in the file default: 0 acpi_sleep= [HW,ACPI] Sleep options - Format: { s3_bios, s3_mode, s3_beep } + Format: { s3_bios, s3_mode, s3_beep, old_ordering } See Documentation/power/video.txt for s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep as soon as the kernel's real-mode entry point is called. + old_ordering causes the ACPI 1.0 ordering of the _PTS + control method, wrt putting devices into low power + states, to be enforced (the ACPI 2.0 ordering of _PTS is + used by default). acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode Format: { level | edge | high | low } @@ -818,7 +822,7 @@ and is between 256 and 4096 characters. It is defined in the file See Documentation/ide/ide.txt. idle= [X86] - Format: idle=poll or idle=mwait + Format: idle=poll or idle=mwait, idle=halt, idle=nomwait Poll forces a polling idle loop that can slightly improves the performance of waking up a idle CPU, but will use a lot of power and make the system run hot. Not recommended. @@ -826,6 +830,9 @@ and is between 256 and 4096 characters. It is defined in the file to not use it because it doesn't save as much power as a normal idle loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same as idle=poll. + idle=halt. Halt is forced to be used for CPU idle. + In such case C2/C3 won't be used again. + idle=nomwait. Disable mwait for CPU C-states ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem Claim all unknown PCI IDE storage controllers. @@ -1534,6 +1541,9 @@ and is between 256 and 4096 characters. It is defined in the file Use with caution as certain devices share address decoders between ROMs and other resources. + norom [X86-32,X86_64] Do not assign address space to + expansion ROMs that do not already have + BIOS assigned address ranges. irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be assigned automatically to PCI devices. You can make the kernel exclude IRQs of your ISA cards diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt index 79b7dbd22141..69b5dd4e5a59 100644 --- a/Documentation/laptops/acer-wmi.txt +++ b/Documentation/laptops/acer-wmi.txt @@ -174,8 +174,6 @@ The LED is exposed through the LED subsystem, and can be found in: The mail LED is autodetected, so if you don't have one, the LED device won't be registered. -If you have a mail LED that is not green, please report this to me. - Backlight ********* diff --git a/MAINTAINERS b/MAINTAINERS index ee1c56a20750..93fd6b2efeee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -216,8 +216,8 @@ W: http://code.google.com/p/aceracpi S: Maintained ACPI -P: Len Brown -M: len.brown@intel.com +P: Andi Kleen +M: ak@linux.intel.com M: lenb@kernel.org L: linux-acpi@vger.kernel.org W: http://www.lesswatts.org/projects/acpi/ @@ -239,8 +239,8 @@ W: http://www.lesswatts.org/projects/acpi/ S: Supported ACPI FAN DRIVER -P: Len Brown -M: len.brown@intel.com +P: Zhang Rui +M: rui.zhang@intel.com L: linux-acpi@vger.kernel.org W: http://www.lesswatts.org/projects/acpi/ S: Supported @@ -248,18 +248,18 @@ S: Supported ACPI PCI HOTPLUG DRIVER P: Kristen Carlson Accardi M: kristen.c.accardi@intel.com -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported ACPI THERMAL DRIVER -P: Len Brown -M: len.brown@intel.com +P: Zhang Rui +M: rui.zhang@intel.com L: linux-acpi@vger.kernel.org W: http://www.lesswatts.org/projects/acpi/ S: Supported ACPI VIDEO DRIVER -P: Rui Zhang +P: Zhang Rui M: rui.zhang@intel.com L: linux-acpi@vger.kernel.org W: http://www.lesswatts.org/projects/acpi/ @@ -348,7 +348,9 @@ W: http://www.linux-usb.org/SpeedTouch/ S: Maintained ALCHEMY AU1XX0 MMC DRIVER -S: Orphan +P: Manuel Lauss +M: manuel.lauss@gmail.com +S: Maintained ALI1563 I2C DRIVER P: Rudolf Marek @@ -1143,23 +1145,28 @@ COMPACTPCI HOTPLUG CORE P: Scott Murray M: scottm@somanetworks.com M: scott@spiteful.org -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported COMPACTPCI HOTPLUG ZIATECH ZT5550 DRIVER P: Scott Murray M: scottm@somanetworks.com M: scott@spiteful.org -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported COMPACTPCI HOTPLUG GENERIC DRIVER P: Scott Murray M: scottm@somanetworks.com M: scott@spiteful.org -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported +COMPAL LAPTOP SUPPORT +P: Cezary Jackiewicz +M: cezary.jackiewicz@gmail.com +S: Maintained + COMPUTONE INTELLIPORT MULTIPORT CARD P: Michael H. Warfield M: mhw@wittsend.com @@ -1787,6 +1794,12 @@ P: David Howells M: dhowells@redhat.com S: Maintained +FUJITSU LAPTOP EXTRAS +P: Jonathan Woithe +M: jwoithe@physics.adelaide.edu.au +L: linux-acpi@vger.kernel.org +S: Maintained + FUSE: FILESYSTEM IN USERSPACE P: Miklos Szeredi M: miklos@szeredi.hu @@ -2325,6 +2338,16 @@ L: linux-mtd@lists.infradead.org W: http://www.linux-mtd.infradead.org/doc/jffs2.html S: Maintained +UBI FILE SYSTEM (UBIFS) +P: Artem Bityutskiy +M: dedekind@infradead.org +P: Adrian Hunter +M: ext-adrian.hunter@nokia.com +L: linux-mtd@lists.infradead.org +T: git git://git.infradead.org/~dedekind/ubifs-2.6.git +W: http://www.linux-mtd.infradead.org/doc/ubifs.html +S: Maintained + JFS FILESYSTEM P: Dave Kleikamp M: shaggy@austin.ibm.com @@ -3196,7 +3219,7 @@ S: Supported PCIE HOTPLUG DRIVER P: Kristen Carlson Accardi M: kristen.c.accardi@intel.com -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported PCMCIA SUBSYSTEM @@ -3538,6 +3561,13 @@ L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported +S3C24XX SD/MMC Driver +P: Ben Dooks +M: ben-linux@fluff.org +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) +L: linux-kernel@vger.kernel.org +S: Supported + SAA7146 VIDEO4LINUX-2 DRIVER P: Michael Hunold M: michael@mihu.de @@ -3610,6 +3640,12 @@ P: Jim Cromie M: jim.cromie@gmail.com S: Maintained +SDRICOH_CS MMC/SD HOST CONTROLLER INTERFACE DRIVER +P: Sascha Sommer +M: saschasommer@freenet.de +L: sdricohcs-devel@lists.sourceforge.net (subscribers-only) +S: Maintained + SECURITY CONTACT P: Security Officers M: security@kernel.org @@ -3829,7 +3865,7 @@ S: Maintained SHPC HOTPLUG DRIVER P: Kristen Carlson Accardi M: kristen.c.accardi@intel.com -L: pcihpd-discuss@lists.sourceforge.net +L: linux-pci@vger.kernel.org S: Supported SECURE DIGITAL HOST CONTROLLER INTERFACE DRIVER diff --git a/Makefile b/Makefile index 1564577bdc53..6192922de9c0 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,3 @@ -FRED=42 VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 26 diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c index a51bb9fb3c89..c7fe94d03a1e 100644 --- a/arch/avr32/boards/atngw100/setup.c +++ b/arch/avr32/boards/atngw100/setup.c @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -51,6 +52,11 @@ static struct spi_board_info spi0_board_info[] __initdata = { }, }; +static struct mci_platform_data __initdata mci0_data = { + .detect_pin = GPIO_PIN_PC(25), + .wp_pin = GPIO_PIN_PE(0), +}; + /* * The next two functions should go away as the boot loader is * supposed to initialize the macb address registers with a valid @@ -170,6 +176,7 @@ static int __init atngw100_init(void) set_hw_addr(at32_add_device_eth(1, ð_data[1])); at32_add_device_spi(0, spi0_board_info, ARRAY_SIZE(spi0_board_info)); + at32_add_device_mci(0, &mci0_data); at32_add_device_usba(0, NULL); for (i = 0; i < ARRAY_SIZE(ngw_leds); i++) { diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c index 86b363c1c25b..e11659b732fa 100644 --- a/arch/avr32/boards/atstk1000/atstk1002.c +++ b/arch/avr32/boards/atstk1000/atstk1002.c @@ -234,6 +234,9 @@ static int __init atstk1002_init(void) #ifdef CONFIG_BOARD_ATSTK100X_SPI1 at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info)); #endif +#ifndef CONFIG_BOARD_ATSTK1002_SW2_CUSTOM + at32_add_device_mci(0, NULL); +#endif #ifdef CONFIG_BOARD_ATSTK1002_SW5_CUSTOM set_hw_addr(at32_add_device_eth(1, ð_data[1])); #else diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index 07b21b121eef..021d51217184 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -1278,20 +1279,32 @@ static struct clk atmel_mci0_pclk = { .index = 9, }; -struct platform_device *__init at32_add_device_mci(unsigned int id) +struct platform_device *__init +at32_add_device_mci(unsigned int id, struct mci_platform_data *data) { - struct platform_device *pdev; + struct mci_platform_data _data; + struct platform_device *pdev; + struct dw_dma_slave *dws; if (id != 0) return NULL; pdev = platform_device_alloc("atmel_mci", id); if (!pdev) - return NULL; + goto fail; if (platform_device_add_resources(pdev, atmel_mci0_resource, ARRAY_SIZE(atmel_mci0_resource))) - goto err_add_resources; + goto fail; + + if (!data) { + data = &_data; + memset(data, 0, sizeof(struct mci_platform_data)); + } + + if (platform_device_add_data(pdev, data, + sizeof(struct mci_platform_data))) + goto fail; select_peripheral(PA(10), PERIPH_A, 0); /* CLK */ select_peripheral(PA(11), PERIPH_A, 0); /* CMD */ @@ -1300,12 +1313,19 @@ struct platform_device *__init at32_add_device_mci(unsigned int id) select_peripheral(PA(14), PERIPH_A, 0); /* DATA2 */ select_peripheral(PA(15), PERIPH_A, 0); /* DATA3 */ + if (data) { + if (data->detect_pin != GPIO_PIN_NONE) + at32_select_gpio(data->detect_pin, 0); + if (data->wp_pin != GPIO_PIN_NONE) + at32_select_gpio(data->wp_pin, 0); + } + atmel_mci0_pclk.dev = &pdev->dev; platform_device_add(pdev); return pdev; -err_add_resources: +fail: platform_device_put(pdev); return NULL; } diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c index 4f165c93be42..edae117fcc2b 100644 --- a/arch/frv/mb93090-mb00/pci-frv.c +++ b/arch/frv/mb93090-mb00/pci-frv.c @@ -19,36 +19,6 @@ #include "pci-frv.h" -#if 0 -void -pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) -{ - u32 new, check; - int reg; - - new = res->start | (res->flags & PCI_REGION_FLAG_MASK); - if (resource < 6) { - reg = PCI_BASE_ADDRESS_0 + 4*resource; - } else if (resource == PCI_ROM_RESOURCE) { - res->flags |= IORESOURCE_ROM_ENABLE; - new |= PCI_ROM_ADDRESS_ENABLE; - reg = dev->rom_base_reg; - } else { - /* Somebody might have asked allocation of a non-standard resource */ - return; - } - - pci_write_config_dword(dev, reg, new); - pci_read_config_dword(dev, reg, &check); - if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", pci_name(dev), resource, - new, check); - } -} -#endif - /* * We need to avoid collisions with `mirrored' VGA ports * and other strange ISA hardware, so we always want the diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index fabaf08d9a69..3ab8373103ec 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -55,6 +55,10 @@ void (*ia64_mark_idle)(int); unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +unsigned long idle_halt; +EXPORT_SYMBOL(idle_halt); +unsigned long idle_nomwait; +EXPORT_SYMBOL(idle_nomwait); void ia64_do_show_stack (struct unw_frame_info *info, void *arg) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 318b81100623..68c978be9a51 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -395,7 +395,7 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (kvm->vcpus[i]->cpu != -1) { call_data.vcpu = kvm->vcpus[i]; smp_call_function_single(kvm->vcpus[i]->cpu, - vcpu_global_purge, &call_data, 0, 1); + vcpu_global_purge, &call_data, 1); } else printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); @@ -1693,7 +1693,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) wake_up_interruptible(&vcpu->wq); if (vcpu->guest_mode) - smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); } int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c index 091f936c4485..0c69d9ec92d4 100644 --- a/arch/ia64/kvm/kvm_fw.c +++ b/arch/ia64/kvm/kvm_fw.c @@ -130,7 +130,7 @@ static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) args.cache_type = gr29; args.operation = gr30; smp_call_function(remote_pal_cache_flush, - (void *)&args, 1, 1); + (void *)&args, 1); if (args.status != 0) printk(KERN_ERR"pal_cache_flush error!," "status:0x%lx\n", args.status); diff --git a/arch/m68knommu/kernel/comempci.c b/arch/m68knommu/kernel/comempci.c index 6ee00effbad2..0a68b5a85f86 100644 --- a/arch/m68knommu/kernel/comempci.c +++ b/arch/m68knommu/kernel/comempci.c @@ -373,15 +373,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return(0); } -/*****************************************************************************/ - -void pcibios_update_resource(struct pci_dev *dev, struct resource *root, struct resource *r, int resource) -{ - printk(KERN_WARNING "%s(%d): no support for changing PCI resources...\n", - __FILE__, __LINE__); -} - - /*****************************************************************************/ /* diff --git a/arch/mips/pmc-sierra/yosemite/ht.c b/arch/mips/pmc-sierra/yosemite/ht.c index 6380662bbf3c..678388fd34b1 100644 --- a/arch/mips/pmc-sierra/yosemite/ht.c +++ b/arch/mips/pmc-sierra/yosemite/ht.c @@ -345,42 +345,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pcibios_enable_resources(dev); } - - -void pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) -{ - u32 new, check; - int reg; - - return; - - new = res->start | (res->flags & PCI_REGION_FLAG_MASK); - if (resource < 6) { - reg = PCI_BASE_ADDRESS_0 + 4 * resource; - } else if (resource == PCI_ROM_RESOURCE) { - res->flags |= IORESOURCE_ROM_ENABLE; - reg = dev->rom_base_reg; - } else { - /* - * Somebody might have asked allocation of a non-standard - * resource - */ - return; - } - - pci_write_config_dword(dev, reg, new); - pci_read_config_dword(dev, reg, &check); - if ((new ^ check) & - ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : - PCI_BASE_ADDRESS_MEM_MASK)) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", pci_name(dev), resource, - new, check); - } -} - - void pcibios_align_resource(void *data, struct resource *res, resource_size_t size, resource_size_t align) { diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5dc8f8028d52..eb530b4128ba 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -289,7 +289,7 @@ config WARN_STACK_SIZE int "Maximum frame size considered safe (128-2048)" range 128 2048 depends on WARN_STACK - default "256" + default "2048" help This allows you to specify the maximum frame size a function may have without the compiler complaining about it. diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 57571f10270c..8841919ef7e6 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -10,6 +10,7 @@ #include #include #include +#include static unsigned long save_context_stack(struct stack_trace *trace, unsigned long sp, diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 08d2e7325252..f57095a2617c 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -76,38 +76,6 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) pci_read_bridge_bases(bus); } -void -pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) -{ - u32 new, check; - int reg; - - new = res->start | (res->flags & PCI_REGION_FLAG_MASK); - if (resource < 6) { - reg = PCI_BASE_ADDRESS_0 + 4*resource; - } else if (resource == PCI_ROM_RESOURCE) { - res->flags |= IORESOURCE_ROM_ENABLE; - new |= PCI_ROM_ADDRESS_ENABLE; - reg = dev->rom_base_reg; - } else { - /* - * Somebody might have asked allocation of a non-standard - * resource - */ - return; - } - - pci_write_config_dword(dev, reg, new); - pci_read_config_dword(dev, reg, &check); - if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? - PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", pci_name(dev), resource, - new, check); - } -} - void pcibios_align_resource(void *data, struct resource *res, resource_size_t size, resource_size_t align) __attribute__ ((weak)); diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 112b09f16f36..d00a3656c287 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -408,7 +408,7 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, dev->class = class >> 8; dev->revision = class & 0xff; - sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus), + sprintf(dev->dev.bus_id, "%04x:%02x:%02x.%d", pci_domain_nr(bus), dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); if (ofpci_verbose) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5112c84f5421..da140611bb57 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -8,8 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FTRACE # Do not profile debug utilities -CFLAGS_REMOVE_tsc_64.o = -pg -CFLAGS_REMOVE_tsc_32.o = -pg +CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg endif diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index de2d2e4ebad9..7c074eec39fb 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_ACPI)) buf[2] |= ACPI_PDC_T_FFH; + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); + obj->type = ACPI_TYPE_BUFFER; obj->buffer.length = 12; obj->buffer.pointer = (u8 *) buf; diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 793ad2045f58..868de3d5c39d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -158,6 +158,8 @@ static int __init acpi_sleep_setup(char *str) acpi_realmode_flags |= 2; if (strncmp(str, "s3_beep", 7) == 0) acpi_realmode_flags |= 4; + if (strncmp(str, "old_ordering", 12) == 0) + acpi_old_suspend_ordering(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 75cb5da4ea0a..bf9b441331e9 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1213,9 +1213,9 @@ static int suspend(int vetoable) if (err != APM_SUCCESS) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; - device_power_up(); + device_power_up(PMSG_RESUME); local_irq_enable(); - device_resume(); + device_resume(PMSG_RESUME); queue_event(APM_NORMAL_RESUME, NULL); spin_lock(&user_list_lock); for (as = user_list; as != NULL; as = as->next) { @@ -1240,7 +1240,7 @@ static void standby(void) apm_error("standby", err); local_irq_disable(); - device_power_up(); + device_power_up(PMSG_RESUME); local_irq_enable(); } @@ -1326,7 +1326,7 @@ static void check_events(void) ignore_bounce = 1; if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { - device_resume(); + device_resume(PMSG_RESUME); queue_event(event, NULL); } ignore_normal_resume = 0; diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a4665f37cfc5..a0e11c0cc872 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -120,7 +120,18 @@ static struct chipset early_qrk[] __initdata = { {} }; -static void __init check_dev_quirk(int num, int slot, int func) +/** + * check_dev_quirk - apply early quirks to a given PCI device + * @num: bus number + * @slot: slot number + * @func: PCI function + * + * Check the vendor & device ID against the early quirks table. + * + * If the device is single function, let early_quirks() know so we don't + * poke at this device again. + */ +static int __init check_dev_quirk(int num, int slot, int func) { u16 class; u16 vendor; @@ -131,7 +142,7 @@ static void __init check_dev_quirk(int num, int slot, int func) class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); if (class == 0xffff) - return; + return -1; /* no class, treat as single function */ vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID); @@ -154,7 +165,9 @@ static void __init check_dev_quirk(int num, int slot, int func) type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); if (!(type & 0x80)) - return; + return -1; + + return 0; } void __init early_quirks(void) @@ -167,6 +180,9 @@ void __init early_quirks(void) /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) for (slot = 0; slot < 32; slot++) - for (func = 0; func < 8; func++) - check_dev_quirk(num, slot, func); + for (func = 0; func < 8; func++) { + /* Only probe function 0 on single fn devices */ + if (check_dev_quirk(num, slot, func)) + break; + } } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7dceea947232..4d629c62f4f8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,6 +7,12 @@ #include #include #include +#include + +unsigned long idle_halt; +EXPORT_SYMBOL(idle_halt); +unsigned long idle_nomwait; +EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; @@ -325,7 +331,27 @@ static int __init idle_setup(char *str) pm_idle = poll_idle; } else if (!strcmp(str, "mwait")) force_mwait = 1; - else + else if (!strcmp(str, "halt")) { + /* + * When the boot option of idle=halt is added, halt is + * forced to be used for CPU idle. In such case CPU C2/C3 + * won't be used again. + * To continue to load the CPU idle driver, don't touch + * the boot_option_idle_override. + */ + pm_idle = default_idle; + idle_halt = 1; + return 0; + } else if (!strcmp(str, "nomwait")) { + /* + * If the boot option of "idle=nomwait" is added, + * it means that mwait will be disabled for CPU C2/C3 + * states. In such case it won't touch the variable + * of boot_option_idle_override. + */ + idle_nomwait = 1; + return 0; + } else return -1; boot_option_idle_override = 1; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 36c540d4ac4b..531b55b8e81a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -684,6 +684,11 @@ void __init setup_arch(char **cmdline_p) clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); } +#ifdef CONFIG_PCI + if (pci_early_dump_regs) + early_dump_pci_devices(); +#endif + finish_e820_parsing(); #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index f41d67f8f831..1eb2973a301c 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -156,10 +156,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity) num_memory_chunks++; - printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)" + printk(KERN_DEBUG "Memory range %08lx to %08lx" " in proximity domain %02x %s\n", start_pfn, end_pfn, - memory_affinity->memory_type, pxm, ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? "enabled and removable" : "enabled" ) ); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 20b9f59f95df..b67732bbb85a 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -20,6 +20,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | PCI_PROBE_MMCONF; +unsigned int pci_early_dump_regs; static int pci_bf_sort; int pci_routeirq; int pcibios_last_bus = -1; @@ -31,7 +32,7 @@ struct pci_raw_ops *raw_pci_ext_ops; int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val) { - if (reg < 256 && raw_pci_ops) + if (domain == 0 && reg < 256 && raw_pci_ops) return raw_pci_ops->read(domain, bus, devfn, reg, len, val); if (raw_pci_ext_ops) return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val); @@ -41,7 +42,7 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 val) { - if (reg < 256 && raw_pci_ops) + if (domain == 0 && reg < 256 && raw_pci_ops) return raw_pci_ops->write(domain, bus, devfn, reg, len, val); if (raw_pci_ext_ops) return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val); @@ -121,6 +122,21 @@ void __init dmi_check_skip_isa_align(void) dmi_check_system(can_skip_pciprobe_dmi_table); } +static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) +{ + struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; + + if (pci_probe & PCI_NOASSIGN_ROMS) { + if (rom_r->parent) + return; + if (rom_r->start) { + /* we deal with BIOS assigned ROM later */ + return; + } + rom_r->start = rom_r->end = rom_r->flags = 0; + } +} + /* * Called after each bus is probed, but before its children * are examined. @@ -128,7 +144,11 @@ void __init dmi_check_skip_isa_align(void) void __devinit pcibios_fixup_bus(struct pci_bus *b) { + struct pci_dev *dev; + pci_read_bridge_bases(b); + list_for_each_entry(dev, &b->devices, bus_list) + pcibios_fixup_device_resources(dev); } /* @@ -481,12 +501,18 @@ char * __devinit pcibios_setup(char *str) else if (!strcmp(str, "rom")) { pci_probe |= PCI_ASSIGN_ROMS; return NULL; + } else if (!strcmp(str, "norom")) { + pci_probe |= PCI_NOASSIGN_ROMS; + return NULL; } else if (!strcmp(str, "assign-busses")) { pci_probe |= PCI_ASSIGN_ALL_BUSSES; return NULL; } else if (!strcmp(str, "use_crs")) { pci_probe |= PCI_USE__CRS; return NULL; + } else if (!strcmp(str, "earlydump")) { + pci_early_dump_regs = 1; + return NULL; } else if (!strcmp(str, "routeirq")) { pci_routeirq = 1; return NULL; diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 42df4b6606df..858dbe3399f9 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -49,7 +49,14 @@ void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) { PDprintk("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); - outb(val, 0xcfc); + outb(val, 0xcfc + (offset&3)); +} + +void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) +{ + PDprintk("%x writing to %x: %x\n", slot, offset, val); + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outw(val, 0xcfc + (offset&2)); } int early_pci_allowed(void) @@ -57,3 +64,54 @@ int early_pci_allowed(void) return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == PCI_PROBE_CONF1; } + +void early_dump_pci_device(u8 bus, u8 slot, u8 func) +{ + int i; + int j; + u32 val; + + printk("PCI: %02x:%02x:%02x", bus, slot, func); + + for (i = 0; i < 256; i += 4) { + if (!(i & 0x0f)) + printk("\n%04x:",i); + + val = read_pci_config(bus, slot, func, i); + for (j = 0; j < 4; j++) { + printk(" %02x", val & 0xff); + val >>= 8; + } + } + printk("\n"); +} + +void early_dump_pci_devices(void) +{ + unsigned bus, slot, func; + + if (!early_pci_allowed()) + return; + + for (bus = 0; bus < 256; bus++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { + u32 class; + u8 type; + class = read_pci_config(bus, slot, func, + PCI_CLASS_REVISION); + if (class == 0xffffffff) + break; + + early_dump_pci_device(bus, slot, func); + + /* No multi-function device? */ + type = read_pci_config_byte(bus, slot, func, + PCI_HEADER_TYPE); + if (!(type & 0x80)) + break; + } + } + } +} + diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index dc568c6b83f8..6a06a2eb0597 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -45,7 +45,8 @@ struct irq_router { char *name; u16 vendor, device; int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); - int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); + int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, + int new); }; struct irq_router_handler { @@ -77,7 +78,8 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) for (i = 0; i < rt->size; i++) sum += addr[i]; if (!sum) { - DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); + DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", + rt); return rt; } return NULL; @@ -183,7 +185,8 @@ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, return (nr & 1) ? (x >> 4) : (x & 0xf); } -static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) +static void write_config_nybble(struct pci_dev *router, unsigned offset, + unsigned nr, unsigned int val) { u8 x; unsigned reg = offset + (nr >> 1); @@ -467,7 +470,8 @@ static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int return inb(0xc01) & 0xf; } -static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, + int pirq, int irq) { outb(pirq, 0xc00); outb(irq, 0xc01); @@ -660,7 +664,8 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router } -static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int serverworks_router_probe(struct irq_router *r, + struct pci_dev *router, u16 device) { switch (device) { case PCI_DEVICE_ID_SERVERWORKS_OSB4: @@ -827,10 +832,12 @@ static void __init pirq_find_router(struct irq_router *r) for (h = pirq_routers; h->vendor; h++) { /* First look for a router match */ - if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) + if (rt->rtr_vendor == h->vendor && + h->probe(r, pirq_router_dev, rt->rtr_device)) break; /* Fall back to a device match */ - if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device)) + if (pirq_router_dev->vendor == h->vendor && + h->probe(r, pirq_router_dev, pirq_router_dev->device)) break; } printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", @@ -845,11 +852,13 @@ static void __init pirq_find_router(struct irq_router *r) static struct irq_info *pirq_get_info(struct pci_dev *dev) { struct irq_routing_table *rt = pirq_table; - int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); + int entries = (rt->size - sizeof(struct irq_routing_table)) / + sizeof(struct irq_info); struct irq_info *info; for (info = rt->slots; entries--; info++) - if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) + if (info->bus == dev->bus->number && + PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) return info; return NULL; } @@ -890,7 +899,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) DBG(" -> not routed\n" KERN_DEBUG); return 0; } - DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); + DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, + pirq_table->exclusive_irqs); mask &= pcibios_irq_mask; /* Work around broken HP Pavilion Notebooks which assign USB to @@ -903,7 +913,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) } /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ - if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) { + if (acer_tm360_irqrouting && dev->irq == 11 && + dev->vendor == PCI_VENDOR_ID_O2) { pirq = 0x68; mask = 0x400; dev->irq = r->get(pirq_router_dev, dev, pirq); @@ -920,15 +931,16 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) newirq = 0; else printk("\n" KERN_WARNING - "PCI: IRQ %i for device %s doesn't match PIRQ mask " - "- try pci=usepirqmask\n" KERN_DEBUG, newirq, - pci_name(dev)); + "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" + KERN_DEBUG, newirq, + pci_name(dev)); } if (!newirq && assign) { for (i = 0; i < 16; i++) { if (!(mask & (1 << i))) continue; - if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED)) + if (pirq_penalty[i] < pirq_penalty[newirq] && + can_request_irq(i, IRQF_SHARED)) newirq = i; } } @@ -944,7 +956,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) DBG(" -> got IRQ %d\n", irq); msg = "Found"; eisa_set_level_irq(irq); - } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { + } else if (newirq && r->set && + (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { DBG(" -> assigning IRQ %d", newirq); if (r->set(pirq_router_dev, dev, pirq, newirq)) { eisa_set_level_irq(newirq); @@ -962,7 +975,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) } else return 0; } - printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev)); + printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, + pci_name(dev)); /* Update IRQ for all devices with the same pirq value */ while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { @@ -974,7 +988,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (!info) continue; if (info->irq[pin].link == pirq) { - /* We refuse to override the dev->irq information. Give a warning! */ + /* + * We refuse to override the dev->irq + * information. Give a warning! + */ if (dev2->irq && dev2->irq != irq && \ (!(pci_probe & PCI_USE_PIRQ_MASK) || \ ((1 << dev2->irq) & mask))) { @@ -987,7 +1004,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) dev2->irq = irq; pirq_penalty[irq]++; if (dev != dev2) - printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2)); + printk(KERN_INFO + "PCI: Sharing IRQ %d with %s\n", + irq, pci_name(dev2)); } } return 1; @@ -1001,15 +1020,21 @@ static void __init pcibios_fixup_irqs(void) DBG(KERN_DEBUG "PCI: IRQ fixup\n"); while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { /* - * If the BIOS has set an out of range IRQ number, just ignore it. - * Also keep track of which IRQ's are already in use. + * If the BIOS has set an out of range IRQ number, just + * ignore it. Also keep track of which IRQ's are + * already in use. */ if (dev->irq >= 16) { - DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq); + DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", + pci_name(dev), dev->irq); dev->irq = 0; } - /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ - if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) + /* + * If the IRQ is already assigned to a PCI device, + * ignore its ISA use penalty + */ + if (pirq_penalty[dev->irq] >= 100 && + pirq_penalty[dev->irq] < 100000) pirq_penalty[dev->irq] = 0; pirq_penalty[dev->irq]++; } @@ -1025,8 +1050,13 @@ static void __init pcibios_fixup_irqs(void) int irq; if (pin) { - pin--; /* interrupt pins are numbered starting from 1 */ - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); + /* + * interrupt pins are numbered starting + * from 1 + */ + pin--; + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, + PCI_SLOT(dev->devfn), pin); /* * Busses behind bridges are typically not listed in the MP-table. * In this case we have to look up the IRQ based on the parent bus, @@ -1067,7 +1097,8 @@ static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d) { if (!broken_hp_bios_irq9) { broken_hp_bios_irq9 = 1; - printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", + d->ident); } return 0; } @@ -1080,7 +1111,8 @@ static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d) { if (!acer_tm360_irqrouting) { acer_tm360_irqrouting = 1; - printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", + d->ident); } return 0; } @@ -1092,7 +1124,8 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"), - DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"), + DMI_MATCH(DMI_PRODUCT_VERSION, + "HP Pavilion Notebook Model GE"), DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"), }, }, @@ -1131,7 +1164,10 @@ int __init pcibios_irq_init(void) if (!(pirq_table->exclusive_irqs & (1 << i))) pirq_penalty[i] += 100; } - /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ + /* + * If we're using the I/O APIC, avoid using the PCI IRQ + * routing table + */ if (io_apic_assign_pci_irqs) pirq_table = NULL; } @@ -1175,7 +1211,7 @@ static int pirq_enable_irq(struct pci_dev *dev) if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { char *msg = ""; - pin--; /* interrupt pins are numbered starting from 1 */ + pin--; /* interrupt pins are numbered starting from 1 */ if (io_apic_assign_pci_irqs) { int irq; @@ -1195,13 +1231,16 @@ static int pirq_enable_irq(struct pci_dev *dev) irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), 'A' + pin, irq); + printk(KERN_WARNING + "PCI: using PPB %s[%c] to get irq %d\n", + pci_name(bridge), + 'A' + pin, irq); dev = bridge; } dev = temp_dev; if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", + printk(KERN_INFO + "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", pci_name(dev), 'A' + pin, irq); dev->irq = irq; return 0; @@ -1212,12 +1251,17 @@ static int pirq_enable_irq(struct pci_dev *dev) else msg = " Please try using pci=biosirq."; - /* With IDE legacy devices the IRQ lookup failure is not a problem.. */ - if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) + /* + * With IDE legacy devices the IRQ lookup failure is not + * a problem.. + */ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && + !(dev->class & 0x5)) return 0; - printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", - 'A' + pin, pci_name(dev), msg); + printk(KERN_WARNING + "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", + 'A' + pin, pci_name(dev), msg); } return 0; } diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index b2270a55b0cf..3e25deb821ac 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -28,6 +28,7 @@ #define PCI_USE__CRS 0x10000 #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 #define PCI_HAS_IO_ECS 0x40000 +#define PCI_NOASSIGN_ROMS 0x80000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index bba867391a85..735f5ea17473 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -336,6 +336,15 @@ config ACPI_EC the battery and thermal drivers. If you are compiling for a mobile system, say Y. +config ACPI_PCI_SLOT + tristate "PCI slot detection driver" + default n + help + This driver will attempt to discover all PCI slots in your system, + and creates entries in /sys/bus/pci/slots/. This feature can + help you correlate PCI bus addresses with the physical geography + of your slots. If you are unsure, say N. + config ACPI_POWER bool default y diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 40b0fcae4c78..52a4cd4b81d0 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -21,7 +21,7 @@ obj-$(CONFIG_X86) += blacklist.o # # ACPI Core Subsystem (Interpreter) # -obj-y += osl.o utils.o \ +obj-y += osl.o utils.o reboot.o\ dispatcher/ events/ executer/ hardware/ \ namespace/ parser/ resources/ tables/ \ utilities/ @@ -48,6 +48,7 @@ obj-$(CONFIG_ACPI_DOCK) += dock.o obj-$(CONFIG_ACPI_BAY) += bay.o obj-$(CONFIG_ACPI_VIDEO) += video.o obj-y += pci_root.o pci_link.o pci_irq.o pci_bind.o +obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o obj-$(CONFIG_ACPI_POWER) += power.o obj-$(CONFIG_ACPI_PROCESSOR) += processor.o obj-$(CONFIG_ACPI_CONTAINER) += container.o diff --git a/drivers/acpi/bay.c b/drivers/acpi/bay.c index 61b6c5beb2d3..e6caf5d42e0e 100644 --- a/drivers/acpi/bay.c +++ b/drivers/acpi/bay.c @@ -377,6 +377,9 @@ static int __init bay_init(void) INIT_LIST_HEAD(&drive_bays); + if (acpi_disabled) + return -ENODEV; + if (acpi_disabled) return -ENODEV; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index a6dbcf4d9ef5..ccae305ee55d 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -295,6 +295,28 @@ int acpi_bus_set_power(acpi_handle handle, int state) EXPORT_SYMBOL(acpi_bus_set_power); +bool acpi_bus_power_manageable(acpi_handle handle) +{ + struct acpi_device *device; + int result; + + result = acpi_bus_get_device(handle, &device); + return result ? false : device->flags.power_manageable; +} + +EXPORT_SYMBOL(acpi_bus_power_manageable); + +bool acpi_bus_can_wakeup(acpi_handle handle) +{ + struct acpi_device *device; + int result; + + result = acpi_bus_get_device(handle, &device); + return result ? false : device->wakeup.flags.valid; +} + +EXPORT_SYMBOL(acpi_bus_can_wakeup); + /* -------------------------------------------------------------------------- Event Management -------------------------------------------------------------------------- */ @@ -612,7 +634,7 @@ static int __init acpi_bus_init_irq(void) return 0; } -acpi_native_uint acpi_gbl_permanent_mmap; +u8 acpi_gbl_permanent_mmap; void __init acpi_early_init(void) diff --git a/drivers/acpi/dispatcher/dsinit.c b/drivers/acpi/dispatcher/dsinit.c index 610b1ee102b0..949f7c75029e 100644 --- a/drivers/acpi/dispatcher/dsinit.c +++ b/drivers/acpi/dispatcher/dsinit.c @@ -151,7 +151,7 @@ acpi_ds_init_one_object(acpi_handle obj_handle, ******************************************************************************/ acpi_status -acpi_ds_initialize_objects(acpi_native_uint table_index, +acpi_ds_initialize_objects(u32 table_index, struct acpi_namespace_node * start_node) { acpi_status status; diff --git a/drivers/acpi/dispatcher/dsmethod.c b/drivers/acpi/dispatcher/dsmethod.c index 2509809a36cf..4613b9ca5792 100644 --- a/drivers/acpi/dispatcher/dsmethod.c +++ b/drivers/acpi/dispatcher/dsmethod.c @@ -377,7 +377,6 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread, } info->parameters = &this_walk_state->operands[0]; - info->parameter_type = ACPI_PARAM_ARGS; status = acpi_ds_init_aml_walk(next_walk_state, NULL, method_node, obj_desc->method.aml_start, diff --git a/drivers/acpi/dispatcher/dsopcode.c b/drivers/acpi/dispatcher/dsopcode.c index a818e0ddb996..6a81c4400edf 100644 --- a/drivers/acpi/dispatcher/dsopcode.c +++ b/drivers/acpi/dispatcher/dsopcode.c @@ -691,12 +691,6 @@ acpi_ds_eval_buffer_field_operands(struct acpi_walk_state *walk_state, status = acpi_ex_resolve_operands(op->common.aml_opcode, ACPI_WALK_OPERANDS, walk_state); - - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name(op->common.aml_opcode), - walk_state->num_operands, - "after AcpiExResolveOperands"); - if (ACPI_FAILURE(status)) { ACPI_ERROR((AE_INFO, "(%s) bad operand(s) (%X)", acpi_ps_get_opcode_name(op->common.aml_opcode), @@ -785,10 +779,6 @@ acpi_ds_eval_region_operands(struct acpi_walk_state *walk_state, return_ACPI_STATUS(status); } - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name(op->common.aml_opcode), - 1, "after AcpiExResolveOperands"); - obj_desc = acpi_ns_get_attached_object(node); if (!obj_desc) { return_ACPI_STATUS(AE_NOT_EXIST); @@ -848,7 +838,7 @@ acpi_ds_eval_table_region_operands(struct acpi_walk_state *walk_state, union acpi_operand_object **operand; struct acpi_namespace_node *node; union acpi_parse_object *next_op; - acpi_native_uint table_index; + u32 table_index; struct acpi_table_header *table; ACPI_FUNCTION_TRACE_PTR(ds_eval_table_region_operands, op); @@ -882,10 +872,6 @@ acpi_ds_eval_table_region_operands(struct acpi_walk_state *walk_state, return_ACPI_STATUS(status); } - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name(op->common.aml_opcode), - 1, "after AcpiExResolveOperands"); - operand = &walk_state->operands[0]; /* Find the ACPI table */ @@ -1091,10 +1077,8 @@ acpi_ds_eval_bank_field_operands(struct acpi_walk_state *walk_state, return_ACPI_STATUS(status); } - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name(op->common.aml_opcode), - 1, "after AcpiExResolveOperands"); - + ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, + acpi_ps_get_opcode_name(op->common.aml_opcode), 1); /* * Get the bank_value operand and save it * (at Top of stack) diff --git a/drivers/acpi/dispatcher/dswexec.c b/drivers/acpi/dispatcher/dswexec.c index b246b9657ead..b5072fa9c920 100644 --- a/drivers/acpi/dispatcher/dswexec.c +++ b/drivers/acpi/dispatcher/dswexec.c @@ -408,14 +408,6 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state) [walk_state-> num_operands - 1]), walk_state); - if (ACPI_SUCCESS(status)) { - ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, - ACPI_IMODE_EXECUTE, - acpi_ps_get_opcode_name - (walk_state->opcode), - walk_state->num_operands, - "after ExResolveOperands"); - } } if (ACPI_SUCCESS(status)) { diff --git a/drivers/acpi/dispatcher/dswstate.c b/drivers/acpi/dispatcher/dswstate.c index 1386ced332ec..b00d4af791aa 100644 --- a/drivers/acpi/dispatcher/dswstate.c +++ b/drivers/acpi/dispatcher/dswstate.c @@ -70,7 +70,7 @@ acpi_status acpi_ds_result_pop(union acpi_operand_object **object, struct acpi_walk_state *walk_state) { - acpi_native_uint index; + u32 index; union acpi_generic_state *state; acpi_status status; @@ -122,7 +122,7 @@ acpi_ds_result_pop(union acpi_operand_object **object, ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Obj=%p [%s] Index=%X State=%p Num=%X\n", *object, acpi_ut_get_object_type_name(*object), - (u32) index, walk_state, walk_state->result_count)); + index, walk_state, walk_state->result_count)); return (AE_OK); } @@ -146,7 +146,7 @@ acpi_ds_result_push(union acpi_operand_object * object, { union acpi_generic_state *state; acpi_status status; - acpi_native_uint index; + u32 index; ACPI_FUNCTION_NAME(ds_result_push); @@ -400,7 +400,7 @@ void acpi_ds_obj_stack_pop_and_delete(u32 pop_count, struct acpi_walk_state *walk_state) { - acpi_native_int i; + s32 i; union acpi_operand_object *obj_desc; ACPI_FUNCTION_NAME(ds_obj_stack_pop_and_delete); @@ -409,7 +409,7 @@ acpi_ds_obj_stack_pop_and_delete(u32 pop_count, return; } - for (i = (acpi_native_int) (pop_count - 1); i >= 0; i--) { + for (i = (s32) pop_count - 1; i >= 0; i--) { if (walk_state->num_operands == 0) { return; } @@ -615,14 +615,8 @@ acpi_ds_init_aml_walk(struct acpi_walk_state *walk_state, walk_state->pass_number = pass_number; if (info) { - if (info->parameter_type == ACPI_PARAM_GPE) { - walk_state->gpe_event_info = - ACPI_CAST_PTR(struct acpi_gpe_event_info, - info->parameters); - } else { - walk_state->params = info->parameters; - walk_state->caller_return_desc = &info->return_object; - } + walk_state->params = info->parameters; + walk_state->caller_return_desc = &info->return_object; } status = acpi_ps_init_scope(&walk_state->parser_state, op); diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index bb7c51f712bd..1e872e79db33 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -917,6 +917,9 @@ static int __init dock_init(void) dock_station = NULL; + if (acpi_disabled) + return 0; + if (acpi_disabled) return 0; diff --git a/drivers/acpi/events/evevent.c b/drivers/acpi/events/evevent.c index 5d30e5be1b1c..c56c5c6ea77b 100644 --- a/drivers/acpi/events/evevent.c +++ b/drivers/acpi/events/evevent.c @@ -188,7 +188,7 @@ acpi_status acpi_ev_install_xrupt_handlers(void) static acpi_status acpi_ev_fixed_event_initialize(void) { - acpi_native_uint i; + u32 i; acpi_status status; /* @@ -231,7 +231,7 @@ u32 acpi_ev_fixed_event_detect(void) u32 int_status = ACPI_INTERRUPT_NOT_HANDLED; u32 fixed_status; u32 fixed_enable; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_NAME(ev_fixed_event_detect); @@ -260,7 +260,7 @@ u32 acpi_ev_fixed_event_detect(void) /* Found an active (signalled) event */ acpi_os_fixed_event_count(i); - int_status |= acpi_ev_fixed_event_dispatch((u32) i); + int_status |= acpi_ev_fixed_event_dispatch(i); } } diff --git a/drivers/acpi/events/evgpe.c b/drivers/acpi/events/evgpe.c index 5354be44f876..c5e53aae86f7 100644 --- a/drivers/acpi/events/evgpe.c +++ b/drivers/acpi/events/evgpe.c @@ -256,7 +256,7 @@ acpi_status acpi_ev_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) return_ACPI_STATUS(status); } - /* Mark wake-disabled or HW disable, or both */ + /* Clear the appropriate enabled flags for this GPE */ switch (gpe_event_info->flags & ACPI_GPE_TYPE_MASK) { case ACPI_GPE_TYPE_WAKE: @@ -273,13 +273,23 @@ acpi_status acpi_ev_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) /* Disable the requested runtime GPE */ ACPI_CLEAR_BIT(gpe_event_info->flags, ACPI_GPE_RUN_ENABLED); - - /* fallthrough */ + break; default: - acpi_hw_write_gpe_enable_reg(gpe_event_info); + break; } + /* + * Even if we don't know the GPE type, make sure that we always + * disable it. low_disable_gpe will just clear the enable bit for this + * GPE and write it. It will not write out the current GPE enable mask, + * since this may inadvertently enable GPEs too early, if a rogue GPE has + * come in during ACPICA initialization - possibly as a result of AML or + * other code that has enabled the GPE. + */ + status = acpi_hw_low_disable_gpe(gpe_event_info); + return_ACPI_STATUS(status); + return_ACPI_STATUS(AE_OK); } @@ -305,7 +315,7 @@ struct acpi_gpe_event_info *acpi_ev_get_gpe_event_info(acpi_handle gpe_device, { union acpi_operand_object *obj_desc; struct acpi_gpe_block_info *gpe_block; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_ENTRY(); @@ -379,8 +389,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) u32 status_reg; u32 enable_reg; acpi_cpu_flags flags; - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; ACPI_FUNCTION_NAME(ev_gpe_detect); @@ -462,13 +472,7 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) */ int_status |= acpi_ev_gpe_dispatch(&gpe_block-> - event_info[(i * - ACPI_GPE_REGISTER_WIDTH) - + - j], - (u32) j + - gpe_register_info-> - base_gpe_number); + event_info[((acpi_size) i * ACPI_GPE_REGISTER_WIDTH) + j], j + gpe_register_info->base_gpe_number); } } } @@ -555,10 +559,6 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context) */ info->prefix_node = local_gpe_event_info.dispatch.method_node; - info->parameters = - ACPI_CAST_PTR(union acpi_operand_object *, - gpe_event_info); - info->parameter_type = ACPI_PARAM_GPE; info->flags = ACPI_IGNORE_RETURN_VALUE; status = acpi_ns_evaluate(info); diff --git a/drivers/acpi/events/evgpeblk.c b/drivers/acpi/events/evgpeblk.c index e6c4d4c49e79..73c058e2f5c2 100644 --- a/drivers/acpi/events/evgpeblk.c +++ b/drivers/acpi/events/evgpeblk.c @@ -189,8 +189,8 @@ acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info, struct acpi_gpe_block_info *gpe_block) { struct acpi_gpe_event_info *gpe_event_info; - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; ACPI_FUNCTION_TRACE(ev_delete_gpe_handlers); @@ -203,7 +203,8 @@ acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info, for (j = 0; j < ACPI_GPE_REGISTER_WIDTH; j++) { gpe_event_info = &gpe_block-> - event_info[(i * ACPI_GPE_REGISTER_WIDTH) + j]; + event_info[((acpi_size) i * + ACPI_GPE_REGISTER_WIDTH) + j]; if ((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) == ACPI_GPE_DISPATCH_HANDLER) { @@ -744,8 +745,8 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block) struct acpi_gpe_event_info *gpe_event_info = NULL; struct acpi_gpe_event_info *this_event; struct acpi_gpe_register_info *this_register; - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; acpi_status status; ACPI_FUNCTION_TRACE(ev_create_gpe_info_blocks); @@ -983,8 +984,8 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device, struct acpi_gpe_walk_info gpe_info; u32 wake_gpe_count; u32 gpe_enabled_count; - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; ACPI_FUNCTION_TRACE(ev_initialize_gpe_block); @@ -1033,7 +1034,8 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device, gpe_event_info = &gpe_block-> - event_info[(i * ACPI_GPE_REGISTER_WIDTH) + j]; + event_info[((acpi_size) i * + ACPI_GPE_REGISTER_WIDTH) + j]; if (((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) == ACPI_GPE_DISPATCH_METHOD) diff --git a/drivers/acpi/events/evmisc.c b/drivers/acpi/events/evmisc.c index 2113e58e2221..1d5670be729a 100644 --- a/drivers/acpi/events/evmisc.c +++ b/drivers/acpi/events/evmisc.c @@ -575,7 +575,7 @@ acpi_status acpi_ev_release_global_lock(void) void acpi_ev_terminate(void) { - acpi_native_uint i; + u32 i; acpi_status status; ACPI_FUNCTION_TRACE(ev_terminate); @@ -589,7 +589,7 @@ void acpi_ev_terminate(void) /* Disable all fixed events */ for (i = 0; i < ACPI_NUM_FIXED_EVENTS; i++) { - status = acpi_disable_event((u32) i, 0); + status = acpi_disable_event(i, 0); if (ACPI_FAILURE(status)) { ACPI_ERROR((AE_INFO, "Could not disable fixed event %d", diff --git a/drivers/acpi/events/evregion.c b/drivers/acpi/events/evregion.c index 1628f5934752..236fbd1ca438 100644 --- a/drivers/acpi/events/evregion.c +++ b/drivers/acpi/events/evregion.c @@ -81,7 +81,7 @@ acpi_ev_install_handler(acpi_handle obj_handle, acpi_status acpi_ev_install_region_handlers(void) { acpi_status status; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ev_install_region_handlers); @@ -151,7 +151,7 @@ acpi_status acpi_ev_install_region_handlers(void) acpi_status acpi_ev_initialize_op_regions(void) { acpi_status status; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ev_initialize_op_regions); @@ -219,7 +219,6 @@ acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function) info->prefix_node = region_obj2->extra.method_REG; info->pathname = NULL; info->parameters = args; - info->parameter_type = ACPI_PARAM_ARGS; info->flags = ACPI_IGNORE_RETURN_VALUE; /* diff --git a/drivers/acpi/events/evrgnini.c b/drivers/acpi/events/evrgnini.c index 2e3d2c5e4f4d..6b94b38df07d 100644 --- a/drivers/acpi/events/evrgnini.c +++ b/drivers/acpi/events/evrgnini.c @@ -380,7 +380,7 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) acpi_status status; struct acpica_device_id hid; struct acpi_compatible_id_list *cid; - acpi_native_uint i; + u32 i; /* * Get the _HID and check for a PCI Root Bridge diff --git a/drivers/acpi/events/evxfevnt.c b/drivers/acpi/events/evxfevnt.c index 99a7502e6a87..73bfd6bf962f 100644 --- a/drivers/acpi/events/evxfevnt.c +++ b/drivers/acpi/events/evxfevnt.c @@ -472,7 +472,6 @@ acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number, u32 flags) } ACPI_EXPORT_SYMBOL(acpi_clear_gpe) -#ifdef ACPI_FUTURE_USAGE /******************************************************************************* * * FUNCTION: acpi_get_event_status @@ -489,6 +488,7 @@ ACPI_EXPORT_SYMBOL(acpi_clear_gpe) acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status) { acpi_status status = AE_OK; + u32 value; ACPI_FUNCTION_TRACE(acpi_get_event_status); @@ -506,7 +506,20 @@ acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status) status = acpi_get_register(acpi_gbl_fixed_event_info[event]. - status_register_id, event_status); + enable_register_id, &value); + if (ACPI_FAILURE(status)) + return_ACPI_STATUS(status); + + *event_status = value; + + status = + acpi_get_register(acpi_gbl_fixed_event_info[event]. + status_register_id, &value); + if (ACPI_FAILURE(status)) + return_ACPI_STATUS(status); + + if (value) + *event_status |= ACPI_EVENT_FLAG_SET; return_ACPI_STATUS(status); } @@ -566,7 +579,6 @@ acpi_get_gpe_status(acpi_handle gpe_device, } ACPI_EXPORT_SYMBOL(acpi_get_gpe_status) -#endif /* ACPI_FUTURE_USAGE */ /******************************************************************************* * * FUNCTION: acpi_install_gpe_block diff --git a/drivers/acpi/executer/exconfig.c b/drivers/acpi/executer/exconfig.c index 39d742190584..2a32c843cb4a 100644 --- a/drivers/acpi/executer/exconfig.c +++ b/drivers/acpi/executer/exconfig.c @@ -53,7 +53,7 @@ ACPI_MODULE_NAME("exconfig") /* Local prototypes */ static acpi_status -acpi_ex_add_table(acpi_native_uint table_index, +acpi_ex_add_table(u32 table_index, struct acpi_namespace_node *parent_node, union acpi_operand_object **ddb_handle); @@ -73,7 +73,7 @@ acpi_ex_add_table(acpi_native_uint table_index, ******************************************************************************/ static acpi_status -acpi_ex_add_table(acpi_native_uint table_index, +acpi_ex_add_table(u32 table_index, struct acpi_namespace_node *parent_node, union acpi_operand_object **ddb_handle) { @@ -96,7 +96,8 @@ acpi_ex_add_table(acpi_native_uint table_index, /* Install the new table into the local data structures */ - obj_desc->reference.object = ACPI_CAST_PTR(void, table_index); + obj_desc->reference.object = ACPI_CAST_PTR(void, + (unsigned long)table_index); /* Add the table to the namespace */ @@ -128,12 +129,12 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state, { acpi_status status; union acpi_operand_object **operand = &walk_state->operands[0]; - acpi_native_uint table_index; struct acpi_namespace_node *parent_node; struct acpi_namespace_node *start_node; struct acpi_namespace_node *parameter_node = NULL; union acpi_operand_object *ddb_handle; struct acpi_table_header *table; + u32 table_index; ACPI_FUNCTION_TRACE(ex_load_table_op); @@ -280,7 +281,7 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc, { union acpi_operand_object *ddb_handle; struct acpi_table_desc table_desc; - acpi_native_uint table_index; + u32 table_index; acpi_status status; u32 length; @@ -437,7 +438,7 @@ acpi_status acpi_ex_unload_table(union acpi_operand_object *ddb_handle) { acpi_status status = AE_OK; union acpi_operand_object *table_desc = ddb_handle; - acpi_native_uint table_index; + u32 table_index; struct acpi_table_header *table; ACPI_FUNCTION_TRACE(ex_unload_table); @@ -454,9 +455,9 @@ acpi_status acpi_ex_unload_table(union acpi_operand_object *ddb_handle) return_ACPI_STATUS(AE_BAD_PARAMETER); } - /* Get the table index from the ddb_handle */ + /* Get the table index from the ddb_handle (acpi_size for 64-bit case) */ - table_index = (acpi_native_uint) table_desc->reference.object; + table_index = (u32) (acpi_size) table_desc->reference.object; /* Invoke table handler if present */ diff --git a/drivers/acpi/executer/exconvrt.c b/drivers/acpi/executer/exconvrt.c index fd954b4ed83d..261d97516d9b 100644 --- a/drivers/acpi/executer/exconvrt.c +++ b/drivers/acpi/executer/exconvrt.c @@ -288,11 +288,11 @@ acpi_ex_convert_to_ascii(acpi_integer integer, u16 base, u8 * string, u8 data_width) { acpi_integer digit; - acpi_native_uint i; - acpi_native_uint j; - acpi_native_uint k = 0; - acpi_native_uint hex_length; - acpi_native_uint decimal_length; + u32 i; + u32 j; + u32 k = 0; + u32 hex_length; + u32 decimal_length; u32 remainder; u8 supress_zeros; @@ -348,7 +348,7 @@ acpi_ex_convert_to_ascii(acpi_integer integer, /* hex_length: 2 ascii hex chars per data byte */ - hex_length = (acpi_native_uint) ACPI_MUL_2(data_width); + hex_length = ACPI_MUL_2(data_width); for (i = 0, j = (hex_length - 1); i < hex_length; i++, j--) { /* Get one hex digit, most significant digits first */ diff --git a/drivers/acpi/executer/excreate.c b/drivers/acpi/executer/excreate.c index 60e62c4f0577..ad09696d5069 100644 --- a/drivers/acpi/executer/excreate.c +++ b/drivers/acpi/executer/excreate.c @@ -45,8 +45,6 @@ #include #include #include -#include -#include #define _COMPONENT ACPI_EXECUTER ACPI_MODULE_NAME("excreate") diff --git a/drivers/acpi/executer/exdump.c b/drivers/acpi/executer/exdump.c index 74f1b22601b3..2be2e2bf95bf 100644 --- a/drivers/acpi/executer/exdump.c +++ b/drivers/acpi/executer/exdump.c @@ -580,25 +580,22 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth) case ACPI_TYPE_BUFFER: - acpi_os_printf("Buffer len %X @ %p\n", + acpi_os_printf("Buffer length %.2X @ %p\n", obj_desc->buffer.length, obj_desc->buffer.pointer); - length = obj_desc->buffer.length; - if (length > 64) { - length = 64; - } - /* Debug only -- dump the buffer contents */ if (obj_desc->buffer.pointer) { - acpi_os_printf("Buffer Contents: "); - - for (index = 0; index < length; index++) { - acpi_os_printf(" %02x", - obj_desc->buffer.pointer[index]); + length = obj_desc->buffer.length; + if (length > 128) { + length = 128; } - acpi_os_printf("\n"); + + acpi_os_printf + ("Buffer Contents: (displaying length 0x%.2X)\n", + length); + ACPI_DUMP_BUFFER(obj_desc->buffer.pointer, length); } break; @@ -756,54 +753,42 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth) * * FUNCTION: acpi_ex_dump_operands * - * PARAMETERS: Operands - Operand list - * interpreter_mode - Load or Exec - * Ident - Identification - * num_levels - # of stack entries to dump above line - * Note - Output notation - * module_name - Caller's module name - * line_number - Caller's invocation line number + * PARAMETERS: Operands - A list of Operand objects + * opcode_name - AML opcode name + * num_operands - Operand count for this opcode * - * DESCRIPTION: Dump the object stack + * DESCRIPTION: Dump the operands associated with the opcode * ******************************************************************************/ void acpi_ex_dump_operands(union acpi_operand_object **operands, - acpi_interpreter_mode interpreter_mode, - char *ident, - u32 num_levels, - char *note, char *module_name, u32 line_number) + const char *opcode_name, u32 num_operands) { - acpi_native_uint i; - ACPI_FUNCTION_NAME(ex_dump_operands); - if (!ident) { - ident = "?"; - } - - if (!note) { - note = "?"; + if (!opcode_name) { + opcode_name = "UNKNOWN"; } ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "************* Operand Stack Contents (Opcode [%s], %d Operands)\n", - ident, num_levels)); + "**** Start operand dump for opcode [%s], %d operands\n", + opcode_name, num_operands)); - if (num_levels == 0) { - num_levels = 1; + if (num_operands == 0) { + num_operands = 1; } - /* Dump the operand stack starting at the top */ + /* Dump the individual operands */ - for (i = 0; num_levels > 0; i--, num_levels--) { - acpi_ex_dump_operand(operands[i], 0); + while (num_operands) { + acpi_ex_dump_operand(*operands, 0); + operands++; + num_operands--; } ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "************* Operand Stack dump from %s(%d), %s\n", - module_name, line_number, note)); + "**** End operand dump for [%s]\n", opcode_name)); return; } diff --git a/drivers/acpi/executer/exfldio.c b/drivers/acpi/executer/exfldio.c index e336b5dc7a50..9ff9d1f4615d 100644 --- a/drivers/acpi/executer/exfldio.c +++ b/drivers/acpi/executer/exfldio.c @@ -153,14 +153,15 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc, /* * Slack mode only: We will go ahead and allow access to this * field if it is within the region length rounded up to the next - * access width boundary. + * access width boundary. acpi_size cast for 64-bit compile. */ if (ACPI_ROUND_UP(rgn_desc->region.length, obj_desc->common_field. access_byte_width) >= - (obj_desc->common_field.base_byte_offset + - (acpi_native_uint) obj_desc->common_field. - access_byte_width + field_datum_byte_offset)) { + ((acpi_size) obj_desc->common_field. + base_byte_offset + + obj_desc->common_field.access_byte_width + + field_datum_byte_offset)) { return_ACPI_STATUS(AE_OK); } } diff --git a/drivers/acpi/executer/exmisc.c b/drivers/acpi/executer/exmisc.c index cc956a5b5267..731414a581a6 100644 --- a/drivers/acpi/executer/exmisc.c +++ b/drivers/acpi/executer/exmisc.c @@ -329,8 +329,8 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Result of two Strings is a String */ - return_desc = acpi_ut_create_string_object((acpi_size) - (operand0->string. + return_desc = acpi_ut_create_string_object(((acpi_size) + operand0->string. length + local_operand1-> string.length)); @@ -352,8 +352,8 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Result of two Buffers is a Buffer */ - return_desc = acpi_ut_create_buffer_object((acpi_size) - (operand0->buffer. + return_desc = acpi_ut_create_buffer_object(((acpi_size) + operand0->buffer. length + local_operand1-> buffer.length)); diff --git a/drivers/acpi/executer/exprep.c b/drivers/acpi/executer/exprep.c index 3a2f8cd4c62a..5d438c32989d 100644 --- a/drivers/acpi/executer/exprep.c +++ b/drivers/acpi/executer/exprep.c @@ -503,11 +503,11 @@ acpi_status acpi_ex_prep_field_value(struct acpi_create_field_info *info) */ second_desc = obj_desc->common.next_object; second_desc->extra.aml_start = - ((union acpi_parse_object *)(info->data_register_node))-> - named.data; + ACPI_CAST_PTR(union acpi_parse_object, + info->data_register_node)->named.data; second_desc->extra.aml_length = - ((union acpi_parse_object *)(info->data_register_node))-> - named.length; + ACPI_CAST_PTR(union acpi_parse_object, + info->data_register_node)->named.length; break; diff --git a/drivers/acpi/executer/exregion.c b/drivers/acpi/executer/exregion.c index 7cd8bb54fa01..7a41c409ae4d 100644 --- a/drivers/acpi/executer/exregion.c +++ b/drivers/acpi/executer/exregion.c @@ -156,7 +156,7 @@ acpi_ex_system_memory_space_handler(u32 function, /* Create a new mapping starting at the address given */ mem_info->mapped_logical_address = - acpi_os_map_memory((acpi_native_uint) address, window_size); + acpi_os_map_memory((acpi_physical_address) address, window_size); if (!mem_info->mapped_logical_address) { ACPI_ERROR((AE_INFO, "Could not map memory at %8.8X%8.8X, size %X", diff --git a/drivers/acpi/executer/exresop.c b/drivers/acpi/executer/exresop.c index 73e29e566a70..54085f16ec28 100644 --- a/drivers/acpi/executer/exresop.c +++ b/drivers/acpi/executer/exresop.c @@ -698,5 +698,9 @@ acpi_ex_resolve_operands(u16 opcode, } } + ACPI_DUMP_OPERANDS(walk_state->operands, + acpi_ps_get_opcode_name(opcode), + walk_state->num_operands); + return_ACPI_STATUS(status); } diff --git a/drivers/acpi/executer/exstore.c b/drivers/acpi/executer/exstore.c index 76c875bc3154..38b55e352495 100644 --- a/drivers/acpi/executer/exstore.c +++ b/drivers/acpi/executer/exstore.c @@ -343,12 +343,6 @@ acpi_ex_store(union acpi_operand_object *source_desc, acpi_ut_get_object_type_name(dest_desc), dest_desc)); - ACPI_DUMP_STACK_ENTRY(source_desc); - ACPI_DUMP_STACK_ENTRY(dest_desc); - ACPI_DUMP_OPERANDS(&dest_desc, ACPI_IMODE_EXECUTE, "ExStore", - 2, - "Target is not a Reference or Constant object"); - return_ACPI_STATUS(AE_AML_OPERAND_TYPE); } diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 6cf10cbc1eee..55c17afbe669 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -148,7 +148,7 @@ acpi_fan_write_state(struct file *file, const char __user * buffer, int result = 0; struct seq_file *m = file->private_data; struct acpi_device *device = m->private; - char state_string[12] = { '\0' }; + char state_string[3] = { '\0' }; if (count > sizeof(state_string) - 1) return -EINVAL; @@ -157,6 +157,12 @@ acpi_fan_write_state(struct file *file, const char __user * buffer, return -EFAULT; state_string[count] = '\0'; + if ((state_string[0] < '0') || (state_string[0] > '3')) + return -EINVAL; + if (state_string[1] == '\n') + state_string[1] = '\0'; + if (state_string[1] != '\0') + return -EINVAL; result = acpi_bus_set_power(device->handle, simple_strtoul(state_string, NULL, 0)); diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 9b227d4dc9c9..0f2dd81736bd 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -166,6 +166,8 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) "firmware_node"); ret = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, "physical_node"); + if (acpi_dev->wakeup.flags.valid) + device_set_wakeup_capable(dev, true); } return 0; @@ -333,6 +335,9 @@ static int __init acpi_rtc_init(void) { struct device *dev = get_rtc_dev(); + if (acpi_disabled) + return 0; + if (acpi_disabled) return 0; diff --git a/drivers/acpi/hardware/hwgpe.c b/drivers/acpi/hardware/hwgpe.c index 14bc4f456ae8..0b80db9d9197 100644 --- a/drivers/acpi/hardware/hwgpe.c +++ b/drivers/acpi/hardware/hwgpe.c @@ -53,6 +53,54 @@ static acpi_status acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, struct acpi_gpe_block_info *gpe_block); +/****************************************************************************** + * + * FUNCTION: acpi_hw_low_disable_gpe + * + * PARAMETERS: gpe_event_info - Info block for the GPE to be disabled + * + * RETURN: Status + * + * DESCRIPTION: Disable a single GPE in the enable register. + * + ******************************************************************************/ + +acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) +{ + struct acpi_gpe_register_info *gpe_register_info; + acpi_status status; + u32 enable_mask; + + /* Get the info block for the entire GPE register */ + + gpe_register_info = gpe_event_info->register_info; + if (!gpe_register_info) { + return (AE_NOT_EXIST); + } + + /* Get current value of the enable register that contains this GPE */ + + status = acpi_hw_low_level_read(ACPI_GPE_REGISTER_WIDTH, &enable_mask, + &gpe_register_info->enable_address); + if (ACPI_FAILURE(status)) { + return (status); + } + + /* Clear just the bit that corresponds to this GPE */ + + ACPI_CLEAR_BIT(enable_mask, + ((u32) 1 << + (gpe_event_info->gpe_number - + gpe_register_info->base_gpe_number))); + + /* Write the updated enable mask */ + + status = acpi_hw_low_level_write(ACPI_GPE_REGISTER_WIDTH, enable_mask, + &gpe_register_info->enable_address); + + return (status); +} + /****************************************************************************** * * FUNCTION: acpi_hw_write_gpe_enable_reg @@ -68,7 +116,7 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, ******************************************************************************/ acpi_status -acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info *gpe_event_info) +acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info * gpe_event_info) { struct acpi_gpe_register_info *gpe_register_info; acpi_status status; @@ -138,7 +186,6 @@ acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info) * ******************************************************************************/ -#ifdef ACPI_FUTURE_USAGE acpi_status acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info, acpi_event_status * event_status) @@ -198,7 +245,6 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info, unlock_and_exit: return (status); } -#endif /* ACPI_FUTURE_USAGE */ /****************************************************************************** * diff --git a/drivers/acpi/namespace/nsdump.c b/drivers/acpi/namespace/nsdump.c index 5445751b8a3e..0ab22004728a 100644 --- a/drivers/acpi/namespace/nsdump.c +++ b/drivers/acpi/namespace/nsdump.c @@ -73,7 +73,7 @@ acpi_ns_dump_one_device(acpi_handle obj_handle, void acpi_ns_print_pathname(u32 num_segments, char *pathname) { - acpi_native_uint i; + u32 i; ACPI_FUNCTION_NAME(ns_print_pathname); @@ -515,12 +515,12 @@ acpi_ns_dump_one_object(acpi_handle obj_handle, if (obj_type > ACPI_TYPE_LOCAL_MAX) { acpi_os_printf - ("(Ptr to ACPI Object type %X [UNKNOWN])\n", + ("(Pointer to ACPI Object type %.2X [UNKNOWN])\n", obj_type); bytes_to_dump = 32; } else { acpi_os_printf - ("(Ptr to ACPI Object type %X [%s])\n", + ("(Pointer to ACPI Object type %.2X [%s])\n", obj_type, acpi_ut_get_type_name(obj_type)); bytes_to_dump = sizeof(union acpi_operand_object); diff --git a/drivers/acpi/namespace/nseval.c b/drivers/acpi/namespace/nseval.c index 14bdfa92bea0..d369164e00b0 100644 --- a/drivers/acpi/namespace/nseval.c +++ b/drivers/acpi/namespace/nseval.c @@ -138,6 +138,41 @@ acpi_status acpi_ns_evaluate(struct acpi_evaluate_info * info) return_ACPI_STATUS(AE_NULL_OBJECT); } + /* + * Calculate the number of arguments being passed to the method + */ + + info->param_count = 0; + if (info->parameters) { + while (info->parameters[info->param_count]) + info->param_count++; + } + + /* Error if too few arguments were passed in */ + + if (info->param_count < info->obj_desc->method.param_count) { + ACPI_ERROR((AE_INFO, + "Insufficient arguments - " + "method [%4.4s] needs %d, found %d", + acpi_ut_get_node_name(info->resolved_node), + info->obj_desc->method.param_count, + info->param_count)); + return_ACPI_STATUS(AE_MISSING_ARGUMENTS); + } + + /* Just a warning if too many arguments */ + + else if (info->param_count > + info->obj_desc->method.param_count) { + ACPI_WARNING((AE_INFO, + "Excess arguments - " + "method [%4.4s] needs %d, found %d", + acpi_ut_get_node_name(info-> + resolved_node), + info->obj_desc->method.param_count, + info->param_count)); + } + ACPI_DUMP_PATHNAME(info->resolved_node, "Execute Method:", ACPI_LV_INFO, _COMPONENT); diff --git a/drivers/acpi/namespace/nsinit.c b/drivers/acpi/namespace/nsinit.c index 6d6d930c8e18..e4c57510d798 100644 --- a/drivers/acpi/namespace/nsinit.c +++ b/drivers/acpi/namespace/nsinit.c @@ -542,7 +542,6 @@ acpi_ns_init_one_device(acpi_handle obj_handle, info->prefix_node = device_node; info->pathname = METHOD_NAME__INI; info->parameters = NULL; - info->parameter_type = ACPI_PARAM_ARGS; info->flags = ACPI_IGNORE_RETURN_VALUE; /* diff --git a/drivers/acpi/namespace/nsload.c b/drivers/acpi/namespace/nsload.c index 2c92f6cf5ce1..a4a412b7c029 100644 --- a/drivers/acpi/namespace/nsload.c +++ b/drivers/acpi/namespace/nsload.c @@ -71,8 +71,7 @@ static acpi_status acpi_ns_delete_subtree(acpi_handle start_handle); ******************************************************************************/ acpi_status -acpi_ns_load_table(acpi_native_uint table_index, - struct acpi_namespace_node *node) +acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node) { acpi_status status; diff --git a/drivers/acpi/namespace/nsparse.c b/drivers/acpi/namespace/nsparse.c index 46a79b0103b6..a82271a9dbb3 100644 --- a/drivers/acpi/namespace/nsparse.c +++ b/drivers/acpi/namespace/nsparse.c @@ -63,13 +63,13 @@ ACPI_MODULE_NAME("nsparse") * ******************************************************************************/ acpi_status -acpi_ns_one_complete_parse(acpi_native_uint pass_number, - acpi_native_uint table_index, - struct acpi_namespace_node * start_node) +acpi_ns_one_complete_parse(u32 pass_number, + u32 table_index, + struct acpi_namespace_node *start_node) { union acpi_parse_object *parse_root; acpi_status status; - acpi_native_uint aml_length; + u32 aml_length; u8 *aml_start; struct acpi_walk_state *walk_state; struct acpi_table_header *table; @@ -112,8 +112,8 @@ acpi_ns_one_complete_parse(acpi_native_uint pass_number, aml_start = (u8 *) table + sizeof(struct acpi_table_header); aml_length = table->length - sizeof(struct acpi_table_header); status = acpi_ds_init_aml_walk(walk_state, parse_root, NULL, - aml_start, (u32) aml_length, - NULL, (u8) pass_number); + aml_start, aml_length, NULL, + (u8) pass_number); } if (ACPI_FAILURE(status)) { @@ -158,8 +158,7 @@ acpi_ns_one_complete_parse(acpi_native_uint pass_number, ******************************************************************************/ acpi_status -acpi_ns_parse_table(acpi_native_uint table_index, - struct acpi_namespace_node *start_node) +acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node) { acpi_status status; diff --git a/drivers/acpi/namespace/nsutils.c b/drivers/acpi/namespace/nsutils.c index 64c039843ed2..b0817e1127b1 100644 --- a/drivers/acpi/namespace/nsutils.c +++ b/drivers/acpi/namespace/nsutils.c @@ -73,9 +73,9 @@ acpi_name acpi_ns_find_parent_name(struct acpi_namespace_node *node_to_search); ******************************************************************************/ void -acpi_ns_report_error(char *module_name, +acpi_ns_report_error(const char *module_name, u32 line_number, - char *internal_name, acpi_status lookup_status) + const char *internal_name, acpi_status lookup_status) { acpi_status status; u32 bad_name; @@ -130,11 +130,11 @@ acpi_ns_report_error(char *module_name, ******************************************************************************/ void -acpi_ns_report_method_error(char *module_name, +acpi_ns_report_method_error(const char *module_name, u32 line_number, - char *message, + const char *message, struct acpi_namespace_node *prefix_node, - char *path, acpi_status method_status) + const char *path, acpi_status method_status) { acpi_status status; struct acpi_namespace_node *node = prefix_node; @@ -167,7 +167,8 @@ acpi_ns_report_method_error(char *module_name, ******************************************************************************/ void -acpi_ns_print_node_pathname(struct acpi_namespace_node *node, char *message) +acpi_ns_print_node_pathname(struct acpi_namespace_node *node, + const char *message) { struct acpi_buffer buffer; acpi_status status; @@ -296,7 +297,7 @@ u32 acpi_ns_local(acpi_object_type type) void acpi_ns_get_internal_name_length(struct acpi_namestring_info *info) { - char *next_external_char; + const char *next_external_char; u32 i; ACPI_FUNCTION_ENTRY(); @@ -363,9 +364,9 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) { u32 num_segments = info->num_segments; char *internal_name = info->internal_name; - char *external_name = info->next_external_char; + const char *external_name = info->next_external_char; char *result = NULL; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ns_build_internal_name); @@ -400,12 +401,11 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) result = &internal_name[i]; } else if (num_segments == 2) { internal_name[i] = AML_DUAL_NAME_PREFIX; - result = &internal_name[(acpi_native_uint) (i + 1)]; + result = &internal_name[(acpi_size) i + 1]; } else { internal_name[i] = AML_MULTI_NAME_PREFIX_OP; - internal_name[(acpi_native_uint) (i + 1)] = - (char)num_segments; - result = &internal_name[(acpi_native_uint) (i + 2)]; + internal_name[(acpi_size) i + 1] = (char)num_segments; + result = &internal_name[(acpi_size) i + 2]; } } @@ -472,7 +472,8 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) * *******************************************************************************/ -acpi_status acpi_ns_internalize_name(char *external_name, char **converted_name) +acpi_status +acpi_ns_internalize_name(const char *external_name, char **converted_name) { char *internal_name; struct acpi_namestring_info info; @@ -528,15 +529,15 @@ acpi_status acpi_ns_internalize_name(char *external_name, char **converted_name) acpi_status acpi_ns_externalize_name(u32 internal_name_length, - char *internal_name, + const char *internal_name, u32 * converted_name_length, char **converted_name) { - acpi_native_uint names_index = 0; - acpi_native_uint num_segments = 0; - acpi_native_uint required_length; - acpi_native_uint prefix_length = 0; - acpi_native_uint i = 0; - acpi_native_uint j = 0; + u32 names_index = 0; + u32 num_segments = 0; + u32 required_length; + u32 prefix_length = 0; + u32 i = 0; + u32 j = 0; ACPI_FUNCTION_TRACE(ns_externalize_name); @@ -582,9 +583,8 @@ acpi_ns_externalize_name(u32 internal_name_length, /* 4-byte names */ names_index = prefix_length + 2; - num_segments = (acpi_native_uint) (u8) - internal_name[(acpi_native_uint) - (prefix_length + 1)]; + num_segments = (u8) + internal_name[(acpi_size) prefix_length + 1]; break; case AML_DUAL_NAME_PREFIX: @@ -823,7 +823,7 @@ u32 acpi_ns_opens_scope(acpi_object_type type) acpi_status acpi_ns_get_node(struct acpi_namespace_node *prefix_node, - char *pathname, + const char *pathname, u32 flags, struct acpi_namespace_node **return_node) { union acpi_generic_state scope_info; diff --git a/drivers/acpi/namespace/nsxfeval.c b/drivers/acpi/namespace/nsxfeval.c index a8d549187c84..38be5865d95d 100644 --- a/drivers/acpi/namespace/nsxfeval.c +++ b/drivers/acpi/namespace/nsxfeval.c @@ -182,7 +182,6 @@ acpi_evaluate_object(acpi_handle handle, } info->pathname = pathname; - info->parameter_type = ACPI_PARAM_ARGS; /* Convert and validate the device handle */ @@ -442,7 +441,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, u32 flags; struct acpica_device_id hid; struct acpi_compatible_id_list *cid; - acpi_native_uint i; + u32 i; int found; status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 658e5f3abae0..cb9864e39bae 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -120,10 +120,10 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) struct acpi_srat_mem_affinity *p = (struct acpi_srat_mem_affinity *)header; ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "SRAT Memory (0x%lx length 0x%lx type 0x%x) in proximity domain %d %s%s\n", + "SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s\n", (unsigned long)p->base_address, (unsigned long)p->length, - p->memory_type, p->proximity_domain, + p->proximity_domain, (p->flags & ACPI_SRAT_MEM_ENABLED)? "enabled" : "disabled", (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)? diff --git a/drivers/acpi/parser/psargs.c b/drivers/acpi/parser/psargs.c index e94463778845..d830b29b85b1 100644 --- a/drivers/acpi/parser/psargs.c +++ b/drivers/acpi/parser/psargs.c @@ -76,7 +76,7 @@ acpi_ps_get_next_package_length(struct acpi_parse_state *parser_state) { u8 *aml = parser_state->aml; u32 package_length = 0; - acpi_native_uint byte_count; + u32 byte_count; u8 byte_zero_mask = 0x3F; /* Default [0:5] */ ACPI_FUNCTION_TRACE(ps_get_next_package_length); @@ -86,7 +86,7 @@ acpi_ps_get_next_package_length(struct acpi_parse_state *parser_state) * used to encode the package length, either 0,1,2, or 3 */ byte_count = (aml[0] >> 6); - parser_state->aml += (byte_count + 1); + parser_state->aml += ((acpi_size) byte_count + 1); /* Get bytes 3, 2, 1 as needed */ diff --git a/drivers/acpi/parser/psxface.c b/drivers/acpi/parser/psxface.c index 52581454c47c..270469aae842 100644 --- a/drivers/acpi/parser/psxface.c +++ b/drivers/acpi/parser/psxface.c @@ -333,9 +333,9 @@ acpi_status acpi_ps_execute_method(struct acpi_evaluate_info *info) static void acpi_ps_update_parameter_list(struct acpi_evaluate_info *info, u16 action) { - acpi_native_uint i; + u32 i; - if ((info->parameter_type == ACPI_PARAM_ARGS) && (info->parameters)) { + if (info->parameters) { /* Update reference count for each parameter */ diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 89022a74faee..11acaee14d66 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -162,7 +162,7 @@ do_prt_fixups(struct acpi_prt_entry *entry, struct acpi_pci_routing_table *prt) !strcmp(prt->source, quirk->source) && strlen(prt->source) >= strlen(quirk->actual_source)) { printk(KERN_WARNING PREFIX "firmware reports " - "%04x:%02x:%02x[%c] connected to %s; " + "%04x:%02x:%02x PCI INT %c connected to %s; " "changing to %s\n", entry->id.segment, entry->id.bus, entry->id.device, 'A' + entry->pin, @@ -429,7 +429,7 @@ acpi_pci_irq_derive(struct pci_dev *dev, { struct pci_dev *bridge = dev; int irq = -1; - u8 bridge_pin = 0; + u8 bridge_pin = 0, orig_pin = pin; if (!dev) @@ -463,8 +463,8 @@ acpi_pci_irq_derive(struct pci_dev *dev, } if (irq < 0) { - printk(KERN_WARNING PREFIX "Unable to derive IRQ for device %s\n", - pci_name(dev)); + dev_warn(&dev->dev, "can't derive routing for PCI INT %c\n", + 'A' + orig_pin); return -1; } @@ -487,6 +487,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) int triggering = ACPI_LEVEL_SENSITIVE; int polarity = ACPI_ACTIVE_LOW; char *link = NULL; + char link_desc[16]; int rc; @@ -503,7 +504,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) pin--; if (!dev->bus) { - printk(KERN_ERR PREFIX "Invalid (NULL) 'bus' field\n"); + dev_err(&dev->dev, "invalid (NULL) 'bus' field\n"); return -ENODEV; } @@ -538,8 +539,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) * driver reported one, then use it. Exit in any case. */ if (irq < 0) { - printk(KERN_WARNING PREFIX "PCI Interrupt %s[%c]: no GSI", - pci_name(dev), ('A' + pin)); + dev_warn(&dev->dev, "PCI INT %c: no GSI", 'A' + pin); /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF)) { printk(" - using IRQ %d\n", dev->irq); @@ -554,21 +554,21 @@ int acpi_pci_irq_enable(struct pci_dev *dev) rc = acpi_register_gsi(irq, triggering, polarity); if (rc < 0) { - printk(KERN_WARNING PREFIX "PCI Interrupt %s[%c]: failed " - "to register GSI\n", pci_name(dev), ('A' + pin)); + dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n", + 'A' + pin); return rc; } dev->irq = rc; - printk(KERN_INFO PREFIX "PCI Interrupt %s[%c] -> ", - pci_name(dev), 'A' + pin); - if (link) - printk("Link [%s] -> ", link); + snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link); + else + link_desc[0] = '\0'; - printk("GSI %u (%s, %s) -> IRQ %d\n", irq, - (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge", - (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq); + dev_info(&dev->dev, "PCI INT %c%s -> GSI %u (%s, %s) -> IRQ %d\n", + 'A' + pin, link_desc, irq, + (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge", + (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq); return 0; } @@ -616,10 +616,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev) * (e.g. PCI_UNDEFINED_IRQ). */ - printk(KERN_INFO PREFIX "PCI interrupt for device %s disabled\n", - pci_name(dev)); - + dev_info(&dev->dev, "PCI INT %c disabled\n", 'A' + pin); acpi_unregister_gsi(gsi); - - return; } diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c new file mode 100644 index 000000000000..b9ab030a52d5 --- /dev/null +++ b/drivers/acpi/pci_slot.c @@ -0,0 +1,368 @@ +/* + * pci_slot.c - ACPI PCI Slot Driver + * + * The code here is heavily leveraged from the acpiphp module. + * Thanks to Matthew Wilcox for much guidance. + * Thanks to Kenji Kaneshige for code + * review and fixes. + * + * Copyright (C) 2007 Alex Chiang + * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static int debug; +static int check_sta_before_sun; + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Alex Chiang " +#define DRIVER_DESC "ACPI PCI Slot Detection Driver" +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); +MODULE_PARM_DESC(debug, "Debugging mode enabled or not"); +module_param(debug, bool, 0644); + +#define _COMPONENT ACPI_PCI_COMPONENT +ACPI_MODULE_NAME("pci_slot"); + +#define MY_NAME "pci_slot" +#define err(format, arg...) printk(KERN_ERR "%s: " format , MY_NAME , ## arg) +#define info(format, arg...) printk(KERN_INFO "%s: " format , MY_NAME , ## arg) +#define dbg(format, arg...) \ + do { \ + if (debug) \ + printk(KERN_DEBUG "%s: " format, \ + MY_NAME , ## arg); \ + } while (0) + +#define SLOT_NAME_SIZE 20 /* Inspired by #define in acpiphp.h */ + +struct acpi_pci_slot { + acpi_handle root_handle; /* handle of the root bridge */ + struct pci_slot *pci_slot; /* corresponding pci_slot */ + struct list_head list; /* node in the list of slots */ +}; + +static int acpi_pci_slot_add(acpi_handle handle); +static void acpi_pci_slot_remove(acpi_handle handle); + +static LIST_HEAD(slot_list); +static DEFINE_MUTEX(slot_list_lock); +static struct acpi_pci_driver acpi_pci_slot_driver = { + .add = acpi_pci_slot_add, + .remove = acpi_pci_slot_remove, +}; + +static int +check_slot(acpi_handle handle, int *device, unsigned long *sun) +{ + int retval = 0; + unsigned long adr, sta; + acpi_status status; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); + dbg("Checking slot on path: %s\n", (char *)buffer.pointer); + + if (check_sta_before_sun) { + /* If SxFy doesn't have _STA, we just assume it's there */ + status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); + if (ACPI_SUCCESS(status) && !(sta & ACPI_STA_DEVICE_PRESENT)) { + retval = -1; + goto out; + } + } + + status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); + if (ACPI_FAILURE(status)) { + dbg("_ADR returned %d on %s\n", status, (char *)buffer.pointer); + retval = -1; + goto out; + } + + *device = (adr >> 16) & 0xffff; + + /* No _SUN == not a slot == bail */ + status = acpi_evaluate_integer(handle, "_SUN", NULL, sun); + if (ACPI_FAILURE(status)) { + dbg("_SUN returned %d on %s\n", status, (char *)buffer.pointer); + retval = -1; + goto out; + } + +out: + kfree(buffer.pointer); + return retval; +} + +struct callback_args { + acpi_walk_callback user_function; /* only for walk_p2p_bridge */ + struct pci_bus *pci_bus; + acpi_handle root_handle; +}; + +/* + * register_slot + * + * Called once for each SxFy object in the namespace. Don't worry about + * calling pci_create_slot multiple times for the same pci_bus:device, + * since each subsequent call simply bumps the refcount on the pci_slot. + * + * The number of calls to pci_destroy_slot from unregister_slot is + * symmetrical. + */ +static acpi_status +register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + int device; + unsigned long sun; + char name[SLOT_NAME_SIZE]; + struct acpi_pci_slot *slot; + struct pci_slot *pci_slot; + struct callback_args *parent_context = context; + struct pci_bus *pci_bus = parent_context->pci_bus; + + if (check_slot(handle, &device, &sun)) + return AE_OK; + + slot = kmalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + err("%s: cannot allocate memory\n", __func__); + return AE_OK; + } + + snprintf(name, sizeof(name), "%u", (u32)sun); + pci_slot = pci_create_slot(pci_bus, device, name); + if (IS_ERR(pci_slot)) { + err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot)); + kfree(slot); + } + + slot->root_handle = parent_context->root_handle; + slot->pci_slot = pci_slot; + INIT_LIST_HEAD(&slot->list); + mutex_lock(&slot_list_lock); + list_add(&slot->list, &slot_list); + mutex_unlock(&slot_list_lock); + + dbg("pci_slot: %p, pci_bus: %x, device: %d, name: %s\n", + pci_slot, pci_bus->number, device, name); + + return AE_OK; +} + +/* + * walk_p2p_bridge - discover and walk p2p bridges + * @handle: points to an acpi_pci_root + * @context: p2p_bridge_context pointer + * + * Note that when we call ourselves recursively, we pass a different + * value of pci_bus in the child_context. + */ +static acpi_status +walk_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + int device, function; + unsigned long adr; + acpi_status status; + acpi_handle dummy_handle; + acpi_walk_callback user_function; + + struct pci_dev *dev; + struct pci_bus *pci_bus; + struct callback_args child_context; + struct callback_args *parent_context = context; + + pci_bus = parent_context->pci_bus; + user_function = parent_context->user_function; + + status = acpi_get_handle(handle, "_ADR", &dummy_handle); + if (ACPI_FAILURE(status)) + return AE_OK; + + status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); + if (ACPI_FAILURE(status)) + return AE_OK; + + device = (adr >> 16) & 0xffff; + function = adr & 0xffff; + + dev = pci_get_slot(pci_bus, PCI_DEVFN(device, function)); + if (!dev || !dev->subordinate) + goto out; + + child_context.pci_bus = dev->subordinate; + child_context.user_function = user_function; + child_context.root_handle = parent_context->root_handle; + + dbg("p2p bridge walk, pci_bus = %x\n", dev->subordinate->number); + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + user_function, &child_context, NULL); + if (ACPI_FAILURE(status)) + goto out; + + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + walk_p2p_bridge, &child_context, NULL); +out: + pci_dev_put(dev); + return AE_OK; +} + +/* + * walk_root_bridge - generic root bridge walker + * @handle: points to an acpi_pci_root + * @user_function: user callback for slot objects + * + * Call user_function for all objects underneath this root bridge. + * Walk p2p bridges underneath us and call user_function on those too. + */ +static int +walk_root_bridge(acpi_handle handle, acpi_walk_callback user_function) +{ + int seg, bus; + unsigned long tmp; + acpi_status status; + acpi_handle dummy_handle; + struct pci_bus *pci_bus; + struct callback_args context; + + /* If the bridge doesn't have _STA, we assume it is always there */ + status = acpi_get_handle(handle, "_STA", &dummy_handle); + if (ACPI_SUCCESS(status)) { + status = acpi_evaluate_integer(handle, "_STA", NULL, &tmp); + if (ACPI_FAILURE(status)) { + info("%s: _STA evaluation failure\n", __func__); + return 0; + } + if ((tmp & ACPI_STA_DEVICE_FUNCTIONING) == 0) + /* don't register this object */ + return 0; + } + + status = acpi_evaluate_integer(handle, "_SEG", NULL, &tmp); + seg = ACPI_SUCCESS(status) ? tmp : 0; + + status = acpi_evaluate_integer(handle, "_BBN", NULL, &tmp); + bus = ACPI_SUCCESS(status) ? tmp : 0; + + pci_bus = pci_find_bus(seg, bus); + if (!pci_bus) + return 0; + + context.pci_bus = pci_bus; + context.user_function = user_function; + context.root_handle = handle; + + dbg("root bridge walk, pci_bus = %x\n", pci_bus->number); + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + user_function, &context, NULL); + if (ACPI_FAILURE(status)) + return status; + + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + walk_p2p_bridge, &context, NULL); + if (ACPI_FAILURE(status)) + err("%s: walk_p2p_bridge failure - %d\n", __func__, status); + + return status; +} + +/* + * acpi_pci_slot_add + * @handle: points to an acpi_pci_root + */ +static int +acpi_pci_slot_add(acpi_handle handle) +{ + acpi_status status; + + status = walk_root_bridge(handle, register_slot); + if (ACPI_FAILURE(status)) + err("%s: register_slot failure - %d\n", __func__, status); + + return status; +} + +/* + * acpi_pci_slot_remove + * @handle: points to an acpi_pci_root + */ +static void +acpi_pci_slot_remove(acpi_handle handle) +{ + struct acpi_pci_slot *slot, *tmp; + + mutex_lock(&slot_list_lock); + list_for_each_entry_safe(slot, tmp, &slot_list, list) { + if (slot->root_handle == handle) { + list_del(&slot->list); + pci_destroy_slot(slot->pci_slot); + kfree(slot); + } + } + mutex_unlock(&slot_list_lock); +} + +static int do_sta_before_sun(const struct dmi_system_id *d) +{ + info("%s detected: will evaluate _STA before calling _SUN\n", d->ident); + check_sta_before_sun = 1; + return 0; +} + +static struct dmi_system_id acpi_pci_slot_dmi_table[] __initdata = { + /* + * Fujitsu Primequest machines will return 1023 to indicate an + * error if the _SUN method is evaluated on SxFy objects that + * are not present (as indicated by _STA), so for those machines, + * we want to check _STA before evaluating _SUN. + */ + { + .callback = do_sta_before_sun, + .ident = "Fujitsu PRIMEQUEST", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "FUJITSU LIMITED"), + DMI_MATCH(DMI_BIOS_VERSION, "PRIMEQUEST"), + }, + }, + {} +}; + +static int __init +acpi_pci_slot_init(void) +{ + dmi_check_system(acpi_pci_slot_dmi_table); + acpi_pci_register_driver(&acpi_pci_slot_driver); + return 0; +} + +static void __exit +acpi_pci_slot_exit(void) +{ + acpi_pci_unregister_driver(&acpi_pci_slot_driver); +} + +module_init(acpi_pci_slot_init); +module_exit(acpi_pci_slot_exit); diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 81e4f081a4ae..4ab21cb1c8c7 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -292,69 +292,135 @@ static int acpi_power_off_device(acpi_handle handle, struct acpi_device *dev) return 0; } +/** + * acpi_device_sleep_wake - execute _DSW (Device Sleep Wake) or (deprecated in + * ACPI 3.0) _PSW (Power State Wake) + * @dev: Device to handle. + * @enable: 0 - disable, 1 - enable the wake capabilities of the device. + * @sleep_state: Target sleep state of the system. + * @dev_state: Target power state of the device. + * + * Execute _DSW (Device Sleep Wake) or (deprecated in ACPI 3.0) _PSW (Power + * State Wake) for the device, if present. On failure reset the device's + * wakeup.flags.valid flag. + * + * RETURN VALUE: + * 0 if either _DSW or _PSW has been successfully executed + * 0 if neither _DSW nor _PSW has been found + * -ENODEV if the execution of either _DSW or _PSW has failed + */ +int acpi_device_sleep_wake(struct acpi_device *dev, + int enable, int sleep_state, int dev_state) +{ + union acpi_object in_arg[3]; + struct acpi_object_list arg_list = { 3, in_arg }; + acpi_status status = AE_OK; + + /* + * Try to execute _DSW first. + * + * Three agruments are needed for the _DSW object: + * Argument 0: enable/disable the wake capabilities + * Argument 1: target system state + * Argument 2: target device state + * When _DSW object is called to disable the wake capabilities, maybe + * the first argument is filled. The values of the other two agruments + * are meaningless. + */ + in_arg[0].type = ACPI_TYPE_INTEGER; + in_arg[0].integer.value = enable; + in_arg[1].type = ACPI_TYPE_INTEGER; + in_arg[1].integer.value = sleep_state; + in_arg[2].type = ACPI_TYPE_INTEGER; + in_arg[2].integer.value = dev_state; + status = acpi_evaluate_object(dev->handle, "_DSW", &arg_list, NULL); + if (ACPI_SUCCESS(status)) { + return 0; + } else if (status != AE_NOT_FOUND) { + printk(KERN_ERR PREFIX "_DSW execution failed\n"); + dev->wakeup.flags.valid = 0; + return -ENODEV; + } + + /* Execute _PSW */ + arg_list.count = 1; + in_arg[0].integer.value = enable; + status = acpi_evaluate_object(dev->handle, "_PSW", &arg_list, NULL); + if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { + printk(KERN_ERR PREFIX "_PSW execution failed\n"); + dev->wakeup.flags.valid = 0; + return -ENODEV; + } + + return 0; +} + /* * Prepare a wakeup device, two steps (Ref ACPI 2.0:P229): * 1. Power on the power resources required for the wakeup device - * 2. Enable _PSW (power state wake) for the device if present + * 2. Execute _DSW (Device Sleep Wake) or (deprecated in ACPI 3.0) _PSW (Power + * State Wake) for the device, if present */ -int acpi_enable_wakeup_device_power(struct acpi_device *dev) +int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state) { - union acpi_object arg = { ACPI_TYPE_INTEGER }; - struct acpi_object_list arg_list = { 1, &arg }; - acpi_status status = AE_OK; - int i; - int ret = 0; + int i, err; if (!dev || !dev->wakeup.flags.valid) - return -1; + return -EINVAL; + + /* + * Do not execute the code below twice in a row without calling + * acpi_disable_wakeup_device_power() in between for the same device + */ + if (dev->wakeup.flags.prepared) + return 0; - arg.integer.value = 1; /* Open power resource */ for (i = 0; i < dev->wakeup.resources.count; i++) { - ret = acpi_power_on(dev->wakeup.resources.handles[i], dev); + int ret = acpi_power_on(dev->wakeup.resources.handles[i], dev); if (ret) { printk(KERN_ERR PREFIX "Transition power state\n"); dev->wakeup.flags.valid = 0; - return -1; + return -ENODEV; } } - /* Execute PSW */ - status = acpi_evaluate_object(dev->handle, "_PSW", &arg_list, NULL); - if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { - printk(KERN_ERR PREFIX "Evaluate _PSW\n"); - dev->wakeup.flags.valid = 0; - ret = -1; - } + /* + * Passing 3 as the third argument below means the device may be placed + * in arbitrary power state afterwards. + */ + err = acpi_device_sleep_wake(dev, 1, sleep_state, 3); + if (!err) + dev->wakeup.flags.prepared = 1; - return ret; + return err; } /* * Shutdown a wakeup device, counterpart of above method - * 1. Disable _PSW (power state wake) + * 1. Execute _DSW (Device Sleep Wake) or (deprecated in ACPI 3.0) _PSW (Power + * State Wake) for the device, if present * 2. Shutdown down the power resources */ int acpi_disable_wakeup_device_power(struct acpi_device *dev) { - union acpi_object arg = { ACPI_TYPE_INTEGER }; - struct acpi_object_list arg_list = { 1, &arg }; - acpi_status status = AE_OK; - int i; - int ret = 0; - + int i, ret; if (!dev || !dev->wakeup.flags.valid) - return -1; + return -EINVAL; - arg.integer.value = 0; - /* Execute PSW */ - status = acpi_evaluate_object(dev->handle, "_PSW", &arg_list, NULL); - if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { - printk(KERN_ERR PREFIX "Evaluate _PSW\n"); - dev->wakeup.flags.valid = 0; - return -1; - } + /* + * Do not execute the code below twice in a row without calling + * acpi_enable_wakeup_device_power() in between for the same device + */ + if (!dev->wakeup.flags.prepared) + return 0; + + dev->wakeup.flags.prepared = 0; + + ret = acpi_device_sleep_wake(dev, 0, 0, 0); + if (ret) + return ret; /* Close power resource */ for (i = 0; i < dev->wakeup.resources.count; i++) { @@ -362,7 +428,7 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev) if (ret) { printk(KERN_ERR PREFIX "Transition power state\n"); dev->wakeup.flags.valid = 0; - return -1; + return -ENODEV; } } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 9dd0fa93b9e1..ec0f2d581ece 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -118,8 +118,31 @@ static const struct file_operations acpi_processor_info_fops = { .release = single_release, }; -struct acpi_processor *processors[NR_CPUS]; +DEFINE_PER_CPU(struct acpi_processor *, processors); struct acpi_processor_errata errata __read_mostly; +static int set_no_mwait(const struct dmi_system_id *id) +{ + printk(KERN_NOTICE PREFIX "%s detected - " + "disable mwait for CPU C-stetes\n", id->ident); + idle_nomwait = 1; + return 0; +} + +static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { + { + set_no_mwait, "IFL91 board", { + DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), + DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), + DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, + { + set_no_mwait, "Extensa 5220", { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), + DMI_MATCH(DMI_SYS_VENDOR, "ACER"), + DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), + DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, + {}, +}; /* -------------------------------------------------------------------------- Errata Handling @@ -265,7 +288,20 @@ static int acpi_processor_set_pdc(struct acpi_processor *pr) if (!pdc_in) return status; + if (idle_nomwait) { + /* + * If mwait is disabled for CPU C-states, the C2C3_FFH access + * mode will be disabled in the parameter of _PDC object. + * Of course C1_FFH access mode will also be disabled. + */ + union acpi_object *obj; + u32 *buffer = NULL; + obj = pdc_in->pointer; + buffer = (u32 *)(obj->buffer.pointer); + buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); + + } status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); if (ACPI_FAILURE(status)) @@ -614,14 +650,14 @@ static int acpi_processor_get_info(struct acpi_processor *pr, unsigned has_uid) return 0; } -static void *processor_device_array[NR_CPUS]; +static DEFINE_PER_CPU(void *, processor_device_array); static int __cpuinit acpi_processor_start(struct acpi_device *device) { int result = 0; acpi_status status = AE_OK; struct acpi_processor *pr; - + struct sys_device *sysdev; pr = acpi_driver_data(device); @@ -638,20 +674,24 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device) * ACPI id of processors can be reported wrongly by the BIOS. * Don't trust it blindly */ - if (processor_device_array[pr->id] != NULL && - processor_device_array[pr->id] != device) { + if (per_cpu(processor_device_array, pr->id) != NULL && + per_cpu(processor_device_array, pr->id) != device) { printk(KERN_WARNING "BIOS reported wrong ACPI id " "for the processor\n"); return -ENODEV; } - processor_device_array[pr->id] = device; + per_cpu(processor_device_array, pr->id) = device; - processors[pr->id] = pr; + per_cpu(processors, pr->id) = pr; result = acpi_processor_add_fs(device); if (result) goto end; + sysdev = get_cpu_sysdev(pr->id); + if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) + return -EFAULT; + status = acpi_install_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, acpi_processor_notify, pr); @@ -749,7 +789,7 @@ static int acpi_cpu_soft_notify(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct acpi_processor *pr = processors[cpu]; + struct acpi_processor *pr = per_cpu(processors, cpu); if (action == CPU_ONLINE && pr) { acpi_processor_ppc_has_changed(pr); @@ -810,6 +850,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type) status = acpi_remove_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, acpi_processor_notify); + sysfs_remove_link(&device->dev.kobj, "sysdev"); + acpi_processor_remove_fs(device); if (pr->cdev) { @@ -819,8 +861,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type) pr->cdev = NULL; } - processors[pr->id] = NULL; - processor_device_array[pr->id] = NULL; + per_cpu(processors, pr->id) = NULL; + per_cpu(processor_device_array, pr->id) = NULL; kfree(pr); return 0; @@ -1014,9 +1056,9 @@ static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) static int acpi_processor_handle_eject(struct acpi_processor *pr) { - if (cpu_online(pr->id)) { - return (-EINVAL); - } + if (cpu_online(pr->id)) + cpu_down(pr->id); + arch_unregister_cpu(pr->id); acpi_unmap_lsapic(pr->id); return (0); @@ -1068,8 +1110,6 @@ static int __init acpi_processor_init(void) { int result = 0; - - memset(&processors, 0, sizeof(processors)); memset(&errata, 0, sizeof(errata)); #ifdef CONFIG_SMP @@ -1083,6 +1123,11 @@ static int __init acpi_processor_init(void) return -ENOMEM; acpi_processor_dir->owner = THIS_MODULE; + /* + * Check whether the system is DMI table. If yes, OSPM + * should not use mwait for CPU-states. + */ + dmi_check_system(processor_idle_dmi_table); result = cpuidle_register_driver(&acpi_idle_driver); if (result < 0) goto out_proc; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 4976e5db2b3f..d592dbb1d12a 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -41,6 +41,7 @@ #include #include #include +#include /* * Include the apic definitions for x86 to have the APIC timer related defines @@ -57,6 +58,7 @@ #include #include +#include #define ACPI_PROCESSOR_COMPONENT 0x01000000 #define ACPI_PROCESSOR_CLASS "processor" @@ -401,7 +403,7 @@ static void acpi_processor_idle(void) */ local_irq_disable(); - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (!pr) { local_irq_enable(); return; @@ -955,6 +957,21 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) } else { continue; } + if (cx.type == ACPI_STATE_C1 && + (idle_halt || idle_nomwait)) { + /* + * In most cases the C1 space_id obtained from + * _CST object is FIXED_HARDWARE access mode. + * But when the option of idle=halt is added, + * the entry_method type should be changed from + * CSTATE_FFH to CSTATE_HALT. + * When the option of idle=nomwait is added, + * the C1 entry_method type should be + * CSTATE_HALT. + */ + cx.entry_method = ACPI_CSTATE_HALT; + snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); + } } else { snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x", cx.address); @@ -1431,7 +1448,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, struct acpi_processor *pr; struct acpi_processor_cx *cx = cpuidle_get_statedata(state); - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (unlikely(!pr)) return 0; @@ -1471,7 +1488,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, u32 t1, t2; int sleep_ticks = 0; - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (unlikely(!pr)) return 0; @@ -1549,7 +1566,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, u32 t1, t2; int sleep_ticks = 0; - pr = processors[smp_processor_id()]; + pr = __get_cpu_var(processors); if (unlikely(!pr)) return 0; @@ -1780,6 +1797,15 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, return 0; if (!first_run) { + if (idle_halt) { + /* + * When the boot option of "idle=halt" is added, halt + * is used for CPU IDLE. + * In such case C2/C3 is meaningless. So the max_cstate + * is set to one. + */ + max_cstate = 1; + } dmi_check_system(processor_power_dmi_table); max_cstate = acpi_processor_cstate_check(max_cstate); if (max_cstate < ACPI_C_STATES_MAX) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index d80b2d1441af..b4749969c6b4 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -89,7 +89,7 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb, if (event != CPUFREQ_INCOMPATIBLE) goto out; - pr = processors[policy->cpu]; + pr = per_cpu(processors, policy->cpu); if (!pr || !pr->performance) goto out; @@ -572,7 +572,7 @@ int acpi_processor_preregister_performance( /* Call _PSD for all CPUs */ for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) { /* Look only at processors in ACPI namespace */ continue; @@ -603,7 +603,7 @@ int acpi_processor_preregister_performance( * domain info. */ for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -624,7 +624,7 @@ int acpi_processor_preregister_performance( cpus_clear(covered_cpus); for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -651,7 +651,7 @@ int acpi_processor_preregister_performance( if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -680,7 +680,7 @@ int acpi_processor_preregister_performance( if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -697,7 +697,7 @@ int acpi_processor_preregister_performance( err_ret: for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr || !pr->performance) continue; @@ -728,7 +728,7 @@ acpi_processor_register_performance(struct acpi_processor_performance mutex_lock(&performance_mutex); - pr = processors[cpu]; + pr = per_cpu(processors, cpu); if (!pr) { mutex_unlock(&performance_mutex); return -ENODEV; @@ -766,7 +766,7 @@ acpi_processor_unregister_performance(struct acpi_processor_performance mutex_lock(&performance_mutex); - pr = processors[cpu]; + pr = per_cpu(processors, cpu); if (!pr) { mutex_unlock(&performance_mutex); return; diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index bb06738860c4..0622ace05220 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -71,7 +71,7 @@ static int acpi_processor_update_tsd_coord(void) * coordination between all CPUs. */ for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -93,7 +93,7 @@ static int acpi_processor_update_tsd_coord(void) cpus_clear(covered_cpus); for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -119,7 +119,7 @@ static int acpi_processor_update_tsd_coord(void) if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -152,7 +152,7 @@ static int acpi_processor_update_tsd_coord(void) if (i == j) continue; - match_pr = processors[j]; + match_pr = per_cpu(processors, j); if (!match_pr) continue; @@ -172,7 +172,7 @@ static int acpi_processor_update_tsd_coord(void) err_ret: for_each_possible_cpu(i) { - pr = processors[i]; + pr = per_cpu(processors, i); if (!pr) continue; @@ -214,7 +214,7 @@ static int acpi_processor_throttling_notifier(unsigned long event, void *data) struct acpi_processor_throttling *p_throttling; cpu = p_tstate->cpu; - pr = processors[cpu]; + pr = per_cpu(processors, cpu); if (!pr) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Invalid pr pointer\n")); return 0; @@ -1035,7 +1035,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * cpus. */ for_each_cpu_mask(i, online_throttling_cpus) { - match_pr = processors[i]; + match_pr = per_cpu(processors, i); /* * If the pointer is invalid, we will report the * error message and continue. @@ -1232,7 +1232,10 @@ static ssize_t acpi_processor_write_throttling(struct file *file, int result = 0; struct seq_file *m = file->private_data; struct acpi_processor *pr = m->private; - char state_string[12] = { '\0' }; + char state_string[5] = ""; + char *charp = NULL; + size_t state_val = 0; + char tmpbuf[5] = ""; if (!pr || (count > sizeof(state_string) - 1)) return -EINVAL; @@ -1241,10 +1244,23 @@ static ssize_t acpi_processor_write_throttling(struct file *file, return -EFAULT; state_string[count] = '\0'; + if ((count > 0) && (state_string[count-1] == '\n')) + state_string[count-1] = '\0'; - result = acpi_processor_set_throttling(pr, - simple_strtoul(state_string, - NULL, 0)); + charp = state_string; + if ((state_string[0] == 't') || (state_string[0] == 'T')) + charp++; + + state_val = simple_strtoul(charp, NULL, 0); + if (state_val >= pr->throttling.state_count) + return -EINVAL; + + snprintf(tmpbuf, 5, "%zu", state_val); + + if (strcmp(tmpbuf, charp) != 0) + return -EINVAL; + + result = acpi_processor_set_throttling(pr, state_val); if (result) return result; diff --git a/drivers/acpi/reboot.c b/drivers/acpi/reboot.c new file mode 100644 index 000000000000..a6b662c00b67 --- /dev/null +++ b/drivers/acpi/reboot.c @@ -0,0 +1,50 @@ + +#include +#include +#include + +void acpi_reboot(void) +{ + struct acpi_generic_address *rr; + struct pci_bus *bus0; + u8 reset_value; + unsigned int devfn; + + if (acpi_disabled) + return; + + rr = &acpi_gbl_FADT.reset_register; + + /* Is the reset register supported? */ + if (!(acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER) || + rr->bit_width != 8 || rr->bit_offset != 0) + return; + + reset_value = acpi_gbl_FADT.reset_value; + + /* The reset register can only exist in I/O, Memory or PCI config space + * on a device on bus 0. */ + switch (rr->space_id) { + case ACPI_ADR_SPACE_PCI_CONFIG: + /* The reset register can only live on bus 0. */ + bus0 = pci_find_bus(0, 0); + if (!bus0) + return; + /* Form PCI device/function pair. */ + devfn = PCI_DEVFN((rr->address >> 32) & 0xffff, + (rr->address >> 16) & 0xffff); + printk(KERN_DEBUG "Resetting with ACPI PCI RESET_REG."); + /* Write the value that resets us. */ + pci_bus_write_config_byte(bus0, devfn, + (rr->address & 0xffff), reset_value); + break; + + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + case ACPI_ADR_SPACE_SYSTEM_IO: + printk(KERN_DEBUG "ACPI MEMORY or I/O RESET_REG.\n"); + acpi_hw_low_level_write(8, reset_value, rr); + break; + } + /* Wait ten seconds */ + acpi_os_stall(10000000); +} diff --git a/drivers/acpi/resources/rscalc.c b/drivers/acpi/resources/rscalc.c index 8a112d11d491..f61ebc679e66 100644 --- a/drivers/acpi/resources/rscalc.c +++ b/drivers/acpi/resources/rscalc.c @@ -73,7 +73,7 @@ acpi_rs_stream_option_length(u32 resource_length, u32 minimum_total_length); static u8 acpi_rs_count_set_bits(u16 bit_field) { - acpi_native_uint bits_set; + u8 bits_set; ACPI_FUNCTION_ENTRY(); @@ -84,7 +84,7 @@ static u8 acpi_rs_count_set_bits(u16 bit_field) bit_field &= (u16) (bit_field - 1); } - return ((u8) bits_set); + return bits_set; } /******************************************************************************* diff --git a/drivers/acpi/resources/rscreate.c b/drivers/acpi/resources/rscreate.c index faddaee1bc07..7804a8c40e7a 100644 --- a/drivers/acpi/resources/rscreate.c +++ b/drivers/acpi/resources/rscreate.c @@ -181,9 +181,9 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, } /* - * Loop through the ACPI_INTERNAL_OBJECTS - Each object - * should be a package that in turn contains an - * acpi_integer Address, a u8 Pin, a Name and a u8 source_index. + * Loop through the ACPI_INTERNAL_OBJECTS - Each object should be a + * package that in turn contains an acpi_integer Address, a u8 Pin, + * a Name, and a u8 source_index. */ top_object_list = package_object->package.elements; number_of_elements = package_object->package.count; @@ -240,9 +240,7 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, /* 1) First subobject: Dereference the PRT.Address */ obj_desc = sub_object_list[0]; - if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) { - user_prt->address = obj_desc->integer.value; - } else { + if (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER) { ACPI_ERROR((AE_INFO, "(PRT[%X].Address) Need Integer, found %s", index, @@ -250,12 +248,12 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, return_ACPI_STATUS(AE_BAD_DATA); } + user_prt->address = obj_desc->integer.value; + /* 2) Second subobject: Dereference the PRT.Pin */ obj_desc = sub_object_list[1]; - if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) { - user_prt->pin = (u32) obj_desc->integer.value; - } else { + if (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER) { ACPI_ERROR((AE_INFO, "(PRT[%X].Pin) Need Integer, found %s", index, @@ -284,6 +282,25 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, } } + user_prt->pin = (u32) obj_desc->integer.value; + + /* + * If the BIOS has erroneously reversed the _PRT source_name (index 2) + * and the source_index (index 3), fix it. _PRT is important enough to + * workaround this BIOS error. This also provides compatibility with + * other ACPI implementations. + */ + obj_desc = sub_object_list[3]; + if (!obj_desc + || (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER)) { + sub_object_list[3] = sub_object_list[2]; + sub_object_list[2] = obj_desc; + + ACPI_WARNING((AE_INFO, + "(PRT[%X].Source) SourceName and SourceIndex are reversed, fixed", + index)); + } + /* * 3) Third subobject: Dereference the PRT.source_name * The name may be unresolved (slack mode), so allow a null object @@ -364,9 +381,7 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, /* 4) Fourth subobject: Dereference the PRT.source_index */ obj_desc = sub_object_list[source_index_index]; - if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) { - user_prt->source_index = (u32) obj_desc->integer.value; - } else { + if (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER) { ACPI_ERROR((AE_INFO, "(PRT[%X].SourceIndex) Need Integer, found %s", index, @@ -374,6 +389,8 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, return_ACPI_STATUS(AE_BAD_DATA); } + user_prt->source_index = (u32) obj_desc->integer.value; + /* Point to the next union acpi_operand_object in the top level package */ top_object_list++; diff --git a/drivers/acpi/resources/rsmisc.c b/drivers/acpi/resources/rsmisc.c index de1ac3881b22..96a6c0353255 100644 --- a/drivers/acpi/resources/rsmisc.c +++ b/drivers/acpi/resources/rsmisc.c @@ -82,7 +82,7 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource, ACPI_FUNCTION_TRACE(rs_convert_aml_to_resource); - if (((acpi_native_uint) resource) & 0x3) { + if (((acpi_size) resource) & 0x3) { /* Each internal resource struct is expected to be 32-bit aligned */ diff --git a/drivers/acpi/resources/rsutils.c b/drivers/acpi/resources/rsutils.c index befe2302f41b..f7b3bcd59ba7 100644 --- a/drivers/acpi/resources/rsutils.c +++ b/drivers/acpi/resources/rsutils.c @@ -62,7 +62,7 @@ ACPI_MODULE_NAME("rsutils") ******************************************************************************/ u8 acpi_rs_decode_bitmask(u16 mask, u8 * list) { - acpi_native_uint i; + u8 i; u8 bit_count; ACPI_FUNCTION_ENTRY(); @@ -71,7 +71,7 @@ u8 acpi_rs_decode_bitmask(u16 mask, u8 * list) for (i = 0, bit_count = 0; mask; i++) { if (mask & 0x0001) { - list[bit_count] = (u8) i; + list[bit_count] = i; bit_count++; } @@ -96,8 +96,8 @@ u8 acpi_rs_decode_bitmask(u16 mask, u8 * list) u16 acpi_rs_encode_bitmask(u8 * list, u8 count) { - acpi_native_uint i; - acpi_native_uint mask; + u32 i; + u16 mask; ACPI_FUNCTION_ENTRY(); @@ -107,7 +107,7 @@ u16 acpi_rs_encode_bitmask(u8 * list, u8 count) mask |= (0x1 << list[i]); } - return ((u16) mask); + return mask; } /******************************************************************************* @@ -130,7 +130,7 @@ u16 acpi_rs_encode_bitmask(u8 * list, u8 count) void acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type) { - acpi_native_uint i; + u32 i; ACPI_FUNCTION_ENTRY(); @@ -679,7 +679,6 @@ acpi_rs_set_srs_method_data(struct acpi_namespace_node *node, info->prefix_node = node; info->pathname = METHOD_NAME__SRS; info->parameters = args; - info->parameter_type = ACPI_PARAM_ARGS; info->flags = ACPI_IGNORE_RETURN_VALUE; /* diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 6d85289f1c12..f3132aa47a69 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include /* for acpi_ex_eisa_id_to_string() */ @@ -92,17 +94,37 @@ acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, cha } static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL); -static int acpi_eject_operation(acpi_handle handle, int lockable) +static int acpi_bus_hot_remove_device(void *context) { + struct acpi_device *device; + acpi_handle handle = context; struct acpi_object_list arg_list; union acpi_object arg; acpi_status status = AE_OK; - /* - * TBD: evaluate _PS3? - */ + if (acpi_bus_get_device(handle, &device)) + return 0; + + if (!device) + return 0; + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Hot-removing device %s...\n", device->dev.bus_id)); + - if (lockable) { + if (acpi_bus_trim(device, 1)) { + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, + "Removing device failed\n")); + return -1; + } + + /* power off device */ + status = acpi_evaluate_object(handle, "_PS3", NULL, NULL); + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) + ACPI_DEBUG_PRINT((ACPI_DB_WARN, + "Power-off device failed\n")); + + if (device->flags.lockable) { arg_list.count = 1; arg_list.pointer = &arg; arg.type = ACPI_TYPE_INTEGER; @@ -118,26 +140,22 @@ static int acpi_eject_operation(acpi_handle handle, int lockable) /* * TBD: _EJD support. */ - status = acpi_evaluate_object(handle, "_EJ0", &arg_list, NULL); - if (ACPI_FAILURE(status)) { - return (-ENODEV); - } + if (ACPI_FAILURE(status)) + return -ENODEV; - return (0); + return 0; } static ssize_t acpi_eject_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { - int result; int ret = count; - int islockable; acpi_status status; - acpi_handle handle; acpi_object_type type = 0; struct acpi_device *acpi_device = to_acpi_device(d); + struct task_struct *task; if ((!count) || (buf[0] != '1')) { return -EINVAL; @@ -154,18 +172,12 @@ acpi_eject_store(struct device *d, struct device_attribute *attr, goto err; } - islockable = acpi_device->flags.lockable; - handle = acpi_device->handle; - - result = acpi_bus_trim(acpi_device, 1); - - if (!result) - result = acpi_eject_operation(handle, islockable); - - if (result) { - ret = -EBUSY; - } - err: + /* remove the device in another thread to fix the deadlock issue */ + task = kthread_run(acpi_bus_hot_remove_device, + acpi_device->handle, "acpi_hot_remove_device"); + if (IS_ERR(task)) + ret = PTR_ERR(task); +err: return ret; } @@ -691,9 +703,7 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device) acpi_status status = 0; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *package = NULL; - union acpi_object in_arg[3]; - struct acpi_object_list arg_list = { 3, in_arg }; - acpi_status psw_status = AE_OK; + int psw_error; struct acpi_device_id button_device_ids[] = { {"PNP0C0D", 0}, @@ -725,39 +735,11 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device) * So it is necessary to call _DSW object first. Only when it is not * present will the _PSW object used. */ - /* - * Three agruments are needed for the _DSW object. - * Argument 0: enable/disable the wake capabilities - * When _DSW object is called to disable the wake capabilities, maybe - * the first argument is filled. The value of the other two agruments - * is meaningless. - */ - in_arg[0].type = ACPI_TYPE_INTEGER; - in_arg[0].integer.value = 0; - in_arg[1].type = ACPI_TYPE_INTEGER; - in_arg[1].integer.value = 0; - in_arg[2].type = ACPI_TYPE_INTEGER; - in_arg[2].integer.value = 0; - psw_status = acpi_evaluate_object(device->handle, "_DSW", - &arg_list, NULL); - if (ACPI_FAILURE(psw_status) && (psw_status != AE_NOT_FOUND)) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "error in evaluate _DSW\n")); - /* - * When the _DSW object is not present, OSPM will call _PSW object. - */ - if (psw_status == AE_NOT_FOUND) { - /* - * Only one agruments is required for the _PSW object. - * agrument 0: enable/disable the wake capabilities - */ - arg_list.count = 1; - in_arg[0].integer.value = 0; - psw_status = acpi_evaluate_object(device->handle, "_PSW", - &arg_list, NULL); - if (ACPI_FAILURE(psw_status) && (psw_status != AE_NOT_FOUND)) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "error in " - "evaluate _PSW\n")); - } + psw_error = acpi_device_sleep_wake(device, 0, 0, 0); + if (psw_error) + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "error in _DSW or _PSW evaluation\n")); + /* Power button, Lid switch always enable wakeup */ if (!acpi_match_device_ids(device, button_device_ids)) device->wakeup.flags.run_wake = 1; diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index 495c63a3e0af..0489a7d1d42c 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -24,10 +24,6 @@ u8 sleep_states[ACPI_S_STATE_COUNT]; -#ifdef CONFIG_PM_SLEEP -static u32 acpi_target_sleep_state = ACPI_STATE_S0; -#endif - static int acpi_sleep_prepare(u32 acpi_state) { #ifdef CONFIG_ACPI_SLEEP @@ -49,9 +45,96 @@ static int acpi_sleep_prepare(u32 acpi_state) return 0; } -#ifdef CONFIG_SUSPEND -static struct platform_suspend_ops acpi_suspend_ops; +#ifdef CONFIG_PM_SLEEP +static u32 acpi_target_sleep_state = ACPI_STATE_S0; + +/* + * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the + * user to request that behavior by using the 'acpi_old_suspend_ordering' + * kernel command line option that causes the following variable to be set. + */ +static bool old_suspend_ordering; +void __init acpi_old_suspend_ordering(void) +{ + old_suspend_ordering = true; +} + +/** + * acpi_pm_disable_gpes - Disable the GPEs. + */ +static int acpi_pm_disable_gpes(void) +{ + acpi_hw_disable_all_gpes(); + return 0; +} + +/** + * __acpi_pm_prepare - Prepare the platform to enter the target state. + * + * If necessary, set the firmware waking vector and do arch-specific + * nastiness to get the wakeup code to the waking vector. + */ +static int __acpi_pm_prepare(void) +{ + int error = acpi_sleep_prepare(acpi_target_sleep_state); + + if (error) + acpi_target_sleep_state = ACPI_STATE_S0; + return error; +} + +/** + * acpi_pm_prepare - Prepare the platform to enter the target sleep + * state and disable the GPEs. + */ +static int acpi_pm_prepare(void) +{ + int error = __acpi_pm_prepare(); + + if (!error) + acpi_hw_disable_all_gpes(); + return error; +} + +/** + * acpi_pm_finish - Instruct the platform to leave a sleep state. + * + * This is called after we wake back up (or if entering the sleep state + * failed). + */ +static void acpi_pm_finish(void) +{ + u32 acpi_state = acpi_target_sleep_state; + + if (acpi_state == ACPI_STATE_S0) + return; + + printk(KERN_INFO PREFIX "Waking up from system sleep state S%d\n", + acpi_state); + acpi_disable_wakeup_device(acpi_state); + acpi_leave_sleep_state(acpi_state); + + /* reset firmware waking vector */ + acpi_set_firmware_waking_vector((acpi_physical_address) 0); + + acpi_target_sleep_state = ACPI_STATE_S0; +} + +/** + * acpi_pm_end - Finish up suspend sequence. + */ +static void acpi_pm_end(void) +{ + /* + * This is necessary in case acpi_pm_finish() is not called during a + * failing transition to a sleep state. + */ + acpi_target_sleep_state = ACPI_STATE_S0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_SUSPEND extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { @@ -61,13 +144,10 @@ static u32 acpi_suspend_states[] = { [PM_SUSPEND_MAX] = ACPI_STATE_S5 }; -static int init_8259A_after_S1; - /** * acpi_suspend_begin - Set the target system sleep state to the state * associated with given @pm_state, if supported. */ - static int acpi_suspend_begin(suspend_state_t pm_state) { u32 acpi_state = acpi_suspend_states[pm_state]; @@ -83,25 +163,6 @@ static int acpi_suspend_begin(suspend_state_t pm_state) return error; } -/** - * acpi_suspend_prepare - Do preliminary suspend work. - * - * If necessary, set the firmware waking vector and do arch-specific - * nastiness to get the wakeup code to the waking vector. - */ - -static int acpi_suspend_prepare(void) -{ - int error = acpi_sleep_prepare(acpi_target_sleep_state); - - if (error) { - acpi_target_sleep_state = ACPI_STATE_S0; - return error; - } - - return ACPI_SUCCESS(acpi_hw_disable_all_gpes()) ? 0 : -EFAULT; -} - /** * acpi_suspend_enter - Actually enter a sleep state. * @pm_state: ignored @@ -110,7 +171,6 @@ static int acpi_suspend_prepare(void) * assembly, which in turn call acpi_enter_sleep_state(). * It's unfortunate, but it works. Please fix if you're feeling frisky. */ - static int acpi_suspend_enter(suspend_state_t pm_state) { acpi_status status = AE_OK; @@ -167,46 +227,6 @@ static int acpi_suspend_enter(suspend_state_t pm_state) return ACPI_SUCCESS(status) ? 0 : -EFAULT; } -/** - * acpi_suspend_finish - Instruct the platform to leave a sleep state. - * - * This is called after we wake back up (or if entering the sleep state - * failed). - */ - -static void acpi_suspend_finish(void) -{ - u32 acpi_state = acpi_target_sleep_state; - - acpi_disable_wakeup_device(acpi_state); - acpi_leave_sleep_state(acpi_state); - - /* reset firmware waking vector */ - acpi_set_firmware_waking_vector((acpi_physical_address) 0); - - acpi_target_sleep_state = ACPI_STATE_S0; - -#ifdef CONFIG_X86 - if (init_8259A_after_S1) { - printk("Broken toshiba laptop -> kicking interrupts\n"); - init_8259A(0); - } -#endif -} - -/** - * acpi_suspend_end - Finish up suspend sequence. - */ - -static void acpi_suspend_end(void) -{ - /* - * This is necessary in case acpi_suspend_finish() is not called during a - * failing transition to a sleep state. - */ - acpi_target_sleep_state = ACPI_STATE_S0; -} - static int acpi_suspend_state_valid(suspend_state_t pm_state) { u32 acpi_state; @@ -226,30 +246,39 @@ static int acpi_suspend_state_valid(suspend_state_t pm_state) static struct platform_suspend_ops acpi_suspend_ops = { .valid = acpi_suspend_state_valid, .begin = acpi_suspend_begin, - .prepare = acpi_suspend_prepare, + .prepare = acpi_pm_prepare, .enter = acpi_suspend_enter, - .finish = acpi_suspend_finish, - .end = acpi_suspend_end, + .finish = acpi_pm_finish, + .end = acpi_pm_end, }; -/* - * Toshiba fails to preserve interrupts over S1, reinitialization - * of 8259 is needed after S1 resume. +/** + * acpi_suspend_begin_old - Set the target system sleep state to the + * state associated with given @pm_state, if supported, and + * execute the _PTS control method. This function is used if the + * pre-ACPI 2.0 suspend ordering has been requested. */ -static int __init init_ints_after_s1(const struct dmi_system_id *d) +static int acpi_suspend_begin_old(suspend_state_t pm_state) { - printk(KERN_WARNING "%s with broken S1 detected.\n", d->ident); - init_8259A_after_S1 = 1; - return 0; + int error = acpi_suspend_begin(pm_state); + + if (!error) + error = __acpi_pm_prepare(); + return error; } -static struct dmi_system_id __initdata acpisleep_dmi_table[] = { - { - .callback = init_ints_after_s1, - .ident = "Toshiba Satellite 4030cdt", - .matches = {DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),}, - }, - {}, +/* + * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has + * been requested. + */ +static struct platform_suspend_ops acpi_suspend_ops_old = { + .valid = acpi_suspend_state_valid, + .begin = acpi_suspend_begin_old, + .prepare = acpi_pm_disable_gpes, + .enter = acpi_suspend_enter, + .finish = acpi_pm_finish, + .end = acpi_pm_end, + .recover = acpi_pm_finish, }; #endif /* CONFIG_SUSPEND */ @@ -257,22 +286,9 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { static int acpi_hibernation_begin(void) { acpi_target_sleep_state = ACPI_STATE_S4; - return 0; } -static int acpi_hibernation_prepare(void) -{ - int error = acpi_sleep_prepare(ACPI_STATE_S4); - - if (error) { - acpi_target_sleep_state = ACPI_STATE_S0; - return error; - } - - return ACPI_SUCCESS(acpi_hw_disable_all_gpes()) ? 0 : -EFAULT; -} - static int acpi_hibernation_enter(void) { acpi_status status = AE_OK; @@ -302,52 +318,55 @@ static void acpi_hibernation_leave(void) acpi_leave_sleep_state_prep(ACPI_STATE_S4); } -static void acpi_hibernation_finish(void) +static void acpi_pm_enable_gpes(void) { - acpi_disable_wakeup_device(ACPI_STATE_S4); - acpi_leave_sleep_state(ACPI_STATE_S4); - - /* reset firmware waking vector */ - acpi_set_firmware_waking_vector((acpi_physical_address) 0); - - acpi_target_sleep_state = ACPI_STATE_S0; + acpi_hw_enable_all_runtime_gpes(); } -static void acpi_hibernation_end(void) -{ - /* - * This is necessary in case acpi_hibernation_finish() is not called - * during a failing transition to the sleep state. - */ - acpi_target_sleep_state = ACPI_STATE_S0; -} +static struct platform_hibernation_ops acpi_hibernation_ops = { + .begin = acpi_hibernation_begin, + .end = acpi_pm_end, + .pre_snapshot = acpi_pm_prepare, + .finish = acpi_pm_finish, + .prepare = acpi_pm_prepare, + .enter = acpi_hibernation_enter, + .leave = acpi_hibernation_leave, + .pre_restore = acpi_pm_disable_gpes, + .restore_cleanup = acpi_pm_enable_gpes, +}; -static int acpi_hibernation_pre_restore(void) +/** + * acpi_hibernation_begin_old - Set the target system sleep state to + * ACPI_STATE_S4 and execute the _PTS control method. This + * function is used if the pre-ACPI 2.0 suspend ordering has been + * requested. + */ +static int acpi_hibernation_begin_old(void) { - acpi_status status; - - status = acpi_hw_disable_all_gpes(); - - return ACPI_SUCCESS(status) ? 0 : -EFAULT; -} + int error = acpi_sleep_prepare(ACPI_STATE_S4); -static void acpi_hibernation_restore_cleanup(void) -{ - acpi_hw_enable_all_runtime_gpes(); + if (!error) + acpi_target_sleep_state = ACPI_STATE_S4; + return error; } -static struct platform_hibernation_ops acpi_hibernation_ops = { - .begin = acpi_hibernation_begin, - .end = acpi_hibernation_end, - .pre_snapshot = acpi_hibernation_prepare, - .finish = acpi_hibernation_finish, - .prepare = acpi_hibernation_prepare, +/* + * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has + * been requested. + */ +static struct platform_hibernation_ops acpi_hibernation_ops_old = { + .begin = acpi_hibernation_begin_old, + .end = acpi_pm_end, + .pre_snapshot = acpi_pm_disable_gpes, + .finish = acpi_pm_finish, + .prepare = acpi_pm_disable_gpes, .enter = acpi_hibernation_enter, .leave = acpi_hibernation_leave, - .pre_restore = acpi_hibernation_pre_restore, - .restore_cleanup = acpi_hibernation_restore_cleanup, + .pre_restore = acpi_pm_disable_gpes, + .restore_cleanup = acpi_pm_enable_gpes, + .recover = acpi_pm_finish, }; -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATION */ int acpi_suspend(u32 acpi_state) { @@ -368,8 +387,8 @@ int acpi_suspend(u32 acpi_state) /** * acpi_pm_device_sleep_state - return preferred power state of ACPI device * in the system sleep state given by %acpi_target_sleep_state - * @dev: device to examine - * @wake: if set, the device should be able to wake up the system + * @dev: device to examine; its driver model wakeup flags control + * whether it should be able to wake up the system * @d_min_p: used to store the upper limit of allowed states range * Return value: preferred power state of the device on success, -ENODEV on * failure (ie. if there's no 'struct acpi_device' for @dev) @@ -387,7 +406,7 @@ int acpi_suspend(u32 acpi_state) * via @wake. */ -int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p) +int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) { acpi_handle handle = DEVICE_ACPI_HANDLE(dev); struct acpi_device *adev; @@ -426,7 +445,7 @@ int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p) * can wake the system. _S0W may be valid, too. */ if (acpi_target_sleep_state == ACPI_STATE_S0 || - (wake && adev->wakeup.state.enabled && + (device_may_wakeup(dev) && adev->wakeup.state.enabled && adev->wakeup.sleep_state <= acpi_target_sleep_state)) { acpi_status status; @@ -448,6 +467,31 @@ int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p) *d_min_p = d_min; return d_max; } + +/** + * acpi_pm_device_sleep_wake - enable or disable the system wake-up + * capability of given device + * @dev: device to handle + * @enable: 'true' - enable, 'false' - disable the wake-up capability + */ +int acpi_pm_device_sleep_wake(struct device *dev, bool enable) +{ + acpi_handle handle; + struct acpi_device *adev; + + if (!device_may_wakeup(dev)) + return -EINVAL; + + handle = DEVICE_ACPI_HANDLE(dev); + if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &adev))) { + printk(KERN_DEBUG "ACPI handle has no context!\n"); + return -ENODEV; + } + + return enable ? + acpi_enable_wakeup_device_power(adev, acpi_target_sleep_state) : + acpi_disable_wakeup_device_power(adev); +} #endif static void acpi_power_off_prepare(void) @@ -472,8 +516,6 @@ int __init acpi_sleep_init(void) u8 type_a, type_b; #ifdef CONFIG_SUSPEND int i = 0; - - dmi_check_system(acpisleep_dmi_table); #endif if (acpi_disabled) @@ -491,13 +533,15 @@ int __init acpi_sleep_init(void) } } - suspend_set_ops(&acpi_suspend_ops); + suspend_set_ops(old_suspend_ordering ? + &acpi_suspend_ops_old : &acpi_suspend_ops); #endif #ifdef CONFIG_HIBERNATION status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b); if (ACPI_SUCCESS(status)) { - hibernation_set_ops(&acpi_hibernation_ops); + hibernation_set_ops(old_suspend_ordering ? + &acpi_hibernation_ops_old : &acpi_hibernation_ops); sleep_states[ACPI_STATE_S4] = 1; printk(" S4"); } diff --git a/drivers/acpi/sleep/wakeup.c b/drivers/acpi/sleep/wakeup.c index ed8e41becf0c..38655eb132dc 100644 --- a/drivers/acpi/sleep/wakeup.c +++ b/drivers/acpi/sleep/wakeup.c @@ -42,7 +42,7 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state) continue; spin_unlock(&acpi_device_lock); - acpi_enable_wakeup_device_power(dev); + acpi_enable_wakeup_device_power(dev, sleep_state); spin_lock(&acpi_device_lock); } spin_unlock(&acpi_device_lock); @@ -66,13 +66,15 @@ void acpi_enable_wakeup_device(u8 sleep_state) list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct acpi_device, wakeup_list); + if (!dev->wakeup.flags.valid) continue; + /* If users want to disable run-wake GPE, * we only disable it for wake and leave it for runtime */ - if (!dev->wakeup.state.enabled || - sleep_state > (u32) dev->wakeup.sleep_state) { + if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared) + || sleep_state > (u32) dev->wakeup.sleep_state) { if (dev->wakeup.flags.run_wake) { spin_unlock(&acpi_device_lock); /* set_gpe_type will disable GPE, leave it like that */ @@ -110,8 +112,9 @@ void acpi_disable_wakeup_device(u8 sleep_state) if (!dev->wakeup.flags.valid) continue; - if (!dev->wakeup.state.enabled || - sleep_state > (u32) dev->wakeup.sleep_state) { + + if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared) + || sleep_state > (u32) dev->wakeup.sleep_state) { if (dev->wakeup.flags.run_wake) { spin_unlock(&acpi_device_lock); acpi_set_gpe_type(dev->wakeup.gpe_device, diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index 5bd2dec9a7ac..d8e3f153b295 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -167,7 +167,13 @@ static int acpi_system_sysfs_init(void) #define COUNT_ERROR 2 /* other */ #define NUM_COUNTERS_EXTRA 3 -static u32 *all_counters; +#define ACPI_EVENT_VALID 0x01 +struct event_counter { + u32 count; + u32 flags; +}; + +static struct event_counter *all_counters; static u32 num_gpes; static u32 num_counters; static struct attribute **all_attrs; @@ -202,9 +208,44 @@ static int count_num_gpes(void) return count; } +static int get_gpe_device(int index, acpi_handle *handle) +{ + struct acpi_gpe_xrupt_info *gpe_xrupt_info; + struct acpi_gpe_block_info *gpe_block; + acpi_cpu_flags flags; + struct acpi_namespace_node *node; + + flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock); + + gpe_xrupt_info = acpi_gbl_gpe_xrupt_list_head; + while (gpe_xrupt_info) { + gpe_block = gpe_xrupt_info->gpe_block_list_head; + node = gpe_block->node; + while (gpe_block) { + index -= gpe_block->register_count * + ACPI_GPE_REGISTER_WIDTH; + if (index < 0) { + acpi_os_release_lock(acpi_gbl_gpe_lock, flags); + /* return NULL if it's FADT GPE */ + if (node->type != ACPI_TYPE_DEVICE) + *handle = NULL; + else + *handle = node; + return 0; + } + node = gpe_block->node; + gpe_block = gpe_block->next; + } + gpe_xrupt_info = gpe_xrupt_info->next; + } + acpi_os_release_lock(acpi_gbl_gpe_lock, flags); + + return -ENODEV; +} + static void delete_gpe_attr_array(void) { - u32 *tmp = all_counters; + struct event_counter *tmp = all_counters; all_counters = NULL; kfree(tmp); @@ -230,9 +271,10 @@ void acpi_os_gpe_count(u32 gpe_number) return; if (gpe_number < num_gpes) - all_counters[gpe_number]++; + all_counters[gpe_number].count++; else - all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]++; + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]. + count++; return; } @@ -243,44 +285,144 @@ void acpi_os_fixed_event_count(u32 event_number) return; if (event_number < ACPI_NUM_FIXED_EVENTS) - all_counters[num_gpes + event_number]++; + all_counters[num_gpes + event_number].count++; else - all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]++; + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]. + count++; return; } +static int get_status(u32 index, acpi_event_status *status, acpi_handle *handle) +{ + int result = 0; + + if (index >= num_gpes + ACPI_NUM_FIXED_EVENTS) + goto end; + + if (index < num_gpes) { + result = get_gpe_device(index, handle); + if (result) { + ACPI_EXCEPTION((AE_INFO, AE_NOT_FOUND, + "Invalid GPE 0x%x\n", index)); + goto end; + } + result = acpi_get_gpe_status(*handle, index, + ACPI_NOT_ISR, status); + } else if (index < (num_gpes + ACPI_NUM_FIXED_EVENTS)) + result = acpi_get_event_status(index - num_gpes, status); + + /* + * sleep/power button GPE/Fixed Event is enabled after acpi_system_init, + * check the status at runtime and mark it as valid once it's enabled + */ + if (!result && (*status & ACPI_EVENT_FLAG_ENABLED)) + all_counters[index].flags |= ACPI_EVENT_VALID; +end: + return result; +} + static ssize_t counter_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI] = + int index = attr - counter_attrs; + int size; + acpi_handle handle; + acpi_event_status status; + int result = 0; + + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI].count = acpi_irq_handled; - all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE] = + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE].count = acpi_gpe_count; - return sprintf(buf, "%d\n", all_counters[attr - counter_attrs]); + size = sprintf(buf, "%8d", all_counters[index].count); + + /* "gpe_all" or "sci" */ + if (index >= num_gpes + ACPI_NUM_FIXED_EVENTS) + goto end; + + result = get_status(index, &status, &handle); + if (result) + goto end; + + if (!(all_counters[index].flags & ACPI_EVENT_VALID)) + size += sprintf(buf + size, " invalid"); + else if (status & ACPI_EVENT_FLAG_ENABLED) + size += sprintf(buf + size, " enable"); + else + size += sprintf(buf + size, " disable"); + +end: + size += sprintf(buf + size, "\n"); + return result ? result : size; } /* * counter_set() sets the specified counter. * setting the total "sci" file to any value clears all counters. + * enable/disable/clear a gpe/fixed event in user space. */ static ssize_t counter_set(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t size) { int index = attr - counter_attrs; + acpi_event_status status; + acpi_handle handle; + int result = 0; if (index == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI) { int i; for (i = 0; i < num_counters; ++i) - all_counters[i] = 0; + all_counters[i].count = 0; acpi_gpe_count = 0; acpi_irq_handled = 0; + goto end; + } + /* show the event status for both GPEs and Fixed Events */ + result = get_status(index, &status, &handle); + if (result) + goto end; + + if (!(all_counters[index].flags & ACPI_EVENT_VALID)) { + ACPI_DEBUG_PRINT((ACPI_DB_WARN, + "Can not change Invalid GPE/Fixed Event status\n")); + return -EINVAL; + } + + if (index < num_gpes) { + if (!strcmp(buf, "disable\n") && + (status & ACPI_EVENT_FLAG_ENABLED)) + result = acpi_disable_gpe(handle, index, ACPI_NOT_ISR); + else if (!strcmp(buf, "enable\n") && + !(status & ACPI_EVENT_FLAG_ENABLED)) + result = acpi_enable_gpe(handle, index, ACPI_NOT_ISR); + else if (!strcmp(buf, "clear\n") && + (status & ACPI_EVENT_FLAG_SET)) + result = acpi_clear_gpe(handle, index, ACPI_NOT_ISR); + else + all_counters[index].count = strtoul(buf, NULL, 0); + } else if (index < num_gpes + ACPI_NUM_FIXED_EVENTS) { + int event = index - num_gpes; + if (!strcmp(buf, "disable\n") && + (status & ACPI_EVENT_FLAG_ENABLED)) + result = acpi_disable_event(event, ACPI_NOT_ISR); + else if (!strcmp(buf, "enable\n") && + !(status & ACPI_EVENT_FLAG_ENABLED)) + result = acpi_enable_event(event, ACPI_NOT_ISR); + else if (!strcmp(buf, "clear\n") && + (status & ACPI_EVENT_FLAG_SET)) + result = acpi_clear_event(event); + else + all_counters[index].count = strtoul(buf, NULL, 0); } else - all_counters[index] = strtoul(buf, NULL, 0); + all_counters[index].count = strtoul(buf, NULL, 0); - return size; + if (ACPI_FAILURE(result)) + result = -EINVAL; +end: + return result ? result : size; } void acpi_irq_stats_init(void) @@ -298,7 +440,8 @@ void acpi_irq_stats_init(void) if (all_attrs == NULL) return; - all_counters = kzalloc(sizeof(u32) * (num_counters), GFP_KERNEL); + all_counters = kzalloc(sizeof(struct event_counter) * (num_counters), + GFP_KERNEL); if (all_counters == NULL) goto fail; diff --git a/drivers/acpi/tables/tbfadt.c b/drivers/acpi/tables/tbfadt.c index 949d4114eb9f..ccb5b64bbef3 100644 --- a/drivers/acpi/tables/tbfadt.c +++ b/drivers/acpi/tables/tbfadt.c @@ -124,7 +124,7 @@ static struct acpi_fadt_info fadt_info_table[] = { static void inline acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, - u8 bit_width, u64 address) + u8 byte_width, u64 address) { /* @@ -136,7 +136,7 @@ acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, /* All other fields are byte-wide */ generic_address->space_id = ACPI_ADR_SPACE_SYSTEM_IO; - generic_address->bit_width = bit_width; + generic_address->bit_width = byte_width << 3; generic_address->bit_offset = 0; generic_address->access_width = 0; } @@ -155,7 +155,7 @@ acpi_tb_init_generic_address(struct acpi_generic_address *generic_address, * ******************************************************************************/ -void acpi_tb_parse_fadt(acpi_native_uint table_index, u8 flags) +void acpi_tb_parse_fadt(u32 table_index, u8 flags) { u32 length; struct acpi_table_header *table; @@ -280,7 +280,7 @@ static void acpi_tb_convert_fadt(void) { u8 pm1_register_length; struct acpi_generic_address *target; - acpi_native_uint i; + u32 i; /* Update the local FADT table header length */ @@ -343,9 +343,11 @@ static void acpi_tb_convert_fadt(void) * * The PM event blocks are split into two register blocks, first is the * PM Status Register block, followed immediately by the PM Enable Register - * block. Each is of length (pm1_event_length/2) + * block. Each is of length (xpm1x_event_block.bit_width/2) */ - pm1_register_length = (u8) ACPI_DIV_2(acpi_gbl_FADT.pm1_event_length); + WARN_ON(ACPI_MOD_16(acpi_gbl_FADT.xpm1a_event_block.bit_width)); + pm1_register_length = (u8) ACPI_DIV_16(acpi_gbl_FADT + .xpm1a_event_block.bit_width); /* The PM1A register block is required */ @@ -360,14 +362,17 @@ static void acpi_tb_convert_fadt(void) /* The PM1B register block is optional, ignore if not present */ if (acpi_gbl_FADT.xpm1b_event_block.address) { + WARN_ON(ACPI_MOD_16(acpi_gbl_FADT.xpm1b_event_block.bit_width)); + pm1_register_length = (u8) ACPI_DIV_16(acpi_gbl_FADT + .xpm1b_event_block + .bit_width); acpi_tb_init_generic_address(&acpi_gbl_xpm1b_enable, pm1_register_length, (acpi_gbl_FADT.xpm1b_event_block. address + pm1_register_length)); /* Don't forget to copy space_id of the GAS */ acpi_gbl_xpm1b_enable.space_id = - acpi_gbl_FADT.xpm1a_event_block.space_id; - + acpi_gbl_FADT.xpm1b_event_block.space_id; } } @@ -396,7 +401,7 @@ static void acpi_tb_validate_fadt(void) u32 *address32; struct acpi_generic_address *address64; u8 length; - acpi_native_uint i; + u32 i; /* Examine all of the 64-bit extended address fields (X fields) */ diff --git a/drivers/acpi/tables/tbfind.c b/drivers/acpi/tables/tbfind.c index 9ca3afc98c80..531584defbb8 100644 --- a/drivers/acpi/tables/tbfind.c +++ b/drivers/acpi/tables/tbfind.c @@ -65,10 +65,9 @@ ACPI_MODULE_NAME("tbfind") ******************************************************************************/ acpi_status acpi_tb_find_table(char *signature, - char *oem_id, - char *oem_table_id, acpi_native_uint * table_index) + char *oem_id, char *oem_table_id, u32 *table_index) { - acpi_native_uint i; + u32 i; acpi_status status; struct acpi_table_header header; diff --git a/drivers/acpi/tables/tbinstal.c b/drivers/acpi/tables/tbinstal.c index 5336ce88f89f..b22185f55a16 100644 --- a/drivers/acpi/tables/tbinstal.c +++ b/drivers/acpi/tables/tbinstal.c @@ -107,11 +107,10 @@ acpi_status acpi_tb_verify_table(struct acpi_table_desc *table_desc) ******************************************************************************/ acpi_status -acpi_tb_add_table(struct acpi_table_desc *table_desc, - acpi_native_uint * table_index) +acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index) { - acpi_native_uint i; - acpi_native_uint length; + u32 i; + u32 length; acpi_status status = AE_OK; ACPI_FUNCTION_TRACE(tb_add_table); @@ -207,8 +206,8 @@ acpi_status acpi_tb_resize_root_table_list(void) /* Increase the Table Array size */ - tables = ACPI_ALLOCATE_ZEROED((acpi_gbl_root_table_list.size + - ACPI_ROOT_TABLE_SIZE_INCREMENT) + tables = ACPI_ALLOCATE_ZEROED(((acpi_size) acpi_gbl_root_table_list. + size + ACPI_ROOT_TABLE_SIZE_INCREMENT) * sizeof(struct acpi_table_desc)); if (!tables) { ACPI_ERROR((AE_INFO, @@ -220,7 +219,7 @@ acpi_status acpi_tb_resize_root_table_list(void) if (acpi_gbl_root_table_list.tables) { ACPI_MEMCPY(tables, acpi_gbl_root_table_list.tables, - acpi_gbl_root_table_list.size * + (acpi_size) acpi_gbl_root_table_list.size * sizeof(struct acpi_table_desc)); if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) { @@ -253,7 +252,7 @@ acpi_status acpi_tb_resize_root_table_list(void) acpi_status acpi_tb_store_table(acpi_physical_address address, struct acpi_table_header *table, - u32 length, u8 flags, acpi_native_uint * table_index) + u32 length, u8 flags, u32 *table_index) { acpi_status status = AE_OK; @@ -334,7 +333,7 @@ void acpi_tb_delete_table(struct acpi_table_desc *table_desc) void acpi_tb_terminate(void) { - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(tb_terminate); @@ -374,7 +373,7 @@ void acpi_tb_terminate(void) * ******************************************************************************/ -void acpi_tb_delete_namespace_by_owner(acpi_native_uint table_index) +void acpi_tb_delete_namespace_by_owner(u32 table_index) { acpi_owner_id owner_id; @@ -403,7 +402,7 @@ void acpi_tb_delete_namespace_by_owner(acpi_native_uint table_index) * ******************************************************************************/ -acpi_status acpi_tb_allocate_owner_id(acpi_native_uint table_index) +acpi_status acpi_tb_allocate_owner_id(u32 table_index) { acpi_status status = AE_BAD_PARAMETER; @@ -431,7 +430,7 @@ acpi_status acpi_tb_allocate_owner_id(acpi_native_uint table_index) * ******************************************************************************/ -acpi_status acpi_tb_release_owner_id(acpi_native_uint table_index) +acpi_status acpi_tb_release_owner_id(u32 table_index) { acpi_status status = AE_BAD_PARAMETER; @@ -462,8 +461,7 @@ acpi_status acpi_tb_release_owner_id(acpi_native_uint table_index) * ******************************************************************************/ -acpi_status -acpi_tb_get_owner_id(acpi_native_uint table_index, acpi_owner_id * owner_id) +acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id *owner_id) { acpi_status status = AE_BAD_PARAMETER; @@ -490,7 +488,7 @@ acpi_tb_get_owner_id(acpi_native_uint table_index, acpi_owner_id * owner_id) * ******************************************************************************/ -u8 acpi_tb_is_table_loaded(acpi_native_uint table_index) +u8 acpi_tb_is_table_loaded(u32 table_index) { u8 is_loaded = FALSE; @@ -518,7 +516,7 @@ u8 acpi_tb_is_table_loaded(acpi_native_uint table_index) * ******************************************************************************/ -void acpi_tb_set_table_loaded_flag(acpi_native_uint table_index, u8 is_loaded) +void acpi_tb_set_table_loaded_flag(u32 table_index, u8 is_loaded) { (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); diff --git a/drivers/acpi/tables/tbutils.c b/drivers/acpi/tables/tbutils.c index bc019b9b6a68..0cc92ef5236f 100644 --- a/drivers/acpi/tables/tbutils.c +++ b/drivers/acpi/tables/tbutils.c @@ -49,8 +49,8 @@ ACPI_MODULE_NAME("tbutils") /* Local prototypes */ static acpi_physical_address -acpi_tb_get_root_table_entry(u8 * table_entry, - acpi_native_uint table_entry_size); +acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size); + /******************************************************************************* * * FUNCTION: acpi_tb_check_xsdt @@ -238,7 +238,7 @@ acpi_status acpi_tb_verify_checksum(struct acpi_table_header *table, u32 length) * ******************************************************************************/ -u8 acpi_tb_checksum(u8 * buffer, acpi_native_uint length) +u8 acpi_tb_checksum(u8 *buffer, u32 length) { u8 sum = 0; u8 *end = buffer + length; @@ -268,7 +268,7 @@ u8 acpi_tb_checksum(u8 * buffer, acpi_native_uint length) void acpi_tb_install_table(acpi_physical_address address, - u8 flags, char *signature, acpi_native_uint table_index) + u8 flags, char *signature, u32 table_index) { struct acpi_table_header *table; @@ -336,8 +336,7 @@ acpi_tb_install_table(acpi_physical_address address, ******************************************************************************/ static acpi_physical_address -acpi_tb_get_root_table_entry(u8 * table_entry, - acpi_native_uint table_entry_size) +acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size) { u64 address64; @@ -395,8 +394,8 @@ acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags) { struct acpi_table_rsdp *rsdp; - acpi_native_uint table_entry_size; - acpi_native_uint i; + u32 table_entry_size; + u32 i; u32 table_count; struct acpi_table_header *table; acpi_physical_address address; diff --git a/drivers/acpi/tables/tbxface.c b/drivers/acpi/tables/tbxface.c index 0e319604d3e7..fd7770aa1061 100644 --- a/drivers/acpi/tables/tbxface.c +++ b/drivers/acpi/tables/tbxface.c @@ -125,7 +125,7 @@ acpi_initialize_tables(struct acpi_table_desc * initial_table_array, /* Root Table Array has been statically allocated by the host */ ACPI_MEMSET(initial_table_array, 0, - initial_table_count * + (acpi_size) initial_table_count * sizeof(struct acpi_table_desc)); acpi_gbl_root_table_list.tables = initial_table_array; @@ -183,9 +183,9 @@ acpi_status acpi_reallocate_root_table(void) return_ACPI_STATUS(AE_SUPPORT); } - new_size = - (acpi_gbl_root_table_list.count + - ACPI_ROOT_TABLE_SIZE_INCREMENT) * sizeof(struct acpi_table_desc); + new_size = ((acpi_size) acpi_gbl_root_table_list.count + + ACPI_ROOT_TABLE_SIZE_INCREMENT) * + sizeof(struct acpi_table_desc); /* Create new array and copy the old array */ @@ -222,7 +222,7 @@ acpi_status acpi_reallocate_root_table(void) acpi_status acpi_load_table(struct acpi_table_header *table_ptr) { acpi_status status; - acpi_native_uint table_index; + u32 table_index; struct acpi_table_desc table_desc; if (!table_ptr) @@ -264,11 +264,10 @@ ACPI_EXPORT_SYMBOL(acpi_load_table) *****************************************************************************/ acpi_status acpi_get_table_header(char *signature, - acpi_native_uint instance, - struct acpi_table_header * out_table_header) + u32 instance, struct acpi_table_header *out_table_header) { - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; struct acpi_table_header *header; /* Parameter validation */ @@ -378,10 +377,10 @@ ACPI_EXPORT_SYMBOL(acpi_unload_table_id) *****************************************************************************/ acpi_status acpi_get_table(char *signature, - acpi_native_uint instance, struct acpi_table_header **out_table) + u32 instance, struct acpi_table_header **out_table) { - acpi_native_uint i; - acpi_native_uint j; + u32 i; + u32 j; acpi_status status; /* Parameter validation */ @@ -435,8 +434,7 @@ ACPI_EXPORT_SYMBOL(acpi_get_table) * ******************************************************************************/ acpi_status -acpi_get_table_by_index(acpi_native_uint table_index, - struct acpi_table_header ** table) +acpi_get_table_by_index(u32 table_index, struct acpi_table_header **table) { acpi_status status; @@ -493,7 +491,7 @@ static acpi_status acpi_tb_load_namespace(void) { acpi_status status; struct acpi_table_header *table; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(tb_load_namespace); diff --git a/drivers/acpi/tables/tbxfroot.c b/drivers/acpi/tables/tbxfroot.c index b8c0dfa084f6..2d157e0f98d2 100644 --- a/drivers/acpi/tables/tbxfroot.c +++ b/drivers/acpi/tables/tbxfroot.c @@ -118,7 +118,7 @@ static acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp) * ******************************************************************************/ -acpi_status acpi_find_root_pointer(acpi_native_uint * table_address) +acpi_status acpi_find_root_pointer(acpi_size *table_address) { u8 *table_ptr; u8 *mem_rover; @@ -153,7 +153,7 @@ acpi_status acpi_find_root_pointer(acpi_native_uint * table_address) * 1b) Search EBDA paragraphs (EBDA is required to be a * minimum of 1_k length) */ - table_ptr = acpi_os_map_memory((acpi_native_uint) + table_ptr = acpi_os_map_memory((acpi_physical_address) physical_address, ACPI_EBDA_WINDOW_SIZE); if (!table_ptr) { diff --git a/drivers/acpi/utilities/utalloc.c b/drivers/acpi/utilities/utalloc.c index ede084829a70..3dfb8a442b26 100644 --- a/drivers/acpi/utilities/utalloc.c +++ b/drivers/acpi/utilities/utalloc.c @@ -309,7 +309,8 @@ acpi_ut_initialize_buffer(struct acpi_buffer * buffer, * ******************************************************************************/ -void *acpi_ut_allocate(acpi_size size, u32 component, char *module, u32 line) +void *acpi_ut_allocate(acpi_size size, + u32 component, const char *module, u32 line) { void *allocation; @@ -353,7 +354,7 @@ void *acpi_ut_allocate(acpi_size size, u32 component, char *module, u32 line) ******************************************************************************/ void *acpi_ut_allocate_zeroed(acpi_size size, - u32 component, char *module, u32 line) + u32 component, const char *module, u32 line) { void *allocation; diff --git a/drivers/acpi/utilities/utcopy.c b/drivers/acpi/utilities/utcopy.c index 655c290aca7b..53499ac90988 100644 --- a/drivers/acpi/utilities/utcopy.c +++ b/drivers/acpi/utilities/utcopy.c @@ -572,7 +572,7 @@ acpi_ut_copy_epackage_to_ipackage(union acpi_object *external_object, acpi_status status = AE_OK; union acpi_operand_object *package_object; union acpi_operand_object **package_elements; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ut_copy_epackage_to_ipackage); @@ -599,7 +599,7 @@ acpi_ut_copy_epackage_to_ipackage(union acpi_object *external_object, /* Truncate package and delete it */ - package_object->package.count = (u32) i; + package_object->package.count = i; package_elements[i] = NULL; acpi_ut_remove_reference(package_object); return_ACPI_STATUS(status); diff --git a/drivers/acpi/utilities/utdebug.c b/drivers/acpi/utilities/utdebug.c index f938f465efa4..fd66ecb6741e 100644 --- a/drivers/acpi/utilities/utdebug.c +++ b/drivers/acpi/utilities/utdebug.c @@ -157,7 +157,8 @@ void ACPI_INTERNAL_VAR_XFACE acpi_ut_debug_print(u32 requested_debug_level, u32 line_number, const char *function_name, - char *module_name, u32 component_id, char *format, ...) + const char *module_name, + u32 component_id, const char *format, ...) { acpi_thread_id thread_id; va_list args; @@ -228,7 +229,8 @@ void ACPI_INTERNAL_VAR_XFACE acpi_ut_debug_print_raw(u32 requested_debug_level, u32 line_number, const char *function_name, - char *module_name, u32 component_id, char *format, ...) + const char *module_name, + u32 component_id, const char *format, ...) { va_list args; @@ -261,7 +263,8 @@ ACPI_EXPORT_SYMBOL(acpi_ut_debug_print_raw) ******************************************************************************/ void acpi_ut_trace(u32 line_number, - const char *function_name, char *module_name, u32 component_id) + const char *function_name, + const char *module_name, u32 component_id) { acpi_gbl_nesting_level++; @@ -293,7 +296,7 @@ ACPI_EXPORT_SYMBOL(acpi_ut_trace) void acpi_ut_trace_ptr(u32 line_number, const char *function_name, - char *module_name, u32 component_id, void *pointer) + const char *module_name, u32 component_id, void *pointer) { acpi_gbl_nesting_level++; acpi_ut_track_stack_ptr(); @@ -324,7 +327,7 @@ acpi_ut_trace_ptr(u32 line_number, void acpi_ut_trace_str(u32 line_number, const char *function_name, - char *module_name, u32 component_id, char *string) + const char *module_name, u32 component_id, char *string) { acpi_gbl_nesting_level++; @@ -356,7 +359,7 @@ acpi_ut_trace_str(u32 line_number, void acpi_ut_trace_u32(u32 line_number, const char *function_name, - char *module_name, u32 component_id, u32 integer) + const char *module_name, u32 component_id, u32 integer) { acpi_gbl_nesting_level++; @@ -386,7 +389,8 @@ acpi_ut_trace_u32(u32 line_number, void acpi_ut_exit(u32 line_number, - const char *function_name, char *module_name, u32 component_id) + const char *function_name, + const char *module_name, u32 component_id) { acpi_ut_debug_print(ACPI_LV_FUNCTIONS, @@ -417,7 +421,8 @@ ACPI_EXPORT_SYMBOL(acpi_ut_exit) void acpi_ut_status_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, acpi_status status) + const char *module_name, + u32 component_id, acpi_status status) { if (ACPI_SUCCESS(status)) { @@ -458,7 +463,8 @@ ACPI_EXPORT_SYMBOL(acpi_ut_status_exit) void acpi_ut_value_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, acpi_integer value) + const char *module_name, + u32 component_id, acpi_integer value) { acpi_ut_debug_print(ACPI_LV_FUNCTIONS, @@ -490,7 +496,7 @@ ACPI_EXPORT_SYMBOL(acpi_ut_value_exit) void acpi_ut_ptr_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, u8 * ptr) + const char *module_name, u32 component_id, u8 *ptr) { acpi_ut_debug_print(ACPI_LV_FUNCTIONS, @@ -519,8 +525,8 @@ acpi_ut_ptr_exit(u32 line_number, void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) { - acpi_native_uint i = 0; - acpi_native_uint j; + u32 i = 0; + u32 j; u32 temp32; u8 buf_char; @@ -539,7 +545,7 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) /* Print current offset */ - acpi_os_printf("%6.4X: ", (u32) i); + acpi_os_printf("%6.4X: ", i); /* Print 16 hex chars */ @@ -549,7 +555,7 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) /* Dump fill spaces */ acpi_os_printf("%*s", ((display * 2) + 1), " "); - j += (acpi_native_uint) display; + j += display; continue; } @@ -557,32 +563,38 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) case DB_BYTE_DISPLAY: default: /* Default is BYTE display */ - acpi_os_printf("%02X ", buffer[i + j]); + acpi_os_printf("%02X ", + buffer[(acpi_size) i + j]); break; case DB_WORD_DISPLAY: - ACPI_MOVE_16_TO_32(&temp32, &buffer[i + j]); + ACPI_MOVE_16_TO_32(&temp32, + &buffer[(acpi_size) i + j]); acpi_os_printf("%04X ", temp32); break; case DB_DWORD_DISPLAY: - ACPI_MOVE_32_TO_32(&temp32, &buffer[i + j]); + ACPI_MOVE_32_TO_32(&temp32, + &buffer[(acpi_size) i + j]); acpi_os_printf("%08X ", temp32); break; case DB_QWORD_DISPLAY: - ACPI_MOVE_32_TO_32(&temp32, &buffer[i + j]); + ACPI_MOVE_32_TO_32(&temp32, + &buffer[(acpi_size) i + j]); acpi_os_printf("%08X", temp32); - ACPI_MOVE_32_TO_32(&temp32, &buffer[i + j + 4]); + ACPI_MOVE_32_TO_32(&temp32, + &buffer[(acpi_size) i + j + + 4]); acpi_os_printf("%08X ", temp32); break; } - j += (acpi_native_uint) display; + j += display; } /* @@ -596,7 +608,7 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display) return; } - buf_char = buffer[i + j]; + buf_char = buffer[(acpi_size) i + j]; if (ACPI_IS_PRINT(buf_char)) { acpi_os_printf("%c", buf_char); } else { diff --git a/drivers/acpi/utilities/utdelete.c b/drivers/acpi/utilities/utdelete.c index 1fbc35139e84..c5c791a575c9 100644 --- a/drivers/acpi/utilities/utdelete.c +++ b/drivers/acpi/utilities/utdelete.c @@ -442,7 +442,7 @@ acpi_ut_update_object_reference(union acpi_operand_object *object, u16 action) union acpi_generic_state *state_list = NULL; union acpi_operand_object *next_object = NULL; union acpi_generic_state *state; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE_PTR(ut_update_object_reference, object); diff --git a/drivers/acpi/utilities/uteval.c b/drivers/acpi/utilities/uteval.c index 05e61be267d5..352747e49c7a 100644 --- a/drivers/acpi/utilities/uteval.c +++ b/drivers/acpi/utilities/uteval.c @@ -97,7 +97,7 @@ acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state) acpi_status status; union acpi_operand_object *string_desc; union acpi_operand_object *return_desc; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ut_osi_implementation); @@ -217,7 +217,6 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, info->prefix_node = prefix_node; info->pathname = path; - info->parameter_type = ACPI_PARAM_ARGS; /* Evaluate the object/method */ @@ -514,7 +513,7 @@ acpi_ut_execute_CID(struct acpi_namespace_node * device_node, u32 count; u32 size; struct acpi_compatible_id_list *cid_list; - acpi_native_uint i; + u32 i; ACPI_FUNCTION_TRACE(ut_execute_CID); diff --git a/drivers/acpi/utilities/utmisc.c b/drivers/acpi/utilities/utmisc.c index 1f057b71db1a..f34be6773556 100644 --- a/drivers/acpi/utilities/utmisc.c +++ b/drivers/acpi/utilities/utmisc.c @@ -64,7 +64,7 @@ ACPI_MODULE_NAME("utmisc") ******************************************************************************/ const char *acpi_ut_validate_exception(acpi_status status) { - acpi_status sub_status; + u32 sub_status; const char *exception = NULL; ACPI_FUNCTION_ENTRY(); @@ -85,32 +85,28 @@ const char *acpi_ut_validate_exception(acpi_status status) case AE_CODE_PROGRAMMER: if (sub_status <= AE_CODE_PGM_MAX) { - exception = - acpi_gbl_exception_names_pgm[sub_status - 1]; + exception = acpi_gbl_exception_names_pgm[sub_status]; } break; case AE_CODE_ACPI_TABLES: if (sub_status <= AE_CODE_TBL_MAX) { - exception = - acpi_gbl_exception_names_tbl[sub_status - 1]; + exception = acpi_gbl_exception_names_tbl[sub_status]; } break; case AE_CODE_AML: if (sub_status <= AE_CODE_AML_MAX) { - exception = - acpi_gbl_exception_names_aml[sub_status - 1]; + exception = acpi_gbl_exception_names_aml[sub_status]; } break; case AE_CODE_CONTROL: if (sub_status <= AE_CODE_CTRL_MAX) { - exception = - acpi_gbl_exception_names_ctrl[sub_status - 1]; + exception = acpi_gbl_exception_names_ctrl[sub_status]; } break; @@ -165,9 +161,9 @@ u8 acpi_ut_is_aml_table(struct acpi_table_header *table) acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id) { - acpi_native_uint i; - acpi_native_uint j; - acpi_native_uint k; + u32 i; + u32 j; + u32 k; acpi_status status; ACPI_FUNCTION_TRACE(ut_allocate_owner_id); @@ -273,7 +269,7 @@ void acpi_ut_release_owner_id(acpi_owner_id * owner_id_ptr) { acpi_owner_id owner_id = *owner_id_ptr; acpi_status status; - acpi_native_uint index; + u32 index; u32 bit; ACPI_FUNCTION_TRACE_U32(ut_release_owner_id, owner_id); @@ -593,7 +589,7 @@ acpi_ut_display_init_pathname(u8 type, * ******************************************************************************/ -u8 acpi_ut_valid_acpi_char(char character, acpi_native_uint position) +u8 acpi_ut_valid_acpi_char(char character, u32 position) { if (!((character >= 'A' && character <= 'Z') || @@ -628,7 +624,7 @@ u8 acpi_ut_valid_acpi_char(char character, acpi_native_uint position) u8 acpi_ut_valid_acpi_name(u32 name) { - acpi_native_uint i; + u32 i; ACPI_FUNCTION_ENTRY(); @@ -657,7 +653,7 @@ u8 acpi_ut_valid_acpi_name(u32 name) acpi_name acpi_ut_repair_name(char *name) { - acpi_native_uint i; + u32 i; char new_name[ACPI_NAME_SIZE]; for (i = 0; i < ACPI_NAME_SIZE; i++) { @@ -1024,7 +1020,7 @@ acpi_ut_walk_package_tree(union acpi_operand_object * source_object, ******************************************************************************/ void ACPI_INTERNAL_VAR_XFACE -acpi_ut_error(char *module_name, u32 line_number, char *format, ...) +acpi_ut_error(const char *module_name, u32 line_number, const char *format, ...) { va_list args; @@ -1037,8 +1033,8 @@ acpi_ut_error(char *module_name, u32 line_number, char *format, ...) } void ACPI_INTERNAL_VAR_XFACE -acpi_ut_exception(char *module_name, - u32 line_number, acpi_status status, char *format, ...) +acpi_ut_exception(const char *module_name, + u32 line_number, acpi_status status, const char *format, ...) { va_list args; @@ -1054,7 +1050,8 @@ acpi_ut_exception(char *module_name, EXPORT_SYMBOL(acpi_ut_exception); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_warning(char *module_name, u32 line_number, char *format, ...) +acpi_ut_warning(const char *module_name, + u32 line_number, const char *format, ...) { va_list args; @@ -1067,7 +1064,7 @@ acpi_ut_warning(char *module_name, u32 line_number, char *format, ...) } void ACPI_INTERNAL_VAR_XFACE -acpi_ut_info(char *module_name, u32 line_number, char *format, ...) +acpi_ut_info(const char *module_name, u32 line_number, const char *format, ...) { va_list args; diff --git a/drivers/acpi/utilities/utmutex.c b/drivers/acpi/utilities/utmutex.c index f7d602b1a894..7331dde9e1b3 100644 --- a/drivers/acpi/utilities/utmutex.c +++ b/drivers/acpi/utilities/utmutex.c @@ -218,7 +218,7 @@ acpi_status acpi_ut_acquire_mutex(acpi_mutex_handle mutex_id) * the mutex ordering rule. This indicates a coding error somewhere in * the ACPI subsystem code. */ - for (i = mutex_id; i < ACPI_MAX_MUTEX; i++) { + for (i = mutex_id; i < ACPI_NUM_MUTEX; i++) { if (acpi_gbl_mutex_info[i].thread_id == this_thread_id) { if (i == mutex_id) { ACPI_ERROR((AE_INFO, @@ -315,7 +315,7 @@ acpi_status acpi_ut_release_mutex(acpi_mutex_handle mutex_id) * ordering rule. This indicates a coding error somewhere in * the ACPI subsystem code. */ - for (i = mutex_id; i < ACPI_MAX_MUTEX; i++) { + for (i = mutex_id; i < ACPI_NUM_MUTEX; i++) { if (acpi_gbl_mutex_info[i].thread_id == this_thread_id) { if (i == mutex_id) { continue; diff --git a/drivers/acpi/utilities/utobject.c b/drivers/acpi/utilities/utobject.c index e68466de8044..e25484495e65 100644 --- a/drivers/acpi/utilities/utobject.c +++ b/drivers/acpi/utilities/utobject.c @@ -83,7 +83,8 @@ acpi_ut_get_element_length(u8 object_type, * ******************************************************************************/ -union acpi_operand_object *acpi_ut_create_internal_object_dbg(char *module_name, +union acpi_operand_object *acpi_ut_create_internal_object_dbg(const char + *module_name, u32 line_number, u32 component_id, acpi_object_type @@ -175,8 +176,8 @@ union acpi_operand_object *acpi_ut_create_package_object(u32 count) * Create the element array. Count+1 allows the array to be null * terminated. */ - package_elements = ACPI_ALLOCATE_ZEROED((acpi_size) - (count + 1) * sizeof(void *)); + package_elements = ACPI_ALLOCATE_ZEROED(((acpi_size) count + + 1) * sizeof(void *)); if (!package_elements) { acpi_ut_remove_reference(package_desc); return_PTR(NULL); @@ -347,7 +348,7 @@ u8 acpi_ut_valid_internal_object(void *object) * ******************************************************************************/ -void *acpi_ut_allocate_object_desc_dbg(char *module_name, +void *acpi_ut_allocate_object_desc_dbg(const char *module_name, u32 line_number, u32 component_id) { union acpi_operand_object *object; diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index d089c4519d45..64c889331f3b 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -631,6 +631,76 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag) * device : video output device (LCD, CRT, ..) * * Return Value: + * Maximum brightness level + * + * Allocate and initialize device->brightness. + */ + +static int +acpi_video_init_brightness(struct acpi_video_device *device) +{ + union acpi_object *obj = NULL; + int i, max_level = 0, count = 0; + union acpi_object *o; + struct acpi_video_device_brightness *br = NULL; + + if (!ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available " + "LCD brightness level\n")); + goto out; + } + + if (obj->package.count < 2) + goto out; + + br = kzalloc(sizeof(*br), GFP_KERNEL); + if (!br) { + printk(KERN_ERR "can't allocate memory\n"); + goto out; + } + + br->levels = kmalloc(obj->package.count * sizeof *(br->levels), + GFP_KERNEL); + if (!br->levels) + goto out_free; + + for (i = 0; i < obj->package.count; i++) { + o = (union acpi_object *)&obj->package.elements[i]; + if (o->type != ACPI_TYPE_INTEGER) { + printk(KERN_ERR PREFIX "Invalid data\n"); + continue; + } + br->levels[count] = (u32) o->integer.value; + + if (br->levels[count] > max_level) + max_level = br->levels[count]; + count++; + } + + if (count < 2) + goto out_free_levels; + + br->count = count; + device->brightness = br; + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "found %d brightness levels\n", count)); + kfree(obj); + return max_level; + +out_free_levels: + kfree(br->levels); +out_free: + kfree(br); +out: + device->brightness = NULL; + kfree(obj); + return 0; +} + +/* + * Arg: + * device : video output device (LCD, CRT, ..) + * + * Return Value: * None * * Find out all required AML methods defined under the output @@ -640,10 +710,7 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag) static void acpi_video_device_find_cap(struct acpi_video_device *device) { acpi_handle h_dummy1; - int i; u32 max_level = 0; - union acpi_object *obj = NULL; - struct acpi_video_device_brightness *br = NULL; memset(&device->cap, 0, sizeof(device->cap)); @@ -672,53 +739,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) device->cap._DSS = 1; } - if (ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) { - - if (obj->package.count >= 2) { - int count = 0; - union acpi_object *o; - - br = kzalloc(sizeof(*br), GFP_KERNEL); - if (!br) { - printk(KERN_ERR "can't allocate memory\n"); - } else { - br->levels = kmalloc(obj->package.count * - sizeof *(br->levels), GFP_KERNEL); - if (!br->levels) - goto out; - - for (i = 0; i < obj->package.count; i++) { - o = (union acpi_object *)&obj->package. - elements[i]; - if (o->type != ACPI_TYPE_INTEGER) { - printk(KERN_ERR PREFIX "Invalid data\n"); - continue; - } - br->levels[count] = (u32) o->integer.value; - - if (br->levels[count] > max_level) - max_level = br->levels[count]; - count++; - } - out: - if (count < 2) { - kfree(br->levels); - kfree(br); - } else { - br->count = count; - device->brightness = br; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "found %d brightness levels\n", - count)); - } - } - } - - } else { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available LCD brightness level\n")); - } - - kfree(obj); + max_level = acpi_video_init_brightness(device); if (device->cap._BCL && device->cap._BCM && device->cap._BQC && max_level > 0){ int result; @@ -1695,6 +1716,8 @@ static void acpi_video_switch_brightness(struct acpi_video_device *device, int event) { unsigned long level_current, level_next; + if (!device->brightness) + return; acpi_video_device_lcd_get_level_current(device, &level_current); level_next = acpi_video_get_next_level(device, level_current, event); acpi_video_device_lcd_set_level(device, level_next); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 911ec600fe71..3f940393d6c7 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -453,6 +453,8 @@ int platform_driver_register(struct platform_driver *drv) drv->driver.suspend = platform_drv_suspend; if (drv->resume) drv->driver.resume = platform_drv_resume; + if (drv->pm) + drv->driver.pm = &drv->pm->base; return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(platform_driver_register); @@ -560,7 +562,9 @@ static int platform_match(struct device *dev, struct device_driver *drv) return (strncmp(pdev->name, drv->name, BUS_ID_SIZE) == 0); } -static int platform_suspend(struct device *dev, pm_message_t mesg) +#ifdef CONFIG_PM_SLEEP + +static int platform_legacy_suspend(struct device *dev, pm_message_t mesg) { int ret = 0; @@ -570,7 +574,7 @@ static int platform_suspend(struct device *dev, pm_message_t mesg) return ret; } -static int platform_suspend_late(struct device *dev, pm_message_t mesg) +static int platform_legacy_suspend_late(struct device *dev, pm_message_t mesg) { struct platform_driver *drv = to_platform_driver(dev->driver); struct platform_device *pdev; @@ -583,7 +587,7 @@ static int platform_suspend_late(struct device *dev, pm_message_t mesg) return ret; } -static int platform_resume_early(struct device *dev) +static int platform_legacy_resume_early(struct device *dev) { struct platform_driver *drv = to_platform_driver(dev->driver); struct platform_device *pdev; @@ -596,7 +600,7 @@ static int platform_resume_early(struct device *dev) return ret; } -static int platform_resume(struct device *dev) +static int platform_legacy_resume(struct device *dev) { int ret = 0; @@ -606,15 +610,291 @@ static int platform_resume(struct device *dev) return ret; } +static int platform_pm_prepare(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm && drv->pm->prepare) + ret = drv->pm->prepare(dev); + + return ret; +} + +static void platform_pm_complete(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv && drv->pm && drv->pm->complete) + drv->pm->complete(dev); +} + +#ifdef CONFIG_SUSPEND + +static int platform_pm_suspend(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->suspend) + ret = drv->pm->suspend(dev); + } else { + ret = platform_legacy_suspend(dev, PMSG_SUSPEND); + } + + return ret; +} + +static int platform_pm_suspend_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->suspend_noirq) + ret = pdrv->pm->suspend_noirq(dev); + } else { + ret = platform_legacy_suspend_late(dev, PMSG_SUSPEND); + } + + return ret; +} + +static int platform_pm_resume(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->resume) + ret = drv->pm->resume(dev); + } else { + ret = platform_legacy_resume(dev); + } + + return ret; +} + +static int platform_pm_resume_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->resume_noirq) + ret = pdrv->pm->resume_noirq(dev); + } else { + ret = platform_legacy_resume_early(dev); + } + + return ret; +} + +#else /* !CONFIG_SUSPEND */ + +#define platform_pm_suspend NULL +#define platform_pm_resume NULL +#define platform_pm_suspend_noirq NULL +#define platform_pm_resume_noirq NULL + +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_HIBERNATION + +static int platform_pm_freeze(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->freeze) + ret = drv->pm->freeze(dev); + } else { + ret = platform_legacy_suspend(dev, PMSG_FREEZE); + } + + return ret; +} + +static int platform_pm_freeze_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->freeze_noirq) + ret = pdrv->pm->freeze_noirq(dev); + } else { + ret = platform_legacy_suspend_late(dev, PMSG_FREEZE); + } + + return ret; +} + +static int platform_pm_thaw(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->thaw) + ret = drv->pm->thaw(dev); + } else { + ret = platform_legacy_resume(dev); + } + + return ret; +} + +static int platform_pm_thaw_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->thaw_noirq) + ret = pdrv->pm->thaw_noirq(dev); + } else { + ret = platform_legacy_resume_early(dev); + } + + return ret; +} + +static int platform_pm_poweroff(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->poweroff) + ret = drv->pm->poweroff(dev); + } else { + ret = platform_legacy_suspend(dev, PMSG_HIBERNATE); + } + + return ret; +} + +static int platform_pm_poweroff_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->poweroff_noirq) + ret = pdrv->pm->poweroff_noirq(dev); + } else { + ret = platform_legacy_suspend_late(dev, PMSG_HIBERNATE); + } + + return ret; +} + +static int platform_pm_restore(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm) { + if (drv->pm->restore) + ret = drv->pm->restore(dev); + } else { + ret = platform_legacy_resume(dev); + } + + return ret; +} + +static int platform_pm_restore_noirq(struct device *dev) +{ + struct platform_driver *pdrv; + int ret = 0; + + if (!dev->driver) + return 0; + + pdrv = to_platform_driver(dev->driver); + if (pdrv->pm) { + if (pdrv->pm->restore_noirq) + ret = pdrv->pm->restore_noirq(dev); + } else { + ret = platform_legacy_resume_early(dev); + } + + return ret; +} + +#else /* !CONFIG_HIBERNATION */ + +#define platform_pm_freeze NULL +#define platform_pm_thaw NULL +#define platform_pm_poweroff NULL +#define platform_pm_restore NULL +#define platform_pm_freeze_noirq NULL +#define platform_pm_thaw_noirq NULL +#define platform_pm_poweroff_noirq NULL +#define platform_pm_restore_noirq NULL + +#endif /* !CONFIG_HIBERNATION */ + +struct pm_ext_ops platform_pm_ops = { + .base = { + .prepare = platform_pm_prepare, + .complete = platform_pm_complete, + .suspend = platform_pm_suspend, + .resume = platform_pm_resume, + .freeze = platform_pm_freeze, + .thaw = platform_pm_thaw, + .poweroff = platform_pm_poweroff, + .restore = platform_pm_restore, + }, + .suspend_noirq = platform_pm_suspend_noirq, + .resume_noirq = platform_pm_resume_noirq, + .freeze_noirq = platform_pm_freeze_noirq, + .thaw_noirq = platform_pm_thaw_noirq, + .poweroff_noirq = platform_pm_poweroff_noirq, + .restore_noirq = platform_pm_restore_noirq, +}; + +#define PLATFORM_PM_OPS_PTR &platform_pm_ops + +#else /* !CONFIG_PM_SLEEP */ + +#define PLATFORM_PM_OPS_PTR NULL + +#endif /* !CONFIG_PM_SLEEP */ + struct bus_type platform_bus_type = { .name = "platform", .dev_attrs = platform_dev_attrs, .match = platform_match, .uevent = platform_uevent, - .suspend = platform_suspend, - .suspend_late = platform_suspend_late, - .resume_early = platform_resume_early, - .resume = platform_resume, + .pm = PLATFORM_PM_OPS_PTR, }; EXPORT_SYMBOL_GPL(platform_bus_type); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 45cc3d9eacb8..3250c5257b74 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -12,11 +12,9 @@ * and add it to the list of power-controlled devices. sysfs entries for * controlling device power management will also be added. * - * A different set of lists than the global subsystem list are used to - * keep track of power info because we use different lists to hold - * devices based on what stage of the power management process they - * are in. The power domain dependencies may also differ from the - * ancestral dependencies that the subsystem list maintains. + * A separate list is used for keeping track of power info, because the power + * domain dependencies may differ from the ancestral dependencies that the + * subsystem list maintains. */ #include @@ -30,31 +28,40 @@ #include "power.h" /* - * The entries in the dpm_active list are in a depth first order, simply + * The entries in the dpm_list list are in a depth first order, simply * because children are guaranteed to be discovered after parents, and * are inserted at the back of the list on discovery. * - * All the other lists are kept in the same order, for consistency. - * However the lists aren't always traversed in the same order. - * Semaphores must be acquired from the top (i.e., front) down - * and released in the opposite order. Devices must be suspended - * from the bottom (i.e., end) up and resumed in the opposite order. - * That way no parent will be suspended while it still has an active - * child. - * * Since device_pm_add() may be called with a device semaphore held, * we must never try to acquire a device semaphore while holding * dpm_list_mutex. */ -LIST_HEAD(dpm_active); -static LIST_HEAD(dpm_off); -static LIST_HEAD(dpm_off_irq); +LIST_HEAD(dpm_list); static DEFINE_MUTEX(dpm_list_mtx); -/* 'true' if all devices have been suspended, protected by dpm_list_mtx */ -static bool all_sleeping; +/* + * Set once the preparation of devices for a PM transition has started, reset + * before starting to resume devices. Protected by dpm_list_mtx. + */ +static bool transition_started; + +/** + * device_pm_lock - lock the list of active devices used by the PM core + */ +void device_pm_lock(void) +{ + mutex_lock(&dpm_list_mtx); +} + +/** + * device_pm_unlock - unlock the list of active devices used by the PM core + */ +void device_pm_unlock(void) +{ + mutex_unlock(&dpm_list_mtx); +} /** * device_pm_add - add a device to the list of active devices @@ -68,17 +75,25 @@ int device_pm_add(struct device *dev) dev->bus ? dev->bus->name : "No Bus", kobject_name(&dev->kobj)); mutex_lock(&dpm_list_mtx); - if ((dev->parent && dev->parent->power.sleeping) || all_sleeping) { - if (dev->parent->power.sleeping) - dev_warn(dev, "parent %s is sleeping\n", + if (dev->parent) { + if (dev->parent->power.status >= DPM_SUSPENDING) { + dev_warn(dev, "parent %s is sleeping, will not add\n", dev->parent->bus_id); - else - dev_warn(dev, "all devices are sleeping\n"); + WARN_ON(true); + } + } else if (transition_started) { + /* + * We refuse to register parentless devices while a PM + * transition is in progress in order to avoid leaving them + * unhandled down the road + */ WARN_ON(true); } error = dpm_sysfs_add(dev); - if (!error) - list_add_tail(&dev->power.entry, &dpm_active); + if (!error) { + dev->power.status = DPM_ON; + list_add_tail(&dev->power.entry, &dpm_list); + } mutex_unlock(&dpm_list_mtx); return error; } @@ -100,73 +115,243 @@ void device_pm_remove(struct device *dev) mutex_unlock(&dpm_list_mtx); } +/** + * pm_op - execute the PM operation appropiate for given PM event + * @dev: Device. + * @ops: PM operations to choose from. + * @state: PM transition of the system being carried out. + */ +static int pm_op(struct device *dev, struct pm_ops *ops, pm_message_t state) +{ + int error = 0; + + switch (state.event) { +#ifdef CONFIG_SUSPEND + case PM_EVENT_SUSPEND: + if (ops->suspend) { + error = ops->suspend(dev); + suspend_report_result(ops->suspend, error); + } + break; + case PM_EVENT_RESUME: + if (ops->resume) { + error = ops->resume(dev); + suspend_report_result(ops->resume, error); + } + break; +#endif /* CONFIG_SUSPEND */ +#ifdef CONFIG_HIBERNATION + case PM_EVENT_FREEZE: + case PM_EVENT_QUIESCE: + if (ops->freeze) { + error = ops->freeze(dev); + suspend_report_result(ops->freeze, error); + } + break; + case PM_EVENT_HIBERNATE: + if (ops->poweroff) { + error = ops->poweroff(dev); + suspend_report_result(ops->poweroff, error); + } + break; + case PM_EVENT_THAW: + case PM_EVENT_RECOVER: + if (ops->thaw) { + error = ops->thaw(dev); + suspend_report_result(ops->thaw, error); + } + break; + case PM_EVENT_RESTORE: + if (ops->restore) { + error = ops->restore(dev); + suspend_report_result(ops->restore, error); + } + break; +#endif /* CONFIG_HIBERNATION */ + default: + error = -EINVAL; + } + return error; +} + +/** + * pm_noirq_op - execute the PM operation appropiate for given PM event + * @dev: Device. + * @ops: PM operations to choose from. + * @state: PM transition of the system being carried out. + * + * The operation is executed with interrupts disabled by the only remaining + * functional CPU in the system. + */ +static int pm_noirq_op(struct device *dev, struct pm_ext_ops *ops, + pm_message_t state) +{ + int error = 0; + + switch (state.event) { +#ifdef CONFIG_SUSPEND + case PM_EVENT_SUSPEND: + if (ops->suspend_noirq) { + error = ops->suspend_noirq(dev); + suspend_report_result(ops->suspend_noirq, error); + } + break; + case PM_EVENT_RESUME: + if (ops->resume_noirq) { + error = ops->resume_noirq(dev); + suspend_report_result(ops->resume_noirq, error); + } + break; +#endif /* CONFIG_SUSPEND */ +#ifdef CONFIG_HIBERNATION + case PM_EVENT_FREEZE: + case PM_EVENT_QUIESCE: + if (ops->freeze_noirq) { + error = ops->freeze_noirq(dev); + suspend_report_result(ops->freeze_noirq, error); + } + break; + case PM_EVENT_HIBERNATE: + if (ops->poweroff_noirq) { + error = ops->poweroff_noirq(dev); + suspend_report_result(ops->poweroff_noirq, error); + } + break; + case PM_EVENT_THAW: + case PM_EVENT_RECOVER: + if (ops->thaw_noirq) { + error = ops->thaw_noirq(dev); + suspend_report_result(ops->thaw_noirq, error); + } + break; + case PM_EVENT_RESTORE: + if (ops->restore_noirq) { + error = ops->restore_noirq(dev); + suspend_report_result(ops->restore_noirq, error); + } + break; +#endif /* CONFIG_HIBERNATION */ + default: + error = -EINVAL; + } + return error; +} + +static char *pm_verb(int event) +{ + switch (event) { + case PM_EVENT_SUSPEND: + return "suspend"; + case PM_EVENT_RESUME: + return "resume"; + case PM_EVENT_FREEZE: + return "freeze"; + case PM_EVENT_QUIESCE: + return "quiesce"; + case PM_EVENT_HIBERNATE: + return "hibernate"; + case PM_EVENT_THAW: + return "thaw"; + case PM_EVENT_RESTORE: + return "restore"; + case PM_EVENT_RECOVER: + return "recover"; + default: + return "(unknown PM event)"; + } +} + +static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info) +{ + dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event), + ((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ? + ", may wakeup" : ""); +} + +static void pm_dev_err(struct device *dev, pm_message_t state, char *info, + int error) +{ + printk(KERN_ERR "PM: Device %s failed to %s%s: error %d\n", + kobject_name(&dev->kobj), pm_verb(state.event), info, error); +} + /*------------------------- Resume routines -------------------------*/ /** - * resume_device_early - Power on one device (early resume). + * resume_device_noirq - Power on one device (early resume). * @dev: Device. + * @state: PM transition of the system being carried out. * * Must be called with interrupts disabled. */ -static int resume_device_early(struct device *dev) +static int resume_device_noirq(struct device *dev, pm_message_t state) { int error = 0; TRACE_DEVICE(dev); TRACE_RESUME(0); - if (dev->bus && dev->bus->resume_early) { - dev_dbg(dev, "EARLY resume\n"); + if (!dev->bus) + goto End; + + if (dev->bus->pm) { + pm_dev_dbg(dev, state, "EARLY "); + error = pm_noirq_op(dev, dev->bus->pm, state); + } else if (dev->bus->resume_early) { + pm_dev_dbg(dev, state, "legacy EARLY "); error = dev->bus->resume_early(dev); } - + End: TRACE_RESUME(error); return error; } /** * dpm_power_up - Power on all regular (non-sysdev) devices. + * @state: PM transition of the system being carried out. * - * Walk the dpm_off_irq list and power each device up. This - * is used for devices that required they be powered down with - * interrupts disabled. As devices are powered on, they are moved - * to the dpm_off list. + * Execute the appropriate "noirq resume" callback for all devices marked + * as DPM_OFF_IRQ. * * Must be called with interrupts disabled and only one CPU running. */ -static void dpm_power_up(void) +static void dpm_power_up(pm_message_t state) { + struct device *dev; - while (!list_empty(&dpm_off_irq)) { - struct list_head *entry = dpm_off_irq.next; - struct device *dev = to_device(entry); + list_for_each_entry(dev, &dpm_list, power.entry) + if (dev->power.status > DPM_OFF) { + int error; - list_move_tail(entry, &dpm_off); - resume_device_early(dev); - } + dev->power.status = DPM_OFF; + error = resume_device_noirq(dev, state); + if (error) + pm_dev_err(dev, state, " early", error); + } } /** * device_power_up - Turn on all devices that need special attention. + * @state: PM transition of the system being carried out. * * Power on system devices, then devices that required we shut them down * with interrupts disabled. * * Must be called with interrupts disabled. */ -void device_power_up(void) +void device_power_up(pm_message_t state) { sysdev_resume(); - dpm_power_up(); + dpm_power_up(state); } EXPORT_SYMBOL_GPL(device_power_up); /** * resume_device - Restore state for one device. * @dev: Device. - * + * @state: PM transition of the system being carried out. */ -static int resume_device(struct device *dev) +static int resume_device(struct device *dev, pm_message_t state) { int error = 0; @@ -175,21 +360,40 @@ static int resume_device(struct device *dev) down(&dev->sem); - if (dev->bus && dev->bus->resume) { - dev_dbg(dev,"resuming\n"); - error = dev->bus->resume(dev); + if (dev->bus) { + if (dev->bus->pm) { + pm_dev_dbg(dev, state, ""); + error = pm_op(dev, &dev->bus->pm->base, state); + } else if (dev->bus->resume) { + pm_dev_dbg(dev, state, "legacy "); + error = dev->bus->resume(dev); + } + if (error) + goto End; } - if (!error && dev->type && dev->type->resume) { - dev_dbg(dev,"resuming\n"); - error = dev->type->resume(dev); + if (dev->type) { + if (dev->type->pm) { + pm_dev_dbg(dev, state, "type "); + error = pm_op(dev, dev->type->pm, state); + } else if (dev->type->resume) { + pm_dev_dbg(dev, state, "legacy type "); + error = dev->type->resume(dev); + } + if (error) + goto End; } - if (!error && dev->class && dev->class->resume) { - dev_dbg(dev,"class resume\n"); - error = dev->class->resume(dev); + if (dev->class) { + if (dev->class->pm) { + pm_dev_dbg(dev, state, "class "); + error = pm_op(dev, dev->class->pm, state); + } else if (dev->class->resume) { + pm_dev_dbg(dev, state, "legacy class "); + error = dev->class->resume(dev); + } } - + End: up(&dev->sem); TRACE_RESUME(error); @@ -198,78 +402,161 @@ static int resume_device(struct device *dev) /** * dpm_resume - Resume every device. + * @state: PM transition of the system being carried out. * - * Resume the devices that have either not gone through - * the late suspend, or that did go through it but also - * went through the early resume. + * Execute the appropriate "resume" callback for all devices the status of + * which indicates that they are inactive. + */ +static void dpm_resume(pm_message_t state) +{ + struct list_head list; + + INIT_LIST_HEAD(&list); + mutex_lock(&dpm_list_mtx); + transition_started = false; + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.next); + + get_device(dev); + if (dev->power.status >= DPM_OFF) { + int error; + + dev->power.status = DPM_RESUMING; + mutex_unlock(&dpm_list_mtx); + + error = resume_device(dev, state); + + mutex_lock(&dpm_list_mtx); + if (error) + pm_dev_err(dev, state, "", error); + } else if (dev->power.status == DPM_SUSPENDING) { + /* Allow new children of the device to be registered */ + dev->power.status = DPM_RESUMING; + } + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &list); + put_device(dev); + } + list_splice(&list, &dpm_list); + mutex_unlock(&dpm_list_mtx); +} + +/** + * complete_device - Complete a PM transition for given device + * @dev: Device. + * @state: PM transition of the system being carried out. + */ +static void complete_device(struct device *dev, pm_message_t state) +{ + down(&dev->sem); + + if (dev->class && dev->class->pm && dev->class->pm->complete) { + pm_dev_dbg(dev, state, "completing class "); + dev->class->pm->complete(dev); + } + + if (dev->type && dev->type->pm && dev->type->pm->complete) { + pm_dev_dbg(dev, state, "completing type "); + dev->type->pm->complete(dev); + } + + if (dev->bus && dev->bus->pm && dev->bus->pm->base.complete) { + pm_dev_dbg(dev, state, "completing "); + dev->bus->pm->base.complete(dev); + } + + up(&dev->sem); +} + +/** + * dpm_complete - Complete a PM transition for all devices. + * @state: PM transition of the system being carried out. * - * Take devices from the dpm_off_list, resume them, - * and put them on the dpm_locked list. + * Execute the ->complete() callbacks for all devices that are not marked + * as DPM_ON. */ -static void dpm_resume(void) +static void dpm_complete(pm_message_t state) { + struct list_head list; + + INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); - all_sleeping = false; - while(!list_empty(&dpm_off)) { - struct list_head *entry = dpm_off.next; - struct device *dev = to_device(entry); + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.prev); - list_move_tail(entry, &dpm_active); - dev->power.sleeping = false; - mutex_unlock(&dpm_list_mtx); - resume_device(dev); - mutex_lock(&dpm_list_mtx); + get_device(dev); + if (dev->power.status > DPM_ON) { + dev->power.status = DPM_ON; + mutex_unlock(&dpm_list_mtx); + + complete_device(dev, state); + + mutex_lock(&dpm_list_mtx); + } + if (!list_empty(&dev->power.entry)) + list_move(&dev->power.entry, &list); + put_device(dev); } + list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); } /** * device_resume - Restore state of each device in system. + * @state: PM transition of the system being carried out. * * Resume all the devices, unlock them all, and allow new * devices to be registered once again. */ -void device_resume(void) +void device_resume(pm_message_t state) { might_sleep(); - dpm_resume(); + dpm_resume(state); + dpm_complete(state); } EXPORT_SYMBOL_GPL(device_resume); /*------------------------- Suspend routines -------------------------*/ -static inline char *suspend_verb(u32 event) +/** + * resume_event - return a PM message representing the resume event + * corresponding to given sleep state. + * @sleep_state: PM message representing a sleep state. + */ +static pm_message_t resume_event(pm_message_t sleep_state) { - switch (event) { - case PM_EVENT_SUSPEND: return "suspend"; - case PM_EVENT_FREEZE: return "freeze"; - case PM_EVENT_PRETHAW: return "prethaw"; - default: return "(unknown suspend event)"; + switch (sleep_state.event) { + case PM_EVENT_SUSPEND: + return PMSG_RESUME; + case PM_EVENT_FREEZE: + case PM_EVENT_QUIESCE: + return PMSG_RECOVER; + case PM_EVENT_HIBERNATE: + return PMSG_RESTORE; } -} - -static void -suspend_device_dbg(struct device *dev, pm_message_t state, char *info) -{ - dev_dbg(dev, "%s%s%s\n", info, suspend_verb(state.event), - ((state.event == PM_EVENT_SUSPEND) && device_may_wakeup(dev)) ? - ", may wakeup" : ""); + return PMSG_ON; } /** - * suspend_device_late - Shut down one device (late suspend). + * suspend_device_noirq - Shut down one device (late suspend). * @dev: Device. - * @state: Power state device is entering. + * @state: PM transition of the system being carried out. * * This is called with interrupts off and only a single CPU running. */ -static int suspend_device_late(struct device *dev, pm_message_t state) +static int suspend_device_noirq(struct device *dev, pm_message_t state) { int error = 0; - if (dev->bus && dev->bus->suspend_late) { - suspend_device_dbg(dev, state, "LATE "); + if (!dev->bus) + return 0; + + if (dev->bus->pm) { + pm_dev_dbg(dev, state, "LATE "); + error = pm_noirq_op(dev, dev->bus->pm, state); + } else if (dev->bus->suspend_late) { + pm_dev_dbg(dev, state, "legacy LATE "); error = dev->bus->suspend_late(dev, state); suspend_report_result(dev->bus->suspend_late, error); } @@ -278,37 +565,30 @@ static int suspend_device_late(struct device *dev, pm_message_t state) /** * device_power_down - Shut down special devices. - * @state: Power state to enter. + * @state: PM transition of the system being carried out. * - * Power down devices that require interrupts to be disabled - * and move them from the dpm_off list to the dpm_off_irq list. + * Power down devices that require interrupts to be disabled. * Then power down system devices. * * Must be called with interrupts disabled and only one CPU running. */ int device_power_down(pm_message_t state) { + struct device *dev; int error = 0; - while (!list_empty(&dpm_off)) { - struct list_head *entry = dpm_off.prev; - struct device *dev = to_device(entry); - - error = suspend_device_late(dev, state); + list_for_each_entry_reverse(dev, &dpm_list, power.entry) { + error = suspend_device_noirq(dev, state); if (error) { - printk(KERN_ERR "Could not power down device %s: " - "error %d\n", - kobject_name(&dev->kobj), error); + pm_dev_err(dev, state, " late", error); break; } - if (!list_empty(&dev->power.entry)) - list_move(&dev->power.entry, &dpm_off_irq); + dev->power.status = DPM_OFF_IRQ; } - if (!error) error = sysdev_suspend(state); if (error) - dpm_power_up(); + dpm_power_up(resume_event(state)); return error; } EXPORT_SYMBOL_GPL(device_power_down); @@ -316,7 +596,7 @@ EXPORT_SYMBOL_GPL(device_power_down); /** * suspend_device - Save state of one device. * @dev: Device. - * @state: Power state device is entering. + * @state: PM transition of the system being carried out. */ static int suspend_device(struct device *dev, pm_message_t state) { @@ -324,24 +604,43 @@ static int suspend_device(struct device *dev, pm_message_t state) down(&dev->sem); - if (dev->class && dev->class->suspend) { - suspend_device_dbg(dev, state, "class "); - error = dev->class->suspend(dev, state); - suspend_report_result(dev->class->suspend, error); + if (dev->class) { + if (dev->class->pm) { + pm_dev_dbg(dev, state, "class "); + error = pm_op(dev, dev->class->pm, state); + } else if (dev->class->suspend) { + pm_dev_dbg(dev, state, "legacy class "); + error = dev->class->suspend(dev, state); + suspend_report_result(dev->class->suspend, error); + } + if (error) + goto End; } - if (!error && dev->type && dev->type->suspend) { - suspend_device_dbg(dev, state, "type "); - error = dev->type->suspend(dev, state); - suspend_report_result(dev->type->suspend, error); + if (dev->type) { + if (dev->type->pm) { + pm_dev_dbg(dev, state, "type "); + error = pm_op(dev, dev->type->pm, state); + } else if (dev->type->suspend) { + pm_dev_dbg(dev, state, "legacy type "); + error = dev->type->suspend(dev, state); + suspend_report_result(dev->type->suspend, error); + } + if (error) + goto End; } - if (!error && dev->bus && dev->bus->suspend) { - suspend_device_dbg(dev, state, ""); - error = dev->bus->suspend(dev, state); - suspend_report_result(dev->bus->suspend, error); + if (dev->bus) { + if (dev->bus->pm) { + pm_dev_dbg(dev, state, ""); + error = pm_op(dev, &dev->bus->pm->base, state); + } else if (dev->bus->suspend) { + pm_dev_dbg(dev, state, "legacy "); + error = dev->bus->suspend(dev, state); + suspend_report_result(dev->bus->suspend, error); + } } - + End: up(&dev->sem); return error; @@ -349,67 +648,139 @@ static int suspend_device(struct device *dev, pm_message_t state) /** * dpm_suspend - Suspend every device. - * @state: Power state to put each device in. - * - * Walk the dpm_locked list. Suspend each device and move it - * to the dpm_off list. + * @state: PM transition of the system being carried out. * - * (For historical reasons, if it returns -EAGAIN, that used to mean - * that the device would be called again with interrupts disabled. - * These days, we use the "suspend_late()" callback for that, so we - * print a warning and consider it an error). + * Execute the appropriate "suspend" callbacks for all devices. */ static int dpm_suspend(pm_message_t state) { + struct list_head list; int error = 0; + INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); - while (!list_empty(&dpm_active)) { - struct list_head *entry = dpm_active.prev; - struct device *dev = to_device(entry); + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.prev); - WARN_ON(dev->parent && dev->parent->power.sleeping); - - dev->power.sleeping = true; + get_device(dev); mutex_unlock(&dpm_list_mtx); + error = suspend_device(dev, state); + mutex_lock(&dpm_list_mtx); if (error) { - printk(KERN_ERR "Could not suspend device %s: " - "error %d%s\n", - kobject_name(&dev->kobj), - error, - (error == -EAGAIN ? - " (please convert to suspend_late)" : - "")); - dev->power.sleeping = false; + pm_dev_err(dev, state, "", error); + put_device(dev); break; } + dev->power.status = DPM_OFF; if (!list_empty(&dev->power.entry)) - list_move(&dev->power.entry, &dpm_off); + list_move(&dev->power.entry, &list); + put_device(dev); } - if (!error) - all_sleeping = true; + list_splice(&list, dpm_list.prev); mutex_unlock(&dpm_list_mtx); + return error; +} + +/** + * prepare_device - Execute the ->prepare() callback(s) for given device. + * @dev: Device. + * @state: PM transition of the system being carried out. + */ +static int prepare_device(struct device *dev, pm_message_t state) +{ + int error = 0; + + down(&dev->sem); + + if (dev->bus && dev->bus->pm && dev->bus->pm->base.prepare) { + pm_dev_dbg(dev, state, "preparing "); + error = dev->bus->pm->base.prepare(dev); + suspend_report_result(dev->bus->pm->base.prepare, error); + if (error) + goto End; + } + + if (dev->type && dev->type->pm && dev->type->pm->prepare) { + pm_dev_dbg(dev, state, "preparing type "); + error = dev->type->pm->prepare(dev); + suspend_report_result(dev->type->pm->prepare, error); + if (error) + goto End; + } + + if (dev->class && dev->class->pm && dev->class->pm->prepare) { + pm_dev_dbg(dev, state, "preparing class "); + error = dev->class->pm->prepare(dev); + suspend_report_result(dev->class->pm->prepare, error); + } + End: + up(&dev->sem); + + return error; +} + +/** + * dpm_prepare - Prepare all devices for a PM transition. + * @state: PM transition of the system being carried out. + * + * Execute the ->prepare() callback for all devices. + */ +static int dpm_prepare(pm_message_t state) +{ + struct list_head list; + int error = 0; + + INIT_LIST_HEAD(&list); + mutex_lock(&dpm_list_mtx); + transition_started = true; + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.next); + + get_device(dev); + dev->power.status = DPM_PREPARING; + mutex_unlock(&dpm_list_mtx); + error = prepare_device(dev, state); + + mutex_lock(&dpm_list_mtx); + if (error) { + dev->power.status = DPM_ON; + if (error == -EAGAIN) { + put_device(dev); + continue; + } + printk(KERN_ERR "PM: Failed to prepare device %s " + "for power transition: error %d\n", + kobject_name(&dev->kobj), error); + put_device(dev); + break; + } + dev->power.status = DPM_SUSPENDING; + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &list); + put_device(dev); + } + list_splice(&list, &dpm_list); + mutex_unlock(&dpm_list_mtx); return error; } /** * device_suspend - Save state and stop all devices in system. - * @state: new power management state + * @state: PM transition of the system being carried out. * - * Prevent new devices from being registered, then lock all devices - * and suspend them. + * Prepare and suspend all devices. */ int device_suspend(pm_message_t state) { int error; might_sleep(); - error = dpm_suspend(state); - if (error) - device_resume(); + error = dpm_prepare(state); + if (!error) + error = dpm_suspend(state); return error; } EXPORT_SYMBOL_GPL(device_suspend); diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index a6894f2a4b99..a3252c0e2887 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -4,7 +4,7 @@ * main.c */ -extern struct list_head dpm_active; /* The active device list */ +extern struct list_head dpm_list; /* The active device list */ static inline struct device *to_device(struct list_head *entry) { diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index d11f74b038db..596aeecfdffe 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -6,9 +6,6 @@ #include #include "power.h" -int (*platform_enable_wakeup)(struct device *dev, int is_on); - - /* * wakeup - Report/change current wakeup option for device * diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 87a7f1d02578..9b1b20b59e0a 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -188,9 +188,9 @@ static int show_file_hash(unsigned int value) static int show_dev_hash(unsigned int value) { int match = 0; - struct list_head * entry = dpm_active.prev; + struct list_head *entry = dpm_list.prev; - while (entry != &dpm_active) { + while (entry != &dpm_list) { struct device * dev = to_device(entry); unsigned int hash = hash_string(DEVSEED, dev->bus_id, DEVHASH); if (hash == value) { diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 0d1d2133d9bc..61ad8d639ba3 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -433,4 +433,16 @@ config VIRTIO_BLK This is the virtual block driver for virtio. It can be used with lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. +config BLK_DEV_HD + bool "Very old hard disk (MFM/RLL/IDE) driver" + depends on HAVE_IDE + depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN + help + This is a very old hard disk driver that lacks the enhanced + functionality of the newer ones. + + It is required for systems with ancient MFM/RLL/ESDI drives. + + If unsure, say N. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 5e584306be99..204332b29578 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -29,5 +29,6 @@ obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o +obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o diff --git a/drivers/block/hd.c b/drivers/block/hd.c new file mode 100644 index 000000000000..682243bf2e46 --- /dev/null +++ b/drivers/block/hd.c @@ -0,0 +1,814 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * + * This is the low-level hd interrupt support. It traverses the + * request-list, using interrupts to jump between functions. As + * all the functions are called within interrupts, we may not + * sleep. Special care is recommended. + * + * modified by Drew Eckhardt to check nr of hd's from the CMOS. + * + * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug + * in the early extended-partition checks and added DM partitions + * + * IRQ-unmask, drive-id, multiple-mode, support for ">16 heads", + * and general streamlining by Mark Lord. + * + * Removed 99% of above. Use Mark's ide driver for those options. + * This is now a lightweight ST-506 driver. (Paul Gortmaker) + * + * Modified 1995 Russell King for ARM processor. + * + * Bugfix: max_sectors must be <= 255 or the wheels tend to come + * off in a hurry once you queue things up - Paul G. 02/2001 + */ + +/* Uncomment the following if you want verbose error reports. */ +/* #define VERBOSE_ERRORS */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define REALLY_SLOW_IO +#include +#include +#include + +#ifdef __arm__ +#undef HD_IRQ +#endif +#include +#ifdef __arm__ +#define HD_IRQ IRQ_HARDDISK +#endif + +/* Hd controller regster ports */ + +#define HD_DATA 0x1f0 /* _CTL when writing */ +#define HD_ERROR 0x1f1 /* see err-bits */ +#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ +#define HD_SECTOR 0x1f3 /* starting sector */ +#define HD_LCYL 0x1f4 /* starting cylinder */ +#define HD_HCYL 0x1f5 /* high byte of starting cyl */ +#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ +#define HD_STATUS 0x1f7 /* see status-bits */ +#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ +#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ +#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ + +#define HD_CMD 0x3f6 /* used for resets */ +#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ + +/* Bits of HD_STATUS */ +#define ERR_STAT 0x01 +#define INDEX_STAT 0x02 +#define ECC_STAT 0x04 /* Corrected error */ +#define DRQ_STAT 0x08 +#define SEEK_STAT 0x10 +#define SERVICE_STAT SEEK_STAT +#define WRERR_STAT 0x20 +#define READY_STAT 0x40 +#define BUSY_STAT 0x80 + +/* Bits for HD_ERROR */ +#define MARK_ERR 0x01 /* Bad address mark */ +#define TRK0_ERR 0x02 /* couldn't find track 0 */ +#define ABRT_ERR 0x04 /* Command aborted */ +#define MCR_ERR 0x08 /* media change request */ +#define ID_ERR 0x10 /* ID field not found */ +#define MC_ERR 0x20 /* media changed */ +#define ECC_ERR 0x40 /* Uncorrectable ECC error */ +#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ +#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ + +static DEFINE_SPINLOCK(hd_lock); +static struct request_queue *hd_queue; + +#define MAJOR_NR HD_MAJOR +#define QUEUE (hd_queue) +#define CURRENT elv_next_request(hd_queue) + +#define TIMEOUT_VALUE (6*HZ) +#define HD_DELAY 0 + +#define MAX_ERRORS 16 /* Max read/write errors/sector */ +#define RESET_FREQ 8 /* Reset controller every 8th retry */ +#define RECAL_FREQ 4 /* Recalibrate every 4th retry */ +#define MAX_HD 2 + +#define STAT_OK (READY_STAT|SEEK_STAT) +#define OK_STATUS(s) (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK) + +static void recal_intr(void); +static void bad_rw_intr(void); + +static int reset; +static int hd_error; + +/* + * This struct defines the HD's and their types. + */ +struct hd_i_struct { + unsigned int head, sect, cyl, wpcom, lzone, ctl; + int unit; + int recalibrate; + int special_op; +}; + +#ifdef HD_TYPE +static struct hd_i_struct hd_info[] = { HD_TYPE }; +static int NR_HD = ARRAY_SIZE(hd_info); +#else +static struct hd_i_struct hd_info[MAX_HD]; +static int NR_HD; +#endif + +static struct gendisk *hd_gendisk[MAX_HD]; + +static struct timer_list device_timer; + +#define TIMEOUT_VALUE (6*HZ) + +#define SET_TIMER \ + do { \ + mod_timer(&device_timer, jiffies + TIMEOUT_VALUE); \ + } while (0) + +static void (*do_hd)(void) = NULL; +#define SET_HANDLER(x) \ +if ((do_hd = (x)) != NULL) \ + SET_TIMER; \ +else \ + del_timer(&device_timer); + + +#if (HD_DELAY > 0) + +#include + +unsigned long last_req; + +unsigned long read_timer(void) +{ + unsigned long t, flags; + int i; + + spin_lock_irqsave(&i8253_lock, flags); + t = jiffies * 11932; + outb_p(0, 0x43); + i = inb_p(0x40); + i |= inb(0x40) << 8; + spin_unlock_irqrestore(&i8253_lock, flags); + return(t - i); +} +#endif + +static void __init hd_setup(char *str, int *ints) +{ + int hdind = 0; + + if (ints[0] != 3) + return; + if (hd_info[0].head != 0) + hdind = 1; + hd_info[hdind].head = ints[2]; + hd_info[hdind].sect = ints[3]; + hd_info[hdind].cyl = ints[1]; + hd_info[hdind].wpcom = 0; + hd_info[hdind].lzone = ints[1]; + hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0); + NR_HD = hdind+1; +} + +static void dump_status(const char *msg, unsigned int stat) +{ + char *name = "hd?"; + if (CURRENT) + name = CURRENT->rq_disk->disk_name; + +#ifdef VERBOSE_ERRORS + printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff); + if (stat & BUSY_STAT) printk("Busy "); + if (stat & READY_STAT) printk("DriveReady "); + if (stat & WRERR_STAT) printk("WriteFault "); + if (stat & SEEK_STAT) printk("SeekComplete "); + if (stat & DRQ_STAT) printk("DataRequest "); + if (stat & ECC_STAT) printk("CorrectedError "); + if (stat & INDEX_STAT) printk("Index "); + if (stat & ERR_STAT) printk("Error "); + printk("}\n"); + if ((stat & ERR_STAT) == 0) { + hd_error = 0; + } else { + hd_error = inb(HD_ERROR); + printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff); + if (hd_error & BBD_ERR) printk("BadSector "); + if (hd_error & ECC_ERR) printk("UncorrectableError "); + if (hd_error & ID_ERR) printk("SectorIdNotFound "); + if (hd_error & ABRT_ERR) printk("DriveStatusError "); + if (hd_error & TRK0_ERR) printk("TrackZeroNotFound "); + if (hd_error & MARK_ERR) printk("AddrMarkNotFound "); + printk("}"); + if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { + printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), + inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); + if (CURRENT) + printk(", sector=%ld", CURRENT->sector); + } + printk("\n"); + } +#else + printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff); + if ((stat & ERR_STAT) == 0) { + hd_error = 0; + } else { + hd_error = inb(HD_ERROR); + printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff); + } +#endif +} + +static void check_status(void) +{ + int i = inb_p(HD_STATUS); + + if (!OK_STATUS(i)) { + dump_status("check_status", i); + bad_rw_intr(); + } +} + +static int controller_busy(void) +{ + int retries = 100000; + unsigned char status; + + do { + status = inb_p(HD_STATUS); + } while ((status & BUSY_STAT) && --retries); + return status; +} + +static int status_ok(void) +{ + unsigned char status = inb_p(HD_STATUS); + + if (status & BUSY_STAT) + return 1; /* Ancient, but does it make sense??? */ + if (status & WRERR_STAT) + return 0; + if (!(status & READY_STAT)) + return 0; + if (!(status & SEEK_STAT)) + return 0; + return 1; +} + +static int controller_ready(unsigned int drive, unsigned int head) +{ + int retry = 100; + + do { + if (controller_busy() & BUSY_STAT) + return 0; + outb_p(0xA0 | (drive<<4) | head, HD_CURRENT); + if (status_ok()) + return 1; + } while (--retry); + return 0; +} + +static void hd_out(struct hd_i_struct *disk, + unsigned int nsect, + unsigned int sect, + unsigned int head, + unsigned int cyl, + unsigned int cmd, + void (*intr_addr)(void)) +{ + unsigned short port; + +#if (HD_DELAY > 0) + while (read_timer() - last_req < HD_DELAY) + /* nothing */; +#endif + if (reset) + return; + if (!controller_ready(disk->unit, head)) { + reset = 1; + return; + } + SET_HANDLER(intr_addr); + outb_p(disk->ctl, HD_CMD); + port = HD_DATA; + outb_p(disk->wpcom >> 2, ++port); + outb_p(nsect, ++port); + outb_p(sect, ++port); + outb_p(cyl, ++port); + outb_p(cyl >> 8, ++port); + outb_p(0xA0 | (disk->unit << 4) | head, ++port); + outb_p(cmd, ++port); +} + +static void hd_request (void); + +static int drive_busy(void) +{ + unsigned int i; + unsigned char c; + + for (i = 0; i < 500000 ; i++) { + c = inb_p(HD_STATUS); + if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK) + return 0; + } + dump_status("reset timed out", c); + return 1; +} + +static void reset_controller(void) +{ + int i; + + outb_p(4, HD_CMD); + for (i = 0; i < 1000; i++) barrier(); + outb_p(hd_info[0].ctl & 0x0f, HD_CMD); + for (i = 0; i < 1000; i++) barrier(); + if (drive_busy()) + printk("hd: controller still busy\n"); + else if ((hd_error = inb(HD_ERROR)) != 1) + printk("hd: controller reset failed: %02x\n", hd_error); +} + +static void reset_hd(void) +{ + static int i; + +repeat: + if (reset) { + reset = 0; + i = -1; + reset_controller(); + } else { + check_status(); + if (reset) + goto repeat; + } + if (++i < NR_HD) { + struct hd_i_struct *disk = &hd_info[i]; + disk->special_op = disk->recalibrate = 1; + hd_out(disk, disk->sect, disk->sect, disk->head-1, + disk->cyl, WIN_SPECIFY, &reset_hd); + if (reset) + goto repeat; + } else + hd_request(); +} + +/* + * Ok, don't know what to do with the unexpected interrupts: on some machines + * doing a reset and a retry seems to result in an eternal loop. Right now I + * ignore it, and just set the timeout. + * + * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the + * drive enters "idle", "standby", or "sleep" mode, so if the status looks + * "good", we just ignore the interrupt completely. + */ +static void unexpected_hd_interrupt(void) +{ + unsigned int stat = inb_p(HD_STATUS); + + if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) { + dump_status("unexpected interrupt", stat); + SET_TIMER; + } +} + +/* + * bad_rw_intr() now tries to be a bit smarter and does things + * according to the error returned by the controller. + * -Mika Liljeberg (liljeber@cs.Helsinki.FI) + */ +static void bad_rw_intr(void) +{ + struct request *req = CURRENT; + if (req != NULL) { + struct hd_i_struct *disk = req->rq_disk->private_data; + if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { + end_request(req, 0); + disk->special_op = disk->recalibrate = 1; + } else if (req->errors % RESET_FREQ == 0) + reset = 1; + else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0) + disk->special_op = disk->recalibrate = 1; + /* Otherwise just retry */ + } +} + +static inline int wait_DRQ(void) +{ + int retries; + int stat; + + for (retries = 0; retries < 100000; retries++) { + stat = inb_p(HD_STATUS); + if (stat & DRQ_STAT) + return 0; + } + dump_status("wait_DRQ", stat); + return -1; +} + +static void read_intr(void) +{ + struct request *req; + int i, retries = 100000; + + do { + i = (unsigned) inb_p(HD_STATUS); + if (i & BUSY_STAT) + continue; + if (!OK_STATUS(i)) + break; + if (i & DRQ_STAT) + goto ok_to_read; + } while (--retries > 0); + dump_status("read_intr", i); + bad_rw_intr(); + hd_request(); + return; +ok_to_read: + req = CURRENT; + insw(HD_DATA, req->buffer, 256); + req->sector++; + req->buffer += 512; + req->errors = 0; + i = --req->nr_sectors; + --req->current_nr_sectors; +#ifdef DEBUG + printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n", + req->rq_disk->disk_name, req->sector, req->nr_sectors, + req->buffer+512); +#endif + if (req->current_nr_sectors <= 0) + end_request(req, 1); + if (i > 0) { + SET_HANDLER(&read_intr); + return; + } + (void) inb_p(HD_STATUS); +#if (HD_DELAY > 0) + last_req = read_timer(); +#endif + if (elv_next_request(QUEUE)) + hd_request(); + return; +} + +static void write_intr(void) +{ + struct request *req = CURRENT; + int i; + int retries = 100000; + + do { + i = (unsigned) inb_p(HD_STATUS); + if (i & BUSY_STAT) + continue; + if (!OK_STATUS(i)) + break; + if ((req->nr_sectors <= 1) || (i & DRQ_STAT)) + goto ok_to_write; + } while (--retries > 0); + dump_status("write_intr", i); + bad_rw_intr(); + hd_request(); + return; +ok_to_write: + req->sector++; + i = --req->nr_sectors; + --req->current_nr_sectors; + req->buffer += 512; + if (!i || (req->bio && req->current_nr_sectors <= 0)) + end_request(req, 1); + if (i > 0) { + SET_HANDLER(&write_intr); + outsw(HD_DATA, req->buffer, 256); + local_irq_enable(); + } else { +#if (HD_DELAY > 0) + last_req = read_timer(); +#endif + hd_request(); + } + return; +} + +static void recal_intr(void) +{ + check_status(); +#if (HD_DELAY > 0) + last_req = read_timer(); +#endif + hd_request(); +} + +/* + * This is another of the error-routines I don't know what to do with. The + * best idea seems to just set reset, and start all over again. + */ +static void hd_times_out(unsigned long dummy) +{ + char *name; + + do_hd = NULL; + + if (!CURRENT) + return; + + disable_irq(HD_IRQ); + local_irq_enable(); + reset = 1; + name = CURRENT->rq_disk->disk_name; + printk("%s: timeout\n", name); + if (++CURRENT->errors >= MAX_ERRORS) { +#ifdef DEBUG + printk("%s: too many errors\n", name); +#endif + end_request(CURRENT, 0); + } + local_irq_disable(); + hd_request(); + enable_irq(HD_IRQ); +} + +static int do_special_op(struct hd_i_struct *disk, struct request *req) +{ + if (disk->recalibrate) { + disk->recalibrate = 0; + hd_out(disk, disk->sect, 0, 0, 0, WIN_RESTORE, &recal_intr); + return reset; + } + if (disk->head > 16) { + printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); + end_request(req, 0); + } + disk->special_op = 0; + return 1; +} + +/* + * The driver enables interrupts as much as possible. In order to do this, + * (a) the device-interrupt is disabled before entering hd_request(), + * and (b) the timeout-interrupt is disabled before the sti(). + * + * Interrupts are still masked (by default) whenever we are exchanging + * data/cmds with a drive, because some drives seem to have very poor + * tolerance for latency during I/O. The IDE driver has support to unmask + * interrupts for non-broken hardware, so use that driver if required. + */ +static void hd_request(void) +{ + unsigned int block, nsect, sec, track, head, cyl; + struct hd_i_struct *disk; + struct request *req; + + if (do_hd) + return; +repeat: + del_timer(&device_timer); + local_irq_enable(); + + req = CURRENT; + if (!req) { + do_hd = NULL; + return; + } + + if (reset) { + local_irq_disable(); + reset_hd(); + return; + } + disk = req->rq_disk->private_data; + block = req->sector; + nsect = req->nr_sectors; + if (block >= get_capacity(req->rq_disk) || + ((block+nsect) > get_capacity(req->rq_disk))) { + printk("%s: bad access: block=%d, count=%d\n", + req->rq_disk->disk_name, block, nsect); + end_request(req, 0); + goto repeat; + } + + if (disk->special_op) { + if (do_special_op(disk, req)) + goto repeat; + return; + } + sec = block % disk->sect + 1; + track = block / disk->sect; + head = track % disk->head; + cyl = track / disk->head; +#ifdef DEBUG + printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", + req->rq_disk->disk_name, + req_data_dir(req) == READ ? "read" : "writ", + cyl, head, sec, nsect, req->buffer); +#endif + if (blk_fs_request(req)) { + switch (rq_data_dir(req)) { + case READ: + hd_out(disk, nsect, sec, head, cyl, WIN_READ, + &read_intr); + if (reset) + goto repeat; + break; + case WRITE: + hd_out(disk, nsect, sec, head, cyl, WIN_WRITE, + &write_intr); + if (reset) + goto repeat; + if (wait_DRQ()) { + bad_rw_intr(); + goto repeat; + } + outsw(HD_DATA, req->buffer, 256); + break; + default: + printk("unknown hd-command\n"); + end_request(req, 0); + break; + } + } +} + +static void do_hd_request(struct request_queue *q) +{ + disable_irq(HD_IRQ); + hd_request(); + enable_irq(HD_IRQ); +} + +static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct hd_i_struct *disk = bdev->bd_disk->private_data; + + geo->heads = disk->head; + geo->sectors = disk->sect; + geo->cylinders = disk->cyl; + return 0; +} + +/* + * Releasing a block device means we sync() it, so that it can safely + * be forgotten about... + */ + +static irqreturn_t hd_interrupt(int irq, void *dev_id) +{ + void (*handler)(void) = do_hd; + + do_hd = NULL; + del_timer(&device_timer); + if (!handler) + handler = unexpected_hd_interrupt; + handler(); + local_irq_enable(); + return IRQ_HANDLED; +} + +static struct block_device_operations hd_fops = { + .getgeo = hd_getgeo, +}; + +/* + * This is the hard disk IRQ description. The IRQF_DISABLED in sa_flags + * means we run the IRQ-handler with interrupts disabled: this is bad for + * interrupt latency, but anything else has led to problems on some + * machines. + * + * We enable interrupts in some of the routines after making sure it's + * safe. + */ + +static int __init hd_init(void) +{ + int drive; + + if (register_blkdev(MAJOR_NR, "hd")) + return -1; + + hd_queue = blk_init_queue(do_hd_request, &hd_lock); + if (!hd_queue) { + unregister_blkdev(MAJOR_NR, "hd"); + return -ENOMEM; + } + + blk_queue_max_sectors(hd_queue, 255); + init_timer(&device_timer); + device_timer.function = hd_times_out; + blk_queue_hardsect_size(hd_queue, 512); + + if (!NR_HD) { + /* + * We don't know anything about the drive. This means + * that you *MUST* specify the drive parameters to the + * kernel yourself. + * + * If we were on an i386, we used to read this info from + * the BIOS or CMOS. This doesn't work all that well, + * since this assumes that this is a primary or secondary + * drive, and if we're using this legacy driver, it's + * probably an auxilliary controller added to recover + * legacy data off an ST-506 drive. Either way, it's + * definitely safest to have the user explicitly specify + * the information. + */ + printk("hd: no drives specified - use hd=cyl,head,sectors" + " on kernel command line\n"); + goto out; + } + + for (drive = 0 ; drive < NR_HD ; drive++) { + struct gendisk *disk = alloc_disk(64); + struct hd_i_struct *p = &hd_info[drive]; + if (!disk) + goto Enomem; + disk->major = MAJOR_NR; + disk->first_minor = drive << 6; + disk->fops = &hd_fops; + sprintf(disk->disk_name, "hd%c", 'a'+drive); + disk->private_data = p; + set_capacity(disk, p->head * p->sect * p->cyl); + disk->queue = hd_queue; + p->unit = drive; + hd_gendisk[drive] = disk; + printk("%s: %luMB, CHS=%d/%d/%d\n", + disk->disk_name, (unsigned long)get_capacity(disk)/2048, + p->cyl, p->head, p->sect); + } + + if (request_irq(HD_IRQ, hd_interrupt, IRQF_DISABLED, "hd", NULL)) { + printk("hd: unable to get IRQ%d for the hard disk driver\n", + HD_IRQ); + goto out1; + } + if (!request_region(HD_DATA, 8, "hd")) { + printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA); + goto out2; + } + if (!request_region(HD_CMD, 1, "hd(cmd)")) { + printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD); + goto out3; + } + + /* Let them fly */ + for (drive = 0; drive < NR_HD; drive++) + add_disk(hd_gendisk[drive]); + + return 0; + +out3: + release_region(HD_DATA, 8); +out2: + free_irq(HD_IRQ, NULL); +out1: + for (drive = 0; drive < NR_HD; drive++) + put_disk(hd_gendisk[drive]); + NR_HD = 0; +out: + del_timer(&device_timer); + unregister_blkdev(MAJOR_NR, "hd"); + blk_cleanup_queue(hd_queue); + return -1; +Enomem: + while (drive--) + put_disk(hd_gendisk[drive]); + goto out; +} + +static int __init parse_hd_setup(char *line) +{ + int ints[6]; + + (void) get_options(line, ARRAY_SIZE(ints), ints); + hd_setup(NULL, ints); + + return 1; +} +__setup("hd=", parse_hd_setup); + +late_initcall(hd_init); diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c index da8a1658a273..aaca40283be9 100644 --- a/drivers/char/apm-emulation.c +++ b/drivers/char/apm-emulation.c @@ -58,6 +58,55 @@ struct apm_queue { apm_event_t events[APM_MAX_EVENTS]; }; +/* + * thread states (for threads using a writable /dev/apm_bios fd): + * + * SUSPEND_NONE: nothing happening + * SUSPEND_PENDING: suspend event queued for thread and pending to be read + * SUSPEND_READ: suspend event read, pending acknowledgement + * SUSPEND_ACKED: acknowledgement received from thread (via ioctl), + * waiting for resume + * SUSPEND_ACKTO: acknowledgement timeout + * SUSPEND_DONE: thread had acked suspend and is now notified of + * resume + * + * SUSPEND_WAIT: this thread invoked suspend and is waiting for resume + * + * A thread migrates in one of three paths: + * NONE -1-> PENDING -2-> READ -3-> ACKED -4-> DONE -5-> NONE + * -6-> ACKTO -7-> NONE + * NONE -8-> WAIT -9-> NONE + * + * While in PENDING or READ, the thread is accounted for in the + * suspend_acks_pending counter. + * + * The transitions are invoked as follows: + * 1: suspend event is signalled from the core PM code + * 2: the suspend event is read from the fd by the userspace thread + * 3: userspace thread issues the APM_IOC_SUSPEND ioctl (as ack) + * 4: core PM code signals that we have resumed + * 5: APM_IOC_SUSPEND ioctl returns + * + * 6: the notifier invoked from the core PM code timed out waiting + * for all relevant threds to enter ACKED state and puts those + * that haven't into ACKTO + * 7: those threads issue APM_IOC_SUSPEND ioctl too late, + * get an error + * + * 8: userspace thread issues the APM_IOC_SUSPEND ioctl (to suspend), + * ioctl code invokes pm_suspend() + * 9: pm_suspend() returns indicating resume + */ +enum apm_suspend_state { + SUSPEND_NONE, + SUSPEND_PENDING, + SUSPEND_READ, + SUSPEND_ACKED, + SUSPEND_ACKTO, + SUSPEND_WAIT, + SUSPEND_DONE, +}; + /* * The per-file APM data */ @@ -69,13 +118,7 @@ struct apm_user { unsigned int reader: 1; int suspend_result; - unsigned int suspend_state; -#define SUSPEND_NONE 0 /* no suspend pending */ -#define SUSPEND_PENDING 1 /* suspend pending read */ -#define SUSPEND_READ 2 /* suspend read, pending ack */ -#define SUSPEND_ACKED 3 /* suspend acked */ -#define SUSPEND_WAIT 4 /* waiting for suspend */ -#define SUSPEND_DONE 5 /* suspend completed */ + enum apm_suspend_state suspend_state; struct apm_queue queue; }; @@ -83,7 +126,8 @@ struct apm_user { /* * Local variables */ -static int suspends_pending; +static atomic_t suspend_acks_pending = ATOMIC_INIT(0); +static atomic_t userspace_notification_inhibit = ATOMIC_INIT(0); static int apm_disabled; static struct task_struct *kapmd_tsk; @@ -166,78 +210,6 @@ static void queue_event(apm_event_t event) wake_up_interruptible(&apm_waitqueue); } -/* - * queue_suspend_event - queue an APM suspend event. - * - * Check that we're in a state where we can suspend. If not, - * return -EBUSY. Otherwise, queue an event to all "writer" - * users. If there are no "writer" users, return '1' to - * indicate that we can immediately suspend. - */ -static int queue_suspend_event(apm_event_t event, struct apm_user *sender) -{ - struct apm_user *as; - int ret = 1; - - mutex_lock(&state_lock); - down_read(&user_list_lock); - - /* - * If a thread is still processing, we can't suspend, so reject - * the request. - */ - list_for_each_entry(as, &apm_user_list, list) { - if (as != sender && as->reader && as->writer && as->suser && - as->suspend_state != SUSPEND_NONE) { - ret = -EBUSY; - goto out; - } - } - - list_for_each_entry(as, &apm_user_list, list) { - if (as != sender && as->reader && as->writer && as->suser) { - as->suspend_state = SUSPEND_PENDING; - suspends_pending++; - queue_add_event(&as->queue, event); - ret = 0; - } - } - out: - up_read(&user_list_lock); - mutex_unlock(&state_lock); - wake_up_interruptible(&apm_waitqueue); - return ret; -} - -static void apm_suspend(void) -{ - struct apm_user *as; - int err = pm_suspend(PM_SUSPEND_MEM); - - /* - * Anyone on the APM queues will think we're still suspended. - * Send a message so everyone knows we're now awake again. - */ - queue_event(APM_NORMAL_RESUME); - - /* - * Finally, wake up anyone who is sleeping on the suspend. - */ - mutex_lock(&state_lock); - down_read(&user_list_lock); - list_for_each_entry(as, &apm_user_list, list) { - if (as->suspend_state == SUSPEND_WAIT || - as->suspend_state == SUSPEND_ACKED) { - as->suspend_result = err; - as->suspend_state = SUSPEND_DONE; - } - } - up_read(&user_list_lock); - mutex_unlock(&state_lock); - - wake_up(&apm_suspend_waitqueue); -} - static ssize_t apm_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) { struct apm_user *as = fp->private_data; @@ -308,25 +280,22 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg) as->suspend_result = -EINTR; - if (as->suspend_state == SUSPEND_READ) { - int pending; - + switch (as->suspend_state) { + case SUSPEND_READ: /* * If we read a suspend command from /dev/apm_bios, * then the corresponding APM_IOC_SUSPEND ioctl is * interpreted as an acknowledge. */ as->suspend_state = SUSPEND_ACKED; - suspends_pending--; - pending = suspends_pending == 0; + atomic_dec(&suspend_acks_pending); mutex_unlock(&state_lock); /* - * If there are no further acknowledges required, - * suspend the system. + * suspend_acks_pending changed, the notifier needs to + * be woken up for this */ - if (pending) - apm_suspend(); + wake_up(&apm_suspend_waitqueue); /* * Wait for the suspend/resume to complete. If there @@ -342,35 +311,21 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg) * try_to_freeze() in freezer_count() will not trigger */ freezer_count(); - } else { + break; + case SUSPEND_ACKTO: + as->suspend_result = -ETIMEDOUT; + mutex_unlock(&state_lock); + break; + default: as->suspend_state = SUSPEND_WAIT; mutex_unlock(&state_lock); /* * Otherwise it is a request to suspend the system. - * Queue an event for all readers, and expect an - * acknowledge from all writers who haven't already - * acknowledged. - */ - err = queue_suspend_event(APM_USER_SUSPEND, as); - if (err < 0) { - /* - * Avoid taking the lock here - this - * should be fine. - */ - as->suspend_state = SUSPEND_NONE; - break; - } - - if (err > 0) - apm_suspend(); - - /* - * Wait for the suspend/resume to complete. If there - * are pending acknowledges, we wait here for them. + * Just invoke pm_suspend(), we'll handle it from + * there via the notifier. */ - wait_event_freezable(apm_suspend_waitqueue, - as->suspend_state == SUSPEND_DONE); + as->suspend_result = pm_suspend(PM_SUSPEND_MEM); } mutex_lock(&state_lock); @@ -386,7 +341,6 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg) static int apm_release(struct inode * inode, struct file * filp) { struct apm_user *as = filp->private_data; - int pending = 0; filp->private_data = NULL; @@ -396,18 +350,15 @@ static int apm_release(struct inode * inode, struct file * filp) /* * We are now unhooked from the chain. As far as new - * events are concerned, we no longer exist. However, we - * need to balance suspends_pending, which means the - * possibility of sleeping. + * events are concerned, we no longer exist. */ mutex_lock(&state_lock); - if (as->suspend_state != SUSPEND_NONE) { - suspends_pending -= 1; - pending = suspends_pending == 0; - } + if (as->suspend_state == SUSPEND_PENDING || + as->suspend_state == SUSPEND_READ) + atomic_dec(&suspend_acks_pending); mutex_unlock(&state_lock); - if (pending) - apm_suspend(); + + wake_up(&apm_suspend_waitqueue); kfree(as); return 0; @@ -545,7 +496,6 @@ static int kapmd(void *arg) { do { apm_event_t event; - int ret; wait_event_interruptible(kapmd_wait, !queue_empty(&kapmd_queue) || kthread_should_stop()); @@ -570,20 +520,13 @@ static int kapmd(void *arg) case APM_USER_SUSPEND: case APM_SYS_SUSPEND: - ret = queue_suspend_event(event, NULL); - if (ret < 0) { - /* - * We were busy. Try again in 50ms. - */ - queue_add_event(&kapmd_queue, event); - msleep(50); - } - if (ret > 0) - apm_suspend(); + pm_suspend(PM_SUSPEND_MEM); break; case APM_CRITICAL_SUSPEND: - apm_suspend(); + atomic_inc(&userspace_notification_inhibit); + pm_suspend(PM_SUSPEND_MEM); + atomic_dec(&userspace_notification_inhibit); break; } } while (1); @@ -591,6 +534,120 @@ static int kapmd(void *arg) return 0; } +static int apm_suspend_notifier(struct notifier_block *nb, + unsigned long event, + void *dummy) +{ + struct apm_user *as; + int err; + + /* short-cut emergency suspends */ + if (atomic_read(&userspace_notification_inhibit)) + return NOTIFY_DONE; + + switch (event) { + case PM_SUSPEND_PREPARE: + /* + * Queue an event to all "writer" users that we want + * to suspend and need their ack. + */ + mutex_lock(&state_lock); + down_read(&user_list_lock); + + list_for_each_entry(as, &apm_user_list, list) { + if (as->suspend_state != SUSPEND_WAIT && as->reader && + as->writer && as->suser) { + as->suspend_state = SUSPEND_PENDING; + atomic_inc(&suspend_acks_pending); + queue_add_event(&as->queue, APM_USER_SUSPEND); + } + } + + up_read(&user_list_lock); + mutex_unlock(&state_lock); + wake_up_interruptible(&apm_waitqueue); + + /* + * Wait for the the suspend_acks_pending variable to drop to + * zero, meaning everybody acked the suspend event (or the + * process was killed.) + * + * If the app won't answer within a short while we assume it + * locked up and ignore it. + */ + err = wait_event_interruptible_timeout( + apm_suspend_waitqueue, + atomic_read(&suspend_acks_pending) == 0, + 5*HZ); + + /* timed out */ + if (err == 0) { + /* + * Move anybody who timed out to "ack timeout" state. + * + * We could time out and the userspace does the ACK + * right after we time out but before we enter the + * locked section here, but that's fine. + */ + mutex_lock(&state_lock); + down_read(&user_list_lock); + list_for_each_entry(as, &apm_user_list, list) { + if (as->suspend_state == SUSPEND_PENDING || + as->suspend_state == SUSPEND_READ) { + as->suspend_state = SUSPEND_ACKTO; + atomic_dec(&suspend_acks_pending); + } + } + up_read(&user_list_lock); + mutex_unlock(&state_lock); + } + + /* let suspend proceed */ + if (err >= 0) + return NOTIFY_OK; + + /* interrupted by signal */ + return NOTIFY_BAD; + + case PM_POST_SUSPEND: + /* + * Anyone on the APM queues will think we're still suspended. + * Send a message so everyone knows we're now awake again. + */ + queue_event(APM_NORMAL_RESUME); + + /* + * Finally, wake up anyone who is sleeping on the suspend. + */ + mutex_lock(&state_lock); + down_read(&user_list_lock); + list_for_each_entry(as, &apm_user_list, list) { + if (as->suspend_state == SUSPEND_ACKED) { + /* + * TODO: maybe grab error code, needs core + * changes to push the error to the notifier + * chain (could use the second parameter if + * implemented) + */ + as->suspend_result = 0; + as->suspend_state = SUSPEND_DONE; + } + } + up_read(&user_list_lock); + mutex_unlock(&state_lock); + + wake_up(&apm_suspend_waitqueue); + return NOTIFY_OK; + + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block apm_notif_block = { + .notifier_call = apm_suspend_notifier, +}; + static int __init apm_init(void) { int ret; @@ -604,7 +661,7 @@ static int __init apm_init(void) if (IS_ERR(kapmd_tsk)) { ret = PTR_ERR(kapmd_tsk); kapmd_tsk = NULL; - return ret; + goto out; } wake_up_process(kapmd_tsk); @@ -613,16 +670,27 @@ static int __init apm_init(void) #endif ret = misc_register(&apm_device); - if (ret != 0) { - remove_proc_entry("apm", NULL); - kthread_stop(kapmd_tsk); - } + if (ret) + goto out_stop; + ret = register_pm_notifier(&apm_notif_block); + if (ret) + goto out_unregister; + + return 0; + + out_unregister: + misc_deregister(&apm_device); + out_stop: + remove_proc_entry("apm", NULL); + kthread_stop(kapmd_tsk); + out: return ret; } static void __exit apm_exit(void) { + unregister_pm_notifier(&apm_notif_block); misc_deregister(&apm_device); remove_proc_entry("apm", NULL); diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index cf707c8f08d4..15b09b89588a 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -98,6 +98,9 @@ if BLK_DEV_IDE comment "Please see Documentation/ide/ide.txt for help/info on IDE drives" +config IDE_TIMINGS + bool + config IDE_ATAPI bool @@ -326,6 +329,7 @@ config BLK_DEV_PLATFORM config BLK_DEV_CMD640 tristate "CMD640 chipset bugfix/support" depends on X86 + select IDE_TIMINGS ---help--- The CMD-Technologies CMD640 IDE chip is used on many common 486 and Pentium motherboards, usually in combination with a "Neptune" or @@ -455,6 +459,7 @@ config BLK_DEV_AEC62XX config BLK_DEV_ALI15X3 tristate "ALI M15x3 chipset support" + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help This driver ensures (U)DMA support for ALI 1533, 1543 and 1543C @@ -469,6 +474,7 @@ config BLK_DEV_ALI15X3 config BLK_DEV_AMD74XX tristate "AMD and nVidia IDE support" depends on !ARM + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help This driver adds explicit support for AMD-7xx and AMD-8111 chips @@ -489,6 +495,7 @@ config BLK_DEV_ATIIXP config BLK_DEV_CMD64X tristate "CMD64{3|6|8|9} chipset support" + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help Say Y here if you have an IDE controller which uses any of these @@ -503,6 +510,7 @@ config BLK_DEV_TRIFLEX config BLK_DEV_CY82C693 tristate "CY82C693 chipset support" + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help This driver adds detection and support for the CY82C693 chipset @@ -695,6 +703,7 @@ config BLK_DEV_SIS5513 config BLK_DEV_SL82C105 tristate "Winbond SL82c105 support" depends on (PPC || ARM) + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help If you have a Winbond SL82c105 IDE controller, say Y here to enable @@ -725,6 +734,7 @@ config BLK_DEV_TRM290 config BLK_DEV_VIA82CXXX tristate "VIA82CXXX chipset support" + select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help This driver adds explicit support for VIA BusMastering IDE chips. @@ -751,6 +761,7 @@ endif config BLK_DEV_IDE_PMAC tristate "PowerMac on-board IDE support" depends on PPC_PMAC && IDE=y && BLK_DEV_IDE=y + select IDE_TIMINGS help This driver provides support for the on-board IDE controller on most of the recent Apple Power Macintoshes and PowerBooks. @@ -829,13 +840,6 @@ config BLK_DEV_IDE_RAPIDE Say Y here if you want to support the Yellowstone RapIDE controller manufactured for use with Acorn computers. -config BLK_DEV_IDE_BAST - tristate "Simtec BAST / Thorcom VR1000 IDE support" - depends on ARM && (ARCH_BAST || MACH_VR1000) - help - Say Y here if you want to support the onboard IDE channels on the - Simtec BAST or the Thorcom VR1000 - config IDE_H8300 tristate "H8300 IDE support" depends on H8300 @@ -919,51 +923,12 @@ config BLK_DEV_Q40IDE config BLK_DEV_PALMCHIP_BK3710 tristate "Palmchip bk3710 IDE controller support" depends on ARCH_DAVINCI + select IDE_TIMINGS select BLK_DEV_IDEDMA_SFF help Say Y here if you want to support the onchip IDE controller on the TI DaVinci SoC - -config BLK_DEV_MPC8xx_IDE - tristate "MPC8xx IDE support" - depends on 8xx && (LWMON || IVMS8 || IVML24 || TQM8xxL) && IDE=y && BLK_DEV_IDE=y && !PPC_MERGE - help - This option provides support for IDE on Motorola MPC8xx Systems. - Please see 'Type of MPC8xx IDE interface' for details. - - If unsure, say N. - -choice - prompt "Type of MPC8xx IDE interface" - depends on BLK_DEV_MPC8xx_IDE - default IDE_8xx_PCCARD - -config IDE_8xx_PCCARD - bool "8xx_PCCARD" - ---help--- - Select how the IDE devices are connected to the MPC8xx system: - - 8xx_PCCARD uses the 8xx internal PCMCIA interface in combination - with a PC Card (e.g. ARGOSY portable Hard Disk Adapter), - ATA PC Card HDDs or ATA PC Flash Cards (example: TQM8xxL - systems) - - 8xx_DIRECT is used for directly connected IDE devices using the 8xx - internal PCMCIA interface (example: IVMS8 systems) - - EXT_DIRECT is used for IDE devices directly connected to the 8xx - bus using some glue logic, but _not_ the 8xx internal - PCMCIA interface (example: IDIF860 systems) - -config IDE_8xx_DIRECT - bool "8xx_DIRECT" - -config IDE_EXT_DIRECT - bool "EXT_DIRECT" - -endchoice - # no isa -> no vlb if ISA && (ALPHA || X86 || MIPS) @@ -981,6 +946,7 @@ config BLK_DEV_4DRIVES config BLK_DEV_ALI14XX tristate "ALI M14xx support" + select IDE_TIMINGS help This driver is enabled at runtime using the "ali14xx.probe" kernel boot parameter. It enables support for the secondary IDE interface @@ -1000,6 +966,7 @@ config BLK_DEV_DTC2278 config BLK_DEV_HT6560B tristate "Holtek HT6560B support" + select IDE_TIMINGS help This driver is enabled at runtime using the "ht6560b.probe" kernel boot parameter. It enables support for the secondary IDE interface @@ -1009,6 +976,7 @@ config BLK_DEV_HT6560B config BLK_DEV_QD65XX tristate "QDI QD65xx support" + select IDE_TIMINGS help This driver is enabled at runtime using the "qd65xx.probe" kernel boot parameter. It permits faster I/O speeds to be set. See the @@ -1032,30 +1000,4 @@ config BLK_DEV_IDEDMA endif -config BLK_DEV_HD_ONLY - bool "Old hard disk (MFM/RLL/IDE) driver" - depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN - help - There are two drivers for MFM/RLL/IDE hard disks. Most people use - the newer enhanced driver, but this old one is still around for two - reasons. Some older systems have strange timing problems and seem to - work only with the old driver (which itself does not work with some - newer systems). The other reason is that the old driver is smaller, - since it lacks the enhanced functionality of the new one. This makes - it a good choice for systems with very tight memory restrictions, or - for systems with only older MFM/RLL/ESDI drives. Choosing the old - driver can save 13 KB or so of kernel memory. - - If you want to use this driver together with the new one you have - to use "hda=noprobe hdb=noprobe" kernel parameters to prevent the new - driver from probing the primary interface. - - If you are unsure, then just choose the Enhanced IDE/MFM/RLL driver - instead of this one. For more detailed information, read the - Disk-HOWTO, available from - . - -config BLK_DEV_HD - def_bool BLK_DEV_HD_ONLY - endif # IDE diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile index a2b3f84d710d..5d414e301a5a 100644 --- a/drivers/ide/Makefile +++ b/drivers/ide/Makefile @@ -11,9 +11,11 @@ EXTRA_CFLAGS += -Idrivers/ide -ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o +ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o \ + ide-pio-blacklist.o # core IDE code +ide-core-$(CONFIG_IDE_TIMINGS) += ide-timings.o ide-core-$(CONFIG_IDE_ATAPI) += ide-atapi.o ide-core-$(CONFIG_BLK_DEV_IDEPCI) += setup-pci.o ide-core-$(CONFIG_BLK_DEV_IDEDMA) += ide-dma.o @@ -59,9 +61,3 @@ ifeq ($(CONFIG_BLK_DEV_PLATFORM), y) endif obj-$(CONFIG_BLK_DEV_IDE) += arm/ mips/ - -# old hd driver must be last -ifeq ($(CONFIG_BLK_DEV_HD), y) - hd-core-y += legacy/hd.o - obj-y += hd-core.o -endif diff --git a/drivers/ide/arm/Makefile b/drivers/ide/arm/Makefile index 936e7b0237f5..5bc26053afa6 100644 --- a/drivers/ide/arm/Makefile +++ b/drivers/ide/arm/Makefile @@ -1,7 +1,6 @@ obj-$(CONFIG_BLK_DEV_IDE_ICSIDE) += icside.o obj-$(CONFIG_BLK_DEV_IDE_RAPIDE) += rapide.o -obj-$(CONFIG_BLK_DEV_IDE_BAST) += bast-ide.o obj-$(CONFIG_BLK_DEV_PALMCHIP_BK3710) += palm_bk3710.o ifeq ($(CONFIG_IDE_ARM), m) diff --git a/drivers/ide/arm/bast-ide.c b/drivers/ide/arm/bast-ide.c deleted file mode 100644 index 8e8c28104b45..000000000000 --- a/drivers/ide/arm/bast-ide.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2003-2004 Simtec Electronics - * Ben Dooks - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * -*/ - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#define DRV_NAME "bast-ide" - -static int __init bastide_register(unsigned int base, unsigned int aux, int irq) -{ - ide_hwif_t *hwif; - hw_regs_t hw; - int i; - u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; - - memset(&hw, 0, sizeof(hw)); - - base += BAST_IDE_CS; - aux += BAST_IDE_CS; - - for (i = 0; i <= 7; i++) { - hw.io_ports_array[i] = (unsigned long)base; - base += 0x20; - } - - hw.io_ports.ctl_addr = aux + (6 * 0x20); - hw.irq = irq; - hw.chipset = ide_generic; - - hwif = ide_find_port(); - if (hwif == NULL) - goto out; - - i = hwif->index; - - ide_init_port_data(hwif, i); - ide_init_port_hw(hwif, &hw); - hwif->port_ops = NULL; - - idx[0] = i; - - ide_device_add(idx, NULL); -out: - return 0; -} - -static int __init bastide_init(void) -{ - unsigned long base = BAST_VA_IDEPRI + BAST_IDE_CS; - - /* we can treat the VR1000 and the BAST the same */ - - if (!(machine_is_bast() || machine_is_vr1000())) - return 0; - - printk("BAST: IDE driver, (c) 2003-2004 Simtec Electronics\n"); - - if (!request_mem_region(base, 0x400000, DRV_NAME)) { - printk(KERN_ERR "%s: resources busy\n", DRV_NAME); - return -EBUSY; - } - - bastide_register(BAST_VA_IDEPRI, BAST_VA_IDEPRIAUX, IRQ_IDE0); - bastide_register(BAST_VA_IDESEC, BAST_VA_IDESECAUX, IRQ_IDE1); - - return 0; -} - -module_init(bastide_init); - -MODULE_AUTHOR("Ben Dooks "); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Simtec BAST / Thorcom VR1000 IDE driver"); diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c index 061456914ca3..52f58c885783 100644 --- a/drivers/ide/arm/icside.c +++ b/drivers/ide/arm/icside.c @@ -21,6 +21,8 @@ #include #include +#define DRV_NAME "icside" + #define ICS_IDENT_OFFSET 0x2280 #define ICS_ARCIN_V5_INTRSTAT 0x0000 @@ -68,6 +70,7 @@ struct icside_state { unsigned int enabled; void __iomem *irq_port; void __iomem *ioc_base; + unsigned int sel; unsigned int type; ide_hwif_t *hwif[2]; }; @@ -165,7 +168,8 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = { static void icside_maskproc(ide_drive_t *drive, int mask) { ide_hwif_t *hwif = HWIF(drive); - struct icside_state *state = hwif->hwif_data; + struct expansion_card *ec = ECARD_DEV(hwif->dev); + struct icside_state *state = ecard_get_drvdata(ec); unsigned long flags; local_irq_save(flags); @@ -308,6 +312,7 @@ static int icside_dma_setup(ide_drive_t *drive) { ide_hwif_t *hwif = HWIF(drive); struct expansion_card *ec = ECARD_DEV(hwif->dev); + struct icside_state *state = ecard_get_drvdata(ec); struct request *rq = hwif->hwgroup->rq; unsigned int dma_mode; @@ -331,7 +336,7 @@ static int icside_dma_setup(ide_drive_t *drive) /* * Route the DMA signals to the correct interface. */ - writeb(hwif->select_data, hwif->config_data); + writeb(state->sel | hwif->channel, state->ioc_base); /* * Select the correct timing for this drive. @@ -359,7 +364,8 @@ static void icside_dma_exec_cmd(ide_drive_t *drive, u8 cmd) static int icside_dma_test_irq(ide_drive_t *drive) { ide_hwif_t *hwif = HWIF(drive); - struct icside_state *state = hwif->hwif_data; + struct expansion_card *ec = ECARD_DEV(hwif->dev); + struct icside_state *state = ecard_get_drvdata(ec); return readb(state->irq_port + (hwif->channel ? @@ -411,36 +417,24 @@ static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d) return -EOPNOTSUPP; } -static ide_hwif_t * -icside_setup(void __iomem *base, struct cardinfo *info, struct expansion_card *ec) +static void icside_setup_ports(hw_regs_t *hw, void __iomem *base, + struct cardinfo *info, struct expansion_card *ec) { unsigned long port = (unsigned long)base + info->dataoffset; - ide_hwif_t *hwif; - hwif = ide_find_port(); - if (hwif) { - /* - * Ensure we're using MMIO - */ - default_hwif_mmiops(hwif); - - hwif->io_ports.data_addr = port; - hwif->io_ports.error_addr = port + (1 << info->stepping); - hwif->io_ports.nsect_addr = port + (2 << info->stepping); - hwif->io_ports.lbal_addr = port + (3 << info->stepping); - hwif->io_ports.lbam_addr = port + (4 << info->stepping); - hwif->io_ports.lbah_addr = port + (5 << info->stepping); - hwif->io_ports.device_addr = port + (6 << info->stepping); - hwif->io_ports.status_addr = port + (7 << info->stepping); - hwif->io_ports.ctl_addr = - (unsigned long)base + info->ctrloffset; - hwif->irq = ec->irq; - hwif->chipset = ide_acorn; - hwif->gendev.parent = &ec->dev; - hwif->dev = &ec->dev; - } - - return hwif; + hw->io_ports.data_addr = port; + hw->io_ports.error_addr = port + (1 << info->stepping); + hw->io_ports.nsect_addr = port + (2 << info->stepping); + hw->io_ports.lbal_addr = port + (3 << info->stepping); + hw->io_ports.lbam_addr = port + (4 << info->stepping); + hw->io_ports.lbah_addr = port + (5 << info->stepping); + hw->io_ports.device_addr = port + (6 << info->stepping); + hw->io_ports.status_addr = port + (7 << info->stepping); + hw->io_ports.ctl_addr = (unsigned long)base + info->ctrloffset; + + hw->irq = ec->irq; + hw->dev = &ec->dev; + hw->chipset = ide_acorn; } static int __init @@ -449,6 +443,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec) ide_hwif_t *hwif; void __iomem *base; u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; + hw_regs_t hw; base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0); if (!base) @@ -466,12 +461,19 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec) */ icside_irqdisable_arcin_v5(ec, 0); - hwif = icside_setup(base, &icside_cardinfo_v5, ec); + icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec); + + hwif = ide_find_port(); if (!hwif) return -ENODEV; + ide_init_port_hw(hwif, &hw); + default_hwif_mmiops(hwif); + state->hwif[0] = hwif; + ecard_set_drvdata(ec, state); + idx[0] = hwif->index; ide_device_add(idx, NULL); @@ -497,6 +499,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) int ret; u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; struct ide_port_info d = icside_v6_port_info; + hw_regs_t hw[2]; ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0); if (!ioc_base) { @@ -525,43 +528,47 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) state->irq_port = easi_base; state->ioc_base = ioc_base; + state->sel = sel; /* * Be on the safe side - disable interrupts */ icside_irqdisable_arcin_v6(ec, 0); + icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec); + icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec); + /* * Find and register the interfaces. */ - hwif = icside_setup(easi_base, &icside_cardinfo_v6_1, ec); - mate = icside_setup(easi_base, &icside_cardinfo_v6_2, ec); + hwif = ide_find_port(); + if (hwif == NULL) + return -ENODEV; - if (!hwif || !mate) { - ret = -ENODEV; - goto out; + ide_init_port_hw(hwif, &hw[0]); + default_hwif_mmiops(hwif); + + idx[0] = hwif->index; + + mate = ide_find_port(); + if (mate) { + ide_init_port_hw(mate, &hw[1]); + default_hwif_mmiops(mate); + + idx[1] = mate->index; } state->hwif[0] = hwif; state->hwif[1] = mate; - hwif->hwif_data = state; - hwif->config_data = (unsigned long)ioc_base; - hwif->select_data = sel; - - mate->hwif_data = state; - mate->config_data = (unsigned long)ioc_base; - mate->select_data = sel | 1; + ecard_set_drvdata(ec, state); - if (ec->dma != NO_DMA && !request_dma(ec->dma, hwif->name)) { + if (ec->dma != NO_DMA && !request_dma(ec->dma, DRV_NAME)) { d.init_dma = icside_dma_init; d.port_ops = &icside_v6_port_ops; d.dma_ops = NULL; } - idx[0] = hwif->index; - idx[1] = mate->index; - ide_device_add(idx, &d); return 0; @@ -627,10 +634,8 @@ icside_probe(struct expansion_card *ec, const struct ecard_id *id) break; } - if (ret == 0) { - ecard_set_drvdata(ec, state); + if (ret == 0) goto out; - } kfree(state); release: diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c index 3839f5722985..c79b85b6e4a3 100644 --- a/drivers/ide/arm/palm_bk3710.c +++ b/drivers/ide/arm/palm_bk3710.c @@ -74,8 +74,6 @@ struct palm_bk3710_udmatiming { #define BK3710_IORDYTMP 0x78 #define BK3710_IORDYTMS 0x7C -#include "../ide-timing.h" - static unsigned ideclk_period; /* in nanoseconds */ static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = { @@ -402,7 +400,6 @@ static int __devinit palm_bk3710_probe(struct platform_device *pdev) i = hwif->index; - ide_init_port_data(hwif, i); ide_init_port_hw(hwif, &hw); default_hwif_mmiops(hwif); diff --git a/drivers/ide/arm/rapide.c b/drivers/ide/arm/rapide.c index 1747b2358775..43057e0303c8 100644 --- a/drivers/ide/arm/rapide.c +++ b/drivers/ide/arm/rapide.c @@ -11,6 +11,10 @@ #include +static struct const ide_port_info rapide_port_info = { + .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, +}; + static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base, void __iomem *ctrl, unsigned int sz, int irq) { @@ -44,25 +48,26 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id) goto release; } - hwif = ide_find_port(); - if (hwif) { - memset(&hw, 0, sizeof(hw)); - rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq); - hw.chipset = ide_generic; - hw.dev = &ec->dev; + memset(&hw, 0, sizeof(hw)); + rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq); + hw.chipset = ide_generic; + hw.dev = &ec->dev; - ide_init_port_hw(hwif, &hw); + hwif = ide_find_port(); + if (hwif == NULL) { + ret = -ENOENT; + goto release; + } - hwif->host_flags = IDE_HFLAG_MMIO; - default_hwif_mmiops(hwif); + ide_init_port_hw(hwif, &hw); + default_hwif_mmiops(hwif); - idx[0] = hwif->index; + idx[0] = hwif->index; - ide_device_add(idx, NULL); + ide_device_add(idx, &rapide_port_info); - ecard_set_drvdata(ec, hwif); - goto out; - } + ecard_set_drvdata(ec, hwif); + goto out; release: ecard_release_resources(ec); diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c index ae37ee58bae2..20fad6d542cc 100644 --- a/drivers/ide/h8300/ide-h8300.c +++ b/drivers/ide/h8300/ide-h8300.c @@ -8,6 +8,8 @@ #include #include +#define DRV_NAME "ide-h8300" + #define bswap(d) \ ({ \ u16 r; \ @@ -176,6 +178,10 @@ static inline void hwif_setup(ide_hwif_t *hwif) hwif->output_data = h8300_output_data; } +static const struct ide_port_info h8300_port_info = { + .host_flags = IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA, +}; + static int __init h8300_ide_init(void) { hw_regs_t hw; @@ -183,6 +189,8 @@ static int __init h8300_ide_init(void) int index; u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; + printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n"); + if (!request_region(CONFIG_H8300_IDE_BASE, H8300_IDE_GAP*8, "ide-h8300")) goto out_busy; if (!request_region(CONFIG_H8300_IDE_ALT, H8300_IDE_GAP, "ide-h8300")) { @@ -192,22 +200,17 @@ static int __init h8300_ide_init(void) hw_setup(&hw); - hwif = ide_find_port(); - if (hwif == NULL) { - printk(KERN_ERR "ide-h8300: IDE I/F register failed\n"); + hwif = ide_find_port_slot(&h8300_port_info); + if (hwif == NULL) return -ENOENT; - } index = hwif->index; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); hwif_setup(hwif); - hwif->host_flags = IDE_HFLAG_NO_IO_32BIT; - printk(KERN_INFO "ide%d: H8/300 generic IDE interface\n", index); idx[0] = index; - ide_device_add(idx, NULL); + ide_device_add(idx, &h8300_port_info); return 0; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index d99847157186..6e29dd532090 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -517,14 +517,9 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive, int xferlen, ide_handler_t *handler) { - ide_startstop_t startstop; struct cdrom_info *info = drive->driver_data; ide_hwif_t *hwif = drive->hwif; - /* wait for the controller to be idle */ - if (ide_wait_stat(&startstop, drive, 0, BUSY_STAT, WAIT_READY)) - return startstop; - /* FIXME: for Virtual DMA we must check harder */ if (info->dma) info->dma = !hwif->dma_ops->dma_setup(drive); @@ -603,28 +598,6 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive, return ide_started; } -/* - * Block read functions. - */ -static void ide_cd_pad_transfer(ide_drive_t *drive, xfer_func_t *xf, int len) -{ - while (len > 0) { - int dum = 0; - xf(drive, NULL, &dum, sizeof(dum)); - len -= sizeof(dum); - } -} - -static void ide_cd_drain_data(ide_drive_t *drive, int nsects) -{ - while (nsects > 0) { - static char dum[SECTOR_SIZE]; - - drive->hwif->input_data(drive, NULL, dum, sizeof(dum)); - nsects--; - } -} - /* * Check the contents of the interrupt reason register from the cdrom * and attempt to recover if there are problems. Returns 0 if everything's @@ -640,15 +613,12 @@ static int ide_cd_check_ireason(ide_drive_t *drive, struct request *rq, if (ireason == (!rw << 1)) return 0; else if (ireason == (rw << 1)) { - ide_hwif_t *hwif = drive->hwif; - xfer_func_t *xf; /* whoops... */ printk(KERN_ERR "%s: %s: wrong transfer direction!\n", drive->name, __func__); - xf = rw ? hwif->output_data : hwif->input_data; - ide_cd_pad_transfer(drive, xf, len); + ide_pad_transfer(drive, rw, len); } else if (rw == 0 && ireason == 1) { /* * Some drives (ASUS) seem to tell us that status info is @@ -696,16 +666,9 @@ static int ide_cd_check_transfer_size(ide_drive_t *drive, int len) static ide_startstop_t cdrom_newpc_intr(ide_drive_t *); -/* - * Routine to send a read/write packet command to the drive. This is usually - * called directly from cdrom_start_{read,write}(). However, for drq_interrupt - * devices, it is called from an interrupt when the drive is ready to accept - * the command. - */ -static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive) +static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive, + struct request *rq) { - struct request *rq = HWGROUP(drive)->rq; - if (rq_data_dir(rq) == READ) { unsigned short sectors_per_frame = queue_hardsect_size(drive->queue) >> SECTOR_BITS; @@ -742,6 +705,19 @@ static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive) /* set up the command */ rq->timeout = ATAPI_WAIT_PC; + return ide_started; +} + +/* + * Routine to send a read/write packet command to the drive. This is usually + * called directly from cdrom_start_{read,write}(). However, for drq_interrupt + * devices, it is called from an interrupt when the drive is ready to accept + * the command. + */ +static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive) +{ + struct request *rq = drive->hwif->hwgroup->rq; + /* send the command to the drive and return */ return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr); } @@ -768,9 +744,8 @@ static ide_startstop_t cdrom_seek_intr(ide_drive_t *drive) return ide_stopped; } -static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) +static void ide_cd_prepare_seek_request(ide_drive_t *drive, struct request *rq) { - struct request *rq = HWGROUP(drive)->rq; sector_t frame = rq->sector; sector_div(frame, queue_hardsect_size(drive->queue) >> SECTOR_BITS); @@ -780,17 +755,13 @@ static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) put_unaligned(cpu_to_be32(frame), (unsigned int *) &rq->cmd[2]); rq->timeout = ATAPI_WAIT_PC; - return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr); } -static ide_startstop_t cdrom_start_seek(ide_drive_t *drive, unsigned int block) +static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) { - struct cdrom_info *info = drive->driver_data; + struct request *rq = drive->hwif->hwgroup->rq; - info->dma = 0; - info->start_seek = jiffies; - return cdrom_start_packet_command(drive, 0, - cdrom_start_seek_continuation); + return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr); } /* @@ -1011,7 +982,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) - bio_cur_sectors(rq->bio), thislen >> 9); if (nskip > 0) { - ide_cd_drain_data(drive, nskip); + ide_pad_transfer(drive, write, nskip << 9); rq->current_nr_sectors -= nskip; thislen -= (nskip << 9); } @@ -1048,7 +1019,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) * If the buffers are full, pipe the rest into * oblivion. */ - ide_cd_drain_data(drive, thislen >> 9); + ide_pad_transfer(drive, 0, thislen); else { printk(KERN_ERR "%s: confused, missing data\n", drive->name); @@ -1096,7 +1067,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) /* pad, if necessary */ if (!blk_fs_request(rq) && len > 0) - ide_cd_pad_transfer(drive, xferfunc, len); + ide_pad_transfer(drive, write, len); if (blk_pc_request(rq)) { timeout = rq->timeout; @@ -1165,21 +1136,17 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) if (write) cd->devinfo.media_written = 1; - /* start sending the read/write request to the drive */ - return cdrom_start_packet_command(drive, 32768, cdrom_start_rw_cont); + return ide_started; } static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive) { struct request *rq = HWGROUP(drive)->rq; - if (!rq->timeout) - rq->timeout = ATAPI_WAIT_PC; - return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr); } -static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) +static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) { struct cdrom_info *info = drive->driver_data; @@ -1191,10 +1158,16 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) info->dma = 0; /* sg request */ - if (rq->bio) { - int mask = drive->queue->dma_alignment; - unsigned long addr = - (unsigned long)page_address(bio_page(rq->bio)); + if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { + struct request_queue *q = drive->queue; + unsigned int alignment; + unsigned long addr; + unsigned long stack_mask = ~(THREAD_SIZE - 1); + + if (rq->bio) + addr = (unsigned long)bio_data(rq->bio); + else + addr = (unsigned long)rq->data; info->dma = drive->using_dma; @@ -1204,23 +1177,25 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) * NOTE! The "len" and "addr" checks should possibly have * separate masks. */ - if ((rq->data_len & 15) || (addr & mask)) + alignment = queue_dma_alignment(q) | q->dma_pad_mask; + if (addr & alignment || rq->data_len & alignment) info->dma = 0; - } - /* start sending the command to the drive */ - return cdrom_start_packet_command(drive, rq->data_len, - cdrom_do_newpc_cont); + if (!((addr & stack_mask) ^ + ((unsigned long)current->stack & stack_mask))) + info->dma = 0; + } } /* * cdrom driver request routine. */ -static ide_startstop_t ide_do_rw_cdrom(ide_drive_t *drive, struct request *rq, +static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, sector_t block) { - ide_startstop_t action; struct cdrom_info *info = drive->driver_data; + ide_handler_t *fn; + int xferlen; if (blk_fs_request(rq)) { if (info->cd_flags & IDE_CD_FLAG_SEEKING) { @@ -1240,29 +1215,48 @@ static ide_startstop_t ide_do_rw_cdrom(ide_drive_t *drive, struct request *rq, } if (rq_data_dir(rq) == READ && IDE_LARGE_SEEK(info->last_block, block, - IDECD_SEEK_THRESHOLD) && - drive->dsc_overlap) - action = cdrom_start_seek(drive, block); - else - action = cdrom_start_rw(drive, rq); + IDECD_SEEK_THRESHOLD) && + drive->dsc_overlap) { + xferlen = 0; + fn = cdrom_start_seek_continuation; + + info->dma = 0; + info->start_seek = jiffies; + + ide_cd_prepare_seek_request(drive, rq); + } else { + xferlen = 32768; + fn = cdrom_start_rw_cont; + + if (cdrom_start_rw(drive, rq) == ide_stopped) + return ide_stopped; + + if (ide_cd_prepare_rw_request(drive, rq) == ide_stopped) + return ide_stopped; + } info->last_block = block; - return action; } else if (blk_sense_request(rq) || blk_pc_request(rq) || rq->cmd_type == REQ_TYPE_ATA_PC) { - return cdrom_do_block_pc(drive, rq); + xferlen = rq->data_len; + fn = cdrom_do_newpc_cont; + + if (!rq->timeout) + rq->timeout = ATAPI_WAIT_PC; + + cdrom_do_block_pc(drive, rq); } else if (blk_special_request(rq)) { /* right now this can only be a reset... */ cdrom_end_request(drive, 1); return ide_stopped; + } else { + blk_dump_rq_flags(rq, "ide-cd bad flags"); + cdrom_end_request(drive, 0); + return ide_stopped; } - blk_dump_rq_flags(rq, "ide-cd bad flags"); - cdrom_end_request(drive, 0); - return ide_stopped; + return cdrom_start_packet_command(drive, xferlen, fn); } - - /* * Ioctl handling. * @@ -1872,6 +1866,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn); blk_queue_dma_alignment(drive->queue, 31); + blk_queue_update_dma_pad(drive->queue, 15); drive->queue->unplug_delay = (1 * HZ) / 1000; if (!drive->queue->unplug_delay) drive->queue->unplug_delay = 1; @@ -1954,10 +1949,9 @@ static ide_driver_t ide_cdrom_driver = { .version = IDECD_VERSION, .media = ide_cdrom, .supports_dsc_overlap = 1, - .do_request = ide_do_rw_cdrom, + .do_request = ide_cd_do_request, .end_request = ide_end_request, .error = __ide_error, - .abort = __ide_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idecd_proc, #endif diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 5f49a4ae9dd8..3a2e80237c10 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -985,7 +985,6 @@ static ide_driver_t idedisk_driver = { .do_request = ide_do_rw_disk, .end_request = ide_end_request, .error = __ide_error, - .abort = __ide_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idedisk_proc, #endif diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index b3689437269f..011d72011cc4 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -351,10 +351,7 @@ static void ide_floppy_callback(ide_drive_t *drive) static void idefloppy_init_pc(struct ide_atapi_pc *pc) { - memset(pc->c, 0, 12); - pc->retries = 0; - pc->flags = 0; - pc->req_xfer = 0; + memset(pc, 0, sizeof(*pc)); pc->buf = pc->pc_buf; pc->buf_size = IDEFLOPPY_PC_BUFFER_SIZE; pc->callback = ide_floppy_callback; @@ -561,12 +558,6 @@ static void idefloppy_create_start_stop_cmd(struct ide_atapi_pc *pc, int start) pc->c[4] = start; } -static void idefloppy_create_test_unit_ready_cmd(struct ide_atapi_pc *pc) -{ - idefloppy_init_pc(pc); - pc->c[0] = GPCMD_TEST_UNIT_READY; -} - static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, struct ide_atapi_pc *pc, struct request *rq, unsigned long sector) @@ -711,10 +702,10 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) set_disk_ro(floppy->disk, floppy->wp); page = &pc.buf[8]; - transfer_rate = be16_to_cpu(*(u16 *)&pc.buf[8 + 2]); - sector_size = be16_to_cpu(*(u16 *)&pc.buf[8 + 6]); - cyls = be16_to_cpu(*(u16 *)&pc.buf[8 + 8]); - rpm = be16_to_cpu(*(u16 *)&pc.buf[8 + 28]); + transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]); + sector_size = be16_to_cpup((__be16 *)&pc.buf[8 + 6]); + cyls = be16_to_cpup((__be16 *)&pc.buf[8 + 8]); + rpm = be16_to_cpup((__be16 *)&pc.buf[8 + 28]); heads = pc.buf[8 + 4]; sectors = pc.buf[8 + 5]; @@ -789,8 +780,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) for (i = 0; i < desc_cnt; i++) { unsigned int desc_start = 4 + i*8; - blocks = be32_to_cpu(*(u32 *)&pc.buf[desc_start]); - length = be16_to_cpu(*(u16 *)&pc.buf[desc_start + 6]); + blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); + length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); debug_log("Descriptor %d: %dkB, %d blocks, %d sector size\n", i, blocks * length / 1024, blocks, length); @@ -911,8 +902,8 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) if (u_index >= u_array_size) break; /* User-supplied buffer too small */ - blocks = be32_to_cpu(*(u32 *)&pc.buf[desc_start]); - length = be16_to_cpu(*(u16 *)&pc.buf[desc_start + 6]); + blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); + length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); if (put_user(blocks, argp)) return(-EFAULT); @@ -1138,7 +1129,6 @@ static ide_driver_t idefloppy_driver = { .do_request = idefloppy_do_request, .end_request = idefloppy_end_request, .error = __ide_error, - .abort = __ide_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idefloppy_proc, #endif @@ -1166,7 +1156,9 @@ static int idefloppy_open(struct inode *inode, struct file *filp) floppy->flags &= ~IDEFLOPPY_FLAG_FORMAT_IN_PROGRESS; /* Just in case */ - idefloppy_create_test_unit_ready_cmd(&pc); + idefloppy_init_pc(&pc); + pc.c[0] = GPCMD_TEST_UNIT_READY; + if (idefloppy_queue_pc_tail(drive, &pc)) { idefloppy_create_start_stop_cmd(&pc, 1); (void) idefloppy_queue_pc_tail(drive, &pc); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 28057747c1f8..661b75a89d4d 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -504,55 +504,6 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat) EXPORT_SYMBOL_GPL(ide_error); -ide_startstop_t __ide_abort(ide_drive_t *drive, struct request *rq) -{ - if (drive->media != ide_disk) - rq->errors |= ERROR_RESET; - - ide_kill_rq(drive, rq); - - return ide_stopped; -} - -EXPORT_SYMBOL_GPL(__ide_abort); - -/** - * ide_abort - abort pending IDE operations - * @drive: drive the error occurred on - * @msg: message to report - * - * ide_abort kills and cleans up when we are about to do a - * host initiated reset on active commands. Longer term we - * want handlers to have sensible abort handling themselves - * - * This differs fundamentally from ide_error because in - * this case the command is doing just fine when we - * blow it away. - */ - -ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg) -{ - struct request *rq; - - if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL) - return ide_stopped; - - /* retry only "normal" I/O: */ - if (!blk_fs_request(rq)) { - rq->errors = 1; - ide_end_drive_cmd(drive, BUSY_STAT, 0); - return ide_stopped; - } - - if (rq->rq_disk) { - ide_driver_t *drv; - - drv = *(ide_driver_t **)rq->rq_disk->private_data; - return drv->abort(drive, rq); - } else - return __ide_abort(drive, rq); -} - static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) { tf->nsect = drive->sect; @@ -766,6 +717,18 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, return ide_stopped; } +static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq) +{ + switch (rq->cmd[0]) { + case REQ_DRIVE_RESET: + return ide_do_reset(drive); + default: + blk_dump_rq_flags(rq, "ide_special_rq - bad request"); + ide_end_request(drive, 0, 0); + return ide_stopped; + } +} + static void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { struct request_pm_state *pm = rq->data; @@ -869,7 +832,16 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) pm->pm_step == ide_pm_state_completed) ide_complete_pm_request(drive, rq); return startstop; - } + } else if (!rq->rq_disk && blk_special_request(rq)) + /* + * TODO: Once all ULDs have been modified to + * check for specific op codes rather than + * blindly accepting any special request, the + * check for ->rq_disk above may be replaced + * by a more suitable mechanism or even + * dropped entirely. + */ + return ide_special_rq(drive, rq); drv = *(ide_driver_t **)rq->rq_disk->private_data; return drv->do_request(drive, rq, block); diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 80ad4f234f3f..44aaec256a30 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -905,6 +905,14 @@ void ide_execute_pkt_cmd(ide_drive_t *drive) } EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd); +static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) +{ + struct request *rq = drive->hwif->hwgroup->rq; + + if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET) + ide_end_request(drive, err ? err : 1, 0); +} + /* needed below */ static ide_startstop_t do_reset1 (ide_drive_t *, int); @@ -940,7 +948,7 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive) } /* done polling */ hwgroup->polling = 0; - hwgroup->resetting = 0; + ide_complete_drive_reset(drive, 0); return ide_stopped; } @@ -956,12 +964,14 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) ide_hwif_t *hwif = HWIF(drive); const struct ide_port_ops *port_ops = hwif->port_ops; u8 tmp; + int err = 0; if (port_ops && port_ops->reset_poll) { - if (port_ops->reset_poll(drive)) { + err = port_ops->reset_poll(drive); + if (err) { printk(KERN_ERR "%s: host reset_poll failure for %s.\n", hwif->name, drive->name); - return ide_stopped; + goto out; } } @@ -975,6 +985,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) } printk("%s: reset timed-out, status=0x%02x\n", hwif->name, tmp); drive->failures++; + err = -EIO; } else { printk("%s: reset: ", hwif->name); tmp = ide_read_error(drive); @@ -1001,10 +1012,12 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) if (tmp & 0x80) printk("; slave: failed"); printk("\n"); + err = -EIO; } } +out: hwgroup->polling = 0; /* done polling */ - hwgroup->resetting = 0; /* done reset attempt */ + ide_complete_drive_reset(drive, err); return ide_stopped; } @@ -1090,7 +1103,6 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) /* For an ATAPI device, first try an ATAPI SRST. */ if (drive->media != ide_disk && !do_not_try_atapi) { - hwgroup->resetting = 1; pre_reset(drive); SELECT_DRIVE(drive); udelay (20); @@ -1112,10 +1124,10 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) if (io_ports->ctl_addr == 0) { spin_unlock_irqrestore(&ide_lock, flags); + ide_complete_drive_reset(drive, -ENXIO); return ide_stopped; } - hwgroup->resetting = 1; /* * Note that we also set nIEN while resetting the device, * to mask unwanted interrupts from the interface during the reset. diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 47af80df6872..13af72f09ec4 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -1,26 +1,11 @@ -#include #include #include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include - static const char *udma_str[] = { "UDMA/16", "UDMA/25", "UDMA/33", "UDMA/44", "UDMA/66", "UDMA/100", "UDMA/133", "UDMA7" }; @@ -90,142 +75,6 @@ static u8 ide_rate_filter(ide_drive_t *drive, u8 speed) return min(speed, mode); } -/* - * Standard (generic) timings for PIO modes, from ATA2 specification. - * These timings are for access to the IDE data port register *only*. - * Some drives may specify a mode, while also specifying a different - * value for cycle_time (from drive identification data). - */ -const ide_pio_timings_t ide_pio_timings[6] = { - { 70, 165, 600 }, /* PIO Mode 0 */ - { 50, 125, 383 }, /* PIO Mode 1 */ - { 30, 100, 240 }, /* PIO Mode 2 */ - { 30, 80, 180 }, /* PIO Mode 3 with IORDY */ - { 25, 70, 120 }, /* PIO Mode 4 with IORDY */ - { 20, 50, 100 } /* PIO Mode 5 with IORDY (nonstandard) */ -}; - -EXPORT_SYMBOL_GPL(ide_pio_timings); - -/* - * Shared data/functions for determining best PIO mode for an IDE drive. - * Most of this stuff originally lived in cmd640.c, and changes to the - * ide_pio_blacklist[] table should be made with EXTREME CAUTION to avoid - * breaking the fragile cmd640.c support. - */ - -/* - * Black list. Some drives incorrectly report their maximal PIO mode, - * at least in respect to CMD640. Here we keep info on some known drives. - */ -static struct ide_pio_info { - const char *name; - int pio; -} ide_pio_blacklist [] = { - { "Conner Peripherals 540MB - CFS540A", 3 }, - - { "WDC AC2700", 3 }, - { "WDC AC2540", 3 }, - { "WDC AC2420", 3 }, - { "WDC AC2340", 3 }, - { "WDC AC2250", 0 }, - { "WDC AC2200", 0 }, - { "WDC AC21200", 4 }, - { "WDC AC2120", 0 }, - { "WDC AC2850", 3 }, - { "WDC AC1270", 3 }, - { "WDC AC1170", 1 }, - { "WDC AC1210", 1 }, - { "WDC AC280", 0 }, - { "WDC AC31000", 3 }, - { "WDC AC31200", 3 }, - - { "Maxtor 7131 AT", 1 }, - { "Maxtor 7171 AT", 1 }, - { "Maxtor 7213 AT", 1 }, - { "Maxtor 7245 AT", 1 }, - { "Maxtor 7345 AT", 1 }, - { "Maxtor 7546 AT", 3 }, - { "Maxtor 7540 AV", 3 }, - - { "SAMSUNG SHD-3121A", 1 }, - { "SAMSUNG SHD-3122A", 1 }, - { "SAMSUNG SHD-3172A", 1 }, - - { "ST5660A", 3 }, - { "ST3660A", 3 }, - { "ST3630A", 3 }, - { "ST3655A", 3 }, - { "ST3391A", 3 }, - { "ST3390A", 1 }, - { "ST3600A", 1 }, - { "ST3290A", 0 }, - { "ST3144A", 0 }, - { "ST3491A", 1 }, /* reports 3, should be 1 or 2 (depending on */ - /* drive) according to Seagates FIND-ATA program */ - - { "QUANTUM ELS127A", 0 }, - { "QUANTUM ELS170A", 0 }, - { "QUANTUM LPS240A", 0 }, - { "QUANTUM LPS210A", 3 }, - { "QUANTUM LPS270A", 3 }, - { "QUANTUM LPS365A", 3 }, - { "QUANTUM LPS540A", 3 }, - { "QUANTUM LIGHTNING 540A", 3 }, - { "QUANTUM LIGHTNING 730A", 3 }, - - { "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */ - { "QUANTUM FIREBALL_640", 3 }, - { "QUANTUM FIREBALL_1080", 3 }, - { "QUANTUM FIREBALL_1280", 3 }, - { NULL, 0 } -}; - -/** - * ide_scan_pio_blacklist - check for a blacklisted drive - * @model: Drive model string - * - * This routine searches the ide_pio_blacklist for an entry - * matching the start/whole of the supplied model name. - * - * Returns -1 if no match found. - * Otherwise returns the recommended PIO mode from ide_pio_blacklist[]. - */ - -static int ide_scan_pio_blacklist (char *model) -{ - struct ide_pio_info *p; - - for (p = ide_pio_blacklist; p->name != NULL; p++) { - if (strncmp(p->name, model, strlen(p->name)) == 0) - return p->pio; - } - return -1; -} - -unsigned int ide_pio_cycle_time(ide_drive_t *drive, u8 pio) -{ - struct hd_driveid *id = drive->id; - int cycle_time = 0; - - if (id->field_valid & 2) { - if (id->capability & 8) - cycle_time = id->eide_pio_iordy; - else - cycle_time = id->eide_pio; - } - - /* conservative "downgrade" for all pre-ATA2 drives */ - if (pio < 3) { - if (cycle_time && cycle_time < ide_pio_timings[pio].cycle_time) - cycle_time = 0; /* use standard timing */ - } - - return cycle_time ? cycle_time : ide_pio_timings[pio].cycle_time; -} - -EXPORT_SYMBOL_GPL(ide_pio_cycle_time); - /** * ide_get_best_pio_mode - get PIO mode from drive * @drive: drive to consider diff --git a/drivers/ide/ide-pio-blacklist.c b/drivers/ide/ide-pio-blacklist.c new file mode 100644 index 000000000000..a8c2c8f8660a --- /dev/null +++ b/drivers/ide/ide-pio-blacklist.c @@ -0,0 +1,94 @@ +/* + * PIO blacklist. Some drives incorrectly report their maximal PIO mode, + * at least in respect to CMD640. Here we keep info on some known drives. + * + * Changes to the ide_pio_blacklist[] should be made with EXTREME CAUTION + * to avoid breaking the fragile cmd640.c support. + */ + +#include + +static struct ide_pio_info { + const char *name; + int pio; +} ide_pio_blacklist [] = { + { "Conner Peripherals 540MB - CFS540A", 3 }, + + { "WDC AC2700", 3 }, + { "WDC AC2540", 3 }, + { "WDC AC2420", 3 }, + { "WDC AC2340", 3 }, + { "WDC AC2250", 0 }, + { "WDC AC2200", 0 }, + { "WDC AC21200", 4 }, + { "WDC AC2120", 0 }, + { "WDC AC2850", 3 }, + { "WDC AC1270", 3 }, + { "WDC AC1170", 1 }, + { "WDC AC1210", 1 }, + { "WDC AC280", 0 }, + { "WDC AC31000", 3 }, + { "WDC AC31200", 3 }, + + { "Maxtor 7131 AT", 1 }, + { "Maxtor 7171 AT", 1 }, + { "Maxtor 7213 AT", 1 }, + { "Maxtor 7245 AT", 1 }, + { "Maxtor 7345 AT", 1 }, + { "Maxtor 7546 AT", 3 }, + { "Maxtor 7540 AV", 3 }, + + { "SAMSUNG SHD-3121A", 1 }, + { "SAMSUNG SHD-3122A", 1 }, + { "SAMSUNG SHD-3172A", 1 }, + + { "ST5660A", 3 }, + { "ST3660A", 3 }, + { "ST3630A", 3 }, + { "ST3655A", 3 }, + { "ST3391A", 3 }, + { "ST3390A", 1 }, + { "ST3600A", 1 }, + { "ST3290A", 0 }, + { "ST3144A", 0 }, + { "ST3491A", 1 }, /* reports 3, should be 1 or 2 (depending on drive) + according to Seagate's FIND-ATA program */ + + { "QUANTUM ELS127A", 0 }, + { "QUANTUM ELS170A", 0 }, + { "QUANTUM LPS240A", 0 }, + { "QUANTUM LPS210A", 3 }, + { "QUANTUM LPS270A", 3 }, + { "QUANTUM LPS365A", 3 }, + { "QUANTUM LPS540A", 3 }, + { "QUANTUM LIGHTNING 540A", 3 }, + { "QUANTUM LIGHTNING 730A", 3 }, + + { "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */ + { "QUANTUM FIREBALL_640", 3 }, + { "QUANTUM FIREBALL_1080", 3 }, + { "QUANTUM FIREBALL_1280", 3 }, + { NULL, 0 } +}; + +/** + * ide_scan_pio_blacklist - check for a blacklisted drive + * @model: Drive model string + * + * This routine searches the ide_pio_blacklist for an entry + * matching the start/whole of the supplied model name. + * + * Returns -1 if no match found. + * Otherwise returns the recommended PIO mode from ide_pio_blacklist[]. + */ + +int ide_scan_pio_blacklist(char *model) +{ + struct ide_pio_info *p; + + for (p = ide_pio_blacklist; p->name != NULL; p++) { + if (strncmp(p->name, model, strlen(p->name)) == 0) + return p->pio; + } + return -1; +} diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c index adbd01784162..03f2ef5470a3 100644 --- a/drivers/ide/ide-pnp.c +++ b/drivers/ide/ide-pnp.c @@ -33,6 +33,8 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) ide_hwif_t *hwif; unsigned long base, ctl; + printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n"); + if (!(pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) && pnp_irq_valid(dev, 0))) return -1; @@ -62,10 +64,8 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) u8 index = hwif->index; u8 idx[4] = { index, 0xff, 0xff, 0xff }; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); - printk(KERN_INFO "ide%d: generic PnP IDE interface\n", index); pnp_set_drvdata(dev, hwif); ide_device_add(idx, NULL); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index d21e51a02c3e..235ebdb29b28 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -39,6 +39,8 @@ #include #include +static ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */ + /** * generic_id - add a generic drive id * @drive: drive to make an ID block for @@ -1318,10 +1320,10 @@ static void ide_port_init_devices(ide_hwif_t *hwif) drive->unmask = 1; if (hwif->host_flags & IDE_HFLAG_NO_UNMASK_IRQS) drive->no_unmask = 1; - } - if (port_ops && port_ops->port_init_devs) - port_ops->port_init_devs(hwif); + if (port_ops && port_ops->init_dev) + port_ops->init_dev(drive); + } } static void ide_init_port(ide_hwif_t *hwif, unsigned int port, @@ -1473,22 +1475,29 @@ ide_hwif_t *ide_find_port_slot(const struct ide_port_info *d) for (; i < MAX_HWIFS; i++) { hwif = &ide_hwifs[i]; if (hwif->chipset == ide_unknown) - return hwif; + goto out_found; } } else { for (i = 2; i < MAX_HWIFS; i++) { hwif = &ide_hwifs[i]; if (hwif->chipset == ide_unknown) - return hwif; + goto out_found; } for (i = 0; i < 2 && i < MAX_HWIFS; i++) { hwif = &ide_hwifs[i]; if (hwif->chipset == ide_unknown) - return hwif; + goto out_found; } } + printk(KERN_ERR "%s: no free slot for interface\n", + d ? d->name : "ide"); + return NULL; + +out_found: + ide_init_port_data(hwif, i); + return hwif; } EXPORT_SYMBOL_GPL(ide_find_port_slot); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index f9cf1670e4e1..b711ab96e287 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -2591,7 +2591,6 @@ static ide_driver_t idetape_driver = { .do_request = idetape_do_request, .end_request = idetape_end_request, .error = __ide_error, - .abort = __ide_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idetape_proc, #endif diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index cf55a48a7dd2..1fbdb746dc88 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -8,28 +8,18 @@ * The big the bad and the ugly. */ -#include #include #include #include -#include -#include #include #include -#include #include -#include -#include #include -#include #include #include #include -#include #include -#include -#include #include #include @@ -62,25 +52,6 @@ int taskfile_lib_get_identify (ide_drive_t *drive, u8 *buf) return ide_raw_taskfile(drive, &args, buf, 1); } -static int inline task_dma_ok(ide_task_t *task) -{ - if (blk_fs_request(task->rq) || (task->tf_flags & IDE_TFLAG_FLAGGED)) - return 1; - - switch (task->tf.command) { - case WIN_WRITEDMA_ONCE: - case WIN_WRITEDMA: - case WIN_WRITEDMA_EXT: - case WIN_READDMA_ONCE: - case WIN_READDMA: - case WIN_READDMA_EXT: - case WIN_IDENTIFY_DMA: - return 1; - } - - return 0; -} - static ide_startstop_t task_no_data_intr(ide_drive_t *); static ide_startstop_t set_geometry_intr(ide_drive_t *); static ide_startstop_t recal_intr(ide_drive_t *); @@ -139,8 +110,7 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) WAIT_WORSTCASE, NULL); return ide_started; default: - if (task_dma_ok(task) == 0 || drive->using_dma == 0 || - dma_ops->dma_setup(drive)) + if (drive->using_dma == 0 || dma_ops->dma_setup(drive)) return ide_stopped; dma_ops->dma_exec_cmd(drive, tf->command); dma_ops->dma_start(drive); @@ -183,7 +153,6 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive) if (stat & (ERR_STAT|DRQ_STAT)) return ide_error(drive, "set_geometry_intr", stat); - BUG_ON(HWGROUP(drive)->handler != NULL); ide_set_handler(drive, &set_geometry_intr, WAIT_WORSTCASE, NULL); return ide_started; } diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h deleted file mode 100644 index 2e91c5870b4c..000000000000 --- a/drivers/ide/ide-timing.h +++ /dev/null @@ -1,217 +0,0 @@ -#ifndef _IDE_TIMING_H -#define _IDE_TIMING_H - -/* - * Copyright (c) 1999-2001 Vojtech Pavlik - */ - -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Should you need to contact me, the author, you can do so either by - * e-mail - mail your message to , or by paper mail: - * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic - */ - -#include -#include - -#define XFER_PIO_5 0x0d -#define XFER_UDMA_SLOW 0x4f - -struct ide_timing { - short mode; - short setup; /* t1 */ - short act8b; /* t2 for 8-bit io */ - short rec8b; /* t2i for 8-bit io */ - short cyc8b; /* t0 for 8-bit io */ - short active; /* t2 or tD */ - short recover; /* t2i or tK */ - short cycle; /* t0 */ - short udma; /* t2CYCTYP/2 */ -}; - -/* - * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds). - * These were taken from ATA/ATAPI-6 standard, rev 0a, except - * for PIO 5, which is a nonstandard extension and UDMA6, which - * is currently supported only by Maxtor drives. - */ - -static struct ide_timing ide_timing[] = { - - { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 15 }, - { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 20 }, - { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 30 }, - { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 45 }, - - { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 60 }, - { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, - { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, - - { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 150 }, - - { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 120, 0 }, - { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 150, 0 }, - { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 480, 0 }, - - { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 240, 0 }, - { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, - { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, - - { XFER_PIO_5, 20, 50, 30, 100, 50, 30, 100, 0 }, - { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 120, 0 }, - { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 180, 0 }, - - { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 240, 0 }, - { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 383, 0 }, - { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 600, 0 }, - - { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960, 0 }, - - { -1 } -}; - -#define IDE_TIMING_SETUP 0x01 -#define IDE_TIMING_ACT8B 0x02 -#define IDE_TIMING_REC8B 0x04 -#define IDE_TIMING_CYC8B 0x08 -#define IDE_TIMING_8BIT 0x0e -#define IDE_TIMING_ACTIVE 0x10 -#define IDE_TIMING_RECOVER 0x20 -#define IDE_TIMING_CYCLE 0x40 -#define IDE_TIMING_UDMA 0x80 -#define IDE_TIMING_ALL 0xff - -#define ENOUGH(v,unit) (((v)-1)/(unit)+1) -#define EZ(v,unit) ((v)?ENOUGH(v,unit):0) - -#define XFER_MODE 0xf0 -#define XFER_MWDMA 0x20 -#define XFER_EPIO 0x01 -#define XFER_PIO 0x00 - -static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q, int T, int UT) -{ - q->setup = EZ(t->setup * 1000, T); - q->act8b = EZ(t->act8b * 1000, T); - q->rec8b = EZ(t->rec8b * 1000, T); - q->cyc8b = EZ(t->cyc8b * 1000, T); - q->active = EZ(t->active * 1000, T); - q->recover = EZ(t->recover * 1000, T); - q->cycle = EZ(t->cycle * 1000, T); - q->udma = EZ(t->udma * 1000, UT); -} - -static void ide_timing_merge(struct ide_timing *a, struct ide_timing *b, struct ide_timing *m, unsigned int what) -{ - if (what & IDE_TIMING_SETUP ) m->setup = max(a->setup, b->setup); - if (what & IDE_TIMING_ACT8B ) m->act8b = max(a->act8b, b->act8b); - if (what & IDE_TIMING_REC8B ) m->rec8b = max(a->rec8b, b->rec8b); - if (what & IDE_TIMING_CYC8B ) m->cyc8b = max(a->cyc8b, b->cyc8b); - if (what & IDE_TIMING_ACTIVE ) m->active = max(a->active, b->active); - if (what & IDE_TIMING_RECOVER) m->recover = max(a->recover, b->recover); - if (what & IDE_TIMING_CYCLE ) m->cycle = max(a->cycle, b->cycle); - if (what & IDE_TIMING_UDMA ) m->udma = max(a->udma, b->udma); -} - -static struct ide_timing* ide_timing_find_mode(short speed) -{ - struct ide_timing *t; - - for (t = ide_timing; t->mode != speed; t++) - if (t->mode < 0) - return NULL; - return t; -} - -static int ide_timing_compute(ide_drive_t *drive, short speed, struct ide_timing *t, int T, int UT) -{ - struct hd_driveid *id = drive->id; - struct ide_timing *s, p; - -/* - * Find the mode. - */ - - if (!(s = ide_timing_find_mode(speed))) - return -EINVAL; - -/* - * Copy the timing from the table. - */ - - *t = *s; - -/* - * If the drive is an EIDE drive, it can tell us it needs extended - * PIO/MWDMA cycle timing. - */ - - if (id && id->field_valid & 2) { /* EIDE drive */ - - memset(&p, 0, sizeof(p)); - - switch (speed & XFER_MODE) { - - case XFER_PIO: - if (speed <= XFER_PIO_2) p.cycle = p.cyc8b = id->eide_pio; - else p.cycle = p.cyc8b = id->eide_pio_iordy; - break; - - case XFER_MWDMA: - p.cycle = id->eide_dma_min; - break; - } - - ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B); - } - -/* - * Convert the timing to bus clock counts. - */ - - ide_timing_quantize(t, t, T, UT); - -/* - * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, S.M.A.R.T - * and some other commands. We have to ensure that the DMA cycle timing is - * slower/equal than the fastest PIO timing. - */ - - if ((speed & XFER_MODE) != XFER_PIO) { - u8 pio = ide_get_best_pio_mode(drive, 255, 5); - ide_timing_compute(drive, XFER_PIO_0 + pio, &p, T, UT); - ide_timing_merge(&p, t, t, IDE_TIMING_ALL); - } - -/* - * Lengthen active & recovery time so that cycle time is correct. - */ - - if (t->act8b + t->rec8b < t->cyc8b) { - t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2; - t->rec8b = t->cyc8b - t->act8b; - } - - if (t->active + t->recover < t->cycle) { - t->active += (t->cycle - (t->active + t->recover)) / 2; - t->recover = t->cycle - t->active; - } - - return 0; -} - -#endif diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c new file mode 100644 index 000000000000..8c2f8327f487 --- /dev/null +++ b/drivers/ide/ide-timings.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 1999-2001 Vojtech Pavlik + * Copyright (c) 2007-2008 Bartlomiej Zolnierkiewicz + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Should you need to contact me, the author, you can do so either by + * e-mail - mail your message to , or by paper mail: + * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic + */ + +#include +#include +#include +#include + +/* + * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds). + * These were taken from ATA/ATAPI-6 standard, rev 0a, except + * for PIO 5, which is a nonstandard extension and UDMA6, which + * is currently supported only by Maxtor drives. + */ + +static struct ide_timing ide_timing[] = { + + { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 15 }, + { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 20 }, + { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 30 }, + { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 45 }, + + { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 60 }, + { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, + { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, + + { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 120, 0 }, + { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 150, 0 }, + { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 480, 0 }, + + { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 240, 0 }, + { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, + { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, + + { XFER_PIO_5, 20, 50, 30, 100, 50, 30, 100, 0 }, + { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 120, 0 }, + { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 180, 0 }, + + { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 240, 0 }, + { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 383, 0 }, + { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 600, 0 }, + + { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960, 0 }, + + { 0xff } +}; + +struct ide_timing *ide_timing_find_mode(u8 speed) +{ + struct ide_timing *t; + + for (t = ide_timing; t->mode != speed; t++) + if (t->mode == 0xff) + return NULL; + return t; +} +EXPORT_SYMBOL_GPL(ide_timing_find_mode); + +u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio) +{ + struct hd_driveid *id = drive->id; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); + u16 cycle = 0; + + if (id->field_valid & 2) { + if (id->capability & 8) + cycle = id->eide_pio_iordy; + else + cycle = id->eide_pio; + + /* conservative "downgrade" for all pre-ATA2 drives */ + if (pio < 3 && cycle < t->cycle) + cycle = 0; /* use standard timing */ + } + + return cycle ? cycle : t->cycle; +} +EXPORT_SYMBOL_GPL(ide_pio_cycle_time); + +#define ENOUGH(v, unit) (((v) - 1) / (unit) + 1) +#define EZ(v, unit) ((v) ? ENOUGH(v, unit) : 0) + +static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q, + int T, int UT) +{ + q->setup = EZ(t->setup * 1000, T); + q->act8b = EZ(t->act8b * 1000, T); + q->rec8b = EZ(t->rec8b * 1000, T); + q->cyc8b = EZ(t->cyc8b * 1000, T); + q->active = EZ(t->active * 1000, T); + q->recover = EZ(t->recover * 1000, T); + q->cycle = EZ(t->cycle * 1000, T); + q->udma = EZ(t->udma * 1000, UT); +} + +void ide_timing_merge(struct ide_timing *a, struct ide_timing *b, + struct ide_timing *m, unsigned int what) +{ + if (what & IDE_TIMING_SETUP) + m->setup = max(a->setup, b->setup); + if (what & IDE_TIMING_ACT8B) + m->act8b = max(a->act8b, b->act8b); + if (what & IDE_TIMING_REC8B) + m->rec8b = max(a->rec8b, b->rec8b); + if (what & IDE_TIMING_CYC8B) + m->cyc8b = max(a->cyc8b, b->cyc8b); + if (what & IDE_TIMING_ACTIVE) + m->active = max(a->active, b->active); + if (what & IDE_TIMING_RECOVER) + m->recover = max(a->recover, b->recover); + if (what & IDE_TIMING_CYCLE) + m->cycle = max(a->cycle, b->cycle); + if (what & IDE_TIMING_UDMA) + m->udma = max(a->udma, b->udma); +} +EXPORT_SYMBOL_GPL(ide_timing_merge); + +int ide_timing_compute(ide_drive_t *drive, u8 speed, + struct ide_timing *t, int T, int UT) +{ + struct hd_driveid *id = drive->id; + struct ide_timing *s, p; + + /* + * Find the mode. + */ + s = ide_timing_find_mode(speed); + if (s == NULL) + return -EINVAL; + + /* + * Copy the timing from the table. + */ + *t = *s; + + /* + * If the drive is an EIDE drive, it can tell us it needs extended + * PIO/MWDMA cycle timing. + */ + if (id && id->field_valid & 2) { /* EIDE drive */ + + memset(&p, 0, sizeof(p)); + + if (speed <= XFER_PIO_2) + p.cycle = p.cyc8b = id->eide_pio; + else if (speed <= XFER_PIO_5) + p.cycle = p.cyc8b = id->eide_pio_iordy; + else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2) + p.cycle = id->eide_dma_min; + + ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B); + } + + /* + * Convert the timing to bus clock counts. + */ + ide_timing_quantize(t, t, T, UT); + + /* + * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, + * S.M.A.R.T and some other commands. We have to ensure that the + * DMA cycle timing is slower/equal than the fastest PIO timing. + */ + if (speed >= XFER_SW_DMA_0) { + u8 pio = ide_get_best_pio_mode(drive, 255, 5); + ide_timing_compute(drive, XFER_PIO_0 + pio, &p, T, UT); + ide_timing_merge(&p, t, t, IDE_TIMING_ALL); + } + + /* + * Lengthen active & recovery time so that cycle time is correct. + */ + if (t->act8b + t->rec8b < t->cyc8b) { + t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2; + t->rec8b = t->cyc8b - t->act8b; + } + + if (t->active + t->recover < t->cycle) { + t->active += (t->cycle - (t->active + t->recover)) / 2; + t->recover = t->cycle - t->active; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ide_timing_compute); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 2b8453510e09..d4a6b102a772 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -50,29 +50,16 @@ #include #include #include -#include -#include #include #include #include #include -#include #include #include #include -#include #include #include -#include -#include -#include #include -#include - -#include -#include -#include -#include /* default maximum number of failures */ @@ -91,8 +78,6 @@ DEFINE_MUTEX(ide_cfg_mtx); __cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock); EXPORT_SYMBOL(ide_lock); -ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */ - static void ide_port_init_devices_data(ide_hwif_t *); /* @@ -121,7 +106,6 @@ void ide_init_port_data(ide_hwif_t *hwif, unsigned int index) ide_port_init_devices_data(hwif); } -EXPORT_SYMBOL_GPL(ide_init_port_data); static void ide_port_init_devices_data(ide_hwif_t *hwif) { @@ -150,18 +134,6 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) } } -static void __init init_ide_data (void) -{ - unsigned int index; - - /* Initialise all interface structures */ - for (index = 0; index < MAX_HWIFS; ++index) { - ide_hwif_t *hwif = &ide_hwifs[index]; - - ide_init_port_data(hwif, index); - } -} - void ide_remove_port_from_hwgroup(ide_hwif_t *hwif) { ide_hwgroup_t *hwgroup = hwif->hwgroup; @@ -312,7 +284,8 @@ void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw) memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports)); hwif->irq = hw->irq; hwif->chipset = hw->chipset; - hwif->gendev.parent = hw->dev; + hwif->dev = hw->dev; + hwif->gendev.parent = hw->parent ? hw->parent : hw->dev; hwif->ack_intr = hw->ack_intr; } EXPORT_SYMBOL_GPL(ide_init_port_hw); @@ -556,6 +529,22 @@ static int generic_ide_resume(struct device *dev) return err; } +static int generic_drive_reset(ide_drive_t *drive) +{ + struct request *rq; + int ret = 0; + + rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_len = 1; + rq->cmd[0] = REQ_DRIVE_RESET; + rq->cmd_flags |= REQ_SOFTBARRIER; + if (blk_execute_rq(drive->queue, NULL, rq, 1)) + ret = rq->errors; + blk_put_request(rq); + return ret; +} + int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device *bdev, unsigned int cmd, unsigned long arg) { @@ -630,33 +619,8 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device if (!capable(CAP_SYS_ADMIN)) return -EACCES; - /* - * Abort the current command on the - * group if there is one, taking - * care not to allow anything else - * to be queued and to die on the - * spot if we miss one somehow - */ - - spin_lock_irqsave(&ide_lock, flags); - - if (HWGROUP(drive)->resetting) { - spin_unlock_irqrestore(&ide_lock, flags); - return -EBUSY; - } + return generic_drive_reset(drive); - ide_abort(drive, "drive reset"); - - BUG_ON(HWGROUP(drive)->handler); - - /* Ensure nothing gets queued after we - drop the lock. Reset will clear the busy */ - - HWGROUP(drive)->busy = 1; - spin_unlock_irqrestore(&ide_lock, flags); - (void) ide_do_reset(drive); - - return 0; case HDIO_GET_BUSSTATE: if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -1021,8 +985,6 @@ static int __init ide_init(void) goto out_port_class; } - init_ide_data(); - proc_ide_create(); return 0; diff --git a/drivers/ide/legacy/ali14xx.c b/drivers/ide/legacy/ali14xx.c index 052125fafcfa..4ec19737f3c5 100644 --- a/drivers/ide/legacy/ali14xx.c +++ b/drivers/ide/legacy/ali14xx.c @@ -117,10 +117,11 @@ static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 param1, param2, param3, param4; unsigned long flags; int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); /* calculate timing, according to PIO mode */ time1 = ide_pio_cycle_time(drive, pio); - time2 = ide_pio_timings[pio].active_time; + time2 = t->active; param3 = param1 = (time2 * bus_speed + 999) / 1000; param4 = param2 = (time1 * bus_speed + 999) / 1000 - param1; if (pio < 3) { diff --git a/drivers/ide/legacy/buddha.c b/drivers/ide/legacy/buddha.c index 9a1d27ef3f8a..0497e7f85b09 100644 --- a/drivers/ide/legacy/buddha.c +++ b/drivers/ide/legacy/buddha.c @@ -227,7 +227,6 @@ fail_base2: if (hwif) { u8 index = hwif->index; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); idx[i] = index; diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c index af11028b4794..129a812bb57f 100644 --- a/drivers/ide/legacy/falconide.c +++ b/drivers/ide/legacy/falconide.c @@ -111,7 +111,6 @@ static int __init falconide_init(void) u8 index = hwif->index; u8 idx[4] = { index, 0xff, 0xff, 0xff }; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); /* Atari has a byte-swapped IDE interface */ diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c index b78941680c32..7e74b20202df 100644 --- a/drivers/ide/legacy/gayle.c +++ b/drivers/ide/legacy/gayle.c @@ -185,7 +185,6 @@ found: if (hwif) { u8 index = hwif->index; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); idx[i] = index; diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c deleted file mode 100644 index abdedf56643e..000000000000 --- a/drivers/ide/legacy/hd.c +++ /dev/null @@ -1,815 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This is the low-level hd interrupt support. It traverses the - * request-list, using interrupts to jump between functions. As - * all the functions are called within interrupts, we may not - * sleep. Special care is recommended. - * - * modified by Drew Eckhardt to check nr of hd's from the CMOS. - * - * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug - * in the early extended-partition checks and added DM partitions - * - * IRQ-unmask, drive-id, multiple-mode, support for ">16 heads", - * and general streamlining by Mark Lord. - * - * Removed 99% of above. Use Mark's ide driver for those options. - * This is now a lightweight ST-506 driver. (Paul Gortmaker) - * - * Modified 1995 Russell King for ARM processor. - * - * Bugfix: max_sectors must be <= 255 or the wheels tend to come - * off in a hurry once you queue things up - Paul G. 02/2001 - */ - -/* Uncomment the following if you want verbose error reports. */ -/* #define VERBOSE_ERRORS */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* CMOS defines */ -#include -#include -#include - -#define REALLY_SLOW_IO -#include -#include -#include - -#ifdef __arm__ -#undef HD_IRQ -#endif -#include -#ifdef __arm__ -#define HD_IRQ IRQ_HARDDISK -#endif - -/* Hd controller regster ports */ - -#define HD_DATA 0x1f0 /* _CTL when writing */ -#define HD_ERROR 0x1f1 /* see err-bits */ -#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ -#define HD_SECTOR 0x1f3 /* starting sector */ -#define HD_LCYL 0x1f4 /* starting cylinder */ -#define HD_HCYL 0x1f5 /* high byte of starting cyl */ -#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ -#define HD_STATUS 0x1f7 /* see status-bits */ -#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ -#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ -#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ - -#define HD_CMD 0x3f6 /* used for resets */ -#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ - -/* Bits of HD_STATUS */ -#define ERR_STAT 0x01 -#define INDEX_STAT 0x02 -#define ECC_STAT 0x04 /* Corrected error */ -#define DRQ_STAT 0x08 -#define SEEK_STAT 0x10 -#define SERVICE_STAT SEEK_STAT -#define WRERR_STAT 0x20 -#define READY_STAT 0x40 -#define BUSY_STAT 0x80 - -/* Bits for HD_ERROR */ -#define MARK_ERR 0x01 /* Bad address mark */ -#define TRK0_ERR 0x02 /* couldn't find track 0 */ -#define ABRT_ERR 0x04 /* Command aborted */ -#define MCR_ERR 0x08 /* media change request */ -#define ID_ERR 0x10 /* ID field not found */ -#define MC_ERR 0x20 /* media changed */ -#define ECC_ERR 0x40 /* Uncorrectable ECC error */ -#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ -#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ - -static DEFINE_SPINLOCK(hd_lock); -static struct request_queue *hd_queue; - -#define MAJOR_NR HD_MAJOR -#define QUEUE (hd_queue) -#define CURRENT elv_next_request(hd_queue) - -#define TIMEOUT_VALUE (6*HZ) -#define HD_DELAY 0 - -#define MAX_ERRORS 16 /* Max read/write errors/sector */ -#define RESET_FREQ 8 /* Reset controller every 8th retry */ -#define RECAL_FREQ 4 /* Recalibrate every 4th retry */ -#define MAX_HD 2 - -#define STAT_OK (READY_STAT|SEEK_STAT) -#define OK_STATUS(s) (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK) - -static void recal_intr(void); -static void bad_rw_intr(void); - -static int reset; -static int hd_error; - -/* - * This struct defines the HD's and their types. - */ -struct hd_i_struct { - unsigned int head, sect, cyl, wpcom, lzone, ctl; - int unit; - int recalibrate; - int special_op; -}; - -#ifdef HD_TYPE -static struct hd_i_struct hd_info[] = { HD_TYPE }; -static int NR_HD = ARRAY_SIZE(hd_info); -#else -static struct hd_i_struct hd_info[MAX_HD]; -static int NR_HD; -#endif - -static struct gendisk *hd_gendisk[MAX_HD]; - -static struct timer_list device_timer; - -#define TIMEOUT_VALUE (6*HZ) - -#define SET_TIMER \ - do { \ - mod_timer(&device_timer, jiffies + TIMEOUT_VALUE); \ - } while (0) - -static void (*do_hd)(void) = NULL; -#define SET_HANDLER(x) \ -if ((do_hd = (x)) != NULL) \ - SET_TIMER; \ -else \ - del_timer(&device_timer); - - -#if (HD_DELAY > 0) - -#include - -unsigned long last_req; - -unsigned long read_timer(void) -{ - unsigned long t, flags; - int i; - - spin_lock_irqsave(&i8253_lock, flags); - t = jiffies * 11932; - outb_p(0, 0x43); - i = inb_p(0x40); - i |= inb(0x40) << 8; - spin_unlock_irqrestore(&i8253_lock, flags); - return(t - i); -} -#endif - -static void __init hd_setup(char *str, int *ints) -{ - int hdind = 0; - - if (ints[0] != 3) - return; - if (hd_info[0].head != 0) - hdind = 1; - hd_info[hdind].head = ints[2]; - hd_info[hdind].sect = ints[3]; - hd_info[hdind].cyl = ints[1]; - hd_info[hdind].wpcom = 0; - hd_info[hdind].lzone = ints[1]; - hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0); - NR_HD = hdind+1; -} - -static void dump_status(const char *msg, unsigned int stat) -{ - char *name = "hd?"; - if (CURRENT) - name = CURRENT->rq_disk->disk_name; - -#ifdef VERBOSE_ERRORS - printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff); - if (stat & BUSY_STAT) printk("Busy "); - if (stat & READY_STAT) printk("DriveReady "); - if (stat & WRERR_STAT) printk("WriteFault "); - if (stat & SEEK_STAT) printk("SeekComplete "); - if (stat & DRQ_STAT) printk("DataRequest "); - if (stat & ECC_STAT) printk("CorrectedError "); - if (stat & INDEX_STAT) printk("Index "); - if (stat & ERR_STAT) printk("Error "); - printk("}\n"); - if ((stat & ERR_STAT) == 0) { - hd_error = 0; - } else { - hd_error = inb(HD_ERROR); - printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff); - if (hd_error & BBD_ERR) printk("BadSector "); - if (hd_error & ECC_ERR) printk("UncorrectableError "); - if (hd_error & ID_ERR) printk("SectorIdNotFound "); - if (hd_error & ABRT_ERR) printk("DriveStatusError "); - if (hd_error & TRK0_ERR) printk("TrackZeroNotFound "); - if (hd_error & MARK_ERR) printk("AddrMarkNotFound "); - printk("}"); - if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { - printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), - inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); - if (CURRENT) - printk(", sector=%ld", CURRENT->sector); - } - printk("\n"); - } -#else - printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff); - if ((stat & ERR_STAT) == 0) { - hd_error = 0; - } else { - hd_error = inb(HD_ERROR); - printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff); - } -#endif -} - -static void check_status(void) -{ - int i = inb_p(HD_STATUS); - - if (!OK_STATUS(i)) { - dump_status("check_status", i); - bad_rw_intr(); - } -} - -static int controller_busy(void) -{ - int retries = 100000; - unsigned char status; - - do { - status = inb_p(HD_STATUS); - } while ((status & BUSY_STAT) && --retries); - return status; -} - -static int status_ok(void) -{ - unsigned char status = inb_p(HD_STATUS); - - if (status & BUSY_STAT) - return 1; /* Ancient, but does it make sense??? */ - if (status & WRERR_STAT) - return 0; - if (!(status & READY_STAT)) - return 0; - if (!(status & SEEK_STAT)) - return 0; - return 1; -} - -static int controller_ready(unsigned int drive, unsigned int head) -{ - int retry = 100; - - do { - if (controller_busy() & BUSY_STAT) - return 0; - outb_p(0xA0 | (drive<<4) | head, HD_CURRENT); - if (status_ok()) - return 1; - } while (--retry); - return 0; -} - -static void hd_out(struct hd_i_struct *disk, - unsigned int nsect, - unsigned int sect, - unsigned int head, - unsigned int cyl, - unsigned int cmd, - void (*intr_addr)(void)) -{ - unsigned short port; - -#if (HD_DELAY > 0) - while (read_timer() - last_req < HD_DELAY) - /* nothing */; -#endif - if (reset) - return; - if (!controller_ready(disk->unit, head)) { - reset = 1; - return; - } - SET_HANDLER(intr_addr); - outb_p(disk->ctl, HD_CMD); - port = HD_DATA; - outb_p(disk->wpcom >> 2, ++port); - outb_p(nsect, ++port); - outb_p(sect, ++port); - outb_p(cyl, ++port); - outb_p(cyl >> 8, ++port); - outb_p(0xA0 | (disk->unit << 4) | head, ++port); - outb_p(cmd, ++port); -} - -static void hd_request (void); - -static int drive_busy(void) -{ - unsigned int i; - unsigned char c; - - for (i = 0; i < 500000 ; i++) { - c = inb_p(HD_STATUS); - if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK) - return 0; - } - dump_status("reset timed out", c); - return 1; -} - -static void reset_controller(void) -{ - int i; - - outb_p(4, HD_CMD); - for (i = 0; i < 1000; i++) barrier(); - outb_p(hd_info[0].ctl & 0x0f, HD_CMD); - for (i = 0; i < 1000; i++) barrier(); - if (drive_busy()) - printk("hd: controller still busy\n"); - else if ((hd_error = inb(HD_ERROR)) != 1) - printk("hd: controller reset failed: %02x\n", hd_error); -} - -static void reset_hd(void) -{ - static int i; - -repeat: - if (reset) { - reset = 0; - i = -1; - reset_controller(); - } else { - check_status(); - if (reset) - goto repeat; - } - if (++i < NR_HD) { - struct hd_i_struct *disk = &hd_info[i]; - disk->special_op = disk->recalibrate = 1; - hd_out(disk, disk->sect, disk->sect, disk->head-1, - disk->cyl, WIN_SPECIFY, &reset_hd); - if (reset) - goto repeat; - } else - hd_request(); -} - -/* - * Ok, don't know what to do with the unexpected interrupts: on some machines - * doing a reset and a retry seems to result in an eternal loop. Right now I - * ignore it, and just set the timeout. - * - * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the - * drive enters "idle", "standby", or "sleep" mode, so if the status looks - * "good", we just ignore the interrupt completely. - */ -static void unexpected_hd_interrupt(void) -{ - unsigned int stat = inb_p(HD_STATUS); - - if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) { - dump_status("unexpected interrupt", stat); - SET_TIMER; - } -} - -/* - * bad_rw_intr() now tries to be a bit smarter and does things - * according to the error returned by the controller. - * -Mika Liljeberg (liljeber@cs.Helsinki.FI) - */ -static void bad_rw_intr(void) -{ - struct request *req = CURRENT; - if (req != NULL) { - struct hd_i_struct *disk = req->rq_disk->private_data; - if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { - end_request(req, 0); - disk->special_op = disk->recalibrate = 1; - } else if (req->errors % RESET_FREQ == 0) - reset = 1; - else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0) - disk->special_op = disk->recalibrate = 1; - /* Otherwise just retry */ - } -} - -static inline int wait_DRQ(void) -{ - int retries; - int stat; - - for (retries = 0; retries < 100000; retries++) { - stat = inb_p(HD_STATUS); - if (stat & DRQ_STAT) - return 0; - } - dump_status("wait_DRQ", stat); - return -1; -} - -static void read_intr(void) -{ - struct request *req; - int i, retries = 100000; - - do { - i = (unsigned) inb_p(HD_STATUS); - if (i & BUSY_STAT) - continue; - if (!OK_STATUS(i)) - break; - if (i & DRQ_STAT) - goto ok_to_read; - } while (--retries > 0); - dump_status("read_intr", i); - bad_rw_intr(); - hd_request(); - return; -ok_to_read: - req = CURRENT; - insw(HD_DATA, req->buffer, 256); - req->sector++; - req->buffer += 512; - req->errors = 0; - i = --req->nr_sectors; - --req->current_nr_sectors; -#ifdef DEBUG - printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n", - req->rq_disk->disk_name, req->sector, req->nr_sectors, - req->buffer+512); -#endif - if (req->current_nr_sectors <= 0) - end_request(req, 1); - if (i > 0) { - SET_HANDLER(&read_intr); - return; - } - (void) inb_p(HD_STATUS); -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - if (elv_next_request(QUEUE)) - hd_request(); - return; -} - -static void write_intr(void) -{ - struct request *req = CURRENT; - int i; - int retries = 100000; - - do { - i = (unsigned) inb_p(HD_STATUS); - if (i & BUSY_STAT) - continue; - if (!OK_STATUS(i)) - break; - if ((req->nr_sectors <= 1) || (i & DRQ_STAT)) - goto ok_to_write; - } while (--retries > 0); - dump_status("write_intr", i); - bad_rw_intr(); - hd_request(); - return; -ok_to_write: - req->sector++; - i = --req->nr_sectors; - --req->current_nr_sectors; - req->buffer += 512; - if (!i || (req->bio && req->current_nr_sectors <= 0)) - end_request(req, 1); - if (i > 0) { - SET_HANDLER(&write_intr); - outsw(HD_DATA, req->buffer, 256); - local_irq_enable(); - } else { -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); - } - return; -} - -static void recal_intr(void) -{ - check_status(); -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); -} - -/* - * This is another of the error-routines I don't know what to do with. The - * best idea seems to just set reset, and start all over again. - */ -static void hd_times_out(unsigned long dummy) -{ - char *name; - - do_hd = NULL; - - if (!CURRENT) - return; - - disable_irq(HD_IRQ); - local_irq_enable(); - reset = 1; - name = CURRENT->rq_disk->disk_name; - printk("%s: timeout\n", name); - if (++CURRENT->errors >= MAX_ERRORS) { -#ifdef DEBUG - printk("%s: too many errors\n", name); -#endif - end_request(CURRENT, 0); - } - local_irq_disable(); - hd_request(); - enable_irq(HD_IRQ); -} - -static int do_special_op(struct hd_i_struct *disk, struct request *req) -{ - if (disk->recalibrate) { - disk->recalibrate = 0; - hd_out(disk, disk->sect, 0, 0, 0, WIN_RESTORE, &recal_intr); - return reset; - } - if (disk->head > 16) { - printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); - end_request(req, 0); - } - disk->special_op = 0; - return 1; -} - -/* - * The driver enables interrupts as much as possible. In order to do this, - * (a) the device-interrupt is disabled before entering hd_request(), - * and (b) the timeout-interrupt is disabled before the sti(). - * - * Interrupts are still masked (by default) whenever we are exchanging - * data/cmds with a drive, because some drives seem to have very poor - * tolerance for latency during I/O. The IDE driver has support to unmask - * interrupts for non-broken hardware, so use that driver if required. - */ -static void hd_request(void) -{ - unsigned int block, nsect, sec, track, head, cyl; - struct hd_i_struct *disk; - struct request *req; - - if (do_hd) - return; -repeat: - del_timer(&device_timer); - local_irq_enable(); - - req = CURRENT; - if (!req) { - do_hd = NULL; - return; - } - - if (reset) { - local_irq_disable(); - reset_hd(); - return; - } - disk = req->rq_disk->private_data; - block = req->sector; - nsect = req->nr_sectors; - if (block >= get_capacity(req->rq_disk) || - ((block+nsect) > get_capacity(req->rq_disk))) { - printk("%s: bad access: block=%d, count=%d\n", - req->rq_disk->disk_name, block, nsect); - end_request(req, 0); - goto repeat; - } - - if (disk->special_op) { - if (do_special_op(disk, req)) - goto repeat; - return; - } - sec = block % disk->sect + 1; - track = block / disk->sect; - head = track % disk->head; - cyl = track / disk->head; -#ifdef DEBUG - printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", - req->rq_disk->disk_name, - req_data_dir(req) == READ ? "read" : "writ", - cyl, head, sec, nsect, req->buffer); -#endif - if (blk_fs_request(req)) { - switch (rq_data_dir(req)) { - case READ: - hd_out(disk, nsect, sec, head, cyl, WIN_READ, - &read_intr); - if (reset) - goto repeat; - break; - case WRITE: - hd_out(disk, nsect, sec, head, cyl, WIN_WRITE, - &write_intr); - if (reset) - goto repeat; - if (wait_DRQ()) { - bad_rw_intr(); - goto repeat; - } - outsw(HD_DATA, req->buffer, 256); - break; - default: - printk("unknown hd-command\n"); - end_request(req, 0); - break; - } - } -} - -static void do_hd_request(struct request_queue *q) -{ - disable_irq(HD_IRQ); - hd_request(); - enable_irq(HD_IRQ); -} - -static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct hd_i_struct *disk = bdev->bd_disk->private_data; - - geo->heads = disk->head; - geo->sectors = disk->sect; - geo->cylinders = disk->cyl; - return 0; -} - -/* - * Releasing a block device means we sync() it, so that it can safely - * be forgotten about... - */ - -static irqreturn_t hd_interrupt(int irq, void *dev_id) -{ - void (*handler)(void) = do_hd; - - do_hd = NULL; - del_timer(&device_timer); - if (!handler) - handler = unexpected_hd_interrupt; - handler(); - local_irq_enable(); - return IRQ_HANDLED; -} - -static struct block_device_operations hd_fops = { - .getgeo = hd_getgeo, -}; - -/* - * This is the hard disk IRQ description. The IRQF_DISABLED in sa_flags - * means we run the IRQ-handler with interrupts disabled: this is bad for - * interrupt latency, but anything else has led to problems on some - * machines. - * - * We enable interrupts in some of the routines after making sure it's - * safe. - */ - -static int __init hd_init(void) -{ - int drive; - - if (register_blkdev(MAJOR_NR, "hd")) - return -1; - - hd_queue = blk_init_queue(do_hd_request, &hd_lock); - if (!hd_queue) { - unregister_blkdev(MAJOR_NR, "hd"); - return -ENOMEM; - } - - blk_queue_max_sectors(hd_queue, 255); - init_timer(&device_timer); - device_timer.function = hd_times_out; - blk_queue_hardsect_size(hd_queue, 512); - - if (!NR_HD) { - /* - * We don't know anything about the drive. This means - * that you *MUST* specify the drive parameters to the - * kernel yourself. - * - * If we were on an i386, we used to read this info from - * the BIOS or CMOS. This doesn't work all that well, - * since this assumes that this is a primary or secondary - * drive, and if we're using this legacy driver, it's - * probably an auxilliary controller added to recover - * legacy data off an ST-506 drive. Either way, it's - * definitely safest to have the user explicitly specify - * the information. - */ - printk("hd: no drives specified - use hd=cyl,head,sectors" - " on kernel command line\n"); - goto out; - } - - for (drive = 0 ; drive < NR_HD ; drive++) { - struct gendisk *disk = alloc_disk(64); - struct hd_i_struct *p = &hd_info[drive]; - if (!disk) - goto Enomem; - disk->major = MAJOR_NR; - disk->first_minor = drive << 6; - disk->fops = &hd_fops; - sprintf(disk->disk_name, "hd%c", 'a'+drive); - disk->private_data = p; - set_capacity(disk, p->head * p->sect * p->cyl); - disk->queue = hd_queue; - p->unit = drive; - hd_gendisk[drive] = disk; - printk("%s: %luMB, CHS=%d/%d/%d\n", - disk->disk_name, (unsigned long)get_capacity(disk)/2048, - p->cyl, p->head, p->sect); - } - - if (request_irq(HD_IRQ, hd_interrupt, IRQF_DISABLED, "hd", NULL)) { - printk("hd: unable to get IRQ%d for the hard disk driver\n", - HD_IRQ); - goto out1; - } - if (!request_region(HD_DATA, 8, "hd")) { - printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA); - goto out2; - } - if (!request_region(HD_CMD, 1, "hd(cmd)")) { - printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD); - goto out3; - } - - /* Let them fly */ - for (drive = 0; drive < NR_HD; drive++) - add_disk(hd_gendisk[drive]); - - return 0; - -out3: - release_region(HD_DATA, 8); -out2: - free_irq(HD_IRQ, NULL); -out1: - for (drive = 0; drive < NR_HD; drive++) - put_disk(hd_gendisk[drive]); - NR_HD = 0; -out: - del_timer(&device_timer); - unregister_blkdev(MAJOR_NR, "hd"); - blk_cleanup_queue(hd_queue); - return -1; -Enomem: - while (drive--) - put_disk(hd_gendisk[drive]); - goto out; -} - -static int __init parse_hd_setup(char *line) -{ - int ints[6]; - - (void) get_options(line, ARRAY_SIZE(ints), ints); - hd_setup(NULL, ints); - - return 1; -} -__setup("hd=", parse_hd_setup); - -module_init(hd_init); diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c index dd6dfb32e853..7bc8fd59ea9e 100644 --- a/drivers/ide/legacy/ht6560b.c +++ b/drivers/ide/legacy/ht6560b.c @@ -216,6 +216,7 @@ static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio) if (pio) { unsigned int cycle_time; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); cycle_time = ide_pio_cycle_time(drive, pio); @@ -224,10 +225,8 @@ static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio) * actual cycle time for recovery and activity * according system bus speed. */ - active_time = ide_pio_timings[pio].active_time; - recovery_time = cycle_time - - active_time - - ide_pio_timings[pio].setup_time; + active_time = t->active; + recovery_time = cycle_time - active_time - t->setup; /* * Cycle times should be Vesa bus cycles */ @@ -311,16 +310,16 @@ static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio) #endif } -static void __init ht6560b_port_init_devs(ide_hwif_t *hwif) +static void __init ht6560b_init_dev(ide_drive_t *drive) { + ide_hwif_t *hwif = drive->hwif; /* Setting default configurations for drives. */ int t = (HT_CONFIG_DEFAULT << 8) | HT_TIMING_DEFAULT; if (hwif->channel) t |= (HT_SECONDARY_IF << 8); - hwif->drives[0].drive_data = t; - hwif->drives[1].drive_data = t; + drive->drive_data = t; } static int probe_ht6560b; @@ -329,7 +328,7 @@ module_param_named(probe, probe_ht6560b, bool, 0); MODULE_PARM_DESC(probe, "probe for HT6560B chipset"); static const struct ide_port_ops ht6560b_port_ops = { - .port_init_devs = ht6560b_port_init_devs, + .init_dev = ht6560b_init_dev, .set_pio_mode = ht6560b_set_pio_mode, .selectproc = ht6560b_selectproc, }; diff --git a/drivers/ide/legacy/ide-4drives.c b/drivers/ide/legacy/ide-4drives.c index ecae916a3385..89c8ff0a4d08 100644 --- a/drivers/ide/legacy/ide-4drives.c +++ b/drivers/ide/legacy/ide-4drives.c @@ -11,6 +11,21 @@ static int probe_4drives; module_param_named(probe, probe_4drives, bool, 0); MODULE_PARM_DESC(probe, "probe for generic IDE chipset with 4 drives/port"); +static void ide_4drives_init_dev(ide_drive_t *drive) +{ + if (drive->hwif->channel) + drive->select.all ^= 0x20; +} + +static const struct ide_port_ops ide_4drives_port_ops = { + .init_dev = ide_4drives_init_dev, +}; + +static const struct ide_port_info ide_4drives_port_info = { + .port_ops = &ide_4drives_port_ops, + .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA, +}; + static int __init ide_4drives_init(void) { ide_hwif_t *hwif, *mate; @@ -49,18 +64,10 @@ static int __init ide_4drives_init(void) mate = ide_find_port(); if (mate) { ide_init_port_hw(mate, &hw); - mate->drives[0].select.all ^= 0x20; - mate->drives[1].select.all ^= 0x20; idx[1] = mate->index; - - if (hwif) { - hwif->mate = mate; - mate->mate = hwif; - hwif->serialized = mate->serialized = 1; - } } - ide_device_add(idx, NULL); + ide_device_add(idx, &ide_4drives_port_info); return 0; } diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c index 8dbf4d9b6447..fc53dcfbfe38 100644 --- a/drivers/ide/legacy/ide-cs.c +++ b/drivers/ide/legacy/ide-cs.c @@ -154,6 +154,11 @@ static const struct ide_port_ops idecs_port_ops = { .quirkproc = ide_undecoded_slave, }; +static const struct ide_port_info idecs_port_info = { + .port_ops = &idecs_port_ops, + .host_flags = IDE_HFLAG_NO_DMA, +}; + static ide_hwif_t *idecs_register(unsigned long io, unsigned long ctl, unsigned long irq, struct pcmcia_device *handle) { @@ -187,13 +192,11 @@ static ide_hwif_t *idecs_register(unsigned long io, unsigned long ctl, i = hwif->index; - ide_init_port_data(hwif, i); ide_init_port_hw(hwif, &hw); - hwif->port_ops = &idecs_port_ops; idx[0] = i; - ide_device_add(idx, NULL); + ide_device_add(idx, &idecs_port_info); if (hwif->present) return hwif; diff --git a/drivers/ide/legacy/ide_platform.c b/drivers/ide/legacy/ide_platform.c index d3bc3f24e05d..a249562b34b5 100644 --- a/drivers/ide/legacy/ide_platform.c +++ b/drivers/ide/legacy/ide_platform.c @@ -44,6 +44,10 @@ static void __devinit plat_ide_setup_ports(hw_regs_t *hw, hw->chipset = ide_generic; } +static const struct ide_port_info platform_ide_port_info = { + .host_flags = IDE_HFLAG_NO_DMA, +}; + static int __devinit plat_ide_probe(struct platform_device *pdev) { struct resource *res_base, *res_alt, *res_irq; @@ -54,6 +58,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev) int ret = 0; int mmio = 0; hw_regs_t hw; + struct ide_port_info d = platform_ide_port_info; pdata = pdev->dev.platform_data; @@ -102,13 +107,13 @@ static int __devinit plat_ide_probe(struct platform_device *pdev) ide_init_port_hw(hwif, &hw); if (mmio) { - hwif->host_flags = IDE_HFLAG_MMIO; + d.host_flags |= IDE_HFLAG_MMIO; default_hwif_mmiops(hwif); } idx[0] = hwif->index; - ide_device_add(idx, NULL); + ide_device_add(idx, &d); platform_set_drvdata(pdev, hwif); diff --git a/drivers/ide/legacy/macide.c b/drivers/ide/legacy/macide.c index 2e84290d0bcc..0a6195bcfeda 100644 --- a/drivers/ide/legacy/macide.c +++ b/drivers/ide/legacy/macide.c @@ -130,7 +130,6 @@ static int __init macide_init(void) u8 index = hwif->index; u8 idx[4] = { index, 0xff, 0xff, 0xff }; - ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); ide_device_add(idx, NULL); diff --git a/drivers/ide/legacy/q40ide.c b/drivers/ide/legacy/q40ide.c index 8ff6e2d20834..9c2b9d078f69 100644 --- a/drivers/ide/legacy/q40ide.c +++ b/drivers/ide/legacy/q40ide.c @@ -142,7 +142,6 @@ static int __init q40ide_init(void) hwif = ide_find_port(); if (hwif) { - ide_init_port_data(hwif, hwif->index); ide_init_port_hw(hwif, &hw); /* Q40 has a byte-swapped IDE interface */ diff --git a/drivers/ide/legacy/qd65xx.c b/drivers/ide/legacy/qd65xx.c index 51dba82f8812..2338f344ea24 100644 --- a/drivers/ide/legacy/qd65xx.c +++ b/drivers/ide/legacy/qd65xx.c @@ -207,6 +207,7 @@ static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio) { ide_hwif_t *hwif = drive->hwif; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); unsigned int cycle_time; int active_time = 175; int recovery_time = 415; /* worst case values from the dos driver */ @@ -236,7 +237,7 @@ static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio) active_time = 110; recovery_time = cycle_time - 120; } else { - active_time = ide_pio_timings[pio].active_time; + active_time = t->active; recovery_time = cycle_time - active_time; } } @@ -281,17 +282,18 @@ static int __init qd_testreg(int port) return (readreg != QD_TESTVAL); } -static void __init qd6500_port_init_devs(ide_hwif_t *hwif) +static void __init qd6500_init_dev(ide_drive_t *drive) { + ide_hwif_t *hwif = drive->hwif; u8 base = (hwif->config_data & 0xff00) >> 8; u8 config = QD_CONFIG(hwif); - hwif->drives[0].drive_data = QD6500_DEF_DATA; - hwif->drives[1].drive_data = QD6500_DEF_DATA; + drive->drive_data = QD6500_DEF_DATA; } -static void __init qd6580_port_init_devs(ide_hwif_t *hwif) +static void __init qd6580_init_dev(ide_drive_t *drive) { + ide_hwif_t *hwif = drive->hwif; u16 t1, t2; u8 base = (hwif->config_data & 0xff00) >> 8; u8 config = QD_CONFIG(hwif); @@ -302,18 +304,17 @@ static void __init qd6580_port_init_devs(ide_hwif_t *hwif) } else t2 = t1 = hwif->channel ? QD6580_DEF_DATA2 : QD6580_DEF_DATA; - hwif->drives[0].drive_data = t1; - hwif->drives[1].drive_data = t2; + drive->drive_data = drive->select.b.unit ? t2 : t1; } static const struct ide_port_ops qd6500_port_ops = { - .port_init_devs = qd6500_port_init_devs, + .init_dev = qd6500_init_dev, .set_pio_mode = qd6500_set_pio_mode, .selectproc = qd65xx_select, }; static const struct ide_port_ops qd6580_port_ops = { - .port_init_devs = qd6580_port_init_devs, + .init_dev = qd6580_init_dev, .set_pio_mode = qd6580_set_pio_mode, .selectproc = qd65xx_select, }; diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c index 1a6c27b32498..48d57cae63c6 100644 --- a/drivers/ide/mips/au1xxx-ide.c +++ b/drivers/ide/mips/au1xxx-ide.c @@ -213,10 +213,8 @@ static int auide_build_dmatable(ide_drive_t *drive) { int i, iswrite, count = 0; ide_hwif_t *hwif = HWIF(drive); - struct request *rq = HWGROUP(drive)->rq; - - _auide_hwif *ahwif = (_auide_hwif*)hwif->hwif_data; + _auide_hwif *ahwif = &auide_hwif; struct scatterlist *sg; iswrite = (rq_data_dir(rq) == WRITE); @@ -402,7 +400,7 @@ static const struct ide_dma_ops au1xxx_dma_ops = { static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d) { - _auide_hwif *auide = (_auide_hwif *)hwif->hwif_data; + _auide_hwif *auide = &auide_hwif; dbdev_tab_t source_dev_tab, target_dev_tab; u32 dev_id, tsize, devwidth, flags; @@ -463,7 +461,7 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d) #else static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d) { - _auide_hwif *auide = (_auide_hwif *)hwif->hwif_data; + _auide_hwif *auide = &auide_hwif; dbdev_tab_t source_dev_tab; int flags; @@ -600,8 +598,6 @@ static int au_ide_probe(struct device *dev) ide_init_port_hw(hwif, &hw); - hwif->dev = dev; - /* If the user has selected DDMA assisted copies, then set up a few local I/O function entry points */ @@ -610,11 +606,8 @@ static int au_ide_probe(struct device *dev) hwif->input_data = au1xxx_input_data; hwif->output_data = au1xxx_output_data; #endif - hwif->select_data = 0; /* no chipset-specific code */ - hwif->config_data = 0; /* no chipset-specific code */ auide_hwif.hwif = hwif; - hwif->hwif_data = &auide_hwif; idx[0] = hwif->index; diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c index 52fee3d2771a..9f1212cc4aed 100644 --- a/drivers/ide/mips/swarm.c +++ b/drivers/ide/mips/swarm.c @@ -61,6 +61,11 @@ static struct resource swarm_ide_resource = { static struct platform_device *swarm_ide_dev; +static const struct ide_port_info swarm_port_info = { + .name = DRV_NAME, + .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, +}; + /* * swarm_ide_probe - if the board header indicates the existence of * Generic Bus IDE, allocate a HWIF for it. @@ -77,12 +82,6 @@ static int __devinit swarm_ide_probe(struct device *dev) if (!SIBYTE_HAVE_IDE) return -ENODEV; - hwif = ide_find_port(); - if (hwif == NULL) { - printk(KERN_ERR DRV_NAME ": no free slot for interface\n"); - return -ENOMEM; - } - base = ioremap(A_IO_EXT_BASE, 0x800); offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS)); size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS)); @@ -109,10 +108,6 @@ static int __devinit swarm_ide_probe(struct device *dev) base = ioremap(offset, size); - /* Setup MMIO ops. */ - hwif->host_flags = IDE_HFLAG_MMIO; - default_hwif_mmiops(hwif); - for (i = 0; i <= 7; i++) hw.io_ports_array[i] = (unsigned long)(base + ((0x1f0 + i) << 5)); @@ -121,15 +116,26 @@ static int __devinit swarm_ide_probe(struct device *dev) hw.irq = K_INT_GB_IDE; hw.chipset = ide_generic; + hwif = ide_find_port_slot(&swarm_port_info); + if (hwif == NULL) + goto err; + ide_init_port_hw(hwif, &hw); + /* Setup MMIO ops. */ + default_hwif_mmiops(hwif); + idx[0] = hwif->index; - ide_device_add(idx, NULL); + ide_device_add(idx, &swarm_port_info); dev_set_drvdata(dev, hwif); return 0; +err: + release_resource(&swarm_ide_resource); + iounmap(base); + return -ENOMEM; } static struct device_driver swarm_ide_driver = { diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c index f2de00adf147..80d19c0eb780 100644 --- a/drivers/ide/pci/alim15x3.c +++ b/drivers/ide/pci/alim15x3.c @@ -69,7 +69,8 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) { ide_hwif_t *hwif = HWIF(drive); struct pci_dev *dev = to_pci_dev(hwif->dev); - int s_time, a_time, c_time; + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); + int s_time = t->setup, a_time = t->active, c_time = t->cycle; u8 s_clc, a_clc, r_clc; unsigned long flags; int bus_speed = ide_pci_clk ? ide_pci_clk : 33; @@ -78,13 +79,10 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 cd_dma_fifo = 0; int unit = drive->select.b.unit & 1; - s_time = ide_pio_timings[pio].setup_time; - a_time = ide_pio_timings[pio].active_time; if ((s_clc = (s_time * bus_speed + 999) / 1000) >= 8) s_clc = 0; if ((a_clc = (a_time * bus_speed + 999) / 1000) >= 8) a_clc = 0; - c_time = ide_pio_timings[pio].cycle_time; if (!(r_clc = (c_time * bus_speed + 999) / 1000 - a_clc - s_clc)) { r_clc = 1; diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c index ad222206a429..0bfcdd0e77b3 100644 --- a/drivers/ide/pci/amd74xx.c +++ b/drivers/ide/pci/amd74xx.c @@ -21,8 +21,6 @@ #include #include -#include "ide-timing.h" - enum { AMD_IDE_CONFIG = 0x41, AMD_CABLE_DETECT = 0x42, diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c index cd1ba14984ab..1ad1e23e3105 100644 --- a/drivers/ide/pci/cmd640.c +++ b/drivers/ide/pci/cmd640.c @@ -521,6 +521,7 @@ static void program_drive_counts(ide_drive_t *drive, unsigned int index) static void cmd640_set_mode(ide_drive_t *drive, unsigned int index, u8 pio_mode, unsigned int cycle_time) { + struct ide_timing *t; int setup_time, active_time, recovery_time, clock_time; u8 setup_count, active_count, recovery_count, recovery_count2, cycle_count; int bus_speed; @@ -532,8 +533,11 @@ static void cmd640_set_mode(ide_drive_t *drive, unsigned int index, if (pio_mode > 5) pio_mode = 5; - setup_time = ide_pio_timings[pio_mode].setup_time; - active_time = ide_pio_timings[pio_mode].active_time; + + t = ide_timing_find_mode(XFER_PIO_0 + pio_mode); + setup_time = t->setup; + active_time = t->active; + recovery_time = cycle_time - (setup_time + active_time); clock_time = 1000 / bus_speed; cycle_count = DIV_ROUND_UP(cycle_time, clock_time); @@ -607,11 +611,40 @@ static void cmd640_set_pio_mode(ide_drive_t *drive, const u8 pio) display_clocks(index); } +#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ + +static void cmd640_init_dev(ide_drive_t *drive) +{ + unsigned int i = drive->hwif->channel * 2 + drive->select.b.unit; + +#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED + /* + * Reset timing to the slowest speed and turn off prefetch. + * This way, the drive identify code has a better chance. + */ + setup_counts[i] = 4; /* max possible */ + active_counts[i] = 16; /* max possible */ + recovery_counts[i] = 16; /* max possible */ + program_drive_counts(drive, i); + set_prefetch_mode(drive, i, 0); + printk(KERN_INFO DRV_NAME ": drive%d timings/prefetch cleared\n", i); +#else + /* + * Set the drive unmask flags to match the prefetch setting. + */ + check_prefetch(drive, i); + printk(KERN_INFO DRV_NAME ": drive%d timings/prefetch(%s) preserved\n", + i, drive->no_io_32bit ? "off" : "on"); +#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ +} + static const struct ide_port_ops cmd640_port_ops = { + .init_dev = cmd640_init_dev, +#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED .set_pio_mode = cmd640_set_pio_mode, +#endif }; -#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ static int pci_conf1(void) { @@ -654,10 +687,8 @@ static const struct ide_port_info cmd640_port_info __initdata = { IDE_HFLAG_NO_DMA | IDE_HFLAG_ABUSE_PREFETCH | IDE_HFLAG_ABUSE_FAST_DEVSEL, -#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED .port_ops = &cmd640_port_ops, .pio_mask = ATA_PIO5, -#endif }; static int cmd640x_init_one(unsigned long base, unsigned long ctl) @@ -683,12 +714,8 @@ static int cmd640x_init_one(unsigned long base, unsigned long ctl) */ static int __init cmd640x_init(void) { -#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED - int second_port_toggled = 0; -#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ int second_port_cmd640 = 0, rc; const char *bus_type, *port2; - unsigned int index; u8 b, cfr; u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; hw_regs_t hw[2]; @@ -774,88 +801,44 @@ static int __init cmd640x_init(void) put_cmd640_reg(CMDTIM, 0); put_cmd640_reg(BRST, 0x40); - cmd_hwif1 = ide_find_port(); + b = get_cmd640_reg(CNTRL); /* * Try to enable the secondary interface, if not already enabled */ - if (cmd_hwif1 && - cmd_hwif1->drives[0].noprobe && cmd_hwif1->drives[1].noprobe) { - port2 = "not probed"; + if (secondary_port_responding()) { + if ((b & CNTRL_ENA_2ND)) { + second_port_cmd640 = 1; + port2 = "okay"; + } else if (cmd640_vlb) { + second_port_cmd640 = 1; + port2 = "alive"; + } else + port2 = "not cmd640"; } else { - b = get_cmd640_reg(CNTRL); + put_cmd640_reg(CNTRL, b ^ CNTRL_ENA_2ND); /* toggle the bit */ if (secondary_port_responding()) { - if ((b & CNTRL_ENA_2ND)) { - second_port_cmd640 = 1; - port2 = "okay"; - } else if (cmd640_vlb) { - second_port_cmd640 = 1; - port2 = "alive"; - } else - port2 = "not cmd640"; + second_port_cmd640 = 1; + port2 = "enabled"; } else { - put_cmd640_reg(CNTRL, b ^ CNTRL_ENA_2ND); /* toggle the bit */ - if (secondary_port_responding()) { - second_port_cmd640 = 1; -#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED - second_port_toggled = 1; -#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ - port2 = "enabled"; - } else { - put_cmd640_reg(CNTRL, b); /* restore original setting */ - port2 = "not responding"; - } + put_cmd640_reg(CNTRL, b); /* restore original setting */ + port2 = "not responding"; } } /* * Initialize data for secondary cmd640 port, if enabled */ - if (second_port_cmd640 && cmd_hwif1) { - ide_init_port_hw(cmd_hwif1, &hw[1]); - idx[1] = cmd_hwif1->index; + if (second_port_cmd640) { + cmd_hwif1 = ide_find_port(); + if (cmd_hwif1) { + ide_init_port_hw(cmd_hwif1, &hw[1]); + idx[1] = cmd_hwif1->index; + } } printk(KERN_INFO "cmd640: %sserialized, secondary interface %s\n", second_port_cmd640 ? "" : "not ", port2); - /* - * Establish initial timings/prefetch for all drives. - * Do not unnecessarily disturb any prior BIOS setup of these. - */ - for (index = 0; index < (2 + (second_port_cmd640 << 1)); index++) { - ide_drive_t *drive; - - if (index > 1) { - if (cmd_hwif1 == NULL) - continue; - drive = &cmd_hwif1->drives[index & 1]; - } else { - if (cmd_hwif0 == NULL) - continue; - drive = &cmd_hwif0->drives[index & 1]; - } - -#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED - /* - * Reset timing to the slowest speed and turn off prefetch. - * This way, the drive identify code has a better chance. - */ - setup_counts [index] = 4; /* max possible */ - active_counts [index] = 16; /* max possible */ - recovery_counts [index] = 16; /* max possible */ - program_drive_counts(drive, index); - set_prefetch_mode(drive, index, 0); - printk("cmd640: drive%d timings/prefetch cleared\n", index); -#else - /* - * Set the drive unmask flags to match the prefetch setting - */ - check_prefetch(drive, index); - printk("cmd640: drive%d timings/prefetch(%s) preserved\n", - index, drive->no_io_32bit ? "off" : "on"); -#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */ - } - #ifdef CMD640_DUMP_REGS cmd640_dump_regs(); #endif diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c index ca4774aa27ee..cfa784bacf48 100644 --- a/drivers/ide/pci/cmd64x.c +++ b/drivers/ide/pci/cmd64x.c @@ -116,6 +116,7 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio) { ide_hwif_t *hwif = HWIF(drive); struct pci_dev *dev = to_pci_dev(hwif->dev); + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); unsigned int cycle_time; u8 setup_count, arttim = 0; @@ -124,10 +125,9 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio) cycle_time = ide_pio_cycle_time(drive, pio); - program_cycle_times(drive, cycle_time, - ide_pio_timings[pio].active_time); + program_cycle_times(drive, cycle_time, t->active); - setup_count = quantize_timing(ide_pio_timings[pio].setup_time, + setup_count = quantize_timing(t->setup, 1000 / (ide_pci_clk ? ide_pci_clk : 33)); /* diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c index 99fe91a191b8..dc97c48623f3 100644 --- a/drivers/ide/pci/cs5535.c +++ b/drivers/ide/pci/cs5535.c @@ -26,8 +26,6 @@ #include #include -#include "ide-timing.h" - #define MSR_ATAC_BASE 0x51300000 #define ATAC_GLD_MSR_CAP (MSR_ATAC_BASE+0) #define ATAC_GLD_MSR_CONFIG (MSR_ATAC_BASE+0x01) @@ -75,13 +73,11 @@ static unsigned int cs5535_udma_timings[5] = */ static void cs5535_set_speed(ide_drive_t *drive, const u8 speed) { - u32 reg = 0, dummy; int unit = drive->select.b.unit; - /* Set the PIO timings */ - if ((speed & XFER_MODE) == XFER_PIO) { + if (speed < XFER_SW_DMA_0) { ide_drive_t *pair = ide_get_paired_drive(drive); u8 cmd, pioa; diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c index 8c534afcb6c8..e14ad5530fa4 100644 --- a/drivers/ide/pci/cy82c693.c +++ b/drivers/ide/pci/cy82c693.c @@ -133,6 +133,7 @@ static int calc_clk(int time, int bus_speed) */ static void compute_clocks(u8 pio, pio_clocks_t *p_pclk) { + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); int clk1, clk2; int bus_speed = ide_pci_clk ? ide_pci_clk : 33; @@ -141,15 +142,13 @@ static void compute_clocks(u8 pio, pio_clocks_t *p_pclk) */ /* let's calc the address setup time clocks */ - p_pclk->address_time = (u8)calc_clk(ide_pio_timings[pio].setup_time, bus_speed); + p_pclk->address_time = (u8)calc_clk(t->setup, bus_speed); /* let's calc the active and recovery time clocks */ - clk1 = calc_clk(ide_pio_timings[pio].active_time, bus_speed); + clk1 = calc_clk(t->active, bus_speed); /* calc recovery timing */ - clk2 = ide_pio_timings[pio].cycle_time - - ide_pio_timings[pio].active_time - - ide_pio_timings[pio].setup_time; + clk2 = t->cycle - t->active - t->setup; clk2 = calc_clk(clk2, bus_speed); diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c index af0f30051d5a..0106e2a2df77 100644 --- a/drivers/ide/pci/delkin_cb.c +++ b/drivers/ide/pci/delkin_cb.c @@ -93,7 +93,6 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id) i = hwif->index; - ide_init_port_data(hwif, i); ide_init_port_hw(hwif, &hw); idx[0] = i; diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c index 6ab04115286b..cbf647202994 100644 --- a/drivers/ide/pci/it821x.c +++ b/drivers/ide/pci/it821x.c @@ -512,8 +512,14 @@ static void __devinit it821x_quirkproc(ide_drive_t *drive) } static struct ide_dma_ops it821x_pass_through_dma_ops = { + .dma_host_set = ide_dma_host_set, + .dma_setup = ide_dma_setup, + .dma_exec_cmd = ide_dma_exec_cmd, .dma_start = it821x_dma_start, .dma_end = it821x_dma_end, + .dma_test_irq = ide_dma_test_irq, + .dma_timeout = ide_dma_timeout, + .dma_lost_irq = ide_dma_lost_irq, }; /** diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c index 1584ebb6a185..789c66dfbde5 100644 --- a/drivers/ide/pci/scc_pata.c +++ b/drivers/ide/pci/scc_pata.c @@ -558,12 +558,9 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev, u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; int i; - hwif = ide_find_port(); - if (hwif == NULL) { - printk(KERN_ERR "%s: too many IDE interfaces, " - "no room in table\n", SCC_PATA_NAME); + hwif = ide_find_port_slot(d); + if (hwif == NULL) return -ENOMEM; - } memset(&hw, 0, sizeof(hw)); for (i = 0; i <= 8; i++) @@ -572,7 +569,6 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev, hw.dev = &dev->dev; hw.chipset = ide_pci; ide_init_port_hw(hwif, &hw); - hwif->dev = &dev->dev; idx[0] = hwif->index; diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index 24513e3dcd6b..c79ff5b41088 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -568,6 +568,7 @@ static const struct ide_dma_ops sgiioc4_dma_ops = { }; static const struct ide_port_info sgiioc4_port_info __devinitdata = { + .name = DRV_NAME, .chipset = ide_pci, .init_dma = ide_dma_sgiioc4, .port_ops = &sgiioc4_port_ops, @@ -587,13 +588,6 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) hw_regs_t hw; struct ide_port_info d = sgiioc4_port_info; - hwif = ide_find_port(); - if (hwif == NULL) { - printk(KERN_ERR "%s: too many IDE interfaces, no room in table\n", - DRV_NAME); - return -ENOMEM; - } - /* Get the CmdBlk and CtrlBlk Base Registers */ bar0 = pci_resource_start(dev, 0); virt_base = ioremap(bar0, pci_resource_len(dev, 0)); @@ -608,11 +602,11 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) cmd_phys_base = bar0 + IOC4_CMD_OFFSET; if (!request_mem_region(cmd_phys_base, IOC4_CMD_CTL_BLK_SIZE, - hwif->name)) { + DRV_NAME)) { printk(KERN_ERR "%s : %s -- ERROR, Addresses " "0x%p to 0x%p ALREADY in use\n", - __func__, hwif->name, (void *) cmd_phys_base, + __func__, DRV_NAME, (void *) cmd_phys_base, (void *) cmd_phys_base + IOC4_CMD_CTL_BLK_SIZE); return -ENOMEM; } @@ -623,9 +617,12 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) hw.irq = dev->irq; hw.chipset = ide_pci; hw.dev = &dev->dev; - ide_init_port_hw(hwif, &hw); - hwif->dev = &dev->dev; + hwif = ide_find_port_slot(&d); + if (hwif == NULL) + goto err; + + ide_init_port_hw(hwif, &hw); /* The IOC4 uses MMIO rather than Port IO. */ default_hwif_mmiops(hwif); @@ -641,6 +638,10 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev) return -EIO; return 0; +err: + release_mem_region(cmd_phys_base, IOC4_CMD_CTL_BLK_SIZE); + iounmap(virt_base); + return -ENOMEM; } static unsigned int __devinit diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c index b75e9bb390a7..6e9d7655d89c 100644 --- a/drivers/ide/pci/siimage.c +++ b/drivers/ide/pci/siimage.c @@ -421,8 +421,7 @@ static int sil_sata_reset_poll(ide_drive_t *drive) if ((sata_stat & 0x03) != 0x03) { printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n", hwif->name, sata_stat); - HWGROUP(drive)->polling = 0; - return ide_started; + return -ENXIO; } } diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c index e127eb25ab63..2389945ca95d 100644 --- a/drivers/ide/pci/sis5513.c +++ b/drivers/ide/pci/sis5513.c @@ -52,8 +52,6 @@ #include #include -#include "ide-timing.h" - /* registers layout and init values are chipset family dependant */ #define ATA_16 0x01 @@ -616,7 +614,6 @@ MODULE_LICENSE("GPL"); /* * TODO: * - CLEANUP - * - Use drivers/ide/ide-timing.h ! * - More checks in the config registers (force values instead of * relying on the BIOS setting them correctly). * - Further optimisations ? diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c index ce84fa045d39..6efbde297174 100644 --- a/drivers/ide/pci/sl82c105.c +++ b/drivers/ide/pci/sl82c105.c @@ -47,10 +47,11 @@ */ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio) { + struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); unsigned int cmd_on, cmd_off; u8 iordy = 0; - cmd_on = (ide_pio_timings[pio].active_time + 29) / 30; + cmd_on = (t->active + 29) / 30; cmd_off = (ide_pio_cycle_time(drive, pio) - 30 * cmd_on + 29) / 30; if (cmd_on == 0) diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c index 3ed9728abd24..e47384c70c40 100644 --- a/drivers/ide/pci/via82cxxx.c +++ b/drivers/ide/pci/via82cxxx.c @@ -35,8 +35,6 @@ #include #endif -#include "ide-timing.h" - #define VIA_IDE_ENABLE 0x40 #define VIA_IDE_CONFIG 0x41 #define VIA_FIFO_CONFIG 0x43 diff --git a/drivers/ide/ppc/Makefile b/drivers/ide/ppc/Makefile index 65af5848b28c..74e52adcdf4b 100644 --- a/drivers/ide/ppc/Makefile +++ b/drivers/ide/ppc/Makefile @@ -1,3 +1,2 @@ obj-$(CONFIG_BLK_DEV_IDE_PMAC) += pmac.o -obj-$(CONFIG_BLK_DEV_MPC8xx_IDE) += mpc8xx.o diff --git a/drivers/ide/ppc/mpc8xx.c b/drivers/ide/ppc/mpc8xx.c deleted file mode 100644 index 236f9c38e519..000000000000 --- a/drivers/ide/ppc/mpc8xx.c +++ /dev/null @@ -1,851 +0,0 @@ -/* - * Copyright (C) 2000, 2001 Wolfgang Denk, wd@denx.de - * Modified for direct IDE interface - * by Thomas Lange, thomas@corelatus.com - * Modified for direct IDE interface on 8xx without using the PCMCIA - * controller - * by Steven.Scholz@imc-berlin.de - * Moved out of arch/ppc/kernel/m8xx_setup.c, other minor cleanups - * by Mathew Locke - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DRV_NAME "ide-mpc8xx" - -static int identify (volatile u8 *p); -static void print_fixed (volatile u8 *p); -static void print_funcid (int func); -static int check_ide_device (unsigned long base); - -static void ide_interrupt_ack (void *dev); -static void m8xx_ide_set_pio_mode(ide_drive_t *drive, const u8 pio); - -typedef struct ide_ioport_desc { - unsigned long base_off; /* Offset to PCMCIA memory */ - unsigned long reg_off[IDE_NR_PORTS]; /* controller register offsets */ - int irq; /* IRQ */ -} ide_ioport_desc_t; - -ide_ioport_desc_t ioport_dsc[MAX_HWIFS] = { -#ifdef IDE0_BASE_OFFSET - { IDE0_BASE_OFFSET, - { - IDE0_DATA_REG_OFFSET, - IDE0_ERROR_REG_OFFSET, - IDE0_NSECTOR_REG_OFFSET, - IDE0_SECTOR_REG_OFFSET, - IDE0_LCYL_REG_OFFSET, - IDE0_HCYL_REG_OFFSET, - IDE0_SELECT_REG_OFFSET, - IDE0_STATUS_REG_OFFSET, - IDE0_CONTROL_REG_OFFSET, - IDE0_IRQ_REG_OFFSET, - }, - IDE0_INTERRUPT, - }, -#ifdef IDE1_BASE_OFFSET - { IDE1_BASE_OFFSET, - { - IDE1_DATA_REG_OFFSET, - IDE1_ERROR_REG_OFFSET, - IDE1_NSECTOR_REG_OFFSET, - IDE1_SECTOR_REG_OFFSET, - IDE1_LCYL_REG_OFFSET, - IDE1_HCYL_REG_OFFSET, - IDE1_SELECT_REG_OFFSET, - IDE1_STATUS_REG_OFFSET, - IDE1_CONTROL_REG_OFFSET, - IDE1_IRQ_REG_OFFSET, - }, - IDE1_INTERRUPT, - }, -#endif /* IDE1_BASE_OFFSET */ -#endif /* IDE0_BASE_OFFSET */ -}; - -ide_pio_timings_t ide_pio_clocks[6]; -int hold_time[6] = {30, 20, 15, 10, 10, 10 }; /* PIO Mode 5 with IORDY (nonstandard) */ - -/* - * Warning: only 1 (ONE) PCMCIA slot supported here, - * which must be correctly initialized by the firmware (PPCBoot). - */ -static int _slot_ = -1; /* will be read from PCMCIA registers */ - -/* Make clock cycles and always round up */ -#define PCMCIA_MK_CLKS( t, T ) (( (t) * ((T)/1000000) + 999U ) / 1000U ) - -#define M8XX_PCMCIA_CD2(slot) (0x10000000 >> (slot << 4)) -#define M8XX_PCMCIA_CD1(slot) (0x08000000 >> (slot << 4)) - -/* - * The TQM850L hardware has two pins swapped! Grrrrgh! - */ -#ifdef CONFIG_TQM850L -#define __MY_PCMCIA_GCRX_CXRESET PCMCIA_GCRX_CXOE -#define __MY_PCMCIA_GCRX_CXOE PCMCIA_GCRX_CXRESET -#else -#define __MY_PCMCIA_GCRX_CXRESET PCMCIA_GCRX_CXRESET -#define __MY_PCMCIA_GCRX_CXOE PCMCIA_GCRX_CXOE -#endif - -#if defined(CONFIG_BLK_DEV_MPC8xx_IDE) && defined(CONFIG_IDE_8xx_PCCARD) -#define PCMCIA_SCHLVL IDE0_INTERRUPT /* Status Change Interrupt Level */ -static int pcmcia_schlvl = PCMCIA_SCHLVL; -#endif - -/* - * See include/linux/ide.h for definition of hw_regs_t (p, base) - */ - -/* - * m8xx_ide_init_ports() for a direct IDE interface _using_ - * MPC8xx's internal PCMCIA interface - */ -#if defined(CONFIG_IDE_8xx_PCCARD) || defined(CONFIG_IDE_8xx_DIRECT) -static int __init m8xx_ide_init_ports(hw_regs_t *hw, unsigned long data_port) -{ - unsigned long *p = hw->io_ports_array; - int i; - - typedef struct { - ulong br; - ulong or; - } pcmcia_win_t; - volatile pcmcia_win_t *win; - volatile pcmconf8xx_t *pcmp; - - uint *pgcrx; - u32 pcmcia_phy_base; - u32 pcmcia_phy_end; - static unsigned long pcmcia_base = 0; - unsigned long base; - - *p = 0; - - pcmp = (pcmconf8xx_t *)(&(((immap_t *)IMAP_ADDR)->im_pcmcia)); - - if (!pcmcia_base) { - /* - * Read out PCMCIA registers. Since the reset values - * are undefined, we sure hope that they have been - * set up by firmware - */ - - /* Scan all registers for valid settings */ - pcmcia_phy_base = 0xFFFFFFFF; - pcmcia_phy_end = 0; - /* br0 is start of brX and orX regs */ - win = (pcmcia_win_t *) \ - (&(((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pbr0)); - for (i = 0; i < 8; i++) { - if (win->or & 1) { /* This bank is marked as valid */ - if (win->br < pcmcia_phy_base) { - pcmcia_phy_base = win->br; - } - if ((win->br + PCMCIA_MEM_SIZE) > pcmcia_phy_end) { - pcmcia_phy_end = win->br + PCMCIA_MEM_SIZE; - } - /* Check which slot that has been defined */ - _slot_ = (win->or >> 2) & 1; - - } /* Valid bank */ - win++; - } /* for */ - - printk ("PCMCIA slot %c: phys mem %08x...%08x (size %08x)\n", - 'A' + _slot_, - pcmcia_phy_base, pcmcia_phy_end, - pcmcia_phy_end - pcmcia_phy_base); - - if (!request_mem_region(pcmcia_phy_base, - pcmcia_phy_end - pcmcia_phy_base, - DRV_NAME)) { - printk(KERN_ERR "%s: resources busy\n", DRV_NAME); - return -EBUSY; - } - - pcmcia_base=(unsigned long)ioremap(pcmcia_phy_base, - pcmcia_phy_end-pcmcia_phy_base); - -#ifdef DEBUG - printk ("PCMCIA virt base: %08lx\n", pcmcia_base); -#endif - /* Compute clock cycles for PIO timings */ - for (i=0; i<6; ++i) { - bd_t *binfo = (bd_t *)__res; - - hold_time[i] = - PCMCIA_MK_CLKS (hold_time[i], - binfo->bi_busfreq); - ide_pio_clocks[i].setup_time = - PCMCIA_MK_CLKS (ide_pio_timings[i].setup_time, - binfo->bi_busfreq); - ide_pio_clocks[i].active_time = - PCMCIA_MK_CLKS (ide_pio_timings[i].active_time, - binfo->bi_busfreq); - ide_pio_clocks[i].cycle_time = - PCMCIA_MK_CLKS (ide_pio_timings[i].cycle_time, - binfo->bi_busfreq); -#if 0 - printk ("PIO mode %d timings: %d/%d/%d => %d/%d/%d\n", - i, - ide_pio_clocks[i].setup_time, - ide_pio_clocks[i].active_time, - ide_pio_clocks[i].hold_time, - ide_pio_clocks[i].cycle_time, - ide_pio_timings[i].setup_time, - ide_pio_timings[i].active_time, - ide_pio_timings[i].hold_time, - ide_pio_timings[i].cycle_time); -#endif - } - } - - if (_slot_ == -1) { - printk ("PCMCIA slot has not been defined! Using A as default\n"); - _slot_ = 0; - } - -#ifdef CONFIG_IDE_8xx_PCCARD - -#ifdef DEBUG - printk ("PIPR = 0x%08X slot %c ==> mask = 0x%X\n", - pcmp->pcmc_pipr, - 'A' + _slot_, - M8XX_PCMCIA_CD1(_slot_) | M8XX_PCMCIA_CD2(_slot_) ); -#endif /* DEBUG */ - - if (pcmp->pcmc_pipr & (M8XX_PCMCIA_CD1(_slot_)|M8XX_PCMCIA_CD2(_slot_))) { - printk ("No card in slot %c: PIPR=%08x\n", - 'A' + _slot_, (u32) pcmp->pcmc_pipr); - return -ENODEV; /* No card in slot */ - } - - check_ide_device (pcmcia_base); - -#endif /* CONFIG_IDE_8xx_PCCARD */ - - base = pcmcia_base + ioport_dsc[data_port].base_off; -#ifdef DEBUG - printk ("base: %08x + %08x = %08x\n", - pcmcia_base, ioport_dsc[data_port].base_off, base); -#endif - - for (i = 0; i < IDE_NR_PORTS; ++i) { -#ifdef DEBUG - printk ("port[%d]: %08x + %08x = %08x\n", - i, - base, - ioport_dsc[data_port].reg_off[i], - i, base + ioport_dsc[data_port].reg_off[i]); -#endif - *p++ = base + ioport_dsc[data_port].reg_off[i]; - } - - hw->irq = ioport_dsc[data_port].irq; - hw->ack_intr = (ide_ack_intr_t *)ide_interrupt_ack; - -#ifdef CONFIG_IDE_8xx_PCCARD - { - unsigned int reg; - - if (_slot_) - pgcrx = &((immap_t *) IMAP_ADDR)->im_pcmcia.pcmc_pgcrb; - else - pgcrx = &((immap_t *) IMAP_ADDR)->im_pcmcia.pcmc_pgcra; - - reg = *pgcrx; - reg |= mk_int_int_mask (pcmcia_schlvl) << 24; - reg |= mk_int_int_mask (pcmcia_schlvl) << 16; - *pgcrx = reg; - } -#endif /* CONFIG_IDE_8xx_PCCARD */ - - /* Enable Harddisk Interrupt, - * and make it edge sensitive - */ - /* (11-18) Set edge detect for irq, no wakeup from low power mode */ - ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_siel |= - (0x80000000 >> ioport_dsc[data_port].irq); - -#ifdef CONFIG_IDE_8xx_PCCARD - /* Make sure we don't get garbage irq */ - ((immap_t *) IMAP_ADDR)->im_pcmcia.pcmc_pscr = 0xFFFF; - - /* Enable falling edge irq */ - pcmp->pcmc_per = 0x100000 >> (16 * _slot_); -#endif /* CONFIG_IDE_8xx_PCCARD */ - - hw->chipset = ide_generic; - - return 0; -} -#endif /* CONFIG_IDE_8xx_PCCARD || CONFIG_IDE_8xx_DIRECT */ - -/* - * m8xx_ide_init_ports() for a direct IDE interface _not_ using - * MPC8xx's internal PCMCIA interface - */ -#if defined(CONFIG_IDE_EXT_DIRECT) -static int __init m8xx_ide_init_ports(hw_regs_t *hw, unsigned long data_port) -{ - unsigned long *p = hw->io_ports_array; - int i; - - u32 ide_phy_base; - u32 ide_phy_end; - static unsigned long ide_base = 0; - unsigned long base; - - *p = 0; - - if (!ide_base) { - - /* TODO: - * - add code to read ORx, BRx - */ - ide_phy_base = CFG_ATA_BASE_ADDR; - ide_phy_end = CFG_ATA_BASE_ADDR + 0x200; - - printk ("IDE phys mem : %08x...%08x (size %08x)\n", - ide_phy_base, ide_phy_end, - ide_phy_end - ide_phy_base); - - if (!request_mem_region(ide_phy_base, 0x200, DRV_NAME)) { - printk(KERN_ERR "%s: resources busy\n", DRV_NAME); - return -EBUSY; - } - - ide_base=(unsigned long)ioremap(ide_phy_base, - ide_phy_end-ide_phy_base); - -#ifdef DEBUG - printk ("IDE virt base: %08lx\n", ide_base); -#endif - } - - base = ide_base + ioport_dsc[data_port].base_off; -#ifdef DEBUG - printk ("base: %08x + %08x = %08x\n", - ide_base, ioport_dsc[data_port].base_off, base); -#endif - - for (i = 0; i < IDE_NR_PORTS; ++i) { -#ifdef DEBUG - printk ("port[%d]: %08x + %08x = %08x\n", - i, - base, - ioport_dsc[data_port].reg_off[i], - i, base + ioport_dsc[data_port].reg_off[i]); -#endif - *p++ = base + ioport_dsc[data_port].reg_off[i]; - } - - /* direct connected IDE drive, i.e. external IRQ */ - hw->irq = ioport_dsc[data_port].irq; - hw->ack_intr = (ide_ack_intr_t *)ide_interrupt_ack; - - /* Enable Harddisk Interrupt, - * and make it edge sensitive - */ - /* (11-18) Set edge detect for irq, no wakeup from low power mode */ - ((immap_t *) IMAP_ADDR)->im_siu_conf.sc_siel |= - (0x80000000 >> ioport_dsc[data_port].irq); - - hw->chipset = ide_generic; - - return 0; -} -#endif /* CONFIG_IDE_8xx_DIRECT */ - - -/* -------------------------------------------------------------------- */ - - -/* PCMCIA Timing */ -#ifndef PCMCIA_SHT -#define PCMCIA_SHT(t) ((t & 0x0F)<<16) /* Strobe Hold Time */ -#define PCMCIA_SST(t) ((t & 0x0F)<<12) /* Strobe Setup Time */ -#define PCMCIA_SL(t) ((t==32) ? 0 : ((t & 0x1F)<<7)) /* Strobe Length */ -#endif - -/* Calculate PIO timings */ -static void m8xx_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) -{ -#if defined(CONFIG_IDE_8xx_PCCARD) || defined(CONFIG_IDE_8xx_DIRECT) - volatile pcmconf8xx_t *pcmp; - ulong timing, mask, reg; - - pcmp = (pcmconf8xx_t *)(&(((immap_t *)IMAP_ADDR)->im_pcmcia)); - - mask = ~(PCMCIA_SHT(0xFF) | PCMCIA_SST(0xFF) | PCMCIA_SL(0xFF)); - - timing = PCMCIA_SHT(hold_time[pio] ) - | PCMCIA_SST(ide_pio_clocks[pio].setup_time ) - | PCMCIA_SL (ide_pio_clocks[pio].active_time) - ; - -#if 1 - printk ("Setting timing bits 0x%08lx in PCMCIA controller\n", timing); -#endif - if ((reg = pcmp->pcmc_por0 & mask) != 0) - pcmp->pcmc_por0 = reg | timing; - - if ((reg = pcmp->pcmc_por1 & mask) != 0) - pcmp->pcmc_por1 = reg | timing; - - if ((reg = pcmp->pcmc_por2 & mask) != 0) - pcmp->pcmc_por2 = reg | timing; - - if ((reg = pcmp->pcmc_por3 & mask) != 0) - pcmp->pcmc_por3 = reg | timing; - - if ((reg = pcmp->pcmc_por4 & mask) != 0) - pcmp->pcmc_por4 = reg | timing; - - if ((reg = pcmp->pcmc_por5 & mask) != 0) - pcmp->pcmc_por5 = reg | timing; - - if ((reg = pcmp->pcmc_por6 & mask) != 0) - pcmp->pcmc_por6 = reg | timing; - - if ((reg = pcmp->pcmc_por7 & mask) != 0) - pcmp->pcmc_por7 = reg | timing; - -#elif defined(CONFIG_IDE_EXT_DIRECT) - - printk("%s[%d] %s: not implemented yet!\n", - __FILE__, __LINE__, __func__); -#endif /* defined(CONFIG_IDE_8xx_PCCARD) || defined(CONFIG_IDE_8xx_PCMCIA */ -} - -static const struct ide_port_ops m8xx_port_ops = { - .set_pio_mode = m8xx_ide_set_pio_mode, -}; - -static void -ide_interrupt_ack (void *dev) -{ -#ifdef CONFIG_IDE_8xx_PCCARD - u_int pscr, pipr; - -#if (PCMCIA_SOCKETS_NO == 2) - u_int _slot_; -#endif - - /* get interrupt sources */ - - pscr = ((volatile immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr; - pipr = ((volatile immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pipr; - - /* - * report only if both card detect signals are the same - * not too nice done, - * we depend on that CD2 is the bit to the left of CD1... - */ - - if(_slot_==-1){ - printk("PCMCIA slot has not been defined! Using A as default\n"); - _slot_=0; - } - - if(((pipr & M8XX_PCMCIA_CD2(_slot_)) >> 1) ^ - (pipr & M8XX_PCMCIA_CD1(_slot_)) ) { - printk ("card detect interrupt\n"); - } - /* clear the interrupt sources */ - ((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr = pscr; - -#else /* ! CONFIG_IDE_8xx_PCCARD */ - /* - * Only CONFIG_IDE_8xx_PCCARD is using the interrupt of the - * MPC8xx's PCMCIA controller, so there is nothing to be done here - * for CONFIG_IDE_8xx_DIRECT and CONFIG_IDE_EXT_DIRECT. - * The interrupt is handled somewhere else. -- Steven - */ -#endif /* CONFIG_IDE_8xx_PCCARD */ -} - - - -/* - * CIS Tupel codes - */ -#define CISTPL_NULL 0x00 -#define CISTPL_DEVICE 0x01 -#define CISTPL_LONGLINK_CB 0x02 -#define CISTPL_INDIRECT 0x03 -#define CISTPL_CONFIG_CB 0x04 -#define CISTPL_CFTABLE_ENTRY_CB 0x05 -#define CISTPL_LONGLINK_MFC 0x06 -#define CISTPL_BAR 0x07 -#define CISTPL_PWR_MGMNT 0x08 -#define CISTPL_EXTDEVICE 0x09 -#define CISTPL_CHECKSUM 0x10 -#define CISTPL_LONGLINK_A 0x11 -#define CISTPL_LONGLINK_C 0x12 -#define CISTPL_LINKTARGET 0x13 -#define CISTPL_NO_LINK 0x14 -#define CISTPL_VERS_1 0x15 -#define CISTPL_ALTSTR 0x16 -#define CISTPL_DEVICE_A 0x17 -#define CISTPL_JEDEC_C 0x18 -#define CISTPL_JEDEC_A 0x19 -#define CISTPL_CONFIG 0x1a -#define CISTPL_CFTABLE_ENTRY 0x1b -#define CISTPL_DEVICE_OC 0x1c -#define CISTPL_DEVICE_OA 0x1d -#define CISTPL_DEVICE_GEO 0x1e -#define CISTPL_DEVICE_GEO_A 0x1f -#define CISTPL_MANFID 0x20 -#define CISTPL_FUNCID 0x21 -#define CISTPL_FUNCE 0x22 -#define CISTPL_SWIL 0x23 -#define CISTPL_END 0xff - -/* - * CIS Function ID codes - */ -#define CISTPL_FUNCID_MULTI 0x00 -#define CISTPL_FUNCID_MEMORY 0x01 -#define CISTPL_FUNCID_SERIAL 0x02 -#define CISTPL_FUNCID_PARALLEL 0x03 -#define CISTPL_FUNCID_FIXED 0x04 -#define CISTPL_FUNCID_VIDEO 0x05 -#define CISTPL_FUNCID_NETWORK 0x06 -#define CISTPL_FUNCID_AIMS 0x07 -#define CISTPL_FUNCID_SCSI 0x08 - -/* - * Fixed Disk FUNCE codes - */ -#define CISTPL_IDE_INTERFACE 0x01 - -#define CISTPL_FUNCE_IDE_IFACE 0x01 -#define CISTPL_FUNCE_IDE_MASTER 0x02 -#define CISTPL_FUNCE_IDE_SLAVE 0x03 - -/* First feature byte */ -#define CISTPL_IDE_SILICON 0x04 -#define CISTPL_IDE_UNIQUE 0x08 -#define CISTPL_IDE_DUAL 0x10 - -/* Second feature byte */ -#define CISTPL_IDE_HAS_SLEEP 0x01 -#define CISTPL_IDE_HAS_STANDBY 0x02 -#define CISTPL_IDE_HAS_IDLE 0x04 -#define CISTPL_IDE_LOW_POWER 0x08 -#define CISTPL_IDE_REG_INHIBIT 0x10 -#define CISTPL_IDE_HAS_INDEX 0x20 -#define CISTPL_IDE_IOIS16 0x40 - - -/* -------------------------------------------------------------------- */ - - -#define MAX_TUPEL_SZ 512 -#define MAX_FEATURES 4 - -static int check_ide_device (unsigned long base) -{ - volatile u8 *ident = NULL; - volatile u8 *feature_p[MAX_FEATURES]; - volatile u8 *p, *start; - int n_features = 0; - u8 func_id = ~0; - u8 code, len; - unsigned short config_base = 0; - int found = 0; - int i; - -#ifdef DEBUG - printk ("PCMCIA MEM: %08lX\n", base); -#endif - start = p = (volatile u8 *) base; - - while ((p - start) < MAX_TUPEL_SZ) { - - code = *p; p += 2; - - if (code == 0xFF) { /* End of chain */ - break; - } - - len = *p; p += 2; -#ifdef DEBUG_PCMCIA - { volatile u8 *q = p; - printk ("\nTuple code %02x length %d\n\tData:", - code, len); - - for (i = 0; i < len; ++i) { - printk (" %02x", *q); - q+= 2; - } - } -#endif /* DEBUG_PCMCIA */ - switch (code) { - case CISTPL_VERS_1: - ident = p + 4; - break; - case CISTPL_FUNCID: - func_id = *p; - break; - case CISTPL_FUNCE: - if (n_features < MAX_FEATURES) - feature_p[n_features++] = p; - break; - case CISTPL_CONFIG: - config_base = (*(p+6) << 8) + (*(p+4)); - default: - break; - } - p += 2 * len; - } - - found = identify (ident); - - if (func_id != ((u8)~0)) { - print_funcid (func_id); - - if (func_id == CISTPL_FUNCID_FIXED) - found = 1; - else - return (1); /* no disk drive */ - } - - for (i=0; i id_str) { - if (*t == ' ') - *t = '\0'; - else - break; - } - printk ("Card ID: %s\n", id_str); - - for (card=known_cards; *card; ++card) { - if (strcmp(*card, id_str) == 0) { /* found! */ - return (1); - } - } - - return (0); /* don't know */ -} - -static int __init mpc8xx_ide_probe(void) -{ - hw_regs_t hw; - u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; - -#ifdef IDE0_BASE_OFFSET - memset(&hw, 0, sizeof(hw)); - if (!m8xx_ide_init_ports(&hw, 0)) { - ide_hwif_t *hwif = ide_find_port(); - - if (hwif) { - ide_init_port_hw(hwif, &hw); - hwif->pio_mask = ATA_PIO4; - hwif->port_ops = &m8xx_port_ops; - - idx[0] = hwif->index; - } - } -#ifdef IDE1_BASE_OFFSET - memset(&hw, 0, sizeof(hw)); - if (!m8xx_ide_init_ports(&hw, 1)) { - ide_hwif_t *mate = ide_find_port(); - - if (mate) { - ide_init_port_hw(mate, &hw); - mate->pio_mask = ATA_PIO4; - mate->port_ops = &m8xx_port_ops; - - idx[1] = mate->index; - } - } -#endif -#endif - - ide_device_add(idx, NULL); - - return 0; -} - -module_init(mpc8xx_ide_probe); - -MODULE_LICENSE("GPL"); diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index dcb2c466bb97..93fb9067c043 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -5,7 +5,7 @@ * for doing DMA. * * Copyright (C) 1998-2003 Paul Mackerras & Ben. Herrenschmidt - * Copyright (C) 2007 Bartlomiej Zolnierkiewicz + * Copyright (C) 2007-2008 Bartlomiej Zolnierkiewicz * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -48,8 +48,6 @@ #include #endif -#include "../ide-timing.h" - #undef IDE_PMAC_DEBUG #define DMA_WAIT_TIMEOUT 50 @@ -495,6 +493,7 @@ static void pmac_outbsync(ide_hwif_t *hwif, u8 value, unsigned long port) static void pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) { + struct ide_timing *tim = ide_timing_find_mode(XFER_PIO_0 + pio); u32 *timings, t; unsigned accessTicks, recTicks; unsigned accessTime, recTime; @@ -526,10 +525,9 @@ pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) } case controller_kl_ata4: /* 66Mhz cell */ - recTime = cycle_time - ide_pio_timings[pio].active_time - - ide_pio_timings[pio].setup_time; + recTime = cycle_time - tim->active - tim->setup; recTime = max(recTime, 150U); - accessTime = ide_pio_timings[pio].active_time; + accessTime = tim->active; accessTime = max(accessTime, 150U); accessTicks = SYSCLK_TICKS_66(accessTime); accessTicks = min(accessTicks, 0x1fU); @@ -542,10 +540,9 @@ pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) default: { /* 33Mhz cell */ int ebit = 0; - recTime = cycle_time - ide_pio_timings[pio].active_time - - ide_pio_timings[pio].setup_time; + recTime = cycle_time - tim->active - tim->setup; recTime = max(recTime, 150U); - accessTime = ide_pio_timings[pio].active_time; + accessTime = tim->active; accessTime = max(accessTime, 150U); accessTicks = SYSCLK_TICKS(accessTime); accessTicks = min(accessTicks, 0x1fU); @@ -1151,8 +1148,6 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match) base = ioremap(macio_resource_start(mdev, 0), 0x400); regbase = (unsigned long) base; - hwif->dev = &mdev->bus->pdev->dev; - pmif->mdev = mdev; pmif->node = mdev->ofdev.node; pmif->regbase = regbase; @@ -1174,7 +1169,8 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match) memset(&hw, 0, sizeof(hw)); pmac_ide_init_ports(&hw, pmif->regbase); hw.irq = irq; - hw.dev = &mdev->ofdev.dev; + hw.dev = &mdev->bus->pdev->dev; + hw.parent = &mdev->ofdev.dev; rc = pmac_ide_setup_device(pmif, hwif, &hw); if (rc != 0) { @@ -1274,7 +1270,6 @@ pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id) goto out_free_pmif; } - hwif->dev = &pdev->dev; pmif->mdev = NULL; pmif->node = np; diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index abcfb1739d4d..65fc08b6b6d0 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -6,19 +6,15 @@ * May be copied or modified under the terms of the GNU General Public License */ -#include #include #include #include #include -#include -#include #include #include #include #include -#include /** * ide_setup_pci_baseregs - place a PCI IDE controller native @@ -319,25 +315,22 @@ static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, ctl = pci_resource_start(dev, 2*port+1); base = pci_resource_start(dev, 2*port); - if ((ctl && !base) || (base && !ctl)) { - printk(KERN_ERR "%s: inconsistent baseregs (BIOS) " - "for port %d, skipping\n", d->name, port); - return NULL; - } - } - if (!ctl) { + } else { /* Use default values */ ctl = port ? 0x374 : 0x3f4; base = port ? 0x170 : 0x1f0; } - hwif = ide_find_port_slot(d); - if (hwif == NULL) { - printk(KERN_ERR "%s: too many IDE interfaces, no room in " - "table\n", d->name); + if (!base || !ctl) { + printk(KERN_ERR "%s: bad PCI BARs for port %d, skipping\n", + d->name, port); return NULL; } + hwif = ide_find_port_slot(d); + if (hwif == NULL) + return NULL; + memset(&hw, 0, sizeof(hw)); hw.irq = irq; hw.dev = &dev->dev; @@ -346,8 +339,6 @@ static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, ide_init_port_hw(hwif, &hw); - hwif->dev = &dev->dev; - return hwif; } diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 636af2862308..1921b8dbb242 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -179,17 +179,29 @@ config FUJITSU_LAPTOP tristate "Fujitsu Laptop Extras" depends on X86 depends on ACPI + depends on INPUT depends on BACKLIGHT_CLASS_DEVICE ---help--- This is a driver for laptops built by Fujitsu: * P2xxx/P5xxx/S6xxx/S7xxx series Lifebooks * Possibly other Fujitsu laptop models + * Tested with S6410 and S7020 - It adds support for LCD brightness control. + It adds support for LCD brightness control and some hotkeys. If you have a Fujitsu laptop, say Y or M here. +config FUJITSU_LAPTOP_DEBUG + bool "Verbose debug mode for Fujitsu Laptop Extras" + depends on FUJITSU_LAPTOP + default n + ---help--- + Enables extra debug output from the fujitsu extras driver, at the + expense of a slight increase in driver size. + + If you are not sure, say N here. + config TC1100_WMI tristate "HP Compaq TC1100 Tablet WMI Extras (EXPERIMENTAL)" depends on X86 && !X86_64 @@ -219,6 +231,23 @@ config MSI_LAPTOP If you have an MSI S270 laptop, say Y or M here. +config COMPAL_LAPTOP + tristate "Compal Laptop Extras" + depends on X86 + depends on ACPI_EC + depends on BACKLIGHT_CLASS_DEVICE + ---help--- + This is a driver for laptops built by Compal: + + Compal FL90/IFL90 + Compal FL91/IFL91 + Compal FL92/JFL92 + Compal FT00/IFT00 + + It adds support for Bluetooth, WLAN and LCD brightness control. + + If you have an Compal FL9x/IFL9x/FT00 laptop, say Y or M here. + config SONY_LAPTOP tristate "Sony Laptop Extras" depends on X86 && ACPI diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 1952875a272e..a6dac6a2e7e5 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -5,10 +5,11 @@ obj- := misc.o # Dummy rule to force built-in.o to be made obj-$(CONFIG_IBM_ASM) += ibmasm/ obj-$(CONFIG_HDPU_FEATURES) += hdpuftrs/ -obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o -obj-$(CONFIG_ACER_WMI) += acer-wmi.o obj-$(CONFIG_ASUS_LAPTOP) += asus-laptop.o obj-$(CONFIG_EEEPC_LAPTOP) += eeepc-laptop.o +obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o +obj-$(CONFIG_COMPAL_LAPTOP) += compal-laptop.o +obj-$(CONFIG_ACER_WMI) += acer-wmi.o obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c index dd13a3749927..e7a3fe508dff 100644 --- a/drivers/misc/acer-wmi.c +++ b/drivers/misc/acer-wmi.c @@ -22,18 +22,18 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define ACER_WMI_VERSION "0.1" - #include #include #include #include #include +#include #include #include #include #include #include +#include #include @@ -87,6 +87,7 @@ struct acer_quirks { * Acer ACPI method GUIDs */ #define AMW0_GUID1 "67C3371D-95A3-4C37-BB61-DD47B491DAAB" +#define AMW0_GUID2 "431F16ED-0C2B-444C-B267-27DEB140CF9C" #define WMID_GUID1 "6AF4F258-B401-42fd-BE91-3D4AC2D7C0D3" #define WMID_GUID2 "95764E09-FB56-4e83-B31A-37761F60994A" @@ -150,6 +151,12 @@ struct acer_data { int brightness; }; +struct acer_debug { + struct dentry *root; + struct dentry *devices; + u32 wmid_devices; +}; + /* Each low-level interface must define at least some of the following */ struct wmi_interface { /* The WMI device type */ @@ -160,6 +167,9 @@ struct wmi_interface { /* Private data for the current interface */ struct acer_data data; + + /* debugfs entries associated with this interface */ + struct acer_debug debug; }; /* The static interface pointer, points to the currently detected interface */ @@ -174,7 +184,7 @@ static struct wmi_interface *interface; struct quirk_entry { u8 wireless; u8 mailled; - u8 brightness; + s8 brightness; u8 bluetooth; }; @@ -198,6 +208,10 @@ static int dmi_matched(const struct dmi_system_id *dmi) static struct quirk_entry quirk_unknown = { }; +static struct quirk_entry quirk_acer_aspire_1520 = { + .brightness = -1, +}; + static struct quirk_entry quirk_acer_travelmate_2490 = { .mailled = 1, }; @@ -207,7 +221,29 @@ static struct quirk_entry quirk_medion_md_98300 = { .wireless = 1, }; +static struct quirk_entry quirk_fujitsu_amilo_li_1718 = { + .wireless = 2, +}; + static struct dmi_system_id acer_quirks[] = { + { + .callback = dmi_matched, + .ident = "Acer Aspire 1360", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1360"), + }, + .driver_data = &quirk_acer_aspire_1520, + }, + { + .callback = dmi_matched, + .ident = "Acer Aspire 1520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1520"), + }, + .driver_data = &quirk_acer_aspire_1520, + }, { .callback = dmi_matched, .ident = "Acer Aspire 3100", @@ -298,6 +334,15 @@ static struct dmi_system_id acer_quirks[] = { }, .driver_data = &quirk_acer_travelmate_2490, }, + { + .callback = dmi_matched, + .ident = "Fujitsu Siemens Amilo Li 1718", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Li 1718"), + }, + .driver_data = &quirk_fujitsu_amilo_li_1718, + }, { .callback = dmi_matched, .ident = "Medion MD 98300", @@ -393,6 +438,12 @@ struct wmi_interface *iface) return AE_ERROR; *value = result & 0x1; return AE_OK; + case 2: + err = ec_read(0x71, &result); + if (err) + return AE_ERROR; + *value = result & 0x1; + return AE_OK; default: err = ec_read(0xA, &result); if (err) @@ -506,6 +557,15 @@ static acpi_status AMW0_set_capabilities(void) struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; + /* + * On laptops with this strange GUID (non Acer), normal probing doesn't + * work. + */ + if (wmi_has_guid(AMW0_GUID2)) { + interface->capability |= ACER_CAP_WIRELESS; + return AE_OK; + } + args.eax = ACER_AMW0_WRITE; args.ecx = args.edx = 0; @@ -552,7 +612,8 @@ static acpi_status AMW0_set_capabilities(void) * appear to use the same EC register for brightness, even if they * differ for wireless, etc */ - interface->capability |= ACER_CAP_BRIGHTNESS; + if (quirks->brightness >= 0) + interface->capability |= ACER_CAP_BRIGHTNESS; return AE_OK; } @@ -807,7 +868,15 @@ static int read_brightness(struct backlight_device *bd) static int update_bl_status(struct backlight_device *bd) { - set_u32(bd->props.brightness, ACER_CAP_BRIGHTNESS); + int intensity = bd->props.brightness; + + if (bd->props.power != FB_BLANK_UNBLANK) + intensity = 0; + if (bd->props.fb_blank != FB_BLANK_UNBLANK) + intensity = 0; + + set_u32(intensity, ACER_CAP_BRIGHTNESS); + return 0; } @@ -829,8 +898,9 @@ static int __devinit acer_backlight_init(struct device *dev) acer_backlight_device = bd; + bd->props.power = FB_BLANK_UNBLANK; + bd->props.brightness = max_brightness; bd->props.max_brightness = max_brightness; - bd->props.brightness = read_brightness(NULL); backlight_update_status(bd); return 0; } @@ -893,6 +963,28 @@ static ssize_t show_interface(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(interface, S_IWUGO | S_IRUGO | S_IWUSR, show_interface, NULL); +/* + * debugfs functions + */ +static u32 get_wmid_devices(void) +{ + struct acpi_buffer out = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *obj; + acpi_status status; + + status = wmi_query_block(WMID_GUID2, 1, &out); + if (ACPI_FAILURE(status)) + return 0; + + obj = (union acpi_object *) out.pointer; + if (obj && obj->type == ACPI_TYPE_BUFFER && + obj->buffer.length == sizeof(u32)) { + return *((u32 *) obj->buffer.pointer); + } else { + return 0; + } +} + /* * Platform device */ @@ -1052,12 +1144,40 @@ error_sysfs: return retval; } +static void remove_debugfs(void) +{ + debugfs_remove(interface->debug.devices); + debugfs_remove(interface->debug.root); +} + +static int create_debugfs(void) +{ + interface->debug.root = debugfs_create_dir("acer-wmi", NULL); + if (!interface->debug.root) { + printk(ACER_ERR "Failed to create debugfs directory"); + return -ENOMEM; + } + + interface->debug.devices = debugfs_create_u32("devices", S_IRUGO, + interface->debug.root, + &interface->debug.wmid_devices); + if (!interface->debug.devices) + goto error_debugfs; + + return 0; + +error_debugfs: + remove_debugfs(); + return -ENOMEM; +} + static int __init acer_wmi_init(void) { int err; - printk(ACER_INFO "Acer Laptop ACPI-WMI Extras version %s\n", - ACER_WMI_VERSION); + printk(ACER_INFO "Acer Laptop ACPI-WMI Extras\n"); + + find_quirks(); /* * Detect which ACPI-WMI interface we're using. @@ -1092,8 +1212,6 @@ static int __init acer_wmi_init(void) if (wmi_has_guid(AMW0_GUID1)) AMW0_find_mailled(); - find_quirks(); - if (!interface) { printk(ACER_ERR "No or unsupported WMI interface, unable to " "load\n"); @@ -1111,6 +1229,13 @@ static int __init acer_wmi_init(void) if (err) return err; + if (wmi_has_guid(WMID_GUID2)) { + interface->debug.wmid_devices = get_wmid_devices(); + err = create_debugfs(); + if (err) + return err; + } + /* Override any initial settings with values from the commandline */ acer_commandline_init(); diff --git a/drivers/misc/compal-laptop.c b/drivers/misc/compal-laptop.c new file mode 100644 index 000000000000..344b790a6253 --- /dev/null +++ b/drivers/misc/compal-laptop.c @@ -0,0 +1,404 @@ +/*-*-linux-c-*-*/ + +/* + Copyright (C) 2008 Cezary Jackiewicz + + based on MSI driver + + Copyright (C) 2006 Lennart Poettering + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + */ + +/* + * comapl-laptop.c - Compal laptop support. + * + * This driver exports a few files in /sys/devices/platform/compal-laptop/: + * + * wlan - wlan subsystem state: contains 0 or 1 (rw) + * + * bluetooth - Bluetooth subsystem state: contains 0 or 1 (rw) + * + * raw - raw value taken from embedded controller register (ro) + * + * In addition to these platform device attributes the driver + * registers itself in the Linux backlight control subsystem and is + * available to userspace under /sys/class/backlight/compal-laptop/. + * + * This driver might work on other laptops produced by Compal. If you + * want to try it you can pass force=1 as argument to the module which + * will force it to load even when the DMI data doesn't identify the + * laptop as FL9x. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define COMPAL_DRIVER_VERSION "0.2.6" + +#define COMPAL_LCD_LEVEL_MAX 8 + +#define COMPAL_EC_COMMAND_WIRELESS 0xBB +#define COMPAL_EC_COMMAND_LCD_LEVEL 0xB9 + +#define KILLSWITCH_MASK 0x10 +#define WLAN_MASK 0x01 +#define BT_MASK 0x02 + +static int force; +module_param(force, bool, 0); +MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); + +/* Hardware access */ + +static int set_lcd_level(int level) +{ + if (level < 0 || level >= COMPAL_LCD_LEVEL_MAX) + return -EINVAL; + + ec_write(COMPAL_EC_COMMAND_LCD_LEVEL, level); + + return 0; +} + +static int get_lcd_level(void) +{ + u8 result; + + ec_read(COMPAL_EC_COMMAND_LCD_LEVEL, &result); + + return (int) result; +} + +static int set_wlan_state(int state) +{ + u8 result, value; + + ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); + + if ((result & KILLSWITCH_MASK) == 0) + return -EINVAL; + else { + if (state) + value = (u8) (result | WLAN_MASK); + else + value = (u8) (result & ~WLAN_MASK); + ec_write(COMPAL_EC_COMMAND_WIRELESS, value); + } + + return 0; +} + +static int set_bluetooth_state(int state) +{ + u8 result, value; + + ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); + + if ((result & KILLSWITCH_MASK) == 0) + return -EINVAL; + else { + if (state) + value = (u8) (result | BT_MASK); + else + value = (u8) (result & ~BT_MASK); + ec_write(COMPAL_EC_COMMAND_WIRELESS, value); + } + + return 0; +} + +static int get_wireless_state(int *wlan, int *bluetooth) +{ + u8 result; + + ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); + + if (wlan) { + if ((result & KILLSWITCH_MASK) == 0) + *wlan = 0; + else + *wlan = result & WLAN_MASK; + } + + if (bluetooth) { + if ((result & KILLSWITCH_MASK) == 0) + *bluetooth = 0; + else + *bluetooth = (result & BT_MASK) >> 1; + } + + return 0; +} + +/* Backlight device stuff */ + +static int bl_get_brightness(struct backlight_device *b) +{ + return get_lcd_level(); +} + + +static int bl_update_status(struct backlight_device *b) +{ + return set_lcd_level(b->props.brightness); +} + +static struct backlight_ops compalbl_ops = { + .get_brightness = bl_get_brightness, + .update_status = bl_update_status, +}; + +static struct backlight_device *compalbl_device; + +/* Platform device */ + +static ssize_t show_wlan(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret, enabled; + + ret = get_wireless_state(&enabled, NULL); + if (ret < 0) + return ret; + + return sprintf(buf, "%i\n", enabled); +} + +static ssize_t show_raw(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 result; + + ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); + + return sprintf(buf, "%i\n", result); +} + +static ssize_t show_bluetooth(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret, enabled; + + ret = get_wireless_state(NULL, &enabled); + if (ret < 0) + return ret; + + return sprintf(buf, "%i\n", enabled); +} + +static ssize_t store_wlan_state(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int state, ret; + + if (sscanf(buf, "%i", &state) != 1 || (state < 0 || state > 1)) + return -EINVAL; + + ret = set_wlan_state(state); + if (ret < 0) + return ret; + + return count; +} + +static ssize_t store_bluetooth_state(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int state, ret; + + if (sscanf(buf, "%i", &state) != 1 || (state < 0 || state > 1)) + return -EINVAL; + + ret = set_bluetooth_state(state); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(bluetooth, 0644, show_bluetooth, store_bluetooth_state); +static DEVICE_ATTR(wlan, 0644, show_wlan, store_wlan_state); +static DEVICE_ATTR(raw, 0444, show_raw, NULL); + +static struct attribute *compal_attributes[] = { + &dev_attr_bluetooth.attr, + &dev_attr_wlan.attr, + &dev_attr_raw.attr, + NULL +}; + +static struct attribute_group compal_attribute_group = { + .attrs = compal_attributes +}; + +static struct platform_driver compal_driver = { + .driver = { + .name = "compal-laptop", + .owner = THIS_MODULE, + } +}; + +static struct platform_device *compal_device; + +/* Initialization */ + +static int dmi_check_cb(const struct dmi_system_id *id) +{ + printk(KERN_INFO "compal-laptop: Identified laptop model '%s'.\n", + id->ident); + + return 0; +} + +static struct dmi_system_id __initdata compal_dmi_table[] = { + { + .ident = "FL90/IFL90", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "IFL90"), + DMI_MATCH(DMI_BOARD_VERSION, "IFT00"), + }, + .callback = dmi_check_cb + }, + { + .ident = "FL90/IFL90", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "IFL90"), + DMI_MATCH(DMI_BOARD_VERSION, "REFERENCE"), + }, + .callback = dmi_check_cb + }, + { + .ident = "FL91/IFL91", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "IFL91"), + DMI_MATCH(DMI_BOARD_VERSION, "IFT00"), + }, + .callback = dmi_check_cb + }, + { + .ident = "FL92/JFL92", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "JFL92"), + DMI_MATCH(DMI_BOARD_VERSION, "IFT00"), + }, + .callback = dmi_check_cb + }, + { + .ident = "FT00/IFT00", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "IFT00"), + DMI_MATCH(DMI_BOARD_VERSION, "IFT00"), + }, + .callback = dmi_check_cb + }, + { } +}; + +static int __init compal_init(void) +{ + int ret; + + if (acpi_disabled) + return -ENODEV; + + if (!force && !dmi_check_system(compal_dmi_table)) + return -ENODEV; + + /* Register backlight stuff */ + + compalbl_device = backlight_device_register("compal-laptop", NULL, NULL, + &compalbl_ops); + if (IS_ERR(compalbl_device)) + return PTR_ERR(compalbl_device); + + compalbl_device->props.max_brightness = COMPAL_LCD_LEVEL_MAX-1; + + ret = platform_driver_register(&compal_driver); + if (ret) + goto fail_backlight; + + /* Register platform stuff */ + + compal_device = platform_device_alloc("compal-laptop", -1); + if (!compal_device) { + ret = -ENOMEM; + goto fail_platform_driver; + } + + ret = platform_device_add(compal_device); + if (ret) + goto fail_platform_device1; + + ret = sysfs_create_group(&compal_device->dev.kobj, + &compal_attribute_group); + if (ret) + goto fail_platform_device2; + + printk(KERN_INFO "compal-laptop: driver "COMPAL_DRIVER_VERSION + " successfully loaded.\n"); + + return 0; + +fail_platform_device2: + + platform_device_del(compal_device); + +fail_platform_device1: + + platform_device_put(compal_device); + +fail_platform_driver: + + platform_driver_unregister(&compal_driver); + +fail_backlight: + + backlight_device_unregister(compalbl_device); + + return ret; +} + +static void __exit compal_cleanup(void) +{ + + sysfs_remove_group(&compal_device->dev.kobj, &compal_attribute_group); + platform_device_unregister(compal_device); + platform_driver_unregister(&compal_driver); + backlight_device_unregister(compalbl_device); + + printk(KERN_INFO "compal-laptop: driver unloaded.\n"); +} + +module_init(compal_init); +module_exit(compal_cleanup); + +MODULE_AUTHOR("Cezary Jackiewicz"); +MODULE_DESCRIPTION("Compal Laptop Support"); +MODULE_VERSION(COMPAL_DRIVER_VERSION); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("dmi:*:rnIFL90:rvrIFT00:*"); +MODULE_ALIAS("dmi:*:rnIFL90:rvrREFERENCE:*"); +MODULE_ALIAS("dmi:*:rnIFL91:rvrIFT00:*"); +MODULE_ALIAS("dmi:*:rnJFL92:rvrIFT00:*"); +MODULE_ALIAS("dmi:*:rnIFT00:rvrIFT00:*"); diff --git a/drivers/misc/eeepc-laptop.c b/drivers/misc/eeepc-laptop.c index 6d727609097f..9e8d79e7e9f4 100644 --- a/drivers/misc/eeepc-laptop.c +++ b/drivers/misc/eeepc-laptop.c @@ -87,7 +87,7 @@ enum { CM_ASL_LID }; -const char *cm_getv[] = { +static const char *cm_getv[] = { "WLDG", NULL, NULL, NULL, "CAMG", NULL, NULL, NULL, NULL, "PBLG", NULL, NULL, @@ -96,7 +96,7 @@ const char *cm_getv[] = { "CRDG", "LIDG" }; -const char *cm_setv[] = { +static const char *cm_setv[] = { "WLDS", NULL, NULL, NULL, "CAMS", NULL, NULL, NULL, "SDSP", "PBLS", "HDPS", NULL, diff --git a/drivers/misc/fujitsu-laptop.c b/drivers/misc/fujitsu-laptop.c index 6d14e8fe1537..7a1ef6c262de 100644 --- a/drivers/misc/fujitsu-laptop.c +++ b/drivers/misc/fujitsu-laptop.c @@ -1,12 +1,14 @@ /*-*-linux-c-*-*/ /* - Copyright (C) 2007 Jonathan Woithe + Copyright (C) 2007,2008 Jonathan Woithe + Copyright (C) 2008 Peter Gruber Based on earlier work: Copyright (C) 2003 Shane Spencer Adrian Yee - Templated from msi-laptop.c which is copyright by its respective authors. + Templated from msi-laptop.c and thinkpad_acpi.c which is copyright + by its respective authors. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -39,8 +41,17 @@ * registers itself in the Linux backlight control subsystem and is * available to userspace under /sys/class/backlight/fujitsu-laptop/. * - * This driver has been tested on a Fujitsu Lifebook S7020. It should - * work on most P-series and S-series Lifebooks, but YMMV. + * Hotkeys present on certain Fujitsu laptops (eg: the S6xxx series) are + * also supported by this driver. + * + * This driver has been tested on a Fujitsu Lifebook S6410 and S7020. It + * should work on most P-series and S-series Lifebooks, but YMMV. + * + * The module parameter use_alt_lcd_levels switches between different ACPI + * brightness controls which are used by different Fujitsu laptops. In most + * cases the correct method is automatically detected. "use_alt_lcd_levels=1" + * is applicable for a Fujitsu Lifebook S6410 if autodetection fails. + * */ #include @@ -49,30 +60,105 @@ #include #include #include +#include +#include +#include #include -#define FUJITSU_DRIVER_VERSION "0.3" +#define FUJITSU_DRIVER_VERSION "0.4.2" #define FUJITSU_LCD_N_LEVELS 8 #define ACPI_FUJITSU_CLASS "fujitsu" #define ACPI_FUJITSU_HID "FUJ02B1" -#define ACPI_FUJITSU_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI extras driver" +#define ACPI_FUJITSU_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" #define ACPI_FUJITSU_DEVICE_NAME "Fujitsu FUJ02B1" - +#define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3" +#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" +#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3" + +#define ACPI_FUJITSU_NOTIFY_CODE1 0x80 + +#define ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS 0x86 +#define ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS 0x87 + +/* Hotkey details */ +#define LOCK_KEY 0x410 /* codes for the keys in the GIRB register */ +#define DISPLAY_KEY 0x411 /* keys are mapped to KEY_SCREENLOCK (the key with the key symbol) */ +#define ENERGY_KEY 0x412 /* KEY_MEDIA (the key with the laptop symbol, KEY_EMAIL (E key)) */ +#define REST_KEY 0x413 /* KEY_SUSPEND (R key) */ + +#define MAX_HOTKEY_RINGBUFFER_SIZE 100 +#define RINGBUFFERSIZE 40 + +/* Debugging */ +#define FUJLAPTOP_LOG ACPI_FUJITSU_HID ": " +#define FUJLAPTOP_ERR KERN_ERR FUJLAPTOP_LOG +#define FUJLAPTOP_NOTICE KERN_NOTICE FUJLAPTOP_LOG +#define FUJLAPTOP_INFO KERN_INFO FUJLAPTOP_LOG +#define FUJLAPTOP_DEBUG KERN_DEBUG FUJLAPTOP_LOG + +#define FUJLAPTOP_DBG_ALL 0xffff +#define FUJLAPTOP_DBG_ERROR 0x0001 +#define FUJLAPTOP_DBG_WARN 0x0002 +#define FUJLAPTOP_DBG_INFO 0x0004 +#define FUJLAPTOP_DBG_TRACE 0x0008 + +#define dbg_printk(a_dbg_level, format, arg...) \ + do { if (dbg_level & a_dbg_level) \ + printk(FUJLAPTOP_DEBUG "%s: " format, __func__ , ## arg); \ + } while (0) +#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG +#define vdbg_printk(a_dbg_level, format, arg...) \ + dbg_printk(a_dbg_level, format, ## arg) +#else +#define vdbg_printk(a_dbg_level, format, arg...) +#endif + +/* Device controlling the backlight and associated keys */ struct fujitsu_t { acpi_handle acpi_handle; + struct acpi_device *dev; + struct input_dev *input; + char phys[32]; struct backlight_device *bl_device; struct platform_device *pf_device; - unsigned long fuj02b1_state; + unsigned int max_brightness; unsigned int brightness_changed; unsigned int brightness_level; }; static struct fujitsu_t *fujitsu; +static int use_alt_lcd_levels = -1; +static int disable_brightness_keys = -1; +static int disable_brightness_adjust = -1; + +/* Device used to access other hotkeys on the laptop */ +struct fujitsu_hotkey_t { + acpi_handle acpi_handle; + struct acpi_device *dev; + struct input_dev *input; + char phys[32]; + struct platform_device *pf_device; + struct kfifo *fifo; + spinlock_t fifo_lock; + + unsigned int irb; /* info about the pressed buttons */ +}; -/* Hardware access */ +static struct fujitsu_hotkey_t *fujitsu_hotkey; + +static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event, + void *data); + +#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG +static u32 dbg_level = 0x03; +#endif + +static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data); + +/* Hardware access for LCD brightness control */ static int set_lcd_level(int level) { @@ -81,7 +167,10 @@ static int set_lcd_level(int level) struct acpi_object_list arg_list = { 1, &arg0 }; acpi_handle handle = NULL; - if (level < 0 || level >= FUJITSU_LCD_N_LEVELS) + vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n", + level); + + if (level < 0 || level >= fujitsu->max_brightness) return -EINVAL; if (!fujitsu) @@ -89,7 +178,38 @@ static int set_lcd_level(int level) status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle); if (ACPI_FAILURE(status)) { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "SBLL not present\n")); + vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n"); + return -ENODEV; + } + + arg0.integer.value = level; + + status = acpi_evaluate_object(handle, NULL, &arg_list, NULL); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return 0; +} + +static int set_lcd_level_alt(int level) +{ + acpi_status status = AE_OK; + union acpi_object arg0 = { ACPI_TYPE_INTEGER }; + struct acpi_object_list arg_list = { 1, &arg0 }; + acpi_handle handle = NULL; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n", + level); + + if (level < 0 || level >= fujitsu->max_brightness) + return -EINVAL; + + if (!fujitsu) + return -EINVAL; + + status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle); + if (ACPI_FAILURE(status)) { + vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n"); return -ENODEV; } @@ -107,13 +227,52 @@ static int get_lcd_level(void) unsigned long state = 0; acpi_status status = AE_OK; - // Get the Brightness + vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n"); + status = acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state); if (status < 0) return status; - fujitsu->fuj02b1_state = state; + fujitsu->brightness_level = state & 0x0fffffff; + + if (state & 0x80000000) + fujitsu->brightness_changed = 1; + else + fujitsu->brightness_changed = 0; + + return fujitsu->brightness_level; +} + +static int get_max_brightness(void) +{ + unsigned long state = 0; + acpi_status status = AE_OK; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n"); + + status = + acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state); + if (status < 0) + return status; + + fujitsu->max_brightness = state; + + return fujitsu->max_brightness; +} + +static int get_lcd_level_alt(void) +{ + unsigned long state = 0; + acpi_status status = AE_OK; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLS\n"); + + status = + acpi_evaluate_integer(fujitsu->acpi_handle, "GBLS", NULL, &state); + if (status < 0) + return status; + fujitsu->brightness_level = state & 0x0fffffff; if (state & 0x80000000) @@ -128,12 +287,18 @@ static int get_lcd_level(void) static int bl_get_brightness(struct backlight_device *b) { - return get_lcd_level(); + if (use_alt_lcd_levels) + return get_lcd_level_alt(); + else + return get_lcd_level(); } static int bl_update_status(struct backlight_device *b) { - return set_lcd_level(b->props.brightness); + if (use_alt_lcd_levels) + return set_lcd_level_alt(b->props.brightness); + else + return set_lcd_level(b->props.brightness); } static struct backlight_ops fujitsubl_ops = { @@ -141,7 +306,35 @@ static struct backlight_ops fujitsubl_ops = { .update_status = bl_update_status, }; -/* Platform device */ +/* Platform LCD brightness device */ + +static ssize_t +show_max_brightness(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + int ret; + + ret = get_max_brightness(); + if (ret < 0) + return ret; + + return sprintf(buf, "%i\n", ret); +} + +static ssize_t +show_brightness_changed(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + int ret; + + ret = fujitsu->brightness_changed; + if (ret < 0) + return ret; + + return sprintf(buf, "%i\n", ret); +} static ssize_t show_lcd_level(struct device *dev, struct device_attribute *attr, char *buf) @@ -149,7 +342,10 @@ static ssize_t show_lcd_level(struct device *dev, int ret; - ret = get_lcd_level(); + if (use_alt_lcd_levels) + ret = get_lcd_level_alt(); + else + ret = get_lcd_level(); if (ret < 0) return ret; @@ -164,19 +360,61 @@ static ssize_t store_lcd_level(struct device *dev, int level, ret; if (sscanf(buf, "%i", &level) != 1 - || (level < 0 || level >= FUJITSU_LCD_N_LEVELS)) + || (level < 0 || level >= fujitsu->max_brightness)) return -EINVAL; - ret = set_lcd_level(level); + if (use_alt_lcd_levels) + ret = set_lcd_level_alt(level); + else + ret = set_lcd_level(level); + if (ret < 0) + return ret; + + if (use_alt_lcd_levels) + ret = get_lcd_level_alt(); + else + ret = get_lcd_level(); if (ret < 0) return ret; return count; } +/* Hardware access for hotkey device */ + +static int get_irb(void) +{ + unsigned long state = 0; + acpi_status status = AE_OK; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "Get irb\n"); + + status = + acpi_evaluate_integer(fujitsu_hotkey->acpi_handle, "GIRB", NULL, + &state); + if (status < 0) + return status; + + fujitsu_hotkey->irb = state; + + return fujitsu_hotkey->irb; +} + +static ssize_t +ignore_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return count; +} + +static DEVICE_ATTR(max_brightness, 0444, show_max_brightness, ignore_store); +static DEVICE_ATTR(brightness_changed, 0444, show_brightness_changed, + ignore_store); static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level); static struct attribute *fujitsupf_attributes[] = { + &dev_attr_brightness_changed.attr, + &dev_attr_max_brightness.attr, &dev_attr_lcd_level.attr, NULL }; @@ -192,14 +430,52 @@ static struct platform_driver fujitsupf_driver = { } }; -/* ACPI device */ +static int dmi_check_cb_s6410(const struct dmi_system_id *id) +{ + acpi_handle handle; + int have_blnf; + printk(KERN_INFO "fujitsu-laptop: Identified laptop model '%s'.\n", + id->ident); + have_blnf = ACPI_SUCCESS + (acpi_get_handle(NULL, "\\_SB.PCI0.GFX0.LCD.BLNF", &handle)); + if (use_alt_lcd_levels == -1) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detecting usealt\n"); + use_alt_lcd_levels = 1; + } + if (disable_brightness_keys == -1) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "auto-detecting disable_keys\n"); + disable_brightness_keys = have_blnf ? 1 : 0; + } + if (disable_brightness_adjust == -1) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "auto-detecting disable_adjust\n"); + disable_brightness_adjust = have_blnf ? 0 : 1; + } + return 0; +} + +static struct dmi_system_id __initdata fujitsu_dmi_table[] = { + { + .ident = "Fujitsu Siemens", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK S6410"), + }, + .callback = dmi_check_cb_s6410}, + {} +}; + +/* ACPI device for LCD brightness control */ static int acpi_fujitsu_add(struct acpi_device *device) { + acpi_status status; + acpi_handle handle; int result = 0; int state = 0; - - ACPI_FUNCTION_TRACE("acpi_fujitsu_add"); + struct input_dev *input; + int error; if (!device) return -EINVAL; @@ -209,10 +485,42 @@ static int acpi_fujitsu_add(struct acpi_device *device) sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); acpi_driver_data(device) = fujitsu; + status = acpi_install_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, + acpi_fujitsu_notify, fujitsu); + + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "Error installing notify handler\n"); + error = -ENODEV; + goto err_stop; + } + + fujitsu->input = input = input_allocate_device(); + if (!input) { + error = -ENOMEM; + goto err_uninstall_notify; + } + + snprintf(fujitsu->phys, sizeof(fujitsu->phys), + "%s/video/input0", acpi_device_hid(device)); + + input->name = acpi_device_name(device); + input->phys = fujitsu->phys; + input->id.bustype = BUS_HOST; + input->id.product = 0x06; + input->dev.parent = &device->dev; + input->evbit[0] = BIT(EV_KEY); + set_bit(KEY_BRIGHTNESSUP, input->keybit); + set_bit(KEY_BRIGHTNESSDOWN, input->keybit); + set_bit(KEY_UNKNOWN, input->keybit); + + error = input_register_device(input); + if (error) + goto err_free_input_dev; + result = acpi_bus_get_power(fujitsu->acpi_handle, &state); if (result) { - ACPI_DEBUG_PRINT((ACPI_DB_ERROR, - "Error reading power state\n")); + printk(KERN_ERR "Error reading power state\n"); goto end; } @@ -220,22 +528,373 @@ static int acpi_fujitsu_add(struct acpi_device *device) acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); - end: + fujitsu->dev = device; + + if (ACPI_SUCCESS + (acpi_get_handle(device->handle, METHOD_NAME__INI, &handle))) { + vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); + if (ACPI_FAILURE + (acpi_evaluate_object + (device->handle, METHOD_NAME__INI, NULL, NULL))) + printk(KERN_ERR "_INI Method failed\n"); + } + + /* do config (detect defaults) */ + dmi_check_system(fujitsu_dmi_table); + use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0; + disable_brightness_keys = disable_brightness_keys == 1 ? 1 : 0; + disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0; + vdbg_printk(FUJLAPTOP_DBG_INFO, + "config: [alt interface: %d], [key disable: %d], [adjust disable: %d]\n", + use_alt_lcd_levels, disable_brightness_keys, + disable_brightness_adjust); + + if (get_max_brightness() <= 0) + fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS; + if (use_alt_lcd_levels) + get_lcd_level_alt(); + else + get_lcd_level(); + + return result; + +end: +err_free_input_dev: + input_free_device(input); +err_uninstall_notify: + acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + acpi_fujitsu_notify); +err_stop: return result; } static int acpi_fujitsu_remove(struct acpi_device *device, int type) { - ACPI_FUNCTION_TRACE("acpi_fujitsu_remove"); + acpi_status status; + struct fujitsu_t *fujitsu = NULL; if (!device || !acpi_driver_data(device)) return -EINVAL; + + fujitsu = acpi_driver_data(device); + + status = acpi_remove_notify_handler(fujitsu->acpi_handle, + ACPI_DEVICE_NOTIFY, + acpi_fujitsu_notify); + + if (!device || !acpi_driver_data(device)) + return -EINVAL; + fujitsu->acpi_handle = NULL; return 0; } +/* Brightness notify */ + +static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data) +{ + struct input_dev *input; + int keycode; + int oldb, newb; + + input = fujitsu->input; + + switch (event) { + case ACPI_FUJITSU_NOTIFY_CODE1: + keycode = 0; + oldb = fujitsu->brightness_level; + get_lcd_level(); /* the alt version always yields changed */ + newb = fujitsu->brightness_level; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "brightness button event [%i -> %i (%i)]\n", + oldb, newb, fujitsu->brightness_changed); + + if (oldb == newb && fujitsu->brightness_changed) { + keycode = 0; + if (disable_brightness_keys != 1) { + if (oldb == 0) { + acpi_bus_generate_proc_event(fujitsu-> + dev, + ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS, + 0); + keycode = KEY_BRIGHTNESSDOWN; + } else if (oldb == + (fujitsu->max_brightness) - 1) { + acpi_bus_generate_proc_event(fujitsu-> + dev, + ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS, + 0); + keycode = KEY_BRIGHTNESSUP; + } + } + } else if (oldb < newb) { + if (disable_brightness_adjust != 1) { + if (use_alt_lcd_levels) + set_lcd_level_alt(newb); + else + set_lcd_level(newb); + } + if (disable_brightness_keys != 1) { + acpi_bus_generate_proc_event(fujitsu->dev, + ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS, + 0); + keycode = KEY_BRIGHTNESSUP; + } + } else if (oldb > newb) { + if (disable_brightness_adjust != 1) { + if (use_alt_lcd_levels) + set_lcd_level_alt(newb); + else + set_lcd_level(newb); + } + if (disable_brightness_keys != 1) { + acpi_bus_generate_proc_event(fujitsu->dev, + ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS, + 0); + keycode = KEY_BRIGHTNESSDOWN; + } + } else { + keycode = KEY_UNKNOWN; + } + break; + default: + keycode = KEY_UNKNOWN; + vdbg_printk(FUJLAPTOP_DBG_WARN, + "unsupported event [0x%x]\n", event); + break; + } + + if (keycode != 0) { + input_report_key(input, keycode, 1); + input_sync(input); + input_report_key(input, keycode, 0); + input_sync(input); + } + + return; +} + +/* ACPI device for hotkey handling */ + +static int acpi_fujitsu_hotkey_add(struct acpi_device *device) +{ + acpi_status status; + acpi_handle handle; + int result = 0; + int state = 0; + struct input_dev *input; + int error; + int i; + + if (!device) + return -EINVAL; + + fujitsu_hotkey->acpi_handle = device->handle; + sprintf(acpi_device_name(device), "%s", + ACPI_FUJITSU_HOTKEY_DEVICE_NAME); + sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); + acpi_driver_data(device) = fujitsu_hotkey; + + status = acpi_install_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, + acpi_fujitsu_hotkey_notify, + fujitsu_hotkey); + + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "Error installing notify handler\n"); + error = -ENODEV; + goto err_stop; + } + + /* kfifo */ + spin_lock_init(&fujitsu_hotkey->fifo_lock); + fujitsu_hotkey->fifo = + kfifo_alloc(RINGBUFFERSIZE * sizeof(int), GFP_KERNEL, + &fujitsu_hotkey->fifo_lock); + if (IS_ERR(fujitsu_hotkey->fifo)) { + printk(KERN_ERR "kfifo_alloc failed\n"); + error = PTR_ERR(fujitsu_hotkey->fifo); + goto err_stop; + } + + fujitsu_hotkey->input = input = input_allocate_device(); + if (!input) { + error = -ENOMEM; + goto err_uninstall_notify; + } + + snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys), + "%s/video/input0", acpi_device_hid(device)); + + input->name = acpi_device_name(device); + input->phys = fujitsu_hotkey->phys; + input->id.bustype = BUS_HOST; + input->id.product = 0x06; + input->dev.parent = &device->dev; + input->evbit[0] = BIT(EV_KEY); + set_bit(KEY_SCREENLOCK, input->keybit); + set_bit(KEY_MEDIA, input->keybit); + set_bit(KEY_EMAIL, input->keybit); + set_bit(KEY_SUSPEND, input->keybit); + set_bit(KEY_UNKNOWN, input->keybit); + + error = input_register_device(input); + if (error) + goto err_free_input_dev; + + result = acpi_bus_get_power(fujitsu_hotkey->acpi_handle, &state); + if (result) { + printk(KERN_ERR "Error reading power state\n"); + goto end; + } + + printk(KERN_INFO PREFIX "%s [%s] (%s)\n", + acpi_device_name(device), acpi_device_bid(device), + !device->power.state ? "on" : "off"); + + fujitsu_hotkey->dev = device; + + if (ACPI_SUCCESS + (acpi_get_handle(device->handle, METHOD_NAME__INI, &handle))) { + vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); + if (ACPI_FAILURE + (acpi_evaluate_object + (device->handle, METHOD_NAME__INI, NULL, NULL))) + printk(KERN_ERR "_INI Method failed\n"); + } + + i = 0; /* Discard hotkey ringbuffer */ + while (get_irb() != 0 && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) ; + vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); + + return result; + +end: +err_free_input_dev: + input_free_device(input); +err_uninstall_notify: + acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + acpi_fujitsu_hotkey_notify); + kfifo_free(fujitsu_hotkey->fifo); +err_stop: + + return result; +} + +static int acpi_fujitsu_hotkey_remove(struct acpi_device *device, int type) +{ + acpi_status status; + struct fujitsu_hotkey_t *fujitsu_hotkey = NULL; + + if (!device || !acpi_driver_data(device)) + return -EINVAL; + + fujitsu_hotkey = acpi_driver_data(device); + + status = acpi_remove_notify_handler(fujitsu_hotkey->acpi_handle, + ACPI_DEVICE_NOTIFY, + acpi_fujitsu_hotkey_notify); + + fujitsu_hotkey->acpi_handle = NULL; + + kfifo_free(fujitsu_hotkey->fifo); + + return 0; +} + +static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event, + void *data) +{ + struct input_dev *input; + int keycode, keycode_r; + unsigned int irb = 1; + int i, status; + + input = fujitsu_hotkey->input; + + vdbg_printk(FUJLAPTOP_DBG_TRACE, "Hotkey event\n"); + + switch (event) { + case ACPI_FUJITSU_NOTIFY_CODE1: + i = 0; + while ((irb = get_irb()) != 0 + && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, "GIRB result [%x]\n", + irb); + + switch (irb & 0x4ff) { + case LOCK_KEY: + keycode = KEY_SCREENLOCK; + break; + case DISPLAY_KEY: + keycode = KEY_MEDIA; + break; + case ENERGY_KEY: + keycode = KEY_EMAIL; + break; + case REST_KEY: + keycode = KEY_SUSPEND; + break; + case 0: + keycode = 0; + break; + default: + vdbg_printk(FUJLAPTOP_DBG_WARN, + "Unknown GIRB result [%x]\n", irb); + keycode = -1; + break; + } + if (keycode > 0) { + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "Push keycode into ringbuffer [%d]\n", + keycode); + status = kfifo_put(fujitsu_hotkey->fifo, + (unsigned char *)&keycode, + sizeof(keycode)); + if (status != sizeof(keycode)) { + vdbg_printk(FUJLAPTOP_DBG_WARN, + "Could not push keycode [0x%x]\n", + keycode); + } else { + input_report_key(input, keycode, 1); + input_sync(input); + } + } else if (keycode == 0) { + while ((status = + kfifo_get + (fujitsu_hotkey->fifo, (unsigned char *) + &keycode_r, + sizeof + (keycode_r))) == sizeof(keycode_r)) { + input_report_key(input, keycode_r, 0); + input_sync(input); + vdbg_printk(FUJLAPTOP_DBG_TRACE, + "Pop keycode from ringbuffer [%d]\n", + keycode_r); + } + } + } + + break; + default: + keycode = KEY_UNKNOWN; + vdbg_printk(FUJLAPTOP_DBG_WARN, + "Unsupported event [0x%x]\n", event); + input_report_key(input, keycode, 1); + input_sync(input); + input_report_key(input, keycode, 0); + input_sync(input); + break; + } + + return; +} + +/* Initialization */ + static const struct acpi_device_id fujitsu_device_ids[] = { {ACPI_FUJITSU_HID, 0}, {"", 0}, @@ -251,11 +910,24 @@ static struct acpi_driver acpi_fujitsu_driver = { }, }; -/* Initialization */ +static const struct acpi_device_id fujitsu_hotkey_device_ids[] = { + {ACPI_FUJITSU_HOTKEY_HID, 0}, + {"", 0}, +}; + +static struct acpi_driver acpi_fujitsu_hotkey_driver = { + .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME, + .class = ACPI_FUJITSU_CLASS, + .ids = fujitsu_hotkey_device_ids, + .ops = { + .add = acpi_fujitsu_hotkey_add, + .remove = acpi_fujitsu_hotkey_remove, + }, +}; static int __init fujitsu_init(void) { - int ret, result; + int ret, result, max_brightness; if (acpi_disabled) return -ENODEV; @@ -271,19 +943,6 @@ static int __init fujitsu_init(void) goto fail_acpi; } - /* Register backlight stuff */ - - fujitsu->bl_device = - backlight_device_register("fujitsu-laptop", NULL, NULL, - &fujitsubl_ops); - if (IS_ERR(fujitsu->bl_device)) - return PTR_ERR(fujitsu->bl_device); - - fujitsu->bl_device->props.max_brightness = FUJITSU_LCD_N_LEVELS - 1; - ret = platform_driver_register(&fujitsupf_driver); - if (ret) - goto fail_backlight; - /* Register platform stuff */ fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1); @@ -302,28 +961,68 @@ static int __init fujitsu_init(void) if (ret) goto fail_platform_device2; + /* Register backlight stuff */ + + fujitsu->bl_device = + backlight_device_register("fujitsu-laptop", NULL, NULL, + &fujitsubl_ops); + if (IS_ERR(fujitsu->bl_device)) + return PTR_ERR(fujitsu->bl_device); + + max_brightness = fujitsu->max_brightness; + + fujitsu->bl_device->props.max_brightness = max_brightness - 1; + fujitsu->bl_device->props.brightness = fujitsu->brightness_level; + + ret = platform_driver_register(&fujitsupf_driver); + if (ret) + goto fail_backlight; + + /* Register hotkey driver */ + + fujitsu_hotkey = kmalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL); + if (!fujitsu_hotkey) { + ret = -ENOMEM; + goto fail_hotkey; + } + memset(fujitsu_hotkey, 0, sizeof(struct fujitsu_hotkey_t)); + + result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver); + if (result < 0) { + ret = -ENODEV; + goto fail_hotkey1; + } + printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION " successfully loaded.\n"); return 0; - fail_platform_device2: +fail_hotkey1: - platform_device_del(fujitsu->pf_device); - - fail_platform_device1: - - platform_device_put(fujitsu->pf_device); + kfree(fujitsu_hotkey); - fail_platform_driver: +fail_hotkey: platform_driver_unregister(&fujitsupf_driver); - fail_backlight: +fail_backlight: backlight_device_unregister(fujitsu->bl_device); - fail_acpi: +fail_platform_device2: + + platform_device_del(fujitsu->pf_device); + +fail_platform_device1: + + platform_device_put(fujitsu->pf_device); + +fail_platform_driver: + + acpi_bus_unregister_driver(&acpi_fujitsu_driver); + +fail_acpi: kfree(fujitsu); @@ -342,19 +1041,43 @@ static void __exit fujitsu_cleanup(void) kfree(fujitsu); + acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver); + + kfree(fujitsu_hotkey); + printk(KERN_INFO "fujitsu-laptop: driver unloaded.\n"); } module_init(fujitsu_init); module_exit(fujitsu_cleanup); -MODULE_AUTHOR("Jonathan Woithe"); +module_param(use_alt_lcd_levels, uint, 0644); +MODULE_PARM_DESC(use_alt_lcd_levels, + "Use alternative interface for lcd_levels (needed for Lifebook s6410)."); +module_param(disable_brightness_keys, uint, 0644); +MODULE_PARM_DESC(disable_brightness_keys, + "Disable brightness keys (eg. if they are already handled by the generic ACPI_VIDEO device)."); +module_param(disable_brightness_adjust, uint, 0644); +MODULE_PARM_DESC(disable_brightness_adjust, "Disable brightness adjustment ."); +#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG +module_param_named(debug, dbg_level, uint, 0644); +MODULE_PARM_DESC(debug, "Sets debug level bit-mask"); +#endif + +MODULE_AUTHOR("Jonathan Woithe, Peter Gruber"); MODULE_DESCRIPTION("Fujitsu laptop extras support"); MODULE_VERSION(FUJITSU_DRIVER_VERSION); MODULE_LICENSE("GPL"); +MODULE_ALIAS + ("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*"); +MODULE_ALIAS + ("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*"); + static struct pnp_device_id pnp_ids[] = { { .id = "FUJ02bf" }, + { .id = "FUJ02B1" }, + { .id = "FUJ02E3" }, { .id = "" } }; MODULE_DEVICE_TABLE(pnp, pnp_ids); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index f9ad960d7c1a..66e5a5487c20 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2,7 +2,7 @@ * Block driver for media (i.e., flash cards) * * Copyright 2002 Hewlett-Packard Company - * Copyright 2005-2007 Pierre Ossman + * Copyright 2005-2008 Pierre Ossman * * Use consistent with the GNU GPL is permitted, * provided that this copyright notice is @@ -237,17 +237,6 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) if (brq.data.blocks > card->host->max_blk_count) brq.data.blocks = card->host->max_blk_count; - /* - * If the host doesn't support multiple block writes, force - * block writes to single block. SD cards are excepted from - * this rule as they support querying the number of - * successfully written sectors. - */ - if (rq_data_dir(req) != READ && - !(card->host->caps & MMC_CAP_MULTIWRITE) && - !mmc_card_sd(card)) - brq.data.blocks = 1; - if (brq.data.blocks > 1) { /* SPI multiblock writes terminate using a special * token, not a STOP_TRANSMISSION request. @@ -296,22 +285,24 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) mmc_queue_bounce_post(mq); + /* + * Check for errors here, but don't jump to cmd_err + * until later as we need to wait for the card to leave + * programming mode even when things go wrong. + */ if (brq.cmd.error) { printk(KERN_ERR "%s: error %d sending read/write command\n", req->rq_disk->disk_name, brq.cmd.error); - goto cmd_err; } if (brq.data.error) { printk(KERN_ERR "%s: error %d transferring data\n", req->rq_disk->disk_name, brq.data.error); - goto cmd_err; } if (brq.stop.error) { printk(KERN_ERR "%s: error %d sending stop command\n", req->rq_disk->disk_name, brq.stop.error); - goto cmd_err; } if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) { @@ -344,6 +335,9 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) #endif } + if (brq.cmd.error || brq.data.error || brq.stop.error) + goto cmd_err; + /* * A block was successfully transferred. */ @@ -362,30 +356,32 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) * mark the known good sectors as ok. * * If the card is not SD, we can still ok written sectors - * if the controller can do proper error reporting. + * as reported by the controller (which might be less than + * the real number of written sectors, but never more). * * For reads we just fail the entire chunk as that should * be safe in all cases. */ - if (rq_data_dir(req) != READ && mmc_card_sd(card)) { - u32 blocks; - unsigned int bytes; - - blocks = mmc_sd_num_wr_blocks(card); - if (blocks != (u32)-1) { - if (card->csd.write_partial) - bytes = blocks << md->block_bits; - else - bytes = blocks << 9; + if (rq_data_dir(req) != READ) { + if (mmc_card_sd(card)) { + u32 blocks; + unsigned int bytes; + + blocks = mmc_sd_num_wr_blocks(card); + if (blocks != (u32)-1) { + if (card->csd.write_partial) + bytes = blocks << md->block_bits; + else + bytes = blocks << 9; + spin_lock_irq(&md->lock); + ret = __blk_end_request(req, 0, bytes); + spin_unlock_irq(&md->lock); + } + } else { spin_lock_irq(&md->lock); - ret = __blk_end_request(req, 0, bytes); + ret = __blk_end_request(req, 0, brq.data.bytes_xfered); spin_unlock_irq(&md->lock); } - } else if (rq_data_dir(req) != READ && - (card->host->caps & MMC_CAP_MULTIWRITE)) { - spin_lock_irq(&md->lock); - ret = __blk_end_request(req, 0, brq.data.bytes_xfered); - spin_unlock_irq(&md->lock); } mmc_release_host(card->host); diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index ffadee549a41..d6b9b486417c 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -1,7 +1,7 @@ /* * linux/drivers/mmc/card/mmc_test.c * - * Copyright 2007 Pierre Ossman + * Copyright 2007-2008 Pierre Ossman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,13 +26,17 @@ struct mmc_test_card { struct mmc_card *card; + u8 scratch[BUFFER_SIZE]; u8 *buffer; }; /*******************************************************************/ -/* Helper functions */ +/* General helper functions */ /*******************************************************************/ +/* + * Configure correct block size in card + */ static int mmc_test_set_blksize(struct mmc_test_card *test, unsigned size) { struct mmc_command cmd; @@ -48,117 +52,61 @@ static int mmc_test_set_blksize(struct mmc_test_card *test, unsigned size) return 0; } -static int __mmc_test_transfer(struct mmc_test_card *test, int write, - unsigned broken_xfer, u8 *buffer, unsigned addr, - unsigned blocks, unsigned blksz) +/* + * Fill in the mmc_request structure given a set of transfer parameters. + */ +static void mmc_test_prepare_mrq(struct mmc_test_card *test, + struct mmc_request *mrq, struct scatterlist *sg, unsigned sg_len, + unsigned dev_addr, unsigned blocks, unsigned blksz, int write) { - int ret, busy; - - struct mmc_request mrq; - struct mmc_command cmd; - struct mmc_command stop; - struct mmc_data data; - - struct scatterlist sg; - - memset(&mrq, 0, sizeof(struct mmc_request)); - - mrq.cmd = &cmd; - mrq.data = &data; - - memset(&cmd, 0, sizeof(struct mmc_command)); + BUG_ON(!mrq || !mrq->cmd || !mrq->data || !mrq->stop); - if (broken_xfer) { - if (blocks > 1) { - cmd.opcode = write ? - MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK; - } else { - cmd.opcode = MMC_SEND_STATUS; - } + if (blocks > 1) { + mrq->cmd->opcode = write ? + MMC_WRITE_MULTIPLE_BLOCK : MMC_READ_MULTIPLE_BLOCK; } else { - if (blocks > 1) { - cmd.opcode = write ? - MMC_WRITE_MULTIPLE_BLOCK : MMC_READ_MULTIPLE_BLOCK; - } else { - cmd.opcode = write ? - MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK; - } + mrq->cmd->opcode = write ? + MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK; } - if (broken_xfer && blocks == 1) - cmd.arg = test->card->rca << 16; - else - cmd.arg = addr; - cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; + mrq->cmd->arg = dev_addr; + mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC; - memset(&stop, 0, sizeof(struct mmc_command)); - - if (!broken_xfer && (blocks > 1)) { - stop.opcode = MMC_STOP_TRANSMISSION; - stop.arg = 0; - stop.flags = MMC_RSP_R1B | MMC_CMD_AC; - - mrq.stop = &stop; + if (blocks == 1) + mrq->stop = NULL; + else { + mrq->stop->opcode = MMC_STOP_TRANSMISSION; + mrq->stop->arg = 0; + mrq->stop->flags = MMC_RSP_R1B | MMC_CMD_AC; } - memset(&data, 0, sizeof(struct mmc_data)); - - data.blksz = blksz; - data.blocks = blocks; - data.flags = write ? MMC_DATA_WRITE : MMC_DATA_READ; - data.sg = &sg; - data.sg_len = 1; - - sg_init_one(&sg, buffer, blocks * blksz); - - mmc_set_data_timeout(&data, test->card); + mrq->data->blksz = blksz; + mrq->data->blocks = blocks; + mrq->data->flags = write ? MMC_DATA_WRITE : MMC_DATA_READ; + mrq->data->sg = sg; + mrq->data->sg_len = sg_len; - mmc_wait_for_req(test->card->host, &mrq); - - ret = 0; - - if (broken_xfer) { - if (!ret && cmd.error) - ret = cmd.error; - if (!ret && data.error == 0) - ret = RESULT_FAIL; - if (!ret && data.error != -ETIMEDOUT) - ret = data.error; - if (!ret && stop.error) - ret = stop.error; - if (blocks > 1) { - if (!ret && data.bytes_xfered > blksz) - ret = RESULT_FAIL; - } else { - if (!ret && data.bytes_xfered > 0) - ret = RESULT_FAIL; - } - } else { - if (!ret && cmd.error) - ret = cmd.error; - if (!ret && data.error) - ret = data.error; - if (!ret && stop.error) - ret = stop.error; - if (!ret && data.bytes_xfered != blocks * blksz) - ret = RESULT_FAIL; - } + mmc_set_data_timeout(mrq->data, test->card); +} - if (ret == -EINVAL) - ret = RESULT_UNSUP_HOST; +/* + * Wait for the card to finish the busy state + */ +static int mmc_test_wait_busy(struct mmc_test_card *test) +{ + int ret, busy; + struct mmc_command cmd; busy = 0; do { - int ret2; - memset(&cmd, 0, sizeof(struct mmc_command)); cmd.opcode = MMC_SEND_STATUS; cmd.arg = test->card->rca << 16; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; - ret2 = mmc_wait_for_cmd(test->card->host, &cmd, 0); - if (ret2) + ret = mmc_wait_for_cmd(test->card->host, &cmd, 0); + if (ret) break; if (!busy && !(cmd.resp[0] & R1_READY_FOR_DATA)) { @@ -172,14 +120,57 @@ static int __mmc_test_transfer(struct mmc_test_card *test, int write, return ret; } -static int mmc_test_transfer(struct mmc_test_card *test, int write, - u8 *buffer, unsigned addr, unsigned blocks, unsigned blksz) +/* + * Transfer a single sector of kernel addressable data + */ +static int mmc_test_buffer_transfer(struct mmc_test_card *test, + u8 *buffer, unsigned addr, unsigned blksz, int write) { - return __mmc_test_transfer(test, write, 0, buffer, - addr, blocks, blksz); + int ret; + + struct mmc_request mrq; + struct mmc_command cmd; + struct mmc_command stop; + struct mmc_data data; + + struct scatterlist sg; + + memset(&mrq, 0, sizeof(struct mmc_request)); + memset(&cmd, 0, sizeof(struct mmc_command)); + memset(&data, 0, sizeof(struct mmc_data)); + memset(&stop, 0, sizeof(struct mmc_command)); + + mrq.cmd = &cmd; + mrq.data = &data; + mrq.stop = &stop; + + sg_init_one(&sg, buffer, blksz); + + mmc_test_prepare_mrq(test, &mrq, &sg, 1, addr, 1, blksz, write); + + mmc_wait_for_req(test->card->host, &mrq); + + if (cmd.error) + return cmd.error; + if (data.error) + return data.error; + + ret = mmc_test_wait_busy(test); + if (ret) + return ret; + + return 0; } -static int mmc_test_prepare_verify(struct mmc_test_card *test, int write) +/*******************************************************************/ +/* Test preparation and cleanup */ +/*******************************************************************/ + +/* + * Fill the first couple of sectors of the card with known data + * so that bad reads/writes can be detected + */ +static int __mmc_test_prepare(struct mmc_test_card *test, int write) { int ret, i; @@ -188,15 +179,14 @@ static int mmc_test_prepare_verify(struct mmc_test_card *test, int write) return ret; if (write) - memset(test->buffer, 0xDF, BUFFER_SIZE); + memset(test->buffer, 0xDF, 512); else { - for (i = 0;i < BUFFER_SIZE;i++) + for (i = 0;i < 512;i++) test->buffer[i] = i; } for (i = 0;i < BUFFER_SIZE / 512;i++) { - ret = mmc_test_transfer(test, 1, test->buffer + i * 512, - i * 512, 1, 512); + ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1); if (ret) return ret; } @@ -204,41 +194,218 @@ static int mmc_test_prepare_verify(struct mmc_test_card *test, int write) return 0; } -static int mmc_test_prepare_verify_write(struct mmc_test_card *test) +static int mmc_test_prepare_write(struct mmc_test_card *test) +{ + return __mmc_test_prepare(test, 1); +} + +static int mmc_test_prepare_read(struct mmc_test_card *test) +{ + return __mmc_test_prepare(test, 0); +} + +static int mmc_test_cleanup(struct mmc_test_card *test) { - return mmc_test_prepare_verify(test, 1); + int ret, i; + + ret = mmc_test_set_blksize(test, 512); + if (ret) + return ret; + + memset(test->buffer, 0, 512); + + for (i = 0;i < BUFFER_SIZE / 512;i++) { + ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1); + if (ret) + return ret; + } + + return 0; } -static int mmc_test_prepare_verify_read(struct mmc_test_card *test) +/*******************************************************************/ +/* Test execution helpers */ +/*******************************************************************/ + +/* + * Modifies the mmc_request to perform the "short transfer" tests + */ +static void mmc_test_prepare_broken_mrq(struct mmc_test_card *test, + struct mmc_request *mrq, int write) { - return mmc_test_prepare_verify(test, 0); + BUG_ON(!mrq || !mrq->cmd || !mrq->data); + + if (mrq->data->blocks > 1) { + mrq->cmd->opcode = write ? + MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK; + mrq->stop = NULL; + } else { + mrq->cmd->opcode = MMC_SEND_STATUS; + mrq->cmd->arg = test->card->rca << 16; + } } -static int mmc_test_verified_transfer(struct mmc_test_card *test, int write, - u8 *buffer, unsigned addr, unsigned blocks, unsigned blksz) +/* + * Checks that a normal transfer didn't have any errors + */ +static int mmc_test_check_result(struct mmc_test_card *test, + struct mmc_request *mrq) { - int ret, i, sectors; + int ret; - /* - * It is assumed that the above preparation has been done. - */ + BUG_ON(!mrq || !mrq->cmd || !mrq->data); + + ret = 0; - memset(test->buffer, 0, BUFFER_SIZE); + if (!ret && mrq->cmd->error) + ret = mrq->cmd->error; + if (!ret && mrq->data->error) + ret = mrq->data->error; + if (!ret && mrq->stop && mrq->stop->error) + ret = mrq->stop->error; + if (!ret && mrq->data->bytes_xfered != + mrq->data->blocks * mrq->data->blksz) + ret = RESULT_FAIL; + + if (ret == -EINVAL) + ret = RESULT_UNSUP_HOST; + + return ret; +} + +/* + * Checks that a "short transfer" behaved as expected + */ +static int mmc_test_check_broken_result(struct mmc_test_card *test, + struct mmc_request *mrq) +{ + int ret; + + BUG_ON(!mrq || !mrq->cmd || !mrq->data); + + ret = 0; + + if (!ret && mrq->cmd->error) + ret = mrq->cmd->error; + if (!ret && mrq->data->error == 0) + ret = RESULT_FAIL; + if (!ret && mrq->data->error != -ETIMEDOUT) + ret = mrq->data->error; + if (!ret && mrq->stop && mrq->stop->error) + ret = mrq->stop->error; + if (mrq->data->blocks > 1) { + if (!ret && mrq->data->bytes_xfered > mrq->data->blksz) + ret = RESULT_FAIL; + } else { + if (!ret && mrq->data->bytes_xfered > 0) + ret = RESULT_FAIL; + } + + if (ret == -EINVAL) + ret = RESULT_UNSUP_HOST; + + return ret; +} + +/* + * Tests a basic transfer with certain parameters + */ +static int mmc_test_simple_transfer(struct mmc_test_card *test, + struct scatterlist *sg, unsigned sg_len, unsigned dev_addr, + unsigned blocks, unsigned blksz, int write) +{ + struct mmc_request mrq; + struct mmc_command cmd; + struct mmc_command stop; + struct mmc_data data; + + memset(&mrq, 0, sizeof(struct mmc_request)); + memset(&cmd, 0, sizeof(struct mmc_command)); + memset(&data, 0, sizeof(struct mmc_data)); + memset(&stop, 0, sizeof(struct mmc_command)); + + mrq.cmd = &cmd; + mrq.data = &data; + mrq.stop = &stop; + + mmc_test_prepare_mrq(test, &mrq, sg, sg_len, dev_addr, + blocks, blksz, write); + + mmc_wait_for_req(test->card->host, &mrq); + + mmc_test_wait_busy(test); + + return mmc_test_check_result(test, &mrq); +} + +/* + * Tests a transfer where the card will fail completely or partly + */ +static int mmc_test_broken_transfer(struct mmc_test_card *test, + unsigned blocks, unsigned blksz, int write) +{ + struct mmc_request mrq; + struct mmc_command cmd; + struct mmc_command stop; + struct mmc_data data; + + struct scatterlist sg; + + memset(&mrq, 0, sizeof(struct mmc_request)); + memset(&cmd, 0, sizeof(struct mmc_command)); + memset(&data, 0, sizeof(struct mmc_data)); + memset(&stop, 0, sizeof(struct mmc_command)); + + mrq.cmd = &cmd; + mrq.data = &data; + mrq.stop = &stop; + + sg_init_one(&sg, test->buffer, blocks * blksz); + + mmc_test_prepare_mrq(test, &mrq, &sg, 1, 0, blocks, blksz, write); + mmc_test_prepare_broken_mrq(test, &mrq, write); + + mmc_wait_for_req(test->card->host, &mrq); + + mmc_test_wait_busy(test); + + return mmc_test_check_broken_result(test, &mrq); +} + +/* + * Does a complete transfer test where data is also validated + * + * Note: mmc_test_prepare() must have been done before this call + */ +static int mmc_test_transfer(struct mmc_test_card *test, + struct scatterlist *sg, unsigned sg_len, unsigned dev_addr, + unsigned blocks, unsigned blksz, int write) +{ + int ret, i; + unsigned long flags; if (write) { for (i = 0;i < blocks * blksz;i++) - buffer[i] = i; + test->scratch[i] = i; + } else { + memset(test->scratch, 0, BUFFER_SIZE); } + local_irq_save(flags); + sg_copy_from_buffer(sg, sg_len, test->scratch, BUFFER_SIZE); + local_irq_restore(flags); ret = mmc_test_set_blksize(test, blksz); if (ret) return ret; - ret = mmc_test_transfer(test, write, buffer, addr, blocks, blksz); + ret = mmc_test_simple_transfer(test, sg, sg_len, dev_addr, + blocks, blksz, write); if (ret) return ret; if (write) { + int sectors; + ret = mmc_test_set_blksize(test, 512); if (ret) return ret; @@ -253,9 +420,9 @@ static int mmc_test_verified_transfer(struct mmc_test_card *test, int write, memset(test->buffer, 0, sectors * 512); for (i = 0;i < sectors;i++) { - ret = mmc_test_transfer(test, 0, + ret = mmc_test_buffer_transfer(test, test->buffer + i * 512, - addr + i * 512, 1, 512); + dev_addr + i * 512, 512, 0); if (ret) return ret; } @@ -270,8 +437,11 @@ static int mmc_test_verified_transfer(struct mmc_test_card *test, int write, return RESULT_FAIL; } } else { + local_irq_save(flags); + sg_copy_to_buffer(sg, sg_len, test->scratch, BUFFER_SIZE); + local_irq_restore(flags); for (i = 0;i < blocks * blksz;i++) { - if (buffer[i] != (u8)i) + if (test->scratch[i] != (u8)i) return RESULT_FAIL; } } @@ -279,26 +449,6 @@ static int mmc_test_verified_transfer(struct mmc_test_card *test, int write, return 0; } -static int mmc_test_cleanup_verify(struct mmc_test_card *test) -{ - int ret, i; - - ret = mmc_test_set_blksize(test, 512); - if (ret) - return ret; - - memset(test->buffer, 0, BUFFER_SIZE); - - for (i = 0;i < BUFFER_SIZE / 512;i++) { - ret = mmc_test_transfer(test, 1, test->buffer + i * 512, - i * 512, 1, 512); - if (ret) - return ret; - } - - return 0; -} - /*******************************************************************/ /* Tests */ /*******************************************************************/ @@ -314,12 +464,15 @@ struct mmc_test_case { static int mmc_test_basic_write(struct mmc_test_card *test) { int ret; + struct scatterlist sg; ret = mmc_test_set_blksize(test, 512); if (ret) return ret; - ret = mmc_test_transfer(test, 1, test->buffer, 0, 1, 512); + sg_init_one(&sg, test->buffer, 512); + + ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 1); if (ret) return ret; @@ -329,12 +482,15 @@ static int mmc_test_basic_write(struct mmc_test_card *test) static int mmc_test_basic_read(struct mmc_test_card *test) { int ret; + struct scatterlist sg; ret = mmc_test_set_blksize(test, 512); if (ret) return ret; - ret = mmc_test_transfer(test, 0, test->buffer, 0, 1, 512); + sg_init_one(&sg, test->buffer, 512); + + ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 1); if (ret) return ret; @@ -344,8 +500,11 @@ static int mmc_test_basic_read(struct mmc_test_card *test) static int mmc_test_verify_write(struct mmc_test_card *test) { int ret; + struct scatterlist sg; + + sg_init_one(&sg, test->buffer, 512); - ret = mmc_test_verified_transfer(test, 1, test->buffer, 0, 1, 512); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 1); if (ret) return ret; @@ -355,8 +514,11 @@ static int mmc_test_verify_write(struct mmc_test_card *test) static int mmc_test_verify_read(struct mmc_test_card *test) { int ret; + struct scatterlist sg; + + sg_init_one(&sg, test->buffer, 512); - ret = mmc_test_verified_transfer(test, 0, test->buffer, 0, 1, 512); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 0); if (ret) return ret; @@ -367,6 +529,7 @@ static int mmc_test_multi_write(struct mmc_test_card *test) { int ret; unsigned int size; + struct scatterlist sg; if (test->card->host->max_blk_count == 1) return RESULT_UNSUP_HOST; @@ -379,8 +542,9 @@ static int mmc_test_multi_write(struct mmc_test_card *test) if (size < 1024) return RESULT_UNSUP_HOST; - ret = mmc_test_verified_transfer(test, 1, test->buffer, 0, - size / 512, 512); + sg_init_one(&sg, test->buffer, size); + + ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 1); if (ret) return ret; @@ -391,6 +555,7 @@ static int mmc_test_multi_read(struct mmc_test_card *test) { int ret; unsigned int size; + struct scatterlist sg; if (test->card->host->max_blk_count == 1) return RESULT_UNSUP_HOST; @@ -403,8 +568,9 @@ static int mmc_test_multi_read(struct mmc_test_card *test) if (size < 1024) return RESULT_UNSUP_HOST; - ret = mmc_test_verified_transfer(test, 0, test->buffer, 0, - size / 512, 512); + sg_init_one(&sg, test->buffer, size); + + ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 0); if (ret) return ret; @@ -414,13 +580,14 @@ static int mmc_test_multi_read(struct mmc_test_card *test) static int mmc_test_pow2_write(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; if (!test->card->csd.write_partial) return RESULT_UNSUP_CARD; for (i = 1; i < 512;i <<= 1) { - ret = mmc_test_verified_transfer(test, 1, - test->buffer, 0, 1, i); + sg_init_one(&sg, test->buffer, i); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 1); if (ret) return ret; } @@ -431,13 +598,14 @@ static int mmc_test_pow2_write(struct mmc_test_card *test) static int mmc_test_pow2_read(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; if (!test->card->csd.read_partial) return RESULT_UNSUP_CARD; for (i = 1; i < 512;i <<= 1) { - ret = mmc_test_verified_transfer(test, 0, - test->buffer, 0, 1, i); + sg_init_one(&sg, test->buffer, i); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 0); if (ret) return ret; } @@ -448,13 +616,14 @@ static int mmc_test_pow2_read(struct mmc_test_card *test) static int mmc_test_weird_write(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; if (!test->card->csd.write_partial) return RESULT_UNSUP_CARD; for (i = 3; i < 512;i += 7) { - ret = mmc_test_verified_transfer(test, 1, - test->buffer, 0, 1, i); + sg_init_one(&sg, test->buffer, i); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 1); if (ret) return ret; } @@ -465,13 +634,14 @@ static int mmc_test_weird_write(struct mmc_test_card *test) static int mmc_test_weird_read(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; if (!test->card->csd.read_partial) return RESULT_UNSUP_CARD; for (i = 3; i < 512;i += 7) { - ret = mmc_test_verified_transfer(test, 0, - test->buffer, 0, 1, i); + sg_init_one(&sg, test->buffer, i); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 0); if (ret) return ret; } @@ -482,10 +652,11 @@ static int mmc_test_weird_read(struct mmc_test_card *test) static int mmc_test_align_write(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; for (i = 1;i < 4;i++) { - ret = mmc_test_verified_transfer(test, 1, test->buffer + i, - 0, 1, 512); + sg_init_one(&sg, test->buffer + i, 512); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 1); if (ret) return ret; } @@ -496,10 +667,11 @@ static int mmc_test_align_write(struct mmc_test_card *test) static int mmc_test_align_read(struct mmc_test_card *test) { int ret, i; + struct scatterlist sg; for (i = 1;i < 4;i++) { - ret = mmc_test_verified_transfer(test, 0, test->buffer + i, - 0, 1, 512); + sg_init_one(&sg, test->buffer + i, 512); + ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 0); if (ret) return ret; } @@ -511,6 +683,7 @@ static int mmc_test_align_multi_write(struct mmc_test_card *test) { int ret, i; unsigned int size; + struct scatterlist sg; if (test->card->host->max_blk_count == 1) return RESULT_UNSUP_HOST; @@ -524,8 +697,8 @@ static int mmc_test_align_multi_write(struct mmc_test_card *test) return RESULT_UNSUP_HOST; for (i = 1;i < 4;i++) { - ret = mmc_test_verified_transfer(test, 1, test->buffer + i, - 0, size / 512, 512); + sg_init_one(&sg, test->buffer + i, size); + ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 1); if (ret) return ret; } @@ -537,6 +710,7 @@ static int mmc_test_align_multi_read(struct mmc_test_card *test) { int ret, i; unsigned int size; + struct scatterlist sg; if (test->card->host->max_blk_count == 1) return RESULT_UNSUP_HOST; @@ -550,8 +724,8 @@ static int mmc_test_align_multi_read(struct mmc_test_card *test) return RESULT_UNSUP_HOST; for (i = 1;i < 4;i++) { - ret = mmc_test_verified_transfer(test, 0, test->buffer + i, - 0, size / 512, 512); + sg_init_one(&sg, test->buffer + i, size); + ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 0); if (ret) return ret; } @@ -567,7 +741,7 @@ static int mmc_test_xfersize_write(struct mmc_test_card *test) if (ret) return ret; - ret = __mmc_test_transfer(test, 1, 1, test->buffer, 0, 1, 512); + ret = mmc_test_broken_transfer(test, 1, 512, 1); if (ret) return ret; @@ -582,7 +756,7 @@ static int mmc_test_xfersize_read(struct mmc_test_card *test) if (ret) return ret; - ret = __mmc_test_transfer(test, 0, 1, test->buffer, 0, 1, 512); + ret = mmc_test_broken_transfer(test, 1, 512, 0); if (ret) return ret; @@ -600,7 +774,7 @@ static int mmc_test_multi_xfersize_write(struct mmc_test_card *test) if (ret) return ret; - ret = __mmc_test_transfer(test, 1, 1, test->buffer, 0, 2, 512); + ret = mmc_test_broken_transfer(test, 2, 512, 1); if (ret) return ret; @@ -618,7 +792,7 @@ static int mmc_test_multi_xfersize_read(struct mmc_test_card *test) if (ret) return ret; - ret = __mmc_test_transfer(test, 0, 1, test->buffer, 0, 2, 512); + ret = mmc_test_broken_transfer(test, 2, 512, 0); if (ret) return ret; @@ -638,86 +812,86 @@ static const struct mmc_test_case mmc_test_cases[] = { { .name = "Basic write (with data verification)", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_verify_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Basic read (with data verification)", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_verify_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Multi-block write", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_multi_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Multi-block read", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_multi_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Power of two block writes", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_pow2_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Power of two block reads", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_pow2_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Weird sized block writes", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_weird_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Weird sized block reads", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_weird_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Badly aligned write", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_align_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Badly aligned read", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_align_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Badly aligned multi-block write", - .prepare = mmc_test_prepare_verify_write, + .prepare = mmc_test_prepare_write, .run = mmc_test_align_multi_write, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { .name = "Badly aligned multi-block read", - .prepare = mmc_test_prepare_verify_read, + .prepare = mmc_test_prepare_read, .run = mmc_test_align_multi_read, - .cleanup = mmc_test_cleanup_verify, + .cleanup = mmc_test_cleanup, }, { @@ -743,7 +917,7 @@ static const struct mmc_test_case mmc_test_cases[] = { static struct mutex mmc_test_lock; -static void mmc_test_run(struct mmc_test_card *test) +static void mmc_test_run(struct mmc_test_card *test, int testcase) { int i, ret; @@ -753,6 +927,9 @@ static void mmc_test_run(struct mmc_test_card *test) mmc_claim_host(test->card->host); for (i = 0;i < ARRAY_SIZE(mmc_test_cases);i++) { + if (testcase && ((i + 1) != testcase)) + continue; + printk(KERN_INFO "%s: Test case %d. %s...\n", mmc_hostname(test->card->host), i + 1, mmc_test_cases[i].name); @@ -824,9 +1001,12 @@ static ssize_t mmc_test_store(struct device *dev, { struct mmc_card *card; struct mmc_test_card *test; + int testcase; card = container_of(dev, struct mmc_card, dev); + testcase = simple_strtol(buf, NULL, 10); + test = kzalloc(sizeof(struct mmc_test_card), GFP_KERNEL); if (!test) return -ENOMEM; @@ -836,7 +1016,7 @@ static ssize_t mmc_test_store(struct device *dev, test->buffer = kzalloc(BUFFER_SIZE, GFP_KERNEL); if (test->buffer) { mutex_lock(&mmc_test_lock); - mmc_test_run(test); + mmc_test_run(test, testcase); mutex_unlock(&mmc_test_lock); } @@ -852,6 +1032,9 @@ static int mmc_test_probe(struct mmc_card *card) { int ret; + if ((card->type != MMC_TYPE_MMC) && (card->type != MMC_TYPE_SD)) + return -ENODEV; + mutex_init(&mmc_test_lock); ret = device_create_file(&card->dev, &dev_attr_test); diff --git a/drivers/mmc/card/sdio_uart.c b/drivers/mmc/card/sdio_uart.c index eeea84c309e6..78ad48718ab0 100644 --- a/drivers/mmc/card/sdio_uart.c +++ b/drivers/mmc/card/sdio_uart.c @@ -885,12 +885,14 @@ static void sdio_uart_set_termios(struct tty_struct *tty, struct ktermios *old_t sdio_uart_release_func(port); } -static void sdio_uart_break_ctl(struct tty_struct *tty, int break_state) +static int sdio_uart_break_ctl(struct tty_struct *tty, int break_state) { struct sdio_uart_port *port = tty->driver_data; + int result; - if (sdio_uart_claim_func(port) != 0) - return; + result = sdio_uart_claim_func(port); + if (result != 0) + return result; if (break_state == -1) port->lcr |= UART_LCR_SBC; @@ -899,6 +901,7 @@ static void sdio_uart_break_ctl(struct tty_struct *tty, int break_state) sdio_out(port, UART_LCR, port->lcr); sdio_uart_release_func(port); + return 0; } static int sdio_uart_tiocmget(struct tty_struct *tty, struct file *file) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 01ced4c5a61d..3ee5b8c3b5ce 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -3,7 +3,7 @@ * * Copyright (C) 2003-2004 Russell King, All Rights Reserved. * SD support Copyright (C) 2004 Ian Molton, All Rights Reserved. - * Copyright (C) 2005-2007 Pierre Ossman, All Rights Reserved. + * Copyright (C) 2005-2008 Pierre Ossman, All Rights Reserved. * MMCv4 support Copyright (C) 2006 Philip Langdale, All Rights Reserved. * * This program is free software; you can redistribute it and/or modify @@ -294,6 +294,33 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card) } EXPORT_SYMBOL(mmc_set_data_timeout); +/** + * mmc_align_data_size - pads a transfer size to a more optimal value + * @card: the MMC card associated with the data transfer + * @sz: original transfer size + * + * Pads the original data size with a number of extra bytes in + * order to avoid controller bugs and/or performance hits + * (e.g. some controllers revert to PIO for certain sizes). + * + * Returns the improved size, which might be unmodified. + * + * Note that this function is only relevant when issuing a + * single scatter gather entry. + */ +unsigned int mmc_align_data_size(struct mmc_card *card, unsigned int sz) +{ + /* + * FIXME: We don't have a system for the controller to tell + * the core about its problems yet, so for now we just 32-bit + * align the size. + */ + sz = ((sz + 3) / 4) * 4; + + return sz; +} +EXPORT_SYMBOL(mmc_align_data_size); + /** * __mmc_claim_host - exclusively claim a host * @host: mmc host to claim @@ -638,6 +665,9 @@ void mmc_rescan(struct work_struct *work) */ mmc_bus_put(host); + if (host->ops->get_cd && host->ops->get_cd(host) == 0) + goto out; + mmc_claim_host(host); mmc_power_up(host); @@ -652,7 +682,7 @@ void mmc_rescan(struct work_struct *work) if (!err) { if (mmc_attach_sdio(host, ocr)) mmc_power_off(host); - return; + goto out; } /* @@ -662,7 +692,7 @@ void mmc_rescan(struct work_struct *work) if (!err) { if (mmc_attach_sd(host, ocr)) mmc_power_off(host); - return; + goto out; } /* @@ -672,7 +702,7 @@ void mmc_rescan(struct work_struct *work) if (!err) { if (mmc_attach_mmc(host, ocr)) mmc_power_off(host); - return; + goto out; } mmc_release_host(host); @@ -683,6 +713,9 @@ void mmc_rescan(struct work_struct *work) mmc_bus_put(host); } +out: + if (host->caps & MMC_CAP_NEEDS_POLL) + mmc_schedule_delayed_work(&host->detect, HZ); } void mmc_start_host(struct mmc_host *host) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 3da29eef8f7d..fdd7c760be8c 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -288,7 +288,7 @@ static struct device_type mmc_type = { /* * Handle the detection and initialisation of a card. * - * In the case of a resume, "curcard" will contain the card + * In the case of a resume, "oldcard" will contain the card * we're trying to reinitialise. */ static int mmc_init_card(struct mmc_host *host, u32 ocr, diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 7ef3b15c5e3d..26fc098d77cd 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -326,7 +326,7 @@ static struct device_type sd_type = { /* * Handle the detection and initialisation of a card. * - * In the case of a resume, "curcard" will contain the card + * In the case of a resume, "oldcard" will contain the card * we're trying to reinitialise. */ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, @@ -494,13 +494,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, * Check if read-only switch is active. */ if (!oldcard) { - if (!host->ops->get_ro) { + if (!host->ops->get_ro || host->ops->get_ro(host) < 0) { printk(KERN_WARNING "%s: host does not " "support reading read-only " "switch. assuming write-enable.\n", mmc_hostname(host)); } else { - if (host->ops->get_ro(host)) + if (host->ops->get_ro(host) > 0) mmc_card_set_readonly(card); } } diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c index d5e51b1c7b3f..956bd7677502 100644 --- a/drivers/mmc/core/sdio_cis.c +++ b/drivers/mmc/core/sdio_cis.c @@ -129,6 +129,12 @@ static int cistpl_funce_func(struct sdio_func *func, /* TPLFE_MAX_BLK_SIZE */ func->max_blksize = buf[12] | (buf[13] << 8); + /* TPLFE_ENABLE_TIMEOUT_VAL, present in ver 1.1 and above */ + if (vsn > SDIO_SDIO_REV_1_00) + func->enable_timeout = (buf[28] | (buf[29] << 8)) * 10; + else + func->enable_timeout = jiffies_to_msecs(HZ); + return 0; } diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index 625b92ce9cef..f61fc2d4cd0a 100644 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -1,7 +1,7 @@ /* * linux/drivers/mmc/core/sdio_io.c * - * Copyright 2007 Pierre Ossman + * Copyright 2007-2008 Pierre Ossman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -76,11 +76,7 @@ int sdio_enable_func(struct sdio_func *func) if (ret) goto err; - /* - * FIXME: This should timeout based on information in the CIS, - * but we don't have card to parse that yet. - */ - timeout = jiffies + HZ; + timeout = jiffies + msecs_to_jiffies(func->enable_timeout); while (1) { ret = mmc_io_rw_direct(func->card, 0, 0, SDIO_CCCR_IORx, 0, ®); @@ -167,10 +163,8 @@ int sdio_set_block_size(struct sdio_func *func, unsigned blksz) return -EINVAL; if (blksz == 0) { - blksz = min(min( - func->max_blksize, - func->card->host->max_blk_size), - 512u); + blksz = min(func->max_blksize, func->card->host->max_blk_size); + blksz = min(blksz, 512u); } ret = mmc_io_rw_direct(func->card, 1, 0, @@ -186,9 +180,116 @@ int sdio_set_block_size(struct sdio_func *func, unsigned blksz) func->cur_blksize = blksz; return 0; } - EXPORT_SYMBOL_GPL(sdio_set_block_size); +/* + * Calculate the maximum byte mode transfer size + */ +static inline unsigned int sdio_max_byte_size(struct sdio_func *func) +{ + unsigned mval = min(func->card->host->max_seg_size, + func->card->host->max_blk_size); + mval = min(mval, func->max_blksize); + return min(mval, 512u); /* maximum size for byte mode */ +} + +/** + * sdio_align_size - pads a transfer size to a more optimal value + * @func: SDIO function + * @sz: original transfer size + * + * Pads the original data size with a number of extra bytes in + * order to avoid controller bugs and/or performance hits + * (e.g. some controllers revert to PIO for certain sizes). + * + * If possible, it will also adjust the size so that it can be + * handled in just a single request. + * + * Returns the improved size, which might be unmodified. + */ +unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz) +{ + unsigned int orig_sz; + unsigned int blk_sz, byte_sz; + unsigned chunk_sz; + + orig_sz = sz; + + /* + * Do a first check with the controller, in case it + * wants to increase the size up to a point where it + * might need more than one block. + */ + sz = mmc_align_data_size(func->card, sz); + + /* + * If we can still do this with just a byte transfer, then + * we're done. + */ + if (sz <= sdio_max_byte_size(func)) + return sz; + + if (func->card->cccr.multi_block) { + /* + * Check if the transfer is already block aligned + */ + if ((sz % func->cur_blksize) == 0) + return sz; + + /* + * Realign it so that it can be done with one request, + * and recheck if the controller still likes it. + */ + blk_sz = ((sz + func->cur_blksize - 1) / + func->cur_blksize) * func->cur_blksize; + blk_sz = mmc_align_data_size(func->card, blk_sz); + + /* + * This value is only good if it is still just + * one request. + */ + if ((blk_sz % func->cur_blksize) == 0) + return blk_sz; + + /* + * We failed to do one request, but at least try to + * pad the remainder properly. + */ + byte_sz = mmc_align_data_size(func->card, + sz % func->cur_blksize); + if (byte_sz <= sdio_max_byte_size(func)) { + blk_sz = sz / func->cur_blksize; + return blk_sz * func->cur_blksize + byte_sz; + } + } else { + /* + * We need multiple requests, so first check that the + * controller can handle the chunk size; + */ + chunk_sz = mmc_align_data_size(func->card, + sdio_max_byte_size(func)); + if (chunk_sz == sdio_max_byte_size(func)) { + /* + * Fix up the size of the remainder (if any) + */ + byte_sz = orig_sz % chunk_sz; + if (byte_sz) { + byte_sz = mmc_align_data_size(func->card, + byte_sz); + } + + return (orig_sz / chunk_sz) * chunk_sz + byte_sz; + } + } + + /* + * The controller is simply incapable of transferring the size + * we want in decent manner, so just return the original size. + */ + return orig_sz; +} +EXPORT_SYMBOL_GPL(sdio_align_size); + /* Split an arbitrarily sized data transfer into several * IO_RW_EXTENDED commands. */ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, @@ -199,14 +300,13 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, int ret; /* Do the bulk of the transfer using block mode (if supported). */ - if (func->card->cccr.multi_block) { + if (func->card->cccr.multi_block && (size > sdio_max_byte_size(func))) { /* Blocks per command is limited by host count, host transfer * size (we only use a single sg entry) and the maximum for * IO_RW_EXTENDED of 511 blocks. */ - max_blocks = min(min( - func->card->host->max_blk_count, - func->card->host->max_seg_size / func->cur_blksize), - 511u); + max_blocks = min(func->card->host->max_blk_count, + func->card->host->max_seg_size / func->cur_blksize); + max_blocks = min(max_blocks, 511u); while (remainder > func->cur_blksize) { unsigned blocks; @@ -231,11 +331,7 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, /* Write the remainder using byte mode. */ while (remainder > 0) { - size = remainder; - if (size > func->cur_blksize) - size = func->cur_blksize; - if (size > 512) - size = 512; /* maximum size for byte mode */ + size = min(remainder, sdio_max_byte_size(func)); ret = mmc_io_rw_extended(func->card, write, func->num, addr, incr_addr, buf, 1, size); @@ -260,11 +356,10 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write, * function. If there is a problem reading the address, 0xff * is returned and @err_ret will contain the error code. */ -unsigned char sdio_readb(struct sdio_func *func, unsigned int addr, - int *err_ret) +u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret) { int ret; - unsigned char val; + u8 val; BUG_ON(!func); @@ -293,8 +388,7 @@ EXPORT_SYMBOL_GPL(sdio_readb); * function. @err_ret will contain the status of the actual * transfer. */ -void sdio_writeb(struct sdio_func *func, unsigned char b, unsigned int addr, - int *err_ret) +void sdio_writeb(struct sdio_func *func, u8 b, unsigned int addr, int *err_ret) { int ret; @@ -355,7 +449,6 @@ int sdio_readsb(struct sdio_func *func, void *dst, unsigned int addr, { return sdio_io_rw_ext_helper(func, 0, addr, 0, dst, count); } - EXPORT_SYMBOL_GPL(sdio_readsb); /** @@ -385,8 +478,7 @@ EXPORT_SYMBOL_GPL(sdio_writesb); * function. If there is a problem reading the address, 0xffff * is returned and @err_ret will contain the error code. */ -unsigned short sdio_readw(struct sdio_func *func, unsigned int addr, - int *err_ret) +u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret) { int ret; @@ -400,7 +492,7 @@ unsigned short sdio_readw(struct sdio_func *func, unsigned int addr, return 0xFFFF; } - return le16_to_cpu(*(u16*)func->tmpbuf); + return le16_to_cpup((__le16 *)func->tmpbuf); } EXPORT_SYMBOL_GPL(sdio_readw); @@ -415,12 +507,11 @@ EXPORT_SYMBOL_GPL(sdio_readw); * function. @err_ret will contain the status of the actual * transfer. */ -void sdio_writew(struct sdio_func *func, unsigned short b, unsigned int addr, - int *err_ret) +void sdio_writew(struct sdio_func *func, u16 b, unsigned int addr, int *err_ret) { int ret; - *(u16*)func->tmpbuf = cpu_to_le16(b); + *(__le16 *)func->tmpbuf = cpu_to_le16(b); ret = sdio_memcpy_toio(func, addr, func->tmpbuf, 2); if (err_ret) @@ -439,8 +530,7 @@ EXPORT_SYMBOL_GPL(sdio_writew); * 0xffffffff is returned and @err_ret will contain the error * code. */ -unsigned long sdio_readl(struct sdio_func *func, unsigned int addr, - int *err_ret) +u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret) { int ret; @@ -454,7 +544,7 @@ unsigned long sdio_readl(struct sdio_func *func, unsigned int addr, return 0xFFFFFFFF; } - return le32_to_cpu(*(u32*)func->tmpbuf); + return le32_to_cpup((__le32 *)func->tmpbuf); } EXPORT_SYMBOL_GPL(sdio_readl); @@ -469,12 +559,11 @@ EXPORT_SYMBOL_GPL(sdio_readl); * function. @err_ret will contain the status of the actual * transfer. */ -void sdio_writel(struct sdio_func *func, unsigned long b, unsigned int addr, - int *err_ret) +void sdio_writel(struct sdio_func *func, u32 b, unsigned int addr, int *err_ret) { int ret; - *(u32*)func->tmpbuf = cpu_to_le32(b); + *(__le32 *)func->tmpbuf = cpu_to_le32(b); ret = sdio_memcpy_toio(func, addr, func->tmpbuf, 4); if (err_ret) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index dead61754ad7..dc6f2579f85c 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -26,18 +26,31 @@ config MMC_PXA config MMC_SDHCI tristate "Secure Digital Host Controller Interface support" - depends on PCI + depends on HAS_DMA help - This select the generic Secure Digital Host Controller Interface. + This selects the generic Secure Digital Host Controller Interface. It is used by manufacturers such as Texas Instruments(R), Ricoh(R) and Toshiba(R). Most controllers found in laptops are of this type. + + If you have a controller with this interface, say Y or M here. You + also need to enable an appropriate bus interface. + + If unsure, say N. + +config MMC_SDHCI_PCI + tristate "SDHCI support on PCI bus" + depends on MMC_SDHCI && PCI + help + This selects the PCI Secure Digital Host Controller Interface. + Most controllers found today are PCI devices. + If you have a controller with this interface, say Y or M here. If unsure, say N. config MMC_RICOH_MMC tristate "Ricoh MMC Controller Disabler (EXPERIMENTAL)" - depends on PCI && EXPERIMENTAL && MMC_SDHCI + depends on MMC_SDHCI_PCI help This selects the disabler for the Ricoh MMC Controller. This proprietary controller is unnecessary because the SDHCI driver @@ -91,6 +104,16 @@ config MMC_AT91 If unsure, say N. +config MMC_ATMELMCI + tristate "Atmel Multimedia Card Interface support" + depends on AVR32 + help + This selects the Atmel Multimedia Card Interface driver. If + you have an AT32 (AVR32) platform with a Multimedia Card + slot, say Y or M here. + + If unsure, say N. + config MMC_IMX tristate "Motorola i.MX Multimedia Card Interface support" depends on ARCH_IMX @@ -130,3 +153,24 @@ config MMC_SPI If unsure, or if your system has no SPI master driver, say N. +config MMC_S3C + tristate "Samsung S3C SD/MMC Card Interface support" + depends on ARCH_S3C2410 && MMC + help + This selects a driver for the MCI interface found in + Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs. + If you have a board based on one of those and a MMC/SD + slot, say Y or M here. + + If unsure, say N. + +config MMC_SDRICOH_CS + tristate "MMC/SD driver for Ricoh Bay1Controllers (EXPERIMENTAL)" + depends on EXPERIMENTAL && MMC && PCI && PCMCIA + help + Say Y here if your Notebook reports a Ricoh Bay1Controller PCMCIA + card whenever you insert a MMC or SD card into the card slot. + + To compile this driver as a module, choose M here: the + module will be called sdricoh_cs. + diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 3877c87e6da2..db52eebfb50e 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -10,11 +10,15 @@ obj-$(CONFIG_MMC_ARMMMCI) += mmci.o obj-$(CONFIG_MMC_PXA) += pxamci.o obj-$(CONFIG_MMC_IMX) += imxmmc.o obj-$(CONFIG_MMC_SDHCI) += sdhci.o +obj-$(CONFIG_MMC_SDHCI_PCI) += sdhci-pci.o obj-$(CONFIG_MMC_RICOH_MMC) += ricoh_mmc.o obj-$(CONFIG_MMC_WBSD) += wbsd.o obj-$(CONFIG_MMC_AU1X) += au1xmmc.o obj-$(CONFIG_MMC_OMAP) += omap.o obj-$(CONFIG_MMC_AT91) += at91_mci.o +obj-$(CONFIG_MMC_ATMELMCI) += atmel-mci.o obj-$(CONFIG_MMC_TIFM_SD) += tifm_sd.o obj-$(CONFIG_MMC_SPI) += mmc_spi.o +obj-$(CONFIG_MMC_S3C) += s3cmci.o +obj-$(CONFIG_MMC_SDRICOH_CS) += sdricoh_cs.o diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index 8979ad330a4d..f15e2064305c 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -125,8 +125,71 @@ struct at91mci_host /* Latest in the scatterlist that has been enabled for transfer */ int transfer_index; + + /* Timer for timeouts */ + struct timer_list timer; }; +/* + * Reset the controller and restore most of the state + */ +static void at91_reset_host(struct at91mci_host *host) +{ + unsigned long flags; + u32 mr; + u32 sdcr; + u32 dtor; + u32 imr; + + local_irq_save(flags); + imr = at91_mci_read(host, AT91_MCI_IMR); + + at91_mci_write(host, AT91_MCI_IDR, 0xffffffff); + + /* save current state */ + mr = at91_mci_read(host, AT91_MCI_MR) & 0x7fff; + sdcr = at91_mci_read(host, AT91_MCI_SDCR); + dtor = at91_mci_read(host, AT91_MCI_DTOR); + + /* reset the controller */ + at91_mci_write(host, AT91_MCI_CR, AT91_MCI_MCIDIS | AT91_MCI_SWRST); + + /* restore state */ + at91_mci_write(host, AT91_MCI_CR, AT91_MCI_MCIEN); + at91_mci_write(host, AT91_MCI_MR, mr); + at91_mci_write(host, AT91_MCI_SDCR, sdcr); + at91_mci_write(host, AT91_MCI_DTOR, dtor); + at91_mci_write(host, AT91_MCI_IER, imr); + + /* make sure sdio interrupts will fire */ + at91_mci_read(host, AT91_MCI_SR); + + local_irq_restore(flags); +} + +static void at91_timeout_timer(unsigned long data) +{ + struct at91mci_host *host; + + host = (struct at91mci_host *)data; + + if (host->request) { + dev_err(host->mmc->parent, "Timeout waiting end of packet\n"); + + if (host->cmd && host->cmd->data) { + host->cmd->data->error = -ETIMEDOUT; + } else { + if (host->cmd) + host->cmd->error = -ETIMEDOUT; + else + host->request->cmd->error = -ETIMEDOUT; + } + + at91_reset_host(host); + mmc_request_done(host->mmc, host->request); + } +} + /* * Copy from sg to a dma block - used for transfers */ @@ -135,9 +198,14 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data unsigned int len, i, size; unsigned *dmabuf = host->buffer; - size = host->total_length; + size = data->blksz * data->blocks; len = data->sg_len; + /* AT91SAM926[0/3] Data Write Operation and number of bytes erratum */ + if (cpu_is_at91sam9260() || cpu_is_at91sam9263()) + if (host->total_length == 12) + memset(dmabuf, 0, 12); + /* * Just loop through all entries. Size might not * be the entire list though so make sure that @@ -159,9 +227,10 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data for (index = 0; index < (amount / 4); index++) *dmabuf++ = swab32(sgbuffer[index]); - } - else + } else { memcpy(dmabuf, sgbuffer, amount); + dmabuf += amount; + } kunmap_atomic(sgbuffer, KM_BIO_SRC_IRQ); @@ -233,11 +302,11 @@ static void at91_mci_pre_dma_read(struct at91mci_host *host) if (i == 0) { at91_mci_write(host, ATMEL_PDC_RPR, sg->dma_address); - at91_mci_write(host, ATMEL_PDC_RCR, sg->length / 4); + at91_mci_write(host, ATMEL_PDC_RCR, (data->blksz & 0x3) ? sg->length : sg->length / 4); } else { at91_mci_write(host, ATMEL_PDC_RNPR, sg->dma_address); - at91_mci_write(host, ATMEL_PDC_RNCR, sg->length / 4); + at91_mci_write(host, ATMEL_PDC_RNCR, (data->blksz & 0x3) ? sg->length : sg->length / 4); } } @@ -277,8 +346,6 @@ static void at91_mci_post_dma_read(struct at91mci_host *host) dma_unmap_page(NULL, sg->dma_address, sg->length, DMA_FROM_DEVICE); - data->bytes_xfered += sg->length; - if (cpu_is_at91rm9200()) { /* AT91RM9200 errata */ unsigned int *buffer; int index; @@ -294,6 +361,8 @@ static void at91_mci_post_dma_read(struct at91mci_host *host) } flush_dcache_page(sg_page(sg)); + + data->bytes_xfered += sg->length; } /* Is there another transfer to trigger? */ @@ -334,10 +403,32 @@ static void at91_mci_handle_transmitted(struct at91mci_host *host) at91_mci_write(host, AT91_MCI_IER, AT91_MCI_BLKE); } else at91_mci_write(host, AT91_MCI_IER, AT91_MCI_NOTBUSY); +} + +/* + * Update bytes tranfered count during a write operation + */ +static void at91_mci_update_bytes_xfered(struct at91mci_host *host) +{ + struct mmc_data *data; + + /* always deal with the effective request (and not the current cmd) */ + + if (host->request->cmd && host->request->cmd->error != 0) + return; - data->bytes_xfered = host->total_length; + if (host->request->data) { + data = host->request->data; + if (data->flags & MMC_DATA_WRITE) { + /* card is in IDLE mode now */ + pr_debug("-> bytes_xfered %d, total_length = %d\n", + data->bytes_xfered, host->total_length); + data->bytes_xfered = data->blksz * data->blocks; + } + } } + /*Handle after command sent ready*/ static int at91_mci_handle_cmdrdy(struct at91mci_host *host) { @@ -350,8 +441,7 @@ static int at91_mci_handle_cmdrdy(struct at91mci_host *host) } else return 1; } else if (host->cmd->data->flags & MMC_DATA_WRITE) { /*After sendding multi-block-write command, start DMA transfer*/ - at91_mci_write(host, AT91_MCI_IER, AT91_MCI_TXBUFE); - at91_mci_write(host, AT91_MCI_IER, AT91_MCI_BLKE); + at91_mci_write(host, AT91_MCI_IER, AT91_MCI_TXBUFE | AT91_MCI_BLKE); at91_mci_write(host, ATMEL_PDC_PTCR, ATMEL_PDC_TXTEN); } @@ -430,11 +520,19 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command if (data) { - if ( data->blksz & 0x3 ) { - pr_debug("Unsupported block size\n"); - cmd->error = -EINVAL; - mmc_request_done(host->mmc, host->request); - return; + if (cpu_is_at91rm9200() || cpu_is_at91sam9261()) { + if (data->blksz & 0x3) { + pr_debug("Unsupported block size\n"); + cmd->error = -EINVAL; + mmc_request_done(host->mmc, host->request); + return; + } + if (data->flags & MMC_DATA_STREAM) { + pr_debug("Stream commands not supported\n"); + cmd->error = -EINVAL; + mmc_request_done(host->mmc, host->request); + return; + } } block_length = data->blksz; @@ -481,8 +579,16 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command ier = AT91_MCI_CMDRDY; } else { /* zero block length and PDC mode */ - mr = at91_mci_read(host, AT91_MCI_MR) & 0x7fff; - at91_mci_write(host, AT91_MCI_MR, mr | (block_length << 16) | AT91_MCI_PDCMODE); + mr = at91_mci_read(host, AT91_MCI_MR) & 0x5fff; + mr |= (data->blksz & 0x3) ? AT91_MCI_PDCFBYTE : 0; + mr |= (block_length << 16); + mr |= AT91_MCI_PDCMODE; + at91_mci_write(host, AT91_MCI_MR, mr); + + if (!(cpu_is_at91rm9200() || cpu_is_at91sam9261())) + at91_mci_write(host, AT91_MCI_BLKR, + AT91_MCI_BLKR_BCNT(blocks) | + AT91_MCI_BLKR_BLKLEN(block_length)); /* * Disable the PDC controller @@ -508,6 +614,13 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command * Handle a write */ host->total_length = block_length * blocks; + /* + * AT91SAM926[0/3] Data Write Operation and + * number of bytes erratum + */ + if (cpu_is_at91sam9260 () || cpu_is_at91sam9263()) + if (host->total_length < 12) + host->total_length = 12; host->buffer = dma_alloc_coherent(NULL, host->total_length, &host->physical_address, GFP_KERNEL); @@ -517,7 +630,9 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command pr_debug("Transmitting %d bytes\n", host->total_length); at91_mci_write(host, ATMEL_PDC_TPR, host->physical_address); - at91_mci_write(host, ATMEL_PDC_TCR, host->total_length / 4); + at91_mci_write(host, ATMEL_PDC_TCR, (data->blksz & 0x3) ? + host->total_length : host->total_length / 4); + ier = AT91_MCI_CMDRDY; } } @@ -552,20 +667,26 @@ static void at91_mci_process_next(struct at91mci_host *host) else if ((!(host->flags & FL_SENT_STOP)) && host->request->stop) { host->flags |= FL_SENT_STOP; at91_mci_send_command(host, host->request->stop); - } - else + } else { + del_timer(&host->timer); + /* the at91rm9200 mci controller hangs after some transfers, + * and the workaround is to reset it after each transfer. + */ + if (cpu_is_at91rm9200()) + at91_reset_host(host); mmc_request_done(host->mmc, host->request); + } } /* * Handle a command that has been completed */ -static void at91_mci_completed_command(struct at91mci_host *host) +static void at91_mci_completed_command(struct at91mci_host *host, unsigned int status) { struct mmc_command *cmd = host->cmd; - unsigned int status; + struct mmc_data *data = cmd->data; - at91_mci_write(host, AT91_MCI_IDR, 0xffffffff); + at91_mci_write(host, AT91_MCI_IDR, 0xffffffff & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); cmd->resp[0] = at91_mci_read(host, AT91_MCI_RSPR(0)); cmd->resp[1] = at91_mci_read(host, AT91_MCI_RSPR(1)); @@ -577,25 +698,34 @@ static void at91_mci_completed_command(struct at91mci_host *host) host->buffer = NULL; } - status = at91_mci_read(host, AT91_MCI_SR); - - pr_debug("Status = %08X [%08X %08X %08X %08X]\n", - status, cmd->resp[0], cmd->resp[1], cmd->resp[2], cmd->resp[3]); + pr_debug("Status = %08X/%08x [%08X %08X %08X %08X]\n", + status, at91_mci_read(host, AT91_MCI_SR), + cmd->resp[0], cmd->resp[1], cmd->resp[2], cmd->resp[3]); if (status & AT91_MCI_ERRORS) { if ((status & AT91_MCI_RCRCE) && !(mmc_resp_type(cmd) & MMC_RSP_CRC)) { cmd->error = 0; } else { - if (status & (AT91_MCI_RTOE | AT91_MCI_DTOE)) - cmd->error = -ETIMEDOUT; - else if (status & (AT91_MCI_RCRCE | AT91_MCI_DCRCE)) - cmd->error = -EILSEQ; - else - cmd->error = -EIO; + if (status & (AT91_MCI_DTOE | AT91_MCI_DCRCE)) { + if (data) { + if (status & AT91_MCI_DTOE) + data->error = -ETIMEDOUT; + else if (status & AT91_MCI_DCRCE) + data->error = -EILSEQ; + } + } else { + if (status & AT91_MCI_RTOE) + cmd->error = -ETIMEDOUT; + else if (status & AT91_MCI_RCRCE) + cmd->error = -EILSEQ; + else + cmd->error = -EIO; + } - pr_debug("Error detected and set to %d (cmd = %d, retries = %d)\n", - cmd->error, cmd->opcode, cmd->retries); + pr_debug("Error detected and set to %d/%d (cmd = %d, retries = %d)\n", + cmd->error, data ? data->error : 0, + cmd->opcode, cmd->retries); } } else @@ -613,6 +743,8 @@ static void at91_mci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->request = mrq; host->flags = 0; + mod_timer(&host->timer, jiffies + HZ); + at91_mci_process_next(host); } @@ -736,6 +868,7 @@ static irqreturn_t at91_mci_irq(int irq, void *devid) if (int_status & AT91_MCI_NOTBUSY) { pr_debug("Card is ready\n"); + at91_mci_update_bytes_xfered(host); completed = 1; } @@ -744,9 +877,21 @@ static irqreturn_t at91_mci_irq(int irq, void *devid) if (int_status & AT91_MCI_BLKE) { pr_debug("Block transfer has ended\n"); - completed = 1; + if (host->request->data && host->request->data->blocks > 1) { + /* multi block write : complete multi write + * command and send stop */ + completed = 1; + } else { + at91_mci_write(host, AT91_MCI_IER, AT91_MCI_NOTBUSY); + } } + if (int_status & AT91_MCI_SDIOIRQA) + mmc_signal_sdio_irq(host->mmc); + + if (int_status & AT91_MCI_SDIOIRQB) + mmc_signal_sdio_irq(host->mmc); + if (int_status & AT91_MCI_TXRDY) pr_debug("Ready to transmit\n"); @@ -761,10 +906,10 @@ static irqreturn_t at91_mci_irq(int irq, void *devid) if (completed) { pr_debug("Completed command\n"); - at91_mci_write(host, AT91_MCI_IDR, 0xffffffff); - at91_mci_completed_command(host); + at91_mci_write(host, AT91_MCI_IDR, 0xffffffff & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); + at91_mci_completed_command(host, int_status); } else - at91_mci_write(host, AT91_MCI_IDR, int_status); + at91_mci_write(host, AT91_MCI_IDR, int_status & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB)); return IRQ_HANDLED; } @@ -793,25 +938,33 @@ static irqreturn_t at91_mmc_det_irq(int irq, void *_host) static int at91_mci_get_ro(struct mmc_host *mmc) { - int read_only = 0; struct at91mci_host *host = mmc_priv(mmc); - if (host->board->wp_pin) { - read_only = gpio_get_value(host->board->wp_pin); - printk(KERN_WARNING "%s: card is %s\n", mmc_hostname(mmc), - (read_only ? "read-only" : "read-write") ); - } - else { - printk(KERN_WARNING "%s: host does not support reading read-only " - "switch. Assuming write-enable.\n", mmc_hostname(mmc)); - } - return read_only; + if (host->board->wp_pin) + return !!gpio_get_value(host->board->wp_pin); + /* + * Board doesn't support read only detection; let the mmc core + * decide what to do. + */ + return -ENOSYS; +} + +static void at91_mci_enable_sdio_irq(struct mmc_host *mmc, int enable) +{ + struct at91mci_host *host = mmc_priv(mmc); + + pr_debug("%s: sdio_irq %c : %s\n", mmc_hostname(host->mmc), + host->board->slot_b ? 'B':'A', enable ? "enable" : "disable"); + at91_mci_write(host, enable ? AT91_MCI_IER : AT91_MCI_IDR, + host->board->slot_b ? AT91_MCI_SDIOIRQB : AT91_MCI_SDIOIRQA); + } static const struct mmc_host_ops at91_mci_ops = { .request = at91_mci_request, .set_ios = at91_mci_set_ios, .get_ro = at91_mci_get_ro, + .enable_sdio_irq = at91_mci_enable_sdio_irq, }; /* @@ -842,6 +995,7 @@ static int __init at91_mci_probe(struct platform_device *pdev) mmc->f_min = 375000; mmc->f_max = 25000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + mmc->caps = MMC_CAP_SDIO_IRQ; mmc->max_blk_size = 4095; mmc->max_blk_count = mmc->max_req_size; @@ -935,6 +1089,8 @@ static int __init at91_mci_probe(struct platform_device *pdev) mmc_add_host(mmc); + setup_timer(&host->timer, at91_timeout_timer, (unsigned long)host); + /* * monitor card insertion/removal if we can */ @@ -995,6 +1151,7 @@ static int __exit at91_mci_remove(struct platform_device *pdev) } at91_mci_disable(host); + del_timer_sync(&host->timer); mmc_remove_host(mmc); free_irq(host->irq, host); diff --git a/drivers/mmc/host/atmel-mci-regs.h b/drivers/mmc/host/atmel-mci-regs.h new file mode 100644 index 000000000000..a9a5657706c6 --- /dev/null +++ b/drivers/mmc/host/atmel-mci-regs.h @@ -0,0 +1,91 @@ +/* + * Atmel MultiMedia Card Interface driver + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __DRIVERS_MMC_ATMEL_MCI_H__ +#define __DRIVERS_MMC_ATMEL_MCI_H__ + +/* MCI Register Definitions */ +#define MCI_CR 0x0000 /* Control */ +# define MCI_CR_MCIEN ( 1 << 0) /* MCI Enable */ +# define MCI_CR_MCIDIS ( 1 << 1) /* MCI Disable */ +# define MCI_CR_SWRST ( 1 << 7) /* Software Reset */ +#define MCI_MR 0x0004 /* Mode */ +# define MCI_MR_CLKDIV(x) ((x) << 0) /* Clock Divider */ +# define MCI_MR_RDPROOF ( 1 << 11) /* Read Proof */ +# define MCI_MR_WRPROOF ( 1 << 12) /* Write Proof */ +#define MCI_DTOR 0x0008 /* Data Timeout */ +# define MCI_DTOCYC(x) ((x) << 0) /* Data Timeout Cycles */ +# define MCI_DTOMUL(x) ((x) << 4) /* Data Timeout Multiplier */ +#define MCI_SDCR 0x000c /* SD Card / SDIO */ +# define MCI_SDCSEL_SLOT_A ( 0 << 0) /* Select SD slot A */ +# define MCI_SDCSEL_SLOT_B ( 1 << 0) /* Select SD slot A */ +# define MCI_SDCBUS_1BIT ( 0 << 7) /* 1-bit data bus */ +# define MCI_SDCBUS_4BIT ( 1 << 7) /* 4-bit data bus */ +#define MCI_ARGR 0x0010 /* Command Argument */ +#define MCI_CMDR 0x0014 /* Command */ +# define MCI_CMDR_CMDNB(x) ((x) << 0) /* Command Opcode */ +# define MCI_CMDR_RSPTYP_NONE ( 0 << 6) /* No response */ +# define MCI_CMDR_RSPTYP_48BIT ( 1 << 6) /* 48-bit response */ +# define MCI_CMDR_RSPTYP_136BIT ( 2 << 6) /* 136-bit response */ +# define MCI_CMDR_SPCMD_INIT ( 1 << 8) /* Initialization command */ +# define MCI_CMDR_SPCMD_SYNC ( 2 << 8) /* Synchronized command */ +# define MCI_CMDR_SPCMD_INT ( 4 << 8) /* Interrupt command */ +# define MCI_CMDR_SPCMD_INTRESP ( 5 << 8) /* Interrupt response */ +# define MCI_CMDR_OPDCMD ( 1 << 11) /* Open Drain */ +# define MCI_CMDR_MAXLAT_5CYC ( 0 << 12) /* Max latency 5 cycles */ +# define MCI_CMDR_MAXLAT_64CYC ( 1 << 12) /* Max latency 64 cycles */ +# define MCI_CMDR_START_XFER ( 1 << 16) /* Start data transfer */ +# define MCI_CMDR_STOP_XFER ( 2 << 16) /* Stop data transfer */ +# define MCI_CMDR_TRDIR_WRITE ( 0 << 18) /* Write data */ +# define MCI_CMDR_TRDIR_READ ( 1 << 18) /* Read data */ +# define MCI_CMDR_BLOCK ( 0 << 19) /* Single-block transfer */ +# define MCI_CMDR_MULTI_BLOCK ( 1 << 19) /* Multi-block transfer */ +# define MCI_CMDR_STREAM ( 2 << 19) /* MMC Stream transfer */ +# define MCI_CMDR_SDIO_BYTE ( 4 << 19) /* SDIO Byte transfer */ +# define MCI_CMDR_SDIO_BLOCK ( 5 << 19) /* SDIO Block transfer */ +# define MCI_CMDR_SDIO_SUSPEND ( 1 << 24) /* SDIO Suspend Command */ +# define MCI_CMDR_SDIO_RESUME ( 2 << 24) /* SDIO Resume Command */ +#define MCI_BLKR 0x0018 /* Block */ +# define MCI_BCNT(x) ((x) << 0) /* Data Block Count */ +# define MCI_BLKLEN(x) ((x) << 16) /* Data Block Length */ +#define MCI_RSPR 0x0020 /* Response 0 */ +#define MCI_RSPR1 0x0024 /* Response 1 */ +#define MCI_RSPR2 0x0028 /* Response 2 */ +#define MCI_RSPR3 0x002c /* Response 3 */ +#define MCI_RDR 0x0030 /* Receive Data */ +#define MCI_TDR 0x0034 /* Transmit Data */ +#define MCI_SR 0x0040 /* Status */ +#define MCI_IER 0x0044 /* Interrupt Enable */ +#define MCI_IDR 0x0048 /* Interrupt Disable */ +#define MCI_IMR 0x004c /* Interrupt Mask */ +# define MCI_CMDRDY ( 1 << 0) /* Command Ready */ +# define MCI_RXRDY ( 1 << 1) /* Receiver Ready */ +# define MCI_TXRDY ( 1 << 2) /* Transmitter Ready */ +# define MCI_BLKE ( 1 << 3) /* Data Block Ended */ +# define MCI_DTIP ( 1 << 4) /* Data Transfer In Progress */ +# define MCI_NOTBUSY ( 1 << 5) /* Data Not Busy */ +# define MCI_SDIOIRQA ( 1 << 8) /* SDIO IRQ in slot A */ +# define MCI_SDIOIRQB ( 1 << 9) /* SDIO IRQ in slot B */ +# define MCI_RINDE ( 1 << 16) /* Response Index Error */ +# define MCI_RDIRE ( 1 << 17) /* Response Direction Error */ +# define MCI_RCRCE ( 1 << 18) /* Response CRC Error */ +# define MCI_RENDE ( 1 << 19) /* Response End Bit Error */ +# define MCI_RTOE ( 1 << 20) /* Response Time-Out Error */ +# define MCI_DCRCE ( 1 << 21) /* Data CRC Error */ +# define MCI_DTOE ( 1 << 22) /* Data Time-Out Error */ +# define MCI_OVRE ( 1 << 30) /* RX Overrun Error */ +# define MCI_UNRE ( 1 << 31) /* TX Underrun Error */ + +/* Register access macros */ +#define mci_readl(port,reg) \ + __raw_readl((port)->regs + MCI_##reg) +#define mci_writel(port,reg,value) \ + __raw_writel((value), (port)->regs + MCI_##reg) + +#endif /* __DRIVERS_MMC_ATMEL_MCI_H__ */ diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c new file mode 100644 index 000000000000..cce873c5a149 --- /dev/null +++ b/drivers/mmc/host/atmel-mci.c @@ -0,0 +1,981 @@ +/* + * Atmel MultiMedia Card Interface driver + * + * Copyright (C) 2004-2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include "atmel-mci-regs.h" + +#define ATMCI_DATA_ERROR_FLAGS (MCI_DCRCE | MCI_DTOE | MCI_OVRE | MCI_UNRE) + +enum { + EVENT_CMD_COMPLETE = 0, + EVENT_DATA_ERROR, + EVENT_DATA_COMPLETE, + EVENT_STOP_SENT, + EVENT_STOP_COMPLETE, + EVENT_XFER_COMPLETE, +}; + +struct atmel_mci { + struct mmc_host *mmc; + void __iomem *regs; + + struct scatterlist *sg; + unsigned int pio_offset; + + struct mmc_request *mrq; + struct mmc_command *cmd; + struct mmc_data *data; + + u32 cmd_status; + u32 data_status; + u32 stop_status; + u32 stop_cmdr; + + u32 mode_reg; + u32 sdc_reg; + + struct tasklet_struct tasklet; + unsigned long pending_events; + unsigned long completed_events; + + int present; + int detect_pin; + int wp_pin; + + /* For detect pin debouncing */ + struct timer_list detect_timer; + + unsigned long bus_hz; + unsigned long mapbase; + struct clk *mck; + struct platform_device *pdev; +}; + +#define atmci_is_completed(host, event) \ + test_bit(event, &host->completed_events) +#define atmci_test_and_clear_pending(host, event) \ + test_and_clear_bit(event, &host->pending_events) +#define atmci_test_and_set_completed(host, event) \ + test_and_set_bit(event, &host->completed_events) +#define atmci_set_completed(host, event) \ + set_bit(event, &host->completed_events) +#define atmci_set_pending(host, event) \ + set_bit(event, &host->pending_events) +#define atmci_clear_pending(host, event) \ + clear_bit(event, &host->pending_events) + + +static void atmci_enable(struct atmel_mci *host) +{ + clk_enable(host->mck); + mci_writel(host, CR, MCI_CR_MCIEN); + mci_writel(host, MR, host->mode_reg); + mci_writel(host, SDCR, host->sdc_reg); +} + +static void atmci_disable(struct atmel_mci *host) +{ + mci_writel(host, CR, MCI_CR_SWRST); + + /* Stall until write is complete, then disable the bus clock */ + mci_readl(host, SR); + clk_disable(host->mck); +} + +static inline unsigned int ns_to_clocks(struct atmel_mci *host, + unsigned int ns) +{ + return (ns * (host->bus_hz / 1000000) + 999) / 1000; +} + +static void atmci_set_timeout(struct atmel_mci *host, + struct mmc_data *data) +{ + static unsigned dtomul_to_shift[] = { + 0, 4, 7, 8, 10, 12, 16, 20 + }; + unsigned timeout; + unsigned dtocyc; + unsigned dtomul; + + timeout = ns_to_clocks(host, data->timeout_ns) + data->timeout_clks; + + for (dtomul = 0; dtomul < 8; dtomul++) { + unsigned shift = dtomul_to_shift[dtomul]; + dtocyc = (timeout + (1 << shift) - 1) >> shift; + if (dtocyc < 15) + break; + } + + if (dtomul >= 8) { + dtomul = 7; + dtocyc = 15; + } + + dev_vdbg(&host->mmc->class_dev, "setting timeout to %u cycles\n", + dtocyc << dtomul_to_shift[dtomul]); + mci_writel(host, DTOR, (MCI_DTOMUL(dtomul) | MCI_DTOCYC(dtocyc))); +} + +/* + * Return mask with command flags to be enabled for this command. + */ +static u32 atmci_prepare_command(struct mmc_host *mmc, + struct mmc_command *cmd) +{ + struct mmc_data *data; + u32 cmdr; + + cmd->error = -EINPROGRESS; + + cmdr = MCI_CMDR_CMDNB(cmd->opcode); + + if (cmd->flags & MMC_RSP_PRESENT) { + if (cmd->flags & MMC_RSP_136) + cmdr |= MCI_CMDR_RSPTYP_136BIT; + else + cmdr |= MCI_CMDR_RSPTYP_48BIT; + } + + /* + * This should really be MAXLAT_5 for CMD2 and ACMD41, but + * it's too difficult to determine whether this is an ACMD or + * not. Better make it 64. + */ + cmdr |= MCI_CMDR_MAXLAT_64CYC; + + if (mmc->ios.bus_mode == MMC_BUSMODE_OPENDRAIN) + cmdr |= MCI_CMDR_OPDCMD; + + data = cmd->data; + if (data) { + cmdr |= MCI_CMDR_START_XFER; + if (data->flags & MMC_DATA_STREAM) + cmdr |= MCI_CMDR_STREAM; + else if (data->blocks > 1) + cmdr |= MCI_CMDR_MULTI_BLOCK; + else + cmdr |= MCI_CMDR_BLOCK; + + if (data->flags & MMC_DATA_READ) + cmdr |= MCI_CMDR_TRDIR_READ; + } + + return cmdr; +} + +static void atmci_start_command(struct atmel_mci *host, + struct mmc_command *cmd, + u32 cmd_flags) +{ + /* Must read host->cmd after testing event flags */ + smp_rmb(); + WARN_ON(host->cmd); + host->cmd = cmd; + + dev_vdbg(&host->mmc->class_dev, + "start command: ARGR=0x%08x CMDR=0x%08x\n", + cmd->arg, cmd_flags); + + mci_writel(host, ARGR, cmd->arg); + mci_writel(host, CMDR, cmd_flags); +} + +static void send_stop_cmd(struct mmc_host *mmc, struct mmc_data *data) +{ + struct atmel_mci *host = mmc_priv(mmc); + + atmci_start_command(host, data->stop, host->stop_cmdr); + mci_writel(host, IER, MCI_CMDRDY); +} + +static void atmci_request_end(struct mmc_host *mmc, struct mmc_request *mrq) +{ + struct atmel_mci *host = mmc_priv(mmc); + + WARN_ON(host->cmd || host->data); + host->mrq = NULL; + + atmci_disable(host); + + mmc_request_done(mmc, mrq); +} + +/* + * Returns a mask of interrupt flags to be enabled after the whole + * request has been prepared. + */ +static u32 atmci_submit_data(struct mmc_host *mmc, struct mmc_data *data) +{ + struct atmel_mci *host = mmc_priv(mmc); + u32 iflags; + + data->error = -EINPROGRESS; + + WARN_ON(host->data); + host->sg = NULL; + host->data = data; + + mci_writel(host, BLKR, MCI_BCNT(data->blocks) + | MCI_BLKLEN(data->blksz)); + dev_vdbg(&mmc->class_dev, "BLKR=0x%08x\n", + MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz)); + + iflags = ATMCI_DATA_ERROR_FLAGS; + host->sg = data->sg; + host->pio_offset = 0; + if (data->flags & MMC_DATA_READ) + iflags |= MCI_RXRDY; + else + iflags |= MCI_TXRDY; + + return iflags; +} + +static void atmci_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + struct atmel_mci *host = mmc_priv(mmc); + struct mmc_data *data; + struct mmc_command *cmd; + u32 iflags; + u32 cmdflags = 0; + + iflags = mci_readl(host, IMR); + if (iflags) + dev_warn(&mmc->class_dev, "WARNING: IMR=0x%08x\n", + mci_readl(host, IMR)); + + WARN_ON(host->mrq != NULL); + + /* + * We may "know" the card is gone even though there's still an + * electrical connection. If so, we really need to communicate + * this to the MMC core since there won't be any more + * interrupts as the card is completely removed. Otherwise, + * the MMC core might believe the card is still there even + * though the card was just removed very slowly. + */ + if (!host->present) { + mrq->cmd->error = -ENOMEDIUM; + mmc_request_done(mmc, mrq); + return; + } + + host->mrq = mrq; + host->pending_events = 0; + host->completed_events = 0; + + atmci_enable(host); + + /* We don't support multiple blocks of weird lengths. */ + data = mrq->data; + if (data) { + if (data->blocks > 1 && data->blksz & 3) + goto fail; + atmci_set_timeout(host, data); + } + + iflags = MCI_CMDRDY; + cmd = mrq->cmd; + cmdflags = atmci_prepare_command(mmc, cmd); + atmci_start_command(host, cmd, cmdflags); + + if (data) + iflags |= atmci_submit_data(mmc, data); + + if (mrq->stop) { + host->stop_cmdr = atmci_prepare_command(mmc, mrq->stop); + host->stop_cmdr |= MCI_CMDR_STOP_XFER; + if (!(data->flags & MMC_DATA_WRITE)) + host->stop_cmdr |= MCI_CMDR_TRDIR_READ; + if (data->flags & MMC_DATA_STREAM) + host->stop_cmdr |= MCI_CMDR_STREAM; + else + host->stop_cmdr |= MCI_CMDR_MULTI_BLOCK; + } + + /* + * We could have enabled interrupts earlier, but I suspect + * that would open up a nice can of interesting race + * conditions (e.g. command and data complete, but stop not + * prepared yet.) + */ + mci_writel(host, IER, iflags); + + return; + +fail: + atmci_disable(host); + host->mrq = NULL; + mrq->cmd->error = -EINVAL; + mmc_request_done(mmc, mrq); +} + +static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct atmel_mci *host = mmc_priv(mmc); + + if (ios->clock) { + u32 clkdiv; + + /* Set clock rate */ + clkdiv = DIV_ROUND_UP(host->bus_hz, 2 * ios->clock) - 1; + if (clkdiv > 255) { + dev_warn(&mmc->class_dev, + "clock %u too slow; using %lu\n", + ios->clock, host->bus_hz / (2 * 256)); + clkdiv = 255; + } + + host->mode_reg = MCI_MR_CLKDIV(clkdiv) | MCI_MR_WRPROOF + | MCI_MR_RDPROOF; + } + + switch (ios->bus_width) { + case MMC_BUS_WIDTH_1: + host->sdc_reg = 0; + break; + case MMC_BUS_WIDTH_4: + host->sdc_reg = MCI_SDCBUS_4BIT; + break; + } + + switch (ios->power_mode) { + case MMC_POWER_ON: + /* Send init sequence (74 clock cycles) */ + atmci_enable(host); + mci_writel(host, CMDR, MCI_CMDR_SPCMD_INIT); + while (!(mci_readl(host, SR) & MCI_CMDRDY)) + cpu_relax(); + atmci_disable(host); + break; + default: + /* + * TODO: None of the currently available AVR32-based + * boards allow MMC power to be turned off. Implement + * power control when this can be tested properly. + */ + break; + } +} + +static int atmci_get_ro(struct mmc_host *mmc) +{ + int read_only = 0; + struct atmel_mci *host = mmc_priv(mmc); + + if (host->wp_pin >= 0) { + read_only = gpio_get_value(host->wp_pin); + dev_dbg(&mmc->class_dev, "card is %s\n", + read_only ? "read-only" : "read-write"); + } else { + dev_dbg(&mmc->class_dev, + "no pin for checking read-only switch." + " Assuming write-enable.\n"); + } + + return read_only; +} + +static struct mmc_host_ops atmci_ops = { + .request = atmci_request, + .set_ios = atmci_set_ios, + .get_ro = atmci_get_ro, +}; + +static void atmci_command_complete(struct atmel_mci *host, + struct mmc_command *cmd, u32 status) +{ + /* Read the response from the card (up to 16 bytes) */ + cmd->resp[0] = mci_readl(host, RSPR); + cmd->resp[1] = mci_readl(host, RSPR); + cmd->resp[2] = mci_readl(host, RSPR); + cmd->resp[3] = mci_readl(host, RSPR); + + if (status & MCI_RTOE) + cmd->error = -ETIMEDOUT; + else if ((cmd->flags & MMC_RSP_CRC) && (status & MCI_RCRCE)) + cmd->error = -EILSEQ; + else if (status & (MCI_RINDE | MCI_RDIRE | MCI_RENDE)) + cmd->error = -EIO; + else + cmd->error = 0; + + if (cmd->error) { + dev_dbg(&host->mmc->class_dev, + "command error: status=0x%08x\n", status); + + if (cmd->data) { + host->data = NULL; + mci_writel(host, IDR, MCI_NOTBUSY + | MCI_TXRDY | MCI_RXRDY + | ATMCI_DATA_ERROR_FLAGS); + } + } +} + +static void atmci_detect_change(unsigned long data) +{ + struct atmel_mci *host = (struct atmel_mci *)data; + struct mmc_request *mrq = host->mrq; + int present; + + /* + * atmci_remove() sets detect_pin to -1 before freeing the + * interrupt. We must not re-enable the interrupt if it has + * been freed. + */ + smp_rmb(); + if (host->detect_pin < 0) + return; + + enable_irq(gpio_to_irq(host->detect_pin)); + present = !gpio_get_value(host->detect_pin); + + dev_vdbg(&host->pdev->dev, "detect change: %d (was %d)\n", + present, host->present); + + if (present != host->present) { + dev_dbg(&host->mmc->class_dev, "card %s\n", + present ? "inserted" : "removed"); + host->present = present; + + /* Reset controller if card is gone */ + if (!present) { + mci_writel(host, CR, MCI_CR_SWRST); + mci_writel(host, IDR, ~0UL); + mci_writel(host, CR, MCI_CR_MCIEN); + } + + /* Clean up queue if present */ + if (mrq) { + /* + * Reset controller to terminate any ongoing + * commands or data transfers. + */ + mci_writel(host, CR, MCI_CR_SWRST); + + if (!atmci_is_completed(host, EVENT_CMD_COMPLETE)) + mrq->cmd->error = -ENOMEDIUM; + + if (mrq->data && !atmci_is_completed(host, + EVENT_DATA_COMPLETE)) { + host->data = NULL; + mrq->data->error = -ENOMEDIUM; + } + if (mrq->stop && !atmci_is_completed(host, + EVENT_STOP_COMPLETE)) + mrq->stop->error = -ENOMEDIUM; + + host->cmd = NULL; + atmci_request_end(host->mmc, mrq); + } + + mmc_detect_change(host->mmc, 0); + } +} + +static void atmci_tasklet_func(unsigned long priv) +{ + struct mmc_host *mmc = (struct mmc_host *)priv; + struct atmel_mci *host = mmc_priv(mmc); + struct mmc_request *mrq = host->mrq; + struct mmc_data *data = host->data; + + dev_vdbg(&mmc->class_dev, + "tasklet: pending/completed/mask %lx/%lx/%x\n", + host->pending_events, host->completed_events, + mci_readl(host, IMR)); + + if (atmci_test_and_clear_pending(host, EVENT_CMD_COMPLETE)) { + /* + * host->cmd must be set to NULL before the interrupt + * handler sees EVENT_CMD_COMPLETE + */ + host->cmd = NULL; + smp_wmb(); + atmci_set_completed(host, EVENT_CMD_COMPLETE); + atmci_command_complete(host, mrq->cmd, host->cmd_status); + + if (!mrq->cmd->error && mrq->stop + && atmci_is_completed(host, EVENT_XFER_COMPLETE) + && !atmci_test_and_set_completed(host, + EVENT_STOP_SENT)) + send_stop_cmd(host->mmc, mrq->data); + } + if (atmci_test_and_clear_pending(host, EVENT_STOP_COMPLETE)) { + /* + * host->cmd must be set to NULL before the interrupt + * handler sees EVENT_STOP_COMPLETE + */ + host->cmd = NULL; + smp_wmb(); + atmci_set_completed(host, EVENT_STOP_COMPLETE); + atmci_command_complete(host, mrq->stop, host->stop_status); + } + if (atmci_test_and_clear_pending(host, EVENT_DATA_ERROR)) { + u32 status = host->data_status; + + dev_vdbg(&mmc->class_dev, "data error: status=%08x\n", status); + + atmci_set_completed(host, EVENT_DATA_ERROR); + atmci_set_completed(host, EVENT_DATA_COMPLETE); + + if (status & MCI_DTOE) { + dev_dbg(&mmc->class_dev, + "data timeout error\n"); + data->error = -ETIMEDOUT; + } else if (status & MCI_DCRCE) { + dev_dbg(&mmc->class_dev, "data CRC error\n"); + data->error = -EILSEQ; + } else { + dev_dbg(&mmc->class_dev, + "data FIFO error (status=%08x)\n", + status); + data->error = -EIO; + } + + if (host->present && data->stop + && atmci_is_completed(host, EVENT_CMD_COMPLETE) + && !atmci_test_and_set_completed( + host, EVENT_STOP_SENT)) + send_stop_cmd(host->mmc, data); + + host->data = NULL; + } + if (atmci_test_and_clear_pending(host, EVENT_DATA_COMPLETE)) { + atmci_set_completed(host, EVENT_DATA_COMPLETE); + + if (!atmci_is_completed(host, EVENT_DATA_ERROR)) { + data->bytes_xfered = data->blocks * data->blksz; + data->error = 0; + } + + host->data = NULL; + } + + if (host->mrq && !host->cmd && !host->data) + atmci_request_end(mmc, host->mrq); +} + +static void atmci_read_data_pio(struct atmel_mci *host) +{ + struct scatterlist *sg = host->sg; + void *buf = sg_virt(sg); + unsigned int offset = host->pio_offset; + struct mmc_data *data = host->data; + u32 value; + u32 status; + unsigned int nbytes = 0; + + do { + value = mci_readl(host, RDR); + if (likely(offset + 4 <= sg->length)) { + put_unaligned(value, (u32 *)(buf + offset)); + + offset += 4; + nbytes += 4; + + if (offset == sg->length) { + host->sg = sg = sg_next(sg); + if (!sg) + goto done; + + offset = 0; + buf = sg_virt(sg); + } + } else { + unsigned int remaining = sg->length - offset; + memcpy(buf + offset, &value, remaining); + nbytes += remaining; + + flush_dcache_page(sg_page(sg)); + host->sg = sg = sg_next(sg); + if (!sg) + goto done; + + offset = 4 - remaining; + buf = sg_virt(sg); + memcpy(buf, (u8 *)&value + remaining, offset); + nbytes += offset; + } + + status = mci_readl(host, SR); + if (status & ATMCI_DATA_ERROR_FLAGS) { + mci_writel(host, IDR, (MCI_NOTBUSY | MCI_RXRDY + | ATMCI_DATA_ERROR_FLAGS)); + host->data_status = status; + atmci_set_pending(host, EVENT_DATA_ERROR); + tasklet_schedule(&host->tasklet); + break; + } + } while (status & MCI_RXRDY); + + host->pio_offset = offset; + data->bytes_xfered += nbytes; + + return; + +done: + mci_writel(host, IDR, MCI_RXRDY); + mci_writel(host, IER, MCI_NOTBUSY); + data->bytes_xfered += nbytes; + atmci_set_completed(host, EVENT_XFER_COMPLETE); + if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE) + && !atmci_test_and_set_completed(host, EVENT_STOP_SENT)) + send_stop_cmd(host->mmc, data); +} + +static void atmci_write_data_pio(struct atmel_mci *host) +{ + struct scatterlist *sg = host->sg; + void *buf = sg_virt(sg); + unsigned int offset = host->pio_offset; + struct mmc_data *data = host->data; + u32 value; + u32 status; + unsigned int nbytes = 0; + + do { + if (likely(offset + 4 <= sg->length)) { + value = get_unaligned((u32 *)(buf + offset)); + mci_writel(host, TDR, value); + + offset += 4; + nbytes += 4; + if (offset == sg->length) { + host->sg = sg = sg_next(sg); + if (!sg) + goto done; + + offset = 0; + buf = sg_virt(sg); + } + } else { + unsigned int remaining = sg->length - offset; + + value = 0; + memcpy(&value, buf + offset, remaining); + nbytes += remaining; + + host->sg = sg = sg_next(sg); + if (!sg) { + mci_writel(host, TDR, value); + goto done; + } + + offset = 4 - remaining; + buf = sg_virt(sg); + memcpy((u8 *)&value + remaining, buf, offset); + mci_writel(host, TDR, value); + nbytes += offset; + } + + status = mci_readl(host, SR); + if (status & ATMCI_DATA_ERROR_FLAGS) { + mci_writel(host, IDR, (MCI_NOTBUSY | MCI_TXRDY + | ATMCI_DATA_ERROR_FLAGS)); + host->data_status = status; + atmci_set_pending(host, EVENT_DATA_ERROR); + tasklet_schedule(&host->tasklet); + break; + } + } while (status & MCI_TXRDY); + + host->pio_offset = offset; + data->bytes_xfered += nbytes; + + return; + +done: + mci_writel(host, IDR, MCI_TXRDY); + mci_writel(host, IER, MCI_NOTBUSY); + data->bytes_xfered += nbytes; + atmci_set_completed(host, EVENT_XFER_COMPLETE); + if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE) + && !atmci_test_and_set_completed(host, EVENT_STOP_SENT)) + send_stop_cmd(host->mmc, data); +} + +static void atmci_cmd_interrupt(struct mmc_host *mmc, u32 status) +{ + struct atmel_mci *host = mmc_priv(mmc); + + mci_writel(host, IDR, MCI_CMDRDY); + + if (atmci_is_completed(host, EVENT_STOP_SENT)) { + host->stop_status = status; + atmci_set_pending(host, EVENT_STOP_COMPLETE); + } else { + host->cmd_status = status; + atmci_set_pending(host, EVENT_CMD_COMPLETE); + } + + tasklet_schedule(&host->tasklet); +} + +static irqreturn_t atmci_interrupt(int irq, void *dev_id) +{ + struct mmc_host *mmc = dev_id; + struct atmel_mci *host = mmc_priv(mmc); + u32 status, mask, pending; + unsigned int pass_count = 0; + + spin_lock(&mmc->lock); + + do { + status = mci_readl(host, SR); + mask = mci_readl(host, IMR); + pending = status & mask; + if (!pending) + break; + + if (pending & ATMCI_DATA_ERROR_FLAGS) { + mci_writel(host, IDR, ATMCI_DATA_ERROR_FLAGS + | MCI_RXRDY | MCI_TXRDY); + pending &= mci_readl(host, IMR); + host->data_status = status; + atmci_set_pending(host, EVENT_DATA_ERROR); + tasklet_schedule(&host->tasklet); + } + if (pending & MCI_NOTBUSY) { + mci_writel(host, IDR, (MCI_NOTBUSY + | ATMCI_DATA_ERROR_FLAGS)); + atmci_set_pending(host, EVENT_DATA_COMPLETE); + tasklet_schedule(&host->tasklet); + } + if (pending & MCI_RXRDY) + atmci_read_data_pio(host); + if (pending & MCI_TXRDY) + atmci_write_data_pio(host); + + if (pending & MCI_CMDRDY) + atmci_cmd_interrupt(mmc, status); + } while (pass_count++ < 5); + + spin_unlock(&mmc->lock); + + return pass_count ? IRQ_HANDLED : IRQ_NONE; +} + +static irqreturn_t atmci_detect_interrupt(int irq, void *dev_id) +{ + struct mmc_host *mmc = dev_id; + struct atmel_mci *host = mmc_priv(mmc); + + /* + * Disable interrupts until the pin has stabilized and check + * the state then. Use mod_timer() since we may be in the + * middle of the timer routine when this interrupt triggers. + */ + disable_irq_nosync(irq); + mod_timer(&host->detect_timer, jiffies + msecs_to_jiffies(20)); + + return IRQ_HANDLED; +} + +static int __init atmci_probe(struct platform_device *pdev) +{ + struct mci_platform_data *pdata; + struct atmel_mci *host; + struct mmc_host *mmc; + struct resource *regs; + int irq; + int ret; + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) + return -ENXIO; + pdata = pdev->dev.platform_data; + if (!pdata) + return -ENXIO; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + mmc = mmc_alloc_host(sizeof(struct atmel_mci), &pdev->dev); + if (!mmc) + return -ENOMEM; + + host = mmc_priv(mmc); + host->pdev = pdev; + host->mmc = mmc; + host->detect_pin = pdata->detect_pin; + host->wp_pin = pdata->wp_pin; + + host->mck = clk_get(&pdev->dev, "mci_clk"); + if (IS_ERR(host->mck)) { + ret = PTR_ERR(host->mck); + goto err_clk_get; + } + + ret = -ENOMEM; + host->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!host->regs) + goto err_ioremap; + + clk_enable(host->mck); + mci_writel(host, CR, MCI_CR_SWRST); + host->bus_hz = clk_get_rate(host->mck); + clk_disable(host->mck); + + host->mapbase = regs->start; + + mmc->ops = &atmci_ops; + mmc->f_min = (host->bus_hz + 511) / 512; + mmc->f_max = host->bus_hz / 2; + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + mmc->caps |= MMC_CAP_4_BIT_DATA; + + mmc->max_hw_segs = 64; + mmc->max_phys_segs = 64; + mmc->max_req_size = 32768 * 512; + mmc->max_blk_size = 32768; + mmc->max_blk_count = 512; + + tasklet_init(&host->tasklet, atmci_tasklet_func, (unsigned long)mmc); + + ret = request_irq(irq, atmci_interrupt, 0, pdev->dev.bus_id, mmc); + if (ret) + goto err_request_irq; + + /* Assume card is present if we don't have a detect pin */ + host->present = 1; + if (host->detect_pin >= 0) { + if (gpio_request(host->detect_pin, "mmc_detect")) { + dev_dbg(&mmc->class_dev, "no detect pin available\n"); + host->detect_pin = -1; + } else { + host->present = !gpio_get_value(host->detect_pin); + } + } + if (host->wp_pin >= 0) { + if (gpio_request(host->wp_pin, "mmc_wp")) { + dev_dbg(&mmc->class_dev, "no WP pin available\n"); + host->wp_pin = -1; + } + } + + platform_set_drvdata(pdev, host); + + mmc_add_host(mmc); + + if (host->detect_pin >= 0) { + setup_timer(&host->detect_timer, atmci_detect_change, + (unsigned long)host); + + ret = request_irq(gpio_to_irq(host->detect_pin), + atmci_detect_interrupt, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, + "mmc-detect", mmc); + if (ret) { + dev_dbg(&mmc->class_dev, + "could not request IRQ %d for detect pin\n", + gpio_to_irq(host->detect_pin)); + gpio_free(host->detect_pin); + host->detect_pin = -1; + } + } + + dev_info(&mmc->class_dev, + "Atmel MCI controller at 0x%08lx irq %d\n", + host->mapbase, irq); + + return 0; + +err_request_irq: + iounmap(host->regs); +err_ioremap: + clk_put(host->mck); +err_clk_get: + mmc_free_host(mmc); + return ret; +} + +static int __exit atmci_remove(struct platform_device *pdev) +{ + struct atmel_mci *host = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + + if (host) { + if (host->detect_pin >= 0) { + int pin = host->detect_pin; + + /* Make sure the timer doesn't enable the interrupt */ + host->detect_pin = -1; + smp_wmb(); + + free_irq(gpio_to_irq(pin), host->mmc); + del_timer_sync(&host->detect_timer); + gpio_free(pin); + } + + mmc_remove_host(host->mmc); + + clk_enable(host->mck); + mci_writel(host, IDR, ~0UL); + mci_writel(host, CR, MCI_CR_MCIDIS); + mci_readl(host, SR); + clk_disable(host->mck); + + if (host->wp_pin >= 0) + gpio_free(host->wp_pin); + + free_irq(platform_get_irq(pdev, 0), host->mmc); + iounmap(host->regs); + + clk_put(host->mck); + + mmc_free_host(host->mmc); + } + return 0; +} + +static struct platform_driver atmci_driver = { + .remove = __exit_p(atmci_remove), + .driver = { + .name = "atmel_mci", + }, +}; + +static int __init atmci_init(void) +{ + return platform_driver_probe(&atmci_driver, atmci_probe); +} + +static void __exit atmci_exit(void) +{ + platform_driver_unregister(&atmci_driver); +} + +module_init(atmci_init); +module_exit(atmci_exit); + +MODULE_DESCRIPTION("Atmel Multimedia Card Interface driver"); +MODULE_AUTHOR("Haavard Skinnemoen "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index cc5f7bc546af..3f15eb204895 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -21,7 +21,7 @@ * published by the Free Software Foundation. */ -/* Why is a timer used to detect insert events? +/* Why don't we use the SD controllers' carddetect feature? * * From the AU1100 MMC application guide: * If the Au1100-based design is intended to support both MultiMediaCards @@ -30,8 +30,6 @@ * In doing so, a MMC card never enters SPI-mode communications, * but now the SecureDigital card-detect feature of CD/DAT3 is ineffective * (the low to high transition will not occur). - * - * So we use the timer to check the status manually. */ #include @@ -41,51 +39,110 @@ #include #include #include - +#include #include + #include #include #include #include -#include -#include "au1xmmc.h" - #define DRIVER_NAME "au1xxx-mmc" /* Set this to enable special debugging macros */ +/* #define DEBUG */ #ifdef DEBUG -#define DBG(fmt, idx, args...) printk("au1xx(%d): DEBUG: " fmt, idx, ##args) +#define DBG(fmt, idx, args...) \ + printk(KERN_DEBUG "au1xmmc(%d): DEBUG: " fmt, idx, ##args) #else -#define DBG(fmt, idx, args...) +#define DBG(fmt, idx, args...) do {} while (0) #endif -const struct { +/* Hardware definitions */ +#define AU1XMMC_DESCRIPTOR_COUNT 1 +#define AU1XMMC_DESCRIPTOR_SIZE 2048 + +#define AU1XMMC_OCR (MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30 | \ + MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33 | \ + MMC_VDD_33_34 | MMC_VDD_34_35 | MMC_VDD_35_36) + +/* This gives us a hard value for the stop command that we can write directly + * to the command register. + */ +#define STOP_CMD \ + (SD_CMD_RT_1B | SD_CMD_CT_7 | (0xC << SD_CMD_CI_SHIFT) | SD_CMD_GO) + +/* This is the set of interrupts that we configure by default. */ +#define AU1XMMC_INTERRUPTS \ + (SD_CONFIG_SC | SD_CONFIG_DT | SD_CONFIG_RAT | \ + SD_CONFIG_CR | SD_CONFIG_I) + +/* The poll event (looking for insert/remove events runs twice a second. */ +#define AU1XMMC_DETECT_TIMEOUT (HZ/2) + +struct au1xmmc_host { + struct mmc_host *mmc; + struct mmc_request *mrq; + + u32 flags; u32 iobase; - u32 tx_devid, rx_devid; - u16 bcsrpwr; - u16 bcsrstatus; - u16 wpstatus; -} au1xmmc_card_table[] = { - { SD0_BASE, DSCR_CMD0_SDMS_TX0, DSCR_CMD0_SDMS_RX0, - BCSR_BOARD_SD0PWR, BCSR_INT_SD0INSERT, BCSR_STATUS_SD0WP }, -#ifndef CONFIG_MIPS_DB1200 - { SD1_BASE, DSCR_CMD0_SDMS_TX1, DSCR_CMD0_SDMS_RX1, - BCSR_BOARD_DS1PWR, BCSR_INT_SD1INSERT, BCSR_STATUS_SD1WP } -#endif -}; + u32 clock; + u32 bus_width; + u32 power_mode; -#define AU1XMMC_CONTROLLER_COUNT (ARRAY_SIZE(au1xmmc_card_table)) + int status; -/* This array stores pointers for the hosts (used by the IRQ handler) */ -struct au1xmmc_host *au1xmmc_hosts[AU1XMMC_CONTROLLER_COUNT]; -static int dma = 1; + struct { + int len; + int dir; + } dma; -#ifdef MODULE -module_param(dma, bool, 0); -MODULE_PARM_DESC(dma, "Use DMA engine for data transfers (0 = disabled)"); -#endif + struct { + int index; + int offset; + int len; + } pio; + + u32 tx_chan; + u32 rx_chan; + + int irq; + + struct tasklet_struct finish_task; + struct tasklet_struct data_task; + struct au1xmmc_platform_data *platdata; + struct platform_device *pdev; + struct resource *ioarea; +}; + +/* Status flags used by the host structure */ +#define HOST_F_XMIT 0x0001 +#define HOST_F_RECV 0x0002 +#define HOST_F_DMA 0x0010 +#define HOST_F_ACTIVE 0x0100 +#define HOST_F_STOP 0x1000 + +#define HOST_S_IDLE 0x0001 +#define HOST_S_CMD 0x0002 +#define HOST_S_DATA 0x0003 +#define HOST_S_STOP 0x0004 + +/* Easy access macros */ +#define HOST_STATUS(h) ((h)->iobase + SD_STATUS) +#define HOST_CONFIG(h) ((h)->iobase + SD_CONFIG) +#define HOST_ENABLE(h) ((h)->iobase + SD_ENABLE) +#define HOST_TXPORT(h) ((h)->iobase + SD_TXPORT) +#define HOST_RXPORT(h) ((h)->iobase + SD_RXPORT) +#define HOST_CMDARG(h) ((h)->iobase + SD_CMDARG) +#define HOST_BLKSIZE(h) ((h)->iobase + SD_BLKSIZE) +#define HOST_CMD(h) ((h)->iobase + SD_CMD) +#define HOST_CONFIG2(h) ((h)->iobase + SD_CONFIG2) +#define HOST_TIMEOUT(h) ((h)->iobase + SD_TIMEOUT) +#define HOST_DEBUG(h) ((h)->iobase + SD_DEBUG) + +#define DMA_CHANNEL(h) \ + (((h)->flags & HOST_F_XMIT) ? (h)->tx_chan : (h)->rx_chan) static inline void IRQ_ON(struct au1xmmc_host *host, u32 mask) { @@ -119,14 +176,13 @@ static inline void IRQ_OFF(struct au1xmmc_host *host, u32 mask) static inline void SEND_STOP(struct au1xmmc_host *host) { - - /* We know the value of CONFIG2, so avoid a read we don't need */ - u32 mask = SD_CONFIG2_EN; + u32 config2; WARN_ON(host->status != HOST_S_DATA); host->status = HOST_S_STOP; - au_writel(mask | SD_CONFIG2_DF, HOST_CONFIG2(host)); + config2 = au_readl(HOST_CONFIG2(host)); + au_writel(config2 | SD_CONFIG2_DF, HOST_CONFIG2(host)); au_sync(); /* Send the stop commmand */ @@ -135,35 +191,36 @@ static inline void SEND_STOP(struct au1xmmc_host *host) static void au1xmmc_set_power(struct au1xmmc_host *host, int state) { - - u32 val = au1xmmc_card_table[host->id].bcsrpwr; - - bcsr->board &= ~val; - if (state) bcsr->board |= val; - - au_sync_delay(1); + if (host->platdata && host->platdata->set_power) + host->platdata->set_power(host->mmc, state); } -static inline int au1xmmc_card_inserted(struct au1xmmc_host *host) +static int au1xmmc_card_inserted(struct mmc_host *mmc) { - return (bcsr->sig_status & au1xmmc_card_table[host->id].bcsrstatus) - ? 1 : 0; + struct au1xmmc_host *host = mmc_priv(mmc); + + if (host->platdata && host->platdata->card_inserted) + return !!host->platdata->card_inserted(host->mmc); + + return -ENOSYS; } static int au1xmmc_card_readonly(struct mmc_host *mmc) { struct au1xmmc_host *host = mmc_priv(mmc); - return (bcsr->status & au1xmmc_card_table[host->id].wpstatus) - ? 1 : 0; + + if (host->platdata && host->platdata->card_readonly) + return !!host->platdata->card_readonly(mmc); + + return -ENOSYS; } static void au1xmmc_finish_request(struct au1xmmc_host *host) { - struct mmc_request *mrq = host->mrq; host->mrq = NULL; - host->flags &= HOST_F_ACTIVE; + host->flags &= HOST_F_ACTIVE | HOST_F_DMA; host->dma.len = 0; host->dma.dir = 0; @@ -174,8 +231,6 @@ static void au1xmmc_finish_request(struct au1xmmc_host *host) host->status = HOST_S_IDLE; - bcsr->disk_leds |= (1 << 8); - mmc_request_done(host->mmc, mrq); } @@ -235,18 +290,14 @@ static int au1xmmc_send_command(struct au1xmmc_host *host, int wait, au_sync(); /* Wait for the command to go on the line */ - - while(1) { - if (!(au_readl(HOST_CMD(host)) & SD_CMD_GO)) - break; - } + while (au_readl(HOST_CMD(host)) & SD_CMD_GO) + /* nop */; /* Wait for the command to come back */ - if (wait) { u32 status = au_readl(HOST_STATUS(host)); - while(!(status & SD_STATUS_CR)) + while (!(status & SD_STATUS_CR)) status = au_readl(HOST_STATUS(host)); /* Clear the CR status */ @@ -260,12 +311,11 @@ static int au1xmmc_send_command(struct au1xmmc_host *host, int wait, static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status) { - struct mmc_request *mrq = host->mrq; struct mmc_data *data; u32 crc; - WARN_ON(host->status != HOST_S_DATA && host->status != HOST_S_STOP); + WARN_ON((host->status != HOST_S_DATA) && (host->status != HOST_S_STOP)); if (host->mrq == NULL) return; @@ -276,15 +326,13 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status) status = au_readl(HOST_STATUS(host)); /* The transaction is really over when the SD_STATUS_DB bit is clear */ - - while((host->flags & HOST_F_XMIT) && (status & SD_STATUS_DB)) + while ((host->flags & HOST_F_XMIT) && (status & SD_STATUS_DB)) status = au_readl(HOST_STATUS(host)); data->error = 0; dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, host->dma.dir); /* Process any errors */ - crc = (status & (SD_STATUS_WC | SD_STATUS_RC)); if (host->flags & HOST_F_XMIT) crc |= ((status & 0x07) == 0x02) ? 0 : 1; @@ -299,16 +347,16 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status) if (!data->error) { if (host->flags & HOST_F_DMA) { +#ifdef CONFIG_SOC_AU1200 /* DBDMA */ u32 chan = DMA_CHANNEL(host); - chan_tab_t *c = *((chan_tab_t **) chan); + chan_tab_t *c = *((chan_tab_t **)chan); au1x_dma_chan_t *cp = c->chan_ptr; data->bytes_xfered = cp->ddma_bytecnt; - } - else +#endif + } else data->bytes_xfered = - (data->blocks * data->blksz) - - host->pio.len; + (data->blocks * data->blksz) - host->pio.len; } au1xmmc_finish_request(host); @@ -316,7 +364,7 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status) static void au1xmmc_tasklet_data(unsigned long param) { - struct au1xmmc_host *host = (struct au1xmmc_host *) param; + struct au1xmmc_host *host = (struct au1xmmc_host *)param; u32 status = au_readl(HOST_STATUS(host)); au1xmmc_data_complete(host, status); @@ -326,11 +374,10 @@ static void au1xmmc_tasklet_data(unsigned long param) static void au1xmmc_send_pio(struct au1xmmc_host *host) { - - struct mmc_data *data = 0; - int sg_len, max, count = 0; - unsigned char *sg_ptr; - u32 status = 0; + struct mmc_data *data; + int sg_len, max, count; + unsigned char *sg_ptr, val; + u32 status; struct scatterlist *sg; data = host->mrq->data; @@ -345,14 +392,12 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host) /* This is the space left inside the buffer */ sg_len = data->sg[host->pio.index].length - host->pio.offset; - /* Check to if we need less then the size of the sg_buffer */ - + /* Check if we need less than the size of the sg_buffer */ max = (sg_len > host->pio.len) ? host->pio.len : sg_len; - if (max > AU1XMMC_MAX_TRANSFER) max = AU1XMMC_MAX_TRANSFER; - - for(count = 0; count < max; count++ ) { - unsigned char val; + if (max > AU1XMMC_MAX_TRANSFER) + max = AU1XMMC_MAX_TRANSFER; + for (count = 0; count < max; count++) { status = au_readl(HOST_STATUS(host)); if (!(status & SD_STATUS_TH)) @@ -360,7 +405,7 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host) val = *sg_ptr++; - au_writel((unsigned long) val, HOST_TXPORT(host)); + au_writel((unsigned long)val, HOST_TXPORT(host)); au_sync(); } @@ -384,11 +429,10 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host) static void au1xmmc_receive_pio(struct au1xmmc_host *host) { - - struct mmc_data *data = 0; - int sg_len = 0, max = 0, count = 0; - unsigned char *sg_ptr = 0; - u32 status = 0; + struct mmc_data *data; + int max, count, sg_len = 0; + unsigned char *sg_ptr = NULL; + u32 status, val; struct scatterlist *sg; data = host->mrq->data; @@ -405,33 +449,33 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) /* This is the space left inside the buffer */ sg_len = sg_dma_len(&data->sg[host->pio.index]) - host->pio.offset; - /* Check to if we need less then the size of the sg_buffer */ - if (sg_len < max) max = sg_len; + /* Check if we need less than the size of the sg_buffer */ + if (sg_len < max) + max = sg_len; } if (max > AU1XMMC_MAX_TRANSFER) max = AU1XMMC_MAX_TRANSFER; - for(count = 0; count < max; count++ ) { - u32 val; + for (count = 0; count < max; count++) { status = au_readl(HOST_STATUS(host)); if (!(status & SD_STATUS_NE)) break; if (status & SD_STATUS_RC) { - DBG("RX CRC Error [%d + %d].\n", host->id, + DBG("RX CRC Error [%d + %d].\n", host->pdev->id, host->pio.len, count); break; } if (status & SD_STATUS_RO) { - DBG("RX Overrun [%d + %d]\n", host->id, + DBG("RX Overrun [%d + %d]\n", host->pdev->id, host->pio.len, count); break; } else if (status & SD_STATUS_RU) { - DBG("RX Underrun [%d + %d]\n", host->id, + DBG("RX Underrun [%d + %d]\n", host->pdev->id, host->pio.len, count); break; } @@ -439,7 +483,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) val = au_readl(HOST_RXPORT(host)); if (sg_ptr) - *sg_ptr++ = (unsigned char) (val & 0xFF); + *sg_ptr++ = (unsigned char)(val & 0xFF); } host->pio.len -= count; @@ -451,7 +495,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) } if (host->pio.len == 0) { - //IRQ_OFF(host, SD_CONFIG_RA | SD_CONFIG_RF); + /* IRQ_OFF(host, SD_CONFIG_RA | SD_CONFIG_RF); */ IRQ_OFF(host, SD_CONFIG_NE); if (host->flags & HOST_F_STOP) @@ -461,17 +505,15 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host) } } -/* static void au1xmmc_cmd_complete - This is called when a command has been completed - grab the response - and check for errors. Then start the data transfer if it is indicated. -*/ - +/* This is called when a command has been completed - grab the response + * and check for errors. Then start the data transfer if it is indicated. + */ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) { - struct mmc_request *mrq = host->mrq; struct mmc_command *cmd; - int trans; + u32 r[4]; + int i, trans; if (!host->mrq) return; @@ -481,9 +523,6 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) if (cmd->flags & MMC_RSP_PRESENT) { if (cmd->flags & MMC_RSP_136) { - u32 r[4]; - int i; - r[0] = au_readl(host->iobase + SD_RESP3); r[1] = au_readl(host->iobase + SD_RESP2); r[2] = au_readl(host->iobase + SD_RESP1); @@ -491,10 +530,9 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) /* The CRC is omitted from the response, so really * we only got 120 bytes, but the engine expects - * 128 bits, so we have to shift things up + * 128 bits, so we have to shift things up. */ - - for(i = 0; i < 4; i++) { + for (i = 0; i < 4; i++) { cmd->resp[i] = (r[i] & 0x00FFFFFF) << 8; if (i != 3) cmd->resp[i] |= (r[i + 1] & 0xFF000000) >> 24; @@ -505,22 +543,20 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) * our response omits the CRC, our data ends up * being shifted 8 bits to the right. In this case, * that means that the OSR data starts at bit 31, - * so we can just read RESP0 and return that + * so we can just read RESP0 and return that. */ cmd->resp[0] = au_readl(host->iobase + SD_RESP0); } } /* Figure out errors */ - if (status & (SD_STATUS_SC | SD_STATUS_WC | SD_STATUS_RC)) cmd->error = -EILSEQ; trans = host->flags & (HOST_F_XMIT | HOST_F_RECV); if (!trans || cmd->error) { - - IRQ_OFF(host, SD_CONFIG_TH | SD_CONFIG_RA|SD_CONFIG_RF); + IRQ_OFF(host, SD_CONFIG_TH | SD_CONFIG_RA | SD_CONFIG_RF); tasklet_schedule(&host->finish_task); return; } @@ -528,6 +564,7 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) host->status = HOST_S_DATA; if (host->flags & HOST_F_DMA) { +#ifdef CONFIG_SOC_AU1200 /* DBDMA */ u32 channel = DMA_CHANNEL(host); /* Start the DMA as soon as the buffer gets something in it */ @@ -540,23 +577,21 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status) } au1xxx_dbdma_start(channel); +#endif } } static void au1xmmc_set_clock(struct au1xmmc_host *host, int rate) { - unsigned int pbus = get_au1x00_speed(); unsigned int divisor; u32 config; /* From databook: - divisor = ((((cpuclock / sbus_divisor) / 2) / mmcclock) / 2) - 1 - */ - + * divisor = ((((cpuclock / sbus_divisor) / 2) / mmcclock) / 2) - 1 + */ pbus /= ((au_readl(SYS_POWERCTRL) & 0x3) + 2); pbus /= 2; - divisor = ((pbus / rate) / 2) - 1; config = au_readl(HOST_CONFIG(host)); @@ -568,15 +603,11 @@ static void au1xmmc_set_clock(struct au1xmmc_host *host, int rate) au_sync(); } -static int -au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) +static int au1xmmc_prepare_data(struct au1xmmc_host *host, + struct mmc_data *data) { - int datalen = data->blocks * data->blksz; - if (dma != 0) - host->flags |= HOST_F_DMA; - if (data->flags & MMC_DATA_READ) host->flags |= HOST_F_RECV; else @@ -596,12 +627,13 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) au_writel(data->blksz - 1, HOST_BLKSIZE(host)); if (host->flags & HOST_F_DMA) { +#ifdef CONFIG_SOC_AU1200 /* DBDMA */ int i; u32 channel = DMA_CHANNEL(host); au1xxx_dbdma_stop(channel); - for(i = 0; i < host->dma.len; i++) { + for (i = 0; i < host->dma.len; i++) { u32 ret = 0, flags = DDMA_FLAGS_NOIE; struct scatterlist *sg = &data->sg[i]; int sg_len = sg->length; @@ -611,23 +643,21 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) if (i == host->dma.len - 1) flags = DDMA_FLAGS_IE; - if (host->flags & HOST_F_XMIT){ - ret = au1xxx_dbdma_put_source_flags(channel, - (void *) sg_virt(sg), len, flags); - } - else { - ret = au1xxx_dbdma_put_dest_flags(channel, - (void *) sg_virt(sg), - len, flags); + if (host->flags & HOST_F_XMIT) { + ret = au1xxx_dbdma_put_source_flags(channel, + (void *)sg_virt(sg), len, flags); + } else { + ret = au1xxx_dbdma_put_dest_flags(channel, + (void *)sg_virt(sg), len, flags); } - if (!ret) + if (!ret) goto dataerr; datalen -= len; } - } - else { +#endif + } else { host->pio.index = 0; host->pio.offset = 0; host->pio.len = datalen; @@ -636,25 +666,21 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data) IRQ_ON(host, SD_CONFIG_TH); else IRQ_ON(host, SD_CONFIG_NE); - //IRQ_ON(host, SD_CONFIG_RA|SD_CONFIG_RF); + /* IRQ_ON(host, SD_CONFIG_RA | SD_CONFIG_RF); */ } return 0; - dataerr: - dma_unmap_sg(mmc_dev(host->mmc),data->sg,data->sg_len,host->dma.dir); +dataerr: + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + host->dma.dir); return -ETIMEDOUT; } -/* static void au1xmmc_request - This actually starts a command or data transaction -*/ - +/* This actually starts a command or data transaction */ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq) { - struct au1xmmc_host *host = mmc_priv(mmc); - unsigned int flags = 0; int ret = 0; WARN_ON(irqs_disabled()); @@ -663,11 +689,15 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq) host->mrq = mrq; host->status = HOST_S_CMD; - bcsr->disk_leds &= ~(1 << 8); + /* fail request immediately if no card is present */ + if (0 == au1xmmc_card_inserted(mmc)) { + mrq->cmd->error = -ENOMEDIUM; + au1xmmc_finish_request(host); + return; + } if (mrq->data) { FLUSH_FIFO(host); - flags = mrq->data->flags; ret = au1xmmc_prepare_data(host, mrq->data); } @@ -682,7 +712,6 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq) static void au1xmmc_reset_controller(struct au1xmmc_host *host) { - /* Apply the clock */ au_writel(SD_ENABLE_CE, HOST_ENABLE(host)); au_sync_delay(1); @@ -712,9 +741,10 @@ static void au1xmmc_reset_controller(struct au1xmmc_host *host) } -static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios) +static void au1xmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct au1xmmc_host *host = mmc_priv(mmc); + u32 config2; if (ios->power_mode == MMC_POWER_OFF) au1xmmc_set_power(host, 0); @@ -726,21 +756,18 @@ static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios) au1xmmc_set_clock(host, ios->clock); host->clock = ios->clock; } -} - -static void au1xmmc_dma_callback(int irq, void *dev_id) -{ - struct au1xmmc_host *host = (struct au1xmmc_host *) dev_id; - - /* Avoid spurious interrupts */ - if (!host->mrq) - return; - - if (host->flags & HOST_F_STOP) - SEND_STOP(host); - - tasklet_schedule(&host->data_task); + config2 = au_readl(HOST_CONFIG2(host)); + switch (ios->bus_width) { + case MMC_BUS_WIDTH_4: + config2 |= SD_CONFIG2_WB; + break; + case MMC_BUS_WIDTH_1: + config2 &= ~SD_CONFIG2_WB; + break; + } + au_writel(config2, HOST_CONFIG2(host)); + au_sync(); } #define STATUS_TIMEOUT (SD_STATUS_RAT | SD_STATUS_DT) @@ -749,245 +776,354 @@ static void au1xmmc_dma_callback(int irq, void *dev_id) static irqreturn_t au1xmmc_irq(int irq, void *dev_id) { - + struct au1xmmc_host *host = dev_id; u32 status; - int i, ret = 0; - - disable_irq(AU1100_SD_IRQ); - for(i = 0; i < AU1XMMC_CONTROLLER_COUNT; i++) { - struct au1xmmc_host * host = au1xmmc_hosts[i]; - u32 handled = 1; + status = au_readl(HOST_STATUS(host)); - status = au_readl(HOST_STATUS(host)); + if (!(status & SD_STATUS_I)) + return IRQ_NONE; /* not ours */ - if (host->mrq && (status & STATUS_TIMEOUT)) { - if (status & SD_STATUS_RAT) - host->mrq->cmd->error = -ETIMEDOUT; + if (status & SD_STATUS_SI) /* SDIO */ + mmc_signal_sdio_irq(host->mmc); - else if (status & SD_STATUS_DT) - host->mrq->data->error = -ETIMEDOUT; + if (host->mrq && (status & STATUS_TIMEOUT)) { + if (status & SD_STATUS_RAT) + host->mrq->cmd->error = -ETIMEDOUT; + else if (status & SD_STATUS_DT) + host->mrq->data->error = -ETIMEDOUT; - /* In PIO mode, interrupts might still be enabled */ - IRQ_OFF(host, SD_CONFIG_NE | SD_CONFIG_TH); + /* In PIO mode, interrupts might still be enabled */ + IRQ_OFF(host, SD_CONFIG_NE | SD_CONFIG_TH); - //IRQ_OFF(host, SD_CONFIG_TH|SD_CONFIG_RA|SD_CONFIG_RF); - tasklet_schedule(&host->finish_task); - } + /* IRQ_OFF(host, SD_CONFIG_TH | SD_CONFIG_RA | SD_CONFIG_RF); */ + tasklet_schedule(&host->finish_task); + } #if 0 - else if (status & SD_STATUS_DD) { - - /* Sometimes we get a DD before a NE in PIO mode */ - - if (!(host->flags & HOST_F_DMA) && - (status & SD_STATUS_NE)) - au1xmmc_receive_pio(host); - else { - au1xmmc_data_complete(host, status); - //tasklet_schedule(&host->data_task); - } + else if (status & SD_STATUS_DD) { + /* Sometimes we get a DD before a NE in PIO mode */ + if (!(host->flags & HOST_F_DMA) && (status & SD_STATUS_NE)) + au1xmmc_receive_pio(host); + else { + au1xmmc_data_complete(host, status); + /* tasklet_schedule(&host->data_task); */ } + } #endif - else if (status & (SD_STATUS_CR)) { - if (host->status == HOST_S_CMD) - au1xmmc_cmd_complete(host,status); - } - else if (!(host->flags & HOST_F_DMA)) { - if ((host->flags & HOST_F_XMIT) && - (status & STATUS_DATA_OUT)) - au1xmmc_send_pio(host); - else if ((host->flags & HOST_F_RECV) && - (status & STATUS_DATA_IN)) - au1xmmc_receive_pio(host); - } - else if (status & 0x203FBC70) { - DBG("Unhandled status %8.8x\n", host->id, status); - handled = 0; - } - - au_writel(status, HOST_STATUS(host)); - au_sync(); - - ret |= handled; + else if (status & SD_STATUS_CR) { + if (host->status == HOST_S_CMD) + au1xmmc_cmd_complete(host, status); + + } else if (!(host->flags & HOST_F_DMA)) { + if ((host->flags & HOST_F_XMIT) && (status & STATUS_DATA_OUT)) + au1xmmc_send_pio(host); + else if ((host->flags & HOST_F_RECV) && (status & STATUS_DATA_IN)) + au1xmmc_receive_pio(host); + + } else if (status & 0x203F3C70) { + DBG("Unhandled status %8.8x\n", host->pdev->id, + status); } - enable_irq(AU1100_SD_IRQ); - return ret; + au_writel(status, HOST_STATUS(host)); + au_sync(); + + return IRQ_HANDLED; } -static void au1xmmc_poll_event(unsigned long arg) -{ - struct au1xmmc_host *host = (struct au1xmmc_host *) arg; +#ifdef CONFIG_SOC_AU1200 +/* 8bit memory DMA device */ +static dbdev_tab_t au1xmmc_mem_dbdev = { + .dev_id = DSCR_CMD0_ALWAYS, + .dev_flags = DEV_FLAGS_ANYUSE, + .dev_tsize = 0, + .dev_devwidth = 8, + .dev_physaddr = 0x00000000, + .dev_intlevel = 0, + .dev_intpolarity = 0, +}; +static int memid; - int card = au1xmmc_card_inserted(host); - int controller = (host->flags & HOST_F_ACTIVE) ? 1 : 0; +static void au1xmmc_dbdma_callback(int irq, void *dev_id) +{ + struct au1xmmc_host *host = (struct au1xmmc_host *)dev_id; - if (card != controller) { - host->flags &= ~HOST_F_ACTIVE; - if (card) host->flags |= HOST_F_ACTIVE; - mmc_detect_change(host->mmc, 0); - } + /* Avoid spurious interrupts */ + if (!host->mrq) + return; - if (host->mrq != NULL) { - u32 status = au_readl(HOST_STATUS(host)); - DBG("PENDING - %8.8x\n", host->id, status); - } + if (host->flags & HOST_F_STOP) + SEND_STOP(host); - mod_timer(&host->timer, jiffies + AU1XMMC_DETECT_TIMEOUT); + tasklet_schedule(&host->data_task); } -static dbdev_tab_t au1xmmc_mem_dbdev = -{ - DSCR_CMD0_ALWAYS, DEV_FLAGS_ANYUSE, 0, 8, 0x00000000, 0, 0 -}; - -static void au1xmmc_init_dma(struct au1xmmc_host *host) +static int au1xmmc_dbdma_init(struct au1xmmc_host *host) { + struct resource *res; + int txid, rxid; + + res = platform_get_resource(host->pdev, IORESOURCE_DMA, 0); + if (!res) + return -ENODEV; + txid = res->start; + + res = platform_get_resource(host->pdev, IORESOURCE_DMA, 1); + if (!res) + return -ENODEV; + rxid = res->start; + + if (!memid) + return -ENODEV; + + host->tx_chan = au1xxx_dbdma_chan_alloc(memid, txid, + au1xmmc_dbdma_callback, (void *)host); + if (!host->tx_chan) { + dev_err(&host->pdev->dev, "cannot allocate TX DMA\n"); + return -ENODEV; + } - u32 rxchan, txchan; - - int txid = au1xmmc_card_table[host->id].tx_devid; - int rxid = au1xmmc_card_table[host->id].rx_devid; + host->rx_chan = au1xxx_dbdma_chan_alloc(rxid, memid, + au1xmmc_dbdma_callback, (void *)host); + if (!host->rx_chan) { + dev_err(&host->pdev->dev, "cannot allocate RX DMA\n"); + au1xxx_dbdma_chan_free(host->tx_chan); + return -ENODEV; + } - /* DSCR_CMD0_ALWAYS has a stride of 32 bits, we need a stride - of 8 bits. And since devices are shared, we need to create - our own to avoid freaking out other devices - */ + au1xxx_dbdma_set_devwidth(host->tx_chan, 8); + au1xxx_dbdma_set_devwidth(host->rx_chan, 8); - int memid = au1xxx_ddma_add_device(&au1xmmc_mem_dbdev); + au1xxx_dbdma_ring_alloc(host->tx_chan, AU1XMMC_DESCRIPTOR_COUNT); + au1xxx_dbdma_ring_alloc(host->rx_chan, AU1XMMC_DESCRIPTOR_COUNT); - txchan = au1xxx_dbdma_chan_alloc(memid, txid, - au1xmmc_dma_callback, (void *) host); + /* DBDMA is good to go */ + host->flags |= HOST_F_DMA; - rxchan = au1xxx_dbdma_chan_alloc(rxid, memid, - au1xmmc_dma_callback, (void *) host); + return 0; +} - au1xxx_dbdma_set_devwidth(txchan, 8); - au1xxx_dbdma_set_devwidth(rxchan, 8); +static void au1xmmc_dbdma_shutdown(struct au1xmmc_host *host) +{ + if (host->flags & HOST_F_DMA) { + host->flags &= ~HOST_F_DMA; + au1xxx_dbdma_chan_free(host->tx_chan); + au1xxx_dbdma_chan_free(host->rx_chan); + } +} +#endif - au1xxx_dbdma_ring_alloc(txchan, AU1XMMC_DESCRIPTOR_COUNT); - au1xxx_dbdma_ring_alloc(rxchan, AU1XMMC_DESCRIPTOR_COUNT); +static void au1xmmc_enable_sdio_irq(struct mmc_host *mmc, int en) +{ + struct au1xmmc_host *host = mmc_priv(mmc); - host->tx_chan = txchan; - host->rx_chan = rxchan; + if (en) + IRQ_ON(host, SD_CONFIG_SI); + else + IRQ_OFF(host, SD_CONFIG_SI); } static const struct mmc_host_ops au1xmmc_ops = { .request = au1xmmc_request, .set_ios = au1xmmc_set_ios, .get_ro = au1xmmc_card_readonly, + .get_cd = au1xmmc_card_inserted, + .enable_sdio_irq = au1xmmc_enable_sdio_irq, }; static int __devinit au1xmmc_probe(struct platform_device *pdev) { + struct mmc_host *mmc; + struct au1xmmc_host *host; + struct resource *r; + int ret; + + mmc = mmc_alloc_host(sizeof(struct au1xmmc_host), &pdev->dev); + if (!mmc) { + dev_err(&pdev->dev, "no memory for mmc_host\n"); + ret = -ENOMEM; + goto out0; + } - int i, ret = 0; - - /* THe interrupt is shared among all controllers */ - ret = request_irq(AU1100_SD_IRQ, au1xmmc_irq, IRQF_DISABLED, "MMC", 0); + host = mmc_priv(mmc); + host->mmc = mmc; + host->platdata = pdev->dev.platform_data; + host->pdev = pdev; - if (ret) { - printk(DRIVER_NAME "ERROR: Couldn't get int %d: %d\n", - AU1100_SD_IRQ, ret); - return -ENXIO; + ret = -ENODEV; + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(&pdev->dev, "no mmio defined\n"); + goto out1; } - disable_irq(AU1100_SD_IRQ); + host->ioarea = request_mem_region(r->start, r->end - r->start + 1, + pdev->name); + if (!host->ioarea) { + dev_err(&pdev->dev, "mmio already in use\n"); + goto out1; + } - for(i = 0; i < AU1XMMC_CONTROLLER_COUNT; i++) { - struct mmc_host *mmc = mmc_alloc_host(sizeof(struct au1xmmc_host), &pdev->dev); - struct au1xmmc_host *host = 0; + host->iobase = (unsigned long)ioremap(r->start, 0x3c); + if (!host->iobase) { + dev_err(&pdev->dev, "cannot remap mmio\n"); + goto out2; + } - if (!mmc) { - printk(DRIVER_NAME "ERROR: no mem for host %d\n", i); - au1xmmc_hosts[i] = 0; - continue; - } + r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!r) { + dev_err(&pdev->dev, "no IRQ defined\n"); + goto out3; + } - mmc->ops = &au1xmmc_ops; + host->irq = r->start; + /* IRQ is shared among both SD controllers */ + ret = request_irq(host->irq, au1xmmc_irq, IRQF_SHARED, + DRIVER_NAME, host); + if (ret) { + dev_err(&pdev->dev, "cannot grab IRQ\n"); + goto out3; + } - mmc->f_min = 450000; - mmc->f_max = 24000000; + mmc->ops = &au1xmmc_ops; - mmc->max_seg_size = AU1XMMC_DESCRIPTOR_SIZE; - mmc->max_phys_segs = AU1XMMC_DESCRIPTOR_COUNT; + mmc->f_min = 450000; + mmc->f_max = 24000000; - mmc->max_blk_size = 2048; - mmc->max_blk_count = 512; + mmc->max_seg_size = AU1XMMC_DESCRIPTOR_SIZE; + mmc->max_phys_segs = AU1XMMC_DESCRIPTOR_COUNT; - mmc->ocr_avail = AU1XMMC_OCR; + mmc->max_blk_size = 2048; + mmc->max_blk_count = 512; - host = mmc_priv(mmc); - host->mmc = mmc; + mmc->ocr_avail = AU1XMMC_OCR; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ; - host->id = i; - host->iobase = au1xmmc_card_table[host->id].iobase; - host->clock = 0; - host->power_mode = MMC_POWER_OFF; + host->status = HOST_S_IDLE; - host->flags = au1xmmc_card_inserted(host) ? HOST_F_ACTIVE : 0; - host->status = HOST_S_IDLE; + /* board-specific carddetect setup, if any */ + if (host->platdata && host->platdata->cd_setup) { + ret = host->platdata->cd_setup(mmc, 1); + if (ret) { + dev_warn(&pdev->dev, "board CD setup failed\n"); + mmc->caps |= MMC_CAP_NEEDS_POLL; + } + } else + mmc->caps |= MMC_CAP_NEEDS_POLL; - init_timer(&host->timer); + tasklet_init(&host->data_task, au1xmmc_tasklet_data, + (unsigned long)host); - host->timer.function = au1xmmc_poll_event; - host->timer.data = (unsigned long) host; - host->timer.expires = jiffies + AU1XMMC_DETECT_TIMEOUT; + tasklet_init(&host->finish_task, au1xmmc_tasklet_finish, + (unsigned long)host); - tasklet_init(&host->data_task, au1xmmc_tasklet_data, - (unsigned long) host); +#ifdef CONFIG_SOC_AU1200 + ret = au1xmmc_dbdma_init(host); + if (ret) + printk(KERN_INFO DRIVER_NAME ": DBDMA init failed; using PIO\n"); +#endif - tasklet_init(&host->finish_task, au1xmmc_tasklet_finish, - (unsigned long) host); +#ifdef CONFIG_LEDS_CLASS + if (host->platdata && host->platdata->led) { + struct led_classdev *led = host->platdata->led; + led->name = mmc_hostname(mmc); + led->brightness = LED_OFF; + led->default_trigger = mmc_hostname(mmc); + ret = led_classdev_register(mmc_dev(mmc), led); + if (ret) + goto out5; + } +#endif - spin_lock_init(&host->lock); + au1xmmc_reset_controller(host); - if (dma != 0) - au1xmmc_init_dma(host); + ret = mmc_add_host(mmc); + if (ret) { + dev_err(&pdev->dev, "cannot add mmc host\n"); + goto out6; + } - au1xmmc_reset_controller(host); + platform_set_drvdata(pdev, mmc); - mmc_add_host(mmc); - au1xmmc_hosts[i] = host; + printk(KERN_INFO DRIVER_NAME ": MMC Controller %d set up at %8.8X" + " (mode=%s)\n", pdev->id, host->iobase, + host->flags & HOST_F_DMA ? "dma" : "pio"); - add_timer(&host->timer); + return 0; /* all ok */ - printk(KERN_INFO DRIVER_NAME ": MMC Controller %d set up at %8.8X (mode=%s)\n", - host->id, host->iobase, dma ? "dma" : "pio"); - } +out6: +#ifdef CONFIG_LEDS_CLASS + if (host->platdata && host->platdata->led) + led_classdev_unregister(host->platdata->led); +out5: +#endif + au_writel(0, HOST_ENABLE(host)); + au_writel(0, HOST_CONFIG(host)); + au_writel(0, HOST_CONFIG2(host)); + au_sync(); - enable_irq(AU1100_SD_IRQ); +#ifdef CONFIG_SOC_AU1200 + au1xmmc_dbdma_shutdown(host); +#endif - return 0; + tasklet_kill(&host->data_task); + tasklet_kill(&host->finish_task); + + if (host->platdata && host->platdata->cd_setup && + !(mmc->caps & MMC_CAP_NEEDS_POLL)) + host->platdata->cd_setup(mmc, 0); + + free_irq(host->irq, host); +out3: + iounmap((void *)host->iobase); +out2: + release_resource(host->ioarea); + kfree(host->ioarea); +out1: + mmc_free_host(mmc); +out0: + return ret; } static int __devexit au1xmmc_remove(struct platform_device *pdev) { + struct mmc_host *mmc = platform_get_drvdata(pdev); + struct au1xmmc_host *host; + + if (mmc) { + host = mmc_priv(mmc); - int i; + mmc_remove_host(mmc); - disable_irq(AU1100_SD_IRQ); +#ifdef CONFIG_LEDS_CLASS + if (host->platdata && host->platdata->led) + led_classdev_unregister(host->platdata->led); +#endif - for(i = 0; i < AU1XMMC_CONTROLLER_COUNT; i++) { - struct au1xmmc_host *host = au1xmmc_hosts[i]; - if (!host) continue; + if (host->platdata && host->platdata->cd_setup && + !(mmc->caps & MMC_CAP_NEEDS_POLL)) + host->platdata->cd_setup(mmc, 0); + + au_writel(0, HOST_ENABLE(host)); + au_writel(0, HOST_CONFIG(host)); + au_writel(0, HOST_CONFIG2(host)); + au_sync(); tasklet_kill(&host->data_task); tasklet_kill(&host->finish_task); - del_timer_sync(&host->timer); +#ifdef CONFIG_SOC_AU1200 + au1xmmc_dbdma_shutdown(host); +#endif au1xmmc_set_power(host, 0); - mmc_remove_host(host->mmc); - - au1xxx_dbdma_chan_free(host->tx_chan); - au1xxx_dbdma_chan_free(host->rx_chan); + free_irq(host->irq, host); + iounmap((void *)host->iobase); + release_resource(host->ioarea); + kfree(host->ioarea); - au_writel(0x0, HOST_ENABLE(host)); - au_sync(); + mmc_free_host(mmc); } - - free_irq(AU1100_SD_IRQ, 0); return 0; } @@ -1004,21 +1140,31 @@ static struct platform_driver au1xmmc_driver = { static int __init au1xmmc_init(void) { +#ifdef CONFIG_SOC_AU1200 + /* DSCR_CMD0_ALWAYS has a stride of 32 bits, we need a stride + * of 8 bits. And since devices are shared, we need to create + * our own to avoid freaking out other devices. + */ + memid = au1xxx_ddma_add_device(&au1xmmc_mem_dbdev); + if (!memid) + printk(KERN_ERR "au1xmmc: cannot add memory dbdma dev\n"); +#endif return platform_driver_register(&au1xmmc_driver); } static void __exit au1xmmc_exit(void) { +#ifdef CONFIG_SOC_AU1200 + if (memid) + au1xxx_ddma_del_device(memid); +#endif platform_driver_unregister(&au1xmmc_driver); } module_init(au1xmmc_init); module_exit(au1xmmc_exit); -#ifdef MODULE MODULE_AUTHOR("Advanced Micro Devices, Inc"); MODULE_DESCRIPTION("MMC/SD driver for the Alchemy Au1XXX"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:au1xxx-mmc"); -#endif - diff --git a/drivers/mmc/host/au1xmmc.h b/drivers/mmc/host/au1xmmc.h deleted file mode 100644 index 341cbdf0baca..000000000000 --- a/drivers/mmc/host/au1xmmc.h +++ /dev/null @@ -1,96 +0,0 @@ -#ifndef _AU1XMMC_H_ -#define _AU1XMMC_H_ - -/* Hardware definitions */ - -#define AU1XMMC_DESCRIPTOR_COUNT 1 -#define AU1XMMC_DESCRIPTOR_SIZE 2048 - -#define AU1XMMC_OCR ( MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30 | \ - MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33 | \ - MMC_VDD_33_34 | MMC_VDD_34_35 | MMC_VDD_35_36) - -/* Easy access macros */ - -#define HOST_STATUS(h) ((h)->iobase + SD_STATUS) -#define HOST_CONFIG(h) ((h)->iobase + SD_CONFIG) -#define HOST_ENABLE(h) ((h)->iobase + SD_ENABLE) -#define HOST_TXPORT(h) ((h)->iobase + SD_TXPORT) -#define HOST_RXPORT(h) ((h)->iobase + SD_RXPORT) -#define HOST_CMDARG(h) ((h)->iobase + SD_CMDARG) -#define HOST_BLKSIZE(h) ((h)->iobase + SD_BLKSIZE) -#define HOST_CMD(h) ((h)->iobase + SD_CMD) -#define HOST_CONFIG2(h) ((h)->iobase + SD_CONFIG2) -#define HOST_TIMEOUT(h) ((h)->iobase + SD_TIMEOUT) -#define HOST_DEBUG(h) ((h)->iobase + SD_DEBUG) - -#define DMA_CHANNEL(h) \ - ( ((h)->flags & HOST_F_XMIT) ? (h)->tx_chan : (h)->rx_chan) - -/* This gives us a hard value for the stop command that we can write directly - * to the command register - */ - -#define STOP_CMD (SD_CMD_RT_1B|SD_CMD_CT_7|(0xC << SD_CMD_CI_SHIFT)|SD_CMD_GO) - -/* This is the set of interrupts that we configure by default */ - -#if 0 -#define AU1XMMC_INTERRUPTS (SD_CONFIG_SC | SD_CONFIG_DT | SD_CONFIG_DD | \ - SD_CONFIG_RAT | SD_CONFIG_CR | SD_CONFIG_I) -#endif - -#define AU1XMMC_INTERRUPTS (SD_CONFIG_SC | SD_CONFIG_DT | \ - SD_CONFIG_RAT | SD_CONFIG_CR | SD_CONFIG_I) -/* The poll event (looking for insert/remove events runs twice a second */ -#define AU1XMMC_DETECT_TIMEOUT (HZ/2) - -struct au1xmmc_host { - struct mmc_host *mmc; - struct mmc_request *mrq; - - u32 id; - - u32 flags; - u32 iobase; - u32 clock; - u32 bus_width; - u32 power_mode; - - int status; - - struct { - int len; - int dir; - } dma; - - struct { - int index; - int offset; - int len; - } pio; - - u32 tx_chan; - u32 rx_chan; - - struct timer_list timer; - struct tasklet_struct finish_task; - struct tasklet_struct data_task; - - spinlock_t lock; -}; - -/* Status flags used by the host structure */ - -#define HOST_F_XMIT 0x0001 -#define HOST_F_RECV 0x0002 -#define HOST_F_DMA 0x0010 -#define HOST_F_ACTIVE 0x0100 -#define HOST_F_STOP 0x1000 - -#define HOST_S_IDLE 0x0001 -#define HOST_S_CMD 0x0002 -#define HOST_S_DATA 0x0003 -#define HOST_S_STOP 0x0004 - -#endif diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c index eed211b2ac70..5e880c0f1349 100644 --- a/drivers/mmc/host/imxmmc.c +++ b/drivers/mmc/host/imxmmc.c @@ -892,9 +892,12 @@ static int imxmci_get_ro(struct mmc_host *mmc) struct imxmci_host *host = mmc_priv(mmc); if (host->pdata && host->pdata->get_ro) - return host->pdata->get_ro(mmc_dev(mmc)); - /* Host doesn't support read only detection so assume writeable */ - return 0; + return !!host->pdata->get_ro(mmc_dev(mmc)); + /* + * Board doesn't support read only detection; let the mmc core + * decide what to do. + */ + return -ENOSYS; } diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 35508584ac2a..41cc63360e43 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1126,16 +1126,28 @@ static int mmc_spi_get_ro(struct mmc_host *mmc) struct mmc_spi_host *host = mmc_priv(mmc); if (host->pdata && host->pdata->get_ro) - return host->pdata->get_ro(mmc->parent); - /* board doesn't support read only detection; assume writeable */ - return 0; + return !!host->pdata->get_ro(mmc->parent); + /* + * Board doesn't support read only detection; let the mmc core + * decide what to do. + */ + return -ENOSYS; } +static int mmc_spi_get_cd(struct mmc_host *mmc) +{ + struct mmc_spi_host *host = mmc_priv(mmc); + + if (host->pdata && host->pdata->get_cd) + return !!host->pdata->get_cd(mmc->parent); + return -ENOSYS; +} static const struct mmc_host_ops mmc_spi_ops = { .request = mmc_spi_request, .set_ios = mmc_spi_set_ios, .get_ro = mmc_spi_get_ro, + .get_cd = mmc_spi_get_cd, }; @@ -1240,10 +1252,7 @@ static int mmc_spi_probe(struct spi_device *spi) mmc->ops = &mmc_spi_ops; mmc->max_blk_size = MMC_SPI_BLOCKSIZE; - /* As long as we keep track of the number of successfully - * transmitted blocks, we're good for multiwrite. - */ - mmc->caps = MMC_CAP_SPI | MMC_CAP_MULTIWRITE; + mmc->caps = MMC_CAP_SPI; /* SPI doesn't need the lowspeed device identification thing for * MMC or SD cards, since it never comes up in open drain mode. @@ -1319,17 +1328,23 @@ static int mmc_spi_probe(struct spi_device *spi) goto fail_glue_init; } + /* pass platform capabilities, if any */ + if (host->pdata) + mmc->caps |= host->pdata->caps; + status = mmc_add_host(mmc); if (status != 0) goto fail_add_host; - dev_info(&spi->dev, "SD/MMC host %s%s%s%s\n", + dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", mmc->class_dev.bus_id, host->dma_dev ? "" : ", no DMA", (host->pdata && host->pdata->get_ro) ? "" : ", no WP", (host->pdata && host->pdata->setpower) - ? "" : ", no poweroff"); + ? "" : ", no poweroff", + (mmc->caps & MMC_CAP_NEEDS_POLL) + ? ", cd polling" : ""); return 0; fail_add_host: diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index da5fecad74d9..696cf3647ceb 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -535,7 +535,6 @@ static int mmci_probe(struct amba_device *dev, void *id) mmc->f_min = (host->mclk + 511) / 512; mmc->f_max = min(host->mclk, fmax); mmc->ocr_avail = plat->ocr_mask; - mmc->caps = MMC_CAP_MULTIWRITE; /* * We can do SGIO diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index 549517c35675..dbc26eb6a89e 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -1317,7 +1317,7 @@ static int __init mmc_omap_new_slot(struct mmc_omap_host *host, int id) host->slots[id] = slot; - mmc->caps = MMC_CAP_MULTIWRITE; + mmc->caps = 0; if (host->pdata->conf.wire4) mmc->caps |= MMC_CAP_4_BIT_DATA; diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index d89475d36988..d39f59738866 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -374,9 +374,12 @@ static int pxamci_get_ro(struct mmc_host *mmc) struct pxamci_host *host = mmc_priv(mmc); if (host->pdata && host->pdata->get_ro) - return host->pdata->get_ro(mmc_dev(mmc)); - /* Host doesn't support read only detection so assume writeable */ - return 0; + return !!host->pdata->get_ro(mmc_dev(mmc)); + /* + * Board doesn't support read only detection; let the mmc core + * decide what to do. + */ + return -ENOSYS; } static void pxamci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c new file mode 100644 index 000000000000..6a1e4994b724 --- /dev/null +++ b/drivers/mmc/host/s3cmci.c @@ -0,0 +1,1446 @@ +/* + * linux/drivers/mmc/s3cmci.h - Samsung S3C MCI driver + * + * Copyright (C) 2004-2006 maintech GmbH, Thomas Kleffel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include + +#include "s3cmci.h" + +#define DRIVER_NAME "s3c-mci" + +enum dbg_channels { + dbg_err = (1 << 0), + dbg_debug = (1 << 1), + dbg_info = (1 << 2), + dbg_irq = (1 << 3), + dbg_sg = (1 << 4), + dbg_dma = (1 << 5), + dbg_pio = (1 << 6), + dbg_fail = (1 << 7), + dbg_conf = (1 << 8), +}; + +static const int dbgmap_err = dbg_err | dbg_fail; +static const int dbgmap_info = dbg_info | dbg_conf; +static const int dbgmap_debug = dbg_debug; + +#define dbg(host, channels, args...) \ + do { \ + if (dbgmap_err & channels) \ + dev_err(&host->pdev->dev, args); \ + else if (dbgmap_info & channels) \ + dev_info(&host->pdev->dev, args); \ + else if (dbgmap_debug & channels) \ + dev_dbg(&host->pdev->dev, args); \ + } while (0) + +#define RESSIZE(ressource) (((ressource)->end - (ressource)->start)+1) + +static struct s3c2410_dma_client s3cmci_dma_client = { + .name = "s3c-mci", +}; + +static void finalize_request(struct s3cmci_host *host); +static void s3cmci_send_request(struct mmc_host *mmc); +static void s3cmci_reset(struct s3cmci_host *host); + +#ifdef CONFIG_MMC_DEBUG + +static void dbg_dumpregs(struct s3cmci_host *host, char *prefix) +{ + u32 con, pre, cmdarg, cmdcon, cmdsta, r0, r1, r2, r3, timer, bsize; + u32 datcon, datcnt, datsta, fsta, imask; + + con = readl(host->base + S3C2410_SDICON); + pre = readl(host->base + S3C2410_SDIPRE); + cmdarg = readl(host->base + S3C2410_SDICMDARG); + cmdcon = readl(host->base + S3C2410_SDICMDCON); + cmdsta = readl(host->base + S3C2410_SDICMDSTAT); + r0 = readl(host->base + S3C2410_SDIRSP0); + r1 = readl(host->base + S3C2410_SDIRSP1); + r2 = readl(host->base + S3C2410_SDIRSP2); + r3 = readl(host->base + S3C2410_SDIRSP3); + timer = readl(host->base + S3C2410_SDITIMER); + bsize = readl(host->base + S3C2410_SDIBSIZE); + datcon = readl(host->base + S3C2410_SDIDCON); + datcnt = readl(host->base + S3C2410_SDIDCNT); + datsta = readl(host->base + S3C2410_SDIDSTA); + fsta = readl(host->base + S3C2410_SDIFSTA); + imask = readl(host->base + host->sdiimsk); + + dbg(host, dbg_debug, "%s CON:[%08x] PRE:[%08x] TMR:[%08x]\n", + prefix, con, pre, timer); + + dbg(host, dbg_debug, "%s CCON:[%08x] CARG:[%08x] CSTA:[%08x]\n", + prefix, cmdcon, cmdarg, cmdsta); + + dbg(host, dbg_debug, "%s DCON:[%08x] FSTA:[%08x]" + " DSTA:[%08x] DCNT:[%08x]\n", + prefix, datcon, fsta, datsta, datcnt); + + dbg(host, dbg_debug, "%s R0:[%08x] R1:[%08x]" + " R2:[%08x] R3:[%08x]\n", + prefix, r0, r1, r2, r3); +} + +static void prepare_dbgmsg(struct s3cmci_host *host, struct mmc_command *cmd, + int stop) +{ + snprintf(host->dbgmsg_cmd, 300, + "#%u%s op:%i arg:0x%08x flags:0x08%x retries:%u", + host->ccnt, (stop ? " (STOP)" : ""), + cmd->opcode, cmd->arg, cmd->flags, cmd->retries); + + if (cmd->data) { + snprintf(host->dbgmsg_dat, 300, + "#%u bsize:%u blocks:%u bytes:%u", + host->dcnt, cmd->data->blksz, + cmd->data->blocks, + cmd->data->blocks * cmd->data->blksz); + } else { + host->dbgmsg_dat[0] = '\0'; + } +} + +static void dbg_dumpcmd(struct s3cmci_host *host, struct mmc_command *cmd, + int fail) +{ + unsigned int dbglvl = fail ? dbg_fail : dbg_debug; + + if (!cmd) + return; + + if (cmd->error == 0) { + dbg(host, dbglvl, "CMD[OK] %s R0:0x%08x\n", + host->dbgmsg_cmd, cmd->resp[0]); + } else { + dbg(host, dbglvl, "CMD[ERR %i] %s Status:%s\n", + cmd->error, host->dbgmsg_cmd, host->status); + } + + if (!cmd->data) + return; + + if (cmd->data->error == 0) { + dbg(host, dbglvl, "DAT[OK] %s\n", host->dbgmsg_dat); + } else { + dbg(host, dbglvl, "DAT[ERR %i] %s DCNT:0x%08x\n", + cmd->data->error, host->dbgmsg_dat, + readl(host->base + S3C2410_SDIDCNT)); + } +} +#else +static void dbg_dumpcmd(struct s3cmci_host *host, + struct mmc_command *cmd, int fail) { } + +static void prepare_dbgmsg(struct s3cmci_host *host, struct mmc_command *cmd, + int stop) { } + +static void dbg_dumpregs(struct s3cmci_host *host, char *prefix) { } + +#endif /* CONFIG_MMC_DEBUG */ + +static inline u32 enable_imask(struct s3cmci_host *host, u32 imask) +{ + u32 newmask; + + newmask = readl(host->base + host->sdiimsk); + newmask |= imask; + + writel(newmask, host->base + host->sdiimsk); + + return newmask; +} + +static inline u32 disable_imask(struct s3cmci_host *host, u32 imask) +{ + u32 newmask; + + newmask = readl(host->base + host->sdiimsk); + newmask &= ~imask; + + writel(newmask, host->base + host->sdiimsk); + + return newmask; +} + +static inline void clear_imask(struct s3cmci_host *host) +{ + writel(0, host->base + host->sdiimsk); +} + +static inline int get_data_buffer(struct s3cmci_host *host, + u32 *words, u32 **pointer) +{ + struct scatterlist *sg; + + if (host->pio_active == XFER_NONE) + return -EINVAL; + + if ((!host->mrq) || (!host->mrq->data)) + return -EINVAL; + + if (host->pio_sgptr >= host->mrq->data->sg_len) { + dbg(host, dbg_debug, "no more buffers (%i/%i)\n", + host->pio_sgptr, host->mrq->data->sg_len); + return -EBUSY; + } + sg = &host->mrq->data->sg[host->pio_sgptr]; + + *words = sg->length >> 2; + *pointer = sg_virt(sg); + + host->pio_sgptr++; + + dbg(host, dbg_sg, "new buffer (%i/%i)\n", + host->pio_sgptr, host->mrq->data->sg_len); + + return 0; +} + +static inline u32 fifo_count(struct s3cmci_host *host) +{ + u32 fifostat = readl(host->base + S3C2410_SDIFSTA); + + fifostat &= S3C2410_SDIFSTA_COUNTMASK; + return fifostat >> 2; +} + +static inline u32 fifo_free(struct s3cmci_host *host) +{ + u32 fifostat = readl(host->base + S3C2410_SDIFSTA); + + fifostat &= S3C2410_SDIFSTA_COUNTMASK; + return (63 - fifostat) >> 2; +} + +static void do_pio_read(struct s3cmci_host *host) +{ + int res; + u32 fifo; + void __iomem *from_ptr; + + /* write real prescaler to host, it might be set slow to fix */ + writel(host->prescaler, host->base + S3C2410_SDIPRE); + + from_ptr = host->base + host->sdidata; + + while ((fifo = fifo_count(host))) { + if (!host->pio_words) { + res = get_data_buffer(host, &host->pio_words, + &host->pio_ptr); + if (res) { + host->pio_active = XFER_NONE; + host->complete_what = COMPLETION_FINALIZE; + + dbg(host, dbg_pio, "pio_read(): " + "complete (no more data).\n"); + return; + } + + dbg(host, dbg_pio, + "pio_read(): new target: [%i]@[%p]\n", + host->pio_words, host->pio_ptr); + } + + dbg(host, dbg_pio, + "pio_read(): fifo:[%02i] buffer:[%03i] dcnt:[%08X]\n", + fifo, host->pio_words, + readl(host->base + S3C2410_SDIDCNT)); + + if (fifo > host->pio_words) + fifo = host->pio_words; + + host->pio_words -= fifo; + host->pio_count += fifo; + + while (fifo--) + *(host->pio_ptr++) = readl(from_ptr); + } + + if (!host->pio_words) { + res = get_data_buffer(host, &host->pio_words, &host->pio_ptr); + if (res) { + dbg(host, dbg_pio, + "pio_read(): complete (no more buffers).\n"); + host->pio_active = XFER_NONE; + host->complete_what = COMPLETION_FINALIZE; + + return; + } + } + + enable_imask(host, + S3C2410_SDIIMSK_RXFIFOHALF | S3C2410_SDIIMSK_RXFIFOLAST); +} + +static void do_pio_write(struct s3cmci_host *host) +{ + void __iomem *to_ptr; + int res; + u32 fifo; + + to_ptr = host->base + host->sdidata; + + while ((fifo = fifo_free(host))) { + if (!host->pio_words) { + res = get_data_buffer(host, &host->pio_words, + &host->pio_ptr); + if (res) { + dbg(host, dbg_pio, + "pio_write(): complete (no more data).\n"); + host->pio_active = XFER_NONE; + + return; + } + + dbg(host, dbg_pio, + "pio_write(): new source: [%i]@[%p]\n", + host->pio_words, host->pio_ptr); + + } + + if (fifo > host->pio_words) + fifo = host->pio_words; + + host->pio_words -= fifo; + host->pio_count += fifo; + + while (fifo--) + writel(*(host->pio_ptr++), to_ptr); + } + + enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF); +} + +static void pio_tasklet(unsigned long data) +{ + struct s3cmci_host *host = (struct s3cmci_host *) data; + + + disable_irq(host->irq); + + if (host->pio_active == XFER_WRITE) + do_pio_write(host); + + if (host->pio_active == XFER_READ) + do_pio_read(host); + + if (host->complete_what == COMPLETION_FINALIZE) { + clear_imask(host); + if (host->pio_active != XFER_NONE) { + dbg(host, dbg_err, "unfinished %s " + "- pio_count:[%u] pio_words:[%u]\n", + (host->pio_active == XFER_READ) ? "read" : "write", + host->pio_count, host->pio_words); + + if (host->mrq->data) + host->mrq->data->error = -EINVAL; + } + + finalize_request(host); + } else + enable_irq(host->irq); +} + +/* + * ISR for SDI Interface IRQ + * Communication between driver and ISR works as follows: + * host->mrq points to current request + * host->complete_what Indicates when the request is considered done + * COMPLETION_CMDSENT when the command was sent + * COMPLETION_RSPFIN when a response was received + * COMPLETION_XFERFINISH when the data transfer is finished + * COMPLETION_XFERFINISH_RSPFIN both of the above. + * host->complete_request is the completion-object the driver waits for + * + * 1) Driver sets up host->mrq and host->complete_what + * 2) Driver prepares the transfer + * 3) Driver enables interrupts + * 4) Driver starts transfer + * 5) Driver waits for host->complete_rquest + * 6) ISR checks for request status (errors and success) + * 6) ISR sets host->mrq->cmd->error and host->mrq->data->error + * 7) ISR completes host->complete_request + * 8) ISR disables interrupts + * 9) Driver wakes up and takes care of the request + * + * Note: "->error"-fields are expected to be set to 0 before the request + * was issued by mmc.c - therefore they are only set, when an error + * contition comes up + */ + +static irqreturn_t s3cmci_irq(int irq, void *dev_id) +{ + struct s3cmci_host *host = dev_id; + struct mmc_command *cmd; + u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt, mci_imsk; + u32 mci_cclear, mci_dclear; + unsigned long iflags; + + spin_lock_irqsave(&host->complete_lock, iflags); + + mci_csta = readl(host->base + S3C2410_SDICMDSTAT); + mci_dsta = readl(host->base + S3C2410_SDIDSTA); + mci_dcnt = readl(host->base + S3C2410_SDIDCNT); + mci_fsta = readl(host->base + S3C2410_SDIFSTA); + mci_imsk = readl(host->base + host->sdiimsk); + mci_cclear = 0; + mci_dclear = 0; + + if ((host->complete_what == COMPLETION_NONE) || + (host->complete_what == COMPLETION_FINALIZE)) { + host->status = "nothing to complete"; + clear_imask(host); + goto irq_out; + } + + if (!host->mrq) { + host->status = "no active mrq"; + clear_imask(host); + goto irq_out; + } + + cmd = host->cmd_is_stop ? host->mrq->stop : host->mrq->cmd; + + if (!cmd) { + host->status = "no active cmd"; + clear_imask(host); + goto irq_out; + } + + if (!host->dodma) { + if ((host->pio_active == XFER_WRITE) && + (mci_fsta & S3C2410_SDIFSTA_TFDET)) { + + disable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF); + tasklet_schedule(&host->pio_tasklet); + host->status = "pio tx"; + } + + if ((host->pio_active == XFER_READ) && + (mci_fsta & S3C2410_SDIFSTA_RFDET)) { + + disable_imask(host, + S3C2410_SDIIMSK_RXFIFOHALF | + S3C2410_SDIIMSK_RXFIFOLAST); + + tasklet_schedule(&host->pio_tasklet); + host->status = "pio rx"; + } + } + + if (mci_csta & S3C2410_SDICMDSTAT_CMDTIMEOUT) { + dbg(host, dbg_err, "CMDSTAT: error CMDTIMEOUT\n"); + cmd->error = -ETIMEDOUT; + host->status = "error: command timeout"; + goto fail_transfer; + } + + if (mci_csta & S3C2410_SDICMDSTAT_CMDSENT) { + if (host->complete_what == COMPLETION_CMDSENT) { + host->status = "ok: command sent"; + goto close_transfer; + } + + mci_cclear |= S3C2410_SDICMDSTAT_CMDSENT; + } + + if (mci_csta & S3C2410_SDICMDSTAT_CRCFAIL) { + if (cmd->flags & MMC_RSP_CRC) { + if (host->mrq->cmd->flags & MMC_RSP_136) { + dbg(host, dbg_irq, + "fixup: ignore CRC fail with long rsp\n"); + } else { + /* note, we used to fail the transfer + * here, but it seems that this is just + * the hardware getting it wrong. + * + * cmd->error = -EILSEQ; + * host->status = "error: bad command crc"; + * goto fail_transfer; + */ + } + } + + mci_cclear |= S3C2410_SDICMDSTAT_CRCFAIL; + } + + if (mci_csta & S3C2410_SDICMDSTAT_RSPFIN) { + if (host->complete_what == COMPLETION_RSPFIN) { + host->status = "ok: command response received"; + goto close_transfer; + } + + if (host->complete_what == COMPLETION_XFERFINISH_RSPFIN) + host->complete_what = COMPLETION_XFERFINISH; + + mci_cclear |= S3C2410_SDICMDSTAT_RSPFIN; + } + + /* errors handled after this point are only relevant + when a data transfer is in progress */ + + if (!cmd->data) + goto clear_status_bits; + + /* Check for FIFO failure */ + if (host->is2440) { + if (mci_fsta & S3C2440_SDIFSTA_FIFOFAIL) { + dbg(host, dbg_err, "FIFO failure\n"); + host->mrq->data->error = -EILSEQ; + host->status = "error: 2440 fifo failure"; + goto fail_transfer; + } + } else { + if (mci_dsta & S3C2410_SDIDSTA_FIFOFAIL) { + dbg(host, dbg_err, "FIFO failure\n"); + cmd->data->error = -EILSEQ; + host->status = "error: fifo failure"; + goto fail_transfer; + } + } + + if (mci_dsta & S3C2410_SDIDSTA_RXCRCFAIL) { + dbg(host, dbg_err, "bad data crc (outgoing)\n"); + cmd->data->error = -EILSEQ; + host->status = "error: bad data crc (outgoing)"; + goto fail_transfer; + } + + if (mci_dsta & S3C2410_SDIDSTA_CRCFAIL) { + dbg(host, dbg_err, "bad data crc (incoming)\n"); + cmd->data->error = -EILSEQ; + host->status = "error: bad data crc (incoming)"; + goto fail_transfer; + } + + if (mci_dsta & S3C2410_SDIDSTA_DATATIMEOUT) { + dbg(host, dbg_err, "data timeout\n"); + cmd->data->error = -ETIMEDOUT; + host->status = "error: data timeout"; + goto fail_transfer; + } + + if (mci_dsta & S3C2410_SDIDSTA_XFERFINISH) { + if (host->complete_what == COMPLETION_XFERFINISH) { + host->status = "ok: data transfer completed"; + goto close_transfer; + } + + if (host->complete_what == COMPLETION_XFERFINISH_RSPFIN) + host->complete_what = COMPLETION_RSPFIN; + + mci_dclear |= S3C2410_SDIDSTA_XFERFINISH; + } + +clear_status_bits: + writel(mci_cclear, host->base + S3C2410_SDICMDSTAT); + writel(mci_dclear, host->base + S3C2410_SDIDSTA); + + goto irq_out; + +fail_transfer: + host->pio_active = XFER_NONE; + +close_transfer: + host->complete_what = COMPLETION_FINALIZE; + + clear_imask(host); + tasklet_schedule(&host->pio_tasklet); + + goto irq_out; + +irq_out: + dbg(host, dbg_irq, + "csta:0x%08x dsta:0x%08x fsta:0x%08x dcnt:0x%08x status:%s.\n", + mci_csta, mci_dsta, mci_fsta, mci_dcnt, host->status); + + spin_unlock_irqrestore(&host->complete_lock, iflags); + return IRQ_HANDLED; + +} + +/* + * ISR for the CardDetect Pin +*/ + +static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id) +{ + struct s3cmci_host *host = (struct s3cmci_host *)dev_id; + + dbg(host, dbg_irq, "card detect\n"); + + mmc_detect_change(host->mmc, msecs_to_jiffies(500)); + + return IRQ_HANDLED; +} + +void s3cmci_dma_done_callback(struct s3c2410_dma_chan *dma_ch, void *buf_id, + int size, enum s3c2410_dma_buffresult result) +{ + struct s3cmci_host *host = buf_id; + unsigned long iflags; + u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt; + + mci_csta = readl(host->base + S3C2410_SDICMDSTAT); + mci_dsta = readl(host->base + S3C2410_SDIDSTA); + mci_fsta = readl(host->base + S3C2410_SDIFSTA); + mci_dcnt = readl(host->base + S3C2410_SDIDCNT); + + BUG_ON(!host->mrq); + BUG_ON(!host->mrq->data); + BUG_ON(!host->dmatogo); + + spin_lock_irqsave(&host->complete_lock, iflags); + + if (result != S3C2410_RES_OK) { + dbg(host, dbg_fail, "DMA FAILED: csta=0x%08x dsta=0x%08x " + "fsta=0x%08x dcnt:0x%08x result:0x%08x toGo:%u\n", + mci_csta, mci_dsta, mci_fsta, + mci_dcnt, result, host->dmatogo); + + goto fail_request; + } + + host->dmatogo--; + if (host->dmatogo) { + dbg(host, dbg_dma, "DMA DONE Size:%i DSTA:[%08x] " + "DCNT:[%08x] toGo:%u\n", + size, mci_dsta, mci_dcnt, host->dmatogo); + + goto out; + } + + dbg(host, dbg_dma, "DMA FINISHED Size:%i DSTA:%08x DCNT:%08x\n", + size, mci_dsta, mci_dcnt); + + host->complete_what = COMPLETION_FINALIZE; + +out: + tasklet_schedule(&host->pio_tasklet); + spin_unlock_irqrestore(&host->complete_lock, iflags); + return; + +fail_request: + host->mrq->data->error = -EINVAL; + host->complete_what = COMPLETION_FINALIZE; + writel(0, host->base + host->sdiimsk); + goto out; + +} + +static void finalize_request(struct s3cmci_host *host) +{ + struct mmc_request *mrq = host->mrq; + struct mmc_command *cmd = host->cmd_is_stop ? mrq->stop : mrq->cmd; + int debug_as_failure = 0; + + if (host->complete_what != COMPLETION_FINALIZE) + return; + + if (!mrq) + return; + + if (cmd->data && (cmd->error == 0) && + (cmd->data->error == 0)) { + if (host->dodma && (!host->dma_complete)) { + dbg(host, dbg_dma, "DMA Missing!\n"); + return; + } + } + + /* Read response from controller. */ + cmd->resp[0] = readl(host->base + S3C2410_SDIRSP0); + cmd->resp[1] = readl(host->base + S3C2410_SDIRSP1); + cmd->resp[2] = readl(host->base + S3C2410_SDIRSP2); + cmd->resp[3] = readl(host->base + S3C2410_SDIRSP3); + + writel(host->prescaler, host->base + S3C2410_SDIPRE); + + if (cmd->error) + debug_as_failure = 1; + + if (cmd->data && cmd->data->error) + debug_as_failure = 1; + + dbg_dumpcmd(host, cmd, debug_as_failure); + + /* Cleanup controller */ + writel(0, host->base + S3C2410_SDICMDARG); + writel(S3C2410_SDIDCON_STOP, host->base + S3C2410_SDIDCON); + writel(0, host->base + S3C2410_SDICMDCON); + writel(0, host->base + host->sdiimsk); + + if (cmd->data && cmd->error) + cmd->data->error = cmd->error; + + if (cmd->data && cmd->data->stop && (!host->cmd_is_stop)) { + host->cmd_is_stop = 1; + s3cmci_send_request(host->mmc); + return; + } + + /* If we have no data transfer we are finished here */ + if (!mrq->data) + goto request_done; + + /* Calulate the amout of bytes transfer if there was no error */ + if (mrq->data->error == 0) { + mrq->data->bytes_xfered = + (mrq->data->blocks * mrq->data->blksz); + } else { + mrq->data->bytes_xfered = 0; + } + + /* If we had an error while transfering data we flush the + * DMA channel and the fifo to clear out any garbage. */ + if (mrq->data->error != 0) { + if (host->dodma) + s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); + + if (host->is2440) { + /* Clear failure register and reset fifo. */ + writel(S3C2440_SDIFSTA_FIFORESET | + S3C2440_SDIFSTA_FIFOFAIL, + host->base + S3C2410_SDIFSTA); + } else { + u32 mci_con; + + /* reset fifo */ + mci_con = readl(host->base + S3C2410_SDICON); + mci_con |= S3C2410_SDICON_FIFORESET; + + writel(mci_con, host->base + S3C2410_SDICON); + } + } + +request_done: + host->complete_what = COMPLETION_NONE; + host->mrq = NULL; + mmc_request_done(host->mmc, mrq); +} + + +void s3cmci_dma_setup(struct s3cmci_host *host, enum s3c2410_dmasrc source) +{ + static enum s3c2410_dmasrc last_source = -1; + static int setup_ok; + + if (last_source == source) + return; + + last_source = source; + + s3c2410_dma_devconfig(host->dma, source, 3, + host->mem->start + host->sdidata); + + if (!setup_ok) { + s3c2410_dma_config(host->dma, 4, + (S3C2410_DCON_HWTRIG | S3C2410_DCON_CH0_SDI)); + s3c2410_dma_set_buffdone_fn(host->dma, + s3cmci_dma_done_callback); + s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART); + setup_ok = 1; + } +} + +static void s3cmci_send_command(struct s3cmci_host *host, + struct mmc_command *cmd) +{ + u32 ccon, imsk; + + imsk = S3C2410_SDIIMSK_CRCSTATUS | S3C2410_SDIIMSK_CMDTIMEOUT | + S3C2410_SDIIMSK_RESPONSEND | S3C2410_SDIIMSK_CMDSENT | + S3C2410_SDIIMSK_RESPONSECRC; + + enable_imask(host, imsk); + + if (cmd->data) + host->complete_what = COMPLETION_XFERFINISH_RSPFIN; + else if (cmd->flags & MMC_RSP_PRESENT) + host->complete_what = COMPLETION_RSPFIN; + else + host->complete_what = COMPLETION_CMDSENT; + + writel(cmd->arg, host->base + S3C2410_SDICMDARG); + + ccon = cmd->opcode & S3C2410_SDICMDCON_INDEX; + ccon |= S3C2410_SDICMDCON_SENDERHOST | S3C2410_SDICMDCON_CMDSTART; + + if (cmd->flags & MMC_RSP_PRESENT) + ccon |= S3C2410_SDICMDCON_WAITRSP; + + if (cmd->flags & MMC_RSP_136) + ccon |= S3C2410_SDICMDCON_LONGRSP; + + writel(ccon, host->base + S3C2410_SDICMDCON); +} + +static int s3cmci_setup_data(struct s3cmci_host *host, struct mmc_data *data) +{ + u32 dcon, imsk, stoptries = 3; + + /* write DCON register */ + + if (!data) { + writel(0, host->base + S3C2410_SDIDCON); + return 0; + } + + if ((data->blksz & 3) != 0) { + /* We cannot deal with unaligned blocks with more than + * one block being transfered. */ + + if (data->blocks > 1) + return -EINVAL; + + /* No support yet for non-word block transfers. */ + return -EINVAL; + } + + while (readl(host->base + S3C2410_SDIDSTA) & + (S3C2410_SDIDSTA_TXDATAON | S3C2410_SDIDSTA_RXDATAON)) { + + dbg(host, dbg_err, + "mci_setup_data() transfer stillin progress.\n"); + + writel(S3C2410_SDIDCON_STOP, host->base + S3C2410_SDIDCON); + s3cmci_reset(host); + + if ((stoptries--) == 0) { + dbg_dumpregs(host, "DRF"); + return -EINVAL; + } + } + + dcon = data->blocks & S3C2410_SDIDCON_BLKNUM_MASK; + + if (host->dodma) + dcon |= S3C2410_SDIDCON_DMAEN; + + if (host->bus_width == MMC_BUS_WIDTH_4) + dcon |= S3C2410_SDIDCON_WIDEBUS; + + if (!(data->flags & MMC_DATA_STREAM)) + dcon |= S3C2410_SDIDCON_BLOCKMODE; + + if (data->flags & MMC_DATA_WRITE) { + dcon |= S3C2410_SDIDCON_TXAFTERRESP; + dcon |= S3C2410_SDIDCON_XFER_TXSTART; + } + + if (data->flags & MMC_DATA_READ) { + dcon |= S3C2410_SDIDCON_RXAFTERCMD; + dcon |= S3C2410_SDIDCON_XFER_RXSTART; + } + + if (host->is2440) { + dcon |= S3C2440_SDIDCON_DS_WORD; + dcon |= S3C2440_SDIDCON_DATSTART; + } + + writel(dcon, host->base + S3C2410_SDIDCON); + + /* write BSIZE register */ + + writel(data->blksz, host->base + S3C2410_SDIBSIZE); + + /* add to IMASK register */ + imsk = S3C2410_SDIIMSK_FIFOFAIL | S3C2410_SDIIMSK_DATACRC | + S3C2410_SDIIMSK_DATATIMEOUT | S3C2410_SDIIMSK_DATAFINISH; + + enable_imask(host, imsk); + + /* write TIMER register */ + + if (host->is2440) { + writel(0x007FFFFF, host->base + S3C2410_SDITIMER); + } else { + writel(0x0000FFFF, host->base + S3C2410_SDITIMER); + + /* FIX: set slow clock to prevent timeouts on read */ + if (data->flags & MMC_DATA_READ) + writel(0xFF, host->base + S3C2410_SDIPRE); + } + + return 0; +} + +#define BOTH_DIR (MMC_DATA_WRITE | MMC_DATA_READ) + +static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data) +{ + int rw = (data->flags & MMC_DATA_WRITE) ? 1 : 0; + + BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR); + + host->pio_sgptr = 0; + host->pio_words = 0; + host->pio_count = 0; + host->pio_active = rw ? XFER_WRITE : XFER_READ; + + if (rw) { + do_pio_write(host); + enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF); + } else { + enable_imask(host, S3C2410_SDIIMSK_RXFIFOHALF + | S3C2410_SDIIMSK_RXFIFOLAST); + } + + return 0; +} + +static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data) +{ + int dma_len, i; + int rw = (data->flags & MMC_DATA_WRITE) ? 1 : 0; + + BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR); + + s3cmci_dma_setup(host, rw ? S3C2410_DMASRC_MEM : S3C2410_DMASRC_HW); + s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); + + dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + (rw) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + + if (dma_len == 0) + return -ENOMEM; + + host->dma_complete = 0; + host->dmatogo = dma_len; + + for (i = 0; i < dma_len; i++) { + int res; + + dbg(host, dbg_dma, "enqueue %i:%u@%u\n", i, + sg_dma_address(&data->sg[i]), + sg_dma_len(&data->sg[i])); + + res = s3c2410_dma_enqueue(host->dma, (void *) host, + sg_dma_address(&data->sg[i]), + sg_dma_len(&data->sg[i])); + + if (res) { + s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); + return -EBUSY; + } + } + + s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_START); + + return 0; +} + +static void s3cmci_send_request(struct mmc_host *mmc) +{ + struct s3cmci_host *host = mmc_priv(mmc); + struct mmc_request *mrq = host->mrq; + struct mmc_command *cmd = host->cmd_is_stop ? mrq->stop : mrq->cmd; + + host->ccnt++; + prepare_dbgmsg(host, cmd, host->cmd_is_stop); + + /* Clear command, data and fifo status registers + Fifo clear only necessary on 2440, but doesn't hurt on 2410 + */ + writel(0xFFFFFFFF, host->base + S3C2410_SDICMDSTAT); + writel(0xFFFFFFFF, host->base + S3C2410_SDIDSTA); + writel(0xFFFFFFFF, host->base + S3C2410_SDIFSTA); + + if (cmd->data) { + int res = s3cmci_setup_data(host, cmd->data); + + host->dcnt++; + + if (res) { + dbg(host, dbg_err, "setup data error %d\n", res); + cmd->error = res; + cmd->data->error = res; + + mmc_request_done(mmc, mrq); + return; + } + + if (host->dodma) + res = s3cmci_prepare_dma(host, cmd->data); + else + res = s3cmci_prepare_pio(host, cmd->data); + + if (res) { + dbg(host, dbg_err, "data prepare error %d\n", res); + cmd->error = res; + cmd->data->error = res; + + mmc_request_done(mmc, mrq); + return; + } + } + + /* Send command */ + s3cmci_send_command(host, cmd); + + /* Enable Interrupt */ + enable_irq(host->irq); +} + +static int s3cmci_card_present(struct s3cmci_host *host) +{ + struct s3c24xx_mci_pdata *pdata = host->pdata; + int ret; + + if (pdata->gpio_detect == 0) + return -ENOSYS; + + ret = s3c2410_gpio_getpin(pdata->gpio_detect) ? 0 : 1; + return ret ^ pdata->detect_invert; +} + +static void s3cmci_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + struct s3cmci_host *host = mmc_priv(mmc); + + host->status = "mmc request"; + host->cmd_is_stop = 0; + host->mrq = mrq; + + if (s3cmci_card_present(host) == 0) { + dbg(host, dbg_err, "%s: no medium present\n", __func__); + host->mrq->cmd->error = -ENOMEDIUM; + mmc_request_done(mmc, mrq); + } else + s3cmci_send_request(mmc); +} + +static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct s3cmci_host *host = mmc_priv(mmc); + u32 mci_psc, mci_con; + + /* Set the power state */ + + mci_con = readl(host->base + S3C2410_SDICON); + + switch (ios->power_mode) { + case MMC_POWER_ON: + case MMC_POWER_UP: + s3c2410_gpio_cfgpin(S3C2410_GPE5, S3C2410_GPE5_SDCLK); + s3c2410_gpio_cfgpin(S3C2410_GPE6, S3C2410_GPE6_SDCMD); + s3c2410_gpio_cfgpin(S3C2410_GPE7, S3C2410_GPE7_SDDAT0); + s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1); + s3c2410_gpio_cfgpin(S3C2410_GPE9, S3C2410_GPE9_SDDAT2); + s3c2410_gpio_cfgpin(S3C2410_GPE10, S3C2410_GPE10_SDDAT3); + + if (host->pdata->set_power) + host->pdata->set_power(ios->power_mode, ios->vdd); + + if (!host->is2440) + mci_con |= S3C2410_SDICON_FIFORESET; + + break; + + case MMC_POWER_OFF: + default: + s3c2410_gpio_setpin(S3C2410_GPE5, 0); + s3c2410_gpio_cfgpin(S3C2410_GPE5, S3C2410_GPE5_OUTP); + + if (host->is2440) + mci_con |= S3C2440_SDICON_SDRESET; + + if (host->pdata->set_power) + host->pdata->set_power(ios->power_mode, ios->vdd); + + break; + } + + /* Set clock */ + for (mci_psc = 0; mci_psc < 255; mci_psc++) { + host->real_rate = host->clk_rate / (host->clk_div*(mci_psc+1)); + + if (host->real_rate <= ios->clock) + break; + } + + if (mci_psc > 255) + mci_psc = 255; + + host->prescaler = mci_psc; + writel(host->prescaler, host->base + S3C2410_SDIPRE); + + /* If requested clock is 0, real_rate will be 0, too */ + if (ios->clock == 0) + host->real_rate = 0; + + /* Set CLOCK_ENABLE */ + if (ios->clock) + mci_con |= S3C2410_SDICON_CLOCKTYPE; + else + mci_con &= ~S3C2410_SDICON_CLOCKTYPE; + + writel(mci_con, host->base + S3C2410_SDICON); + + if ((ios->power_mode == MMC_POWER_ON) || + (ios->power_mode == MMC_POWER_UP)) { + dbg(host, dbg_conf, "running at %lukHz (requested: %ukHz).\n", + host->real_rate/1000, ios->clock/1000); + } else { + dbg(host, dbg_conf, "powered down.\n"); + } + + host->bus_width = ios->bus_width; +} + +static void s3cmci_reset(struct s3cmci_host *host) +{ + u32 con = readl(host->base + S3C2410_SDICON); + + con |= S3C2440_SDICON_SDRESET; + writel(con, host->base + S3C2410_SDICON); +} + +static int s3cmci_get_ro(struct mmc_host *mmc) +{ + struct s3cmci_host *host = mmc_priv(mmc); + struct s3c24xx_mci_pdata *pdata = host->pdata; + int ret; + + if (pdata->gpio_wprotect == 0) + return 0; + + ret = s3c2410_gpio_getpin(pdata->gpio_wprotect); + + if (pdata->wprotect_invert) + ret = !ret; + + return ret; +} + +static struct mmc_host_ops s3cmci_ops = { + .request = s3cmci_request, + .set_ios = s3cmci_set_ios, + .get_ro = s3cmci_get_ro, +}; + +static struct s3c24xx_mci_pdata s3cmci_def_pdata = { + /* This is currently here to avoid a number of if (host->pdata) + * checks. Any zero fields to ensure reaonable defaults are picked. */ +}; + +static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440) +{ + struct s3cmci_host *host; + struct mmc_host *mmc; + int ret; + + mmc = mmc_alloc_host(sizeof(struct s3cmci_host), &pdev->dev); + if (!mmc) { + ret = -ENOMEM; + goto probe_out; + } + + host = mmc_priv(mmc); + host->mmc = mmc; + host->pdev = pdev; + host->is2440 = is2440; + + host->pdata = pdev->dev.platform_data; + if (!host->pdata) { + pdev->dev.platform_data = &s3cmci_def_pdata; + host->pdata = &s3cmci_def_pdata; + } + + spin_lock_init(&host->complete_lock); + tasklet_init(&host->pio_tasklet, pio_tasklet, (unsigned long) host); + + if (is2440) { + host->sdiimsk = S3C2440_SDIIMSK; + host->sdidata = S3C2440_SDIDATA; + host->clk_div = 1; + } else { + host->sdiimsk = S3C2410_SDIIMSK; + host->sdidata = S3C2410_SDIDATA; + host->clk_div = 2; + } + + host->dodma = 0; + host->complete_what = COMPLETION_NONE; + host->pio_active = XFER_NONE; + + host->dma = S3CMCI_DMA; + + host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!host->mem) { + dev_err(&pdev->dev, + "failed to get io memory region resouce.\n"); + + ret = -ENOENT; + goto probe_free_host; + } + + host->mem = request_mem_region(host->mem->start, + RESSIZE(host->mem), pdev->name); + + if (!host->mem) { + dev_err(&pdev->dev, "failed to request io memory region.\n"); + ret = -ENOENT; + goto probe_free_host; + } + + host->base = ioremap(host->mem->start, RESSIZE(host->mem)); + if (host->base == 0) { + dev_err(&pdev->dev, "failed to ioremap() io memory region.\n"); + ret = -EINVAL; + goto probe_free_mem_region; + } + + host->irq = platform_get_irq(pdev, 0); + if (host->irq == 0) { + dev_err(&pdev->dev, "failed to get interrupt resouce.\n"); + ret = -EINVAL; + goto probe_iounmap; + } + + if (request_irq(host->irq, s3cmci_irq, 0, DRIVER_NAME, host)) { + dev_err(&pdev->dev, "failed to request mci interrupt.\n"); + ret = -ENOENT; + goto probe_iounmap; + } + + /* We get spurious interrupts even when we have set the IMSK + * register to ignore everything, so use disable_irq() to make + * ensure we don't lock the system with un-serviceable requests. */ + + disable_irq(host->irq); + + host->irq_cd = s3c2410_gpio_getirq(host->pdata->gpio_detect); + + if (host->irq_cd >= 0) { + if (request_irq(host->irq_cd, s3cmci_irq_cd, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + DRIVER_NAME, host)) { + dev_err(&pdev->dev, "can't get card detect irq.\n"); + ret = -ENOENT; + goto probe_free_irq; + } + } else { + dev_warn(&pdev->dev, "host detect has no irq available\n"); + s3c2410_gpio_cfgpin(host->pdata->gpio_detect, + S3C2410_GPIO_INPUT); + } + + if (host->pdata->gpio_wprotect) + s3c2410_gpio_cfgpin(host->pdata->gpio_wprotect, + S3C2410_GPIO_INPUT); + + if (s3c2410_dma_request(S3CMCI_DMA, &s3cmci_dma_client, NULL) < 0) { + dev_err(&pdev->dev, "unable to get DMA channel.\n"); + ret = -EBUSY; + goto probe_free_irq_cd; + } + + host->clk = clk_get(&pdev->dev, "sdi"); + if (IS_ERR(host->clk)) { + dev_err(&pdev->dev, "failed to find clock source.\n"); + ret = PTR_ERR(host->clk); + host->clk = NULL; + goto probe_free_host; + } + + ret = clk_enable(host->clk); + if (ret) { + dev_err(&pdev->dev, "failed to enable clock source.\n"); + goto clk_free; + } + + host->clk_rate = clk_get_rate(host->clk); + + mmc->ops = &s3cmci_ops; + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + mmc->caps = MMC_CAP_4_BIT_DATA; + mmc->f_min = host->clk_rate / (host->clk_div * 256); + mmc->f_max = host->clk_rate / host->clk_div; + + if (host->pdata->ocr_avail) + mmc->ocr_avail = host->pdata->ocr_avail; + + mmc->max_blk_count = 4095; + mmc->max_blk_size = 4095; + mmc->max_req_size = 4095 * 512; + mmc->max_seg_size = mmc->max_req_size; + + mmc->max_phys_segs = 128; + mmc->max_hw_segs = 128; + + dbg(host, dbg_debug, + "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n", + (host->is2440?"2440":""), + host->base, host->irq, host->irq_cd, host->dma); + + ret = mmc_add_host(mmc); + if (ret) { + dev_err(&pdev->dev, "failed to add mmc host.\n"); + goto free_dmabuf; + } + + platform_set_drvdata(pdev, mmc); + dev_info(&pdev->dev, "initialisation done.\n"); + + return 0; + + free_dmabuf: + clk_disable(host->clk); + + clk_free: + clk_put(host->clk); + + probe_free_irq_cd: + if (host->irq_cd >= 0) + free_irq(host->irq_cd, host); + + probe_free_irq: + free_irq(host->irq, host); + + probe_iounmap: + iounmap(host->base); + + probe_free_mem_region: + release_mem_region(host->mem->start, RESSIZE(host->mem)); + + probe_free_host: + mmc_free_host(mmc); + probe_out: + return ret; +} + +static int __devexit s3cmci_remove(struct platform_device *pdev) +{ + struct mmc_host *mmc = platform_get_drvdata(pdev); + struct s3cmci_host *host = mmc_priv(mmc); + + mmc_remove_host(mmc); + + clk_disable(host->clk); + clk_put(host->clk); + + tasklet_disable(&host->pio_tasklet); + s3c2410_dma_free(S3CMCI_DMA, &s3cmci_dma_client); + + if (host->irq_cd >= 0) + free_irq(host->irq_cd, host); + free_irq(host->irq, host); + + iounmap(host->base); + release_mem_region(host->mem->start, RESSIZE(host->mem)); + + mmc_free_host(mmc); + return 0; +} + +static int __devinit s3cmci_probe_2410(struct platform_device *dev) +{ + return s3cmci_probe(dev, 0); +} + +static int __devinit s3cmci_probe_2412(struct platform_device *dev) +{ + return s3cmci_probe(dev, 1); +} + +static int __devinit s3cmci_probe_2440(struct platform_device *dev) +{ + return s3cmci_probe(dev, 1); +} + +#ifdef CONFIG_PM + +static int s3cmci_suspend(struct platform_device *dev, pm_message_t state) +{ + struct mmc_host *mmc = platform_get_drvdata(dev); + + return mmc_suspend_host(mmc, state); +} + +static int s3cmci_resume(struct platform_device *dev) +{ + struct mmc_host *mmc = platform_get_drvdata(dev); + + return mmc_resume_host(mmc); +} + +#else /* CONFIG_PM */ +#define s3cmci_suspend NULL +#define s3cmci_resume NULL +#endif /* CONFIG_PM */ + + +static struct platform_driver s3cmci_driver_2410 = { + .driver.name = "s3c2410-sdi", + .driver.owner = THIS_MODULE, + .probe = s3cmci_probe_2410, + .remove = __devexit_p(s3cmci_remove), + .suspend = s3cmci_suspend, + .resume = s3cmci_resume, +}; + +static struct platform_driver s3cmci_driver_2412 = { + .driver.name = "s3c2412-sdi", + .driver.owner = THIS_MODULE, + .probe = s3cmci_probe_2412, + .remove = __devexit_p(s3cmci_remove), + .suspend = s3cmci_suspend, + .resume = s3cmci_resume, +}; + +static struct platform_driver s3cmci_driver_2440 = { + .driver.name = "s3c2440-sdi", + .driver.owner = THIS_MODULE, + .probe = s3cmci_probe_2440, + .remove = __devexit_p(s3cmci_remove), + .suspend = s3cmci_suspend, + .resume = s3cmci_resume, +}; + + +static int __init s3cmci_init(void) +{ + platform_driver_register(&s3cmci_driver_2410); + platform_driver_register(&s3cmci_driver_2412); + platform_driver_register(&s3cmci_driver_2440); + return 0; +} + +static void __exit s3cmci_exit(void) +{ + platform_driver_unregister(&s3cmci_driver_2410); + platform_driver_unregister(&s3cmci_driver_2412); + platform_driver_unregister(&s3cmci_driver_2440); +} + +module_init(s3cmci_init); +module_exit(s3cmci_exit); + +MODULE_DESCRIPTION("Samsung S3C MMC/SD Card Interface driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Thomas Kleffel "); +MODULE_ALIAS("platform:s3c2410-sdi"); +MODULE_ALIAS("platform:s3c2412-sdi"); +MODULE_ALIAS("platform:s3c2440-sdi"); diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h new file mode 100644 index 000000000000..37d9c60010c9 --- /dev/null +++ b/drivers/mmc/host/s3cmci.h @@ -0,0 +1,70 @@ +/* + * linux/drivers/mmc/s3cmci.h - Samsung S3C MCI driver + * + * Copyright (C) 2004-2006 Thomas Kleffel, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* FIXME: DMA Resource management ?! */ +#define S3CMCI_DMA 0 + +enum s3cmci_waitfor { + COMPLETION_NONE, + COMPLETION_FINALIZE, + COMPLETION_CMDSENT, + COMPLETION_RSPFIN, + COMPLETION_XFERFINISH, + COMPLETION_XFERFINISH_RSPFIN, +}; + +struct s3cmci_host { + struct platform_device *pdev; + struct s3c24xx_mci_pdata *pdata; + struct mmc_host *mmc; + struct resource *mem; + struct clk *clk; + void __iomem *base; + int irq; + int irq_cd; + int dma; + + unsigned long clk_rate; + unsigned long clk_div; + unsigned long real_rate; + u8 prescaler; + + int is2440; + unsigned sdiimsk; + unsigned sdidata; + int dodma; + int dmatogo; + + struct mmc_request *mrq; + int cmd_is_stop; + + spinlock_t complete_lock; + enum s3cmci_waitfor complete_what; + + int dma_complete; + + u32 pio_sgptr; + u32 pio_words; + u32 pio_count; + u32 *pio_ptr; +#define XFER_NONE 0 +#define XFER_READ 1 +#define XFER_WRITE 2 + u32 pio_active; + + int bus_width; + + char dbgmsg_cmd[301]; + char dbgmsg_dat[301]; + char *status; + + unsigned int ccnt, dcnt; + struct tasklet_struct pio_tasklet; +}; diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c new file mode 100644 index 000000000000..deb607c52c0d --- /dev/null +++ b/drivers/mmc/host/sdhci-pci.c @@ -0,0 +1,732 @@ +/* linux/drivers/mmc/host/sdhci-pci.c - SDHCI on PCI bus interface + * + * Copyright (C) 2005-2008 Pierre Ossman, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * Thanks to the following companies for their support: + * + * - JMicron (hardware and technical support) + */ + +#include +#include +#include +#include + +#include + +#include +#include + +#include "sdhci.h" + +/* + * PCI registers + */ + +#define PCI_SDHCI_IFPIO 0x00 +#define PCI_SDHCI_IFDMA 0x01 +#define PCI_SDHCI_IFVENDOR 0x02 + +#define PCI_SLOT_INFO 0x40 /* 8 bits */ +#define PCI_SLOT_INFO_SLOTS(x) ((x >> 4) & 7) +#define PCI_SLOT_INFO_FIRST_BAR_MASK 0x07 + +#define MAX_SLOTS 8 + +struct sdhci_pci_chip; +struct sdhci_pci_slot; + +struct sdhci_pci_fixes { + unsigned int quirks; + + int (*probe)(struct sdhci_pci_chip*); + + int (*probe_slot)(struct sdhci_pci_slot*); + void (*remove_slot)(struct sdhci_pci_slot*, int); + + int (*suspend)(struct sdhci_pci_chip*, + pm_message_t); + int (*resume)(struct sdhci_pci_chip*); +}; + +struct sdhci_pci_slot { + struct sdhci_pci_chip *chip; + struct sdhci_host *host; + + int pci_bar; +}; + +struct sdhci_pci_chip { + struct pci_dev *pdev; + + unsigned int quirks; + const struct sdhci_pci_fixes *fixes; + + int num_slots; /* Slots on controller */ + struct sdhci_pci_slot *slots[MAX_SLOTS]; /* Pointers to host slots */ +}; + + +/*****************************************************************************\ + * * + * Hardware specific quirk handling * + * * +\*****************************************************************************/ + +static int ricoh_probe(struct sdhci_pci_chip *chip) +{ + if (chip->pdev->subsystem_vendor == PCI_VENDOR_ID_IBM) + chip->quirks |= SDHCI_QUIRK_CLOCK_BEFORE_RESET; + + if (chip->pdev->subsystem_vendor == PCI_VENDOR_ID_SAMSUNG) + chip->quirks |= SDHCI_QUIRK_NO_CARD_NO_RESET; + + return 0; +} + +static const struct sdhci_pci_fixes sdhci_ricoh = { + .probe = ricoh_probe, + .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR, +}; + +static const struct sdhci_pci_fixes sdhci_ene_712 = { + .quirks = SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_BROKEN_DMA, +}; + +static const struct sdhci_pci_fixes sdhci_ene_714 = { + .quirks = SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | + SDHCI_QUIRK_BROKEN_DMA, +}; + +static const struct sdhci_pci_fixes sdhci_cafe = { + .quirks = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | + SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, +}; + +static int jmicron_pmos(struct sdhci_pci_chip *chip, int on) +{ + u8 scratch; + int ret; + + ret = pci_read_config_byte(chip->pdev, 0xAE, &scratch); + if (ret) + return ret; + + /* + * Turn PMOS on [bit 0], set over current detection to 2.4 V + * [bit 1:2] and enable over current debouncing [bit 6]. + */ + if (on) + scratch |= 0x47; + else + scratch &= ~0x47; + + ret = pci_write_config_byte(chip->pdev, 0xAE, scratch); + if (ret) + return ret; + + return 0; +} + +static int jmicron_probe(struct sdhci_pci_chip *chip) +{ + int ret; + + if (chip->pdev->revision == 0) { + chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR | + SDHCI_QUIRK_32BIT_DMA_SIZE | + SDHCI_QUIRK_32BIT_ADMA_SIZE | + SDHCI_QUIRK_RESET_AFTER_REQUEST; + } + + /* + * JMicron chips can have two interfaces to the same hardware + * in order to work around limitations in Microsoft's driver. + * We need to make sure we only bind to one of them. + * + * This code assumes two things: + * + * 1. The PCI code adds subfunctions in order. + * + * 2. The MMC interface has a lower subfunction number + * than the SD interface. + */ + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD) { + struct pci_dev *sd_dev; + + sd_dev = NULL; + while ((sd_dev = pci_get_device(PCI_VENDOR_ID_JMICRON, + PCI_DEVICE_ID_JMICRON_JMB38X_MMC, sd_dev)) != NULL) { + if ((PCI_SLOT(chip->pdev->devfn) == + PCI_SLOT(sd_dev->devfn)) && + (chip->pdev->bus == sd_dev->bus)) + break; + } + + if (sd_dev) { + pci_dev_put(sd_dev); + dev_info(&chip->pdev->dev, "Refusing to bind to " + "secondary interface.\n"); + return -ENODEV; + } + } + + /* + * JMicron chips need a bit of a nudge to enable the power + * output pins. + */ + ret = jmicron_pmos(chip, 1); + if (ret) { + dev_err(&chip->pdev->dev, "Failure enabling card power\n"); + return ret; + } + + return 0; +} + +static void jmicron_enable_mmc(struct sdhci_host *host, int on) +{ + u8 scratch; + + scratch = readb(host->ioaddr + 0xC0); + + if (on) + scratch |= 0x01; + else + scratch &= ~0x01; + + writeb(scratch, host->ioaddr + 0xC0); +} + +static int jmicron_probe_slot(struct sdhci_pci_slot *slot) +{ + if (slot->chip->pdev->revision == 0) { + u16 version; + + version = readl(slot->host->ioaddr + SDHCI_HOST_VERSION); + version = (version & SDHCI_VENDOR_VER_MASK) >> + SDHCI_VENDOR_VER_SHIFT; + + /* + * Older versions of the chip have lots of nasty glitches + * in the ADMA engine. It's best just to avoid it + * completely. + */ + if (version < 0xAC) + slot->host->quirks |= SDHCI_QUIRK_BROKEN_ADMA; + } + + /* + * The secondary interface requires a bit set to get the + * interrupts. + */ + if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) + jmicron_enable_mmc(slot->host, 1); + + return 0; +} + +static void jmicron_remove_slot(struct sdhci_pci_slot *slot, int dead) +{ + if (dead) + return; + + if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) + jmicron_enable_mmc(slot->host, 0); +} + +static int jmicron_suspend(struct sdhci_pci_chip *chip, pm_message_t state) +{ + int i; + + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) { + for (i = 0;i < chip->num_slots;i++) + jmicron_enable_mmc(chip->slots[i]->host, 0); + } + + return 0; +} + +static int jmicron_resume(struct sdhci_pci_chip *chip) +{ + int ret, i; + + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) { + for (i = 0;i < chip->num_slots;i++) + jmicron_enable_mmc(chip->slots[i]->host, 1); + } + + ret = jmicron_pmos(chip, 1); + if (ret) { + dev_err(&chip->pdev->dev, "Failure enabling card power\n"); + return ret; + } + + return 0; +} + +static const struct sdhci_pci_fixes sdhci_jmicron = { + .probe = jmicron_probe, + + .probe_slot = jmicron_probe_slot, + .remove_slot = jmicron_remove_slot, + + .suspend = jmicron_suspend, + .resume = jmicron_resume, +}; + +static const struct pci_device_id pci_ids[] __devinitdata = { + { + .vendor = PCI_VENDOR_ID_RICOH, + .device = PCI_DEVICE_ID_RICOH_R5C822, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_ricoh, + }, + + { + .vendor = PCI_VENDOR_ID_ENE, + .device = PCI_DEVICE_ID_ENE_CB712_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_ene_712, + }, + + { + .vendor = PCI_VENDOR_ID_ENE, + .device = PCI_DEVICE_ID_ENE_CB712_SD_2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_ene_712, + }, + + { + .vendor = PCI_VENDOR_ID_ENE, + .device = PCI_DEVICE_ID_ENE_CB714_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_ene_714, + }, + + { + .vendor = PCI_VENDOR_ID_ENE, + .device = PCI_DEVICE_ID_ENE_CB714_SD_2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_ene_714, + }, + + { + .vendor = PCI_VENDOR_ID_MARVELL, + .device = PCI_DEVICE_ID_MARVELL_CAFE_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_cafe, + }, + + { + .vendor = PCI_VENDOR_ID_JMICRON, + .device = PCI_DEVICE_ID_JMICRON_JMB38X_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_jmicron, + }, + + { + .vendor = PCI_VENDOR_ID_JMICRON, + .device = PCI_DEVICE_ID_JMICRON_JMB38X_MMC, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_jmicron, + }, + + { /* Generic SD host controller */ + PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00) + }, + + { /* end: all zeroes */ }, +}; + +MODULE_DEVICE_TABLE(pci, pci_ids); + +/*****************************************************************************\ + * * + * SDHCI core callbacks * + * * +\*****************************************************************************/ + +static int sdhci_pci_enable_dma(struct sdhci_host *host) +{ + struct sdhci_pci_slot *slot; + struct pci_dev *pdev; + int ret; + + slot = sdhci_priv(host); + pdev = slot->chip->pdev; + + if (((pdev->class & 0xFFFF00) == (PCI_CLASS_SYSTEM_SDHCI << 8)) && + ((pdev->class & 0x0000FF) != PCI_SDHCI_IFDMA) && + (host->flags & SDHCI_USE_DMA)) { + dev_warn(&pdev->dev, "Will use DMA mode even though HW " + "doesn't fully claim to support it.\n"); + } + + ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (ret) + return ret; + + pci_set_master(pdev); + + return 0; +} + +static struct sdhci_ops sdhci_pci_ops = { + .enable_dma = sdhci_pci_enable_dma, +}; + +/*****************************************************************************\ + * * + * Suspend/resume * + * * +\*****************************************************************************/ + +#ifdef CONFIG_PM + +static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot; + int i, ret; + + chip = pci_get_drvdata(pdev); + if (!chip) + return 0; + + for (i = 0;i < chip->num_slots;i++) { + slot = chip->slots[i]; + if (!slot) + continue; + + ret = sdhci_suspend_host(slot->host, state); + + if (ret) { + for (i--;i >= 0;i--) + sdhci_resume_host(chip->slots[i]->host); + return ret; + } + } + + if (chip->fixes && chip->fixes->suspend) { + ret = chip->fixes->suspend(chip, state); + if (ret) { + for (i = chip->num_slots - 1;i >= 0;i--) + sdhci_resume_host(chip->slots[i]->host); + return ret; + } + } + + pci_save_state(pdev); + pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + return 0; +} + +static int sdhci_pci_resume (struct pci_dev *pdev) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot; + int i, ret; + + chip = pci_get_drvdata(pdev); + if (!chip) + return 0; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + ret = pci_enable_device(pdev); + if (ret) + return ret; + + if (chip->fixes && chip->fixes->resume) { + ret = chip->fixes->resume(chip); + if (ret) + return ret; + } + + for (i = 0;i < chip->num_slots;i++) { + slot = chip->slots[i]; + if (!slot) + continue; + + ret = sdhci_resume_host(slot->host); + if (ret) + return ret; + } + + return 0; +} + +#else /* CONFIG_PM */ + +#define sdhci_pci_suspend NULL +#define sdhci_pci_resume NULL + +#endif /* CONFIG_PM */ + +/*****************************************************************************\ + * * + * Device probing/removal * + * * +\*****************************************************************************/ + +static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot( + struct pci_dev *pdev, struct sdhci_pci_chip *chip, int bar) +{ + struct sdhci_pci_slot *slot; + struct sdhci_host *host; + + resource_size_t addr; + + int ret; + + if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar); + return ERR_PTR(-ENODEV); + } + + if (pci_resource_len(pdev, bar) != 0x100) { + dev_err(&pdev->dev, "Invalid iomem size. You may " + "experience problems.\n"); + } + + if ((pdev->class & 0x0000FF) == PCI_SDHCI_IFVENDOR) { + dev_err(&pdev->dev, "Vendor specific interface. Aborting.\n"); + return ERR_PTR(-ENODEV); + } + + if ((pdev->class & 0x0000FF) > PCI_SDHCI_IFVENDOR) { + dev_err(&pdev->dev, "Unknown interface. Aborting.\n"); + return ERR_PTR(-ENODEV); + } + + host = sdhci_alloc_host(&pdev->dev, sizeof(struct sdhci_pci_slot)); + if (IS_ERR(host)) { + ret = PTR_ERR(host); + goto unmap; + } + + slot = sdhci_priv(host); + + slot->chip = chip; + slot->host = host; + slot->pci_bar = bar; + + host->hw_name = "PCI"; + host->ops = &sdhci_pci_ops; + host->quirks = chip->quirks; + + host->irq = pdev->irq; + + ret = pci_request_region(pdev, bar, mmc_hostname(host->mmc)); + if (ret) { + dev_err(&pdev->dev, "cannot request region\n"); + return ERR_PTR(ret); + } + + addr = pci_resource_start(pdev, bar); + host->ioaddr = ioremap_nocache(addr, pci_resource_len(pdev, bar)); + if (!host->ioaddr) { + dev_err(&pdev->dev, "failed to remap registers\n"); + goto release; + } + + if (chip->fixes && chip->fixes->probe_slot) { + ret = chip->fixes->probe_slot(slot); + if (ret) + goto unmap; + } + + ret = sdhci_add_host(host); + if (ret) + goto remove; + + return slot; + +remove: + if (chip->fixes && chip->fixes->remove_slot) + chip->fixes->remove_slot(slot, 0); + +unmap: + iounmap(host->ioaddr); + +release: + pci_release_region(pdev, bar); + sdhci_free_host(host); + + return ERR_PTR(ret); +} + +static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot) +{ + int dead; + u32 scratch; + + dead = 0; + scratch = readl(slot->host->ioaddr + SDHCI_INT_STATUS); + if (scratch == (u32)-1) + dead = 1; + + sdhci_remove_host(slot->host, dead); + + if (slot->chip->fixes && slot->chip->fixes->remove_slot) + slot->chip->fixes->remove_slot(slot, dead); + + pci_release_region(slot->chip->pdev, slot->pci_bar); + + sdhci_free_host(slot->host); +} + +static int __devinit sdhci_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot; + + u8 slots, rev, first_bar; + int ret, i; + + BUG_ON(pdev == NULL); + BUG_ON(ent == NULL); + + pci_read_config_byte(pdev, PCI_CLASS_REVISION, &rev); + + dev_info(&pdev->dev, "SDHCI controller found [%04x:%04x] (rev %x)\n", + (int)pdev->vendor, (int)pdev->device, (int)rev); + + ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slots); + if (ret) + return ret; + + slots = PCI_SLOT_INFO_SLOTS(slots) + 1; + dev_dbg(&pdev->dev, "found %d slot(s)\n", slots); + if (slots == 0) + return -ENODEV; + + BUG_ON(slots > MAX_SLOTS); + + ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &first_bar); + if (ret) + return ret; + + first_bar &= PCI_SLOT_INFO_FIRST_BAR_MASK; + + if (first_bar > 5) { + dev_err(&pdev->dev, "Invalid first BAR. Aborting.\n"); + return -ENODEV; + } + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + chip = kzalloc(sizeof(struct sdhci_pci_chip), GFP_KERNEL); + if (!chip) { + ret = -ENOMEM; + goto err; + } + + chip->pdev = pdev; + chip->fixes = (const struct sdhci_pci_fixes*)ent->driver_data; + if (chip->fixes) + chip->quirks = chip->fixes->quirks; + chip->num_slots = slots; + + pci_set_drvdata(pdev, chip); + + if (chip->fixes && chip->fixes->probe) { + ret = chip->fixes->probe(chip); + if (ret) + goto free; + } + + for (i = 0;i < slots;i++) { + slot = sdhci_pci_probe_slot(pdev, chip, first_bar + i); + if (IS_ERR(slot)) { + for (i--;i >= 0;i--) + sdhci_pci_remove_slot(chip->slots[i]); + ret = PTR_ERR(slot); + goto free; + } + + chip->slots[i] = slot; + } + + return 0; + +free: + pci_set_drvdata(pdev, NULL); + kfree(chip); + +err: + pci_disable_device(pdev); + return ret; +} + +static void __devexit sdhci_pci_remove(struct pci_dev *pdev) +{ + int i; + struct sdhci_pci_chip *chip; + + chip = pci_get_drvdata(pdev); + + if (chip) { + for (i = 0;i < chip->num_slots; i++) + sdhci_pci_remove_slot(chip->slots[i]); + + pci_set_drvdata(pdev, NULL); + kfree(chip); + } + + pci_disable_device(pdev); +} + +static struct pci_driver sdhci_driver = { + .name = "sdhci-pci", + .id_table = pci_ids, + .probe = sdhci_pci_probe, + .remove = __devexit_p(sdhci_pci_remove), + .suspend = sdhci_pci_suspend, + .resume = sdhci_pci_resume, +}; + +/*****************************************************************************\ + * * + * Driver init/exit * + * * +\*****************************************************************************/ + +static int __init sdhci_drv_init(void) +{ + return pci_register_driver(&sdhci_driver); +} + +static void __exit sdhci_drv_exit(void) +{ + pci_unregister_driver(&sdhci_driver); +} + +module_init(sdhci_drv_init); +module_exit(sdhci_drv_exit); + +MODULE_AUTHOR("Pierre Ossman "); +MODULE_DESCRIPTION("Secure Digital Host Controller Interface PCI driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b413aa6c246b..17701c3da733 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include @@ -32,135 +32,6 @@ static unsigned int debug_quirks = 0; -/* - * Different quirks to handle when the hardware deviates from a strict - * interpretation of the SDHCI specification. - */ - -/* Controller doesn't honor resets unless we touch the clock register */ -#define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1<<0) -/* Controller has bad caps bits, but really supports DMA */ -#define SDHCI_QUIRK_FORCE_DMA (1<<1) -/* Controller doesn't like to be reset when there is no card inserted. */ -#define SDHCI_QUIRK_NO_CARD_NO_RESET (1<<2) -/* Controller doesn't like clearing the power reg before a change */ -#define SDHCI_QUIRK_SINGLE_POWER_WRITE (1<<3) -/* Controller has flaky internal state so reset it on each ios change */ -#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS (1<<4) -/* Controller has an unusable DMA engine */ -#define SDHCI_QUIRK_BROKEN_DMA (1<<5) -/* Controller can only DMA from 32-bit aligned addresses */ -#define SDHCI_QUIRK_32BIT_DMA_ADDR (1<<6) -/* Controller can only DMA chunk sizes that are a multiple of 32 bits */ -#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<7) -/* Controller needs to be reset after each request to stay stable */ -#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<8) -/* Controller needs voltage and power writes to happen separately */ -#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<9) -/* Controller has an off-by-one issue with timeout value */ -#define SDHCI_QUIRK_INCR_TIMEOUT_CONTROL (1<<10) - -static const struct pci_device_id pci_ids[] __devinitdata = { - { - .vendor = PCI_VENDOR_ID_RICOH, - .device = PCI_DEVICE_ID_RICOH_R5C822, - .subvendor = PCI_VENDOR_ID_IBM, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_CLOCK_BEFORE_RESET | - SDHCI_QUIRK_FORCE_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_RICOH, - .device = PCI_DEVICE_ID_RICOH_R5C822, - .subvendor = PCI_VENDOR_ID_SAMSUNG, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_FORCE_DMA | - SDHCI_QUIRK_NO_CARD_NO_RESET, - }, - - { - .vendor = PCI_VENDOR_ID_RICOH, - .device = PCI_DEVICE_ID_RICOH_R5C822, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_FORCE_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_TI, - .device = PCI_DEVICE_ID_TI_XX21_XX11_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_FORCE_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_ENE, - .device = PCI_DEVICE_ID_ENE_CB712_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_BROKEN_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_ENE, - .device = PCI_DEVICE_ID_ENE_CB712_SD_2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_BROKEN_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_ENE, - .device = PCI_DEVICE_ID_ENE_CB714_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | - SDHCI_QUIRK_BROKEN_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_ENE, - .device = PCI_DEVICE_ID_ENE_CB714_SD_2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS | - SDHCI_QUIRK_BROKEN_DMA, - }, - - { - .vendor = PCI_VENDOR_ID_MARVELL, - .device = PCI_DEVICE_ID_MARVELL_CAFE_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | - SDHCI_QUIRK_INCR_TIMEOUT_CONTROL, - }, - - { - .vendor = PCI_VENDOR_ID_JMICRON, - .device = PCI_DEVICE_ID_JMICRON_JMB38X_SD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = SDHCI_QUIRK_32BIT_DMA_ADDR | - SDHCI_QUIRK_32BIT_DMA_SIZE | - SDHCI_QUIRK_RESET_AFTER_REQUEST, - }, - - { /* Generic SD host controller */ - PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00) - }, - - { /* end: all zeroes */ }, -}; - -MODULE_DEVICE_TABLE(pci, pci_ids); - static void sdhci_prepare_data(struct sdhci_host *, struct mmc_data *); static void sdhci_finish_data(struct sdhci_host *); @@ -215,7 +86,7 @@ static void sdhci_reset(struct sdhci_host *host, u8 mask) { unsigned long timeout; - if (host->chip->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) { + if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) { if (!(readl(host->ioaddr + SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT)) return; @@ -253,7 +124,8 @@ static void sdhci_init(struct sdhci_host *host) SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT | SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT | SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | - SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE; + SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE | + SDHCI_INT_ADMA_ERROR; writel(intmask, host->ioaddr + SDHCI_INT_ENABLE); writel(intmask, host->ioaddr + SDHCI_SIGNAL_ENABLE); @@ -443,23 +315,226 @@ static void sdhci_transfer_pio(struct sdhci_host *host) DBG("PIO transfer complete.\n"); } -static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) +static char *sdhci_kmap_atomic(struct scatterlist *sg, unsigned long *flags) { - u8 count; - unsigned target_timeout, current_timeout; + local_irq_save(*flags); + return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; +} - WARN_ON(host->data); +static void sdhci_kunmap_atomic(void *buffer, unsigned long *flags) +{ + kunmap_atomic(buffer, KM_BIO_SRC_IRQ); + local_irq_restore(*flags); +} - if (data == NULL) - return; +static int sdhci_adma_table_pre(struct sdhci_host *host, + struct mmc_data *data) +{ + int direction; - /* Sanity checks */ - BUG_ON(data->blksz * data->blocks > 524288); - BUG_ON(data->blksz > host->mmc->max_blk_size); - BUG_ON(data->blocks > 65535); + u8 *desc; + u8 *align; + dma_addr_t addr; + dma_addr_t align_addr; + int len, offset; - host->data = data; - host->data_early = 0; + struct scatterlist *sg; + int i; + char *buffer; + unsigned long flags; + + /* + * The spec does not specify endianness of descriptor table. + * We currently guess that it is LE. + */ + + if (data->flags & MMC_DATA_READ) + direction = DMA_FROM_DEVICE; + else + direction = DMA_TO_DEVICE; + + /* + * The ADMA descriptor table is mapped further down as we + * need to fill it with data first. + */ + + host->align_addr = dma_map_single(mmc_dev(host->mmc), + host->align_buffer, 128 * 4, direction); + if (dma_mapping_error(host->align_addr)) + goto fail; + BUG_ON(host->align_addr & 0x3); + + host->sg_count = dma_map_sg(mmc_dev(host->mmc), + data->sg, data->sg_len, direction); + if (host->sg_count == 0) + goto unmap_align; + + desc = host->adma_desc; + align = host->align_buffer; + + align_addr = host->align_addr; + + for_each_sg(data->sg, sg, host->sg_count, i) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + + /* + * The SDHCI specification states that ADMA + * addresses must be 32-bit aligned. If they + * aren't, then we use a bounce buffer for + * the (up to three) bytes that screw up the + * alignment. + */ + offset = (4 - (addr & 0x3)) & 0x3; + if (offset) { + if (data->flags & MMC_DATA_WRITE) { + buffer = sdhci_kmap_atomic(sg, &flags); + memcpy(align, buffer, offset); + sdhci_kunmap_atomic(buffer, &flags); + } + + desc[7] = (align_addr >> 24) & 0xff; + desc[6] = (align_addr >> 16) & 0xff; + desc[5] = (align_addr >> 8) & 0xff; + desc[4] = (align_addr >> 0) & 0xff; + + BUG_ON(offset > 65536); + + desc[3] = (offset >> 8) & 0xff; + desc[2] = (offset >> 0) & 0xff; + + desc[1] = 0x00; + desc[0] = 0x21; /* tran, valid */ + + align += 4; + align_addr += 4; + + desc += 8; + + addr += offset; + len -= offset; + } + + desc[7] = (addr >> 24) & 0xff; + desc[6] = (addr >> 16) & 0xff; + desc[5] = (addr >> 8) & 0xff; + desc[4] = (addr >> 0) & 0xff; + + BUG_ON(len > 65536); + + desc[3] = (len >> 8) & 0xff; + desc[2] = (len >> 0) & 0xff; + + desc[1] = 0x00; + desc[0] = 0x21; /* tran, valid */ + + desc += 8; + + /* + * If this triggers then we have a calculation bug + * somewhere. :/ + */ + WARN_ON((desc - host->adma_desc) > (128 * 2 + 1) * 4); + } + + /* + * Add a terminating entry. + */ + desc[7] = 0; + desc[6] = 0; + desc[5] = 0; + desc[4] = 0; + + desc[3] = 0; + desc[2] = 0; + + desc[1] = 0x00; + desc[0] = 0x03; /* nop, end, valid */ + + /* + * Resync align buffer as we might have changed it. + */ + if (data->flags & MMC_DATA_WRITE) { + dma_sync_single_for_device(mmc_dev(host->mmc), + host->align_addr, 128 * 4, direction); + } + + host->adma_addr = dma_map_single(mmc_dev(host->mmc), + host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE); + if (dma_mapping_error(host->align_addr)) + goto unmap_entries; + BUG_ON(host->adma_addr & 0x3); + + return 0; + +unmap_entries: + dma_unmap_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, direction); +unmap_align: + dma_unmap_single(mmc_dev(host->mmc), host->align_addr, + 128 * 4, direction); +fail: + return -EINVAL; +} + +static void sdhci_adma_table_post(struct sdhci_host *host, + struct mmc_data *data) +{ + int direction; + + struct scatterlist *sg; + int i, size; + u8 *align; + char *buffer; + unsigned long flags; + + if (data->flags & MMC_DATA_READ) + direction = DMA_FROM_DEVICE; + else + direction = DMA_TO_DEVICE; + + dma_unmap_single(mmc_dev(host->mmc), host->adma_addr, + (128 * 2 + 1) * 4, DMA_TO_DEVICE); + + dma_unmap_single(mmc_dev(host->mmc), host->align_addr, + 128 * 4, direction); + + if (data->flags & MMC_DATA_READ) { + dma_sync_sg_for_cpu(mmc_dev(host->mmc), data->sg, + data->sg_len, direction); + + align = host->align_buffer; + + for_each_sg(data->sg, sg, host->sg_count, i) { + if (sg_dma_address(sg) & 0x3) { + size = 4 - (sg_dma_address(sg) & 0x3); + + buffer = sdhci_kmap_atomic(sg, &flags); + memcpy(buffer, align, size); + sdhci_kunmap_atomic(buffer, &flags); + + align += 4; + } + } + } + + dma_unmap_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, direction); +} + +static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_data *data) +{ + u8 count; + unsigned target_timeout, current_timeout; + + /* + * If the host controller provides us with an incorrect timeout + * value, just skip the check and use 0xE. The hardware may take + * longer to time out, but that's much better than having a too-short + * timeout value. + */ + if ((host->quirks & SDHCI_QUIRK_BROKEN_TIMEOUT_VAL)) + return 0xE; /* timeout in us */ target_timeout = data->timeout_ns / 1000 + @@ -484,52 +559,158 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) break; } - /* - * Compensate for an off-by-one error in the CaFe hardware; otherwise, - * a too-small count gives us interrupt timeouts. - */ - if ((host->chip->quirks & SDHCI_QUIRK_INCR_TIMEOUT_CONTROL)) - count++; - if (count >= 0xF) { printk(KERN_WARNING "%s: Too large timeout requested!\n", mmc_hostname(host->mmc)); count = 0xE; } + return count; +} + +static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) +{ + u8 count; + u8 ctrl; + int ret; + + WARN_ON(host->data); + + if (data == NULL) + return; + + /* Sanity checks */ + BUG_ON(data->blksz * data->blocks > 524288); + BUG_ON(data->blksz > host->mmc->max_blk_size); + BUG_ON(data->blocks > 65535); + + host->data = data; + host->data_early = 0; + + count = sdhci_calc_timeout(host, data); writeb(count, host->ioaddr + SDHCI_TIMEOUT_CONTROL); if (host->flags & SDHCI_USE_DMA) host->flags |= SDHCI_REQ_USE_DMA; - if (unlikely((host->flags & SDHCI_REQ_USE_DMA) && - (host->chip->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) && - ((data->blksz * data->blocks) & 0x3))) { - DBG("Reverting to PIO because of transfer size (%d)\n", - data->blksz * data->blocks); - host->flags &= ~SDHCI_REQ_USE_DMA; + /* + * FIXME: This doesn't account for merging when mapping the + * scatterlist. + */ + if (host->flags & SDHCI_REQ_USE_DMA) { + int broken, i; + struct scatterlist *sg; + + broken = 0; + if (host->flags & SDHCI_USE_ADMA) { + if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE) + broken = 1; + } else { + if (host->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) + broken = 1; + } + + if (unlikely(broken)) { + for_each_sg(data->sg, sg, data->sg_len, i) { + if (sg->length & 0x3) { + DBG("Reverting to PIO because of " + "transfer size (%d)\n", + sg->length); + host->flags &= ~SDHCI_REQ_USE_DMA; + break; + } + } + } } /* * The assumption here being that alignment is the same after * translation to device address space. */ - if (unlikely((host->flags & SDHCI_REQ_USE_DMA) && - (host->chip->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) && - (data->sg->offset & 0x3))) { - DBG("Reverting to PIO because of bad alignment\n"); - host->flags &= ~SDHCI_REQ_USE_DMA; + if (host->flags & SDHCI_REQ_USE_DMA) { + int broken, i; + struct scatterlist *sg; + + broken = 0; + if (host->flags & SDHCI_USE_ADMA) { + /* + * As we use 3 byte chunks to work around + * alignment problems, we need to check this + * quirk. + */ + if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE) + broken = 1; + } else { + if (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) + broken = 1; + } + + if (unlikely(broken)) { + for_each_sg(data->sg, sg, data->sg_len, i) { + if (sg->offset & 0x3) { + DBG("Reverting to PIO because of " + "bad alignment\n"); + host->flags &= ~SDHCI_REQ_USE_DMA; + break; + } + } + } } if (host->flags & SDHCI_REQ_USE_DMA) { - int count; + if (host->flags & SDHCI_USE_ADMA) { + ret = sdhci_adma_table_pre(host, data); + if (ret) { + /* + * This only happens when someone fed + * us an invalid request. + */ + WARN_ON(1); + host->flags &= ~SDHCI_USE_DMA; + } else { + writel(host->adma_addr, + host->ioaddr + SDHCI_ADMA_ADDRESS); + } + } else { + int sg_cnt; + + sg_cnt = dma_map_sg(mmc_dev(host->mmc), + data->sg, data->sg_len, + (data->flags & MMC_DATA_READ) ? + DMA_FROM_DEVICE : + DMA_TO_DEVICE); + if (sg_cnt == 0) { + /* + * This only happens when someone fed + * us an invalid request. + */ + WARN_ON(1); + host->flags &= ~SDHCI_USE_DMA; + } else { + WARN_ON(count != 1); + writel(sg_dma_address(data->sg), + host->ioaddr + SDHCI_DMA_ADDRESS); + } + } + } - count = pci_map_sg(host->chip->pdev, data->sg, data->sg_len, - (data->flags & MMC_DATA_READ)?PCI_DMA_FROMDEVICE:PCI_DMA_TODEVICE); - BUG_ON(count != 1); + /* + * Always adjust the DMA selection as some controllers + * (e.g. JMicron) can't do PIO properly when the selection + * is ADMA. + */ + if (host->version >= SDHCI_SPEC_200) { + ctrl = readb(host->ioaddr + SDHCI_HOST_CONTROL); + ctrl &= ~SDHCI_CTRL_DMA_MASK; + if ((host->flags & SDHCI_REQ_USE_DMA) && + (host->flags & SDHCI_USE_ADMA)) + ctrl |= SDHCI_CTRL_ADMA32; + else + ctrl |= SDHCI_CTRL_SDMA; + writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL); + } - writel(sg_dma_address(data->sg), host->ioaddr + SDHCI_DMA_ADDRESS); - } else { + if (!(host->flags & SDHCI_REQ_USE_DMA)) { host->cur_sg = data->sg; host->num_sg = data->sg_len; @@ -567,7 +748,6 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host, static void sdhci_finish_data(struct sdhci_host *host) { struct mmc_data *data; - u16 blocks; BUG_ON(!host->data); @@ -575,25 +755,26 @@ static void sdhci_finish_data(struct sdhci_host *host) host->data = NULL; if (host->flags & SDHCI_REQ_USE_DMA) { - pci_unmap_sg(host->chip->pdev, data->sg, data->sg_len, - (data->flags & MMC_DATA_READ)?PCI_DMA_FROMDEVICE:PCI_DMA_TODEVICE); + if (host->flags & SDHCI_USE_ADMA) + sdhci_adma_table_post(host, data); + else { + dma_unmap_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, (data->flags & MMC_DATA_READ) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } } /* - * Controller doesn't count down when in single block mode. + * The specification states that the block count register must + * be updated, but it does not specify at what point in the + * data flow. That makes the register entirely useless to read + * back so we have to assume that nothing made it to the card + * in the event of an error. */ - if (data->blocks == 1) - blocks = (data->error == 0) ? 0 : 1; + if (data->error) + data->bytes_xfered = 0; else - blocks = readw(host->ioaddr + SDHCI_BLOCK_COUNT); - data->bytes_xfered = data->blksz * (data->blocks - blocks); - - if (!data->error && blocks) { - printk(KERN_ERR "%s: Controller signalled completion even " - "though there were blocks left.\n", - mmc_hostname(host->mmc)); - data->error = -EIO; - } + data->bytes_xfered = data->blksz * data->blocks; if (data->stop) { /* @@ -775,7 +956,7 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power) * Spec says that we should clear the power reg before setting * a new value. Some controllers don't seem to like this though. */ - if (!(host->chip->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE)) + if (!(host->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE)) writeb(0, host->ioaddr + SDHCI_POWER_CONTROL); pwr = SDHCI_POWER_ON; @@ -797,10 +978,10 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power) } /* - * At least the CaFe chip gets confused if we set the voltage + * At least the Marvell CaFe chip gets confused if we set the voltage * and set turn on power at the same time, so set the voltage first. */ - if ((host->chip->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)) + if ((host->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)) writeb(pwr & ~SDHCI_POWER_ON, host->ioaddr + SDHCI_POWER_CONTROL); @@ -833,7 +1014,8 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->mrq = mrq; - if (!(readl(host->ioaddr + SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT)) { + if (!(readl(host->ioaddr + SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT) + || (host->flags & SDHCI_DEVICE_DEAD)) { host->mrq->cmd->error = -ENOMEDIUM; tasklet_schedule(&host->finish_tasklet); } else @@ -853,6 +1035,9 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) spin_lock_irqsave(&host->lock, flags); + if (host->flags & SDHCI_DEVICE_DEAD) + goto out; + /* * Reset the chip on each power off. * Should clear out any weird states. @@ -888,9 +1073,10 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) * signalling timeout and CRC errors even on CMD0. Resetting * it on each ios seems to solve the problem. */ - if(host->chip->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS) + if(host->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS) sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); +out: mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } @@ -905,7 +1091,10 @@ static int sdhci_get_ro(struct mmc_host *mmc) spin_lock_irqsave(&host->lock, flags); - present = readl(host->ioaddr + SDHCI_PRESENT_STATE); + if (host->flags & SDHCI_DEVICE_DEAD) + present = 0; + else + present = readl(host->ioaddr + SDHCI_PRESENT_STATE); spin_unlock_irqrestore(&host->lock, flags); @@ -922,6 +1111,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) spin_lock_irqsave(&host->lock, flags); + if (host->flags & SDHCI_DEVICE_DEAD) + goto out; + ier = readl(host->ioaddr + SDHCI_INT_ENABLE); ier &= ~SDHCI_INT_CARD_INT; @@ -931,6 +1123,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) writel(ier, host->ioaddr + SDHCI_INT_ENABLE); writel(ier, host->ioaddr + SDHCI_SIGNAL_ENABLE); +out: mmiowb(); spin_unlock_irqrestore(&host->lock, flags); @@ -996,13 +1189,14 @@ static void sdhci_tasklet_finish(unsigned long param) * The controller needs a reset of internal state machines * upon error conditions. */ - if (mrq->cmd->error || - (mrq->data && (mrq->data->error || - (mrq->data->stop && mrq->data->stop->error))) || - (host->chip->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST)) { + if (!(host->flags & SDHCI_DEVICE_DEAD) && + (mrq->cmd->error || + (mrq->data && (mrq->data->error || + (mrq->data->stop && mrq->data->stop->error))) || + (host->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST))) { /* Some controllers need this kick or reset won't work here */ - if (host->chip->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) { + if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) { unsigned int clock; /* This is to force an update */ @@ -1116,6 +1310,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) host->data->error = -ETIMEDOUT; else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT)) host->data->error = -EILSEQ; + else if (intmask & SDHCI_INT_ADMA_ERROR) + host->data->error = -EIO; if (host->data->error) sdhci_finish_data(host); @@ -1234,218 +1430,167 @@ out: #ifdef CONFIG_PM -static int sdhci_suspend (struct pci_dev *pdev, pm_message_t state) +int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state) { - struct sdhci_chip *chip; - int i, ret; - - chip = pci_get_drvdata(pdev); - if (!chip) - return 0; - - DBG("Suspending...\n"); - - for (i = 0;i < chip->num_slots;i++) { - if (!chip->hosts[i]) - continue; - ret = mmc_suspend_host(chip->hosts[i]->mmc, state); - if (ret) { - for (i--;i >= 0;i--) - mmc_resume_host(chip->hosts[i]->mmc); - return ret; - } - } - - pci_save_state(pdev); - pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); + int ret; - for (i = 0;i < chip->num_slots;i++) { - if (!chip->hosts[i]) - continue; - free_irq(chip->hosts[i]->irq, chip->hosts[i]); - } + ret = mmc_suspend_host(host->mmc, state); + if (ret) + return ret; - pci_disable_device(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); + free_irq(host->irq, host); return 0; } -static int sdhci_resume (struct pci_dev *pdev) -{ - struct sdhci_chip *chip; - int i, ret; +EXPORT_SYMBOL_GPL(sdhci_suspend_host); - chip = pci_get_drvdata(pdev); - if (!chip) - return 0; +int sdhci_resume_host(struct sdhci_host *host) +{ + int ret; - DBG("Resuming...\n"); + if (host->flags & SDHCI_USE_DMA) { + if (host->ops->enable_dma) + host->ops->enable_dma(host); + } - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - ret = pci_enable_device(pdev); + ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, + mmc_hostname(host->mmc), host); if (ret) return ret; - for (i = 0;i < chip->num_slots;i++) { - if (!chip->hosts[i]) - continue; - if (chip->hosts[i]->flags & SDHCI_USE_DMA) - pci_set_master(pdev); - ret = request_irq(chip->hosts[i]->irq, sdhci_irq, - IRQF_SHARED, mmc_hostname(chip->hosts[i]->mmc), - chip->hosts[i]); - if (ret) - return ret; - sdhci_init(chip->hosts[i]); - mmiowb(); - ret = mmc_resume_host(chip->hosts[i]->mmc); - if (ret) - return ret; - } + sdhci_init(host); + mmiowb(); + + ret = mmc_resume_host(host->mmc); + if (ret) + return ret; return 0; } -#else /* CONFIG_PM */ - -#define sdhci_suspend NULL -#define sdhci_resume NULL +EXPORT_SYMBOL_GPL(sdhci_resume_host); #endif /* CONFIG_PM */ /*****************************************************************************\ * * - * Device probing/removal * + * Device allocation/registration * * * \*****************************************************************************/ -static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) +struct sdhci_host *sdhci_alloc_host(struct device *dev, + size_t priv_size) { - int ret; - unsigned int version; - struct sdhci_chip *chip; struct mmc_host *mmc; struct sdhci_host *host; - u8 first_bar; - unsigned int caps; - - chip = pci_get_drvdata(pdev); - BUG_ON(!chip); - - ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &first_bar); - if (ret) - return ret; - - first_bar &= PCI_SLOT_INFO_FIRST_BAR_MASK; - - if (first_bar > 5) { - printk(KERN_ERR DRIVER_NAME ": Invalid first BAR. Aborting.\n"); - return -ENODEV; - } - - if (!(pci_resource_flags(pdev, first_bar + slot) & IORESOURCE_MEM)) { - printk(KERN_ERR DRIVER_NAME ": BAR is not iomem. Aborting.\n"); - return -ENODEV; - } - - if (pci_resource_len(pdev, first_bar + slot) != 0x100) { - printk(KERN_ERR DRIVER_NAME ": Invalid iomem size. " - "You may experience problems.\n"); - } - - if ((pdev->class & 0x0000FF) == PCI_SDHCI_IFVENDOR) { - printk(KERN_ERR DRIVER_NAME ": Vendor specific interface. Aborting.\n"); - return -ENODEV; - } - - if ((pdev->class & 0x0000FF) > PCI_SDHCI_IFVENDOR) { - printk(KERN_ERR DRIVER_NAME ": Unknown interface. Aborting.\n"); - return -ENODEV; - } + WARN_ON(dev == NULL); - mmc = mmc_alloc_host(sizeof(struct sdhci_host), &pdev->dev); + mmc = mmc_alloc_host(sizeof(struct sdhci_host) + priv_size, dev); if (!mmc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); host = mmc_priv(mmc); host->mmc = mmc; - host->chip = chip; - chip->hosts[slot] = host; + return host; +} - host->bar = first_bar + slot; +EXPORT_SYMBOL_GPL(sdhci_alloc_host); - host->addr = pci_resource_start(pdev, host->bar); - host->irq = pdev->irq; +int sdhci_add_host(struct sdhci_host *host) +{ + struct mmc_host *mmc; + unsigned int caps; + int ret; - DBG("slot %d at 0x%08lx, irq %d\n", slot, host->addr, host->irq); + WARN_ON(host == NULL); + if (host == NULL) + return -EINVAL; - ret = pci_request_region(pdev, host->bar, mmc_hostname(mmc)); - if (ret) - goto free; + mmc = host->mmc; - host->ioaddr = ioremap_nocache(host->addr, - pci_resource_len(pdev, host->bar)); - if (!host->ioaddr) { - ret = -ENOMEM; - goto release; - } + if (debug_quirks) + host->quirks = debug_quirks; sdhci_reset(host, SDHCI_RESET_ALL); - version = readw(host->ioaddr + SDHCI_HOST_VERSION); - version = (version & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT; - if (version > 1) { + host->version = readw(host->ioaddr + SDHCI_HOST_VERSION); + host->version = (host->version & SDHCI_SPEC_VER_MASK) + >> SDHCI_SPEC_VER_SHIFT; + if (host->version > SDHCI_SPEC_200) { printk(KERN_ERR "%s: Unknown controller version (%d). " "You may experience problems.\n", mmc_hostname(mmc), - version); + host->version); } caps = readl(host->ioaddr + SDHCI_CAPABILITIES); - if (chip->quirks & SDHCI_QUIRK_FORCE_DMA) + if (host->quirks & SDHCI_QUIRK_FORCE_DMA) host->flags |= SDHCI_USE_DMA; else if (!(caps & SDHCI_CAN_DO_DMA)) DBG("Controller doesn't have DMA capability\n"); else host->flags |= SDHCI_USE_DMA; - if ((chip->quirks & SDHCI_QUIRK_BROKEN_DMA) && + if ((host->quirks & SDHCI_QUIRK_BROKEN_DMA) && (host->flags & SDHCI_USE_DMA)) { DBG("Disabling DMA as it is marked broken\n"); host->flags &= ~SDHCI_USE_DMA; } - if (((pdev->class & 0x0000FF) != PCI_SDHCI_IFDMA) && - (host->flags & SDHCI_USE_DMA)) { - printk(KERN_WARNING "%s: Will use DMA " - "mode even though HW doesn't fully " - "claim to support it.\n", mmc_hostname(mmc)); + if (host->flags & SDHCI_USE_DMA) { + if ((host->version >= SDHCI_SPEC_200) && + (caps & SDHCI_CAN_DO_ADMA2)) + host->flags |= SDHCI_USE_ADMA; + } + + if ((host->quirks & SDHCI_QUIRK_BROKEN_ADMA) && + (host->flags & SDHCI_USE_ADMA)) { + DBG("Disabling ADMA as it is marked broken\n"); + host->flags &= ~SDHCI_USE_ADMA; } if (host->flags & SDHCI_USE_DMA) { - if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { - printk(KERN_WARNING "%s: No suitable DMA available. " - "Falling back to PIO.\n", mmc_hostname(mmc)); - host->flags &= ~SDHCI_USE_DMA; + if (host->ops->enable_dma) { + if (host->ops->enable_dma(host)) { + printk(KERN_WARNING "%s: No suitable DMA " + "available. Falling back to PIO.\n", + mmc_hostname(mmc)); + host->flags &= ~(SDHCI_USE_DMA | SDHCI_USE_ADMA); + } } } - if (host->flags & SDHCI_USE_DMA) - pci_set_master(pdev); - else /* XXX: Hack to get MMC layer to avoid highmem */ - pdev->dma_mask = 0; + if (host->flags & SDHCI_USE_ADMA) { + /* + * We need to allocate descriptors for all sg entries + * (128) and potentially one alignment transfer for + * each of those entries. + */ + host->adma_desc = kmalloc((128 * 2 + 1) * 4, GFP_KERNEL); + host->align_buffer = kmalloc(128 * 4, GFP_KERNEL); + if (!host->adma_desc || !host->align_buffer) { + kfree(host->adma_desc); + kfree(host->align_buffer); + printk(KERN_WARNING "%s: Unable to allocate ADMA " + "buffers. Falling back to standard DMA.\n", + mmc_hostname(mmc)); + host->flags &= ~SDHCI_USE_ADMA; + } + } + + /* XXX: Hack to get MMC layer to avoid highmem */ + if (!(host->flags & SDHCI_USE_DMA)) + mmc_dev(host->mmc)->dma_mask = NULL; host->max_clk = (caps & SDHCI_CLOCK_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; if (host->max_clk == 0) { printk(KERN_ERR "%s: Hardware doesn't specify base clock " "frequency.\n", mmc_hostname(mmc)); - ret = -ENODEV; - goto unmap; + return -ENODEV; } host->max_clk *= 1000000; @@ -1454,8 +1599,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) if (host->timeout_clk == 0) { printk(KERN_ERR "%s: Hardware doesn't specify timeout clock " "frequency.\n", mmc_hostname(mmc)); - ret = -ENODEV; - goto unmap; + return -ENODEV; } if (caps & SDHCI_TIMEOUT_CLK_UNIT) host->timeout_clk *= 1000; @@ -1466,7 +1610,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) mmc->ops = &sdhci_ops; mmc->f_min = host->max_clk / 256; mmc->f_max = host->max_clk; - mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE | MMC_CAP_SDIO_IRQ; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ; if (caps & SDHCI_CAN_DO_HISPD) mmc->caps |= MMC_CAP_SD_HIGHSPEED; @@ -1482,20 +1626,22 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) if (mmc->ocr_avail == 0) { printk(KERN_ERR "%s: Hardware doesn't report any " "support voltages.\n", mmc_hostname(mmc)); - ret = -ENODEV; - goto unmap; + return -ENODEV; } spin_lock_init(&host->lock); /* - * Maximum number of segments. Hardware cannot do scatter lists. + * Maximum number of segments. Depends on if the hardware + * can do scatter/gather or not. */ - if (host->flags & SDHCI_USE_DMA) + if (host->flags & SDHCI_USE_ADMA) + mmc->max_hw_segs = 128; + else if (host->flags & SDHCI_USE_DMA) mmc->max_hw_segs = 1; - else - mmc->max_hw_segs = 16; - mmc->max_phys_segs = 16; + else /* PIO */ + mmc->max_hw_segs = 128; + mmc->max_phys_segs = 128; /* * Maximum number of sectors in one transfer. Limited by DMA boundary @@ -1505,9 +1651,13 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) /* * Maximum segment size. Could be one segment with the maximum number - * of bytes. + * of bytes. When doing hardware scatter/gather, each entry cannot + * be larger than 64 KiB though. */ - mmc->max_seg_size = mmc->max_req_size; + if (host->flags & SDHCI_USE_ADMA) + mmc->max_seg_size = 65536; + else + mmc->max_seg_size = mmc->max_req_size; /* * Maximum block size. This varies from controller to controller and @@ -1553,7 +1703,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) host->led.default_trigger = mmc_hostname(mmc); host->led.brightness_set = sdhci_led_control; - ret = led_classdev_register(&pdev->dev, &host->led); + ret = led_classdev_register(mmc_dev(mmc), &host->led); if (ret) goto reset; #endif @@ -1562,8 +1712,9 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot) mmc_add_host(mmc); - printk(KERN_INFO "%s: SDHCI at 0x%08lx irq %d %s\n", - mmc_hostname(mmc), host->addr, host->irq, + printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s%s\n", + mmc_hostname(mmc), host->hw_name, mmc_dev(mmc)->bus_id, + (host->flags & SDHCI_USE_ADMA)?"A":"", (host->flags & SDHCI_USE_DMA)?"DMA":"PIO"); return 0; @@ -1576,35 +1727,40 @@ reset: untasklet: tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); -unmap: - iounmap(host->ioaddr); -release: - pci_release_region(pdev, host->bar); -free: - mmc_free_host(mmc); return ret; } -static void sdhci_remove_slot(struct pci_dev *pdev, int slot) +EXPORT_SYMBOL_GPL(sdhci_add_host); + +void sdhci_remove_host(struct sdhci_host *host, int dead) { - struct sdhci_chip *chip; - struct mmc_host *mmc; - struct sdhci_host *host; + unsigned long flags; - chip = pci_get_drvdata(pdev); - host = chip->hosts[slot]; - mmc = host->mmc; + if (dead) { + spin_lock_irqsave(&host->lock, flags); + + host->flags |= SDHCI_DEVICE_DEAD; + + if (host->mrq) { + printk(KERN_ERR "%s: Controller removed during " + " transfer!\n", mmc_hostname(host->mmc)); - chip->hosts[slot] = NULL; + host->mrq->cmd->error = -ENOMEDIUM; + tasklet_schedule(&host->finish_tasklet); + } + + spin_unlock_irqrestore(&host->lock, flags); + } - mmc_remove_host(mmc); + mmc_remove_host(host->mmc); #ifdef CONFIG_LEDS_CLASS led_classdev_unregister(&host->led); #endif - sdhci_reset(host, SDHCI_RESET_ALL); + if (!dead) + sdhci_reset(host, SDHCI_RESET_ALL); free_irq(host->irq, host); @@ -1613,106 +1769,21 @@ static void sdhci_remove_slot(struct pci_dev *pdev, int slot) tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); - iounmap(host->ioaddr); - - pci_release_region(pdev, host->bar); + kfree(host->adma_desc); + kfree(host->align_buffer); - mmc_free_host(mmc); + host->adma_desc = NULL; + host->align_buffer = NULL; } -static int __devinit sdhci_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - int ret, i; - u8 slots, rev; - struct sdhci_chip *chip; - - BUG_ON(pdev == NULL); - BUG_ON(ent == NULL); +EXPORT_SYMBOL_GPL(sdhci_remove_host); - pci_read_config_byte(pdev, PCI_CLASS_REVISION, &rev); - - printk(KERN_INFO DRIVER_NAME - ": SDHCI controller found at %s [%04x:%04x] (rev %x)\n", - pci_name(pdev), (int)pdev->vendor, (int)pdev->device, - (int)rev); - - ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slots); - if (ret) - return ret; - - slots = PCI_SLOT_INFO_SLOTS(slots) + 1; - DBG("found %d slot(s)\n", slots); - if (slots == 0) - return -ENODEV; - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - chip = kzalloc(sizeof(struct sdhci_chip) + - sizeof(struct sdhci_host*) * slots, GFP_KERNEL); - if (!chip) { - ret = -ENOMEM; - goto err; - } - - chip->pdev = pdev; - chip->quirks = ent->driver_data; - - if (debug_quirks) - chip->quirks = debug_quirks; - - chip->num_slots = slots; - pci_set_drvdata(pdev, chip); - - for (i = 0;i < slots;i++) { - ret = sdhci_probe_slot(pdev, i); - if (ret) { - for (i--;i >= 0;i--) - sdhci_remove_slot(pdev, i); - goto free; - } - } - - return 0; - -free: - pci_set_drvdata(pdev, NULL); - kfree(chip); - -err: - pci_disable_device(pdev); - return ret; -} - -static void __devexit sdhci_remove(struct pci_dev *pdev) +void sdhci_free_host(struct sdhci_host *host) { - int i; - struct sdhci_chip *chip; - - chip = pci_get_drvdata(pdev); - - if (chip) { - for (i = 0;i < chip->num_slots;i++) - sdhci_remove_slot(pdev, i); - - pci_set_drvdata(pdev, NULL); - - kfree(chip); - } - - pci_disable_device(pdev); + mmc_free_host(host->mmc); } -static struct pci_driver sdhci_driver = { - .name = DRIVER_NAME, - .id_table = pci_ids, - .probe = sdhci_probe, - .remove = __devexit_p(sdhci_remove), - .suspend = sdhci_suspend, - .resume = sdhci_resume, -}; +EXPORT_SYMBOL_GPL(sdhci_free_host); /*****************************************************************************\ * * @@ -1726,14 +1797,11 @@ static int __init sdhci_drv_init(void) ": Secure Digital Host Controller Interface driver\n"); printk(KERN_INFO DRIVER_NAME ": Copyright(c) Pierre Ossman\n"); - return pci_register_driver(&sdhci_driver); + return 0; } static void __exit sdhci_drv_exit(void) { - DBG("Exiting\n"); - - pci_unregister_driver(&sdhci_driver); } module_init(sdhci_drv_init); @@ -1742,7 +1810,7 @@ module_exit(sdhci_drv_exit); module_param(debug_quirks, uint, 0444); MODULE_AUTHOR("Pierre Ossman "); -MODULE_DESCRIPTION("Secure Digital Host Controller Interface driver"); +MODULE_DESCRIPTION("Secure Digital Host Controller Interface core driver"); MODULE_LICENSE("GPL"); MODULE_PARM_DESC(debug_quirks, "Force certain quirks."); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 299118de8933..5bb355281765 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -9,18 +9,6 @@ * your option) any later version. */ -/* - * PCI registers - */ - -#define PCI_SDHCI_IFPIO 0x00 -#define PCI_SDHCI_IFDMA 0x01 -#define PCI_SDHCI_IFVENDOR 0x02 - -#define PCI_SLOT_INFO 0x40 /* 8 bits */ -#define PCI_SLOT_INFO_SLOTS(x) ((x >> 4) & 7) -#define PCI_SLOT_INFO_FIRST_BAR_MASK 0x07 - /* * Controller registers */ @@ -72,6 +60,11 @@ #define SDHCI_CTRL_LED 0x01 #define SDHCI_CTRL_4BITBUS 0x02 #define SDHCI_CTRL_HISPD 0x04 +#define SDHCI_CTRL_DMA_MASK 0x18 +#define SDHCI_CTRL_SDMA 0x00 +#define SDHCI_CTRL_ADMA1 0x08 +#define SDHCI_CTRL_ADMA32 0x10 +#define SDHCI_CTRL_ADMA64 0x18 #define SDHCI_POWER_CONTROL 0x29 #define SDHCI_POWER_ON 0x01 @@ -117,6 +110,7 @@ #define SDHCI_INT_DATA_END_BIT 0x00400000 #define SDHCI_INT_BUS_POWER 0x00800000 #define SDHCI_INT_ACMD12ERR 0x01000000 +#define SDHCI_INT_ADMA_ERROR 0x02000000 #define SDHCI_INT_NORMAL_MASK 0x00007FFF #define SDHCI_INT_ERROR_MASK 0xFFFF8000 @@ -140,11 +134,14 @@ #define SDHCI_CLOCK_BASE_SHIFT 8 #define SDHCI_MAX_BLOCK_MASK 0x00030000 #define SDHCI_MAX_BLOCK_SHIFT 16 +#define SDHCI_CAN_DO_ADMA2 0x00080000 +#define SDHCI_CAN_DO_ADMA1 0x00100000 #define SDHCI_CAN_DO_HISPD 0x00200000 #define SDHCI_CAN_DO_DMA 0x00400000 #define SDHCI_CAN_VDD_330 0x01000000 #define SDHCI_CAN_VDD_300 0x02000000 #define SDHCI_CAN_VDD_180 0x04000000 +#define SDHCI_CAN_64BIT 0x10000000 /* 44-47 reserved for more caps */ @@ -152,7 +149,16 @@ /* 4C-4F reserved for more max current */ -/* 50-FB reserved */ +#define SDHCI_SET_ACMD12_ERROR 0x50 +#define SDHCI_SET_INT_ERROR 0x52 + +#define SDHCI_ADMA_ERROR 0x54 + +/* 55-57 reserved */ + +#define SDHCI_ADMA_ADDRESS 0x58 + +/* 60-FB reserved */ #define SDHCI_SLOT_INT_STATUS 0xFC @@ -161,11 +167,50 @@ #define SDHCI_VENDOR_VER_SHIFT 8 #define SDHCI_SPEC_VER_MASK 0x00FF #define SDHCI_SPEC_VER_SHIFT 0 +#define SDHCI_SPEC_100 0 +#define SDHCI_SPEC_200 1 -struct sdhci_chip; +struct sdhci_ops; struct sdhci_host { - struct sdhci_chip *chip; + /* Data set by hardware interface driver */ + const char *hw_name; /* Hardware bus name */ + + unsigned int quirks; /* Deviations from spec. */ + +/* Controller doesn't honor resets unless we touch the clock register */ +#define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1<<0) +/* Controller has bad caps bits, but really supports DMA */ +#define SDHCI_QUIRK_FORCE_DMA (1<<1) +/* Controller doesn't like to be reset when there is no card inserted. */ +#define SDHCI_QUIRK_NO_CARD_NO_RESET (1<<2) +/* Controller doesn't like clearing the power reg before a change */ +#define SDHCI_QUIRK_SINGLE_POWER_WRITE (1<<3) +/* Controller has flaky internal state so reset it on each ios change */ +#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS (1<<4) +/* Controller has an unusable DMA engine */ +#define SDHCI_QUIRK_BROKEN_DMA (1<<5) +/* Controller has an unusable ADMA engine */ +#define SDHCI_QUIRK_BROKEN_ADMA (1<<6) +/* Controller can only DMA from 32-bit aligned addresses */ +#define SDHCI_QUIRK_32BIT_DMA_ADDR (1<<7) +/* Controller can only DMA chunk sizes that are a multiple of 32 bits */ +#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<8) +/* Controller can only ADMA chunks that are a multiple of 32 bits */ +#define SDHCI_QUIRK_32BIT_ADMA_SIZE (1<<9) +/* Controller needs to be reset after each request to stay stable */ +#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<10) +/* Controller needs voltage and power writes to happen separately */ +#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<11) +/* Controller provides an incorrect timeout value for transfers */ +#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<12) + + int irq; /* Device IRQ */ + void __iomem * ioaddr; /* Mapped address */ + + const struct sdhci_ops *ops; /* Low level hw interface */ + + /* Internal data */ struct mmc_host *mmc; /* MMC structure */ #ifdef CONFIG_LEDS_CLASS @@ -176,7 +221,11 @@ struct sdhci_host { int flags; /* Host attributes */ #define SDHCI_USE_DMA (1<<0) /* Host is DMA capable */ -#define SDHCI_REQ_USE_DMA (1<<1) /* Use DMA for this req. */ +#define SDHCI_USE_ADMA (1<<1) /* Host is ADMA capable */ +#define SDHCI_REQ_USE_DMA (1<<2) /* Use DMA for this req. */ +#define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */ + + unsigned int version; /* SDHCI spec. version */ unsigned int max_clk; /* Max possible freq (MHz) */ unsigned int timeout_clk; /* Timeout freq (KHz) */ @@ -194,22 +243,41 @@ struct sdhci_host { int offset; /* Offset into current sg */ int remain; /* Bytes left in current */ - int irq; /* Device IRQ */ - int bar; /* PCI BAR index */ - unsigned long addr; /* Bus address */ - void __iomem * ioaddr; /* Mapped address */ + int sg_count; /* Mapped sg entries */ + + u8 *adma_desc; /* ADMA descriptor table */ + u8 *align_buffer; /* Bounce buffer */ + + dma_addr_t adma_addr; /* Mapped ADMA descr. table */ + dma_addr_t align_addr; /* Mapped bounce buffer */ struct tasklet_struct card_tasklet; /* Tasklet structures */ struct tasklet_struct finish_tasklet; struct timer_list timer; /* Timer for timeouts */ -}; -struct sdhci_chip { - struct pci_dev *pdev; + unsigned long private[0] ____cacheline_aligned; +}; - unsigned long quirks; - int num_slots; /* Slots on controller */ - struct sdhci_host *hosts[0]; /* Pointers to hosts */ +struct sdhci_ops { + int (*enable_dma)(struct sdhci_host *host); }; + + +extern struct sdhci_host *sdhci_alloc_host(struct device *dev, + size_t priv_size); +extern void sdhci_free_host(struct sdhci_host *host); + +static inline void *sdhci_priv(struct sdhci_host *host) +{ + return (void *)host->private; +} + +extern int sdhci_add_host(struct sdhci_host *host); +extern void sdhci_remove_host(struct sdhci_host *host, int dead); + +#ifdef CONFIG_PM +extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state); +extern int sdhci_resume_host(struct sdhci_host *host); +#endif diff --git a/drivers/mmc/host/sdricoh_cs.c b/drivers/mmc/host/sdricoh_cs.c new file mode 100644 index 000000000000..f99e9f721629 --- /dev/null +++ b/drivers/mmc/host/sdricoh_cs.c @@ -0,0 +1,575 @@ +/* + * sdricoh_cs.c - driver for Ricoh Secure Digital Card Readers that can be + * found on some Ricoh RL5c476 II cardbus bridge + * + * Copyright (C) 2006 - 2008 Sascha Sommer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* +#define DEBUG +#define VERBOSE_DEBUG +*/ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define DRIVER_NAME "sdricoh_cs" + +static unsigned int switchlocked; + +/* i/o region */ +#define SDRICOH_PCI_REGION 0 +#define SDRICOH_PCI_REGION_SIZE 0x1000 + +/* registers */ +#define R104_VERSION 0x104 +#define R200_CMD 0x200 +#define R204_CMD_ARG 0x204 +#define R208_DATAIO 0x208 +#define R20C_RESP 0x20c +#define R21C_STATUS 0x21c +#define R2E0_INIT 0x2e0 +#define R2E4_STATUS_RESP 0x2e4 +#define R2F0_RESET 0x2f0 +#define R224_MODE 0x224 +#define R226_BLOCKSIZE 0x226 +#define R228_POWER 0x228 +#define R230_DATA 0x230 + +/* flags for the R21C_STATUS register */ +#define STATUS_CMD_FINISHED 0x00000001 +#define STATUS_TRANSFER_FINISHED 0x00000004 +#define STATUS_CARD_INSERTED 0x00000020 +#define STATUS_CARD_LOCKED 0x00000080 +#define STATUS_CMD_TIMEOUT 0x00400000 +#define STATUS_READY_TO_READ 0x01000000 +#define STATUS_READY_TO_WRITE 0x02000000 +#define STATUS_BUSY 0x40000000 + +/* timeouts */ +#define INIT_TIMEOUT 100 +#define CMD_TIMEOUT 100000 +#define TRANSFER_TIMEOUT 100000 +#define BUSY_TIMEOUT 32767 + +/* list of supported pcmcia devices */ +static struct pcmcia_device_id pcmcia_ids[] = { + /* vendor and device strings followed by their crc32 hashes */ + PCMCIA_DEVICE_PROD_ID12("RICOH", "Bay1Controller", 0xd9f522ed, + 0xc3901202), + PCMCIA_DEVICE_NULL, +}; + +MODULE_DEVICE_TABLE(pcmcia, pcmcia_ids); + +/* mmc privdata */ +struct sdricoh_host { + struct device *dev; + struct mmc_host *mmc; /* MMC structure */ + unsigned char __iomem *iobase; + struct pci_dev *pci_dev; + int app_cmd; +}; + +/***************** register i/o helper functions *****************************/ + +static inline unsigned int sdricoh_readl(struct sdricoh_host *host, + unsigned int reg) +{ + unsigned int value = readl(host->iobase + reg); + dev_vdbg(host->dev, "rl %x 0x%x\n", reg, value); + return value; +} + +static inline void sdricoh_writel(struct sdricoh_host *host, unsigned int reg, + unsigned int value) +{ + writel(value, host->iobase + reg); + dev_vdbg(host->dev, "wl %x 0x%x\n", reg, value); + +} + +static inline unsigned int sdricoh_readw(struct sdricoh_host *host, + unsigned int reg) +{ + unsigned int value = readw(host->iobase + reg); + dev_vdbg(host->dev, "rb %x 0x%x\n", reg, value); + return value; +} + +static inline void sdricoh_writew(struct sdricoh_host *host, unsigned int reg, + unsigned short value) +{ + writew(value, host->iobase + reg); + dev_vdbg(host->dev, "ww %x 0x%x\n", reg, value); +} + +static inline unsigned int sdricoh_readb(struct sdricoh_host *host, + unsigned int reg) +{ + unsigned int value = readb(host->iobase + reg); + dev_vdbg(host->dev, "rb %x 0x%x\n", reg, value); + return value; +} + +static int sdricoh_query_status(struct sdricoh_host *host, unsigned int wanted, + unsigned int timeout){ + unsigned int loop; + unsigned int status = 0; + struct device *dev = host->dev; + for (loop = 0; loop < timeout; loop++) { + status = sdricoh_readl(host, R21C_STATUS); + sdricoh_writel(host, R2E4_STATUS_RESP, status); + if (status & wanted) + break; + } + + if (loop == timeout) { + dev_err(dev, "query_status: timeout waiting for %x\n", wanted); + return -ETIMEDOUT; + } + + /* do not do this check in the loop as some commands fail otherwise */ + if (status & 0x7F0000) { + dev_err(dev, "waiting for status bit %x failed\n", wanted); + return -EINVAL; + } + return 0; + +} + +static int sdricoh_mmc_cmd(struct sdricoh_host *host, unsigned char opcode, + unsigned int arg) +{ + unsigned int status; + int result = 0; + unsigned int loop = 0; + /* reset status reg? */ + sdricoh_writel(host, R21C_STATUS, 0x18); + /* fill parameters */ + sdricoh_writel(host, R204_CMD_ARG, arg); + sdricoh_writel(host, R200_CMD, (0x10000 << 8) | opcode); + /* wait for command completion */ + if (opcode) { + for (loop = 0; loop < CMD_TIMEOUT; loop++) { + status = sdricoh_readl(host, R21C_STATUS); + sdricoh_writel(host, R2E4_STATUS_RESP, status); + if (status & STATUS_CMD_FINISHED) + break; + } + /* don't check for timeout in the loop it is not always + reset correctly + */ + if (loop == CMD_TIMEOUT || status & STATUS_CMD_TIMEOUT) + result = -ETIMEDOUT; + + } + + return result; + +} + +static int sdricoh_reset(struct sdricoh_host *host) +{ + dev_dbg(host->dev, "reset\n"); + sdricoh_writel(host, R2F0_RESET, 0x10001); + sdricoh_writel(host, R2E0_INIT, 0x10000); + if (sdricoh_readl(host, R2E0_INIT) != 0x10000) + return -EIO; + sdricoh_writel(host, R2E0_INIT, 0x10007); + + sdricoh_writel(host, R224_MODE, 0x2000000); + sdricoh_writel(host, R228_POWER, 0xe0); + + + /* status register ? */ + sdricoh_writel(host, R21C_STATUS, 0x18); + + return 0; +} + +static int sdricoh_blockio(struct sdricoh_host *host, int read, + u8 *buf, int len) +{ + int size; + u32 data = 0; + /* wait until the data is available */ + if (read) { + if (sdricoh_query_status(host, STATUS_READY_TO_READ, + TRANSFER_TIMEOUT)) + return -ETIMEDOUT; + sdricoh_writel(host, R21C_STATUS, 0x18); + /* read data */ + while (len) { + data = sdricoh_readl(host, R230_DATA); + size = min(len, 4); + len -= size; + while (size) { + *buf = data & 0xFF; + buf++; + data >>= 8; + size--; + } + } + } else { + if (sdricoh_query_status(host, STATUS_READY_TO_WRITE, + TRANSFER_TIMEOUT)) + return -ETIMEDOUT; + sdricoh_writel(host, R21C_STATUS, 0x18); + /* write data */ + while (len) { + size = min(len, 4); + len -= size; + while (size) { + data >>= 8; + data |= (u32)*buf << 24; + buf++; + size--; + } + sdricoh_writel(host, R230_DATA, data); + } + } + + if (len) + return -EIO; + + return 0; +} + +static void sdricoh_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + struct sdricoh_host *host = mmc_priv(mmc); + struct mmc_command *cmd = mrq->cmd; + struct mmc_data *data = cmd->data; + struct device *dev = host->dev; + unsigned char opcode = cmd->opcode; + int i; + + dev_dbg(dev, "=============================\n"); + dev_dbg(dev, "sdricoh_request opcode=%i\n", opcode); + + sdricoh_writel(host, R21C_STATUS, 0x18); + + /* MMC_APP_CMDs need some special handling */ + if (host->app_cmd) { + opcode |= 64; + host->app_cmd = 0; + } else if (opcode == 55) + host->app_cmd = 1; + + /* read/write commands seem to require this */ + if (data) { + sdricoh_writew(host, R226_BLOCKSIZE, data->blksz); + sdricoh_writel(host, R208_DATAIO, 0); + } + + cmd->error = sdricoh_mmc_cmd(host, opcode, cmd->arg); + + /* read response buffer */ + if (cmd->flags & MMC_RSP_PRESENT) { + if (cmd->flags & MMC_RSP_136) { + /* CRC is stripped so we need to do some shifting. */ + for (i = 0; i < 4; i++) { + cmd->resp[i] = + sdricoh_readl(host, + R20C_RESP + (3 - i) * 4) << 8; + if (i != 3) + cmd->resp[i] |= + sdricoh_readb(host, R20C_RESP + + (3 - i) * 4 - 1); + } + } else + cmd->resp[0] = sdricoh_readl(host, R20C_RESP); + } + + /* transfer data */ + if (data && cmd->error == 0) { + dev_dbg(dev, "transfer: blksz %i blocks %i sg_len %i " + "sg length %i\n", data->blksz, data->blocks, + data->sg_len, data->sg->length); + + /* enter data reading mode */ + sdricoh_writel(host, R21C_STATUS, 0x837f031e); + for (i = 0; i < data->blocks; i++) { + size_t len = data->blksz; + u8 *buf; + struct page *page; + int result; + page = sg_page(data->sg); + + buf = kmap(page) + data->sg->offset + (len * i); + result = + sdricoh_blockio(host, + data->flags & MMC_DATA_READ, buf, len); + kunmap(page); + flush_dcache_page(page); + if (result) { + dev_err(dev, "sdricoh_request: cmd %i " + "block transfer failed\n", cmd->opcode); + cmd->error = result; + break; + } else + data->bytes_xfered += len; + } + + sdricoh_writel(host, R208_DATAIO, 1); + + if (sdricoh_query_status(host, STATUS_TRANSFER_FINISHED, + TRANSFER_TIMEOUT)) { + dev_err(dev, "sdricoh_request: transfer end error\n"); + cmd->error = -EINVAL; + } + } + /* FIXME check busy flag */ + + mmc_request_done(mmc, mrq); + dev_dbg(dev, "=============================\n"); +} + +static void sdricoh_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct sdricoh_host *host = mmc_priv(mmc); + dev_dbg(host->dev, "set_ios\n"); + + if (ios->power_mode == MMC_POWER_ON) { + sdricoh_writel(host, R228_POWER, 0xc0e0); + + if (ios->bus_width == MMC_BUS_WIDTH_4) { + sdricoh_writel(host, R224_MODE, 0x2000300); + sdricoh_writel(host, R228_POWER, 0x40e0); + } else { + sdricoh_writel(host, R224_MODE, 0x2000340); + } + + } else if (ios->power_mode == MMC_POWER_UP) { + sdricoh_writel(host, R224_MODE, 0x2000320); + sdricoh_writel(host, R228_POWER, 0xe0); + } +} + +static int sdricoh_get_ro(struct mmc_host *mmc) +{ + struct sdricoh_host *host = mmc_priv(mmc); + unsigned int status; + + status = sdricoh_readl(host, R21C_STATUS); + sdricoh_writel(host, R2E4_STATUS_RESP, status); + + /* some notebooks seem to have the locked flag switched */ + if (switchlocked) + return !(status & STATUS_CARD_LOCKED); + + return (status & STATUS_CARD_LOCKED); +} + +static struct mmc_host_ops sdricoh_ops = { + .request = sdricoh_request, + .set_ios = sdricoh_set_ios, + .get_ro = sdricoh_get_ro, +}; + +/* initialize the control and register it to the mmc framework */ +static int sdricoh_init_mmc(struct pci_dev *pci_dev, + struct pcmcia_device *pcmcia_dev) +{ + int result = 0; + void __iomem *iobase = NULL; + struct mmc_host *mmc = NULL; + struct sdricoh_host *host = NULL; + struct device *dev = &pcmcia_dev->dev; + /* map iomem */ + if (pci_resource_len(pci_dev, SDRICOH_PCI_REGION) != + SDRICOH_PCI_REGION_SIZE) { + dev_dbg(dev, "unexpected pci resource len\n"); + return -ENODEV; + } + iobase = + pci_iomap(pci_dev, SDRICOH_PCI_REGION, SDRICOH_PCI_REGION_SIZE); + if (!iobase) { + dev_err(dev, "unable to map iobase\n"); + return -ENODEV; + } + /* check version? */ + if (readl(iobase + R104_VERSION) != 0x4000) { + dev_dbg(dev, "no supported mmc controller found\n"); + result = -ENODEV; + goto err; + } + /* allocate privdata */ + mmc = pcmcia_dev->priv = + mmc_alloc_host(sizeof(struct sdricoh_host), &pcmcia_dev->dev); + if (!mmc) { + dev_err(dev, "mmc_alloc_host failed\n"); + result = -ENOMEM; + goto err; + } + host = mmc_priv(mmc); + + host->iobase = iobase; + host->dev = dev; + host->pci_dev = pci_dev; + + mmc->ops = &sdricoh_ops; + + /* FIXME: frequency and voltage handling is done by the controller + */ + mmc->f_min = 450000; + mmc->f_max = 24000000; + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + mmc->caps |= MMC_CAP_4_BIT_DATA; + + mmc->max_seg_size = 1024 * 512; + mmc->max_blk_size = 512; + + /* reset the controler */ + if (sdricoh_reset(host)) { + dev_dbg(dev, "could not reset\n"); + result = -EIO; + goto err; + + } + + result = mmc_add_host(mmc); + + if (!result) { + dev_dbg(dev, "mmc host registered\n"); + return 0; + } + +err: + if (iobase) + iounmap(iobase); + if (mmc) + mmc_free_host(mmc); + + return result; +} + +/* search for supported mmc controllers */ +static int sdricoh_pcmcia_probe(struct pcmcia_device *pcmcia_dev) +{ + struct pci_dev *pci_dev = NULL; + + dev_info(&pcmcia_dev->dev, "Searching MMC controller for pcmcia device" + " %s %s ...\n", pcmcia_dev->prod_id[0], pcmcia_dev->prod_id[1]); + + /* search pci cardbus bridge that contains the mmc controler */ + /* the io region is already claimed by yenta_socket... */ + while ((pci_dev = + pci_get_device(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_RL5C476, + pci_dev))) { + /* try to init the device */ + if (!sdricoh_init_mmc(pci_dev, pcmcia_dev)) { + dev_info(&pcmcia_dev->dev, "MMC controller found\n"); + return 0; + } + + } + dev_err(&pcmcia_dev->dev, "No MMC controller was found.\n"); + return -ENODEV; +} + +static void sdricoh_pcmcia_detach(struct pcmcia_device *link) +{ + struct mmc_host *mmc = link->priv; + + dev_dbg(&link->dev, "detach\n"); + + /* remove mmc host */ + if (mmc) { + struct sdricoh_host *host = mmc_priv(mmc); + mmc_remove_host(mmc); + pci_iounmap(host->pci_dev, host->iobase); + pci_dev_put(host->pci_dev); + mmc_free_host(mmc); + } + pcmcia_disable_device(link); + +} + +#ifdef CONFIG_PM +static int sdricoh_pcmcia_suspend(struct pcmcia_device *link) +{ + struct mmc_host *mmc = link->priv; + dev_dbg(&link->dev, "suspend\n"); + mmc_suspend_host(mmc, PMSG_SUSPEND); + return 0; +} + +static int sdricoh_pcmcia_resume(struct pcmcia_device *link) +{ + struct mmc_host *mmc = link->priv; + dev_dbg(&link->dev, "resume\n"); + sdricoh_reset(mmc_priv(mmc)); + mmc_resume_host(mmc); + return 0; +} +#else +#define sdricoh_pcmcia_suspend NULL +#define sdricoh_pcmcia_resume NULL +#endif + +static struct pcmcia_driver sdricoh_driver = { + .drv = { + .name = DRIVER_NAME, + }, + .probe = sdricoh_pcmcia_probe, + .remove = sdricoh_pcmcia_detach, + .id_table = pcmcia_ids, + .suspend = sdricoh_pcmcia_suspend, + .resume = sdricoh_pcmcia_resume, +}; + +/*****************************************************************************\ + * * + * Driver init/exit * + * * +\*****************************************************************************/ + +static int __init sdricoh_drv_init(void) +{ + return pcmcia_register_driver(&sdricoh_driver); +} + +static void __exit sdricoh_drv_exit(void) +{ + pcmcia_unregister_driver(&sdricoh_driver); +} + +module_init(sdricoh_drv_init); +module_exit(sdricoh_drv_exit); + +module_param(switchlocked, uint, 0444); + +MODULE_AUTHOR("Sascha Sommer "); +MODULE_DESCRIPTION("Ricoh PCMCIA Secure Digital Interface driver"); +MODULE_LICENSE("GPL"); + +MODULE_PARM_DESC(switchlocked, "Switch the cards locked status." + "Use this when unlocked cards are shown readonly (default 0)"); diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index 1c14a186f000..13844843e8de 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -973,7 +973,7 @@ static int tifm_sd_probe(struct tifm_dev *sock) mmc->ops = &tifm_sd_ops; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; + mmc->caps = MMC_CAP_4_BIT_DATA; mmc->f_min = 20000000 / 60; mmc->f_max = 24000000; diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c index c303e7f57ab4..adda37952032 100644 --- a/drivers/mmc/host/wbsd.c +++ b/drivers/mmc/host/wbsd.c @@ -68,16 +68,16 @@ static const int unlock_codes[] = { 0x83, 0x87 }; static const int valid_ids[] = { 0x7112, - }; +}; #ifdef CONFIG_PNP -static unsigned int nopnp = 0; +static unsigned int param_nopnp = 0; #else -static const unsigned int nopnp = 1; +static const unsigned int param_nopnp = 1; #endif -static unsigned int io = 0x248; -static unsigned int irq = 6; -static int dma = 2; +static unsigned int param_io = 0x248; +static unsigned int param_irq = 6; +static int param_dma = 2; /* * Basic functions @@ -939,7 +939,7 @@ static int wbsd_get_ro(struct mmc_host *mmc) spin_unlock_bh(&host->lock); - return csr & WBSD_WRPT; + return !!(csr & WBSD_WRPT); } static const struct mmc_host_ops wbsd_ops = { @@ -1219,7 +1219,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev) mmc->f_min = 375000; mmc->f_max = 24000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE; + mmc->caps = MMC_CAP_4_BIT_DATA; spin_lock_init(&host->lock); @@ -1420,7 +1420,7 @@ kfree: dma_unmap_single(mmc_dev(host->mmc), host->dma_addr, WBSD_DMA_SIZE, DMA_BIDIRECTIONAL); - host->dma_addr = (dma_addr_t)NULL; + host->dma_addr = 0; kfree(host->dma_buffer); host->dma_buffer = NULL; @@ -1445,7 +1445,7 @@ static void wbsd_release_dma(struct wbsd_host *host) host->dma = -1; host->dma_buffer = NULL; - host->dma_addr = (dma_addr_t)NULL; + host->dma_addr = 0; } /* @@ -1765,7 +1765,7 @@ static void __devexit wbsd_shutdown(struct device *dev, int pnp) static int __devinit wbsd_probe(struct platform_device *dev) { /* Use the module parameters for resources */ - return wbsd_init(&dev->dev, io, irq, dma, 0); + return wbsd_init(&dev->dev, param_io, param_irq, param_dma, 0); } static int __devexit wbsd_remove(struct platform_device *dev) @@ -1979,14 +1979,14 @@ static int __init wbsd_drv_init(void) #ifdef CONFIG_PNP - if (!nopnp) { + if (!param_nopnp) { result = pnp_register_driver(&wbsd_pnp_driver); if (result < 0) return result; } #endif /* CONFIG_PNP */ - if (nopnp) { + if (param_nopnp) { result = platform_driver_register(&wbsd_driver); if (result < 0) return result; @@ -2012,12 +2012,12 @@ static void __exit wbsd_drv_exit(void) { #ifdef CONFIG_PNP - if (!nopnp) + if (!param_nopnp) pnp_unregister_driver(&wbsd_pnp_driver); #endif /* CONFIG_PNP */ - if (nopnp) { + if (param_nopnp) { platform_device_unregister(wbsd_device); platform_driver_unregister(&wbsd_driver); @@ -2029,11 +2029,11 @@ static void __exit wbsd_drv_exit(void) module_init(wbsd_drv_init); module_exit(wbsd_drv_exit); #ifdef CONFIG_PNP -module_param(nopnp, uint, 0444); +module_param_named(nopnp, param_nopnp, uint, 0444); #endif -module_param(io, uint, 0444); -module_param(irq, uint, 0444); -module_param(dma, int, 0444); +module_param_named(io, param_io, uint, 0444); +module_param_named(irq, param_irq, uint, 0444); +module_param_named(dma, param_dma, int, 0444); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pierre Ossman "); diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c index 3dd537be87d8..b54e2ea8346b 100644 --- a/drivers/net/wireless/libertas/if_sdio.c +++ b/drivers/net/wireless/libertas/if_sdio.c @@ -1,7 +1,7 @@ /* * linux/drivers/net/wireless/libertas/if_sdio.c * - * Copyright 2007 Pierre Ossman + * Copyright 2007-2008 Pierre Ossman * * Inspired by if_cs.c, Copyright 2007 Holger Schurig * @@ -266,13 +266,10 @@ static int if_sdio_card_to_host(struct if_sdio_card *card) /* * The transfer must be in one transaction or the firmware - * goes suicidal. + * goes suicidal. There's no way to guarantee that for all + * controllers, but we can at least try. */ - chunk = size; - if ((chunk > card->func->cur_blksize) || (chunk > 512)) { - chunk = (chunk + card->func->cur_blksize - 1) / - card->func->cur_blksize * card->func->cur_blksize; - } + chunk = sdio_align_size(card->func, size); ret = sdio_readsb(card->func, card->buffer, card->ioport, chunk); if (ret) @@ -696,13 +693,10 @@ static int if_sdio_host_to_card(struct lbs_private *priv, /* * The transfer must be in one transaction or the firmware - * goes suicidal. + * goes suicidal. There's no way to guarantee that for all + * controllers, but we can at least try. */ - size = nb + 4; - if ((size > card->func->cur_blksize) || (size > 512)) { - size = (size + card->func->cur_blksize - 1) / - card->func->cur_blksize * card->func->cur_blksize; - } + size = sdio_align_size(card->func, nb + 4); packet = kzalloc(sizeof(struct if_sdio_packet) + size, GFP_ATOMIC); diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 4d1ce2e7361e..7d63f8ced24b 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -2,7 +2,7 @@ # Makefile for the PCI bus specific drivers. # -obj-y += access.o bus.o probe.o remove.o pci.o quirks.o \ +obj-y += access.o bus.o probe.o remove.o pci.o quirks.o slot.o \ pci-driver.o search.o pci-sysfs.o rom.o setup-res.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index f8c187a763bd..93e37f0666ab 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -299,7 +300,7 @@ free_and_return: * * @handle - the handle of the hotplug controller. */ -acpi_status acpi_run_oshp(acpi_handle handle) +static acpi_status acpi_run_oshp(acpi_handle handle) { acpi_status status; struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -322,9 +323,6 @@ acpi_status acpi_run_oshp(acpi_handle handle) kfree(string.pointer); return status; } -EXPORT_SYMBOL_GPL(acpi_run_oshp); - - /* acpi_get_hp_params_from_firmware * @@ -374,6 +372,85 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, } EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware); +/** + * acpi_get_hp_hw_control_from_firmware + * @dev: the pci_dev of the bridge that has a hotplug controller + * @flags: requested control bits for _OSC + * + * Attempt to take hotplug control from firmware. + */ +int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) +{ + acpi_status status; + acpi_handle chandle, handle = DEVICE_ACPI_HANDLE(&(dev->dev)); + struct pci_dev *pdev = dev; + struct pci_bus *parent; + struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; + + flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | + OSC_SHPC_NATIVE_HP_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + if (!flags) { + err("Invalid flags %u specified!\n", flags); + return -EINVAL; + } + + /* + * Per PCI firmware specification, we should run the ACPI _OSC + * method to get control of hotplug hardware before using it. If + * an _OSC is missing, we look for an OSHP to do the same thing. + * To handle different BIOS behavior, we look for _OSC and OSHP + * within the scope of the hotplug controller and its parents, + * upto the host bridge under which this controller exists. + */ + while (!handle) { + /* + * This hotplug controller was not listed in the ACPI name + * space at all. Try to get acpi handle of parent pci bus. + */ + if (!pdev || !pdev->bus->parent) + break; + parent = pdev->bus->parent; + dbg("Could not find %s in acpi namespace, trying parent\n", + pci_name(pdev)); + if (!parent->self) + /* Parent must be a host bridge */ + handle = acpi_get_pci_rootbridge_handle( + pci_domain_nr(parent), + parent->number); + else + handle = DEVICE_ACPI_HANDLE(&(parent->self->dev)); + pdev = parent->self; + } + + while (handle) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); + dbg("Trying to get hotplug control for %s \n", + (char *)string.pointer); + status = pci_osc_control_set(handle, flags); + if (status == AE_NOT_FOUND) + status = acpi_run_oshp(handle); + if (ACPI_SUCCESS(status)) { + dbg("Gained control for hotplug HW for pci %s (%s)\n", + pci_name(dev), (char *)string.pointer); + kfree(string.pointer); + return 0; + } + if (acpi_root_bridge(handle)) + break; + chandle = handle; + status = acpi_get_parent(chandle, &handle); + if (ACPI_FAILURE(status)) + break; + } + + dbg("Cannot get control of hotplug hardware for pci %s\n", + pci_name(dev)); + + kfree(string.pointer); + return -ENODEV; +} +EXPORT_SYMBOL(acpi_get_hp_hw_control_from_firmware); /* acpi_root_bridge - check to see if this acpi object is a root bridge * diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h index 7a29164d4b32..eecf7cbf4139 100644 --- a/drivers/pci/hotplug/acpiphp.h +++ b/drivers/pci/hotplug/acpiphp.h @@ -215,7 +215,6 @@ extern u8 acpiphp_get_power_status (struct acpiphp_slot *slot); extern u8 acpiphp_get_attention_status (struct acpiphp_slot *slot); extern u8 acpiphp_get_latch_status (struct acpiphp_slot *slot); extern u8 acpiphp_get_adapter_status (struct acpiphp_slot *slot); -extern u32 acpiphp_get_address (struct acpiphp_slot *slot); /* variables */ extern int acpiphp_debug; diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c index 7af68ba27903..0e496e866a84 100644 --- a/drivers/pci/hotplug/acpiphp_core.c +++ b/drivers/pci/hotplug/acpiphp_core.c @@ -70,7 +70,6 @@ static int disable_slot (struct hotplug_slot *slot); static int set_attention_status (struct hotplug_slot *slot, u8 value); static int get_power_status (struct hotplug_slot *slot, u8 *value); static int get_attention_status (struct hotplug_slot *slot, u8 *value); -static int get_address (struct hotplug_slot *slot, u32 *value); static int get_latch_status (struct hotplug_slot *slot, u8 *value); static int get_adapter_status (struct hotplug_slot *slot, u8 *value); @@ -83,7 +82,6 @@ static struct hotplug_slot_ops acpi_hotplug_slot_ops = { .get_attention_status = get_attention_status, .get_latch_status = get_latch_status, .get_adapter_status = get_adapter_status, - .get_address = get_address, }; @@ -274,23 +272,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value) return 0; } - -/** - * get_address - get pci address of a slot - * @hotplug_slot: slot to get status - * @value: pointer to struct pci_busdev (seg, bus, dev) - */ -static int get_address(struct hotplug_slot *hotplug_slot, u32 *value) -{ - struct slot *slot = hotplug_slot->private; - - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); - - *value = acpiphp_get_address(slot->acpi_slot); - - return 0; -} - static int __init init_acpi(void) { int retval; @@ -357,7 +338,11 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot) acpiphp_slot->slot = slot; snprintf(slot->name, sizeof(slot->name), "%u", slot->acpi_slot->sun); - retval = pci_hp_register(slot->hotplug_slot); + retval = pci_hp_register(slot->hotplug_slot, + acpiphp_slot->bridge->pci_bus, + acpiphp_slot->device); + if (retval == -EBUSY) + goto error_hpslot; if (retval) { err("pci_hp_register failed with error %d\n", retval); goto error_hpslot; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 91156f85a926..a3e4705dd8f0 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -258,7 +258,12 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) bridge->pci_bus->number, slot->device); retval = acpiphp_register_hotplug_slot(slot); if (retval) { - warn("acpiphp_register_hotplug_slot failed(err code = 0x%x)\n", retval); + if (retval == -EBUSY) + warn("Slot %d already registered by another " + "hotplug driver\n", slot->sun); + else + warn("acpiphp_register_hotplug_slot failed " + "(err code = 0x%x)\n", retval); goto err_exit; } } @@ -1878,19 +1883,3 @@ u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot) return (sta == 0) ? 0 : 1; } - - -/* - * pci address (seg/bus/dev) - */ -u32 acpiphp_get_address(struct acpiphp_slot *slot) -{ - u32 address; - struct pci_bus *pci_bus = slot->bridge->pci_bus; - - address = (pci_domain_nr(pci_bus) << 16) | - (pci_bus->number << 8) | - slot->device; - - return address; -} diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index ede9051fdb5d..2b7c45e39370 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -33,8 +33,10 @@ #include #include #include +#include #include "acpiphp.h" +#include "../pci.h" #define DRIVER_VERSION "1.0.1" #define DRIVER_AUTHOR "Irene Zubarev , Vernon Mauery " @@ -430,7 +432,7 @@ static int __init ibm_acpiphp_init(void) int retval = 0; acpi_status status; struct acpi_device *device; - struct kobject *sysdir = &pci_hotplug_slots_kset->kobj; + struct kobject *sysdir = &pci_slots_kset->kobj; dbg("%s\n", __func__); @@ -477,7 +479,7 @@ init_return: static void __exit ibm_acpiphp_exit(void) { acpi_status status; - struct kobject *sysdir = &pci_hotplug_slots_kset->kobj; + struct kobject *sysdir = &pci_slots_kset->kobj; dbg("%s\n", __func__); diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index d8a6b80ab42a..935947991dc9 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -285,7 +285,7 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last) info->attention_status = cpci_get_attention_status(slot); dbg("registering slot %s", slot->hotplug_slot->name); - status = pci_hp_register(slot->hotplug_slot); + status = pci_hp_register(slot->hotplug_slot, bus, i); if (status) { err("pci_hp_register failed with error %d", status); goto error_name; diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index 36b115b27b0b..54defec51d08 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -434,7 +434,9 @@ static int ctrl_slot_setup(struct controller *ctrl, slot->bus, slot->device, slot->number, ctrl->slot_device_offset, slot_number); - result = pci_hp_register(hotplug_slot); + result = pci_hp_register(hotplug_slot, + ctrl->pci_dev->subordinate, + slot->device); if (result) { err("pci_hp_register failed with error %d\n", result); goto error_name; diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index 7e9a827c2687..40337a06c18a 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -66,6 +66,7 @@ struct dummy_slot { struct pci_dev *dev; struct work_struct remove_work; unsigned long removed; + char name[8]; }; static int debug; @@ -100,6 +101,7 @@ static int add_slot(struct pci_dev *dev) struct dummy_slot *dslot; struct hotplug_slot *slot; int retval = -ENOMEM; + static int count = 1; slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL); if (!slot) @@ -113,18 +115,18 @@ static int add_slot(struct pci_dev *dev) slot->info->max_bus_speed = PCI_SPEED_UNKNOWN; slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN; - slot->name = &dev->dev.bus_id[0]; - dbg("slot->name = %s\n", slot->name); - dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL); if (!dslot) goto error_info; + slot->name = dslot->name; + snprintf(slot->name, sizeof(dslot->name), "fake%d", count++); + dbg("slot->name = %s\n", slot->name); slot->ops = &dummy_hotplug_slot_ops; slot->release = &dummy_release; slot->private = dslot; - retval = pci_hp_register(slot); + retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn)); if (retval) { err("pci_hp_register failed with error %d\n", retval); goto error_dslot; @@ -148,17 +150,17 @@ error: static int __init pci_scan_buses(void) { struct pci_dev *dev = NULL; - int retval = 0; + int lastslot = 0; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - retval = add_slot(dev); - if (retval) { - pci_dev_put(dev); - break; - } + if (PCI_FUNC(dev->devfn) > 0 && + lastslot == PCI_SLOT(dev->devfn)) + continue; + lastslot = PCI_SLOT(dev->devfn); + add_slot(dev); } - return retval; + return 0; } static void remove_slot(struct dummy_slot *dslot) @@ -296,23 +298,9 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) return 0; } -/* find the hotplug_slot for the pci_dev */ -static struct hotplug_slot *get_slot_from_dev(struct pci_dev *dev) -{ - struct dummy_slot *dslot; - - list_for_each_entry(dslot, &slot_list, node) { - if (dslot->dev == dev) - return dslot->slot; - } - return NULL; -} - - static int disable_slot(struct hotplug_slot *slot) { struct dummy_slot *dslot; - struct hotplug_slot *hslot; struct pci_dev *dev; int func; @@ -322,41 +310,27 @@ static int disable_slot(struct hotplug_slot *slot) dbg("%s - physical_slot = %s\n", __func__, slot->name); - /* don't disable bridged devices just yet, we can't handle them easily... */ - if (dslot->dev->subordinate) { - err("Can't remove PCI devices with other PCI devices behind it yet.\n"); - return -ENODEV; - } - if (test_and_set_bit(0, &dslot->removed)) { - dbg("Slot already scheduled for removal\n"); - return -ENODEV; - } - /* search for subfunctions and disable them first */ - if (!(dslot->dev->devfn & 7)) { - for (func = 1; func < 8; func++) { - dev = pci_get_slot(dslot->dev->bus, - dslot->dev->devfn + func); - if (dev) { - hslot = get_slot_from_dev(dev); - if (hslot) - disable_slot(hslot); - else { - err("Hotplug slot not found for subfunction of PCI device\n"); - return -ENODEV; - } - pci_dev_put(dev); - } else - dbg("No device in slot found\n"); + for (func = 7; func >= 0; func--) { + dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func); + if (!dev) + continue; + + if (test_and_set_bit(0, &dslot->removed)) { + dbg("Slot already scheduled for removal\n"); + return -ENODEV; } - } - /* remove the device from the pci core */ - pci_remove_bus_device(dslot->dev); + /* queue work item to blow away this sysfs entry and other + * parts. + */ + INIT_WORK(&dslot->remove_work, remove_slot_worker); + queue_work(dummyphp_wq, &dslot->remove_work); - /* queue work item to blow away this sysfs entry and other parts. */ - INIT_WORK(&dslot->remove_work, remove_slot_worker); - queue_work(dummyphp_wq, &dslot->remove_work); + /* blow away this sysfs entry and other parts. */ + remove_slot(dslot); + pci_dev_put(dev); + } return 0; } diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index dca7efc14be2..8467d0287325 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -1001,7 +1001,8 @@ static int __init ebda_rsrc_controller (void) tmp_slot = list_entry (list, struct slot, ibm_slot_list); snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot)); - pci_hp_register (tmp_slot->hotplug_slot); + pci_hp_register(tmp_slot->hotplug_slot, + pci_find_bus(0, tmp_slot->bus), tmp_slot->device); } print_ebda_hpc (); diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index a11021e8ce37..5f85b1b120e3 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -40,6 +40,7 @@ #include #include #include +#include "../pci.h" #define MY_NAME "pci_hotplug" @@ -60,41 +61,7 @@ static int debug; ////////////////////////////////////////////////////////////////// static LIST_HEAD(pci_hotplug_slot_list); - -struct kset *pci_hotplug_slots_kset; - -static ssize_t hotplug_slot_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct hotplug_slot *slot = to_hotplug_slot(kobj); - struct hotplug_slot_attribute *attribute = to_hotplug_attr(attr); - return attribute->show ? attribute->show(slot, buf) : -EIO; -} - -static ssize_t hotplug_slot_attr_store(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t len) -{ - struct hotplug_slot *slot = to_hotplug_slot(kobj); - struct hotplug_slot_attribute *attribute = to_hotplug_attr(attr); - return attribute->store ? attribute->store(slot, buf, len) : -EIO; -} - -static struct sysfs_ops hotplug_slot_sysfs_ops = { - .show = hotplug_slot_attr_show, - .store = hotplug_slot_attr_store, -}; - -static void hotplug_slot_release(struct kobject *kobj) -{ - struct hotplug_slot *slot = to_hotplug_slot(kobj); - if (slot->release) - slot->release(slot); -} - -static struct kobj_type hotplug_slot_ktype = { - .sysfs_ops = &hotplug_slot_sysfs_ops, - .release = &hotplug_slot_release, -}; +static DEFINE_SPINLOCK(pci_hotplug_slot_list_lock); /* these strings match up with the values in pci_bus_speed */ static char *pci_bus_speed_strings[] = { @@ -149,16 +116,15 @@ GET_STATUS(power_status, u8) GET_STATUS(attention_status, u8) GET_STATUS(latch_status, u8) GET_STATUS(adapter_status, u8) -GET_STATUS(address, u32) GET_STATUS(max_bus_speed, enum pci_bus_speed) GET_STATUS(cur_bus_speed, enum pci_bus_speed) -static ssize_t power_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t power_read_file(struct pci_slot *slot, char *buf) { int retval; u8 value; - retval = get_power_status (slot, &value); + retval = get_power_status(slot->hotplug, &value); if (retval) goto exit; retval = sprintf (buf, "%d\n", value); @@ -166,9 +132,10 @@ exit: return retval; } -static ssize_t power_write_file (struct hotplug_slot *slot, const char *buf, +static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf, size_t count) { + struct hotplug_slot *slot = pci_slot->hotplug; unsigned long lpower; u8 power; int retval = 0; @@ -204,29 +171,30 @@ exit: return count; } -static struct hotplug_slot_attribute hotplug_slot_attr_power = { +static struct pci_slot_attribute hotplug_slot_attr_power = { .attr = {.name = "power", .mode = S_IFREG | S_IRUGO | S_IWUSR}, .show = power_read_file, .store = power_write_file }; -static ssize_t attention_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t attention_read_file(struct pci_slot *slot, char *buf) { int retval; u8 value; - retval = get_attention_status (slot, &value); + retval = get_attention_status(slot->hotplug, &value); if (retval) goto exit; - retval = sprintf (buf, "%d\n", value); + retval = sprintf(buf, "%d\n", value); exit: return retval; } -static ssize_t attention_write_file (struct hotplug_slot *slot, const char *buf, +static ssize_t attention_write_file(struct pci_slot *slot, const char *buf, size_t count) { + struct hotplug_slot_ops *ops = slot->hotplug->ops; unsigned long lattention; u8 attention; int retval = 0; @@ -235,13 +203,13 @@ static ssize_t attention_write_file (struct hotplug_slot *slot, const char *buf, attention = (u8)(lattention & 0xff); dbg (" - attention = %d\n", attention); - if (!try_module_get(slot->ops->owner)) { + if (!try_module_get(ops->owner)) { retval = -ENODEV; goto exit; } - if (slot->ops->set_attention_status) - retval = slot->ops->set_attention_status(slot, attention); - module_put(slot->ops->owner); + if (ops->set_attention_status) + retval = ops->set_attention_status(slot->hotplug, attention); + module_put(ops->owner); exit: if (retval) @@ -249,18 +217,18 @@ exit: return count; } -static struct hotplug_slot_attribute hotplug_slot_attr_attention = { +static struct pci_slot_attribute hotplug_slot_attr_attention = { .attr = {.name = "attention", .mode = S_IFREG | S_IRUGO | S_IWUSR}, .show = attention_read_file, .store = attention_write_file }; -static ssize_t latch_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t latch_read_file(struct pci_slot *slot, char *buf) { int retval; u8 value; - retval = get_latch_status (slot, &value); + retval = get_latch_status(slot->hotplug, &value); if (retval) goto exit; retval = sprintf (buf, "%d\n", value); @@ -269,17 +237,17 @@ exit: return retval; } -static struct hotplug_slot_attribute hotplug_slot_attr_latch = { +static struct pci_slot_attribute hotplug_slot_attr_latch = { .attr = {.name = "latch", .mode = S_IFREG | S_IRUGO}, .show = latch_read_file, }; -static ssize_t presence_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t presence_read_file(struct pci_slot *slot, char *buf) { int retval; u8 value; - retval = get_adapter_status (slot, &value); + retval = get_adapter_status(slot->hotplug, &value); if (retval) goto exit; retval = sprintf (buf, "%d\n", value); @@ -288,42 +256,20 @@ exit: return retval; } -static struct hotplug_slot_attribute hotplug_slot_attr_presence = { +static struct pci_slot_attribute hotplug_slot_attr_presence = { .attr = {.name = "adapter", .mode = S_IFREG | S_IRUGO}, .show = presence_read_file, }; -static ssize_t address_read_file (struct hotplug_slot *slot, char *buf) -{ - int retval; - u32 address; - - retval = get_address (slot, &address); - if (retval) - goto exit; - retval = sprintf (buf, "%04x:%02x:%02x\n", - (address >> 16) & 0xffff, - (address >> 8) & 0xff, - address & 0xff); - -exit: - return retval; -} - -static struct hotplug_slot_attribute hotplug_slot_attr_address = { - .attr = {.name = "address", .mode = S_IFREG | S_IRUGO}, - .show = address_read_file, -}; - static char *unknown_speed = "Unknown bus speed"; -static ssize_t max_bus_speed_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t max_bus_speed_read_file(struct pci_slot *slot, char *buf) { char *speed_string; int retval; enum pci_bus_speed value; - retval = get_max_bus_speed (slot, &value); + retval = get_max_bus_speed(slot->hotplug, &value); if (retval) goto exit; @@ -338,18 +284,18 @@ exit: return retval; } -static struct hotplug_slot_attribute hotplug_slot_attr_max_bus_speed = { +static struct pci_slot_attribute hotplug_slot_attr_max_bus_speed = { .attr = {.name = "max_bus_speed", .mode = S_IFREG | S_IRUGO}, .show = max_bus_speed_read_file, }; -static ssize_t cur_bus_speed_read_file (struct hotplug_slot *slot, char *buf) +static ssize_t cur_bus_speed_read_file(struct pci_slot *slot, char *buf) { char *speed_string; int retval; enum pci_bus_speed value; - retval = get_cur_bus_speed (slot, &value); + retval = get_cur_bus_speed(slot->hotplug, &value); if (retval) goto exit; @@ -364,14 +310,15 @@ exit: return retval; } -static struct hotplug_slot_attribute hotplug_slot_attr_cur_bus_speed = { +static struct pci_slot_attribute hotplug_slot_attr_cur_bus_speed = { .attr = {.name = "cur_bus_speed", .mode = S_IFREG | S_IRUGO}, .show = cur_bus_speed_read_file, }; -static ssize_t test_write_file (struct hotplug_slot *slot, const char *buf, +static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf, size_t count) { + struct hotplug_slot *slot = pci_slot->hotplug; unsigned long ltest; u32 test; int retval = 0; @@ -394,13 +341,14 @@ exit: return count; } -static struct hotplug_slot_attribute hotplug_slot_attr_test = { +static struct pci_slot_attribute hotplug_slot_attr_test = { .attr = {.name = "test", .mode = S_IFREG | S_IRUGO | S_IWUSR}, .store = test_write_file }; -static int has_power_file (struct hotplug_slot *slot) +static int has_power_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if ((slot->ops->enable_slot) || @@ -410,8 +358,9 @@ static int has_power_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_attention_file (struct hotplug_slot *slot) +static int has_attention_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if ((slot->ops->set_attention_status) || @@ -420,8 +369,9 @@ static int has_attention_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_latch_file (struct hotplug_slot *slot) +static int has_latch_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->get_latch_status) @@ -429,8 +379,9 @@ static int has_latch_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_adapter_file (struct hotplug_slot *slot) +static int has_adapter_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->get_adapter_status) @@ -438,17 +389,9 @@ static int has_adapter_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_address_file (struct hotplug_slot *slot) -{ - if ((!slot) || (!slot->ops)) - return -ENODEV; - if (slot->ops->get_address) - return 0; - return -ENOENT; -} - -static int has_max_bus_speed_file (struct hotplug_slot *slot) +static int has_max_bus_speed_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->get_max_bus_speed) @@ -456,8 +399,9 @@ static int has_max_bus_speed_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_cur_bus_speed_file (struct hotplug_slot *slot) +static int has_cur_bus_speed_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->get_cur_bus_speed) @@ -465,8 +409,9 @@ static int has_cur_bus_speed_file (struct hotplug_slot *slot) return -ENOENT; } -static int has_test_file (struct hotplug_slot *slot) +static int has_test_file(struct pci_slot *pci_slot) { + struct hotplug_slot *slot = pci_slot->hotplug; if ((!slot) || (!slot->ops)) return -ENODEV; if (slot->ops->hardware_test) @@ -474,7 +419,7 @@ static int has_test_file (struct hotplug_slot *slot) return -ENOENT; } -static int fs_add_slot (struct hotplug_slot *slot) +static int fs_add_slot(struct pci_slot *slot) { int retval = 0; @@ -505,13 +450,6 @@ static int fs_add_slot (struct hotplug_slot *slot) goto exit_adapter; } - if (has_address_file(slot) == 0) { - retval = sysfs_create_file(&slot->kobj, - &hotplug_slot_attr_address.attr); - if (retval) - goto exit_address; - } - if (has_max_bus_speed_file(slot) == 0) { retval = sysfs_create_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr); @@ -544,10 +482,6 @@ exit_cur_speed: sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr); exit_max_speed: - if (has_address_file(slot) == 0) - sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_address.attr); - -exit_address: if (has_adapter_file(slot) == 0) sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_presence.attr); @@ -567,7 +501,7 @@ exit: return retval; } -static void fs_remove_slot (struct hotplug_slot *slot) +static void fs_remove_slot(struct pci_slot *slot) { if (has_power_file(slot) == 0) sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_power.attr); @@ -581,9 +515,6 @@ static void fs_remove_slot (struct hotplug_slot *slot) if (has_adapter_file(slot) == 0) sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_presence.attr); - if (has_address_file(slot) == 0) - sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_address.attr); - if (has_max_bus_speed_file(slot) == 0) sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr); @@ -599,27 +530,33 @@ static struct hotplug_slot *get_slot_from_name (const char *name) struct hotplug_slot *slot; struct list_head *tmp; + spin_lock(&pci_hotplug_slot_list_lock); list_for_each (tmp, &pci_hotplug_slot_list) { slot = list_entry (tmp, struct hotplug_slot, slot_list); if (strcmp(slot->name, name) == 0) - return slot; + goto out; } - return NULL; + slot = NULL; +out: + spin_unlock(&pci_hotplug_slot_list_lock); + return slot; } /** * pci_hp_register - register a hotplug_slot with the PCI hotplug subsystem + * @bus: bus this slot is on * @slot: pointer to the &struct hotplug_slot to register + * @slot_nr: slot number * * Registers a hotplug slot with the pci hotplug subsystem, which will allow * userspace interaction to the slot. * * Returns 0 if successful, anything else for an error. */ -int pci_hp_register (struct hotplug_slot *slot) +int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr) { int result; - struct hotplug_slot *tmp; + struct pci_slot *pci_slot; if (slot == NULL) return -ENODEV; @@ -632,57 +569,89 @@ int pci_hp_register (struct hotplug_slot *slot) } /* Check if we have already registered a slot with the same name. */ - tmp = get_slot_from_name(slot->name); - if (tmp) + if (get_slot_from_name(slot->name)) return -EEXIST; - slot->kobj.kset = pci_hotplug_slots_kset; - result = kobject_init_and_add(&slot->kobj, &hotplug_slot_ktype, NULL, - "%s", slot->name); - if (result) { - err("Unable to register kobject '%s'", slot->name); - return -EINVAL; + /* + * No problems if we call this interface from both ACPI_PCI_SLOT + * driver and call it here again. If we've already created the + * pci_slot, the interface will simply bump the refcount. + */ + pci_slot = pci_create_slot(bus, slot_nr, slot->name); + if (IS_ERR(pci_slot)) + return PTR_ERR(pci_slot); + + if (pci_slot->hotplug) { + dbg("%s: already claimed\n", __func__); + pci_destroy_slot(pci_slot); + return -EBUSY; } - list_add (&slot->slot_list, &pci_hotplug_slot_list); + slot->pci_slot = pci_slot; + pci_slot->hotplug = slot; + + /* + * Allow pcihp drivers to override the ACPI_PCI_SLOT name. + */ + if (strcmp(kobject_name(&pci_slot->kobj), slot->name)) { + result = kobject_rename(&pci_slot->kobj, slot->name); + if (result) { + pci_destroy_slot(pci_slot); + return result; + } + } + + spin_lock(&pci_hotplug_slot_list_lock); + list_add(&slot->slot_list, &pci_hotplug_slot_list); + spin_unlock(&pci_hotplug_slot_list_lock); + + result = fs_add_slot(pci_slot); + kobject_uevent(&pci_slot->kobj, KOBJ_ADD); + dbg("Added slot %s to the list\n", slot->name); + - result = fs_add_slot (slot); - kobject_uevent(&slot->kobj, KOBJ_ADD); - dbg ("Added slot %s to the list\n", slot->name); return result; } /** * pci_hp_deregister - deregister a hotplug_slot with the PCI hotplug subsystem - * @slot: pointer to the &struct hotplug_slot to deregister + * @hotplug: pointer to the &struct hotplug_slot to deregister * * The @slot must have been registered with the pci hotplug subsystem * previously with a call to pci_hp_register(). * * Returns 0 if successful, anything else for an error. */ -int pci_hp_deregister (struct hotplug_slot *slot) +int pci_hp_deregister(struct hotplug_slot *hotplug) { struct hotplug_slot *temp; + struct pci_slot *slot; - if (slot == NULL) + if (!hotplug) return -ENODEV; - temp = get_slot_from_name (slot->name); - if (temp != slot) { + temp = get_slot_from_name(hotplug->name); + if (temp != hotplug) return -ENODEV; - } - list_del (&slot->slot_list); - fs_remove_slot (slot); - dbg ("Removed slot %s from the list\n", slot->name); - kobject_put(&slot->kobj); + spin_lock(&pci_hotplug_slot_list_lock); + list_del(&hotplug->slot_list); + spin_unlock(&pci_hotplug_slot_list_lock); + + slot = hotplug->pci_slot; + fs_remove_slot(slot); + dbg("Removed slot %s from the list\n", hotplug->name); + + hotplug->release(hotplug); + slot->hotplug = NULL; + pci_destroy_slot(slot); + return 0; } /** * pci_hp_change_slot_info - changes the slot's information structure in the core - * @slot: pointer to the slot whose info has changed + * @hotplug: pointer to the slot whose info has changed * @info: pointer to the info copy into the slot's info structure * * @slot must have been registered with the pci @@ -690,13 +659,15 @@ int pci_hp_deregister (struct hotplug_slot *slot) * * Returns 0 if successful, anything else for an error. */ -int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot, +int __must_check pci_hp_change_slot_info(struct hotplug_slot *hotplug, struct hotplug_slot_info *info) { - if ((slot == NULL) || (info == NULL)) + struct pci_slot *slot; + if (!hotplug || !info) return -ENODEV; + slot = hotplug->pci_slot; - memcpy (slot->info, info, sizeof (struct hotplug_slot_info)); + memcpy(hotplug->info, info, sizeof(struct hotplug_slot_info)); return 0; } @@ -704,36 +675,22 @@ int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot, static int __init pci_hotplug_init (void) { int result; - struct kset *pci_bus_kset; - pci_bus_kset = bus_get_kset(&pci_bus_type); - - pci_hotplug_slots_kset = kset_create_and_add("slots", NULL, - &pci_bus_kset->kobj); - if (!pci_hotplug_slots_kset) { - result = -ENOMEM; - err("Register subsys error\n"); - goto exit; - } result = cpci_hotplug_init(debug); if (result) { err ("cpci_hotplug_init with error %d\n", result); - goto err_subsys; + goto err_cpci; } info (DRIVER_DESC " version: " DRIVER_VERSION "\n"); - goto exit; -err_subsys: - kset_unregister(pci_hotplug_slots_kset); -exit: +err_cpci: return result; } static void __exit pci_hotplug_exit (void) { cpci_hotplug_exit(); - kset_unregister(pci_hotplug_slots_kset); } module_init(pci_hotplug_init); @@ -745,7 +702,6 @@ MODULE_LICENSE("GPL"); module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "Debugging mode enabled or not"); -EXPORT_SYMBOL_GPL(pci_hotplug_slots_kset); EXPORT_SYMBOL_GPL(pci_hp_register); EXPORT_SYMBOL_GPL(pci_hp_deregister); EXPORT_SYMBOL_GPL(pci_hp_change_slot_info); diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 79c9ddaad3fb..e3a1e7e7dba2 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -43,6 +43,7 @@ extern int pciehp_poll_mode; extern int pciehp_poll_time; extern int pciehp_debug; extern int pciehp_force; +extern int pciehp_slot_with_bus; extern struct workqueue_struct *pciehp_wq; #define dbg(format, arg...) \ @@ -96,7 +97,7 @@ struct controller { u32 slot_cap; u8 cap_base; struct timer_list poll_timer; - volatile int cmd_busy; + int cmd_busy; unsigned int no_cmd_complete:1; }; @@ -156,10 +157,10 @@ extern u8 pciehp_handle_power_fault(struct slot *p_slot); extern int pciehp_configure_device(struct slot *p_slot); extern int pciehp_unconfigure_device(struct slot *p_slot); extern void pciehp_queue_pushbutton_work(struct work_struct *work); -int pcie_init(struct controller *ctrl, struct pcie_device *dev); +struct controller *pcie_init(struct pcie_device *dev); int pciehp_enable_slot(struct slot *p_slot); int pciehp_disable_slot(struct slot *p_slot); -int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev); +int pcie_enable_notification(struct controller *ctrl); static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device) { @@ -202,8 +203,13 @@ struct hpc_ops { #include #include -#define pciehp_get_hp_hw_control_from_firmware(dev) \ - pciehp_acpi_get_hp_hw_control_from_firmware(dev) +static inline int pciehp_get_hp_hw_control_from_firmware(struct pci_dev *dev) +{ + u32 flags = (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + return acpi_get_hp_hw_control_from_firmware(dev, flags); +} + static inline int pciehp_get_hp_params_from_firmware(struct pci_dev *dev, struct hotplug_params *hpp) { diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 48a2ed378914..3677495c4f91 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -72,7 +72,6 @@ static int get_power_status (struct hotplug_slot *slot, u8 *value); static int get_attention_status (struct hotplug_slot *slot, u8 *value); static int get_latch_status (struct hotplug_slot *slot, u8 *value); static int get_adapter_status (struct hotplug_slot *slot, u8 *value); -static int get_address (struct hotplug_slot *slot, u32 *value); static int get_max_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); static int get_cur_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); @@ -85,7 +84,6 @@ static struct hotplug_slot_ops pciehp_hotplug_slot_ops = { .get_attention_status = get_attention_status, .get_latch_status = get_latch_status, .get_adapter_status = get_adapter_status, - .get_address = get_address, .get_max_bus_speed = get_max_bus_speed, .get_cur_bus_speed = get_cur_bus_speed, }; @@ -185,23 +183,10 @@ static struct hotplug_slot_attribute hotplug_slot_attr_lock = { */ static void release_slot(struct hotplug_slot *hotplug_slot) { - struct slot *slot = hotplug_slot->private; - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); - kfree(slot->hotplug_slot->info); - kfree(slot->hotplug_slot); - kfree(slot); -} - -static void make_slot_name(struct slot *slot) -{ - if (pciehp_slot_with_bus) - snprintf(slot->hotplug_slot->name, SLOT_NAME_SIZE, "%04d_%04d", - slot->bus, slot->number); - else - snprintf(slot->hotplug_slot->name, SLOT_NAME_SIZE, "%d", - slot->number); + kfree(hotplug_slot->info); + kfree(hotplug_slot); } static int init_slots(struct controller *ctrl) @@ -210,49 +195,34 @@ static int init_slots(struct controller *ctrl) struct hotplug_slot *hotplug_slot; struct hotplug_slot_info *info; int retval = -ENOMEM; - int i; - - for (i = 0; i < ctrl->num_slots; i++) { - slot = kzalloc(sizeof(*slot), GFP_KERNEL); - if (!slot) - goto error; + list_for_each_entry(slot, &ctrl->slot_list, slot_list) { hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL); if (!hotplug_slot) - goto error_slot; - slot->hotplug_slot = hotplug_slot; + goto error; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) goto error_hpslot; - hotplug_slot->info = info; - - hotplug_slot->name = slot->name; - - slot->hp_slot = i; - slot->ctrl = ctrl; - slot->bus = ctrl->pci_dev->subordinate->number; - slot->device = ctrl->slot_device_offset + i; - slot->hpc_ops = ctrl->hpc_ops; - slot->number = ctrl->first_slot; - mutex_init(&slot->lock); - INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work); /* register this slot with the hotplug pci core */ + hotplug_slot->info = info; + hotplug_slot->name = slot->name; hotplug_slot->private = slot; hotplug_slot->release = &release_slot; - make_slot_name(slot); hotplug_slot->ops = &pciehp_hotplug_slot_ops; - get_power_status(hotplug_slot, &info->power_status); get_attention_status(hotplug_slot, &info->attention_status); get_latch_status(hotplug_slot, &info->latch_status); get_adapter_status(hotplug_slot, &info->adapter_status); + slot->hotplug_slot = hotplug_slot; dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x " "slot_device_offset=%x\n", slot->bus, slot->device, slot->hp_slot, slot->number, ctrl->slot_device_offset); - retval = pci_hp_register(hotplug_slot); + retval = pci_hp_register(hotplug_slot, + ctrl->pci_dev->subordinate, + slot->device); if (retval) { err("pci_hp_register failed with error %d\n", retval); if (retval == -EEXIST) @@ -263,7 +233,7 @@ static int init_slots(struct controller *ctrl) } /* create additional sysfs entries */ if (EMI(ctrl)) { - retval = sysfs_create_file(&hotplug_slot->kobj, + retval = sysfs_create_file(&hotplug_slot->pci_slot->kobj, &hotplug_slot_attr_lock.attr); if (retval) { pci_hp_deregister(hotplug_slot); @@ -271,8 +241,6 @@ static int init_slots(struct controller *ctrl) goto error_info; } } - - list_add(&slot->slot_list, &ctrl->slot_list); } return 0; @@ -280,27 +248,18 @@ error_info: kfree(info); error_hpslot: kfree(hotplug_slot); -error_slot: - kfree(slot); error: return retval; } static void cleanup_slots(struct controller *ctrl) { - struct list_head *tmp; - struct list_head *next; struct slot *slot; - list_for_each_safe(tmp, next, &ctrl->slot_list) { - slot = list_entry(tmp, struct slot, slot_list); - list_del(&slot->slot_list); + list_for_each_entry(slot, &ctrl->slot_list, slot_list) { if (EMI(ctrl)) - sysfs_remove_file(&slot->hotplug_slot->kobj, + sysfs_remove_file(&slot->hotplug_slot->pci_slot->kobj, &hotplug_slot_attr_lock.attr); - cancel_delayed_work(&slot->work); - flush_scheduled_work(); - flush_workqueue(pciehp_wq); pci_hp_deregister(slot->hotplug_slot); } } @@ -398,19 +357,8 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value) return 0; } -static int get_address(struct hotplug_slot *hotplug_slot, u32 *value) -{ - struct slot *slot = hotplug_slot->private; - struct pci_bus *bus = slot->ctrl->pci_dev->subordinate; - - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); - - *value = (pci_domain_nr(bus) << 16) | (slot->bus << 8) | slot->device; - - return 0; -} - -static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value) +static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, + enum pci_bus_speed *value) { struct slot *slot = hotplug_slot->private; int retval; @@ -444,34 +392,30 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_ struct controller *ctrl; struct slot *t_slot; u8 value; - struct pci_dev *pdev; + struct pci_dev *pdev = dev->port; - ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); - if (!ctrl) { - err("%s : out of memory\n", __func__); + if (pciehp_force) + dbg("Bypassing BIOS check for pciehp use on %s\n", + pci_name(pdev)); + else if (pciehp_get_hp_hw_control_from_firmware(pdev)) goto err_out_none; - } - INIT_LIST_HEAD(&ctrl->slot_list); - - pdev = dev->port; - ctrl->pci_dev = pdev; - rc = pcie_init(ctrl, dev); - if (rc) { + ctrl = pcie_init(dev); + if (!ctrl) { dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME); - goto err_out_free_ctrl; + goto err_out_none; } - - pci_set_drvdata(pdev, ctrl); - - dbg("%s: ctrl bus=0x%x, device=%x, function=%x, irq=%x\n", - __func__, pdev->bus->number, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), pdev->irq); + set_service_data(dev, ctrl); /* Setup the slot information structures */ rc = init_slots(ctrl); if (rc) { - err("%s: slot initialization failed\n", PCIE_MODULE_NAME); + if (rc == -EBUSY) + warn("%s: slot already registered by another " + "hotplug driver\n", PCIE_MODULE_NAME); + else + err("%s: slot initialization failed\n", + PCIE_MODULE_NAME); goto err_out_release_ctlr; } @@ -495,20 +439,16 @@ err_out_free_ctrl_slot: cleanup_slots(ctrl); err_out_release_ctlr: ctrl->hpc_ops->release_ctlr(ctrl); -err_out_free_ctrl: - kfree(ctrl); err_out_none: return -ENODEV; } static void pciehp_remove (struct pcie_device *dev) { - struct pci_dev *pdev = dev->port; - struct controller *ctrl = pci_get_drvdata(pdev); + struct controller *ctrl = get_service_data(dev); cleanup_slots(ctrl); ctrl->hpc_ops->release_ctlr(ctrl); - kfree(ctrl); } #ifdef CONFIG_PM @@ -522,13 +462,12 @@ static int pciehp_resume (struct pcie_device *dev) { printk("%s ENTRY\n", __func__); if (pciehp_force) { - struct pci_dev *pdev = dev->port; - struct controller *ctrl = pci_get_drvdata(pdev); + struct controller *ctrl = get_service_data(dev); struct slot *t_slot; u8 status; /* reinitialize the chipset's event detection logic */ - pcie_init_hardware_part2(ctrl, dev); + pcie_enable_notification(ctrl); t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 79f104963166..1323a43285d7 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -247,30 +247,32 @@ static inline void pciehp_free_irq(struct controller *ctrl) free_irq(ctrl->pci_dev->irq, ctrl); } -static inline int pcie_poll_cmd(struct controller *ctrl) +static int pcie_poll_cmd(struct controller *ctrl) { u16 slot_status; int timeout = 1000; - if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) - if (slot_status & CMD_COMPLETED) - goto completed; - for (timeout = 1000; timeout > 0; timeout -= 100) { - msleep(100); - if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) - if (slot_status & CMD_COMPLETED) - goto completed; + if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) { + if (slot_status & CMD_COMPLETED) { + pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); + return 1; + } + } + while (timeout > 1000) { + msleep(10); + timeout -= 10; + if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) { + if (slot_status & CMD_COMPLETED) { + pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); + return 1; + } + } } return 0; /* timeout */ - -completed: - pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); - return timeout; } -static inline int pcie_wait_cmd(struct controller *ctrl, int poll) +static void pcie_wait_cmd(struct controller *ctrl, int poll) { - int retval = 0; unsigned int msecs = pciehp_poll_mode ? 2500 : 1000; unsigned long timeout = msecs_to_jiffies(msecs); int rc; @@ -278,16 +280,9 @@ static inline int pcie_wait_cmd(struct controller *ctrl, int poll) if (poll) rc = pcie_poll_cmd(ctrl); else - rc = wait_event_interruptible_timeout(ctrl->queue, - !ctrl->cmd_busy, timeout); + rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout); if (!rc) dbg("Command not completed in 1000 msec\n"); - else if (rc < 0) { - retval = -EINTR; - info("Command was interrupted by a signal\n"); - } - - return retval; } /** @@ -342,10 +337,6 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) slot_ctrl &= ~mask; slot_ctrl |= (cmd & mask); - /* Don't enable command completed if caller is changing it. */ - if (!(mask & CMD_CMPL_INTR_ENABLE)) - slot_ctrl |= CMD_CMPL_INTR_ENABLE; - ctrl->cmd_busy = 1; smp_mb(); retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl); @@ -365,7 +356,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) if (!(slot_ctrl & HP_INTR_ENABLE) || !(slot_ctrl & CMD_CMPL_INTR_ENABLE)) poll = 1; - retval = pcie_wait_cmd(ctrl, poll); + pcie_wait_cmd(ctrl, poll); } out: mutex_unlock(&ctrl->ctrl_lock); @@ -614,23 +605,6 @@ static void hpc_set_green_led_blink(struct slot *slot) __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); } -static void hpc_release_ctlr(struct controller *ctrl) -{ - /* Mask Hot-plug Interrupt Enable */ - if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE)) - err("%s: Cannot mask hotplut interrupt enable\n", __func__); - - /* Free interrupt handler or interrupt polling timer */ - pciehp_free_irq(ctrl); - - /* - * If this is the last controller to be released, destroy the - * pciehp work queue - */ - if (atomic_dec_and_test(&pciehp_num_controllers)) - destroy_workqueue(pciehp_wq); -} - static int hpc_power_on_slot(struct slot * slot) { struct controller *ctrl = slot->ctrl; @@ -785,7 +759,7 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) intr_loc |= detected; if (!intr_loc) return IRQ_NONE; - if (pciehp_writew(ctrl, SLOTSTATUS, detected)) { + if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) { err("%s: Cannot write to SLOTSTATUS\n", __func__); return IRQ_NONE; } @@ -797,25 +771,13 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) if (intr_loc & CMD_COMPLETED) { ctrl->cmd_busy = 0; smp_mb(); - wake_up_interruptible(&ctrl->queue); + wake_up(&ctrl->queue); } if (!(intr_loc & ~CMD_COMPLETED)) return IRQ_HANDLED; - /* - * Return without handling events if this handler routine is - * called before controller initialization is done. This may - * happen if hotplug event or another interrupt that shares - * the IRQ with pciehp arrives before slot initialization is - * done after interrupt handler is registered. - * - * FIXME - Need more structural fixes. We need to be ready to - * handle the event before installing interrupt handler. - */ p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); - if (!p_slot || !p_slot->hpc_ops) - return IRQ_HANDLED; /* Check MRL Sensor Changed */ if (intr_loc & MRL_SENS_CHANGED) @@ -992,6 +954,7 @@ static int hpc_get_cur_lnk_width(struct slot *slot, return retval; } +static void pcie_release_ctrl(struct controller *ctrl); static struct hpc_ops pciehp_hpc_ops = { .power_on_slot = hpc_power_on_slot, .power_off_slot = hpc_power_off_slot, @@ -1013,97 +976,11 @@ static struct hpc_ops pciehp_hpc_ops = { .green_led_off = hpc_set_green_led_off, .green_led_blink = hpc_set_green_led_blink, - .release_ctlr = hpc_release_ctlr, + .release_ctlr = pcie_release_ctrl, .check_lnk_status = hpc_check_lnk_status, }; -#ifdef CONFIG_ACPI -static int pciehp_acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev) -{ - acpi_status status; - acpi_handle chandle, handle = DEVICE_ACPI_HANDLE(&(dev->dev)); - struct pci_dev *pdev = dev; - struct pci_bus *parent; - struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; - - /* - * Per PCI firmware specification, we should run the ACPI _OSC - * method to get control of hotplug hardware before using it. - * If an _OSC is missing, we look for an OSHP to do the same thing. - * To handle different BIOS behavior, we look for _OSC and OSHP - * within the scope of the hotplug controller and its parents, upto - * the host bridge under which this controller exists. - */ - while (!handle) { - /* - * This hotplug controller was not listed in the ACPI name - * space at all. Try to get acpi handle of parent pci bus. - */ - if (!pdev || !pdev->bus->parent) - break; - parent = pdev->bus->parent; - dbg("Could not find %s in acpi namespace, trying parent\n", - pci_name(pdev)); - if (!parent->self) - /* Parent must be a host bridge */ - handle = acpi_get_pci_rootbridge_handle( - pci_domain_nr(parent), - parent->number); - else - handle = DEVICE_ACPI_HANDLE( - &(parent->self->dev)); - pdev = parent->self; - } - - while (handle) { - acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); - dbg("Trying to get hotplug control for %s \n", - (char *)string.pointer); - status = pci_osc_control_set(handle, - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL | - OSC_PCI_EXPRESS_NATIVE_HP_CONTROL); - if (status == AE_NOT_FOUND) - status = acpi_run_oshp(handle); - if (ACPI_SUCCESS(status)) { - dbg("Gained control for hotplug HW for pci %s (%s)\n", - pci_name(dev), (char *)string.pointer); - kfree(string.pointer); - return 0; - } - if (acpi_root_bridge(handle)) - break; - chandle = handle; - status = acpi_get_parent(chandle, &handle); - if (ACPI_FAILURE(status)) - break; - } - - dbg("Cannot get control of hotplug hardware for pci %s\n", - pci_name(dev)); - - kfree(string.pointer); - return -1; -} -#endif - -static int pcie_init_hardware_part1(struct controller *ctrl, - struct pcie_device *dev) -{ - /* Clear all remaining event bits in Slot Status register */ - if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f)) { - err("%s: Cannot write to SLOTSTATUS register\n", __func__); - return -1; - } - - /* Mask Hot-plug Interrupt Enable */ - if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE)) { - err("%s: Cannot mask hotplug interrupt enable\n", __func__); - return -1; - } - return 0; -} - -int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev) +int pcie_enable_notification(struct controller *ctrl) { u16 cmd, mask; @@ -1115,30 +992,83 @@ int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev) if (MRL_SENS(ctrl)) cmd |= MRL_DETECT_ENABLE; if (!pciehp_poll_mode) - cmd |= HP_INTR_ENABLE; + cmd |= HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; - mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | - PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE | HP_INTR_ENABLE; + mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE | + PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; if (pcie_write_cmd(ctrl, cmd, mask)) { err("%s: Cannot enable software notification\n", __func__); - goto abort; + return -1; } + return 0; +} - if (pciehp_force) - dbg("Bypassing BIOS check for pciehp use on %s\n", - pci_name(ctrl->pci_dev)); - else if (pciehp_get_hp_hw_control_from_firmware(ctrl->pci_dev)) - goto abort_disable_intr; +static void pcie_disable_notification(struct controller *ctrl) +{ + u16 mask; + mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE | + PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; + if (pcie_write_cmd(ctrl, 0, mask)) + warn("%s: Cannot disable software notification\n", __func__); +} +static int pcie_init_notification(struct controller *ctrl) +{ + if (pciehp_request_irq(ctrl)) + return -1; + if (pcie_enable_notification(ctrl)) { + pciehp_free_irq(ctrl); + return -1; + } return 0; +} - /* We end up here for the many possible ways to fail this API. */ -abort_disable_intr: - if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE)) - err("%s : disabling interrupts failed\n", __func__); -abort: - return -1; +static void pcie_shutdown_notification(struct controller *ctrl) +{ + pcie_disable_notification(ctrl); + pciehp_free_irq(ctrl); +} + +static void make_slot_name(struct slot *slot) +{ + if (pciehp_slot_with_bus) + snprintf(slot->name, SLOT_NAME_SIZE, "%04d_%04d", + slot->bus, slot->number); + else + snprintf(slot->name, SLOT_NAME_SIZE, "%d", slot->number); +} + +static int pcie_init_slot(struct controller *ctrl) +{ + struct slot *slot; + + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) + return -ENOMEM; + + slot->hp_slot = 0; + slot->ctrl = ctrl; + slot->bus = ctrl->pci_dev->subordinate->number; + slot->device = ctrl->slot_device_offset + slot->hp_slot; + slot->hpc_ops = ctrl->hpc_ops; + slot->number = ctrl->first_slot; + make_slot_name(slot); + mutex_init(&slot->lock); + INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work); + list_add(&slot->slot_list, &ctrl->slot_list); + return 0; +} + +static void pcie_cleanup_slot(struct controller *ctrl) +{ + struct slot *slot; + slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list); + list_del(&slot->slot_list); + cancel_delayed_work(&slot->work); + flush_scheduled_work(); + flush_workqueue(pciehp_wq); + kfree(slot); } static inline void dbg_ctrl(struct controller *ctrl) @@ -1176,15 +1106,23 @@ static inline void dbg_ctrl(struct controller *ctrl) dbg(" Comamnd Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes"); pciehp_readw(ctrl, SLOTSTATUS, ®16); dbg("Slot Status : 0x%04x\n", reg16); - pciehp_readw(ctrl, SLOTSTATUS, ®16); + pciehp_readw(ctrl, SLOTCTRL, ®16); dbg("Slot Control : 0x%04x\n", reg16); } -int pcie_init(struct controller *ctrl, struct pcie_device *dev) +struct controller *pcie_init(struct pcie_device *dev) { + struct controller *ctrl; u32 slot_cap; struct pci_dev *pdev = dev->port; + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); + if (!ctrl) { + err("%s : out of memory\n", __func__); + goto abort; + } + INIT_LIST_HEAD(&ctrl->slot_list); + ctrl->pci_dev = pdev; ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP); if (!ctrl->cap_base) { @@ -1215,15 +1153,12 @@ int pcie_init(struct controller *ctrl, struct pcie_device *dev) !(POWER_CTRL(ctrl) | ATTN_LED(ctrl) | PWR_LED(ctrl) | EMI(ctrl))) ctrl->no_cmd_complete = 1; - info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n", - pdev->vendor, pdev->device, - pdev->subsystem_vendor, pdev->subsystem_device); + /* Clear all remaining event bits in Slot Status register */ + if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f)) + goto abort_ctrl; - if (pcie_init_hardware_part1(ctrl, dev)) - goto abort; - - if (pciehp_request_irq(ctrl)) - goto abort; + /* Disable sotfware notification */ + pcie_disable_notification(ctrl); /* * If this is the first controller to be initialized, @@ -1231,18 +1166,39 @@ int pcie_init(struct controller *ctrl, struct pcie_device *dev) */ if (atomic_add_return(1, &pciehp_num_controllers) == 1) { pciehp_wq = create_singlethread_workqueue("pciehpd"); - if (!pciehp_wq) { - goto abort_free_irq; - } + if (!pciehp_wq) + goto abort_ctrl; } - if (pcie_init_hardware_part2(ctrl, dev)) - goto abort_free_irq; + info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n", + pdev->vendor, pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device); + + if (pcie_init_slot(ctrl)) + goto abort_ctrl; - return 0; + if (pcie_init_notification(ctrl)) + goto abort_slot; -abort_free_irq: - pciehp_free_irq(ctrl); + return ctrl; + +abort_slot: + pcie_cleanup_slot(ctrl); +abort_ctrl: + kfree(ctrl); abort: - return -1; + return NULL; +} + +void pcie_release_ctrl(struct controller *ctrl) +{ + pcie_shutdown_notification(ctrl); + pcie_cleanup_slot(ctrl); + /* + * If this is the last controller to be released, destroy the + * pciehp work queue + */ + if (atomic_dec_and_test(&pciehp_num_controllers)) + destroy_workqueue(pciehp_wq); + kfree(ctrl); } diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index 779c5db71be4..a796301ea03f 100644 --- a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -14,8 +14,10 @@ */ #include #include +#include #include #include "rpadlpar.h" +#include "../pci.h" #define DLPAR_KOBJ_NAME "control" @@ -27,7 +29,6 @@ #define MAX_DRC_NAME_LEN 64 - static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t nbytes) { @@ -112,7 +113,7 @@ int dlpar_sysfs_init(void) int error; dlpar_kobj = kobject_create_and_add(DLPAR_KOBJ_NAME, - &pci_hotplug_slots_kset->kobj); + &pci_slots_kset->kobj); if (!dlpar_kobj) return -EINVAL; diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c index 56197b600d36..9b714ea93d20 100644 --- a/drivers/pci/hotplug/rpaphp_slot.c +++ b/drivers/pci/hotplug/rpaphp_slot.c @@ -33,33 +33,6 @@ #include #include "rpaphp.h" -static ssize_t address_read_file (struct hotplug_slot *php_slot, char *buf) -{ - int retval; - struct slot *slot = (struct slot *)php_slot->private; - struct pci_bus *bus; - - if (!slot) - return -ENOENT; - - bus = slot->bus; - if (!bus) - return -ENOENT; - - if (bus->self) - retval = sprintf(buf, pci_name(bus->self)); - else - retval = sprintf(buf, "%04x:%02x:00.0", - pci_domain_nr(bus), bus->number); - - return retval; -} - -static struct hotplug_slot_attribute php_attr_address = { - .attr = {.name = "address", .mode = S_IFREG | S_IRUGO}, - .show = address_read_file, -}; - /* free up the memory used by a slot */ static void rpaphp_release_slot(struct hotplug_slot *hotplug_slot) { @@ -135,9 +108,6 @@ int rpaphp_deregister_slot(struct slot *slot) list_del(&slot->rpaphp_slot_list); - /* remove "address" file */ - sysfs_remove_file(&php_slot->kobj, &php_attr_address.attr); - retval = pci_hp_deregister(php_slot); if (retval) err("Problem unregistering a slot %s\n", slot->name); @@ -151,6 +121,7 @@ int rpaphp_register_slot(struct slot *slot) { struct hotplug_slot *php_slot = slot->hotplug_slot; int retval; + int slotno; dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", __func__, slot->dn->full_name, slot->index, slot->name, @@ -162,19 +133,16 @@ int rpaphp_register_slot(struct slot *slot) return -EAGAIN; } - retval = pci_hp_register(php_slot); + if (slot->dn->child) + slotno = PCI_SLOT(PCI_DN(slot->dn->child)->devfn); + else + slotno = -1; + retval = pci_hp_register(php_slot, slot->bus, slotno); if (retval) { err("pci_hp_register failed with error %d\n", retval); return retval; } - /* create "address" file */ - retval = sysfs_create_file(&php_slot->kobj, &php_attr_address.attr); - if (retval) { - err("sysfs_create_file failed with error %d\n", retval); - goto sysfs_fail; - } - /* add slot to our internal list */ list_add(&slot->rpaphp_slot_list, &rpaphp_slot_head); info("Slot [%s] registered\n", slot->name); diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c index 2fe37cd85b69..410fe0394a8e 100644 --- a/drivers/pci/hotplug/sgi_hotplug.c +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -197,13 +197,15 @@ static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot, static struct hotplug_slot * sn_hp_destroy(void) { struct slot *slot; + struct pci_slot *pci_slot; struct hotplug_slot *bss_hotplug_slot = NULL; list_for_each_entry(slot, &sn_hp_list, hp_list) { bss_hotplug_slot = slot->hotplug_slot; + pci_slot = bss_hotplug_slot->pci_slot; list_del(&((struct slot *)bss_hotplug_slot->private)-> hp_list); - sysfs_remove_file(&bss_hotplug_slot->kobj, + sysfs_remove_file(&pci_slot->kobj, &sn_slot_path_attr.attr); break; } @@ -614,6 +616,7 @@ static void sn_release_slot(struct hotplug_slot *bss_hotplug_slot) static int sn_hotplug_slot_register(struct pci_bus *pci_bus) { int device; + struct pci_slot *pci_slot; struct hotplug_slot *bss_hotplug_slot; int rc = 0; @@ -650,11 +653,12 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus) bss_hotplug_slot->ops = &sn_hotplug_slot_ops; bss_hotplug_slot->release = &sn_release_slot; - rc = pci_hp_register(bss_hotplug_slot); + rc = pci_hp_register(bss_hotplug_slot, pci_bus, device); if (rc) goto register_err; - rc = sysfs_create_file(&bss_hotplug_slot->kobj, + pci_slot = bss_hotplug_slot->pci_slot; + rc = sysfs_create_file(&pci_slot->kobj, &sn_slot_path_attr.attr); if (rc) goto register_err; @@ -664,7 +668,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus) register_err: dev_dbg(&pci_bus->self->dev, "bus failed to register with err = %d\n", - rc); + rc); alloc_err: if (rc == -ENOMEM) diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index f66e8d6315ab..8a026f750deb 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -170,6 +170,7 @@ extern void shpchp_queue_pushbutton_work(struct work_struct *work); extern int shpc_init( struct controller *ctrl, struct pci_dev *pdev); #ifdef CONFIG_ACPI +#include static inline int get_hp_params_from_firmware(struct pci_dev *dev, struct hotplug_params *hpp) { @@ -177,14 +178,15 @@ static inline int get_hp_params_from_firmware(struct pci_dev *dev, return -ENODEV; return 0; } -#define get_hp_hw_control_from_firmware(pdev) \ - do { \ - if (DEVICE_ACPI_HANDLE(&(pdev->dev))) \ - acpi_run_oshp(DEVICE_ACPI_HANDLE(&(pdev->dev)));\ - } while (0) + +static inline int get_hp_hw_control_from_firmware(struct pci_dev *dev) +{ + u32 flags = OSC_SHPC_NATIVE_HP_CONTROL; + return acpi_get_hp_hw_control_from_firmware(dev, flags); +} #else #define get_hp_params_from_firmware(dev, hpp) (-ENODEV) -#define get_hp_hw_control_from_firmware(dev) do { } while (0) +#define get_hp_hw_control_from_firmware(dev) (0) #endif struct ctrl_reg { diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index 97848654652a..a8cbd039b85b 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -39,7 +39,7 @@ int shpchp_debug; int shpchp_poll_mode; int shpchp_poll_time; -int shpchp_slot_with_bus; +static int shpchp_slot_with_bus; struct workqueue_struct *shpchp_wq; #define DRIVER_VERSION "0.4" @@ -68,7 +68,6 @@ static int get_power_status (struct hotplug_slot *slot, u8 *value); static int get_attention_status (struct hotplug_slot *slot, u8 *value); static int get_latch_status (struct hotplug_slot *slot, u8 *value); static int get_adapter_status (struct hotplug_slot *slot, u8 *value); -static int get_address (struct hotplug_slot *slot, u32 *value); static int get_max_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); static int get_cur_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value); @@ -81,7 +80,6 @@ static struct hotplug_slot_ops shpchp_hotplug_slot_ops = { .get_attention_status = get_attention_status, .get_latch_status = get_latch_status, .get_adapter_status = get_adapter_status, - .get_address = get_address, .get_max_bus_speed = get_max_bus_speed, .get_cur_bus_speed = get_cur_bus_speed, }; @@ -159,7 +157,8 @@ static int init_slots(struct controller *ctrl) dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x " "slot_device_offset=%x\n", slot->bus, slot->device, slot->hp_slot, slot->number, ctrl->slot_device_offset); - retval = pci_hp_register(slot->hotplug_slot); + retval = pci_hp_register(slot->hotplug_slot, + ctrl->pci_dev->subordinate, slot->device); if (retval) { err("pci_hp_register failed with error %d\n", retval); if (retval == -EEXIST) @@ -288,19 +287,8 @@ static int get_adapter_status (struct hotplug_slot *hotplug_slot, u8 *value) return 0; } -static int get_address (struct hotplug_slot *hotplug_slot, u32 *value) -{ - struct slot *slot = get_slot(hotplug_slot); - struct pci_bus *bus = slot->ctrl->pci_dev->subordinate; - - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); - - *value = (pci_domain_nr(bus) << 16) | (slot->bus << 8) | slot->device; - - return 0; -} - -static int get_max_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value) +static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, + enum pci_bus_speed *value) { struct slot *slot = get_slot(hotplug_slot); int retval; @@ -330,13 +318,14 @@ static int get_cur_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_sp static int is_shpc_capable(struct pci_dev *dev) { - if ((dev->vendor == PCI_VENDOR_ID_AMD) || (dev->device == - PCI_DEVICE_ID_AMD_GOLAM_7450)) - return 1; - if (pci_find_capability(dev, PCI_CAP_ID_SHPC)) - return 1; - - return 0; + if ((dev->vendor == PCI_VENDOR_ID_AMD) || (dev->device == + PCI_DEVICE_ID_AMD_GOLAM_7450)) + return 1; + if (!pci_find_capability(dev, PCI_CAP_ID_SHPC)) + return 0; + if (get_hp_hw_control_from_firmware(dev)) + return 0; + return 1; } static int shpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index 7d770b2cd889..7a0bff364cd4 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -1084,7 +1084,6 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev) dbg("%s: HPC at b:d:f:irq=0x%x:%x:%x:%x\n", __func__, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), pdev->irq); - get_hp_hw_control_from_firmware(pdev); /* * If this is the first controller to be initialized, diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index bb0642318a95..3f7b81c065d2 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1748,7 +1748,6 @@ int __init init_dmars(void) deferred_flush = kzalloc(g_num_of_iommus * sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { - kfree(g_iommus); ret = -ENOMEM; goto error; } diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 8c61304cbb37..15af618d36e2 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -70,12 +70,10 @@ arch_teardown_msi_irqs(struct pci_dev *dev) } } -static void msi_set_enable(struct pci_dev *dev, int enable) +static void __msi_set_enable(struct pci_dev *dev, int pos, int enable) { - int pos; u16 control; - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); if (pos) { pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); control &= ~PCI_MSI_FLAGS_ENABLE; @@ -85,6 +83,11 @@ static void msi_set_enable(struct pci_dev *dev, int enable) } } +static void msi_set_enable(struct pci_dev *dev, int enable) +{ + __msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable); +} + static void msix_set_enable(struct pci_dev *dev, int enable) { int pos; @@ -141,7 +144,8 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) mask_bits |= flag & mask; pci_write_config_dword(entry->dev, pos, mask_bits); } else { - msi_set_enable(entry->dev, !flag); + __msi_set_enable(entry->dev, entry->msi_attrib.pos, + !flag); } break; case PCI_CAP_ID_MSIX: @@ -561,9 +565,8 @@ int pci_enable_msi(struct pci_dev* dev) /* Check whether driver already requested for MSI-X irqs */ if (dev->msix_enabled) { - printk(KERN_INFO "PCI: %s: Can't enable MSI. " - "Device already has MSI-X enabled\n", - pci_name(dev)); + dev_info(&dev->dev, "can't enable MSI " + "(MSI-X already enabled)\n"); return -EINVAL; } status = msi_capability_init(dev); @@ -686,9 +689,8 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) /* Check whether driver already requested for MSI irq */ if (dev->msi_enabled) { - printk(KERN_INFO "PCI: %s: Can't enable MSI-X. " - "Device already has an MSI irq assigned\n", - pci_name(dev)); + dev_info(&dev->dev, "can't enable MSI-X " + "(MSI IRQ already assigned)\n"); return -EINVAL; } status = msix_capability_init(dev, entries, nvec); diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 9d6fc8e6285d..7764768b6a0e 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -21,12 +21,19 @@ struct acpi_osc_data { acpi_handle handle; - u32 ctrlset_buf[3]; - u32 global_ctrlsets; + u32 support_set; + u32 control_set; + int is_queried; + u32 query_result; struct list_head sibiling; }; static LIST_HEAD(acpi_osc_data_list); +struct acpi_osc_args { + u32 capbuf[3]; + u32 query_result; +}; + static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) { struct acpi_osc_data *data; @@ -44,42 +51,18 @@ static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) return data; } -static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; +static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, + 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; -static acpi_status -acpi_query_osc ( - acpi_handle handle, - u32 level, - void *context, - void **retval ) +static acpi_status acpi_run_osc(acpi_handle handle, + struct acpi_osc_args *osc_args) { - acpi_status status; - struct acpi_object_list input; - union acpi_object in_params[4]; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *out_obj; - u32 osc_dw0; - acpi_status *ret_status = (acpi_status *)retval; - struct acpi_osc_data *osc_data; - u32 flags = (unsigned long)context, temp; - acpi_handle tmp; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return status; - - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - return AE_ERROR; - } - - osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] |= (flags & OSC_SUPPORT_MASKS); - - /* do _OSC query for all possible controls */ - temp = osc_data->ctrlset_buf[OSC_CONTROL_TYPE]; - osc_data->ctrlset_buf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; + acpi_status status; + struct acpi_object_list input; + union acpi_object in_params[4]; + struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *out_obj; + u32 osc_dw0, flags = osc_args->capbuf[OSC_QUERY_TYPE]; /* Setting up input parameters */ input.count = 4; @@ -93,20 +76,19 @@ acpi_query_osc ( in_params[2].integer.value = 3; in_params[3].type = ACPI_TYPE_BUFFER; in_params[3].buffer.length = 12; - in_params[3].buffer.pointer = (u8 *)osc_data->ctrlset_buf; + in_params[3].buffer.pointer = (u8 *)osc_args->capbuf; status = acpi_evaluate_object(handle, "_OSC", &input, &output); if (ACPI_FAILURE(status)) - goto out_nofree; - out_obj = output.pointer; + return status; + out_obj = output.pointer; if (out_obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_DEBUG - "Evaluate _OSC returns wrong type\n"); + printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n"); status = AE_TYPE; - goto query_osc_out; + goto out_kfree; } - osc_dw0 = *((u32 *) out_obj->buffer.pointer); + osc_dw0 = *((u32 *)out_obj->buffer.pointer); if (osc_dw0) { if (osc_dw0 & OSC_REQUEST_ERROR) printk(KERN_DEBUG "_OSC request fails\n"); @@ -115,93 +97,58 @@ acpi_query_osc ( if (osc_dw0 & OSC_INVALID_REVISION_ERROR) printk(KERN_DEBUG "_OSC invalid revision\n"); if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) { - /* Update Global Control Set */ - osc_data->global_ctrlsets = - *((u32 *)(out_obj->buffer.pointer + 8)); - status = AE_OK; - goto query_osc_out; + if (flags & OSC_QUERY_ENABLE) + goto out_success; + printk(KERN_DEBUG "_OSC FW not grant req. control\n"); + status = AE_SUPPORT; + goto out_kfree; } status = AE_ERROR; - goto query_osc_out; + goto out_kfree; } - - /* Update Global Control Set */ - osc_data->global_ctrlsets = *((u32 *)(out_obj->buffer.pointer + 8)); +out_success: + if (flags & OSC_QUERY_ENABLE) + osc_args->query_result = + *((u32 *)(out_obj->buffer.pointer + 8)); status = AE_OK; -query_osc_out: +out_kfree: kfree(output.pointer); -out_nofree: - *ret_status = status; - - osc_data->ctrlset_buf[OSC_QUERY_TYPE] = !OSC_QUERY_ENABLE; - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = temp; - if (ACPI_FAILURE(status)) { - /* no osc support at all */ - osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] = 0; - } - return status; } - -static acpi_status -acpi_run_osc ( - acpi_handle handle, - void *context) +static acpi_status acpi_query_osc(acpi_handle handle, + u32 level, void *context, void **retval) { - acpi_status status; - struct acpi_object_list input; - union acpi_object in_params[4]; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *out_obj; - u32 osc_dw0; - - /* Setting up input parameters */ - input.count = 4; - input.pointer = in_params; - in_params[0].type = ACPI_TYPE_BUFFER; - in_params[0].buffer.length = 16; - in_params[0].buffer.pointer = OSC_UUID; - in_params[1].type = ACPI_TYPE_INTEGER; - in_params[1].integer.value = 1; - in_params[2].type = ACPI_TYPE_INTEGER; - in_params[2].integer.value = 3; - in_params[3].type = ACPI_TYPE_BUFFER; - in_params[3].buffer.length = 12; - in_params[3].buffer.pointer = (u8 *)context; + acpi_status status; + struct acpi_osc_data *osc_data; + u32 flags = (unsigned long)context, support_set; + acpi_handle tmp; + struct acpi_osc_args osc_args; - status = acpi_evaluate_object(handle, "_OSC", &input, &output); - if (ACPI_FAILURE (status)) + status = acpi_get_handle(handle, "_OSC", &tmp); + if (ACPI_FAILURE(status)) return status; - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_DEBUG - "Evaluate _OSC returns wrong type\n"); - status = AE_TYPE; - goto run_osc_out; + osc_data = acpi_get_osc_data(handle); + if (!osc_data) { + printk(KERN_ERR "acpi osc data array is full\n"); + return AE_ERROR; } - osc_dw0 = *((u32 *) out_obj->buffer.pointer); - if (osc_dw0) { - if (osc_dw0 & OSC_REQUEST_ERROR) - printk(KERN_DEBUG "_OSC request fails\n"); - if (osc_dw0 & OSC_INVALID_UUID_ERROR) - printk(KERN_DEBUG "_OSC invalid UUID\n"); - if (osc_dw0 & OSC_INVALID_REVISION_ERROR) - printk(KERN_DEBUG "_OSC invalid revision\n"); - if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) { - printk(KERN_DEBUG "_OSC FW not grant req. control\n"); - status = AE_SUPPORT; - goto run_osc_out; - } - status = AE_ERROR; - goto run_osc_out; + + /* do _OSC query for all possible controls */ + support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS); + osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set; + osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; + + status = acpi_run_osc(handle, &osc_args); + if (ACPI_SUCCESS(status)) { + osc_data->support_set = support_set; + osc_data->query_result = osc_args.query_result; + osc_data->is_queried = 1; } - status = AE_OK; -run_osc_out: - kfree(output.pointer); return status; } @@ -215,15 +162,11 @@ run_osc_out: **/ acpi_status __pci_osc_support_set(u32 flags, const char *hid) { - acpi_status retval = AE_NOT_FOUND; - - if (!(flags & OSC_SUPPORT_MASKS)) { + if (!(flags & OSC_SUPPORT_MASKS)) return AE_TYPE; - } - acpi_get_devices(hid, - acpi_query_osc, - (void *)(unsigned long)flags, - (void **) &retval ); + + acpi_get_devices(hid, acpi_query_osc, + (void *)(unsigned long)flags, NULL); return AE_OK; } @@ -236,10 +179,11 @@ acpi_status __pci_osc_support_set(u32 flags, const char *hid) **/ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) { - acpi_status status; - u32 ctrlset; + acpi_status status; + u32 ctrlset, control_set; acpi_handle tmp; struct acpi_osc_data *osc_data; + struct acpi_osc_args osc_args; status = acpi_get_handle(handle, "_OSC", &tmp); if (ACPI_FAILURE(status)) @@ -252,24 +196,25 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) } ctrlset = (flags & OSC_CONTROL_MASKS); - if (!ctrlset) { + if (!ctrlset) return AE_TYPE; - } - if (osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] && - ((osc_data->global_ctrlsets & ctrlset) != ctrlset)) { + + if (osc_data->is_queried && + ((osc_data->query_result & ctrlset) != ctrlset)) return AE_SUPPORT; - } - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] |= ctrlset; - status = acpi_run_osc(handle, osc_data->ctrlset_buf); - if (ACPI_FAILURE (status)) { - osc_data->ctrlset_buf[OSC_CONTROL_TYPE] &= ~ctrlset; - } - + + control_set = osc_data->control_set | ctrlset; + osc_args.capbuf[OSC_QUERY_TYPE] = 0; + osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set; + osc_args.capbuf[OSC_CONTROL_TYPE] = control_set; + status = acpi_run_osc(handle, &osc_args); + if (ACPI_SUCCESS(status)) + osc_data->control_set = control_set; + return status; } EXPORT_SYMBOL(pci_osc_control_set); -#ifdef CONFIG_ACPI_SLEEP /* * _SxD returns the D-state with the highest power * (lowest D-state number) supported in the S-state "x". @@ -293,13 +238,11 @@ EXPORT_SYMBOL(pci_osc_control_set); * choose highest power _SxD or any lower power */ -static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev, - pm_message_t state) +static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) { int acpi_state; - acpi_state = acpi_pm_device_sleep_state(&pdev->dev, - device_may_wakeup(&pdev->dev), NULL); + acpi_state = acpi_pm_device_sleep_state(&pdev->dev, NULL); if (acpi_state < 0) return PCI_POWER_ERROR; @@ -315,7 +258,13 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev, } return PCI_POWER_ERROR; } -#endif + +static bool acpi_pci_power_manageable(struct pci_dev *dev) +{ + acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev); + + return handle ? acpi_bus_power_manageable(handle) : false; +} static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) { @@ -328,12 +277,11 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) [PCI_D3hot] = ACPI_STATE_D3, [PCI_D3cold] = ACPI_STATE_D3 }; + int error = -EINVAL; - if (!handle) - return -ENODEV; /* If the ACPI device has _EJ0, ignore the device */ - if (ACPI_SUCCESS(acpi_get_handle(handle, "_EJ0", &tmp))) - return 0; + if (!handle || ACPI_SUCCESS(acpi_get_handle(handle, "_EJ0", &tmp))) + return -ENODEV; switch (state) { case PCI_D0: @@ -341,11 +289,41 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) case PCI_D2: case PCI_D3hot: case PCI_D3cold: - return acpi_bus_set_power(handle, state_conv[state]); + error = acpi_bus_set_power(handle, state_conv[state]); } - return -EINVAL; + + if (!error) + dev_printk(KERN_INFO, &dev->dev, + "power state changed by ACPI to D%d\n", state); + + return error; +} + +static bool acpi_pci_can_wakeup(struct pci_dev *dev) +{ + acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev); + + return handle ? acpi_bus_can_wakeup(handle) : false; +} + +static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable) +{ + int error = acpi_pm_device_sleep_wake(&dev->dev, enable); + + if (!error) + dev_printk(KERN_INFO, &dev->dev, + "wake-up capability %s by ACPI\n", + enable ? "enabled" : "disabled"); + return error; } +static struct pci_platform_pm_ops acpi_pci_platform_pm = { + .is_manageable = acpi_pci_power_manageable, + .set_state = acpi_pci_set_power_state, + .choose_state = acpi_pci_choose_state, + .can_wakeup = acpi_pci_can_wakeup, + .sleep_wake = acpi_pci_sleep_wake, +}; /* ACPI bus type */ static int acpi_pci_find_device(struct device *dev, acpi_handle *handle) @@ -397,10 +375,7 @@ static int __init acpi_pci_init(void) ret = register_acpi_bus_type(&acpi_pci_bus); if (ret) return 0; -#ifdef CONFIG_ACPI_SLEEP - platform_pci_choose_state = acpi_pci_choose_state; -#endif - platform_pci_set_power_state = acpi_pci_set_power_state; + pci_set_platform_pm(&acpi_pci_platform_pm); return 0; } arch_initcall(acpi_pci_init); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index e1637bd82b8e..a13f53486114 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -274,7 +274,57 @@ static int pci_device_remove(struct device * dev) return 0; } -static int pci_device_suspend(struct device * dev, pm_message_t state) +static void pci_device_shutdown(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + + if (drv && drv->shutdown) + drv->shutdown(pci_dev); + pci_msi_shutdown(pci_dev); + pci_msix_shutdown(pci_dev); +} + +#ifdef CONFIG_PM_SLEEP + +/* + * Default "suspend" method for devices that have no driver provided suspend, + * or not even a driver at all. + */ +static void pci_default_pm_suspend(struct pci_dev *pci_dev) +{ + pci_save_state(pci_dev); + /* + * mark its power state as "unknown", since we don't know if + * e.g. the BIOS will change its device state when we suspend. + */ + if (pci_dev->current_state == PCI_D0) + pci_dev->current_state = PCI_UNKNOWN; +} + +/* + * Default "resume" method for devices that have no driver provided resume, + * or not even a driver at all. + */ +static int pci_default_pm_resume(struct pci_dev *pci_dev) +{ + int retval = 0; + + /* restore the PCI config space */ + pci_restore_state(pci_dev); + /* if the device was enabled before suspend, reenable */ + retval = pci_reenable_device(pci_dev); + /* + * if the device was busmaster before the suspend, make it busmaster + * again + */ + if (pci_dev->is_busmaster) + pci_set_master(pci_dev); + + return retval; +} + +static int pci_legacy_suspend(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; @@ -284,18 +334,12 @@ static int pci_device_suspend(struct device * dev, pm_message_t state) i = drv->suspend(pci_dev, state); suspend_report_result(drv->suspend, i); } else { - pci_save_state(pci_dev); - /* - * mark its power state as "unknown", since we don't know if - * e.g. the BIOS will change its device state when we suspend. - */ - if (pci_dev->current_state == PCI_D0) - pci_dev->current_state = PCI_UNKNOWN; + pci_default_pm_suspend(pci_dev); } return i; } -static int pci_device_suspend_late(struct device * dev, pm_message_t state) +static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) { struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; @@ -308,26 +352,7 @@ static int pci_device_suspend_late(struct device * dev, pm_message_t state) return i; } -/* - * Default resume method for devices that have no driver provided resume, - * or not even a driver at all. - */ -static int pci_default_resume(struct pci_dev *pci_dev) -{ - int retval = 0; - - /* restore the PCI config space */ - pci_restore_state(pci_dev); - /* if the device was enabled before suspend, reenable */ - retval = pci_reenable_device(pci_dev); - /* if the device was busmaster before the suspend, make it busmaster again */ - if (pci_dev->is_busmaster) - pci_set_master(pci_dev); - - return retval; -} - -static int pci_device_resume(struct device * dev) +static int pci_legacy_resume(struct device *dev) { int error; struct pci_dev * pci_dev = to_pci_dev(dev); @@ -336,34 +361,313 @@ static int pci_device_resume(struct device * dev) if (drv && drv->resume) error = drv->resume(pci_dev); else - error = pci_default_resume(pci_dev); + error = pci_default_pm_resume(pci_dev); return error; } -static int pci_device_resume_early(struct device * dev) +static int pci_legacy_resume_early(struct device *dev) { int error = 0; struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - pci_fixup_device(pci_fixup_resume, pci_dev); - if (drv && drv->resume_early) error = drv->resume_early(pci_dev); return error; } -static void pci_device_shutdown(struct device *dev) +static int pci_pm_prepare(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int error = 0; + + if (drv && drv->pm && drv->pm->prepare) + error = drv->pm->prepare(dev); + + return error; +} + +static void pci_pm_complete(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv && drv->pm && drv->pm->complete) + drv->pm->complete(dev); +} + +#ifdef CONFIG_SUSPEND + +static int pci_pm_suspend(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct device_driver *drv = dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->suspend) { + error = drv->pm->suspend(dev); + suspend_report_result(drv->pm->suspend, error); + } else { + pci_default_pm_suspend(pci_dev); + } + } else { + error = pci_legacy_suspend(dev, PMSG_SUSPEND); + } + pci_fixup_device(pci_fixup_suspend, pci_dev); + + return error; +} + +static int pci_pm_suspend_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct pci_driver *drv = pci_dev->driver; + int error = 0; - if (drv && drv->shutdown) - drv->shutdown(pci_dev); - pci_msi_shutdown(pci_dev); - pci_msix_shutdown(pci_dev); + if (drv && drv->pm) { + if (drv->pm->suspend_noirq) { + error = drv->pm->suspend_noirq(dev); + suspend_report_result(drv->pm->suspend_noirq, error); + } + } else { + error = pci_legacy_suspend_late(dev, PMSG_SUSPEND); + } + + return error; } +static int pci_pm_resume(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct device_driver *drv = dev->driver; + int error; + + pci_fixup_device(pci_fixup_resume, pci_dev); + + if (drv && drv->pm) { + error = drv->pm->resume ? drv->pm->resume(dev) : + pci_default_pm_resume(pci_dev); + } else { + error = pci_legacy_resume(dev); + } + + return error; +} + +static int pci_pm_resume_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + pci_fixup_device(pci_fixup_resume_early, pci_dev); + + if (drv && drv->pm) { + if (drv->pm->resume_noirq) + error = drv->pm->resume_noirq(dev); + } else { + error = pci_legacy_resume_early(dev); + } + + return error; +} + +#else /* !CONFIG_SUSPEND */ + +#define pci_pm_suspend NULL +#define pci_pm_suspend_noirq NULL +#define pci_pm_resume NULL +#define pci_pm_resume_noirq NULL + +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_HIBERNATION + +static int pci_pm_freeze(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct device_driver *drv = dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->freeze) { + error = drv->pm->freeze(dev); + suspend_report_result(drv->pm->freeze, error); + } else { + pci_default_pm_suspend(pci_dev); + } + } else { + error = pci_legacy_suspend(dev, PMSG_FREEZE); + pci_fixup_device(pci_fixup_suspend, pci_dev); + } + + return error; +} + +static int pci_pm_freeze_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->freeze_noirq) { + error = drv->pm->freeze_noirq(dev); + suspend_report_result(drv->pm->freeze_noirq, error); + } + } else { + error = pci_legacy_suspend_late(dev, PMSG_FREEZE); + } + + return error; +} + +static int pci_pm_thaw(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->thaw) + error = drv->pm->thaw(dev); + } else { + pci_fixup_device(pci_fixup_resume, to_pci_dev(dev)); + error = pci_legacy_resume(dev); + } + + return error; +} + +static int pci_pm_thaw_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->thaw_noirq) + error = drv->pm->thaw_noirq(dev); + } else { + pci_fixup_device(pci_fixup_resume_early, pci_dev); + error = pci_legacy_resume_early(dev); + } + + return error; +} + +static int pci_pm_poweroff(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int error = 0; + + pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev)); + + if (drv && drv->pm) { + if (drv->pm->poweroff) { + error = drv->pm->poweroff(dev); + suspend_report_result(drv->pm->poweroff, error); + } + } else { + error = pci_legacy_suspend(dev, PMSG_HIBERNATE); + } + + return error; +} + +static int pci_pm_poweroff_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + if (drv && drv->pm) { + if (drv->pm->poweroff_noirq) { + error = drv->pm->poweroff_noirq(dev); + suspend_report_result(drv->pm->poweroff_noirq, error); + } + } else { + error = pci_legacy_suspend_late(dev, PMSG_HIBERNATE); + } + + return error; +} + +static int pci_pm_restore(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct device_driver *drv = dev->driver; + int error; + + if (drv && drv->pm) { + error = drv->pm->restore ? drv->pm->restore(dev) : + pci_default_pm_resume(pci_dev); + } else { + error = pci_legacy_resume(dev); + } + pci_fixup_device(pci_fixup_resume, pci_dev); + + return error; +} + +static int pci_pm_restore_noirq(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = pci_dev->driver; + int error = 0; + + pci_fixup_device(pci_fixup_resume, pci_dev); + + if (drv && drv->pm) { + if (drv->pm->restore_noirq) + error = drv->pm->restore_noirq(dev); + } else { + error = pci_legacy_resume_early(dev); + } + pci_fixup_device(pci_fixup_resume_early, pci_dev); + + return error; +} + +#else /* !CONFIG_HIBERNATION */ + +#define pci_pm_freeze NULL +#define pci_pm_freeze_noirq NULL +#define pci_pm_thaw NULL +#define pci_pm_thaw_noirq NULL +#define pci_pm_poweroff NULL +#define pci_pm_poweroff_noirq NULL +#define pci_pm_restore NULL +#define pci_pm_restore_noirq NULL + +#endif /* !CONFIG_HIBERNATION */ + +struct pm_ext_ops pci_pm_ops = { + .base = { + .prepare = pci_pm_prepare, + .complete = pci_pm_complete, + .suspend = pci_pm_suspend, + .resume = pci_pm_resume, + .freeze = pci_pm_freeze, + .thaw = pci_pm_thaw, + .poweroff = pci_pm_poweroff, + .restore = pci_pm_restore, + }, + .suspend_noirq = pci_pm_suspend_noirq, + .resume_noirq = pci_pm_resume_noirq, + .freeze_noirq = pci_pm_freeze_noirq, + .thaw_noirq = pci_pm_thaw_noirq, + .poweroff_noirq = pci_pm_poweroff_noirq, + .restore_noirq = pci_pm_restore_noirq, +}; + +#define PCI_PM_OPS_PTR &pci_pm_ops + +#else /* !CONFIG_PM_SLEEP */ + +#define PCI_PM_OPS_PTR NULL + +#endif /* !CONFIG_PM_SLEEP */ + /** * __pci_register_driver - register a new pci driver * @drv: the driver structure to register @@ -386,6 +690,9 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, drv->driver.owner = owner; drv->driver.mod_name = mod_name; + if (drv->pm) + drv->driver.pm = &drv->pm->base; + spin_lock_init(&drv->dynids.lock); INIT_LIST_HEAD(&drv->dynids.list); @@ -511,12 +818,9 @@ struct bus_type pci_bus_type = { .uevent = pci_uevent, .probe = pci_device_probe, .remove = pci_device_remove, - .suspend = pci_device_suspend, - .suspend_late = pci_device_suspend_late, - .resume_early = pci_device_resume_early, - .resume = pci_device_resume, .shutdown = pci_device_shutdown, .dev_attrs = pci_dev_attrs, + .pm = PCI_PM_OPS_PTR, }; static int __init pci_driver_init(void) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e4548ab2a93c..44a46c92b721 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1,6 +1,4 @@ /* - * $Id: pci.c,v 1.91 1999/01/21 13:34:01 davem Exp $ - * * PCI Bus Services, see include/linux/pci.h for further explanation. * * Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter, @@ -19,6 +17,7 @@ #include #include #include +#include #include /* isa_dma_bridge_buggy */ #include "pci.h" @@ -378,74 +377,90 @@ pci_restore_bars(struct pci_dev *dev) pci_update_resource(dev, &dev->resource[i], i); } -int (*platform_pci_set_power_state)(struct pci_dev *dev, pci_power_t t); +static struct pci_platform_pm_ops *pci_platform_pm; -/** - * pci_set_power_state - Set the power state of a PCI device - * @dev: PCI device to be suspended - * @state: PCI power state (D0, D1, D2, D3hot, D3cold) we're entering - * - * Transition a device to a new power state, using the Power Management - * Capabilities in the device's config space. - * - * RETURN VALUE: - * -EINVAL if trying to enter a lower state than we're already in. - * 0 if we're already in the requested state. - * -EIO if device does not support PCI PM. - * 0 if we can successfully change the power state. - */ -int -pci_set_power_state(struct pci_dev *dev, pci_power_t state) +int pci_set_platform_pm(struct pci_platform_pm_ops *ops) { - int pm, need_restore = 0; - u16 pmcsr, pmc; + if (!ops->is_manageable || !ops->set_state || !ops->choose_state + || !ops->sleep_wake || !ops->can_wakeup) + return -EINVAL; + pci_platform_pm = ops; + return 0; +} - /* bound the state we're entering */ - if (state > PCI_D3hot) - state = PCI_D3hot; +static inline bool platform_pci_power_manageable(struct pci_dev *dev) +{ + return pci_platform_pm ? pci_platform_pm->is_manageable(dev) : false; +} - /* - * If the device or the parent bridge can't support PCI PM, ignore - * the request if we're doing anything besides putting it into D0 - * (which would only happen on boot). - */ - if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev)) - return 0; +static inline int platform_pci_set_power_state(struct pci_dev *dev, + pci_power_t t) +{ + return pci_platform_pm ? pci_platform_pm->set_state(dev, t) : -ENOSYS; +} - /* find PCI PM capability in list */ - pm = pci_find_capability(dev, PCI_CAP_ID_PM); +static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev) +{ + return pci_platform_pm ? + pci_platform_pm->choose_state(dev) : PCI_POWER_ERROR; +} - /* abort if the device doesn't support PM capabilities */ - if (!pm) +static inline bool platform_pci_can_wakeup(struct pci_dev *dev) +{ + return pci_platform_pm ? pci_platform_pm->can_wakeup(dev) : false; +} + +static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) +{ + return pci_platform_pm ? + pci_platform_pm->sleep_wake(dev, enable) : -ENODEV; +} + +/** + * pci_raw_set_power_state - Use PCI PM registers to set the power state of + * given PCI device + * @dev: PCI device to handle. + * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. + * + * RETURN VALUE: + * -EINVAL if the requested state is invalid. + * -EIO if device does not support PCI PM or its PM capabilities register has a + * wrong version, or device doesn't support the requested state. + * 0 if device already is in the requested state. + * 0 if device's power state has been successfully changed. + */ +static int +pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) +{ + u16 pmcsr; + bool need_restore = false; + + if (!dev->pm_cap) return -EIO; + if (state < PCI_D0 || state > PCI_D3hot) + return -EINVAL; + /* Validate current state: * Can enter D0 from any state, but if we can only go deeper * to sleep if we're already in a low power state */ - if (state != PCI_D0 && dev->current_state > state) { - printk(KERN_ERR "%s(): %s: state=%d, current state=%d\n", - __func__, pci_name(dev), state, dev->current_state); + if (dev->current_state == state) { + /* we're already there */ + return 0; + } else if (state != PCI_D0 && dev->current_state <= PCI_D3cold + && dev->current_state > state) { + dev_err(&dev->dev, "invalid power transition " + "(from state %d to %d)\n", dev->current_state, state); return -EINVAL; - } else if (dev->current_state == state) - return 0; /* we're already there */ - - - pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc); - if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { - printk(KERN_DEBUG - "PCI: %s has unsupported PM cap regs version (%u)\n", - pci_name(dev), pmc & PCI_PM_CAP_VER_MASK); - return -EIO; } /* check if this device supports the desired state */ - if (state == PCI_D1 && !(pmc & PCI_PM_CAP_D1)) - return -EIO; - else if (state == PCI_D2 && !(pmc & PCI_PM_CAP_D2)) + if ((state == PCI_D1 && !dev->d1_support) + || (state == PCI_D2 && !dev->d2_support)) return -EIO; - pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); /* If we're (effectively) in D3, force entire word to 0. * This doesn't affect PME_Status, disables PME_En, and @@ -461,7 +476,7 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) case PCI_UNKNOWN: /* Boot-up */ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) == PCI_D3hot && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) - need_restore = 1; + need_restore = true; /* Fall-through: force to D0 */ default: pmcsr = 0; @@ -469,7 +484,7 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) } /* enter specified state */ - pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr); + pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); /* Mandatory power management transition delays */ /* see PCI PM 1.1 5.6.1 table 18 */ @@ -478,13 +493,6 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) else if (state == PCI_D2 || dev->current_state == PCI_D2) udelay(200); - /* - * Give firmware a chance to be called, such as ACPI _PRx, _PSx - * Firmware method after native method ? - */ - if (platform_pci_set_power_state) - platform_pci_set_power_state(dev, state); - dev->current_state = state; /* According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT @@ -508,8 +516,77 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; } -pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev, pm_message_t state); - +/** + * pci_update_current_state - Read PCI power state of given device from its + * PCI PM registers and cache it + * @dev: PCI device to handle. + */ +static void pci_update_current_state(struct pci_dev *dev) +{ + if (dev->pm_cap) { + u16 pmcsr; + + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); + } +} + +/** + * pci_set_power_state - Set the power state of a PCI device + * @dev: PCI device to handle. + * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. + * + * Transition a device to a new power state, using the platform formware and/or + * the device's PCI PM registers. + * + * RETURN VALUE: + * -EINVAL if the requested state is invalid. + * -EIO if device does not support PCI PM or its PM capabilities register has a + * wrong version, or device doesn't support the requested state. + * 0 if device already is in the requested state. + * 0 if device's power state has been successfully changed. + */ +int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +{ + int error; + + /* bound the state we're entering */ + if (state > PCI_D3hot) + state = PCI_D3hot; + else if (state < PCI_D0) + state = PCI_D0; + else if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev)) + /* + * If the device or the parent bridge do not support PCI PM, + * ignore the request if we're doing anything other than putting + * it into D0 (which would only happen on boot). + */ + return 0; + + if (state == PCI_D0 && platform_pci_power_manageable(dev)) { + /* + * Allow the platform to change the state, for example via ACPI + * _PR0, _PS0 and some such, but do not trust it. + */ + int ret = platform_pci_set_power_state(dev, PCI_D0); + if (!ret) + pci_update_current_state(dev); + } + + error = pci_raw_set_power_state(dev, state); + + if (state > PCI_D0 && platform_pci_power_manageable(dev)) { + /* Allow the platform to finalize the transition */ + int ret = platform_pci_set_power_state(dev, state); + if (!ret) { + pci_update_current_state(dev); + error = 0; + } + } + + return error; +} + /** * pci_choose_state - Choose the power state of a PCI device * @dev: PCI device to be suspended @@ -527,11 +604,9 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) if (!pci_find_capability(dev, PCI_CAP_ID_PM)) return PCI_D0; - if (platform_pci_choose_state) { - ret = platform_pci_choose_state(dev, state); - if (ret != PCI_POWER_ERROR) - return ret; - } + ret = platform_pci_choose_state(dev); + if (ret != PCI_POWER_ERROR) + return ret; switch (state.event) { case PM_EVENT_ON: @@ -543,7 +618,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) case PM_EVENT_HIBERNATE: return PCI_D3hot; default: - printk("Unrecognized suspend event %d\n", state.event); + dev_info(&dev->dev, "unrecognized suspend event %d\n", + state.event); BUG(); } return PCI_D0; @@ -568,7 +644,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) else found = 1; if (!save_state) { - dev_err(&dev->dev, "Out of memory in pci_save_pcie_state\n"); + dev_err(&dev->dev, "out of memory in pci_save_pcie_state\n"); return -ENOMEM; } cap = (u16 *)&save_state->data[0]; @@ -619,7 +695,7 @@ static int pci_save_pcix_state(struct pci_dev *dev) else found = 1; if (!save_state) { - dev_err(&dev->dev, "Out of memory in pci_save_pcie_state\n"); + dev_err(&dev->dev, "out of memory in pci_save_pcie_state\n"); return -ENOMEM; } cap = (u16 *)&save_state->data[0]; @@ -685,10 +761,9 @@ pci_restore_state(struct pci_dev *dev) for (i = 15; i >= 0; i--) { pci_read_config_dword(dev, i * 4, &val); if (val != dev->saved_config_space[i]) { - printk(KERN_DEBUG "PM: Writing back config space on " - "device %s at offset %x (was %x, writing %x)\n", - pci_name(dev), i, - val, (int)dev->saved_config_space[i]); + dev_printk(KERN_DEBUG, &dev->dev, "restoring config " + "space at offset %#x (was %#x, writing %#x)\n", + i, val, (int)dev->saved_config_space[i]); pci_write_config_dword(dev,i * 4, dev->saved_config_space[i]); } @@ -960,6 +1035,46 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) return pcibios_set_pcie_reset_state(dev, state); } +/** + * pci_pme_capable - check the capability of PCI device to generate PME# + * @dev: PCI device to handle. + * @state: PCI state from which device will issue PME#. + */ +static bool pci_pme_capable(struct pci_dev *dev, pci_power_t state) +{ + if (!dev->pm_cap) + return false; + + return !!(dev->pme_support & (1 << state)); +} + +/** + * pci_pme_active - enable or disable PCI device's PME# function + * @dev: PCI device to handle. + * @enable: 'true' to enable PME# generation; 'false' to disable it. + * + * The caller must verify that the device is capable of generating PME# before + * calling this function with @enable equal to 'true'. + */ +static void pci_pme_active(struct pci_dev *dev, bool enable) +{ + u16 pmcsr; + + if (!dev->pm_cap) + return; + + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + /* Clear PME_Status by writing 1 to it and enable PME# */ + pmcsr |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE; + if (!enable) + pmcsr &= ~PCI_PM_CTRL_PME_ENABLE; + + pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); + + dev_printk(KERN_INFO, &dev->dev, "PME# %s\n", + enable ? "enabled" : "disabled"); +} + /** * pci_enable_wake - enable PCI device as wakeup event source * @dev: PCI device affected @@ -971,66 +1086,173 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) * called automatically by this routine. * * Devices with legacy power management (no standard PCI PM capabilities) - * always require such platform hooks. Depending on the platform, devices - * supporting the standard PCI PME# signal may require such platform hooks; - * they always update bits in config space to allow PME# generation. + * always require such platform hooks. * - * -EIO is returned if the device can't ever be a wakeup event source. - * -EINVAL is returned if the device can't generate wakeup events from - * the specified PCI state. Returns zero if the operation is successful. + * RETURN VALUE: + * 0 is returned on success + * -EINVAL is returned if device is not supposed to wake up the system + * Error code depending on the platform is returned if both the platform and + * the native mechanism fail to enable the generation of wake-up events */ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) { - int pm; - int status; - u16 value; - - /* Note that drivers should verify device_may_wakeup(&dev->dev) - * before calling this function. Platform code should report - * errors when drivers try to enable wakeup on devices that - * can't issue wakeups, or on which wakeups were disabled by - * userspace updating the /sys/devices.../power/wakeup file. + int error = 0; + bool pme_done = false; + + if (!device_may_wakeup(&dev->dev)) + return -EINVAL; + + /* + * According to "PCI System Architecture" 4th ed. by Tom Shanley & Don + * Anderson we should be doing PME# wake enable followed by ACPI wake + * enable. To disable wake-up we call the platform first, for symmetry. */ - status = call_platform_enable_wakeup(&dev->dev, enable); + if (!enable && platform_pci_can_wakeup(dev)) + error = platform_pci_sleep_wake(dev, false); - /* find PCI PM capability in list */ - pm = pci_find_capability(dev, PCI_CAP_ID_PM); + if (!enable || pci_pme_capable(dev, state)) { + pci_pme_active(dev, enable); + pme_done = true; + } - /* If device doesn't support PM Capabilities, but caller wants to - * disable wake events, it's a NOP. Otherwise fail unless the - * platform hooks handled this legacy device already. - */ - if (!pm) - return enable ? status : 0; + if (enable && platform_pci_can_wakeup(dev)) + error = platform_pci_sleep_wake(dev, true); - /* Check device's ability to generate PME# */ - pci_read_config_word(dev,pm+PCI_PM_PMC,&value); + return pme_done ? 0 : error; +} - value &= PCI_PM_CAP_PME_MASK; - value >>= ffs(PCI_PM_CAP_PME_MASK) - 1; /* First bit of mask */ +/** + * pci_prepare_to_sleep - prepare PCI device for system-wide transition into + * a sleep state + * @dev: Device to handle. + * + * Choose the power state appropriate for the device depending on whether + * it can wake up the system and/or is power manageable by the platform + * (PCI_D3hot is the default) and put the device into that state. + */ +int pci_prepare_to_sleep(struct pci_dev *dev) +{ + pci_power_t target_state = PCI_D3hot; + int error; - /* Check if it can generate PME# from requested state. */ - if (!value || !(value & (1 << state))) { - /* if it can't, revert what the platform hook changed, - * always reporting the base "EINVAL, can't PME#" error + if (platform_pci_power_manageable(dev)) { + /* + * Call the platform to choose the target state of the device + * and enable wake-up from this state if supported. */ - if (enable) - call_platform_enable_wakeup(&dev->dev, 0); - return enable ? -EINVAL : 0; + pci_power_t state = platform_pci_choose_state(dev); + + switch (state) { + case PCI_POWER_ERROR: + case PCI_UNKNOWN: + break; + case PCI_D1: + case PCI_D2: + if (pci_no_d1d2(dev)) + break; + default: + target_state = state; + } + } else if (device_may_wakeup(&dev->dev)) { + /* + * Find the deepest state from which the device can generate + * wake-up events, make it the target state and enable device + * to generate PME#. + */ + if (!dev->pm_cap) + return -EIO; + + if (dev->pme_support) { + while (target_state + && !(dev->pme_support & (1 << target_state))) + target_state--; + } } - pci_read_config_word(dev, pm + PCI_PM_CTRL, &value); + pci_enable_wake(dev, target_state, true); - /* Clear PME_Status by writing 1 to it and enable PME# */ - value |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE; + error = pci_set_power_state(dev, target_state); - if (!enable) - value &= ~PCI_PM_CTRL_PME_ENABLE; + if (error) + pci_enable_wake(dev, target_state, false); - pci_write_config_word(dev, pm + PCI_PM_CTRL, value); + return error; +} - return 0; +/** + * pci_back_from_sleep - turn PCI device on during system-wide transition into + * the working state a sleep state + * @dev: Device to handle. + * + * Disable device's sytem wake-up capability and put it into D0. + */ +int pci_back_from_sleep(struct pci_dev *dev) +{ + pci_enable_wake(dev, PCI_D0, false); + return pci_set_power_state(dev, PCI_D0); +} + +/** + * pci_pm_init - Initialize PM functions of given PCI device + * @dev: PCI device to handle. + */ +void pci_pm_init(struct pci_dev *dev) +{ + int pm; + u16 pmc; + + dev->pm_cap = 0; + + /* find PCI PM capability in list */ + pm = pci_find_capability(dev, PCI_CAP_ID_PM); + if (!pm) + return; + /* Check device's ability to generate PME# */ + pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); + + if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { + dev_err(&dev->dev, "unsupported PM cap regs version (%u)\n", + pmc & PCI_PM_CAP_VER_MASK); + return; + } + + dev->pm_cap = pm; + + dev->d1_support = false; + dev->d2_support = false; + if (!pci_no_d1d2(dev)) { + if (pmc & PCI_PM_CAP_D1) { + dev_printk(KERN_DEBUG, &dev->dev, "supports D1\n"); + dev->d1_support = true; + } + if (pmc & PCI_PM_CAP_D2) { + dev_printk(KERN_DEBUG, &dev->dev, "supports D2\n"); + dev->d2_support = true; + } + } + + pmc &= PCI_PM_CAP_PME_MASK; + if (pmc) { + dev_printk(KERN_INFO, &dev->dev, + "PME# supported from%s%s%s%s%s\n", + (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "", + (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "", + (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "", + (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "", + (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : ""); + dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT; + /* + * Make device's PM flags reflect the wake-up capability, but + * let the user space enable it to wake up the system as needed. + */ + device_set_wakeup_capable(&dev->dev, true); + device_set_wakeup_enable(&dev->dev, false); + /* Disable the PME# generation functionality */ + pci_pme_active(dev, false); + } else { + dev->pme_support = 0; + } } int @@ -1116,13 +1338,11 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) return 0; err_out: - printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%llx@%llx " - "for device %s\n", - pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem", - bar + 1, /* PCI BAR # */ - (unsigned long long)pci_resource_len(pdev, bar), - (unsigned long long)pci_resource_start(pdev, bar), - pci_name(pdev)); + dev_warn(&pdev->dev, "BAR %d: can't reserve %s region [%#llx-%#llx]\n", + bar, + pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem", + (unsigned long long)pci_resource_start(pdev, bar), + (unsigned long long)pci_resource_end(pdev, bar)); return -EBUSY; } @@ -1214,7 +1434,7 @@ pci_set_master(struct pci_dev *dev) pci_read_config_word(dev, PCI_COMMAND, &cmd); if (! (cmd & PCI_COMMAND_MASTER)) { - pr_debug("PCI: Enabling bus mastering for device %s\n", pci_name(dev)); + dev_dbg(&dev->dev, "enabling bus mastering\n"); cmd |= PCI_COMMAND_MASTER; pci_write_config_word(dev, PCI_COMMAND, cmd); } @@ -1279,8 +1499,8 @@ pci_set_cacheline_size(struct pci_dev *dev) if (cacheline_size == pci_cache_line_size) return 0; - printk(KERN_DEBUG "PCI: cache line size of %d is not supported " - "by device %s\n", pci_cache_line_size << 2, pci_name(dev)); + dev_printk(KERN_DEBUG, &dev->dev, "cache line size of %d is not " + "supported\n", pci_cache_line_size << 2); return -EINVAL; } @@ -1305,8 +1525,7 @@ pci_set_mwi(struct pci_dev *dev) pci_read_config_word(dev, PCI_COMMAND, &cmd); if (! (cmd & PCI_COMMAND_INVALIDATE)) { - pr_debug("PCI: Enabling Mem-Wr-Inval for device %s\n", - pci_name(dev)); + dev_dbg(&dev->dev, "enabling Mem-Wr-Inval\n"); cmd |= PCI_COMMAND_INVALIDATE; pci_write_config_word(dev, PCI_COMMAND, cmd); } @@ -1702,5 +1921,7 @@ EXPORT_SYMBOL(pci_set_power_state); EXPORT_SYMBOL(pci_save_state); EXPORT_SYMBOL(pci_restore_state); EXPORT_SYMBOL(pci_enable_wake); +EXPORT_SYMBOL(pci_prepare_to_sleep); +EXPORT_SYMBOL(pci_back_from_sleep); EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 00408c97e5fc..d807cd786f20 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -5,11 +5,36 @@ extern int pci_create_sysfs_dev_files(struct pci_dev *pdev); extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); extern void pci_cleanup_rom(struct pci_dev *dev); -/* Firmware callbacks */ -extern pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev, - pm_message_t state); -extern int (*platform_pci_set_power_state)(struct pci_dev *dev, - pci_power_t state); +/** + * Firmware PM callbacks + * + * @is_manageable - returns 'true' if given device is power manageable by the + * platform firmware + * + * @set_state - invokes the platform firmware to set the device's power state + * + * @choose_state - returns PCI power state of given device preferred by the + * platform; to be used during system-wide transitions from a + * sleeping state to the working state and vice versa + * + * @can_wakeup - returns 'true' if given device is capable of waking up the + * system from a sleeping state + * + * @sleep_wake - enables/disables the system wake up capability of given device + * + * If given platform is generally capable of power managing PCI devices, all of + * these callbacks are mandatory. + */ +struct pci_platform_pm_ops { + bool (*is_manageable)(struct pci_dev *dev); + int (*set_state)(struct pci_dev *dev, pci_power_t state); + pci_power_t (*choose_state)(struct pci_dev *dev); + bool (*can_wakeup)(struct pci_dev *dev); + int (*sleep_wake)(struct pci_dev *dev, bool enable); +}; + +extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops); +extern void pci_pm_init(struct pci_dev *dev); extern int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val); extern int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val); @@ -106,3 +131,16 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) } struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev); + +/* PCI slot sysfs helper code */ +#define to_pci_slot(s) container_of(s, struct pci_slot, kobj) + +extern struct kset *pci_slots_kset; + +struct pci_slot_attribute { + struct attribute attr; + ssize_t (*show)(struct pci_slot *, char *); + ssize_t (*store)(struct pci_slot *, const char *, size_t); +}; +#define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr) + diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 07c3bdb6edc2..77036f46acfe 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -26,6 +26,7 @@ #include #include "aerdrv.h" +#include "../../pci.h" /* * Version Information @@ -219,8 +220,7 @@ static int __devinit aer_probe (struct pcie_device *dev, /* Alloc rpc data structure */ if (!(rpc = aer_alloc_rpc(dev))) { - printk(KERN_DEBUG "%s: Alloc rpc fails on PCIE device[%s]\n", - __func__, device->bus_id); + dev_printk(KERN_DEBUG, device, "alloc rpc failed\n"); aer_remove(dev); return -ENOMEM; } @@ -228,8 +228,7 @@ static int __devinit aer_probe (struct pcie_device *dev, /* Request IRQ ISR */ if ((status = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev))) { - printk(KERN_DEBUG "%s: Request ISR fails on PCIE device[%s]\n", - __func__, device->bus_id); + dev_printk(KERN_DEBUG, device, "request IRQ failed\n"); aer_remove(dev); return status; } @@ -273,7 +272,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) * to issue Configuration Requests to those devices. */ msleep(200); - printk(KERN_DEBUG "Complete link reset at Root[%s]\n", dev->dev.bus_id); + dev_printk(KERN_DEBUG, &dev->dev, "Root Port link has been reset\n"); /* Enable Root Port's interrupt in response to error messages */ pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index d39a78dbd026..30f581b8791f 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -50,10 +50,10 @@ int aer_osc_setup(struct pcie_device *pciedev) } if (ACPI_FAILURE(status)) { - printk(KERN_DEBUG "AER service couldn't init device %s - %s\n", - pciedev->device.bus_id, - (status == AE_SUPPORT || status == AE_NOT_FOUND) ? - "no _OSC support" : "Run ACPI _OSC fails"); + dev_printk(KERN_DEBUG, &pciedev->device, "AER service couldn't " + "init device: %s\n", + (status == AE_SUPPORT || status == AE_NOT_FOUND) ? + "no _OSC support" : "_OSC failed"); return -1; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index aaa82392d1dc..ee5e7b5176d0 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -221,9 +221,9 @@ static void report_error_detected(struct pci_dev *dev, void *data) * of a driver for this device is unaware of * its hw state. */ - printk(KERN_DEBUG "Device ID[%s] has %s\n", - dev->dev.bus_id, (dev->driver) ? - "no AER-aware driver" : "no driver"); + dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n", + dev->driver ? + "no AER-aware driver" : "no driver"); } return; } @@ -304,7 +304,7 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, { struct aer_broadcast_data result_data; - printk(KERN_DEBUG "Broadcast %s message\n", error_mesg); + dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg); result_data.state = state; if (cb == report_error_detected) result_data.result = PCI_ERS_RESULT_CAN_RECOVER; @@ -404,18 +404,16 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev, data.aer_driver = to_service_driver(aerdev->device.driver); } else { - printk(KERN_DEBUG "No link-reset support to Device ID" - "[%s]\n", - dev->dev.bus_id); + dev_printk(KERN_DEBUG, &dev->dev, "no link-reset " + "support\n"); return PCI_ERS_RESULT_DISCONNECT; } } status = data.aer_driver->reset_link(udev); if (status != PCI_ERS_RESULT_RECOVERED) { - printk(KERN_DEBUG "Link reset at upstream Device ID" - "[%s] failed\n", - udev->dev.bus_id); + dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream " + "device %s failed\n", pci_name(udev)); return PCI_ERS_RESULT_DISCONNECT; } @@ -511,10 +509,12 @@ static void handle_error_source(struct pcie_device * aerdev, } else { status = do_recovery(aerdev, dev, info.severity); if (status == PCI_ERS_RESULT_RECOVERED) { - printk(KERN_DEBUG "AER driver successfully recovered\n"); + dev_printk(KERN_DEBUG, &dev->dev, "AER driver " + "successfully recovered\n"); } else { /* TODO: Should kernel panic here? */ - printk(KERN_DEBUG "AER driver didn't recover\n"); + dev_printk(KERN_DEBUG, &dev->dev, "AER driver didn't " + "recover\n"); } } } diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index 3f0976868eda..359fe5568df1 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -13,6 +13,7 @@ #include #include +#include "portdrv.h" static int pcie_port_bus_match(struct device *dev, struct device_driver *drv); static int pcie_port_bus_suspend(struct device *dev, pm_message_t state); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index fb0abfa508dc..890f0d2b370a 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -23,20 +23,20 @@ static int pcie_port_probe_service(struct device *dev) { struct pcie_device *pciedev; struct pcie_port_service_driver *driver; - int status = -ENODEV; + int status; if (!dev || !dev->driver) - return status; + return -ENODEV; driver = to_service_driver(dev->driver); if (!driver || !driver->probe) - return status; + return -ENODEV; pciedev = to_pcie_device(dev); status = driver->probe(pciedev, driver->id_table); if (!status) { - printk(KERN_DEBUG "Load service driver %s on pcie device %s\n", - driver->name, dev->bus_id); + dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n", + driver->name); get_device(dev); } return status; @@ -53,8 +53,8 @@ static int pcie_port_remove_service(struct device *dev) pciedev = to_pcie_device(dev); driver = to_service_driver(dev->driver); if (driver && driver->remove) { - printk(KERN_DEBUG "Unload service driver %s on pcie device %s\n", - driver->name, dev->bus_id); + dev_printk(KERN_DEBUG, dev, "unloading service driver %s\n", + driver->name); driver->remove(pciedev); put_device(dev); } @@ -103,7 +103,7 @@ static int pcie_port_resume_service(struct device *dev) */ static void release_pcie_device(struct device *dev) { - printk(KERN_DEBUG "Free Port Service[%s]\n", dev->bus_id); + dev_printk(KERN_DEBUG, dev, "free port service\n"); kfree(to_pcie_device(dev)); } @@ -150,7 +150,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) if (pos) { struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] = {{0, 0}, {0, 1}, {0, 2}, {0, 3}}; - printk("%s Found MSIX capability\n", __func__); + dev_info(&dev->dev, "found MSI-X capability\n"); status = pci_enable_msix(dev, msix_entries, nvec); if (!status) { int j = 0; @@ -165,7 +165,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) if (status) { pos = pci_find_capability(dev, PCI_CAP_ID_MSI); if (pos) { - printk("%s Found MSI capability\n", __func__); + dev_info(&dev->dev, "found MSI capability\n"); status = pci_enable_msi(dev); if (!status) { interrupt_mode = PCIE_PORT_MSI_MODE; @@ -252,7 +252,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, return NULL; pcie_device_init(parent, device, port_type, service_type, irq,irq_mode); - printk(KERN_DEBUG "Allocate Port Service[%s]\n", device->device.bus_id); + dev_printk(KERN_DEBUG, &device->device, "allocate port service\n"); return device; } diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 51d163238d93..367c9c20000d 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -91,9 +91,8 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev, pci_set_master(dev); if (!dev->irq && dev->pin) { - printk(KERN_WARNING - "%s->Dev[%04x:%04x] has invalid IRQ. Check vendor BIOS\n", - __func__, dev->vendor, dev->device); + dev_warn(&dev->dev, "device [%04x/%04x] has invalid IRQ; " + "check vendor BIOS\n", dev->vendor, dev->device); } if (pcie_port_device_register(dev)) { pci_disable_device(dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 3706ce7972dd..b1724cf31b66 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -277,8 +277,8 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) res->end = res->start + sz64; #else if (sz64 > 0x100000000ULL) { - printk(KERN_ERR "PCI: Unable to handle 64-bit " - "BAR for device %s\n", pci_name(dev)); + dev_err(&dev->dev, "BAR %d: can't handle 64-bit" + " BAR\n", pos); res->start = 0; res->flags = 0; } else if (lhi) { @@ -329,7 +329,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) return; if (dev->transparent) { - printk(KERN_INFO "PCI: Transparent bridge - %s\n", pci_name(dev)); + dev_info(&dev->dev, "transparent bridge\n"); for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++) child->resource[i] = child->parent->resource[i - 3]; } @@ -392,7 +392,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) limit |= ((long) mem_limit_hi) << 32; #else if (mem_base_hi || mem_limit_hi) { - printk(KERN_ERR "PCI: Unable to handle 64-bit address space for bridge %s\n", pci_name(dev)); + dev_err(&dev->dev, "can't handle 64-bit " + "address space for bridge\n"); return; } #endif @@ -414,6 +415,7 @@ static struct pci_bus * pci_alloc_bus(void) INIT_LIST_HEAD(&b->node); INIT_LIST_HEAD(&b->children); INIT_LIST_HEAD(&b->devices); + INIT_LIST_HEAD(&b->slots); } return b; } @@ -511,8 +513,8 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses); - pr_debug("PCI: Scanning behind PCI bridge %s, config %06x, pass %d\n", - pci_name(dev), buses & 0xffffff, pass); + dev_dbg(&dev->dev, "scanning behind bridge, config %06x, pass %d\n", + buses & 0xffffff, pass); /* Disable MasterAbortMode during probing to avoid reporting of bus errors (in some architectures) */ @@ -535,8 +537,8 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, * ignore it. This can happen with the i450NX chipset. */ if (pci_find_bus(pci_domain_nr(bus), busnr)) { - printk(KERN_INFO "PCI: Bus %04x:%02x already known\n", - pci_domain_nr(bus), busnr); + dev_info(&dev->dev, "bus %04x:%02x already known\n", + pci_domain_nr(bus), busnr); goto out; } @@ -711,8 +713,9 @@ static int pci_setup_device(struct pci_dev * dev) { u32 class; - sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), - dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), + dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); pci_read_config_dword(dev, PCI_CLASS_REVISION, &class); dev->revision = class & 0xff; @@ -720,7 +723,7 @@ static int pci_setup_device(struct pci_dev * dev) dev->class = class; class >>= 8; - pr_debug("PCI: Found %s [%04x/%04x] %06x %02x\n", pci_name(dev), + dev_dbg(&dev->dev, "found [%04x/%04x] class %06x header type %02x\n", dev->vendor, dev->device, class, dev->hdr_type); /* "Unknown power state" */ @@ -788,13 +791,13 @@ static int pci_setup_device(struct pci_dev * dev) break; default: /* unknown header */ - printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n", - pci_name(dev), dev->hdr_type); + dev_err(&dev->dev, "unknown header type %02x, " + "ignoring device\n", dev->hdr_type); return -1; bad: - printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n", - pci_name(dev), class, dev->hdr_type); + dev_err(&dev->dev, "ignoring class %02x (doesn't match header " + "type %02x)\n", class, dev->hdr_type); dev->class = PCI_CLASS_NOT_DEFINED; } @@ -927,7 +930,7 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) return NULL; /* Card hasn't responded in 60 seconds? Must be stuck. */ if (delay > 60 * 1000) { - printk(KERN_WARNING "Device %04x:%02x:%02x.%d not " + printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not " "responding\n", pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -984,6 +987,9 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) /* Fix up broken headers */ pci_fixup_device(pci_fixup_header, dev); + /* Initialize power management of the device */ + pci_pm_init(dev); + /* * Add the device to our list of discovered devices * and the bus list for fixup functions, etc. diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 963a97642ae9..4400dffbd93a 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -1,6 +1,4 @@ /* - * $Id: proc.c,v 1.13 1998/05/12 07:36:07 mj Exp $ - * * Procfs interface for the PCI bus. * * Copyright (c) 1997--1999 Martin Mares @@ -482,5 +480,5 @@ static int __init pci_proc_init(void) return 0; } -__initcall(pci_proc_init); +device_initcall(pci_proc_init); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 338a3f94b4d4..12d489395fad 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -556,7 +556,7 @@ static void quirk_via_ioapic(struct pci_dev *dev) pci_write_config_byte (dev, 0x58, tmp); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic); /* * VIA 8237: Some BIOSs don't set the 'Bypass APIC De-Assert Message' Bit. @@ -576,7 +576,7 @@ static void quirk_via_vt8237_bypass_apic_deassert(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_vt8237_bypass_apic_deassert); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_vt8237_bypass_apic_deassert); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, quirk_via_vt8237_bypass_apic_deassert); /* * The AMD io apic can hang the box when an apic irq is masked. @@ -622,7 +622,7 @@ static void quirk_amd_8131_ioapic(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic); #endif /* CONFIG_X86_IO_APIC */ /* @@ -774,7 +774,7 @@ static void quirk_cardbus_legacy(struct pci_dev *dev) pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0); } DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy); -DECLARE_PCI_FIXUP_RESUME(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy); /* * Following the PCI ordering rules is optional on the AMD762. I'm not @@ -797,7 +797,7 @@ static void quirk_amd_ordering(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering); /* * DreamWorks provided workaround for Dunord I-3000 problem @@ -865,7 +865,7 @@ static void quirk_disable_pxb(struct pci_dev *pdev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454NX, quirk_disable_pxb); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454NX, quirk_disable_pxb); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454NX, quirk_disable_pxb); static void __devinit quirk_amd_ide_mode(struct pci_dev *pdev) { @@ -885,9 +885,9 @@ static void __devinit quirk_amd_ide_mode(struct pci_dev *pdev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode); /* * Serverworks CSB5 IDE does not fully support native mode @@ -1054,6 +1054,20 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev) * its on-board VGA controller */ asus_hides_smbus = 1; } + else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG) + switch(dev->subsystem_device) { + case 0x00b8: /* Compaq Evo D510 CMT */ + case 0x00b9: /* Compaq Evo D510 SFF */ + asus_hides_smbus = 1; + } + else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC) + switch (dev->subsystem_device) { + case 0x001A: /* Compaq Deskpro EN SSF P667 815E */ + /* Motherboard doesn't have host bridge + * subvendor/subdevice IDs, therefore checking + * its on-board VGA controller */ + asus_hides_smbus = 1; + } } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845_HB, asus_hides_smbus_hostbridge); @@ -1068,6 +1082,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82855GM_HB, as DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asus_hides_smbus_hostbridge); static void asus_hides_smbus_lpc(struct pci_dev *dev) { @@ -1093,31 +1109,61 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, asu DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, asus_hides_smbus_lpc); -static void asus_hides_smbus_lpc_ich6(struct pci_dev *dev) +/* It appears we just have one such device. If not, we have a warning */ +static void __iomem *asus_rcba_base; +static void asus_hides_smbus_lpc_ich6_suspend(struct pci_dev *dev) { - u32 val, rcba; - void __iomem *base; + u32 rcba; if (likely(!asus_hides_smbus)) return; + WARN_ON(asus_rcba_base); + pci_read_config_dword(dev, 0xF0, &rcba); - base = ioremap_nocache(rcba & 0xFFFFC000, 0x4000); /* use bits 31:14, 16 kB aligned */ - if (base == NULL) return; - val=readl(base + 0x3418); /* read the Function Disable register, dword mode only */ - writel(val & 0xFFFFFFF7, base + 0x3418); /* enable the SMBus device */ - iounmap(base); + /* use bits 31:14, 16 kB aligned */ + asus_rcba_base = ioremap_nocache(rcba & 0xFFFFC000, 0x4000); + if (asus_rcba_base == NULL) + return; +} + +static void asus_hides_smbus_lpc_ich6_resume_early(struct pci_dev *dev) +{ + u32 val; + + if (likely(!asus_hides_smbus || !asus_rcba_base)) + return; + /* read the Function Disable register, dword mode only */ + val = readl(asus_rcba_base + 0x3418); + writel(val & 0xFFFFFFF7, asus_rcba_base + 0x3418); /* enable the SMBus device */ +} + +static void asus_hides_smbus_lpc_ich6_resume(struct pci_dev *dev) +{ + if (likely(!asus_hides_smbus || !asus_rcba_base)) + return; + iounmap(asus_rcba_base); + asus_rcba_base = NULL; dev_info(&dev->dev, "Enabled ICH6/i801 SMBus device\n"); } + +static void asus_hides_smbus_lpc_ich6(struct pci_dev *dev) +{ + asus_hides_smbus_lpc_ich6_suspend(dev); + asus_hides_smbus_lpc_ich6_resume_early(dev); + asus_hides_smbus_lpc_ich6_resume(dev); +} DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6); +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6_resume); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, asus_hides_smbus_lpc_ich6_resume_early); /* * SiS 96x south bridge: BIOS typically hides SMBus device... @@ -1135,10 +1181,10 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_961, quirk_sis_96x_ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_962, quirk_sis_96x_smbus); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_963, quirk_sis_96x_smbus); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_LPC, quirk_sis_96x_smbus); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_961, quirk_sis_96x_smbus); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_962, quirk_sis_96x_smbus); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_963, quirk_sis_96x_smbus); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_LPC, quirk_sis_96x_smbus); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_961, quirk_sis_96x_smbus); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_962, quirk_sis_96x_smbus); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_963, quirk_sis_96x_smbus); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_LPC, quirk_sis_96x_smbus); /* * ... This is further complicated by the fact that some SiS96x south @@ -1172,7 +1218,7 @@ static void quirk_sis_503(struct pci_dev *dev) quirk_sis_96x_smbus(dev); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, quirk_sis_503); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, quirk_sis_503); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, quirk_sis_503); /* @@ -1205,7 +1251,7 @@ static void asus_hides_ac97_lpc(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc); #if defined(CONFIG_ATA) || defined(CONFIG_ATA_MODULE) @@ -1270,12 +1316,12 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, qui DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata); #endif @@ -1521,6 +1567,10 @@ extern struct pci_fixup __start_pci_fixups_enable[]; extern struct pci_fixup __end_pci_fixups_enable[]; extern struct pci_fixup __start_pci_fixups_resume[]; extern struct pci_fixup __end_pci_fixups_resume[]; +extern struct pci_fixup __start_pci_fixups_resume_early[]; +extern struct pci_fixup __end_pci_fixups_resume_early[]; +extern struct pci_fixup __start_pci_fixups_suspend[]; +extern struct pci_fixup __end_pci_fixups_suspend[]; void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) @@ -1553,6 +1603,16 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) end = __end_pci_fixups_resume; break; + case pci_fixup_resume_early: + start = __start_pci_fixups_resume_early; + end = __end_pci_fixups_resume_early; + break; + + case pci_fixup_suspend: + start = __start_pci_fixups_suspend; + end = __end_pci_fixups_suspend; + break; + default: /* stupid compiler warning, you would think with an enum... */ return; @@ -1629,7 +1689,7 @@ static void quirk_nvidia_ck804_pcie_aer_ext_cap(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE, quirk_nvidia_ck804_pcie_aer_ext_cap); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE, +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE, quirk_nvidia_ck804_pcie_aer_ext_cap); static void __devinit quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 8ddb918f5f57..827c0a520e2b 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -27,13 +27,6 @@ #include -#define DEBUG_CONFIG 1 -#if DEBUG_CONFIG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - static void pbus_assign_resources_sorted(struct pci_bus *bus) { struct pci_dev *dev; @@ -81,8 +74,8 @@ void pci_setup_cardbus(struct pci_bus *bus) struct pci_dev *bridge = bus->self; struct pci_bus_region region; - printk("PCI: Bus %d, cardbus bridge: %s\n", - bus->number, pci_name(bridge)); + dev_info(&bridge->dev, "CardBus bridge, secondary bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]); if (bus->resource[0]->flags & IORESOURCE_IO) { @@ -90,7 +83,7 @@ void pci_setup_cardbus(struct pci_bus *bus) * The IO resource is allocated a range twice as large as it * would normally need. This allows us to set both IO regs. */ - printk(KERN_INFO " IO window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " IO window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); pci_write_config_dword(bridge, PCI_CB_IO_BASE_0, @@ -101,7 +94,7 @@ void pci_setup_cardbus(struct pci_bus *bus) pcibios_resource_to_bus(bridge, ®ion, bus->resource[1]); if (bus->resource[1]->flags & IORESOURCE_IO) { - printk(KERN_INFO " IO window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " IO window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); pci_write_config_dword(bridge, PCI_CB_IO_BASE_1, @@ -112,7 +105,7 @@ void pci_setup_cardbus(struct pci_bus *bus) pcibios_resource_to_bus(bridge, ®ion, bus->resource[2]); if (bus->resource[2]->flags & IORESOURCE_MEM) { - printk(KERN_INFO " PREFETCH window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " PREFETCH window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_0, @@ -123,7 +116,7 @@ void pci_setup_cardbus(struct pci_bus *bus) pcibios_resource_to_bus(bridge, ®ion, bus->resource[3]); if (bus->resource[3]->flags & IORESOURCE_MEM) { - printk(KERN_INFO " MEM window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " MEM window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_1, @@ -151,7 +144,8 @@ static void pci_setup_bridge(struct pci_bus *bus) struct pci_bus_region region; u32 l, bu, lu, io_upper16; - DBG(KERN_INFO "PCI: Bridge: %s\n", pci_name(bridge)); + dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); /* Set up the top and bottom of the PCI I/O segment for this bus. */ pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]); @@ -162,7 +156,7 @@ static void pci_setup_bridge(struct pci_bus *bus) l |= region.end & 0xf000; /* Set up upper 16 bits of I/O base/limit. */ io_upper16 = (region.end & 0xffff0000) | (region.start >> 16); - DBG(KERN_INFO " IO window: %04lx-%04lx\n", + dev_info(&bridge->dev, " IO window: %#04lx-%#04lx\n", (unsigned long)region.start, (unsigned long)region.end); } @@ -170,7 +164,7 @@ static void pci_setup_bridge(struct pci_bus *bus) /* Clear upper 16 bits of I/O base/limit. */ io_upper16 = 0; l = 0x00f0; - DBG(KERN_INFO " IO window: disabled.\n"); + dev_info(&bridge->dev, " IO window: disabled\n"); } /* Temporarily disable the I/O range before updating PCI_IO_BASE. */ pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff); @@ -185,13 +179,13 @@ static void pci_setup_bridge(struct pci_bus *bus) if (bus->resource[1]->flags & IORESOURCE_MEM) { l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; - DBG(KERN_INFO " MEM window: 0x%08lx-0x%08lx\n", + dev_info(&bridge->dev, " MEM window: %#08lx-%#08lx\n", (unsigned long)region.start, (unsigned long)region.end); } else { l = 0x0000fff0; - DBG(KERN_INFO " MEM window: disabled.\n"); + dev_info(&bridge->dev, " MEM window: disabled\n"); } pci_write_config_dword(bridge, PCI_MEMORY_BASE, l); @@ -208,13 +202,13 @@ static void pci_setup_bridge(struct pci_bus *bus) l |= region.end & 0xfff00000; bu = upper_32_bits(region.start); lu = upper_32_bits(region.end); - DBG(KERN_INFO " PREFETCH window: 0x%016llx-0x%016llx\n", + dev_info(&bridge->dev, " PREFETCH window: %#016llx-%#016llx\n", (unsigned long long)region.start, (unsigned long long)region.end); } else { l = 0x0000fff0; - DBG(KERN_INFO " PREFETCH window: disabled.\n"); + dev_info(&bridge->dev, " PREFETCH window: disabled\n"); } pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l); @@ -361,9 +355,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start; order = __ffs(align) - 20; if (order > 11) { - printk(KERN_WARNING "PCI: region %s/%d " - "too large: 0x%016llx-0x%016llx\n", - pci_name(dev), i, + dev_warn(&dev->dev, "BAR %d too large: " + "%#016llx-%#016llx\n", i, (unsigned long long)r->start, (unsigned long long)r->end); r->flags = 0; @@ -529,8 +522,8 @@ void __ref pci_bus_assign_resources(struct pci_bus *bus) break; default: - printk(KERN_INFO "PCI: not setting up bridge %s " - "for bus %d\n", pci_name(dev), b->number); + dev_info(&dev->dev, "not setting up bridge for bus " + "%04x:%02x\n", pci_domain_nr(b), b->number); break; } } diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c index 05ca2ed9eb51..aa795fd428de 100644 --- a/drivers/pci/setup-irq.c +++ b/drivers/pci/setup-irq.c @@ -47,8 +47,7 @@ pdev_fixup_irq(struct pci_dev *dev, } dev->irq = irq; - pr_debug("PCI: fixup irq: (%s) got %d\n", - kobject_name(&dev->dev.kobj), dev->irq); + dev_dbg(&dev->dev, "fixup irq: got %d\n", dev->irq); /* Always tell the device, so the driver knows what is the real IRQ to use; the device does not use it. */ diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 7d35cdf4579f..1a5fc83c71b3 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -26,8 +26,7 @@ #include "pci.h" -void -pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) +void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) { struct pci_bus_region region; u32 new, check, mask; @@ -43,20 +42,20 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) /* * Ignore non-moveable resources. This might be legacy resources for * which no functional BAR register exists or another important - * system resource we should better not move around in system address - * space. + * system resource we shouldn't move around. */ if (res->flags & IORESOURCE_PCI_FIXED) return; pcibios_resource_to_bus(dev, ®ion, res); - pr_debug(" got res [%llx:%llx] bus [%llx:%llx] flags %lx for " - "BAR %d of %s\n", (unsigned long long)res->start, + dev_dbg(&dev->dev, "BAR %d: got res [%#llx-%#llx] bus [%#llx-%#llx] " + "flags %#lx\n", resno, + (unsigned long long)res->start, (unsigned long long)res->end, (unsigned long long)region.start, (unsigned long long)region.end, - (unsigned long)res->flags, resno, pci_name(dev)); + (unsigned long)res->flags); new = region.start | (res->flags & PCI_REGION_FLAG_MASK); if (res->flags & IORESOURCE_IO) @@ -81,9 +80,8 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) pci_read_config_dword(dev, reg, &check); if ((new ^ check) & mask) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", pci_name(dev), resno, - new, check); + dev_err(&dev->dev, "BAR %d: error updating (%#08x != %#08x)\n", + resno, new, check); } if ((new & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == @@ -92,15 +90,14 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) pci_write_config_dword(dev, reg + 4, new); pci_read_config_dword(dev, reg + 4, &check); if (check != new) { - printk(KERN_ERR "PCI: Error updating region " - "%s/%d (high %08x != %08x)\n", - pci_name(dev), resno, new, check); + dev_err(&dev->dev, "BAR %d: error updating " + "(high %#08x != %#08x)\n", resno, new, check); } } res->flags &= ~IORESOURCE_UNSET; - pr_debug("PCI: moved device %s resource %d (%lx) to %x\n", - pci_name(dev), resno, res->flags, - new & ~PCI_REGION_FLAG_MASK); + dev_dbg(&dev->dev, "BAR %d: moved to bus [%#llx-%#llx] flags %#lx\n", + resno, (unsigned long long)region.start, + (unsigned long long)region.end, res->flags); } int pci_claim_resource(struct pci_dev *dev, int resource) @@ -117,10 +114,11 @@ int pci_claim_resource(struct pci_dev *dev, int resource) err = insert_resource(root, res); if (err) { - printk(KERN_ERR "PCI: %s region %d of %s %s [%llx:%llx]\n", - root ? "Address space collision on" : - "No parent found for", - resource, dtype, pci_name(dev), + dev_err(&dev->dev, "BAR %d: %s of %s [%#llx-%#llx]\n", + resource, + root ? "address space collision on" : + "no parent found for", + dtype, (unsigned long long)res->start, (unsigned long long)res->end); } @@ -140,11 +138,10 @@ int pci_assign_resource(struct pci_dev *dev, int resno) align = resource_alignment(res); if (!align) { - printk(KERN_ERR "PCI: Cannot allocate resource (bogus " - "alignment) %d [%llx:%llx] (flags %lx) of %s\n", + dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus " + "alignment) [%#llx-%#llx] flags %#lx\n", resno, (unsigned long long)res->start, - (unsigned long long)res->end, res->flags, - pci_name(dev)); + (unsigned long long)res->end, res->flags); return -EINVAL; } @@ -165,11 +162,11 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } if (ret) { - printk(KERN_ERR "PCI: Failed to allocate %s resource " - "#%d:%llx@%llx for %s\n", + dev_err(&dev->dev, "BAR %d: can't allocate %s resource " + "[%#llx-%#llx]\n", resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", - resno, (unsigned long long)size, - (unsigned long long)res->start, pci_name(dev)); + (unsigned long long)res->start, + (unsigned long long)res->end); } else { res->flags &= ~IORESOURCE_STARTALIGN; if (resno < PCI_BRIDGE_RESOURCES) @@ -205,11 +202,11 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno) } if (ret) { - printk(KERN_ERR "PCI: Failed to allocate %s resource " - "#%d:%llx@%llx for %s\n", + dev_err(&dev->dev, "BAR %d: can't allocate %s resource " + "[%#llx-%#llx\n]", resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", - resno, (unsigned long long)(res->end - res->start + 1), - (unsigned long long)res->start, pci_name(dev)); + (unsigned long long)res->start, + (unsigned long long)res->end); } else if (resno < PCI_BRIDGE_RESOURCES) { pci_update_resource(dev, res, resno); } @@ -239,11 +236,10 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) r_align = resource_alignment(r); if (!r_align) { - printk(KERN_WARNING "PCI: bogus alignment of resource " - "%d [%llx:%llx] (flags %lx) of %s\n", + dev_warn(&dev->dev, "BAR %d: bogus alignment " + "[%#llx-%#llx] flags %#lx\n", i, (unsigned long long)r->start, - (unsigned long long)r->end, r->flags, - pci_name(dev)); + (unsigned long long)r->end, r->flags); continue; } for (list = head; ; list = list->next) { @@ -291,7 +287,7 @@ int pci_enable_resources(struct pci_dev *dev, int mask) if (!r->parent) { dev_err(&dev->dev, "device not available because of " - "BAR %d [%llx:%llx] collisions\n", i, + "BAR %d [%#llx-%#llx] collisions\n", i, (unsigned long long) r->start, (unsigned long long) r->end); return -EINVAL; diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c new file mode 100644 index 000000000000..7e5b85cbd948 --- /dev/null +++ b/drivers/pci/slot.c @@ -0,0 +1,233 @@ +/* + * drivers/pci/slot.c + * Copyright (C) 2006 Matthew Wilcox + * Copyright (C) 2006-2008 Hewlett-Packard Development Company, L.P. + * Alex Chiang + */ + +#include +#include +#include +#include "pci.h" + +struct kset *pci_slots_kset; +EXPORT_SYMBOL_GPL(pci_slots_kset); + +static ssize_t pci_slot_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct pci_slot *slot = to_pci_slot(kobj); + struct pci_slot_attribute *attribute = to_pci_slot_attr(attr); + return attribute->show ? attribute->show(slot, buf) : -EIO; +} + +static ssize_t pci_slot_attr_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t len) +{ + struct pci_slot *slot = to_pci_slot(kobj); + struct pci_slot_attribute *attribute = to_pci_slot_attr(attr); + return attribute->store ? attribute->store(slot, buf, len) : -EIO; +} + +static struct sysfs_ops pci_slot_sysfs_ops = { + .show = pci_slot_attr_show, + .store = pci_slot_attr_store, +}; + +static ssize_t address_read_file(struct pci_slot *slot, char *buf) +{ + if (slot->number == 0xff) + return sprintf(buf, "%04x:%02x\n", + pci_domain_nr(slot->bus), + slot->bus->number); + else + return sprintf(buf, "%04x:%02x:%02x\n", + pci_domain_nr(slot->bus), + slot->bus->number, + slot->number); +} + +static void pci_slot_release(struct kobject *kobj) +{ + struct pci_slot *slot = to_pci_slot(kobj); + + pr_debug("%s: releasing pci_slot on %x:%d\n", __func__, + slot->bus->number, slot->number); + + list_del(&slot->list); + + kfree(slot); +} + +static struct pci_slot_attribute pci_slot_attr_address = + __ATTR(address, (S_IFREG | S_IRUGO), address_read_file, NULL); + +static struct attribute *pci_slot_default_attrs[] = { + &pci_slot_attr_address.attr, + NULL, +}; + +static struct kobj_type pci_slot_ktype = { + .sysfs_ops = &pci_slot_sysfs_ops, + .release = &pci_slot_release, + .default_attrs = pci_slot_default_attrs, +}; + +/** + * pci_create_slot - create or increment refcount for physical PCI slot + * @parent: struct pci_bus of parent bridge + * @slot_nr: PCI_SLOT(pci_dev->devfn) or -1 for placeholder + * @name: user visible string presented in /sys/bus/pci/slots/ + * + * PCI slots have first class attributes such as address, speed, width, + * and a &struct pci_slot is used to manage them. This interface will + * either return a new &struct pci_slot to the caller, or if the pci_slot + * already exists, its refcount will be incremented. + * + * Slots are uniquely identified by a @pci_bus, @slot_nr, @name tuple. + * + * Placeholder slots: + * In most cases, @pci_bus, @slot_nr will be sufficient to uniquely identify + * a slot. There is one notable exception - pSeries (rpaphp), where the + * @slot_nr cannot be determined until a device is actually inserted into + * the slot. In this scenario, the caller may pass -1 for @slot_nr. + * + * The following semantics are imposed when the caller passes @slot_nr == + * -1. First, the check for existing %struct pci_slot is skipped, as the + * caller may know about several unpopulated slots on a given %struct + * pci_bus, and each slot would have a @slot_nr of -1. Uniqueness for + * these slots is then determined by the @name parameter. We expect + * kobject_init_and_add() to warn us if the caller attempts to create + * multiple slots with the same name. The other change in semantics is + * user-visible, which is the 'address' parameter presented in sysfs will + * consist solely of a dddd:bb tuple, where dddd is the PCI domain of the + * %struct pci_bus and bb is the bus number. In other words, the devfn of + * the 'placeholder' slot will not be displayed. + */ + +struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, + const char *name) +{ + struct pci_slot *slot; + int err; + + down_write(&pci_bus_sem); + + if (slot_nr == -1) + goto placeholder; + + /* If we've already created this slot, bump refcount and return. */ + list_for_each_entry(slot, &parent->slots, list) { + if (slot->number == slot_nr) { + kobject_get(&slot->kobj); + pr_debug("%s: inc refcount to %d on %04x:%02x:%02x\n", + __func__, + atomic_read(&slot->kobj.kref.refcount), + pci_domain_nr(parent), parent->number, + slot_nr); + goto out; + } + } + +placeholder: + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + slot = ERR_PTR(-ENOMEM); + goto out; + } + + slot->bus = parent; + slot->number = slot_nr; + + slot->kobj.kset = pci_slots_kset; + err = kobject_init_and_add(&slot->kobj, &pci_slot_ktype, NULL, + "%s", name); + if (err) { + printk(KERN_ERR "Unable to register kobject %s\n", name); + goto err; + } + + INIT_LIST_HEAD(&slot->list); + list_add(&slot->list, &parent->slots); + + /* Don't care if debug printk has a -1 for slot_nr */ + pr_debug("%s: created pci_slot on %04x:%02x:%02x\n", + __func__, pci_domain_nr(parent), parent->number, slot_nr); + + out: + up_write(&pci_bus_sem); + return slot; + err: + kfree(slot); + slot = ERR_PTR(err); + goto out; +} +EXPORT_SYMBOL_GPL(pci_create_slot); + +/** + * pci_update_slot_number - update %struct pci_slot -> number + * @slot - %struct pci_slot to update + * @slot_nr - new number for slot + * + * The primary purpose of this interface is to allow callers who earlier + * created a placeholder slot in pci_create_slot() by passing a -1 as + * slot_nr, to update their %struct pci_slot with the correct @slot_nr. + */ + +void pci_update_slot_number(struct pci_slot *slot, int slot_nr) +{ + int name_count = 0; + struct pci_slot *tmp; + + down_write(&pci_bus_sem); + + list_for_each_entry(tmp, &slot->bus->slots, list) { + WARN_ON(tmp->number == slot_nr); + if (!strcmp(kobject_name(&tmp->kobj), kobject_name(&slot->kobj))) + name_count++; + } + + if (name_count > 1) + printk(KERN_WARNING "pci_update_slot_number found %d slots with the same name: %s\n", name_count, kobject_name(&slot->kobj)); + + slot->number = slot_nr; + up_write(&pci_bus_sem); +} +EXPORT_SYMBOL_GPL(pci_update_slot_number); + +/** + * pci_destroy_slot - decrement refcount for physical PCI slot + * @slot: struct pci_slot to decrement + * + * %struct pci_slot is refcounted, so destroying them is really easy; we + * just call kobject_put on its kobj and let our release methods do the + * rest. + */ + +void pci_destroy_slot(struct pci_slot *slot) +{ + pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__, + atomic_read(&slot->kobj.kref.refcount) - 1, + pci_domain_nr(slot->bus), slot->bus->number, slot->number); + + down_write(&pci_bus_sem); + kobject_put(&slot->kobj); + up_write(&pci_bus_sem); +} +EXPORT_SYMBOL_GPL(pci_destroy_slot); + +static int pci_slot_init(void) +{ + struct kset *pci_bus_kset; + + pci_bus_kset = bus_get_kset(&pci_bus_type); + pci_slots_kset = kset_create_and_add("slots", NULL, + &pci_bus_kset->kobj); + if (!pci_slots_kset) { + printk(KERN_ERR "PCI: Slot initialization failure\n"); + return -ENOMEM; + } + return 0; +} + +subsys_initcall(pci_slot_init); diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index 886dac823ed6..e3fa9a2d9a3d 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -1,3 +1,8 @@ +/* + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas + */ + extern spinlock_t pnp_lock; void *pnp_alloc(long size); @@ -19,22 +24,118 @@ void pnp_remove_card(struct pnp_card *card); int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev); void pnp_remove_card_device(struct pnp_dev *dev); -struct pnp_option *pnp_build_option(int priority); -struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev); -struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, - int priority); -int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_irq *data); -int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_dma *data); -int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_port *data); -int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_mem *data); +struct pnp_port { + resource_size_t min; /* min base number */ + resource_size_t max; /* max base number */ + resource_size_t align; /* align boundary */ + resource_size_t size; /* size of range */ + unsigned char flags; /* port flags */ +}; + +#define PNP_IRQ_NR 256 +typedef struct { DECLARE_BITMAP(bits, PNP_IRQ_NR); } pnp_irq_mask_t; + +struct pnp_irq { + pnp_irq_mask_t map; /* bitmap for IRQ lines */ + unsigned char flags; /* IRQ flags */ +}; + +struct pnp_dma { + unsigned char map; /* bitmask for DMA channels */ + unsigned char flags; /* DMA flags */ +}; + +struct pnp_mem { + resource_size_t min; /* min base number */ + resource_size_t max; /* max base number */ + resource_size_t align; /* align boundary */ + resource_size_t size; /* size of range */ + unsigned char flags; /* memory flags */ +}; + +#define PNP_OPTION_DEPENDENT 0x80000000 +#define PNP_OPTION_SET_MASK 0xffff +#define PNP_OPTION_SET_SHIFT 12 +#define PNP_OPTION_PRIORITY_MASK 0xfff +#define PNP_OPTION_PRIORITY_SHIFT 0 + +#define PNP_RES_PRIORITY_PREFERRED 0 +#define PNP_RES_PRIORITY_ACCEPTABLE 1 +#define PNP_RES_PRIORITY_FUNCTIONAL 2 +#define PNP_RES_PRIORITY_INVALID PNP_OPTION_PRIORITY_MASK + +struct pnp_option { + struct list_head list; + unsigned int flags; /* independent/dependent, set, priority */ + + unsigned long type; /* IORESOURCE_{IO,MEM,IRQ,DMA} */ + union { + struct pnp_port port; + struct pnp_irq irq; + struct pnp_dma dma; + struct pnp_mem mem; + } u; +}; + +int pnp_register_irq_resource(struct pnp_dev *dev, unsigned int option_flags, + pnp_irq_mask_t *map, unsigned char flags); +int pnp_register_dma_resource(struct pnp_dev *dev, unsigned int option_flags, + unsigned char map, unsigned char flags); +int pnp_register_port_resource(struct pnp_dev *dev, unsigned int option_flags, + resource_size_t min, resource_size_t max, + resource_size_t align, resource_size_t size, + unsigned char flags); +int pnp_register_mem_resource(struct pnp_dev *dev, unsigned int option_flags, + resource_size_t min, resource_size_t max, + resource_size_t align, resource_size_t size, + unsigned char flags); + +static inline int pnp_option_is_dependent(struct pnp_option *option) +{ + return option->flags & PNP_OPTION_DEPENDENT ? 1 : 0; +} + +static inline unsigned int pnp_option_set(struct pnp_option *option) +{ + return (option->flags >> PNP_OPTION_SET_SHIFT) & PNP_OPTION_SET_MASK; +} + +static inline unsigned int pnp_option_priority(struct pnp_option *option) +{ + return (option->flags >> PNP_OPTION_PRIORITY_SHIFT) & + PNP_OPTION_PRIORITY_MASK; +} + +static inline unsigned int pnp_new_dependent_set(struct pnp_dev *dev, + int priority) +{ + unsigned int flags; + + if (priority > PNP_RES_PRIORITY_FUNCTIONAL) { + dev_warn(&dev->dev, "invalid dependent option priority %d " + "clipped to %d", priority, + PNP_RES_PRIORITY_INVALID); + priority = PNP_RES_PRIORITY_INVALID; + } + + flags = PNP_OPTION_DEPENDENT | + ((dev->num_dependent_sets & PNP_OPTION_SET_MASK) << + PNP_OPTION_SET_SHIFT) | + ((priority & PNP_OPTION_PRIORITY_MASK) << + PNP_OPTION_PRIORITY_SHIFT); + + dev->num_dependent_sets++; + + return flags; +} + +char *pnp_option_priority_name(struct pnp_option *option); +void dbg_pnp_show_option(struct pnp_dev *dev, struct pnp_option *option); + void pnp_init_resources(struct pnp_dev *dev); void pnp_fixup_device(struct pnp_dev *dev); -void pnp_free_option(struct pnp_option *option); +void pnp_free_options(struct pnp_dev *dev); int __pnp_add_device(struct pnp_dev *dev); void __pnp_remove_device(struct pnp_dev *dev); @@ -43,29 +144,18 @@ int pnp_check_mem(struct pnp_dev *dev, struct resource *res); int pnp_check_irq(struct pnp_dev *dev, struct resource *res); int pnp_check_dma(struct pnp_dev *dev, struct resource *res); +char *pnp_resource_type_name(struct resource *res); void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc); -void pnp_init_resource(struct resource *res); - -struct pnp_resource *pnp_get_pnp_resource(struct pnp_dev *dev, - unsigned int type, unsigned int num); - -#define PNP_MAX_PORT 40 -#define PNP_MAX_MEM 24 -#define PNP_MAX_IRQ 2 -#define PNP_MAX_DMA 2 +void pnp_free_resources(struct pnp_dev *dev); +int pnp_resource_type(struct resource *res); struct pnp_resource { + struct list_head list; struct resource res; - unsigned int index; /* ISAPNP config register index */ }; -struct pnp_resource_table { - struct pnp_resource port[PNP_MAX_PORT]; - struct pnp_resource mem[PNP_MAX_MEM]; - struct pnp_resource dma[PNP_MAX_DMA]; - struct pnp_resource irq[PNP_MAX_IRQ]; -}; +void pnp_free_resource(struct pnp_resource *pnp_res); struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, int flags); diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c index 20771b7d4482..a411582bcd72 100644 --- a/drivers/pnp/core.c +++ b/drivers/pnp/core.c @@ -99,14 +99,28 @@ static void pnp_free_ids(struct pnp_dev *dev) } } +void pnp_free_resource(struct pnp_resource *pnp_res) +{ + list_del(&pnp_res->list); + kfree(pnp_res); +} + +void pnp_free_resources(struct pnp_dev *dev) +{ + struct pnp_resource *pnp_res, *tmp; + + list_for_each_entry_safe(pnp_res, tmp, &dev->resources, list) { + pnp_free_resource(pnp_res); + } +} + static void pnp_release_device(struct device *dmdev) { struct pnp_dev *dev = to_pnp_dev(dmdev); - pnp_free_option(dev->independent); - pnp_free_option(dev->dependent); pnp_free_ids(dev); - kfree(dev->res); + pnp_free_resources(dev); + pnp_free_options(dev); kfree(dev); } @@ -119,12 +133,8 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, char *pnpid if (!dev) return NULL; - dev->res = kzalloc(sizeof(struct pnp_resource_table), GFP_KERNEL); - if (!dev->res) { - kfree(dev); - return NULL; - } - + INIT_LIST_HEAD(&dev->resources); + INIT_LIST_HEAD(&dev->options); dev->protocol = protocol; dev->number = id; dev->dma_mask = DMA_24BIT_MASK; @@ -140,7 +150,6 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, char *pnpid dev_id = pnp_add_id(dev, pnpid); if (!dev_id) { - kfree(dev->res); kfree(dev); return NULL; } diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 5695a79f3a52..a876ecf7028c 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -3,6 +3,8 @@ * * Some code, especially possible resource dumping is based on isapnp_proc.c (c) Jaroslav Kysela * Copyright 2002 Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #include @@ -53,11 +55,13 @@ static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt, ...) static void pnp_print_port(pnp_info_buffer_t * buffer, char *space, struct pnp_port *port) { - pnp_printf(buffer, - "%sport 0x%x-0x%x, align 0x%x, size 0x%x, %i-bit address decoding\n", - space, port->min, port->max, - port->align ? (port->align - 1) : 0, port->size, - port->flags & PNP_PORT_FLAG_16BITADDR ? 16 : 10); + pnp_printf(buffer, "%sport %#llx-%#llx, align %#llx, size %#llx, " + "%i-bit address decoding\n", space, + (unsigned long long) port->min, + (unsigned long long) port->max, + port->align ? ((unsigned long long) port->align - 1) : 0, + (unsigned long long) port->size, + port->flags & IORESOURCE_IO_16BIT_ADDR ? 16 : 10); } static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space, @@ -67,7 +71,7 @@ static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space, pnp_printf(buffer, "%sirq ", space); for (i = 0; i < PNP_IRQ_NR; i++) - if (test_bit(i, irq->map)) { + if (test_bit(i, irq->map.bits)) { if (!first) { pnp_printf(buffer, ","); } else { @@ -78,7 +82,7 @@ static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space, else pnp_printf(buffer, "%i", i); } - if (bitmap_empty(irq->map, PNP_IRQ_NR)) + if (bitmap_empty(irq->map.bits, PNP_IRQ_NR)) pnp_printf(buffer, ""); if (irq->flags & IORESOURCE_IRQ_HIGHEDGE) pnp_printf(buffer, " High-Edge"); @@ -88,6 +92,8 @@ static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space, pnp_printf(buffer, " High-Level"); if (irq->flags & IORESOURCE_IRQ_LOWLEVEL) pnp_printf(buffer, " Low-Level"); + if (irq->flags & IORESOURCE_IRQ_OPTIONAL) + pnp_printf(buffer, " (optional)"); pnp_printf(buffer, "\n"); } @@ -148,8 +154,11 @@ static void pnp_print_mem(pnp_info_buffer_t * buffer, char *space, { char *s; - pnp_printf(buffer, "%sMemory 0x%x-0x%x, align 0x%x, size 0x%x", - space, mem->min, mem->max, mem->align, mem->size); + pnp_printf(buffer, "%sMemory %#llx-%#llx, align %#llx, size %#llx", + space, (unsigned long long) mem->min, + (unsigned long long) mem->max, + (unsigned long long) mem->align, + (unsigned long long) mem->size); if (mem->flags & IORESOURCE_MEM_WRITEABLE) pnp_printf(buffer, ", writeable"); if (mem->flags & IORESOURCE_MEM_CACHEABLE) @@ -177,65 +186,58 @@ static void pnp_print_mem(pnp_info_buffer_t * buffer, char *space, } static void pnp_print_option(pnp_info_buffer_t * buffer, char *space, - struct pnp_option *option, int dep) + struct pnp_option *option) { - char *s; - struct pnp_port *port; - struct pnp_irq *irq; - struct pnp_dma *dma; - struct pnp_mem *mem; - - if (dep) { - switch (option->priority) { - case PNP_RES_PRIORITY_PREFERRED: - s = "preferred"; - break; - case PNP_RES_PRIORITY_ACCEPTABLE: - s = "acceptable"; - break; - case PNP_RES_PRIORITY_FUNCTIONAL: - s = "functional"; - break; - default: - s = "invalid"; - } - pnp_printf(buffer, "Dependent: %02i - Priority %s\n", dep, s); + switch (option->type) { + case IORESOURCE_IO: + pnp_print_port(buffer, space, &option->u.port); + break; + case IORESOURCE_MEM: + pnp_print_mem(buffer, space, &option->u.mem); + break; + case IORESOURCE_IRQ: + pnp_print_irq(buffer, space, &option->u.irq); + break; + case IORESOURCE_DMA: + pnp_print_dma(buffer, space, &option->u.dma); + break; } - - for (port = option->port; port; port = port->next) - pnp_print_port(buffer, space, port); - for (irq = option->irq; irq; irq = irq->next) - pnp_print_irq(buffer, space, irq); - for (dma = option->dma; dma; dma = dma->next) - pnp_print_dma(buffer, space, dma); - for (mem = option->mem; mem; mem = mem->next) - pnp_print_mem(buffer, space, mem); } static ssize_t pnp_show_options(struct device *dmdev, struct device_attribute *attr, char *buf) { struct pnp_dev *dev = to_pnp_dev(dmdev); - struct pnp_option *independent = dev->independent; - struct pnp_option *dependent = dev->dependent; - int ret, dep = 1; + pnp_info_buffer_t *buffer; + struct pnp_option *option; + int ret, dep = 0, set = 0; + char *indent; - pnp_info_buffer_t *buffer = (pnp_info_buffer_t *) - pnp_alloc(sizeof(pnp_info_buffer_t)); + buffer = pnp_alloc(sizeof(pnp_info_buffer_t)); if (!buffer) return -ENOMEM; buffer->len = PAGE_SIZE; buffer->buffer = buf; buffer->curr = buffer->buffer; - if (independent) - pnp_print_option(buffer, "", independent, 0); - while (dependent) { - pnp_print_option(buffer, " ", dependent, dep); - dependent = dependent->next; - dep++; + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option)) { + indent = " "; + if (!dep || pnp_option_set(option) != set) { + set = pnp_option_set(option); + dep = 1; + pnp_printf(buffer, "Dependent: %02i - " + "Priority %s\n", set, + pnp_option_priority_name(option)); + } + } else { + dep = 0; + indent = ""; + } + pnp_print_option(buffer, indent, option); } + ret = (buffer->curr - buf); kfree(buffer); return ret; @@ -248,79 +250,59 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, char *buf) { struct pnp_dev *dev = to_pnp_dev(dmdev); - struct resource *res; - int i, ret; pnp_info_buffer_t *buffer; + struct pnp_resource *pnp_res; + struct resource *res; + int ret; if (!dev) return -EINVAL; - buffer = (pnp_info_buffer_t *) pnp_alloc(sizeof(pnp_info_buffer_t)); + buffer = pnp_alloc(sizeof(pnp_info_buffer_t)); if (!buffer) return -ENOMEM; + buffer->len = PAGE_SIZE; buffer->buffer = buf; buffer->curr = buffer->buffer; - pnp_printf(buffer, "state = "); - if (dev->active) - pnp_printf(buffer, "active\n"); - else - pnp_printf(buffer, "disabled\n"); - - for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IO, i)); i++) { - if (pnp_resource_valid(res)) { - pnp_printf(buffer, "io"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " 0x%llx-0x%llx\n", - (unsigned long long) res->start, - (unsigned long long) res->end); - } - } - for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, i)); i++) { - if (pnp_resource_valid(res)) { - pnp_printf(buffer, "mem"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " 0x%llx-0x%llx\n", - (unsigned long long) res->start, - (unsigned long long) res->end); - } - } - for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IRQ, i)); i++) { - if (pnp_resource_valid(res)) { - pnp_printf(buffer, "irq"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " %lld\n", - (unsigned long long) res->start); + pnp_printf(buffer, "state = %s\n", dev->active ? "active" : "disabled"); + + list_for_each_entry(pnp_res, &dev->resources, list) { + res = &pnp_res->res; + + pnp_printf(buffer, pnp_resource_type_name(res)); + + if (res->flags & IORESOURCE_DISABLED) { + pnp_printf(buffer, " disabled\n"); + continue; } - } - for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_DMA, i)); i++) { - if (pnp_resource_valid(res)) { - pnp_printf(buffer, "dma"); - if (res->flags & IORESOURCE_DISABLED) - pnp_printf(buffer, " disabled\n"); - else - pnp_printf(buffer, " %lld\n", - (unsigned long long) res->start); + + switch (pnp_resource_type(res)) { + case IORESOURCE_IO: + case IORESOURCE_MEM: + pnp_printf(buffer, " %#llx-%#llx\n", + (unsigned long long) res->start, + (unsigned long long) res->end); + break; + case IORESOURCE_IRQ: + case IORESOURCE_DMA: + pnp_printf(buffer, " %lld\n", + (unsigned long long) res->start); + break; } } + ret = (buffer->curr - buf); kfree(buffer); return ret; } -static ssize_t -pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, - const char *ubuf, size_t count) +static ssize_t pnp_set_current_resources(struct device *dmdev, + struct device_attribute *attr, + const char *ubuf, size_t count) { struct pnp_dev *dev = to_pnp_dev(dmdev); - struct pnp_resource *pnp_res; char *buf = (void *)ubuf; int retval = 0; resource_size_t start, end; @@ -368,7 +350,6 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, goto done; } if (!strnicmp(buf, "set", 3)) { - int nport = 0, nmem = 0, nirq = 0, ndma = 0; if (dev->active) goto done; buf += 3; @@ -391,10 +372,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, end = simple_strtoul(buf, &buf, 0); } else end = start; - pnp_res = pnp_add_io_resource(dev, start, end, - 0); - if (pnp_res) - pnp_res->index = nport++; + pnp_add_io_resource(dev, start, end, 0); continue; } if (!strnicmp(buf, "mem", 3)) { @@ -411,10 +389,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, end = simple_strtoul(buf, &buf, 0); } else end = start; - pnp_res = pnp_add_mem_resource(dev, start, end, - 0); - if (pnp_res) - pnp_res->index = nmem++; + pnp_add_mem_resource(dev, start, end, 0); continue; } if (!strnicmp(buf, "irq", 3)) { @@ -422,9 +397,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, while (isspace(*buf)) ++buf; start = simple_strtoul(buf, &buf, 0); - pnp_res = pnp_add_irq_resource(dev, start, 0); - if (pnp_res) - pnp_res->index = nirq++; + pnp_add_irq_resource(dev, start, 0); continue; } if (!strnicmp(buf, "dma", 3)) { @@ -432,9 +405,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, while (isspace(*buf)) ++buf; start = simple_strtoul(buf, &buf, 0); - pnp_res = pnp_add_dma_resource(dev, start, 0); - if (pnp_res) - pnp_res->index = ndma++; + pnp_add_dma_resource(dev, start, 0); continue; } break; diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index f1bccdbdeb08..101a835e8759 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -429,154 +429,135 @@ static struct pnp_dev *__init isapnp_parse_device(struct pnp_card *card, * Add IRQ resource to resources list. */ static void __init isapnp_parse_irq_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[3]; - struct pnp_irq *irq; unsigned long bits; + pnp_irq_mask_t map; + unsigned char flags = IORESOURCE_IRQ_HIGHEDGE; isapnp_peek(tmp, size); - irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!irq) - return; bits = (tmp[1] << 8) | tmp[0]; - bitmap_copy(irq->map, &bits, 16); + + bitmap_zero(map.bits, PNP_IRQ_NR); + bitmap_copy(map.bits, &bits, 16); + if (size > 2) - irq->flags = tmp[2]; - else - irq->flags = IORESOURCE_IRQ_HIGHEDGE; - pnp_register_irq_resource(dev, option, irq); + flags = tmp[2]; + + pnp_register_irq_resource(dev, option_flags, &map, flags); } /* * Add DMA resource to resources list. */ static void __init isapnp_parse_dma_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[2]; - struct pnp_dma *dma; isapnp_peek(tmp, size); - dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); - if (!dma) - return; - dma->map = tmp[0]; - dma->flags = tmp[1]; - pnp_register_dma_resource(dev, option, dma); + pnp_register_dma_resource(dev, option_flags, tmp[0], tmp[1]); } /* * Add port resource to resources list. */ static void __init isapnp_parse_port_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[7]; - struct pnp_port *port; + resource_size_t min, max, align, len; + unsigned char flags; isapnp_peek(tmp, size); - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = (tmp[2] << 8) | tmp[1]; - port->max = (tmp[4] << 8) | tmp[3]; - port->align = tmp[5]; - port->size = tmp[6]; - port->flags = tmp[0] ? PNP_PORT_FLAG_16BITADDR : 0; - pnp_register_port_resource(dev, option, port); + min = (tmp[2] << 8) | tmp[1]; + max = (tmp[4] << 8) | tmp[3]; + align = tmp[5]; + len = tmp[6]; + flags = tmp[0] ? IORESOURCE_IO_16BIT_ADDR : 0; + pnp_register_port_resource(dev, option_flags, + min, max, align, len, flags); } /* * Add fixed port resource to resources list. */ static void __init isapnp_parse_fixed_port_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[3]; - struct pnp_port *port; + resource_size_t base, len; isapnp_peek(tmp, size); - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = port->max = (tmp[1] << 8) | tmp[0]; - port->size = tmp[2]; - port->align = 0; - port->flags = PNP_PORT_FLAG_FIXED; - pnp_register_port_resource(dev, option, port); + base = (tmp[1] << 8) | tmp[0]; + len = tmp[2]; + pnp_register_port_resource(dev, option_flags, base, base, 0, len, + IORESOURCE_IO_FIXED); } /* * Add memory resource to resources list. */ static void __init isapnp_parse_mem_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[9]; - struct pnp_mem *mem; + resource_size_t min, max, align, len; + unsigned char flags; isapnp_peek(tmp, size); - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = ((tmp[2] << 8) | tmp[1]) << 8; - mem->max = ((tmp[4] << 8) | tmp[3]) << 8; - mem->align = (tmp[6] << 8) | tmp[5]; - mem->size = ((tmp[8] << 8) | tmp[7]) << 8; - mem->flags = tmp[0]; - pnp_register_mem_resource(dev, option, mem); + min = ((tmp[2] << 8) | tmp[1]) << 8; + max = ((tmp[4] << 8) | tmp[3]) << 8; + align = (tmp[6] << 8) | tmp[5]; + len = ((tmp[8] << 8) | tmp[7]) << 8; + flags = tmp[0]; + pnp_register_mem_resource(dev, option_flags, + min, max, align, len, flags); } /* * Add 32-bit memory resource to resources list. */ static void __init isapnp_parse_mem32_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[17]; - struct pnp_mem *mem; + resource_size_t min, max, align, len; + unsigned char flags; isapnp_peek(tmp, size); - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; - mem->max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; - mem->align = - (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9]; - mem->size = - (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13]; - mem->flags = tmp[0]; - pnp_register_mem_resource(dev, option, mem); + min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; + max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; + align = (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9]; + len = (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13]; + flags = tmp[0]; + pnp_register_mem_resource(dev, option_flags, + min, max, align, len, flags); } /* * Add 32-bit fixed memory resource to resources list. */ static void __init isapnp_parse_fixed_mem32_resource(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, int size) { unsigned char tmp[9]; - struct pnp_mem *mem; + resource_size_t base, len; + unsigned char flags; isapnp_peek(tmp, size); - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = mem->max = - (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; - mem->size = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; - mem->align = 0; - mem->flags = tmp[0]; - pnp_register_mem_resource(dev, option, mem); + base = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; + len = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; + flags = tmp[0]; + pnp_register_mem_resource(dev, option_flags, base, base, 0, len, flags); } /* @@ -604,20 +585,16 @@ isapnp_parse_name(char *name, unsigned int name_max, unsigned short *size) static int __init isapnp_create_device(struct pnp_card *card, unsigned short size) { - int number = 0, skip = 0, priority = 0, compat = 0; + int number = 0, skip = 0, priority, compat = 0; unsigned char type, tmp[17]; - struct pnp_option *option; + unsigned int option_flags; struct pnp_dev *dev; u32 eisa_id; char id[8]; if ((dev = isapnp_parse_device(card, size, number++)) == NULL) return 1; - option = pnp_register_independent_option(dev); - if (!option) { - kfree(dev); - return 1; - } + option_flags = 0; pnp_add_card_device(card, dev); while (1) { @@ -634,16 +611,11 @@ static int __init isapnp_create_device(struct pnp_card *card, return 1; size = 0; skip = 0; - option = pnp_register_independent_option(dev); - if (!option) { - kfree(dev); - return 1; - } + option_flags = 0; pnp_add_card_device(card, dev); } else { skip = 1; } - priority = 0; compat = 0; break; case _STAG_COMPATDEVID: @@ -660,44 +632,42 @@ static int __init isapnp_create_device(struct pnp_card *card, case _STAG_IRQ: if (size < 2 || size > 3) goto __skip; - isapnp_parse_irq_resource(dev, option, size); + isapnp_parse_irq_resource(dev, option_flags, size); size = 0; break; case _STAG_DMA: if (size != 2) goto __skip; - isapnp_parse_dma_resource(dev, option, size); + isapnp_parse_dma_resource(dev, option_flags, size); size = 0; break; case _STAG_STARTDEP: if (size > 1) goto __skip; - priority = 0x100 | PNP_RES_PRIORITY_ACCEPTABLE; + priority = PNP_RES_PRIORITY_ACCEPTABLE; if (size > 0) { isapnp_peek(tmp, size); - priority = 0x100 | tmp[0]; + priority = tmp[0]; size = 0; } - option = pnp_register_dependent_option(dev, priority); - if (!option) - return 1; + option_flags = pnp_new_dependent_set(dev, priority); break; case _STAG_ENDDEP: if (size != 0) goto __skip; - priority = 0; - dev_dbg(&dev->dev, "end dependent options\n"); + option_flags = 0; break; case _STAG_IOPORT: if (size != 7) goto __skip; - isapnp_parse_port_resource(dev, option, size); + isapnp_parse_port_resource(dev, option_flags, size); size = 0; break; case _STAG_FIXEDIO: if (size != 3) goto __skip; - isapnp_parse_fixed_port_resource(dev, option, size); + isapnp_parse_fixed_port_resource(dev, option_flags, + size); size = 0; break; case _STAG_VENDOR: @@ -705,7 +675,7 @@ static int __init isapnp_create_device(struct pnp_card *card, case _LTAG_MEMRANGE: if (size != 9) goto __skip; - isapnp_parse_mem_resource(dev, option, size); + isapnp_parse_mem_resource(dev, option_flags, size); size = 0; break; case _LTAG_ANSISTR: @@ -720,13 +690,14 @@ static int __init isapnp_create_device(struct pnp_card *card, case _LTAG_MEM32RANGE: if (size != 17) goto __skip; - isapnp_parse_mem32_resource(dev, option, size); + isapnp_parse_mem32_resource(dev, option_flags, size); size = 0; break; case _LTAG_FIXEDMEM32RANGE: if (size != 9) goto __skip; - isapnp_parse_fixed_mem32_resource(dev, option, size); + isapnp_parse_fixed_mem32_resource(dev, option_flags, + size); size = 0; break; case _STAG_END: @@ -928,7 +899,6 @@ EXPORT_SYMBOL(isapnp_write_byte); static int isapnp_get_resources(struct pnp_dev *dev) { - struct pnp_resource *pnp_res; int i, ret; dev_dbg(&dev->dev, "get resources\n"); @@ -940,35 +910,23 @@ static int isapnp_get_resources(struct pnp_dev *dev) for (i = 0; i < ISAPNP_MAX_PORT; i++) { ret = isapnp_read_word(ISAPNP_CFG_PORT + (i << 1)); - if (ret) { - pnp_res = pnp_add_io_resource(dev, ret, ret, 0); - if (pnp_res) - pnp_res->index = i; - } + pnp_add_io_resource(dev, ret, ret, + ret == 0 ? IORESOURCE_DISABLED : 0); } for (i = 0; i < ISAPNP_MAX_MEM; i++) { ret = isapnp_read_word(ISAPNP_CFG_MEM + (i << 3)) << 8; - if (ret) { - pnp_res = pnp_add_mem_resource(dev, ret, ret, 0); - if (pnp_res) - pnp_res->index = i; - } + pnp_add_mem_resource(dev, ret, ret, + ret == 0 ? IORESOURCE_DISABLED : 0); } for (i = 0; i < ISAPNP_MAX_IRQ; i++) { ret = isapnp_read_word(ISAPNP_CFG_IRQ + (i << 1)) >> 8; - if (ret) { - pnp_res = pnp_add_irq_resource(dev, ret, 0); - if (pnp_res) - pnp_res->index = i; - } + pnp_add_irq_resource(dev, ret, + ret == 0 ? IORESOURCE_DISABLED : 0); } for (i = 0; i < ISAPNP_MAX_DMA; i++) { ret = isapnp_read_byte(ISAPNP_CFG_DMA + i); - if (ret != 4) { - pnp_res = pnp_add_dma_resource(dev, ret, 0); - if (pnp_res) - pnp_res->index = i; - } + pnp_add_dma_resource(dev, ret, + ret == 4 ? IORESOURCE_DISABLED : 0); } __end: @@ -978,62 +936,45 @@ __end: static int isapnp_set_resources(struct pnp_dev *dev) { - struct pnp_resource *pnp_res; struct resource *res; - int tmp, index; + int tmp; dev_dbg(&dev->dev, "set resources\n"); isapnp_cfg_begin(dev->card->number, dev->number); dev->active = 1; for (tmp = 0; tmp < ISAPNP_MAX_PORT; tmp++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IO, tmp); - if (!pnp_res) - continue; - res = &pnp_res->res; - if (pnp_resource_valid(res)) { - index = pnp_res->index; + res = pnp_get_resource(dev, IORESOURCE_IO, tmp); + if (pnp_resource_enabled(res)) { dev_dbg(&dev->dev, " set io %d to %#llx\n", - index, (unsigned long long) res->start); - isapnp_write_word(ISAPNP_CFG_PORT + (index << 1), + tmp, (unsigned long long) res->start); + isapnp_write_word(ISAPNP_CFG_PORT + (tmp << 1), res->start); } } for (tmp = 0; tmp < ISAPNP_MAX_IRQ; tmp++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IRQ, tmp); - if (!pnp_res) - continue; - res = &pnp_res->res; - if (pnp_resource_valid(res)) { + res = pnp_get_resource(dev, IORESOURCE_IRQ, tmp); + if (pnp_resource_enabled(res)) { int irq = res->start; if (irq == 2) irq = 9; - index = pnp_res->index; - dev_dbg(&dev->dev, " set irq %d to %d\n", index, irq); - isapnp_write_byte(ISAPNP_CFG_IRQ + (index << 1), irq); + dev_dbg(&dev->dev, " set irq %d to %d\n", tmp, irq); + isapnp_write_byte(ISAPNP_CFG_IRQ + (tmp << 1), irq); } } for (tmp = 0; tmp < ISAPNP_MAX_DMA; tmp++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_DMA, tmp); - if (!pnp_res) - continue; - res = &pnp_res->res; - if (pnp_resource_valid(res)) { - index = pnp_res->index; + res = pnp_get_resource(dev, IORESOURCE_DMA, tmp); + if (pnp_resource_enabled(res)) { dev_dbg(&dev->dev, " set dma %d to %lld\n", - index, (unsigned long long) res->start); - isapnp_write_byte(ISAPNP_CFG_DMA + index, res->start); + tmp, (unsigned long long) res->start); + isapnp_write_byte(ISAPNP_CFG_DMA + tmp, res->start); } } for (tmp = 0; tmp < ISAPNP_MAX_MEM; tmp++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_MEM, tmp); - if (!pnp_res) - continue; - res = &pnp_res->res; - if (pnp_resource_valid(res)) { - index = pnp_res->index; + res = pnp_get_resource(dev, IORESOURCE_MEM, tmp); + if (pnp_resource_enabled(res)) { dev_dbg(&dev->dev, " set mem %d to %#llx\n", - index, (unsigned long long) res->start); - isapnp_write_word(ISAPNP_CFG_MEM + (index << 3), + tmp, (unsigned long long) res->start); + isapnp_write_word(ISAPNP_CFG_MEM + (tmp << 3), (res->start >> 8) & 0xffff); } } diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index bea0914ff947..b526eaad3f6c 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -3,6 +3,8 @@ * * based on isapnp.c resource management (c) Jaroslav Kysela * Copyright 2003 Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #include @@ -19,82 +21,64 @@ DEFINE_MUTEX(pnp_res_mutex); static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) { - struct pnp_resource *pnp_res; - struct resource *res; - - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IO, idx); - if (!pnp_res) { - dev_err(&dev->dev, "too many I/O port resources\n"); - /* pretend we were successful so at least the manager won't try again */ - return 1; - } - - res = &pnp_res->res; + struct resource *res, local_res; - /* check if this resource has been manually set, if so skip */ - if (!(res->flags & IORESOURCE_AUTO)) { + res = pnp_get_resource(dev, IORESOURCE_IO, idx); + if (res) { dev_dbg(&dev->dev, " io %d already set to %#llx-%#llx " "flags %#lx\n", idx, (unsigned long long) res->start, (unsigned long long) res->end, res->flags); - return 1; + return 0; } - /* set the initial values */ - pnp_res->index = idx; - res->flags |= rule->flags | IORESOURCE_IO; - res->flags &= ~IORESOURCE_UNSET; + res = &local_res; + res->flags = rule->flags | IORESOURCE_AUTO; + res->start = 0; + res->end = 0; if (!rule->size) { res->flags |= IORESOURCE_DISABLED; dev_dbg(&dev->dev, " io %d disabled\n", idx); - return 1; /* skip disabled resource requests */ + goto __add; } res->start = rule->min; res->end = res->start + rule->size - 1; - /* run through until pnp_check_port is happy */ while (!pnp_check_port(dev, res)) { res->start += rule->align; res->end = res->start + rule->size - 1; if (res->start > rule->max || !rule->align) { - dev_dbg(&dev->dev, " couldn't assign io %d\n", idx); - return 0; + dev_dbg(&dev->dev, " couldn't assign io %d " + "(min %#llx max %#llx)\n", idx, + (unsigned long long) rule->min, + (unsigned long long) rule->max); + return -EBUSY; } } - dev_dbg(&dev->dev, " assign io %d %#llx-%#llx\n", idx, - (unsigned long long) res->start, (unsigned long long) res->end); - return 1; + +__add: + pnp_add_io_resource(dev, res->start, res->end, res->flags); + return 0; } static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) { - struct pnp_resource *pnp_res; - struct resource *res; - - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_MEM, idx); - if (!pnp_res) { - dev_err(&dev->dev, "too many memory resources\n"); - /* pretend we were successful so at least the manager won't try again */ - return 1; - } + struct resource *res, local_res; - res = &pnp_res->res; - - /* check if this resource has been manually set, if so skip */ - if (!(res->flags & IORESOURCE_AUTO)) { + res = pnp_get_resource(dev, IORESOURCE_MEM, idx); + if (res) { dev_dbg(&dev->dev, " mem %d already set to %#llx-%#llx " "flags %#lx\n", idx, (unsigned long long) res->start, (unsigned long long) res->end, res->flags); - return 1; + return 0; } - /* set the initial values */ - pnp_res->index = idx; - res->flags |= rule->flags | IORESOURCE_MEM; - res->flags &= ~IORESOURCE_UNSET; + res = &local_res; + res->flags = rule->flags | IORESOURCE_AUTO; + res->start = 0; + res->end = 0; - /* convert pnp flags to standard Linux flags */ if (!(rule->flags & IORESOURCE_MEM_WRITEABLE)) res->flags |= IORESOURCE_READONLY; if (rule->flags & IORESOURCE_MEM_CACHEABLE) @@ -107,30 +91,32 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) if (!rule->size) { res->flags |= IORESOURCE_DISABLED; dev_dbg(&dev->dev, " mem %d disabled\n", idx); - return 1; /* skip disabled resource requests */ + goto __add; } res->start = rule->min; res->end = res->start + rule->size - 1; - /* run through until pnp_check_mem is happy */ while (!pnp_check_mem(dev, res)) { res->start += rule->align; res->end = res->start + rule->size - 1; if (res->start > rule->max || !rule->align) { - dev_dbg(&dev->dev, " couldn't assign mem %d\n", idx); - return 0; + dev_dbg(&dev->dev, " couldn't assign mem %d " + "(min %#llx max %#llx)\n", idx, + (unsigned long long) rule->min, + (unsigned long long) rule->max); + return -EBUSY; } } - dev_dbg(&dev->dev, " assign mem %d %#llx-%#llx\n", idx, - (unsigned long long) res->start, (unsigned long long) res->end); - return 1; + +__add: + pnp_add_mem_resource(dev, res->start, res->end, res->flags); + return 0; } static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) { - struct pnp_resource *pnp_res; - struct resource *res; + struct resource *res, local_res; int i; /* IRQ priority: this table is good for i386 */ @@ -138,59 +124,57 @@ static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx) 5, 10, 11, 12, 9, 14, 15, 7, 3, 4, 13, 0, 1, 6, 8, 2 }; - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IRQ, idx); - if (!pnp_res) { - dev_err(&dev->dev, "too many IRQ resources\n"); - /* pretend we were successful so at least the manager won't try again */ - return 1; - } - - res = &pnp_res->res; - - /* check if this resource has been manually set, if so skip */ - if (!(res->flags & IORESOURCE_AUTO)) { + res = pnp_get_resource(dev, IORESOURCE_IRQ, idx); + if (res) { dev_dbg(&dev->dev, " irq %d already set to %d flags %#lx\n", idx, (int) res->start, res->flags); - return 1; + return 0; } - /* set the initial values */ - pnp_res->index = idx; - res->flags |= rule->flags | IORESOURCE_IRQ; - res->flags &= ~IORESOURCE_UNSET; + res = &local_res; + res->flags = rule->flags | IORESOURCE_AUTO; + res->start = -1; + res->end = -1; - if (bitmap_empty(rule->map, PNP_IRQ_NR)) { + if (bitmap_empty(rule->map.bits, PNP_IRQ_NR)) { res->flags |= IORESOURCE_DISABLED; dev_dbg(&dev->dev, " irq %d disabled\n", idx); - return 1; /* skip disabled resource requests */ + goto __add; } /* TBD: need check for >16 IRQ */ - res->start = find_next_bit(rule->map, PNP_IRQ_NR, 16); + res->start = find_next_bit(rule->map.bits, PNP_IRQ_NR, 16); if (res->start < PNP_IRQ_NR) { res->end = res->start; - dev_dbg(&dev->dev, " assign irq %d %d\n", idx, - (int) res->start); - return 1; + goto __add; } for (i = 0; i < 16; i++) { - if (test_bit(xtab[i], rule->map)) { + if (test_bit(xtab[i], rule->map.bits)) { res->start = res->end = xtab[i]; - if (pnp_check_irq(dev, res)) { - dev_dbg(&dev->dev, " assign irq %d %d\n", idx, - (int) res->start); - return 1; - } + if (pnp_check_irq(dev, res)) + goto __add; } } + + if (rule->flags & IORESOURCE_IRQ_OPTIONAL) { + res->start = -1; + res->end = -1; + res->flags |= IORESOURCE_DISABLED; + dev_dbg(&dev->dev, " irq %d disabled (optional)\n", idx); + goto __add; + } + dev_dbg(&dev->dev, " couldn't assign irq %d\n", idx); + return -EBUSY; + +__add: + pnp_add_irq_resource(dev, res->start, res->flags); return 0; } -static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) +static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) { - struct pnp_resource *pnp_res; - struct resource *res; + struct resource *res, local_res; int i; /* DMA priority: this table is good for i386 */ @@ -198,231 +182,99 @@ static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) 1, 3, 5, 6, 7, 0, 2, 4 }; - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_DMA, idx); - if (!pnp_res) { - dev_err(&dev->dev, "too many DMA resources\n"); - return; - } - - res = &pnp_res->res; - - /* check if this resource has been manually set, if so skip */ - if (!(res->flags & IORESOURCE_AUTO)) { + res = pnp_get_resource(dev, IORESOURCE_DMA, idx); + if (res) { dev_dbg(&dev->dev, " dma %d already set to %d flags %#lx\n", idx, (int) res->start, res->flags); - return; + return 0; } - /* set the initial values */ - pnp_res->index = idx; - res->flags |= rule->flags | IORESOURCE_DMA; - res->flags &= ~IORESOURCE_UNSET; + res = &local_res; + res->flags = rule->flags | IORESOURCE_AUTO; + res->start = -1; + res->end = -1; for (i = 0; i < 8; i++) { if (rule->map & (1 << xtab[i])) { res->start = res->end = xtab[i]; - if (pnp_check_dma(dev, res)) { - dev_dbg(&dev->dev, " assign dma %d %d\n", idx, - (int) res->start); - return; - } + if (pnp_check_dma(dev, res)) + goto __add; } } #ifdef MAX_DMA_CHANNELS res->start = res->end = MAX_DMA_CHANNELS; #endif - res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; + res->flags |= IORESOURCE_DISABLED; dev_dbg(&dev->dev, " disable dma %d\n", idx); -} - -void pnp_init_resource(struct resource *res) -{ - unsigned long type; - - type = res->flags & (IORESOURCE_IO | IORESOURCE_MEM | - IORESOURCE_IRQ | IORESOURCE_DMA); - res->name = NULL; - res->flags = type | IORESOURCE_AUTO | IORESOURCE_UNSET; - if (type == IORESOURCE_IRQ || type == IORESOURCE_DMA) { - res->start = -1; - res->end = -1; - } else { - res->start = 0; - res->end = 0; - } +__add: + pnp_add_dma_resource(dev, res->start, res->flags); + return 0; } -/** - * pnp_init_resources - Resets a resource table to default values. - * @table: pointer to the desired resource table - */ void pnp_init_resources(struct pnp_dev *dev) { - struct resource *res; - int idx; - - for (idx = 0; idx < PNP_MAX_IRQ; idx++) { - res = &dev->res->irq[idx].res; - res->flags = IORESOURCE_IRQ; - pnp_init_resource(res); - } - for (idx = 0; idx < PNP_MAX_DMA; idx++) { - res = &dev->res->dma[idx].res; - res->flags = IORESOURCE_DMA; - pnp_init_resource(res); - } - for (idx = 0; idx < PNP_MAX_PORT; idx++) { - res = &dev->res->port[idx].res; - res->flags = IORESOURCE_IO; - pnp_init_resource(res); - } - for (idx = 0; idx < PNP_MAX_MEM; idx++) { - res = &dev->res->mem[idx].res; - res->flags = IORESOURCE_MEM; - pnp_init_resource(res); - } + pnp_free_resources(dev); } -/** - * pnp_clean_resources - clears resources that were not manually set - * @res: the resources to clean - */ static void pnp_clean_resource_table(struct pnp_dev *dev) { - struct resource *res; - int idx; - - for (idx = 0; idx < PNP_MAX_IRQ; idx++) { - res = &dev->res->irq[idx].res; - if (res->flags & IORESOURCE_AUTO) { - res->flags = IORESOURCE_IRQ; - pnp_init_resource(res); - } - } - for (idx = 0; idx < PNP_MAX_DMA; idx++) { - res = &dev->res->dma[idx].res; - if (res->flags & IORESOURCE_AUTO) { - res->flags = IORESOURCE_DMA; - pnp_init_resource(res); - } - } - for (idx = 0; idx < PNP_MAX_PORT; idx++) { - res = &dev->res->port[idx].res; - if (res->flags & IORESOURCE_AUTO) { - res->flags = IORESOURCE_IO; - pnp_init_resource(res); - } - } - for (idx = 0; idx < PNP_MAX_MEM; idx++) { - res = &dev->res->mem[idx].res; - if (res->flags & IORESOURCE_AUTO) { - res->flags = IORESOURCE_MEM; - pnp_init_resource(res); - } + struct pnp_resource *pnp_res, *tmp; + + list_for_each_entry_safe(pnp_res, tmp, &dev->resources, list) { + if (pnp_res->res.flags & IORESOURCE_AUTO) + pnp_free_resource(pnp_res); } } /** * pnp_assign_resources - assigns resources to the device based on the specified dependent number * @dev: pointer to the desired device - * @depnum: the dependent function number - * - * Only set depnum to 0 if the device does not have dependent options. + * @set: the dependent function number */ -static int pnp_assign_resources(struct pnp_dev *dev, int depnum) +static int pnp_assign_resources(struct pnp_dev *dev, int set) { - struct pnp_port *port; - struct pnp_mem *mem; - struct pnp_irq *irq; - struct pnp_dma *dma; + struct pnp_option *option; int nport = 0, nmem = 0, nirq = 0, ndma = 0; + int ret = 0; - if (!pnp_can_configure(dev)) - return -ENODEV; - - dbg_pnp_show_resources(dev, "before pnp_assign_resources"); + dev_dbg(&dev->dev, "pnp_assign_resources, try dependent set %d\n", set); mutex_lock(&pnp_res_mutex); pnp_clean_resource_table(dev); - if (dev->independent) { - dev_dbg(&dev->dev, "assigning independent options\n"); - port = dev->independent->port; - mem = dev->independent->mem; - irq = dev->independent->irq; - dma = dev->independent->dma; - while (port) { - if (!pnp_assign_port(dev, port, nport)) - goto fail; - nport++; - port = port->next; - } - while (mem) { - if (!pnp_assign_mem(dev, mem, nmem)) - goto fail; - nmem++; - mem = mem->next; - } - while (irq) { - if (!pnp_assign_irq(dev, irq, nirq)) - goto fail; - nirq++; - irq = irq->next; - } - while (dma) { - pnp_assign_dma(dev, dma, ndma); - ndma++; - dma = dma->next; - } - } - if (depnum) { - struct pnp_option *dep; - int i; - - dev_dbg(&dev->dev, "assigning dependent option %d\n", depnum); - for (i = 1, dep = dev->dependent; i < depnum; - i++, dep = dep->next) - if (!dep) - goto fail; - port = dep->port; - mem = dep->mem; - irq = dep->irq; - dma = dep->dma; - while (port) { - if (!pnp_assign_port(dev, port, nport)) - goto fail; - nport++; - port = port->next; - } - while (mem) { - if (!pnp_assign_mem(dev, mem, nmem)) - goto fail; - nmem++; - mem = mem->next; - } - while (irq) { - if (!pnp_assign_irq(dev, irq, nirq)) - goto fail; - nirq++; - irq = irq->next; + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option) && + pnp_option_set(option) != set) + continue; + + switch (option->type) { + case IORESOURCE_IO: + ret = pnp_assign_port(dev, &option->u.port, nport++); + break; + case IORESOURCE_MEM: + ret = pnp_assign_mem(dev, &option->u.mem, nmem++); + break; + case IORESOURCE_IRQ: + ret = pnp_assign_irq(dev, &option->u.irq, nirq++); + break; + case IORESOURCE_DMA: + ret = pnp_assign_dma(dev, &option->u.dma, ndma++); + break; + default: + ret = -EINVAL; + break; } - while (dma) { - pnp_assign_dma(dev, dma, ndma); - ndma++; - dma = dma->next; - } - } else if (dev->dependent) - goto fail; - - mutex_unlock(&pnp_res_mutex); - dbg_pnp_show_resources(dev, "after pnp_assign_resources"); - return 1; + if (ret < 0) + break; + } -fail: - pnp_clean_resource_table(dev); mutex_unlock(&pnp_res_mutex); - dbg_pnp_show_resources(dev, "after pnp_assign_resources (failed)"); - return 0; + if (ret < 0) { + dev_dbg(&dev->dev, "pnp_assign_resources failed (%d)\n", ret); + pnp_clean_resource_table(dev); + } else + dbg_pnp_show_resources(dev, "pnp_assign_resources succeeded"); + return ret; } /** @@ -431,29 +283,25 @@ fail: */ int pnp_auto_config_dev(struct pnp_dev *dev) { - struct pnp_option *dep; - int i = 1; + int i, ret; if (!pnp_can_configure(dev)) { dev_dbg(&dev->dev, "configuration not supported\n"); return -ENODEV; } - if (!dev->dependent) { - if (pnp_assign_resources(dev, 0)) + ret = pnp_assign_resources(dev, 0); + if (ret == 0) + return 0; + + for (i = 1; i < dev->num_dependent_sets; i++) { + ret = pnp_assign_resources(dev, i); + if (ret == 0) return 0; - } else { - dep = dev->dependent; - do { - if (pnp_assign_resources(dev, i)) - return 0; - dep = dep->next; - i++; - } while (dep); } dev_err(&dev->dev, "unable to assign resources\n"); - return -EBUSY; + return ret; } /** diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 50902773beaf..c1b9ea34977b 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -117,9 +117,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) { int power_state; - power_state = acpi_pm_device_sleep_state(&dev->dev, - device_may_wakeup(&dev->dev), - NULL); + power_state = acpi_pm_device_sleep_state(&dev->dev, NULL); if (power_state < 0) power_state = (state.event == PM_EVENT_ON) ? ACPI_STATE_D0 : ACPI_STATE_D3; diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 46c791adb894..d7e9f2152df0 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -3,6 +3,8 @@ * * Copyright (c) 2004 Matthieu Castet * Copyright (c) 2004 Li Shaohua + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -98,8 +100,10 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev, int irq, flags; int p, t; - if (!valid_IRQ(gsi)) + if (!valid_IRQ(gsi)) { + pnp_add_irq_resource(dev, gsi, IORESOURCE_DISABLED); return; + } /* * in IO-APIC mode, use overrided attribute. Two reasons: @@ -178,13 +182,68 @@ static void pnpacpi_parse_allocated_ioresource(struct pnp_dev *dev, u64 start, u64 end = start + len - 1; if (io_decode == ACPI_DECODE_16) - flags |= PNP_PORT_FLAG_16BITADDR; + flags |= IORESOURCE_IO_16BIT_ADDR; if (len == 0 || end >= 0x10003) flags |= IORESOURCE_DISABLED; pnp_add_io_resource(dev, start, end, flags); } +/* + * Device CSRs that do not appear in PCI config space should be described + * via ACPI. This would normally be done with Address Space Descriptors + * marked as "consumer-only," but old versions of Windows and Linux ignore + * the producer/consumer flag, so HP invented a vendor-defined resource to + * describe the location and size of CSR space. + */ +static struct acpi_vendor_uuid hp_ccsr_uuid = { + .subtype = 2, + .data = { 0xf9, 0xad, 0xe9, 0x69, 0x4f, 0x92, 0x5f, 0xab, 0xf6, 0x4a, + 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad }, +}; + +static int vendor_resource_matches(struct pnp_dev *dev, + struct acpi_resource_vendor_typed *vendor, + struct acpi_vendor_uuid *match, + int expected_len) +{ + int uuid_len = sizeof(vendor->uuid); + u8 uuid_subtype = vendor->uuid_subtype; + u8 *uuid = vendor->uuid; + int actual_len; + + /* byte_length includes uuid_subtype and uuid */ + actual_len = vendor->byte_length - uuid_len - 1; + + if (uuid_subtype == match->subtype && + uuid_len == sizeof(match->data) && + memcmp(uuid, match->data, uuid_len) == 0) { + if (expected_len && expected_len != actual_len) { + dev_err(&dev->dev, "wrong vendor descriptor size; " + "expected %d, found %d bytes\n", + expected_len, actual_len); + return 0; + } + + return 1; + } + + return 0; +} + +static void pnpacpi_parse_allocated_vendor(struct pnp_dev *dev, + struct acpi_resource_vendor_typed *vendor) +{ + if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, 16)) { + u64 start, length; + + memcpy(&start, vendor->byte_data, sizeof(start)); + memcpy(&length, vendor->byte_data + 8, sizeof(length)); + + pnp_add_mem_resource(dev, start, start + length - 1, 0); + } +} + static void pnpacpi_parse_allocated_memresource(struct pnp_dev *dev, u64 start, u64 len, int write_protect) @@ -235,6 +294,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, struct acpi_resource_dma *dma; struct acpi_resource_io *io; struct acpi_resource_fixed_io *fixed_io; + struct acpi_resource_vendor_typed *vendor_typed; struct acpi_resource_memory24 *memory24; struct acpi_resource_memory32 *memory32; struct acpi_resource_fixed_memory32 *fixed_memory32; @@ -248,24 +308,39 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, * _CRS, but some firmware violates this, so parse them all. */ irq = &res->data.irq; - for (i = 0; i < irq->interrupt_count; i++) { - pnpacpi_parse_allocated_irqresource(dev, - irq->interrupts[i], - irq->triggering, - irq->polarity, - irq->sharable); + if (irq->interrupt_count == 0) + pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED); + else { + for (i = 0; i < irq->interrupt_count; i++) { + pnpacpi_parse_allocated_irqresource(dev, + irq->interrupts[i], + irq->triggering, + irq->polarity, + irq->sharable); + } + + /* + * The IRQ encoder puts a single interrupt in each + * descriptor, so if a _CRS descriptor has more than + * one interrupt, we won't be able to re-encode it. + */ + if (pnp_can_write(dev) && irq->interrupt_count > 1) { + dev_warn(&dev->dev, "multiple interrupts in " + "_CRS descriptor; configuration can't " + "be changed\n"); + dev->capabilities &= ~PNP_WRITE; + } } break; case ACPI_RESOURCE_TYPE_DMA: dma = &res->data.dma; - if (dma->channel_count > 0) { + if (dma->channel_count > 0 && dma->channels[0] != (u8) -1) flags = dma_flags(dma->type, dma->bus_master, dma->transfer); - if (dma->channels[0] == (u8) -1) - flags |= IORESOURCE_DISABLED; - pnp_add_dma_resource(dev, dma->channels[0], flags); - } + else + flags = IORESOURCE_DISABLED; + pnp_add_dma_resource(dev, dma->channels[0], flags); break; case ACPI_RESOURCE_TYPE_IO: @@ -289,6 +364,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, break; case ACPI_RESOURCE_TYPE_VENDOR: + vendor_typed = &res->data.vendor_typed; + pnpacpi_parse_allocated_vendor(dev, vendor_typed); break; case ACPI_RESOURCE_TYPE_END_TAG: @@ -331,12 +408,29 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, if (extended_irq->producer_consumer == ACPI_PRODUCER) return AE_OK; - for (i = 0; i < extended_irq->interrupt_count; i++) { - pnpacpi_parse_allocated_irqresource(dev, - extended_irq->interrupts[i], - extended_irq->triggering, - extended_irq->polarity, - extended_irq->sharable); + if (extended_irq->interrupt_count == 0) + pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED); + else { + for (i = 0; i < extended_irq->interrupt_count; i++) { + pnpacpi_parse_allocated_irqresource(dev, + extended_irq->interrupts[i], + extended_irq->triggering, + extended_irq->polarity, + extended_irq->sharable); + } + + /* + * The IRQ encoder puts a single interrupt in each + * descriptor, so if a _CRS descriptor has more than + * one interrupt, we won't be able to re-encode it. + */ + if (pnp_can_write(dev) && + extended_irq->interrupt_count > 1) { + dev_warn(&dev->dev, "multiple interrupts in " + "_CRS descriptor; configuration can't " + "be changed\n"); + dev->capabilities &= ~PNP_WRITE; + } } break; @@ -373,179 +467,147 @@ int pnpacpi_parse_allocated_resource(struct pnp_dev *dev) } static __init void pnpacpi_parse_dma_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_dma *p) { int i; - struct pnp_dma *dma; + unsigned char map = 0, flags; if (p->channel_count == 0) return; - dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); - if (!dma) - return; for (i = 0; i < p->channel_count; i++) - dma->map |= 1 << p->channels[i]; - - dma->flags = dma_flags(p->type, p->bus_master, p->transfer); + map |= 1 << p->channels[i]; - pnp_register_dma_resource(dev, option, dma); + flags = dma_flags(p->type, p->bus_master, p->transfer); + pnp_register_dma_resource(dev, option_flags, map, flags); } static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_irq *p) { int i; - struct pnp_irq *irq; + pnp_irq_mask_t map; + unsigned char flags; if (p->interrupt_count == 0) return; - irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!irq) - return; + bitmap_zero(map.bits, PNP_IRQ_NR); for (i = 0; i < p->interrupt_count; i++) if (p->interrupts[i]) - __set_bit(p->interrupts[i], irq->map); - irq->flags = irq_flags(p->triggering, p->polarity, p->sharable); + __set_bit(p->interrupts[i], map.bits); - pnp_register_irq_resource(dev, option, irq); + flags = irq_flags(p->triggering, p->polarity, p->sharable); + pnp_register_irq_resource(dev, option_flags, &map, flags); } static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_extended_irq *p) { int i; - struct pnp_irq *irq; + pnp_irq_mask_t map; + unsigned char flags; if (p->interrupt_count == 0) return; - irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!irq) - return; - for (i = 0; i < p->interrupt_count; i++) - if (p->interrupts[i]) - __set_bit(p->interrupts[i], irq->map); - irq->flags = irq_flags(p->triggering, p->polarity, p->sharable); + bitmap_zero(map.bits, PNP_IRQ_NR); + for (i = 0; i < p->interrupt_count; i++) { + if (p->interrupts[i]) { + if (p->interrupts[i] < PNP_IRQ_NR) + __set_bit(p->interrupts[i], map.bits); + else + dev_err(&dev->dev, "ignoring IRQ %d option " + "(too large for %d entry bitmap)\n", + p->interrupts[i], PNP_IRQ_NR); + } + } - pnp_register_irq_resource(dev, option, irq); + flags = irq_flags(p->triggering, p->polarity, p->sharable); + pnp_register_irq_resource(dev, option_flags, &map, flags); } static __init void pnpacpi_parse_port_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_io *io) { - struct pnp_port *port; + unsigned char flags = 0; if (io->address_length == 0) return; - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = io->minimum; - port->max = io->maximum; - port->align = io->alignment; - port->size = io->address_length; - port->flags = ACPI_DECODE_16 == io->io_decode ? - PNP_PORT_FLAG_16BITADDR : 0; - pnp_register_port_resource(dev, option, port); + + if (io->io_decode == ACPI_DECODE_16) + flags = IORESOURCE_IO_16BIT_ADDR; + pnp_register_port_resource(dev, option_flags, io->minimum, io->maximum, + io->alignment, io->address_length, flags); } static __init void pnpacpi_parse_fixed_port_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_fixed_io *io) { - struct pnp_port *port; - if (io->address_length == 0) return; - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = port->max = io->address; - port->size = io->address_length; - port->align = 0; - port->flags = PNP_PORT_FLAG_FIXED; - pnp_register_port_resource(dev, option, port); + + pnp_register_port_resource(dev, option_flags, io->address, io->address, + 0, io->address_length, IORESOURCE_IO_FIXED); } static __init void pnpacpi_parse_mem24_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_memory24 *p) { - struct pnp_mem *mem; + unsigned char flags = 0; if (p->address_length == 0) return; - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = p->minimum; - mem->max = p->maximum; - mem->align = p->alignment; - mem->size = p->address_length; - - mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? - IORESOURCE_MEM_WRITEABLE : 0; - pnp_register_mem_resource(dev, option, mem); + if (p->write_protect == ACPI_READ_WRITE_MEMORY) + flags = IORESOURCE_MEM_WRITEABLE; + pnp_register_mem_resource(dev, option_flags, p->minimum, p->maximum, + p->alignment, p->address_length, flags); } static __init void pnpacpi_parse_mem32_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_memory32 *p) { - struct pnp_mem *mem; + unsigned char flags = 0; if (p->address_length == 0) return; - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = p->minimum; - mem->max = p->maximum; - mem->align = p->alignment; - mem->size = p->address_length; - - mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? - IORESOURCE_MEM_WRITEABLE : 0; - pnp_register_mem_resource(dev, option, mem); + if (p->write_protect == ACPI_READ_WRITE_MEMORY) + flags = IORESOURCE_MEM_WRITEABLE; + pnp_register_mem_resource(dev, option_flags, p->minimum, p->maximum, + p->alignment, p->address_length, flags); } static __init void pnpacpi_parse_fixed_mem32_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource_fixed_memory32 *p) { - struct pnp_mem *mem; + unsigned char flags = 0; if (p->address_length == 0) return; - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = mem->max = p->address; - mem->size = p->address_length; - mem->align = 0; - - mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? - IORESOURCE_MEM_WRITEABLE : 0; - pnp_register_mem_resource(dev, option, mem); + if (p->write_protect == ACPI_READ_WRITE_MEMORY) + flags = IORESOURCE_MEM_WRITEABLE; + pnp_register_mem_resource(dev, option_flags, p->address, p->address, + 0, p->address_length, flags); } static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, - struct pnp_option *option, + unsigned int option_flags, struct acpi_resource *r) { struct acpi_resource_address64 addr, *p = &addr; acpi_status status; - struct pnp_mem *mem; - struct pnp_port *port; + unsigned char flags = 0; status = acpi_resource_to_address64(r, p); if (!ACPI_SUCCESS(status)) { @@ -558,49 +620,37 @@ static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, return; if (p->resource_type == ACPI_MEMORY_RANGE) { - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = mem->max = p->minimum; - mem->size = p->address_length; - mem->align = 0; - mem->flags = (p->info.mem.write_protect == - ACPI_READ_WRITE_MEMORY) ? IORESOURCE_MEM_WRITEABLE - : 0; - pnp_register_mem_resource(dev, option, mem); - } else if (p->resource_type == ACPI_IO_RANGE) { - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = port->max = p->minimum; - port->size = p->address_length; - port->align = 0; - port->flags = PNP_PORT_FLAG_FIXED; - pnp_register_port_resource(dev, option, port); - } + if (p->info.mem.write_protect == ACPI_READ_WRITE_MEMORY) + flags = IORESOURCE_MEM_WRITEABLE; + pnp_register_mem_resource(dev, option_flags, p->minimum, + p->minimum, 0, p->address_length, + flags); + } else if (p->resource_type == ACPI_IO_RANGE) + pnp_register_port_resource(dev, option_flags, p->minimum, + p->minimum, 0, p->address_length, + IORESOURCE_IO_FIXED); } struct acpipnp_parse_option_s { - struct pnp_option *option; - struct pnp_option *option_independent; struct pnp_dev *dev; + unsigned int option_flags; }; static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, void *data) { - int priority = 0; + int priority; struct acpipnp_parse_option_s *parse_data = data; struct pnp_dev *dev = parse_data->dev; - struct pnp_option *option = parse_data->option; + unsigned int option_flags = parse_data->option_flags; switch (res->type) { case ACPI_RESOURCE_TYPE_IRQ: - pnpacpi_parse_irq_option(dev, option, &res->data.irq); + pnpacpi_parse_irq_option(dev, option_flags, &res->data.irq); break; case ACPI_RESOURCE_TYPE_DMA: - pnpacpi_parse_dma_option(dev, option, &res->data.dma); + pnpacpi_parse_dma_option(dev, option_flags, &res->data.dma); break; case ACPI_RESOURCE_TYPE_START_DEPENDENT: @@ -620,31 +670,19 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, priority = PNP_RES_PRIORITY_INVALID; break; } - /* TBD: Consider performance/robustness bits */ - option = pnp_register_dependent_option(dev, priority); - if (!option) - return AE_ERROR; - parse_data->option = option; + parse_data->option_flags = pnp_new_dependent_set(dev, priority); break; case ACPI_RESOURCE_TYPE_END_DEPENDENT: - /*only one EndDependentFn is allowed */ - if (!parse_data->option_independent) { - dev_warn(&dev->dev, "more than one EndDependentFn " - "in _PRS\n"); - return AE_ERROR; - } - parse_data->option = parse_data->option_independent; - parse_data->option_independent = NULL; - dev_dbg(&dev->dev, "end dependent options\n"); + parse_data->option_flags = 0; break; case ACPI_RESOURCE_TYPE_IO: - pnpacpi_parse_port_option(dev, option, &res->data.io); + pnpacpi_parse_port_option(dev, option_flags, &res->data.io); break; case ACPI_RESOURCE_TYPE_FIXED_IO: - pnpacpi_parse_fixed_port_option(dev, option, + pnpacpi_parse_fixed_port_option(dev, option_flags, &res->data.fixed_io); break; @@ -653,29 +691,31 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, break; case ACPI_RESOURCE_TYPE_MEMORY24: - pnpacpi_parse_mem24_option(dev, option, &res->data.memory24); + pnpacpi_parse_mem24_option(dev, option_flags, + &res->data.memory24); break; case ACPI_RESOURCE_TYPE_MEMORY32: - pnpacpi_parse_mem32_option(dev, option, &res->data.memory32); + pnpacpi_parse_mem32_option(dev, option_flags, + &res->data.memory32); break; case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: - pnpacpi_parse_fixed_mem32_option(dev, option, + pnpacpi_parse_fixed_mem32_option(dev, option_flags, &res->data.fixed_memory32); break; case ACPI_RESOURCE_TYPE_ADDRESS16: case ACPI_RESOURCE_TYPE_ADDRESS32: case ACPI_RESOURCE_TYPE_ADDRESS64: - pnpacpi_parse_address_option(dev, option, res); + pnpacpi_parse_address_option(dev, option_flags, res); break; case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64: break; case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: - pnpacpi_parse_ext_irq_option(dev, option, + pnpacpi_parse_ext_irq_option(dev, option_flags, &res->data.extended_irq); break; @@ -699,12 +739,9 @@ int __init pnpacpi_parse_resource_option_data(struct pnp_dev *dev) dev_dbg(&dev->dev, "parse resource options\n"); - parse_data.option = pnp_register_independent_option(dev); - if (!parse_data.option) - return -ENOMEM; - - parse_data.option_independent = parse_data.option; parse_data.dev = dev; + parse_data.option_flags = 0; + status = acpi_walk_resources(handle, METHOD_NAME__PRS, pnpacpi_option_resource, &parse_data); @@ -806,6 +843,13 @@ static void pnpacpi_encode_irq(struct pnp_dev *dev, struct acpi_resource_irq *irq = &resource->data.irq; int triggering, polarity, shareable; + if (!pnp_resource_enabled(p)) { + irq->interrupt_count = 0; + dev_dbg(&dev->dev, " encode irq (%s)\n", + p ? "disabled" : "missing"); + return; + } + decode_irq_flags(dev, p->flags, &triggering, &polarity, &shareable); irq->triggering = triggering; irq->polarity = polarity; @@ -828,6 +872,13 @@ static void pnpacpi_encode_ext_irq(struct pnp_dev *dev, struct acpi_resource_extended_irq *extended_irq = &resource->data.extended_irq; int triggering, polarity, shareable; + if (!pnp_resource_enabled(p)) { + extended_irq->interrupt_count = 0; + dev_dbg(&dev->dev, " encode extended irq (%s)\n", + p ? "disabled" : "missing"); + return; + } + decode_irq_flags(dev, p->flags, &triggering, &polarity, &shareable); extended_irq->producer_consumer = ACPI_CONSUMER; extended_irq->triggering = triggering; @@ -848,6 +899,13 @@ static void pnpacpi_encode_dma(struct pnp_dev *dev, { struct acpi_resource_dma *dma = &resource->data.dma; + if (!pnp_resource_enabled(p)) { + dma->channel_count = 0; + dev_dbg(&dev->dev, " encode dma (%s)\n", + p ? "disabled" : "missing"); + return; + } + /* Note: pnp_assign_dma will copy pnp_dma->flags into p->flags */ switch (p->flags & IORESOURCE_DMA_SPEED_MASK) { case IORESOURCE_DMA_TYPEA: @@ -889,17 +947,21 @@ static void pnpacpi_encode_io(struct pnp_dev *dev, { struct acpi_resource_io *io = &resource->data.io; - /* Note: pnp_assign_port will copy pnp_port->flags into p->flags */ - io->io_decode = (p->flags & PNP_PORT_FLAG_16BITADDR) ? - ACPI_DECODE_16 : ACPI_DECODE_10; - io->minimum = p->start; - io->maximum = p->end; - io->alignment = 0; /* Correct? */ - io->address_length = p->end - p->start + 1; - - dev_dbg(&dev->dev, " encode io %#llx-%#llx decode %#x\n", - (unsigned long long) p->start, (unsigned long long) p->end, - io->io_decode); + if (pnp_resource_enabled(p)) { + /* Note: pnp_assign_port copies pnp_port->flags into p->flags */ + io->io_decode = (p->flags & IORESOURCE_IO_16BIT_ADDR) ? + ACPI_DECODE_16 : ACPI_DECODE_10; + io->minimum = p->start; + io->maximum = p->end; + io->alignment = 0; /* Correct? */ + io->address_length = p->end - p->start + 1; + } else { + io->minimum = 0; + io->address_length = 0; + } + + dev_dbg(&dev->dev, " encode io %#x-%#x decode %#x\n", io->minimum, + io->minimum + io->address_length - 1, io->io_decode); } static void pnpacpi_encode_fixed_io(struct pnp_dev *dev, @@ -908,11 +970,16 @@ static void pnpacpi_encode_fixed_io(struct pnp_dev *dev, { struct acpi_resource_fixed_io *fixed_io = &resource->data.fixed_io; - fixed_io->address = p->start; - fixed_io->address_length = p->end - p->start + 1; + if (pnp_resource_enabled(p)) { + fixed_io->address = p->start; + fixed_io->address_length = p->end - p->start + 1; + } else { + fixed_io->address = 0; + fixed_io->address_length = 0; + } - dev_dbg(&dev->dev, " encode fixed_io %#llx-%#llx\n", - (unsigned long long) p->start, (unsigned long long) p->end); + dev_dbg(&dev->dev, " encode fixed_io %#x-%#x\n", fixed_io->address, + fixed_io->address + fixed_io->address_length - 1); } static void pnpacpi_encode_mem24(struct pnp_dev *dev, @@ -921,17 +988,22 @@ static void pnpacpi_encode_mem24(struct pnp_dev *dev, { struct acpi_resource_memory24 *memory24 = &resource->data.memory24; - /* Note: pnp_assign_mem will copy pnp_mem->flags into p->flags */ - memory24->write_protect = - (p->flags & IORESOURCE_MEM_WRITEABLE) ? - ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; - memory24->minimum = p->start; - memory24->maximum = p->end; - memory24->alignment = 0; - memory24->address_length = p->end - p->start + 1; - - dev_dbg(&dev->dev, " encode mem24 %#llx-%#llx write_protect %#x\n", - (unsigned long long) p->start, (unsigned long long) p->end, + if (pnp_resource_enabled(p)) { + /* Note: pnp_assign_mem copies pnp_mem->flags into p->flags */ + memory24->write_protect = p->flags & IORESOURCE_MEM_WRITEABLE ? + ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; + memory24->minimum = p->start; + memory24->maximum = p->end; + memory24->alignment = 0; + memory24->address_length = p->end - p->start + 1; + } else { + memory24->minimum = 0; + memory24->address_length = 0; + } + + dev_dbg(&dev->dev, " encode mem24 %#x-%#x write_protect %#x\n", + memory24->minimum, + memory24->minimum + memory24->address_length - 1, memory24->write_protect); } @@ -941,16 +1013,21 @@ static void pnpacpi_encode_mem32(struct pnp_dev *dev, { struct acpi_resource_memory32 *memory32 = &resource->data.memory32; - memory32->write_protect = - (p->flags & IORESOURCE_MEM_WRITEABLE) ? - ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; - memory32->minimum = p->start; - memory32->maximum = p->end; - memory32->alignment = 0; - memory32->address_length = p->end - p->start + 1; + if (pnp_resource_enabled(p)) { + memory32->write_protect = p->flags & IORESOURCE_MEM_WRITEABLE ? + ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; + memory32->minimum = p->start; + memory32->maximum = p->end; + memory32->alignment = 0; + memory32->address_length = p->end - p->start + 1; + } else { + memory32->minimum = 0; + memory32->alignment = 0; + } - dev_dbg(&dev->dev, " encode mem32 %#llx-%#llx write_protect %#x\n", - (unsigned long long) p->start, (unsigned long long) p->end, + dev_dbg(&dev->dev, " encode mem32 %#x-%#x write_protect %#x\n", + memory32->minimum, + memory32->minimum + memory32->address_length - 1, memory32->write_protect); } @@ -960,15 +1037,20 @@ static void pnpacpi_encode_fixed_mem32(struct pnp_dev *dev, { struct acpi_resource_fixed_memory32 *fixed_memory32 = &resource->data.fixed_memory32; - fixed_memory32->write_protect = - (p->flags & IORESOURCE_MEM_WRITEABLE) ? - ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; - fixed_memory32->address = p->start; - fixed_memory32->address_length = p->end - p->start + 1; + if (pnp_resource_enabled(p)) { + fixed_memory32->write_protect = + p->flags & IORESOURCE_MEM_WRITEABLE ? + ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; + fixed_memory32->address = p->start; + fixed_memory32->address_length = p->end - p->start + 1; + } else { + fixed_memory32->address = 0; + fixed_memory32->address_length = 0; + } - dev_dbg(&dev->dev, " encode fixed_mem32 %#llx-%#llx " - "write_protect %#x\n", - (unsigned long long) p->start, (unsigned long long) p->end, + dev_dbg(&dev->dev, " encode fixed_mem32 %#x-%#x write_protect %#x\n", + fixed_memory32->address, + fixed_memory32->address + fixed_memory32->address_length - 1, fixed_memory32->write_protect); } diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 5ff9a4c0447e..ca567671379e 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -216,137 +216,116 @@ len_err: static __init void pnpbios_parse_mem_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { - struct pnp_mem *mem; - - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = ((p[5] << 8) | p[4]) << 8; - mem->max = ((p[7] << 8) | p[6]) << 8; - mem->align = (p[9] << 8) | p[8]; - mem->size = ((p[11] << 8) | p[10]) << 8; - mem->flags = p[3]; - pnp_register_mem_resource(dev, option, mem); + resource_size_t min, max, align, len; + unsigned char flags; + + min = ((p[5] << 8) | p[4]) << 8; + max = ((p[7] << 8) | p[6]) << 8; + align = (p[9] << 8) | p[8]; + len = ((p[11] << 8) | p[10]) << 8; + flags = p[3]; + pnp_register_mem_resource(dev, option_flags, min, max, align, len, + flags); } static __init void pnpbios_parse_mem32_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { - struct pnp_mem *mem; - - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4]; - mem->max = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; - mem->align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12]; - mem->size = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16]; - mem->flags = p[3]; - pnp_register_mem_resource(dev, option, mem); + resource_size_t min, max, align, len; + unsigned char flags; + + min = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4]; + max = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; + align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12]; + len = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16]; + flags = p[3]; + pnp_register_mem_resource(dev, option_flags, min, max, align, len, + flags); } static __init void pnpbios_parse_fixed_mem32_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { - struct pnp_mem *mem; - - mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); - if (!mem) - return; - mem->min = mem->max = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4]; - mem->size = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; - mem->align = 0; - mem->flags = p[3]; - pnp_register_mem_resource(dev, option, mem); + resource_size_t base, len; + unsigned char flags; + + base = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4]; + len = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; + flags = p[3]; + pnp_register_mem_resource(dev, option_flags, base, base, 0, len, flags); } static __init void pnpbios_parse_irq_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { - struct pnp_irq *irq; unsigned long bits; + pnp_irq_mask_t map; + unsigned char flags = IORESOURCE_IRQ_HIGHEDGE; - irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); - if (!irq) - return; bits = (p[2] << 8) | p[1]; - bitmap_copy(irq->map, &bits, 16); + + bitmap_zero(map.bits, PNP_IRQ_NR); + bitmap_copy(map.bits, &bits, 16); + if (size > 2) - irq->flags = p[3]; - else - irq->flags = IORESOURCE_IRQ_HIGHEDGE; - pnp_register_irq_resource(dev, option, irq); + flags = p[3]; + + pnp_register_irq_resource(dev, option_flags, &map, flags); } static __init void pnpbios_parse_dma_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { - struct pnp_dma *dma; - - dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); - if (!dma) - return; - dma->map = p[1]; - dma->flags = p[2]; - pnp_register_dma_resource(dev, option, dma); + pnp_register_dma_resource(dev, option_flags, p[1], p[2]); } static __init void pnpbios_parse_port_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { - struct pnp_port *port; - - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = (p[3] << 8) | p[2]; - port->max = (p[5] << 8) | p[4]; - port->align = p[6]; - port->size = p[7]; - port->flags = p[1] ? PNP_PORT_FLAG_16BITADDR : 0; - pnp_register_port_resource(dev, option, port); + resource_size_t min, max, align, len; + unsigned char flags; + + min = (p[3] << 8) | p[2]; + max = (p[5] << 8) | p[4]; + align = p[6]; + len = p[7]; + flags = p[1] ? IORESOURCE_IO_16BIT_ADDR : 0; + pnp_register_port_resource(dev, option_flags, min, max, align, len, + flags); } static __init void pnpbios_parse_fixed_port_option(struct pnp_dev *dev, unsigned char *p, int size, - struct pnp_option *option) + unsigned int option_flags) { - struct pnp_port *port; - - port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); - if (!port) - return; - port->min = port->max = (p[2] << 8) | p[1]; - port->size = p[3]; - port->align = 0; - port->flags = PNP_PORT_FLAG_FIXED; - pnp_register_port_resource(dev, option, port); + resource_size_t base, len; + + base = (p[2] << 8) | p[1]; + len = p[3]; + pnp_register_port_resource(dev, option_flags, base, base, 0, len, + IORESOURCE_IO_FIXED); } static __init unsigned char * pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, - struct pnp_dev *dev) + struct pnp_dev *dev) { unsigned int len, tag; - int priority = 0; - struct pnp_option *option, *option_independent; + int priority; + unsigned int option_flags; if (!p) return NULL; dev_dbg(&dev->dev, "parse resource options\n"); - - option_independent = option = pnp_register_independent_option(dev); - if (!option) - return NULL; - + option_flags = 0; while ((char *)p < (char *)end) { /* determine the type of tag */ @@ -363,37 +342,38 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, case LARGE_TAG_MEM: if (len != 9) goto len_err; - pnpbios_parse_mem_option(dev, p, len, option); + pnpbios_parse_mem_option(dev, p, len, option_flags); break; case LARGE_TAG_MEM32: if (len != 17) goto len_err; - pnpbios_parse_mem32_option(dev, p, len, option); + pnpbios_parse_mem32_option(dev, p, len, option_flags); break; case LARGE_TAG_FIXEDMEM32: if (len != 9) goto len_err; - pnpbios_parse_fixed_mem32_option(dev, p, len, option); + pnpbios_parse_fixed_mem32_option(dev, p, len, + option_flags); break; case SMALL_TAG_IRQ: if (len < 2 || len > 3) goto len_err; - pnpbios_parse_irq_option(dev, p, len, option); + pnpbios_parse_irq_option(dev, p, len, option_flags); break; case SMALL_TAG_DMA: if (len != 2) goto len_err; - pnpbios_parse_dma_option(dev, p, len, option); + pnpbios_parse_dma_option(dev, p, len, option_flags); break; case SMALL_TAG_PORT: if (len != 7) goto len_err; - pnpbios_parse_port_option(dev, p, len, option); + pnpbios_parse_port_option(dev, p, len, option_flags); break; case SMALL_TAG_VENDOR: @@ -403,28 +383,23 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, case SMALL_TAG_FIXEDPORT: if (len != 3) goto len_err; - pnpbios_parse_fixed_port_option(dev, p, len, option); + pnpbios_parse_fixed_port_option(dev, p, len, + option_flags); break; case SMALL_TAG_STARTDEP: if (len > 1) goto len_err; - priority = 0x100 | PNP_RES_PRIORITY_ACCEPTABLE; + priority = PNP_RES_PRIORITY_ACCEPTABLE; if (len > 0) - priority = 0x100 | p[1]; - option = pnp_register_dependent_option(dev, priority); - if (!option) - return NULL; + priority = p[1]; + option_flags = pnp_new_dependent_set(dev, priority); break; case SMALL_TAG_ENDDEP: if (len != 0) goto len_err; - if (option_independent == option) - dev_warn(&dev->dev, "missing " - "SMALL_TAG_STARTDEP tag\n"); - option = option_independent; - dev_dbg(&dev->dev, "end dependent options\n"); + option_flags = 0; break; case SMALL_TAG_END: @@ -526,8 +501,16 @@ len_err: static void pnpbios_encode_mem(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long base = res->start; - unsigned long len = res->end - res->start + 1; + unsigned long base; + unsigned long len; + + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } p[4] = (base >> 8) & 0xff; p[5] = ((base >> 8) >> 8) & 0xff; @@ -536,15 +519,22 @@ static void pnpbios_encode_mem(struct pnp_dev *dev, unsigned char *p, p[10] = (len >> 8) & 0xff; p[11] = ((len >> 8) >> 8) & 0xff; - dev_dbg(&dev->dev, " encode mem %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode mem %#lx-%#lx\n", base, base + len - 1); } static void pnpbios_encode_mem32(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long base = res->start; - unsigned long len = res->end - res->start + 1; + unsigned long base; + unsigned long len; + + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } p[4] = base & 0xff; p[5] = (base >> 8) & 0xff; @@ -559,15 +549,22 @@ static void pnpbios_encode_mem32(struct pnp_dev *dev, unsigned char *p, p[18] = (len >> 16) & 0xff; p[19] = (len >> 24) & 0xff; - dev_dbg(&dev->dev, " encode mem32 %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode mem32 %#lx-%#lx\n", base, base + len - 1); } static void pnpbios_encode_fixed_mem32(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long base = res->start; - unsigned long len = res->end - res->start + 1; + unsigned long base; + unsigned long len; + + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } p[4] = base & 0xff; p[5] = (base >> 8) & 0xff; @@ -578,40 +575,54 @@ static void pnpbios_encode_fixed_mem32(struct pnp_dev *dev, unsigned char *p, p[10] = (len >> 16) & 0xff; p[11] = (len >> 24) & 0xff; - dev_dbg(&dev->dev, " encode fixed_mem32 %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode fixed_mem32 %#lx-%#lx\n", base, + base + len - 1); } static void pnpbios_encode_irq(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long map = 0; + unsigned long map; + + if (pnp_resource_enabled(res)) + map = 1 << res->start; + else + map = 0; - map = 1 << res->start; p[1] = map & 0xff; p[2] = (map >> 8) & 0xff; - dev_dbg(&dev->dev, " encode irq %llu\n", - (unsigned long long)res->start); + dev_dbg(&dev->dev, " encode irq mask %#lx\n", map); } static void pnpbios_encode_dma(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long map = 0; + unsigned long map; + + if (pnp_resource_enabled(res)) + map = 1 << res->start; + else + map = 0; - map = 1 << res->start; p[1] = map & 0xff; - dev_dbg(&dev->dev, " encode dma %llu\n", - (unsigned long long)res->start); + dev_dbg(&dev->dev, " encode dma mask %#lx\n", map); } static void pnpbios_encode_port(struct pnp_dev *dev, unsigned char *p, struct resource *res) { - unsigned long base = res->start; - unsigned long len = res->end - res->start + 1; + unsigned long base; + unsigned long len; + + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } p[2] = base & 0xff; p[3] = (base >> 8) & 0xff; @@ -619,8 +630,7 @@ static void pnpbios_encode_port(struct pnp_dev *dev, unsigned char *p, p[5] = (base >> 8) & 0xff; p[7] = len & 0xff; - dev_dbg(&dev->dev, " encode io %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode io %#lx-%#lx\n", base, base + len - 1); } static void pnpbios_encode_fixed_port(struct pnp_dev *dev, unsigned char *p, @@ -629,12 +639,20 @@ static void pnpbios_encode_fixed_port(struct pnp_dev *dev, unsigned char *p, unsigned long base = res->start; unsigned long len = res->end - res->start + 1; + if (pnp_resource_enabled(res)) { + base = res->start; + len = res->end - res->start + 1; + } else { + base = 0; + len = 0; + } + p[1] = base & 0xff; p[2] = (base >> 8) & 0xff; p[3] = len & 0xff; - dev_dbg(&dev->dev, " encode fixed_io %#llx-%#llx\n", - (unsigned long long) res->start, (unsigned long long) res->end); + dev_dbg(&dev->dev, " encode fixed_io %#lx-%#lx\n", base, + base + len - 1); } static unsigned char *pnpbios_encode_allocated_resource_data(struct pnp_dev diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 1ff3bb585ab2..55f55ed72dc7 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -5,6 +5,8 @@ * when building up the resource structure for the first time. * * Copyright (c) 2000 Peter Denison + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas * * Heavily based on PCI quirks handling which is * @@ -20,203 +22,207 @@ #include #include "base.h" +static void quirk_awe32_add_ports(struct pnp_dev *dev, + struct pnp_option *option, + unsigned int offset) +{ + struct pnp_option *new_option; + + new_option = kmalloc(sizeof(struct pnp_option), GFP_KERNEL); + if (!new_option) { + dev_err(&dev->dev, "couldn't add ioport region to option set " + "%d\n", pnp_option_set(option)); + return; + } + + *new_option = *option; + new_option->u.port.min += offset; + new_option->u.port.max += offset; + list_add(&new_option->list, &option->list); + + dev_info(&dev->dev, "added ioport region %#llx-%#llx to set %d\n", + (unsigned long long) new_option->u.port.min, + (unsigned long long) new_option->u.port.max, + pnp_option_set(option)); +} + static void quirk_awe32_resources(struct pnp_dev *dev) { - struct pnp_port *port, *port2, *port3; - struct pnp_option *res = dev->dependent; + struct pnp_option *option; + unsigned int set = ~0; /* - * Unfortunately the isapnp_add_port_resource is too tightly bound - * into the PnP discovery sequence, and cannot be used. Link in the - * two extra ports (at offset 0x400 and 0x800 from the one given) by - * hand. + * Add two extra ioport regions (at offset 0x400 and 0x800 from the + * one given) to every dependent option set. */ - for (; res; res = res->next) { - port2 = pnp_alloc(sizeof(struct pnp_port)); - if (!port2) - return; - port3 = pnp_alloc(sizeof(struct pnp_port)); - if (!port3) { - kfree(port2); - return; + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option) && + pnp_option_set(option) != set) { + set = pnp_option_set(option); + quirk_awe32_add_ports(dev, option, 0x800); + quirk_awe32_add_ports(dev, option, 0x400); } - port = res->port; - memcpy(port2, port, sizeof(struct pnp_port)); - memcpy(port3, port, sizeof(struct pnp_port)); - port->next = port2; - port2->next = port3; - port2->min += 0x400; - port2->max += 0x400; - port3->min += 0x800; - port3->max += 0x800; - dev_info(&dev->dev, - "AWE32 quirk - added ioports 0x%lx and 0x%lx\n", - (unsigned long)port2->min, - (unsigned long)port3->min); } } static void quirk_cmi8330_resources(struct pnp_dev *dev) { - struct pnp_option *res = dev->dependent; - unsigned long tmp; - - for (; res; res = res->next) { - - struct pnp_irq *irq; - struct pnp_dma *dma; + struct pnp_option *option; + struct pnp_irq *irq; + struct pnp_dma *dma; - for (irq = res->irq; irq; irq = irq->next) { // Valid irqs are 5, 7, 10 - tmp = 0x04A0; - bitmap_copy(irq->map, &tmp, 16); // 0000 0100 1010 0000 - } + list_for_each_entry(option, &dev->options, list) { + if (!pnp_option_is_dependent(option)) + continue; - for (dma = res->dma; dma; dma = dma->next) // Valid 8bit dma channels are 1,3 + if (option->type == IORESOURCE_IRQ) { + irq = &option->u.irq; + bitmap_zero(irq->map.bits, PNP_IRQ_NR); + __set_bit(5, irq->map.bits); + __set_bit(7, irq->map.bits); + __set_bit(10, irq->map.bits); + dev_info(&dev->dev, "set possible IRQs in " + "option set %d to 5, 7, 10\n", + pnp_option_set(option)); + } else if (option->type == IORESOURCE_DMA) { + dma = &option->u.dma; if ((dma->flags & IORESOURCE_DMA_TYPE_MASK) == - IORESOURCE_DMA_8BIT) - dma->map = 0x000A; + IORESOURCE_DMA_8BIT && + dma->map != 0x0A) { + dev_info(&dev->dev, "changing possible " + "DMA channel mask in option set %d " + "from %#02x to 0x0A (1, 3)\n", + pnp_option_set(option), dma->map); + dma->map = 0x0A; + } + } } - dev_info(&dev->dev, "CMI8330 quirk - forced possible IRQs to 5, 7, 10 " - "and DMA channels to 1, 3\n"); } static void quirk_sb16audio_resources(struct pnp_dev *dev) { + struct pnp_option *option; + unsigned int prev_option_flags = ~0, n = 0; struct pnp_port *port; - struct pnp_option *res = dev->dependent; - int changed = 0; /* - * The default range on the mpu port for these devices is 0x388-0x388. + * The default range on the OPL port for these devices is 0x388-0x388. * Here we increase that range so that two such cards can be * auto-configured. */ + list_for_each_entry(option, &dev->options, list) { + if (prev_option_flags != option->flags) { + prev_option_flags = option->flags; + n = 0; + } - for (; res; res = res->next) { - port = res->port; - if (!port) - continue; - port = port->next; - if (!port) - continue; - port = port->next; - if (!port) - continue; - if (port->min != port->max) - continue; - port->max += 0x70; - changed = 1; + if (pnp_option_is_dependent(option) && + option->type == IORESOURCE_IO) { + n++; + port = &option->u.port; + if (n == 3 && port->min == port->max) { + port->max += 0x70; + dev_info(&dev->dev, "increased option port " + "range from %#llx-%#llx to " + "%#llx-%#llx\n", + (unsigned long long) port->min, + (unsigned long long) port->min, + (unsigned long long) port->min, + (unsigned long long) port->max); + } + } } - if (changed) - dev_info(&dev->dev, "SB audio device quirk - increased port range\n"); } -static struct pnp_option *quirk_isapnp_mpu_options(struct pnp_dev *dev) +static struct pnp_option *pnp_clone_dependent_set(struct pnp_dev *dev, + unsigned int set) { - struct pnp_option *head = NULL; - struct pnp_option *prev = NULL; - struct pnp_option *res; - - /* - * Build a functional IRQ-less variant of each MPU option. - */ - - for (res = dev->dependent; res; res = res->next) { - struct pnp_option *curr; - struct pnp_port *port; - struct pnp_port *copy; + struct pnp_option *tail = NULL, *first_new_option = NULL; + struct pnp_option *option, *new_option; + unsigned int flags; - port = res->port; - if (!port || !res->irq) - continue; + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option)) + tail = option; + } + if (!tail) { + dev_err(&dev->dev, "no dependent option sets\n"); + return NULL; + } - copy = pnp_alloc(sizeof *copy); - if (!copy) - break; + flags = pnp_new_dependent_set(dev, PNP_RES_PRIORITY_FUNCTIONAL); + list_for_each_entry(option, &dev->options, list) { + if (pnp_option_is_dependent(option) && + pnp_option_set(option) == set) { + new_option = kmalloc(sizeof(struct pnp_option), + GFP_KERNEL); + if (!new_option) { + dev_err(&dev->dev, "couldn't clone dependent " + "set %d\n", set); + return NULL; + } - copy->min = port->min; - copy->max = port->max; - copy->align = port->align; - copy->size = port->size; - copy->flags = port->flags; + *new_option = *option; + new_option->flags = flags; + if (!first_new_option) + first_new_option = new_option; - curr = pnp_build_option(PNP_RES_PRIORITY_FUNCTIONAL); - if (!curr) { - kfree(copy); - break; + list_add(&new_option->list, &tail->list); + tail = new_option; } - curr->port = copy; - - if (prev) - prev->next = curr; - else - head = curr; - prev = curr; } - if (head) - dev_info(&dev->dev, "adding IRQ-less MPU options\n"); - return head; + return first_new_option; } -static void quirk_ad1815_mpu_resources(struct pnp_dev *dev) + +static void quirk_add_irq_optional_dependent_sets(struct pnp_dev *dev) { - struct pnp_option *res; + struct pnp_option *new_option; + unsigned int num_sets, i, set; struct pnp_irq *irq; - /* - * Distribute the independent IRQ over the dependent options - */ - - res = dev->independent; - if (!res) - return; - - irq = res->irq; - if (!irq || irq->next) - return; - - res = dev->dependent; - if (!res) - return; - - while (1) { - struct pnp_irq *copy; - - copy = pnp_alloc(sizeof *copy); - if (!copy) - break; - - memcpy(copy->map, irq->map, sizeof copy->map); - copy->flags = irq->flags; + num_sets = dev->num_dependent_sets; + for (i = 0; i < num_sets; i++) { + new_option = pnp_clone_dependent_set(dev, i); + if (!new_option) + return; - copy->next = res->irq; /* Yes, this is NULL */ - res->irq = copy; + set = pnp_option_set(new_option); + while (new_option && pnp_option_set(new_option) == set) { + if (new_option->type == IORESOURCE_IRQ) { + irq = &new_option->u.irq; + irq->flags |= IORESOURCE_IRQ_OPTIONAL; + } + dbg_pnp_show_option(dev, new_option); + new_option = list_entry(new_option->list.next, + struct pnp_option, list); + } - if (!res->next) - break; - res = res->next; + dev_info(&dev->dev, "added dependent option set %d (same as " + "set %d except IRQ optional)\n", set, i); } - kfree(irq); - - res->next = quirk_isapnp_mpu_options(dev); - - res = dev->independent; - res->irq = NULL; } -static void quirk_isapnp_mpu_resources(struct pnp_dev *dev) +static void quirk_ad1815_mpu_resources(struct pnp_dev *dev) { - struct pnp_option *res; + struct pnp_option *option; + struct pnp_irq *irq = NULL; + unsigned int independent_irqs = 0; + + list_for_each_entry(option, &dev->options, list) { + if (option->type == IORESOURCE_IRQ && + !pnp_option_is_dependent(option)) { + independent_irqs++; + irq = &option->u.irq; + } + } - res = dev->dependent; - if (!res) + if (independent_irqs != 1) return; - while (res->next) - res = res->next; - - res->next = quirk_isapnp_mpu_options(dev); + irq->flags |= IORESOURCE_IRQ_OPTIONAL; + dev_info(&dev->dev, "made independent IRQ optional\n"); } #include @@ -248,8 +254,7 @@ static void quirk_system_pci_resources(struct pnp_dev *dev) for (j = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, j)); j++) { - if (res->flags & IORESOURCE_UNSET || - (res->start == 0 && res->end == 0)) + if (res->start == 0 && res->end == 0) continue; pnp_start = res->start; @@ -312,10 +317,10 @@ static struct pnp_fixup pnp_fixups[] = { {"CTL0043", quirk_sb16audio_resources}, {"CTL0044", quirk_sb16audio_resources}, {"CTL0045", quirk_sb16audio_resources}, - /* Add IRQ-less MPU options */ + /* Add IRQ-optional MPU options */ {"ADS7151", quirk_ad1815_mpu_resources}, - {"ADS7181", quirk_isapnp_mpu_resources}, - {"AZT0002", quirk_isapnp_mpu_resources}, + {"ADS7181", quirk_add_irq_optional_dependent_sets}, + {"AZT0002", quirk_add_irq_optional_dependent_sets}, /* PnP resources that might overlap PCI BARs */ {"PNP0c01", quirk_system_pci_resources}, {"PNP0c02", quirk_system_pci_resources}, diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 390b50096e30..4cfe3a1efdfb 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -3,6 +3,8 @@ * * based on isapnp.c resource management (c) Jaroslav Kysela * Copyright 2003 Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #include @@ -28,201 +30,121 @@ static int pnp_reserve_mem[16] = {[0 ... 15] = -1 }; /* reserve (don't use) some * option registration */ -struct pnp_option *pnp_build_option(int priority) +struct pnp_option *pnp_build_option(struct pnp_dev *dev, unsigned long type, + unsigned int option_flags) { - struct pnp_option *option = pnp_alloc(sizeof(struct pnp_option)); + struct pnp_option *option; + option = kzalloc(sizeof(struct pnp_option), GFP_KERNEL); if (!option) return NULL; - option->priority = priority & 0xff; - /* make sure the priority is valid */ - if (option->priority > PNP_RES_PRIORITY_FUNCTIONAL) - option->priority = PNP_RES_PRIORITY_INVALID; - - return option; -} - -struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev) -{ - struct pnp_option *option; - - option = pnp_build_option(PNP_RES_PRIORITY_PREFERRED); - - /* this should never happen but if it does we'll try to continue */ - if (dev->independent) - dev_err(&dev->dev, "independent resource already registered\n"); - dev->independent = option; + option->flags = option_flags; + option->type = type; - dev_dbg(&dev->dev, "new independent option\n"); + list_add_tail(&option->list, &dev->options); return option; } -struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, - int priority) +int pnp_register_irq_resource(struct pnp_dev *dev, unsigned int option_flags, + pnp_irq_mask_t *map, unsigned char flags) { struct pnp_option *option; + struct pnp_irq *irq; - option = pnp_build_option(priority); - - if (dev->dependent) { - struct pnp_option *parent = dev->dependent; - while (parent->next) - parent = parent->next; - parent->next = option; - } else - dev->dependent = option; - - dev_dbg(&dev->dev, "new dependent option (priority %#x)\n", priority); - return option; -} - -int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_irq *data) -{ - struct pnp_irq *ptr; -#ifdef DEBUG - char buf[PNP_IRQ_NR]; /* hex-encoded, so this is overkill but safe */ -#endif + option = pnp_build_option(dev, IORESOURCE_IRQ, option_flags); + if (!option) + return -ENOMEM; - ptr = option->irq; - while (ptr && ptr->next) - ptr = ptr->next; - if (ptr) - ptr->next = data; - else - option->irq = data; + irq = &option->u.irq; + irq->map = *map; + irq->flags = flags; #ifdef CONFIG_PCI { int i; for (i = 0; i < 16; i++) - if (test_bit(i, data->map)) + if (test_bit(i, irq->map.bits)) pcibios_penalize_isa_irq(i, 0); } #endif -#ifdef DEBUG - bitmap_scnprintf(buf, sizeof(buf), data->map, PNP_IRQ_NR); - dev_dbg(&dev->dev, " irq bitmask %s flags %#x\n", buf, - data->flags); -#endif + dbg_pnp_show_option(dev, option); return 0; } -int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_dma *data) +int pnp_register_dma_resource(struct pnp_dev *dev, unsigned int option_flags, + unsigned char map, unsigned char flags) { - struct pnp_dma *ptr; - - ptr = option->dma; - while (ptr && ptr->next) - ptr = ptr->next; - if (ptr) - ptr->next = data; - else - option->dma = data; - - dev_dbg(&dev->dev, " dma bitmask %#x flags %#x\n", data->map, - data->flags); - return 0; -} + struct pnp_option *option; + struct pnp_dma *dma; -int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_port *data) -{ - struct pnp_port *ptr; - - ptr = option->port; - while (ptr && ptr->next) - ptr = ptr->next; - if (ptr) - ptr->next = data; - else - option->port = data; - - dev_dbg(&dev->dev, " io " - "min %#x max %#x align %d size %d flags %#x\n", - data->min, data->max, data->align, data->size, data->flags); - return 0; -} + option = pnp_build_option(dev, IORESOURCE_DMA, option_flags); + if (!option) + return -ENOMEM; -int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_mem *data) -{ - struct pnp_mem *ptr; - - ptr = option->mem; - while (ptr && ptr->next) - ptr = ptr->next; - if (ptr) - ptr->next = data; - else - option->mem = data; - - dev_dbg(&dev->dev, " mem " - "min %#x max %#x align %d size %d flags %#x\n", - data->min, data->max, data->align, data->size, data->flags); + dma = &option->u.dma; + dma->map = map; + dma->flags = flags; + + dbg_pnp_show_option(dev, option); return 0; } -static void pnp_free_port(struct pnp_port *port) +int pnp_register_port_resource(struct pnp_dev *dev, unsigned int option_flags, + resource_size_t min, resource_size_t max, + resource_size_t align, resource_size_t size, + unsigned char flags) { - struct pnp_port *next; + struct pnp_option *option; + struct pnp_port *port; - while (port) { - next = port->next; - kfree(port); - port = next; - } -} + option = pnp_build_option(dev, IORESOURCE_IO, option_flags); + if (!option) + return -ENOMEM; -static void pnp_free_irq(struct pnp_irq *irq) -{ - struct pnp_irq *next; + port = &option->u.port; + port->min = min; + port->max = max; + port->align = align; + port->size = size; + port->flags = flags; - while (irq) { - next = irq->next; - kfree(irq); - irq = next; - } + dbg_pnp_show_option(dev, option); + return 0; } -static void pnp_free_dma(struct pnp_dma *dma) +int pnp_register_mem_resource(struct pnp_dev *dev, unsigned int option_flags, + resource_size_t min, resource_size_t max, + resource_size_t align, resource_size_t size, + unsigned char flags) { - struct pnp_dma *next; + struct pnp_option *option; + struct pnp_mem *mem; - while (dma) { - next = dma->next; - kfree(dma); - dma = next; - } -} + option = pnp_build_option(dev, IORESOURCE_MEM, option_flags); + if (!option) + return -ENOMEM; -static void pnp_free_mem(struct pnp_mem *mem) -{ - struct pnp_mem *next; + mem = &option->u.mem; + mem->min = min; + mem->max = max; + mem->align = align; + mem->size = size; + mem->flags = flags; - while (mem) { - next = mem->next; - kfree(mem); - mem = next; - } + dbg_pnp_show_option(dev, option); + return 0; } -void pnp_free_option(struct pnp_option *option) +void pnp_free_options(struct pnp_dev *dev) { - struct pnp_option *next; - - while (option) { - next = option->next; - pnp_free_port(option->port); - pnp_free_irq(option->irq); - pnp_free_dma(option->dma); - pnp_free_mem(option->mem); + struct pnp_option *option, *tmp; + + list_for_each_entry_safe(option, tmp, &dev->options, list) { + list_del(&option->list); kfree(option); - option = next; } } @@ -237,7 +159,7 @@ void pnp_free_option(struct pnp_option *option) !((*(enda) < *(startb)) || (*(endb) < *(starta))) #define cannot_compare(flags) \ -((flags) & (IORESOURCE_UNSET | IORESOURCE_DISABLED)) +((flags) & IORESOURCE_DISABLED) int pnp_check_port(struct pnp_dev *dev, struct resource *res) { @@ -364,6 +286,61 @@ static irqreturn_t pnp_test_handler(int irq, void *dev_id) return IRQ_HANDLED; } +#ifdef CONFIG_PCI +static int pci_dev_uses_irq(struct pnp_dev *pnp, struct pci_dev *pci, + unsigned int irq) +{ + u32 class; + u8 progif; + + if (pci->irq == irq) { + dev_dbg(&pnp->dev, "device %s using irq %d\n", + pci_name(pci), irq); + return 1; + } + + /* + * See pci_setup_device() and ata_pci_sff_activate_host() for + * similar IDE legacy detection. + */ + pci_read_config_dword(pci, PCI_CLASS_REVISION, &class); + class >>= 8; /* discard revision ID */ + progif = class & 0xff; + class >>= 8; + + if (class == PCI_CLASS_STORAGE_IDE) { + /* + * Unless both channels are native-PCI mode only, + * treat the compatibility IRQs as busy. + */ + if ((progif & 0x5) != 0x5) + if (pci_get_legacy_ide_irq(pci, 0) == irq || + pci_get_legacy_ide_irq(pci, 1) == irq) { + dev_dbg(&pnp->dev, "legacy IDE device %s " + "using irq %d\n", pci_name(pci), irq); + return 1; + } + } + + return 0; +} +#endif + +static int pci_uses_irq(struct pnp_dev *pnp, unsigned int irq) +{ +#ifdef CONFIG_PCI + struct pci_dev *pci = NULL; + + for_each_pci_dev(pci) { + if (pci_dev_uses_irq(pnp, pci, irq)) { + pci_dev_put(pci); + return 1; + } + } +#endif + return 0; +} + int pnp_check_irq(struct pnp_dev *dev, struct resource *res) { int i; @@ -395,18 +372,9 @@ int pnp_check_irq(struct pnp_dev *dev, struct resource *res) } } -#ifdef CONFIG_PCI /* check if the resource is being used by a pci device */ - { - struct pci_dev *pci = NULL; - for_each_pci_dev(pci) { - if (pci->irq == *irq) { - pci_dev_put(pci); - return 0; - } - } - } -#endif + if (pci_uses_irq(dev, *irq)) + return 0; /* check if the resource is already in use, skip if the * device is active because it itself may be in use */ @@ -499,81 +467,37 @@ int pnp_check_dma(struct pnp_dev *dev, struct resource *res) #endif } -struct pnp_resource *pnp_get_pnp_resource(struct pnp_dev *dev, - unsigned int type, unsigned int num) +int pnp_resource_type(struct resource *res) { - struct pnp_resource_table *res = dev->res; - - switch (type) { - case IORESOURCE_IO: - if (num >= PNP_MAX_PORT) - return NULL; - return &res->port[num]; - case IORESOURCE_MEM: - if (num >= PNP_MAX_MEM) - return NULL; - return &res->mem[num]; - case IORESOURCE_IRQ: - if (num >= PNP_MAX_IRQ) - return NULL; - return &res->irq[num]; - case IORESOURCE_DMA: - if (num >= PNP_MAX_DMA) - return NULL; - return &res->dma[num]; - } - return NULL; + return res->flags & (IORESOURCE_IO | IORESOURCE_MEM | + IORESOURCE_IRQ | IORESOURCE_DMA); } struct resource *pnp_get_resource(struct pnp_dev *dev, unsigned int type, unsigned int num) { struct pnp_resource *pnp_res; + struct resource *res; - pnp_res = pnp_get_pnp_resource(dev, type, num); - if (pnp_res) - return &pnp_res->res; - + list_for_each_entry(pnp_res, &dev->resources, list) { + res = &pnp_res->res; + if (pnp_resource_type(res) == type && num-- == 0) + return res; + } return NULL; } EXPORT_SYMBOL(pnp_get_resource); -static struct pnp_resource *pnp_new_resource(struct pnp_dev *dev, int type) +static struct pnp_resource *pnp_new_resource(struct pnp_dev *dev) { struct pnp_resource *pnp_res; - int i; - switch (type) { - case IORESOURCE_IO: - for (i = 0; i < PNP_MAX_PORT; i++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IO, i); - if (pnp_res && !pnp_resource_valid(&pnp_res->res)) - return pnp_res; - } - break; - case IORESOURCE_MEM: - for (i = 0; i < PNP_MAX_MEM; i++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_MEM, i); - if (pnp_res && !pnp_resource_valid(&pnp_res->res)) - return pnp_res; - } - break; - case IORESOURCE_IRQ: - for (i = 0; i < PNP_MAX_IRQ; i++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IRQ, i); - if (pnp_res && !pnp_resource_valid(&pnp_res->res)) - return pnp_res; - } - break; - case IORESOURCE_DMA: - for (i = 0; i < PNP_MAX_DMA; i++) { - pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_DMA, i); - if (pnp_res && !pnp_resource_valid(&pnp_res->res)) - return pnp_res; - } - break; - } - return NULL; + pnp_res = kzalloc(sizeof(struct pnp_resource), GFP_KERNEL); + if (!pnp_res) + return NULL; + + list_add_tail(&pnp_res->list, &dev->resources); + return pnp_res; } struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, @@ -581,15 +505,10 @@ struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, { struct pnp_resource *pnp_res; struct resource *res; - static unsigned char warned; - pnp_res = pnp_new_resource(dev, IORESOURCE_IRQ); + pnp_res = pnp_new_resource(dev); if (!pnp_res) { - if (!warned) { - dev_err(&dev->dev, "can't add resource for IRQ %d\n", - irq); - warned = 1; - } + dev_err(&dev->dev, "can't add resource for IRQ %d\n", irq); return NULL; } @@ -607,15 +526,10 @@ struct pnp_resource *pnp_add_dma_resource(struct pnp_dev *dev, int dma, { struct pnp_resource *pnp_res; struct resource *res; - static unsigned char warned; - pnp_res = pnp_new_resource(dev, IORESOURCE_DMA); + pnp_res = pnp_new_resource(dev); if (!pnp_res) { - if (!warned) { - dev_err(&dev->dev, "can't add resource for DMA %d\n", - dma); - warned = 1; - } + dev_err(&dev->dev, "can't add resource for DMA %d\n", dma); return NULL; } @@ -634,16 +548,12 @@ struct pnp_resource *pnp_add_io_resource(struct pnp_dev *dev, { struct pnp_resource *pnp_res; struct resource *res; - static unsigned char warned; - pnp_res = pnp_new_resource(dev, IORESOURCE_IO); + pnp_res = pnp_new_resource(dev); if (!pnp_res) { - if (!warned) { - dev_err(&dev->dev, "can't add resource for IO " - "%#llx-%#llx\n",(unsigned long long) start, - (unsigned long long) end); - warned = 1; - } + dev_err(&dev->dev, "can't add resource for IO %#llx-%#llx\n", + (unsigned long long) start, + (unsigned long long) end); return NULL; } @@ -663,16 +573,12 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev, { struct pnp_resource *pnp_res; struct resource *res; - static unsigned char warned; - pnp_res = pnp_new_resource(dev, IORESOURCE_MEM); + pnp_res = pnp_new_resource(dev); if (!pnp_res) { - if (!warned) { - dev_err(&dev->dev, "can't add resource for MEM " - "%#llx-%#llx\n",(unsigned long long) start, - (unsigned long long) end); - warned = 1; - } + dev_err(&dev->dev, "can't add resource for MEM %#llx-%#llx\n", + (unsigned long long) start, + (unsigned long long) end); return NULL; } @@ -686,6 +592,52 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev, return pnp_res; } +/* + * Determine whether the specified resource is a possible configuration + * for this device. + */ +int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t start, + resource_size_t size) +{ + struct pnp_option *option; + struct pnp_port *port; + struct pnp_mem *mem; + struct pnp_irq *irq; + struct pnp_dma *dma; + + list_for_each_entry(option, &dev->options, list) { + if (option->type != type) + continue; + + switch (option->type) { + case IORESOURCE_IO: + port = &option->u.port; + if (port->min == start && port->size == size) + return 1; + break; + case IORESOURCE_MEM: + mem = &option->u.mem; + if (mem->min == start && mem->size == size) + return 1; + break; + case IORESOURCE_IRQ: + irq = &option->u.irq; + if (start < PNP_IRQ_NR && + test_bit(start, irq->map.bits)) + return 1; + break; + case IORESOURCE_DMA: + dma = &option->u.dma; + if (dma->map & (1 << start)) + return 1; + break; + } + } + + return 0; +} +EXPORT_SYMBOL(pnp_possible_config); + /* format is: pnp_reserve_irq=irq1[,irq2] .... */ static int __init pnp_setup_reserve_irq(char *str) { diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index 95b076c18c07..bbf78ef4ba02 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -2,6 +2,8 @@ * support.c - standard functions for the use of pnp protocol drivers * * Copyright 2003 Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #include @@ -16,6 +18,10 @@ */ int pnp_is_active(struct pnp_dev *dev) { + /* + * I don't think this is very reliable because pnp_disable_dev() + * only clears out auto-assigned resources. + */ if (!pnp_port_start(dev, 0) && pnp_port_len(dev, 0) <= 1 && !pnp_mem_start(dev, 0) && pnp_mem_len(dev, 0) <= 1 && pnp_irq(dev, 0) == -1 && pnp_dma(dev, 0) == -1) @@ -52,39 +58,154 @@ void pnp_eisa_id_to_string(u32 id, char *str) str[7] = '\0'; } +char *pnp_resource_type_name(struct resource *res) +{ + switch (pnp_resource_type(res)) { + case IORESOURCE_IO: + return "io"; + case IORESOURCE_MEM: + return "mem"; + case IORESOURCE_IRQ: + return "irq"; + case IORESOURCE_DMA: + return "dma"; + } + return NULL; +} + void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc) { #ifdef DEBUG + char buf[128]; + int len = 0; + struct pnp_resource *pnp_res; struct resource *res; - int i; - dev_dbg(&dev->dev, "current resources: %s\n", desc); - - for (i = 0; i < PNP_MAX_IRQ; i++) { - res = pnp_get_resource(dev, IORESOURCE_IRQ, i); - if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " irq %lld flags %#lx\n", - (unsigned long long) res->start, res->flags); + if (list_empty(&dev->resources)) { + dev_dbg(&dev->dev, "%s: no current resources\n", desc); + return; } - for (i = 0; i < PNP_MAX_DMA; i++) { - res = pnp_get_resource(dev, IORESOURCE_DMA, i); - if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " dma %lld flags %#lx\n", - (unsigned long long) res->start, res->flags); + + dev_dbg(&dev->dev, "%s: current resources:\n", desc); + list_for_each_entry(pnp_res, &dev->resources, list) { + res = &pnp_res->res; + + len += snprintf(buf + len, sizeof(buf) - len, " %-3s ", + pnp_resource_type_name(res)); + + if (res->flags & IORESOURCE_DISABLED) { + dev_dbg(&dev->dev, "%sdisabled\n", buf); + continue; + } + + switch (pnp_resource_type(res)) { + case IORESOURCE_IO: + case IORESOURCE_MEM: + len += snprintf(buf + len, sizeof(buf) - len, + "%#llx-%#llx flags %#lx", + (unsigned long long) res->start, + (unsigned long long) res->end, + res->flags); + break; + case IORESOURCE_IRQ: + case IORESOURCE_DMA: + len += snprintf(buf + len, sizeof(buf) - len, + "%lld flags %#lx", + (unsigned long long) res->start, + res->flags); + break; + } + dev_dbg(&dev->dev, "%s\n", buf); } - for (i = 0; i < PNP_MAX_PORT; i++) { - res = pnp_get_resource(dev, IORESOURCE_IO, i); - if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " io %#llx-%#llx flags %#lx\n", - (unsigned long long) res->start, - (unsigned long long) res->end, res->flags); +#endif +} + +char *pnp_option_priority_name(struct pnp_option *option) +{ + switch (pnp_option_priority(option)) { + case PNP_RES_PRIORITY_PREFERRED: + return "preferred"; + case PNP_RES_PRIORITY_ACCEPTABLE: + return "acceptable"; + case PNP_RES_PRIORITY_FUNCTIONAL: + return "functional"; } - for (i = 0; i < PNP_MAX_MEM; i++) { - res = pnp_get_resource(dev, IORESOURCE_MEM, i); - if (res && !(res->flags & IORESOURCE_UNSET)) - dev_dbg(&dev->dev, " mem %#llx-%#llx flags %#lx\n", - (unsigned long long) res->start, - (unsigned long long) res->end, res->flags); + return "invalid"; +} + +void dbg_pnp_show_option(struct pnp_dev *dev, struct pnp_option *option) +{ +#ifdef DEBUG + char buf[128]; + int len = 0, i; + struct pnp_port *port; + struct pnp_mem *mem; + struct pnp_irq *irq; + struct pnp_dma *dma; + + if (pnp_option_is_dependent(option)) + len += snprintf(buf + len, sizeof(buf) - len, + " dependent set %d (%s) ", + pnp_option_set(option), + pnp_option_priority_name(option)); + else + len += snprintf(buf + len, sizeof(buf) - len, " independent "); + + switch (option->type) { + case IORESOURCE_IO: + port = &option->u.port; + len += snprintf(buf + len, sizeof(buf) - len, "io min %#llx " + "max %#llx align %lld size %lld flags %#x", + (unsigned long long) port->min, + (unsigned long long) port->max, + (unsigned long long) port->align, + (unsigned long long) port->size, port->flags); + break; + case IORESOURCE_MEM: + mem = &option->u.mem; + len += snprintf(buf + len, sizeof(buf) - len, "mem min %#llx " + "max %#llx align %lld size %lld flags %#x", + (unsigned long long) mem->min, + (unsigned long long) mem->max, + (unsigned long long) mem->align, + (unsigned long long) mem->size, mem->flags); + break; + case IORESOURCE_IRQ: + irq = &option->u.irq; + len += snprintf(buf + len, sizeof(buf) - len, "irq"); + if (bitmap_empty(irq->map.bits, PNP_IRQ_NR)) + len += snprintf(buf + len, sizeof(buf) - len, + " "); + else { + for (i = 0; i < PNP_IRQ_NR; i++) + if (test_bit(i, irq->map.bits)) + len += snprintf(buf + len, + sizeof(buf) - len, + " %d", i); + } + len += snprintf(buf + len, sizeof(buf) - len, " flags %#x", + irq->flags); + if (irq->flags & IORESOURCE_IRQ_OPTIONAL) + len += snprintf(buf + len, sizeof(buf) - len, + " (optional)"); + break; + case IORESOURCE_DMA: + dma = &option->u.dma; + len += snprintf(buf + len, sizeof(buf) - len, "dma"); + if (!dma->map) + len += snprintf(buf + len, sizeof(buf) - len, + " "); + else { + for (i = 0; i < 8; i++) + if (dma->map & (1 << i)) + len += snprintf(buf + len, + sizeof(buf) - len, + " %d", i); + } + len += snprintf(buf + len, sizeof(buf) - len, " (bitmask %#x) " + "flags %#x", dma->map, dma->flags); + break; } + dev_dbg(&dev->dev, "%s\n", buf); #endif } diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index cf4e07b01d48..764f3a310685 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -60,7 +60,7 @@ static void reserve_resources_of_dev(struct pnp_dev *dev) int i; for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IO, i)); i++) { - if (res->flags & IORESOURCE_UNSET) + if (res->flags & IORESOURCE_DISABLED) continue; if (res->start == 0) continue; /* disabled */ @@ -81,7 +81,7 @@ static void reserve_resources_of_dev(struct pnp_dev *dev) } for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, i)); i++) { - if (res->flags & (IORESOURCE_UNSET | IORESOURCE_DISABLED)) + if (res->flags & IORESOURCE_DISABLED) continue; reserve_range(dev, res->start, res->end, 0); diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index d91df38ee4f7..85fcb4371054 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -333,7 +333,8 @@ dasd_diag_check_device(struct dasd_device *device) if (IS_ERR(block)) { DEV_MESSAGE(KERN_WARNING, device, "%s", "could not allocate dasd block structure"); - kfree(device->private); + device->private = NULL; + kfree(private); return PTR_ERR(block); } device->block = block; @@ -348,7 +349,8 @@ dasd_diag_check_device(struct dasd_device *device) if (rc) { DEV_MESSAGE(KERN_WARNING, device, "failed to retrieve device " "information (rc=%d)", rc); - return -ENOTSUPP; + rc = -EOPNOTSUPP; + goto out; } /* Figure out position of label block */ @@ -362,7 +364,8 @@ dasd_diag_check_device(struct dasd_device *device) default: DEV_MESSAGE(KERN_WARNING, device, "unsupported device class " "(class=%d)", private->rdc_data.vdev_class); - return -ENOTSUPP; + rc = -EOPNOTSUPP; + goto out; } DBF_DEV_EVENT(DBF_INFO, device, @@ -379,7 +382,8 @@ dasd_diag_check_device(struct dasd_device *device) if (label == NULL) { DEV_MESSAGE(KERN_WARNING, device, "%s", "No memory to allocate initialization request"); - return -ENOMEM; + rc = -ENOMEM; + goto out; } rc = 0; end_block = 0; @@ -403,7 +407,7 @@ dasd_diag_check_device(struct dasd_device *device) DEV_MESSAGE(KERN_WARNING, device, "%s", "DIAG call failed"); rc = -EOPNOTSUPP; - goto out; + goto out_label; } mdsk_term_io(device); if (rc == 0) @@ -413,7 +417,7 @@ dasd_diag_check_device(struct dasd_device *device) DEV_MESSAGE(KERN_WARNING, device, "device access failed " "(rc=%d)", rc); rc = -EIO; - goto out; + goto out_label; } /* check for label block */ if (memcmp(label->label_id, DASD_DIAG_CMS1, @@ -439,8 +443,15 @@ dasd_diag_check_device(struct dasd_device *device) (unsigned long) (block->blocks << block->s2b_shift) >> 1); } -out: +out_label: free_page((long) label); +out: + if (rc) { + device->block = NULL; + dasd_free_block(block); + device->private = NULL; + kfree(private); + } return rc; } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index e0b77210d37a..3590fdb5b2fd 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1418,8 +1418,10 @@ static void dasd_eckd_handle_unsolicited_interrupt(struct dasd_device *device, /* service information message SIM */ - if ((irb->ecw[6] & DASD_SIM_SENSE) == DASD_SIM_SENSE) { + if (irb->esw.esw0.erw.cons && (irb->ecw[27] & DASD_SENSE_BIT_0) && + ((irb->ecw[6] & DASD_SIM_SENSE) == DASD_SIM_SENSE)) { dasd_3990_erp_handle_sim(device, irb->ecw); + dasd_schedule_device_bh(device); return; } diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index aee4656127f7..aa0c533423a5 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -139,7 +139,8 @@ dasd_fba_check_characteristics(struct dasd_device *device) if (IS_ERR(block)) { DEV_MESSAGE(KERN_WARNING, device, "%s", "could not allocate dasd block structure"); - kfree(device->private); + device->private = NULL; + kfree(private); return PTR_ERR(block); } device->block = block; @@ -152,6 +153,10 @@ dasd_fba_check_characteristics(struct dasd_device *device) DEV_MESSAGE(KERN_WARNING, device, "Read device characteristics returned error %d", rc); + device->block = NULL; + dasd_free_block(block); + device->private = NULL; + kfree(private); return rc; } diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index 0a9f1cccbe58..b0ac44b27127 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -345,7 +345,7 @@ static int get_urd_class(struct urdev *urd) cc = diag210(&ur_diag210); switch (cc) { case 0: - return -ENOTSUPP; + return -EOPNOTSUPP; case 2: return ur_diag210.vrdcvcla; /* virtual device class */ case 3: @@ -621,7 +621,7 @@ static int verify_device(struct urdev *urd) case DEV_CLASS_UR_I: return verify_uri_device(urd); default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } @@ -654,7 +654,7 @@ static int get_file_reclen(struct urdev *urd) case DEV_CLASS_UR_I: return get_uri_file_reclen(urd); default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } @@ -827,7 +827,7 @@ static int ur_probe(struct ccw_device *cdev) goto fail_remove_attr; } if ((urd->class != DEV_CLASS_UR_I) && (urd->class != DEV_CLASS_UR_O)) { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto fail_remove_attr; } spin_lock_irq(get_ccwdev_lock(cdev)); @@ -892,7 +892,7 @@ static int ur_set_online(struct ccw_device *cdev) } else if (urd->cdev->id.cu_type == PRINTER_DEVTYPE) { sprintf(node_id, "vmprt-%s", cdev->dev.bus_id); } else { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto fail_free_cdev; } diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 047dd92ae804..7fd84be11931 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -29,6 +29,7 @@ #define TO_USER 0 #define TO_KERNEL 1 +#define CHUNK_INFO_SIZE 34 /* 2 16-byte char, each followed by blank */ enum arch_id { ARCH_S390 = 0, @@ -51,6 +52,7 @@ static struct debug_info *zcore_dbf; static int hsa_available; static struct dentry *zcore_dir; static struct dentry *zcore_file; +static struct dentry *zcore_memmap_file; /* * Copy memory from HSA to kernel or user memory (not reentrant): @@ -476,6 +478,54 @@ static const struct file_operations zcore_fops = { .release = zcore_release, }; +static ssize_t zcore_memmap_read(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, filp->private_data, + MEMORY_CHUNKS * CHUNK_INFO_SIZE); +} + +static int zcore_memmap_open(struct inode *inode, struct file *filp) +{ + int i; + char *buf; + struct mem_chunk *chunk_array; + + chunk_array = kzalloc(MEMORY_CHUNKS * sizeof(struct mem_chunk), + GFP_KERNEL); + if (!chunk_array) + return -ENOMEM; + detect_memory_layout(chunk_array); + buf = kzalloc(MEMORY_CHUNKS * CHUNK_INFO_SIZE, GFP_KERNEL); + if (!buf) { + kfree(chunk_array); + return -ENOMEM; + } + for (i = 0; i < MEMORY_CHUNKS; i++) { + sprintf(buf + (i * CHUNK_INFO_SIZE), "%016llx %016llx ", + (unsigned long long) chunk_array[i].addr, + (unsigned long long) chunk_array[i].size); + if (chunk_array[i].size == 0) + break; + } + kfree(chunk_array); + filp->private_data = buf; + return 0; +} + +static int zcore_memmap_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +static const struct file_operations zcore_memmap_fops = { + .owner = THIS_MODULE, + .read = zcore_memmap_read, + .open = zcore_memmap_open, + .release = zcore_memmap_release, +}; + static void __init set_s390_lc_mask(union save_area *map) { @@ -554,18 +604,44 @@ static int __init check_sdias(void) return 0; } -static void __init zcore_header_init(int arch, struct zcore_header *hdr) +static int __init get_mem_size(unsigned long *mem) +{ + int i; + struct mem_chunk *chunk_array; + + chunk_array = kzalloc(MEMORY_CHUNKS * sizeof(struct mem_chunk), + GFP_KERNEL); + if (!chunk_array) + return -ENOMEM; + detect_memory_layout(chunk_array); + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (chunk_array[i].size == 0) + break; + *mem += chunk_array[i].size; + } + kfree(chunk_array); + return 0; +} + +static int __init zcore_header_init(int arch, struct zcore_header *hdr) { + int rc; + unsigned long memory = 0; + if (arch == ARCH_S390X) hdr->arch_id = DUMP_ARCH_S390X; else hdr->arch_id = DUMP_ARCH_S390; - hdr->mem_size = sys_info.mem_size; - hdr->rmem_size = sys_info.mem_size; + rc = get_mem_size(&memory); + if (rc) + return rc; + hdr->mem_size = memory; + hdr->rmem_size = memory; hdr->mem_end = sys_info.mem_size; - hdr->num_pages = sys_info.mem_size / PAGE_SIZE; + hdr->num_pages = memory / PAGE_SIZE; hdr->tod = get_clock(); get_cpu_id(&hdr->cpu_id); + return 0; } static int __init zcore_init(void) @@ -608,7 +684,9 @@ static int __init zcore_init(void) if (rc) goto fail; - zcore_header_init(arch, &zcore_header); + rc = zcore_header_init(arch, &zcore_header); + if (rc) + goto fail; zcore_dir = debugfs_create_dir("zcore" , NULL); if (!zcore_dir) { @@ -618,13 +696,22 @@ static int __init zcore_init(void) zcore_file = debugfs_create_file("mem", S_IRUSR, zcore_dir, NULL, &zcore_fops); if (!zcore_file) { - debugfs_remove(zcore_dir); rc = -ENOMEM; - goto fail; + goto fail_dir; + } + zcore_memmap_file = debugfs_create_file("memmap", S_IRUSR, zcore_dir, + NULL, &zcore_memmap_fops); + if (!zcore_memmap_file) { + rc = -ENOMEM; + goto fail_file; } hsa_available = 1; return 0; +fail_file: + debugfs_remove(zcore_file); +fail_dir: + debugfs_remove(zcore_dir); fail: diag308(DIAG308_REL_HSA, NULL); return rc; diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile index 91e9e3f3073a..bd79bd165396 100644 --- a/drivers/s390/cio/Makefile +++ b/drivers/s390/cio/Makefile @@ -9,4 +9,6 @@ ccw_device-objs += device_id.o device_pgid.o device_status.o obj-y += ccw_device.o cmf.o obj-$(CONFIG_CHSC_SCH) += chsc_sch.o obj-$(CONFIG_CCWGROUP) += ccwgroup.o + +qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_perf.o qdio_setup.o obj-$(CONFIG_QDIO) += qdio.o diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 65264a38057d..29826fdd47b8 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -27,7 +27,13 @@ static void *sei_page; -static int chsc_error_from_response(int response) +/** + * chsc_error_from_response() - convert a chsc response to an error + * @response: chsc response code + * + * Returns an appropriate Linux error code for @response. + */ +int chsc_error_from_response(int response) { switch (response) { case 0x0001: @@ -45,6 +51,7 @@ static int chsc_error_from_response(int response) return -EIO; } } +EXPORT_SYMBOL_GPL(chsc_error_from_response); struct chsc_ssd_area { struct chsc_header request; diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index fb6c4d6c45b4..ba59bceace98 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -101,4 +101,6 @@ void chsc_chp_online(struct chp_id chpid); void chsc_chp_offline(struct chp_id chpid); int chsc_get_channel_measurement_chars(struct channel_path *chp); +int chsc_error_from_response(int response); + #endif diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c deleted file mode 100644 index 2bf36e14b102..000000000000 --- a/drivers/s390/cio/qdio.c +++ /dev/null @@ -1,3929 +0,0 @@ -/* - * - * linux/drivers/s390/cio/qdio.c - * - * Linux for S/390 QDIO base support, Hipersocket base support - * version 2 - * - * Copyright 2000,2002 IBM Corporation - * Author(s): Utz Bacher - * 2.6 cio integration by Cornelia Huck - * - * Restriction: only 63 iqdio subchannels would have its own indicator, - * after that, subsequent subchannels share one indicator - * - * - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "cio.h" -#include "css.h" -#include "device.h" -#include "qdio.h" -#include "ioasm.h" -#include "chsc.h" - -/****************** MODULE PARAMETER VARIABLES ********************/ -MODULE_AUTHOR("Utz Bacher "); -MODULE_DESCRIPTION("QDIO base support version 2, " \ - "Copyright 2000 IBM Corporation"); -MODULE_LICENSE("GPL"); - -/******************** HERE WE GO ***********************************/ - -static const char version[] = "QDIO base support version 2"; - -static int qdio_performance_stats = 0; -static int proc_perf_file_registration; -static struct qdio_perf_stats perf_stats; - -static int hydra_thinints; -static int is_passthrough = 0; -static int omit_svs; - -static int indicator_used[INDICATORS_PER_CACHELINE]; -static __u32 * volatile indicators; -static __u32 volatile spare_indicator; -static atomic_t spare_indicator_usecount; -#define QDIO_MEMPOOL_SCSSC_ELEMENTS 2 -static mempool_t *qdio_mempool_scssc; -static struct kmem_cache *qdio_q_cache; - -static debug_info_t *qdio_dbf_setup; -static debug_info_t *qdio_dbf_sbal; -static debug_info_t *qdio_dbf_trace; -static debug_info_t *qdio_dbf_sense; -#ifdef CONFIG_QDIO_DEBUG -static debug_info_t *qdio_dbf_slsb_out; -static debug_info_t *qdio_dbf_slsb_in; -#endif /* CONFIG_QDIO_DEBUG */ - -/* iQDIO stuff: */ -static volatile struct qdio_q *tiq_list=NULL; /* volatile as it could change - during a while loop */ -static DEFINE_SPINLOCK(ttiq_list_lock); -static void *tiqdio_ind; -static void tiqdio_tl(unsigned long); -static DECLARE_TASKLET(tiqdio_tasklet,tiqdio_tl,0); - -/* not a macro, as one of the arguments is atomic_read */ -static inline int -qdio_min(int a,int b) -{ - if (a> 12); /* time>>12 is microseconds */ -} - -/* - * unfortunately, we can't just xchg the values; in do_QDIO we want to reserve - * the q in any case, so that we'll not be interrupted when we are in - * qdio_mark_tiq... shouldn't have a really bad impact, as reserving almost - * ever works (last famous words) - */ -static inline int -qdio_reserve_q(struct qdio_q *q) -{ - return atomic_add_return(1,&q->use_count) - 1; -} - -static inline void -qdio_release_q(struct qdio_q *q) -{ - atomic_dec(&q->use_count); -} - -/*check ccq */ -static int -qdio_check_ccq(struct qdio_q *q, unsigned int ccq) -{ - char dbf_text[15]; - - if (ccq == 0 || ccq == 32) - return 0; - if (ccq == 96 || ccq == 97) - return 1; - /*notify devices immediately*/ - sprintf(dbf_text,"%d", ccq); - QDIO_DBF_TEXT2(1,trace,dbf_text); - return -EIO; -} -/* EQBS: extract buffer states */ -static int -qdio_do_eqbs(struct qdio_q *q, unsigned char *state, - unsigned int *start, unsigned int *cnt) -{ - struct qdio_irq *irq; - unsigned int tmp_cnt, q_no, ccq; - int rc ; - char dbf_text[15]; - - ccq = 0; - tmp_cnt = *cnt; - irq = (struct qdio_irq*)q->irq_ptr; - q_no = q->q_no; - if(!q->is_input_q) - q_no += irq->no_input_qs; -again: - ccq = do_eqbs(irq->sch_token, state, q_no, start, cnt); - rc = qdio_check_ccq(q, ccq); - if ((ccq == 96) && (tmp_cnt != *cnt)) - rc = 0; - if (rc == 1) { - QDIO_DBF_TEXT5(1,trace,"eqAGAIN"); - goto again; - } - if (rc < 0) { - QDIO_DBF_TEXT2(1,trace,"eqberr"); - sprintf(dbf_text,"%2x,%2x,%d,%d",tmp_cnt, *cnt, ccq, q_no); - QDIO_DBF_TEXT2(1,trace,dbf_text); - q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION| - QDIO_STATUS_LOOK_FOR_ERROR, - 0, 0, 0, -1, -1, q->int_parm); - return 0; - } - return (tmp_cnt - *cnt); -} - -/* SQBS: set buffer states */ -static int -qdio_do_sqbs(struct qdio_q *q, unsigned char state, - unsigned int *start, unsigned int *cnt) -{ - struct qdio_irq *irq; - unsigned int tmp_cnt, q_no, ccq; - int rc; - char dbf_text[15]; - - ccq = 0; - tmp_cnt = *cnt; - irq = (struct qdio_irq*)q->irq_ptr; - q_no = q->q_no; - if(!q->is_input_q) - q_no += irq->no_input_qs; -again: - ccq = do_sqbs(irq->sch_token, state, q_no, start, cnt); - rc = qdio_check_ccq(q, ccq); - if (rc == 1) { - QDIO_DBF_TEXT5(1,trace,"sqAGAIN"); - goto again; - } - if (rc < 0) { - QDIO_DBF_TEXT3(1,trace,"sqberr"); - sprintf(dbf_text,"%2x,%2x",tmp_cnt,*cnt); - QDIO_DBF_TEXT3(1,trace,dbf_text); - sprintf(dbf_text,"%d,%d",ccq,q_no); - QDIO_DBF_TEXT3(1,trace,dbf_text); - q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION| - QDIO_STATUS_LOOK_FOR_ERROR, - 0, 0, 0, -1, -1, q->int_parm); - return 0; - } - return (tmp_cnt - *cnt); -} - -static inline int -qdio_set_slsb(struct qdio_q *q, unsigned int *bufno, - unsigned char state, unsigned int *count) -{ - volatile char *slsb; - struct qdio_irq *irq; - - irq = (struct qdio_irq*)q->irq_ptr; - if (!irq->is_qebsm) { - slsb = (char *)&q->slsb.acc.val[(*bufno)]; - xchg(slsb, state); - return 1; - } - return qdio_do_sqbs(q, state, bufno, count); -} - -#ifdef CONFIG_QDIO_DEBUG -static inline void -qdio_trace_slsb(struct qdio_q *q) -{ - if (q->queue_type==QDIO_TRACE_QTYPE) { - if (q->is_input_q) - QDIO_DBF_HEX2(0,slsb_in,&q->slsb, - QDIO_MAX_BUFFERS_PER_Q); - else - QDIO_DBF_HEX2(0,slsb_out,&q->slsb, - QDIO_MAX_BUFFERS_PER_Q); - } -} -#endif - -static inline int -set_slsb(struct qdio_q *q, unsigned int *bufno, - unsigned char state, unsigned int *count) -{ - int rc; -#ifdef CONFIG_QDIO_DEBUG - qdio_trace_slsb(q); -#endif - rc = qdio_set_slsb(q, bufno, state, count); -#ifdef CONFIG_QDIO_DEBUG - qdio_trace_slsb(q); -#endif - return rc; -} -static inline int -qdio_siga_sync(struct qdio_q *q, unsigned int gpr2, - unsigned int gpr3) -{ - int cc; - - QDIO_DBF_TEXT4(0,trace,"sigasync"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - qdio_perf_stat_inc(&perf_stats.siga_syncs); - - cc = do_siga_sync(q->schid, gpr2, gpr3); - if (cc) - QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*)); - - return cc; -} - -static inline int -qdio_siga_sync_q(struct qdio_q *q) -{ - if (q->is_input_q) - return qdio_siga_sync(q, 0, q->mask); - return qdio_siga_sync(q, q->mask, 0); -} - -static int -__do_siga_output(struct qdio_q *q, unsigned int *busy_bit) -{ - struct qdio_irq *irq; - unsigned int fc = 0; - unsigned long schid; - - irq = (struct qdio_irq *) q->irq_ptr; - if (!irq->is_qebsm) - schid = *((u32 *)&q->schid); - else { - schid = irq->sch_token; - fc |= 0x80; - } - return do_siga_output(schid, q->mask, busy_bit, fc); -} - -/* - * returns QDIO_SIGA_ERROR_ACCESS_EXCEPTION as cc, when SIGA returns - * an access exception - */ -static int -qdio_siga_output(struct qdio_q *q) -{ - int cc; - __u32 busy_bit; - __u64 start_time=0; - - qdio_perf_stat_inc(&perf_stats.siga_outs); - - QDIO_DBF_TEXT4(0,trace,"sigaout"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - for (;;) { - cc = __do_siga_output(q, &busy_bit); -//QDIO_PRINT_ERR("cc=%x, busy=%x\n",cc,busy_bit); - if ((cc==2) && (busy_bit) && (q->is_iqdio_q)) { - if (!start_time) - start_time=NOW; - if ((NOW-start_time)>QDIO_BUSY_BIT_PATIENCE) - break; - } else - break; - } - - if ((cc==2) && (busy_bit)) - cc |= QDIO_SIGA_ERROR_B_BIT_SET; - - if (cc) - QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*)); - - return cc; -} - -static int -qdio_siga_input(struct qdio_q *q) -{ - int cc; - - QDIO_DBF_TEXT4(0,trace,"sigain"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - qdio_perf_stat_inc(&perf_stats.siga_ins); - - cc = do_siga_input(q->schid, q->mask); - - if (cc) - QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*)); - - return cc; -} - -/* locked by the locks in qdio_activate and qdio_cleanup */ -static __u32 * -qdio_get_indicator(void) -{ - int i; - - for (i = 0; i < INDICATORS_PER_CACHELINE; i++) - if (!indicator_used[i]) { - indicator_used[i]=1; - return indicators+i; - } - atomic_inc(&spare_indicator_usecount); - return (__u32 * volatile) &spare_indicator; -} - -/* locked by the locks in qdio_activate and qdio_cleanup */ -static void -qdio_put_indicator(__u32 *addr) -{ - int i; - - if ( (addr) && (addr!=&spare_indicator) ) { - i=addr-indicators; - indicator_used[i]=0; - } - if (addr == &spare_indicator) - atomic_dec(&spare_indicator_usecount); -} - -static inline void -tiqdio_clear_summary_bit(__u32 *location) -{ - QDIO_DBF_TEXT5(0,trace,"clrsummb"); - QDIO_DBF_HEX5(0,trace,&location,sizeof(void*)); - - xchg(location,0); -} - -static inline void -tiqdio_set_summary_bit(__u32 *location) -{ - QDIO_DBF_TEXT5(0,trace,"setsummb"); - QDIO_DBF_HEX5(0,trace,&location,sizeof(void*)); - - xchg(location,-1); -} - -static inline void -tiqdio_sched_tl(void) -{ - tasklet_hi_schedule(&tiqdio_tasklet); -} - -static void -qdio_mark_tiq(struct qdio_q *q) -{ - unsigned long flags; - - QDIO_DBF_TEXT4(0,trace,"mark iq"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - spin_lock_irqsave(&ttiq_list_lock,flags); - if (unlikely(atomic_read(&q->is_in_shutdown))) - goto out_unlock; - - if (!q->is_input_q) - goto out_unlock; - - if ((q->list_prev) || (q->list_next)) - goto out_unlock; - - if (!tiq_list) { - tiq_list=q; - q->list_prev=q; - q->list_next=q; - } else { - q->list_next=tiq_list; - q->list_prev=tiq_list->list_prev; - tiq_list->list_prev->list_next=q; - tiq_list->list_prev=q; - } - spin_unlock_irqrestore(&ttiq_list_lock,flags); - - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - tiqdio_sched_tl(); - return; -out_unlock: - spin_unlock_irqrestore(&ttiq_list_lock,flags); - return; -} - -static inline void -qdio_mark_q(struct qdio_q *q) -{ - QDIO_DBF_TEXT4(0,trace,"mark q"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - if (unlikely(atomic_read(&q->is_in_shutdown))) - return; - - tasklet_schedule(&q->tasklet); -} - -static int -qdio_stop_polling(struct qdio_q *q) -{ -#ifdef QDIO_USE_PROCESSING_STATE - unsigned int tmp, gsf, count = 1; - unsigned char state = 0; - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - - if (!atomic_xchg(&q->polling,0)) - return 1; - - QDIO_DBF_TEXT4(0,trace,"stoppoll"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - /* show the card that we are not polling anymore */ - if (!q->is_input_q) - return 1; - - tmp = gsf = GET_SAVED_FRONTIER(q); - tmp = ((tmp + QDIO_MAX_BUFFERS_PER_Q-1) & (QDIO_MAX_BUFFERS_PER_Q-1) ); - set_slsb(q, &tmp, SLSB_P_INPUT_NOT_INIT, &count); - - /* - * we don't issue this SYNC_MEMORY, as we trust Rick T and - * moreover will not use the PROCESSING state under VM, so - * q->polling was 0 anyway - */ - /*SYNC_MEMORY;*/ - if (irq->is_qebsm) { - count = 1; - qdio_do_eqbs(q, &state, &gsf, &count); - } else - state = q->slsb.acc.val[gsf]; - if (state != SLSB_P_INPUT_PRIMED) - return 1; - /* - * set our summary bit again, as otherwise there is a - * small window we can miss between resetting it and - * checking for PRIMED state - */ - if (q->is_thinint_q) - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - return 0; - -#else /* QDIO_USE_PROCESSING_STATE */ - return 1; -#endif /* QDIO_USE_PROCESSING_STATE */ -} - -/* - * see the comment in do_QDIO and before qdio_reserve_q about the - * sophisticated locking outside of unmark_q, so that we don't need to - * disable the interrupts :-) -*/ -static void -qdio_unmark_q(struct qdio_q *q) -{ - unsigned long flags; - - QDIO_DBF_TEXT4(0,trace,"unmark q"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - if ((!q->list_prev)||(!q->list_next)) - return; - - if ((q->is_thinint_q)&&(q->is_input_q)) { - /* iQDIO */ - spin_lock_irqsave(&ttiq_list_lock,flags); - /* in case cleanup has done this already and simultanously - * qdio_unmark_q is called from the interrupt handler, we've - * got to check this in this specific case again */ - if ((!q->list_prev)||(!q->list_next)) - goto out; - if (q->list_next==q) { - /* q was the only interesting q */ - tiq_list=NULL; - q->list_next=NULL; - q->list_prev=NULL; - } else { - q->list_next->list_prev=q->list_prev; - q->list_prev->list_next=q->list_next; - tiq_list=q->list_next; - q->list_next=NULL; - q->list_prev=NULL; - } -out: - spin_unlock_irqrestore(&ttiq_list_lock,flags); - } -} - -static inline unsigned long -tiqdio_clear_global_summary(void) -{ - unsigned long time; - - QDIO_DBF_TEXT5(0,trace,"clrglobl"); - - time = do_clear_global_summary(); - - QDIO_DBF_HEX5(0,trace,&time,sizeof(unsigned long)); - - return time; -} - - -/************************* OUTBOUND ROUTINES *******************************/ -static int -qdio_qebsm_get_outbound_buffer_frontier(struct qdio_q *q) -{ - struct qdio_irq *irq; - unsigned char state; - unsigned int cnt, count, ftc; - - irq = (struct qdio_irq *) q->irq_ptr; - if ((!q->is_iqdio_q) && (!q->hydra_gives_outbound_pcis)) - SYNC_MEMORY; - - ftc = q->first_to_check; - count = qdio_min(atomic_read(&q->number_of_buffers_used), - (QDIO_MAX_BUFFERS_PER_Q-1)); - if (count == 0) - return q->first_to_check; - cnt = qdio_do_eqbs(q, &state, &ftc, &count); - if (cnt == 0) - return q->first_to_check; - switch (state) { - case SLSB_P_OUTPUT_ERROR: - QDIO_DBF_TEXT3(0,trace,"outperr"); - atomic_sub(cnt , &q->number_of_buffers_used); - if (q->qdio_error) - q->error_status_flags |= - QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; - q->qdio_error = SLSB_P_OUTPUT_ERROR; - q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR; - q->first_to_check = ftc; - break; - case SLSB_P_OUTPUT_EMPTY: - QDIO_DBF_TEXT5(0,trace,"outpempt"); - atomic_sub(cnt, &q->number_of_buffers_used); - q->first_to_check = ftc; - break; - case SLSB_CU_OUTPUT_PRIMED: - /* all buffers primed */ - QDIO_DBF_TEXT5(0,trace,"outpprim"); - break; - default: - break; - } - QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); - return q->first_to_check; -} - -static int -qdio_qebsm_get_inbound_buffer_frontier(struct qdio_q *q) -{ - struct qdio_irq *irq; - unsigned char state; - int tmp, ftc, count, cnt; - char dbf_text[15]; - - - irq = (struct qdio_irq *) q->irq_ptr; - ftc = q->first_to_check; - count = qdio_min(atomic_read(&q->number_of_buffers_used), - (QDIO_MAX_BUFFERS_PER_Q-1)); - if (count == 0) - return q->first_to_check; - cnt = qdio_do_eqbs(q, &state, &ftc, &count); - if (cnt == 0) - return q->first_to_check; - switch (state) { - case SLSB_P_INPUT_ERROR : -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT3(1,trace,"inperr"); - sprintf(dbf_text,"%2x,%2x",ftc,count); - QDIO_DBF_TEXT3(1,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - if (q->qdio_error) - q->error_status_flags |= - QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; - q->qdio_error = SLSB_P_INPUT_ERROR; - q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR; - atomic_sub(cnt, &q->number_of_buffers_used); - q->first_to_check = ftc; - break; - case SLSB_P_INPUT_PRIMED : - QDIO_DBF_TEXT3(0,trace,"inptprim"); - sprintf(dbf_text,"%2x,%2x",ftc,count); - QDIO_DBF_TEXT3(1,trace,dbf_text); - tmp = 0; - ftc = q->first_to_check; -#ifdef QDIO_USE_PROCESSING_STATE - if (cnt > 1) { - cnt -= 1; - tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt); - if (!tmp) - break; - } - cnt = 1; - tmp += set_slsb(q, &ftc, - SLSB_P_INPUT_PROCESSING, &cnt); - atomic_set(&q->polling, 1); -#else - tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt); -#endif - atomic_sub(tmp, &q->number_of_buffers_used); - q->first_to_check = ftc; - break; - case SLSB_CU_INPUT_EMPTY: - case SLSB_P_INPUT_NOT_INIT: - case SLSB_P_INPUT_PROCESSING: - QDIO_DBF_TEXT5(0,trace,"inpnipro"); - break; - default: - break; - } - QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); - return q->first_to_check; -} - -static int -qdio_get_outbound_buffer_frontier(struct qdio_q *q) -{ - struct qdio_irq *irq; - volatile char *slsb; - unsigned int count = 1; - int first_not_to_check, f, f_mod_no; - char dbf_text[15]; - - QDIO_DBF_TEXT4(0,trace,"getobfro"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - irq = (struct qdio_irq *) q->irq_ptr; - if (irq->is_qebsm) - return qdio_qebsm_get_outbound_buffer_frontier(q); - - slsb=&q->slsb.acc.val[0]; - f_mod_no=f=q->first_to_check; - /* - * f points to already processed elements, so f+no_used is correct... - * ... but: we don't check 128 buffers, as otherwise - * qdio_has_outbound_q_moved would return 0 - */ - first_not_to_check=f+qdio_min(atomic_read(&q->number_of_buffers_used), - (QDIO_MAX_BUFFERS_PER_Q-1)); - - if (((!q->is_iqdio_q) && (!q->hydra_gives_outbound_pcis)) || - (q->queue_type == QDIO_IQDIO_QFMT_ASYNCH)) - SYNC_MEMORY; - -check_next: - if (f==first_not_to_check) - goto out; - - switch(slsb[f_mod_no]) { - - /* the adapter has not fetched the output yet */ - case SLSB_CU_OUTPUT_PRIMED: - QDIO_DBF_TEXT5(0,trace,"outpprim"); - break; - - /* the adapter got it */ - case SLSB_P_OUTPUT_EMPTY: - atomic_dec(&q->number_of_buffers_used); - f++; - f_mod_no=f&(QDIO_MAX_BUFFERS_PER_Q-1); - QDIO_DBF_TEXT5(0,trace,"outpempt"); - goto check_next; - - case SLSB_P_OUTPUT_ERROR: - QDIO_DBF_TEXT3(0,trace,"outperr"); - sprintf(dbf_text,"%x-%x-%x",f_mod_no, - q->sbal[f_mod_no]->element[14].sbalf.value, - q->sbal[f_mod_no]->element[15].sbalf.value); - QDIO_DBF_TEXT3(1,trace,dbf_text); - QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256); - - /* kind of process the buffer */ - set_slsb(q, &f_mod_no, SLSB_P_OUTPUT_NOT_INIT, &count); - - /* - * we increment the frontier, as this buffer - * was processed obviously - */ - atomic_dec(&q->number_of_buffers_used); - f_mod_no=(f_mod_no+1)&(QDIO_MAX_BUFFERS_PER_Q-1); - - if (q->qdio_error) - q->error_status_flags|= - QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; - q->qdio_error=SLSB_P_OUTPUT_ERROR; - q->error_status_flags|=QDIO_STATUS_LOOK_FOR_ERROR; - - break; - - /* no new buffers */ - default: - QDIO_DBF_TEXT5(0,trace,"outpni"); - } -out: - return (q->first_to_check=f_mod_no); -} - -/* all buffers are processed */ -static int -qdio_is_outbound_q_done(struct qdio_q *q) -{ - int no_used; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - no_used=atomic_read(&q->number_of_buffers_used); - -#ifdef CONFIG_QDIO_DEBUG - if (no_used) { - sprintf(dbf_text,"oqisnt%02x",no_used); - QDIO_DBF_TEXT4(0,trace,dbf_text); - } else { - QDIO_DBF_TEXT4(0,trace,"oqisdone"); - } - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); -#endif /* CONFIG_QDIO_DEBUG */ - return (no_used==0); -} - -static int -qdio_has_outbound_q_moved(struct qdio_q *q) -{ - int i; - - i=qdio_get_outbound_buffer_frontier(q); - - if ( (i!=GET_SAVED_FRONTIER(q)) || - (q->error_status_flags&QDIO_STATUS_LOOK_FOR_ERROR) ) { - SAVE_FRONTIER(q,i); - QDIO_DBF_TEXT4(0,trace,"oqhasmvd"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 1; - } else { - QDIO_DBF_TEXT4(0,trace,"oqhsntmv"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 0; - } -} - -static void -qdio_kick_outbound_q(struct qdio_q *q) -{ - int result; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; - - QDIO_DBF_TEXT4(0,trace,"kickoutq"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); -#endif /* CONFIG_QDIO_DEBUG */ - - if (!q->siga_out) - return; - - /* here's the story with cc=2 and busy bit set (thanks, Rick): - * VM's CP could present us cc=2 and busy bit set on SIGA-write - * during reconfiguration of their Guest LAN (only in HIPERS mode, - * QDIO mode is asynchronous -- cc=2 and busy bit there will take - * the queues down immediately; and not being under VM we have a - * problem on cc=2 and busy bit set right away). - * - * Therefore qdio_siga_output will try for a short time constantly, - * if such a condition occurs. If it doesn't change, it will - * increase the busy_siga_counter and save the timestamp, and - * schedule the queue for later processing (via mark_q, using the - * queue tasklet). __qdio_outbound_processing will check out the - * counter. If non-zero, it will call qdio_kick_outbound_q as often - * as the value of the counter. This will attempt further SIGA - * instructions. For each successful SIGA, the counter is - * decreased, for failing SIGAs the counter remains the same, after - * all. - * After some time of no movement, qdio_kick_outbound_q will - * finally fail and reflect corresponding error codes to call - * the upper layer module and have it take the queues down. - * - * Note that this is a change from the original HiperSockets design - * (saying cc=2 and busy bit means take the queues down), but in - * these days Guest LAN didn't exist... excessive cc=2 with busy bit - * conditions will still take the queues down, but the threshold is - * higher due to the Guest LAN environment. - */ - - - result=qdio_siga_output(q); - - switch (result) { - case 0: - /* went smooth this time, reset timestamp */ -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT3(0,trace,"cc2reslv"); - sprintf(dbf_text,"%4x%2x%2x",q->schid.sch_no,q->q_no, - atomic_read(&q->busy_siga_counter)); - QDIO_DBF_TEXT3(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - q->timing.busy_start=0; - break; - case (2|QDIO_SIGA_ERROR_B_BIT_SET): - /* cc=2 and busy bit: */ - atomic_inc(&q->busy_siga_counter); - - /* if the last siga was successful, save - * timestamp here */ - if (!q->timing.busy_start) - q->timing.busy_start=NOW; - - /* if we're in time, don't touch error_status_flags - * and siga_error */ - if (NOW-q->timing.busy_startschid.sch_no,q->q_no, - atomic_read(&q->busy_siga_counter)); - QDIO_DBF_TEXT3(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - /* else fallthrough and report error */ - default: - /* for plain cc=1, 2 or 3: */ - if (q->siga_error) - q->error_status_flags|= - QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR; - q->error_status_flags|= - QDIO_STATUS_LOOK_FOR_ERROR; - q->siga_error=result; - } -} - -static void -qdio_kick_outbound_handler(struct qdio_q *q) -{ - int start, end, real_end, count; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - start = q->first_element_to_kick; - /* last_move_ftc was just updated */ - real_end = GET_SAVED_FRONTIER(q); - end = (real_end+QDIO_MAX_BUFFERS_PER_Q-1)& - (QDIO_MAX_BUFFERS_PER_Q-1); - count = (end+QDIO_MAX_BUFFERS_PER_Q+1-start)& - (QDIO_MAX_BUFFERS_PER_Q-1); - -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT4(0,trace,"kickouth"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - sprintf(dbf_text,"s=%2xc=%2x",start,count); - QDIO_DBF_TEXT4(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if (q->state==QDIO_IRQ_STATE_ACTIVE) - q->handler(q->cdev,QDIO_STATUS_OUTBOUND_INT| - q->error_status_flags, - q->qdio_error,q->siga_error,q->q_no,start,count, - q->int_parm); - - /* for the next time: */ - q->first_element_to_kick=real_end; - q->qdio_error=0; - q->siga_error=0; - q->error_status_flags=0; -} - -static void -__qdio_outbound_processing(struct qdio_q *q) -{ - int siga_attempts; - - QDIO_DBF_TEXT4(0,trace,"qoutproc"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - if (unlikely(qdio_reserve_q(q))) { - qdio_release_q(q); - qdio_perf_stat_inc(&perf_stats.outbound_tl_runs_resched); - /* as we're sissies, we'll check next time */ - if (likely(!atomic_read(&q->is_in_shutdown))) { - qdio_mark_q(q); - QDIO_DBF_TEXT4(0,trace,"busy,agn"); - } - return; - } - qdio_perf_stat_inc(&perf_stats.outbound_tl_runs); - qdio_perf_stat_inc(&perf_stats.tl_runs); - - /* see comment in qdio_kick_outbound_q */ - siga_attempts=atomic_read(&q->busy_siga_counter); - while (siga_attempts) { - atomic_dec(&q->busy_siga_counter); - qdio_kick_outbound_q(q); - siga_attempts--; - } - - if (qdio_has_outbound_q_moved(q)) - qdio_kick_outbound_handler(q); - - if (q->queue_type == QDIO_ZFCP_QFMT) { - if ((!q->hydra_gives_outbound_pcis) && - (!qdio_is_outbound_q_done(q))) - qdio_mark_q(q); - } - else if (((!q->is_iqdio_q) && (!q->is_pci_out)) || - (q->queue_type == QDIO_IQDIO_QFMT_ASYNCH)) { - /* - * make sure buffer switch from PRIMED to EMPTY is noticed - * and outbound_handler is called - */ - if (qdio_is_outbound_q_done(q)) { - del_timer(&q->timer); - } else { - if (!timer_pending(&q->timer)) - mod_timer(&q->timer, jiffies + - QDIO_FORCE_CHECK_TIMEOUT); - } - } - - qdio_release_q(q); -} - -static void -qdio_outbound_processing(unsigned long q) -{ - __qdio_outbound_processing((struct qdio_q *) q); -} - -/************************* INBOUND ROUTINES *******************************/ - - -static int -qdio_get_inbound_buffer_frontier(struct qdio_q *q) -{ - struct qdio_irq *irq; - int f,f_mod_no; - volatile char *slsb; - unsigned int count = 1; - int first_not_to_check; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif /* CONFIG_QDIO_DEBUG */ -#ifdef QDIO_USE_PROCESSING_STATE - int last_position=-1; -#endif /* QDIO_USE_PROCESSING_STATE */ - - QDIO_DBF_TEXT4(0,trace,"getibfro"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - irq = (struct qdio_irq *) q->irq_ptr; - if (irq->is_qebsm) - return qdio_qebsm_get_inbound_buffer_frontier(q); - - slsb=&q->slsb.acc.val[0]; - f_mod_no=f=q->first_to_check; - /* - * we don't check 128 buffers, as otherwise qdio_has_inbound_q_moved - * would return 0 - */ - first_not_to_check=f+qdio_min(atomic_read(&q->number_of_buffers_used), - (QDIO_MAX_BUFFERS_PER_Q-1)); - - /* - * we don't use this one, as a PCI or we after a thin interrupt - * will sync the queues - */ - /* SYNC_MEMORY;*/ - -check_next: - f_mod_no=f&(QDIO_MAX_BUFFERS_PER_Q-1); - if (f==first_not_to_check) - goto out; - switch (slsb[f_mod_no]) { - - /* CU_EMPTY means frontier is reached */ - case SLSB_CU_INPUT_EMPTY: - QDIO_DBF_TEXT5(0,trace,"inptempt"); - break; - - /* P_PRIMED means set slsb to P_PROCESSING and move on */ - case SLSB_P_INPUT_PRIMED: - QDIO_DBF_TEXT5(0,trace,"inptprim"); - -#ifdef QDIO_USE_PROCESSING_STATE - /* - * as soon as running under VM, polling the input queues will - * kill VM in terms of CP overhead - */ - if (q->siga_sync) { - set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count); - } else { - /* set the previous buffer to NOT_INIT. The current - * buffer will be set to PROCESSING at the end of - * this function to avoid further interrupts. */ - if (last_position>=0) - set_slsb(q, &last_position, - SLSB_P_INPUT_NOT_INIT, &count); - atomic_set(&q->polling,1); - last_position=f_mod_no; - } -#else /* QDIO_USE_PROCESSING_STATE */ - set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count); -#endif /* QDIO_USE_PROCESSING_STATE */ - /* - * not needed, as the inbound queue will be synced on the next - * siga-r, resp. tiqdio_is_inbound_q_done will do the siga-s - */ - /*SYNC_MEMORY;*/ - f++; - atomic_dec(&q->number_of_buffers_used); - goto check_next; - - case SLSB_P_INPUT_NOT_INIT: - case SLSB_P_INPUT_PROCESSING: - QDIO_DBF_TEXT5(0,trace,"inpnipro"); - break; - - /* P_ERROR means frontier is reached, break and report error */ - case SLSB_P_INPUT_ERROR: -#ifdef CONFIG_QDIO_DEBUG - sprintf(dbf_text,"inperr%2x",f_mod_no); - QDIO_DBF_TEXT3(1,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256); - - /* kind of process the buffer */ - set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count); - - if (q->qdio_error) - q->error_status_flags|= - QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; - q->qdio_error=SLSB_P_INPUT_ERROR; - q->error_status_flags|=QDIO_STATUS_LOOK_FOR_ERROR; - - /* we increment the frontier, as this buffer - * was processed obviously */ - f_mod_no=(f_mod_no+1)&(QDIO_MAX_BUFFERS_PER_Q-1); - atomic_dec(&q->number_of_buffers_used); - -#ifdef QDIO_USE_PROCESSING_STATE - last_position=-1; -#endif /* QDIO_USE_PROCESSING_STATE */ - - break; - - /* everything else means frontier not changed (HALTED or so) */ - default: - break; - } -out: - q->first_to_check=f_mod_no; - -#ifdef QDIO_USE_PROCESSING_STATE - if (last_position>=0) - set_slsb(q, &last_position, SLSB_P_INPUT_PROCESSING, &count); -#endif /* QDIO_USE_PROCESSING_STATE */ - - QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); - - return q->first_to_check; -} - -static int -qdio_has_inbound_q_moved(struct qdio_q *q) -{ - int i; - - i=qdio_get_inbound_buffer_frontier(q); - if ( (i!=GET_SAVED_FRONTIER(q)) || - (q->error_status_flags&QDIO_STATUS_LOOK_FOR_ERROR) ) { - SAVE_FRONTIER(q,i); - if ((!q->siga_sync)&&(!q->hydra_gives_outbound_pcis)) - SAVE_TIMESTAMP(q); - - QDIO_DBF_TEXT4(0,trace,"inhasmvd"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 1; - } else { - QDIO_DBF_TEXT4(0,trace,"inhsntmv"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 0; - } -} - -/* means, no more buffers to be filled */ -static int -tiqdio_is_inbound_q_done(struct qdio_q *q) -{ - int no_used; - unsigned int start_buf, count; - unsigned char state = 0; - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - no_used=atomic_read(&q->number_of_buffers_used); - - /* propagate the change from 82 to 80 through VM */ - SYNC_MEMORY; - -#ifdef CONFIG_QDIO_DEBUG - if (no_used) { - sprintf(dbf_text,"iqisnt%02x",no_used); - QDIO_DBF_TEXT4(0,trace,dbf_text); - } else { - QDIO_DBF_TEXT4(0,trace,"iniqisdo"); - } - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); -#endif /* CONFIG_QDIO_DEBUG */ - - if (!no_used) - return 1; - if (irq->is_qebsm) { - count = 1; - start_buf = q->first_to_check; - qdio_do_eqbs(q, &state, &start_buf, &count); - } else - state = q->slsb.acc.val[q->first_to_check]; - if (state != SLSB_P_INPUT_PRIMED) - /* - * nothing more to do, if next buffer is not PRIMED. - * note that we did a SYNC_MEMORY before, that there - * has been a sychnronization. - * we will return 0 below, as there is nothing to do - * (stop_polling not necessary, as we have not been - * using the PROCESSING state - */ - return 0; - - /* - * ok, the next input buffer is primed. that means, that device state - * change indicator and adapter local summary are set, so we will find - * it next time. - * we will return 0 below, as there is nothing to do, except scheduling - * ourselves for the next time. - */ - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - tiqdio_sched_tl(); - return 0; -} - -static int -qdio_is_inbound_q_done(struct qdio_q *q) -{ - int no_used; - unsigned int start_buf, count; - unsigned char state = 0; - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - no_used=atomic_read(&q->number_of_buffers_used); - - /* - * we need that one for synchronization with the adapter, as it - * does a kind of PCI avoidance - */ - SYNC_MEMORY; - - if (!no_used) { - QDIO_DBF_TEXT4(0,trace,"inqisdnA"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 1; - } - if (irq->is_qebsm) { - count = 1; - start_buf = q->first_to_check; - qdio_do_eqbs(q, &state, &start_buf, &count); - } else - state = q->slsb.acc.val[q->first_to_check]; - if (state == SLSB_P_INPUT_PRIMED) { - /* we got something to do */ - QDIO_DBF_TEXT4(0,trace,"inqisntA"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - return 0; - } - - /* on VM, we don't poll, so the q is always done here */ - if (q->siga_sync) - return 1; - if (q->hydra_gives_outbound_pcis) - return 1; - - /* - * at this point we know, that inbound first_to_check - * has (probably) not moved (see qdio_inbound_processing) - */ - if (NOW>GET_SAVED_TIMESTAMP(q)+q->timing.threshold) { -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT4(0,trace,"inqisdon"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - sprintf(dbf_text,"pf%02xcn%02x",q->first_to_check,no_used); - QDIO_DBF_TEXT4(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - return 1; - } else { -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT4(0,trace,"inqisntd"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - sprintf(dbf_text,"pf%02xcn%02x",q->first_to_check,no_used); - QDIO_DBF_TEXT4(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - return 0; - } -} - -static void -qdio_kick_inbound_handler(struct qdio_q *q) -{ - int count, start, end, real_end, i; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; -#endif - - QDIO_DBF_TEXT4(0,trace,"kickinh"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - start=q->first_element_to_kick; - real_end=q->first_to_check; - end=(real_end+QDIO_MAX_BUFFERS_PER_Q-1)&(QDIO_MAX_BUFFERS_PER_Q-1); - - i=start; - count=0; - while (1) { - count++; - if (i==end) - break; - i=(i+1)&(QDIO_MAX_BUFFERS_PER_Q-1); - } - -#ifdef CONFIG_QDIO_DEBUG - sprintf(dbf_text,"s=%2xc=%2x",start,count); - QDIO_DBF_TEXT4(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if (likely(q->state==QDIO_IRQ_STATE_ACTIVE)) - q->handler(q->cdev, - QDIO_STATUS_INBOUND_INT|q->error_status_flags, - q->qdio_error,q->siga_error,q->q_no,start,count, - q->int_parm); - - /* for the next time: */ - q->first_element_to_kick=real_end; - q->qdio_error=0; - q->siga_error=0; - q->error_status_flags=0; - - qdio_perf_stat_inc(&perf_stats.inbound_cnt); -} - -static void -__tiqdio_inbound_processing(struct qdio_q *q, int spare_ind_was_set) -{ - struct qdio_irq *irq_ptr; - struct qdio_q *oq; - int i; - - QDIO_DBF_TEXT4(0,trace,"iqinproc"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - /* - * we first want to reserve the q, so that we know, that we don't - * interrupt ourselves and call qdio_unmark_q, as is_in_shutdown might - * be set - */ - if (unlikely(qdio_reserve_q(q))) { - qdio_release_q(q); - qdio_perf_stat_inc(&perf_stats.inbound_thin_tl_runs_resched); - /* - * as we might just be about to stop polling, we make - * sure that we check again at least once more - */ - tiqdio_sched_tl(); - return; - } - qdio_perf_stat_inc(&perf_stats.inbound_thin_tl_runs); - if (unlikely(atomic_read(&q->is_in_shutdown))) { - qdio_unmark_q(q); - goto out; - } - - /* - * we reset spare_ind_was_set, when the queue does not use the - * spare indicator - */ - if (spare_ind_was_set) - spare_ind_was_set = (q->dev_st_chg_ind == &spare_indicator); - - if (!(*(q->dev_st_chg_ind)) && !spare_ind_was_set) - goto out; - /* - * q->dev_st_chg_ind is the indicator, be it shared or not. - * only clear it, if indicator is non-shared - */ - if (q->dev_st_chg_ind != &spare_indicator) - tiqdio_clear_summary_bit((__u32*)q->dev_st_chg_ind); - - if (q->hydra_gives_outbound_pcis) { - if (!q->siga_sync_done_on_thinints) { - SYNC_MEMORY_ALL; - } else if (!q->siga_sync_done_on_outb_tis) { - SYNC_MEMORY_ALL_OUTB; - } - } else { - SYNC_MEMORY; - } - /* - * maybe we have to do work on our outbound queues... at least - * we have to check the outbound-int-capable thinint-capable - * queues - */ - if (q->hydra_gives_outbound_pcis) { - irq_ptr = (struct qdio_irq*)q->irq_ptr; - for (i=0;ino_output_qs;i++) { - oq = irq_ptr->output_qs[i]; - if (!qdio_is_outbound_q_done(oq)) { - qdio_perf_stat_dec(&perf_stats.tl_runs); - __qdio_outbound_processing(oq); - } - } - } - - if (!qdio_has_inbound_q_moved(q)) - goto out; - - qdio_kick_inbound_handler(q); - if (tiqdio_is_inbound_q_done(q)) - if (!qdio_stop_polling(q)) { - /* - * we set the flags to get into the stuff next time, - * see also comment in qdio_stop_polling - */ - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - tiqdio_sched_tl(); - } -out: - qdio_release_q(q); -} - -static void -tiqdio_inbound_processing(unsigned long q) -{ - __tiqdio_inbound_processing((struct qdio_q *) q, - atomic_read(&spare_indicator_usecount)); -} - -static void -__qdio_inbound_processing(struct qdio_q *q) -{ - int q_laps=0; - - QDIO_DBF_TEXT4(0,trace,"qinproc"); - QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); - - if (unlikely(qdio_reserve_q(q))) { - qdio_release_q(q); - qdio_perf_stat_inc(&perf_stats.inbound_tl_runs_resched); - /* as we're sissies, we'll check next time */ - if (likely(!atomic_read(&q->is_in_shutdown))) { - qdio_mark_q(q); - QDIO_DBF_TEXT4(0,trace,"busy,agn"); - } - return; - } - qdio_perf_stat_inc(&perf_stats.inbound_tl_runs); - qdio_perf_stat_inc(&perf_stats.tl_runs); - -again: - if (qdio_has_inbound_q_moved(q)) { - qdio_kick_inbound_handler(q); - if (!qdio_stop_polling(q)) { - q_laps++; - if (q_lapssiga_sync) - return 2; - - if (unlikely(qdio_reserve_q(q))) { - qdio_release_q(q); - qdio_perf_stat_inc(&perf_stats.inbound_thin_tl_runs_resched); - /* - * as we might just be about to stop polling, we make - * sure that we check again at least once more - */ - - /* - * sanity -- we'd get here without setting the - * dev st chg ind - */ - tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind); - tiqdio_sched_tl(); - return 0; - } - if (qdio_stop_polling(q)) { - qdio_release_q(q); - return 2; - } - if (q_lapsdev_st_chg_ind); - tiqdio_sched_tl(); - qdio_release_q(q); - return 1; - -} -#endif /* QDIO_USE_PROCESSING_STATE */ - -static void -tiqdio_inbound_checks(void) -{ - struct qdio_q *q; - int spare_ind_was_set=0; -#ifdef QDIO_USE_PROCESSING_STATE - int q_laps=0; -#endif /* QDIO_USE_PROCESSING_STATE */ - - QDIO_DBF_TEXT4(0,trace,"iqdinbck"); - QDIO_DBF_TEXT5(0,trace,"iqlocsum"); - -#ifdef QDIO_USE_PROCESSING_STATE -again: -#endif /* QDIO_USE_PROCESSING_STATE */ - - /* when the spare indicator is used and set, save that and clear it */ - if ((atomic_read(&spare_indicator_usecount)) && spare_indicator) { - spare_ind_was_set = 1; - tiqdio_clear_summary_bit((__u32*)&spare_indicator); - } - - q=(struct qdio_q*)tiq_list; - do { - if (!q) - break; - __tiqdio_inbound_processing(q, spare_ind_was_set); - q=(struct qdio_q*)q->list_next; - } while (q!=(struct qdio_q*)tiq_list); - -#ifdef QDIO_USE_PROCESSING_STATE - q=(struct qdio_q*)tiq_list; - do { - int ret; - - ret = tiqdio_reset_processing_state(q, q_laps); - switch (ret) { - case 0: - return; - case 1: - q_laps++; - case 2: - q = (struct qdio_q*)q->list_next; - break; - default: - q_laps++; - goto again; - } - } while (q!=(struct qdio_q*)tiq_list); -#endif /* QDIO_USE_PROCESSING_STATE */ -} - -static void -tiqdio_tl(unsigned long data) -{ - QDIO_DBF_TEXT4(0,trace,"iqdio_tl"); - - qdio_perf_stat_inc(&perf_stats.tl_runs); - - tiqdio_inbound_checks(); -} - -/********************* GENERAL HELPER_ROUTINES ***********************/ - -static void -qdio_release_irq_memory(struct qdio_irq *irq_ptr) -{ - int i; - struct qdio_q *q; - - for (i = 0; i < QDIO_MAX_QUEUES_PER_IRQ; i++) { - q = irq_ptr->input_qs[i]; - if (q) { - free_page((unsigned long) q->slib); - kmem_cache_free(qdio_q_cache, q); - } - q = irq_ptr->output_qs[i]; - if (q) { - free_page((unsigned long) q->slib); - kmem_cache_free(qdio_q_cache, q); - } - } - free_page((unsigned long) irq_ptr->qdr); - free_page((unsigned long) irq_ptr); -} - -static void -qdio_set_impl_params(struct qdio_irq *irq_ptr, - unsigned int qib_param_field_format, - /* pointer to 128 bytes or NULL, if no param field */ - unsigned char *qib_param_field, - /* pointer to no_queues*128 words of data or NULL */ - unsigned int no_input_qs, - unsigned int no_output_qs, - unsigned long *input_slib_elements, - unsigned long *output_slib_elements) -{ - int i,j; - - if (!irq_ptr) - return; - - irq_ptr->qib.pfmt=qib_param_field_format; - if (qib_param_field) - memcpy(irq_ptr->qib.parm,qib_param_field, - QDIO_MAX_BUFFERS_PER_Q); - - if (input_slib_elements) - for (i=0;iinput_qs[i]->slib->slibe[j].parms= - input_slib_elements[ - i*QDIO_MAX_BUFFERS_PER_Q+j]; - } - if (output_slib_elements) - for (i=0;ioutput_qs[i]->slib->slibe[j].parms= - output_slib_elements[ - i*QDIO_MAX_BUFFERS_PER_Q+j]; - } -} - -static int -qdio_alloc_qs(struct qdio_irq *irq_ptr, - int no_input_qs, int no_output_qs) -{ - int i; - struct qdio_q *q; - - for (i = 0; i < no_input_qs; i++) { - q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); - if (!q) - return -ENOMEM; - memset(q, 0, sizeof(*q)); - - q->slib = (struct slib *) __get_free_page(GFP_KERNEL); - if (!q->slib) { - kmem_cache_free(qdio_q_cache, q); - return -ENOMEM; - } - irq_ptr->input_qs[i]=q; - } - - for (i = 0; i < no_output_qs; i++) { - q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); - if (!q) - return -ENOMEM; - memset(q, 0, sizeof(*q)); - - q->slib = (struct slib *) __get_free_page(GFP_KERNEL); - if (!q->slib) { - kmem_cache_free(qdio_q_cache, q); - return -ENOMEM; - } - irq_ptr->output_qs[i]=q; - } - return 0; -} - -static void -qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, - int no_input_qs, int no_output_qs, - qdio_handler_t *input_handler, - qdio_handler_t *output_handler, - unsigned long int_parm,int q_format, - unsigned long flags, - void **inbound_sbals_array, - void **outbound_sbals_array) -{ - struct qdio_q *q; - int i,j; - char dbf_text[20]; /* see qdio_initialize */ - void *ptr; - int available; - - sprintf(dbf_text,"qfqs%4x",cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - for (i=0;iinput_qs[i]; - - memset(q,0,((char*)&q->slib)-((char*)q)); - sprintf(dbf_text,"in-q%4x",i); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,&q,sizeof(void*)); - - memset(q->slib,0,PAGE_SIZE); - q->sl=(struct sl*)(((char*)q->slib)+PAGE_SIZE/2); - - available=0; - - for (j=0;jsbal[j]=*(inbound_sbals_array++); - - q->queue_type=q_format; - q->int_parm=int_parm; - q->schid = irq_ptr->schid; - q->irq_ptr = irq_ptr; - q->cdev = cdev; - q->mask=1<<(31-i); - q->q_no=i; - q->is_input_q=1; - q->first_to_check=0; - q->last_move_ftc=0; - q->handler=input_handler; - q->dev_st_chg_ind=irq_ptr->dev_st_chg_ind; - - /* q->is_thinint_q isn't valid at this time, but - * irq_ptr->is_thinint_irq is - */ - if (irq_ptr->is_thinint_irq) - tasklet_init(&q->tasklet, tiqdio_inbound_processing, - (unsigned long) q); - else - tasklet_init(&q->tasklet, qdio_inbound_processing, - (unsigned long) q); - - /* actually this is not used for inbound queues. yet. */ - atomic_set(&q->busy_siga_counter,0); - q->timing.busy_start=0; - -/* for (j=0;jtiming.last_transfer_times[j]=(qdio_get_micros()/ - QDIO_STATS_NUMBER)*j; - q->timing.last_transfer_index=QDIO_STATS_NUMBER-1; -*/ - - /* fill in slib */ - if (i>0) irq_ptr->input_qs[i-1]->slib->nsliba= - (unsigned long)(q->slib); - q->slib->sla=(unsigned long)(q->sl); - q->slib->slsba=(unsigned long)(&q->slsb.acc.val[0]); - - /* fill in sl */ - for (j=0;jsl->element[j].sbal=(unsigned long)(q->sbal[j]); - - QDIO_DBF_TEXT2(0,setup,"sl-sb-b0"); - ptr=(void*)q->sl; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - ptr=(void*)&q->slsb; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - ptr=(void*)q->sbal[0]; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - - /* fill in slsb */ - if (!irq_ptr->is_qebsm) { - unsigned int count = 1; - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) - set_slsb(q, &j, SLSB_P_INPUT_NOT_INIT, &count); - } - } - - for (i=0;ioutput_qs[i]; - memset(q,0,((char*)&q->slib)-((char*)q)); - - sprintf(dbf_text,"outq%4x",i); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,&q,sizeof(void*)); - - memset(q->slib,0,PAGE_SIZE); - q->sl=(struct sl*)(((char*)q->slib)+PAGE_SIZE/2); - - available=0; - - for (j=0;jsbal[j]=*(outbound_sbals_array++); - - q->queue_type=q_format; - if ((q->queue_type == QDIO_IQDIO_QFMT) && - (no_output_qs > 1) && - (i == no_output_qs-1)) - q->queue_type = QDIO_IQDIO_QFMT_ASYNCH; - q->int_parm=int_parm; - q->is_input_q=0; - q->is_pci_out = 0; - q->schid = irq_ptr->schid; - q->cdev = cdev; - q->irq_ptr = irq_ptr; - q->mask=1<<(31-i); - q->q_no=i; - q->first_to_check=0; - q->last_move_ftc=0; - q->handler=output_handler; - - tasklet_init(&q->tasklet, qdio_outbound_processing, - (unsigned long) q); - setup_timer(&q->timer, qdio_outbound_processing, - (unsigned long) q); - - atomic_set(&q->busy_siga_counter,0); - q->timing.busy_start=0; - - /* fill in slib */ - if (i>0) irq_ptr->output_qs[i-1]->slib->nsliba= - (unsigned long)(q->slib); - q->slib->sla=(unsigned long)(q->sl); - q->slib->slsba=(unsigned long)(&q->slsb.acc.val[0]); - - /* fill in sl */ - for (j=0;jsl->element[j].sbal=(unsigned long)(q->sbal[j]); - - QDIO_DBF_TEXT2(0,setup,"sl-sb-b0"); - ptr=(void*)q->sl; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - ptr=(void*)&q->slsb; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - ptr=(void*)q->sbal[0]; - QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); - - /* fill in slsb */ - if (!irq_ptr->is_qebsm) { - unsigned int count = 1; - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) - set_slsb(q, &j, SLSB_P_OUTPUT_NOT_INIT, &count); - } - } -} - -static void -qdio_fill_thresholds(struct qdio_irq *irq_ptr, - unsigned int no_input_qs, - unsigned int no_output_qs, - unsigned int min_input_threshold, - unsigned int max_input_threshold, - unsigned int min_output_threshold, - unsigned int max_output_threshold) -{ - int i; - struct qdio_q *q; - - for (i=0;iinput_qs[i]; - q->timing.threshold=max_input_threshold; -/* for (j=0;jthreshold_classes[j].threshold= - min_input_threshold+ - (max_input_threshold-min_input_threshold)/ - QDIO_STATS_CLASSES; - } - qdio_use_thresholds(q,QDIO_STATS_CLASSES/2);*/ - } - for (i=0;ioutput_qs[i]; - q->timing.threshold=max_output_threshold; -/* for (j=0;jthreshold_classes[j].threshold= - min_output_threshold+ - (max_output_threshold-min_output_threshold)/ - QDIO_STATS_CLASSES; - } - qdio_use_thresholds(q,QDIO_STATS_CLASSES/2);*/ - } -} - -static void tiqdio_thinint_handler(void *ind, void *drv_data) -{ - QDIO_DBF_TEXT4(0,trace,"thin_int"); - - qdio_perf_stat_inc(&perf_stats.thinints); - - /* SVS only when needed: - * issue SVS to benefit from iqdio interrupt avoidance - * (SVS clears AISOI)*/ - if (!omit_svs) - tiqdio_clear_global_summary(); - - tiqdio_inbound_checks(); -} - -static void -qdio_set_state(struct qdio_irq *irq_ptr, enum qdio_irq_states state) -{ - int i; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]; - - QDIO_DBF_TEXT5(0,trace,"newstate"); - sprintf(dbf_text,"%4x%4x",irq_ptr->schid.sch_no,state); - QDIO_DBF_TEXT5(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - irq_ptr->state=state; - for (i=0;ino_input_qs;i++) - irq_ptr->input_qs[i]->state=state; - for (i=0;ino_output_qs;i++) - irq_ptr->output_qs[i]->state=state; - mb(); -} - -static void -qdio_irq_check_sense(struct subchannel_id schid, struct irb *irb) -{ - char dbf_text[15]; - - if (irb->esw.esw0.erw.cons) { - sprintf(dbf_text,"sens%4x",schid.sch_no); - QDIO_DBF_TEXT2(1,trace,dbf_text); - QDIO_DBF_HEX0(0,sense,irb,QDIO_DBF_SENSE_LEN); - - QDIO_PRINT_WARN("sense data available on qdio channel.\n"); - QDIO_HEXDUMP16(WARN,"irb: ",irb); - QDIO_HEXDUMP16(WARN,"sense data: ",irb->ecw); - } - -} - -static void -qdio_handle_pci(struct qdio_irq *irq_ptr) -{ - int i; - struct qdio_q *q; - - qdio_perf_stat_inc(&perf_stats.pcis); - for (i=0;ino_input_qs;i++) { - q=irq_ptr->input_qs[i]; - if (q->is_input_q&QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT) - qdio_mark_q(q); - else { - qdio_perf_stat_dec(&perf_stats.tl_runs); - __qdio_inbound_processing(q); - } - } - if (!irq_ptr->hydra_gives_outbound_pcis) - return; - for (i=0;ino_output_qs;i++) { - q=irq_ptr->output_qs[i]; - if (qdio_is_outbound_q_done(q)) - continue; - qdio_perf_stat_dec(&perf_stats.tl_runs); - if (!irq_ptr->sync_done_on_outb_pcis) - SYNC_MEMORY; - __qdio_outbound_processing(q); - } -} - -static void qdio_establish_handle_irq(struct ccw_device*, int, int); - -static void -qdio_handle_activate_check(struct ccw_device *cdev, unsigned long intparm, - int cstat, int dstat) -{ - struct qdio_irq *irq_ptr; - struct qdio_q *q; - char dbf_text[15]; - - irq_ptr = cdev->private->qdio_data; - - QDIO_DBF_TEXT2(1, trace, "ick2"); - sprintf(dbf_text,"%s", cdev->dev.bus_id); - QDIO_DBF_TEXT2(1,trace,dbf_text); - QDIO_DBF_HEX2(0,trace,&intparm,sizeof(int)); - QDIO_DBF_HEX2(0,trace,&dstat,sizeof(int)); - QDIO_DBF_HEX2(0,trace,&cstat,sizeof(int)); - QDIO_PRINT_ERR("received check condition on activate " \ - "queues on device %s (cs=x%x, ds=x%x).\n", - cdev->dev.bus_id, cstat, dstat); - if (irq_ptr->no_input_qs) { - q=irq_ptr->input_qs[0]; - } else if (irq_ptr->no_output_qs) { - q=irq_ptr->output_qs[0]; - } else { - QDIO_PRINT_ERR("oops... no queue registered for device %s!?\n", - cdev->dev.bus_id); - goto omit_handler_call; - } - q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION| - QDIO_STATUS_LOOK_FOR_ERROR, - 0,0,0,-1,-1,q->int_parm); -omit_handler_call: - qdio_set_state(irq_ptr,QDIO_IRQ_STATE_STOPPED); - -} - -static void -qdio_call_shutdown(struct work_struct *work) -{ - struct ccw_device_private *priv; - struct ccw_device *cdev; - - priv = container_of(work, struct ccw_device_private, kick_work); - cdev = priv->cdev; - qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); - put_device(&cdev->dev); -} - -static void -qdio_timeout_handler(struct ccw_device *cdev) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - - QDIO_DBF_TEXT2(0, trace, "qtoh"); - sprintf(dbf_text, "%s", cdev->dev.bus_id); - QDIO_DBF_TEXT2(0, trace, dbf_text); - - irq_ptr = cdev->private->qdio_data; - sprintf(dbf_text, "state:%d", irq_ptr->state); - QDIO_DBF_TEXT2(0, trace, dbf_text); - - switch (irq_ptr->state) { - case QDIO_IRQ_STATE_INACTIVE: - QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: timed out\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - QDIO_DBF_TEXT2(1,setup,"eq:timeo"); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - break; - case QDIO_IRQ_STATE_CLEANUP: - QDIO_PRINT_INFO("Did not get interrupt on cleanup, " - "irq=0.%x.%x.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - break; - case QDIO_IRQ_STATE_ESTABLISHED: - case QDIO_IRQ_STATE_ACTIVE: - /* I/O has been terminated by common I/O layer. */ - QDIO_PRINT_INFO("Queues on irq 0.%x.%04x killed by cio.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - QDIO_DBF_TEXT2(1, trace, "cio:term"); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); - if (get_device(&cdev->dev)) { - /* Can't call shutdown from interrupt context. */ - PREPARE_WORK(&cdev->private->kick_work, - qdio_call_shutdown); - queue_work(ccw_device_work, &cdev->private->kick_work); - } - break; - default: - BUG(); - } - wake_up(&cdev->private->wait_q); -} - -static void -qdio_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) -{ - struct qdio_irq *irq_ptr; - int cstat,dstat; - char dbf_text[15]; - -#ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT4(0, trace, "qint"); - sprintf(dbf_text, "%s", cdev->dev.bus_id); - QDIO_DBF_TEXT4(0, trace, dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if (!intparm) { - QDIO_PRINT_ERR("got unsolicited interrupt in qdio " \ - "handler, device %s\n", cdev->dev.bus_id); - return; - } - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) { - QDIO_DBF_TEXT2(1, trace, "uint"); - sprintf(dbf_text,"%s", cdev->dev.bus_id); - QDIO_DBF_TEXT2(1,trace,dbf_text); - QDIO_PRINT_ERR("received interrupt on unused device %s!\n", - cdev->dev.bus_id); - return; - } - - if (IS_ERR(irb)) { - /* Currently running i/o is in error. */ - switch (PTR_ERR(irb)) { - case -EIO: - QDIO_PRINT_ERR("i/o error on device %s\n", - cdev->dev.bus_id); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - wake_up(&cdev->private->wait_q); - return; - case -ETIMEDOUT: - qdio_timeout_handler(cdev); - return; - default: - QDIO_PRINT_ERR("unknown error state %ld on device %s\n", - PTR_ERR(irb), cdev->dev.bus_id); - return; - } - } - - qdio_irq_check_sense(irq_ptr->schid, irb); - -#ifdef CONFIG_QDIO_DEBUG - sprintf(dbf_text, "state:%d", irq_ptr->state); - QDIO_DBF_TEXT4(0, trace, dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; - - switch (irq_ptr->state) { - case QDIO_IRQ_STATE_INACTIVE: - qdio_establish_handle_irq(cdev, cstat, dstat); - break; - - case QDIO_IRQ_STATE_CLEANUP: - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); - break; - - case QDIO_IRQ_STATE_ESTABLISHED: - case QDIO_IRQ_STATE_ACTIVE: - if (cstat & SCHN_STAT_PCI) { - qdio_handle_pci(irq_ptr); - break; - } - - if ((cstat&~SCHN_STAT_PCI)||dstat) { - qdio_handle_activate_check(cdev, intparm, cstat, dstat); - break; - } - default: - QDIO_PRINT_ERR("got interrupt for queues in state %d on " \ - "device %s?!\n", - irq_ptr->state, cdev->dev.bus_id); - } - wake_up(&cdev->private->wait_q); - -} - -int -qdio_synchronize(struct ccw_device *cdev, unsigned int flags, - unsigned int queue_number) -{ - int cc = 0; - struct qdio_q *q; - struct qdio_irq *irq_ptr; - void *ptr; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[15]="SyncXXXX"; -#endif - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - -#ifdef CONFIG_QDIO_DEBUG - *((int*)(&dbf_text[4])) = irq_ptr->schid.sch_no; - QDIO_DBF_HEX4(0,trace,dbf_text,QDIO_DBF_TRACE_LEN); - *((int*)(&dbf_text[0]))=flags; - *((int*)(&dbf_text[4]))=queue_number; - QDIO_DBF_HEX4(0,trace,dbf_text,QDIO_DBF_TRACE_LEN); -#endif /* CONFIG_QDIO_DEBUG */ - - if (flags&QDIO_FLAG_SYNC_INPUT) { - q=irq_ptr->input_qs[queue_number]; - if (!q) - return -EINVAL; - if (!(irq_ptr->is_qebsm)) - cc = do_siga_sync(q->schid, 0, q->mask); - } else if (flags&QDIO_FLAG_SYNC_OUTPUT) { - q=irq_ptr->output_qs[queue_number]; - if (!q) - return -EINVAL; - if (!(irq_ptr->is_qebsm)) - cc = do_siga_sync(q->schid, q->mask, 0); - } else - return -EINVAL; - - ptr=&cc; - if (cc) - QDIO_DBF_HEX3(0,trace,&ptr,sizeof(int)); - - return cc; -} - -static int -qdio_get_ssqd_information(struct subchannel_id *schid, - struct qdio_chsc_ssqd **ssqd_area) -{ - int result; - - QDIO_DBF_TEXT0(0, setup, "getssqd"); - *ssqd_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC); - if (!ssqd_area) { - QDIO_PRINT_WARN("Could not get memory for chsc on sch x%x.\n", - schid->sch_no); - return -ENOMEM; - } - - (*ssqd_area)->request = (struct chsc_header) { - .length = 0x0010, - .code = 0x0024, - }; - (*ssqd_area)->first_sch = schid->sch_no; - (*ssqd_area)->last_sch = schid->sch_no; - (*ssqd_area)->ssid = schid->ssid; - result = chsc(*ssqd_area); - - if (result) { - QDIO_PRINT_WARN("CHSC returned cc %i on sch 0.%x.%x.\n", - result, schid->ssid, schid->sch_no); - goto out; - } - - if ((*ssqd_area)->response.code != QDIO_CHSC_RESPONSE_CODE_OK) { - QDIO_PRINT_WARN("CHSC response is 0x%x on sch 0.%x.%x.\n", - (*ssqd_area)->response.code, - schid->ssid, schid->sch_no); - goto out; - } - if (!((*ssqd_area)->flags & CHSC_FLAG_QDIO_CAPABILITY) || - !((*ssqd_area)->flags & CHSC_FLAG_VALIDITY) || - ((*ssqd_area)->sch != schid->sch_no)) { - QDIO_PRINT_WARN("huh? problems checking out sch 0.%x.%x... " \ - "using all SIGAs.\n", - schid->ssid, schid->sch_no); - goto out; - } - return 0; -out: - return -EINVAL; -} - -int -qdio_get_ssqd_pct(struct ccw_device *cdev) -{ - struct qdio_chsc_ssqd *ssqd_area; - struct subchannel_id schid; - char dbf_text[15]; - int rc; - int pct = 0; - - QDIO_DBF_TEXT0(0, setup, "getpct"); - schid = ccw_device_get_subchannel_id(cdev); - rc = qdio_get_ssqd_information(&schid, &ssqd_area); - if (!rc) - pct = (int)ssqd_area->pct; - if (rc != -ENOMEM) - mempool_free(ssqd_area, qdio_mempool_scssc); - sprintf(dbf_text, "pct: %d", pct); - QDIO_DBF_TEXT2(0, setup, dbf_text); - return pct; -} -EXPORT_SYMBOL(qdio_get_ssqd_pct); - -static void -qdio_check_subchannel_qebsm(struct qdio_irq *irq_ptr, unsigned long token) -{ - struct qdio_q *q; - int i; - unsigned int count, start_buf; - char dbf_text[15]; - - /*check if QEBSM is disabled */ - if (!(irq_ptr->is_qebsm) || !(irq_ptr->qdioac & 0x01)) { - irq_ptr->is_qebsm = 0; - irq_ptr->sch_token = 0; - irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM; - QDIO_DBF_TEXT0(0,setup,"noV=V"); - return; - } - irq_ptr->sch_token = token; - /*input queue*/ - for (i = 0; i < irq_ptr->no_input_qs;i++) { - q = irq_ptr->input_qs[i]; - count = QDIO_MAX_BUFFERS_PER_Q; - start_buf = 0; - set_slsb(q, &start_buf, SLSB_P_INPUT_NOT_INIT, &count); - } - sprintf(dbf_text,"V=V:%2x",irq_ptr->is_qebsm); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"%8lx",irq_ptr->sch_token); - QDIO_DBF_TEXT0(0,setup,dbf_text); - /*output queue*/ - for (i = 0; i < irq_ptr->no_output_qs; i++) { - q = irq_ptr->output_qs[i]; - count = QDIO_MAX_BUFFERS_PER_Q; - start_buf = 0; - set_slsb(q, &start_buf, SLSB_P_OUTPUT_NOT_INIT, &count); - } -} - -static void -qdio_get_ssqd_siga(struct qdio_irq *irq_ptr) -{ - int rc; - struct qdio_chsc_ssqd *ssqd_area; - - QDIO_DBF_TEXT0(0,setup,"getssqd"); - irq_ptr->qdioac = 0; - rc = qdio_get_ssqd_information(&irq_ptr->schid, &ssqd_area); - if (rc) { - QDIO_PRINT_WARN("using all SIGAs for sch x%x.n", - irq_ptr->schid.sch_no); - irq_ptr->qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY | - CHSC_FLAG_SIGA_OUTPUT_NECESSARY | - CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */ - irq_ptr->is_qebsm = 0; - } else - irq_ptr->qdioac = ssqd_area->qdioac1; - - qdio_check_subchannel_qebsm(irq_ptr, ssqd_area->sch_token); - if (rc != -ENOMEM) - mempool_free(ssqd_area, qdio_mempool_scssc); -} - -static unsigned int -tiqdio_check_chsc_availability(void) -{ - char dbf_text[15]; - - /* Check for bit 41. */ - if (!css_general_characteristics.aif) { - QDIO_PRINT_WARN("Adapter interruption facility not " \ - "installed.\n"); - return -ENOENT; - } - - /* Check for bits 107 and 108. */ - if (!css_chsc_characteristics.scssc || - !css_chsc_characteristics.scsscf) { - QDIO_PRINT_WARN("Set Chan Subsys. Char. & Fast-CHSCs " \ - "not available.\n"); - return -ENOENT; - } - - /* Check for OSA/FCP thin interrupts (bit 67). */ - hydra_thinints = css_general_characteristics.aif_osa; - sprintf(dbf_text,"hydrati%1x", hydra_thinints); - QDIO_DBF_TEXT0(0,setup,dbf_text); - -#ifdef CONFIG_64BIT - /* Check for QEBSM support in general (bit 58). */ - is_passthrough = css_general_characteristics.qebsm; -#endif - sprintf(dbf_text,"cssQBS:%1x", is_passthrough); - QDIO_DBF_TEXT0(0,setup,dbf_text); - - /* Check for aif time delay disablement fac (bit 56). If installed, - * omit svs even under lpar (good point by rick again) */ - omit_svs = css_general_characteristics.aif_tdd; - sprintf(dbf_text,"omitsvs%1x", omit_svs); - QDIO_DBF_TEXT0(0,setup,dbf_text); - return 0; -} - - -static unsigned int -tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero) -{ - unsigned long real_addr_local_summary_bit; - unsigned long real_addr_dev_st_chg_ind; - void *ptr; - char dbf_text[15]; - - unsigned int resp_code; - int result; - - struct { - struct chsc_header request; - u16 operation_code; - u16 reserved1; - u32 reserved2; - u32 reserved3; - u64 summary_indicator_addr; - u64 subchannel_indicator_addr; - u32 ks:4; - u32 kc:4; - u32 reserved4:21; - u32 isc:3; - u32 word_with_d_bit; - /* set to 0x10000000 to enable - * time delay disablement facility */ - u32 reserved5; - struct subchannel_id schid; - u32 reserved6[1004]; - struct chsc_header response; - u32 reserved7; - } *scssc_area; - - if (!irq_ptr->is_thinint_irq) - return -ENODEV; - - if (reset_to_zero) { - real_addr_local_summary_bit=0; - real_addr_dev_st_chg_ind=0; - } else { - real_addr_local_summary_bit= - virt_to_phys((volatile void *)tiqdio_ind); - real_addr_dev_st_chg_ind= - virt_to_phys((volatile void *)irq_ptr->dev_st_chg_ind); - } - - scssc_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC); - if (!scssc_area) { - QDIO_PRINT_WARN("No memory for setting indicators on " \ - "subchannel 0.%x.%x.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - return -ENOMEM; - } - scssc_area->request = (struct chsc_header) { - .length = 0x0fe0, - .code = 0x0021, - }; - scssc_area->operation_code = 0; - - scssc_area->summary_indicator_addr = real_addr_local_summary_bit; - scssc_area->subchannel_indicator_addr = real_addr_dev_st_chg_ind; - scssc_area->ks = QDIO_STORAGE_KEY; - scssc_area->kc = QDIO_STORAGE_KEY; - scssc_area->isc = TIQDIO_THININT_ISC; - scssc_area->schid = irq_ptr->schid; - /* enables the time delay disablement facility. Don't care - * whether it is really there (i.e. we haven't checked for - * it) */ - if (css_general_characteristics.aif_tdd) - scssc_area->word_with_d_bit = 0x10000000; - else - QDIO_PRINT_WARN("Time delay disablement facility " \ - "not available\n"); - - result = chsc(scssc_area); - if (result) { - QDIO_PRINT_WARN("could not set indicators on irq 0.%x.%x, " \ - "cc=%i.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no,result); - result = -EIO; - goto out; - } - - resp_code = scssc_area->response.code; - if (resp_code!=QDIO_CHSC_RESPONSE_CODE_OK) { - QDIO_PRINT_WARN("response upon setting indicators " \ - "is 0x%x.\n",resp_code); - sprintf(dbf_text,"sidR%4x",resp_code); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT1(0,setup,dbf_text); - ptr=&scssc_area->response; - QDIO_DBF_HEX2(1,setup,&ptr,QDIO_DBF_SETUP_LEN); - result = -EIO; - goto out; - } - - QDIO_DBF_TEXT2(0,setup,"setscind"); - QDIO_DBF_HEX2(0,setup,&real_addr_local_summary_bit, - sizeof(unsigned long)); - QDIO_DBF_HEX2(0,setup,&real_addr_dev_st_chg_ind,sizeof(unsigned long)); - result = 0; -out: - mempool_free(scssc_area, qdio_mempool_scssc); - return result; - -} - -static unsigned int -tiqdio_set_delay_target(struct qdio_irq *irq_ptr, unsigned long delay_target) -{ - unsigned int resp_code; - int result; - void *ptr; - char dbf_text[15]; - - struct { - struct chsc_header request; - u16 operation_code; - u16 reserved1; - u32 reserved2; - u32 reserved3; - u32 reserved4[2]; - u32 delay_target; - u32 reserved5[1009]; - struct chsc_header response; - u32 reserved6; - } *scsscf_area; - - if (!irq_ptr->is_thinint_irq) - return -ENODEV; - - scsscf_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC); - if (!scsscf_area) { - QDIO_PRINT_WARN("No memory for setting delay target on " \ - "subchannel 0.%x.%x.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - return -ENOMEM; - } - scsscf_area->request = (struct chsc_header) { - .length = 0x0fe0, - .code = 0x1027, - }; - - scsscf_area->delay_target = delay_target<<16; - - result=chsc(scsscf_area); - if (result) { - QDIO_PRINT_WARN("could not set delay target on irq 0.%x.%x, " \ - "cc=%i. Continuing.\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - result); - result = -EIO; - goto out; - } - - resp_code = scsscf_area->response.code; - if (resp_code!=QDIO_CHSC_RESPONSE_CODE_OK) { - QDIO_PRINT_WARN("response upon setting delay target " \ - "is 0x%x. Continuing.\n",resp_code); - sprintf(dbf_text,"sdtR%4x",resp_code); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT1(0,setup,dbf_text); - ptr=&scsscf_area->response; - QDIO_DBF_HEX2(1,trace,&ptr,QDIO_DBF_TRACE_LEN); - } - QDIO_DBF_TEXT2(0,trace,"delytrgt"); - QDIO_DBF_HEX2(0,trace,&delay_target,sizeof(unsigned long)); - result = 0; /* not critical */ -out: - mempool_free(scsscf_area, qdio_mempool_scssc); - return result; -} - -int -qdio_cleanup(struct ccw_device *cdev, int how) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - int rc; - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - - sprintf(dbf_text,"qcln%4x",irq_ptr->schid.sch_no); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT0(0,setup,dbf_text); - - rc = qdio_shutdown(cdev, how); - if ((rc == 0) || (rc == -EINPROGRESS)) - rc = qdio_free(cdev); - return rc; -} - -int -qdio_shutdown(struct ccw_device *cdev, int how) -{ - struct qdio_irq *irq_ptr; - int i; - int result = 0; - int rc; - unsigned long flags; - int timeout; - char dbf_text[15]; - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - - down(&irq_ptr->setting_up_sema); - - sprintf(dbf_text,"qsqs%4x",irq_ptr->schid.sch_no); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT0(0,setup,dbf_text); - - /* mark all qs as uninteresting */ - for (i=0;ino_input_qs;i++) - atomic_set(&irq_ptr->input_qs[i]->is_in_shutdown,1); - - for (i=0;ino_output_qs;i++) - atomic_set(&irq_ptr->output_qs[i]->is_in_shutdown,1); - - tasklet_kill(&tiqdio_tasklet); - - for (i=0;ino_input_qs;i++) { - qdio_unmark_q(irq_ptr->input_qs[i]); - tasklet_kill(&irq_ptr->input_qs[i]->tasklet); - wait_event_interruptible_timeout(cdev->private->wait_q, - !atomic_read(&irq_ptr-> - input_qs[i]-> - use_count), - QDIO_NO_USE_COUNT_TIMEOUT); - if (atomic_read(&irq_ptr->input_qs[i]->use_count)) - result=-EINPROGRESS; - } - - for (i=0;ino_output_qs;i++) { - tasklet_kill(&irq_ptr->output_qs[i]->tasklet); - del_timer(&irq_ptr->output_qs[i]->timer); - wait_event_interruptible_timeout(cdev->private->wait_q, - !atomic_read(&irq_ptr-> - output_qs[i]-> - use_count), - QDIO_NO_USE_COUNT_TIMEOUT); - if (atomic_read(&irq_ptr->output_qs[i]->use_count)) - result=-EINPROGRESS; - } - - /* cleanup subchannel */ - spin_lock_irqsave(get_ccwdev_lock(cdev),flags); - if (how&QDIO_FLAG_CLEANUP_USING_CLEAR) { - rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP); - timeout=QDIO_CLEANUP_CLEAR_TIMEOUT; - } else if (how&QDIO_FLAG_CLEANUP_USING_HALT) { - rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP); - timeout=QDIO_CLEANUP_HALT_TIMEOUT; - } else { /* default behaviour */ - rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP); - timeout=QDIO_CLEANUP_HALT_TIMEOUT; - } - if (rc == -ENODEV) { - /* No need to wait for device no longer present. */ - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); - } else if (((void *)cdev->handler != (void *)qdio_handler) && rc == 0) { - /* - * Whoever put another handler there, has to cope with the - * interrupt theirself. Might happen if qdio_shutdown was - * called on already shutdown queues, but this shouldn't have - * bad side effects. - */ - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); - } else if (rc == 0) { - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP); - spin_unlock_irqrestore(get_ccwdev_lock(cdev),flags); - - wait_event_interruptible_timeout(cdev->private->wait_q, - irq_ptr->state == QDIO_IRQ_STATE_INACTIVE || - irq_ptr->state == QDIO_IRQ_STATE_ERR, - timeout); - } else { - QDIO_PRINT_INFO("ccw_device_{halt,clear} returned %d for " - "device %s\n", result, cdev->dev.bus_id); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); - result = rc; - goto out; - } - if (irq_ptr->is_thinint_irq) { - qdio_put_indicator((__u32*)irq_ptr->dev_st_chg_ind); - tiqdio_set_subchannel_ind(irq_ptr,1); - /* reset adapter interrupt indicators */ - } - - /* exchange int handlers, if necessary */ - if ((void*)cdev->handler == (void*)qdio_handler) - cdev->handler=irq_ptr->original_int_handler; - - /* Ignore errors. */ - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); -out: - up(&irq_ptr->setting_up_sema); - return result; -} - -int -qdio_free(struct ccw_device *cdev) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - - down(&irq_ptr->setting_up_sema); - - sprintf(dbf_text,"qfqs%4x",irq_ptr->schid.sch_no); - QDIO_DBF_TEXT1(0,trace,dbf_text); - QDIO_DBF_TEXT0(0,setup,dbf_text); - - cdev->private->qdio_data = NULL; - - up(&irq_ptr->setting_up_sema); - - qdio_release_irq_memory(irq_ptr); - module_put(THIS_MODULE); - return 0; -} - -static void -qdio_allocate_do_dbf(struct qdio_initialize *init_data) -{ - char dbf_text[20]; /* if a printf printed out more than 8 chars */ - - sprintf(dbf_text,"qfmt:%x",init_data->q_format); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,init_data->adapter_name,8); - sprintf(dbf_text,"qpff%4x",init_data->qib_param_field_format); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,&init_data->qib_param_field,sizeof(char*)); - QDIO_DBF_HEX0(0,setup,&init_data->input_slib_elements,sizeof(long*)); - QDIO_DBF_HEX0(0,setup,&init_data->output_slib_elements,sizeof(long*)); - sprintf(dbf_text,"miit%4x",init_data->min_input_threshold); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"mait%4x",init_data->max_input_threshold); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"miot%4x",init_data->min_output_threshold); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"maot%4x",init_data->max_output_threshold); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"niq:%4x",init_data->no_input_qs); - QDIO_DBF_TEXT0(0,setup,dbf_text); - sprintf(dbf_text,"noq:%4x",init_data->no_output_qs); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_HEX0(0,setup,&init_data->input_handler,sizeof(void*)); - QDIO_DBF_HEX0(0,setup,&init_data->output_handler,sizeof(void*)); - QDIO_DBF_HEX0(0,setup,&init_data->int_parm,sizeof(long)); - QDIO_DBF_HEX0(0,setup,&init_data->flags,sizeof(long)); - QDIO_DBF_HEX0(0,setup,&init_data->input_sbal_addr_array,sizeof(void*)); - QDIO_DBF_HEX0(0,setup,&init_data->output_sbal_addr_array,sizeof(void*)); -} - -static void -qdio_allocate_fill_input_desc(struct qdio_irq *irq_ptr, int i, int iqfmt) -{ - irq_ptr->input_qs[i]->is_iqdio_q = iqfmt; - irq_ptr->input_qs[i]->is_thinint_q = irq_ptr->is_thinint_irq; - - irq_ptr->qdr->qdf0[i].sliba=(unsigned long)(irq_ptr->input_qs[i]->slib); - - irq_ptr->qdr->qdf0[i].sla=(unsigned long)(irq_ptr->input_qs[i]->sl); - - irq_ptr->qdr->qdf0[i].slsba= - (unsigned long)(&irq_ptr->input_qs[i]->slsb.acc.val[0]); - - irq_ptr->qdr->qdf0[i].akey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i].bkey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i].ckey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i].dkey=QDIO_STORAGE_KEY; -} - -static void -qdio_allocate_fill_output_desc(struct qdio_irq *irq_ptr, int i, - int j, int iqfmt) -{ - irq_ptr->output_qs[i]->is_iqdio_q = iqfmt; - irq_ptr->output_qs[i]->is_thinint_q = irq_ptr->is_thinint_irq; - - irq_ptr->qdr->qdf0[i+j].sliba=(unsigned long)(irq_ptr->output_qs[i]->slib); - - irq_ptr->qdr->qdf0[i+j].sla=(unsigned long)(irq_ptr->output_qs[i]->sl); - - irq_ptr->qdr->qdf0[i+j].slsba= - (unsigned long)(&irq_ptr->output_qs[i]->slsb.acc.val[0]); - - irq_ptr->qdr->qdf0[i+j].akey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i+j].bkey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i+j].ckey=QDIO_STORAGE_KEY; - irq_ptr->qdr->qdf0[i+j].dkey=QDIO_STORAGE_KEY; -} - - -static void -qdio_initialize_set_siga_flags_input(struct qdio_irq *irq_ptr) -{ - int i; - - for (i=0;ino_input_qs;i++) { - irq_ptr->input_qs[i]->siga_sync= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY; - irq_ptr->input_qs[i]->siga_in= - irq_ptr->qdioac&CHSC_FLAG_SIGA_INPUT_NECESSARY; - irq_ptr->input_qs[i]->siga_out= - irq_ptr->qdioac&CHSC_FLAG_SIGA_OUTPUT_NECESSARY; - irq_ptr->input_qs[i]->siga_sync_done_on_thinints= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS; - irq_ptr->input_qs[i]->hydra_gives_outbound_pcis= - irq_ptr->hydra_gives_outbound_pcis; - irq_ptr->input_qs[i]->siga_sync_done_on_outb_tis= - ((irq_ptr->qdioac& - (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS| - CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS))== - (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS| - CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS)); - - } -} - -static void -qdio_initialize_set_siga_flags_output(struct qdio_irq *irq_ptr) -{ - int i; - - for (i=0;ino_output_qs;i++) { - irq_ptr->output_qs[i]->siga_sync= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY; - irq_ptr->output_qs[i]->siga_in= - irq_ptr->qdioac&CHSC_FLAG_SIGA_INPUT_NECESSARY; - irq_ptr->output_qs[i]->siga_out= - irq_ptr->qdioac&CHSC_FLAG_SIGA_OUTPUT_NECESSARY; - irq_ptr->output_qs[i]->siga_sync_done_on_thinints= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS; - irq_ptr->output_qs[i]->hydra_gives_outbound_pcis= - irq_ptr->hydra_gives_outbound_pcis; - irq_ptr->output_qs[i]->siga_sync_done_on_outb_tis= - ((irq_ptr->qdioac& - (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS| - CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS))== - (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS| - CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS)); - - } -} - -static int -qdio_establish_irq_check_for_errors(struct ccw_device *cdev, int cstat, - int dstat) -{ - char dbf_text[15]; - struct qdio_irq *irq_ptr; - - irq_ptr = cdev->private->qdio_data; - - if (cstat || (dstat & ~(DEV_STAT_CHN_END|DEV_STAT_DEV_END))) { - sprintf(dbf_text,"ick1%4x",irq_ptr->schid.sch_no); - QDIO_DBF_TEXT2(1,trace,dbf_text); - QDIO_DBF_HEX2(0,trace,&dstat,sizeof(int)); - QDIO_DBF_HEX2(0,trace,&cstat,sizeof(int)); - QDIO_PRINT_ERR("received check condition on establish " \ - "queues on irq 0.%x.%x (cs=x%x, ds=x%x).\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - cstat,dstat); - qdio_set_state(irq_ptr,QDIO_IRQ_STATE_ERR); - } - - if (!(dstat & DEV_STAT_DEV_END)) { - QDIO_DBF_TEXT2(1,setup,"eq:no de"); - QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat)); - QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat)); - QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: didn't get " - "device end: dstat=%02x, cstat=%02x\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - dstat, cstat); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - return 1; - } - - if (dstat & ~(DEV_STAT_CHN_END|DEV_STAT_DEV_END)) { - QDIO_DBF_TEXT2(1,setup,"eq:badio"); - QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat)); - QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat)); - QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: got " - "the following devstat: dstat=%02x, " - "cstat=%02x\n", irq_ptr->schid.ssid, - irq_ptr->schid.sch_no, dstat, cstat); - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - return 1; - } - return 0; -} - -static void -qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, int dstat) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - - irq_ptr = cdev->private->qdio_data; - - sprintf(dbf_text,"qehi%4x",cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_TEXT0(0,trace,dbf_text); - - if (qdio_establish_irq_check_for_errors(cdev, cstat, dstat)) - return; - - qdio_set_state(irq_ptr,QDIO_IRQ_STATE_ESTABLISHED); -} - -int -qdio_initialize(struct qdio_initialize *init_data) -{ - int rc; - char dbf_text[15]; - - sprintf(dbf_text,"qini%4x",init_data->cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_TEXT0(0,trace,dbf_text); - - rc = qdio_allocate(init_data); - if (rc == 0) { - rc = qdio_establish(init_data); - if (rc != 0) - qdio_free(init_data->cdev); - } - - return rc; -} - - -int -qdio_allocate(struct qdio_initialize *init_data) -{ - struct qdio_irq *irq_ptr; - char dbf_text[15]; - - sprintf(dbf_text,"qalc%4x",init_data->cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_TEXT0(0,trace,dbf_text); - if ( (init_data->no_input_qs>QDIO_MAX_QUEUES_PER_IRQ) || - (init_data->no_output_qs>QDIO_MAX_QUEUES_PER_IRQ) || - ((init_data->no_input_qs) && (!init_data->input_handler)) || - ((init_data->no_output_qs) && (!init_data->output_handler)) ) - return -EINVAL; - - if (!init_data->input_sbal_addr_array) - return -EINVAL; - - if (!init_data->output_sbal_addr_array) - return -EINVAL; - - qdio_allocate_do_dbf(init_data); - - /* create irq */ - irq_ptr = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); - - QDIO_DBF_TEXT0(0,setup,"irq_ptr:"); - QDIO_DBF_HEX0(0,setup,&irq_ptr,sizeof(void*)); - - if (!irq_ptr) { - QDIO_PRINT_ERR("allocation of irq_ptr failed!\n"); - return -ENOMEM; - } - - init_MUTEX(&irq_ptr->setting_up_sema); - - /* QDR must be in DMA area since CCW data address is only 32 bit */ - irq_ptr->qdr = (struct qdr *) __get_free_page(GFP_KERNEL | GFP_DMA); - if (!(irq_ptr->qdr)) { - free_page((unsigned long) irq_ptr); - QDIO_PRINT_ERR("allocation of irq_ptr->qdr failed!\n"); - return -ENOMEM; - } - QDIO_DBF_TEXT0(0,setup,"qdr:"); - QDIO_DBF_HEX0(0,setup,&irq_ptr->qdr,sizeof(void*)); - - if (qdio_alloc_qs(irq_ptr, - init_data->no_input_qs, - init_data->no_output_qs)) { - QDIO_PRINT_ERR("queue allocation failed!\n"); - qdio_release_irq_memory(irq_ptr); - return -ENOMEM; - } - - init_data->cdev->private->qdio_data = irq_ptr; - - qdio_set_state(irq_ptr,QDIO_IRQ_STATE_INACTIVE); - - return 0; -} - -static int qdio_fill_irq(struct qdio_initialize *init_data) -{ - int i; - char dbf_text[15]; - struct ciw *ciw; - int is_iqdio; - struct qdio_irq *irq_ptr; - - irq_ptr = init_data->cdev->private->qdio_data; - - memset(irq_ptr,0,((char*)&irq_ptr->qdr)-((char*)irq_ptr)); - - /* wipes qib.ac, required by ar7063 */ - memset(irq_ptr->qdr,0,sizeof(struct qdr)); - - irq_ptr->int_parm=init_data->int_parm; - - irq_ptr->schid = ccw_device_get_subchannel_id(init_data->cdev); - irq_ptr->no_input_qs=init_data->no_input_qs; - irq_ptr->no_output_qs=init_data->no_output_qs; - - if (init_data->q_format==QDIO_IQDIO_QFMT) { - irq_ptr->is_iqdio_irq=1; - irq_ptr->is_thinint_irq=1; - } else { - irq_ptr->is_iqdio_irq=0; - irq_ptr->is_thinint_irq=hydra_thinints; - } - sprintf(dbf_text,"is_i_t%1x%1x", - irq_ptr->is_iqdio_irq,irq_ptr->is_thinint_irq); - QDIO_DBF_TEXT2(0,setup,dbf_text); - - if (irq_ptr->is_thinint_irq) { - irq_ptr->dev_st_chg_ind = qdio_get_indicator(); - QDIO_DBF_HEX1(0,setup,&irq_ptr->dev_st_chg_ind,sizeof(void*)); - if (!irq_ptr->dev_st_chg_ind) { - QDIO_PRINT_WARN("no indicator location available " \ - "for irq 0.%x.%x\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no); - qdio_release_irq_memory(irq_ptr); - return -ENOBUFS; - } - } - - /* defaults */ - irq_ptr->equeue.cmd=DEFAULT_ESTABLISH_QS_CMD; - irq_ptr->equeue.count=DEFAULT_ESTABLISH_QS_COUNT; - irq_ptr->aqueue.cmd=DEFAULT_ACTIVATE_QS_CMD; - irq_ptr->aqueue.count=DEFAULT_ACTIVATE_QS_COUNT; - - qdio_fill_qs(irq_ptr, init_data->cdev, - init_data->no_input_qs, - init_data->no_output_qs, - init_data->input_handler, - init_data->output_handler,init_data->int_parm, - init_data->q_format,init_data->flags, - init_data->input_sbal_addr_array, - init_data->output_sbal_addr_array); - - if (!try_module_get(THIS_MODULE)) { - QDIO_PRINT_CRIT("try_module_get() failed!\n"); - qdio_release_irq_memory(irq_ptr); - return -EINVAL; - } - - qdio_fill_thresholds(irq_ptr,init_data->no_input_qs, - init_data->no_output_qs, - init_data->min_input_threshold, - init_data->max_input_threshold, - init_data->min_output_threshold, - init_data->max_output_threshold); - - /* fill in qdr */ - irq_ptr->qdr->qfmt=init_data->q_format; - irq_ptr->qdr->iqdcnt=init_data->no_input_qs; - irq_ptr->qdr->oqdcnt=init_data->no_output_qs; - irq_ptr->qdr->iqdsz=sizeof(struct qdesfmt0)/4; /* size in words */ - irq_ptr->qdr->oqdsz=sizeof(struct qdesfmt0)/4; - - irq_ptr->qdr->qiba=(unsigned long)&irq_ptr->qib; - irq_ptr->qdr->qkey=QDIO_STORAGE_KEY; - - /* fill in qib */ - irq_ptr->is_qebsm = is_passthrough; - if (irq_ptr->is_qebsm) - irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM; - - irq_ptr->qib.qfmt=init_data->q_format; - if (init_data->no_input_qs) - irq_ptr->qib.isliba=(unsigned long)(irq_ptr->input_qs[0]->slib); - if (init_data->no_output_qs) - irq_ptr->qib.osliba=(unsigned long)(irq_ptr->output_qs[0]->slib); - memcpy(irq_ptr->qib.ebcnam,init_data->adapter_name,8); - - qdio_set_impl_params(irq_ptr,init_data->qib_param_field_format, - init_data->qib_param_field, - init_data->no_input_qs, - init_data->no_output_qs, - init_data->input_slib_elements, - init_data->output_slib_elements); - - /* first input descriptors, then output descriptors */ - is_iqdio = (init_data->q_format == QDIO_IQDIO_QFMT) ? 1 : 0; - for (i=0;ino_input_qs;i++) - qdio_allocate_fill_input_desc(irq_ptr, i, is_iqdio); - - for (i=0;ino_output_qs;i++) - qdio_allocate_fill_output_desc(irq_ptr, i, - init_data->no_input_qs, - is_iqdio); - - /* qdr, qib, sls, slsbs, slibs, sbales filled. */ - - /* get qdio commands */ - ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE); - if (!ciw) { - QDIO_DBF_TEXT2(1,setup,"no eq"); - QDIO_PRINT_INFO("No equeue CIW found for QDIO commands. " - "Trying to use default.\n"); - } else - irq_ptr->equeue = *ciw; - ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE); - if (!ciw) { - QDIO_DBF_TEXT2(1,setup,"no aq"); - QDIO_PRINT_INFO("No aqueue CIW found for QDIO commands. " - "Trying to use default.\n"); - } else - irq_ptr->aqueue = *ciw; - - /* Set new interrupt handler. */ - irq_ptr->original_int_handler = init_data->cdev->handler; - init_data->cdev->handler = qdio_handler; - - return 0; -} - -int -qdio_establish(struct qdio_initialize *init_data) -{ - struct qdio_irq *irq_ptr; - unsigned long saveflags; - int result, result2; - struct ccw_device *cdev; - char dbf_text[20]; - - cdev=init_data->cdev; - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -EINVAL; - - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - - down(&irq_ptr->setting_up_sema); - - qdio_fill_irq(init_data); - - /* the thinint CHSC stuff */ - if (irq_ptr->is_thinint_irq) { - - result = tiqdio_set_subchannel_ind(irq_ptr,0); - if (result) { - up(&irq_ptr->setting_up_sema); - qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); - return result; - } - tiqdio_set_delay_target(irq_ptr,TIQDIO_DELAY_TARGET); - } - - sprintf(dbf_text,"qest%4x",cdev->private->schid.sch_no); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_DBF_TEXT0(0,trace,dbf_text); - - /* establish q */ - irq_ptr->ccw.cmd_code=irq_ptr->equeue.cmd; - irq_ptr->ccw.flags=CCW_FLAG_SLI; - irq_ptr->ccw.count=irq_ptr->equeue.count; - irq_ptr->ccw.cda=QDIO_GET_ADDR(irq_ptr->qdr); - - spin_lock_irqsave(get_ccwdev_lock(cdev),saveflags); - - ccw_device_set_options_mask(cdev, 0); - result = ccw_device_start(cdev, &irq_ptr->ccw, - QDIO_DOING_ESTABLISH, 0, 0); - if (result) { - result2 = ccw_device_start(cdev, &irq_ptr->ccw, - QDIO_DOING_ESTABLISH, 0, 0); - sprintf(dbf_text,"eq:io%4x",result); - QDIO_DBF_TEXT2(1,setup,dbf_text); - if (result2) { - sprintf(dbf_text,"eq:io%4x",result); - QDIO_DBF_TEXT2(1,setup,dbf_text); - } - QDIO_PRINT_WARN("establish queues on irq 0.%x.%04x: do_IO " \ - "returned %i, next try returned %i\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - result, result2); - result=result2; - } - - spin_unlock_irqrestore(get_ccwdev_lock(cdev),saveflags); - - if (result) { - up(&irq_ptr->setting_up_sema); - qdio_shutdown(cdev,QDIO_FLAG_CLEANUP_USING_CLEAR); - return result; - } - - wait_event_interruptible_timeout(cdev->private->wait_q, - irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED || - irq_ptr->state == QDIO_IRQ_STATE_ERR, - QDIO_ESTABLISH_TIMEOUT); - - if (irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED) - result = 0; - else { - up(&irq_ptr->setting_up_sema); - qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); - return -EIO; - } - - qdio_get_ssqd_siga(irq_ptr); - /* if this gets set once, we're running under VM and can omit SVSes */ - if (irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY) - omit_svs=1; - - sprintf(dbf_text,"qdioac%2x",irq_ptr->qdioac); - QDIO_DBF_TEXT2(0,setup,dbf_text); - - sprintf(dbf_text,"qib ac%2x",irq_ptr->qib.ac); - QDIO_DBF_TEXT2(0,setup,dbf_text); - - irq_ptr->hydra_gives_outbound_pcis= - irq_ptr->qib.ac&QIB_AC_OUTBOUND_PCI_SUPPORTED; - irq_ptr->sync_done_on_outb_pcis= - irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS; - - qdio_initialize_set_siga_flags_input(irq_ptr); - qdio_initialize_set_siga_flags_output(irq_ptr); - - up(&irq_ptr->setting_up_sema); - - return result; - -} - -int -qdio_activate(struct ccw_device *cdev, int flags) -{ - struct qdio_irq *irq_ptr; - int i,result=0,result2; - unsigned long saveflags; - char dbf_text[20]; /* see qdio_initialize */ - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - - down(&irq_ptr->setting_up_sema); - if (irq_ptr->state==QDIO_IRQ_STATE_INACTIVE) { - result=-EBUSY; - goto out; - } - - sprintf(dbf_text,"qact%4x", irq_ptr->schid.sch_no); - QDIO_DBF_TEXT2(0,setup,dbf_text); - QDIO_DBF_TEXT2(0,trace,dbf_text); - - /* activate q */ - irq_ptr->ccw.cmd_code=irq_ptr->aqueue.cmd; - irq_ptr->ccw.flags=CCW_FLAG_SLI; - irq_ptr->ccw.count=irq_ptr->aqueue.count; - irq_ptr->ccw.cda=QDIO_GET_ADDR(0); - - spin_lock_irqsave(get_ccwdev_lock(cdev),saveflags); - - ccw_device_set_options(cdev, CCWDEV_REPORT_ALL); - result=ccw_device_start(cdev,&irq_ptr->ccw,QDIO_DOING_ACTIVATE, - 0, DOIO_DENY_PREFETCH); - if (result) { - result2=ccw_device_start(cdev,&irq_ptr->ccw, - QDIO_DOING_ACTIVATE,0,0); - sprintf(dbf_text,"aq:io%4x",result); - QDIO_DBF_TEXT2(1,setup,dbf_text); - if (result2) { - sprintf(dbf_text,"aq:io%4x",result); - QDIO_DBF_TEXT2(1,setup,dbf_text); - } - QDIO_PRINT_WARN("activate queues on irq 0.%x.%04x: do_IO " \ - "returned %i, next try returned %i\n", - irq_ptr->schid.ssid, irq_ptr->schid.sch_no, - result, result2); - result=result2; - } - - spin_unlock_irqrestore(get_ccwdev_lock(cdev),saveflags); - if (result) - goto out; - - for (i=0;ino_input_qs;i++) { - if (irq_ptr->is_thinint_irq) { - /* - * that way we know, that, if we will get interrupted - * by tiqdio_inbound_processing, qdio_unmark_q will - * not be called - */ - qdio_reserve_q(irq_ptr->input_qs[i]); - qdio_mark_tiq(irq_ptr->input_qs[i]); - qdio_release_q(irq_ptr->input_qs[i]); - } - } - - if (flags&QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT) { - for (i=0;ino_input_qs;i++) { - irq_ptr->input_qs[i]->is_input_q|= - QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT; - } - } - - msleep(QDIO_ACTIVATE_TIMEOUT); - switch (irq_ptr->state) { - case QDIO_IRQ_STATE_STOPPED: - case QDIO_IRQ_STATE_ERR: - up(&irq_ptr->setting_up_sema); - qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); - down(&irq_ptr->setting_up_sema); - result = -EIO; - break; - default: - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ACTIVE); - result = 0; - } - out: - up(&irq_ptr->setting_up_sema); - - return result; -} - -/* buffers filled forwards again to make Rick happy */ -static void -qdio_do_qdio_fill_input(struct qdio_q *q, unsigned int qidx, - unsigned int count, struct qdio_buffer *buffers) -{ - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - int tmp = 0; - - qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1); - if (irq->is_qebsm) { - while (count) { - tmp = set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count); - if (!tmp) - return; - } - return; - } - for (;;) { - set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count); - count--; - if (!count) break; - qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1); - } -} - -static void -qdio_do_qdio_fill_output(struct qdio_q *q, unsigned int qidx, - unsigned int count, struct qdio_buffer *buffers) -{ - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - int tmp = 0; - - qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1); - if (irq->is_qebsm) { - while (count) { - tmp = set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count); - if (!tmp) - return; - } - return; - } - - for (;;) { - set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count); - count--; - if (!count) break; - qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1); - } -} - -static void -do_qdio_handle_inbound(struct qdio_q *q, unsigned int callflags, - unsigned int qidx, unsigned int count, - struct qdio_buffer *buffers) -{ - int used_elements; - - /* This is the inbound handling of queues */ - used_elements=atomic_add_return(count, &q->number_of_buffers_used) - count; - - qdio_do_qdio_fill_input(q,qidx,count,buffers); - - if ((used_elements+count==QDIO_MAX_BUFFERS_PER_Q)&& - (callflags&QDIO_FLAG_UNDER_INTERRUPT)) - atomic_xchg(&q->polling,0); - - if (used_elements) - return; - if (callflags&QDIO_FLAG_DONT_SIGA) - return; - if (q->siga_in) { - int result; - - result=qdio_siga_input(q); - if (result) { - if (q->siga_error) - q->error_status_flags|= - QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR; - q->error_status_flags|=QDIO_STATUS_LOOK_FOR_ERROR; - q->siga_error=result; - } - } - - qdio_mark_q(q); -} - -static void -do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags, - unsigned int qidx, unsigned int count, - struct qdio_buffer *buffers) -{ - int used_elements; - unsigned int cnt, start_buf; - unsigned char state = 0; - struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; - - /* This is the outbound handling of queues */ - qdio_do_qdio_fill_output(q,qidx,count,buffers); - - used_elements=atomic_add_return(count, &q->number_of_buffers_used) - count; - - if (callflags&QDIO_FLAG_DONT_SIGA) { - qdio_perf_stat_inc(&perf_stats.outbound_cnt); - return; - } - if (callflags & QDIO_FLAG_PCI_OUT) - q->is_pci_out = 1; - else - q->is_pci_out = 0; - if (q->is_iqdio_q) { - /* one siga for every sbal */ - while (count--) - qdio_kick_outbound_q(q); - - __qdio_outbound_processing(q); - } else { - /* under VM, we do a SIGA sync unconditionally */ - SYNC_MEMORY; - else { - /* - * w/o shadow queues (else branch of - * SYNC_MEMORY :-/ ), we try to - * fast-requeue buffers - */ - if (irq->is_qebsm) { - cnt = 1; - start_buf = ((qidx+QDIO_MAX_BUFFERS_PER_Q-1) & - (QDIO_MAX_BUFFERS_PER_Q-1)); - qdio_do_eqbs(q, &state, &start_buf, &cnt); - } else - state = q->slsb.acc.val[(qidx+QDIO_MAX_BUFFERS_PER_Q-1) - &(QDIO_MAX_BUFFERS_PER_Q-1) ]; - if (state != SLSB_CU_OUTPUT_PRIMED) { - qdio_kick_outbound_q(q); - } else { - QDIO_DBF_TEXT3(0,trace, "fast-req"); - qdio_perf_stat_inc(&perf_stats.fast_reqs); - } - } - /* - * only marking the q could take too long, - * the upper layer module could do a lot of - * traffic in that time - */ - __qdio_outbound_processing(q); - } - - qdio_perf_stat_inc(&perf_stats.outbound_cnt); -} - -/* count must be 1 in iqdio */ -int -do_QDIO(struct ccw_device *cdev,unsigned int callflags, - unsigned int queue_number, unsigned int qidx, - unsigned int count,struct qdio_buffer *buffers) -{ - struct qdio_irq *irq_ptr; -#ifdef CONFIG_QDIO_DEBUG - char dbf_text[20]; - - sprintf(dbf_text,"doQD%04x",cdev->private->schid.sch_no); - QDIO_DBF_TEXT3(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if ( (qidx>QDIO_MAX_BUFFERS_PER_Q) || - (count>QDIO_MAX_BUFFERS_PER_Q) || - (queue_number>QDIO_MAX_QUEUES_PER_IRQ) ) - return -EINVAL; - - if (count==0) - return 0; - - irq_ptr = cdev->private->qdio_data; - if (!irq_ptr) - return -ENODEV; - -#ifdef CONFIG_QDIO_DEBUG - if (callflags&QDIO_FLAG_SYNC_INPUT) - QDIO_DBF_HEX3(0,trace,&irq_ptr->input_qs[queue_number], - sizeof(void*)); - else - QDIO_DBF_HEX3(0,trace,&irq_ptr->output_qs[queue_number], - sizeof(void*)); - sprintf(dbf_text,"flag%04x",callflags); - QDIO_DBF_TEXT3(0,trace,dbf_text); - sprintf(dbf_text,"qi%02xct%02x",qidx,count); - QDIO_DBF_TEXT3(0,trace,dbf_text); -#endif /* CONFIG_QDIO_DEBUG */ - - if (irq_ptr->state!=QDIO_IRQ_STATE_ACTIVE) - return -EBUSY; - - if (callflags&QDIO_FLAG_SYNC_INPUT) - do_qdio_handle_inbound(irq_ptr->input_qs[queue_number], - callflags, qidx, count, buffers); - else if (callflags&QDIO_FLAG_SYNC_OUTPUT) - do_qdio_handle_outbound(irq_ptr->output_qs[queue_number], - callflags, qidx, count, buffers); - else { - QDIO_DBF_TEXT3(1,trace,"doQD:inv"); - return -EINVAL; - } - return 0; -} - -static int -qdio_perf_procfile_read(char *buffer, char **buffer_location, off_t offset, - int buffer_length, int *eof, void *data) -{ - int c=0; - - /* we are always called with buffer_length=4k, so we all - deliver on the first read */ - if (offset>0) - return 0; - -#define _OUTP_IT(x...) c+=sprintf(buffer+c,x) -#ifdef CONFIG_64BIT - _OUTP_IT("Number of tasklet runs (total) : %li\n", - (long)atomic64_read(&perf_stats.tl_runs)); - _OUTP_IT("Inbound tasklet runs tried/retried : %li/%li\n", - (long)atomic64_read(&perf_stats.inbound_tl_runs), - (long)atomic64_read(&perf_stats.inbound_tl_runs_resched)); - _OUTP_IT("Inbound-thin tasklet runs tried/retried : %li/%li\n", - (long)atomic64_read(&perf_stats.inbound_thin_tl_runs), - (long)atomic64_read(&perf_stats.inbound_thin_tl_runs_resched)); - _OUTP_IT("Outbound tasklet runs tried/retried : %li/%li\n", - (long)atomic64_read(&perf_stats.outbound_tl_runs), - (long)atomic64_read(&perf_stats.outbound_tl_runs_resched)); - _OUTP_IT("\n"); - _OUTP_IT("Number of SIGA sync's issued : %li\n", - (long)atomic64_read(&perf_stats.siga_syncs)); - _OUTP_IT("Number of SIGA in's issued : %li\n", - (long)atomic64_read(&perf_stats.siga_ins)); - _OUTP_IT("Number of SIGA out's issued : %li\n", - (long)atomic64_read(&perf_stats.siga_outs)); - _OUTP_IT("Number of PCIs caught : %li\n", - (long)atomic64_read(&perf_stats.pcis)); - _OUTP_IT("Number of adapter interrupts caught : %li\n", - (long)atomic64_read(&perf_stats.thinints)); - _OUTP_IT("Number of fast requeues (outg. SBALs w/o SIGA) : %li\n", - (long)atomic64_read(&perf_stats.fast_reqs)); - _OUTP_IT("\n"); - _OUTP_IT("Number of inbound transfers : %li\n", - (long)atomic64_read(&perf_stats.inbound_cnt)); - _OUTP_IT("Number of do_QDIOs outbound : %li\n", - (long)atomic64_read(&perf_stats.outbound_cnt)); -#else /* CONFIG_64BIT */ - _OUTP_IT("Number of tasklet runs (total) : %i\n", - atomic_read(&perf_stats.tl_runs)); - _OUTP_IT("Inbound tasklet runs tried/retried : %i/%i\n", - atomic_read(&perf_stats.inbound_tl_runs), - atomic_read(&perf_stats.inbound_tl_runs_resched)); - _OUTP_IT("Inbound-thin tasklet runs tried/retried : %i/%i\n", - atomic_read(&perf_stats.inbound_thin_tl_runs), - atomic_read(&perf_stats.inbound_thin_tl_runs_resched)); - _OUTP_IT("Outbound tasklet runs tried/retried : %i/%i\n", - atomic_read(&perf_stats.outbound_tl_runs), - atomic_read(&perf_stats.outbound_tl_runs_resched)); - _OUTP_IT("\n"); - _OUTP_IT("Number of SIGA sync's issued : %i\n", - atomic_read(&perf_stats.siga_syncs)); - _OUTP_IT("Number of SIGA in's issued : %i\n", - atomic_read(&perf_stats.siga_ins)); - _OUTP_IT("Number of SIGA out's issued : %i\n", - atomic_read(&perf_stats.siga_outs)); - _OUTP_IT("Number of PCIs caught : %i\n", - atomic_read(&perf_stats.pcis)); - _OUTP_IT("Number of adapter interrupts caught : %i\n", - atomic_read(&perf_stats.thinints)); - _OUTP_IT("Number of fast requeues (outg. SBALs w/o SIGA) : %i\n", - atomic_read(&perf_stats.fast_reqs)); - _OUTP_IT("\n"); - _OUTP_IT("Number of inbound transfers : %i\n", - atomic_read(&perf_stats.inbound_cnt)); - _OUTP_IT("Number of do_QDIOs outbound : %i\n", - atomic_read(&perf_stats.outbound_cnt)); -#endif /* CONFIG_64BIT */ - _OUTP_IT("\n"); - - return c; -} - -static struct proc_dir_entry *qdio_perf_proc_file; - -static void -qdio_add_procfs_entry(void) -{ - proc_perf_file_registration=0; - qdio_perf_proc_file=create_proc_entry(QDIO_PERF, - S_IFREG|0444,NULL); - if (qdio_perf_proc_file) { - qdio_perf_proc_file->read_proc=&qdio_perf_procfile_read; - } else proc_perf_file_registration=-1; - - if (proc_perf_file_registration) - QDIO_PRINT_WARN("was not able to register perf. " \ - "proc-file (%i).\n", - proc_perf_file_registration); -} - -static void -qdio_remove_procfs_entry(void) -{ - if (!proc_perf_file_registration) /* means if it went ok earlier */ - remove_proc_entry(QDIO_PERF,NULL); -} - -/** - * attributes in sysfs - *****************************************************************************/ - -static ssize_t -qdio_performance_stats_show(struct bus_type *bus, char *buf) -{ - return sprintf(buf, "%i\n", qdio_performance_stats ? 1 : 0); -} - -static ssize_t -qdio_performance_stats_store(struct bus_type *bus, const char *buf, size_t count) -{ - unsigned long i; - int ret; - - ret = strict_strtoul(buf, 16, &i); - if (!ret && ((i == 0) || (i == 1))) { - if (i == qdio_performance_stats) - return count; - qdio_performance_stats = i; - if (i==0) { - /* reset perf. stat. info */ -#ifdef CONFIG_64BIT - atomic64_set(&perf_stats.tl_runs, 0); - atomic64_set(&perf_stats.outbound_tl_runs, 0); - atomic64_set(&perf_stats.inbound_tl_runs, 0); - atomic64_set(&perf_stats.inbound_tl_runs_resched, 0); - atomic64_set(&perf_stats.inbound_thin_tl_runs, 0); - atomic64_set(&perf_stats.inbound_thin_tl_runs_resched, - 0); - atomic64_set(&perf_stats.siga_outs, 0); - atomic64_set(&perf_stats.siga_ins, 0); - atomic64_set(&perf_stats.siga_syncs, 0); - atomic64_set(&perf_stats.pcis, 0); - atomic64_set(&perf_stats.thinints, 0); - atomic64_set(&perf_stats.fast_reqs, 0); - atomic64_set(&perf_stats.outbound_cnt, 0); - atomic64_set(&perf_stats.inbound_cnt, 0); -#else /* CONFIG_64BIT */ - atomic_set(&perf_stats.tl_runs, 0); - atomic_set(&perf_stats.outbound_tl_runs, 0); - atomic_set(&perf_stats.inbound_tl_runs, 0); - atomic_set(&perf_stats.inbound_tl_runs_resched, 0); - atomic_set(&perf_stats.inbound_thin_tl_runs, 0); - atomic_set(&perf_stats.inbound_thin_tl_runs_resched, 0); - atomic_set(&perf_stats.siga_outs, 0); - atomic_set(&perf_stats.siga_ins, 0); - atomic_set(&perf_stats.siga_syncs, 0); - atomic_set(&perf_stats.pcis, 0); - atomic_set(&perf_stats.thinints, 0); - atomic_set(&perf_stats.fast_reqs, 0); - atomic_set(&perf_stats.outbound_cnt, 0); - atomic_set(&perf_stats.inbound_cnt, 0); -#endif /* CONFIG_64BIT */ - } - } else { - QDIO_PRINT_ERR("QDIO performance_stats: write 0 or 1 to this file!\n"); - return -EINVAL; - } - return count; -} - -static BUS_ATTR(qdio_performance_stats, 0644, qdio_performance_stats_show, - qdio_performance_stats_store); - -static void -tiqdio_register_thinints(void) -{ - char dbf_text[20]; - - tiqdio_ind = - s390_register_adapter_interrupt(&tiqdio_thinint_handler, NULL, - TIQDIO_THININT_ISC); - if (IS_ERR(tiqdio_ind)) { - sprintf(dbf_text, "regthn%lx", PTR_ERR(tiqdio_ind)); - QDIO_DBF_TEXT0(0,setup,dbf_text); - QDIO_PRINT_ERR("failed to register adapter handler " \ - "(rc=%li).\nAdapter interrupts might " \ - "not work. Continuing.\n", - PTR_ERR(tiqdio_ind)); - tiqdio_ind = NULL; - } -} - -static void -tiqdio_unregister_thinints(void) -{ - if (tiqdio_ind) - s390_unregister_adapter_interrupt(tiqdio_ind, - TIQDIO_THININT_ISC); -} - -static int -qdio_get_qdio_memory(void) -{ - int i; - indicator_used[0]=1; - - for (i=1;i + * Jan Glauber + */ #ifndef _CIO_QDIO_H #define _CIO_QDIO_H #include -#include #include +#include "chsc.h" -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_VERBOSE_LEVEL 9 -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_VERBOSE_LEVEL 5 -#endif /* CONFIG_QDIO_DEBUG */ -#define QDIO_USE_PROCESSING_STATE - -#define QDIO_MINIMAL_BH_RELIEF_TIME 16 -#define QDIO_TIMER_POLL_VALUE 1 -#define IQDIO_TIMER_POLL_VALUE 1 - -/* - * unfortunately this can't be (QDIO_MAX_BUFFERS_PER_Q*4/3) or so -- as - * we never know, whether we'll get initiative again, e.g. to give the - * transmit skb's back to the stack, however the stack may be waiting for - * them... therefore we define 4 as threshold to start polling (which - * will stop as soon as the asynchronous queue catches up) - * btw, this only applies to the asynchronous HiperSockets queue - */ -#define IQDIO_FILL_LEVEL_TO_POLL 4 - -#define TIQDIO_THININT_ISC QDIO_AIRQ_ISC -#define TIQDIO_DELAY_TARGET 0 -#define QDIO_BUSY_BIT_PATIENCE 100 /* in microsecs */ -#define QDIO_BUSY_BIT_GIVE_UP 10000000 /* 10 seconds */ -#define IQDIO_GLOBAL_LAPS 2 /* GLOBAL_LAPS are not used as we */ -#define IQDIO_GLOBAL_LAPS_INT 1 /* don't global summary */ -#define IQDIO_LOCAL_LAPS 4 -#define IQDIO_LOCAL_LAPS_INT 1 -#define IQDIO_GLOBAL_SUMMARY_CC_MASK 2 -/*#define IQDIO_IQDC_INT_PARM 0x1234*/ - -#define QDIO_Q_LAPS 5 - -#define QDIO_STORAGE_KEY PAGE_DEFAULT_KEY - -#define L2_CACHELINE_SIZE 256 -#define INDICATORS_PER_CACHELINE (L2_CACHELINE_SIZE/sizeof(__u32)) - -#define QDIO_PERF "qdio_perf" - -/* must be a power of 2 */ -/*#define QDIO_STATS_NUMBER 4 - -#define QDIO_STATS_CLASSES 2 -#define QDIO_STATS_COUNT_NEEDED 2*/ - -#define QDIO_NO_USE_COUNT_TIMEOUT (1*HZ) /* wait for 1 sec on each q before - exiting without having use_count - of the queue to 0 */ - -#define QDIO_ESTABLISH_TIMEOUT (1*HZ) -#define QDIO_CLEANUP_CLEAR_TIMEOUT (20*HZ) -#define QDIO_CLEANUP_HALT_TIMEOUT (10*HZ) -#define QDIO_FORCE_CHECK_TIMEOUT (10*HZ) -#define QDIO_ACTIVATE_TIMEOUT (5) /* 5 ms */ +#define QDIO_BUSY_BIT_PATIENCE 100 /* 100 microseconds */ +#define QDIO_BUSY_BIT_GIVE_UP 2000000 /* 2 seconds = eternity */ +#define QDIO_INPUT_THRESHOLD 500 /* 500 microseconds */ enum qdio_irq_states { QDIO_IRQ_STATE_INACTIVE, @@ -72,565 +26,352 @@ enum qdio_irq_states { NR_QDIO_IRQ_STATES, }; -/* used as intparm in do_IO: */ -#define QDIO_DOING_SENSEID 0 -#define QDIO_DOING_ESTABLISH 1 -#define QDIO_DOING_ACTIVATE 2 -#define QDIO_DOING_CLEANUP 3 - -/************************* DEBUG FACILITY STUFF *********************/ - -#define QDIO_DBF_HEX(ex,name,level,addr,len) \ - do { \ - if (ex) \ - debug_exception(qdio_dbf_##name,level,(void*)(addr),len); \ - else \ - debug_event(qdio_dbf_##name,level,(void*)(addr),len); \ - } while (0) -#define QDIO_DBF_TEXT(ex,name,level,text) \ - do { \ - if (ex) \ - debug_text_exception(qdio_dbf_##name,level,text); \ - else \ - debug_text_event(qdio_dbf_##name,level,text); \ - } while (0) - - -#define QDIO_DBF_HEX0(ex,name,addr,len) QDIO_DBF_HEX(ex,name,0,addr,len) -#define QDIO_DBF_HEX1(ex,name,addr,len) QDIO_DBF_HEX(ex,name,1,addr,len) -#define QDIO_DBF_HEX2(ex,name,addr,len) QDIO_DBF_HEX(ex,name,2,addr,len) -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_HEX3(ex,name,addr,len) QDIO_DBF_HEX(ex,name,3,addr,len) -#define QDIO_DBF_HEX4(ex,name,addr,len) QDIO_DBF_HEX(ex,name,4,addr,len) -#define QDIO_DBF_HEX5(ex,name,addr,len) QDIO_DBF_HEX(ex,name,5,addr,len) -#define QDIO_DBF_HEX6(ex,name,addr,len) QDIO_DBF_HEX(ex,name,6,addr,len) -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_HEX3(ex,name,addr,len) do {} while (0) -#define QDIO_DBF_HEX4(ex,name,addr,len) do {} while (0) -#define QDIO_DBF_HEX5(ex,name,addr,len) do {} while (0) -#define QDIO_DBF_HEX6(ex,name,addr,len) do {} while (0) -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_TEXT0(ex,name,text) QDIO_DBF_TEXT(ex,name,0,text) -#define QDIO_DBF_TEXT1(ex,name,text) QDIO_DBF_TEXT(ex,name,1,text) -#define QDIO_DBF_TEXT2(ex,name,text) QDIO_DBF_TEXT(ex,name,2,text) -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_TEXT3(ex,name,text) QDIO_DBF_TEXT(ex,name,3,text) -#define QDIO_DBF_TEXT4(ex,name,text) QDIO_DBF_TEXT(ex,name,4,text) -#define QDIO_DBF_TEXT5(ex,name,text) QDIO_DBF_TEXT(ex,name,5,text) -#define QDIO_DBF_TEXT6(ex,name,text) QDIO_DBF_TEXT(ex,name,6,text) -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_TEXT3(ex,name,text) do {} while (0) -#define QDIO_DBF_TEXT4(ex,name,text) do {} while (0) -#define QDIO_DBF_TEXT5(ex,name,text) do {} while (0) -#define QDIO_DBF_TEXT6(ex,name,text) do {} while (0) -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_SETUP_NAME "qdio_setup" -#define QDIO_DBF_SETUP_LEN 8 -#define QDIO_DBF_SETUP_PAGES 4 -#define QDIO_DBF_SETUP_NR_AREAS 1 -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_SETUP_LEVEL 6 -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_SETUP_LEVEL 2 -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_SBAL_NAME "qdio_labs" /* sbal */ -#define QDIO_DBF_SBAL_LEN 256 -#define QDIO_DBF_SBAL_PAGES 4 -#define QDIO_DBF_SBAL_NR_AREAS 2 -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_SBAL_LEVEL 6 -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_SBAL_LEVEL 2 -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_TRACE_NAME "qdio_trace" -#define QDIO_DBF_TRACE_LEN 8 -#define QDIO_DBF_TRACE_NR_AREAS 2 -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_TRACE_PAGES 16 -#define QDIO_DBF_TRACE_LEVEL 4 /* -------- could be even more verbose here */ -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_TRACE_PAGES 4 -#define QDIO_DBF_TRACE_LEVEL 2 -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_DBF_SENSE_NAME "qdio_sense" -#define QDIO_DBF_SENSE_LEN 64 -#define QDIO_DBF_SENSE_PAGES 2 -#define QDIO_DBF_SENSE_NR_AREAS 1 -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_SENSE_LEVEL 6 -#else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_SENSE_LEVEL 2 -#endif /* CONFIG_QDIO_DEBUG */ - -#ifdef CONFIG_QDIO_DEBUG -#define QDIO_TRACE_QTYPE QDIO_ZFCP_QFMT - -#define QDIO_DBF_SLSB_OUT_NAME "qdio_slsb_out" -#define QDIO_DBF_SLSB_OUT_LEN QDIO_MAX_BUFFERS_PER_Q -#define QDIO_DBF_SLSB_OUT_PAGES 256 -#define QDIO_DBF_SLSB_OUT_NR_AREAS 1 -#define QDIO_DBF_SLSB_OUT_LEVEL 6 - -#define QDIO_DBF_SLSB_IN_NAME "qdio_slsb_in" -#define QDIO_DBF_SLSB_IN_LEN QDIO_MAX_BUFFERS_PER_Q -#define QDIO_DBF_SLSB_IN_PAGES 256 -#define QDIO_DBF_SLSB_IN_NR_AREAS 1 -#define QDIO_DBF_SLSB_IN_LEVEL 6 -#endif /* CONFIG_QDIO_DEBUG */ - -#define QDIO_PRINTK_HEADER QDIO_NAME ": " - -#if QDIO_VERBOSE_LEVEL>8 -#define QDIO_PRINT_STUPID(x...) printk( KERN_DEBUG QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_STUPID(x...) do { } while (0) -#endif +/* used as intparm in do_IO */ +#define QDIO_DOING_ESTABLISH 1 +#define QDIO_DOING_ACTIVATE 2 +#define QDIO_DOING_CLEANUP 3 + +#define SLSB_STATE_NOT_INIT 0x0 +#define SLSB_STATE_EMPTY 0x1 +#define SLSB_STATE_PRIMED 0x2 +#define SLSB_STATE_HALTED 0xe +#define SLSB_STATE_ERROR 0xf +#define SLSB_TYPE_INPUT 0x0 +#define SLSB_TYPE_OUTPUT 0x20 +#define SLSB_OWNER_PROG 0x80 +#define SLSB_OWNER_CU 0x40 + +#define SLSB_P_INPUT_NOT_INIT \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_NOT_INIT) /* 0x80 */ +#define SLSB_P_INPUT_ACK \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_EMPTY) /* 0x81 */ +#define SLSB_CU_INPUT_EMPTY \ + (SLSB_OWNER_CU | SLSB_TYPE_INPUT | SLSB_STATE_EMPTY) /* 0x41 */ +#define SLSB_P_INPUT_PRIMED \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_PRIMED) /* 0x82 */ +#define SLSB_P_INPUT_HALTED \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_HALTED) /* 0x8e */ +#define SLSB_P_INPUT_ERROR \ + (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_ERROR) /* 0x8f */ +#define SLSB_P_OUTPUT_NOT_INIT \ + (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_NOT_INIT) /* 0xa0 */ +#define SLSB_P_OUTPUT_EMPTY \ + (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_EMPTY) /* 0xa1 */ +#define SLSB_CU_OUTPUT_PRIMED \ + (SLSB_OWNER_CU | SLSB_TYPE_OUTPUT | SLSB_STATE_PRIMED) /* 0x62 */ +#define SLSB_P_OUTPUT_HALTED \ + (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_HALTED) /* 0xae */ +#define SLSB_P_OUTPUT_ERROR \ + (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_ERROR) /* 0xaf */ + +#define SLSB_ERROR_DURING_LOOKUP 0xff + +/* additional CIWs returned by extended Sense-ID */ +#define CIW_TYPE_EQUEUE 0x3 /* establish QDIO queues */ +#define CIW_TYPE_AQUEUE 0x4 /* activate QDIO queues */ -#if QDIO_VERBOSE_LEVEL>7 -#define QDIO_PRINT_ALL(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_ALL(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>6 -#define QDIO_PRINT_INFO(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_INFO(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>5 -#define QDIO_PRINT_WARN(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_WARN(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>4 -#define QDIO_PRINT_ERR(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_ERR(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>3 -#define QDIO_PRINT_CRIT(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_CRIT(x...) do { } while (0) -#endif - -#if QDIO_VERBOSE_LEVEL>2 -#define QDIO_PRINT_ALERT(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_ALERT(x...) do { } while (0) -#endif +/* flags for st qdio sch data */ +#define CHSC_FLAG_QDIO_CAPABILITY 0x80 +#define CHSC_FLAG_VALIDITY 0x40 + +/* qdio adapter-characteristics-1 flag */ +#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */ +#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */ +#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */ +#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */ +#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */ +#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */ +#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */ -#if QDIO_VERBOSE_LEVEL>1 -#define QDIO_PRINT_EMERG(x...) printk( QDIO_PRINTK_HEADER x) -#else -#define QDIO_PRINT_EMERG(x...) do { } while (0) -#endif - -#define QDIO_HEXDUMP16(importance,header,ptr) \ -QDIO_PRINT_##importance(header "%02x %02x %02x %02x " \ - "%02x %02x %02x %02x %02x %02x %02x %02x " \ - "%02x %02x %02x %02x\n",*(((char*)ptr)), \ - *(((char*)ptr)+1),*(((char*)ptr)+2), \ - *(((char*)ptr)+3),*(((char*)ptr)+4), \ - *(((char*)ptr)+5),*(((char*)ptr)+6), \ - *(((char*)ptr)+7),*(((char*)ptr)+8), \ - *(((char*)ptr)+9),*(((char*)ptr)+10), \ - *(((char*)ptr)+11),*(((char*)ptr)+12), \ - *(((char*)ptr)+13),*(((char*)ptr)+14), \ - *(((char*)ptr)+15)); \ -QDIO_PRINT_##importance(header "%02x %02x %02x %02x %02x %02x %02x %02x " \ - "%02x %02x %02x %02x %02x %02x %02x %02x\n", \ - *(((char*)ptr)+16),*(((char*)ptr)+17), \ - *(((char*)ptr)+18),*(((char*)ptr)+19), \ - *(((char*)ptr)+20),*(((char*)ptr)+21), \ - *(((char*)ptr)+22),*(((char*)ptr)+23), \ - *(((char*)ptr)+24),*(((char*)ptr)+25), \ - *(((char*)ptr)+26),*(((char*)ptr)+27), \ - *(((char*)ptr)+28),*(((char*)ptr)+29), \ - *(((char*)ptr)+30),*(((char*)ptr)+31)); - -/****************** END OF DEBUG FACILITY STUFF *********************/ +#ifdef CONFIG_64BIT +static inline int do_sqbs(u64 token, unsigned char state, int queue, + int *start, int *count) +{ + register unsigned long _ccq asm ("0") = *count; + register unsigned long _token asm ("1") = token; + unsigned long _queuestart = ((unsigned long)queue << 32) | *start; -/* - * Some instructions as assembly - */ + asm volatile( + " .insn rsy,0xeb000000008A,%1,0,0(%2)" + : "+d" (_ccq), "+d" (_queuestart) + : "d" ((unsigned long)state), "d" (_token) + : "memory", "cc"); + *count = _ccq & 0xff; + *start = _queuestart & 0xff; -static inline int -do_sqbs(unsigned long sch, unsigned char state, int queue, - unsigned int *start, unsigned int *count) -{ -#ifdef CONFIG_64BIT - register unsigned long _ccq asm ("0") = *count; - register unsigned long _sch asm ("1") = sch; - unsigned long _queuestart = ((unsigned long)queue << 32) | *start; - - asm volatile( - " .insn rsy,0xeb000000008A,%1,0,0(%2)" - : "+d" (_ccq), "+d" (_queuestart) - : "d" ((unsigned long)state), "d" (_sch) - : "memory", "cc"); - *count = _ccq & 0xff; - *start = _queuestart & 0xff; - - return (_ccq >> 32) & 0xff; -#else - return 0; -#endif + return (_ccq >> 32) & 0xff; } -static inline int -do_eqbs(unsigned long sch, unsigned char *state, int queue, - unsigned int *start, unsigned int *count) +static inline int do_eqbs(u64 token, unsigned char *state, int queue, + int *start, int *count) { -#ifdef CONFIG_64BIT register unsigned long _ccq asm ("0") = *count; - register unsigned long _sch asm ("1") = sch; + register unsigned long _token asm ("1") = token; unsigned long _queuestart = ((unsigned long)queue << 32) | *start; unsigned long _state = 0; asm volatile( " .insn rrf,0xB99c0000,%1,%2,0,0" : "+d" (_ccq), "+d" (_queuestart), "+d" (_state) - : "d" (_sch) - : "memory", "cc" ); + : "d" (_token) + : "memory", "cc"); *count = _ccq & 0xff; *start = _queuestart & 0xff; *state = _state & 0xff; return (_ccq >> 32) & 0xff; -#else - return 0; -#endif -} - - -static inline int -do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2) -{ - register unsigned long reg0 asm ("0") = 2; - register struct subchannel_id reg1 asm ("1") = schid; - register unsigned long reg2 asm ("2") = mask1; - register unsigned long reg3 asm ("3") = mask2; - int cc; - - asm volatile( - " siga 0\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) - : "d" (reg0), "d" (reg1), "d" (reg2), "d" (reg3) : "cc"); - return cc; -} - -static inline int -do_siga_input(struct subchannel_id schid, unsigned int mask) -{ - register unsigned long reg0 asm ("0") = 1; - register struct subchannel_id reg1 asm ("1") = schid; - register unsigned long reg2 asm ("2") = mask; - int cc; - - asm volatile( - " siga 0\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) - : "d" (reg0), "d" (reg1), "d" (reg2) : "cc", "memory"); - return cc; -} - -static inline int -do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb, - unsigned int fc) -{ - register unsigned long __fc asm("0") = fc; - register unsigned long __schid asm("1") = schid; - register unsigned long __mask asm("2") = mask; - int cc; - - asm volatile( - " siga 0\n" - "0: ipm %0\n" - " srl %0,28\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (cc), "+d" (__fc), "+d" (__schid), "+d" (__mask) - : "0" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION) - : "cc", "memory"); - (*bb) = ((unsigned int) __fc) >> 31; - return cc; -} - -static inline unsigned long -do_clear_global_summary(void) -{ - register unsigned long __fn asm("1") = 3; - register unsigned long __tmp asm("2"); - register unsigned long __time asm("3"); - - asm volatile( - " .insn rre,0xb2650000,2,0" - : "+d" (__fn), "=d" (__tmp), "=d" (__time)); - return __time; } - -/* - * QDIO device commands returned by extended Sense-ID - */ -#define DEFAULT_ESTABLISH_QS_CMD 0x1b -#define DEFAULT_ESTABLISH_QS_COUNT 0x1000 -#define DEFAULT_ACTIVATE_QS_CMD 0x1f -#define DEFAULT_ACTIVATE_QS_COUNT 0 - -/* - * additional CIWs returned by extended Sense-ID - */ -#define CIW_TYPE_EQUEUE 0x3 /* establish QDIO queues */ -#define CIW_TYPE_AQUEUE 0x4 /* activate QDIO queues */ +#else +static inline int do_sqbs(u64 token, unsigned char state, int queue, + int *start, int *count) { return 0; } +static inline int do_eqbs(u64 token, unsigned char *state, int queue, + int *start, int *count) { return 0; } +#endif /* CONFIG_64BIT */ -#define QDIO_CHSC_RESPONSE_CODE_OK 1 -/* flags for st qdio sch data */ -#define CHSC_FLAG_QDIO_CAPABILITY 0x80 -#define CHSC_FLAG_VALIDITY 0x40 +struct qdio_irq; -#define CHSC_FLAG_SIGA_INPUT_NECESSARY 0x40 -#define CHSC_FLAG_SIGA_OUTPUT_NECESSARY 0x20 -#define CHSC_FLAG_SIGA_SYNC_NECESSARY 0x10 -#define CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS 0x08 -#define CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS 0x04 +struct siga_flag { + u8 input:1; + u8 output:1; + u8 sync:1; + u8 no_sync_ti:1; + u8 no_sync_out_ti:1; + u8 no_sync_out_pci:1; + u8:2; +} __attribute__ ((packed)); -struct qdio_chsc_ssqd { +struct chsc_ssqd_area { struct chsc_header request; - u16 reserved1:10; - u16 ssid:2; - u16 fmt:4; + u16:10; + u8 ssid:2; + u8 fmt:4; u16 first_sch; - u16 reserved2; + u16:16; u16 last_sch; - u32 reserved3; + u32:32; struct chsc_header response; - u32 reserved4; - u8 flags; - u8 reserved5; - u16 sch; - u8 qfmt; - u8 parm; - u8 qdioac1; - u8 sch_class; - u8 pct; - u8 icnt; - u8 reserved7; - u8 ocnt; - u8 reserved8; - u8 mbccnt; - u16 qdioac2; - u64 sch_token; -}; + u32:32; + struct qdio_ssqd_desc qdio_ssqd; +} __attribute__ ((packed)); -struct qdio_perf_stats { -#ifdef CONFIG_64BIT - atomic64_t tl_runs; - atomic64_t outbound_tl_runs; - atomic64_t outbound_tl_runs_resched; - atomic64_t inbound_tl_runs; - atomic64_t inbound_tl_runs_resched; - atomic64_t inbound_thin_tl_runs; - atomic64_t inbound_thin_tl_runs_resched; - - atomic64_t siga_outs; - atomic64_t siga_ins; - atomic64_t siga_syncs; - atomic64_t pcis; - atomic64_t thinints; - atomic64_t fast_reqs; - - atomic64_t outbound_cnt; - atomic64_t inbound_cnt; -#else /* CONFIG_64BIT */ - atomic_t tl_runs; - atomic_t outbound_tl_runs; - atomic_t outbound_tl_runs_resched; - atomic_t inbound_tl_runs; - atomic_t inbound_tl_runs_resched; - atomic_t inbound_thin_tl_runs; - atomic_t inbound_thin_tl_runs_resched; - - atomic_t siga_outs; - atomic_t siga_ins; - atomic_t siga_syncs; - atomic_t pcis; - atomic_t thinints; - atomic_t fast_reqs; - - atomic_t outbound_cnt; - atomic_t inbound_cnt; -#endif /* CONFIG_64BIT */ +struct scssc_area { + struct chsc_header request; + u16 operation_code; + u16:16; + u32:32; + u32:32; + u64 summary_indicator_addr; + u64 subchannel_indicator_addr; + u32 ks:4; + u32 kc:4; + u32:21; + u32 isc:3; + u32 word_with_d_bit; + u32:32; + struct subchannel_id schid; + u32 reserved[1004]; + struct chsc_header response; + u32:32; +} __attribute__ ((packed)); + +struct qdio_input_q { + /* input buffer acknowledgement flag */ + int polling; + + /* last time of noticing incoming data */ + u64 timestamp; + + /* lock for clearing the acknowledgement */ + spinlock_t lock; }; -/* unlikely as the later the better */ -#define SYNC_MEMORY if (unlikely(q->siga_sync)) qdio_siga_sync_q(q) -#define SYNC_MEMORY_ALL if (unlikely(q->siga_sync)) \ - qdio_siga_sync(q,~0U,~0U) -#define SYNC_MEMORY_ALL_OUTB if (unlikely(q->siga_sync)) \ - qdio_siga_sync(q,~0U,0) +struct qdio_output_q { + /* failed siga-w attempts*/ + atomic_t busy_siga_counter; -#define NOW qdio_get_micros() -#define SAVE_TIMESTAMP(q) q->timing.last_transfer_time=NOW -#define GET_SAVED_TIMESTAMP(q) (q->timing.last_transfer_time) -#define SAVE_FRONTIER(q,val) q->last_move_ftc=val -#define GET_SAVED_FRONTIER(q) (q->last_move_ftc) + /* start time of busy condition */ + u64 timestamp; -#define MY_MODULE_STRING(x) #x + /* PCIs are enabled for the queue */ + int pci_out_enabled; -#ifdef CONFIG_64BIT -#define QDIO_GET_ADDR(x) ((__u32)(unsigned long)x) -#else /* CONFIG_64BIT */ -#define QDIO_GET_ADDR(x) ((__u32)(long)x) -#endif /* CONFIG_64BIT */ + /* timer to check for more outbound work */ + struct timer_list timer; +}; struct qdio_q { - volatile struct slsb slsb; + struct slsb slsb; + union { + struct qdio_input_q in; + struct qdio_output_q out; + } u; - char unused[QDIO_MAX_BUFFERS_PER_Q]; + /* queue number */ + int nr; - __u32 * dev_st_chg_ind; + /* bitmask of queue number */ + int mask; + /* input or output queue */ int is_input_q; - struct subchannel_id schid; - struct ccw_device *cdev; - - unsigned int is_iqdio_q; - unsigned int is_thinint_q; - /* bit 0 means queue 0, bit 1 means queue 1, ... */ - unsigned int mask; - unsigned int q_no; + /* list of thinint input queues */ + struct list_head entry; + /* upper-layer program handler */ qdio_handler_t (*handler); - /* points to the next buffer to be checked for having - * been processed by the card (outbound) - * or to the next buffer the program should check for (inbound) */ - volatile int first_to_check; - /* and the last time it was: */ - volatile int last_move_ftc; + /* + * inbound: next buffer the program should check for + * outbound: next buffer to check for having been processed + * by the card + */ + int first_to_check; - atomic_t number_of_buffers_used; - atomic_t polling; + /* first_to_check of the last time */ + int last_move_ftc; - unsigned int siga_in; - unsigned int siga_out; - unsigned int siga_sync; - unsigned int siga_sync_done_on_thinints; - unsigned int siga_sync_done_on_outb_tis; - unsigned int hydra_gives_outbound_pcis; + /* beginning position for calling the program */ + int first_to_kick; - /* used to save beginning position when calling dd_handlers */ - int first_element_to_kick; + /* number of buffers in use by the adapter */ + atomic_t nr_buf_used; - atomic_t use_count; - atomic_t is_in_shutdown; - - void *irq_ptr; - - struct timer_list timer; -#ifdef QDIO_USE_TIMERS_FOR_POLLING - atomic_t timer_already_set; - spinlock_t timer_lock; -#else /* QDIO_USE_TIMERS_FOR_POLLING */ + struct qdio_irq *irq_ptr; struct tasklet_struct tasklet; -#endif /* QDIO_USE_TIMERS_FOR_POLLING */ - - enum qdio_irq_states state; - - /* used to store the error condition during a data transfer */ + /* error condition during a data transfer */ unsigned int qdio_error; - unsigned int siga_error; - unsigned int error_status_flags; - - /* list of interesting queues */ - volatile struct qdio_q *list_next; - volatile struct qdio_q *list_prev; struct sl *sl; - volatile struct sbal *sbal[QDIO_MAX_BUFFERS_PER_Q]; - - struct qdio_buffer *qdio_buffers[QDIO_MAX_BUFFERS_PER_Q]; - - unsigned long int_parm; - - /*struct { - int in_bh_check_limit; - int threshold; - } threshold_classes[QDIO_STATS_CLASSES];*/ - - struct { - /* inbound: the time to stop polling - outbound: the time to kick peer */ - int threshold; /* the real value */ - - /* outbound: last time of do_QDIO - inbound: last time of noticing incoming data */ - /*__u64 last_transfer_times[QDIO_STATS_NUMBER]; - int last_transfer_index; */ - - __u64 last_transfer_time; - __u64 busy_start; - } timing; - atomic_t busy_siga_counter; - unsigned int queue_type; - unsigned int is_pci_out; - - /* leave this member at the end. won't be cleared in qdio_fill_qs */ - struct slib *slib; /* a page is allocated under this pointer, - sl points into this page, offset PAGE_SIZE/2 - (after slib) */ + struct qdio_buffer *sbal[QDIO_MAX_BUFFERS_PER_Q]; + + /* + * Warning: Leave this member at the end so it won't be cleared in + * qdio_fill_qs. A page is allocated under this pointer and used for + * slib and sl. slib is 2048 bytes big and sl points to offset + * PAGE_SIZE / 2. + */ + struct slib *slib; } __attribute__ ((aligned(256))); struct qdio_irq { - __u32 * volatile dev_st_chg_ind; + struct qib qib; + u32 *dsci; /* address of device state change indicator */ + struct ccw_device *cdev; unsigned long int_parm; struct subchannel_id schid; - - unsigned int is_iqdio_irq; - unsigned int is_thinint_irq; - unsigned int hydra_gives_outbound_pcis; - unsigned int sync_done_on_outb_pcis; - - /* QEBSM facility */ - unsigned int is_qebsm; - unsigned long sch_token; + unsigned long sch_token; /* QEBSM facility */ enum qdio_irq_states state; - unsigned int no_input_qs; - unsigned int no_output_qs; + struct siga_flag siga_flag; /* siga sync information from qdioac */ - unsigned char qdioac; + int nr_input_qs; + int nr_output_qs; struct ccw1 ccw; - struct ciw equeue; struct ciw aqueue; - struct qib qib; - - void (*original_int_handler) (struct ccw_device *, - unsigned long, struct irb *); + struct qdio_ssqd_desc ssqd_desc; + + void (*orig_handler) (struct ccw_device *, unsigned long, struct irb *); - /* leave these four members together at the end. won't be cleared in qdio_fill_irq */ + /* + * Warning: Leave these members together at the end so they won't be + * cleared in qdio_setup_irq. + */ struct qdr *qdr; + unsigned long chsc_page; + struct qdio_q *input_qs[QDIO_MAX_QUEUES_PER_IRQ]; struct qdio_q *output_qs[QDIO_MAX_QUEUES_PER_IRQ]; - struct semaphore setting_up_sema; + + struct mutex setup_mutex; }; -#endif + +/* helper functions */ +#define queue_type(q) q->irq_ptr->qib.qfmt + +#define is_thinint_irq(irq) \ + (irq->qib.qfmt == QDIO_IQDIO_QFMT || \ + css_general_characteristics.aif_osa) + +/* the highest iqdio queue is used for multicast */ +static inline int multicast_outbound(struct qdio_q *q) +{ + return (q->irq_ptr->nr_output_qs > 1) && + (q->nr == q->irq_ptr->nr_output_qs - 1); +} + +static inline unsigned long long get_usecs(void) +{ + return monotonic_clock() >> 12; +} + +#define pci_out_supported(q) \ + (q->irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) +#define is_qebsm(q) (q->irq_ptr->sch_token != 0) + +#define need_siga_sync_thinint(q) (!q->irq_ptr->siga_flag.no_sync_ti) +#define need_siga_sync_out_thinint(q) (!q->irq_ptr->siga_flag.no_sync_out_ti) +#define need_siga_in(q) (q->irq_ptr->siga_flag.input) +#define need_siga_out(q) (q->irq_ptr->siga_flag.output) +#define need_siga_sync(q) (q->irq_ptr->siga_flag.sync) +#define siga_syncs_out_pci(q) (q->irq_ptr->siga_flag.no_sync_out_pci) + +#define for_each_input_queue(irq_ptr, q, i) \ + for (i = 0, q = irq_ptr->input_qs[0]; \ + i < irq_ptr->nr_input_qs; \ + q = irq_ptr->input_qs[++i]) +#define for_each_output_queue(irq_ptr, q, i) \ + for (i = 0, q = irq_ptr->output_qs[0]; \ + i < irq_ptr->nr_output_qs; \ + q = irq_ptr->output_qs[++i]) + +#define prev_buf(bufnr) \ + ((bufnr + QDIO_MAX_BUFFERS_MASK) & QDIO_MAX_BUFFERS_MASK) +#define next_buf(bufnr) \ + ((bufnr + 1) & QDIO_MAX_BUFFERS_MASK) +#define add_buf(bufnr, inc) \ + ((bufnr + inc) & QDIO_MAX_BUFFERS_MASK) + +/* prototypes for thin interrupt */ +void qdio_sync_after_thinint(struct qdio_q *q); +int get_buf_state(struct qdio_q *q, unsigned int bufnr, unsigned char *state); +void qdio_check_outbound_after_thinint(struct qdio_q *q); +int qdio_inbound_q_moved(struct qdio_q *q); +void qdio_kick_inbound_handler(struct qdio_q *q); +void qdio_stop_polling(struct qdio_q *q); +int qdio_siga_sync_q(struct qdio_q *q); + +void qdio_setup_thinint(struct qdio_irq *irq_ptr); +int qdio_establish_thinint(struct qdio_irq *irq_ptr); +void qdio_shutdown_thinint(struct qdio_irq *irq_ptr); +void tiqdio_add_input_queues(struct qdio_irq *irq_ptr); +void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr); +void tiqdio_inbound_processing(unsigned long q); +int tiqdio_allocate_memory(void); +void tiqdio_free_memory(void); +int tiqdio_register_thinints(void); +void tiqdio_unregister_thinints(void); + +/* prototypes for setup */ +void qdio_inbound_processing(unsigned long data); +void qdio_outbound_processing(unsigned long data); +void qdio_outbound_timer(unsigned long data); +void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, + struct irb *irb); +int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs, + int nr_output_qs); +void qdio_setup_ssqd_info(struct qdio_irq *irq_ptr); +int qdio_setup_irq(struct qdio_initialize *init_data); +void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, + struct ccw_device *cdev); +void qdio_release_memory(struct qdio_irq *irq_ptr); +int qdio_setup_init(void); +void qdio_setup_exit(void); + +#endif /* _CIO_QDIO_H */ diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c new file mode 100644 index 000000000000..337aa3087a78 --- /dev/null +++ b/drivers/s390/cio/qdio_debug.c @@ -0,0 +1,240 @@ +/* + * drivers/s390/cio/qdio_debug.c + * + * Copyright IBM Corp. 2008 + * + * Author: Jan Glauber (jang@linux.vnet.ibm.com) + */ +#include +#include +#include +#include +#include +#include "qdio_debug.h" +#include "qdio.h" + +debug_info_t *qdio_dbf_setup; +debug_info_t *qdio_dbf_trace; + +static struct dentry *debugfs_root; +#define MAX_DEBUGFS_QUEUES 32 +static struct dentry *debugfs_queues[MAX_DEBUGFS_QUEUES] = { NULL }; +static DEFINE_MUTEX(debugfs_mutex); + +void qdio_allocate_do_dbf(struct qdio_initialize *init_data) +{ + char dbf_text[20]; + + sprintf(dbf_text, "qfmt:%x", init_data->q_format); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_HEX0(0, setup, init_data->adapter_name, 8); + sprintf(dbf_text, "qpff%4x", init_data->qib_param_field_format); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_HEX0(0, setup, &init_data->qib_param_field, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->input_slib_elements, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->output_slib_elements, sizeof(void *)); + sprintf(dbf_text, "niq:%4x", init_data->no_input_qs); + QDIO_DBF_TEXT0(0, setup, dbf_text); + sprintf(dbf_text, "noq:%4x", init_data->no_output_qs); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_HEX0(0, setup, &init_data->input_handler, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->output_handler, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->int_parm, sizeof(long)); + QDIO_DBF_HEX0(0, setup, &init_data->flags, sizeof(long)); + QDIO_DBF_HEX0(0, setup, &init_data->input_sbal_addr_array, sizeof(void *)); + QDIO_DBF_HEX0(0, setup, &init_data->output_sbal_addr_array, sizeof(void *)); +} + +static void qdio_unregister_dbf_views(void) +{ + if (qdio_dbf_setup) + debug_unregister(qdio_dbf_setup); + if (qdio_dbf_trace) + debug_unregister(qdio_dbf_trace); +} + +static int qdio_register_dbf_views(void) +{ + qdio_dbf_setup = debug_register("qdio_setup", QDIO_DBF_SETUP_PAGES, + QDIO_DBF_SETUP_NR_AREAS, + QDIO_DBF_SETUP_LEN); + if (!qdio_dbf_setup) + goto oom; + debug_register_view(qdio_dbf_setup, &debug_hex_ascii_view); + debug_set_level(qdio_dbf_setup, QDIO_DBF_SETUP_LEVEL); + + qdio_dbf_trace = debug_register("qdio_trace", QDIO_DBF_TRACE_PAGES, + QDIO_DBF_TRACE_NR_AREAS, + QDIO_DBF_TRACE_LEN); + if (!qdio_dbf_trace) + goto oom; + debug_register_view(qdio_dbf_trace, &debug_hex_ascii_view); + debug_set_level(qdio_dbf_trace, QDIO_DBF_TRACE_LEVEL); + return 0; +oom: + qdio_unregister_dbf_views(); + return -ENOMEM; +} + +static int qstat_show(struct seq_file *m, void *v) +{ + unsigned char state; + struct qdio_q *q = m->private; + int i; + + if (!q) + return 0; + + seq_printf(m, "device state indicator: %d\n", *q->irq_ptr->dsci); + seq_printf(m, "nr_used: %d\n", atomic_read(&q->nr_buf_used)); + seq_printf(m, "ftc: %d\n", q->first_to_check); + seq_printf(m, "last_move_ftc: %d\n", q->last_move_ftc); + seq_printf(m, "polling: %d\n", q->u.in.polling); + seq_printf(m, "slsb buffer states:\n"); + + qdio_siga_sync_q(q); + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) { + get_buf_state(q, i, &state); + switch (state) { + case SLSB_P_INPUT_NOT_INIT: + case SLSB_P_OUTPUT_NOT_INIT: + seq_printf(m, "N"); + break; + case SLSB_P_INPUT_PRIMED: + case SLSB_CU_OUTPUT_PRIMED: + seq_printf(m, "+"); + break; + case SLSB_P_INPUT_ACK: + seq_printf(m, "A"); + break; + case SLSB_P_INPUT_ERROR: + case SLSB_P_OUTPUT_ERROR: + seq_printf(m, "x"); + break; + case SLSB_CU_INPUT_EMPTY: + case SLSB_P_OUTPUT_EMPTY: + seq_printf(m, "-"); + break; + case SLSB_P_INPUT_HALTED: + case SLSB_P_OUTPUT_HALTED: + seq_printf(m, "."); + break; + default: + seq_printf(m, "?"); + } + if (i == 63) + seq_printf(m, "\n"); + } + seq_printf(m, "\n"); + return 0; +} + +static ssize_t qstat_seq_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct qdio_q *q = seq->private; + + if (!q) + return 0; + + if (q->is_input_q) + xchg(q->irq_ptr->dsci, 1); + local_bh_disable(); + tasklet_schedule(&q->tasklet); + local_bh_enable(); + return count; +} + +static int qstat_seq_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, qstat_show, + filp->f_path.dentry->d_inode->i_private); +} + +static void get_queue_name(struct qdio_q *q, struct ccw_device *cdev, char *name) +{ + memset(name, 0, sizeof(name)); + sprintf(name, "%s", cdev->dev.bus_id); + if (q->is_input_q) + sprintf(name + strlen(name), "_input"); + else + sprintf(name + strlen(name), "_output"); + sprintf(name + strlen(name), "_%d", q->nr); +} + +static void remove_debugfs_entry(struct qdio_q *q) +{ + int i; + + for (i = 0; i < MAX_DEBUGFS_QUEUES; i++) { + if (!debugfs_queues[i]) + continue; + if (debugfs_queues[i]->d_inode->i_private == q) { + debugfs_remove(debugfs_queues[i]); + debugfs_queues[i] = NULL; + } + } +} + +static struct file_operations debugfs_fops = { + .owner = THIS_MODULE, + .open = qstat_seq_open, + .read = seq_read, + .write = qstat_seq_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static void setup_debugfs_entry(struct qdio_q *q, struct ccw_device *cdev) +{ + int i = 0; + char name[40]; + + while (debugfs_queues[i] != NULL) { + i++; + if (i >= MAX_DEBUGFS_QUEUES) + return; + } + get_queue_name(q, cdev, name); + debugfs_queues[i] = debugfs_create_file(name, S_IFREG | S_IRUGO | S_IWUSR, + debugfs_root, q, &debugfs_fops); +} + +void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) +{ + struct qdio_q *q; + int i; + + mutex_lock(&debugfs_mutex); + for_each_input_queue(irq_ptr, q, i) + setup_debugfs_entry(q, cdev); + for_each_output_queue(irq_ptr, q, i) + setup_debugfs_entry(q, cdev); + mutex_unlock(&debugfs_mutex); +} + +void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) +{ + struct qdio_q *q; + int i; + + mutex_lock(&debugfs_mutex); + for_each_input_queue(irq_ptr, q, i) + remove_debugfs_entry(q); + for_each_output_queue(irq_ptr, q, i) + remove_debugfs_entry(q); + mutex_unlock(&debugfs_mutex); +} + +int __init qdio_debug_init(void) +{ + debugfs_root = debugfs_create_dir("qdio_queues", NULL); + return qdio_register_dbf_views(); +} + +void qdio_debug_exit(void) +{ + debugfs_remove(debugfs_root); + qdio_unregister_dbf_views(); +} diff --git a/drivers/s390/cio/qdio_debug.h b/drivers/s390/cio/qdio_debug.h new file mode 100644 index 000000000000..8484b83698e1 --- /dev/null +++ b/drivers/s390/cio/qdio_debug.h @@ -0,0 +1,91 @@ +/* + * drivers/s390/cio/qdio_debug.h + * + * Copyright IBM Corp. 2008 + * + * Author: Jan Glauber (jang@linux.vnet.ibm.com) + */ +#ifndef QDIO_DEBUG_H +#define QDIO_DEBUG_H + +#include +#include +#include "qdio.h" + +#define QDIO_DBF_HEX(ex, name, level, addr, len) \ + do { \ + if (ex) \ + debug_exception(qdio_dbf_##name, level, (void *)(addr), len); \ + else \ + debug_event(qdio_dbf_##name, level, (void *)(addr), len); \ + } while (0) +#define QDIO_DBF_TEXT(ex, name, level, text) \ + do { \ + if (ex) \ + debug_text_exception(qdio_dbf_##name, level, text); \ + else \ + debug_text_event(qdio_dbf_##name, level, text); \ + } while (0) + +#define QDIO_DBF_HEX0(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 0, addr, len) +#define QDIO_DBF_HEX1(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 1, addr, len) +#define QDIO_DBF_HEX2(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 2, addr, len) + +#ifdef CONFIG_QDIO_DEBUG +#define QDIO_DBF_HEX3(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 3, addr, len) +#define QDIO_DBF_HEX4(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 4, addr, len) +#define QDIO_DBF_HEX5(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 5, addr, len) +#define QDIO_DBF_HEX6(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 6, addr, len) +#else +#define QDIO_DBF_HEX3(ex, name, addr, len) do {} while (0) +#define QDIO_DBF_HEX4(ex, name, addr, len) do {} while (0) +#define QDIO_DBF_HEX5(ex, name, addr, len) do {} while (0) +#define QDIO_DBF_HEX6(ex, name, addr, len) do {} while (0) +#endif /* CONFIG_QDIO_DEBUG */ + +#define QDIO_DBF_TEXT0(ex, name, text) QDIO_DBF_TEXT(ex, name, 0, text) +#define QDIO_DBF_TEXT1(ex, name, text) QDIO_DBF_TEXT(ex, name, 1, text) +#define QDIO_DBF_TEXT2(ex, name, text) QDIO_DBF_TEXT(ex, name, 2, text) + +#ifdef CONFIG_QDIO_DEBUG +#define QDIO_DBF_TEXT3(ex, name, text) QDIO_DBF_TEXT(ex, name, 3, text) +#define QDIO_DBF_TEXT4(ex, name, text) QDIO_DBF_TEXT(ex, name, 4, text) +#define QDIO_DBF_TEXT5(ex, name, text) QDIO_DBF_TEXT(ex, name, 5, text) +#define QDIO_DBF_TEXT6(ex, name, text) QDIO_DBF_TEXT(ex, name, 6, text) +#else +#define QDIO_DBF_TEXT3(ex, name, text) do {} while (0) +#define QDIO_DBF_TEXT4(ex, name, text) do {} while (0) +#define QDIO_DBF_TEXT5(ex, name, text) do {} while (0) +#define QDIO_DBF_TEXT6(ex, name, text) do {} while (0) +#endif /* CONFIG_QDIO_DEBUG */ + +/* s390dbf views */ +#define QDIO_DBF_SETUP_LEN 8 +#define QDIO_DBF_SETUP_PAGES 4 +#define QDIO_DBF_SETUP_NR_AREAS 1 + +#define QDIO_DBF_TRACE_LEN 8 +#define QDIO_DBF_TRACE_NR_AREAS 2 + +#ifdef CONFIG_QDIO_DEBUG +#define QDIO_DBF_TRACE_PAGES 16 +#define QDIO_DBF_SETUP_LEVEL 6 +#define QDIO_DBF_TRACE_LEVEL 4 +#else /* !CONFIG_QDIO_DEBUG */ +#define QDIO_DBF_TRACE_PAGES 4 +#define QDIO_DBF_SETUP_LEVEL 2 +#define QDIO_DBF_TRACE_LEVEL 2 +#endif /* CONFIG_QDIO_DEBUG */ + +extern debug_info_t *qdio_dbf_setup; +extern debug_info_t *qdio_dbf_trace; + +void qdio_allocate_do_dbf(struct qdio_initialize *init_data); +void debug_print_bstat(struct qdio_q *q); +void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, + struct ccw_device *cdev); +void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, + struct ccw_device *cdev); +int qdio_debug_init(void); +void qdio_debug_exit(void); +#endif diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c new file mode 100644 index 000000000000..d10c73cc1688 --- /dev/null +++ b/drivers/s390/cio/qdio_main.c @@ -0,0 +1,1755 @@ +/* + * linux/drivers/s390/cio/qdio_main.c + * + * Linux for s390 qdio support, buffer handling, qdio API and module support. + * + * Copyright 2000,2008 IBM Corp. + * Author(s): Utz Bacher + * Jan Glauber + * 2.6 cio integration by Cornelia Huck + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cio.h" +#include "css.h" +#include "device.h" +#include "qdio.h" +#include "qdio_debug.h" +#include "qdio_perf.h" + +MODULE_AUTHOR("Utz Bacher ,"\ + "Jan Glauber "); +MODULE_DESCRIPTION("QDIO base support"); +MODULE_LICENSE("GPL"); + +static inline int do_siga_sync(struct subchannel_id schid, + unsigned int out_mask, unsigned int in_mask) +{ + register unsigned long __fc asm ("0") = 2; + register struct subchannel_id __schid asm ("1") = schid; + register unsigned long out asm ("2") = out_mask; + register unsigned long in asm ("3") = in_mask; + int cc; + + asm volatile( + " siga 0\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "d" (__fc), "d" (__schid), "d" (out), "d" (in) : "cc"); + return cc; +} + +static inline int do_siga_input(struct subchannel_id schid, unsigned int mask) +{ + register unsigned long __fc asm ("0") = 1; + register struct subchannel_id __schid asm ("1") = schid; + register unsigned long __mask asm ("2") = mask; + int cc; + + asm volatile( + " siga 0\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "d" (__fc), "d" (__schid), "d" (__mask) : "cc", "memory"); + return cc; +} + +/** + * do_siga_output - perform SIGA-w/wt function + * @schid: subchannel id or in case of QEBSM the subchannel token + * @mask: which output queues to process + * @bb: busy bit indicator, set only if SIGA-w/wt could not access a buffer + * @fc: function code to perform + * + * Returns cc or QDIO_ERROR_SIGA_ACCESS_EXCEPTION. + * Note: For IQDC unicast queues only the highest priority queue is processed. + */ +static inline int do_siga_output(unsigned long schid, unsigned long mask, + u32 *bb, unsigned int fc) +{ + register unsigned long __fc asm("0") = fc; + register unsigned long __schid asm("1") = schid; + register unsigned long __mask asm("2") = mask; + int cc = QDIO_ERROR_SIGA_ACCESS_EXCEPTION; + + asm volatile( + " siga 0\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (cc), "+d" (__fc), "+d" (__schid), "+d" (__mask) + : : "cc", "memory"); + *bb = ((unsigned int) __fc) >> 31; + return cc; +} + +static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) +{ + char dbf_text[15]; + + /* all done or next buffer state different */ + if (ccq == 0 || ccq == 32) + return 0; + /* not all buffers processed */ + if (ccq == 96 || ccq == 97) + return 1; + /* notify devices immediately */ + sprintf(dbf_text, "%d", ccq); + QDIO_DBF_TEXT2(1, trace, dbf_text); + return -EIO; +} + +/** + * qdio_do_eqbs - extract buffer states for QEBSM + * @q: queue to manipulate + * @state: state of the extracted buffers + * @start: buffer number to start at + * @count: count of buffers to examine + * + * Returns the number of successfull extracted equal buffer states. + * Stops processing if a state is different from the last buffers state. + */ +static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, + int start, int count) +{ + unsigned int ccq = 0; + int tmp_count = count, tmp_start = start; + int nr = q->nr; + int rc; + char dbf_text[15]; + + BUG_ON(!q->irq_ptr->sch_token); + + if (!q->is_input_q) + nr += q->irq_ptr->nr_input_qs; +again: + ccq = do_eqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count); + rc = qdio_check_ccq(q, ccq); + + /* At least one buffer was processed, return and extract the remaining + * buffers later. + */ + if ((ccq == 96) && (count != tmp_count)) + return (count - tmp_count); + if (rc == 1) { + QDIO_DBF_TEXT5(1, trace, "eqAGAIN"); + goto again; + } + + if (rc < 0) { + QDIO_DBF_TEXT2(1, trace, "eqberr"); + sprintf(dbf_text, "%2x,%2x,%d,%d", count, tmp_count, ccq, nr); + QDIO_DBF_TEXT2(1, trace, dbf_text); + q->handler(q->irq_ptr->cdev, + QDIO_ERROR_ACTIVATE_CHECK_CONDITION, + 0, -1, -1, q->irq_ptr->int_parm); + return 0; + } + return count - tmp_count; +} + +/** + * qdio_do_sqbs - set buffer states for QEBSM + * @q: queue to manipulate + * @state: new state of the buffers + * @start: first buffer number to change + * @count: how many buffers to change + * + * Returns the number of successfully changed buffers. + * Does retrying until the specified count of buffer states is set or an + * error occurs. + */ +static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start, + int count) +{ + unsigned int ccq = 0; + int tmp_count = count, tmp_start = start; + int nr = q->nr; + int rc; + char dbf_text[15]; + + BUG_ON(!q->irq_ptr->sch_token); + + if (!q->is_input_q) + nr += q->irq_ptr->nr_input_qs; +again: + ccq = do_sqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count); + rc = qdio_check_ccq(q, ccq); + if (rc == 1) { + QDIO_DBF_TEXT5(1, trace, "sqAGAIN"); + goto again; + } + if (rc < 0) { + QDIO_DBF_TEXT3(1, trace, "sqberr"); + sprintf(dbf_text, "%2x,%2x", count, tmp_count); + QDIO_DBF_TEXT3(1, trace, dbf_text); + sprintf(dbf_text, "%d,%d", ccq, nr); + QDIO_DBF_TEXT3(1, trace, dbf_text); + + q->handler(q->irq_ptr->cdev, + QDIO_ERROR_ACTIVATE_CHECK_CONDITION, + 0, -1, -1, q->irq_ptr->int_parm); + return 0; + } + WARN_ON(tmp_count); + return count - tmp_count; +} + +/* returns number of examined buffers and their common state in *state */ +static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, + unsigned char *state, unsigned int count) +{ + unsigned char __state = 0; + int i; + + BUG_ON(bufnr > QDIO_MAX_BUFFERS_MASK); + BUG_ON(count > QDIO_MAX_BUFFERS_PER_Q); + + if (is_qebsm(q)) + return qdio_do_eqbs(q, state, bufnr, count); + + for (i = 0; i < count; i++) { + if (!__state) + __state = q->slsb.val[bufnr]; + else if (q->slsb.val[bufnr] != __state) + break; + bufnr = next_buf(bufnr); + } + *state = __state; + return i; +} + +inline int get_buf_state(struct qdio_q *q, unsigned int bufnr, + unsigned char *state) +{ + return get_buf_states(q, bufnr, state, 1); +} + +/* wrap-around safe setting of slsb states, returns number of changed buffers */ +static inline int set_buf_states(struct qdio_q *q, int bufnr, + unsigned char state, int count) +{ + int i; + + BUG_ON(bufnr > QDIO_MAX_BUFFERS_MASK); + BUG_ON(count > QDIO_MAX_BUFFERS_PER_Q); + + if (is_qebsm(q)) + return qdio_do_sqbs(q, state, bufnr, count); + + for (i = 0; i < count; i++) { + xchg(&q->slsb.val[bufnr], state); + bufnr = next_buf(bufnr); + } + return count; +} + +static inline int set_buf_state(struct qdio_q *q, int bufnr, + unsigned char state) +{ + return set_buf_states(q, bufnr, state, 1); +} + +/* set slsb states to initial state */ +void qdio_init_buf_states(struct qdio_irq *irq_ptr) +{ + struct qdio_q *q; + int i; + + for_each_input_queue(irq_ptr, q, i) + set_buf_states(q, 0, SLSB_P_INPUT_NOT_INIT, + QDIO_MAX_BUFFERS_PER_Q); + for_each_output_queue(irq_ptr, q, i) + set_buf_states(q, 0, SLSB_P_OUTPUT_NOT_INIT, + QDIO_MAX_BUFFERS_PER_Q); +} + +static int qdio_siga_sync(struct qdio_q *q, unsigned int output, + unsigned int input) +{ + int cc; + + if (!need_siga_sync(q)) + return 0; + + qdio_perf_stat_inc(&perf_stats.siga_sync); + + cc = do_siga_sync(q->irq_ptr->schid, output, input); + if (cc) { + QDIO_DBF_TEXT4(0, trace, "sigasync"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + QDIO_DBF_HEX3(0, trace, &cc, sizeof(int *)); + } + return cc; +} + +inline int qdio_siga_sync_q(struct qdio_q *q) +{ + if (q->is_input_q) + return qdio_siga_sync(q, 0, q->mask); + else + return qdio_siga_sync(q, q->mask, 0); +} + +static inline int qdio_siga_sync_out(struct qdio_q *q) +{ + return qdio_siga_sync(q, ~0U, 0); +} + +static inline int qdio_siga_sync_all(struct qdio_q *q) +{ + return qdio_siga_sync(q, ~0U, ~0U); +} + +static inline int qdio_do_siga_output(struct qdio_q *q, unsigned int *busy_bit) +{ + unsigned int fc = 0; + unsigned long schid; + + if (!is_qebsm(q)) + schid = *((u32 *)&q->irq_ptr->schid); + else { + schid = q->irq_ptr->sch_token; + fc |= 0x80; + } + return do_siga_output(schid, q->mask, busy_bit, fc); +} + +static int qdio_siga_output(struct qdio_q *q) +{ + int cc; + u32 busy_bit; + u64 start_time = 0; + + QDIO_DBF_TEXT5(0, trace, "sigaout"); + QDIO_DBF_HEX5(0, trace, &q, sizeof(void *)); + + qdio_perf_stat_inc(&perf_stats.siga_out); +again: + cc = qdio_do_siga_output(q, &busy_bit); + if (queue_type(q) == QDIO_IQDIO_QFMT && cc == 2 && busy_bit) { + if (!start_time) + start_time = get_usecs(); + else if ((get_usecs() - start_time) < QDIO_BUSY_BIT_PATIENCE) + goto again; + } + + if (cc == 2 && busy_bit) + cc |= QDIO_ERROR_SIGA_BUSY; + if (cc) + QDIO_DBF_HEX3(0, trace, &cc, sizeof(int *)); + return cc; +} + +static inline int qdio_siga_input(struct qdio_q *q) +{ + int cc; + + QDIO_DBF_TEXT4(0, trace, "sigain"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + + qdio_perf_stat_inc(&perf_stats.siga_in); + + cc = do_siga_input(q->irq_ptr->schid, q->mask); + if (cc) + QDIO_DBF_HEX3(0, trace, &cc, sizeof(int *)); + return cc; +} + +/* called from thinint inbound handler */ +void qdio_sync_after_thinint(struct qdio_q *q) +{ + if (pci_out_supported(q)) { + if (need_siga_sync_thinint(q)) + qdio_siga_sync_all(q); + else if (need_siga_sync_out_thinint(q)) + qdio_siga_sync_out(q); + } else + qdio_siga_sync_q(q); +} + +inline void qdio_stop_polling(struct qdio_q *q) +{ + spin_lock_bh(&q->u.in.lock); + if (!q->u.in.polling) { + spin_unlock_bh(&q->u.in.lock); + return; + } + q->u.in.polling = 0; + qdio_perf_stat_inc(&perf_stats.debug_stop_polling); + + /* show the card that we are not polling anymore */ + set_buf_state(q, q->last_move_ftc, SLSB_P_INPUT_NOT_INIT); + spin_unlock_bh(&q->u.in.lock); +} + +static void announce_buffer_error(struct qdio_q *q) +{ + char dbf_text[15]; + + if (q->is_input_q) + QDIO_DBF_TEXT3(1, trace, "inperr"); + else + QDIO_DBF_TEXT3(0, trace, "outperr"); + + sprintf(dbf_text, "%x-%x-%x", q->first_to_check, + q->sbal[q->first_to_check]->element[14].flags, + q->sbal[q->first_to_check]->element[15].flags); + QDIO_DBF_TEXT3(1, trace, dbf_text); + QDIO_DBF_HEX2(1, trace, q->sbal[q->first_to_check], 256); + + q->qdio_error = QDIO_ERROR_SLSB_STATE; +} + +static int get_inbound_buffer_frontier(struct qdio_q *q) +{ + int count, stop; + unsigned char state; + + /* + * If we still poll don't update last_move_ftc, keep the + * previously ACK buffer there. + */ + if (!q->u.in.polling) + q->last_move_ftc = q->first_to_check; + + /* + * Don't check 128 buffers, as otherwise qdio_inbound_q_moved + * would return 0. + */ + count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK); + stop = add_buf(q->first_to_check, count); + + /* + * No siga sync here, as a PCI or we after a thin interrupt + * will sync the queues. + */ + + /* need to set count to 1 for non-qebsm */ + if (!is_qebsm(q)) + count = 1; + +check_next: + if (q->first_to_check == stop) + goto out; + + count = get_buf_states(q, q->first_to_check, &state, count); + if (!count) + goto out; + + switch (state) { + case SLSB_P_INPUT_PRIMED: + QDIO_DBF_TEXT5(0, trace, "inptprim"); + + /* + * Only ACK the first buffer. The ACK will be removed in + * qdio_stop_polling. + */ + if (q->u.in.polling) + state = SLSB_P_INPUT_NOT_INIT; + else { + q->u.in.polling = 1; + state = SLSB_P_INPUT_ACK; + } + set_buf_state(q, q->first_to_check, state); + + /* + * Need to change all PRIMED buffers to NOT_INIT, otherwise + * we're loosing initiative in the thinint code. + */ + if (count > 1) + set_buf_states(q, next_buf(q->first_to_check), + SLSB_P_INPUT_NOT_INIT, count - 1); + + /* + * No siga-sync needed for non-qebsm here, as the inbound queue + * will be synced on the next siga-r, resp. + * tiqdio_is_inbound_q_done will do the siga-sync. + */ + q->first_to_check = add_buf(q->first_to_check, count); + atomic_sub(count, &q->nr_buf_used); + goto check_next; + case SLSB_P_INPUT_ERROR: + announce_buffer_error(q); + /* process the buffer, the upper layer will take care of it */ + q->first_to_check = add_buf(q->first_to_check, count); + atomic_sub(count, &q->nr_buf_used); + break; + case SLSB_CU_INPUT_EMPTY: + case SLSB_P_INPUT_NOT_INIT: + case SLSB_P_INPUT_ACK: + QDIO_DBF_TEXT5(0, trace, "inpnipro"); + break; + default: + BUG(); + } +out: + QDIO_DBF_HEX4(0, trace, &q->first_to_check, sizeof(int)); + return q->first_to_check; +} + +int qdio_inbound_q_moved(struct qdio_q *q) +{ + int bufnr; + + bufnr = get_inbound_buffer_frontier(q); + + if ((bufnr != q->last_move_ftc) || q->qdio_error) { + if (!need_siga_sync(q) && !pci_out_supported(q)) + q->u.in.timestamp = get_usecs(); + + QDIO_DBF_TEXT4(0, trace, "inhasmvd"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + return 1; + } else + return 0; +} + +static int qdio_inbound_q_done(struct qdio_q *q) +{ + unsigned char state; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; +#endif + + if (!atomic_read(&q->nr_buf_used)) + return 1; + + /* + * We need that one for synchronization with the adapter, as it + * does a kind of PCI avoidance. + */ + qdio_siga_sync_q(q); + + get_buf_state(q, q->first_to_check, &state); + if (state == SLSB_P_INPUT_PRIMED) + /* we got something to do */ + return 0; + + /* on VM, we don't poll, so the q is always done here */ + if (need_siga_sync(q) || pci_out_supported(q)) + return 1; + + /* + * At this point we know, that inbound first_to_check + * has (probably) not moved (see qdio_inbound_processing). + */ + if (get_usecs() > q->u.in.timestamp + QDIO_INPUT_THRESHOLD) { +#ifdef CONFIG_QDIO_DEBUG + QDIO_DBF_TEXT4(0, trace, "inqisdon"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + sprintf(dbf_text, "pf%02x", q->first_to_check); + QDIO_DBF_TEXT4(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + return 1; + } else { +#ifdef CONFIG_QDIO_DEBUG + QDIO_DBF_TEXT4(0, trace, "inqisntd"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + sprintf(dbf_text, "pf%02x", q->first_to_check); + QDIO_DBF_TEXT4(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + return 0; + } +} + +void qdio_kick_inbound_handler(struct qdio_q *q) +{ + int count, start, end; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; +#endif + + qdio_perf_stat_inc(&perf_stats.inbound_handler); + + start = q->first_to_kick; + end = q->first_to_check; + if (end >= start) + count = end - start; + else + count = end + QDIO_MAX_BUFFERS_PER_Q - start; + +#ifdef CONFIG_QDIO_DEBUG + sprintf(dbf_text, "s=%2xc=%2x", start, count); + QDIO_DBF_TEXT4(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) + return; + + q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr, + start, count, q->irq_ptr->int_parm); + + /* for the next time */ + q->first_to_kick = q->first_to_check; + q->qdio_error = 0; +} + +static void __qdio_inbound_processing(struct qdio_q *q) +{ + qdio_perf_stat_inc(&perf_stats.tasklet_inbound); +again: + if (!qdio_inbound_q_moved(q)) + return; + + qdio_kick_inbound_handler(q); + + if (!qdio_inbound_q_done(q)) + /* means poll time is not yet over */ + goto again; + + qdio_stop_polling(q); + /* + * We need to check again to not lose initiative after + * resetting the ACK state. + */ + if (!qdio_inbound_q_done(q)) + goto again; +} + +/* inbound tasklet */ +void qdio_inbound_processing(unsigned long data) +{ + struct qdio_q *q = (struct qdio_q *)data; + __qdio_inbound_processing(q); +} + +static int get_outbound_buffer_frontier(struct qdio_q *q) +{ + int count, stop; + unsigned char state; + + if (((queue_type(q) != QDIO_IQDIO_QFMT) && !pci_out_supported(q)) || + (queue_type(q) == QDIO_IQDIO_QFMT && multicast_outbound(q))) + qdio_siga_sync_q(q); + + /* + * Don't check 128 buffers, as otherwise qdio_inbound_q_moved + * would return 0. + */ + count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK); + stop = add_buf(q->first_to_check, count); + + /* need to set count to 1 for non-qebsm */ + if (!is_qebsm(q)) + count = 1; + +check_next: + if (q->first_to_check == stop) + return q->first_to_check; + + count = get_buf_states(q, q->first_to_check, &state, count); + if (!count) + return q->first_to_check; + + switch (state) { + case SLSB_P_OUTPUT_EMPTY: + /* the adapter got it */ + QDIO_DBF_TEXT5(0, trace, "outpempt"); + + atomic_sub(count, &q->nr_buf_used); + q->first_to_check = add_buf(q->first_to_check, count); + /* + * We fetch all buffer states at once. get_buf_states may + * return count < stop. For QEBSM we do not loop. + */ + if (is_qebsm(q)) + break; + goto check_next; + case SLSB_P_OUTPUT_ERROR: + announce_buffer_error(q); + /* process the buffer, the upper layer will take care of it */ + q->first_to_check = add_buf(q->first_to_check, count); + atomic_sub(count, &q->nr_buf_used); + break; + case SLSB_CU_OUTPUT_PRIMED: + /* the adapter has not fetched the output yet */ + QDIO_DBF_TEXT5(0, trace, "outpprim"); + break; + case SLSB_P_OUTPUT_NOT_INIT: + case SLSB_P_OUTPUT_HALTED: + break; + default: + BUG(); + } + return q->first_to_check; +} + +/* all buffers processed? */ +static inline int qdio_outbound_q_done(struct qdio_q *q) +{ + return atomic_read(&q->nr_buf_used) == 0; +} + +static inline int qdio_outbound_q_moved(struct qdio_q *q) +{ + int bufnr; + + bufnr = get_outbound_buffer_frontier(q); + + if ((bufnr != q->last_move_ftc) || q->qdio_error) { + q->last_move_ftc = bufnr; + QDIO_DBF_TEXT4(0, trace, "oqhasmvd"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + return 1; + } else + return 0; +} + +/* + * VM could present us cc=2 and busy bit set on SIGA-write + * during reconfiguration of their Guest LAN (only in iqdio mode, + * otherwise qdio is asynchronous and cc=2 and busy bit there will take + * the queues down immediately). + * + * Therefore qdio_siga_output will try for a short time constantly, + * if such a condition occurs. If it doesn't change, it will + * increase the busy_siga_counter and save the timestamp, and + * schedule the queue for later processing. qdio_outbound_processing + * will check out the counter. If non-zero, it will call qdio_kick_outbound_q + * as often as the value of the counter. This will attempt further SIGA + * instructions. For each successful SIGA, the counter is + * decreased, for failing SIGAs the counter remains the same, after + * all. After some time of no movement, qdio_kick_outbound_q will + * finally fail and reflect corresponding error codes to call + * the upper layer module and have it take the queues down. + * + * Note that this is a change from the original HiperSockets design + * (saying cc=2 and busy bit means take the queues down), but in + * these days Guest LAN didn't exist... excessive cc=2 with busy bit + * conditions will still take the queues down, but the threshold is + * higher due to the Guest LAN environment. + * + * Called from outbound tasklet and do_QDIO handler. + */ +static void qdio_kick_outbound_q(struct qdio_q *q) +{ + int rc; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; + + QDIO_DBF_TEXT5(0, trace, "kickoutq"); + QDIO_DBF_HEX5(0, trace, &q, sizeof(void *)); +#endif /* CONFIG_QDIO_DEBUG */ + + if (!need_siga_out(q)) + return; + + rc = qdio_siga_output(q); + switch (rc) { + case 0: + /* went smooth this time, reset timestamp */ + q->u.out.timestamp = 0; + + /* TODO: improve error handling for CC=0 case */ +#ifdef CONFIG_QDIO_DEBUG + QDIO_DBF_TEXT3(0, trace, "cc2reslv"); + sprintf(dbf_text, "%4x%2x%2x", q->irq_ptr->schid.sch_no, q->nr, + atomic_read(&q->u.out.busy_siga_counter)); + QDIO_DBF_TEXT3(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + break; + /* cc=2 and busy bit */ + case (2 | QDIO_ERROR_SIGA_BUSY): + atomic_inc(&q->u.out.busy_siga_counter); + + /* if the last siga was successful, save timestamp here */ + if (!q->u.out.timestamp) + q->u.out.timestamp = get_usecs(); + + /* if we're in time, don't touch qdio_error */ + if (get_usecs() - q->u.out.timestamp < QDIO_BUSY_BIT_GIVE_UP) { + tasklet_schedule(&q->tasklet); + break; + } + QDIO_DBF_TEXT2(0, trace, "cc2REPRT"); +#ifdef CONFIG_QDIO_DEBUG + sprintf(dbf_text, "%4x%2x%2x", q->irq_ptr->schid.sch_no, q->nr, + atomic_read(&q->u.out.busy_siga_counter)); + QDIO_DBF_TEXT3(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + default: + /* for plain cc=1, 2 or 3 */ + q->qdio_error = rc; + } +} + +static void qdio_kick_outbound_handler(struct qdio_q *q) +{ + int start, end, count; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; +#endif + + start = q->first_to_kick; + end = q->last_move_ftc; + if (end >= start) + count = end - start; + else + count = end + QDIO_MAX_BUFFERS_PER_Q - start; + +#ifdef CONFIG_QDIO_DEBUG + QDIO_DBF_TEXT4(0, trace, "kickouth"); + QDIO_DBF_HEX4(0, trace, &q, sizeof(void *)); + + sprintf(dbf_text, "s=%2xc=%2x", start, count); + QDIO_DBF_TEXT4(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) + return; + + q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr, start, count, + q->irq_ptr->int_parm); + + /* for the next time: */ + q->first_to_kick = q->last_move_ftc; + q->qdio_error = 0; +} + +static void __qdio_outbound_processing(struct qdio_q *q) +{ + int siga_attempts; + + qdio_perf_stat_inc(&perf_stats.tasklet_outbound); + + /* see comment in qdio_kick_outbound_q */ + siga_attempts = atomic_read(&q->u.out.busy_siga_counter); + while (siga_attempts--) { + atomic_dec(&q->u.out.busy_siga_counter); + qdio_kick_outbound_q(q); + } + + BUG_ON(atomic_read(&q->nr_buf_used) < 0); + + if (qdio_outbound_q_moved(q)) + qdio_kick_outbound_handler(q); + + if (queue_type(q) == QDIO_ZFCP_QFMT) { + if (!pci_out_supported(q) && !qdio_outbound_q_done(q)) + tasklet_schedule(&q->tasklet); + return; + } + + /* bail out for HiperSockets unicast queues */ + if (queue_type(q) == QDIO_IQDIO_QFMT && !multicast_outbound(q)) + return; + + if (q->u.out.pci_out_enabled) + return; + + /* + * Now we know that queue type is either qeth without pci enabled + * or HiperSockets multicast. Make sure buffer switch from PRIMED to + * EMPTY is noticed and outbound_handler is called after some time. + */ + if (qdio_outbound_q_done(q)) + del_timer(&q->u.out.timer); + else { + if (!timer_pending(&q->u.out.timer)) { + mod_timer(&q->u.out.timer, jiffies + 10 * HZ); + qdio_perf_stat_inc(&perf_stats.debug_tl_out_timer); + } + } +} + +/* outbound tasklet */ +void qdio_outbound_processing(unsigned long data) +{ + struct qdio_q *q = (struct qdio_q *)data; + __qdio_outbound_processing(q); +} + +void qdio_outbound_timer(unsigned long data) +{ + struct qdio_q *q = (struct qdio_q *)data; + tasklet_schedule(&q->tasklet); +} + +/* called from thinint inbound tasklet */ +void qdio_check_outbound_after_thinint(struct qdio_q *q) +{ + struct qdio_q *out; + int i; + + if (!pci_out_supported(q)) + return; + + for_each_output_queue(q->irq_ptr, out, i) + if (!qdio_outbound_q_done(out)) + tasklet_schedule(&out->tasklet); +} + +static inline void qdio_set_state(struct qdio_irq *irq_ptr, + enum qdio_irq_states state) +{ +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[15]; + + QDIO_DBF_TEXT5(0, trace, "newstate"); + sprintf(dbf_text, "%4x%4x", irq_ptr->schid.sch_no, state); + QDIO_DBF_TEXT5(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + irq_ptr->state = state; + mb(); +} + +static void qdio_irq_check_sense(struct subchannel_id schid, struct irb *irb) +{ + char dbf_text[15]; + + if (irb->esw.esw0.erw.cons) { + sprintf(dbf_text, "sens%4x", schid.sch_no); + QDIO_DBF_TEXT2(1, trace, dbf_text); + QDIO_DBF_HEX0(0, trace, irb, 64); + QDIO_DBF_HEX0(0, trace, irb->ecw, 64); + } +} + +/* PCI interrupt handler */ +static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) +{ + int i; + struct qdio_q *q; + + qdio_perf_stat_inc(&perf_stats.pci_int); + + for_each_input_queue(irq_ptr, q, i) + tasklet_schedule(&q->tasklet); + + if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED)) + return; + + for_each_output_queue(irq_ptr, q, i) { + if (qdio_outbound_q_done(q)) + continue; + + if (!siga_syncs_out_pci(q)) + qdio_siga_sync_q(q); + + tasklet_schedule(&q->tasklet); + } +} + +static void qdio_handle_activate_check(struct ccw_device *cdev, + unsigned long intparm, int cstat, int dstat) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct qdio_q *q; + char dbf_text[15]; + + QDIO_DBF_TEXT2(1, trace, "ick2"); + sprintf(dbf_text, "%s", cdev->dev.bus_id); + QDIO_DBF_TEXT2(1, trace, dbf_text); + QDIO_DBF_HEX2(0, trace, &intparm, sizeof(int)); + QDIO_DBF_HEX2(0, trace, &dstat, sizeof(int)); + QDIO_DBF_HEX2(0, trace, &cstat, sizeof(int)); + + if (irq_ptr->nr_input_qs) { + q = irq_ptr->input_qs[0]; + } else if (irq_ptr->nr_output_qs) { + q = irq_ptr->output_qs[0]; + } else { + dump_stack(); + goto no_handler; + } + q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE_CHECK_CONDITION, + 0, -1, -1, irq_ptr->int_parm); +no_handler: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); +} + +static void qdio_call_shutdown(struct work_struct *work) +{ + struct ccw_device_private *priv; + struct ccw_device *cdev; + + priv = container_of(work, struct ccw_device_private, kick_work); + cdev = priv->cdev; + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + put_device(&cdev->dev); +} + +static void qdio_int_error(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + + switch (irq_ptr->state) { + case QDIO_IRQ_STATE_INACTIVE: + case QDIO_IRQ_STATE_CLEANUP: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + break; + case QDIO_IRQ_STATE_ESTABLISHED: + case QDIO_IRQ_STATE_ACTIVE: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); + if (get_device(&cdev->dev)) { + /* Can't call shutdown from interrupt context. */ + PREPARE_WORK(&cdev->private->kick_work, + qdio_call_shutdown); + queue_work(ccw_device_work, &cdev->private->kick_work); + } + break; + default: + WARN_ON(1); + } + wake_up(&cdev->private->wait_q); +} + +static int qdio_establish_check_errors(struct ccw_device *cdev, int cstat, + int dstat) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + + if (cstat || (dstat & ~(DEV_STAT_CHN_END | DEV_STAT_DEV_END))) { + QDIO_DBF_TEXT2(1, setup, "eq:ckcon"); + goto error; + } + + if (!(dstat & DEV_STAT_DEV_END)) { + QDIO_DBF_TEXT2(1, setup, "eq:no de"); + goto error; + } + + if (dstat & ~(DEV_STAT_CHN_END | DEV_STAT_DEV_END)) { + QDIO_DBF_TEXT2(1, setup, "eq:badio"); + goto error; + } + return 0; +error: + QDIO_DBF_HEX2(0, trace, &cstat, sizeof(int)); + QDIO_DBF_HEX2(0, trace, &dstat, sizeof(int)); + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + return 1; +} + +static void qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, + int dstat) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + char dbf_text[15]; + + sprintf(dbf_text, "qehi%4x", cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_TEXT0(0, trace, dbf_text); + + if (!qdio_establish_check_errors(cdev, cstat, dstat)) + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED); +} + +/* qdio interrupt handler */ +void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, + struct irb *irb) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + int cstat, dstat; + char dbf_text[15]; + + qdio_perf_stat_inc(&perf_stats.qdio_int); + + if (!intparm || !irq_ptr) { + sprintf(dbf_text, "qihd%4x", cdev->private->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + return; + } + + if (IS_ERR(irb)) { + switch (PTR_ERR(irb)) { + case -EIO: + sprintf(dbf_text, "ierr%4x", + cdev->private->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + qdio_int_error(cdev); + return; + case -ETIMEDOUT: + sprintf(dbf_text, "qtoh%4x", + cdev->private->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + qdio_int_error(cdev); + return; + default: + WARN_ON(1); + return; + } + } + qdio_irq_check_sense(irq_ptr->schid, irb); + + cstat = irb->scsw.cmd.cstat; + dstat = irb->scsw.cmd.dstat; + + switch (irq_ptr->state) { + case QDIO_IRQ_STATE_INACTIVE: + qdio_establish_handle_irq(cdev, cstat, dstat); + break; + + case QDIO_IRQ_STATE_CLEANUP: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); + break; + + case QDIO_IRQ_STATE_ESTABLISHED: + case QDIO_IRQ_STATE_ACTIVE: + if (cstat & SCHN_STAT_PCI) { + qdio_int_handler_pci(irq_ptr); + /* no state change so no need to wake up wait_q */ + return; + } + if ((cstat & ~SCHN_STAT_PCI) || dstat) { + qdio_handle_activate_check(cdev, intparm, cstat, + dstat); + break; + } + default: + WARN_ON(1); + } + wake_up(&cdev->private->wait_q); +} + +/** + * qdio_get_ssqd_desc - get qdio subchannel description + * @cdev: ccw device to get description for + * + * Returns a pointer to the saved qdio subchannel description, + * or NULL for not setup qdio devices. + */ +struct qdio_ssqd_desc *qdio_get_ssqd_desc(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr; + + QDIO_DBF_TEXT0(0, setup, "getssqd"); + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return NULL; + + return &irq_ptr->ssqd_desc; +} +EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc); + +/** + * qdio_cleanup - shutdown queues and free data structures + * @cdev: associated ccw device + * @how: use halt or clear to shutdown + * + * This function calls qdio_shutdown() for @cdev with method @how + * and on success qdio_free() for @cdev. + */ +int qdio_cleanup(struct ccw_device *cdev, int how) +{ + struct qdio_irq *irq_ptr; + char dbf_text[15]; + int rc; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + sprintf(dbf_text, "qcln%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT1(0, trace, dbf_text); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + rc = qdio_shutdown(cdev, how); + if (rc == 0) + rc = qdio_free(cdev); + return rc; +} +EXPORT_SYMBOL_GPL(qdio_cleanup); + +static void qdio_shutdown_queues(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct qdio_q *q; + int i; + + for_each_input_queue(irq_ptr, q, i) + tasklet_disable(&q->tasklet); + + for_each_output_queue(irq_ptr, q, i) { + tasklet_disable(&q->tasklet); + del_timer(&q->u.out.timer); + } +} + +/** + * qdio_shutdown - shut down a qdio subchannel + * @cdev: associated ccw device + * @how: use halt or clear to shutdown + */ +int qdio_shutdown(struct ccw_device *cdev, int how) +{ + struct qdio_irq *irq_ptr; + int rc; + unsigned long flags; + char dbf_text[15]; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + mutex_lock(&irq_ptr->setup_mutex); + /* + * Subchannel was already shot down. We cannot prevent being called + * twice since cio may trigger a shutdown asynchronously. + */ + if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) { + mutex_unlock(&irq_ptr->setup_mutex); + return 0; + } + + sprintf(dbf_text, "qsqs%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT1(0, trace, dbf_text); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + tiqdio_remove_input_queues(irq_ptr); + qdio_shutdown_queues(cdev); + qdio_shutdown_debug_entries(irq_ptr, cdev); + + /* cleanup subchannel */ + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + + if (how & QDIO_FLAG_CLEANUP_USING_CLEAR) + rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP); + else + /* default behaviour is halt */ + rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP); + if (rc) { + sprintf(dbf_text, "sher%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + sprintf(dbf_text, "rc=%d", rc); + QDIO_DBF_TEXT0(0, setup, dbf_text); + goto no_cleanup; + } + + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP); + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + wait_event_interruptible_timeout(cdev->private->wait_q, + irq_ptr->state == QDIO_IRQ_STATE_INACTIVE || + irq_ptr->state == QDIO_IRQ_STATE_ERR, + 10 * HZ); + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + +no_cleanup: + qdio_shutdown_thinint(irq_ptr); + + /* restore interrupt handler */ + if ((void *)cdev->handler == (void *)qdio_int_handler) + cdev->handler = irq_ptr->orig_handler; + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); + mutex_unlock(&irq_ptr->setup_mutex); + module_put(THIS_MODULE); + if (rc) + return rc; + return 0; +} +EXPORT_SYMBOL_GPL(qdio_shutdown); + +/** + * qdio_free - free data structures for a qdio subchannel + * @cdev: associated ccw device + */ +int qdio_free(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr; + char dbf_text[15]; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + mutex_lock(&irq_ptr->setup_mutex); + + sprintf(dbf_text, "qfqs%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT1(0, trace, dbf_text); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + cdev->private->qdio_data = NULL; + mutex_unlock(&irq_ptr->setup_mutex); + + qdio_release_memory(irq_ptr); + return 0; +} +EXPORT_SYMBOL_GPL(qdio_free); + +/** + * qdio_initialize - allocate and establish queues for a qdio subchannel + * @init_data: initialization data + * + * This function first allocates queues via qdio_allocate() and on success + * establishes them via qdio_establish(). + */ +int qdio_initialize(struct qdio_initialize *init_data) +{ + int rc; + char dbf_text[15]; + + sprintf(dbf_text, "qini%4x", init_data->cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_TEXT0(0, trace, dbf_text); + + rc = qdio_allocate(init_data); + if (rc) + return rc; + + rc = qdio_establish(init_data); + if (rc) + qdio_free(init_data->cdev); + return rc; +} +EXPORT_SYMBOL_GPL(qdio_initialize); + +/** + * qdio_allocate - allocate qdio queues and associated data + * @init_data: initialization data + */ +int qdio_allocate(struct qdio_initialize *init_data) +{ + struct qdio_irq *irq_ptr; + char dbf_text[15]; + + sprintf(dbf_text, "qalc%4x", init_data->cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_TEXT0(0, trace, dbf_text); + + if ((init_data->no_input_qs && !init_data->input_handler) || + (init_data->no_output_qs && !init_data->output_handler)) + return -EINVAL; + + if ((init_data->no_input_qs > QDIO_MAX_QUEUES_PER_IRQ) || + (init_data->no_output_qs > QDIO_MAX_QUEUES_PER_IRQ)) + return -EINVAL; + + if ((!init_data->input_sbal_addr_array) || + (!init_data->output_sbal_addr_array)) + return -EINVAL; + + qdio_allocate_do_dbf(init_data); + + /* irq_ptr must be in GFP_DMA since it contains ccw1.cda */ + irq_ptr = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!irq_ptr) + goto out_err; + QDIO_DBF_TEXT0(0, setup, "irq_ptr:"); + QDIO_DBF_HEX0(0, setup, &irq_ptr, sizeof(void *)); + + mutex_init(&irq_ptr->setup_mutex); + + /* + * Allocate a page for the chsc calls in qdio_establish. + * Must be pre-allocated since a zfcp recovery will call + * qdio_establish. In case of low memory and swap on a zfcp disk + * we may not be able to allocate memory otherwise. + */ + irq_ptr->chsc_page = get_zeroed_page(GFP_KERNEL); + if (!irq_ptr->chsc_page) + goto out_rel; + + /* qdr is used in ccw1.cda which is u32 */ + irq_ptr->qdr = kzalloc(sizeof(struct qdr), GFP_KERNEL | GFP_DMA); + if (!irq_ptr->qdr) + goto out_rel; + WARN_ON((unsigned long)irq_ptr->qdr & 0xfff); + + QDIO_DBF_TEXT0(0, setup, "qdr:"); + QDIO_DBF_HEX0(0, setup, &irq_ptr->qdr, sizeof(void *)); + + if (qdio_allocate_qs(irq_ptr, init_data->no_input_qs, + init_data->no_output_qs)) + goto out_rel; + + init_data->cdev->private->qdio_data = irq_ptr; + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); + return 0; +out_rel: + qdio_release_memory(irq_ptr); +out_err: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(qdio_allocate); + +/** + * qdio_establish - establish queues on a qdio subchannel + * @init_data: initialization data + */ +int qdio_establish(struct qdio_initialize *init_data) +{ + char dbf_text[20]; + struct qdio_irq *irq_ptr; + struct ccw_device *cdev = init_data->cdev; + unsigned long saveflags; + int rc; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + if (cdev->private->state != DEV_STATE_ONLINE) + return -EINVAL; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + + sprintf(dbf_text, "qest%4x", cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_TEXT0(0, trace, dbf_text); + + mutex_lock(&irq_ptr->setup_mutex); + qdio_setup_irq(init_data); + + rc = qdio_establish_thinint(irq_ptr); + if (rc) { + mutex_unlock(&irq_ptr->setup_mutex); + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + return rc; + } + + /* establish q */ + irq_ptr->ccw.cmd_code = irq_ptr->equeue.cmd; + irq_ptr->ccw.flags = CCW_FLAG_SLI; + irq_ptr->ccw.count = irq_ptr->equeue.count; + irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr); + + spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + ccw_device_set_options_mask(cdev, 0); + + rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0); + if (rc) { + sprintf(dbf_text, "eq:io%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + sprintf(dbf_text, "eq:rc%4x", rc); + QDIO_DBF_TEXT2(1, setup, dbf_text); + } + spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); + + if (rc) { + mutex_unlock(&irq_ptr->setup_mutex); + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + return rc; + } + + wait_event_interruptible_timeout(cdev->private->wait_q, + irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED || + irq_ptr->state == QDIO_IRQ_STATE_ERR, HZ); + + if (irq_ptr->state != QDIO_IRQ_STATE_ESTABLISHED) { + mutex_unlock(&irq_ptr->setup_mutex); + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + return -EIO; + } + + qdio_setup_ssqd_info(irq_ptr); + sprintf(dbf_text, "qib ac%2x", irq_ptr->qib.ac); + QDIO_DBF_TEXT2(0, setup, dbf_text); + + /* qebsm is now setup if available, initialize buffer states */ + qdio_init_buf_states(irq_ptr); + + mutex_unlock(&irq_ptr->setup_mutex); + qdio_print_subchannel_info(irq_ptr, cdev); + qdio_setup_debug_entries(irq_ptr, cdev); + return 0; +} +EXPORT_SYMBOL_GPL(qdio_establish); + +/** + * qdio_activate - activate queues on a qdio subchannel + * @cdev: associated cdev + */ +int qdio_activate(struct ccw_device *cdev) +{ + struct qdio_irq *irq_ptr; + int rc; + unsigned long saveflags; + char dbf_text[20]; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + + if (cdev->private->state != DEV_STATE_ONLINE) + return -EINVAL; + + mutex_lock(&irq_ptr->setup_mutex); + if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) { + rc = -EBUSY; + goto out; + } + + sprintf(dbf_text, "qact%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT2(0, setup, dbf_text); + QDIO_DBF_TEXT2(0, trace, dbf_text); + + irq_ptr->ccw.cmd_code = irq_ptr->aqueue.cmd; + irq_ptr->ccw.flags = CCW_FLAG_SLI; + irq_ptr->ccw.count = irq_ptr->aqueue.count; + irq_ptr->ccw.cda = 0; + + spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + ccw_device_set_options(cdev, CCWDEV_REPORT_ALL); + + rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE, + 0, DOIO_DENY_PREFETCH); + if (rc) { + sprintf(dbf_text, "aq:io%4x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT2(1, setup, dbf_text); + sprintf(dbf_text, "aq:rc%4x", rc); + QDIO_DBF_TEXT2(1, setup, dbf_text); + } + spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); + + if (rc) + goto out; + + if (is_thinint_irq(irq_ptr)) + tiqdio_add_input_queues(irq_ptr); + + /* wait for subchannel to become active */ + msleep(5); + + switch (irq_ptr->state) { + case QDIO_IRQ_STATE_STOPPED: + case QDIO_IRQ_STATE_ERR: + mutex_unlock(&irq_ptr->setup_mutex); + qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); + return -EIO; + default: + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ACTIVE); + rc = 0; + } +out: + mutex_unlock(&irq_ptr->setup_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(qdio_activate); + +static inline int buf_in_between(int bufnr, int start, int count) +{ + int end = add_buf(start, count); + + if (end > start) { + if (bufnr >= start && bufnr < end) + return 1; + else + return 0; + } + + /* wrap-around case */ + if ((bufnr >= start && bufnr <= QDIO_MAX_BUFFERS_PER_Q) || + (bufnr < end)) + return 1; + else + return 0; +} + +/** + * handle_inbound - reset processed input buffers + * @q: queue containing the buffers + * @callflags: flags + * @bufnr: first buffer to process + * @count: how many buffers are emptied + */ +static void handle_inbound(struct qdio_q *q, unsigned int callflags, + int bufnr, int count) +{ + unsigned long flags; + int used, rc; + + /* + * do_QDIO could run in parallel with the queue tasklet so the + * upper-layer programm could empty the ACK'ed buffer here. + * If that happens we must clear the polling flag, otherwise + * qdio_stop_polling() could set the buffer to NOT_INIT after + * it was set to EMPTY which would kill us. + */ + spin_lock_irqsave(&q->u.in.lock, flags); + if (q->u.in.polling) + if (buf_in_between(q->last_move_ftc, bufnr, count)) + q->u.in.polling = 0; + + count = set_buf_states(q, bufnr, SLSB_CU_INPUT_EMPTY, count); + spin_unlock_irqrestore(&q->u.in.lock, flags); + + used = atomic_add_return(count, &q->nr_buf_used) - count; + BUG_ON(used + count > QDIO_MAX_BUFFERS_PER_Q); + + /* no need to signal as long as the adapter had free buffers */ + if (used) + return; + + if (need_siga_in(q)) { + rc = qdio_siga_input(q); + if (rc) + q->qdio_error = rc; + } +} + +/** + * handle_outbound - process filled outbound buffers + * @q: queue containing the buffers + * @callflags: flags + * @bufnr: first buffer to process + * @count: how many buffers are filled + */ +static void handle_outbound(struct qdio_q *q, unsigned int callflags, + int bufnr, int count) +{ + unsigned char state; + int used; + + qdio_perf_stat_inc(&perf_stats.outbound_handler); + + count = set_buf_states(q, bufnr, SLSB_CU_OUTPUT_PRIMED, count); + used = atomic_add_return(count, &q->nr_buf_used); + BUG_ON(used > QDIO_MAX_BUFFERS_PER_Q); + + if (callflags & QDIO_FLAG_PCI_OUT) + q->u.out.pci_out_enabled = 1; + else + q->u.out.pci_out_enabled = 0; + + if (queue_type(q) == QDIO_IQDIO_QFMT) { + if (multicast_outbound(q)) + qdio_kick_outbound_q(q); + else + /* + * One siga-w per buffer required for unicast + * HiperSockets. + */ + while (count--) + qdio_kick_outbound_q(q); + goto out; + } + + if (need_siga_sync(q)) { + qdio_siga_sync_q(q); + goto out; + } + + /* try to fast requeue buffers */ + get_buf_state(q, prev_buf(bufnr), &state); + if (state != SLSB_CU_OUTPUT_PRIMED) + qdio_kick_outbound_q(q); + else { + QDIO_DBF_TEXT5(0, trace, "fast-req"); + qdio_perf_stat_inc(&perf_stats.fast_requeue); + } +out: + /* Fixme: could wait forever if called from process context */ + tasklet_schedule(&q->tasklet); +} + +/** + * do_QDIO - process input or output buffers + * @cdev: associated ccw_device for the qdio subchannel + * @callflags: input or output and special flags from the program + * @q_nr: queue number + * @bufnr: buffer number + * @count: how many buffers to process + */ +int do_QDIO(struct ccw_device *cdev, unsigned int callflags, + int q_nr, int bufnr, int count) +{ + struct qdio_irq *irq_ptr; +#ifdef CONFIG_QDIO_DEBUG + char dbf_text[20]; + + sprintf(dbf_text, "doQD%04x", cdev->private->schid.sch_no); + QDIO_DBF_TEXT3(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + if ((bufnr > QDIO_MAX_BUFFERS_PER_Q) || + (count > QDIO_MAX_BUFFERS_PER_Q) || + (q_nr > QDIO_MAX_QUEUES_PER_IRQ)) + return -EINVAL; + + if (!count) + return 0; + + irq_ptr = cdev->private->qdio_data; + if (!irq_ptr) + return -ENODEV; + +#ifdef CONFIG_QDIO_DEBUG + if (callflags & QDIO_FLAG_SYNC_INPUT) + QDIO_DBF_HEX3(0, trace, &irq_ptr->input_qs[q_nr], + sizeof(void *)); + else + QDIO_DBF_HEX3(0, trace, &irq_ptr->output_qs[q_nr], + sizeof(void *)); + + sprintf(dbf_text, "flag%04x", callflags); + QDIO_DBF_TEXT3(0, trace, dbf_text); + sprintf(dbf_text, "qi%02xct%02x", bufnr, count); + QDIO_DBF_TEXT3(0, trace, dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + + if (irq_ptr->state != QDIO_IRQ_STATE_ACTIVE) + return -EBUSY; + + if (callflags & QDIO_FLAG_SYNC_INPUT) + handle_inbound(irq_ptr->input_qs[q_nr], + callflags, bufnr, count); + else if (callflags & QDIO_FLAG_SYNC_OUTPUT) + handle_outbound(irq_ptr->output_qs[q_nr], + callflags, bufnr, count); + else { + QDIO_DBF_TEXT3(1, trace, "doQD:inv"); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(do_QDIO); + +static int __init init_QDIO(void) +{ + int rc; + + rc = qdio_setup_init(); + if (rc) + return rc; + rc = tiqdio_allocate_memory(); + if (rc) + goto out_cache; + rc = qdio_debug_init(); + if (rc) + goto out_ti; + rc = qdio_setup_perf_stats(); + if (rc) + goto out_debug; + rc = tiqdio_register_thinints(); + if (rc) + goto out_perf; + return 0; + +out_perf: + qdio_remove_perf_stats(); +out_debug: + qdio_debug_exit(); +out_ti: + tiqdio_free_memory(); +out_cache: + qdio_setup_exit(); + return rc; +} + +static void __exit exit_QDIO(void) +{ + tiqdio_unregister_thinints(); + tiqdio_free_memory(); + qdio_remove_perf_stats(); + qdio_debug_exit(); + qdio_setup_exit(); +} + +module_init(init_QDIO); +module_exit(exit_QDIO); diff --git a/drivers/s390/cio/qdio_perf.c b/drivers/s390/cio/qdio_perf.c new file mode 100644 index 000000000000..ea01b85b1cc9 --- /dev/null +++ b/drivers/s390/cio/qdio_perf.c @@ -0,0 +1,151 @@ +/* + * drivers/s390/cio/qdio_perf.c + * + * Copyright IBM Corp. 2008 + * + * Author: Jan Glauber (jang@linux.vnet.ibm.com) + */ +#include +#include +#include +#include + +#include "cio.h" +#include "css.h" +#include "device.h" +#include "ioasm.h" +#include "chsc.h" +#include "qdio_debug.h" +#include "qdio_perf.h" + +int qdio_performance_stats; +struct qdio_perf_stats perf_stats; + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *qdio_perf_pde; +#endif + +inline void qdio_perf_stat_inc(atomic_long_t *count) +{ + if (qdio_performance_stats) + atomic_long_inc(count); +} + +inline void qdio_perf_stat_dec(atomic_long_t *count) +{ + if (qdio_performance_stats) + atomic_long_dec(count); +} + +/* + * procfs functions + */ +static int qdio_perf_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "Number of qdio interrupts\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.qdio_int)); + seq_printf(m, "Number of PCI interrupts\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.pci_int)); + seq_printf(m, "Number of adapter interrupts\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.thin_int)); + seq_printf(m, "\n"); + seq_printf(m, "Inbound tasklet runs\t\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.tasklet_inbound)); + seq_printf(m, "Outbound tasklet runs\t\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.tasklet_outbound)); + seq_printf(m, "Adapter interrupt tasklet runs/loops\t\t: %li/%li\n", + (long)atomic_long_read(&perf_stats.tasklet_thinint), + (long)atomic_long_read(&perf_stats.tasklet_thinint_loop)); + seq_printf(m, "Adapter interrupt inbound tasklet runs/loops\t: %li/%li\n", + (long)atomic_long_read(&perf_stats.thinint_inbound), + (long)atomic_long_read(&perf_stats.thinint_inbound_loop)); + seq_printf(m, "\n"); + seq_printf(m, "Number of SIGA In issued\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.siga_in)); + seq_printf(m, "Number of SIGA Out issued\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.siga_out)); + seq_printf(m, "Number of SIGA Sync issued\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.siga_sync)); + seq_printf(m, "\n"); + seq_printf(m, "Number of inbound transfers\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.inbound_handler)); + seq_printf(m, "Number of outbound transfers\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.outbound_handler)); + seq_printf(m, "\n"); + seq_printf(m, "Number of fast requeues (outg. SBAL w/o SIGA)\t: %li\n", + (long)atomic_long_read(&perf_stats.fast_requeue)); + seq_printf(m, "Number of outbound tasklet mod_timer calls\t: %li\n", + (long)atomic_long_read(&perf_stats.debug_tl_out_timer)); + seq_printf(m, "Number of stop polling calls\t\t\t: %li\n", + (long)atomic_long_read(&perf_stats.debug_stop_polling)); + seq_printf(m, "AI inbound tasklet loops after stop polling\t: %li\n", + (long)atomic_long_read(&perf_stats.thinint_inbound_loop2)); + seq_printf(m, "\n"); + return 0; +} +static int qdio_perf_seq_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, qdio_perf_proc_show, NULL); +} + +static struct file_operations qdio_perf_proc_fops = { + .owner = THIS_MODULE, + .open = qdio_perf_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * sysfs functions + */ +static ssize_t qdio_perf_stats_show(struct bus_type *bus, char *buf) +{ + return sprintf(buf, "%i\n", qdio_performance_stats ? 1 : 0); +} + +static ssize_t qdio_perf_stats_store(struct bus_type *bus, + const char *buf, size_t count) +{ + unsigned long i; + + if (strict_strtoul(buf, 16, &i) != 0) + return -EINVAL; + if ((i != 0) && (i != 1)) + return -EINVAL; + if (i == qdio_performance_stats) + return count; + + qdio_performance_stats = i; + /* reset performance statistics */ + if (i == 0) + memset(&perf_stats, 0, sizeof(struct qdio_perf_stats)); + return count; +} + +static BUS_ATTR(qdio_performance_stats, 0644, qdio_perf_stats_show, + qdio_perf_stats_store); + +int __init qdio_setup_perf_stats(void) +{ + int rc; + + rc = bus_create_file(&ccw_bus_type, &bus_attr_qdio_performance_stats); + if (rc) + return rc; + +#ifdef CONFIG_PROC_FS + memset(&perf_stats, 0, sizeof(struct qdio_perf_stats)); + qdio_perf_pde = proc_create("qdio_perf", S_IFREG | S_IRUGO, + NULL, &qdio_perf_proc_fops); +#endif + return 0; +} + +void __exit qdio_remove_perf_stats(void) +{ +#ifdef CONFIG_PROC_FS + remove_proc_entry("qdio_perf", NULL); +#endif + bus_remove_file(&ccw_bus_type, &bus_attr_qdio_performance_stats); +} diff --git a/drivers/s390/cio/qdio_perf.h b/drivers/s390/cio/qdio_perf.h new file mode 100644 index 000000000000..5c406a8b7387 --- /dev/null +++ b/drivers/s390/cio/qdio_perf.h @@ -0,0 +1,54 @@ +/* + * drivers/s390/cio/qdio_perf.h + * + * Copyright IBM Corp. 2008 + * + * Author: Jan Glauber (jang@linux.vnet.ibm.com) + */ +#ifndef QDIO_PERF_H +#define QDIO_PERF_H + +#include +#include +#include + +struct qdio_perf_stats { + /* interrupt handler calls */ + atomic_long_t qdio_int; + atomic_long_t pci_int; + atomic_long_t thin_int; + + /* tasklet runs */ + atomic_long_t tasklet_inbound; + atomic_long_t tasklet_outbound; + atomic_long_t tasklet_thinint; + atomic_long_t tasklet_thinint_loop; + atomic_long_t thinint_inbound; + atomic_long_t thinint_inbound_loop; + atomic_long_t thinint_inbound_loop2; + + /* signal adapter calls */ + atomic_long_t siga_out; + atomic_long_t siga_in; + atomic_long_t siga_sync; + + /* misc */ + atomic_long_t inbound_handler; + atomic_long_t outbound_handler; + atomic_long_t fast_requeue; + + /* for debugging */ + atomic_long_t debug_tl_out_timer; + atomic_long_t debug_stop_polling; +}; + +extern struct qdio_perf_stats perf_stats; +extern int qdio_performance_stats; + +int qdio_setup_perf_stats(void); +void qdio_remove_perf_stats(void); + +extern void qdio_perf_stat_inc(atomic_long_t *count); +extern void qdio_perf_stat_dec(atomic_long_t *count); + +#endif diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c new file mode 100644 index 000000000000..f0923a8aceda --- /dev/null +++ b/drivers/s390/cio/qdio_setup.c @@ -0,0 +1,521 @@ +/* + * driver/s390/cio/qdio_setup.c + * + * qdio queue initialization + * + * Copyright (C) IBM Corp. 2008 + * Author(s): Jan Glauber + */ +#include +#include +#include + +#include "cio.h" +#include "css.h" +#include "device.h" +#include "ioasm.h" +#include "chsc.h" +#include "qdio.h" +#include "qdio_debug.h" + +static struct kmem_cache *qdio_q_cache; + +/* + * qebsm is only available under 64bit but the adapter sets the feature + * flag anyway, so we manually override it. + */ +static inline int qebsm_possible(void) +{ +#ifdef CONFIG_64BIT + return css_general_characteristics.qebsm; +#endif + return 0; +} + +/* + * qib_param_field: pointer to 128 bytes or NULL, if no param field + * nr_input_qs: pointer to nr_queues*128 words of data or NULL + */ +static void set_impl_params(struct qdio_irq *irq_ptr, + unsigned int qib_param_field_format, + unsigned char *qib_param_field, + unsigned long *input_slib_elements, + unsigned long *output_slib_elements) +{ + struct qdio_q *q; + int i, j; + + if (!irq_ptr) + return; + + WARN_ON((unsigned long)&irq_ptr->qib & 0xff); + irq_ptr->qib.pfmt = qib_param_field_format; + if (qib_param_field) + memcpy(irq_ptr->qib.parm, qib_param_field, + QDIO_MAX_BUFFERS_PER_Q); + + if (!input_slib_elements) + goto output; + + for_each_input_queue(irq_ptr, q, i) { + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) + q->slib->slibe[j].parms = + input_slib_elements[i * QDIO_MAX_BUFFERS_PER_Q + j]; + } +output: + if (!output_slib_elements) + return; + + for_each_output_queue(irq_ptr, q, i) { + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) + q->slib->slibe[j].parms = + output_slib_elements[i * QDIO_MAX_BUFFERS_PER_Q + j]; + } +} + +static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues) +{ + struct qdio_q *q; + int i; + + for (i = 0; i < nr_queues; i++) { + q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); + if (!q) + return -ENOMEM; + WARN_ON((unsigned long)q & 0xff); + + q->slib = (struct slib *) __get_free_page(GFP_KERNEL); + if (!q->slib) { + kmem_cache_free(qdio_q_cache, q); + return -ENOMEM; + } + WARN_ON((unsigned long)q->slib & 0x7ff); + irq_ptr_qs[i] = q; + } + return 0; +} + +int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs, int nr_output_qs) +{ + int rc; + + rc = __qdio_allocate_qs(irq_ptr->input_qs, nr_input_qs); + if (rc) + return rc; + rc = __qdio_allocate_qs(irq_ptr->output_qs, nr_output_qs); + return rc; +} + +static void setup_queues_misc(struct qdio_q *q, struct qdio_irq *irq_ptr, + qdio_handler_t *handler, int i) +{ + /* must be cleared by every qdio_establish */ + memset(q, 0, ((char *)&q->slib) - ((char *)q)); + memset(q->slib, 0, PAGE_SIZE); + + q->irq_ptr = irq_ptr; + q->mask = 1 << (31 - i); + q->nr = i; + q->handler = handler; +} + +static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr, + void **sbals_array, char *dbf_text, int i) +{ + struct qdio_q *prev; + int j; + + QDIO_DBF_TEXT0(0, setup, dbf_text); + QDIO_DBF_HEX0(0, setup, &q, sizeof(void *)); + + q->sl = (struct sl *)((char *)q->slib + PAGE_SIZE / 2); + + /* fill in sbal */ + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) { + q->sbal[j] = *sbals_array++; + WARN_ON((unsigned long)q->sbal[j] & 0xff); + } + + /* fill in slib */ + if (i > 0) { + prev = (q->is_input_q) ? irq_ptr->input_qs[i - 1] + : irq_ptr->output_qs[i - 1]; + prev->slib->nsliba = (unsigned long)q->slib; + } + + q->slib->sla = (unsigned long)q->sl; + q->slib->slsba = (unsigned long)&q->slsb.val[0]; + + /* fill in sl */ + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) + q->sl->element[j].sbal = (unsigned long)q->sbal[j]; + + QDIO_DBF_TEXT2(0, setup, "sl-sb-b0"); + QDIO_DBF_HEX2(0, setup, q->sl, sizeof(void *)); + QDIO_DBF_HEX2(0, setup, &q->slsb, sizeof(void *)); + QDIO_DBF_HEX2(0, setup, q->sbal, sizeof(void *)); +} + +static void setup_queues(struct qdio_irq *irq_ptr, + struct qdio_initialize *qdio_init) +{ + char dbf_text[20]; + struct qdio_q *q; + void **input_sbal_array = qdio_init->input_sbal_addr_array; + void **output_sbal_array = qdio_init->output_sbal_addr_array; + int i; + + sprintf(dbf_text, "qfqs%4x", qdio_init->cdev->private->schid.sch_no); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + for_each_input_queue(irq_ptr, q, i) { + sprintf(dbf_text, "in-q%4x", i); + setup_queues_misc(q, irq_ptr, qdio_init->input_handler, i); + + q->is_input_q = 1; + spin_lock_init(&q->u.in.lock); + setup_storage_lists(q, irq_ptr, input_sbal_array, dbf_text, i); + input_sbal_array += QDIO_MAX_BUFFERS_PER_Q; + + if (is_thinint_irq(irq_ptr)) + tasklet_init(&q->tasklet, tiqdio_inbound_processing, + (unsigned long) q); + else + tasklet_init(&q->tasklet, qdio_inbound_processing, + (unsigned long) q); + } + + for_each_output_queue(irq_ptr, q, i) { + sprintf(dbf_text, "outq%4x", i); + setup_queues_misc(q, irq_ptr, qdio_init->output_handler, i); + + q->is_input_q = 0; + setup_storage_lists(q, irq_ptr, output_sbal_array, + dbf_text, i); + output_sbal_array += QDIO_MAX_BUFFERS_PER_Q; + + tasklet_init(&q->tasklet, qdio_outbound_processing, + (unsigned long) q); + setup_timer(&q->u.out.timer, (void(*)(unsigned long)) + &qdio_outbound_timer, (unsigned long)q); + } +} + +static void process_ac_flags(struct qdio_irq *irq_ptr, unsigned char qdioac) +{ + if (qdioac & AC1_SIGA_INPUT_NEEDED) + irq_ptr->siga_flag.input = 1; + if (qdioac & AC1_SIGA_OUTPUT_NEEDED) + irq_ptr->siga_flag.output = 1; + if (qdioac & AC1_SIGA_SYNC_NEEDED) + irq_ptr->siga_flag.sync = 1; + if (qdioac & AC1_AUTOMATIC_SYNC_ON_THININT) + irq_ptr->siga_flag.no_sync_ti = 1; + if (qdioac & AC1_AUTOMATIC_SYNC_ON_OUT_PCI) + irq_ptr->siga_flag.no_sync_out_pci = 1; + + if (irq_ptr->siga_flag.no_sync_out_pci && + irq_ptr->siga_flag.no_sync_ti) + irq_ptr->siga_flag.no_sync_out_ti = 1; +} + +static void check_and_setup_qebsm(struct qdio_irq *irq_ptr, + unsigned char qdioac, unsigned long token) +{ + char dbf_text[15]; + + if (!(irq_ptr->qib.rflags & QIB_RFLAGS_ENABLE_QEBSM)) + goto no_qebsm; + if (!(qdioac & AC1_SC_QEBSM_AVAILABLE) || + (!(qdioac & AC1_SC_QEBSM_ENABLED))) + goto no_qebsm; + + irq_ptr->sch_token = token; + + QDIO_DBF_TEXT0(0, setup, "V=V:1"); + sprintf(dbf_text, "%8lx", irq_ptr->sch_token); + QDIO_DBF_TEXT0(0, setup, dbf_text); + return; + +no_qebsm: + irq_ptr->sch_token = 0; + irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM; + QDIO_DBF_TEXT0(0, setup, "noV=V"); +} + +static int __get_ssqd_info(struct qdio_irq *irq_ptr) +{ + struct chsc_ssqd_area *ssqd; + int rc; + + QDIO_DBF_TEXT0(0, setup, "getssqd"); + ssqd = (struct chsc_ssqd_area *)irq_ptr->chsc_page; + memset(ssqd, 0, PAGE_SIZE); + + ssqd->request = (struct chsc_header) { + .length = 0x0010, + .code = 0x0024, + }; + ssqd->first_sch = irq_ptr->schid.sch_no; + ssqd->last_sch = irq_ptr->schid.sch_no; + ssqd->ssid = irq_ptr->schid.ssid; + + if (chsc(ssqd)) + return -EIO; + rc = chsc_error_from_response(ssqd->response.code); + if (rc) + return rc; + + if (!(ssqd->qdio_ssqd.flags & CHSC_FLAG_QDIO_CAPABILITY) || + !(ssqd->qdio_ssqd.flags & CHSC_FLAG_VALIDITY) || + (ssqd->qdio_ssqd.sch != irq_ptr->schid.sch_no)) + return -EINVAL; + + memcpy(&irq_ptr->ssqd_desc, &ssqd->qdio_ssqd, + sizeof(struct qdio_ssqd_desc)); + return 0; +} + +void qdio_setup_ssqd_info(struct qdio_irq *irq_ptr) +{ + unsigned char qdioac; + char dbf_text[15]; + int rc; + + rc = __get_ssqd_info(irq_ptr); + if (rc) { + QDIO_DBF_TEXT2(0, setup, "ssqdasig"); + sprintf(dbf_text, "schno%x", irq_ptr->schid.sch_no); + QDIO_DBF_TEXT2(0, setup, dbf_text); + sprintf(dbf_text, "rc:%d", rc); + QDIO_DBF_TEXT2(0, setup, dbf_text); + /* all flags set, worst case */ + qdioac = AC1_SIGA_INPUT_NEEDED | AC1_SIGA_OUTPUT_NEEDED | + AC1_SIGA_SYNC_NEEDED; + } else + qdioac = irq_ptr->ssqd_desc.qdioac1; + + check_and_setup_qebsm(irq_ptr, qdioac, irq_ptr->ssqd_desc.sch_token); + process_ac_flags(irq_ptr, qdioac); + + sprintf(dbf_text, "qdioac%2x", qdioac); + QDIO_DBF_TEXT2(0, setup, dbf_text); +} + +void qdio_release_memory(struct qdio_irq *irq_ptr) +{ + struct qdio_q *q; + int i; + + /* + * Must check queue array manually since irq_ptr->nr_input_queues / + * irq_ptr->nr_input_queues may not yet be set. + */ + for (i = 0; i < QDIO_MAX_QUEUES_PER_IRQ; i++) { + q = irq_ptr->input_qs[i]; + if (q) { + free_page((unsigned long) q->slib); + kmem_cache_free(qdio_q_cache, q); + } + } + for (i = 0; i < QDIO_MAX_QUEUES_PER_IRQ; i++) { + q = irq_ptr->output_qs[i]; + if (q) { + free_page((unsigned long) q->slib); + kmem_cache_free(qdio_q_cache, q); + } + } + kfree(irq_ptr->qdr); + free_page(irq_ptr->chsc_page); + free_page((unsigned long) irq_ptr); +} + +static void __qdio_allocate_fill_qdr(struct qdio_irq *irq_ptr, + struct qdio_q **irq_ptr_qs, + int i, int nr) +{ + irq_ptr->qdr->qdf0[i + nr].sliba = + (unsigned long)irq_ptr_qs[i]->slib; + + irq_ptr->qdr->qdf0[i + nr].sla = + (unsigned long)irq_ptr_qs[i]->sl; + + irq_ptr->qdr->qdf0[i + nr].slsba = + (unsigned long)&irq_ptr_qs[i]->slsb.val[0]; + + irq_ptr->qdr->qdf0[i + nr].akey = PAGE_DEFAULT_KEY; + irq_ptr->qdr->qdf0[i + nr].bkey = PAGE_DEFAULT_KEY; + irq_ptr->qdr->qdf0[i + nr].ckey = PAGE_DEFAULT_KEY; + irq_ptr->qdr->qdf0[i + nr].dkey = PAGE_DEFAULT_KEY; +} + +static void setup_qdr(struct qdio_irq *irq_ptr, + struct qdio_initialize *qdio_init) +{ + int i; + + irq_ptr->qdr->qfmt = qdio_init->q_format; + irq_ptr->qdr->iqdcnt = qdio_init->no_input_qs; + irq_ptr->qdr->oqdcnt = qdio_init->no_output_qs; + irq_ptr->qdr->iqdsz = sizeof(struct qdesfmt0) / 4; /* size in words */ + irq_ptr->qdr->oqdsz = sizeof(struct qdesfmt0) / 4; + irq_ptr->qdr->qiba = (unsigned long)&irq_ptr->qib; + irq_ptr->qdr->qkey = PAGE_DEFAULT_KEY; + + for (i = 0; i < qdio_init->no_input_qs; i++) + __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->input_qs, i, 0); + + for (i = 0; i < qdio_init->no_output_qs; i++) + __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->output_qs, i, + qdio_init->no_input_qs); +} + +static void setup_qib(struct qdio_irq *irq_ptr, + struct qdio_initialize *init_data) +{ + if (qebsm_possible()) + irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM; + + irq_ptr->qib.qfmt = init_data->q_format; + if (init_data->no_input_qs) + irq_ptr->qib.isliba = + (unsigned long)(irq_ptr->input_qs[0]->slib); + if (init_data->no_output_qs) + irq_ptr->qib.osliba = + (unsigned long)(irq_ptr->output_qs[0]->slib); + memcpy(irq_ptr->qib.ebcnam, init_data->adapter_name, 8); +} + +int qdio_setup_irq(struct qdio_initialize *init_data) +{ + struct ciw *ciw; + struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data; + int rc; + + memset(irq_ptr, 0, ((char *)&irq_ptr->qdr) - ((char *)irq_ptr)); + /* wipes qib.ac, required by ar7063 */ + memset(irq_ptr->qdr, 0, sizeof(struct qdr)); + + irq_ptr->int_parm = init_data->int_parm; + irq_ptr->nr_input_qs = init_data->no_input_qs; + irq_ptr->nr_output_qs = init_data->no_output_qs; + + irq_ptr->schid = ccw_device_get_subchannel_id(init_data->cdev); + irq_ptr->cdev = init_data->cdev; + setup_queues(irq_ptr, init_data); + + setup_qib(irq_ptr, init_data); + qdio_setup_thinint(irq_ptr); + set_impl_params(irq_ptr, init_data->qib_param_field_format, + init_data->qib_param_field, + init_data->input_slib_elements, + init_data->output_slib_elements); + + /* fill input and output descriptors */ + setup_qdr(irq_ptr, init_data); + + /* qdr, qib, sls, slsbs, slibs, sbales are filled now */ + + /* get qdio commands */ + ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE); + if (!ciw) { + QDIO_DBF_TEXT2(1, setup, "no eq"); + rc = -EINVAL; + goto out_err; + } + irq_ptr->equeue = *ciw; + + ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE); + if (!ciw) { + QDIO_DBF_TEXT2(1, setup, "no aq"); + rc = -EINVAL; + goto out_err; + } + irq_ptr->aqueue = *ciw; + + /* set new interrupt handler */ + irq_ptr->orig_handler = init_data->cdev->handler; + init_data->cdev->handler = qdio_int_handler; + return 0; +out_err: + qdio_release_memory(irq_ptr); + return rc; +} + +void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, + struct ccw_device *cdev) +{ + char s[80]; + + sprintf(s, "%s ", cdev->dev.bus_id); + + switch (irq_ptr->qib.qfmt) { + case QDIO_QETH_QFMT: + sprintf(s + strlen(s), "OSADE "); + break; + case QDIO_ZFCP_QFMT: + sprintf(s + strlen(s), "ZFCP "); + break; + case QDIO_IQDIO_QFMT: + sprintf(s + strlen(s), "HiperSockets "); + break; + } + sprintf(s + strlen(s), "using: "); + + if (!is_thinint_irq(irq_ptr)) + sprintf(s + strlen(s), "no"); + sprintf(s + strlen(s), "AdapterInterrupts "); + if (!(irq_ptr->sch_token != 0)) + sprintf(s + strlen(s), "no"); + sprintf(s + strlen(s), "QEBSM "); + if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED)) + sprintf(s + strlen(s), "no"); + sprintf(s + strlen(s), "OutboundPCI "); + if (!css_general_characteristics.aif_tdd) + sprintf(s + strlen(s), "no"); + sprintf(s + strlen(s), "TDD\n"); + printk(KERN_INFO "qdio: %s", s); + + memset(s, 0, sizeof(s)); + sprintf(s, "%s SIGA required: ", cdev->dev.bus_id); + if (irq_ptr->siga_flag.input) + sprintf(s + strlen(s), "Read "); + if (irq_ptr->siga_flag.output) + sprintf(s + strlen(s), "Write "); + if (irq_ptr->siga_flag.sync) + sprintf(s + strlen(s), "Sync "); + if (!irq_ptr->siga_flag.no_sync_ti) + sprintf(s + strlen(s), "SyncAI "); + if (!irq_ptr->siga_flag.no_sync_out_ti) + sprintf(s + strlen(s), "SyncOutAI "); + if (!irq_ptr->siga_flag.no_sync_out_pci) + sprintf(s + strlen(s), "SyncOutPCI"); + sprintf(s + strlen(s), "\n"); + printk(KERN_INFO "qdio: %s", s); +} + +int __init qdio_setup_init(void) +{ + char dbf_text[15]; + + qdio_q_cache = kmem_cache_create("qdio_q", sizeof(struct qdio_q), + 256, 0, NULL); + if (!qdio_q_cache) + return -ENOMEM; + + /* Check for OSA/FCP thin interrupts (bit 67). */ + sprintf(dbf_text, "thini%1x", + (css_general_characteristics.aif_osa) ? 1 : 0); + QDIO_DBF_TEXT0(0, setup, dbf_text); + + /* Check for QEBSM support in general (bit 58). */ + sprintf(dbf_text, "cssQBS:%1x", + (qebsm_possible()) ? 1 : 0); + QDIO_DBF_TEXT0(0, setup, dbf_text); + return 0; +} + +void __exit qdio_setup_exit(void) +{ + kmem_cache_destroy(qdio_q_cache); +} diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c new file mode 100644 index 000000000000..9291a771d812 --- /dev/null +++ b/drivers/s390/cio/qdio_thinint.c @@ -0,0 +1,380 @@ +/* + * linux/drivers/s390/cio/thinint_qdio.c + * + * thin interrupt support for qdio + * + * Copyright 2000-2008 IBM Corp. + * Author(s): Utz Bacher + * Cornelia Huck + * Jan Glauber + */ +#include +#include +#include +#include +#include +#include + +#include "cio.h" +#include "ioasm.h" +#include "qdio.h" +#include "qdio_debug.h" +#include "qdio_perf.h" + +/* + * Restriction: only 63 iqdio subchannels would have its own indicator, + * after that, subsequent subchannels share one indicator + */ +#define TIQDIO_NR_NONSHARED_IND 63 +#define TIQDIO_NR_INDICATORS (TIQDIO_NR_NONSHARED_IND + 1) +#define TIQDIO_SHARED_IND 63 + +/* list of thin interrupt input queues */ +static LIST_HEAD(tiq_list); + +/* adapter local summary indicator */ +static unsigned char *tiqdio_alsi; + +/* device state change indicators */ +struct indicator_t { + u32 ind; /* u32 because of compare-and-swap performance */ + atomic_t count; /* use count, 0 or 1 for non-shared indicators */ +}; +static struct indicator_t *q_indicators; + +static void tiqdio_tasklet_fn(unsigned long data); +static DECLARE_TASKLET(tiqdio_tasklet, tiqdio_tasklet_fn, 0); + +static int css_qdio_omit_svs; + +static inline unsigned long do_clear_global_summary(void) +{ + register unsigned long __fn asm("1") = 3; + register unsigned long __tmp asm("2"); + register unsigned long __time asm("3"); + + asm volatile( + " .insn rre,0xb2650000,2,0" + : "+d" (__fn), "=d" (__tmp), "=d" (__time)); + return __time; +} + +/* returns addr for the device state change indicator */ +static u32 *get_indicator(void) +{ + int i; + + for (i = 0; i < TIQDIO_NR_NONSHARED_IND; i++) + if (!atomic_read(&q_indicators[i].count)) { + atomic_set(&q_indicators[i].count, 1); + return &q_indicators[i].ind; + } + + /* use the shared indicator */ + atomic_inc(&q_indicators[TIQDIO_SHARED_IND].count); + return &q_indicators[TIQDIO_SHARED_IND].ind; +} + +static void put_indicator(u32 *addr) +{ + int i; + + if (!addr) + return; + i = ((unsigned long)addr - (unsigned long)q_indicators) / + sizeof(struct indicator_t); + atomic_dec(&q_indicators[i].count); +} + +void tiqdio_add_input_queues(struct qdio_irq *irq_ptr) +{ + struct qdio_q *q; + int i; + + /* No TDD facility? If we must use SIGA-s we can also omit SVS. */ + if (!css_qdio_omit_svs && irq_ptr->siga_flag.sync) + css_qdio_omit_svs = 1; + + for_each_input_queue(irq_ptr, q, i) { + list_add_rcu(&q->entry, &tiq_list); + synchronize_rcu(); + } + xchg(irq_ptr->dsci, 1); + tasklet_schedule(&tiqdio_tasklet); +} + +/* + * we cannot stop the tiqdio tasklet here since it is for all + * thinint qdio devices and it must run as long as there is a + * thinint device left + */ +void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr) +{ + struct qdio_q *q; + int i; + + for_each_input_queue(irq_ptr, q, i) { + list_del_rcu(&q->entry); + synchronize_rcu(); + } +} + +static inline int tiqdio_inbound_q_done(struct qdio_q *q) +{ + unsigned char state; + + if (!atomic_read(&q->nr_buf_used)) + return 1; + + qdio_siga_sync_q(q); + get_buf_state(q, q->first_to_check, &state); + + if (state == SLSB_P_INPUT_PRIMED) + /* more work coming */ + return 0; + return 1; +} + +static inline int shared_ind(struct qdio_irq *irq_ptr) +{ + return irq_ptr->dsci == &q_indicators[TIQDIO_SHARED_IND].ind; +} + +static void __tiqdio_inbound_processing(struct qdio_q *q) +{ + qdio_perf_stat_inc(&perf_stats.thinint_inbound); + qdio_sync_after_thinint(q); + + /* + * Maybe we have work on our outbound queues... at least + * we have to check the PCI capable queues. + */ + qdio_check_outbound_after_thinint(q); + +again: + if (!qdio_inbound_q_moved(q)) + return; + + qdio_kick_inbound_handler(q); + + if (!tiqdio_inbound_q_done(q)) { + qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop); + goto again; + } + + qdio_stop_polling(q); + /* + * We need to check again to not lose initiative after + * resetting the ACK state. + */ + if (!tiqdio_inbound_q_done(q)) { + qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop2); + goto again; + } +} + +void tiqdio_inbound_processing(unsigned long data) +{ + struct qdio_q *q = (struct qdio_q *)data; + + __tiqdio_inbound_processing(q); +} + +/* check for work on all inbound thinint queues */ +static void tiqdio_tasklet_fn(unsigned long data) +{ + struct qdio_q *q; + + qdio_perf_stat_inc(&perf_stats.tasklet_thinint); +again: + + /* protect tiq_list entries, only changed in activate or shutdown */ + rcu_read_lock(); + + list_for_each_entry_rcu(q, &tiq_list, entry) + /* only process queues from changed sets */ + if (*q->irq_ptr->dsci) { + + /* only clear it if the indicator is non-shared */ + if (!shared_ind(q->irq_ptr)) + xchg(q->irq_ptr->dsci, 0); + /* + * don't call inbound processing directly since + * that could starve other thinint queues + */ + tasklet_schedule(&q->tasklet); + } + + rcu_read_unlock(); + + /* + * if we used the shared indicator clear it now after all queues + * were processed + */ + if (atomic_read(&q_indicators[TIQDIO_SHARED_IND].count)) { + xchg(&q_indicators[TIQDIO_SHARED_IND].ind, 0); + + /* prevent racing */ + if (*tiqdio_alsi) + xchg(&q_indicators[TIQDIO_SHARED_IND].ind, 1); + } + + /* check for more work */ + if (*tiqdio_alsi) { + xchg(tiqdio_alsi, 0); + qdio_perf_stat_inc(&perf_stats.tasklet_thinint_loop); + goto again; + } +} + +/** + * tiqdio_thinint_handler - thin interrupt handler for qdio + * @ind: pointer to adapter local summary indicator + * @drv_data: NULL + */ +static void tiqdio_thinint_handler(void *ind, void *drv_data) +{ + qdio_perf_stat_inc(&perf_stats.thin_int); + + /* + * SVS only when needed: issue SVS to benefit from iqdio interrupt + * avoidance (SVS clears adapter interrupt suppression overwrite) + */ + if (!css_qdio_omit_svs) + do_clear_global_summary(); + + /* + * reset local summary indicator (tiqdio_alsi) to stop adapter + * interrupts for now, the tasklet will clean all dsci's + */ + xchg((u8 *)ind, 0); + tasklet_hi_schedule(&tiqdio_tasklet); +} + +static int set_subchannel_ind(struct qdio_irq *irq_ptr, int reset) +{ + struct scssc_area *scssc_area; + char dbf_text[15]; + void *ptr; + int rc; + + scssc_area = (struct scssc_area *)irq_ptr->chsc_page; + memset(scssc_area, 0, PAGE_SIZE); + + if (reset) { + scssc_area->summary_indicator_addr = 0; + scssc_area->subchannel_indicator_addr = 0; + } else { + scssc_area->summary_indicator_addr = virt_to_phys(tiqdio_alsi); + scssc_area->subchannel_indicator_addr = + virt_to_phys(irq_ptr->dsci); + } + + scssc_area->request = (struct chsc_header) { + .length = 0x0fe0, + .code = 0x0021, + }; + scssc_area->operation_code = 0; + scssc_area->ks = PAGE_DEFAULT_KEY; + scssc_area->kc = PAGE_DEFAULT_KEY; + scssc_area->isc = QDIO_AIRQ_ISC; + scssc_area->schid = irq_ptr->schid; + + /* enable the time delay disablement facility */ + if (css_general_characteristics.aif_tdd) + scssc_area->word_with_d_bit = 0x10000000; + + rc = chsc(scssc_area); + if (rc) + return -EIO; + + rc = chsc_error_from_response(scssc_area->response.code); + if (rc) { + sprintf(dbf_text, "sidR%4x", scssc_area->response.code); + QDIO_DBF_TEXT1(0, trace, dbf_text); + QDIO_DBF_TEXT1(0, setup, dbf_text); + ptr = &scssc_area->response; + QDIO_DBF_HEX2(1, setup, &ptr, QDIO_DBF_SETUP_LEN); + return rc; + } + + QDIO_DBF_TEXT2(0, setup, "setscind"); + QDIO_DBF_HEX2(0, setup, &scssc_area->summary_indicator_addr, + sizeof(unsigned long)); + QDIO_DBF_HEX2(0, setup, &scssc_area->subchannel_indicator_addr, + sizeof(unsigned long)); + return 0; +} + +/* allocate non-shared indicators and shared indicator */ +int __init tiqdio_allocate_memory(void) +{ + q_indicators = kzalloc(sizeof(struct indicator_t) * TIQDIO_NR_INDICATORS, + GFP_KERNEL); + if (!q_indicators) + return -ENOMEM; + return 0; +} + +void tiqdio_free_memory(void) +{ + kfree(q_indicators); +} + +int __init tiqdio_register_thinints(void) +{ + char dbf_text[20]; + + isc_register(QDIO_AIRQ_ISC); + tiqdio_alsi = s390_register_adapter_interrupt(&tiqdio_thinint_handler, + NULL, QDIO_AIRQ_ISC); + if (IS_ERR(tiqdio_alsi)) { + sprintf(dbf_text, "regthn%lx", PTR_ERR(tiqdio_alsi)); + QDIO_DBF_TEXT0(0, setup, dbf_text); + tiqdio_alsi = NULL; + isc_unregister(QDIO_AIRQ_ISC); + return -ENOMEM; + } + return 0; +} + +int qdio_establish_thinint(struct qdio_irq *irq_ptr) +{ + if (!is_thinint_irq(irq_ptr)) + return 0; + + /* Check for aif time delay disablement. If installed, + * omit SVS even under LPAR + */ + if (css_general_characteristics.aif_tdd) + css_qdio_omit_svs = 1; + return set_subchannel_ind(irq_ptr, 0); +} + +void qdio_setup_thinint(struct qdio_irq *irq_ptr) +{ + if (!is_thinint_irq(irq_ptr)) + return; + irq_ptr->dsci = get_indicator(); + QDIO_DBF_HEX1(0, setup, &irq_ptr->dsci, sizeof(void *)); +} + +void qdio_shutdown_thinint(struct qdio_irq *irq_ptr) +{ + if (!is_thinint_irq(irq_ptr)) + return; + + /* reset adapter interrupt indicators */ + put_indicator(irq_ptr->dsci); + set_subchannel_ind(irq_ptr, 1); +} + +void __exit tiqdio_unregister_thinints(void) +{ + tasklet_disable(&tiqdio_tasklet); + + if (tiqdio_alsi) { + s390_unregister_adapter_interrupt(tiqdio_alsi, QDIO_AIRQ_ISC); + isc_unregister(QDIO_AIRQ_ISC); + } +} diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 699ac11debd8..1895dbb553cd 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -239,11 +239,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa, /*not used unless the microcode gets patched*/ #define QETH_PCI_TIMER_VALUE(card) 3 -#define QETH_MIN_INPUT_THRESHOLD 1 -#define QETH_MAX_INPUT_THRESHOLD 500 -#define QETH_MIN_OUTPUT_THRESHOLD 1 -#define QETH_MAX_OUTPUT_THRESHOLD 300 - /* priority queing */ #define QETH_PRIOQ_DEFAULT QETH_NO_PRIO_QUEUEING #define QETH_DEFAULT_QUEUE 2 @@ -811,17 +806,14 @@ int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *, struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *, enum qeth_ipa_cmds, enum qeth_prot_versions); int qeth_query_setadapterparms(struct qeth_card *); -int qeth_check_qdio_errors(struct qdio_buffer *, unsigned int, - unsigned int, const char *); +int qeth_check_qdio_errors(struct qdio_buffer *, unsigned int, const char *); void qeth_queue_input_buffer(struct qeth_card *, int); struct sk_buff *qeth_core_get_next_skb(struct qeth_card *, struct qdio_buffer *, struct qdio_buffer_element **, int *, struct qeth_hdr **); void qeth_schedule_recovery(struct qeth_card *); void qeth_qdio_output_handler(struct ccw_device *, unsigned int, - unsigned int, unsigned int, - unsigned int, int, int, - unsigned long); + int, int, int, unsigned long); void qeth_clear_ipacmd_list(struct qeth_card *); int qeth_qdio_clear_card(struct qeth_card *, int); void qeth_clear_working_pool_list(struct qeth_card *); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 0ac54dc638c2..c3ad89e302bd 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2073,7 +2073,7 @@ static void qeth_create_qib_param_field_blkt(struct qeth_card *card, static int qeth_qdio_activate(struct qeth_card *card) { QETH_DBF_TEXT(SETUP, 3, "qdioact"); - return qdio_activate(CARD_DDEV(card), 0); + return qdio_activate(CARD_DDEV(card)); } static int qeth_dm_act(struct qeth_card *card) @@ -2349,16 +2349,11 @@ int qeth_init_qdio_queues(struct qeth_card *card) card->qdio.in_q->next_buf_to_init = card->qdio.in_buf_pool.buf_count - 1; rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, - card->qdio.in_buf_pool.buf_count - 1, NULL); + card->qdio.in_buf_pool.buf_count - 1); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); return rc; } - rc = qdio_synchronize(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0); - if (rc) { - QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); - return rc; - } /* outbound queue */ for (i = 0; i < card->qdio.no_out_queues; ++i) { memset(card->qdio.out_qs[i]->qdio_bufs, 0, @@ -2559,9 +2554,9 @@ int qeth_query_setadapterparms(struct qeth_card *card) EXPORT_SYMBOL_GPL(qeth_query_setadapterparms); int qeth_check_qdio_errors(struct qdio_buffer *buf, unsigned int qdio_error, - unsigned int siga_error, const char *dbftext) + const char *dbftext) { - if (qdio_error || siga_error) { + if (qdio_error) { QETH_DBF_TEXT(TRACE, 2, dbftext); QETH_DBF_TEXT(QERR, 2, dbftext); QETH_DBF_TEXT_(QERR, 2, " F15=%02X", @@ -2569,7 +2564,6 @@ int qeth_check_qdio_errors(struct qdio_buffer *buf, unsigned int qdio_error, QETH_DBF_TEXT_(QERR, 2, " F14=%02X", buf->element[14].flags & 0xff); QETH_DBF_TEXT_(QERR, 2, " qerr=%X", qdio_error); - QETH_DBF_TEXT_(QERR, 2, " serr=%X", siga_error); return 1; } return 0; @@ -2622,9 +2616,8 @@ void qeth_queue_input_buffer(struct qeth_card *card, int index) card->perf_stats.inbound_do_qdio_start_time = qeth_get_micros(); } - rc = do_QDIO(CARD_DDEV(card), - QDIO_FLAG_SYNC_INPUT | QDIO_FLAG_UNDER_INTERRUPT, - 0, queue->next_buf_to_init, count, NULL); + rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, + queue->next_buf_to_init, count); if (card->options.performance_stats) card->perf_stats.inbound_do_qdio_time += qeth_get_micros() - @@ -2643,14 +2636,13 @@ void qeth_queue_input_buffer(struct qeth_card *card, int index) EXPORT_SYMBOL_GPL(qeth_queue_input_buffer); static int qeth_handle_send_error(struct qeth_card *card, - struct qeth_qdio_out_buffer *buffer, unsigned int qdio_err, - unsigned int siga_err) + struct qeth_qdio_out_buffer *buffer, unsigned int qdio_err) { int sbalf15 = buffer->buffer->element[15].flags & 0xff; - int cc = siga_err & 3; + int cc = qdio_err & 3; QETH_DBF_TEXT(TRACE, 6, "hdsnderr"); - qeth_check_qdio_errors(buffer->buffer, qdio_err, siga_err, "qouterr"); + qeth_check_qdio_errors(buffer->buffer, qdio_err, "qouterr"); switch (cc) { case 0: if (qdio_err) { @@ -2662,7 +2654,7 @@ static int qeth_handle_send_error(struct qeth_card *card, } return QETH_SEND_ERROR_NONE; case 2: - if (siga_err & QDIO_SIGA_ERROR_B_BIT_SET) { + if (qdio_err & QDIO_ERROR_SIGA_BUSY) { QETH_DBF_TEXT(TRACE, 1, "SIGAcc2B"); QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); return QETH_SEND_ERROR_KICK_IT; @@ -2758,8 +2750,8 @@ static int qeth_flush_buffers_on_no_pci(struct qeth_qdio_out_q *queue) return 0; } -static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int under_int, - int index, int count) +static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, + int count) { struct qeth_qdio_out_buffer *buf; int rc; @@ -2807,12 +2799,10 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int under_int, qeth_get_micros(); } qdio_flags = QDIO_FLAG_SYNC_OUTPUT; - if (under_int) - qdio_flags |= QDIO_FLAG_UNDER_INTERRUPT; if (atomic_read(&queue->set_pci_flags_count)) qdio_flags |= QDIO_FLAG_PCI_OUT; rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, - queue->queue_no, index, count, NULL); + queue->queue_no, index, count); if (queue->card->options.performance_stats) queue->card->perf_stats.outbound_do_qdio_time += qeth_get_micros() - @@ -2866,16 +2856,15 @@ static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) queue->card->perf_stats.bufs_sent_pack += flush_cnt; if (flush_cnt) - qeth_flush_buffers(queue, 1, index, flush_cnt); + qeth_flush_buffers(queue, index, flush_cnt); atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); } } } -void qeth_qdio_output_handler(struct ccw_device *ccwdev, unsigned int status, - unsigned int qdio_error, unsigned int siga_error, - unsigned int __queue, int first_element, int count, - unsigned long card_ptr) +void qeth_qdio_output_handler(struct ccw_device *ccwdev, + unsigned int qdio_error, int __queue, int first_element, + int count, unsigned long card_ptr) { struct qeth_card *card = (struct qeth_card *) card_ptr; struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue]; @@ -2883,15 +2872,12 @@ void qeth_qdio_output_handler(struct ccw_device *ccwdev, unsigned int status, int i; QETH_DBF_TEXT(TRACE, 6, "qdouhdl"); - if (status & QDIO_STATUS_LOOK_FOR_ERROR) { - if (status & QDIO_STATUS_ACTIVATE_CHECK_CONDITION) { - QETH_DBF_TEXT(TRACE, 2, "achkcond"); - QETH_DBF_TEXT_(TRACE, 2, "%s", CARD_BUS_ID(card)); - QETH_DBF_TEXT_(TRACE, 2, "%08x", status); - netif_stop_queue(card->dev); - qeth_schedule_recovery(card); - return; - } + if (qdio_error & QDIO_ERROR_ACTIVATE_CHECK_CONDITION) { + QETH_DBF_TEXT(TRACE, 2, "achkcond"); + QETH_DBF_TEXT_(TRACE, 2, "%s", CARD_BUS_ID(card)); + netif_stop_queue(card->dev); + qeth_schedule_recovery(card); + return; } if (card->options.performance_stats) { card->perf_stats.outbound_handler_cnt++; @@ -2901,8 +2887,7 @@ void qeth_qdio_output_handler(struct ccw_device *ccwdev, unsigned int status, for (i = first_element; i < (first_element + count); ++i) { buffer = &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q]; /*we only handle the KICK_IT error by doing a recovery */ - if (qeth_handle_send_error(card, buffer, - qdio_error, siga_error) + if (qeth_handle_send_error(card, buffer, qdio_error) == QETH_SEND_ERROR_KICK_IT){ netif_stop_queue(card->dev); qeth_schedule_recovery(card); @@ -3164,11 +3149,11 @@ int qeth_do_send_packet_fast(struct qeth_card *card, atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); if (ctx == NULL) { qeth_fill_buffer(queue, buffer, skb); - qeth_flush_buffers(queue, 0, index, 1); + qeth_flush_buffers(queue, index, 1); } else { flush_cnt = qeth_eddp_fill_buffer(queue, ctx, index); WARN_ON(buffers_needed != flush_cnt); - qeth_flush_buffers(queue, 0, index, flush_cnt); + qeth_flush_buffers(queue, index, flush_cnt); } return 0; out: @@ -3221,8 +3206,8 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, * again */ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY){ - qeth_flush_buffers(queue, 0, - start_index, flush_count); + qeth_flush_buffers(queue, start_index, + flush_count); atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); return -EBUSY; @@ -3253,7 +3238,7 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, flush_count += tmp; out: if (flush_count) - qeth_flush_buffers(queue, 0, start_index, flush_count); + qeth_flush_buffers(queue, start_index, flush_count); else if (!atomic_read(&queue->set_pci_flags_count)) atomic_xchg(&queue->state, QETH_OUT_Q_LOCKED_FLUSH); /* @@ -3274,7 +3259,7 @@ out: if (!flush_count && !atomic_read(&queue->set_pci_flags_count)) flush_count += qeth_flush_buffers_on_no_pci(queue); if (flush_count) - qeth_flush_buffers(queue, 0, start_index, flush_count); + qeth_flush_buffers(queue, start_index, flush_count); } /* at this point the queue is UNLOCKED again */ if (queue->card->options.performance_stats && do_pack) @@ -3686,10 +3671,6 @@ static int qeth_qdio_establish(struct qeth_card *card) init_data.q_format = qeth_get_qdio_q_format(card); init_data.qib_param_field_format = 0; init_data.qib_param_field = qib_param_field; - init_data.min_input_threshold = QETH_MIN_INPUT_THRESHOLD; - init_data.max_input_threshold = QETH_MAX_INPUT_THRESHOLD; - init_data.min_output_threshold = QETH_MIN_OUTPUT_THRESHOLD; - init_data.max_output_threshold = QETH_MAX_OUTPUT_THRESHOLD; init_data.no_input_qs = 1; init_data.no_output_qs = card->qdio.no_out_queues; init_data.input_handler = card->discipline.input_handler; @@ -3751,8 +3732,9 @@ static int qeth_core_driver_group(const char *buf, struct device *root_dev, int qeth_core_hardsetup_card(struct qeth_card *card) { + struct qdio_ssqd_desc *qdio_ssqd; int retries = 3; - int mpno; + int mpno = 0; int rc; QETH_DBF_TEXT(SETUP, 2, "hrdsetup"); @@ -3784,7 +3766,10 @@ retry: QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); return rc; } - mpno = qdio_get_ssqd_pct(CARD_DDEV(card)); + + qdio_ssqd = qdio_get_ssqd_desc(CARD_DDEV(card)); + if (qdio_ssqd) + mpno = qdio_ssqd->pcnt; if (mpno) mpno = min(mpno - 1, QETH_MAX_PORTNO); if (card->info.portno > mpno) { diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index f682f7b14480..3fbc3bdec0c5 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -726,8 +726,7 @@ tx_drop: } static void qeth_l2_qdio_input_handler(struct ccw_device *ccwdev, - unsigned int status, unsigned int qdio_err, - unsigned int siga_err, unsigned int queue, + unsigned int qdio_err, unsigned int queue, int first_element, int count, unsigned long card_ptr) { struct net_device *net_dev; @@ -742,23 +741,20 @@ static void qeth_l2_qdio_input_handler(struct ccw_device *ccwdev, card->perf_stats.inbound_cnt++; card->perf_stats.inbound_start_time = qeth_get_micros(); } - if (status & QDIO_STATUS_LOOK_FOR_ERROR) { - if (status & QDIO_STATUS_ACTIVATE_CHECK_CONDITION) { - QETH_DBF_TEXT(TRACE, 1, "qdinchk"); - QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); - QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", first_element, - count); - QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", queue, status); - qeth_schedule_recovery(card); - return; - } + if (qdio_err & QDIO_ERROR_ACTIVATE_CHECK_CONDITION) { + QETH_DBF_TEXT(TRACE, 1, "qdinchk"); + QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); + QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", first_element, + count); + QETH_DBF_TEXT_(TRACE, 1, "%04X", queue); + qeth_schedule_recovery(card); + return; } for (i = first_element; i < (first_element + count); ++i) { index = i % QDIO_MAX_BUFFERS_PER_Q; buffer = &card->qdio.in_q->bufs[index]; - if (!((status & QDIO_STATUS_LOOK_FOR_ERROR) && - qeth_check_qdio_errors(buffer->buffer, - qdio_err, siga_err, "qinerr"))) + if (!(qdio_err && + qeth_check_qdio_errors(buffer->buffer, qdio_err, "qinerr"))) qeth_l2_process_inbound_buffer(card, buffer, index); /* clear buffer and give back to hardware */ qeth_put_buffer_pool_entry(card, buffer->pool_entry); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 06deaee50f6d..22f64aa6dd1f 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2939,8 +2939,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) } static void qeth_l3_qdio_input_handler(struct ccw_device *ccwdev, - unsigned int status, unsigned int qdio_err, - unsigned int siga_err, unsigned int queue, int first_element, + unsigned int qdio_err, unsigned int queue, int first_element, int count, unsigned long card_ptr) { struct net_device *net_dev; @@ -2955,23 +2954,21 @@ static void qeth_l3_qdio_input_handler(struct ccw_device *ccwdev, card->perf_stats.inbound_cnt++; card->perf_stats.inbound_start_time = qeth_get_micros(); } - if (status & QDIO_STATUS_LOOK_FOR_ERROR) { - if (status & QDIO_STATUS_ACTIVATE_CHECK_CONDITION) { - QETH_DBF_TEXT(TRACE, 1, "qdinchk"); - QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); - QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", - first_element, count); - QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", queue, status); - qeth_schedule_recovery(card); - return; - } + if (qdio_err & QDIO_ERROR_ACTIVATE_CHECK_CONDITION) { + QETH_DBF_TEXT(TRACE, 1, "qdinchk"); + QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card)); + QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", + first_element, count); + QETH_DBF_TEXT_(TRACE, 1, "%04X", queue); + qeth_schedule_recovery(card); + return; } for (i = first_element; i < (first_element + count); ++i) { index = i % QDIO_MAX_BUFFERS_PER_Q; buffer = &card->qdio.in_q->bufs[index]; - if (!((status & QDIO_STATUS_LOOK_FOR_ERROR) && + if (!(qdio_err && qeth_check_qdio_errors(buffer->buffer, - qdio_err, siga_err, "qinerr"))) + qdio_err, "qinerr"))) qeth_l3_process_inbound_buffer(card, buffer, index); /* clear buffer and give back to hardware */ qeth_put_buffer_pool_entry(card, buffer->pool_entry); diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 36169c6944fd..fca48b88fc53 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -297,15 +297,13 @@ void zfcp_hba_dbf_event_fsf_unsol(const char *tag, struct zfcp_adapter *adapter, /** * zfcp_hba_dbf_event_qdio - trace event for QDIO related failure * @adapter: adapter affected by this QDIO related event - * @status: as passed by qdio module * @qdio_error: as passed by qdio module - * @siga_error: as passed by qdio module * @sbal_index: first buffer with error condition, as passed by qdio module * @sbal_count: number of buffers affected, as passed by qdio module */ -void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, unsigned int status, - unsigned int qdio_error, unsigned int siga_error, - int sbal_index, int sbal_count) +void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, + unsigned int qdio_error, int sbal_index, + int sbal_count) { struct zfcp_hba_dbf_record *r = &adapter->hba_dbf_buf; unsigned long flags; @@ -313,9 +311,7 @@ void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, unsigned int status, spin_lock_irqsave(&adapter->hba_dbf_lock, flags); memset(r, 0, sizeof(*r)); strncpy(r->tag, "qdio", ZFCP_DBF_TAG_SIZE); - r->u.qdio.status = status; r->u.qdio.qdio_error = qdio_error; - r->u.qdio.siga_error = siga_error; r->u.qdio.sbal_index = sbal_index; r->u.qdio.sbal_count = sbal_count; debug_event(adapter->hba_dbf, 0, r, sizeof(*r)); @@ -398,9 +394,7 @@ static void zfcp_hba_dbf_view_status(char **p, static void zfcp_hba_dbf_view_qdio(char **p, struct zfcp_hba_dbf_record_qdio *r) { - zfcp_dbf_out(p, "status", "0x%08x", r->status); zfcp_dbf_out(p, "qdio_error", "0x%08x", r->qdio_error); - zfcp_dbf_out(p, "siga_error", "0x%08x", r->siga_error); zfcp_dbf_out(p, "sbal_index", "0x%02x", r->sbal_index); zfcp_dbf_out(p, "sbal_count", "0x%02x", r->sbal_count); } diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index d04aea604974..0ddb18449d11 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -139,9 +139,7 @@ struct zfcp_hba_dbf_record_status { } __attribute__ ((packed)); struct zfcp_hba_dbf_record_qdio { - u32 status; u32 qdio_error; - u32 siga_error; u8 sbal_index; u8 sbal_count; } __attribute__ ((packed)); diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 8065b2b224b7..edfdb21591f3 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -48,9 +48,8 @@ extern void zfcp_rec_dbf_event_action(u8, struct zfcp_erp_action *); extern void zfcp_hba_dbf_event_fsf_response(struct zfcp_fsf_req *); extern void zfcp_hba_dbf_event_fsf_unsol(const char *, struct zfcp_adapter *, struct fsf_status_read_buffer *); -extern void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *, - unsigned int, unsigned int, unsigned int, - int, int); +extern void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *, unsigned int, int, + int); extern void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *); extern void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *); extern void zfcp_san_dbf_event_els_request(struct zfcp_fsf_req *); diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 72e3094796d4..d6dbd653fde9 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -74,17 +74,15 @@ static void zfcp_qdio_zero_sbals(struct qdio_buffer *sbal[], int first, int cnt) } } -static void zfcp_qdio_int_req(struct ccw_device *cdev, unsigned int status, - unsigned int qdio_err, unsigned int siga_err, - unsigned int queue_no, int first, int count, +static void zfcp_qdio_int_req(struct ccw_device *cdev, unsigned int qdio_err, + int queue_no, int first, int count, unsigned long parm) { struct zfcp_adapter *adapter = (struct zfcp_adapter *) parm; struct zfcp_qdio_queue *queue = &adapter->req_q; - if (unlikely(status & QDIO_STATUS_LOOK_FOR_ERROR)) { - zfcp_hba_dbf_event_qdio(adapter, status, qdio_err, siga_err, - first, count); + if (unlikely(qdio_err)) { + zfcp_hba_dbf_event_qdio(adapter, qdio_err, first, count); zfcp_qdio_handler_error(adapter, 140); return; } @@ -129,8 +127,7 @@ static void zfcp_qdio_resp_put_back(struct zfcp_adapter *adapter, int processed) count = atomic_read(&queue->count) + processed; - retval = do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT | QDIO_FLAG_UNDER_INTERRUPT, - 0, start, count, NULL); + retval = do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT, 0, start, count); if (unlikely(retval)) { atomic_set(&queue->count, count); @@ -142,9 +139,8 @@ static void zfcp_qdio_resp_put_back(struct zfcp_adapter *adapter, int processed) } } -static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int status, - unsigned int qdio_err, unsigned int siga_err, - unsigned int queue_no, int first, int count, +static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int qdio_err, + int queue_no, int first, int count, unsigned long parm) { struct zfcp_adapter *adapter = (struct zfcp_adapter *) parm; @@ -152,9 +148,8 @@ static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int status, volatile struct qdio_buffer_element *sbale; int sbal_idx, sbale_idx, sbal_no; - if (unlikely(status & QDIO_STATUS_LOOK_FOR_ERROR)) { - zfcp_hba_dbf_event_qdio(adapter, status, qdio_err, siga_err, - first, count); + if (unlikely(qdio_err)) { + zfcp_hba_dbf_event_qdio(adapter, qdio_err, first, count); zfcp_qdio_handler_error(adapter, 147); return; } @@ -362,7 +357,7 @@ int zfcp_qdio_send(struct zfcp_fsf_req *fsf_req) } retval = do_QDIO(adapter->ccw_device, QDIO_FLAG_SYNC_OUTPUT, 0, first, - count, NULL); + count); if (unlikely(retval)) { zfcp_qdio_zero_sbals(req_q->sbal, first, count); return retval; @@ -400,10 +395,6 @@ int zfcp_qdio_allocate(struct zfcp_adapter *adapter) init_data->qib_param_field = NULL; init_data->input_slib_elements = NULL; init_data->output_slib_elements = NULL; - init_data->min_input_threshold = 1; - init_data->max_input_threshold = 5000; - init_data->min_output_threshold = 1; - init_data->max_output_threshold = 1000; init_data->no_input_qs = 1; init_data->no_output_qs = 1; init_data->input_handler = zfcp_qdio_int_resp; @@ -436,9 +427,7 @@ void zfcp_qdio_close(struct zfcp_adapter *adapter) atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status); spin_unlock(&req_q->lock); - while (qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR) - == -EINPROGRESS) - ssleep(1); + qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR); /* cleanup used outbound sbals */ count = atomic_read(&req_q->count); @@ -473,7 +462,7 @@ int zfcp_qdio_open(struct zfcp_adapter *adapter) return -EIO; } - if (qdio_activate(adapter->ccw_device, 0)) { + if (qdio_activate(adapter->ccw_device)) { dev_err(&adapter->ccw_device->dev, "Activate of QDIO queues failed.\n"); goto failed_qdio; @@ -487,7 +476,7 @@ int zfcp_qdio_open(struct zfcp_adapter *adapter) } if (do_QDIO(adapter->ccw_device, QDIO_FLAG_SYNC_INPUT, 0, 0, - QDIO_MAX_BUFFERS_PER_Q, NULL)) { + QDIO_MAX_BUFFERS_PER_Q)) { dev_err(&adapter->ccw_device->dev, "Init of QDIO response queue failed.\n"); goto failed_qdio; @@ -501,9 +490,6 @@ int zfcp_qdio_open(struct zfcp_adapter *adapter) return 0; failed_qdio: - while (qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR) - == -EINPROGRESS) - ssleep(1); - + qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR); return -EIO; } diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index ed53f14007a2..f2467e936e55 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -416,12 +416,17 @@ static int clariion_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_device *sdev; struct scsi_dh_data *scsi_dh_data; struct clariion_dh_data *h; int i, found = 0; unsigned long flags; + if (!scsi_is_sdev_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + if (action == BUS_NOTIFY_ADD_DEVICE) { for (i = 0; clariion_dev_list[i].vendor; i++) { if (!strncmp(sdev->vendor, clariion_dev_list[i].vendor, diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index 12ceab7b3662..ae6be87d6a83 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -131,11 +131,16 @@ static int hp_sw_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_device *sdev; struct scsi_dh_data *scsi_dh_data; int i, found = 0; unsigned long flags; + if (!scsi_is_sdev_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + if (action == BUS_NOTIFY_ADD_DEVICE) { for (i = 0; hp_sw_dh_data_list[i].vendor; i++) { if (!strncmp(sdev->vendor, hp_sw_dh_data_list[i].vendor, diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 6fff077a888d..fdf34b0ec6e1 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -608,12 +608,17 @@ static int rdac_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_device *sdev; struct scsi_dh_data *scsi_dh_data; struct rdac_dh_data *h; int i, found = 0; unsigned long flags; + if (!scsi_is_sdev_device(dev)) + return 0; + + sdev = to_scsi_device(dev); + if (action == BUS_NOTIFY_ADD_DEVICE) { for (i = 0; rdac_dev_list[i].vendor; i++) { if (!strncmp(sdev->vendor, rdac_dev_list[i].vendor, diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 683bce375c74..f843c1383a4b 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -258,19 +258,6 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) return ide_stopped; } -static ide_startstop_t -idescsi_atapi_abort(ide_drive_t *drive, struct request *rq) -{ - debug_log("%s called for %lu\n", __func__, - ((struct ide_atapi_pc *) rq->special)->scsi_cmd->serial_number); - - rq->errors |= ERROR_MAX; - - idescsi_end_request(drive, 0, 0); - - return ide_stopped; -} - static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs) { idescsi_scsi_t *scsi = drive_to_idescsi(drive); @@ -524,7 +511,6 @@ static ide_driver_t idescsi_driver = { .do_request = idescsi_do_request, .end_request = idescsi_end_request, .error = idescsi_atapi_error, - .abort = idescsi_atapi_abort, #ifdef CONFIG_IDE_PROC_FS .proc = idescsi_proc, #endif diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index 97c68d021d28..638b68649e79 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -383,21 +383,14 @@ static int __devinit check_name(char *name) return 0; } -static int __devinit check_resources(struct pnp_option *option) +static int __devinit check_resources(struct pnp_dev *dev) { - struct pnp_option *tmp; - if (!option) - return 0; + resource_size_t base[] = {0x2f8, 0x3f8, 0x2e8, 0x3e8}; + int i; - for (tmp = option; tmp; tmp = tmp->next) { - struct pnp_port *port; - for (port = tmp->port; port; port = port->next) - if ((port->size == 8) && - ((port->min == 0x2f8) || - (port->min == 0x3f8) || - (port->min == 0x2e8) || - (port->min == 0x3e8))) - return 1; + for (i = 0; i < ARRAY_SIZE(base); i++) { + if (pnp_possible_config(dev, IORESOURCE_IO, base[i], 8)) + return 1; } return 0; @@ -420,10 +413,7 @@ static int __devinit serial_pnp_guess_board(struct pnp_dev *dev, int *flags) (dev->card && check_name(dev->card->name)))) return -ENODEV; - if (check_resources(dev->independent)) - return 0; - - if (check_resources(dev->dependent)) + if (check_resources(dev)) return 0; return -ENODEV; diff --git a/fs/Kconfig b/fs/Kconfig index 313b2e06ded5..17216ba99c85 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1375,6 +1375,9 @@ config JFFS2_CMODE_FAVOURLZO endchoice +# UBIFS File system configuration +source "fs/ubifs/Kconfig" + config CRAMFS tristate "Compressed ROM file system support (cramfs)" depends on BLOCK @@ -1544,10 +1547,6 @@ config UFS_FS The recently released UFS2 variant (used in FreeBSD 5.x) is READ-ONLY supported. - If you only intend to mount files from some other Unix over the - network using NFS, you don't need the UFS file system support (but - you need NFS file system support obviously). - Note that this option is generally not needed for floppies, since a good portable way to transport files and directories between unixes (and even other operating systems) is given by the tar program ("man @@ -1587,6 +1586,7 @@ menuconfig NETWORK_FILESYSTEMS Say Y here to get to see options for network filesystems and filesystem-related networking code, such as NFS daemon and RPCSEC security modules. + This option alone does not add any kernel code. If you say N, all options in this submenu will be skipped and @@ -1595,76 +1595,92 @@ menuconfig NETWORK_FILESYSTEMS if NETWORK_FILESYSTEMS config NFS_FS - tristate "NFS file system support" + tristate "NFS client support" depends on INET select LOCKD select SUNRPC select NFS_ACL_SUPPORT if NFS_V3_ACL help - If you are connected to some other (usually local) Unix computer - (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing - on that computer (the NFS server) using the Network File Sharing - protocol, say Y. "Mounting files" means that the client can access - the files with usual UNIX commands as if they were sitting on the - client's hard disk. For this to work, the server must run the - programs nfsd and mountd (but does not need to have NFS file system - support enabled in its kernel). NFS is explained in the Network - Administrator's Guide, available from - , on its man page: "man - nfs", and in the NFS-HOWTO. + Choose Y here if you want to access files residing on other + computers using Sun's Network File System protocol. To compile + this file system support as a module, choose M here: the module + will be called nfs. - A superior but less widely used alternative to NFS is provided by - the Coda file system; see "Coda file system support" below. + To mount file systems exported by NFS servers, you also need to + install the user space mount.nfs command which can be found in + the Linux nfs-utils package, available from http://linux-nfs.org/. + Information about using the mount command is available in the + mount(8) man page. More detail about the Linux NFS client + implementation is available via the nfs(5) man page. - If you say Y here, you should have said Y to TCP/IP networking also. - This option would enlarge your kernel by about 27 KB. - - To compile this file system support as a module, choose M here: the - module will be called nfs. + Below you can choose which versions of the NFS protocol are + available in the kernel to mount NFS servers. Support for NFS + version 2 (RFC 1094) is always available when NFS_FS is selected. - If you are configuring a diskless machine which will mount its root - file system over NFS at boot time, say Y here and to "Kernel - level IP autoconfiguration" above and to "Root file system on NFS" - below. You cannot compile this driver as a module in this case. - There are two packages designed for booting diskless machines over - the net: netboot, available from - , and Etherboot, - available from . + To configure a system which mounts its root file system via NFS + at boot time, say Y here, select "Kernel level IP + autoconfiguration" in the NETWORK menu, and select "Root file + system on NFS" below. You cannot compile this file system as a + module in this case. - If you don't know what all this is about, say N. + If unsure, say N. config NFS_V3 - bool "Provide NFSv3 client support" + bool "NFS client support for NFS version 3" depends on NFS_FS help - Say Y here if you want your NFS client to be able to speak version - 3 of the NFS protocol. + This option enables support for version 3 of the NFS protocol + (RFC 1813) in the kernel's NFS client. If unsure, say Y. config NFS_V3_ACL - bool "Provide client support for the NFSv3 ACL protocol extension" + bool "NFS client support for the NFSv3 ACL protocol extension" depends on NFS_V3 help - Implement the NFSv3 ACL protocol extension for manipulating POSIX - Access Control Lists. The server should also be compiled with - the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option. + Some NFS servers support an auxiliary NFSv3 ACL protocol that + Sun added to Solaris but never became an official part of the + NFS version 3 protocol. This protocol extension allows + applications on NFS clients to manipulate POSIX Access Control + Lists on files residing on NFS servers. NFS servers enforce + ACLs on local files whether this protocol is available or not. + + Choose Y here if your NFS server supports the Solaris NFSv3 ACL + protocol extension and you want your NFS client to allow + applications to access and modify ACLs on files on the server. + + Most NFS servers don't support the Solaris NFSv3 ACL protocol + extension. You can choose N here or specify the "noacl" mount + option to prevent your NFS client from trying to use the NFSv3 + ACL protocol. If unsure, say N. config NFS_V4 - bool "Provide NFSv4 client support (EXPERIMENTAL)" + bool "NFS client support for NFS version 4 (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL select RPCSEC_GSS_KRB5 help - Say Y here if you want your NFS client to be able to speak the newer - version 4 of the NFS protocol. + This option enables support for version 4 of the NFS protocol + (RFC 3530) in the kernel's NFS client. - Note: Requires auxiliary userspace daemons which may be found on - http://www.citi.umich.edu/projects/nfsv4/ + To mount NFS servers using NFSv4, you also need to install user + space programs which can be found in the Linux nfs-utils package, + available from http://linux-nfs.org/. If unsure, say N. +config ROOT_NFS + bool "Root file system on NFS" + depends on NFS_FS=y && IP_PNP + help + If you want your system to mount its root file system via NFS, + choose Y here. This is common practice for managing systems + without local permanent storage. For details, read + . + + Most people say N here. + config NFSD tristate "NFS server support" depends on INET @@ -1746,20 +1762,6 @@ config NFSD_V4 If unsure, say N. -config ROOT_NFS - bool "Root file system on NFS" - depends on NFS_FS=y && IP_PNP - help - If you want your Linux box to mount its whole root file system (the - one containing the directory /) from some other computer over the - net via NFS (presumably because your box doesn't have a hard disk), - say Y. Read for - details. It is likely that in this case, you also want to say Y to - "Kernel level IP autoconfiguration" so that your box can discover - its network address at boot time. - - Most people say N here. - config LOCKD tristate @@ -1800,27 +1802,6 @@ config SUNRPC_XPRT_RDMA If unsure, say N. -config SUNRPC_BIND34 - bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - default n - help - RPC requests over IPv6 networks require support for larger - addresses when performing an RPC bind. Sun added support for - IPv6 addressing by creating two new versions of the rpcbind - protocol (RFC 1833). - - This option enables support in the kernel RPC client for - querying rpcbind servers via versions 3 and 4 of the rpcbind - protocol. The kernel automatically falls back to version 2 - if a remote rpcbind service does not support versions 3 or 4. - By themselves, these new versions do not provide support for - RPC over IPv6, but the new protocol versions are necessary to - support it. - - If unsure, say N to get traditional behavior (version 2 rpcbind - requests only). - config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL diff --git a/fs/Makefile b/fs/Makefile index 277b079dec9e..3b2178b4bb66 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -101,6 +101,7 @@ obj-$(CONFIG_NTFS_FS) += ntfs/ obj-$(CONFIG_UFS_FS) += ufs/ obj-$(CONFIG_EFS_FS) += efs/ obj-$(CONFIG_JFFS2_FS) += jffs2/ +obj-$(CONFIG_UBIFS_FS) += ubifs/ obj-$(CONFIG_AFFS_FS) += affs/ obj-$(CONFIG_ROMFS_FS) += romfs/ obj-$(CONFIG_QNX4FS_FS) += qnx4/ diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ae45f77765c0..25adfc3c693a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -424,8 +424,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so * that it can be located for waiting on in __writeback_single_inode(). * - * Called under inode_lock. - * * If `bdi' is non-zero then we're being asked to writeback a specific queue. * This function assumes that the blockdev superblock's inodes are backed by * a variety of queues, so all inodes are searched. For other superblocks, @@ -441,11 +439,12 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * on the writer throttling path, and we get decent balancing between many * throttled threads: we don't want them all piling up on inode_sync_wait. */ -static void -sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) +void generic_sync_sb_inodes(struct super_block *sb, + struct writeback_control *wbc) { const unsigned long start = jiffies; /* livelock avoidance */ + spin_lock(&inode_lock); if (!wbc->for_kupdate || list_empty(&sb->s_io)) queue_io(sb, wbc->older_than_this); @@ -524,8 +523,16 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) if (!list_empty(&sb->s_more_io)) wbc->more_io = 1; } + spin_unlock(&inode_lock); return; /* Leave any unwritten inodes on s_io */ } +EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); + +static void sync_sb_inodes(struct super_block *sb, + struct writeback_control *wbc) +{ + generic_sync_sb_inodes(sb, wbc); +} /* * Start writeback of dirty pagecache data against all unlocked inodes. @@ -565,11 +572,8 @@ restart: * be unmounted by the time it is released. */ if (down_read_trylock(&sb->s_umount)) { - if (sb->s_root) { - spin_lock(&inode_lock); + if (sb->s_root) sync_sb_inodes(sb, wbc); - spin_unlock(&inode_lock); - } up_read(&sb->s_umount); } spin_lock(&sb_lock); @@ -607,9 +611,7 @@ void sync_inodes_sb(struct super_block *sb, int wait) (inodes_stat.nr_inodes - inodes_stat.nr_unused) + nr_dirty + nr_unstable; wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ - spin_lock(&inode_lock); sync_sb_inodes(sb, &wbc); - spin_unlock(&inode_lock); } /* diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 5df517b81f3f..1f6dc518505c 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -224,7 +224,9 @@ void nlm_release_call(struct nlm_rqst *call) static void nlmclnt_rpc_release(void *data) { + lock_kernel(); nlm_release_call(data); + unlock_kernel(); } static int nlm_wait_on_grace(wait_queue_head_t *queue) @@ -430,7 +432,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) * Report the conflicting lock back to the application. */ fl->fl_start = req->a_res.lock.fl.fl_start; - fl->fl_end = req->a_res.lock.fl.fl_start; + fl->fl_end = req->a_res.lock.fl.fl_end; fl->fl_type = req->a_res.lock.fl.fl_type; fl->fl_pid = 0; break; @@ -710,7 +712,9 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) die: return; retry_rebind: + lock_kernel(); nlm_rebind_host(req->a_host); + unlock_kernel(); retry_unlock: rpc_restart_call(task); } @@ -788,7 +792,9 @@ retry_cancel: /* Don't ever retry more than 3 times */ if (req->a_retries++ >= NLMCLNT_MAX_RETRIES) goto die; + lock_kernel(); nlm_rebind_host(req->a_host); + unlock_kernel(); rpc_restart_call(task); rpc_delay(task, 30 * HZ); } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 385437e3387d..2e27176ff42f 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -248,7 +248,9 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) static void nlm4svc_callback_release(void *data) { + lock_kernel(); nlm_release_call(data); + unlock_kernel(); } static const struct rpc_call_ops nlm4svc_callback_ops = { diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 81aca859bfde..56a08ab9a4cb 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -795,6 +795,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) dprintk("lockd: GRANT_MSG RPC callback\n"); + lock_kernel(); /* if the block is not on a list at this point then it has * been invalidated. Don't try to requeue it. * @@ -804,7 +805,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) * for nlm_blocked? */ if (list_empty(&block->b_list)) - return; + goto out; /* Technically, we should down the file semaphore here. Since we * move the block towards the head of the queue only, no harm @@ -818,13 +819,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) } nlmsvc_insert_block(block, timeout); svc_wake_up(block->b_daemon); +out: + unlock_kernel(); } static void nlmsvc_grant_release(void *data) { struct nlm_rqst *call = data; + lock_kernel(); nlmsvc_release_block(call->a_block); + unlock_kernel(); } static const struct rpc_call_ops nlmsvc_grant_ops = { diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 88379cc6e0b1..ce6952b50a75 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -278,7 +278,9 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) static void nlmsvc_callback_release(void *data) { + lock_kernel(); nlm_release_call(data); + unlock_kernel(); } static const struct rpc_call_ops nlmsvc_callback_ops = { diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index c1e7c8300629..f447f4b4476c 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -27,7 +27,7 @@ struct nfs_callback_data { unsigned int users; - struct svc_serv *serv; + struct svc_rqst *rqst; struct task_struct *task; }; @@ -91,21 +91,17 @@ nfs_callback_svc(void *vrqstp) svc_process(rqstp); } unlock_kernel(); - nfs_callback_info.task = NULL; - svc_exit_thread(rqstp); return 0; } /* - * Bring up the server process if it is not already up. + * Bring up the callback thread if it is not already up. */ int nfs_callback_up(void) { struct svc_serv *serv = NULL; - struct svc_rqst *rqstp; int ret = 0; - lock_kernel(); mutex_lock(&nfs_callback_mutex); if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) goto out; @@ -121,22 +117,23 @@ int nfs_callback_up(void) nfs_callback_tcpport = ret; dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); - rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); - if (IS_ERR(rqstp)) { - ret = PTR_ERR(rqstp); + nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(nfs_callback_info.rqst)) { + ret = PTR_ERR(nfs_callback_info.rqst); + nfs_callback_info.rqst = NULL; goto out_err; } svc_sock_update_bufs(serv); - nfs_callback_info.serv = serv; - nfs_callback_info.task = kthread_run(nfs_callback_svc, rqstp, + nfs_callback_info.task = kthread_run(nfs_callback_svc, + nfs_callback_info.rqst, "nfsv4-svc"); if (IS_ERR(nfs_callback_info.task)) { ret = PTR_ERR(nfs_callback_info.task); - nfs_callback_info.serv = NULL; + svc_exit_thread(nfs_callback_info.rqst); + nfs_callback_info.rqst = NULL; nfs_callback_info.task = NULL; - svc_exit_thread(rqstp); goto out_err; } out: @@ -149,7 +146,6 @@ out: if (serv) svc_destroy(serv); mutex_unlock(&nfs_callback_mutex); - unlock_kernel(); return ret; out_err: dprintk("Couldn't create callback socket or server thread; err = %d\n", @@ -159,17 +155,19 @@ out_err: } /* - * Kill the server process if it is not already down. + * Kill the callback thread if it's no longer being used. */ void nfs_callback_down(void) { - lock_kernel(); mutex_lock(&nfs_callback_mutex); nfs_callback_info.users--; - if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) + if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) { kthread_stop(nfs_callback_info.task); + svc_exit_thread(nfs_callback_info.rqst); + nfs_callback_info.rqst = NULL; + nfs_callback_info.task = NULL; + } mutex_unlock(&nfs_callback_mutex); - unlock_kernel(); } static int nfs_callback_authenticate(struct svc_rqst *rqstp) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f2a092ca69b5..5ee23e7058b3 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -431,14 +431,14 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, { to->to_initval = timeo * HZ / 10; to->to_retries = retrans; - if (!to->to_retries) - to->to_retries = 2; switch (proto) { case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: + if (to->to_retries == 0) + to->to_retries = NFS_DEF_TCP_RETRANS; if (to->to_initval == 0) - to->to_initval = 60 * HZ; + to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) to->to_initval = NFS_MAX_TCP_TIMEOUT; to->to_increment = to->to_initval; @@ -450,14 +450,17 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_exponential = 0; break; case XPRT_TRANSPORT_UDP: - default: + if (to->to_retries == 0) + to->to_retries = NFS_DEF_UDP_RETRANS; if (!to->to_initval) - to->to_initval = 11 * HZ / 10; + to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_UDP_TIMEOUT) to->to_initval = NFS_MAX_UDP_TIMEOUT; to->to_maxval = NFS_MAX_UDP_TIMEOUT; to->to_exponential = 1; break; + default: + BUG(); } } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 982a2064fe4c..28a238dab23a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -133,13 +133,14 @@ nfs_opendir(struct inode *inode, struct file *filp) { int res; - dfprintk(VFS, "NFS: opendir(%s/%ld)\n", - inode->i_sb->s_id, inode->i_ino); + dfprintk(FILE, "NFS: open dir(%s/%s)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); + + nfs_inc_stats(inode, NFSIOS_VFSOPEN); - lock_kernel(); /* Call generic open code in order to cache credentials */ res = nfs_open(inode, filp); - unlock_kernel(); return res; } @@ -528,13 +529,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct nfs_fattr fattr; long res; - dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n", + dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", dentry->d_parent->d_name.name, dentry->d_name.name, (long long)filp->f_pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); - lock_kernel(); - /* * filp->f_pos points to the dirent entry number. * *desc->dir_cookie has the cookie for the next entry. We have @@ -592,10 +591,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } out: nfs_unblock_sillyrename(dentry); - unlock_kernel(); if (res > 0) res = 0; - dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n", + dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n", dentry->d_parent->d_name.name, dentry->d_name.name, res); return res; @@ -603,7 +601,15 @@ out: static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) { - mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + + dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name, + offset, origin); + + mutex_lock(&inode->i_mutex); switch (origin) { case 1: offset += filp->f_pos; @@ -619,7 +625,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) nfs_file_open_context(filp)->dir_cookie = 0; } out: - mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&inode->i_mutex); return offset; } @@ -629,10 +635,11 @@ out: */ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) { - dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", + dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, datasync); + nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); return 0; } @@ -767,7 +774,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) struct nfs_fattr fattr; parent = dget_parent(dentry); - lock_kernel(); dir = parent->d_inode; nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; @@ -805,7 +811,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: - unlock_kernel(); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", __func__, dentry->d_parent->d_name.name, @@ -824,7 +829,6 @@ out_zap_parent: shrink_dcache_parent(dentry); } d_drop(dentry); - unlock_kernel(); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", __func__, dentry->d_parent->d_name.name, @@ -858,6 +862,14 @@ static int nfs_dentry_delete(struct dentry *dentry) } +static void nfs_drop_nlink(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&inode->i_lock); +} + /* * Called when the dentry loses inode. * We use it to clean up silly-renamed files. @@ -869,10 +881,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - lock_kernel(); drop_nlink(inode); nfs_complete_unlink(dentry, inode); - unlock_kernel(); } iput(inode); } @@ -903,8 +913,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(-ENOMEM); dentry->d_op = NFS_PROTO(dir)->dentry_ops; - lock_kernel(); - /* * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. @@ -912,7 +920,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (nfs_is_exclusive_create(dir, nd)) { d_instantiate(dentry, NULL); res = NULL; - goto out_unlock; + goto out; } parent = dentry->d_parent; @@ -940,8 +948,6 @@ no_entry: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unblock_sillyrename: nfs_unblock_sillyrename(parent); -out_unlock: - unlock_kernel(); out: return res; } @@ -999,9 +1005,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } /* Open the file on the server */ - lock_kernel(); res = nfs4_atomic_open(dir, dentry, nd); - unlock_kernel(); if (IS_ERR(res)) { error = PTR_ERR(res); switch (error) { @@ -1063,9 +1067,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. */ - lock_kernel(); ret = nfs4_open_revalidate(dir, dentry, openflags, nd); - unlock_kernel(); out: dput(parent); if (!ret) @@ -1218,14 +1220,11 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, if ((nd->flags & LOOKUP_CREATE) != 0) open_flags = nd->intent.open.flags; - lock_kernel(); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); if (error != 0) goto out_err; - unlock_kernel(); return 0; out_err: - unlock_kernel(); d_drop(dentry); return error; } @@ -1248,14 +1247,11 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - lock_kernel(); status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); if (status != 0) goto out_err; - unlock_kernel(); return 0; out_err: - unlock_kernel(); d_drop(dentry); return status; } @@ -1274,15 +1270,12 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; - lock_kernel(); error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); if (error != 0) goto out_err; - unlock_kernel(); return 0; out_err: d_drop(dentry); - unlock_kernel(); return error; } @@ -1299,14 +1292,12 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - lock_kernel(); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) clear_nlink(dentry->d_inode); else if (error == -ENOENT) nfs_dentry_handle_enoent(dentry); - unlock_kernel(); return error; } @@ -1408,7 +1399,7 @@ static int nfs_safe_remove(struct dentry *dentry) error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ if (error == 0) - drop_nlink(inode); + nfs_drop_nlink(inode); nfs_mark_for_revalidate(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); @@ -1431,7 +1422,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - lock_kernel(); spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) > 1) { @@ -1440,7 +1430,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); - unlock_kernel(); return error; } if (!d_unhashed(dentry)) { @@ -1454,7 +1443,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } else if (need_rehash) d_rehash(dentry); - unlock_kernel(); return error; } @@ -1491,13 +1479,9 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym attr.ia_mode = S_IFLNK | S_IRWXUGO; attr.ia_valid = ATTR_MODE; - lock_kernel(); - page = alloc_page(GFP_HIGHUSER); - if (!page) { - unlock_kernel(); + if (!page) return -ENOMEM; - } kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, pathlen); @@ -1512,7 +1496,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym dentry->d_name.name, symname, error); d_drop(dentry); __free_page(page); - unlock_kernel(); return error; } @@ -1530,7 +1513,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym } else __free_page(page); - unlock_kernel(); return 0; } @@ -1544,14 +1526,12 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) old_dentry->d_parent->d_name.name, old_dentry->d_name.name, dentry->d_parent->d_name.name, dentry->d_name.name); - lock_kernel(); d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { atomic_inc(&inode->i_count); d_add(dentry, inode); } - unlock_kernel(); return error; } @@ -1591,7 +1571,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * To prevent any new references to the target during the rename, * we unhash the dentry and free the inode in advance. */ - lock_kernel(); if (!d_unhashed(new_dentry)) { d_drop(new_dentry); rehash = new_dentry; @@ -1635,7 +1614,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* dentry still busy? */ goto out; } else - drop_nlink(new_inode); + nfs_drop_nlink(new_inode); go_ahead: /* @@ -1669,7 +1648,6 @@ out: /* new dentry created? */ if (dentry) dput(dentry); - unlock_kernel(); return error; } @@ -1962,8 +1940,6 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) } force_lookup: - lock_kernel(); - if (!NFS_PROTO(inode)->access) goto out_notsup; @@ -1973,7 +1949,6 @@ force_lookup: put_rpccred(cred); } else res = PTR_ERR(cred); - unlock_kernel(); out: dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", inode->i_sb->s_id, inode->i_ino, mask, res); @@ -1982,7 +1957,6 @@ out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) res = generic_permission(inode, mask, NULL); - unlock_kernel(); goto out; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4757a2b326a1..08f6b040d289 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -890,7 +890,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); - dprintk("nfs: direct read(%s/%s, %zd@%Ld)\n", + dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, (long long) pos); @@ -947,7 +947,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); - dfprintk(VFS, "nfs: direct write(%s/%s, %zd@%Ld)\n", + dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, (long long) pos); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 4e98a56a1777..78460657f5cb 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -50,7 +50,7 @@ static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); -static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); +static int nfs_file_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); @@ -72,7 +72,7 @@ const struct file_operations nfs_file_operations = { .open = nfs_file_open, .flush = nfs_file_flush, .release = nfs_file_release, - .fsync = nfs_fsync, + .fsync = nfs_file_fsync, .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, @@ -119,25 +119,33 @@ nfs_file_open(struct inode *inode, struct file *filp) { int res; + dprintk("NFS: open file(%s/%s)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); + res = nfs_check_flags(filp->f_flags); if (res) return res; nfs_inc_stats(inode, NFSIOS_VFSOPEN); - lock_kernel(); - res = NFS_PROTO(inode)->file_open(inode, filp); - unlock_kernel(); + res = nfs_open(inode, filp); return res; } static int nfs_file_release(struct inode *inode, struct file *filp) { + struct dentry *dentry = filp->f_path.dentry; + + dprintk("NFS: release(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) - nfs_wb_all(filp->f_path.dentry->d_inode); + nfs_wb_all(dentry->d_inode); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); - return NFS_PROTO(inode)->file_release(inode, filp); + return nfs_release(inode, filp); } /** @@ -171,6 +179,12 @@ force_reval: static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) { loff_t loff; + + dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, + offset, origin); + /* origin == SEEK_END => we must revalidate the cached file length */ if (origin == SEEK_END) { struct inode *inode = filp->f_mapping->host; @@ -185,7 +199,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) } /* - * Helper for nfs_file_flush() and nfs_fsync() + * Helper for nfs_file_flush() and nfs_file_fsync() * * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to * disk, but it retrieves and clears ctx->error after synching, despite @@ -211,16 +225,18 @@ static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) /* * Flush all dirty pages, and check for write errors. - * */ static int nfs_file_flush(struct file *file, fl_owner_t id) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = file->f_path.dentry->d_inode; + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; int status; - dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + dprintk("NFS: flush(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); if ((file->f_mode & FMODE_WRITE) == 0) return 0; @@ -245,7 +261,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_read(iocb, iov, nr_segs, pos); - dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", + dprintk("NFS: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); @@ -265,7 +281,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, struct inode *inode = dentry->d_inode; ssize_t res; - dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n", + dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); @@ -282,7 +298,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) struct inode *inode = dentry->d_inode; int status; - dfprintk(VFS, "nfs: mmap(%s/%s)\n", + dprintk("NFS: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); status = nfs_revalidate_mapping(inode, file->f_mapping); @@ -300,12 +316,14 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * whether any write errors occurred for this process. */ static int -nfs_fsync(struct file *file, struct dentry *dentry, int datasync) +nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; - dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + dprintk("NFS: fsync file(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); return nfs_do_fsync(ctx, inode); @@ -328,6 +346,11 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, struct page *page; index = pos >> PAGE_CACHE_SHIFT; + dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + mapping->host->i_ino, len, (long long) pos); + page = __grab_cache_page(mapping, index); if (!page) return -ENOMEM; @@ -348,9 +371,32 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, unsigned offset = pos & (PAGE_CACHE_SIZE - 1); int status; - lock_kernel(); + dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + mapping->host->i_ino, len, (long long) pos); + + /* + * Zero any uninitialised parts of the page, and then mark the page + * as up to date if it turns out that we're extending the file. + */ + if (!PageUptodate(page)) { + unsigned pglen = nfs_page_length(page); + unsigned end = offset + len; + + if (pglen == 0) { + zero_user_segments(page, 0, offset, + end, PAGE_CACHE_SIZE); + SetPageUptodate(page); + } else if (end >= pglen) { + zero_user_segment(page, end, PAGE_CACHE_SIZE); + if (offset == 0) + SetPageUptodate(page); + } else + zero_user_segment(page, pglen, PAGE_CACHE_SIZE); + } + status = nfs_updatepage(file, page, offset, copied); - unlock_kernel(); unlock_page(page); page_cache_release(page); @@ -362,6 +408,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, static void nfs_invalidate_page(struct page *page, unsigned long offset) { + dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); + if (offset != 0) return; /* Cancel any unstarted writes on this page */ @@ -370,13 +418,20 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) static int nfs_release_page(struct page *page, gfp_t gfp) { + dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); + /* If PagePrivate() is set, then the page is not freeable */ return 0; } static int nfs_launder_page(struct page *page) { - return nfs_wb_page(page->mapping->host, page); + struct inode *inode = page->mapping->host; + + dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", + inode->i_ino, (long long)page_offset(page)); + + return nfs_wb_page(inode, page); } const struct address_space_operations nfs_file_aops = { @@ -396,13 +451,19 @@ const struct address_space_operations nfs_file_aops = { static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct file *filp = vma->vm_file; + struct dentry *dentry = filp->f_path.dentry; unsigned pagelen; int ret = -EINVAL; struct address_space *mapping; + dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + filp->f_mapping->host->i_ino, + (long long)page_offset(page)); + lock_page(page); mapping = page->mapping; - if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) + if (mapping != dentry->d_inode->i_mapping) goto out_unlock; ret = 0; @@ -450,9 +511,9 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_write(iocb, iov, nr_segs, pos); - dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", + dprintk("NFS: write(%s/%s, %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - inode->i_ino, (unsigned long) count, (long long) pos); + (unsigned long) count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -586,7 +647,8 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - nfs_zap_caches(inode); + if (!nfs_have_delegation(inode, FMODE_READ)) + nfs_zap_caches(inode); out: return status; } @@ -596,23 +658,35 @@ out: */ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { - struct inode * inode = filp->f_mapping->host; + struct inode *inode = filp->f_mapping->host; + int ret = -ENOLCK; - dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", - inode->i_sb->s_id, inode->i_ino, + dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); + nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) - return -ENOLCK; + goto out_err; + + if (NFS_PROTO(inode)->lock_check_bounds != NULL) { + ret = NFS_PROTO(inode)->lock_check_bounds(fl); + if (ret < 0) + goto out_err; + } if (IS_GETLK(cmd)) - return do_getlk(filp, cmd, fl); - if (fl->fl_type == F_UNLCK) - return do_unlk(filp, cmd, fl); - return do_setlk(filp, cmd, fl); + ret = do_getlk(filp, cmd, fl); + else if (fl->fl_type == F_UNLCK) + ret = do_unlk(filp, cmd, fl); + else + ret = do_setlk(filp, cmd, fl); +out_err: + return ret; } /* @@ -620,9 +694,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) */ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) { - dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", - filp->f_path.dentry->d_inode->i_sb->s_id, - filp->f_path.dentry->d_inode->i_ino, + dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, fl->fl_type, fl->fl_flags); /* @@ -645,12 +719,15 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) return do_setlk(filp, cmd, fl); } +/* + * There is no protocol support for leases, so we have no way to implement + * them correctly in the face of opens by other clients. + */ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) { - /* - * There is no protocol support for leases, so we have no way - * to implement them correctly in the face of opens by other - * clients. - */ + dprintk("NFS: setlease(%s/%s, arg=%ld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, arg); + return -EINVAL; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 596c5d8e86f4..df23f987da6b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -57,8 +57,6 @@ static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); -static void nfs_zap_acl_cache(struct inode *); - static struct kmem_cache * nfs_inode_cachep; static inline unsigned long @@ -167,7 +165,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) } } -static void nfs_zap_acl_cache(struct inode *inode) +void nfs_zap_acl_cache(struct inode *inode) { void (*clear_acl_cache)(struct inode *); @@ -347,7 +345,7 @@ out_no_inode: goto out; } -#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET) +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) int nfs_setattr(struct dentry *dentry, struct iattr *attr) @@ -369,10 +367,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) /* Optimization: if the end result is no change, don't RPC */ attr->ia_valid &= NFS_VALID_ATTRS; - if (attr->ia_valid == 0) + if ((attr->ia_valid & ~ATTR_FILE) == 0) return 0; - lock_kernel(); /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { filemap_write_and_wait(inode->i_mapping); @@ -386,10 +383,65 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error == 0) nfs_refresh_inode(inode, &fattr); - unlock_kernel(); return error; } +/** + * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall + * @inode: inode of the file used + * @offset: file offset to start truncating + * + * This is a copy of the common vmtruncate, but with the locking + * corrected to take into account the fact that NFS requires + * inode->i_size to be updated under the inode->i_lock. + */ +static int nfs_vmtruncate(struct inode * inode, loff_t offset) +{ + if (i_size_read(inode) < offset) { + unsigned long limit; + + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && offset > limit) + goto out_sig; + if (offset > inode->i_sb->s_maxbytes) + goto out_big; + spin_lock(&inode->i_lock); + i_size_write(inode, offset); + spin_unlock(&inode->i_lock); + } else { + struct address_space *mapping = inode->i_mapping; + + /* + * truncation of in-use swapfiles is disallowed - it would + * cause subsequent swapout to scribble on the now-freed + * blocks. + */ + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + spin_lock(&inode->i_lock); + i_size_write(inode, offset); + spin_unlock(&inode->i_lock); + + /* + * unmap_mapping_range is called twice, first simply for + * efficiency so that truncate_inode_pages does fewer + * single-page unmaps. However after this first call, and + * before truncate_inode_pages finishes, it is possible for + * private pages to be COWed, which remain after + * truncate_inode_pages finishes, hence the second + * unmap_mapping_range call must be made for correctness. + */ + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(mapping, offset); + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); + } + return 0; +out_sig: + send_sig(SIGXFSZ, current, 0); +out_big: + return -EFBIG; +} + /** * nfs_setattr_update_inode - Update inode metadata after a setattr call. * @inode: pointer to struct inode @@ -416,8 +468,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) } if ((attr->ia_valid & ATTR_SIZE) != 0) { nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); - inode->i_size = attr->ia_size; - vmtruncate(inode, attr->ia_size); + nfs_vmtruncate(inode, attr->ia_size); } } @@ -647,7 +698,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) inode->i_sb->s_id, (long long)NFS_FILEID(inode)); nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); - lock_kernel(); if (is_bad_inode(inode)) goto out_nowait; if (NFS_STALE(inode)) @@ -696,7 +746,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) nfs_wake_up_inode(inode); out_nowait: - unlock_kernel(); return status; } @@ -831,9 +880,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; } - if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) && + if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && nfsi->npages == 0) - inode->i_size = nfs_size_to_loff_t(fattr->size); + i_size_write(inode, nfs_size_to_loff_t(fattr->size)); } } @@ -974,7 +1023,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa (fattr->valid & NFS_ATTR_WCC) == 0) { memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); - fattr->pre_size = inode->i_size; + fattr->pre_size = i_size_read(inode); fattr->valid |= NFS_ATTR_WCC; } return nfs_post_op_update_inode(inode, fattr); @@ -1059,7 +1108,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ if (nfsi->npages == 0 || new_isize > cur_isize) { - inode->i_size = new_isize; + i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } dprintk("NFS: isize change on server for file %s/%ld\n", diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 04ae867dddba..24241fcbb98d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -150,6 +150,7 @@ extern void nfs_clear_inode(struct inode *); #ifdef CONFIG_NFS_V4 extern void nfs4_clear_inode(struct inode *); #endif +void nfs_zap_acl_cache(struct inode *inode); /* super.c */ extern struct file_system_type nfs_xdev_fs_type; diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 6350ecbde589..a36952810032 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -5,135 +5,41 @@ * * Copyright (C) 2005, 2006 Chuck Lever * - * NFS client per-mount statistics provide information about the health of - * the NFS client and the health of each NFS mount point. Generally these - * are not for detailed problem diagnosis, but simply to indicate that there - * is a problem. - * - * These counters are not meant to be human-readable, but are meant to be - * integrated into system monitoring tools such as "sar" and "iostat". As - * such, the counters are sampled by the tools over time, and are never - * zeroed after a file system is mounted. Moving averages can be computed - * by the tools by taking the difference between two instantaneous samples - * and dividing that by the time between the samples. */ #ifndef _NFS_IOSTAT #define _NFS_IOSTAT -#define NFS_IOSTAT_VERS "1.0" - -/* - * NFS byte counters - * - * 1. SERVER - the number of payload bytes read from or written to the - * server by the NFS client via an NFS READ or WRITE request. - * - * 2. NORMAL - the number of bytes read or written by applications via - * the read(2) and write(2) system call interfaces. - * - * 3. DIRECT - the number of bytes read or written from files opened - * with the O_DIRECT flag. - * - * These counters give a view of the data throughput into and out of the NFS - * client. Comparing the number of bytes requested by an application with the - * number of bytes the client requests from the server can provide an - * indication of client efficiency (per-op, cache hits, etc). - * - * These counters can also help characterize which access methods are in - * use. DIRECT by itself shows whether there is any O_DIRECT traffic. - * NORMAL + DIRECT shows how much data is going through the system call - * interface. A large amount of SERVER traffic without much NORMAL or - * DIRECT traffic shows that applications are using mapped files. - * - * NFS page counters - * - * These count the number of pages read or written via nfs_readpage(), - * nfs_readpages(), or their write equivalents. - */ -enum nfs_stat_bytecounters { - NFSIOS_NORMALREADBYTES = 0, - NFSIOS_NORMALWRITTENBYTES, - NFSIOS_DIRECTREADBYTES, - NFSIOS_DIRECTWRITTENBYTES, - NFSIOS_SERVERREADBYTES, - NFSIOS_SERVERWRITTENBYTES, - NFSIOS_READPAGES, - NFSIOS_WRITEPAGES, - __NFSIOS_BYTESMAX, -}; - -/* - * NFS event counters - * - * These counters provide a low-overhead way of monitoring client activity - * without enabling NFS trace debugging. The counters show the rate at - * which VFS requests are made, and how often the client invalidates its - * data and attribute caches. This allows system administrators to monitor - * such things as how close-to-open is working, and answer questions such - * as "why are there so many GETATTR requests on the wire?" - * - * They also count anamolous events such as short reads and writes, silly - * renames due to close-after-delete, and operations that change the size - * of a file (such operations can often be the source of data corruption - * if applications aren't using file locking properly). - */ -enum nfs_stat_eventcounters { - NFSIOS_INODEREVALIDATE = 0, - NFSIOS_DENTRYREVALIDATE, - NFSIOS_DATAINVALIDATE, - NFSIOS_ATTRINVALIDATE, - NFSIOS_VFSOPEN, - NFSIOS_VFSLOOKUP, - NFSIOS_VFSACCESS, - NFSIOS_VFSUPDATEPAGE, - NFSIOS_VFSREADPAGE, - NFSIOS_VFSREADPAGES, - NFSIOS_VFSWRITEPAGE, - NFSIOS_VFSWRITEPAGES, - NFSIOS_VFSGETDENTS, - NFSIOS_VFSSETATTR, - NFSIOS_VFSFLUSH, - NFSIOS_VFSFSYNC, - NFSIOS_VFSLOCK, - NFSIOS_VFSRELEASE, - NFSIOS_CONGESTIONWAIT, - NFSIOS_SETATTRTRUNC, - NFSIOS_EXTENDWRITE, - NFSIOS_SILLYRENAME, - NFSIOS_SHORTREAD, - NFSIOS_SHORTWRITE, - NFSIOS_DELAY, - __NFSIOS_COUNTSMAX, -}; - -#ifdef __KERNEL__ - #include #include +#include struct nfs_iostats { unsigned long long bytes[__NFSIOS_BYTESMAX]; unsigned long events[__NFSIOS_COUNTSMAX]; } ____cacheline_aligned; -static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat) +static inline void nfs_inc_server_stats(const struct nfs_server *server, + enum nfs_stat_eventcounters stat) { struct nfs_iostats *iostats; int cpu; cpu = get_cpu(); iostats = per_cpu_ptr(server->io_stats, cpu); - iostats->events[stat] ++; + iostats->events[stat]++; put_cpu_no_resched(); } -static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +static inline void nfs_inc_stats(const struct inode *inode, + enum nfs_stat_eventcounters stat) { nfs_inc_server_stats(NFS_SERVER(inode), stat); } -static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend) +static inline void nfs_add_server_stats(const struct nfs_server *server, + enum nfs_stat_bytecounters stat, + unsigned long addend) { struct nfs_iostats *iostats; int cpu; @@ -144,7 +50,9 @@ static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat put_cpu_no_resched(); } -static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +static inline void nfs_add_stats(const struct inode *inode, + enum nfs_stat_bytecounters stat, + unsigned long addend) { nfs_add_server_stats(NFS_SERVER(inode), stat, addend); } @@ -160,5 +68,4 @@ static inline void nfs_free_iostats(struct nfs_iostats *stats) free_percpu(stats); } -#endif -#endif +#endif /* _NFS_IOSTAT */ diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9b7362565c0c..423842f51ac9 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -5,6 +5,8 @@ #include #include +#include "internal.h" + #define NFSDBG_FACILITY NFSDBG_PROC ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) @@ -205,6 +207,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) status = nfs_revalidate_inode(server, inode); if (status < 0) return ERR_PTR(status); + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); acl = nfs3_get_cached_acl(inode, type); if (acl != ERR_PTR(-EAGAIN)) return acl; @@ -319,9 +323,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, dprintk("NFS call setacl\n"); msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; status = rpc_call_sync(server->client_acl, &msg, 0); - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; - spin_unlock(&inode->i_lock); + nfs_access_zap_cache(inode); + nfs_zap_acl_cache(inode); dprintk("NFS reply setacl: %d\n", status); /* pages may have been allocated at the xdr layer. */ diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c3523ad03ed1..1e750e4574a9 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -129,6 +129,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, int status; dprintk("NFS call setattr\n"); + if (sattr->ia_valid & ATTR_FILE) + msg.rpc_cred = nfs_file_cred(sattr->ia_file); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) @@ -248,6 +250,53 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page, return status; } +struct nfs3_createdata { + struct rpc_message msg; + union { + struct nfs3_createargs create; + struct nfs3_mkdirargs mkdir; + struct nfs3_symlinkargs symlink; + struct nfs3_mknodargs mknod; + } arg; + struct nfs3_diropres res; + struct nfs_fh fh; + struct nfs_fattr fattr; + struct nfs_fattr dir_attr; +}; + +static struct nfs3_createdata *nfs3_alloc_createdata(void) +{ + struct nfs3_createdata *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data != NULL) { + data->msg.rpc_argp = &data->arg; + data->msg.rpc_resp = &data->res; + data->res.fh = &data->fh; + data->res.fattr = &data->fattr; + data->res.dir_attr = &data->dir_attr; + nfs_fattr_init(data->res.fattr); + nfs_fattr_init(data->res.dir_attr); + } + return data; +} + +static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data) +{ + int status; + + status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); + nfs_post_op_update_inode(dir, data->res.dir_attr); + if (status == 0) + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + return status; +} + +static void nfs3_free_createdata(struct nfs3_createdata *data) +{ + kfree(data); +} + /* * Create a regular file. * For now, we don't implement O_EXCL. @@ -256,70 +305,60 @@ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nameidata *nd) { - struct nfs_fh fhandle; - struct nfs_fattr fattr; - struct nfs_fattr dir_attr; - struct nfs3_createargs arg = { - .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len, - .sattr = sattr, - }; - struct nfs3_diropres res = { - .dir_attr = &dir_attr, - .fh = &fhandle, - .fattr = &fattr - }; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE], - .rpc_argp = &arg, - .rpc_resp = &res, - }; + struct nfs3_createdata *data; mode_t mode = sattr->ia_mode; - int status; + int status = -ENOMEM; dprintk("NFS call create %s\n", dentry->d_name.name); - arg.createmode = NFS3_CREATE_UNCHECKED; + + data = nfs3_alloc_createdata(); + if (data == NULL) + goto out; + + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_CREATE]; + data->arg.create.fh = NFS_FH(dir); + data->arg.create.name = dentry->d_name.name; + data->arg.create.len = dentry->d_name.len; + data->arg.create.sattr = sattr; + + data->arg.create.createmode = NFS3_CREATE_UNCHECKED; if (flags & O_EXCL) { - arg.createmode = NFS3_CREATE_EXCLUSIVE; - arg.verifier[0] = jiffies; - arg.verifier[1] = current->pid; + data->arg.create.createmode = NFS3_CREATE_EXCLUSIVE; + data->arg.create.verifier[0] = jiffies; + data->arg.create.verifier[1] = current->pid; } sattr->ia_mode &= ~current->fs->umask; -again: - nfs_fattr_init(&dir_attr); - nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_refresh_inode(dir, &dir_attr); + for (;;) { + status = nfs3_do_create(dir, dentry, data); - /* If the server doesn't support the exclusive creation semantics, - * try again with simple 'guarded' mode. */ - if (status == -ENOTSUPP) { - switch (arg.createmode) { + if (status != -ENOTSUPP) + break; + /* If the server doesn't support the exclusive creation + * semantics, try again with simple 'guarded' mode. */ + switch (data->arg.create.createmode) { case NFS3_CREATE_EXCLUSIVE: - arg.createmode = NFS3_CREATE_GUARDED; + data->arg.create.createmode = NFS3_CREATE_GUARDED; break; case NFS3_CREATE_GUARDED: - arg.createmode = NFS3_CREATE_UNCHECKED; + data->arg.create.createmode = NFS3_CREATE_UNCHECKED; break; case NFS3_CREATE_UNCHECKED: goto out; } - goto again; + nfs_fattr_init(data->res.dir_attr); + nfs_fattr_init(data->res.fattr); } - if (status == 0) - status = nfs_instantiate(dentry, &fhandle, &fattr); if (status != 0) goto out; /* When we created the file with exclusive semantics, make * sure we set the attributes afterwards. */ - if (arg.createmode == NFS3_CREATE_EXCLUSIVE) { + if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) { dprintk("NFS call setattr (post-create)\n"); if (!(sattr->ia_valid & ATTR_ATIME_SET)) @@ -330,14 +369,15 @@ again: /* Note: we could use a guarded setattr here, but I'm * not sure this buys us anything (and I'd have * to revamp the NFSv3 XDR code) */ - status = nfs3_proc_setattr(dentry, &fattr, sattr); - nfs_post_op_update_inode(dentry->d_inode, &fattr); + status = nfs3_proc_setattr(dentry, data->res.fattr, sattr); + nfs_post_op_update_inode(dentry->d_inode, data->res.fattr); dprintk("NFS reply setattr (post-create): %d\n", status); + if (status != 0) + goto out; } - if (status != 0) - goto out; status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: + nfs3_free_createdata(data); dprintk("NFS reply create: %d\n", status); return status; } @@ -452,40 +492,28 @@ static int nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, unsigned int len, struct iattr *sattr) { - struct nfs_fh fhandle; - struct nfs_fattr fattr, dir_attr; - struct nfs3_symlinkargs arg = { - .fromfh = NFS_FH(dir), - .fromname = dentry->d_name.name, - .fromlen = dentry->d_name.len, - .pages = &page, - .pathlen = len, - .sattr = sattr - }; - struct nfs3_diropres res = { - .dir_attr = &dir_attr, - .fh = &fhandle, - .fattr = &fattr - }; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status; + struct nfs3_createdata *data; + int status = -ENOMEM; if (len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; dprintk("NFS call symlink %s\n", dentry->d_name.name); - nfs_fattr_init(&dir_attr); - nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_post_op_update_inode(dir, &dir_attr); - if (status != 0) + data = nfs3_alloc_createdata(); + if (data == NULL) goto out; - status = nfs_instantiate(dentry, &fhandle, &fattr); + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK]; + data->arg.symlink.fromfh = NFS_FH(dir); + data->arg.symlink.fromname = dentry->d_name.name; + data->arg.symlink.fromlen = dentry->d_name.len; + data->arg.symlink.pages = &page; + data->arg.symlink.pathlen = len; + data->arg.symlink.sattr = sattr; + + status = nfs3_do_create(dir, dentry, data); + + nfs3_free_createdata(data); out: dprintk("NFS reply symlink: %d\n", status); return status; @@ -494,42 +522,31 @@ out: static int nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { - struct nfs_fh fhandle; - struct nfs_fattr fattr, dir_attr; - struct nfs3_mkdirargs arg = { - .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len, - .sattr = sattr - }; - struct nfs3_diropres res = { - .dir_attr = &dir_attr, - .fh = &fhandle, - .fattr = &fattr - }; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR], - .rpc_argp = &arg, - .rpc_resp = &res, - }; + struct nfs3_createdata *data; int mode = sattr->ia_mode; - int status; + int status = -ENOMEM; dprintk("NFS call mkdir %s\n", dentry->d_name.name); sattr->ia_mode &= ~current->fs->umask; - nfs_fattr_init(&dir_attr); - nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_post_op_update_inode(dir, &dir_attr); - if (status != 0) + data = nfs3_alloc_createdata(); + if (data == NULL) goto out; - status = nfs_instantiate(dentry, &fhandle, &fattr); + + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR]; + data->arg.mkdir.fh = NFS_FH(dir); + data->arg.mkdir.name = dentry->d_name.name; + data->arg.mkdir.len = dentry->d_name.len; + data->arg.mkdir.sattr = sattr; + + status = nfs3_do_create(dir, dentry, data); if (status != 0) goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: + nfs3_free_createdata(data); dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -615,52 +632,50 @@ static int nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { - struct nfs_fh fh; - struct nfs_fattr fattr, dir_attr; - struct nfs3_mknodargs arg = { - .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len, - .sattr = sattr, - .rdev = rdev - }; - struct nfs3_diropres res = { - .dir_attr = &dir_attr, - .fh = &fh, - .fattr = &fattr - }; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD], - .rpc_argp = &arg, - .rpc_resp = &res, - }; + struct nfs3_createdata *data; mode_t mode = sattr->ia_mode; - int status; - - switch (sattr->ia_mode & S_IFMT) { - case S_IFBLK: arg.type = NF3BLK; break; - case S_IFCHR: arg.type = NF3CHR; break; - case S_IFIFO: arg.type = NF3FIFO; break; - case S_IFSOCK: arg.type = NF3SOCK; break; - default: return -EINVAL; - } + int status = -ENOMEM; dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); sattr->ia_mode &= ~current->fs->umask; - nfs_fattr_init(&dir_attr); - nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_post_op_update_inode(dir, &dir_attr); - if (status != 0) + data = nfs3_alloc_createdata(); + if (data == NULL) goto out; - status = nfs_instantiate(dentry, &fh, &fattr); + + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD]; + data->arg.mknod.fh = NFS_FH(dir); + data->arg.mknod.name = dentry->d_name.name; + data->arg.mknod.len = dentry->d_name.len; + data->arg.mknod.sattr = sattr; + data->arg.mknod.rdev = rdev; + + switch (sattr->ia_mode & S_IFMT) { + case S_IFBLK: + data->arg.mknod.type = NF3BLK; + break; + case S_IFCHR: + data->arg.mknod.type = NF3CHR; + break; + case S_IFIFO: + data->arg.mknod.type = NF3FIFO; + break; + case S_IFSOCK: + data->arg.mknod.type = NF3SOCK; + break; + default: + status = -EINVAL; + goto out; + } + + status = nfs3_do_create(dir, dentry, data); if (status != 0) goto out; status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: + nfs3_free_createdata(data); dprintk("NFS reply mknod: %d\n", status); return status; } @@ -801,8 +816,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, .commit_done = nfs3_commit_done, - .file_open = nfs_open, - .file_release = nfs_release, .lock = nfs3_proc_lock, .clear_acl_cache = nfs3_forget_cached_acls, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1293e0acd82b..c910413eaeca 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -451,9 +451,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) /* Save the delegation */ memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); rcu_read_unlock(); - lock_kernel(); ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); - unlock_kernel(); if (ret != 0) goto out; ret = -EAGAIN; @@ -1139,8 +1137,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int return res; } -static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, - struct iattr *sattr, struct nfs4_state *state) +static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, + struct nfs_fattr *fattr, struct iattr *sattr, + struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_setattrargs arg = { @@ -1154,9 +1153,10 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, .server = server, }; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], - .rpc_argp = &arg, - .rpc_resp = &res, + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = cred, }; unsigned long timestamp = jiffies; int status; @@ -1166,7 +1166,6 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { /* Use that stateid */ } else if (state != NULL) { - msg.rpc_cred = state->owner->so_cred; nfs4_copy_stateid(&arg.stateid, state, current->files); } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); @@ -1177,15 +1176,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, return status; } -static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, - struct iattr *sattr, struct nfs4_state *state) +static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, + struct nfs_fattr *fattr, struct iattr *sattr, + struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs4_do_setattr(inode, fattr, sattr, state), + _nfs4_do_setattr(inode, cred, fattr, sattr, state), &exception); } while (exception.retry); return err; @@ -1647,29 +1647,25 @@ static int nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct iattr *sattr) { - struct rpc_cred *cred; struct inode *inode = dentry->d_inode; - struct nfs_open_context *ctx; + struct rpc_cred *cred = NULL; struct nfs4_state *state = NULL; int status; nfs_fattr_init(fattr); - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) - return PTR_ERR(cred); - /* Search for an existing open(O_WRITE) file */ - ctx = nfs_find_open_context(inode, cred, FMODE_WRITE); - if (ctx != NULL) + if (sattr->ia_valid & ATTR_FILE) { + struct nfs_open_context *ctx; + + ctx = nfs_file_open_context(sattr->ia_file); + cred = ctx->cred; state = ctx->state; + } - status = nfs4_do_setattr(inode, fattr, sattr, state); + status = nfs4_do_setattr(inode, cred, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); - if (ctx != NULL) - put_nfs_open_context(ctx); - put_rpccred(cred); return status; } @@ -1897,17 +1893,16 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, goto out; } state = nfs4_do_open(dir, &path, flags, sattr, cred); - put_rpccred(cred); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); - goto out; + goto out_putcred; } d_add(dentry, igrab(state->inode)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (flags & O_EXCL) { struct nfs_fattr fattr; - status = nfs4_do_setattr(state->inode, &fattr, sattr, state); + status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(state->inode, sattr); nfs_post_op_update_inode(state->inode, &fattr); @@ -1916,6 +1911,8 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = nfs4_intent_set_file(nd, &path, state); else nfs4_close_sync(&path, state, flags); +out_putcred: + put_rpccred(cred); out: return status; } @@ -2079,47 +2076,81 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n return err; } +struct nfs4_createdata { + struct rpc_message msg; + struct nfs4_create_arg arg; + struct nfs4_create_res res; + struct nfs_fh fh; + struct nfs_fattr fattr; + struct nfs_fattr dir_fattr; +}; + +static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, + struct qstr *name, struct iattr *sattr, u32 ftype) +{ + struct nfs4_createdata *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data != NULL) { + struct nfs_server *server = NFS_SERVER(dir); + + data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; + data->msg.rpc_argp = &data->arg; + data->msg.rpc_resp = &data->res; + data->arg.dir_fh = NFS_FH(dir); + data->arg.server = server; + data->arg.name = name; + data->arg.attrs = sattr; + data->arg.ftype = ftype; + data->arg.bitmask = server->attr_bitmask; + data->res.server = server; + data->res.fh = &data->fh; + data->res.fattr = &data->fattr; + data->res.dir_fattr = &data->dir_fattr; + nfs_fattr_init(data->res.fattr); + nfs_fattr_init(data->res.dir_fattr); + } + return data; +} + +static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) +{ + int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); + if (status == 0) { + update_changeattr(dir, &data->res.dir_cinfo); + nfs_post_op_update_inode(dir, data->res.dir_fattr); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + } + return status; +} + +static void nfs4_free_createdata(struct nfs4_createdata *data) +{ + kfree(data); +} + static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, unsigned int len, struct iattr *sattr) { - struct nfs_server *server = NFS_SERVER(dir); - struct nfs_fh fhandle; - struct nfs_fattr fattr, dir_fattr; - struct nfs4_create_arg arg = { - .dir_fh = NFS_FH(dir), - .server = server, - .name = &dentry->d_name, - .attrs = sattr, - .ftype = NF4LNK, - .bitmask = server->attr_bitmask, - }; - struct nfs4_create_res res = { - .server = server, - .fh = &fhandle, - .fattr = &fattr, - .dir_fattr = &dir_fattr, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status; + struct nfs4_createdata *data; + int status = -ENAMETOOLONG; if (len > NFS4_MAXPATHLEN) - return -ENAMETOOLONG; + goto out; - arg.u.symlink.pages = &page; - arg.u.symlink.len = len; - nfs_fattr_init(&fattr); - nfs_fattr_init(&dir_fattr); + status = -ENOMEM; + data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4LNK); + if (data == NULL) + goto out; + + data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; + data->arg.u.symlink.pages = &page; + data->arg.u.symlink.len = len; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (!status) { - update_changeattr(dir, &res.dir_cinfo); - nfs_post_op_update_inode(dir, res.dir_fattr); - status = nfs_instantiate(dentry, &fhandle, &fattr); - } + status = nfs4_do_create(dir, dentry, data); + + nfs4_free_createdata(data); +out: return status; } @@ -2140,39 +2171,17 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { - struct nfs_server *server = NFS_SERVER(dir); - struct nfs_fh fhandle; - struct nfs_fattr fattr, dir_fattr; - struct nfs4_create_arg arg = { - .dir_fh = NFS_FH(dir), - .server = server, - .name = &dentry->d_name, - .attrs = sattr, - .ftype = NF4DIR, - .bitmask = server->attr_bitmask, - }; - struct nfs4_create_res res = { - .server = server, - .fh = &fhandle, - .fattr = &fattr, - .dir_fattr = &dir_fattr, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status; + struct nfs4_createdata *data; + int status = -ENOMEM; - nfs_fattr_init(&fattr); - nfs_fattr_init(&dir_fattr); - - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (!status) { - update_changeattr(dir, &res.dir_cinfo); - nfs_post_op_update_inode(dir, res.dir_fattr); - status = nfs_instantiate(dentry, &fhandle, &fattr); - } + data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); + if (data == NULL) + goto out; + + status = nfs4_do_create(dir, dentry, data); + + nfs4_free_createdata(data); +out: return status; } @@ -2242,56 +2251,34 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { - struct nfs_server *server = NFS_SERVER(dir); - struct nfs_fh fh; - struct nfs_fattr fattr, dir_fattr; - struct nfs4_create_arg arg = { - .dir_fh = NFS_FH(dir), - .server = server, - .name = &dentry->d_name, - .attrs = sattr, - .bitmask = server->attr_bitmask, - }; - struct nfs4_create_res res = { - .server = server, - .fh = &fh, - .fattr = &fattr, - .dir_fattr = &dir_fattr, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status; - int mode = sattr->ia_mode; - - nfs_fattr_init(&fattr); - nfs_fattr_init(&dir_fattr); + struct nfs4_createdata *data; + int mode = sattr->ia_mode; + int status = -ENOMEM; BUG_ON(!(sattr->ia_valid & ATTR_MODE)); BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); + + data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4SOCK); + if (data == NULL) + goto out; + if (S_ISFIFO(mode)) - arg.ftype = NF4FIFO; + data->arg.ftype = NF4FIFO; else if (S_ISBLK(mode)) { - arg.ftype = NF4BLK; - arg.u.device.specdata1 = MAJOR(rdev); - arg.u.device.specdata2 = MINOR(rdev); + data->arg.ftype = NF4BLK; + data->arg.u.device.specdata1 = MAJOR(rdev); + data->arg.u.device.specdata2 = MINOR(rdev); } else if (S_ISCHR(mode)) { - arg.ftype = NF4CHR; - arg.u.device.specdata1 = MAJOR(rdev); - arg.u.device.specdata2 = MINOR(rdev); + data->arg.ftype = NF4CHR; + data->arg.u.device.specdata1 = MAJOR(rdev); + data->arg.u.device.specdata2 = MINOR(rdev); } - else - arg.ftype = NF4SOCK; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (status == 0) { - update_changeattr(dir, &res.dir_cinfo); - nfs_post_op_update_inode(dir, res.dir_fattr); - status = nfs_instantiate(dentry, &fh, &fattr); - } + status = nfs4_do_create(dir, dentry, data); + + nfs4_free_createdata(data); +out: return status; } @@ -2706,6 +2693,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) ret = nfs_revalidate_inode(server, inode); if (ret < 0) return ret; + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) return ret; @@ -2733,7 +2722,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); - nfs_zap_caches(inode); + nfs_access_zap_cache(inode); + nfs_zap_acl_cache(inode); return ret; } @@ -2767,8 +2757,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) task->tk_status = 0; return -EAGAIN; case -NFS4ERR_DELAY: - nfs_inc_server_stats((struct nfs_server *) server, - NFSIOS_DELAY); + nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; @@ -2933,7 +2922,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { - long timeout; + long timeout = 0; int err; do { err = _nfs4_proc_setclientid_confirm(clp, cred); @@ -3725,8 +3714,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, .commit_done = nfs4_commit_done, - .file_open = nfs_open, - .file_release = nfs_release, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, }; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 856a8934f610..401ef8b28f97 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -940,7 +940,6 @@ static int reclaimer(void *ptr) allow_signal(SIGKILL); /* Ensure exclusive access to NFSv4 state */ - lock_kernel(); down_write(&clp->cl_sem); /* Are there any NFS mounts out there? */ if (list_empty(&clp->cl_superblocks)) @@ -1000,7 +999,6 @@ restart_loop: nfs_delegation_reap_unclaimed(clp); out: up_write(&clp->cl_sem); - unlock_kernel(); if (status == -NFS4ERR_CB_PATH_DOWN) nfs_handle_cb_pathdown(clp); nfs4_clear_recover_bit(clp); diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 531379d36823..46763d1cd397 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -1,6 +1,4 @@ /* - * $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $ - * * Copyright (C) 1995, 1996 Gero Kuhlmann * * Allow an NFS filesystem to be mounted as root. The way this works is: @@ -297,10 +295,10 @@ static int __init root_nfs_name(char *name) nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; - nfs_data.acregmin = 3; - nfs_data.acregmax = 60; - nfs_data.acdirmin = 30; - nfs_data.acdirmax = 60; + nfs_data.acregmin = NFS_DEF_ACREGMIN; + nfs_data.acregmax = NFS_DEF_ACREGMAX; + nfs_data.acdirmin = NFS_DEF_ACDIRMIN; + nfs_data.acdirmax = NFS_DEF_ACDIRMAX; strcpy(buf, NFS_ROOT); /* Process options received from the remote server */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 03599bfe81cf..4dbb84df1b68 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -129,6 +129,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, sattr->ia_mode &= S_IALLUGO; dprintk("NFS call setattr\n"); + if (sattr->ia_valid & ATTR_FILE) + msg.rpc_cred = nfs_file_cred(sattr->ia_file); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) @@ -598,6 +600,29 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } +/* Helper functions for NFS lock bounds checking */ +#define NFS_LOCK32_OFFSET_MAX ((__s32)0x7fffffffUL) +static int nfs_lock_check_bounds(const struct file_lock *fl) +{ + __s32 start, end; + + start = (__s32)fl->fl_start; + if ((loff_t)start != fl->fl_start) + goto out_einval; + + if (fl->fl_end != OFFSET_MAX) { + end = (__s32)fl->fl_end; + if ((loff_t)end != fl->fl_end) + goto out_einval; + } else + end = NFS_LOCK32_OFFSET_MAX; + + if (start < 0 || start > end) + goto out_einval; + return 0; +out_einval: + return -EINVAL; +} const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ @@ -630,7 +655,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .write_setup = nfs_proc_write_setup, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, - .file_open = nfs_open, - .file_release = nfs_release, .lock = nfs_proc_lock, + .lock_check_bounds = nfs_lock_check_bounds, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 614efeed5437..1b94e3650f5c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,6 @@ enum { /* Mount options that take no arguments */ Opt_soft, Opt_hard, - Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, @@ -92,8 +92,8 @@ enum { Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, - /* Mount options that are ignored */ - Opt_userspace, Opt_deprecated, + /* Special mount options */ + Opt_userspace, Opt_deprecated, Opt_sloppy, Opt_err }; @@ -101,10 +101,14 @@ enum { static match_table_t nfs_mount_option_tokens = { { Opt_userspace, "bg" }, { Opt_userspace, "fg" }, + { Opt_userspace, "retry=%s" }, + + { Opt_sloppy, "sloppy" }, + { Opt_soft, "soft" }, { Opt_hard, "hard" }, - { Opt_intr, "intr" }, - { Opt_nointr, "nointr" }, + { Opt_deprecated, "intr" }, + { Opt_deprecated, "nointr" }, { Opt_posix, "posix" }, { Opt_noposix, "noposix" }, { Opt_cto, "cto" }, @@ -136,7 +140,6 @@ static match_table_t nfs_mount_option_tokens = { { Opt_acdirmin, "acdirmin=%u" }, { Opt_acdirmax, "acdirmax=%u" }, { Opt_actimeo, "actimeo=%u" }, - { Opt_userspace, "retry=%u" }, { Opt_namelen, "namlen=%u" }, { Opt_mountport, "mountport=%u" }, { Opt_mountvers, "mountvers=%u" }, @@ -207,6 +210,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs_kill_super(struct super_block *); static void nfs_put_super(struct super_block *); +static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); static struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, @@ -234,6 +238,7 @@ static const struct super_operations nfs_sops = { .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, .show_stats = nfs_show_stats, + .remount_fs = nfs_remount, }; #ifdef CONFIG_NFS_V4 @@ -278,6 +283,7 @@ static const struct super_operations nfs4_sops = { .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, .show_stats = nfs_show_stats, + .remount_fs = nfs_remount, }; #endif @@ -368,8 +374,6 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) }; int error; - lock_kernel(); - error = server->nfs_client->rpc_ops->statfs(server, fh, &res); if (error < 0) goto out_err; @@ -401,12 +405,10 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = server->namelen; - unlock_kernel(); return 0; out_err: dprintk("%s: statfs error = %d\n", __func__, -error); - unlock_kernel(); return error; } @@ -514,13 +516,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->bsize != 0) seq_printf(m, ",bsize=%u", nfss->bsize); seq_printf(m, ",namlen=%u", nfss->namelen); - if (nfss->acregmin != 3*HZ || showdefaults) + if (nfss->acregmin != NFS_DEF_ACREGMIN*HZ || showdefaults) seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ || showdefaults) + if (nfss->acregmax != NFS_DEF_ACREGMAX*HZ || showdefaults) seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ || showdefaults) + if (nfss->acdirmin != NFS_DEF_ACDIRMIN*HZ || showdefaults) seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ || showdefaults) + if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults) seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) @@ -702,49 +704,233 @@ static int nfs_verify_server_address(struct sockaddr *addr) return 0; } +static void nfs_parse_ipv4_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + u8 *addr = (u8 *)&sin->sin_addr.s_addr; + + if (str_len <= INET_ADDRSTRLEN) { + dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n", + (int)str_len, string); + + sin->sin_family = AF_INET; + *addr_len = sizeof(*sin); + if (in4_pton(string, str_len, addr, '\0', NULL)) + return; + } + + sap->sa_family = AF_UNSPEC; + *addr_len = 0; +} + +#define IPV6_SCOPE_DELIMITER '%' + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, + const char *delim, + struct sockaddr_in6 *sin6) +{ + char *p; + size_t len; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) + return ; + if (*delim != IPV6_SCOPE_DELIMITER) + return; + + len = (string + str_len) - delim - 1; + p = kstrndup(delim + 1, len, GFP_KERNEL); + if (p) { + unsigned long scope_id = 0; + struct net_device *dev; + + dev = dev_get_by_name(&init_net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + /* scope_id is set to zero on error */ + strict_strtoul(p, 10, &scope_id); + } + + kfree(p); + sin6->sin6_scope_id = scope_id; + dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); + } +} + +static void nfs_parse_ipv6_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; + const char *delim; + + if (str_len <= INET6_ADDRSTRLEN) { + dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n", + (int)str_len, string); + + sin6->sin6_family = AF_INET6; + *addr_len = sizeof(*sin6); + if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { + nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); + return; + } + } + + sap->sa_family = AF_UNSPEC; + *addr_len = 0; +} +#else +static void nfs_parse_ipv6_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + sap->sa_family = AF_UNSPEC; + *addr_len = 0; +} +#endif + /* - * Parse string addresses passed in via a mount option, - * and construct a sockaddr based on the result. + * Construct a sockaddr based on the contents of a string that contains + * an IP address in presentation format. * - * If address parsing fails, set the sockaddr's address - * family to AF_UNSPEC to force nfs_verify_server_address() - * to punt the mount. + * If there is a problem constructing the new sockaddr, set the address + * family to AF_UNSPEC. */ -static void nfs_parse_server_address(char *value, - struct sockaddr *sap, - size_t *len) +static void nfs_parse_ip_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) { - if (strchr(value, ':')) { - struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; - u8 *addr = (u8 *)&ap->sin6_addr.in6_u; + unsigned int i, colons; - ap->sin6_family = AF_INET6; - *len = sizeof(*ap); - if (in6_pton(value, -1, addr, '\0', NULL)) - return; - } else { - struct sockaddr_in *ap = (struct sockaddr_in *)sap; - u8 *addr = (u8 *)&ap->sin_addr.s_addr; + colons = 0; + for (i = 0; i < str_len; i++) + if (string[i] == ':') + colons++; + + if (colons >= 2) + nfs_parse_ipv6_address(string, str_len, sap, addr_len); + else + nfs_parse_ipv4_address(string, str_len, sap, addr_len); +} + +/* + * Sanity check the NFS transport protocol. + * + */ +static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ + switch (mnt->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + break; + default: + mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * For text based NFSv2/v3 mounts, the mount protocol transport default + * settings should depend upon the specified NFS transport. + */ +static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) +{ + nfs_validate_transport_protocol(mnt); - ap->sin_family = AF_INET; - *len = sizeof(*ap); - if (in4_pton(value, -1, addr, '\0', NULL)) + if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || + mnt->mount_server.protocol == XPRT_TRANSPORT_TCP) return; + switch (mnt->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; } +} - sap->sa_family = AF_UNSPEC; - *len = 0; +/* + * Parse the value of the 'sec=' option. + * + * The flavor_len setting is for v4 mounts. + */ +static int nfs_parse_security_flavors(char *value, + struct nfs_parsed_mount_data *mnt) +{ + substring_t args[MAX_OPT_ARGS]; + + dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); + + switch (match_token(value, nfs_secflavor_tokens, args)) { + case Opt_sec_none: + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + mnt->auth_flavor_len = 0; + mnt->auth_flavors[0] = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + mnt->auth_flavor_len = 1; + mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; + break; + default: + return 0; + } + + return 1; +} + +static void nfs_parse_invalid_value(const char *option) +{ + dfprintk(MOUNT, "NFS: bad value specified for %s option\n", option); } /* * Error-check and convert a string of mount options from user space into - * a data structure + * a data structure. The whole mount string is processed; bad options are + * skipped as they are encountered. If there were no errors, return 1; + * otherwise return 0 (zero). */ static int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) { char *p, *string, *secdata; - int rc; + int rc, sloppy = 0, errors = 0; if (!raw) { dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); @@ -777,15 +963,16 @@ static int nfs_parse_mount_options(char *raw, token = match_token(p, nfs_mount_option_tokens, args); switch (token) { + + /* + * boolean options: foo/nofoo + */ case Opt_soft: mnt->flags |= NFS_MOUNT_SOFT; break; case Opt_hard: mnt->flags &= ~NFS_MOUNT_SOFT; break; - case Opt_intr: - case Opt_nointr: - break; case Opt_posix: mnt->flags |= NFS_MOUNT_POSIX; break; @@ -819,20 +1006,14 @@ static int nfs_parse_mount_options(char *raw, case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - mnt->timeo = 7; - mnt->retrans = 5; break; case Opt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - mnt->timeo = 600; - mnt->retrans = 2; break; case Opt_rdma: mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - mnt->timeo = 600; - mnt->retrans = 2; break; case Opt_acl: mnt->flags &= ~NFS_MOUNT_NOACL; @@ -853,165 +1034,144 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_UNSHARED; break; + /* + * options that take numeric values + */ case Opt_port: - if (match_int(args, &option)) - return 0; - if (option < 0 || option > 65535) - return 0; - mnt->nfs_server.port = option; + if (match_int(args, &option) || + option < 0 || option > USHORT_MAX) { + errors++; + nfs_parse_invalid_value("port"); + } else + mnt->nfs_server.port = option; break; case Opt_rsize: - if (match_int(args, &mnt->rsize)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("rsize"); + } else + mnt->rsize = option; break; case Opt_wsize: - if (match_int(args, &mnt->wsize)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("wsize"); + } else + mnt->wsize = option; break; case Opt_bsize: - if (match_int(args, &option)) - return 0; - if (option < 0) - return 0; - mnt->bsize = option; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("bsize"); + } else + mnt->bsize = option; break; case Opt_timeo: - if (match_int(args, &mnt->timeo)) - return 0; + if (match_int(args, &option) || option <= 0) { + errors++; + nfs_parse_invalid_value("timeo"); + } else + mnt->timeo = option; break; case Opt_retrans: - if (match_int(args, &mnt->retrans)) - return 0; + if (match_int(args, &option) || option <= 0) { + errors++; + nfs_parse_invalid_value("retrans"); + } else + mnt->retrans = option; break; case Opt_acregmin: - if (match_int(args, &mnt->acregmin)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("acregmin"); + } else + mnt->acregmin = option; break; case Opt_acregmax: - if (match_int(args, &mnt->acregmax)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("acregmax"); + } else + mnt->acregmax = option; break; case Opt_acdirmin: - if (match_int(args, &mnt->acdirmin)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("acdirmin"); + } else + mnt->acdirmin = option; break; case Opt_acdirmax: - if (match_int(args, &mnt->acdirmax)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("acdirmax"); + } else + mnt->acdirmax = option; break; case Opt_actimeo: - if (match_int(args, &option)) - return 0; - if (option < 0) - return 0; - mnt->acregmin = - mnt->acregmax = - mnt->acdirmin = - mnt->acdirmax = option; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("actimeo"); + } else + mnt->acregmin = mnt->acregmax = + mnt->acdirmin = mnt->acdirmax = option; break; case Opt_namelen: - if (match_int(args, &mnt->namlen)) - return 0; + if (match_int(args, &option) || option < 0) { + errors++; + nfs_parse_invalid_value("namlen"); + } else + mnt->namlen = option; break; case Opt_mountport: - if (match_int(args, &option)) - return 0; - if (option < 0 || option > 65535) - return 0; - mnt->mount_server.port = option; + if (match_int(args, &option) || + option < 0 || option > USHORT_MAX) { + errors++; + nfs_parse_invalid_value("mountport"); + } else + mnt->mount_server.port = option; break; case Opt_mountvers: - if (match_int(args, &option)) - return 0; - if (option < 0) - return 0; - mnt->mount_server.version = option; + if (match_int(args, &option) || + option < NFS_MNT_VERSION || + option > NFS_MNT3_VERSION) { + errors++; + nfs_parse_invalid_value("mountvers"); + } else + mnt->mount_server.version = option; break; case Opt_nfsvers: - if (match_int(args, &option)) - return 0; + if (match_int(args, &option)) { + errors++; + nfs_parse_invalid_value("nfsvers"); + break; + } switch (option) { - case 2: + case NFS2_VERSION: mnt->flags &= ~NFS_MOUNT_VER3; break; - case 3: + case NFS3_VERSION: mnt->flags |= NFS_MOUNT_VER3; break; default: - goto out_unrec_vers; + errors++; + nfs_parse_invalid_value("nfsvers"); } break; + /* + * options that take text values + */ case Opt_sec: string = match_strdup(args); if (string == NULL) goto out_nomem; - token = match_token(string, nfs_secflavor_tokens, args); + rc = nfs_parse_security_flavors(string, mnt); kfree(string); - - /* - * The flags setting is for v2/v3. The flavor_len - * setting is for v4. v2/v3 also need to know the - * difference between NULL and UNIX. - */ - switch (token) { - case Opt_sec_none: - mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 0; - mnt->auth_flavors[0] = RPC_AUTH_NULL; - break; - case Opt_sec_sys: - mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 0; - mnt->auth_flavors[0] = RPC_AUTH_UNIX; - break; - case Opt_sec_krb5: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; - break; - case Opt_sec_krb5i: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; - break; - case Opt_sec_krb5p: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; - break; - case Opt_sec_lkey: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; - break; - case Opt_sec_lkeyi: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; - break; - case Opt_sec_lkeyp: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; - break; - case Opt_sec_spkm: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; - break; - case Opt_sec_spkmi: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; - break; - case Opt_sec_spkmp: - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; - break; - default: - goto out_unrec_sec; + if (!rc) { + errors++; + dfprintk(MOUNT, "NFS: unrecognized " + "security flavor\n"); } break; case Opt_proto: @@ -1026,24 +1186,20 @@ static int nfs_parse_mount_options(char *raw, case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - mnt->timeo = 7; - mnt->retrans = 5; break; case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - mnt->timeo = 600; - mnt->retrans = 2; break; case Opt_xprt_rdma: /* vector side protocols to TCP */ mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - mnt->timeo = 600; - mnt->retrans = 2; break; default: - goto out_unrec_xprt; + errors++; + dfprintk(MOUNT, "NFS: unrecognized " + "transport protocol\n"); } break; case Opt_mountproto: @@ -1063,16 +1219,19 @@ static int nfs_parse_mount_options(char *raw, break; case Opt_xprt_rdma: /* not used for side protocols */ default: - goto out_unrec_xprt; + errors++; + dfprintk(MOUNT, "NFS: unrecognized " + "transport protocol\n"); } break; case Opt_addr: string = match_strdup(args); if (string == NULL) goto out_nomem; - nfs_parse_server_address(string, (struct sockaddr *) - &mnt->nfs_server.address, - &mnt->nfs_server.addrlen); + nfs_parse_ip_address(string, strlen(string), + (struct sockaddr *) + &mnt->nfs_server.address, + &mnt->nfs_server.addrlen); kfree(string); break; case Opt_clientaddr: @@ -1093,24 +1252,33 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - nfs_parse_server_address(string, (struct sockaddr *) - &mnt->mount_server.address, - &mnt->mount_server.addrlen); + nfs_parse_ip_address(string, strlen(string), + (struct sockaddr *) + &mnt->mount_server.address, + &mnt->mount_server.addrlen); kfree(string); break; + /* + * Special options + */ + case Opt_sloppy: + sloppy = 1; + dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); + break; case Opt_userspace: case Opt_deprecated: + dfprintk(MOUNT, "NFS: ignoring mount option " + "'%s'\n", p); break; default: - goto out_unknown; + errors++; + dfprintk(MOUNT, "NFS: unrecognized mount option " + "'%s'\n", p); } } - nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, - mnt->nfs_server.port); - return 1; out_nomem: @@ -1120,21 +1288,6 @@ out_security_failure: free_secdata(secdata); printk(KERN_INFO "NFS: security options invalid: %d\n", rc); return 0; -out_unrec_vers: - printk(KERN_INFO "NFS: unrecognized NFS version number\n"); - return 0; - -out_unrec_xprt: - printk(KERN_INFO "NFS: unrecognized transport protocol\n"); - return 0; - -out_unrec_sec: - printk(KERN_INFO "NFS: unrecognized security flavor\n"); - return 0; - -out_unknown: - printk(KERN_INFO "NFS: unknown mount option: %s\n", p); - return 0; } /* @@ -1188,11 +1341,146 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, if (status == 0) return 0; - dfprintk(MOUNT, "NFS: unable to mount server %s, error %d", + dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", hostname, status); return status; } +static int nfs_parse_simple_hostname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + size_t len; + char *colon, *comma; + + colon = strchr(dev_name, ':'); + if (colon == NULL) + goto out_bad_devname; + + len = colon - dev_name; + if (len > maxnamlen) + goto out_hostname; + + /* N.B. caller will free nfs_server.hostname in all cases */ + *hostname = kstrndup(dev_name, len, GFP_KERNEL); + if (!*hostname) + goto out_nomem; + + /* kill possible hostname list: not supported */ + comma = strchr(*hostname, ','); + if (comma != NULL) { + if (comma == *hostname) + goto out_bad_devname; + *comma = '\0'; + } + + colon++; + len = strlen(colon); + if (len > maxpathlen) + goto out_path; + *export_path = kstrndup(colon, len, GFP_KERNEL); + if (!*export_path) + goto out_nomem; + + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); + return 0; + +out_bad_devname: + dfprintk(MOUNT, "NFS: device name not in host:path format\n"); + return -EINVAL; + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); + return -ENOMEM; + +out_hostname: + dfprintk(MOUNT, "NFS: server hostname too long\n"); + return -ENAMETOOLONG; + +out_path: + dfprintk(MOUNT, "NFS: export pathname too long\n"); + return -ENAMETOOLONG; +} + +/* + * Hostname has square brackets around it because it contains one or + * more colons. We look for the first closing square bracket, and a + * colon must follow it. + */ +static int nfs_parse_protected_hostname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + size_t len; + char *start, *end; + + start = (char *)(dev_name + 1); + + end = strchr(start, ']'); + if (end == NULL) + goto out_bad_devname; + if (*(end + 1) != ':') + goto out_bad_devname; + + len = end - start; + if (len > maxnamlen) + goto out_hostname; + + /* N.B. caller will free nfs_server.hostname in all cases */ + *hostname = kstrndup(start, len, GFP_KERNEL); + if (*hostname == NULL) + goto out_nomem; + + end += 2; + len = strlen(end); + if (len > maxpathlen) + goto out_path; + *export_path = kstrndup(end, len, GFP_KERNEL); + if (!*export_path) + goto out_nomem; + + return 0; + +out_bad_devname: + dfprintk(MOUNT, "NFS: device name not in host:path format\n"); + return -EINVAL; + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); + return -ENOMEM; + +out_hostname: + dfprintk(MOUNT, "NFS: server hostname too long\n"); + return -ENAMETOOLONG; + +out_path: + dfprintk(MOUNT, "NFS: export pathname too long\n"); + return -ENAMETOOLONG; +} + +/* + * Split "dev_name" into "hostname:export_path". + * + * The leftmost colon demarks the split between the server's hostname + * and the export path. If the hostname starts with a left square + * bracket, then it may contain colons. + * + * Note: caller frees hostname and export path, even on error. + */ +static int nfs_parse_devname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) +{ + if (*dev_name == '[') + return nfs_parse_protected_hostname(dev_name, + hostname, maxnamlen, + export_path, maxpathlen); + + return nfs_parse_simple_hostname(dev_name, + hostname, maxnamlen, + export_path, maxpathlen); +} + /* * Validate the NFS2/NFS3 mount data * - fills in the mount root filehandle @@ -1222,16 +1510,14 @@ static int nfs_validate_mount_data(void *options, args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); args->rsize = NFS_MAX_FILE_IO_SIZE; args->wsize = NFS_MAX_FILE_IO_SIZE; - args->timeo = 600; - args->retrans = 2; - args->acregmin = 3; - args->acregmax = 60; - args->acdirmin = 30; - args->acdirmax = 60; + args->acregmin = NFS_DEF_ACREGMIN; + args->acregmax = NFS_DEF_ACREGMAX; + args->acdirmin = NFS_DEF_ACDIRMIN; + args->acdirmax = NFS_DEF_ACDIRMAX; args->mount_server.port = 0; /* autobind unless user sets port */ - args->mount_server.protocol = XPRT_TRANSPORT_UDP; args->nfs_server.port = 0; /* autobind unless user sets port */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; + args->auth_flavors[0] = RPC_AUTH_UNIX; switch (data->version) { case 1: @@ -1289,7 +1575,9 @@ static int nfs_validate_mount_data(void *options, args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); args->namlen = data->namlen; args->bsize = data->bsize; - args->auth_flavors[0] = data->pseudoflavor; + + if (data->flags & NFS_MOUNT_SECFLAVOUR) + args->auth_flavors[0] = data->pseudoflavor; if (!args->nfs_server.hostname) goto out_nomem; @@ -1321,8 +1609,6 @@ static int nfs_validate_mount_data(void *options, break; default: { - unsigned int len; - char *c; int status; if (nfs_parse_mount_options((char *)options, args) == 0) @@ -1332,21 +1618,22 @@ static int nfs_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; - c = strchr(dev_name, ':'); - if (c == NULL) - return -EINVAL; - len = c - dev_name; - /* N.B. caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); - if (!args->nfs_server.hostname) - goto out_nomem; + nfs_set_port((struct sockaddr *)&args->nfs_server.address, + args->nfs_server.port); - c++; - if (strlen(c) > NFS_MAXPATHLEN) - return -ENAMETOOLONG; - args->nfs_server.export_path = c; + nfs_set_mount_transport_protocol(args); + + status = nfs_parse_devname(dev_name, + &args->nfs_server.hostname, + PAGE_SIZE, + &args->nfs_server.export_path, + NFS_MAXPATHLEN); + if (!status) + status = nfs_try_mount(args, mntfh); + + kfree(args->nfs_server.export_path); + args->nfs_server.export_path = NULL; - status = nfs_try_mount(args, mntfh); if (status) return status; @@ -1354,9 +1641,6 @@ static int nfs_validate_mount_data(void *options, } } - if (!(args->flags & NFS_MOUNT_SECFLAVOUR)) - args->auth_flavors[0] = RPC_AUTH_UNIX; - #ifndef CONFIG_NFS_V3 if (args->flags & NFS_MOUNT_VER3) goto out_v3_not_compiled; @@ -1396,6 +1680,80 @@ out_invalid_fh: return -EINVAL; } +static int +nfs_compare_remount_data(struct nfs_server *nfss, + struct nfs_parsed_mount_data *data) +{ + if (data->flags != nfss->flags || + data->rsize != nfss->rsize || + data->wsize != nfss->wsize || + data->retrans != nfss->client->cl_timeout->to_retries || + data->auth_flavors[0] != nfss->client->cl_auth->au_flavor || + data->acregmin != nfss->acregmin / HZ || + data->acregmax != nfss->acregmax / HZ || + data->acdirmin != nfss->acdirmin / HZ || + data->acdirmax != nfss->acdirmax / HZ || + data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || + data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || + memcmp(&data->nfs_server.address, &nfss->nfs_client->cl_addr, + data->nfs_server.addrlen) != 0) + return -EINVAL; + + return 0; +} + +static int +nfs_remount(struct super_block *sb, int *flags, char *raw_data) +{ + int error; + struct nfs_server *nfss = sb->s_fs_info; + struct nfs_parsed_mount_data *data; + struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; + struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; + u32 nfsvers = nfss->nfs_client->rpc_ops->version; + + /* + * Userspace mount programs that send binary options generally send + * them populated with default values. We have no way to know which + * ones were explicitly specified. Fall back to legacy behavior and + * just return success. + */ + if ((nfsvers == 4 && options4->version == 1) || + (nfsvers <= 3 && options->version >= 1 && + options->version <= 6)) + return 0; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + /* fill out struct with values from existing mount */ + data->flags = nfss->flags; + data->rsize = nfss->rsize; + data->wsize = nfss->wsize; + data->retrans = nfss->client->cl_timeout->to_retries; + data->auth_flavors[0] = nfss->client->cl_auth->au_flavor; + data->acregmin = nfss->acregmin / HZ; + data->acregmax = nfss->acregmax / HZ; + data->acdirmin = nfss->acdirmin / HZ; + data->acdirmax = nfss->acdirmax / HZ; + data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; + data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, + data->nfs_server.addrlen); + + /* overwrite those values with any that were specified */ + error = nfs_parse_mount_options((char *)options, data); + if (error < 0) + goto out; + + /* compare new mount options with old ones */ + error = nfs_compare_remount_data(nfss, data); +out: + kfree(data); + return error; +} + /* * Initialise the common bits of the superblock */ @@ -1811,14 +2169,13 @@ static int nfs4_validate_mount_data(void *options, args->rsize = NFS_MAX_FILE_IO_SIZE; args->wsize = NFS_MAX_FILE_IO_SIZE; - args->timeo = 600; - args->retrans = 2; - args->acregmin = 3; - args->acregmax = 60; - args->acdirmin = 30; - args->acdirmax = 60; + args->acregmin = NFS_DEF_ACREGMIN; + args->acregmax = NFS_DEF_ACREGMAX; + args->acdirmin = NFS_DEF_ACDIRMIN; + args->acdirmax = NFS_DEF_ACDIRMAX; args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ - args->nfs_server.protocol = XPRT_TRANSPORT_TCP; + args->auth_flavors[0] = RPC_AUTH_UNIX; + args->auth_flavor_len = 0; switch (data->version) { case 1: @@ -1834,18 +2191,13 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; - switch (data->auth_flavourlen) { - case 0: - args->auth_flavors[0] = RPC_AUTH_UNIX; - break; - case 1: + if (data->auth_flavourlen) { + if (data->auth_flavourlen > 1) + goto out_inval_auth; if (copy_from_user(&args->auth_flavors[0], data->auth_flavours, sizeof(args->auth_flavors[0]))) return -EFAULT; - break; - default: - goto out_inval_auth; } c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); @@ -1879,10 +2231,11 @@ static int nfs4_validate_mount_data(void *options, args->acdirmin = data->acdirmin; args->acdirmax = data->acdirmax; args->nfs_server.protocol = data->proto; + nfs_validate_transport_protocol(args); break; default: { - unsigned int len; + int status; if (nfs_parse_mount_options((char *)options, args) == 0) return -EINVAL; @@ -1891,44 +2244,25 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) return -EINVAL; - switch (args->auth_flavor_len) { - case 0: - args->auth_flavors[0] = RPC_AUTH_UNIX; - break; - case 1: - break; - default: - goto out_inval_auth; - } + nfs_set_port((struct sockaddr *)&args->nfs_server.address, + args->nfs_server.port); - /* - * Split "dev_name" into "hostname:mntpath". - */ - c = strchr(dev_name, ':'); - if (c == NULL) - return -EINVAL; - /* while calculating len, pretend ':' is '\0' */ - len = c - dev_name; - if (len > NFS4_MAXNAMLEN) - return -ENAMETOOLONG; - /* N.B. caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); - if (!args->nfs_server.hostname) - goto out_nomem; - - c++; /* step over the ':' */ - len = strlen(c); - if (len > NFS4_MAXPATHLEN) - return -ENAMETOOLONG; - args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL); - if (!args->nfs_server.export_path) - goto out_nomem; + nfs_validate_transport_protocol(args); - dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path); + if (args->auth_flavor_len > 1) + goto out_inval_auth; if (args->client_address == NULL) goto out_no_client_address; + status = nfs_parse_devname(dev_name, + &args->nfs_server.hostname, + NFS4_MAXNAMLEN, + &args->nfs_server.export_path, + NFS4_MAXPATHLEN); + if (status < 0) + return status; + break; } } @@ -1944,10 +2278,6 @@ out_inval_auth: data->auth_flavourlen); return -EINVAL; -out_nomem: - dfprintk(MOUNT, "NFS4: not enough memory to handle mount options\n"); - return -ENOMEM; - out_no_address: dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); return -EINVAL; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f333848fd3be..3229e217c773 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -34,9 +34,6 @@ /* * Local function declarations */ -static struct nfs_page * nfs_update_request(struct nfs_open_context*, - struct page *, - unsigned int, unsigned int); static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, struct inode *inode, int ioflags); static void nfs_redirty_request(struct nfs_page *req); @@ -136,16 +133,21 @@ static struct nfs_page *nfs_page_find_request(struct page *page) static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { struct inode *inode = page->mapping->host; - loff_t end, i_size = i_size_read(inode); - pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + loff_t end, i_size; + pgoff_t end_index; + spin_lock(&inode->i_lock); + i_size = i_size_read(inode); + end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; if (i_size > 0 && page->index < end_index) - return; + goto out; end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) - return; - nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); + goto out; i_size_write(inode, end); + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); +out: + spin_unlock(&inode->i_lock); } /* A writeback failed: mark the page as bad, and invalidate the page cache */ @@ -169,29 +171,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int SetPageUptodate(page); } -static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, - unsigned int offset, unsigned int count) -{ - struct nfs_page *req; - int ret; - - for (;;) { - req = nfs_update_request(ctx, page, offset, count); - if (!IS_ERR(req)) - break; - ret = PTR_ERR(req); - if (ret != -EBUSY) - return ret; - ret = nfs_wb_page(page->mapping->host, page); - if (ret != 0) - return ret; - } - /* Update file length */ - nfs_grow_file(page, offset, count); - nfs_clear_page_tag_locked(req); - return 0; -} - static int wb_priority(struct writeback_control *wbc) { if (wbc->for_reclaim) @@ -268,12 +247,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, return ret; spin_lock(&inode->i_lock); } - if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { - /* This request is marked for commit */ + if (test_bit(PG_CLEAN, &req->wb_flags)) { spin_unlock(&inode->i_lock); - nfs_clear_page_tag_locked(req); - nfs_pageio_complete(pgio); - return 0; + BUG(); } if (nfs_set_page_writeback(page) != 0) { spin_unlock(&inode->i_lock); @@ -355,11 +331,19 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) /* * Insert a write request into an inode */ -static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); int error; + error = radix_tree_preload(GFP_NOFS); + if (error != 0) + goto out; + + /* Lock the request! */ + nfs_lock_request_dontget(req); + + spin_lock(&inode->i_lock); error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); BUG_ON(error); if (!nfsi->npages) { @@ -373,6 +357,10 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) kref_get(&req->wb_kref); radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + spin_unlock(&inode->i_lock); + radix_tree_preload_end(); +out: + return error; } /* @@ -405,19 +393,6 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -/* - * Check if a request is dirty - */ -static inline int -nfs_dirty_request(struct nfs_page *req) -{ - struct page *page = req->wb_page; - - if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags)) - return 0; - return !PageWriteback(page); -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) /* * Add a request to the inode's commit list. @@ -430,7 +405,7 @@ nfs_mark_request_commit(struct nfs_page *req) spin_lock(&inode->i_lock); nfsi->ncommit++; - set_bit(PG_NEED_COMMIT, &(req)->wb_flags); + set_bit(PG_CLEAN, &(req)->wb_flags); radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_COMMIT); @@ -440,6 +415,19 @@ nfs_mark_request_commit(struct nfs_page *req) __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } +static int +nfs_clear_request_commit(struct nfs_page *req) +{ + struct page *page = req->wb_page; + + if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { + dec_zone_page_state(page, NR_UNSTABLE_NFS); + dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); + return 1; + } + return 0; +} + static inline int nfs_write_need_commit(struct nfs_write_data *data) { @@ -449,7 +437,7 @@ int nfs_write_need_commit(struct nfs_write_data *data) static inline int nfs_reschedule_unstable_write(struct nfs_page *req) { - if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { nfs_mark_request_commit(req); return 1; } @@ -465,6 +453,12 @@ nfs_mark_request_commit(struct nfs_page *req) { } +static inline int +nfs_clear_request_commit(struct nfs_page *req) +{ + return 0; +} + static inline int nfs_write_need_commit(struct nfs_write_data *data) { @@ -522,11 +516,8 @@ static void nfs_cancel_commit_list(struct list_head *head) while(!list_empty(head)) { req = nfs_list_entry(head->next); - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, - BDI_RECLAIMABLE); nfs_list_remove_request(req); - clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); + nfs_clear_request_commit(req); nfs_inode_remove_request(req); nfs_unlock_request(req); } @@ -564,110 +555,124 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg #endif /* - * Try to update any existing write request, or create one if there is none. - * In order to match, the request's credentials must match those of - * the calling process. + * Search for an existing write request, and attempt to update + * it to reflect a new dirty region on a given page. * - * Note: Should always be called with the Page Lock held! + * If the attempt fails, then the existing request is flushed out + * to disk. */ -static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, - struct page *page, unsigned int offset, unsigned int bytes) +static struct nfs_page *nfs_try_to_update_request(struct inode *inode, + struct page *page, + unsigned int offset, + unsigned int bytes) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - struct nfs_page *req, *new = NULL; - pgoff_t rqend, end; + struct nfs_page *req; + unsigned int rqend; + unsigned int end; + int error; + + if (!PagePrivate(page)) + return NULL; end = offset + bytes; + spin_lock(&inode->i_lock); for (;;) { - /* Loop over all inode entries and see if we find - * A request for the page we wish to update + req = nfs_page_find_request_locked(page); + if (req == NULL) + goto out_unlock; + + rqend = req->wb_offset + req->wb_bytes; + /* + * Tell the caller to flush out the request if + * the offsets are non-contiguous. + * Note: nfs_flush_incompatible() will already + * have flushed out requests having wrong owners. */ - if (new) { - if (radix_tree_preload(GFP_NOFS)) { - nfs_release_request(new); - return ERR_PTR(-ENOMEM); - } - } + if (offset > rqend + || end < req->wb_offset) + goto out_flushme; - spin_lock(&inode->i_lock); - req = nfs_page_find_request_locked(page); - if (req) { - if (!nfs_set_page_tag_locked(req)) { - int error; - - spin_unlock(&inode->i_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - if (error < 0) { - if (new) { - radix_tree_preload_end(); - nfs_release_request(new); - } - return ERR_PTR(error); - } - continue; - } - spin_unlock(&inode->i_lock); - if (new) { - radix_tree_preload_end(); - nfs_release_request(new); - } + if (nfs_set_page_tag_locked(req)) break; - } - if (new) { - nfs_lock_request_dontget(new); - nfs_inode_add_request(inode, new); - spin_unlock(&inode->i_lock); - radix_tree_preload_end(); - req = new; - goto zero_page; - } + /* The request is locked, so wait and then retry */ spin_unlock(&inode->i_lock); - - new = nfs_create_request(ctx, inode, page, offset, bytes); - if (IS_ERR(new)) - return new; + error = nfs_wait_on_request(req); + nfs_release_request(req); + if (error != 0) + goto out_err; + spin_lock(&inode->i_lock); } - /* We have a request for our page. - * If the creds don't match, or the - * page addresses don't match, - * tell the caller to wait on the conflicting - * request. - */ - rqend = req->wb_offset + req->wb_bytes; - if (req->wb_context != ctx - || req->wb_page != page - || !nfs_dirty_request(req) - || offset > rqend || end < req->wb_offset) { - nfs_clear_page_tag_locked(req); - return ERR_PTR(-EBUSY); - } + if (nfs_clear_request_commit(req)) + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT); /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { req->wb_offset = offset; req->wb_pgbase = offset; - req->wb_bytes = max(end, rqend) - req->wb_offset; - goto zero_page; } - if (end > rqend) req->wb_bytes = end - req->wb_offset; - + else + req->wb_bytes = rqend - req->wb_offset; +out_unlock: + spin_unlock(&inode->i_lock); return req; -zero_page: - /* If this page might potentially be marked as up to date, - * then we need to zero any uninitalised data. */ - if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE - && !PageUptodate(req->wb_page)) - zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE); +out_flushme: + spin_unlock(&inode->i_lock); + nfs_release_request(req); + error = nfs_wb_page(inode, page); +out_err: + return ERR_PTR(error); +} + +/* + * Try to update an existing write request, or create one if there is none. + * + * Note: Should always be called with the Page Lock held to prevent races + * if we have to add a new request. Also assumes that the caller has + * already called nfs_flush_incompatible() if necessary. + */ +static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, + struct page *page, unsigned int offset, unsigned int bytes) +{ + struct inode *inode = page->mapping->host; + struct nfs_page *req; + int error; + + req = nfs_try_to_update_request(inode, page, offset, bytes); + if (req != NULL) + goto out; + req = nfs_create_request(ctx, inode, page, offset, bytes); + if (IS_ERR(req)) + goto out; + error = nfs_inode_add_request(inode, req); + if (error != 0) { + nfs_release_request(req); + req = ERR_PTR(error); + } +out: return req; } +static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_page *req; + + req = nfs_setup_write_request(ctx, page, offset, count); + if (IS_ERR(req)) + return PTR_ERR(req); + /* Update file length */ + nfs_grow_file(page, offset, count); + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_clear_page_tag_locked(req); + return 0; +} + int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = nfs_file_open_context(file); @@ -685,8 +690,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) req = nfs_page_find_request(page); if (req == NULL) return 0; - do_flush = req->wb_page != page || req->wb_context != ctx - || !nfs_dirty_request(req); + do_flush = req->wb_page != page || req->wb_context != ctx; nfs_release_request(req); if (!do_flush) return 0; @@ -721,10 +725,10 @@ int nfs_updatepage(struct file *file, struct page *page, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); - dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", + dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, - (long long)(page_offset(page) +offset)); + (long long)(page_offset(page) + offset)); /* If we're not using byte range locks, and we know the page * is up to date, it may be more efficient to extend the write @@ -744,7 +748,7 @@ int nfs_updatepage(struct file *file, struct page *page, else __set_page_dirty_nobuffers(page); - dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", + dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); return status; } @@ -752,12 +756,7 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - if (PageError(req->wb_page)) { - nfs_end_page_writeback(req->wb_page); - nfs_inode_remove_request(req); - } else if (!nfs_reschedule_unstable_write(req)) { - /* Set the PG_uptodate flag */ - nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { nfs_end_page_writeback(req->wb_page); nfs_inode_remove_request(req); } else @@ -834,7 +833,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, NFS_PROTO(inode)->write_setup(data, &msg); dprintk("NFS: %5u initiated write call " - "(req %s/%Ld, %u bytes @ offset %Lu)\n", + "(req %s/%lld, %u bytes @ offset %llu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), @@ -978,13 +977,13 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_page *req = data->req; - dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_context->path.dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), - req->wb_bytes, - (long long)req_offset(req)); + dprintk("NFS: %5u write(%s/%lld %d@%lld)", + task->tk_pid, + data->req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long) + NFS_FILEID(data->req->wb_context->path.dentry->d_inode), + data->req->wb_bytes, (long long)req_offset(data->req)); nfs_writeback_done(task, data); } @@ -1058,7 +1057,8 @@ static void nfs_writeback_release_full(void *calldata) nfs_list_remove_request(req); - dprintk("NFS: write (%s/%Ld %d@%Ld)", + dprintk("NFS: %5u write (%s/%lld %d@%lld)", + data->task.tk_pid, req->wb_context->path.dentry->d_inode->i_sb->s_id, (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, @@ -1078,8 +1078,6 @@ static void nfs_writeback_release_full(void *calldata) dprintk(" marked for commit\n"); goto next; } - /* Set the PG_uptodate flag? */ - nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); dprintk(" OK\n"); remove_request: nfs_end_page_writeback(page); @@ -1133,7 +1131,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) static unsigned long complain; if (time_before(complain, jiffies)) { - dprintk("NFS: faulty NFS server %s:" + dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(data->inode)->nfs_client->cl_hostname, resp->verf->committed, argp->stable); @@ -1297,12 +1295,9 @@ static void nfs_commit_release(void *calldata) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, - BDI_RECLAIMABLE); + nfs_clear_request_commit(req); - dprintk("NFS: commit (%s/%Ld %d@%Ld)", + dprintk("NFS: commit (%s/%lld %d@%lld)", req->wb_context->path.dentry->d_inode->i_sb->s_id, (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, @@ -1318,9 +1313,6 @@ static void nfs_commit_release(void *calldata) * returned by the server against all stored verfs. */ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { /* We have a match */ - /* Set the PG_uptodate flag */ - nfs_mark_uptodate(req->wb_page, req->wb_pgbase, - req->wb_bytes); nfs_inode_remove_request(req); dprintk(" OK\n"); goto next; @@ -1479,7 +1471,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) req = nfs_page_find_request(page); if (req == NULL) goto out; - if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + if (test_bit(PG_CLEAN, &req->wb_flags)) { nfs_release_request(req); break; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4d4760e687c3..702fa577aa6e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -381,7 +381,7 @@ static int do_probe_callback(void *data) .program = &cb_program, .version = nfs_cb_version[1]->number, .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ - .flags = (RPC_CLNT_CREATE_NOPING), + .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig new file mode 100644 index 000000000000..91ceeda7e5bf --- /dev/null +++ b/fs/ubifs/Kconfig @@ -0,0 +1,72 @@ +config UBIFS_FS + tristate "UBIFS file system support" + select CRC16 + select CRC32 + select CRYPTO if UBIFS_FS_ADVANCED_COMPR + select CRYPTO if UBIFS_FS_LZO + select CRYPTO if UBIFS_FS_ZLIB + select CRYPTO_LZO if UBIFS_FS_LZO + select CRYPTO_DEFLATE if UBIFS_FS_ZLIB + depends on MTD_UBI + help + UBIFS is a file system for flash devices which works on top of UBI. + +config UBIFS_FS_XATTR + bool "Extended attributes support" + depends on UBIFS_FS + help + This option enables support of extended attributes. + +config UBIFS_FS_ADVANCED_COMPR + bool "Advanced compression options" + depends on UBIFS_FS + help + This option allows to explicitly choose which compressions, if any, + are enabled in UBIFS. Removing compressors means inbility to read + existing file systems. + + If unsure, say 'N'. + +config UBIFS_FS_LZO + bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR + depends on UBIFS_FS + default y + help + LZO compressor is generally faster then zlib but compresses worse. + Say 'Y' if unsure. + +config UBIFS_FS_ZLIB + bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR + depends on UBIFS_FS + default y + help + Zlib copresses better then LZO but it is slower. Say 'Y' if unsure. + +# Debugging-related stuff +config UBIFS_FS_DEBUG + bool "Enable debugging" + depends on UBIFS_FS + select DEBUG_FS + select KALLSYMS_ALL + help + This option enables UBIFS debugging. + +config UBIFS_FS_DEBUG_MSG_LVL + int "Default message level (0 = no extra messages, 3 = lots)" + depends on UBIFS_FS_DEBUG + default "0" + help + This controls the amount of debugging messages produced by UBIFS. + If reporting bugs, please try to have available a full dump of the + messages at level 1 while the misbehaviour was occurring. Level 2 + may become necessary if level 1 messages were not enough to find the + bug. Generally Level 3 should be avoided. + +config UBIFS_FS_DEBUG_CHKS + bool "Enable extra checks" + depends on UBIFS_FS_DEBUG + help + If extra checks are enabled UBIFS will check the consistency of its + internal data structures during operation. However, UBIFS performance + is dramatically slower when this option is selected especially if the + file system is large. diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile new file mode 100644 index 000000000000..80e93c35e496 --- /dev/null +++ b/fs/ubifs/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_UBIFS_FS) += ubifs.o + +ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o +ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o +ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o +ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o + +ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o +ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c new file mode 100644 index 000000000000..d81fb9ed2b8e --- /dev/null +++ b/fs/ubifs/budget.c @@ -0,0 +1,731 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the budgeting sub-system which is responsible for UBIFS + * space management. + * + * Factors such as compression, wasted space at the ends of LEBs, space in other + * journal heads, the effect of updates on the index, and so on, make it + * impossible to accurately predict the amount of space needed. Consequently + * approximations are used. + */ + +#include "ubifs.h" +#include +#include + +/* + * When pessimistic budget calculations say that there is no enough space, + * UBIFS starts writing back dirty inodes and pages, doing garbage collection, + * or committing. The below constants define maximum number of times UBIFS + * repeats the operations. + */ +#define MAX_SHRINK_RETRIES 8 +#define MAX_GC_RETRIES 4 +#define MAX_CMT_RETRIES 2 +#define MAX_NOSPC_RETRIES 1 + +/* + * The below constant defines amount of dirty pages which should be written + * back at when trying to shrink the liability. + */ +#define NR_TO_WRITE 16 + +/** + * struct retries_info - information about re-tries while making free space. + * @prev_liability: previous liability + * @shrink_cnt: how many times the liability was shrinked + * @shrink_retries: count of liability shrink re-tries (increased when + * liability does not shrink) + * @try_gc: GC should be tried first + * @gc_retries: how many times GC was run + * @cmt_retries: how many times commit has been done + * @nospc_retries: how many times GC returned %-ENOSPC + * + * Since we consider budgeting to be the fast-path, and this structure has to + * be allocated on stack and zeroed out, we make it smaller using bit-fields. + */ +struct retries_info { + long long prev_liability; + unsigned int shrink_cnt; + unsigned int shrink_retries:5; + unsigned int try_gc:1; + unsigned int gc_retries:4; + unsigned int cmt_retries:3; + unsigned int nospc_retries:1; +}; + +/** + * shrink_liability - write-back some dirty pages/inodes. + * @c: UBIFS file-system description object + * @nr_to_write: how many dirty pages to write-back + * + * This function shrinks UBIFS liability by means of writing back some amount + * of dirty inodes and their pages. Returns the amount of pages which were + * written back. The returned value does not include dirty inodes which were + * synchronized. + * + * Note, this function synchronizes even VFS inodes which are locked + * (@i_mutex) by the caller of the budgeting function, because write-back does + * not touch @i_mutex. + */ +static int shrink_liability(struct ubifs_info *c, int nr_to_write) +{ + int nr_written; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .range_end = LLONG_MAX, + .nr_to_write = nr_to_write, + }; + + generic_sync_sb_inodes(c->vfs_sb, &wbc); + nr_written = nr_to_write - wbc.nr_to_write; + + if (!nr_written) { + /* + * Re-try again but wait on pages/inodes which are being + * written-back concurrently (e.g., by pdflush). + */ + memset(&wbc, 0, sizeof(struct writeback_control)); + wbc.sync_mode = WB_SYNC_ALL; + wbc.range_end = LLONG_MAX; + wbc.nr_to_write = nr_to_write; + generic_sync_sb_inodes(c->vfs_sb, &wbc); + nr_written = nr_to_write - wbc.nr_to_write; + } + + dbg_budg("%d pages were written back", nr_written); + return nr_written; +} + + +/** + * run_gc - run garbage collector. + * @c: UBIFS file-system description object + * + * This function runs garbage collector to make some more free space. Returns + * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a + * negative error code in case of failure. + */ +static int run_gc(struct ubifs_info *c) +{ + int err, lnum; + + /* Make some free space by garbage-collecting dirty space */ + down_read(&c->commit_sem); + lnum = ubifs_garbage_collect(c, 1); + up_read(&c->commit_sem); + if (lnum < 0) + return lnum; + + /* GC freed one LEB, return it to lprops */ + dbg_budg("GC freed LEB %d", lnum); + err = ubifs_return_leb(c, lnum); + if (err) + return err; + return 0; +} + +/** + * make_free_space - make more free space on the file-system. + * @c: UBIFS file-system description object + * @ri: information about previous invocations of this function + * + * This function is called when an operation cannot be budgeted because there + * is supposedly no free space. But in most cases there is some free space: + * o budgeting is pessimistic, so it always budgets more then it is actually + * needed, so shrinking the liability is one way to make free space - the + * cached data will take less space then it was budgeted for; + * o GC may turn some dark space into free space (budgeting treats dark space + * as not available); + * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs. + * + * So this function tries to do the above. Returns %-EAGAIN if some free space + * was presumably made and the caller has to re-try budgeting the operation. + * Returns %-ENOSPC if it couldn't do more free space, and other negative error + * codes on failures. + */ +static int make_free_space(struct ubifs_info *c, struct retries_info *ri) +{ + int err; + + /* + * If we have some dirty pages and inodes (liability), try to write + * them back unless this was tried too many times without effect + * already. + */ + if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) { + long long liability; + + spin_lock(&c->space_lock); + liability = c->budg_idx_growth + c->budg_data_growth + + c->budg_dd_growth; + spin_unlock(&c->space_lock); + + if (ri->prev_liability >= liability) { + /* Liability does not shrink, next time try GC then */ + ri->shrink_retries += 1; + if (ri->gc_retries < MAX_GC_RETRIES) + ri->try_gc = 1; + dbg_budg("liability did not shrink: retries %d of %d", + ri->shrink_retries, MAX_SHRINK_RETRIES); + } + + dbg_budg("force write-back (count %d)", ri->shrink_cnt); + shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt); + + ri->prev_liability = liability; + ri->shrink_cnt += 1; + return -EAGAIN; + } + + /* + * Try to run garbage collector unless it was already tried too many + * times. + */ + if (ri->gc_retries < MAX_GC_RETRIES) { + ri->gc_retries += 1; + dbg_budg("run GC, retries %d of %d", + ri->gc_retries, MAX_GC_RETRIES); + + ri->try_gc = 0; + err = run_gc(c); + if (!err) + return -EAGAIN; + + if (err == -EAGAIN) { + dbg_budg("GC asked to commit"); + err = ubifs_run_commit(c); + if (err) + return err; + return -EAGAIN; + } + + if (err != -ENOSPC) + return err; + + /* + * GC could not make any progress. If this is the first time, + * then it makes sense to try to commit, because it might make + * some dirty space. + */ + dbg_budg("GC returned -ENOSPC, retries %d", + ri->nospc_retries); + if (ri->nospc_retries >= MAX_NOSPC_RETRIES) + return err; + ri->nospc_retries += 1; + } + + /* Neither GC nor write-back helped, try to commit */ + if (ri->cmt_retries < MAX_CMT_RETRIES) { + ri->cmt_retries += 1; + dbg_budg("run commit, retries %d of %d", + ri->cmt_retries, MAX_CMT_RETRIES); + err = ubifs_run_commit(c); + if (err) + return err; + return -EAGAIN; + } + return -ENOSPC; +} + +/** + * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. + * @c: UBIFS file-system description object + * + * This function calculates and returns the number of eraseblocks which should + * be kept for index usage. + */ +int ubifs_calc_min_idx_lebs(struct ubifs_info *c) +{ + int ret; + uint64_t idx_size; + + idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; + + /* And make sure we have twice the index size of space reserved */ + idx_size <<= 1; + + /* + * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' + * pair, nor similarly the two variables for the new index size, so we + * have to do this costly 64-bit division on fast-path. + */ + if (do_div(idx_size, c->leb_size - c->max_idx_node_sz)) + ret = idx_size + 1; + else + ret = idx_size; + /* + * The index head is not available for the in-the-gaps method, so add an + * extra LEB to compensate. + */ + ret += 1; + /* + * At present the index needs at least 2 LEBs: one for the index head + * and one for in-the-gaps method (which currently does not cater for + * the index head and so excludes it from consideration). + */ + if (ret < 2) + ret = 2; + return ret; +} + +/** + * ubifs_calc_available - calculate available FS space. + * @c: UBIFS file-system description object + * @min_idx_lebs: minimum number of LEBs reserved for the index + * + * This function calculates and returns amount of FS space available for use. + */ +long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) +{ + int subtract_lebs; + long long available; + + /* + * Force the amount available to the total size reported if the used + * space is zero. + */ + if (c->lst.total_used <= UBIFS_INO_NODE_SZ && + c->budg_data_growth + c->budg_dd_growth == 0) { + /* Do the same calculation as for c->block_cnt */ + available = c->main_lebs - 2; + available *= c->leb_size - c->dark_wm; + return available; + } + + available = c->main_bytes - c->lst.total_used; + + /* + * Now 'available' contains theoretically available flash space + * assuming there is no index, so we have to subtract the space which + * is reserved for the index. + */ + subtract_lebs = min_idx_lebs; + + /* Take into account that GC reserves one LEB for its own needs */ + subtract_lebs += 1; + + /* + * The GC journal head LEB is not really accessible. And since + * different write types go to different heads, we may count only on + * one head's space. + */ + subtract_lebs += c->jhead_cnt - 1; + + /* We also reserve one LEB for deletions, which bypass budgeting */ + subtract_lebs += 1; + + available -= (long long)subtract_lebs * c->leb_size; + + /* Subtract the dead space which is not available for use */ + available -= c->lst.total_dead; + + /* + * Subtract dark space, which might or might not be usable - it depends + * on the data which we have on the media and which will be written. If + * this is a lot of uncompressed or not-compressible data, the dark + * space cannot be used. + */ + available -= c->lst.total_dark; + + /* + * However, there is more dark space. The index may be bigger than + * @min_idx_lebs. Those extra LEBs are assumed to be available, but + * their dark space is not included in total_dark, so it is subtracted + * here. + */ + if (c->lst.idx_lebs > min_idx_lebs) { + subtract_lebs = c->lst.idx_lebs - min_idx_lebs; + available -= subtract_lebs * c->dark_wm; + } + + /* The calculations are rough and may end up with a negative number */ + return available > 0 ? available : 0; +} + +/** + * can_use_rp - check whether the user is allowed to use reserved pool. + * @c: UBIFS file-system description object + * + * UBIFS has so-called "reserved pool" which is flash space reserved + * for the superuser and for uses whose UID/GID is recorded in UBIFS superblock. + * This function checks whether current user is allowed to use reserved pool. + * Returns %1 current user is allowed to use reserved pool and %0 otherwise. + */ +static int can_use_rp(struct ubifs_info *c) +{ + if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) || + (c->rp_gid != 0 && in_group_p(c->rp_gid))) + return 1; + return 0; +} + +/** + * do_budget_space - reserve flash space for index and data growth. + * @c: UBIFS file-system description object + * + * This function makes sure UBIFS has enough free eraseblocks for index growth + * and data. + * + * When budgeting index space, UBIFS reserves twice as more LEBs as the index + * would take if it was consolidated and written to the flash. This guarantees + * that the "in-the-gaps" commit method always succeeds and UBIFS will always + * be able to commit dirty index. So this function basically adds amount of + * budgeted index space to the size of the current index, multiplies this by 2, + * and makes sure this does not exceed the amount of free eraseblocks. + * + * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: + * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might + * be large, because UBIFS does not do any index consolidation as long as + * there is free space. IOW, the index may take a lot of LEBs, but the LEBs + * will contain a lot of dirt. + * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be + * consolidated to take up to @c->min_idx_lebs LEBs. + * + * This function returns zero in case of success, and %-ENOSPC in case of + * failure. + */ +static int do_budget_space(struct ubifs_info *c) +{ + long long outstanding, available; + int lebs, rsvd_idx_lebs, min_idx_lebs; + + /* First budget index space */ + min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + /* Now 'min_idx_lebs' contains number of LEBs to reserve */ + if (min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + + /* + * The number of LEBs that are available to be used by the index is: + * + * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - + * @c->lst.taken_empty_lebs + * + * @empty_lebs are available because they are empty. @freeable_cnt are + * available because they contain only free and dirty space and the + * index allocation always occurs after wbufs are synch'ed. + * @idx_gc_cnt are available because they are index LEBs that have been + * garbage collected (including trivial GC) and are awaiting the commit + * before they can be unmapped - note that the in-the-gaps method will + * grab these if it needs them. @taken_empty_lebs are empty_lebs that + * have already been allocated for some purpose (also includes those + * LEBs on the @idx_gc list). + * + * Note, @taken_empty_lebs may temporarily be higher by one because of + * the way we serialize LEB allocations and budgeting. See a comment in + * 'ubifs_find_free_space()'. + */ + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - + c->lst.taken_empty_lebs; + if (unlikely(rsvd_idx_lebs > lebs)) { + dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " + "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, + rsvd_idx_lebs); + return -ENOSPC; + } + + available = ubifs_calc_available(c, min_idx_lebs); + outstanding = c->budg_data_growth + c->budg_dd_growth; + + if (unlikely(available < outstanding)) { + dbg_budg("out of data space: available %lld, outstanding %lld", + available, outstanding); + return -ENOSPC; + } + + if (available - outstanding <= c->rp_size && !can_use_rp(c)) + return -ENOSPC; + + c->min_idx_lebs = min_idx_lebs; + return 0; +} + +/** + * calc_idx_growth - calculate approximate index growth from budgeting request. + * @c: UBIFS file-system description object + * @req: budgeting request + * + * For now we assume each new node adds one znode. But this is rather poor + * approximation, though. + */ +static int calc_idx_growth(const struct ubifs_info *c, + const struct ubifs_budget_req *req) +{ + int znodes; + + znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) + + req->new_dent; + return znodes * c->max_idx_node_sz; +} + +/** + * calc_data_growth - calculate approximate amount of new data from budgeting + * request. + * @c: UBIFS file-system description object + * @req: budgeting request + */ +static int calc_data_growth(const struct ubifs_info *c, + const struct ubifs_budget_req *req) +{ + int data_growth; + + data_growth = req->new_ino ? c->inode_budget : 0; + if (req->new_page) + data_growth += c->page_budget; + if (req->new_dent) + data_growth += c->dent_budget; + data_growth += req->new_ino_d; + return data_growth; +} + +/** + * calc_dd_growth - calculate approximate amount of data which makes other data + * dirty from budgeting request. + * @c: UBIFS file-system description object + * @req: budgeting request + */ +static int calc_dd_growth(const struct ubifs_info *c, + const struct ubifs_budget_req *req) +{ + int dd_growth; + + dd_growth = req->dirtied_page ? c->page_budget : 0; + + if (req->dirtied_ino) + dd_growth += c->inode_budget << (req->dirtied_ino - 1); + if (req->mod_dent) + dd_growth += c->dent_budget; + dd_growth += req->dirtied_ino_d; + return dd_growth; +} + +/** + * ubifs_budget_space - ensure there is enough space to complete an operation. + * @c: UBIFS file-system description object + * @req: budget request + * + * This function allocates budget for an operation. It uses pessimistic + * approximation of how much flash space the operation needs. The goal of this + * function is to make sure UBIFS always has flash space to flush all dirty + * pages, dirty inodes, and dirty znodes (liability). This function may force + * commit, garbage-collection or write-back. Returns zero in case of success, + * %-ENOSPC if there is no free space and other negative error codes in case of + * failures. + */ +int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) +{ + int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); + int err, idx_growth, data_growth, dd_growth; + struct retries_info ri; + + ubifs_assert(req->dirtied_ino <= 4); + ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + + data_growth = calc_data_growth(c, req); + dd_growth = calc_dd_growth(c, req); + if (!data_growth && !dd_growth) + return 0; + idx_growth = calc_idx_growth(c, req); + memset(&ri, 0, sizeof(struct retries_info)); + +again: + spin_lock(&c->space_lock); + ubifs_assert(c->budg_idx_growth >= 0); + ubifs_assert(c->budg_data_growth >= 0); + ubifs_assert(c->budg_dd_growth >= 0); + + if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { + dbg_budg("no space"); + spin_unlock(&c->space_lock); + return -ENOSPC; + } + + c->budg_idx_growth += idx_growth; + c->budg_data_growth += data_growth; + c->budg_dd_growth += dd_growth; + + err = do_budget_space(c); + if (likely(!err)) { + req->idx_growth = idx_growth; + req->data_growth = data_growth; + req->dd_growth = dd_growth; + spin_unlock(&c->space_lock); + return 0; + } + + /* Restore the old values */ + c->budg_idx_growth -= idx_growth; + c->budg_data_growth -= data_growth; + c->budg_dd_growth -= dd_growth; + spin_unlock(&c->space_lock); + + if (req->fast) { + dbg_budg("no space for fast budgeting"); + return err; + } + + err = make_free_space(c, &ri); + if (err == -EAGAIN) { + dbg_budg("try again"); + cond_resched(); + goto again; + } else if (err == -ENOSPC) { + dbg_budg("FS is full, -ENOSPC"); + c->nospace = 1; + if (can_use_rp(c) || c->rp_size == 0) + c->nospace_rp = 1; + smp_wmb(); + } else + ubifs_err("cannot budget space, error %d", err); + return err; +} + +/** + * ubifs_release_budget - release budgeted free space. + * @c: UBIFS file-system description object + * @req: budget request + * + * This function releases the space budgeted by 'ubifs_budget_space()'. Note, + * since the index changes (which were budgeted for in @req->idx_growth) will + * only be written to the media on commit, this function moves the index budget + * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be + * zeroed by the commit operation. + */ +void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) +{ + ubifs_assert(req->dirtied_ino <= 4); + ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + if (!req->recalculate) { + ubifs_assert(req->idx_growth >= 0); + ubifs_assert(req->data_growth >= 0); + ubifs_assert(req->dd_growth >= 0); + } + + if (req->recalculate) { + req->data_growth = calc_data_growth(c, req); + req->dd_growth = calc_dd_growth(c, req); + req->idx_growth = calc_idx_growth(c, req); + } + + if (!req->data_growth && !req->dd_growth) + return; + + c->nospace = c->nospace_rp = 0; + smp_wmb(); + + spin_lock(&c->space_lock); + c->budg_idx_growth -= req->idx_growth; + c->budg_uncommitted_idx += req->idx_growth; + c->budg_data_growth -= req->data_growth; + c->budg_dd_growth -= req->dd_growth; + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + ubifs_assert(c->budg_idx_growth >= 0); + ubifs_assert(c->budg_data_growth >= 0); + ubifs_assert(c->min_idx_lebs < c->main_lebs); + spin_unlock(&c->space_lock); +} + +/** + * ubifs_convert_page_budget - convert budget of a new page. + * @c: UBIFS file-system description object + * + * This function converts budget which was allocated for a new page of data to + * the budget of changing an existing page of data. The latter is smaller then + * the former, so this function only does simple re-calculation and does not + * involve any write-back. + */ +void ubifs_convert_page_budget(struct ubifs_info *c) +{ + spin_lock(&c->space_lock); + /* Release the index growth reservation */ + c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; + /* Release the data growth reservation */ + c->budg_data_growth -= c->page_budget; + /* Increase the dirty data growth reservation instead */ + c->budg_dd_growth += c->page_budget; + /* And re-calculate the indexing space reservation */ + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + spin_unlock(&c->space_lock); +} + +/** + * ubifs_release_dirty_inode_budget - release dirty inode budget. + * @c: UBIFS file-system description object + * @ui: UBIFS inode to release the budget for + * + * This function releases budget corresponding to a dirty inode. It is usually + * called when after the inode has been written to the media and marked as + * clean. + */ +void ubifs_release_dirty_inode_budget(struct ubifs_info *c, + struct ubifs_inode *ui) +{ + struct ubifs_budget_req req = {.dd_growth = c->inode_budget, + .dirtied_ino_d = ui->data_len}; + + ubifs_release_budget(c, &req); +} + +/** + * ubifs_budg_get_free_space - return amount of free space. + * @c: UBIFS file-system description object + * + * This function returns amount of free space on the file-system. + */ +long long ubifs_budg_get_free_space(struct ubifs_info *c) +{ + int min_idx_lebs, rsvd_idx_lebs; + long long available, outstanding, free; + + /* Do exactly the same calculations as in 'do_budget_space()' */ + spin_lock(&c->space_lock); + min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + if (min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + + if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt + - c->lst.taken_empty_lebs) { + spin_unlock(&c->space_lock); + return 0; + } + + available = ubifs_calc_available(c, min_idx_lebs); + outstanding = c->budg_data_growth + c->budg_dd_growth; + c->min_idx_lebs = min_idx_lebs; + spin_unlock(&c->space_lock); + + if (available > outstanding) + free = ubifs_reported_space(c, available - outstanding); + else + free = 0; + return free; +} diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c new file mode 100644 index 000000000000..3b516316c9b3 --- /dev/null +++ b/fs/ubifs/commit.c @@ -0,0 +1,677 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements functions that manage the running of the commit process. + * Each affected module has its own functions to accomplish their part in the + * commit and those functions are called here. + * + * The commit is the process whereby all updates to the index and LEB properties + * are written out together and the journal becomes empty. This keeps the + * file system consistent - at all times the state can be recreated by reading + * the index and LEB properties and then replaying the journal. + * + * The commit is split into two parts named "commit start" and "commit end". + * During commit start, the commit process has exclusive access to the journal + * by holding the commit semaphore down for writing. As few I/O operations as + * possible are performed during commit start, instead the nodes that are to be + * written are merely identified. During commit end, the commit semaphore is no + * longer held and the journal is again in operation, allowing users to continue + * to use the file system while the bulk of the commit I/O is performed. The + * purpose of this two-step approach is to prevent the commit from causing any + * latency blips. Note that in any case, the commit does not prevent lookups + * (as permitted by the TNC mutex), or access to VFS data structures e.g. page + * cache. + */ + +#include +#include +#include "ubifs.h" + +/** + * do_commit - commit the journal. + * @c: UBIFS file-system description object + * + * This function implements UBIFS commit. It has to be called with commit lock + * locked. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int do_commit(struct ubifs_info *c) +{ + int err, new_ltail_lnum, old_ltail_lnum, i; + struct ubifs_zbranch zroot; + struct ubifs_lp_stats lst; + + dbg_cmt("start"); + if (c->ro_media) { + err = -EROFS; + goto out_up; + } + + /* Sync all write buffers (necessary for recovery) */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + goto out_up; + } + + err = ubifs_gc_start_commit(c); + if (err) + goto out_up; + err = dbg_check_lprops(c); + if (err) + goto out_up; + err = ubifs_log_start_commit(c, &new_ltail_lnum); + if (err) + goto out_up; + err = ubifs_tnc_start_commit(c, &zroot); + if (err) + goto out_up; + err = ubifs_lpt_start_commit(c); + if (err) + goto out_up; + err = ubifs_orphan_start_commit(c); + if (err) + goto out_up; + + ubifs_get_lp_stats(c, &lst); + + up_write(&c->commit_sem); + + err = ubifs_tnc_end_commit(c); + if (err) + goto out; + err = ubifs_lpt_end_commit(c); + if (err) + goto out; + err = ubifs_orphan_end_commit(c); + if (err) + goto out; + old_ltail_lnum = c->ltail_lnum; + err = ubifs_log_end_commit(c, new_ltail_lnum); + if (err) + goto out; + err = dbg_check_old_index(c, &zroot); + if (err) + goto out; + + mutex_lock(&c->mst_mutex); + c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no); + c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); + c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); + c->mst_node->root_offs = cpu_to_le32(zroot.offs); + c->mst_node->root_len = cpu_to_le32(zroot.len); + c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); + c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); + c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); + c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); + c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); + c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); + c->mst_node->nhead_offs = cpu_to_le32(c->nhead_offs); + c->mst_node->ltab_lnum = cpu_to_le32(c->ltab_lnum); + c->mst_node->ltab_offs = cpu_to_le32(c->ltab_offs); + c->mst_node->lsave_lnum = cpu_to_le32(c->lsave_lnum); + c->mst_node->lsave_offs = cpu_to_le32(c->lsave_offs); + c->mst_node->lscan_lnum = cpu_to_le32(c->lscan_lnum); + c->mst_node->empty_lebs = cpu_to_le32(lst.empty_lebs); + c->mst_node->idx_lebs = cpu_to_le32(lst.idx_lebs); + c->mst_node->total_free = cpu_to_le64(lst.total_free); + c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty); + c->mst_node->total_used = cpu_to_le64(lst.total_used); + c->mst_node->total_dead = cpu_to_le64(lst.total_dead); + c->mst_node->total_dark = cpu_to_le64(lst.total_dark); + if (c->no_orphs) + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + else + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); + err = ubifs_write_master(c); + mutex_unlock(&c->mst_mutex); + if (err) + goto out; + + err = ubifs_log_post_commit(c, old_ltail_lnum); + if (err) + goto out; + err = ubifs_gc_end_commit(c); + if (err) + goto out; + err = ubifs_lpt_post_commit(c); + if (err) + goto out; + + spin_lock(&c->cs_lock); + c->cmt_state = COMMIT_RESTING; + wake_up(&c->cmt_wq); + dbg_cmt("commit end"); + spin_unlock(&c->cs_lock); + + return 0; + +out_up: + up_write(&c->commit_sem); +out: + ubifs_err("commit failed, error %d", err); + spin_lock(&c->cs_lock); + c->cmt_state = COMMIT_BROKEN; + wake_up(&c->cmt_wq); + spin_unlock(&c->cs_lock); + ubifs_ro_mode(c, err); + return err; +} + +/** + * run_bg_commit - run background commit if it is needed. + * @c: UBIFS file-system description object + * + * This function runs background commit if it is needed. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int run_bg_commit(struct ubifs_info *c) +{ + spin_lock(&c->cs_lock); + /* + * Run background commit only if background commit was requested or if + * commit is required. + */ + if (c->cmt_state != COMMIT_BACKGROUND && + c->cmt_state != COMMIT_REQUIRED) + goto out; + spin_unlock(&c->cs_lock); + + down_write(&c->commit_sem); + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_REQUIRED) + c->cmt_state = COMMIT_RUNNING_REQUIRED; + else if (c->cmt_state == COMMIT_BACKGROUND) + c->cmt_state = COMMIT_RUNNING_BACKGROUND; + else + goto out_cmt_unlock; + spin_unlock(&c->cs_lock); + + return do_commit(c); + +out_cmt_unlock: + up_write(&c->commit_sem); +out: + spin_unlock(&c->cs_lock); + return 0; +} + +/** + * ubifs_bg_thread - UBIFS background thread function. + * @info: points to the file-system description object + * + * This function implements various file-system background activities: + * o when a write-buffer timer expires it synchronizes the appropriate + * write-buffer; + * o when the journal is about to be full, it starts in-advance commit. + * + * Note, other stuff like background garbage collection may be added here in + * future. + */ +int ubifs_bg_thread(void *info) +{ + int err; + struct ubifs_info *c = info; + + ubifs_msg("background thread \"%s\" started, PID %d", + c->bgt_name, current->pid); + set_freezable(); + + while (1) { + if (kthread_should_stop()) + break; + + if (try_to_freeze()) + continue; + + set_current_state(TASK_INTERRUPTIBLE); + /* Check if there is something to do */ + if (!c->need_bgt) { + /* + * Nothing prevents us from going sleep now and + * be never woken up and block the task which + * could wait in 'kthread_stop()' forever. + */ + if (kthread_should_stop()) + break; + schedule(); + continue; + } else + __set_current_state(TASK_RUNNING); + + c->need_bgt = 0; + err = ubifs_bg_wbufs_sync(c); + if (err) + ubifs_ro_mode(c, err); + + run_bg_commit(c); + cond_resched(); + } + + dbg_msg("background thread \"%s\" stops", c->bgt_name); + return 0; +} + +/** + * ubifs_commit_required - set commit state to "required". + * @c: UBIFS file-system description object + * + * This function is called if a commit is required but cannot be done from the + * calling function, so it is just flagged instead. + */ +void ubifs_commit_required(struct ubifs_info *c) +{ + spin_lock(&c->cs_lock); + switch (c->cmt_state) { + case COMMIT_RESTING: + case COMMIT_BACKGROUND: + dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), + dbg_cstate(COMMIT_REQUIRED)); + c->cmt_state = COMMIT_REQUIRED; + break; + case COMMIT_RUNNING_BACKGROUND: + dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), + dbg_cstate(COMMIT_RUNNING_REQUIRED)); + c->cmt_state = COMMIT_RUNNING_REQUIRED; + break; + case COMMIT_REQUIRED: + case COMMIT_RUNNING_REQUIRED: + case COMMIT_BROKEN: + break; + } + spin_unlock(&c->cs_lock); +} + +/** + * ubifs_request_bg_commit - notify the background thread to do a commit. + * @c: UBIFS file-system description object + * + * This function is called if the journal is full enough to make a commit + * worthwhile, so background thread is kicked to start it. + */ +void ubifs_request_bg_commit(struct ubifs_info *c) +{ + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_RESTING) { + dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), + dbg_cstate(COMMIT_BACKGROUND)); + c->cmt_state = COMMIT_BACKGROUND; + spin_unlock(&c->cs_lock); + ubifs_wake_up_bgt(c); + } else + spin_unlock(&c->cs_lock); +} + +/** + * wait_for_commit - wait for commit. + * @c: UBIFS file-system description object + * + * This function sleeps until the commit operation is no longer running. + */ +static int wait_for_commit(struct ubifs_info *c) +{ + dbg_cmt("pid %d goes sleep", current->pid); + + /* + * The following sleeps if the condition is false, and will be woken + * when the commit ends. It is possible, although very unlikely, that we + * will wake up and see the subsequent commit running, rather than the + * one we were waiting for, and go back to sleep. However, we will be + * woken again, so there is no danger of sleeping forever. + */ + wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND && + c->cmt_state != COMMIT_RUNNING_REQUIRED); + dbg_cmt("commit finished, pid %d woke up", current->pid); + return 0; +} + +/** + * ubifs_run_commit - run or wait for commit. + * @c: UBIFS file-system description object + * + * This function runs commit and returns zero in case of success and a negative + * error code in case of failure. + */ +int ubifs_run_commit(struct ubifs_info *c) +{ + int err = 0; + + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_BROKEN) { + err = -EINVAL; + goto out; + } + + if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) + /* + * We set the commit state to 'running required' to indicate + * that we want it to complete as quickly as possible. + */ + c->cmt_state = COMMIT_RUNNING_REQUIRED; + + if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { + spin_unlock(&c->cs_lock); + return wait_for_commit(c); + } + spin_unlock(&c->cs_lock); + + /* Ok, the commit is indeed needed */ + + down_write(&c->commit_sem); + spin_lock(&c->cs_lock); + /* + * Since we unlocked 'c->cs_lock', the state may have changed, so + * re-check it. + */ + if (c->cmt_state == COMMIT_BROKEN) { + err = -EINVAL; + goto out_cmt_unlock; + } + + if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) + c->cmt_state = COMMIT_RUNNING_REQUIRED; + + if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { + up_write(&c->commit_sem); + spin_unlock(&c->cs_lock); + return wait_for_commit(c); + } + c->cmt_state = COMMIT_RUNNING_REQUIRED; + spin_unlock(&c->cs_lock); + + err = do_commit(c); + return err; + +out_cmt_unlock: + up_write(&c->commit_sem); +out: + spin_unlock(&c->cs_lock); + return err; +} + +/** + * ubifs_gc_should_commit - determine if it is time for GC to run commit. + * @c: UBIFS file-system description object + * + * This function is called by garbage collection to determine if commit should + * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal + * is full enough to start commit, this function returns true. It is not + * absolutely necessary to commit yet, but it feels like this should be better + * then to keep doing GC. This function returns %1 if GC has to initiate commit + * and %0 if not. + */ +int ubifs_gc_should_commit(struct ubifs_info *c) +{ + int ret = 0; + + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_BACKGROUND) { + dbg_cmt("commit required now"); + c->cmt_state = COMMIT_REQUIRED; + } else + dbg_cmt("commit not requested"); + if (c->cmt_state == COMMIT_REQUIRED) + ret = 1; + spin_unlock(&c->cs_lock); + return ret; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * struct idx_node - hold index nodes during index tree traversal. + * @list: list + * @iip: index in parent (slot number of this indexing node in the parent + * indexing node) + * @upper_key: all keys in this indexing node have to be less or equivalent to + * this key + * @idx: index node (8-byte aligned because all node structures must be 8-byte + * aligned) + */ +struct idx_node { + struct list_head list; + int iip; + union ubifs_key upper_key; + struct ubifs_idx_node idx __attribute__((aligned(8))); +}; + +/** + * dbg_old_index_check_init - get information for the next old index check. + * @c: UBIFS file-system description object + * @zroot: root of the index + * + * This function records information about the index that will be needed for the + * next old index check i.e. 'dbg_check_old_index()'. + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) +{ + struct ubifs_idx_node *idx; + int lnum, offs, len, err = 0; + + c->old_zroot = *zroot; + + lnum = c->old_zroot.lnum; + offs = c->old_zroot.offs; + len = c->old_zroot.len; + + idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); + if (!idx) + return -ENOMEM; + + err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); + if (err) + goto out; + + c->old_zroot_level = le16_to_cpu(idx->level); + c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); +out: + kfree(idx); + return err; +} + +/** + * dbg_check_old_index - check the old copy of the index. + * @c: UBIFS file-system description object + * @zroot: root of the new index + * + * In order to be able to recover from an unclean unmount, a complete copy of + * the index must exist on flash. This is the "old" index. The commit process + * must write the "new" index to flash without overwriting or destroying any + * part of the old index. This function is run at commit end in order to check + * that the old index does indeed exist completely intact. + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) +{ + int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; + int first = 1, iip; + union ubifs_key lower_key, upper_key, l_key, u_key; + unsigned long long uninitialized_var(last_sqnum); + struct ubifs_idx_node *idx; + struct list_head list; + struct idx_node *i; + size_t sz; + + if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) + goto out; + + INIT_LIST_HEAD(&list); + + sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) - + UBIFS_IDX_NODE_SZ; + + /* Start at the old zroot */ + lnum = c->old_zroot.lnum; + offs = c->old_zroot.offs; + len = c->old_zroot.len; + iip = 0; + + /* + * Traverse the index tree preorder depth-first i.e. do a node and then + * its subtrees from left to right. + */ + while (1) { + struct ubifs_branch *br; + + /* Get the next index node */ + i = kmalloc(sz, GFP_NOFS); + if (!i) { + err = -ENOMEM; + goto out_free; + } + i->iip = iip; + /* Keep the index nodes on our path in a linked list */ + list_add_tail(&i->list, &list); + /* Read the index node */ + idx = &i->idx; + err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); + if (err) + goto out_free; + /* Validate index node */ + child_cnt = le16_to_cpu(idx->child_cnt); + if (child_cnt < 1 || child_cnt > c->fanout) { + err = 1; + goto out_dump; + } + if (first) { + first = 0; + /* Check root level and sqnum */ + if (le16_to_cpu(idx->level) != c->old_zroot_level) { + err = 2; + goto out_dump; + } + if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) { + err = 3; + goto out_dump; + } + /* Set last values as though root had a parent */ + last_level = le16_to_cpu(idx->level) + 1; + last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1; + key_read(c, ubifs_idx_key(c, idx), &lower_key); + highest_ino_key(c, &upper_key, INUM_WATERMARK); + } + key_copy(c, &upper_key, &i->upper_key); + if (le16_to_cpu(idx->level) != last_level - 1) { + err = 3; + goto out_dump; + } + /* + * The index is always written bottom up hence a child's sqnum + * is always less than the parents. + */ + if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) { + err = 4; + goto out_dump; + } + /* Check key range */ + key_read(c, ubifs_idx_key(c, idx), &l_key); + br = ubifs_idx_branch(c, idx, child_cnt - 1); + key_read(c, &br->key, &u_key); + if (keys_cmp(c, &lower_key, &l_key) > 0) { + err = 5; + goto out_dump; + } + if (keys_cmp(c, &upper_key, &u_key) < 0) { + err = 6; + goto out_dump; + } + if (keys_cmp(c, &upper_key, &u_key) == 0) + if (!is_hash_key(c, &u_key)) { + err = 7; + goto out_dump; + } + /* Go to next index node */ + if (le16_to_cpu(idx->level) == 0) { + /* At the bottom, so go up until can go right */ + while (1) { + /* Drop the bottom of the list */ + list_del(&i->list); + kfree(i); + /* No more list means we are done */ + if (list_empty(&list)) + goto out; + /* Look at the new bottom */ + i = list_entry(list.prev, struct idx_node, + list); + idx = &i->idx; + /* Can we go right */ + if (iip + 1 < le16_to_cpu(idx->child_cnt)) { + iip = iip + 1; + break; + } else + /* Nope, so go up again */ + iip = i->iip; + } + } else + /* Go down left */ + iip = 0; + /* + * We have the parent in 'idx' and now we set up for reading the + * child pointed to by slot 'iip'. + */ + last_level = le16_to_cpu(idx->level); + last_sqnum = le64_to_cpu(idx->ch.sqnum); + br = ubifs_idx_branch(c, idx, iip); + lnum = le32_to_cpu(br->lnum); + offs = le32_to_cpu(br->offs); + len = le32_to_cpu(br->len); + key_read(c, &br->key, &lower_key); + if (iip + 1 < le16_to_cpu(idx->child_cnt)) { + br = ubifs_idx_branch(c, idx, iip + 1); + key_read(c, &br->key, &upper_key); + } else + key_copy(c, &i->upper_key, &upper_key); + } +out: + err = dbg_old_index_check_init(c, zroot); + if (err) + goto out_free; + + return 0; + +out_dump: + dbg_err("dumping index node (iip=%d)", i->iip); + dbg_dump_node(c, idx); + list_del(&i->list); + kfree(i); + if (!list_empty(&list)) { + i = list_entry(list.prev, struct idx_node, list); + dbg_err("dumping parent index node"); + dbg_dump_node(c, &i->idx); + } +out_free: + while (!list_empty(&list)) { + i = list_entry(list.next, struct idx_node, list); + list_del(&i->list); + kfree(i); + } + ubifs_err("failed, error %d", err); + if (err > 0) + err = -EINVAL; + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c new file mode 100644 index 000000000000..5bb51dac3c16 --- /dev/null +++ b/fs/ubifs/compress.c @@ -0,0 +1,253 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + * Zoltan Sogor + */ + +/* + * This file provides a single place to access to compression and + * decompression. + */ + +#include +#include "ubifs.h" + +/* Fake description object for the "none" compressor */ +static struct ubifs_compressor none_compr = { + .compr_type = UBIFS_COMPR_NONE, + .name = "no compression", + .capi_name = "", +}; + +#ifdef CONFIG_UBIFS_FS_LZO +static DEFINE_MUTEX(lzo_mutex); + +static struct ubifs_compressor lzo_compr = { + .compr_type = UBIFS_COMPR_LZO, + .comp_mutex = &lzo_mutex, + .name = "LZO", + .capi_name = "lzo", +}; +#else +static struct ubifs_compressor lzo_compr = { + .compr_type = UBIFS_COMPR_LZO, + .name = "LZO", +}; +#endif + +#ifdef CONFIG_UBIFS_FS_ZLIB +static DEFINE_MUTEX(deflate_mutex); +static DEFINE_MUTEX(inflate_mutex); + +static struct ubifs_compressor zlib_compr = { + .compr_type = UBIFS_COMPR_ZLIB, + .comp_mutex = &deflate_mutex, + .decomp_mutex = &inflate_mutex, + .name = "zlib", + .capi_name = "deflate", +}; +#else +static struct ubifs_compressor zlib_compr = { + .compr_type = UBIFS_COMPR_ZLIB, + .name = "zlib", +}; +#endif + +/* All UBIFS compressors */ +struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + +/** + * ubifs_compress - compress data. + * @in_buf: data to compress + * @in_len: length of the data to compress + * @out_buf: output buffer where compressed data should be stored + * @out_len: output buffer length is returned here + * @compr_type: type of compression to use on enter, actually used compression + * type on exit + * + * This function compresses input buffer @in_buf of length @in_len and stores + * the result in the output buffer @out_buf and the resulting length in + * @out_len. If the input buffer does not compress, it is just copied to the + * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE or if + * compression error occurred. + * + * Note, if the input buffer was not compressed, it is copied to the output + * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. + * + * This functions returns %0 on success or a negative error code on failure. + */ +void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, + int *compr_type) +{ + int err; + struct ubifs_compressor *compr = ubifs_compressors[*compr_type]; + + if (*compr_type == UBIFS_COMPR_NONE) + goto no_compr; + + /* If the input data is small, do not even try to compress it */ + if (in_len < UBIFS_MIN_COMPR_LEN) + goto no_compr; + + if (compr->comp_mutex) + mutex_lock(compr->comp_mutex); + err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf, + out_len); + if (compr->comp_mutex) + mutex_unlock(compr->comp_mutex); + if (unlikely(err)) { + ubifs_warn("cannot compress %d bytes, compressor %s, " + "error %d, leave data uncompressed", + in_len, compr->name, err); + goto no_compr; + } + + /* + * Presently, we just require that compression results in less data, + * rather than any defined minimum compression ratio or amount. + */ + if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8)) + goto no_compr; + + return; + +no_compr: + memcpy(out_buf, in_buf, in_len); + *out_len = in_len; + *compr_type = UBIFS_COMPR_NONE; +} + +/** + * ubifs_decompress - decompress data. + * @in_buf: data to decompress + * @in_len: length of the data to decompress + * @out_buf: output buffer where decompressed data should + * @out_len: output length is returned here + * @compr_type: type of compression + * + * This function decompresses data from buffer @in_buf into buffer @out_buf. + * The length of the uncompressed data is returned in @out_len. This functions + * returns %0 on success or a negative error code on failure. + */ +int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, + int *out_len, int compr_type) +{ + int err; + struct ubifs_compressor *compr; + + if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) { + ubifs_err("invalid compression type %d", compr_type); + return -EINVAL; + } + + compr = ubifs_compressors[compr_type]; + + if (unlikely(!compr->capi_name)) { + ubifs_err("%s compression is not compiled in", compr->name); + return -EINVAL; + } + + if (compr_type == UBIFS_COMPR_NONE) { + memcpy(out_buf, in_buf, in_len); + *out_len = in_len; + return 0; + } + + if (compr->decomp_mutex) + mutex_lock(compr->decomp_mutex); + err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, + out_len); + if (compr->decomp_mutex) + mutex_unlock(compr->decomp_mutex); + if (err) + ubifs_err("cannot decompress %d bytes, compressor %s, " + "error %d", in_len, compr->name, err); + + return err; +} + +/** + * compr_init - initialize a compressor. + * @compr: compressor description object + * + * This function initializes the requested compressor and returns zero in case + * of success or a negative error code in case of failure. + */ +static int __init compr_init(struct ubifs_compressor *compr) +{ + if (compr->capi_name) { + compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); + if (IS_ERR(compr->cc)) { + ubifs_err("cannot initialize compressor %s, error %ld", + compr->name, PTR_ERR(compr->cc)); + return PTR_ERR(compr->cc); + } + } + + ubifs_compressors[compr->compr_type] = compr; + return 0; +} + +/** + * compr_exit - de-initialize a compressor. + * @compr: compressor description object + */ +static void compr_exit(struct ubifs_compressor *compr) +{ + if (compr->capi_name) + crypto_free_comp(compr->cc); + return; +} + +/** + * ubifs_compressors_init - initialize UBIFS compressors. + * + * This function initializes the compressor which were compiled in. Returns + * zero in case of success and a negative error code in case of failure. + */ +int __init ubifs_compressors_init(void) +{ + int err; + + err = compr_init(&lzo_compr); + if (err) + return err; + + err = compr_init(&zlib_compr); + if (err) + goto out_lzo; + + ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr; + return 0; + +out_lzo: + compr_exit(&lzo_compr); + return err; +} + +/** + * ubifs_compressors_exit - de-initialize UBIFS compressors. + */ +void __exit ubifs_compressors_exit(void) +{ + compr_exit(&lzo_compr); + compr_exit(&zlib_compr); +} diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c new file mode 100644 index 000000000000..4e3aaeba4eca --- /dev/null +++ b/fs/ubifs/debug.c @@ -0,0 +1,2289 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements most of the debugging stuff which is compiled in only + * when it is enabled. But some debugging check functions are implemented in + * corresponding subsystem, just because they are closely related and utilize + * various local functions of those subsystems. + */ + +#define UBIFS_DBG_PRESERVE_UBI + +#include "ubifs.h" +#include +#include + +#ifdef CONFIG_UBIFS_FS_DEBUG + +DEFINE_SPINLOCK(dbg_lock); + +static char dbg_key_buf0[128]; +static char dbg_key_buf1[128]; + +unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; +unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; +unsigned int ubifs_tst_flags; + +module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); + +MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); +MODULE_PARM_DESC(debug_chks, "Debug check flags"); +MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); + +static const char *get_key_fmt(int fmt) +{ + switch (fmt) { + case UBIFS_SIMPLE_KEY_FMT: + return "simple"; + default: + return "unknown/invalid format"; + } +} + +static const char *get_key_hash(int hash) +{ + switch (hash) { + case UBIFS_KEY_HASH_R5: + return "R5"; + case UBIFS_KEY_HASH_TEST: + return "test"; + default: + return "unknown/invalid name hash"; + } +} + +static const char *get_key_type(int type) +{ + switch (type) { + case UBIFS_INO_KEY: + return "inode"; + case UBIFS_DENT_KEY: + return "direntry"; + case UBIFS_XENT_KEY: + return "xentry"; + case UBIFS_DATA_KEY: + return "data"; + case UBIFS_TRUN_KEY: + return "truncate"; + default: + return "unknown/invalid key"; + } +} + +static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, + char *buffer) +{ + char *p = buffer; + int type = key_type(c, key); + + if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { + switch (type) { + case UBIFS_INO_KEY: + sprintf(p, "(%lu, %s)", key_inum(c, key), + get_key_type(type)); + break; + case UBIFS_DENT_KEY: + case UBIFS_XENT_KEY: + sprintf(p, "(%lu, %s, %#08x)", key_inum(c, key), + get_key_type(type), key_hash(c, key)); + break; + case UBIFS_DATA_KEY: + sprintf(p, "(%lu, %s, %u)", key_inum(c, key), + get_key_type(type), key_block(c, key)); + break; + case UBIFS_TRUN_KEY: + sprintf(p, "(%lu, %s)", + key_inum(c, key), get_key_type(type)); + break; + default: + sprintf(p, "(bad key type: %#08x, %#08x)", + key->u32[0], key->u32[1]); + } + } else + sprintf(p, "bad key format %d", c->key_fmt); +} + +const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) +{ + /* dbg_lock must be held */ + sprintf_key(c, key, dbg_key_buf0); + return dbg_key_buf0; +} + +const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) +{ + /* dbg_lock must be held */ + sprintf_key(c, key, dbg_key_buf1); + return dbg_key_buf1; +} + +const char *dbg_ntype(int type) +{ + switch (type) { + case UBIFS_PAD_NODE: + return "padding node"; + case UBIFS_SB_NODE: + return "superblock node"; + case UBIFS_MST_NODE: + return "master node"; + case UBIFS_REF_NODE: + return "reference node"; + case UBIFS_INO_NODE: + return "inode node"; + case UBIFS_DENT_NODE: + return "direntry node"; + case UBIFS_XENT_NODE: + return "xentry node"; + case UBIFS_DATA_NODE: + return "data node"; + case UBIFS_TRUN_NODE: + return "truncate node"; + case UBIFS_IDX_NODE: + return "indexing node"; + case UBIFS_CS_NODE: + return "commit start node"; + case UBIFS_ORPH_NODE: + return "orphan node"; + default: + return "unknown node"; + } +} + +static const char *dbg_gtype(int type) +{ + switch (type) { + case UBIFS_NO_NODE_GROUP: + return "no node group"; + case UBIFS_IN_NODE_GROUP: + return "in node group"; + case UBIFS_LAST_OF_NODE_GROUP: + return "last of node group"; + default: + return "unknown"; + } +} + +const char *dbg_cstate(int cmt_state) +{ + switch (cmt_state) { + case COMMIT_RESTING: + return "commit resting"; + case COMMIT_BACKGROUND: + return "background commit requested"; + case COMMIT_REQUIRED: + return "commit required"; + case COMMIT_RUNNING_BACKGROUND: + return "BACKGROUND commit running"; + case COMMIT_RUNNING_REQUIRED: + return "commit running and required"; + case COMMIT_BROKEN: + return "broken commit"; + default: + return "unknown commit state"; + } +} + +static void dump_ch(const struct ubifs_ch *ch) +{ + printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); + printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); + printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, + dbg_ntype(ch->node_type)); + printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, + dbg_gtype(ch->group_type)); + printk(KERN_DEBUG "\tsqnum %llu\n", + (unsigned long long)le64_to_cpu(ch->sqnum)); + printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); +} + +void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) +{ + const struct ubifs_inode *ui = ubifs_inode(inode); + + printk(KERN_DEBUG "inode %lu\n", inode->i_ino); + printk(KERN_DEBUG "size %llu\n", + (unsigned long long)i_size_read(inode)); + printk(KERN_DEBUG "nlink %u\n", inode->i_nlink); + printk(KERN_DEBUG "uid %u\n", (unsigned int)inode->i_uid); + printk(KERN_DEBUG "gid %u\n", (unsigned int)inode->i_gid); + printk(KERN_DEBUG "atime %u.%u\n", + (unsigned int)inode->i_atime.tv_sec, + (unsigned int)inode->i_atime.tv_nsec); + printk(KERN_DEBUG "mtime %u.%u\n", + (unsigned int)inode->i_mtime.tv_sec, + (unsigned int)inode->i_mtime.tv_nsec); + printk(KERN_DEBUG "ctime %u.%u\n", + (unsigned int)inode->i_ctime.tv_sec, + (unsigned int)inode->i_ctime.tv_nsec); + printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum); + printk(KERN_DEBUG "xattr_size %u\n", ui->xattr_size); + printk(KERN_DEBUG "xattr_cnt %u\n", ui->xattr_cnt); + printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names); + printk(KERN_DEBUG "dirty %u\n", ui->dirty); + printk(KERN_DEBUG "xattr %u\n", ui->xattr); + printk(KERN_DEBUG "flags %d\n", ui->flags); + printk(KERN_DEBUG "compr_type %d\n", ui->compr_type); + printk(KERN_DEBUG "data_len %d\n", ui->data_len); +} + +void dbg_dump_node(const struct ubifs_info *c, const void *node) +{ + int i, n; + union ubifs_key key; + const struct ubifs_ch *ch = node; + + if (dbg_failure_mode) + return; + + /* If the magic is incorrect, just hexdump the first bytes */ + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { + printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + (void *)node, UBIFS_CH_SZ, 1); + return; + } + + spin_lock(&dbg_lock); + dump_ch(node); + + switch (ch->node_type) { + case UBIFS_PAD_NODE: + { + const struct ubifs_pad_node *pad = node; + + printk(KERN_DEBUG "\tpad_len %u\n", + le32_to_cpu(pad->pad_len)); + break; + } + case UBIFS_SB_NODE: + { + const struct ubifs_sb_node *sup = node; + unsigned int sup_flags = le32_to_cpu(sup->flags); + + printk(KERN_DEBUG "\tkey_hash %d (%s)\n", + (int)sup->key_hash, get_key_hash(sup->key_hash)); + printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", + (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); + printk(KERN_DEBUG "\tflags %#x\n", sup_flags); + printk(KERN_DEBUG "\t big_lpt %u\n", + !!(sup_flags & UBIFS_FLG_BIGLPT)); + printk(KERN_DEBUG "\tmin_io_size %u\n", + le32_to_cpu(sup->min_io_size)); + printk(KERN_DEBUG "\tleb_size %u\n", + le32_to_cpu(sup->leb_size)); + printk(KERN_DEBUG "\tleb_cnt %u\n", + le32_to_cpu(sup->leb_cnt)); + printk(KERN_DEBUG "\tmax_leb_cnt %u\n", + le32_to_cpu(sup->max_leb_cnt)); + printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", + (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); + printk(KERN_DEBUG "\tlog_lebs %u\n", + le32_to_cpu(sup->log_lebs)); + printk(KERN_DEBUG "\tlpt_lebs %u\n", + le32_to_cpu(sup->lpt_lebs)); + printk(KERN_DEBUG "\torph_lebs %u\n", + le32_to_cpu(sup->orph_lebs)); + printk(KERN_DEBUG "\tjhead_cnt %u\n", + le32_to_cpu(sup->jhead_cnt)); + printk(KERN_DEBUG "\tfanout %u\n", + le32_to_cpu(sup->fanout)); + printk(KERN_DEBUG "\tlsave_cnt %u\n", + le32_to_cpu(sup->lsave_cnt)); + printk(KERN_DEBUG "\tdefault_compr %u\n", + (int)le16_to_cpu(sup->default_compr)); + printk(KERN_DEBUG "\trp_size %llu\n", + (unsigned long long)le64_to_cpu(sup->rp_size)); + printk(KERN_DEBUG "\trp_uid %u\n", + le32_to_cpu(sup->rp_uid)); + printk(KERN_DEBUG "\trp_gid %u\n", + le32_to_cpu(sup->rp_gid)); + printk(KERN_DEBUG "\tfmt_version %u\n", + le32_to_cpu(sup->fmt_version)); + printk(KERN_DEBUG "\ttime_gran %u\n", + le32_to_cpu(sup->time_gran)); + printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X" + "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n", + sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3], + sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7], + sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11], + sup->uuid[12], sup->uuid[13], sup->uuid[14], + sup->uuid[15]); + break; + } + case UBIFS_MST_NODE: + { + const struct ubifs_mst_node *mst = node; + + printk(KERN_DEBUG "\thighest_inum %llu\n", + (unsigned long long)le64_to_cpu(mst->highest_inum)); + printk(KERN_DEBUG "\tcommit number %llu\n", + (unsigned long long)le64_to_cpu(mst->cmt_no)); + printk(KERN_DEBUG "\tflags %#x\n", + le32_to_cpu(mst->flags)); + printk(KERN_DEBUG "\tlog_lnum %u\n", + le32_to_cpu(mst->log_lnum)); + printk(KERN_DEBUG "\troot_lnum %u\n", + le32_to_cpu(mst->root_lnum)); + printk(KERN_DEBUG "\troot_offs %u\n", + le32_to_cpu(mst->root_offs)); + printk(KERN_DEBUG "\troot_len %u\n", + le32_to_cpu(mst->root_len)); + printk(KERN_DEBUG "\tgc_lnum %u\n", + le32_to_cpu(mst->gc_lnum)); + printk(KERN_DEBUG "\tihead_lnum %u\n", + le32_to_cpu(mst->ihead_lnum)); + printk(KERN_DEBUG "\tihead_offs %u\n", + le32_to_cpu(mst->ihead_offs)); + printk(KERN_DEBUG "\tindex_size %u\n", + le32_to_cpu(mst->index_size)); + printk(KERN_DEBUG "\tlpt_lnum %u\n", + le32_to_cpu(mst->lpt_lnum)); + printk(KERN_DEBUG "\tlpt_offs %u\n", + le32_to_cpu(mst->lpt_offs)); + printk(KERN_DEBUG "\tnhead_lnum %u\n", + le32_to_cpu(mst->nhead_lnum)); + printk(KERN_DEBUG "\tnhead_offs %u\n", + le32_to_cpu(mst->nhead_offs)); + printk(KERN_DEBUG "\tltab_lnum %u\n", + le32_to_cpu(mst->ltab_lnum)); + printk(KERN_DEBUG "\tltab_offs %u\n", + le32_to_cpu(mst->ltab_offs)); + printk(KERN_DEBUG "\tlsave_lnum %u\n", + le32_to_cpu(mst->lsave_lnum)); + printk(KERN_DEBUG "\tlsave_offs %u\n", + le32_to_cpu(mst->lsave_offs)); + printk(KERN_DEBUG "\tlscan_lnum %u\n", + le32_to_cpu(mst->lscan_lnum)); + printk(KERN_DEBUG "\tleb_cnt %u\n", + le32_to_cpu(mst->leb_cnt)); + printk(KERN_DEBUG "\tempty_lebs %u\n", + le32_to_cpu(mst->empty_lebs)); + printk(KERN_DEBUG "\tidx_lebs %u\n", + le32_to_cpu(mst->idx_lebs)); + printk(KERN_DEBUG "\ttotal_free %llu\n", + (unsigned long long)le64_to_cpu(mst->total_free)); + printk(KERN_DEBUG "\ttotal_dirty %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dirty)); + printk(KERN_DEBUG "\ttotal_used %llu\n", + (unsigned long long)le64_to_cpu(mst->total_used)); + printk(KERN_DEBUG "\ttotal_dead %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dead)); + printk(KERN_DEBUG "\ttotal_dark %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dark)); + break; + } + case UBIFS_REF_NODE: + { + const struct ubifs_ref_node *ref = node; + + printk(KERN_DEBUG "\tlnum %u\n", + le32_to_cpu(ref->lnum)); + printk(KERN_DEBUG "\toffs %u\n", + le32_to_cpu(ref->offs)); + printk(KERN_DEBUG "\tjhead %u\n", + le32_to_cpu(ref->jhead)); + break; + } + case UBIFS_INO_NODE: + { + const struct ubifs_ino_node *ino = node; + + key_read(c, &ino->key, &key); + printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); + printk(KERN_DEBUG "\tcreat_sqnum %llu\n", + (unsigned long long)le64_to_cpu(ino->creat_sqnum)); + printk(KERN_DEBUG "\tsize %llu\n", + (unsigned long long)le64_to_cpu(ino->size)); + printk(KERN_DEBUG "\tnlink %u\n", + le32_to_cpu(ino->nlink)); + printk(KERN_DEBUG "\tatime %lld.%u\n", + (long long)le64_to_cpu(ino->atime_sec), + le32_to_cpu(ino->atime_nsec)); + printk(KERN_DEBUG "\tmtime %lld.%u\n", + (long long)le64_to_cpu(ino->mtime_sec), + le32_to_cpu(ino->mtime_nsec)); + printk(KERN_DEBUG "\tctime %lld.%u\n", + (long long)le64_to_cpu(ino->ctime_sec), + le32_to_cpu(ino->ctime_nsec)); + printk(KERN_DEBUG "\tuid %u\n", + le32_to_cpu(ino->uid)); + printk(KERN_DEBUG "\tgid %u\n", + le32_to_cpu(ino->gid)); + printk(KERN_DEBUG "\tmode %u\n", + le32_to_cpu(ino->mode)); + printk(KERN_DEBUG "\tflags %#x\n", + le32_to_cpu(ino->flags)); + printk(KERN_DEBUG "\txattr_cnt %u\n", + le32_to_cpu(ino->xattr_cnt)); + printk(KERN_DEBUG "\txattr_size %u\n", + le32_to_cpu(ino->xattr_size)); + printk(KERN_DEBUG "\txattr_names %u\n", + le32_to_cpu(ino->xattr_names)); + printk(KERN_DEBUG "\tcompr_type %#x\n", + (int)le16_to_cpu(ino->compr_type)); + printk(KERN_DEBUG "\tdata len %u\n", + le32_to_cpu(ino->data_len)); + break; + } + case UBIFS_DENT_NODE: + case UBIFS_XENT_NODE: + { + const struct ubifs_dent_node *dent = node; + int nlen = le16_to_cpu(dent->nlen); + + key_read(c, &dent->key, &key); + printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); + printk(KERN_DEBUG "\tinum %llu\n", + (unsigned long long)le64_to_cpu(dent->inum)); + printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); + printk(KERN_DEBUG "\tnlen %d\n", nlen); + printk(KERN_DEBUG "\tname "); + + if (nlen > UBIFS_MAX_NLEN) + printk(KERN_DEBUG "(bad name length, not printing, " + "bad or corrupted node)"); + else { + for (i = 0; i < nlen && dent->name[i]; i++) + printk("%c", dent->name[i]); + } + printk("\n"); + + break; + } + case UBIFS_DATA_NODE: + { + const struct ubifs_data_node *dn = node; + int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; + + key_read(c, &dn->key, &key); + printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); + printk(KERN_DEBUG "\tsize %u\n", + le32_to_cpu(dn->size)); + printk(KERN_DEBUG "\tcompr_typ %d\n", + (int)le16_to_cpu(dn->compr_type)); + printk(KERN_DEBUG "\tdata size %d\n", + dlen); + printk(KERN_DEBUG "\tdata:\n"); + print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, + (void *)&dn->data, dlen, 0); + break; + } + case UBIFS_TRUN_NODE: + { + const struct ubifs_trun_node *trun = node; + + printk(KERN_DEBUG "\tinum %u\n", + le32_to_cpu(trun->inum)); + printk(KERN_DEBUG "\told_size %llu\n", + (unsigned long long)le64_to_cpu(trun->old_size)); + printk(KERN_DEBUG "\tnew_size %llu\n", + (unsigned long long)le64_to_cpu(trun->new_size)); + break; + } + case UBIFS_IDX_NODE: + { + const struct ubifs_idx_node *idx = node; + + n = le16_to_cpu(idx->child_cnt); + printk(KERN_DEBUG "\tchild_cnt %d\n", n); + printk(KERN_DEBUG "\tlevel %d\n", + (int)le16_to_cpu(idx->level)); + printk(KERN_DEBUG "\tBranches:\n"); + + for (i = 0; i < n && i < c->fanout - 1; i++) { + const struct ubifs_branch *br; + + br = ubifs_idx_branch(c, idx, i); + key_read(c, &br->key, &key); + printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", + i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), + le32_to_cpu(br->len), DBGKEY(&key)); + } + break; + } + case UBIFS_CS_NODE: + break; + case UBIFS_ORPH_NODE: + { + const struct ubifs_orph_node *orph = node; + + printk(KERN_DEBUG "\tcommit number %llu\n", + (unsigned long long) + le64_to_cpu(orph->cmt_no) & LLONG_MAX); + printk(KERN_DEBUG "\tlast node flag %llu\n", + (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); + n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; + printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); + for (i = 0; i < n; i++) + printk(KERN_DEBUG "\t ino %llu\n", + le64_to_cpu(orph->inos[i])); + break; + } + default: + printk(KERN_DEBUG "node type %d was not recognized\n", + (int)ch->node_type); + } + spin_unlock(&dbg_lock); +} + +void dbg_dump_budget_req(const struct ubifs_budget_req *req) +{ + spin_lock(&dbg_lock); + printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", + req->new_ino, req->dirtied_ino); + printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", + req->new_ino_d, req->dirtied_ino_d); + printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", + req->new_page, req->dirtied_page); + printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", + req->new_dent, req->mod_dent); + printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); + printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", + req->data_growth, req->dd_growth); + spin_unlock(&dbg_lock); +} + +void dbg_dump_lstats(const struct ubifs_lp_stats *lst) +{ + spin_lock(&dbg_lock); + printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n", + lst->empty_lebs, lst->idx_lebs); + printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " + "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, + lst->total_dirty); + printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " + "total_dead %lld\n", lst->total_used, lst->total_dark, + lst->total_dead); + spin_unlock(&dbg_lock); +} + +void dbg_dump_budg(struct ubifs_info *c) +{ + int i; + struct rb_node *rb; + struct ubifs_bud *bud; + struct ubifs_gced_idx_leb *idx_gc; + + spin_lock(&dbg_lock); + printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, " + "budg_dd_growth %lld, budg_idx_growth %lld\n", + c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); + printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " + "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, + c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, + c->freeable_cnt); + printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " + "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, + c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); + printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " + "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), + atomic_long_read(&c->dirty_zn_cnt), + atomic_long_read(&c->clean_zn_cnt)); + printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", + c->dark_wm, c->dead_wm, c->max_idx_node_sz); + printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", + c->gc_lnum, c->ihead_lnum); + for (i = 0; i < c->jhead_cnt; i++) + printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", + c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); + for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { + bud = rb_entry(rb, struct ubifs_bud, rb); + printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); + } + list_for_each_entry(bud, &c->old_buds, list) + printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); + list_for_each_entry(idx_gc, &c->idx_gc, list) + printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", + idx_gc->lnum, idx_gc->unmap); + printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); + spin_unlock(&dbg_lock); +} + +void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) +{ + printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), " + "flags %#x\n", lp->lnum, lp->free, lp->dirty, + c->leb_size - lp->free - lp->dirty, lp->flags); +} + +void dbg_dump_lprops(struct ubifs_info *c) +{ + int lnum, err; + struct ubifs_lprops lp; + struct ubifs_lp_stats lst; + + printk(KERN_DEBUG "Dumping LEB properties\n"); + ubifs_get_lp_stats(c, &lst); + dbg_dump_lstats(&lst); + + for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { + err = ubifs_read_one_lp(c, lnum, &lp); + if (err) + ubifs_err("cannot read lprops for LEB %d", lnum); + + dbg_dump_lprop(c, &lp); + } +} + +void dbg_dump_leb(const struct ubifs_info *c, int lnum) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + + if (dbg_failure_mode) + return; + + printk(KERN_DEBUG "Dumping LEB %d\n", lnum); + + sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + if (IS_ERR(sleb)) { + ubifs_err("scan error %d", (int)PTR_ERR(sleb)); + return; + } + + printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, + sleb->nodes_cnt, sleb->endpt); + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); + printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, + snod->offs, snod->len); + dbg_dump_node(c, snod->node); + } + + ubifs_scan_destroy(sleb); + return; +} + +void dbg_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode) +{ + int n; + const struct ubifs_zbranch *zbr; + + spin_lock(&dbg_lock); + if (znode->parent) + zbr = &znode->parent->zbranch[znode->iip]; + else + zbr = &c->zroot; + + printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" + " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, + zbr->len, znode->parent, znode->iip, znode->level, + znode->child_cnt, znode->flags); + + if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { + spin_unlock(&dbg_lock); + return; + } + + printk(KERN_DEBUG "zbranches:\n"); + for (n = 0; n < znode->child_cnt; n++) { + zbr = &znode->zbranch[n]; + if (znode->level > 0) + printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " + "%s\n", n, zbr->znode, zbr->lnum, + zbr->offs, zbr->len, + DBGKEY(&zbr->key)); + else + printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " + "%s\n", n, zbr->znode, zbr->lnum, + zbr->offs, zbr->len, + DBGKEY(&zbr->key)); + } + spin_unlock(&dbg_lock); +} + +void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) +{ + int i; + + printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n", + cat, heap->cnt); + for (i = 0; i < heap->cnt; i++) { + struct ubifs_lprops *lprops = heap->arr[i]; + + printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d " + "flags %d\n", i, lprops->lnum, lprops->hpos, + lprops->free, lprops->dirty, lprops->flags); + } +} + +void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip) +{ + int i; + + printk(KERN_DEBUG "Dumping pnode:\n"); + printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", + (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); + printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", + pnode->flags, iip, pnode->level, pnode->num); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops *lp = &pnode->lprops[i]; + + printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", + i, lp->free, lp->dirty, lp->flags, lp->lnum); + } +} + +void dbg_dump_tnc(struct ubifs_info *c) +{ + struct ubifs_znode *znode; + int level; + + printk(KERN_DEBUG "\n"); + printk(KERN_DEBUG "Dumping the TNC tree\n"); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); + level = znode->level; + printk(KERN_DEBUG "== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + printk(KERN_DEBUG "== Level %d ==\n", level); + } + dbg_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); + } + + printk(KERN_DEBUG "\n"); +} + +static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, + void *priv) +{ + dbg_dump_znode(c, znode); + return 0; +} + +/** + * dbg_dump_index - dump the on-flash index. + * @c: UBIFS file-system description object + * + * This function dumps whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()' + * which dumps only in-memory znodes and does not read znodes which from flash. + */ +void dbg_dump_index(struct ubifs_info *c) +{ + dbg_walk_index(c, NULL, dump_znode, NULL); +} + +/** + * dbg_check_synced_i_size - check synchronized inode size. + * @inode: inode to check + * + * If inode is clean, synchronized inode size has to be equivalent to current + * inode size. This function has to be called only for locked inodes (@i_mutex + * has to be locked). Returns %0 if synchronized inode size if correct, and + * %-EINVAL if not. + */ +int dbg_check_synced_i_size(struct inode *inode) +{ + int err = 0; + struct ubifs_inode *ui = ubifs_inode(inode); + + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + if (!S_ISREG(inode->i_mode)) + return 0; + + mutex_lock(&ui->ui_mutex); + spin_lock(&ui->ui_lock); + if (ui->ui_size != ui->synced_i_size && !ui->dirty) { + ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode " + "is clean", ui->ui_size, ui->synced_i_size); + ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, + inode->i_mode, i_size_read(inode)); + dbg_dump_stack(); + err = -EINVAL; + } + spin_unlock(&ui->ui_lock); + mutex_unlock(&ui->ui_mutex); + return err; +} + +/* + * dbg_check_dir - check directory inode size and link count. + * @c: UBIFS file-system description object + * @dir: the directory to calculate size for + * @size: the result is returned here + * + * This function makes sure that directory size and link count are correct. + * Returns zero in case of success and a negative error code in case of + * failure. + * + * Note, it is good idea to make sure the @dir->i_mutex is locked before + * calling this function. + */ +int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) +{ + unsigned int nlink = 2; + union ubifs_key key; + struct ubifs_dent_node *dent, *pdent = NULL; + struct qstr nm = { .name = NULL }; + loff_t size = UBIFS_INO_NODE_SZ; + + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + + if (!S_ISDIR(dir->i_mode)) + return 0; + + lowest_dent_key(c, &key, dir->i_ino); + while (1) { + int err; + + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + if (err == -ENOENT) + break; + return err; + } + + nm.name = dent->name; + nm.len = le16_to_cpu(dent->nlen); + size += CALC_DENT_SIZE(nm.len); + if (dent->type == UBIFS_ITYPE_DIR) + nlink += 1; + kfree(pdent); + pdent = dent; + key_read(c, &dent->key, &key); + } + kfree(pdent); + + if (i_size_read(dir) != size) { + ubifs_err("directory inode %lu has size %llu, " + "but calculated size is %llu", dir->i_ino, + (unsigned long long)i_size_read(dir), + (unsigned long long)size); + dump_stack(); + return -EINVAL; + } + if (dir->i_nlink != nlink) { + ubifs_err("directory inode %lu has nlink %u, but calculated " + "nlink is %u", dir->i_ino, dir->i_nlink, nlink); + dump_stack(); + return -EINVAL; + } + + return 0; +} + +/** + * dbg_check_key_order - make sure that colliding keys are properly ordered. + * @c: UBIFS file-system description object + * @zbr1: first zbranch + * @zbr2: following zbranch + * + * In UBIFS indexing B-tree colliding keys has to be sorted in binary order of + * names of the direntries/xentries which are referred by the keys. This + * function reads direntries/xentries referred by @zbr1 and @zbr2 and makes + * sure the name of direntry/xentry referred by @zbr1 is less than + * direntry/xentry referred by @zbr2. Returns zero if this is true, %1 if not, + * and a negative error code in case of failure. + */ +static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + struct ubifs_zbranch *zbr2) +{ + int err, nlen1, nlen2, cmp; + struct ubifs_dent_node *dent1, *dent2; + union ubifs_key key; + + ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); + dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); + if (!dent1) + return -ENOMEM; + dent2 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); + if (!dent2) { + err = -ENOMEM; + goto out_free; + } + + err = ubifs_tnc_read_node(c, zbr1, dent1); + if (err) + goto out_free; + err = ubifs_validate_entry(c, dent1); + if (err) + goto out_free; + + err = ubifs_tnc_read_node(c, zbr2, dent2); + if (err) + goto out_free; + err = ubifs_validate_entry(c, dent2); + if (err) + goto out_free; + + /* Make sure node keys are the same as in zbranch */ + err = 1; + key_read(c, &dent1->key, &key); + if (keys_cmp(c, &zbr1->key, &key)) { + dbg_err("1st entry at %d:%d has key %s", zbr1->lnum, + zbr1->offs, DBGKEY(&key)); + dbg_err("but it should have key %s according to tnc", + DBGKEY(&zbr1->key)); + dbg_dump_node(c, dent1); + goto out_free; + } + + key_read(c, &dent2->key, &key); + if (keys_cmp(c, &zbr2->key, &key)) { + dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, + zbr1->offs, DBGKEY(&key)); + dbg_err("but it should have key %s according to tnc", + DBGKEY(&zbr2->key)); + dbg_dump_node(c, dent2); + goto out_free; + } + + nlen1 = le16_to_cpu(dent1->nlen); + nlen2 = le16_to_cpu(dent2->nlen); + + cmp = memcmp(dent1->name, dent2->name, min_t(int, nlen1, nlen2)); + if (cmp < 0 || (cmp == 0 && nlen1 < nlen2)) { + err = 0; + goto out_free; + } + if (cmp == 0 && nlen1 == nlen2) + dbg_err("2 xent/dent nodes with the same name"); + else + dbg_err("bad order of colliding key %s", + DBGKEY(&key)); + + dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); + dbg_dump_node(c, dent1); + dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); + dbg_dump_node(c, dent2); + +out_free: + kfree(dent2); + kfree(dent1); + return err; +} + +/** + * dbg_check_znode - check if znode is all right. + * @c: UBIFS file-system description object + * @zbr: zbranch which points to this znode + * + * This function makes sure that znode referred to by @zbr is all right. + * Returns zero if it is, and %-EINVAL if it is not. + */ +static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) +{ + struct ubifs_znode *znode = zbr->znode; + struct ubifs_znode *zp = znode->parent; + int n, err, cmp; + + if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { + err = 1; + goto out; + } + if (znode->level < 0) { + err = 2; + goto out; + } + if (znode->iip < 0 || znode->iip >= c->fanout) { + err = 3; + goto out; + } + + if (zbr->len == 0) + /* Only dirty zbranch may have no on-flash nodes */ + if (!ubifs_zn_dirty(znode)) { + err = 4; + goto out; + } + + if (ubifs_zn_dirty(znode)) { + /* + * If znode is dirty, its parent has to be dirty as well. The + * order of the operation is important, so we have to have + * memory barriers. + */ + smp_mb(); + if (zp && !ubifs_zn_dirty(zp)) { + /* + * The dirty flag is atomic and is cleared outside the + * TNC mutex, so znode's dirty flag may now have + * been cleared. The child is always cleared before the + * parent, so we just need to check again. + */ + smp_mb(); + if (ubifs_zn_dirty(znode)) { + err = 5; + goto out; + } + } + } + + if (zp) { + const union ubifs_key *min, *max; + + if (znode->level != zp->level - 1) { + err = 6; + goto out; + } + + /* Make sure the 'parent' pointer in our znode is correct */ + err = ubifs_search_zbranch(c, zp, &zbr->key, &n); + if (!err) { + /* This zbranch does not exist in the parent */ + err = 7; + goto out; + } + + if (znode->iip >= zp->child_cnt) { + err = 8; + goto out; + } + + if (znode->iip != n) { + /* This may happen only in case of collisions */ + if (keys_cmp(c, &zp->zbranch[n].key, + &zp->zbranch[znode->iip].key)) { + err = 9; + goto out; + } + n = znode->iip; + } + + /* + * Make sure that the first key in our znode is greater than or + * equal to the key in the pointing zbranch. + */ + min = &zbr->key; + cmp = keys_cmp(c, min, &znode->zbranch[0].key); + if (cmp == 1) { + err = 10; + goto out; + } + + if (n + 1 < zp->child_cnt) { + max = &zp->zbranch[n + 1].key; + + /* + * Make sure the last key in our znode is less or + * equivalent than the the key in zbranch which goes + * after our pointing zbranch. + */ + cmp = keys_cmp(c, max, + &znode->zbranch[znode->child_cnt - 1].key); + if (cmp == -1) { + err = 11; + goto out; + } + } + } else { + /* This may only be root znode */ + if (zbr != &c->zroot) { + err = 12; + goto out; + } + } + + /* + * Make sure that next key is greater or equivalent then the previous + * one. + */ + for (n = 1; n < znode->child_cnt; n++) { + cmp = keys_cmp(c, &znode->zbranch[n - 1].key, + &znode->zbranch[n].key); + if (cmp > 0) { + err = 13; + goto out; + } + if (cmp == 0) { + /* This can only be keys with colliding hash */ + if (!is_hash_key(c, &znode->zbranch[n].key)) { + err = 14; + goto out; + } + + if (znode->level != 0 || c->replaying) + continue; + + /* + * Colliding keys should follow binary order of + * corresponding xentry/dentry names. + */ + err = dbg_check_key_order(c, &znode->zbranch[n - 1], + &znode->zbranch[n]); + if (err < 0) + return err; + if (err) { + err = 15; + goto out; + } + } + } + + for (n = 0; n < znode->child_cnt; n++) { + if (!znode->zbranch[n].znode && + (znode->zbranch[n].lnum == 0 || + znode->zbranch[n].len == 0)) { + err = 16; + goto out; + } + + if (znode->zbranch[n].lnum != 0 && + znode->zbranch[n].len == 0) { + err = 17; + goto out; + } + + if (znode->zbranch[n].lnum == 0 && + znode->zbranch[n].len != 0) { + err = 18; + goto out; + } + + if (znode->zbranch[n].lnum == 0 && + znode->zbranch[n].offs != 0) { + err = 19; + goto out; + } + + if (znode->level != 0 && znode->zbranch[n].znode) + if (znode->zbranch[n].znode->parent != znode) { + err = 20; + goto out; + } + } + + return 0; + +out: + ubifs_err("failed, error %d", err); + ubifs_msg("dump of the znode"); + dbg_dump_znode(c, znode); + if (zp) { + ubifs_msg("dump of the parent znode"); + dbg_dump_znode(c, zp); + } + dump_stack(); + return -EINVAL; +} + +/** + * dbg_check_tnc - check TNC tree. + * @c: UBIFS file-system description object + * @extra: do extra checks that are possible at start commit + * + * This function traverses whole TNC tree and checks every znode. Returns zero + * if everything is all right and %-EINVAL if something is wrong with TNC. + */ +int dbg_check_tnc(struct ubifs_info *c, int extra) +{ + struct ubifs_znode *znode; + long clean_cnt = 0, dirty_cnt = 0; + int err, last; + + if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) + return 0; + + ubifs_assert(mutex_is_locked(&c->tnc_mutex)); + if (!c->zroot.znode) + return 0; + + znode = ubifs_tnc_postorder_first(c->zroot.znode); + while (1) { + struct ubifs_znode *prev; + struct ubifs_zbranch *zbr; + + if (!znode->parent) + zbr = &c->zroot; + else + zbr = &znode->parent->zbranch[znode->iip]; + + err = dbg_check_znode(c, zbr); + if (err) + return err; + + if (extra) { + if (ubifs_zn_dirty(znode)) + dirty_cnt += 1; + else + clean_cnt += 1; + } + + prev = znode; + znode = ubifs_tnc_postorder_next(znode); + if (!znode) + break; + + /* + * If the last key of this znode is equivalent to the first key + * of the next znode (collision), then check order of the keys. + */ + last = prev->child_cnt - 1; + if (prev->level == 0 && znode->level == 0 && !c->replaying && + !keys_cmp(c, &prev->zbranch[last].key, + &znode->zbranch[0].key)) { + err = dbg_check_key_order(c, &prev->zbranch[last], + &znode->zbranch[0]); + if (err < 0) + return err; + if (err) { + ubifs_msg("first znode"); + dbg_dump_znode(c, prev); + ubifs_msg("second znode"); + dbg_dump_znode(c, znode); + return -EINVAL; + } + } + } + + if (extra) { + if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) { + ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld", + atomic_long_read(&c->clean_zn_cnt), + clean_cnt); + return -EINVAL; + } + if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) { + ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld", + atomic_long_read(&c->dirty_zn_cnt), + dirty_cnt); + return -EINVAL; + } + } + + return 0; +} + +/** + * dbg_walk_index - walk the on-flash index. + * @c: UBIFS file-system description object + * @leaf_cb: called for each leaf node + * @znode_cb: called for each indexing node + * @priv: private date which is passed to callbacks + * + * This function walks the UBIFS index and calls the @leaf_cb for each leaf + * node and @znode_cb for each indexing node. Returns zero in case of success + * and a negative error code in case of failure. + * + * It would be better if this function removed every znode it pulled to into + * the TNC, so that the behavior more closely matched the non-debugging + * behavior. + */ +int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, + dbg_znode_callback znode_cb, void *priv) +{ + int err; + struct ubifs_zbranch *zbr; + struct ubifs_znode *znode, *child; + + mutex_lock(&c->tnc_mutex); + /* If the root indexing node is not in TNC - pull it */ + if (!c->zroot.znode) { + c->zroot.znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(c->zroot.znode)) { + err = PTR_ERR(c->zroot.znode); + c->zroot.znode = NULL; + goto out_unlock; + } + } + + /* + * We are going to traverse the indexing tree in the postorder manner. + * Go down and find the leftmost indexing node where we are going to + * start from. + */ + znode = c->zroot.znode; + while (znode->level > 0) { + zbr = &znode->zbranch[0]; + child = zbr->znode; + if (!child) { + child = ubifs_load_znode(c, zbr, znode, 0); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto out_unlock; + } + zbr->znode = child; + } + + znode = child; + } + + /* Iterate over all indexing nodes */ + while (1) { + int idx; + + cond_resched(); + + if (znode_cb) { + err = znode_cb(c, znode, priv); + if (err) { + ubifs_err("znode checking function returned " + "error %d", err); + dbg_dump_znode(c, znode); + goto out_dump; + } + } + if (leaf_cb && znode->level == 0) { + for (idx = 0; idx < znode->child_cnt; idx++) { + zbr = &znode->zbranch[idx]; + err = leaf_cb(c, zbr, priv); + if (err) { + ubifs_err("leaf checking function " + "returned error %d, for leaf " + "at LEB %d:%d", + err, zbr->lnum, zbr->offs); + goto out_dump; + } + } + } + + if (!znode->parent) + break; + + idx = znode->iip + 1; + znode = znode->parent; + if (idx < znode->child_cnt) { + /* Switch to the next index in the parent */ + zbr = &znode->zbranch[idx]; + child = zbr->znode; + if (!child) { + child = ubifs_load_znode(c, zbr, znode, idx); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto out_unlock; + } + zbr->znode = child; + } + znode = child; + } else + /* + * This is the last child, switch to the parent and + * continue. + */ + continue; + + /* Go to the lowest leftmost znode in the new sub-tree */ + while (znode->level > 0) { + zbr = &znode->zbranch[0]; + child = zbr->znode; + if (!child) { + child = ubifs_load_znode(c, zbr, znode, 0); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto out_unlock; + } + zbr->znode = child; + } + znode = child; + } + } + + mutex_unlock(&c->tnc_mutex); + return 0; + +out_dump: + if (znode->parent) + zbr = &znode->parent->zbranch[znode->iip]; + else + zbr = &c->zroot; + ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); + dbg_dump_znode(c, znode); +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * add_size - add znode size to partially calculated index size. + * @c: UBIFS file-system description object + * @znode: znode to add size for + * @priv: partially calculated index size + * + * This is a helper function for 'dbg_check_idx_size()' which is called for + * every indexing node and adds its size to the 'long long' variable pointed to + * by @priv. + */ +static int add_size(struct ubifs_info *c, struct ubifs_znode *znode, void *priv) +{ + long long *idx_size = priv; + int add; + + add = ubifs_idx_node_sz(c, znode->child_cnt); + add = ALIGN(add, 8); + *idx_size += add; + return 0; +} + +/** + * dbg_check_idx_size - check index size. + * @c: UBIFS file-system description object + * @idx_size: size to check + * + * This function walks the UBIFS index, calculates its size and checks that the + * size is equivalent to @idx_size. Returns zero in case of success and a + * negative error code in case of failure. + */ +int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) +{ + int err; + long long calc = 0; + + if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) + return 0; + + err = dbg_walk_index(c, NULL, add_size, &calc); + if (err) { + ubifs_err("error %d while walking the index", err); + return err; + } + + if (calc != idx_size) { + ubifs_err("index size check failed: calculated size is %lld, " + "should be %lld", calc, idx_size); + dump_stack(); + return -EINVAL; + } + + return 0; +} + +/** + * struct fsck_inode - information about an inode used when checking the file-system. + * @rb: link in the RB-tree of inodes + * @inum: inode number + * @mode: inode type, permissions, etc + * @nlink: inode link count + * @xattr_cnt: count of extended attributes + * @references: how many directory/xattr entries refer this inode (calculated + * while walking the index) + * @calc_cnt: for directory inode count of child directories + * @size: inode size (read from on-flash inode) + * @xattr_sz: summary size of all extended attributes (read from on-flash + * inode) + * @calc_sz: for directories calculated directory size + * @calc_xcnt: count of extended attributes + * @calc_xsz: calculated summary size of all extended attributes + * @xattr_nms: sum of lengths of all extended attribute names belonging to this + * inode (read from on-flash inode) + * @calc_xnms: calculated sum of lengths of all extended attribute names + */ +struct fsck_inode { + struct rb_node rb; + ino_t inum; + umode_t mode; + unsigned int nlink; + unsigned int xattr_cnt; + int references; + int calc_cnt; + long long size; + unsigned int xattr_sz; + long long calc_sz; + long long calc_xcnt; + long long calc_xsz; + unsigned int xattr_nms; + long long calc_xnms; +}; + +/** + * struct fsck_data - private FS checking information. + * @inodes: RB-tree of all inodes (contains @struct fsck_inode objects) + */ +struct fsck_data { + struct rb_root inodes; +}; + +/** + * add_inode - add inode information to RB-tree of inodes. + * @c: UBIFS file-system description object + * @fsckd: FS checking information + * @ino: raw UBIFS inode to add + * + * This is a helper function for 'check_leaf()' which adds information about + * inode @ino to the RB-tree of inodes. Returns inode information pointer in + * case of success and a negative error code in case of failure. + */ +static struct fsck_inode *add_inode(struct ubifs_info *c, + struct fsck_data *fsckd, + struct ubifs_ino_node *ino) +{ + struct rb_node **p, *parent = NULL; + struct fsck_inode *fscki; + ino_t inum = key_inum_flash(c, &ino->key); + + p = &fsckd->inodes.rb_node; + while (*p) { + parent = *p; + fscki = rb_entry(parent, struct fsck_inode, rb); + if (inum < fscki->inum) + p = &(*p)->rb_left; + else if (inum > fscki->inum) + p = &(*p)->rb_right; + else + return fscki; + } + + if (inum > c->highest_inum) { + ubifs_err("too high inode number, max. is %lu", + c->highest_inum); + return ERR_PTR(-EINVAL); + } + + fscki = kzalloc(sizeof(struct fsck_inode), GFP_NOFS); + if (!fscki) + return ERR_PTR(-ENOMEM); + + fscki->inum = inum; + fscki->nlink = le32_to_cpu(ino->nlink); + fscki->size = le64_to_cpu(ino->size); + fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); + fscki->xattr_sz = le32_to_cpu(ino->xattr_size); + fscki->xattr_nms = le32_to_cpu(ino->xattr_names); + fscki->mode = le32_to_cpu(ino->mode); + if (S_ISDIR(fscki->mode)) { + fscki->calc_sz = UBIFS_INO_NODE_SZ; + fscki->calc_cnt = 2; + } + rb_link_node(&fscki->rb, parent, p); + rb_insert_color(&fscki->rb, &fsckd->inodes); + return fscki; +} + +/** + * search_inode - search inode in the RB-tree of inodes. + * @fsckd: FS checking information + * @inum: inode number to search + * + * This is a helper function for 'check_leaf()' which searches inode @inum in + * the RB-tree of inodes and returns an inode information pointer or %NULL if + * the inode was not found. + */ +static struct fsck_inode *search_inode(struct fsck_data *fsckd, ino_t inum) +{ + struct rb_node *p; + struct fsck_inode *fscki; + + p = fsckd->inodes.rb_node; + while (p) { + fscki = rb_entry(p, struct fsck_inode, rb); + if (inum < fscki->inum) + p = p->rb_left; + else if (inum > fscki->inum) + p = p->rb_right; + else + return fscki; + } + return NULL; +} + +/** + * read_add_inode - read inode node and add it to RB-tree of inodes. + * @c: UBIFS file-system description object + * @fsckd: FS checking information + * @inum: inode number to read + * + * This is a helper function for 'check_leaf()' which finds inode node @inum in + * the index, reads it, and adds it to the RB-tree of inodes. Returns inode + * information pointer in case of success and a negative error code in case of + * failure. + */ +static struct fsck_inode *read_add_inode(struct ubifs_info *c, + struct fsck_data *fsckd, ino_t inum) +{ + int n, err; + union ubifs_key key; + struct ubifs_znode *znode; + struct ubifs_zbranch *zbr; + struct ubifs_ino_node *ino; + struct fsck_inode *fscki; + + fscki = search_inode(fsckd, inum); + if (fscki) + return fscki; + + ino_key_init(c, &key, inum); + err = ubifs_lookup_level0(c, &key, &znode, &n); + if (!err) { + ubifs_err("inode %lu not found in index", inum); + return ERR_PTR(-ENOENT); + } else if (err < 0) { + ubifs_err("error %d while looking up inode %lu", err, inum); + return ERR_PTR(err); + } + + zbr = &znode->zbranch[n]; + if (zbr->len < UBIFS_INO_NODE_SZ) { + ubifs_err("bad node %lu node length %d", inum, zbr->len); + return ERR_PTR(-EINVAL); + } + + ino = kmalloc(zbr->len, GFP_NOFS); + if (!ino) + return ERR_PTR(-ENOMEM); + + err = ubifs_tnc_read_node(c, zbr, ino); + if (err) { + ubifs_err("cannot read inode node at LEB %d:%d, error %d", + zbr->lnum, zbr->offs, err); + kfree(ino); + return ERR_PTR(err); + } + + fscki = add_inode(c, fsckd, ino); + kfree(ino); + if (IS_ERR(fscki)) { + ubifs_err("error %ld while adding inode %lu node", + PTR_ERR(fscki), inum); + return fscki; + } + + return fscki; +} + +/** + * check_leaf - check leaf node. + * @c: UBIFS file-system description object + * @zbr: zbranch of the leaf node to check + * @priv: FS checking information + * + * This is a helper function for 'dbg_check_filesystem()' which is called for + * every single leaf node while walking the indexing tree. It checks that the + * leaf node referred from the indexing tree exists, has correct CRC, and does + * some other basic validation. This function is also responsible for building + * an RB-tree of inodes - it adds all inodes into the RB-tree. It also + * calculates reference count, size, etc for each inode in order to later + * compare them to the information stored inside the inodes and detect possible + * inconsistencies. Returns zero in case of success and a negative error code + * in case of failure. + */ +static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *priv) +{ + ino_t inum; + void *node; + struct ubifs_ch *ch; + int err, type = key_type(c, &zbr->key); + struct fsck_inode *fscki; + + if (zbr->len < UBIFS_CH_SZ) { + ubifs_err("bad leaf length %d (LEB %d:%d)", + zbr->len, zbr->lnum, zbr->offs); + return -EINVAL; + } + + node = kmalloc(zbr->len, GFP_NOFS); + if (!node) + return -ENOMEM; + + err = ubifs_tnc_read_node(c, zbr, node); + if (err) { + ubifs_err("cannot read leaf node at LEB %d:%d, error %d", + zbr->lnum, zbr->offs, err); + goto out_free; + } + + /* If this is an inode node, add it to RB-tree of inodes */ + if (type == UBIFS_INO_KEY) { + fscki = add_inode(c, priv, node); + if (IS_ERR(fscki)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while adding inode node", err); + goto out_dump; + } + goto out; + } + + if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY && + type != UBIFS_DATA_KEY) { + ubifs_err("unexpected node type %d at LEB %d:%d", + type, zbr->lnum, zbr->offs); + err = -EINVAL; + goto out_free; + } + + ch = node; + if (le64_to_cpu(ch->sqnum) > c->max_sqnum) { + ubifs_err("too high sequence number, max. is %llu", + c->max_sqnum); + err = -EINVAL; + goto out_dump; + } + + if (type == UBIFS_DATA_KEY) { + long long blk_offs; + struct ubifs_data_node *dn = node; + + /* + * Search the inode node this data node belongs to and insert + * it to the RB-tree of inodes. + */ + inum = key_inum_flash(c, &dn->key); + fscki = read_add_inode(c, priv, inum); + if (IS_ERR(fscki)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while processing data node and " + "trying to find inode node %lu", err, inum); + goto out_dump; + } + + /* Make sure the data node is within inode size */ + blk_offs = key_block_flash(c, &dn->key); + blk_offs <<= UBIFS_BLOCK_SHIFT; + blk_offs += le32_to_cpu(dn->size); + if (blk_offs > fscki->size) { + ubifs_err("data node at LEB %d:%d is not within inode " + "size %lld", zbr->lnum, zbr->offs, + fscki->size); + err = -EINVAL; + goto out_dump; + } + } else { + int nlen; + struct ubifs_dent_node *dent = node; + struct fsck_inode *fscki1; + + err = ubifs_validate_entry(c, dent); + if (err) + goto out_dump; + + /* + * Search the inode node this entry refers to and the parent + * inode node and insert them to the RB-tree of inodes. + */ + inum = le64_to_cpu(dent->inum); + fscki = read_add_inode(c, priv, inum); + if (IS_ERR(fscki)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while processing entry node and " + "trying to find inode node %lu", err, inum); + goto out_dump; + } + + /* Count how many direntries or xentries refers this inode */ + fscki->references += 1; + + inum = key_inum_flash(c, &dent->key); + fscki1 = read_add_inode(c, priv, inum); + if (IS_ERR(fscki1)) { + err = PTR_ERR(fscki); + ubifs_err("error %d while processing entry node and " + "trying to find parent inode node %lu", + err, inum); + goto out_dump; + } + + nlen = le16_to_cpu(dent->nlen); + if (type == UBIFS_XENT_KEY) { + fscki1->calc_xcnt += 1; + fscki1->calc_xsz += CALC_DENT_SIZE(nlen); + fscki1->calc_xsz += CALC_XATTR_BYTES(fscki->size); + fscki1->calc_xnms += nlen; + } else { + fscki1->calc_sz += CALC_DENT_SIZE(nlen); + if (dent->type == UBIFS_ITYPE_DIR) + fscki1->calc_cnt += 1; + } + } + +out: + kfree(node); + return 0; + +out_dump: + ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); + dbg_dump_node(c, node); +out_free: + kfree(node); + return err; +} + +/** + * free_inodes - free RB-tree of inodes. + * @fsckd: FS checking information + */ +static void free_inodes(struct fsck_data *fsckd) +{ + struct rb_node *this = fsckd->inodes.rb_node; + struct fsck_inode *fscki; + + while (this) { + if (this->rb_left) + this = this->rb_left; + else if (this->rb_right) + this = this->rb_right; + else { + fscki = rb_entry(this, struct fsck_inode, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &fscki->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(fscki); + } + } +} + +/** + * check_inodes - checks all inodes. + * @c: UBIFS file-system description object + * @fsckd: FS checking information + * + * This is a helper function for 'dbg_check_filesystem()' which walks the + * RB-tree of inodes after the index scan has been finished, and checks that + * inode nlink, size, etc are correct. Returns zero if inodes are fine, + * %-EINVAL if not, and a negative error code in case of failure. + */ +static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) +{ + int n, err; + union ubifs_key key; + struct ubifs_znode *znode; + struct ubifs_zbranch *zbr; + struct ubifs_ino_node *ino; + struct fsck_inode *fscki; + struct rb_node *this = rb_first(&fsckd->inodes); + + while (this) { + fscki = rb_entry(this, struct fsck_inode, rb); + this = rb_next(this); + + if (S_ISDIR(fscki->mode)) { + /* + * Directories have to have exactly one reference (they + * cannot have hardlinks), although root inode is an + * exception. + */ + if (fscki->inum != UBIFS_ROOT_INO && + fscki->references != 1) { + ubifs_err("directory inode %lu has %d " + "direntries which refer it, but " + "should be 1", fscki->inum, + fscki->references); + goto out_dump; + } + if (fscki->inum == UBIFS_ROOT_INO && + fscki->references != 0) { + ubifs_err("root inode %lu has non-zero (%d) " + "direntries which refer it", + fscki->inum, fscki->references); + goto out_dump; + } + if (fscki->calc_sz != fscki->size) { + ubifs_err("directory inode %lu size is %lld, " + "but calculated size is %lld", + fscki->inum, fscki->size, + fscki->calc_sz); + goto out_dump; + } + if (fscki->calc_cnt != fscki->nlink) { + ubifs_err("directory inode %lu nlink is %d, " + "but calculated nlink is %d", + fscki->inum, fscki->nlink, + fscki->calc_cnt); + goto out_dump; + } + } else { + if (fscki->references != fscki->nlink) { + ubifs_err("inode %lu nlink is %d, but " + "calculated nlink is %d", fscki->inum, + fscki->nlink, fscki->references); + goto out_dump; + } + } + if (fscki->xattr_sz != fscki->calc_xsz) { + ubifs_err("inode %lu has xattr size %u, but " + "calculated size is %lld", + fscki->inum, fscki->xattr_sz, + fscki->calc_xsz); + goto out_dump; + } + if (fscki->xattr_cnt != fscki->calc_xcnt) { + ubifs_err("inode %lu has %u xattrs, but " + "calculated count is %lld", fscki->inum, + fscki->xattr_cnt, fscki->calc_xcnt); + goto out_dump; + } + if (fscki->xattr_nms != fscki->calc_xnms) { + ubifs_err("inode %lu has xattr names' size %u, but " + "calculated names' size is %lld", + fscki->inum, fscki->xattr_nms, + fscki->calc_xnms); + goto out_dump; + } + } + + return 0; + +out_dump: + /* Read the bad inode and dump it */ + ino_key_init(c, &key, fscki->inum); + err = ubifs_lookup_level0(c, &key, &znode, &n); + if (!err) { + ubifs_err("inode %lu not found in index", fscki->inum); + return -ENOENT; + } else if (err < 0) { + ubifs_err("error %d while looking up inode %lu", + err, fscki->inum); + return err; + } + + zbr = &znode->zbranch[n]; + ino = kmalloc(zbr->len, GFP_NOFS); + if (!ino) + return -ENOMEM; + + err = ubifs_tnc_read_node(c, zbr, ino); + if (err) { + ubifs_err("cannot read inode node at LEB %d:%d, error %d", + zbr->lnum, zbr->offs, err); + kfree(ino); + return err; + } + + ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", + fscki->inum, zbr->lnum, zbr->offs); + dbg_dump_node(c, ino); + kfree(ino); + return -EINVAL; +} + +/** + * dbg_check_filesystem - check the file-system. + * @c: UBIFS file-system description object + * + * This function checks the file system, namely: + * o makes sure that all leaf nodes exist and their CRCs are correct; + * o makes sure inode nlink, size, xattr size/count are correct (for all + * inodes). + * + * The function reads whole indexing tree and all nodes, so it is pretty + * heavy-weight. Returns zero if the file-system is consistent, %-EINVAL if + * not, and a negative error code in case of failure. + */ +int dbg_check_filesystem(struct ubifs_info *c) +{ + int err; + struct fsck_data fsckd; + + if (!(ubifs_chk_flags & UBIFS_CHK_FS)) + return 0; + + fsckd.inodes = RB_ROOT; + err = dbg_walk_index(c, check_leaf, NULL, &fsckd); + if (err) + goto out_free; + + err = check_inodes(c, &fsckd); + if (err) + goto out_free; + + free_inodes(&fsckd); + return 0; + +out_free: + ubifs_err("file-system check failed with error %d", err); + dump_stack(); + free_inodes(&fsckd); + return err; +} + +static int invocation_cnt; + +int dbg_force_in_the_gaps(void) +{ + if (!dbg_force_in_the_gaps_enabled) + return 0; + /* Force in-the-gaps every 8th commit */ + return !((invocation_cnt++) & 0x7); +} + +/* Failure mode for recovery testing */ + +#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) + +struct failure_mode_info { + struct list_head list; + struct ubifs_info *c; +}; + +static LIST_HEAD(fmi_list); +static DEFINE_SPINLOCK(fmi_lock); + +static unsigned int next; + +static int simple_rand(void) +{ + if (next == 0) + next = current->pid; + next = next * 1103515245 + 12345; + return (next >> 16) & 32767; +} + +void dbg_failure_mode_registration(struct ubifs_info *c) +{ + struct failure_mode_info *fmi; + + fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); + if (!fmi) { + dbg_err("Failed to register failure mode - no memory"); + return; + } + fmi->c = c; + spin_lock(&fmi_lock); + list_add_tail(&fmi->list, &fmi_list); + spin_unlock(&fmi_lock); +} + +void dbg_failure_mode_deregistration(struct ubifs_info *c) +{ + struct failure_mode_info *fmi, *tmp; + + spin_lock(&fmi_lock); + list_for_each_entry_safe(fmi, tmp, &fmi_list, list) + if (fmi->c == c) { + list_del(&fmi->list); + kfree(fmi); + } + spin_unlock(&fmi_lock); +} + +static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) +{ + struct failure_mode_info *fmi; + + spin_lock(&fmi_lock); + list_for_each_entry(fmi, &fmi_list, list) + if (fmi->c->ubi == desc) { + struct ubifs_info *c = fmi->c; + + spin_unlock(&fmi_lock); + return c; + } + spin_unlock(&fmi_lock); + return NULL; +} + +static int in_failure_mode(struct ubi_volume_desc *desc) +{ + struct ubifs_info *c = dbg_find_info(desc); + + if (c && dbg_failure_mode) + return c->failure_mode; + return 0; +} + +static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) +{ + struct ubifs_info *c = dbg_find_info(desc); + + if (!c || !dbg_failure_mode) + return 0; + if (c->failure_mode) + return 1; + if (!c->fail_cnt) { + /* First call - decide delay to failure */ + if (chance(1, 2)) { + unsigned int delay = 1 << (simple_rand() >> 11); + + if (chance(1, 2)) { + c->fail_delay = 1; + c->fail_timeout = jiffies + + msecs_to_jiffies(delay); + dbg_rcvry("failing after %ums", delay); + } else { + c->fail_delay = 2; + c->fail_cnt_max = delay; + dbg_rcvry("failing after %u calls", delay); + } + } + c->fail_cnt += 1; + } + /* Determine if failure delay has expired */ + if (c->fail_delay == 1) { + if (time_before(jiffies, c->fail_timeout)) + return 0; + } else if (c->fail_delay == 2) + if (c->fail_cnt++ < c->fail_cnt_max) + return 0; + if (lnum == UBIFS_SB_LNUM) { + if (write) { + if (chance(1, 2)) + return 0; + } else if (chance(19, 20)) + return 0; + dbg_rcvry("failing in super block LEB %d", lnum); + } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { + if (chance(19, 20)) + return 0; + dbg_rcvry("failing in master LEB %d", lnum); + } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { + if (write) { + if (chance(99, 100)) + return 0; + } else if (chance(399, 400)) + return 0; + dbg_rcvry("failing in log LEB %d", lnum); + } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { + if (write) { + if (chance(7, 8)) + return 0; + } else if (chance(19, 20)) + return 0; + dbg_rcvry("failing in LPT LEB %d", lnum); + } else if (lnum >= c->orph_first && lnum <= c->orph_last) { + if (write) { + if (chance(1, 2)) + return 0; + } else if (chance(9, 10)) + return 0; + dbg_rcvry("failing in orphan LEB %d", lnum); + } else if (lnum == c->ihead_lnum) { + if (chance(99, 100)) + return 0; + dbg_rcvry("failing in index head LEB %d", lnum); + } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { + if (chance(9, 10)) + return 0; + dbg_rcvry("failing in GC head LEB %d", lnum); + } else if (write && !RB_EMPTY_ROOT(&c->buds) && + !ubifs_search_bud(c, lnum)) { + if (chance(19, 20)) + return 0; + dbg_rcvry("failing in non-bud LEB %d", lnum); + } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || + c->cmt_state == COMMIT_RUNNING_REQUIRED) { + if (chance(999, 1000)) + return 0; + dbg_rcvry("failing in bud LEB %d commit running", lnum); + } else { + if (chance(9999, 10000)) + return 0; + dbg_rcvry("failing in bud LEB %d commit not running", lnum); + } + ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); + c->failure_mode = 1; + dump_stack(); + return 1; +} + +static void cut_data(const void *buf, int len) +{ + int flen, i; + unsigned char *p = (void *)buf; + + flen = (len * (long long)simple_rand()) >> 15; + for (i = flen; i < len; i++) + p[i] = 0xff; +} + +int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check) +{ + if (in_failure_mode(desc)) + return -EIO; + return ubi_leb_read(desc, lnum, buf, offset, len, check); +} + +int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len, int dtype) +{ + int err; + + if (in_failure_mode(desc)) + return -EIO; + if (do_fail(desc, lnum, 1)) + cut_data(buf, len); + err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); + if (err) + return err; + if (in_failure_mode(desc)) + return -EIO; + return 0; +} + +int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, + int len, int dtype) +{ + int err; + + if (do_fail(desc, lnum, 1)) + return -EIO; + err = ubi_leb_change(desc, lnum, buf, len, dtype); + if (err) + return err; + if (do_fail(desc, lnum, 1)) + return -EIO; + return 0; +} + +int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) +{ + int err; + + if (do_fail(desc, lnum, 0)) + return -EIO; + err = ubi_leb_erase(desc, lnum); + if (err) + return err; + if (do_fail(desc, lnum, 0)) + return -EIO; + return 0; +} + +int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) +{ + int err; + + if (do_fail(desc, lnum, 0)) + return -EIO; + err = ubi_leb_unmap(desc, lnum); + if (err) + return err; + if (do_fail(desc, lnum, 0)) + return -EIO; + return 0; +} + +int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) +{ + if (in_failure_mode(desc)) + return -EIO; + return ubi_is_mapped(desc, lnum); +} + +int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) +{ + int err; + + if (do_fail(desc, lnum, 0)) + return -EIO; + err = ubi_leb_map(desc, lnum, dtype); + if (err) + return err; + if (do_fail(desc, lnum, 0)) + return -EIO; + return 0; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h new file mode 100644 index 000000000000..3c4f1e93c9e0 --- /dev/null +++ b/fs/ubifs/debug.h @@ -0,0 +1,403 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +#ifndef __UBIFS_DEBUG_H__ +#define __UBIFS_DEBUG_H__ + +#ifdef CONFIG_UBIFS_FS_DEBUG + +#define UBIFS_DBG(op) op + +#define ubifs_assert(expr) do { \ + if (unlikely(!(expr))) { \ + printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ + dbg_dump_stack(); \ + } \ +} while (0) + +#define ubifs_assert_cmt_locked(c) do { \ + if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ + up_write(&(c)->commit_sem); \ + printk(KERN_CRIT "commit lock is not locked!\n"); \ + ubifs_assert(0); \ + } \ +} while (0) + +#define dbg_dump_stack() do { \ + if (!dbg_failure_mode) \ + dump_stack(); \ +} while (0) + +/* Generic debugging messages */ +#define dbg_msg(fmt, ...) do { \ + spin_lock(&dbg_lock); \ + printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ + __func__, ##__VA_ARGS__); \ + spin_unlock(&dbg_lock); \ +} while (0) + +#define dbg_do_msg(typ, fmt, ...) do { \ + if (ubifs_msg_flags & typ) \ + dbg_msg(fmt, ##__VA_ARGS__); \ +} while (0) + +#define dbg_err(fmt, ...) do { \ + spin_lock(&dbg_lock); \ + ubifs_err(fmt, ##__VA_ARGS__); \ + spin_unlock(&dbg_lock); \ +} while (0) + +const char *dbg_key_str0(const struct ubifs_info *c, + const union ubifs_key *key); +const char *dbg_key_str1(const struct ubifs_info *c, + const union ubifs_key *key); + +/* + * DBGKEY macros require dbg_lock to be held, which it is in the dbg message + * macros. + */ +#define DBGKEY(key) dbg_key_str0(c, (key)) +#define DBGKEY1(key) dbg_key_str1(c, (key)) + +/* General messages */ +#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) + +/* Additional journal messages */ +#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) + +/* Additional TNC messages */ +#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) + +/* Additional lprops messages */ +#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) + +/* Additional LEB find messages */ +#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) + +/* Additional mount messages */ +#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) + +/* Additional I/O messages */ +#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) + +/* Additional commit messages */ +#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) + +/* Additional budgeting messages */ +#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) + +/* Additional log messages */ +#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) + +/* Additional gc messages */ +#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) + +/* Additional scan messages */ +#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) + +/* Additional recovery messages */ +#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) + +/* + * Debugging message type flags (must match msg_type_names in debug.c). + * + * UBIFS_MSG_GEN: general messages + * UBIFS_MSG_JNL: journal messages + * UBIFS_MSG_MNT: mount messages + * UBIFS_MSG_CMT: commit messages + * UBIFS_MSG_FIND: LEB find messages + * UBIFS_MSG_BUDG: budgeting messages + * UBIFS_MSG_GC: garbage collection messages + * UBIFS_MSG_TNC: TNC messages + * UBIFS_MSG_LP: lprops messages + * UBIFS_MSG_IO: I/O messages + * UBIFS_MSG_LOG: log messages + * UBIFS_MSG_SCAN: scan messages + * UBIFS_MSG_RCVRY: recovery messages + */ +enum { + UBIFS_MSG_GEN = 0x1, + UBIFS_MSG_JNL = 0x2, + UBIFS_MSG_MNT = 0x4, + UBIFS_MSG_CMT = 0x8, + UBIFS_MSG_FIND = 0x10, + UBIFS_MSG_BUDG = 0x20, + UBIFS_MSG_GC = 0x40, + UBIFS_MSG_TNC = 0x80, + UBIFS_MSG_LP = 0x100, + UBIFS_MSG_IO = 0x200, + UBIFS_MSG_LOG = 0x400, + UBIFS_MSG_SCAN = 0x800, + UBIFS_MSG_RCVRY = 0x1000, +}; + +/* Debugging message type flags for each default debug message level */ +#define UBIFS_MSG_LVL_0 0 +#define UBIFS_MSG_LVL_1 0x1 +#define UBIFS_MSG_LVL_2 0x7f +#define UBIFS_MSG_LVL_3 0xffff + +/* + * Debugging check flags (must match chk_names in debug.c). + * + * UBIFS_CHK_GEN: general checks + * UBIFS_CHK_TNC: check TNC + * UBIFS_CHK_IDX_SZ: check index size + * UBIFS_CHK_ORPH: check orphans + * UBIFS_CHK_OLD_IDX: check the old index + * UBIFS_CHK_LPROPS: check lprops + * UBIFS_CHK_FS: check the file-system + */ +enum { + UBIFS_CHK_GEN = 0x1, + UBIFS_CHK_TNC = 0x2, + UBIFS_CHK_IDX_SZ = 0x4, + UBIFS_CHK_ORPH = 0x8, + UBIFS_CHK_OLD_IDX = 0x10, + UBIFS_CHK_LPROPS = 0x20, + UBIFS_CHK_FS = 0x40, +}; + +/* + * Special testing flags (must match tst_names in debug.c). + * + * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method + * UBIFS_TST_RCVRY: failure mode for recovery testing + */ +enum { + UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, + UBIFS_TST_RCVRY = 0x4, +}; + +#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 +#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 +#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 +#else +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 +#endif + +#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS +#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff +#else +#define UBIFS_CHK_FLAGS_DEFAULT 0 +#endif + +extern spinlock_t dbg_lock; + +extern unsigned int ubifs_msg_flags; +extern unsigned int ubifs_chk_flags; +extern unsigned int ubifs_tst_flags; + +/* Dump functions */ + +const char *dbg_ntype(int type); +const char *dbg_cstate(int cmt_state); +const char *dbg_get_key_dump(const struct ubifs_info *c, + const union ubifs_key *key); +void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); +void dbg_dump_node(const struct ubifs_info *c, const void *node); +void dbg_dump_budget_req(const struct ubifs_budget_req *req); +void dbg_dump_lstats(const struct ubifs_lp_stats *lst); +void dbg_dump_budg(struct ubifs_info *c); +void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); +void dbg_dump_lprops(struct ubifs_info *c); +void dbg_dump_leb(const struct ubifs_info *c, int lnum); +void dbg_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode); +void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); +void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip); +void dbg_dump_tnc(struct ubifs_info *c); +void dbg_dump_index(struct ubifs_info *c); + +/* Checking helper functions */ + +typedef int (*dbg_leaf_callback)(struct ubifs_info *c, + struct ubifs_zbranch *zbr, void *priv); +typedef int (*dbg_znode_callback)(struct ubifs_info *c, + struct ubifs_znode *znode, void *priv); + +int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, + dbg_znode_callback znode_cb, void *priv); + +/* Checking functions */ + +int dbg_check_lprops(struct ubifs_info *c); + +int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); +int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); + +int dbg_check_cats(struct ubifs_info *c); + +int dbg_check_ltab(struct ubifs_info *c); + +int dbg_check_synced_i_size(struct inode *inode); + +int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); + +int dbg_check_tnc(struct ubifs_info *c, int extra); + +int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); + +int dbg_check_filesystem(struct ubifs_info *c); + +void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + int add_pos); + +int dbg_check_lprops(struct ubifs_info *c); +int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + int row, int col); + +/* Force the use of in-the-gaps method for testing */ + +#define dbg_force_in_the_gaps_enabled \ + (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) + +int dbg_force_in_the_gaps(void); + +/* Failure mode for recovery testing */ + +#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) + +void dbg_failure_mode_registration(struct ubifs_info *c); +void dbg_failure_mode_deregistration(struct ubifs_info *c); + +#ifndef UBIFS_DBG_PRESERVE_UBI + +#define ubi_leb_read dbg_leb_read +#define ubi_leb_write dbg_leb_write +#define ubi_leb_change dbg_leb_change +#define ubi_leb_erase dbg_leb_erase +#define ubi_leb_unmap dbg_leb_unmap +#define ubi_is_mapped dbg_is_mapped +#define ubi_leb_map dbg_leb_map + +#endif + +int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check); +int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len, int dtype); +int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, + int len, int dtype); +int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); +int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); +int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); +int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); + +static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, + int offset, int len) +{ + return dbg_leb_read(desc, lnum, buf, offset, len, 0); +} + +static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, + const void *buf, int offset, int len) +{ + return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); +} + +static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, + const void *buf, int len) +{ + return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); +} + +#else /* !CONFIG_UBIFS_FS_DEBUG */ + +#define UBIFS_DBG(op) +#define ubifs_assert(expr) ({}) +#define ubifs_assert_cmt_locked(c) +#define dbg_dump_stack() +#define dbg_err(fmt, ...) ({}) +#define dbg_msg(fmt, ...) ({}) +#define dbg_key(c, key, fmt, ...) ({}) + +#define dbg_gen(fmt, ...) ({}) +#define dbg_jnl(fmt, ...) ({}) +#define dbg_tnc(fmt, ...) ({}) +#define dbg_lp(fmt, ...) ({}) +#define dbg_find(fmt, ...) ({}) +#define dbg_mnt(fmt, ...) ({}) +#define dbg_io(fmt, ...) ({}) +#define dbg_cmt(fmt, ...) ({}) +#define dbg_budg(fmt, ...) ({}) +#define dbg_log(fmt, ...) ({}) +#define dbg_gc(fmt, ...) ({}) +#define dbg_scan(fmt, ...) ({}) +#define dbg_rcvry(fmt, ...) ({}) + +#define dbg_ntype(type) "" +#define dbg_cstate(cmt_state) "" +#define dbg_get_key_dump(c, key) ({}) +#define dbg_dump_inode(c, inode) ({}) +#define dbg_dump_node(c, node) ({}) +#define dbg_dump_budget_req(req) ({}) +#define dbg_dump_lstats(lst) ({}) +#define dbg_dump_budg(c) ({}) +#define dbg_dump_lprop(c, lp) ({}) +#define dbg_dump_lprops(c) ({}) +#define dbg_dump_leb(c, lnum) ({}) +#define dbg_dump_znode(c, znode) ({}) +#define dbg_dump_heap(c, heap, cat) ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +#define dbg_dump_tnc(c) ({}) +#define dbg_dump_index(c) ({}) + +#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 + +#define dbg_old_index_check_init(c, zroot) 0 +#define dbg_check_old_index(c, zroot) 0 + +#define dbg_check_cats(c) 0 + +#define dbg_check_ltab(c) 0 + +#define dbg_check_synced_i_size(inode) 0 + +#define dbg_check_dir_size(c, dir) 0 + +#define dbg_check_tnc(c, x) 0 + +#define dbg_check_idx_size(c, idx_size) 0 + +#define dbg_check_filesystem(c) 0 + +#define dbg_check_heap(c, heap, cat, add_pos) ({}) + +#define dbg_check_lprops(c) 0 +#define dbg_check_lpt_nodes(c, cnode, row, col) 0 + +#define dbg_force_in_the_gaps_enabled 0 +#define dbg_force_in_the_gaps() 0 + +#define dbg_failure_mode 0 +#define dbg_failure_mode_registration(c) ({}) +#define dbg_failure_mode_deregistration(c) ({}) + +#endif /* !CONFIG_UBIFS_FS_DEBUG */ + +#endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c new file mode 100644 index 000000000000..e90374be7d3b --- /dev/null +++ b/fs/ubifs/dir.c @@ -0,0 +1,1240 @@ +/* * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + * Zoltan Sogor + */ + +/* + * This file implements directory operations. + * + * All FS operations in this file allocate budget before writing anything to the + * media. If they fail to allocate it, the error is returned. The only + * exceptions are 'ubifs_unlink()' and 'ubifs_rmdir()' which keep working even + * if they unable to allocate the budget, because deletion %-ENOSPC failure is + * not what users are usually ready to get. UBIFS budgeting subsystem has some + * space reserved for these purposes. + * + * All operations in this file write all inodes which they change straight + * away, instead of marking them dirty. For example, 'ubifs_link()' changes + * @i_size of the parent inode and writes the parent inode together with the + * target inode. This was done to simplify file-system recovery which would + * otherwise be very difficult to do. The only exception is rename which marks + * the re-named inode dirty (because its @i_ctime is updated) but does not + * write it, but just marks it as dirty. + */ + +#include "ubifs.h" + +/** + * inherit_flags - inherit flags of the parent inode. + * @dir: parent inode + * @mode: new inode mode flags + * + * This is a helper function for 'ubifs_new_inode()' which inherits flag of the + * parent directory inode @dir. UBIFS inodes inherit the following flags: + * o %UBIFS_COMPR_FL, which is useful to switch compression on/of on + * sub-directory basis; + * o %UBIFS_SYNC_FL - useful for the same reasons; + * o %UBIFS_DIRSYNC_FL - similar, but relevant only to directories. + * + * This function returns the inherited flags. + */ +static int inherit_flags(const struct inode *dir, int mode) +{ + int flags; + const struct ubifs_inode *ui = ubifs_inode(dir); + + if (!S_ISDIR(dir->i_mode)) + /* + * The parent is not a directory, which means that an extended + * attribute inode is being created. No flags. + */ + return 0; + + flags = ui->flags & (UBIFS_COMPR_FL | UBIFS_SYNC_FL | UBIFS_DIRSYNC_FL); + if (!S_ISDIR(mode)) + /* The "DIRSYNC" flag only applies to directories */ + flags &= ~UBIFS_DIRSYNC_FL; + return flags; +} + +/** + * ubifs_new_inode - allocate new UBIFS inode object. + * @c: UBIFS file-system description object + * @dir: parent directory inode + * @mode: inode mode flags + * + * This function finds an unused inode number, allocates new inode and + * initializes it. Returns new inode in case of success and an error code in + * case of failure. + */ +struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, + int mode) +{ + struct inode *inode; + struct ubifs_inode *ui; + + inode = new_inode(c->vfs_sb); + ui = ubifs_inode(inode); + if (!inode) + return ERR_PTR(-ENOMEM); + + /* + * Set 'S_NOCMTIME' to prevent VFS form updating [mc]time of inodes and + * marking them dirty in file write path (see 'file_update_time()'). + * UBIFS has to fully control "clean <-> dirty" transitions of inodes + * to make budgeting work. + */ + inode->i_flags |= (S_NOCMTIME); + + inode->i_uid = current->fsuid; + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + inode->i_mode = mode; + inode->i_mtime = inode->i_atime = inode->i_ctime = + ubifs_current_time(inode); + inode->i_mapping->nrpages = 0; + /* Disable readahead */ + inode->i_mapping->backing_dev_info = &c->bdi; + + switch (mode & S_IFMT) { + case S_IFREG: + inode->i_mapping->a_ops = &ubifs_file_address_operations; + inode->i_op = &ubifs_file_inode_operations; + inode->i_fop = &ubifs_file_operations; + break; + case S_IFDIR: + inode->i_op = &ubifs_dir_inode_operations; + inode->i_fop = &ubifs_dir_operations; + inode->i_size = ui->ui_size = UBIFS_INO_NODE_SZ; + break; + case S_IFLNK: + inode->i_op = &ubifs_symlink_inode_operations; + break; + case S_IFSOCK: + case S_IFIFO: + case S_IFBLK: + case S_IFCHR: + inode->i_op = &ubifs_file_inode_operations; + break; + default: + BUG(); + } + + ui->flags = inherit_flags(dir, mode); + ubifs_set_inode_flags(inode); + if (S_ISREG(mode)) + ui->compr_type = c->default_compr; + else + ui->compr_type = UBIFS_COMPR_NONE; + ui->synced_i_size = 0; + + spin_lock(&c->cnt_lock); + /* Inode number overflow is currently not supported */ + if (c->highest_inum >= INUM_WARN_WATERMARK) { + if (c->highest_inum >= INUM_WATERMARK) { + spin_unlock(&c->cnt_lock); + ubifs_err("out of inode numbers"); + make_bad_inode(inode); + iput(inode); + return ERR_PTR(-EINVAL); + } + ubifs_warn("running out of inode numbers (current %lu, max %d)", + c->highest_inum, INUM_WATERMARK); + } + + inode->i_ino = ++c->highest_inum; + inode->i_generation = ++c->vfs_gen; + /* + * The creation sequence number remains with this inode for its + * lifetime. All nodes for this inode have a greater sequence number, + * and so it is possible to distinguish obsolete nodes belonging to a + * previous incarnation of the same inode number - for example, for the + * purpose of rebuilding the index. + */ + ui->creat_sqnum = ++c->max_sqnum; + spin_unlock(&c->cnt_lock); + return inode; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) +{ + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + if (le16_to_cpu(dent->nlen) != nm->len) + return -EINVAL; + if (memcmp(dent->name, nm->name, nm->len)) + return -EINVAL; + return 0; +} + +#else + +#define dbg_check_name(dent, nm) 0 + +#endif + +static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + int err; + union ubifs_key key; + struct inode *inode = NULL; + struct ubifs_dent_node *dent; + struct ubifs_info *c = dir->i_sb->s_fs_info; + + dbg_gen("'%.*s' in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, dir->i_ino); + + if (dentry->d_name.len > UBIFS_MAX_NLEN) + return ERR_PTR(-ENAMETOOLONG); + + dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); + if (!dent) + return ERR_PTR(-ENOMEM); + + dent_key_init(c, &key, dir->i_ino, &dentry->d_name); + + err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); + if (err) { + /* + * Do not hash the direntry if parent 'i_nlink' is zero, because + * this has side-effects - '->delete_inode()' call will not be + * called for the parent orphan inode, because 'd_count' of its + * direntry will stay 1 (it'll be negative direntry I guess) + * and prevent 'iput_final()' until the dentry is destroyed due + * to unmount or memory pressure. + */ + if (err == -ENOENT && dir->i_nlink != 0) { + dbg_gen("not found"); + goto done; + } + goto out; + } + + if (dbg_check_name(dent, &dentry->d_name)) { + err = -EINVAL; + goto out; + } + + inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum)); + if (IS_ERR(inode)) { + /* + * This should not happen. Probably the file-system needs + * checking. + */ + err = PTR_ERR(inode); + ubifs_err("dead directory entry '%.*s', error %d", + dentry->d_name.len, dentry->d_name.name, err); + ubifs_ro_mode(c, err); + goto out; + } + +done: + kfree(dent); + /* + * Note, d_splice_alias() would be required instead if we supported + * NFS. + */ + d_add(dentry, inode); + return NULL; + +out: + kfree(dent); + return ERR_PTR(err); +} + +static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct inode *inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; + int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1 }; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + + /* + * Budget request settings: new inode, new direntry, changing the + * parent directory inode. + */ + + dbg_gen("dent '%.*s', mode %#x in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + inode = ubifs_new_inode(c, dir, mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_budg; + } + + mutex_lock(&dir_ui->ui_mutex); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) + goto out_cancel; + mutex_unlock(&dir_ui->ui_mutex); + + ubifs_release_budget(c, &req); + insert_inode_hash(inode); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + mutex_unlock(&dir_ui->ui_mutex); + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + ubifs_err("cannot create regular file, error %d", err); + return err; +} + +/** + * vfs_dent_type - get VFS directory entry type. + * @type: UBIFS directory entry type + * + * This function converts UBIFS directory entry type into VFS directory entry + * type. + */ +static unsigned int vfs_dent_type(uint8_t type) +{ + switch (type) { + case UBIFS_ITYPE_REG: + return DT_REG; + case UBIFS_ITYPE_DIR: + return DT_DIR; + case UBIFS_ITYPE_LNK: + return DT_LNK; + case UBIFS_ITYPE_BLK: + return DT_BLK; + case UBIFS_ITYPE_CHR: + return DT_CHR; + case UBIFS_ITYPE_FIFO: + return DT_FIFO; + case UBIFS_ITYPE_SOCK: + return DT_SOCK; + default: + BUG(); + } + return 0; +} + +/* + * The classical Unix view for directory is that it is a linear array of + * (name, inode number) entries. Linux/VFS assumes this model as well. + * Particularly, 'readdir()' call wants us to return a directory entry offset + * which later may be used to continue 'readdir()'ing the directory or to + * 'seek()' to that specific direntry. Obviously UBIFS does not really fit this + * model because directory entries are identified by keys, which may collide. + * + * UBIFS uses directory entry hash value for directory offsets, so + * 'seekdir()'/'telldir()' may not always work because of possible key + * collisions. But UBIFS guarantees that consecutive 'readdir()' calls work + * properly by means of saving full directory entry name in the private field + * of the file description object. + * + * This means that UBIFS cannot support NFS which requires full + * 'seekdir()'/'telldir()' support. + */ +static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + int err, over = 0; + struct qstr nm; + union ubifs_key key; + struct ubifs_dent_node *dent; + struct inode *dir = file->f_path.dentry->d_inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; + + dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); + + if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) + /* + * The directory was seek'ed to a senseless position or there + * are no more entries. + */ + return 0; + + /* File positions 0 and 1 correspond to "." and ".." */ + if (file->f_pos == 0) { + ubifs_assert(!file->private_data); + over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); + if (over) + return 0; + file->f_pos = 1; + } + + if (file->f_pos == 1) { + ubifs_assert(!file->private_data); + over = filldir(dirent, "..", 2, 1, + parent_ino(file->f_path.dentry), DT_DIR); + if (over) + return 0; + + /* Find the first entry in TNC and save it */ + lowest_dent_key(c, &key, dir->i_ino); + nm.name = NULL; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + } + + dent = file->private_data; + if (!dent) { + /* + * The directory was seek'ed to and is now readdir'ed. + * Find the entry corresponding to @file->f_pos or the + * closest one. + */ + dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); + nm.name = NULL; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + } + + while (1) { + dbg_gen("feed '%s', ino %llu, new f_pos %#x", + dent->name, le64_to_cpu(dent->inum), + key_hash_flash(c, &dent->key)); + ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); + + nm.len = le16_to_cpu(dent->nlen); + over = filldir(dirent, dent->name, nm.len, file->f_pos, + le64_to_cpu(dent->inum), + vfs_dent_type(dent->type)); + if (over) + return 0; + + /* Switch to the next entry */ + key_read(c, &dent->key, &key); + nm.name = dent->name; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } + + kfree(file->private_data); + file->f_pos = key_hash_flash(c, &dent->key); + file->private_data = dent; + cond_resched(); + } + +out: + if (err != -ENOENT) { + ubifs_err("cannot find next direntry, error %d", err); + return err; + } + + kfree(file->private_data); + file->private_data = NULL; + file->f_pos = 2; + return 0; +} + +/* If a directory is seeked, we have to free saved readdir() state */ +static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin) +{ + kfree(file->private_data); + file->private_data = NULL; + return generic_file_llseek(file, offset, origin); +} + +/* Free saved readdir() state when the directory is closed */ +static int ubifs_dir_release(struct inode *dir, struct file *file) +{ + kfree(file->private_data); + file->private_data = NULL; + return 0; +} + +/** + * lock_2_inodes - lock two UBIFS inodes. + * @inode1: first inode + * @inode2: second inode + */ +static void lock_2_inodes(struct inode *inode1, struct inode *inode2) +{ + if (inode1->i_ino < inode2->i_ino) { + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2); + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3); + } else { + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3); + } +} + +/** + * unlock_2_inodes - unlock two UBIFS inodes inodes. + * @inode1: first inode + * @inode2: second inode + */ +static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) +{ + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); +} + +static int ubifs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct ubifs_info *c = dir->i_sb->s_fs_info; + struct inode *inode = old_dentry->d_inode; + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_inode *dir_ui = ubifs_inode(dir); + int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); + struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, + .dirtied_ino_d = ui->data_len }; + + /* + * Budget request settings: new direntry, changing the target inode, + * changing the parent inode. + */ + + dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, inode->i_ino, + inode->i_nlink, dir->i_ino); + err = dbg_check_synced_i_size(inode); + if (err) + return err; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + lock_2_inodes(dir, inode); + inc_nlink(inode); + atomic_inc(&inode->i_count); + inode->i_ctime = ubifs_current_time(inode); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) + goto out_cancel; + unlock_2_inodes(dir, inode); + + ubifs_release_budget(c, &req); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + drop_nlink(inode); + unlock_2_inodes(dir, inode); + ubifs_release_budget(c, &req); + iput(inode); + return err; +} + +static int ubifs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct ubifs_info *c = dir->i_sb->s_fs_info; + struct inode *inode = dentry->d_inode; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + int sz_change = CALC_DENT_SIZE(dentry->d_name.len); + int err, budgeted = 1; + struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; + + /* + * Budget request settings: deletion direntry, deletion inode (+1 for + * @dirtied_ino), changing the parent directory inode. If budgeting + * fails, go ahead anyway because we have extra space reserved for + * deletions. + */ + + dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, inode->i_ino, + inode->i_nlink, dir->i_ino); + err = dbg_check_synced_i_size(inode); + if (err) + return err; + + err = ubifs_budget_space(c, &req); + if (err) { + if (err != -ENOSPC) + return err; + err = 0; + budgeted = 0; + } + + lock_2_inodes(dir, inode); + inode->i_ctime = ubifs_current_time(dir); + drop_nlink(inode); + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0); + if (err) + goto out_cancel; + unlock_2_inodes(dir, inode); + + if (budgeted) + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ + c->nospace = c->nospace_rp = 0; + smp_wmb(); + } + return 0; + +out_cancel: + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + inc_nlink(inode); + unlock_2_inodes(dir, inode); + if (budgeted) + ubifs_release_budget(c, &req); + return err; +} + +/** + * check_dir_empty - check if a directory is empty or not. + * @c: UBIFS file-system description object + * @dir: VFS inode object of the directory to check + * + * This function checks if directory @dir is empty. Returns zero if the + * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes + * in case of of errors. + */ +static int check_dir_empty(struct ubifs_info *c, struct inode *dir) +{ + struct qstr nm = { .name = NULL }; + struct ubifs_dent_node *dent; + union ubifs_key key; + int err; + + lowest_dent_key(c, &key, dir->i_ino); + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + if (err == -ENOENT) + err = 0; + } else { + kfree(dent); + err = -ENOTEMPTY; + } + return err; +} + +static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct ubifs_info *c = dir->i_sb->s_fs_info; + struct inode *inode = dentry->d_inode; + int sz_change = CALC_DENT_SIZE(dentry->d_name.len); + int err, budgeted = 1; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; + + /* + * Budget request settings: deletion direntry, deletion inode and + * changing the parent inode. If budgeting fails, go ahead anyway + * because we have extra space reserved for deletions. + */ + + dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, + dentry->d_name.name, inode->i_ino, dir->i_ino); + + err = check_dir_empty(c, dentry->d_inode); + if (err) + return err; + + err = ubifs_budget_space(c, &req); + if (err) { + if (err != -ENOSPC) + return err; + budgeted = 0; + } + + lock_2_inodes(dir, inode); + inode->i_ctime = ubifs_current_time(dir); + clear_nlink(inode); + drop_nlink(dir); + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0); + if (err) + goto out_cancel; + unlock_2_inodes(dir, inode); + + if (budgeted) + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ + c->nospace = c->nospace_rp = 0; + smp_wmb(); + } + return 0; + +out_cancel: + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + inc_nlink(dir); + inc_nlink(inode); + inc_nlink(inode); + unlock_2_inodes(dir, inode); + if (budgeted) + ubifs_release_budget(c, &req); + return err; +} + +static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_info *c = dir->i_sb->s_fs_info; + int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino_d = 1 }; + + /* + * Budget request settings: new inode, new direntry and changing parent + * directory inode. + */ + + dbg_gen("dent '%.*s', mode %#x in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + inode = ubifs_new_inode(c, dir, S_IFDIR | mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_budg; + } + + mutex_lock(&dir_ui->ui_mutex); + insert_inode_hash(inode); + inc_nlink(inode); + inc_nlink(dir); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) { + ubifs_err("cannot create directory, error %d", err); + goto out_cancel; + } + mutex_unlock(&dir_ui->ui_mutex); + + ubifs_release_budget(c, &req); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + drop_nlink(dir); + mutex_unlock(&dir_ui->ui_mutex); + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +static int ubifs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + struct inode *inode; + struct ubifs_inode *ui; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_info *c = dir->i_sb->s_fs_info; + union ubifs_dev_desc *dev = NULL; + int sz_change = CALC_DENT_SIZE(dentry->d_name.len); + int err, devlen = 0; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .new_ino_d = devlen, .dirtied_ino = 1 }; + + /* + * Budget request settings: new inode, new direntry and changing parent + * directory inode. + */ + + dbg_gen("dent '%.*s' in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, dir->i_ino); + + if (!new_valid_dev(rdev)) + return -EINVAL; + + if (S_ISBLK(mode) || S_ISCHR(mode)) { + dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); + if (!dev) + return -ENOMEM; + devlen = ubifs_encode_dev(dev, rdev); + } + + err = ubifs_budget_space(c, &req); + if (err) { + kfree(dev); + return err; + } + + inode = ubifs_new_inode(c, dir, mode); + if (IS_ERR(inode)) { + kfree(dev); + err = PTR_ERR(inode); + goto out_budg; + } + + init_special_inode(inode, inode->i_mode, rdev); + inode->i_size = ubifs_inode(inode)->ui_size = devlen; + ui = ubifs_inode(inode); + ui->data = dev; + ui->data_len = devlen; + + mutex_lock(&dir_ui->ui_mutex); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) + goto out_cancel; + mutex_unlock(&dir_ui->ui_mutex); + + ubifs_release_budget(c, &req); + insert_inode_hash(inode); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + mutex_unlock(&dir_ui->ui_mutex); + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +static int ubifs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct inode *inode; + struct ubifs_inode *ui; + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_info *c = dir->i_sb->s_fs_info; + int err, len = strlen(symname); + int sz_change = CALC_DENT_SIZE(dentry->d_name.len); + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .new_ino_d = len, .dirtied_ino = 1 }; + + /* + * Budget request settings: new inode, new direntry and changing parent + * directory inode. + */ + + dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len, + dentry->d_name.name, symname, dir->i_ino); + + if (len > UBIFS_MAX_INO_DATA) + return -ENAMETOOLONG; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_budg; + } + + ui = ubifs_inode(inode); + ui->data = kmalloc(len + 1, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_inode; + } + + memcpy(ui->data, symname, len); + ((char *)ui->data)[len] = '\0'; + /* + * The terminating zero byte is not written to the flash media and it + * is put just to make later in-memory string processing simpler. Thus, + * data length is @len, not @len + %1. + */ + ui->data_len = len; + inode->i_size = ubifs_inode(inode)->ui_size = len; + + mutex_lock(&dir_ui->ui_mutex); + dir->i_size += sz_change; + dir_ui->ui_size = dir->i_size; + dir->i_mtime = dir->i_ctime = inode->i_ctime; + err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); + if (err) + goto out_cancel; + mutex_unlock(&dir_ui->ui_mutex); + + ubifs_release_budget(c, &req); + insert_inode_hash(inode); + d_instantiate(dentry, inode); + return 0; + +out_cancel: + dir->i_size -= sz_change; + dir_ui->ui_size = dir->i_size; + mutex_unlock(&dir_ui->ui_mutex); +out_inode: + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +/** + * lock_3_inodes - lock three UBIFS inodes for rename. + * @inode1: first inode + * @inode2: second inode + * @inode3: third inode + * + * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may + * be null. + */ +static void lock_3_inodes(struct inode *inode1, struct inode *inode2, + struct inode *inode3) +{ + struct inode *i1, *i2, *i3; + + if (!inode3) { + if (inode1 != inode2) { + lock_2_inodes(inode1, inode2); + return; + } + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + return; + } + + if (inode1 == inode2) { + lock_2_inodes(inode1, inode3); + return; + } + + /* 3 different inodes */ + if (inode1 < inode2) { + i3 = inode2; + if (inode1 < inode3) { + i1 = inode1; + i2 = inode3; + } else { + i1 = inode3; + i2 = inode1; + } + } else { + i3 = inode1; + if (inode2 < inode3) { + i1 = inode2; + i2 = inode3; + } else { + i1 = inode3; + i2 = inode2; + } + } + mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1); + lock_2_inodes(i2, i3); +} + +/** + * unlock_3_inodes - unlock three UBIFS inodes for rename. + * @inode1: first inode + * @inode2: second inode + * @inode3: third inode + */ +static void unlock_3_inodes(struct inode *inode1, struct inode *inode2, + struct inode *inode3) +{ + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); + if (inode1 != inode2) + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + if (inode3) + mutex_unlock(&ubifs_inode(inode3)->ui_mutex); +} + +static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct ubifs_info *c = old_dir->i_sb->s_fs_info; + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode); + int err, release, sync = 0, move = (new_dir != old_dir); + int is_dir = S_ISDIR(old_inode->i_mode); + int unlink = !!new_inode; + int new_sz = CALC_DENT_SIZE(new_dentry->d_name.len); + int old_sz = CALC_DENT_SIZE(old_dentry->d_name.len); + struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, + .dirtied_ino = 3 }; + struct ubifs_budget_req ino_req = { .dirtied_ino = 1, + .dirtied_ino_d = old_inode_ui->data_len }; + struct timespec time; + + /* + * Budget request settings: deletion direntry, new direntry, removing + * the old inode, and changing old and new parent directory inodes. + * + * However, this operation also marks the target inode as dirty and + * does not write it, so we allocate budget for the target inode + * separately. + */ + + dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in " + "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, + old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, + new_dentry->d_name.name, new_dir->i_ino); + + if (unlink && is_dir) { + err = check_dir_empty(c, new_inode); + if (err) + return err; + } + + err = ubifs_budget_space(c, &req); + if (err) + return err; + err = ubifs_budget_space(c, &ino_req); + if (err) { + ubifs_release_budget(c, &req); + return err; + } + + lock_3_inodes(old_dir, new_dir, new_inode); + + /* + * Like most other Unix systems, set the @i_ctime for inodes on a + * rename. + */ + time = ubifs_current_time(old_dir); + old_inode->i_ctime = time; + + /* We must adjust parent link count when renaming directories */ + if (is_dir) { + if (move) { + /* + * @old_dir loses a link because we are moving + * @old_inode to a different directory. + */ + drop_nlink(old_dir); + /* + * @new_dir only gains a link if we are not also + * overwriting an existing directory. + */ + if (!unlink) + inc_nlink(new_dir); + } else { + /* + * @old_inode is not moving to a different directory, + * but @old_dir still loses a link if we are + * overwriting an existing directory. + */ + if (unlink) + drop_nlink(old_dir); + } + } + + old_dir->i_size -= old_sz; + ubifs_inode(old_dir)->ui_size = old_dir->i_size; + old_dir->i_mtime = old_dir->i_ctime = time; + new_dir->i_mtime = new_dir->i_ctime = time; + + /* + * And finally, if we unlinked a direntry which happened to have the + * same name as the moved direntry, we have to decrement @i_nlink of + * the unlinked inode and change its ctime. + */ + if (unlink) { + /* + * Directories cannot have hard-links, so if this is a + * directory, decrement its @i_nlink twice because an empty + * directory has @i_nlink 2. + */ + if (is_dir) + drop_nlink(new_inode); + new_inode->i_ctime = time; + drop_nlink(new_inode); + } else { + new_dir->i_size += new_sz; + ubifs_inode(new_dir)->ui_size = new_dir->i_size; + } + + /* + * Do not ask 'ubifs_jnl_rename()' to flush write-buffer if @old_inode + * is dirty, because this will be done later on at the end of + * 'ubifs_rename()'. + */ + if (IS_SYNC(old_inode)) { + sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); + if (unlink && IS_SYNC(new_inode)) + sync = 1; + } + err = ubifs_jnl_rename(c, old_dir, old_dentry, new_dir, new_dentry, + sync); + if (err) + goto out_cancel; + + unlock_3_inodes(old_dir, new_dir, new_inode); + ubifs_release_budget(c, &req); + + mutex_lock(&old_inode_ui->ui_mutex); + release = old_inode_ui->dirty; + mark_inode_dirty_sync(old_inode); + mutex_unlock(&old_inode_ui->ui_mutex); + + if (release) + ubifs_release_budget(c, &ino_req); + if (IS_SYNC(old_inode)) + err = old_inode->i_sb->s_op->write_inode(old_inode, 1); + return err; + +out_cancel: + if (unlink) { + if (is_dir) + inc_nlink(new_inode); + inc_nlink(new_inode); + } else { + new_dir->i_size -= new_sz; + ubifs_inode(new_dir)->ui_size = new_dir->i_size; + } + old_dir->i_size += old_sz; + ubifs_inode(old_dir)->ui_size = old_dir->i_size; + if (is_dir) { + if (move) { + inc_nlink(old_dir); + if (!unlink) + drop_nlink(new_dir); + } else { + if (unlink) + inc_nlink(old_dir); + } + } + unlock_3_inodes(old_dir, new_dir, new_inode); + ubifs_release_budget(c, &ino_req); + ubifs_release_budget(c, &req); + return err; +} + +int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + loff_t size; + struct inode *inode = dentry->d_inode; + struct ubifs_inode *ui = ubifs_inode(inode); + + mutex_lock(&ui->ui_mutex); + stat->dev = inode->i_sb->s_dev; + stat->ino = inode->i_ino; + stat->mode = inode->i_mode; + stat->nlink = inode->i_nlink; + stat->uid = inode->i_uid; + stat->gid = inode->i_gid; + stat->rdev = inode->i_rdev; + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; + stat->blksize = UBIFS_BLOCK_SIZE; + stat->size = ui->ui_size; + + /* + * Unfortunately, the 'stat()' system call was designed for block + * device based file systems, and it is not appropriate for UBIFS, + * because UBIFS does not have notion of "block". For example, it is + * difficult to tell how many block a directory takes - it actually + * takes less than 300 bytes, but we have to round it to block size, + * which introduces large mistake. This makes utilities like 'du' to + * report completely senseless numbers. This is the reason why UBIFS + * goes the same way as JFFS2 - it reports zero blocks for everything + * but regular files, which makes more sense than reporting completely + * wrong sizes. + */ + if (S_ISREG(inode->i_mode)) { + size = ui->xattr_size; + size += stat->size; + size = ALIGN(size, UBIFS_BLOCK_SIZE); + /* + * Note, user-space expects 512-byte blocks count irrespectively + * of what was reported in @stat->size. + */ + stat->blocks = size >> 9; + } else + stat->blocks = 0; + mutex_unlock(&ui->ui_mutex); + return 0; +} + +struct inode_operations ubifs_dir_inode_operations = { + .lookup = ubifs_lookup, + .create = ubifs_create, + .link = ubifs_link, + .symlink = ubifs_symlink, + .unlink = ubifs_unlink, + .mkdir = ubifs_mkdir, + .rmdir = ubifs_rmdir, + .mknod = ubifs_mknod, + .rename = ubifs_rename, + .setattr = ubifs_setattr, + .getattr = ubifs_getattr, +#ifdef CONFIG_UBIFS_FS_XATTR + .setxattr = ubifs_setxattr, + .getxattr = ubifs_getxattr, + .listxattr = ubifs_listxattr, + .removexattr = ubifs_removexattr, +#endif +}; + +struct file_operations ubifs_dir_operations = { + .llseek = ubifs_dir_llseek, + .release = ubifs_dir_release, + .read = generic_read_dir, + .readdir = ubifs_readdir, + .fsync = ubifs_fsync, + .unlocked_ioctl = ubifs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ubifs_compat_ioctl, +#endif +}; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c new file mode 100644 index 000000000000..005a3b854d96 --- /dev/null +++ b/fs/ubifs/file.c @@ -0,0 +1,1275 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements VFS file and inode operations of regular files, device + * nodes and symlinks as well as address space operations. + * + * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the + * page is dirty and is used for budgeting purposes - dirty pages should not be + * budgeted. The PG_checked flag is set if full budgeting is required for the + * page e.g., when it corresponds to a file hole or it is just beyond the file + * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to + * fail in this function, and the budget is released in 'ubifs_write_end()'. So + * the PG_private and PG_checked flags carry the information about how the page + * was budgeted, to make it possible to release the budget properly. + * + * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations + * we implement. However, this is not true for '->writepage()', which might be + * called with 'i_mutex' unlocked. For example, when pdflush is performing + * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the + * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is + * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim + * path'. So, in '->writepage()' we are only guaranteed that the page is + * locked. + * + * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., + * readahead path does not have it locked ("sys_read -> generic_file_aio_read + * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is + * not set as well. However, UBIFS disables readahead. + * + * This, for example means that there might be 2 concurrent '->writepage()' + * calls for the same inode, but different inode dirty pages. + */ + +#include "ubifs.h" +#include + +static int read_block(struct inode *inode, void *addr, unsigned int block, + struct ubifs_data_node *dn) +{ + struct ubifs_info *c = inode->i_sb->s_fs_info; + int err, len, out_len; + union ubifs_key key; + unsigned int dlen; + + data_key_init(c, &key, inode->i_ino, block); + err = ubifs_tnc_lookup(c, &key, dn); + if (err) { + if (err == -ENOENT) + /* Not found, so it must be a hole */ + memset(addr, 0, UBIFS_BLOCK_SIZE); + return err; + } + + ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum); + + len = le32_to_cpu(dn->size); + if (len <= 0 || len > UBIFS_BLOCK_SIZE) + goto dump; + + dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + out_len = UBIFS_BLOCK_SIZE; + err = ubifs_decompress(&dn->data, dlen, addr, &out_len, + le16_to_cpu(dn->compr_type)); + if (err || len != out_len) + goto dump; + + /* + * Data length can be less than a full block, even for blocks that are + * not the last in the file (e.g., as a result of making a hole and + * appending data). Ensure that the remainder is zeroed out. + */ + if (len < UBIFS_BLOCK_SIZE) + memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); + + return 0; + +dump: + ubifs_err("bad data node (block %u, inode %lu)", + block, inode->i_ino); + dbg_dump_node(c, dn); + return -EINVAL; +} + +static int do_readpage(struct page *page) +{ + void *addr; + int err = 0, i; + unsigned int block, beyond; + struct ubifs_data_node *dn; + struct inode *inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + + dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", + inode->i_ino, page->index, i_size, page->flags); + ubifs_assert(!PageChecked(page)); + ubifs_assert(!PagePrivate(page)); + + addr = kmap(page); + + block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; + if (block >= beyond) { + /* Reading beyond inode */ + SetPageChecked(page); + memset(addr, 0, PAGE_CACHE_SIZE); + goto out; + } + + dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); + if (!dn) { + err = -ENOMEM; + goto error; + } + + i = 0; + while (1) { + int ret; + + if (block >= beyond) { + /* Reading beyond inode */ + err = -ENOENT; + memset(addr, 0, UBIFS_BLOCK_SIZE); + } else { + ret = read_block(inode, addr, block, dn); + if (ret) { + err = ret; + if (err != -ENOENT) + break; + } + } + if (++i >= UBIFS_BLOCKS_PER_PAGE) + break; + block += 1; + addr += UBIFS_BLOCK_SIZE; + } + if (err) { + if (err == -ENOENT) { + /* Not found, so it must be a hole */ + SetPageChecked(page); + dbg_gen("hole"); + goto out_free; + } + ubifs_err("cannot read page %lu of inode %lu, error %d", + page->index, inode->i_ino, err); + goto error; + } + +out_free: + kfree(dn); +out: + SetPageUptodate(page); + ClearPageError(page); + flush_dcache_page(page); + kunmap(page); + return 0; + +error: + kfree(dn); + ClearPageUptodate(page); + SetPageError(page); + flush_dcache_page(page); + kunmap(page); + return err; +} + +/** + * release_new_page_budget - release budget of a new page. + * @c: UBIFS file-system description object + * + * This is a helper function which releases budget corresponding to the budget + * of one new page of data. + */ +static void release_new_page_budget(struct ubifs_info *c) +{ + struct ubifs_budget_req req = { .recalculate = 1, .new_page = 1 }; + + ubifs_release_budget(c, &req); +} + +/** + * release_existing_page_budget - release budget of an existing page. + * @c: UBIFS file-system description object + * + * This is a helper function which releases budget corresponding to the budget + * of changing one one page of data which already exists on the flash media. + */ +static void release_existing_page_budget(struct ubifs_info *c) +{ + struct ubifs_budget_req req = { .dd_growth = c->page_budget}; + + ubifs_release_budget(c, &req); +} + +static int write_begin_slow(struct address_space *mapping, + loff_t pos, unsigned len, struct page **pagep) +{ + struct inode *inode = mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + struct ubifs_budget_req req = { .new_page = 1 }; + int uninitialized_var(err), appending = !!(pos + len > inode->i_size); + struct page *page; + + dbg_gen("ino %lu, pos %llu, len %u, i_size %lld", + inode->i_ino, pos, len, inode->i_size); + + /* + * At the slow path we have to budget before locking the page, because + * budgeting may force write-back, which would wait on locked pages and + * deadlock if we had the page locked. At this point we do not know + * anything about the page, so assume that this is a new page which is + * written to a hole. This corresponds to largest budget. Later the + * budget will be amended if this is not true. + */ + if (appending) + /* We are appending data, budget for inode change */ + req.dirtied_ino = 1; + + err = ubifs_budget_space(c, &req); + if (unlikely(err)) + return err; + + page = __grab_cache_page(mapping, index); + if (unlikely(!page)) { + ubifs_release_budget(c, &req); + return -ENOMEM; + } + + if (!PageUptodate(page)) { + if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + SetPageChecked(page); + else { + err = do_readpage(page); + if (err) { + unlock_page(page); + page_cache_release(page); + return err; + } + } + + SetPageUptodate(page); + ClearPageError(page); + } + + if (PagePrivate(page)) + /* + * The page is dirty, which means it was budgeted twice: + * o first time the budget was allocated by the task which + * made the page dirty and set the PG_private flag; + * o and then we budgeted for it for the second time at the + * very beginning of this function. + * + * So what we have to do is to release the page budget we + * allocated. + */ + release_new_page_budget(c); + else if (!PageChecked(page)) + /* + * We are changing a page which already exists on the media. + * This means that changing the page does not make the amount + * of indexing information larger, and this part of the budget + * which we have already acquired may be released. + */ + ubifs_convert_page_budget(c); + + if (appending) { + struct ubifs_inode *ui = ubifs_inode(inode); + + /* + * 'ubifs_write_end()' is optimized from the fast-path part of + * 'ubifs_write_begin()' and expects the @ui_mutex to be locked + * if data is appended. + */ + mutex_lock(&ui->ui_mutex); + if (ui->dirty) + /* + * The inode is dirty already, so we may free the + * budget we allocated. + */ + ubifs_release_dirty_inode_budget(c, ui); + } + + *pagep = page; + return 0; +} + +/** + * allocate_budget - allocate budget for 'ubifs_write_begin()'. + * @c: UBIFS file-system description object + * @page: page to allocate budget for + * @ui: UBIFS inode object the page belongs to + * @appending: non-zero if the page is appended + * + * This is a helper function for 'ubifs_write_begin()' which allocates budget + * for the operation. The budget is allocated differently depending on whether + * this is appending, whether the page is dirty or not, and so on. This + * function leaves the @ui->ui_mutex locked in case of appending. Returns zero + * in case of success and %-ENOSPC in case of failure. + */ +static int allocate_budget(struct ubifs_info *c, struct page *page, + struct ubifs_inode *ui, int appending) +{ + struct ubifs_budget_req req = { .fast = 1 }; + + if (PagePrivate(page)) { + if (!appending) + /* + * The page is dirty and we are not appending, which + * means no budget is needed at all. + */ + return 0; + + mutex_lock(&ui->ui_mutex); + if (ui->dirty) + /* + * The page is dirty and we are appending, so the inode + * has to be marked as dirty. However, it is already + * dirty, so we do not need any budget. We may return, + * but @ui->ui_mutex hast to be left locked because we + * should prevent write-back from flushing the inode + * and freeing the budget. The lock will be released in + * 'ubifs_write_end()'. + */ + return 0; + + /* + * The page is dirty, we are appending, the inode is clean, so + * we need to budget the inode change. + */ + req.dirtied_ino = 1; + } else { + if (PageChecked(page)) + /* + * The page corresponds to a hole and does not + * exist on the media. So changing it makes + * make the amount of indexing information + * larger, and we have to budget for a new + * page. + */ + req.new_page = 1; + else + /* + * Not a hole, the change will not add any new + * indexing information, budget for page + * change. + */ + req.dirtied_page = 1; + + if (appending) { + mutex_lock(&ui->ui_mutex); + if (!ui->dirty) + /* + * The inode is clean but we will have to mark + * it as dirty because we are appending. This + * needs a budget. + */ + req.dirtied_ino = 1; + } + } + + return ubifs_budget_space(c, &req); +} + +/* + * This function is called when a page of data is going to be written. Since + * the page of data will not necessarily go to the flash straight away, UBIFS + * has to reserve space on the media for it, which is done by means of + * budgeting. + * + * This is the hot-path of the file-system and we are trying to optimize it as + * much as possible. For this reasons it is split on 2 parts - slow and fast. + * + * There many budgeting cases: + * o a new page is appended - we have to budget for a new page and for + * changing the inode; however, if the inode is already dirty, there is + * no need to budget for it; + * o an existing clean page is changed - we have budget for it; if the page + * does not exist on the media (a hole), we have to budget for a new + * page; otherwise, we may budget for changing an existing page; the + * difference between these cases is that changing an existing page does + * not introduce anything new to the FS indexing information, so it does + * not grow, and smaller budget is acquired in this case; + * o an existing dirty page is changed - no need to budget at all, because + * the page budget has been acquired by earlier, when the page has been + * marked dirty. + * + * UBIFS budgeting sub-system may force write-back if it thinks there is no + * space to reserve. This imposes some locking restrictions and makes it + * impossible to take into account the above cases, and makes it impossible to + * optimize budgeting. + * + * The solution for this is that the fast path of 'ubifs_write_begin()' assumes + * there is a plenty of flash space and the budget will be acquired quickly, + * without forcing write-back. The slow path does not make this assumption. + */ +static int ubifs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_inode *ui = ubifs_inode(inode); + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + int uninitialized_var(err), appending = !!(pos + len > inode->i_size); + struct page *page; + + + ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); + + if (unlikely(c->ro_media)) + return -EROFS; + + /* Try out the fast-path part first */ + page = __grab_cache_page(mapping, index); + if (unlikely(!page)) + return -ENOMEM; + + if (!PageUptodate(page)) { + /* The page is not loaded from the flash */ + if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + /* + * We change whole page so no need to load it. But we + * have to set the @PG_checked flag to make the further + * code the page is new. This might be not true, but it + * is better to budget more that to read the page from + * the media. + */ + SetPageChecked(page); + else { + err = do_readpage(page); + if (err) { + unlock_page(page); + page_cache_release(page); + return err; + } + } + + SetPageUptodate(page); + ClearPageError(page); + } + + err = allocate_budget(c, page, ui, appending); + if (unlikely(err)) { + ubifs_assert(err == -ENOSPC); + /* + * Budgeting failed which means it would have to force + * write-back but didn't, because we set the @fast flag in the + * request. Write-back cannot be done now, while we have the + * page locked, because it would deadlock. Unlock and free + * everything and fall-back to slow-path. + */ + if (appending) { + ubifs_assert(mutex_is_locked(&ui->ui_mutex)); + mutex_unlock(&ui->ui_mutex); + } + unlock_page(page); + page_cache_release(page); + + return write_begin_slow(mapping, pos, len, pagep); + } + + /* + * Whee, we aquired budgeting quickly - without involving + * garbage-collection, committing or forceing write-back. We return + * with @ui->ui_mutex locked if we are appending pages, and unlocked + * otherwise. This is an optimization (slightly hacky though). + */ + *pagep = page; + return 0; + +} + +/** + * cancel_budget - cancel budget. + * @c: UBIFS file-system description object + * @page: page to cancel budget for + * @ui: UBIFS inode object the page belongs to + * @appending: non-zero if the page is appended + * + * This is a helper function for a page write operation. It unlocks the + * @ui->ui_mutex in case of appending. + */ +static void cancel_budget(struct ubifs_info *c, struct page *page, + struct ubifs_inode *ui, int appending) +{ + if (appending) { + if (!ui->dirty) + ubifs_release_dirty_inode_budget(c, ui); + mutex_unlock(&ui->ui_mutex); + } + if (!PagePrivate(page)) { + if (PageChecked(page)) + release_new_page_budget(c); + else + release_existing_page_budget(c); + } +} + +static int ubifs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; + loff_t end_pos = pos + len; + int appending = !!(end_pos > inode->i_size); + + dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld", + inode->i_ino, pos, page->index, len, copied, inode->i_size); + + if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) { + /* + * VFS copied less data to the page that it intended and + * declared in its '->write_begin()' call via the @len + * argument. If the page was not up-to-date, and @len was + * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did + * not load it from the media (for optimization reasons). This + * means that part of the page contains garbage. So read the + * page now. + */ + dbg_gen("copied %d instead of %d, read page and repeat", + copied, len); + cancel_budget(c, page, ui, appending); + + /* + * Return 0 to force VFS to repeat the whole operation, or the + * error code if 'do_readpage()' failes. + */ + copied = do_readpage(page); + goto out; + } + + if (!PagePrivate(page)) { + SetPagePrivate(page); + atomic_long_inc(&c->dirty_pg_cnt); + __set_page_dirty_nobuffers(page); + } + + if (appending) { + i_size_write(inode, end_pos); + ui->ui_size = end_pos; + /* + * Note, we do not set @I_DIRTY_PAGES (which means that the + * inode has dirty pages), this has been done in + * '__set_page_dirty_nobuffers()'. + */ + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + ubifs_assert(mutex_is_locked(&ui->ui_mutex)); + mutex_unlock(&ui->ui_mutex); + } + +out: + unlock_page(page); + page_cache_release(page); + return copied; +} + +static int ubifs_readpage(struct file *file, struct page *page) +{ + do_readpage(page); + unlock_page(page); + return 0; +} + +static int do_writepage(struct page *page, int len) +{ + int err = 0, i, blen; + unsigned int block; + void *addr; + union ubifs_key key; + struct inode *inode = page->mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + +#ifdef UBIFS_DEBUG + spin_lock(&ui->ui_lock); + ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); + spin_unlock(&ui->ui_lock); +#endif + + /* Update radix tree tags */ + set_page_writeback(page); + + addr = kmap(page); + block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + i = 0; + while (len) { + blen = min_t(int, len, UBIFS_BLOCK_SIZE); + data_key_init(c, &key, inode->i_ino, block); + err = ubifs_jnl_write_data(c, inode, &key, addr, blen); + if (err) + break; + if (++i >= UBIFS_BLOCKS_PER_PAGE) + break; + block += 1; + addr += blen; + len -= blen; + } + if (err) { + SetPageError(page); + ubifs_err("cannot write page %lu of inode %lu, error %d", + page->index, inode->i_ino, err); + ubifs_ro_mode(c, err); + } + + ubifs_assert(PagePrivate(page)); + if (PageChecked(page)) + release_new_page_budget(c); + else + release_existing_page_budget(c); + + atomic_long_dec(&c->dirty_pg_cnt); + ClearPagePrivate(page); + ClearPageChecked(page); + + kunmap(page); + unlock_page(page); + end_page_writeback(page); + return err; +} + +/* + * When writing-back dirty inodes, VFS first writes-back pages belonging to the + * inode, then the inode itself. For UBIFS this may cause a problem. Consider a + * situation when a we have an inode with size 0, then a megabyte of data is + * appended to the inode, then write-back starts and flushes some amount of the + * dirty pages, the journal becomes full, commit happens and finishes, and then + * an unclean reboot happens. When the file system is mounted next time, the + * inode size would still be 0, but there would be many pages which are beyond + * the inode size, they would be indexed and consume flash space. Because the + * journal has been committed, the replay would not be able to detect this + * situation and correct the inode size. This means UBIFS would have to scan + * whole index and correct all inode sizes, which is long an unacceptable. + * + * To prevent situations like this, UBIFS writes pages back only if they are + * within last synchronized inode size, i.e. the the size which has been + * written to the flash media last time. Otherwise, UBIFS forces inode + * write-back, thus making sure the on-flash inode contains current inode size, + * and then keeps writing pages back. + * + * Some locking issues explanation. 'ubifs_writepage()' first is called with + * the page locked, and it locks @ui_mutex. However, write-back does take inode + * @i_mutex, which means other VFS operations may be run on this inode at the + * same time. And the problematic one is truncation to smaller size, from where + * we have to call 'vmtruncate()', which first changes @inode->i_size, then + * drops the truncated pages. And while dropping the pages, it takes the page + * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with + * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This + * means that @inode->i_size is changed while @ui_mutex is unlocked. + * + * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond + * inode size. How do we do this if @inode->i_size may became smaller while we + * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the + * @ui->ui_isize "shadow" field which UBIFS uses instead of @inode->i_size + * internally and updates it under @ui_mutex. + * + * Q: why we do not worry that if we race with truncation, we may end up with a + * situation when the inode is truncated while we are in the middle of + * 'do_writepage()', so we do write beyond inode size? + * A: If we are in the middle of 'do_writepage()', truncation would be locked + * on the page lock and it would not write the truncated inode node to the + * journal before we have finished. + */ +static int ubifs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct ubifs_inode *ui = ubifs_inode(inode); + loff_t i_size = i_size_read(inode), synced_i_size; + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + int err, len = i_size & (PAGE_CACHE_SIZE - 1); + void *kaddr; + + dbg_gen("ino %lu, pg %lu, pg flags %#lx", + inode->i_ino, page->index, page->flags); + ubifs_assert(PagePrivate(page)); + + /* Is the page fully outside @i_size? (truncate in progress) */ + if (page->index > end_index || (page->index == end_index && !len)) { + err = 0; + goto out_unlock; + } + + spin_lock(&ui->ui_lock); + synced_i_size = ui->synced_i_size; + spin_unlock(&ui->ui_lock); + + /* Is the page fully inside @i_size? */ + if (page->index < end_index) { + if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { + err = inode->i_sb->s_op->write_inode(inode, 1); + if (err) + goto out_unlock; + /* + * The inode has been written, but the write-buffer has + * not been synchronized, so in case of an unclean + * reboot we may end up with some pages beyond inode + * size, but they would be in the journal (because + * commit flushes write buffers) and recovery would deal + * with this. + */ + } + return do_writepage(page, PAGE_CACHE_SIZE); + } + + /* + * The page straddles @i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + + if (i_size > synced_i_size) { + err = inode->i_sb->s_op->write_inode(inode, 1); + if (err) + goto out_unlock; + } + + return do_writepage(page, len); + +out_unlock: + unlock_page(page); + return err; +} + +/** + * do_attr_changes - change inode attributes. + * @inode: inode to change attributes for + * @attr: describes attributes to change + */ +static void do_attr_changes(struct inode *inode, const struct iattr *attr) +{ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (attr->ia_valid & ATTR_ATIME) + inode->i_atime = timespec_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); + if (attr->ia_valid & ATTR_MTIME) + inode->i_mtime = timespec_trunc(attr->ia_mtime, + inode->i_sb->s_time_gran); + if (attr->ia_valid & ATTR_CTIME) + inode->i_ctime = timespec_trunc(attr->ia_ctime, + inode->i_sb->s_time_gran); + if (attr->ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + inode->i_mode = mode; + } +} + +/** + * do_truncation - truncate an inode. + * @c: UBIFS file-system description object + * @inode: inode to truncate + * @attr: inode attribute changes description + * + * This function implements VFS '->setattr()' call when the inode is truncated + * to a smaller size. Returns zero in case of success and a negative error code + * in case of failure. + */ +static int do_truncation(struct ubifs_info *c, struct inode *inode, + const struct iattr *attr) +{ + int err; + struct ubifs_budget_req req; + loff_t old_size = inode->i_size, new_size = attr->ia_size; + int offset = new_size & (UBIFS_BLOCK_SIZE - 1); + struct ubifs_inode *ui = ubifs_inode(inode); + + dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); + memset(&req, 0, sizeof(struct ubifs_budget_req)); + + /* + * If this is truncation to a smaller size, and we do not truncate on a + * block boundary, budget for changing one data block, because the last + * block will be re-written. + */ + if (new_size & (UBIFS_BLOCK_SIZE - 1)) + req.dirtied_page = 1; + + req.dirtied_ino = 1; + /* A funny way to budget for truncation node */ + req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; + err = ubifs_budget_space(c, &req); + if (err) + return err; + + err = vmtruncate(inode, new_size); + if (err) + goto out_budg; + + if (offset) { + pgoff_t index = new_size >> PAGE_CACHE_SHIFT; + struct page *page; + + page = find_lock_page(inode->i_mapping, index); + if (page) { + if (PageDirty(page)) { + /* + * 'ubifs_jnl_truncate()' will try to truncate + * the last data node, but it contains + * out-of-date data because the page is dirty. + * Write the page now, so that + * 'ubifs_jnl_truncate()' will see an already + * truncated (and up to date) data node. + */ + ubifs_assert(PagePrivate(page)); + + clear_page_dirty_for_io(page); + if (UBIFS_BLOCKS_PER_PAGE_SHIFT) + offset = new_size & + (PAGE_CACHE_SIZE - 1); + err = do_writepage(page, offset); + page_cache_release(page); + if (err) + goto out_budg; + /* + * We could now tell 'ubifs_jnl_truncate()' not + * to read the last block. + */ + } else { + /* + * We could 'kmap()' the page and pass the data + * to 'ubifs_jnl_truncate()' to save it from + * having to read it. + */ + unlock_page(page); + page_cache_release(page); + } + } + } + + mutex_lock(&ui->ui_mutex); + ui->ui_size = inode->i_size; + /* Truncation changes inode [mc]time */ + inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + /* The other attributes may be changed at the same time as well */ + do_attr_changes(inode, attr); + + err = ubifs_jnl_truncate(c, inode, old_size, new_size); + mutex_unlock(&ui->ui_mutex); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +/** + * do_setattr - change inode attributes. + * @c: UBIFS file-system description object + * @inode: inode to change attributes for + * @attr: inode attribute changes description + * + * This function implements VFS '->setattr()' call for all cases except + * truncations to smaller size. Returns zero in case of success and a negative + * error code in case of failure. + */ +static int do_setattr(struct ubifs_info *c, struct inode *inode, + const struct iattr *attr) +{ + int err, release; + loff_t new_size = attr->ia_size; + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_budget_req req = { .dirtied_ino = 1, + .dirtied_ino_d = ui->data_len }; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + if (attr->ia_valid & ATTR_SIZE) { + dbg_gen("size %lld -> %lld", inode->i_size, new_size); + err = vmtruncate(inode, new_size); + if (err) + goto out; + } + + mutex_lock(&ui->ui_mutex); + if (attr->ia_valid & ATTR_SIZE) { + /* Truncation changes inode [mc]time */ + inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + /* 'vmtruncate()' changed @i_size, update @ui_size */ + ui->ui_size = inode->i_size; + } + + do_attr_changes(inode, attr); + + release = ui->dirty; + if (attr->ia_valid & ATTR_SIZE) + /* + * Inode length changed, so we have to make sure + * @I_DIRTY_DATASYNC is set. + */ + __mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC); + else + mark_inode_dirty_sync(inode); + mutex_unlock(&ui->ui_mutex); + + if (release) + ubifs_release_budget(c, &req); + if (IS_SYNC(inode)) + err = inode->i_sb->s_op->write_inode(inode, 1); + return err; + +out: + ubifs_release_budget(c, &req); + return err; +} + +int ubifs_setattr(struct dentry *dentry, struct iattr *attr) +{ + int err; + struct inode *inode = dentry->d_inode; + struct ubifs_info *c = inode->i_sb->s_fs_info; + + dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid); + err = inode_change_ok(inode, attr); + if (err) + return err; + + err = dbg_check_synced_i_size(inode); + if (err) + return err; + + if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size) + /* Truncation to a smaller size */ + err = do_truncation(c, inode, attr); + else + err = do_setattr(c, inode, attr); + + return err; +} + +static void ubifs_invalidatepage(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + + ubifs_assert(PagePrivate(page)); + if (offset) + /* Partial page remains dirty */ + return; + + if (PageChecked(page)) + release_new_page_budget(c); + else + release_existing_page_budget(c); + + atomic_long_dec(&c->dirty_pg_cnt); + ClearPagePrivate(page); + ClearPageChecked(page); +} + +static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct ubifs_inode *ui = ubifs_inode(dentry->d_inode); + + nd_set_link(nd, ui->data); + return NULL; +} + +int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + struct ubifs_info *c = inode->i_sb->s_fs_info; + int err; + + dbg_gen("syncing inode %lu", inode->i_ino); + + /* + * VFS has already synchronized dirty pages for this inode. Synchronize + * the inode unless this is a 'datasync()' call. + */ + if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { + err = inode->i_sb->s_op->write_inode(inode, 1); + if (err) + return err; + } + + /* + * Nodes related to this inode may still sit in a write-buffer. Flush + * them. + */ + err = ubifs_sync_wbufs_by_inode(c, inode); + if (err) + return err; + + return 0; +} + +/** + * mctime_update_needed - check if mtime or ctime update is needed. + * @inode: the inode to do the check for + * @now: current time + * + * This helper function checks if the inode mtime/ctime should be updated or + * not. If current values of the time-stamps are within the UBIFS inode time + * granularity, they are not updated. This is an optimization. + */ +static inline int mctime_update_needed(const struct inode *inode, + const struct timespec *now) +{ + if (!timespec_equal(&inode->i_mtime, now) || + !timespec_equal(&inode->i_ctime, now)) + return 1; + return 0; +} + +/** + * update_ctime - update mtime and ctime of an inode. + * @c: UBIFS file-system description object + * @inode: inode to update + * + * This function updates mtime and ctime of the inode if it is not equivalent to + * current time. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int update_mctime(struct ubifs_info *c, struct inode *inode) +{ + struct timespec now = ubifs_current_time(inode); + struct ubifs_inode *ui = ubifs_inode(inode); + + if (mctime_update_needed(inode, &now)) { + int err, release; + struct ubifs_budget_req req = { .dirtied_ino = 1, + .dirtied_ino_d = ui->data_len }; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + mutex_lock(&ui->ui_mutex); + inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + release = ui->dirty; + mark_inode_dirty_sync(inode); + mutex_unlock(&ui->ui_mutex); + if (release) + ubifs_release_budget(c, &req); + } + + return 0; +} + +static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + int err; + ssize_t ret; + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + + err = update_mctime(c, inode); + if (err) + return err; + + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (ret < 0) + return ret; + + if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { + err = ubifs_sync_wbufs_by_inode(c, inode); + if (err) + return err; + } + + return ret; +} + +static int ubifs_set_page_dirty(struct page *page) +{ + int ret; + + ret = __set_page_dirty_nobuffers(page); + /* + * An attempt to dirty a page without budgeting for it - should not + * happen. + */ + ubifs_assert(ret == 0); + return ret; +} + +static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) +{ + /* + * An attempt to release a dirty page without budgeting for it - should + * not happen. + */ + if (PageWriteback(page)) + return 0; + ubifs_assert(PagePrivate(page)); + ubifs_assert(0); + ClearPagePrivate(page); + ClearPageChecked(page); + return 1; +} + +/* + * mmap()d file has taken write protection fault and is being made + * writable. UBIFS must ensure page is budgeted for. + */ +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct timespec now = ubifs_current_time(inode); + struct ubifs_budget_req req = { .new_page = 1 }; + int err, update_time; + + dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, + i_size_read(inode)); + ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); + + if (unlikely(c->ro_media)) + return -EROFS; + + /* + * We have not locked @page so far so we may budget for changing the + * page. Note, we cannot do this after we locked the page, because + * budgeting may cause write-back which would cause deadlock. + * + * At the moment we do not know whether the page is dirty or not, so we + * assume that it is not and budget for a new page. We could look at + * the @PG_private flag and figure this out, but we may race with write + * back and the page state may change by the time we lock it, so this + * would need additional care. We do not bother with this at the + * moment, although it might be good idea to do. Instead, we allocate + * budget for a new page and amend it later on if the page was in fact + * dirty. + * + * The budgeting-related logic of this function is similar to what we + * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there + * for more comments. + */ + update_time = mctime_update_needed(inode, &now); + if (update_time) + /* + * We have to change inode time stamp which requires extra + * budgeting. + */ + req.dirtied_ino = 1; + + err = ubifs_budget_space(c, &req); + if (unlikely(err)) { + if (err == -ENOSPC) + ubifs_warn("out of space for mmapped file " + "(inode number %lu)", inode->i_ino); + return err; + } + + lock_page(page); + if (unlikely(page->mapping != inode->i_mapping || + page_offset(page) > i_size_read(inode))) { + /* Page got truncated out from underneath us */ + err = -EINVAL; + goto out_unlock; + } + + if (PagePrivate(page)) + release_new_page_budget(c); + else { + if (!PageChecked(page)) + ubifs_convert_page_budget(c); + SetPagePrivate(page); + atomic_long_inc(&c->dirty_pg_cnt); + __set_page_dirty_nobuffers(page); + } + + if (update_time) { + int release; + struct ubifs_inode *ui = ubifs_inode(inode); + + mutex_lock(&ui->ui_mutex); + inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + release = ui->dirty; + mark_inode_dirty_sync(inode); + mutex_unlock(&ui->ui_mutex); + if (release) + ubifs_release_dirty_inode_budget(c, ui); + } + + unlock_page(page); + return 0; + +out_unlock: + unlock_page(page); + ubifs_release_budget(c, &req); + return err; +} + +static struct vm_operations_struct ubifs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = ubifs_vm_page_mkwrite, +}; + +static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err; + + /* 'generic_file_mmap()' takes care of NOMMU case */ + err = generic_file_mmap(file, vma); + if (err) + return err; + vma->vm_ops = &ubifs_file_vm_ops; + return 0; +} + +struct address_space_operations ubifs_file_address_operations = { + .readpage = ubifs_readpage, + .writepage = ubifs_writepage, + .write_begin = ubifs_write_begin, + .write_end = ubifs_write_end, + .invalidatepage = ubifs_invalidatepage, + .set_page_dirty = ubifs_set_page_dirty, + .releasepage = ubifs_releasepage, +}; + +struct inode_operations ubifs_file_inode_operations = { + .setattr = ubifs_setattr, + .getattr = ubifs_getattr, +#ifdef CONFIG_UBIFS_FS_XATTR + .setxattr = ubifs_setxattr, + .getxattr = ubifs_getxattr, + .listxattr = ubifs_listxattr, + .removexattr = ubifs_removexattr, +#endif +}; + +struct inode_operations ubifs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = ubifs_follow_link, + .setattr = ubifs_setattr, + .getattr = ubifs_getattr, +}; + +struct file_operations ubifs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = ubifs_aio_write, + .mmap = ubifs_file_mmap, + .fsync = ubifs_fsync, + .unlocked_ioctl = ubifs_ioctl, + .splice_read = generic_file_splice_read, +#ifdef CONFIG_COMPAT + .compat_ioctl = ubifs_compat_ioctl, +#endif +}; diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c new file mode 100644 index 000000000000..10394c548367 --- /dev/null +++ b/fs/ubifs/find.c @@ -0,0 +1,975 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file contains functions for finding LEBs for various purposes e.g. + * garbage collection. In general, lprops category heaps and lists are used + * for fast access, falling back on scanning the LPT as a last resort. + */ + +#include +#include "ubifs.h" + +/** + * struct scan_data - data provided to scan callback functions + * @min_space: minimum number of bytes for which to scan + * @pick_free: whether it is OK to scan for empty LEBs + * @lnum: LEB number found is returned here + * @exclude_index: whether to exclude index LEBs + */ +struct scan_data { + int min_space; + int pick_free; + int lnum; + int exclude_index; +}; + +/** + * valuable - determine whether LEB properties are valuable. + * @c: the UBIFS file-system description object + * @lprops: LEB properties + * + * This function return %1 if the LEB properties should be added to the LEB + * properties tree in memory. Otherwise %0 is returned. + */ +static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) +{ + int n, cat = lprops->flags & LPROPS_CAT_MASK; + struct ubifs_lpt_heap *heap; + + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + heap = &c->lpt_heap[cat - 1]; + if (heap->cnt < heap->max_cnt) + return 1; + if (lprops->free + lprops->dirty >= c->dark_wm) + return 1; + return 0; + case LPROPS_EMPTY: + n = c->lst.empty_lebs + c->freeable_cnt - + c->lst.taken_empty_lebs; + if (n < c->lsave_cnt) + return 1; + return 0; + case LPROPS_FREEABLE: + return 1; + case LPROPS_FRDI_IDX: + return 1; + } + return 0; +} + +/** + * scan_for_dirty_cb - dirty space scan callback. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_for_dirty_cb(struct ubifs_info *c, + const struct ubifs_lprops *lprops, int in_tree, + struct scan_data *data) +{ + int ret = LPT_SCAN_CONTINUE; + + /* Exclude LEBs that are currently in use */ + if (lprops->flags & LPROPS_TAKEN) + return LPT_SCAN_CONTINUE; + /* Determine whether to add these LEB properties to the tree */ + if (!in_tree && valuable(c, lprops)) + ret |= LPT_SCAN_ADD; + /* Exclude LEBs with too little space */ + if (lprops->free + lprops->dirty < data->min_space) + return ret; + /* If specified, exclude index LEBs */ + if (data->exclude_index && lprops->flags & LPROPS_INDEX) + return ret; + /* If specified, exclude empty or freeable LEBs */ + if (lprops->free + lprops->dirty == c->leb_size) { + if (!data->pick_free) + return ret; + /* Exclude LEBs with too little dirty space (unless it is empty) */ + } else if (lprops->dirty < c->dead_wm) + return ret; + /* Finally we found space */ + data->lnum = lprops->lnum; + return LPT_SCAN_ADD | LPT_SCAN_STOP; +} + +/** + * scan_for_dirty - find a data LEB with free space. + * @c: the UBIFS file-system description object + * @min_space: minimum amount free plus dirty space the returned LEB has to + * have + * @pick_free: if it is OK to return a free or freeable LEB + * @exclude_index: whether to exclude index LEBs + * + * This function returns a pointer to the LEB properties found or a negative + * error code. + */ +static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, + int min_space, int pick_free, + int exclude_index) +{ + const struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + struct scan_data data; + int err, i; + + /* There may be an LEB with enough dirty space on the free heap */ + heap = &c->lpt_heap[LPROPS_FREE - 1]; + for (i = 0; i < heap->cnt; i++) { + lprops = heap->arr[i]; + if (lprops->free + lprops->dirty < min_space) + continue; + if (lprops->dirty < c->dead_wm) + continue; + return lprops; + } + /* + * A LEB may have fallen off of the bottom of the dirty heap, and ended + * up as uncategorized even though it has enough dirty space for us now, + * so check the uncategorized list. N.B. neither empty nor freeable LEBs + * can end up as uncategorized because they are kept on lists not + * finite-sized heaps. + */ + list_for_each_entry(lprops, &c->uncat_list, list) { + if (lprops->flags & LPROPS_TAKEN) + continue; + if (lprops->free + lprops->dirty < min_space) + continue; + if (exclude_index && (lprops->flags & LPROPS_INDEX)) + continue; + if (lprops->dirty < c->dead_wm) + continue; + return lprops; + } + /* We have looked everywhere in main memory, now scan the flash */ + if (c->pnodes_have >= c->pnode_cnt) + /* All pnodes are in memory, so skip scan */ + return ERR_PTR(-ENOSPC); + data.min_space = min_space; + data.pick_free = pick_free; + data.lnum = -1; + data.exclude_index = exclude_index; + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, + (ubifs_lpt_scan_callback)scan_for_dirty_cb, + &data); + if (err) + return ERR_PTR(err); + ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); + c->lscan_lnum = data.lnum; + lprops = ubifs_lpt_lookup_dirty(c, data.lnum); + if (IS_ERR(lprops)) + return lprops; + ubifs_assert(lprops->lnum == data.lnum); + ubifs_assert(lprops->free + lprops->dirty >= min_space); + ubifs_assert(lprops->dirty >= c->dead_wm || + (pick_free && + lprops->free + lprops->dirty == c->leb_size)); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!exclude_index || !(lprops->flags & LPROPS_INDEX)); + return lprops; +} + +/** + * ubifs_find_dirty_leb - find a dirty LEB for the Garbage Collector. + * @c: the UBIFS file-system description object + * @ret_lp: LEB properties are returned here on exit + * @min_space: minimum amount free plus dirty space the returned LEB has to + * have + * @pick_free: controls whether it is OK to pick empty or index LEBs + * + * This function tries to find a dirty logical eraseblock which has at least + * @min_space free and dirty space. It prefers to take an LEB from the dirty or + * dirty index heap, and it falls-back to LPT scanning if the heaps are empty + * or do not have an LEB which satisfies the @min_space criteria. + * + * Note: + * o LEBs which have less than dead watermark of dirty space are never picked + * by this function; + * + * Returns zero and the LEB properties of + * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a + * negative error code in case of other failures. The returned LEB is marked as + * "taken". + * + * The additional @pick_free argument controls if this function has to return a + * free or freeable LEB if one is present. For example, GC must to set it to %1, + * when called from the journal space reservation function, because the + * appearance of free space may coincide with the loss of enough dirty space + * for GC to succeed anyway. + * + * In contrast, if the Garbage Collector is called from budgeting, it should + * just make free space, not return LEBs which are already free or freeable. + * + * In addition @pick_free is set to %2 by the recovery process in order to + * recover gc_lnum in which case an index LEB must not be returned. + */ +int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, + int min_space, int pick_free) +{ + int err = 0, sum, exclude_index = pick_free == 2 ? 1 : 0; + const struct ubifs_lprops *lp = NULL, *idx_lp = NULL; + struct ubifs_lpt_heap *heap, *idx_heap; + + ubifs_get_lprops(c); + + if (pick_free) { + int lebs, rsvd_idx_lebs = 0; + + spin_lock(&c->space_lock); + lebs = c->lst.empty_lebs; + lebs += c->freeable_cnt - c->lst.taken_empty_lebs; + + /* + * Note, the index may consume more LEBs than have been reserved + * for it. It is OK because it might be consolidated by GC. + * But if the index takes fewer LEBs than it is reserved for it, + * this function must avoid picking those reserved LEBs. + */ + if (c->min_idx_lebs >= c->lst.idx_lebs) { + rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + exclude_index = 1; + } + spin_unlock(&c->space_lock); + + /* Check if there are enough free LEBs for the index */ + if (rsvd_idx_lebs < lebs) { + /* OK, try to find an empty LEB */ + lp = ubifs_fast_find_empty(c); + if (lp) + goto found; + + /* Or a freeable LEB */ + lp = ubifs_fast_find_freeable(c); + if (lp) + goto found; + } else + /* + * We cannot pick free/freeable LEBs in the below code. + */ + pick_free = 0; + } else { + spin_lock(&c->space_lock); + exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); + spin_unlock(&c->space_lock); + } + + /* Look on the dirty and dirty index heaps */ + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + idx_heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + + if (idx_heap->cnt && !exclude_index) { + idx_lp = idx_heap->arr[0]; + sum = idx_lp->free + idx_lp->dirty; + /* + * Since we reserve twice as more space for the index than it + * actually takes, it does not make sense to pick indexing LEBs + * with less than half LEB of dirty space. + */ + if (sum < min_space || sum < c->half_leb_size) + idx_lp = NULL; + } + + if (heap->cnt) { + lp = heap->arr[0]; + if (lp->dirty + lp->free < min_space) + lp = NULL; + } + + /* Pick the LEB with most space */ + if (idx_lp && lp) { + if (idx_lp->free + idx_lp->dirty >= lp->free + lp->dirty) + lp = idx_lp; + } else if (idx_lp && !lp) + lp = idx_lp; + + if (lp) { + ubifs_assert(lp->dirty >= c->dead_wm); + goto found; + } + + /* Did not find a dirty LEB on the dirty heaps, have to scan */ + dbg_find("scanning LPT for a dirty LEB"); + lp = scan_for_dirty(c, min_space, pick_free, exclude_index); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + ubifs_assert(lp->dirty >= c->dead_wm || + (pick_free && lp->free + lp->dirty == c->leb_size)); + +found: + dbg_find("found LEB %d, free %d, dirty %d, flags %#x", + lp->lnum, lp->free, lp->dirty, lp->flags); + + lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + memcpy(ret_lp, lp, sizeof(struct ubifs_lprops)); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * scan_for_free_cb - free space scan callback. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_for_free_cb(struct ubifs_info *c, + const struct ubifs_lprops *lprops, int in_tree, + struct scan_data *data) +{ + int ret = LPT_SCAN_CONTINUE; + + /* Exclude LEBs that are currently in use */ + if (lprops->flags & LPROPS_TAKEN) + return LPT_SCAN_CONTINUE; + /* Determine whether to add these LEB properties to the tree */ + if (!in_tree && valuable(c, lprops)) + ret |= LPT_SCAN_ADD; + /* Exclude index LEBs */ + if (lprops->flags & LPROPS_INDEX) + return ret; + /* Exclude LEBs with too little space */ + if (lprops->free < data->min_space) + return ret; + /* If specified, exclude empty LEBs */ + if (!data->pick_free && lprops->free == c->leb_size) + return ret; + /* + * LEBs that have only free and dirty space must not be allocated + * because they may have been unmapped already or they may have data + * that is obsolete only because of nodes that are still sitting in a + * wbuf. + */ + if (lprops->free + lprops->dirty == c->leb_size && lprops->dirty > 0) + return ret; + /* Finally we found space */ + data->lnum = lprops->lnum; + return LPT_SCAN_ADD | LPT_SCAN_STOP; +} + +/** + * do_find_free_space - find a data LEB with free space. + * @c: the UBIFS file-system description object + * @min_space: minimum amount of free space required + * @pick_free: whether it is OK to scan for empty LEBs + * @squeeze: whether to try to find space in a non-empty LEB first + * + * This function returns a pointer to the LEB properties found or a negative + * error code. + */ +static +const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, + int min_space, int pick_free, + int squeeze) +{ + const struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + struct scan_data data; + int err, i; + + if (squeeze) { + lprops = ubifs_fast_find_free(c); + if (lprops && lprops->free >= min_space) + return lprops; + } + if (pick_free) { + lprops = ubifs_fast_find_empty(c); + if (lprops) + return lprops; + } + if (!squeeze) { + lprops = ubifs_fast_find_free(c); + if (lprops && lprops->free >= min_space) + return lprops; + } + /* There may be an LEB with enough free space on the dirty heap */ + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + for (i = 0; i < heap->cnt; i++) { + lprops = heap->arr[i]; + if (lprops->free >= min_space) + return lprops; + } + /* + * A LEB may have fallen off of the bottom of the free heap, and ended + * up as uncategorized even though it has enough free space for us now, + * so check the uncategorized list. N.B. neither empty nor freeable LEBs + * can end up as uncategorized because they are kept on lists not + * finite-sized heaps. + */ + list_for_each_entry(lprops, &c->uncat_list, list) { + if (lprops->flags & LPROPS_TAKEN) + continue; + if (lprops->flags & LPROPS_INDEX) + continue; + if (lprops->free >= min_space) + return lprops; + } + /* We have looked everywhere in main memory, now scan the flash */ + if (c->pnodes_have >= c->pnode_cnt) + /* All pnodes are in memory, so skip scan */ + return ERR_PTR(-ENOSPC); + data.min_space = min_space; + data.pick_free = pick_free; + data.lnum = -1; + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, + (ubifs_lpt_scan_callback)scan_for_free_cb, + &data); + if (err) + return ERR_PTR(err); + ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); + c->lscan_lnum = data.lnum; + lprops = ubifs_lpt_lookup_dirty(c, data.lnum); + if (IS_ERR(lprops)) + return lprops; + ubifs_assert(lprops->lnum == data.lnum); + ubifs_assert(lprops->free >= min_space); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return lprops; +} + +/** + * ubifs_find_free_space - find a data LEB with free space. + * @c: the UBIFS file-system description object + * @min_space: minimum amount of required free space + * @free: contains amount of free space in the LEB on exit + * @squeeze: whether to try to find space in a non-empty LEB first + * + * This function looks for an LEB with at least @min_space bytes of free space. + * It tries to find an empty LEB if possible. If no empty LEBs are available, + * this function searches for a non-empty data LEB. The returned LEB is marked + * as "taken". + * + * This function returns found LEB number in case of success, %-ENOSPC if it + * failed to find a LEB with @min_space bytes of free space and other a negative + * error codes in case of failure. + */ +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, + int squeeze) +{ + const struct ubifs_lprops *lprops; + int lebs, rsvd_idx_lebs, pick_free = 0, err, lnum, flags; + + dbg_find("min_space %d", min_space); + ubifs_get_lprops(c); + + /* Check if there are enough empty LEBs for commit */ + spin_lock(&c->space_lock); + if (c->min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - + c->lst.taken_empty_lebs; + ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs); + if (rsvd_idx_lebs < lebs) + /* + * OK to allocate an empty LEB, but we still don't want to go + * looking for one if there aren't any. + */ + if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + pick_free = 1; + /* + * Because we release the space lock, we must account + * for this allocation here. After the LEB properties + * flags have been updated, we subtract one. Note, the + * result of this is that lprops also decreases + * @taken_empty_lebs in 'ubifs_change_lp()', so it is + * off by one for a short period of time which may + * introduce a small disturbance to budgeting + * calculations, but this is harmless because at the + * worst case this would make the budgeting subsystem + * be more pessimistic than needed. + * + * Fundamentally, this is about serialization of the + * budgeting and lprops subsystems. We could make the + * @space_lock a mutex and avoid dropping it before + * calling 'ubifs_change_lp()', but mutex is more + * heavy-weight, and we want budgeting to be as fast as + * possible. + */ + c->lst.taken_empty_lebs += 1; + } + spin_unlock(&c->space_lock); + + lprops = do_find_free_space(c, min_space, pick_free, squeeze); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + + lnum = lprops->lnum; + flags = lprops->flags | LPROPS_TAKEN; + + lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, flags, 0); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + + if (pick_free) { + spin_lock(&c->space_lock); + c->lst.taken_empty_lebs -= 1; + spin_unlock(&c->space_lock); + } + + *free = lprops->free; + ubifs_release_lprops(c); + + if (*free == c->leb_size) { + /* + * Ensure that empty LEBs have been unmapped. They may not have + * been, for example, because of an unclean unmount. Also + * LEBs that were freeable LEBs (free + dirty == leb_size) will + * not have been unmapped. + */ + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + + dbg_find("found LEB %d, free %d", lnum, *free); + ubifs_assert(*free >= min_space); + return lnum; + +out: + if (pick_free) { + spin_lock(&c->space_lock); + c->lst.taken_empty_lebs -= 1; + spin_unlock(&c->space_lock); + } + ubifs_release_lprops(c); + return err; +} + +/** + * scan_for_idx_cb - callback used by the scan for a free LEB for the index. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_for_idx_cb(struct ubifs_info *c, + const struct ubifs_lprops *lprops, int in_tree, + struct scan_data *data) +{ + int ret = LPT_SCAN_CONTINUE; + + /* Exclude LEBs that are currently in use */ + if (lprops->flags & LPROPS_TAKEN) + return LPT_SCAN_CONTINUE; + /* Determine whether to add these LEB properties to the tree */ + if (!in_tree && valuable(c, lprops)) + ret |= LPT_SCAN_ADD; + /* Exclude index LEBS */ + if (lprops->flags & LPROPS_INDEX) + return ret; + /* Exclude LEBs that cannot be made empty */ + if (lprops->free + lprops->dirty != c->leb_size) + return ret; + /* + * We are allocating for the index so it is safe to allocate LEBs with + * only free and dirty space, because write buffers are sync'd at commit + * start. + */ + data->lnum = lprops->lnum; + return LPT_SCAN_ADD | LPT_SCAN_STOP; +} + +/** + * scan_for_leb_for_idx - scan for a free LEB for the index. + * @c: the UBIFS file-system description object + */ +static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct scan_data data; + int err; + + data.lnum = -1; + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, + (ubifs_lpt_scan_callback)scan_for_idx_cb, + &data); + if (err) + return ERR_PTR(err); + ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); + c->lscan_lnum = data.lnum; + lprops = ubifs_lpt_lookup_dirty(c, data.lnum); + if (IS_ERR(lprops)) + return lprops; + ubifs_assert(lprops->lnum == data.lnum); + ubifs_assert(lprops->free + lprops->dirty == c->leb_size); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return lprops; +} + +/** + * ubifs_find_free_leb_for_idx - find a free LEB for the index. + * @c: the UBIFS file-system description object + * + * This function looks for a free LEB and returns that LEB number. The returned + * LEB is marked as "taken", "index". + * + * Only empty LEBs are allocated. This is for two reasons. First, the commit + * calculates the number of LEBs to allocate based on the assumption that they + * will be empty. Secondly, free space at the end of an index LEB is not + * guaranteed to be empty because it may have been used by the in-the-gaps + * method prior to an unclean unmount. + * + * If no LEB is found %-ENOSPC is returned. For other failures another negative + * error code is returned. + */ +int ubifs_find_free_leb_for_idx(struct ubifs_info *c) +{ + const struct ubifs_lprops *lprops; + int lnum = -1, err, flags; + + ubifs_get_lprops(c); + + lprops = ubifs_fast_find_empty(c); + if (!lprops) { + lprops = ubifs_fast_find_freeable(c); + if (!lprops) { + ubifs_assert(c->freeable_cnt == 0); + if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + lprops = scan_for_leb_for_idx(c); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + } + } + } + + if (!lprops) { + err = -ENOSPC; + goto out; + } + + lnum = lprops->lnum; + + dbg_find("found LEB %d, free %d, dirty %d, flags %#x", + lnum, lprops->free, lprops->dirty, lprops->flags); + + flags = lprops->flags | LPROPS_TAKEN | LPROPS_INDEX; + lprops = ubifs_change_lp(c, lprops, c->leb_size, 0, flags, 0); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + + ubifs_release_lprops(c); + + /* + * Ensure that empty LEBs have been unmapped. They may not have been, + * for example, because of an unclean unmount. Also LEBs that were + * freeable LEBs (free + dirty == leb_size) will not have been unmapped. + */ + err = ubifs_leb_unmap(c, lnum); + if (err) { + ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_TAKEN | LPROPS_INDEX, 0); + return err; + } + + return lnum; + +out: + ubifs_release_lprops(c); + return err; +} + +static int cmp_dirty_idx(const struct ubifs_lprops **a, + const struct ubifs_lprops **b) +{ + const struct ubifs_lprops *lpa = *a; + const struct ubifs_lprops *lpb = *b; + + return lpa->dirty + lpa->free - lpb->dirty - lpb->free; +} + +static void swap_dirty_idx(struct ubifs_lprops **a, struct ubifs_lprops **b, + int size) +{ + struct ubifs_lprops *t = *a; + + *a = *b; + *b = t; +} + +/** + * ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos. + * @c: the UBIFS file-system description object + * + * This function is called each commit to create an array of LEB numbers of + * dirty index LEBs sorted in order of dirty and free space. This is used by + * the in-the-gaps method of TNC commit. + */ +int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) +{ + int i; + + ubifs_get_lprops(c); + /* Copy the LPROPS_DIRTY_IDX heap */ + c->dirty_idx.cnt = c->lpt_heap[LPROPS_DIRTY_IDX - 1].cnt; + memcpy(c->dirty_idx.arr, c->lpt_heap[LPROPS_DIRTY_IDX - 1].arr, + sizeof(void *) * c->dirty_idx.cnt); + /* Sort it so that the dirtiest is now at the end */ + sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *), + (int (*)(const void *, const void *))cmp_dirty_idx, + (void (*)(void *, void *, int))swap_dirty_idx); + dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt); + if (c->dirty_idx.cnt) + dbg_find("dirtiest index LEB is %d with dirty %d and free %d", + c->dirty_idx.arr[c->dirty_idx.cnt - 1]->lnum, + c->dirty_idx.arr[c->dirty_idx.cnt - 1]->dirty, + c->dirty_idx.arr[c->dirty_idx.cnt - 1]->free); + /* Replace the lprops pointers with LEB numbers */ + for (i = 0; i < c->dirty_idx.cnt; i++) + c->dirty_idx.arr[i] = (void *)(size_t)c->dirty_idx.arr[i]->lnum; + ubifs_release_lprops(c); + return 0; +} + +/** + * scan_dirty_idx_cb - callback used by the scan for a dirty index LEB. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_dirty_idx_cb(struct ubifs_info *c, + const struct ubifs_lprops *lprops, int in_tree, + struct scan_data *data) +{ + int ret = LPT_SCAN_CONTINUE; + + /* Exclude LEBs that are currently in use */ + if (lprops->flags & LPROPS_TAKEN) + return LPT_SCAN_CONTINUE; + /* Determine whether to add these LEB properties to the tree */ + if (!in_tree && valuable(c, lprops)) + ret |= LPT_SCAN_ADD; + /* Exclude non-index LEBs */ + if (!(lprops->flags & LPROPS_INDEX)) + return ret; + /* Exclude LEBs with too little space */ + if (lprops->free + lprops->dirty < c->min_idx_node_sz) + return ret; + /* Finally we found space */ + data->lnum = lprops->lnum; + return LPT_SCAN_ADD | LPT_SCAN_STOP; +} + +/** + * find_dirty_idx_leb - find a dirty index LEB. + * @c: the UBIFS file-system description object + * + * This function returns LEB number upon success and a negative error code upon + * failure. In particular, -ENOSPC is returned if a dirty index LEB is not + * found. + * + * Note that this function scans the entire LPT but it is called very rarely. + */ +static int find_dirty_idx_leb(struct ubifs_info *c) +{ + const struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + struct scan_data data; + int err, i, ret; + + /* Check all structures in memory first */ + data.lnum = -1; + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + for (i = 0; i < heap->cnt; i++) { + lprops = heap->arr[i]; + ret = scan_dirty_idx_cb(c, lprops, 1, &data); + if (ret & LPT_SCAN_STOP) + goto found; + } + list_for_each_entry(lprops, &c->frdi_idx_list, list) { + ret = scan_dirty_idx_cb(c, lprops, 1, &data); + if (ret & LPT_SCAN_STOP) + goto found; + } + list_for_each_entry(lprops, &c->uncat_list, list) { + ret = scan_dirty_idx_cb(c, lprops, 1, &data); + if (ret & LPT_SCAN_STOP) + goto found; + } + if (c->pnodes_have >= c->pnode_cnt) + /* All pnodes are in memory, so skip scan */ + return -ENOSPC; + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, + (ubifs_lpt_scan_callback)scan_dirty_idx_cb, + &data); + if (err) + return err; +found: + ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); + c->lscan_lnum = data.lnum; + lprops = ubifs_lpt_lookup_dirty(c, data.lnum); + if (IS_ERR(lprops)) + return PTR_ERR(lprops); + ubifs_assert(lprops->lnum == data.lnum); + ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert((lprops->flags & LPROPS_INDEX)); + + dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x", + lprops->lnum, lprops->free, lprops->dirty, lprops->flags); + + lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, + lprops->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lprops)) + return PTR_ERR(lprops); + + return lprops->lnum; +} + +/** + * get_idx_gc_leb - try to get a LEB number from trivial GC. + * @c: the UBIFS file-system description object + */ +static int get_idx_gc_leb(struct ubifs_info *c) +{ + const struct ubifs_lprops *lp; + int err, lnum; + + err = ubifs_get_idx_gc_leb(c); + if (err < 0) + return err; + lnum = err; + /* + * The LEB was due to be unmapped after the commit but + * it is needed now for this commit. + */ + lp = ubifs_lpt_lookup_dirty(c, lnum); + if (unlikely(IS_ERR(lp))) + return PTR_ERR(lp); + lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, + lp->flags | LPROPS_INDEX, -1); + if (unlikely(IS_ERR(lp))) + return PTR_ERR(lp); + dbg_find("LEB %d, dirty %d and free %d flags %#x", + lp->lnum, lp->dirty, lp->free, lp->flags); + return lnum; +} + +/** + * find_dirtiest_idx_leb - find dirtiest index LEB from dirtiest array. + * @c: the UBIFS file-system description object + */ +static int find_dirtiest_idx_leb(struct ubifs_info *c) +{ + const struct ubifs_lprops *lp; + int lnum; + + while (1) { + if (!c->dirty_idx.cnt) + return -ENOSPC; + /* The lprops pointers were replaced by LEB numbers */ + lnum = (size_t)c->dirty_idx.arr[--c->dirty_idx.cnt]; + lp = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lp)) + return PTR_ERR(lp); + if ((lp->flags & LPROPS_TAKEN) || !(lp->flags & LPROPS_INDEX)) + continue; + lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) + return PTR_ERR(lp); + break; + } + dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty, + lp->free, lp->flags); + ubifs_assert(lp->flags | LPROPS_TAKEN); + ubifs_assert(lp->flags | LPROPS_INDEX); + return lnum; +} + +/** + * ubifs_find_dirty_idx_leb - try to find dirtiest index LEB as at last commit. + * @c: the UBIFS file-system description object + * + * This function attempts to find an untaken index LEB with the most free and + * dirty space that can be used without overwriting index nodes that were in the + * last index committed. + */ +int ubifs_find_dirty_idx_leb(struct ubifs_info *c) +{ + int err; + + ubifs_get_lprops(c); + + /* + * We made an array of the dirtiest index LEB numbers as at the start of + * last commit. Try that array first. + */ + err = find_dirtiest_idx_leb(c); + + /* Next try scanning the entire LPT */ + if (err == -ENOSPC) + err = find_dirty_idx_leb(c); + + /* Finally take any index LEBs awaiting trivial GC */ + if (err == -ENOSPC) + err = get_idx_gc_leb(c); + + ubifs_release_lprops(c); + return err; +} diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c new file mode 100644 index 000000000000..d0f3dac29081 --- /dev/null +++ b/fs/ubifs/gc.c @@ -0,0 +1,773 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements garbage collection. The procedure for garbage collection + * is different depending on whether a LEB as an index LEB (contains index + * nodes) or not. For non-index LEBs, garbage collection finds a LEB which + * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete + * nodes to the journal, at which point the garbage-collected LEB is free to be + * reused. For index LEBs, garbage collection marks the non-obsolete index nodes + * dirty in the TNC, and after the next commit, the garbage-collected LEB is + * to be reused. Garbage collection will cause the number of dirty index nodes + * to grow, however sufficient space is reserved for the index to ensure the + * commit will never run out of space. + */ + +#include +#include "ubifs.h" + +/* + * GC tries to optimize the way it fit nodes to available space, and it sorts + * nodes a little. The below constants are watermarks which define "large", + * "medium", and "small" nodes. + */ +#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4) +#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ + +/* + * GC may need to move more then one LEB to make progress. The below constants + * define "soft" and "hard" limits on the number of LEBs the garbage collector + * may move. + */ +#define SOFT_LEBS_LIMIT 4 +#define HARD_LEBS_LIMIT 32 + +/** + * switch_gc_head - switch the garbage collection journal head. + * @c: UBIFS file-system description object + * @buf: buffer to write + * @len: length of the buffer to write + * @lnum: LEB number written is returned here + * @offs: offset written is returned here + * + * This function switch the GC head to the next LEB which is reserved in + * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required, + * and other negative error code in case of failures. + */ +static int switch_gc_head(struct ubifs_info *c) +{ + int err, gc_lnum = c->gc_lnum; + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + + ubifs_assert(gc_lnum != -1); + dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)", + wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum, + c->leb_size - wbuf->offs - wbuf->used); + + err = ubifs_wbuf_sync_nolock(wbuf); + if (err) + return err; + + /* + * The GC write-buffer was synchronized, we may safely unmap + * 'c->gc_lnum'. + */ + err = ubifs_leb_unmap(c, gc_lnum); + if (err) + return err; + + err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); + if (err) + return err; + + c->gc_lnum = -1; + err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM); + return err; +} + +/** + * move_nodes - move nodes. + * @c: UBIFS file-system description object + * @sleb: describes nodes to move + * + * This function moves valid nodes from data LEB described by @sleb to the GC + * journal head. The obsolete nodes are dropped. + * + * When moving nodes we have to deal with classical bin-packing problem: the + * space in the current GC journal head LEB and in @c->gc_lnum are the "bins", + * where the nodes in the @sleb->nodes list are the elements which should be + * fit optimally to the bins. This function uses the "first fit decreasing" + * strategy, although it does not really sort the nodes but just split them on + * 3 classes - large, medium, and small, so they are roughly sorted. + * + * This function returns zero in case of success, %-EAGAIN if commit is + * required, and other negative error codes in case of other failures. + */ +static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) +{ + struct ubifs_scan_node *snod, *tmp; + struct list_head large, medium, small; + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + int avail, err, min = INT_MAX; + + INIT_LIST_HEAD(&large); + INIT_LIST_HEAD(&medium); + INIT_LIST_HEAD(&small); + + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { + struct list_head *lst; + + ubifs_assert(snod->type != UBIFS_IDX_NODE); + ubifs_assert(snod->type != UBIFS_REF_NODE); + ubifs_assert(snod->type != UBIFS_CS_NODE); + + err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, + snod->offs, 0); + if (err < 0) + goto out; + + lst = &snod->list; + list_del(lst); + if (!err) { + /* The node is obsolete, remove it from the list */ + kfree(snod); + continue; + } + + /* + * Sort the list of nodes so that large nodes go first, and + * small nodes go last. + */ + if (snod->len > MEDIUM_NODE_WM) + list_add(lst, &large); + else if (snod->len > SMALL_NODE_WM) + list_add(lst, &medium); + else + list_add(lst, &small); + + /* And find the smallest node */ + if (snod->len < min) + min = snod->len; + } + + /* + * Join the tree lists so that we'd have one roughly sorted list + * ('large' will be the head of the joined list). + */ + list_splice(&medium, large.prev); + list_splice(&small, large.prev); + + if (wbuf->lnum == -1) { + /* + * The GC journal head is not set, because it is the first GC + * invocation since mount. + */ + err = switch_gc_head(c); + if (err) + goto out; + } + + /* Write nodes to their new location. Use the first-fit strategy */ + while (1) { + avail = c->leb_size - wbuf->offs - wbuf->used; + list_for_each_entry_safe(snod, tmp, &large, list) { + int new_lnum, new_offs; + + if (avail < min) + break; + + if (snod->len > avail) + /* This node does not fit */ + continue; + + cond_resched(); + + new_lnum = wbuf->lnum; + new_offs = wbuf->offs + wbuf->used; + err = ubifs_wbuf_write_nolock(wbuf, snod->node, + snod->len); + if (err) + goto out; + err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, + snod->offs, new_lnum, new_offs, + snod->len); + if (err) + goto out; + + avail = c->leb_size - wbuf->offs - wbuf->used; + list_del(&snod->list); + kfree(snod); + } + + if (list_empty(&large)) + break; + + /* + * Waste the rest of the space in the LEB and switch to the + * next LEB. + */ + err = switch_gc_head(c); + if (err) + goto out; + } + + return 0; + +out: + list_for_each_entry_safe(snod, tmp, &large, list) { + list_del(&snod->list); + kfree(snod); + } + return err; +} + +/** + * gc_sync_wbufs - sync write-buffers for GC. + * @c: UBIFS file-system description object + * + * We must guarantee that obsoleting nodes are on flash. Unfortunately they may + * be in a write-buffer instead. That is, a node could be written to a + * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is + * erased before the write-buffer is sync'd and then there is an unclean + * unmount, then an existing node is lost. To avoid this, we sync all + * write-buffers. + * + * This function returns %0 on success or a negative error code on failure. + */ +static int gc_sync_wbufs(struct ubifs_info *c) +{ + int err, i; + + for (i = 0; i < c->jhead_cnt; i++) { + if (i == GCHD) + continue; + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; + } + return 0; +} + +/** + * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock. + * @c: UBIFS file-system description object + * @lp: describes the LEB to garbage collect + * + * This function garbage-collects an LEB and returns one of the @LEB_FREED, + * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is + * required, and other negative error codes in case of failures. + */ +int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + int err = 0, lnum = lp->lnum; + + ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 || + c->need_recovery); + ubifs_assert(c->gc_lnum != lnum); + ubifs_assert(wbuf->lnum != lnum); + + /* + * We scan the entire LEB even though we only really need to scan up to + * (c->leb_size - lp->free). + */ + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + + ubifs_assert(!list_empty(&sleb->nodes)); + snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); + + if (snod->type == UBIFS_IDX_NODE) { + struct ubifs_gced_idx_leb *idx_gc; + + dbg_gc("indexing LEB %d (free %d, dirty %d)", + lnum, lp->free, lp->dirty); + list_for_each_entry(snod, &sleb->nodes, list) { + struct ubifs_idx_node *idx = snod->node; + int level = le16_to_cpu(idx->level); + + ubifs_assert(snod->type == UBIFS_IDX_NODE); + key_read(c, ubifs_idx_key(c, idx), &snod->key); + err = ubifs_dirty_idx_node(c, &snod->key, level, lnum, + snod->offs); + if (err) + goto out; + } + + idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS); + if (!idx_gc) { + err = -ENOMEM; + goto out; + } + + idx_gc->lnum = lnum; + idx_gc->unmap = 0; + list_add(&idx_gc->list, &c->idx_gc); + + /* + * Don't release the LEB until after the next commit, because + * it may contain date which is needed for recovery. So + * although we freed this LEB, it will become usable only after + * the commit. + */ + err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, + LPROPS_INDEX, 1); + if (err) + goto out; + err = LEB_FREED_IDX; + } else { + dbg_gc("data LEB %d (free %d, dirty %d)", + lnum, lp->free, lp->dirty); + + err = move_nodes(c, sleb); + if (err) + goto out; + + err = gc_sync_wbufs(c); + if (err) + goto out; + + err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); + if (err) + goto out; + + if (c->gc_lnum == -1) { + c->gc_lnum = lnum; + err = LEB_RETAINED; + } else { + err = ubifs_wbuf_sync_nolock(wbuf); + if (err) + goto out; + + err = ubifs_leb_unmap(c, lnum); + if (err) + goto out; + + err = LEB_FREED; + } + } + +out: + ubifs_scan_destroy(sleb); + return err; +} + +/** + * ubifs_garbage_collect - UBIFS garbage collector. + * @c: UBIFS file-system description object + * @anyway: do GC even if there are free LEBs + * + * This function does out-of-place garbage collection. The return codes are: + * o positive LEB number if the LEB has been freed and may be used; + * o %-EAGAIN if the caller has to run commit; + * o %-ENOSPC if GC failed to make any progress; + * o other negative error codes in case of other errors. + * + * Garbage collector writes data to the journal when GC'ing data LEBs, and just + * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point + * commit may be required. But commit cannot be run from inside GC, because the + * caller might be holding the commit lock, so %-EAGAIN is returned instead; + * And this error code means that the caller has to run commit, and re-run GC + * if there is still no free space. + * + * There are many reasons why this function may return %-EAGAIN: + * o the log is full and there is no space to write an LEB reference for + * @c->gc_lnum; + * o the journal is too large and exceeds size limitations; + * o GC moved indexing LEBs, but they can be used only after the commit; + * o the shrinker fails to find clean znodes to free and requests the commit; + * o etc. + * + * Note, if the file-system is close to be full, this function may return + * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of + * the function. E.g., this happens if the limits on the journal size are too + * tough and GC writes too much to the journal before an LEB is freed. This + * might also mean that the journal is too large, and the TNC becomes to big, + * so that the shrinker is constantly called, finds not clean znodes to free, + * and requests commit. Well, this may also happen if the journal is all right, + * but another kernel process consumes too much memory. Anyway, infinite + * %-EAGAIN may happen, but in some extreme/misconfiguration cases. + */ +int ubifs_garbage_collect(struct ubifs_info *c, int anyway) +{ + int i, err, ret, min_space = c->dead_wm; + struct ubifs_lprops lp; + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + + ubifs_assert_cmt_locked(c); + + if (ubifs_gc_should_commit(c)) + return -EAGAIN; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + + if (c->ro_media) { + ret = -EROFS; + goto out_unlock; + } + + /* We expect the write-buffer to be empty on entry */ + ubifs_assert(!wbuf->used); + + for (i = 0; ; i++) { + int space_before = c->leb_size - wbuf->offs - wbuf->used; + int space_after; + + cond_resched(); + + /* Give the commit an opportunity to run */ + if (ubifs_gc_should_commit(c)) { + ret = -EAGAIN; + break; + } + + if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) { + /* + * We've done enough iterations. Indexing LEBs were + * moved and will be available after the commit. + */ + dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN"); + ubifs_commit_required(c); + ret = -EAGAIN; + break; + } + + if (i > HARD_LEBS_LIMIT) { + /* + * We've moved too many LEBs and have not made + * progress, give up. + */ + dbg_gc("hard limit, -ENOSPC"); + ret = -ENOSPC; + break; + } + + /* + * Empty and freeable LEBs can turn up while we waited for + * the wbuf lock, or while we have been running GC. In that + * case, we should just return one of those instead of + * continuing to GC dirty LEBs. Hence we request + * 'ubifs_find_dirty_leb()' to return an empty LEB if it can. + */ + ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1); + if (ret) { + if (ret == -ENOSPC) + dbg_gc("no more dirty LEBs"); + break; + } + + dbg_gc("found LEB %d: free %d, dirty %d, sum %d " + "(min. space %d)", lp.lnum, lp.free, lp.dirty, + lp.free + lp.dirty, min_space); + + if (lp.free + lp.dirty == c->leb_size) { + /* An empty LEB was returned */ + dbg_gc("LEB %d is free, return it", lp.lnum); + /* + * ubifs_find_dirty_leb() doesn't return freeable index + * LEBs. + */ + ubifs_assert(!(lp.flags & LPROPS_INDEX)); + if (lp.free != c->leb_size) { + /* + * Write buffers must be sync'd before + * unmapping freeable LEBs, because one of them + * may contain data which obsoletes something + * in 'lp.pnum'. + */ + ret = gc_sync_wbufs(c); + if (ret) + goto out; + ret = ubifs_change_one_lp(c, lp.lnum, + c->leb_size, 0, 0, 0, + 0); + if (ret) + goto out; + } + ret = ubifs_leb_unmap(c, lp.lnum); + if (ret) + goto out; + ret = lp.lnum; + break; + } + + space_before = c->leb_size - wbuf->offs - wbuf->used; + if (wbuf->lnum == -1) + space_before = 0; + + ret = ubifs_garbage_collect_leb(c, &lp); + if (ret < 0) { + if (ret == -EAGAIN || ret == -ENOSPC) { + /* + * These codes are not errors, so we have to + * return the LEB to lprops. But if the + * 'ubifs_return_leb()' function fails, its + * failure code is propagated to the caller + * instead of the original '-EAGAIN' or + * '-ENOSPC'. + */ + err = ubifs_return_leb(c, lp.lnum); + if (err) + ret = err; + break; + } + goto out; + } + + if (ret == LEB_FREED) { + /* An LEB has been freed and is ready for use */ + dbg_gc("LEB %d freed, return", lp.lnum); + ret = lp.lnum; + break; + } + + if (ret == LEB_FREED_IDX) { + /* + * This was an indexing LEB and it cannot be + * immediately used. And instead of requesting the + * commit straight away, we try to garbage collect some + * more. + */ + dbg_gc("indexing LEB %d freed, continue", lp.lnum); + continue; + } + + ubifs_assert(ret == LEB_RETAINED); + space_after = c->leb_size - wbuf->offs - wbuf->used; + dbg_gc("LEB %d retained, freed %d bytes", lp.lnum, + space_after - space_before); + + if (space_after > space_before) { + /* GC makes progress, keep working */ + min_space >>= 1; + if (min_space < c->dead_wm) + min_space = c->dead_wm; + continue; + } + + dbg_gc("did not make progress"); + + /* + * GC moved an LEB bud have not done any progress. This means + * that the previous GC head LEB contained too few free space + * and the LEB which was GC'ed contained only large nodes which + * did not fit that space. + * + * We can do 2 things: + * 1. pick another LEB in a hope it'll contain a small node + * which will fit the space we have at the end of current GC + * head LEB, but there is no guarantee, so we try this out + * unless we have already been working for too long; + * 2. request an LEB with more dirty space, which will force + * 'ubifs_find_dirty_leb()' to start scanning the lprops + * table, instead of just picking one from the heap + * (previously it already picked the dirtiest LEB). + */ + if (i < SOFT_LEBS_LIMIT) { + dbg_gc("try again"); + continue; + } + + min_space <<= 1; + if (min_space > c->dark_wm) + min_space = c->dark_wm; + dbg_gc("set min. space to %d", min_space); + } + + if (ret == -ENOSPC && !list_empty(&c->idx_gc)) { + dbg_gc("no space, some index LEBs GC'ed, -EAGAIN"); + ubifs_commit_required(c); + ret = -EAGAIN; + } + + err = ubifs_wbuf_sync_nolock(wbuf); + if (!err) + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) { + ret = err; + goto out; + } +out_unlock: + mutex_unlock(&wbuf->io_mutex); + return ret; + +out: + ubifs_assert(ret < 0); + ubifs_assert(ret != -ENOSPC && ret != -EAGAIN); + ubifs_ro_mode(c, ret); + ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + ubifs_return_leb(c, lp.lnum); + return ret; +} + +/** + * ubifs_gc_start_commit - garbage collection at start of commit. + * @c: UBIFS file-system description object + * + * If a LEB has only dirty and free space, then we may safely unmap it and make + * it free. Note, we cannot do this with indexing LEBs because dirty space may + * correspond index nodes that are required for recovery. In that case, the + * LEB cannot be unmapped until after the next commit. + * + * This function returns %0 upon success and a negative error code upon failure. + */ +int ubifs_gc_start_commit(struct ubifs_info *c) +{ + struct ubifs_gced_idx_leb *idx_gc; + const struct ubifs_lprops *lp; + int err = 0, flags; + + ubifs_get_lprops(c); + + /* + * Unmap (non-index) freeable LEBs. Note that recovery requires that all + * wbufs are sync'd before this, which is done in 'do_commit()'. + */ + while (1) { + lp = ubifs_fast_find_freeable(c); + if (unlikely(IS_ERR(lp))) { + err = PTR_ERR(lp); + goto out; + } + if (!lp) + break; + ubifs_assert(!(lp->flags & LPROPS_TAKEN)); + ubifs_assert(!(lp->flags & LPROPS_INDEX)); + err = ubifs_leb_unmap(c, lp->lnum); + if (err) + goto out; + lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); + if (unlikely(IS_ERR(lp))) { + err = PTR_ERR(lp); + goto out; + } + ubifs_assert(!(lp->flags & LPROPS_TAKEN)); + ubifs_assert(!(lp->flags & LPROPS_INDEX)); + } + + /* Mark GC'd index LEBs OK to unmap after this commit finishes */ + list_for_each_entry(idx_gc, &c->idx_gc, list) + idx_gc->unmap = 1; + + /* Record index freeable LEBs for unmapping after commit */ + while (1) { + lp = ubifs_fast_find_frdi_idx(c); + if (unlikely(IS_ERR(lp))) { + err = PTR_ERR(lp); + goto out; + } + if (!lp) + break; + idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS); + if (!idx_gc) { + err = -ENOMEM; + goto out; + } + ubifs_assert(!(lp->flags & LPROPS_TAKEN)); + ubifs_assert(lp->flags & LPROPS_INDEX); + /* Don't release the LEB until after the next commit */ + flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; + lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); + if (unlikely(IS_ERR(lp))) { + err = PTR_ERR(lp); + kfree(idx_gc); + goto out; + } + ubifs_assert(lp->flags & LPROPS_TAKEN); + ubifs_assert(!(lp->flags & LPROPS_INDEX)); + idx_gc->lnum = lp->lnum; + idx_gc->unmap = 1; + list_add(&idx_gc->list, &c->idx_gc); + } +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_gc_end_commit - garbage collection at end of commit. + * @c: UBIFS file-system description object + * + * This function completes out-of-place garbage collection of index LEBs. + */ +int ubifs_gc_end_commit(struct ubifs_info *c) +{ + struct ubifs_gced_idx_leb *idx_gc, *tmp; + struct ubifs_wbuf *wbuf; + int err = 0; + + wbuf = &c->jheads[GCHD].wbuf; + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list) + if (idx_gc->unmap) { + dbg_gc("LEB %d", idx_gc->lnum); + err = ubifs_leb_unmap(c, idx_gc->lnum); + if (err) + goto out; + err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC, + LPROPS_NC, 0, LPROPS_TAKEN, -1); + if (err) + goto out; + list_del(&idx_gc->list); + kfree(idx_gc); + } +out: + mutex_unlock(&wbuf->io_mutex); + return err; +} + +/** + * ubifs_destroy_idx_gc - destroy idx_gc list. + * @c: UBIFS file-system description object + * + * This function destroys the idx_gc list. It is called when unmounting or + * remounting read-only so locks are not needed. + */ +void ubifs_destroy_idx_gc(struct ubifs_info *c) +{ + while (!list_empty(&c->idx_gc)) { + struct ubifs_gced_idx_leb *idx_gc; + + idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, + list); + c->idx_gc_cnt -= 1; + list_del(&idx_gc->list); + kfree(idx_gc); + } + +} + +/** + * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list. + * @c: UBIFS file-system description object + * + * Called during start commit so locks are not needed. + */ +int ubifs_get_idx_gc_leb(struct ubifs_info *c) +{ + struct ubifs_gced_idx_leb *idx_gc; + int lnum; + + if (list_empty(&c->idx_gc)) + return -ENOSPC; + idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list); + lnum = idx_gc->lnum; + /* c->idx_gc_cnt is updated by the caller when lprops are updated */ + list_del(&idx_gc->list); + kfree(idx_gc); + return lnum; +} diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c new file mode 100644 index 000000000000..3374f91b6709 --- /dev/null +++ b/fs/ubifs/io.c @@ -0,0 +1,914 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + * Zoltan Sogor + */ + +/* + * This file implements UBIFS I/O subsystem which provides various I/O-related + * helper functions (reading/writing/checking/validating nodes) and implements + * write-buffering support. Write buffers help to save space which otherwise + * would have been wasted for padding to the nearest minimal I/O unit boundary. + * Instead, data first goes to the write-buffer and is flushed when the + * buffer is full or when it is not used for some time (by timer). This is + * similarto the mechanism is used by JFFS2. + * + * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by + * mutexes defined inside these objects. Since sometimes upper-level code + * has to lock the write-buffer (e.g. journal space reservation code), many + * functions related to write-buffers have "nolock" suffix which means that the + * caller has to lock the write-buffer before calling this function. + * + * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not + * aligned, UBIFS starts the next node from the aligned address, and the padded + * bytes may contain any rubbish. In other words, UBIFS does not put padding + * bytes in those small gaps. Common headers of nodes store real node lengths, + * not aligned lengths. Indexing nodes also store real lengths in branches. + * + * UBIFS uses padding when it pads to the next min. I/O unit. In this case it + * uses padding nodes or padding bytes, if the padding node does not fit. + * + * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes + * every time they are read from the flash media. + */ + +#include +#include "ubifs.h" + +/** + * ubifs_check_node - check node. + * @c: UBIFS file-system description object + * @buf: node to check + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * @quiet: print no messages + * + * This function checks node magic number and CRC checksum. This function also + * validates node length to prevent UBIFS from becoming crazy when an attacker + * feeds it a file-system image with incorrect nodes. For example, too large + * node length in the common header could cause UBIFS to read memory outside of + * allocated buffer when checking the CRC checksum. + * + * This function returns zero in case of success %-EUCLEAN in case of bad CRC + * or magic. + */ +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, + int offs, int quiet) +{ + int err = -EINVAL, type, node_len; + uint32_t crc, node_crc, magic; + const struct ubifs_ch *ch = buf; + + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + + magic = le32_to_cpu(ch->magic); + if (magic != UBIFS_NODE_MAGIC) { + if (!quiet) + ubifs_err("bad magic %#08x, expected %#08x", + magic, UBIFS_NODE_MAGIC); + err = -EUCLEAN; + goto out; + } + + type = ch->node_type; + if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { + if (!quiet) + ubifs_err("bad node type %d", type); + goto out; + } + + node_len = le32_to_cpu(ch->len); + if (node_len + offs > c->leb_size) + goto out_len; + + if (c->ranges[type].max_len == 0) { + if (node_len != c->ranges[type].len) + goto out_len; + } else if (node_len < c->ranges[type].min_len || + node_len > c->ranges[type].max_len) + goto out_len; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); + node_crc = le32_to_cpu(ch->crc); + if (crc != node_crc) { + if (!quiet) + ubifs_err("bad CRC: calculated %#08x, read %#08x", + crc, node_crc); + err = -EUCLEAN; + goto out; + } + + return 0; + +out_len: + if (!quiet) + ubifs_err("bad node length %d", node_len); +out: + if (!quiet) { + ubifs_err("bad node at LEB %d:%d", lnum, offs); + dbg_dump_node(c, buf); + dbg_dump_stack(); + } + return err; +} + +/** + * ubifs_pad - pad flash space. + * @c: UBIFS file-system description object + * @buf: buffer to put padding to + * @pad: how many bytes to pad + * + * The flash media obliges us to write only in chunks of %c->min_io_size and + * when we have to write less data we add padding node to the write-buffer and + * pad it to the next minimal I/O unit's boundary. Padding nodes help when the + * media is being scanned. If the amount of wasted space is not enough to fit a + * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes + * pattern (%UBIFS_PADDING_BYTE). + * + * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is + * used. + */ +void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) +{ + uint32_t crc; + + ubifs_assert(pad >= 0 && !(pad & 7)); + + if (pad >= UBIFS_PAD_NODE_SZ) { + struct ubifs_ch *ch = buf; + struct ubifs_pad_node *pad_node = buf; + + ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); + ch->node_type = UBIFS_PAD_NODE; + ch->group_type = UBIFS_NO_NODE_GROUP; + ch->padding[0] = ch->padding[1] = 0; + ch->sqnum = 0; + ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); + pad -= UBIFS_PAD_NODE_SZ; + pad_node->pad_len = cpu_to_le32(pad); + crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); + ch->crc = cpu_to_le32(crc); + memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); + } else if (pad > 0) + /* Too little space, padding node won't fit */ + memset(buf, UBIFS_PADDING_BYTE, pad); +} + +/** + * next_sqnum - get next sequence number. + * @c: UBIFS file-system description object + */ +static unsigned long long next_sqnum(struct ubifs_info *c) +{ + unsigned long long sqnum; + + spin_lock(&c->cnt_lock); + sqnum = ++c->max_sqnum; + spin_unlock(&c->cnt_lock); + + if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { + if (sqnum >= SQNUM_WATERMARK) { + ubifs_err("sequence number overflow %llu, end of life", + sqnum); + ubifs_ro_mode(c, -EINVAL); + } + ubifs_warn("running out of sequence numbers, end of life soon"); + } + + return sqnum; +} + +/** + * ubifs_prepare_node - prepare node to be written to flash. + * @c: UBIFS file-system description object + * @node: the node to pad + * @len: node length + * @pad: if the buffer has to be padded + * + * This function prepares node at @node to be written to the media - it + * calculates node CRC, fills the common header, and adds proper padding up to + * the next minimum I/O unit if @pad is not zero. + */ +void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) +{ + uint32_t crc; + struct ubifs_ch *ch = node; + unsigned long long sqnum = next_sqnum(c); + + ubifs_assert(len >= UBIFS_CH_SZ); + + ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); + ch->len = cpu_to_le32(len); + ch->group_type = UBIFS_NO_NODE_GROUP; + ch->sqnum = cpu_to_le64(sqnum); + ch->padding[0] = ch->padding[1] = 0; + crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); + ch->crc = cpu_to_le32(crc); + + if (pad) { + len = ALIGN(len, 8); + pad = ALIGN(len, c->min_io_size) - len; + ubifs_pad(c, node + len, pad); + } +} + +/** + * ubifs_prep_grp_node - prepare node of a group to be written to flash. + * @c: UBIFS file-system description object + * @node: the node to pad + * @len: node length + * @last: indicates the last node of the group + * + * This function prepares node at @node to be written to the media - it + * calculates node CRC and fills the common header. + */ +void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) +{ + uint32_t crc; + struct ubifs_ch *ch = node; + unsigned long long sqnum = next_sqnum(c); + + ubifs_assert(len >= UBIFS_CH_SZ); + + ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); + ch->len = cpu_to_le32(len); + if (last) + ch->group_type = UBIFS_LAST_OF_NODE_GROUP; + else + ch->group_type = UBIFS_IN_NODE_GROUP; + ch->sqnum = cpu_to_le64(sqnum); + ch->padding[0] = ch->padding[1] = 0; + crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); + ch->crc = cpu_to_le32(crc); +} + +/** + * wbuf_timer_callback - write-buffer timer callback function. + * @data: timer data (write-buffer descriptor) + * + * This function is called when the write-buffer timer expires. + */ +static void wbuf_timer_callback_nolock(unsigned long data) +{ + struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data; + + wbuf->need_sync = 1; + wbuf->c->need_wbuf_sync = 1; + ubifs_wake_up_bgt(wbuf->c); +} + +/** + * new_wbuf_timer - start new write-buffer timer. + * @wbuf: write-buffer descriptor + */ +static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) +{ + ubifs_assert(!timer_pending(&wbuf->timer)); + + if (!wbuf->timeout) + return; + + wbuf->timer.expires = jiffies + wbuf->timeout; + add_timer(&wbuf->timer); +} + +/** + * cancel_wbuf_timer - cancel write-buffer timer. + * @wbuf: write-buffer descriptor + */ +static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) +{ + /* + * If the syncer is waiting for the lock (from the background thread's + * context) and another task is changing write-buffer then the syncing + * should be canceled. + */ + wbuf->need_sync = 0; + del_timer(&wbuf->timer); +} + +/** + * ubifs_wbuf_sync_nolock - synchronize write-buffer. + * @wbuf: write-buffer to synchronize + * + * This function synchronizes write-buffer @buf and returns zero in case of + * success or a negative error code in case of failure. + */ +int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) +{ + struct ubifs_info *c = wbuf->c; + int err, dirt; + + cancel_wbuf_timer_nolock(wbuf); + if (!wbuf->used || wbuf->lnum == -1) + /* Write-buffer is empty or not seeked */ + return 0; + + dbg_io("LEB %d:%d, %d bytes", + wbuf->lnum, wbuf->offs, wbuf->used); + ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); + ubifs_assert(!(wbuf->avail & 7)); + ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); + + if (c->ro_media) + return -EROFS; + + ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, + c->min_io_size, wbuf->dtype); + if (err) { + ubifs_err("cannot write %d bytes to LEB %d:%d", + c->min_io_size, wbuf->lnum, wbuf->offs); + dbg_dump_stack(); + return err; + } + + dirt = wbuf->avail; + + spin_lock(&wbuf->lock); + wbuf->offs += c->min_io_size; + wbuf->avail = c->min_io_size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + + if (wbuf->sync_callback) + err = wbuf->sync_callback(c, wbuf->lnum, + c->leb_size - wbuf->offs, dirt); + return err; +} + +/** + * ubifs_wbuf_seek_nolock - seek write-buffer. + * @wbuf: write-buffer + * @lnum: logical eraseblock number to seek to + * @offs: logical eraseblock offset to seek to + * @dtype: data type + * + * This function targets the write buffer to logical eraseblock @lnum:@offs. + * The write-buffer is synchronized if it is not empty. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + int dtype) +{ + const struct ubifs_info *c = wbuf->c; + + dbg_io("LEB %d:%d", lnum, offs); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); + ubifs_assert(offs >= 0 && offs <= c->leb_size); + ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); + ubifs_assert(lnum != wbuf->lnum); + + if (wbuf->used > 0) { + int err = ubifs_wbuf_sync_nolock(wbuf); + + if (err) + return err; + } + + spin_lock(&wbuf->lock); + wbuf->lnum = lnum; + wbuf->offs = offs; + wbuf->avail = c->min_io_size; + wbuf->used = 0; + spin_unlock(&wbuf->lock); + wbuf->dtype = dtype; + + return 0; +} + +/** + * ubifs_bg_wbufs_sync - synchronize write-buffers. + * @c: UBIFS file-system description object + * + * This function is called by background thread to synchronize write-buffers. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_bg_wbufs_sync(struct ubifs_info *c) +{ + int err, i; + + if (!c->need_wbuf_sync) + return 0; + c->need_wbuf_sync = 0; + + if (c->ro_media) { + err = -EROFS; + goto out_timers; + } + + dbg_io("synchronize"); + for (i = 0; i < c->jhead_cnt; i++) { + struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; + + cond_resched(); + + /* + * If the mutex is locked then wbuf is being changed, so + * synchronization is not necessary. + */ + if (mutex_is_locked(&wbuf->io_mutex)) + continue; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + if (!wbuf->need_sync) { + mutex_unlock(&wbuf->io_mutex); + continue; + } + + err = ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + if (err) { + ubifs_err("cannot sync write-buffer, error %d", err); + ubifs_ro_mode(c, err); + goto out_timers; + } + } + + return 0; + +out_timers: + /* Cancel all timers to prevent repeated errors */ + for (i = 0; i < c->jhead_cnt; i++) { + struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + cancel_wbuf_timer_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + } + return err; +} + +/** + * ubifs_wbuf_write_nolock - write data to flash via write-buffer. + * @wbuf: write-buffer + * @buf: node to write + * @len: node length + * + * This function writes data to flash via write-buffer @wbuf. This means that + * the last piece of the node won't reach the flash media immediately if it + * does not take whole minimal I/O unit. Instead, the node will sit in RAM + * until the write-buffer is synchronized (e.g., by timer). + * + * This function returns zero in case of success and a negative error code in + * case of failure. If the node cannot be written because there is no more + * space in this logical eraseblock, %-ENOSPC is returned. + */ +int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) +{ + struct ubifs_info *c = wbuf->c; + int err, written, n, aligned_len = ALIGN(len, 8), offs; + + dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len, + dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum, + wbuf->offs + wbuf->used); + ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); + ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); + ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); + ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); + ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); + + if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { + err = -ENOSPC; + goto out; + } + + cancel_wbuf_timer_nolock(wbuf); + + if (c->ro_media) + return -EROFS; + + if (aligned_len <= wbuf->avail) { + /* + * The node is not very large and fits entirely within + * write-buffer. + */ + memcpy(wbuf->buf + wbuf->used, buf, len); + + if (aligned_len == wbuf->avail) { + dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, + wbuf->offs); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, + wbuf->offs, c->min_io_size, + wbuf->dtype); + if (err) + goto out; + + spin_lock(&wbuf->lock); + wbuf->offs += c->min_io_size; + wbuf->avail = c->min_io_size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + } else { + spin_lock(&wbuf->lock); + wbuf->avail -= aligned_len; + wbuf->used += aligned_len; + spin_unlock(&wbuf->lock); + } + + goto exit; + } + + /* + * The node is large enough and does not fit entirely within current + * minimal I/O unit. We have to fill and flush write-buffer and switch + * to the next min. I/O unit. + */ + dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs); + memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, + c->min_io_size, wbuf->dtype); + if (err) + goto out; + + offs = wbuf->offs + c->min_io_size; + len -= wbuf->avail; + aligned_len -= wbuf->avail; + written = wbuf->avail; + + /* + * The remaining data may take more whole min. I/O units, so write the + * remains multiple to min. I/O unit size directly to the flash media. + * We align node length to 8-byte boundary because we anyway flash wbuf + * if the remaining space is less than 8 bytes. + */ + n = aligned_len >> c->min_io_shift; + if (n) { + n <<= c->min_io_shift; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); + err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, + wbuf->dtype); + if (err) + goto out; + offs += n; + aligned_len -= n; + len -= n; + written += n; + } + + spin_lock(&wbuf->lock); + if (aligned_len) + /* + * And now we have what's left and what does not take whole + * min. I/O unit, so write it to the write-buffer and we are + * done. + */ + memcpy(wbuf->buf, buf + written, len); + + wbuf->offs = offs; + wbuf->used = aligned_len; + wbuf->avail = c->min_io_size - aligned_len; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + +exit: + if (wbuf->sync_callback) { + int free = c->leb_size - wbuf->offs - wbuf->used; + + err = wbuf->sync_callback(c, wbuf->lnum, free, 0); + if (err) + goto out; + } + + if (wbuf->used) + new_wbuf_timer_nolock(wbuf); + + return 0; + +out: + ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", + len, wbuf->lnum, wbuf->offs, err); + dbg_dump_node(c, buf); + dbg_dump_stack(); + dbg_dump_leb(c, wbuf->lnum); + return err; +} + +/** + * ubifs_write_node - write node to the media. + * @c: UBIFS file-system description object + * @buf: the node to write + * @len: node length + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) + * + * This function automatically fills node magic number, assigns sequence + * number, and calculates node CRC checksum. The length of the @buf buffer has + * to be aligned to the minimal I/O unit size. This function automatically + * appends padding node and padding bytes if needed. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, + int offs, int dtype) +{ + int err, buf_len = ALIGN(len, c->min_io_size); + + dbg_io("LEB %d:%d, %s, length %d (aligned %d)", + lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len, + buf_len); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); + + if (c->ro_media) + return -EROFS; + + ubifs_prepare_node(c, buf, len, 1); + err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); + if (err) { + ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", + buf_len, lnum, offs, err); + dbg_dump_node(c, buf); + dbg_dump_stack(); + } + + return err; +} + +/** + * ubifs_read_node_wbuf - read node from the media or write-buffer. + * @wbuf: wbuf to check for un-written data + * @buf: buffer to read to + * @type: node type + * @len: node length + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * + * This function reads a node of known type and length, checks it and stores + * in @buf. If the node partially or fully sits in the write-buffer, this + * function takes data from the buffer, otherwise it reads the flash media. + * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative + * error code in case of failure. + */ +int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, + int lnum, int offs) +{ + const struct ubifs_info *c = wbuf->c; + int err, rlen, overlap; + struct ubifs_ch *ch = buf; + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); + + spin_lock(&wbuf->lock); + overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); + if (!overlap) { + /* We may safely unlock the write-buffer and read the data */ + spin_unlock(&wbuf->lock); + return ubifs_read_node(c, buf, type, len, lnum, offs); + } + + /* Don't read under wbuf */ + rlen = wbuf->offs - offs; + if (rlen < 0) + rlen = 0; + + /* Copy the rest from the write-buffer */ + memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); + spin_unlock(&wbuf->lock); + + if (rlen > 0) { + /* Read everything that goes before write-buffer */ + err = ubi_read(c->ubi, lnum, buf, offs, rlen); + if (err && err != -EBADMSG) { + ubifs_err("failed to read node %d from LEB %d:%d, " + "error %d", type, lnum, offs, err); + dbg_dump_stack(); + return err; + } + } + + if (type != ch->node_type) { + ubifs_err("bad node type (%d but expected %d)", + ch->node_type, type); + goto out; + } + + err = ubifs_check_node(c, buf, lnum, offs, 0); + if (err) { + ubifs_err("expected node type %d", type); + return err; + } + + rlen = le32_to_cpu(ch->len); + if (rlen != len) { + ubifs_err("bad node length %d, expected %d", rlen, len); + goto out; + } + + return 0; + +out: + ubifs_err("bad node at LEB %d:%d", lnum, offs); + dbg_dump_node(c, buf); + dbg_dump_stack(); + return -EINVAL; +} + +/** + * ubifs_read_node - read node. + * @c: UBIFS file-system description object + * @buf: buffer to read to + * @type: node type + * @len: node length (not aligned) + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * + * This function reads a node of known type and and length, checks it and + * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched + * and a negative error code in case of failure. + */ +int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, + int lnum, int offs) +{ + int err, l; + struct ubifs_ch *ch = buf; + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); + + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err && err != -EBADMSG) { + ubifs_err("cannot read node %d from LEB %d:%d, error %d", + type, lnum, offs, err); + return err; + } + + if (type != ch->node_type) { + ubifs_err("bad node type (%d but expected %d)", + ch->node_type, type); + goto out; + } + + err = ubifs_check_node(c, buf, lnum, offs, 0); + if (err) { + ubifs_err("expected node type %d", type); + return err; + } + + l = le32_to_cpu(ch->len); + if (l != len) { + ubifs_err("bad node length %d, expected %d", l, len); + goto out; + } + + return 0; + +out: + ubifs_err("bad node at LEB %d:%d", lnum, offs); + dbg_dump_node(c, buf); + dbg_dump_stack(); + return -EINVAL; +} + +/** + * ubifs_wbuf_init - initialize write-buffer. + * @c: UBIFS file-system description object + * @wbuf: write-buffer to initialize + * + * This function initializes write buffer. Returns zero in case of success + * %-ENOMEM in case of failure. + */ +int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) +{ + size_t size; + + wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); + if (!wbuf->buf) + return -ENOMEM; + + size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); + wbuf->inodes = kmalloc(size, GFP_KERNEL); + if (!wbuf->inodes) { + kfree(wbuf->buf); + wbuf->buf = NULL; + return -ENOMEM; + } + + wbuf->used = 0; + wbuf->lnum = wbuf->offs = -1; + wbuf->avail = c->min_io_size; + wbuf->dtype = UBI_UNKNOWN; + wbuf->sync_callback = NULL; + mutex_init(&wbuf->io_mutex); + spin_lock_init(&wbuf->lock); + + wbuf->c = c; + init_timer(&wbuf->timer); + wbuf->timer.function = wbuf_timer_callback_nolock; + wbuf->timer.data = (unsigned long)wbuf; + wbuf->timeout = DEFAULT_WBUF_TIMEOUT; + wbuf->next_ino = 0; + + return 0; +} + +/** + * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. + * @wbuf: the write-buffer whereto add + * @inum: the inode number + * + * This function adds an inode number to the inode array of the write-buffer. + */ +void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum) +{ + if (!wbuf->buf) + /* NOR flash or something similar */ + return; + + spin_lock(&wbuf->lock); + if (wbuf->used) + wbuf->inodes[wbuf->next_ino++] = inum; + spin_unlock(&wbuf->lock); +} + +/** + * wbuf_has_ino - returns if the wbuf contains data from the inode. + * @wbuf: the write-buffer + * @inum: the inode number + * + * This function returns with %1 if the write-buffer contains some data from the + * given inode otherwise it returns with %0. + */ +static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum) +{ + int i, ret = 0; + + spin_lock(&wbuf->lock); + for (i = 0; i < wbuf->next_ino; i++) + if (inum == wbuf->inodes[i]) { + ret = 1; + break; + } + spin_unlock(&wbuf->lock); + + return ret; +} + +/** + * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode. + * @c: UBIFS file-system description object + * @inode: inode to synchronize + * + * This function synchronizes write-buffers which contain nodes belonging to + * @inode. Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode) +{ + int i, err = 0; + + for (i = 0; i < c->jhead_cnt; i++) { + struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; + + if (i == GCHD) + /* + * GC head is special, do not look at it. Even if the + * head contains something related to this inode, it is + * a _copy_ of corresponding on-flash node which sits + * somewhere else. + */ + continue; + + if (!wbuf_has_ino(wbuf, inode->i_ino)) + continue; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + if (wbuf_has_ino(wbuf, inode->i_ino)) + err = ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + + if (err) { + ubifs_ro_mode(c, err); + return err; + } + } + return 0; +} diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c new file mode 100644 index 000000000000..5e82cffe9695 --- /dev/null +++ b/fs/ubifs/ioctl.c @@ -0,0 +1,204 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Zoltan Sogor + * Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* This file implements EXT2-compatible extended attribute ioctl() calls */ + +#include +#include +#include +#include "ubifs.h" + +/** + * ubifs_set_inode_flags - set VFS inode flags. + * @inode: VFS inode to set flags for + * + * This function propagates flags from UBIFS inode object to VFS inode object. + */ +void ubifs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = ubifs_inode(inode)->flags; + + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC); + if (flags & UBIFS_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & UBIFS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & UBIFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & UBIFS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + +/* + * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags. + * @ioctl_flags: flags to convert + * + * This function convert ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags + * (@UBIFS_COMPR_FL, etc). + */ +static int ioctl2ubifs(int ioctl_flags) +{ + int ubifs_flags = 0; + + if (ioctl_flags & FS_COMPR_FL) + ubifs_flags |= UBIFS_COMPR_FL; + if (ioctl_flags & FS_SYNC_FL) + ubifs_flags |= UBIFS_SYNC_FL; + if (ioctl_flags & FS_APPEND_FL) + ubifs_flags |= UBIFS_APPEND_FL; + if (ioctl_flags & FS_IMMUTABLE_FL) + ubifs_flags |= UBIFS_IMMUTABLE_FL; + if (ioctl_flags & FS_DIRSYNC_FL) + ubifs_flags |= UBIFS_DIRSYNC_FL; + + return ubifs_flags; +} + +/* + * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags. + * @ubifs_flags: flags to convert + * + * This function convert UBIFS (@UBIFS_COMPR_FL, etc) to ioctl flags + * (@FS_COMPR_FL, etc). + */ +static int ubifs2ioctl(int ubifs_flags) +{ + int ioctl_flags = 0; + + if (ubifs_flags & UBIFS_COMPR_FL) + ioctl_flags |= FS_COMPR_FL; + if (ubifs_flags & UBIFS_SYNC_FL) + ioctl_flags |= FS_SYNC_FL; + if (ubifs_flags & UBIFS_APPEND_FL) + ioctl_flags |= FS_APPEND_FL; + if (ubifs_flags & UBIFS_IMMUTABLE_FL) + ioctl_flags |= FS_IMMUTABLE_FL; + if (ubifs_flags & UBIFS_DIRSYNC_FL) + ioctl_flags |= FS_DIRSYNC_FL; + + return ioctl_flags; +} + +static int setflags(struct inode *inode, int flags) +{ + int oldflags, err, release; + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_budget_req req = { .dirtied_ino = 1, + .dirtied_ino_d = ui->data_len }; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + */ + mutex_lock(&ui->ui_mutex); + oldflags = ubifs2ioctl(ui->flags); + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + err = -EPERM; + goto out_unlock; + } + } + + ui->flags = ioctl2ubifs(flags); + ubifs_set_inode_flags(inode); + inode->i_ctime = ubifs_current_time(inode); + release = ui->dirty; + mark_inode_dirty_sync(inode); + mutex_unlock(&ui->ui_mutex); + + if (release) + ubifs_release_budget(c, &req); + if (IS_SYNC(inode)) + err = write_inode_now(inode, 1); + return err; + +out_unlock: + ubifs_err("can't modify inode %lu attributes", inode->i_ino); + mutex_unlock(&ui->ui_mutex); + ubifs_release_budget(c, &req); + return err; +} + +long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int flags, err; + struct inode *inode = file->f_path.dentry->d_inode; + + switch (cmd) { + case FS_IOC_GETFLAGS: + flags = ubifs2ioctl(ubifs_inode(inode)->flags); + + return put_user(flags, (int __user *) arg); + + case FS_IOC_SETFLAGS: { + if (IS_RDONLY(inode)) + return -EROFS; + + if (!is_owner_or_cap(inode)) + return -EACCES; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + if (!S_ISDIR(inode->i_mode)) + flags &= ~FS_DIRSYNC_FL; + + /* + * Make sure the file-system is read-write and make sure it + * will not become read-only while we are changing the flags. + */ + err = mnt_want_write(file->f_path.mnt); + if (err) + return err; + err = setflags(inode, flags); + mnt_drop_write(file->f_path.mnt); + return err; + } + + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; + break; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; + break; + default: + return -ENOIOCTLCMD; + } + return ubifs_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#endif diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c new file mode 100644 index 000000000000..283155abe5f5 --- /dev/null +++ b/fs/ubifs/journal.c @@ -0,0 +1,1387 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS journal. + * + * The journal consists of 2 parts - the log and bud LEBs. The log has fixed + * length and position, while a bud logical eraseblock is any LEB in the main + * area. Buds contain file system data - data nodes, inode nodes, etc. The log + * contains only references to buds and some other stuff like commit + * start node. The idea is that when we commit the journal, we do + * not copy the data, the buds just become indexed. Since after the commit the + * nodes in bud eraseblocks become leaf nodes of the file system index tree, we + * use term "bud". Analogy is obvious, bud eraseblocks contain nodes which will + * become leafs in the future. + * + * The journal is multi-headed because we want to write data to the journal as + * optimally as possible. It is nice to have nodes belonging to the same inode + * in one LEB, so we may write data owned by different inodes to different + * journal heads, although at present only one data head is used. + * + * For recovery reasons, the base head contains all inode nodes, all directory + * entry nodes and all truncate nodes. This means that the other heads contain + * only data nodes. + * + * Bud LEBs may be half-indexed. For example, if the bud was not full at the + * time of commit, the bud is retained to continue to be used in the journal, + * even though the "front" of the LEB is now indexed. In that case, the log + * reference contains the offset where the bud starts for the purposes of the + * journal. + * + * The journal size has to be limited, because the larger is the journal, the + * longer it takes to mount UBIFS (scanning the journal) and the more memory it + * takes (indexing in the TNC). + * + * All the journal write operations like 'ubifs_jnl_update()' here, which write + * multiple UBIFS nodes to the journal at one go, are atomic with respect to + * unclean reboots. Should the unclean reboot happen, the recovery code drops + * all the nodes. + */ + +#include "ubifs.h" + +/** + * zero_ino_node_unused - zero out unused fields of an on-flash inode node. + * @ino: the inode to zero out + */ +static inline void zero_ino_node_unused(struct ubifs_ino_node *ino) +{ + memset(ino->padding1, 0, 4); + memset(ino->padding2, 0, 26); +} + +/** + * zero_dent_node_unused - zero out unused fields of an on-flash directory + * entry node. + * @dent: the directory entry to zero out + */ +static inline void zero_dent_node_unused(struct ubifs_dent_node *dent) +{ + dent->padding1 = 0; + memset(dent->padding2, 0, 4); +} + +/** + * zero_data_node_unused - zero out unused fields of an on-flash data node. + * @data: the data node to zero out + */ +static inline void zero_data_node_unused(struct ubifs_data_node *data) +{ + memset(data->padding, 0, 2); +} + +/** + * zero_trun_node_unused - zero out unused fields of an on-flash truncation + * node. + * @trun: the truncation node to zero out + */ +static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) +{ + memset(trun->padding, 0, 12); +} + +/** + * reserve_space - reserve space in the journal. + * @c: UBIFS file-system description object + * @jhead: journal head number + * @len: node length + * + * This function reserves space in journal head @head. If the reservation + * succeeded, the journal head stays locked and later has to be unlocked using + * 'release_head()'. 'write_node()' and 'write_head()' functions also unlock + * it. Returns zero in case of success, %-EAGAIN if commit has to be done, and + * other negative error codes in case of other failures. + */ +static int reserve_space(struct ubifs_info *c, int jhead, int len) +{ + int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; + struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; + + /* + * Typically, the base head has smaller nodes written to it, so it is + * better to try to allocate space at the ends of eraseblocks. This is + * what the squeeze parameter does. + */ + squeeze = (jhead == BASEHD); +again: + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + + if (c->ro_media) { + err = -EROFS; + goto out_unlock; + } + + avail = c->leb_size - wbuf->offs - wbuf->used; + if (wbuf->lnum != -1 && avail >= len) + return 0; + + /* + * Write buffer wasn't seek'ed or there is no enough space - look for an + * LEB with some empty space. + */ + lnum = ubifs_find_free_space(c, len, &free, squeeze); + if (lnum >= 0) { + /* Found an LEB, add it to the journal head */ + offs = c->leb_size - free; + err = ubifs_add_bud_to_log(c, jhead, lnum, offs); + if (err) + goto out_return; + /* A new bud was successfully allocated and added to the log */ + goto out; + } + + err = lnum; + if (err != -ENOSPC) + goto out_unlock; + + /* + * No free space, we have to run garbage collector to make + * some. But the write-buffer mutex has to be unlocked because + * GC also takes it. + */ + dbg_jnl("no free space jhead %d, run GC", jhead); + mutex_unlock(&wbuf->io_mutex); + + lnum = ubifs_garbage_collect(c, 0); + if (lnum < 0) { + err = lnum; + if (err != -ENOSPC) + return err; + + /* + * GC could not make a free LEB. But someone else may + * have allocated new bud for this journal head, + * because we dropped @wbuf->io_mutex, so try once + * again. + */ + dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead); + if (retries++ < 2) { + dbg_jnl("retry (%d)", retries); + goto again; + } + + dbg_jnl("return -ENOSPC"); + return err; + } + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + dbg_jnl("got LEB %d for jhead %d", lnum, jhead); + avail = c->leb_size - wbuf->offs - wbuf->used; + + if (wbuf->lnum != -1 && avail >= len) { + /* + * Someone else has switched the journal head and we have + * enough space now. This happens when more then one process is + * trying to write to the same journal head at the same time. + */ + dbg_jnl("return LEB %d back, already have LEB %d:%d", + lnum, wbuf->lnum, wbuf->offs + wbuf->used); + err = ubifs_return_leb(c, lnum); + if (err) + goto out_unlock; + return 0; + } + + err = ubifs_add_bud_to_log(c, jhead, lnum, 0); + if (err) + goto out_return; + offs = 0; + +out: + err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM); + if (err) + goto out_unlock; + + return 0; + +out_unlock: + mutex_unlock(&wbuf->io_mutex); + return err; + +out_return: + /* An error occurred and the LEB has to be returned to lprops */ + ubifs_assert(err < 0); + err1 = ubifs_return_leb(c, lnum); + if (err1 && err == -EAGAIN) + /* + * Return original error code only if it is not %-EAGAIN, + * which is not really an error. Otherwise, return the error + * code of 'ubifs_return_leb()'. + */ + err = err1; + mutex_unlock(&wbuf->io_mutex); + return err; +} + +/** + * write_node - write node to a journal head. + * @c: UBIFS file-system description object + * @jhead: journal head + * @node: node to write + * @len: node length + * @lnum: LEB number written is returned here + * @offs: offset written is returned here + * + * This function writes a node to reserved space of journal head @jhead. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int write_node(struct ubifs_info *c, int jhead, void *node, int len, + int *lnum, int *offs) +{ + struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; + + ubifs_assert(jhead != GCHD); + + *lnum = c->jheads[jhead].wbuf.lnum; + *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; + + dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); + ubifs_prepare_node(c, node, len, 0); + + return ubifs_wbuf_write_nolock(wbuf, node, len); +} + +/** + * write_head - write data to a journal head. + * @c: UBIFS file-system description object + * @jhead: journal head + * @buf: buffer to write + * @len: length to write + * @lnum: LEB number written is returned here + * @offs: offset written is returned here + * @sync: non-zero if the write-buffer has to by synchronized + * + * This function is the same as 'write_node()' but it does not assume the + * buffer it is writing is a node, so it does not prepare it (which means + * initializing common header and calculating CRC). + */ +static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, + int *lnum, int *offs, int sync) +{ + int err; + struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; + + ubifs_assert(jhead != GCHD); + + *lnum = c->jheads[jhead].wbuf.lnum; + *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; + dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); + + err = ubifs_wbuf_write_nolock(wbuf, buf, len); + if (err) + return err; + if (sync) + err = ubifs_wbuf_sync_nolock(wbuf); + return err; +} + +/** + * make_reservation - reserve journal space. + * @c: UBIFS file-system description object + * @jhead: journal head + * @len: how many bytes to reserve + * + * This function makes space reservation in journal head @jhead. The function + * takes the commit lock and locks the journal head, and the caller has to + * unlock the head and finish the reservation with 'finish_reservation()'. + * Returns zero in case of success and a negative error code in case of + * failure. + * + * Note, the journal head may be unlocked as soon as the data is written, while + * the commit lock has to be released after the data has been added to the + * TNC. + */ +static int make_reservation(struct ubifs_info *c, int jhead, int len) +{ + int err, cmt_retries = 0, nospc_retries = 0; + +again: + down_read(&c->commit_sem); + err = reserve_space(c, jhead, len); + if (!err) + return 0; + up_read(&c->commit_sem); + + if (err == -ENOSPC) { + /* + * GC could not make any progress. We should try to commit + * once because it could make some dirty space and GC would + * make progress, so make the error -EAGAIN so that the below + * will commit and re-try. + */ + if (nospc_retries++ < 2) { + dbg_jnl("no space, retry"); + err = -EAGAIN; + } + + /* + * This means that the budgeting is incorrect. We always have + * to be able to write to the media, because all operations are + * budgeted. Deletions are not budgeted, though, but we reserve + * an extra LEB for them. + */ + } + + if (err != -EAGAIN) + goto out; + + /* + * -EAGAIN means that the journal is full or too large, or the above + * code wants to do one commit. Do this and re-try. + */ + if (cmt_retries > 128) { + /* + * This should not happen unless the journal size limitations + * are too tough. + */ + ubifs_err("stuck in space allocation"); + err = -ENOSPC; + goto out; + } else if (cmt_retries > 32) + ubifs_warn("too many space allocation re-tries (%d)", + cmt_retries); + + dbg_jnl("-EAGAIN, commit and retry (retried %d times)", + cmt_retries); + cmt_retries += 1; + + err = ubifs_run_commit(c); + if (err) + return err; + goto again; + +out: + ubifs_err("cannot reserve %d bytes in jhead %d, error %d", + len, jhead, err); + if (err == -ENOSPC) { + /* This are some budgeting problems, print useful information */ + down_write(&c->commit_sem); + spin_lock(&c->space_lock); + dbg_dump_stack(); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + dbg_dump_lprops(c); + cmt_retries = dbg_check_lprops(c); + up_write(&c->commit_sem); + } + return err; +} + +/** + * release_head - release a journal head. + * @c: UBIFS file-system description object + * @jhead: journal head + * + * This function releases journal head @jhead which was locked by + * the 'make_reservation()' function. It has to be called after each successful + * 'make_reservation()' invocation. + */ +static inline void release_head(struct ubifs_info *c, int jhead) +{ + mutex_unlock(&c->jheads[jhead].wbuf.io_mutex); +} + +/** + * finish_reservation - finish a reservation. + * @c: UBIFS file-system description object + * + * This function finishes journal space reservation. It must be called after + * 'make_reservation()'. + */ +static void finish_reservation(struct ubifs_info *c) +{ + up_read(&c->commit_sem); +} + +/** + * get_dent_type - translate VFS inode mode to UBIFS directory entry type. + * @mode: inode mode + */ +static int get_dent_type(int mode) +{ + switch (mode & S_IFMT) { + case S_IFREG: + return UBIFS_ITYPE_REG; + case S_IFDIR: + return UBIFS_ITYPE_DIR; + case S_IFLNK: + return UBIFS_ITYPE_LNK; + case S_IFBLK: + return UBIFS_ITYPE_BLK; + case S_IFCHR: + return UBIFS_ITYPE_CHR; + case S_IFIFO: + return UBIFS_ITYPE_FIFO; + case S_IFSOCK: + return UBIFS_ITYPE_SOCK; + default: + BUG(); + } + return 0; +} + +/** + * pack_inode - pack an inode node. + * @c: UBIFS file-system description object + * @ino: buffer in which to pack inode node + * @inode: inode to pack + * @last: indicates the last node of the group + * @last_reference: non-zero if this is a deletion inode + */ +static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, + const struct inode *inode, int last, + int last_reference) +{ + int data_len = 0; + struct ubifs_inode *ui = ubifs_inode(inode); + + ino->ch.node_type = UBIFS_INO_NODE; + ino_key_init_flash(c, &ino->key, inode->i_ino); + ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum); + ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec); + ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); + ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec); + ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); + ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + ino->uid = cpu_to_le32(inode->i_uid); + ino->gid = cpu_to_le32(inode->i_gid); + ino->mode = cpu_to_le32(inode->i_mode); + ino->flags = cpu_to_le32(ui->flags); + ino->size = cpu_to_le64(ui->ui_size); + ino->nlink = cpu_to_le32(inode->i_nlink); + ino->compr_type = cpu_to_le16(ui->compr_type); + ino->data_len = cpu_to_le32(ui->data_len); + ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt); + ino->xattr_size = cpu_to_le32(ui->xattr_size); + ino->xattr_names = cpu_to_le32(ui->xattr_names); + zero_ino_node_unused(ino); + + /* + * Drop the attached data if this is a deletion inode, the data is not + * needed anymore. + */ + if (!last_reference) { + memcpy(ino->data, ui->data, ui->data_len); + data_len = ui->data_len; + } + + ubifs_prep_grp_node(c, ino, UBIFS_INO_NODE_SZ + data_len, last); +} + +/** + * mark_inode_clean - mark UBIFS inode as clean. + * @c: UBIFS file-system description object + * @ui: UBIFS inode to mark as clean + * + * This helper function marks UBIFS inode @ui as clean by cleaning the + * @ui->dirty flag and releasing its budget. Note, VFS may still treat the + * inode as dirty and try to write it back, but 'ubifs_write_inode()' would + * just do nothing. + */ +static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui) +{ + if (ui->dirty) + ubifs_release_dirty_inode_budget(c, ui); + ui->dirty = 0; +} + +/** + * ubifs_jnl_update - update inode. + * @c: UBIFS file-system description object + * @dir: parent inode or host inode in case of extended attributes + * @nm: directory entry name + * @inode: inode to update + * @deletion: indicates a directory entry deletion i.e unlink or rmdir + * @xent: non-zero if the directory entry is an extended attribute entry + * + * This function updates an inode by writing a directory entry (or extended + * attribute entry), the inode itself, and the parent directory inode (or the + * host inode) to the journal. + * + * The function writes the host inode @dir last, which is important in case of + * extended attributes. Indeed, then we guarantee that if the host inode gets + * synchronized (with 'fsync()'), and the write-buffer it sits in gets flushed, + * the extended attribute inode gets flushed too. And this is exactly what the + * user expects - synchronizing the host inode synchronizes its extended + * attributes. Similarly, this guarantees that if @dir is synchronized, its + * directory entry corresponding to @nm gets synchronized too. + * + * If the inode (@inode) or the parent directory (@dir) are synchronous, this + * function synchronizes the write-buffer. + * + * This function marks the @dir and @inode inodes as clean and returns zero on + * success. In case of failure, a negative error code is returned. + */ +int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, + const struct qstr *nm, const struct inode *inode, + int deletion, int xent) +{ + int err, dlen, ilen, len, lnum, ino_offs, dent_offs; + int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir); + int last_reference = !!(deletion && inode->i_nlink == 0); + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_dent_node *dent; + struct ubifs_ino_node *ino; + union ubifs_key dent_key, ino_key; + + dbg_jnl("ino %lu, dent '%.*s', data len %d in dir ino %lu", + inode->i_ino, nm->len, nm->name, ui->data_len, dir->i_ino); + ubifs_assert(dir_ui->data_len == 0); + ubifs_assert(mutex_is_locked(&dir_ui->ui_mutex)); + + dlen = UBIFS_DENT_NODE_SZ + nm->len + 1; + ilen = UBIFS_INO_NODE_SZ; + + /* + * If the last reference to the inode is being deleted, then there is + * no need to attach and write inode data, it is being deleted anyway. + * And if the inode is being deleted, no need to synchronize + * write-buffer even if the inode is synchronous. + */ + if (!last_reference) { + ilen += ui->data_len; + sync |= IS_SYNC(inode); + } + + aligned_dlen = ALIGN(dlen, 8); + aligned_ilen = ALIGN(ilen, 8); + len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ; + dent = kmalloc(len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, len); + if (err) + goto out_free; + + if (!xent) { + dent->ch.node_type = UBIFS_DENT_NODE; + dent_key_init(c, &dent_key, dir->i_ino, nm); + } else { + dent->ch.node_type = UBIFS_XENT_NODE; + xent_key_init(c, &dent_key, dir->i_ino, nm); + } + + key_write(c, &dent_key, dent->key); + dent->inum = deletion ? 0 : cpu_to_le64(inode->i_ino); + dent->type = get_dent_type(inode->i_mode); + dent->nlen = cpu_to_le16(nm->len); + memcpy(dent->name, nm->name, nm->len); + dent->name[nm->len] = '\0'; + zero_dent_node_unused(dent); + ubifs_prep_grp_node(c, dent, dlen, 0); + + ino = (void *)dent + aligned_dlen; + pack_inode(c, ino, inode, 0, last_reference); + ino = (void *)ino + aligned_ilen; + pack_inode(c, ino, dir, 1, 0); + + if (last_reference) { + err = ubifs_add_orphan(c, inode->i_ino); + if (err) { + release_head(c, BASEHD); + goto out_finish; + } + } + + err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); + if (err) + goto out_release; + if (!sync) { + struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; + + ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino); + ubifs_wbuf_add_ino_nolock(wbuf, dir->i_ino); + } + release_head(c, BASEHD); + kfree(dent); + + if (deletion) { + err = ubifs_tnc_remove_nm(c, &dent_key, nm); + if (err) + goto out_ro; + err = ubifs_add_dirt(c, lnum, dlen); + } else + err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm); + if (err) + goto out_ro; + + /* + * Note, we do not remove the inode from TNC even if the last reference + * to it has just been deleted, because the inode may still be opened. + * Instead, the inode has been added to orphan lists and the orphan + * subsystem will take further care about it. + */ + ino_key_init(c, &ino_key, inode->i_ino); + ino_offs = dent_offs + aligned_dlen; + err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen); + if (err) + goto out_ro; + + ino_key_init(c, &ino_key, dir->i_ino); + ino_offs += aligned_ilen; + err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, UBIFS_INO_NODE_SZ); + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&ui->ui_lock); + ui->synced_i_size = ui->ui_size; + spin_unlock(&ui->ui_lock); + mark_inode_clean(c, ui); + mark_inode_clean(c, dir_ui); + return 0; + +out_finish: + finish_reservation(c); +out_free: + kfree(dent); + return err; + +out_release: + release_head(c, BASEHD); +out_ro: + ubifs_ro_mode(c, err); + if (last_reference) + ubifs_delete_orphan(c, inode->i_ino); + finish_reservation(c); + return err; +} + +/** + * ubifs_jnl_write_data - write a data node to the journal. + * @c: UBIFS file-system description object + * @inode: inode the data node belongs to + * @key: node key + * @buf: buffer to write + * @len: data length (must not exceed %UBIFS_BLOCK_SIZE) + * + * This function writes a data node to the journal. Returns %0 if the data node + * was successfully written, and a negative error code in case of failure. + */ +int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, + const union ubifs_key *key, const void *buf, int len) +{ + struct ubifs_data_node *data; + int err, lnum, offs, compr_type, out_len; + int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; + struct ubifs_inode *ui = ubifs_inode(inode); + + dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key), + key_block(c, key), len, DBGKEY(key)); + ubifs_assert(len <= UBIFS_BLOCK_SIZE); + + data = kmalloc(dlen, GFP_NOFS); + if (!data) + return -ENOMEM; + + data->ch.node_type = UBIFS_DATA_NODE; + key_write(c, key, &data->key); + data->size = cpu_to_le32(len); + zero_data_node_unused(data); + + if (!(ui->flags && UBIFS_COMPR_FL)) + /* Compression is disabled for this inode */ + compr_type = UBIFS_COMPR_NONE; + else + compr_type = ui->compr_type; + + out_len = dlen - UBIFS_DATA_NODE_SZ; + ubifs_compress(buf, len, &data->data, &out_len, &compr_type); + ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); + + dlen = UBIFS_DATA_NODE_SZ + out_len; + data->compr_type = cpu_to_le16(compr_type); + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, DATAHD, dlen); + if (err) + goto out_free; + + err = write_node(c, DATAHD, data, dlen, &lnum, &offs); + if (err) + goto out_release; + ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key)); + release_head(c, DATAHD); + + err = ubifs_tnc_add(c, key, lnum, offs, dlen); + if (err) + goto out_ro; + + finish_reservation(c); + kfree(data); + return 0; + +out_release: + release_head(c, DATAHD); +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); +out_free: + kfree(data); + return err; +} + +/** + * ubifs_jnl_write_inode - flush inode to the journal. + * @c: UBIFS file-system description object + * @inode: inode to flush + * @deletion: inode has been deleted + * + * This function writes inode @inode to the journal. If the inode is + * synchronous, it also synchronizes the write-buffer. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, + int deletion) +{ + int err, len, lnum, offs, sync = 0; + struct ubifs_ino_node *ino; + struct ubifs_inode *ui = ubifs_inode(inode); + + dbg_jnl("ino %lu%s", inode->i_ino, + deletion ? " (last reference)" : ""); + if (deletion) + ubifs_assert(inode->i_nlink == 0); + + len = UBIFS_INO_NODE_SZ; + /* + * If the inode is being deleted, do not write the attached data. No + * need to synchronize the write-buffer either. + */ + if (!deletion) { + len += ui->data_len; + sync = IS_SYNC(inode); + } + ino = kmalloc(len, GFP_NOFS); + if (!ino) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, len); + if (err) + goto out_free; + + pack_inode(c, ino, inode, 1, deletion); + err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); + if (err) + goto out_release; + if (!sync) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, + inode->i_ino); + release_head(c, BASEHD); + + if (deletion) { + err = ubifs_tnc_remove_ino(c, inode->i_ino); + if (err) + goto out_ro; + ubifs_delete_orphan(c, inode->i_ino); + err = ubifs_add_dirt(c, lnum, len); + } else { + union ubifs_key key; + + ino_key_init(c, &key, inode->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, len); + } + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&ui->ui_lock); + ui->synced_i_size = ui->ui_size; + spin_unlock(&ui->ui_lock); + kfree(ino); + return 0; + +out_release: + release_head(c, BASEHD); +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); +out_free: + kfree(ino); + return err; +} + +/** + * ubifs_jnl_rename - rename a directory entry. + * @c: UBIFS file-system description object + * @old_dir: parent inode of directory entry to rename + * @old_dentry: directory entry to rename + * @new_dir: parent inode of directory entry to rename + * @new_dentry: new directory entry (or directory entry to replace) + * @sync: non-zero if the write-buffer has to be synchronized + * + * This function implements the re-name operation which may involve writing up + * to 3 inodes and 2 directory entries. It marks the written inodes as clean + * and returns zero on success. In case of failure, a negative error code is + * returned. + */ +int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + const struct dentry *old_dentry, + const struct inode *new_dir, + const struct dentry *new_dentry, int sync) +{ + void *p; + union ubifs_key key; + struct ubifs_dent_node *dent, *dent2; + int err, dlen1, dlen2, ilen, lnum, offs, len; + const struct inode *old_inode = old_dentry->d_inode; + const struct inode *new_inode = new_dentry->d_inode; + int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ; + int last_reference = !!(new_inode && new_inode->i_nlink == 0); + int move = (old_dir != new_dir); + struct ubifs_inode *uninitialized_var(new_ui); + + dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu", + old_dentry->d_name.len, old_dentry->d_name.name, + old_dir->i_ino, new_dentry->d_name.len, + new_dentry->d_name.name, new_dir->i_ino); + ubifs_assert(ubifs_inode(old_dir)->data_len == 0); + ubifs_assert(ubifs_inode(new_dir)->data_len == 0); + ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex)); + ubifs_assert(mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex)); + + dlen1 = UBIFS_DENT_NODE_SZ + new_dentry->d_name.len + 1; + dlen2 = UBIFS_DENT_NODE_SZ + old_dentry->d_name.len + 1; + if (new_inode) { + new_ui = ubifs_inode(new_inode); + ubifs_assert(mutex_is_locked(&new_ui->ui_mutex)); + ilen = UBIFS_INO_NODE_SZ; + if (!last_reference) + ilen += new_ui->data_len; + } else + ilen = 0; + + aligned_dlen1 = ALIGN(dlen1, 8); + aligned_dlen2 = ALIGN(dlen2, 8); + len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); + if (old_dir != new_dir) + len += plen; + dent = kmalloc(len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, len); + if (err) + goto out_free; + + /* Make new dent */ + dent->ch.node_type = UBIFS_DENT_NODE; + dent_key_init_flash(c, &dent->key, new_dir->i_ino, &new_dentry->d_name); + dent->inum = cpu_to_le64(old_inode->i_ino); + dent->type = get_dent_type(old_inode->i_mode); + dent->nlen = cpu_to_le16(new_dentry->d_name.len); + memcpy(dent->name, new_dentry->d_name.name, new_dentry->d_name.len); + dent->name[new_dentry->d_name.len] = '\0'; + zero_dent_node_unused(dent); + ubifs_prep_grp_node(c, dent, dlen1, 0); + + /* Make deletion dent */ + dent2 = (void *)dent + aligned_dlen1; + dent2->ch.node_type = UBIFS_DENT_NODE; + dent_key_init_flash(c, &dent2->key, old_dir->i_ino, + &old_dentry->d_name); + dent2->inum = 0; + dent2->type = DT_UNKNOWN; + dent2->nlen = cpu_to_le16(old_dentry->d_name.len); + memcpy(dent2->name, old_dentry->d_name.name, old_dentry->d_name.len); + dent2->name[old_dentry->d_name.len] = '\0'; + zero_dent_node_unused(dent2); + ubifs_prep_grp_node(c, dent2, dlen2, 0); + + p = (void *)dent2 + aligned_dlen2; + if (new_inode) { + pack_inode(c, p, new_inode, 0, last_reference); + p += ALIGN(ilen, 8); + } + + if (!move) + pack_inode(c, p, old_dir, 1, 0); + else { + pack_inode(c, p, old_dir, 0, 0); + p += ALIGN(plen, 8); + pack_inode(c, p, new_dir, 1, 0); + } + + if (last_reference) { + err = ubifs_add_orphan(c, new_inode->i_ino); + if (err) { + release_head(c, BASEHD); + goto out_finish; + } + } + + err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); + if (err) + goto out_release; + if (!sync) { + struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; + + ubifs_wbuf_add_ino_nolock(wbuf, new_dir->i_ino); + ubifs_wbuf_add_ino_nolock(wbuf, old_dir->i_ino); + if (new_inode) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, + new_inode->i_ino); + } + release_head(c, BASEHD); + + dent_key_init(c, &key, new_dir->i_ino, &new_dentry->d_name); + err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, &new_dentry->d_name); + if (err) + goto out_ro; + + err = ubifs_add_dirt(c, lnum, dlen2); + if (err) + goto out_ro; + + dent_key_init(c, &key, old_dir->i_ino, &old_dentry->d_name); + err = ubifs_tnc_remove_nm(c, &key, &old_dentry->d_name); + if (err) + goto out_ro; + + offs += aligned_dlen1 + aligned_dlen2; + if (new_inode) { + ino_key_init(c, &key, new_inode->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, ilen); + if (err) + goto out_ro; + offs += ALIGN(ilen, 8); + } + + ino_key_init(c, &key, old_dir->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, plen); + if (err) + goto out_ro; + + if (old_dir != new_dir) { + offs += ALIGN(plen, 8); + ino_key_init(c, &key, new_dir->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, plen); + if (err) + goto out_ro; + } + + finish_reservation(c); + if (new_inode) { + mark_inode_clean(c, new_ui); + spin_lock(&new_ui->ui_lock); + new_ui->synced_i_size = new_ui->ui_size; + spin_unlock(&new_ui->ui_lock); + } + mark_inode_clean(c, ubifs_inode(old_dir)); + if (move) + mark_inode_clean(c, ubifs_inode(new_dir)); + kfree(dent); + return 0; + +out_release: + release_head(c, BASEHD); +out_ro: + ubifs_ro_mode(c, err); + if (last_reference) + ubifs_delete_orphan(c, new_inode->i_ino); +out_finish: + finish_reservation(c); +out_free: + kfree(dent); + return err; +} + +/** + * recomp_data_node - re-compress a truncated data node. + * @dn: data node to re-compress + * @new_len: new length + * + * This function is used when an inode is truncated and the last data node of + * the inode has to be re-compressed and re-written. + */ +static int recomp_data_node(struct ubifs_data_node *dn, int *new_len) +{ + void *buf; + int err, len, compr_type, out_len; + + out_len = le32_to_cpu(dn->size); + buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS); + if (!buf) + return -ENOMEM; + + len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + compr_type = le16_to_cpu(dn->compr_type); + err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type); + if (err) + goto out; + + ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type); + ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); + dn->compr_type = cpu_to_le16(compr_type); + dn->size = cpu_to_le32(*new_len); + *new_len = UBIFS_DATA_NODE_SZ + out_len; +out: + kfree(buf); + return err; +} + +/** + * ubifs_jnl_truncate - update the journal for a truncation. + * @c: UBIFS file-system description object + * @inode: inode to truncate + * @old_size: old size + * @new_size: new size + * + * When the size of a file decreases due to truncation, a truncation node is + * written, the journal tree is updated, and the last data block is re-written + * if it has been affected. The inode is also updated in order to synchronize + * the new inode size. + * + * This function marks the inode as clean and returns zero on success. In case + * of failure, a negative error code is returned. + */ +int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, + loff_t old_size, loff_t new_size) +{ + union ubifs_key key, to_key; + struct ubifs_ino_node *ino; + struct ubifs_trun_node *trun; + struct ubifs_data_node *uninitialized_var(dn); + int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode); + struct ubifs_inode *ui = ubifs_inode(inode); + ino_t inum = inode->i_ino; + unsigned int blk; + + dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size); + ubifs_assert(!ui->data_len); + ubifs_assert(S_ISREG(inode->i_mode)); + ubifs_assert(mutex_is_locked(&ui->ui_mutex)); + + sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ + + UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR; + ino = kmalloc(sz, GFP_NOFS); + if (!ino) + return -ENOMEM; + + trun = (void *)ino + UBIFS_INO_NODE_SZ; + trun->ch.node_type = UBIFS_TRUN_NODE; + trun->inum = cpu_to_le32(inum); + trun->old_size = cpu_to_le64(old_size); + trun->new_size = cpu_to_le64(new_size); + zero_trun_node_unused(trun); + + dlen = new_size & (UBIFS_BLOCK_SIZE - 1); + if (dlen) { + /* Get last data block so it can be truncated */ + dn = (void *)trun + UBIFS_TRUN_NODE_SZ; + blk = new_size >> UBIFS_BLOCK_SHIFT; + data_key_init(c, &key, inum, blk); + dbg_jnl("last block key %s", DBGKEY(&key)); + err = ubifs_tnc_lookup(c, &key, dn); + if (err == -ENOENT) + dlen = 0; /* Not found (so it is a hole) */ + else if (err) + goto out_free; + else { + if (le32_to_cpu(dn->size) <= dlen) + dlen = 0; /* Nothing to do */ + else { + int compr_type = le16_to_cpu(dn->compr_type); + + if (compr_type != UBIFS_COMPR_NONE) { + err = recomp_data_node(dn, &dlen); + if (err) + goto out_free; + } else { + dn->size = cpu_to_le32(dlen); + dlen += UBIFS_DATA_NODE_SZ; + } + zero_data_node_unused(dn); + } + } + } + + /* Must make reservation before allocating sequence numbers */ + len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ; + if (dlen) + len += dlen; + err = make_reservation(c, BASEHD, len); + if (err) + goto out_free; + + pack_inode(c, ino, inode, 0, 0); + ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); + if (dlen) + ubifs_prep_grp_node(c, dn, dlen, 1); + + err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); + if (err) + goto out_release; + if (!sync) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum); + release_head(c, BASEHD); + + if (dlen) { + sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ; + err = ubifs_tnc_add(c, &key, lnum, sz, dlen); + if (err) + goto out_ro; + } + + ino_key_init(c, &key, inum); + err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ); + if (err) + goto out_ro; + + err = ubifs_add_dirt(c, lnum, UBIFS_TRUN_NODE_SZ); + if (err) + goto out_ro; + + bit = new_size & (UBIFS_BLOCK_SIZE - 1); + blk = (new_size >> UBIFS_BLOCK_SHIFT) + (bit ? 1 : 0); + data_key_init(c, &key, inum, blk); + + bit = old_size & (UBIFS_BLOCK_SIZE - 1); + blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1); + data_key_init(c, &to_key, inum, blk); + + err = ubifs_tnc_remove_range(c, &key, &to_key); + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&ui->ui_lock); + ui->synced_i_size = ui->ui_size; + spin_unlock(&ui->ui_lock); + mark_inode_clean(c, ui); + kfree(ino); + return 0; + +out_release: + release_head(c, BASEHD); +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); +out_free: + kfree(ino); + return err; +} + +#ifdef CONFIG_UBIFS_FS_XATTR + +/** + * ubifs_jnl_delete_xattr - delete an extended attribute. + * @c: UBIFS file-system description object + * @host: host inode + * @inode: extended attribute inode + * @nm: extended attribute entry name + * + * This function delete an extended attribute which is very similar to + * un-linking regular files - it writes a deletion xentry, a deletion inode and + * updates the target inode. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, + const struct inode *inode, const struct qstr *nm) +{ + int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen; + struct ubifs_dent_node *xent; + struct ubifs_ino_node *ino; + union ubifs_key xent_key, key1, key2; + int sync = IS_DIRSYNC(host); + struct ubifs_inode *host_ui = ubifs_inode(host); + + dbg_jnl("host %lu, xattr ino %lu, name '%s', data len %d", + host->i_ino, inode->i_ino, nm->name, + ubifs_inode(inode)->data_len); + ubifs_assert(inode->i_nlink == 0); + ubifs_assert(mutex_is_locked(&host_ui->ui_mutex)); + + /* + * Since we are deleting the inode, we do not bother to attach any data + * to it and assume its length is %UBIFS_INO_NODE_SZ. + */ + xlen = UBIFS_DENT_NODE_SZ + nm->len + 1; + aligned_xlen = ALIGN(xlen, 8); + hlen = host_ui->data_len + UBIFS_INO_NODE_SZ; + len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8); + + xent = kmalloc(len, GFP_NOFS); + if (!xent) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, len); + if (err) { + kfree(xent); + return err; + } + + xent->ch.node_type = UBIFS_XENT_NODE; + xent_key_init(c, &xent_key, host->i_ino, nm); + key_write(c, &xent_key, xent->key); + xent->inum = 0; + xent->type = get_dent_type(inode->i_mode); + xent->nlen = cpu_to_le16(nm->len); + memcpy(xent->name, nm->name, nm->len); + xent->name[nm->len] = '\0'; + zero_dent_node_unused(xent); + ubifs_prep_grp_node(c, xent, xlen, 0); + + ino = (void *)xent + aligned_xlen; + pack_inode(c, ino, inode, 0, 1); + ino = (void *)ino + UBIFS_INO_NODE_SZ; + pack_inode(c, ino, host, 1, 0); + + err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); + if (!sync && !err) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino); + release_head(c, BASEHD); + kfree(xent); + if (err) + goto out_ro; + + /* Remove the extended attribute entry from TNC */ + err = ubifs_tnc_remove_nm(c, &xent_key, nm); + if (err) + goto out_ro; + err = ubifs_add_dirt(c, lnum, xlen); + if (err) + goto out_ro; + + /* + * Remove all nodes belonging to the extended attribute inode from TNC. + * Well, there actually must be only one node - the inode itself. + */ + lowest_ino_key(c, &key1, inode->i_ino); + highest_ino_key(c, &key2, inode->i_ino); + err = ubifs_tnc_remove_range(c, &key1, &key2); + if (err) + goto out_ro; + err = ubifs_add_dirt(c, lnum, UBIFS_INO_NODE_SZ); + if (err) + goto out_ro; + + /* And update TNC with the new host inode position */ + ino_key_init(c, &key1, host->i_ino); + err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen); + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&host_ui->ui_lock); + host_ui->synced_i_size = host_ui->ui_size; + spin_unlock(&host_ui->ui_lock); + mark_inode_clean(c, host_ui); + return 0; + +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); + return err; +} + +/** + * ubifs_jnl_change_xattr - change an extended attribute. + * @c: UBIFS file-system description object + * @inode: extended attribute inode + * @host: host inode + * + * This function writes the updated version of an extended attribute inode and + * the host inode tho the journal (to the base head). The host inode is written + * after the extended attribute inode in order to guarantee that the extended + * attribute will be flushed when the inode is synchronized by 'fsync()' and + * consequently, the write-buffer is synchronized. This function returns zero + * in case of success and a negative error code in case of failure. + */ +int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, + const struct inode *host) +{ + int err, len1, len2, aligned_len, aligned_len1, lnum, offs; + struct ubifs_inode *host_ui = ubifs_inode(inode); + struct ubifs_ino_node *ino; + union ubifs_key key; + int sync = IS_DIRSYNC(host); + + dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino); + ubifs_assert(host->i_nlink > 0); + ubifs_assert(inode->i_nlink > 0); + ubifs_assert(mutex_is_locked(&host_ui->ui_mutex)); + + len1 = UBIFS_INO_NODE_SZ + host_ui->data_len; + len2 = UBIFS_INO_NODE_SZ + ubifs_inode(inode)->data_len; + aligned_len1 = ALIGN(len1, 8); + aligned_len = aligned_len1 + ALIGN(len2, 8); + + ino = kmalloc(aligned_len, GFP_NOFS); + if (!ino) + return -ENOMEM; + + /* Make reservation before allocating sequence numbers */ + err = make_reservation(c, BASEHD, aligned_len); + if (err) + goto out_free; + + pack_inode(c, ino, host, 0, 0); + pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0); + + err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); + if (!sync && !err) { + struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; + + ubifs_wbuf_add_ino_nolock(wbuf, host->i_ino); + ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino); + } + release_head(c, BASEHD); + if (err) + goto out_ro; + + ino_key_init(c, &key, host->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, len1); + if (err) + goto out_ro; + + ino_key_init(c, &key, inode->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2); + if (err) + goto out_ro; + + finish_reservation(c); + spin_lock(&host_ui->ui_lock); + host_ui->synced_i_size = host_ui->ui_size; + spin_unlock(&host_ui->ui_lock); + mark_inode_clean(c, host_ui); + kfree(ino); + return 0; + +out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); +out_free: + kfree(ino); + return err; +} + +#endif /* CONFIG_UBIFS_FS_XATTR */ diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h new file mode 100644 index 000000000000..8f7476007549 --- /dev/null +++ b/fs/ubifs/key.h @@ -0,0 +1,533 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This header contains various key-related definitions and helper function. + * UBIFS allows several key schemes, so we access key fields only via these + * helpers. At the moment only one key scheme is supported. + * + * Simple key scheme + * ~~~~~~~~~~~~~~~~~ + * + * Keys are 64-bits long. First 32-bits are inode number (parent inode number + * in case of direntry key). Next 3 bits are node type. The last 29 bits are + * 4KiB offset in case of inode node, and direntry hash in case of a direntry + * node. We use "r5" hash borrowed from reiserfs. + */ + +#ifndef __UBIFS_KEY_H__ +#define __UBIFS_KEY_H__ + +/** + * key_r5_hash - R5 hash function (borrowed from reiserfs). + * @s: direntry name + * @len: name length + */ +static inline uint32_t key_r5_hash(const char *s, int len) +{ + uint32_t a = 0; + const signed char *str = (const signed char *)s; + + while (*str) { + a += *str << 4; + a += *str >> 4; + a *= 11; + str++; + } + + a &= UBIFS_S_KEY_HASH_MASK; + + /* + * We use hash values as offset in directories, so values %0 and %1 are + * reserved for "." and "..". %2 is reserved for "end of readdir" + * marker. + */ + if (unlikely(a >= 0 && a <= 2)) + a += 3; + return a; +} + +/** + * key_test_hash - testing hash function. + * @str: direntry name + * @len: name length + */ +static inline uint32_t key_test_hash(const char *str, int len) +{ + uint32_t a = 0; + + len = min_t(uint32_t, len, 4); + memcpy(&a, str, len); + a &= UBIFS_S_KEY_HASH_MASK; + if (unlikely(a >= 0 && a <= 2)) + a += 3; + return a; +} + +/** + * ino_key_init - initialize inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void ino_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * ino_key_init_flash - initialize on-flash inode key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: inode number + */ +static inline void ino_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum) +{ + union ubifs_key *key = k; + + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_ino_key - get the lowest possible inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void lowest_ino_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = 0; +} + +/** + * highest_ino_key - get the highest possible inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void highest_ino_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = 0xffffffff; +} + +/** + * dent_key_init - initialize directory entry key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: parent inode number + * @nm: direntry name and length + */ +static inline void dent_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + const struct qstr *nm) +{ + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * dent_key_init_hash - initialize directory entry key without re-calculating + * hash function. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: parent inode number + * @hash: direntry name hash + */ +static inline void dent_key_init_hash(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + uint32_t hash) +{ + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * dent_key_init_flash - initialize on-flash directory entry key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: parent inode number + * @nm: direntry name and length + */ +static inline void dent_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum, const struct qstr *nm) +{ + union ubifs_key *key = k; + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(hash | + (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS)); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_dent_key - get the lowest possible directory entry key. + * @c: UBIFS file-system description object + * @key: where to store the lowest key + * @inum: parent inode number + */ +static inline void lowest_dent_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS; +} + +/** + * xent_key_init - initialize extended attribute entry key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: host inode number + * @nm: extended attribute entry name and length + */ +static inline void xent_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + const struct qstr *nm) +{ + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * xent_key_init_hash - initialize extended attribute entry key without + * re-calculating hash function. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: host inode number + * @hash: extended attribute entry name hash + */ +static inline void xent_key_init_hash(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + uint32_t hash) +{ + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->u32[0] = inum; + key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * xent_key_init_flash - initialize on-flash extended attribute entry key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: host inode number + * @nm: extended attribute entry name and length + */ +static inline void xent_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum, const struct qstr *nm) +{ + union ubifs_key *key = k; + uint32_t hash = c->key_hash(nm->name, nm->len); + + ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(hash | + (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS)); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_xent_key - get the lowest possible extended attribute entry key. + * @c: UBIFS file-system description object + * @key: where to store the lowest key + * @inum: host inode number + */ +static inline void lowest_xent_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS; +} + +/** + * data_key_init - initialize data key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + * @block: block number + */ +static inline void data_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum, + unsigned int block) +{ + ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); + key->u32[0] = inum; + key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS); +} + +/** + * data_key_init_flash - initialize on-flash data key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: inode number + * @block: block number + */ +static inline void data_key_init_flash(const struct ubifs_info *c, void *k, + ino_t inum, unsigned int block) +{ + union ubifs_key *key = k; + + ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); + key->j32[0] = cpu_to_le32(inum); + key->j32[1] = cpu_to_le32(block | + (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); + memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * trun_key_init - initialize truncation node key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + * + * Note, UBIFS does not have truncation keys on the media and this function is + * only used for purposes of replay. + */ +static inline void trun_key_init(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) +{ + key->u32[0] = inum; + key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_type - get key type. + * @c: UBIFS file-system description object + * @key: key to get type of + */ +static inline int key_type(const struct ubifs_info *c, + const union ubifs_key *key) +{ + return key->u32[1] >> UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_type_flash - get type of a on-flash formatted key. + * @c: UBIFS file-system description object + * @k: key to get type of + */ +static inline int key_type_flash(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->u32[1]) >> UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_inum - fetch inode number from key. + * @c: UBIFS file-system description object + * @k: key to fetch inode number from + */ +static inline ino_t key_inum(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return key->u32[0]; +} + +/** + * key_inum_flash - fetch inode number from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: key to fetch inode number from + */ +static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->j32[0]); +} + +/** + * key_hash - get directory entry hash. + * @c: UBIFS file-system description object + * @key: the key to get hash from + */ +static inline int key_hash(const struct ubifs_info *c, + const union ubifs_key *key) +{ + return key->u32[1] & UBIFS_S_KEY_HASH_MASK; +} + +/** + * key_hash_flash - get directory entry hash from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: the key to get hash from + */ +static inline int key_hash_flash(const struct ubifs_info *c, const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_HASH_MASK; +} + +/** + * key_block - get data block number. + * @c: UBIFS file-system description object + * @key: the key to get the block number from + */ +static inline unsigned int key_block(const struct ubifs_info *c, + const union ubifs_key *key) +{ + return key->u32[1] & UBIFS_S_KEY_BLOCK_MASK; +} + +/** + * key_block_flash - get data block number from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: the key to get the block number from + */ +static inline unsigned int key_block_flash(const struct ubifs_info *c, + const void *k) +{ + const union ubifs_key *key = k; + + return le32_to_cpu(key->u32[1]) & UBIFS_S_KEY_BLOCK_MASK; +} + +/** + * key_read - transform a key to in-memory format. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_read(const struct ubifs_info *c, const void *from, + union ubifs_key *to) +{ + const union ubifs_key *f = from; + + to->u32[0] = le32_to_cpu(f->j32[0]); + to->u32[1] = le32_to_cpu(f->j32[1]); +} + +/** + * key_write - transform a key from in-memory format. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_write(const struct ubifs_info *c, + const union ubifs_key *from, void *to) +{ + union ubifs_key *t = to; + + t->j32[0] = cpu_to_le32(from->u32[0]); + t->j32[1] = cpu_to_le32(from->u32[1]); + memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * key_write_idx - transform a key from in-memory format for the index. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_write_idx(const struct ubifs_info *c, + const union ubifs_key *from, void *to) +{ + union ubifs_key *t = to; + + t->j32[0] = cpu_to_le32(from->u32[0]); + t->j32[1] = cpu_to_le32(from->u32[1]); +} + +/** + * key_copy - copy a key. + * @c: UBIFS file-system description object + * @from: the key to copy from + * @to: the key to copy to + */ +static inline void key_copy(const struct ubifs_info *c, + const union ubifs_key *from, union ubifs_key *to) +{ + to->u64[0] = from->u64[0]; +} + +/** + * keys_cmp - compare keys. + * @c: UBIFS file-system description object + * @key1: the first key to compare + * @key2: the second key to compare + * + * This function compares 2 keys and returns %-1 if @key1 is less than + * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2. + */ +static inline int keys_cmp(const struct ubifs_info *c, + const union ubifs_key *key1, + const union ubifs_key *key2) +{ + if (key1->u32[0] < key2->u32[0]) + return -1; + if (key1->u32[0] > key2->u32[0]) + return 1; + if (key1->u32[1] < key2->u32[1]) + return -1; + if (key1->u32[1] > key2->u32[1]) + return 1; + + return 0; +} + +/** + * is_hash_key - is a key vulnerable to hash collisions. + * @c: UBIFS file-system description object + * @key: key + * + * This function returns %1 if @key is a hashed key or %0 otherwise. + */ +static inline int is_hash_key(const struct ubifs_info *c, + const union ubifs_key *key) +{ + int type = key_type(c, key); + + return type == UBIFS_DENT_KEY || type == UBIFS_XENT_KEY; +} + +/** + * key_max_inode_size - get maximum file size allowed by current key format. + * @c: UBIFS file-system description object + */ +static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) +{ + switch (c->key_fmt) { + case UBIFS_SIMPLE_KEY_FMT: + return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE; + default: + return 0; + } +} +#endif /* !__UBIFS_KEY_H__ */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c new file mode 100644 index 000000000000..36857b9ed59e --- /dev/null +++ b/fs/ubifs/log.c @@ -0,0 +1,805 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file is a part of UBIFS journal implementation and contains various + * functions which manipulate the log. The log is a fixed area on the flash + * which does not contain any data but refers to buds. The log is a part of the + * journal. + */ + +#include "ubifs.h" + +#ifdef CONFIG_UBIFS_FS_DEBUG +static int dbg_check_bud_bytes(struct ubifs_info *c); +#else +#define dbg_check_bud_bytes(c) 0 +#endif + +/** + * ubifs_search_bud - search bud LEB. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number to search + * + * This function searches bud LEB @lnum. Returns bud description object in case + * of success and %NULL if there is no bud with this LEB number. + */ +struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum) +{ + struct rb_node *p; + struct ubifs_bud *bud; + + spin_lock(&c->buds_lock); + p = c->buds.rb_node; + while (p) { + bud = rb_entry(p, struct ubifs_bud, rb); + if (lnum < bud->lnum) + p = p->rb_left; + else if (lnum > bud->lnum) + p = p->rb_right; + else { + spin_unlock(&c->buds_lock); + return bud; + } + } + spin_unlock(&c->buds_lock); + return NULL; +} + +/** + * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number to search + * + * This functions returns the wbuf for @lnum or %NULL if there is not one. + */ +struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) +{ + struct rb_node *p; + struct ubifs_bud *bud; + int jhead; + + if (!c->jheads) + return NULL; + + spin_lock(&c->buds_lock); + p = c->buds.rb_node; + while (p) { + bud = rb_entry(p, struct ubifs_bud, rb); + if (lnum < bud->lnum) + p = p->rb_left; + else if (lnum > bud->lnum) + p = p->rb_right; + else { + jhead = bud->jhead; + spin_unlock(&c->buds_lock); + return &c->jheads[jhead].wbuf; + } + } + spin_unlock(&c->buds_lock); + return NULL; +} + +/** + * next_log_lnum - switch to the next log LEB. + * @c: UBIFS file-system description object + * @lnum: current log LEB + */ +static inline int next_log_lnum(const struct ubifs_info *c, int lnum) +{ + lnum += 1; + if (lnum > c->log_last) + lnum = UBIFS_LOG_LNUM; + + return lnum; +} + +/** + * empty_log_bytes - calculate amount of empty space in the log. + * @c: UBIFS file-system description object + */ +static inline long long empty_log_bytes(const struct ubifs_info *c) +{ + long long h, t; + + h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs; + t = (long long)c->ltail_lnum * c->leb_size; + + if (h >= t) + return c->log_bytes - h + t; + else + return t - h; +} + +/** + * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list. + * @c: UBIFS file-system description object + * @bud: the bud to add + */ +void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) +{ + struct rb_node **p, *parent = NULL; + struct ubifs_bud *b; + struct ubifs_jhead *jhead; + + spin_lock(&c->buds_lock); + p = &c->buds.rb_node; + while (*p) { + parent = *p; + b = rb_entry(parent, struct ubifs_bud, rb); + ubifs_assert(bud->lnum != b->lnum); + if (bud->lnum < b->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&bud->rb, parent, p); + rb_insert_color(&bud->rb, &c->buds); + if (c->jheads) { + jhead = &c->jheads[bud->jhead]; + list_add_tail(&bud->list, &jhead->buds_list); + } else + ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); + + /* + * Note, although this is a new bud, we anyway account this space now, + * before any data has been written to it, because this is about to + * guarantee fixed mount time, and this bud will anyway be read and + * scanned. + */ + c->bud_bytes += c->leb_size - bud->start; + + dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, + bud->start, bud->jhead, c->bud_bytes); + spin_unlock(&c->buds_lock); +} + +/** + * ubifs_create_buds_lists - create journal head buds lists for remount rw. + * @c: UBIFS file-system description object + */ +void ubifs_create_buds_lists(struct ubifs_info *c) +{ + struct rb_node *p; + + spin_lock(&c->buds_lock); + p = rb_first(&c->buds); + while (p) { + struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb); + struct ubifs_jhead *jhead = &c->jheads[bud->jhead]; + + list_add_tail(&bud->list, &jhead->buds_list); + p = rb_next(p); + } + spin_unlock(&c->buds_lock); +} + +/** + * ubifs_add_bud_to_log - add a new bud to the log. + * @c: UBIFS file-system description object + * @jhead: journal head the bud belongs to + * @lnum: LEB number of the bud + * @offs: starting offset of the bud + * + * This function writes reference node for the new bud LEB @lnum it to the log, + * and adds it to the buds tress. It also makes sure that log size does not + * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success, + * %-EAGAIN if commit is required, and a negative error codes in case of + * failure. + */ +int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) +{ + int err; + struct ubifs_bud *bud; + struct ubifs_ref_node *ref; + + bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS); + if (!bud) + return -ENOMEM; + ref = kzalloc(c->ref_node_alsz, GFP_NOFS); + if (!ref) { + kfree(bud); + return -ENOMEM; + } + + mutex_lock(&c->log_mutex); + + if (c->ro_media) { + err = -EROFS; + goto out_unlock; + } + + /* Make sure we have enough space in the log */ + if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) { + dbg_log("not enough log space - %lld, required %d", + empty_log_bytes(c), c->min_log_bytes); + ubifs_commit_required(c); + err = -EAGAIN; + goto out_unlock; + } + + /* + * Make sure the the amount of space in buds will not exceed + * 'c->max_bud_bytes' limit, because we want to guarantee mount time + * limits. + * + * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes + * because we are holding @c->log_mutex. All @c->bud_bytes take place + * when both @c->log_mutex and @c->bud_bytes are locked. + */ + if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) { + dbg_log("bud bytes %lld (%lld max), require commit", + c->bud_bytes, c->max_bud_bytes); + ubifs_commit_required(c); + err = -EAGAIN; + goto out_unlock; + } + + /* + * If the journal is full enough - start background commit. Note, it is + * OK to read 'c->cmt_state' without spinlock because integer reads + * are atomic in the kernel. + */ + if (c->bud_bytes >= c->bg_bud_bytes && + c->cmt_state == COMMIT_RESTING) { + dbg_log("bud bytes %lld (%lld max), initiate BG commit", + c->bud_bytes, c->max_bud_bytes); + ubifs_request_bg_commit(c); + } + + bud->lnum = lnum; + bud->start = offs; + bud->jhead = jhead; + + ref->ch.node_type = UBIFS_REF_NODE; + ref->lnum = cpu_to_le32(bud->lnum); + ref->offs = cpu_to_le32(bud->start); + ref->jhead = cpu_to_le32(jhead); + + if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { + c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + + if (c->lhead_offs == 0) { + /* Must ensure next log LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->lhead_lnum); + if (err) + goto out_unlock; + } + + if (bud->start == 0) { + /* + * Before writing the LEB reference which refers an empty LEB + * to the log, we have to make sure it is mapped, because + * otherwise we'd risk to refer an LEB with garbage in case of + * an unclean reboot, because the target LEB might have been + * unmapped, but not yet physically erased. + */ + err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); + if (err) + goto out_unlock; + } + + dbg_log("write ref LEB %d:%d", + c->lhead_lnum, c->lhead_offs); + err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum, + c->lhead_offs, UBI_SHORTTERM); + if (err) + goto out_unlock; + + c->lhead_offs += c->ref_node_alsz; + + ubifs_add_bud(c, bud); + + mutex_unlock(&c->log_mutex); + kfree(ref); + return 0; + +out_unlock: + mutex_unlock(&c->log_mutex); + kfree(ref); + kfree(bud); + return err; +} + +/** + * remove_buds - remove used buds. + * @c: UBIFS file-system description object + * + * This function removes use buds from the buds tree. It does not remove the + * buds which are pointed to by journal heads. + */ +static void remove_buds(struct ubifs_info *c) +{ + struct rb_node *p; + + ubifs_assert(list_empty(&c->old_buds)); + c->cmt_bud_bytes = 0; + spin_lock(&c->buds_lock); + p = rb_first(&c->buds); + while (p) { + struct rb_node *p1 = p; + struct ubifs_bud *bud; + struct ubifs_wbuf *wbuf; + + p = rb_next(p); + bud = rb_entry(p1, struct ubifs_bud, rb); + wbuf = &c->jheads[bud->jhead].wbuf; + + if (wbuf->lnum == bud->lnum) { + /* + * Do not remove buds which are pointed to by journal + * heads (non-closed buds). + */ + c->cmt_bud_bytes += wbuf->offs - bud->start; + dbg_log("preserve %d:%d, jhead %d, bud bytes %d, " + "cmt_bud_bytes %lld", bud->lnum, bud->start, + bud->jhead, wbuf->offs - bud->start, + c->cmt_bud_bytes); + bud->start = wbuf->offs; + } else { + c->cmt_bud_bytes += c->leb_size - bud->start; + dbg_log("remove %d:%d, jhead %d, bud bytes %d, " + "cmt_bud_bytes %lld", bud->lnum, bud->start, + bud->jhead, c->leb_size - bud->start, + c->cmt_bud_bytes); + rb_erase(p1, &c->buds); + list_del(&bud->list); + /* + * If the commit does not finish, the recovery will need + * to replay the journal, in which case the old buds + * must be unchanged. Do not release them until post + * commit i.e. do not allow them to be garbage + * collected. + */ + list_add(&bud->list, &c->old_buds); + } + } + spin_unlock(&c->buds_lock); +} + +/** + * ubifs_log_start_commit - start commit. + * @c: UBIFS file-system description object + * @ltail_lnum: return new log tail LEB number + * + * The commit operation starts with writing "commit start" node to the log and + * reference nodes for all journal heads which will define new journal after + * the commit has been finished. The commit start and reference nodes are + * written in one go to the nearest empty log LEB (hence, when commit is + * finished UBIFS may safely unmap all the previous log LEBs). This function + * returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) +{ + void *buf; + struct ubifs_cs_node *cs; + struct ubifs_ref_node *ref; + int err, i, max_len, len; + + err = dbg_check_bud_bytes(c); + if (err) + return err; + + max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ; + max_len = ALIGN(max_len, c->min_io_size); + buf = cs = kmalloc(max_len, GFP_NOFS); + if (!buf) + return -ENOMEM; + + cs->ch.node_type = UBIFS_CS_NODE; + cs->cmt_no = cpu_to_le64(c->cmt_no + 1); + ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); + + /* + * Note, we do not lock 'c->log_mutex' because this is the commit start + * phase and we are exclusively using the log. And we do not lock + * write-buffer because nobody can write to the file-system at this + * phase. + */ + + len = UBIFS_CS_NODE_SZ; + for (i = 0; i < c->jhead_cnt; i++) { + int lnum = c->jheads[i].wbuf.lnum; + int offs = c->jheads[i].wbuf.offs; + + if (lnum == -1 || offs == c->leb_size) + continue; + + dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i); + ref = buf + len; + ref->ch.node_type = UBIFS_REF_NODE; + ref->lnum = cpu_to_le32(lnum); + ref->offs = cpu_to_le32(offs); + ref->jhead = cpu_to_le32(i); + + ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0); + len += UBIFS_REF_NODE_SZ; + } + + ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len); + + /* Switch to the next log LEB */ + if (c->lhead_offs) { + c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + + if (c->lhead_offs == 0) { + /* Must ensure next LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->lhead_lnum); + if (err) + goto out; + } + + len = ALIGN(len, c->min_io_size); + dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len); + err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM); + if (err) + goto out; + + *ltail_lnum = c->lhead_lnum; + + c->lhead_offs += len; + if (c->lhead_offs == c->leb_size) { + c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + + remove_buds(c); + + /* + * We have started the commit and now users may use the rest of the log + * for new writes. + */ + c->min_log_bytes = 0; + +out: + kfree(buf); + return err; +} + +/** + * ubifs_log_end_commit - end commit. + * @c: UBIFS file-system description object + * @ltail_lnum: new log tail LEB number + * + * This function is called on when the commit operation was finished. It + * moves log tail to new position and unmaps LEBs which contain obsolete data. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum) +{ + int err; + + /* + * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS + * writes during commit. Its only short "commit" start phase when + * writers are blocked. + */ + mutex_lock(&c->log_mutex); + + dbg_log("old tail was LEB %d:0, new tail is LEB %d:0", + c->ltail_lnum, ltail_lnum); + + c->ltail_lnum = ltail_lnum; + /* + * The commit is finished and from now on it must be guaranteed that + * there is always enough space for the next commit. + */ + c->min_log_bytes = c->leb_size; + + spin_lock(&c->buds_lock); + c->bud_bytes -= c->cmt_bud_bytes; + spin_unlock(&c->buds_lock); + + err = dbg_check_bud_bytes(c); + + mutex_unlock(&c->log_mutex); + return err; +} + +/** + * ubifs_log_post_commit - things to do after commit is completed. + * @c: UBIFS file-system description object + * @old_ltail_lnum: old log tail LEB number + * + * Release buds only after commit is completed, because they must be unchanged + * if recovery is needed. + * + * Unmap log LEBs only after commit is completed, because they may be needed for + * recovery. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) +{ + int lnum, err = 0; + + while (!list_empty(&c->old_buds)) { + struct ubifs_bud *bud; + + bud = list_entry(c->old_buds.next, struct ubifs_bud, list); + err = ubifs_return_leb(c, bud->lnum); + if (err) + return err; + list_del(&bud->list); + kfree(bud); + } + mutex_lock(&c->log_mutex); + for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; + lnum = next_log_lnum(c, lnum)) { + dbg_log("unmap log LEB %d", lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) + goto out; + } +out: + mutex_unlock(&c->log_mutex); + return err; +} + +/** + * struct done_ref - references that have been done. + * @rb: rb-tree node + * @lnum: LEB number + */ +struct done_ref { + struct rb_node rb; + int lnum; +}; + +/** + * done_already - determine if a reference has been done already. + * @done_tree: rb-tree to store references that have been done + * @lnum: LEB number of reference + * + * This function returns %1 if the reference has been done, %0 if not, otherwise + * a negative error code is returned. + */ +static int done_already(struct rb_root *done_tree, int lnum) +{ + struct rb_node **p = &done_tree->rb_node, *parent = NULL; + struct done_ref *dr; + + while (*p) { + parent = *p; + dr = rb_entry(parent, struct done_ref, rb); + if (lnum < dr->lnum) + p = &(*p)->rb_left; + else if (lnum > dr->lnum) + p = &(*p)->rb_right; + else + return 1; + } + + dr = kzalloc(sizeof(struct done_ref), GFP_NOFS); + if (!dr) + return -ENOMEM; + + dr->lnum = lnum; + + rb_link_node(&dr->rb, parent, p); + rb_insert_color(&dr->rb, done_tree); + + return 0; +} + +/** + * destroy_done_tree - destroy the done tree. + * @done_tree: done tree to destroy + */ +static void destroy_done_tree(struct rb_root *done_tree) +{ + struct rb_node *this = done_tree->rb_node; + struct done_ref *dr; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + dr = rb_entry(this, struct done_ref, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &dr->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(dr); + } +} + +/** + * add_node - add a node to the consolidated log. + * @c: UBIFS file-system description object + * @buf: buffer to which to add + * @lnum: LEB number to which to write is passed and returned here + * @offs: offset to where to write is passed and returned here + * @node: node to add + * + * This function returns %0 on success and a negative error code on failure. + */ +static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, + void *node) +{ + struct ubifs_ch *ch = node; + int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs; + + if (len > remains) { + int sz = ALIGN(*offs, c->min_io_size), err; + + ubifs_pad(c, buf + *offs, sz - *offs); + err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); + if (err) + return err; + *lnum = next_log_lnum(c, *lnum); + *offs = 0; + } + memcpy(buf + *offs, node, len); + *offs += ALIGN(len, 8); + return 0; +} + +/** + * ubifs_consolidate_log - consolidate the log. + * @c: UBIFS file-system description object + * + * Repeated failed commits could cause the log to be full, but at least 1 LEB is + * needed for commit. This function rewrites the reference nodes in the log + * omitting duplicates, and failed CS nodes, and leaving no gaps. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_consolidate_log(struct ubifs_info *c) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + struct rb_root done_tree = RB_ROOT; + int lnum, err, first = 1, write_lnum, offs = 0; + void *buf; + + dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum, + c->lhead_lnum); + buf = vmalloc(c->leb_size); + if (!buf) + return -ENOMEM; + lnum = c->ltail_lnum; + write_lnum = lnum; + while (1) { + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + goto out_free; + } + list_for_each_entry(snod, &sleb->nodes, list) { + switch (snod->type) { + case UBIFS_REF_NODE: { + struct ubifs_ref_node *ref = snod->node; + int ref_lnum = le32_to_cpu(ref->lnum); + + err = done_already(&done_tree, ref_lnum); + if (err < 0) + goto out_scan; + if (err != 1) { + err = add_node(c, buf, &write_lnum, + &offs, snod->node); + if (err) + goto out_scan; + } + break; + } + case UBIFS_CS_NODE: + if (!first) + break; + err = add_node(c, buf, &write_lnum, &offs, + snod->node); + if (err) + goto out_scan; + first = 0; + break; + } + } + ubifs_scan_destroy(sleb); + if (lnum == c->lhead_lnum) + break; + lnum = next_log_lnum(c, lnum); + } + if (offs) { + int sz = ALIGN(offs, c->min_io_size); + + ubifs_pad(c, buf + offs, sz - offs); + err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM); + if (err) + goto out_free; + offs = ALIGN(offs, c->min_io_size); + } + destroy_done_tree(&done_tree); + vfree(buf); + if (write_lnum == c->lhead_lnum) { + ubifs_err("log is too full"); + return -EINVAL; + } + /* Unmap remaining LEBs */ + lnum = write_lnum; + do { + lnum = next_log_lnum(c, lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } while (lnum != c->lhead_lnum); + c->lhead_lnum = write_lnum; + c->lhead_offs = offs; + dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs); + return 0; + +out_scan: + ubifs_scan_destroy(sleb); +out_free: + destroy_done_tree(&done_tree); + vfree(buf); + return err; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_check_bud_bytes - make sure bud bytes calculation are all right. + * @c: UBIFS file-system description object + * + * This function makes sure the amount of flash space used by closed buds + * ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in + * case of failure. + */ +static int dbg_check_bud_bytes(struct ubifs_info *c) +{ + int i, err = 0; + struct ubifs_bud *bud; + long long bud_bytes = 0; + + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + + spin_lock(&c->buds_lock); + for (i = 0; i < c->jhead_cnt; i++) + list_for_each_entry(bud, &c->jheads[i].buds_list, list) + bud_bytes += c->leb_size - bud->start; + + if (c->bud_bytes != bud_bytes) { + ubifs_err("bad bud_bytes %lld, calculated %lld", + c->bud_bytes, bud_bytes); + err = -EINVAL; + } + spin_unlock(&c->buds_lock); + + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c new file mode 100644 index 000000000000..2ba93da71b65 --- /dev/null +++ b/fs/ubifs/lprops.c @@ -0,0 +1,1357 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the functions that access LEB properties and their + * categories. LEBs are categorized based on the needs of UBIFS, and the + * categories are stored as either heaps or lists to provide a fast way of + * finding a LEB in a particular category. For example, UBIFS may need to find + * an empty LEB for the journal, or a very dirty LEB for garbage collection. + */ + +#include "ubifs.h" + +/** + * get_heap_comp_val - get the LEB properties value for heap comparisons. + * @lprops: LEB properties + * @cat: LEB category + */ +static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat) +{ + switch (cat) { + case LPROPS_FREE: + return lprops->free; + case LPROPS_DIRTY_IDX: + return lprops->free + lprops->dirty; + default: + return lprops->dirty; + } +} + +/** + * move_up_lpt_heap - move a new heap entry up as far as possible. + * @c: UBIFS file-system description object + * @heap: LEB category heap + * @lprops: LEB properties to move + * @cat: LEB category + * + * New entries to a heap are added at the bottom and then moved up until the + * parent's value is greater. In the case of LPT's category heaps, the value + * is either the amount of free space or the amount of dirty space, depending + * on the category. + */ +static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, + struct ubifs_lprops *lprops, int cat) +{ + int val1, val2, hpos; + + hpos = lprops->hpos; + if (!hpos) + return; /* Already top of the heap */ + val1 = get_heap_comp_val(lprops, cat); + /* Compare to parent and, if greater, move up the heap */ + do { + int ppos = (hpos - 1) / 2; + + val2 = get_heap_comp_val(heap->arr[ppos], cat); + if (val2 >= val1) + return; + /* Greater than parent so move up */ + heap->arr[ppos]->hpos = hpos; + heap->arr[hpos] = heap->arr[ppos]; + heap->arr[ppos] = lprops; + lprops->hpos = ppos; + hpos = ppos; + } while (hpos); +} + +/** + * adjust_lpt_heap - move a changed heap entry up or down the heap. + * @c: UBIFS file-system description object + * @heap: LEB category heap + * @lprops: LEB properties to move + * @hpos: heap position of @lprops + * @cat: LEB category + * + * Changed entries in a heap are moved up or down until the parent's value is + * greater. In the case of LPT's category heaps, the value is either the amount + * of free space or the amount of dirty space, depending on the category. + */ +static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, + struct ubifs_lprops *lprops, int hpos, int cat) +{ + int val1, val2, val3, cpos; + + val1 = get_heap_comp_val(lprops, cat); + /* Compare to parent and, if greater than parent, move up the heap */ + if (hpos) { + int ppos = (hpos - 1) / 2; + + val2 = get_heap_comp_val(heap->arr[ppos], cat); + if (val1 > val2) { + /* Greater than parent so move up */ + while (1) { + heap->arr[ppos]->hpos = hpos; + heap->arr[hpos] = heap->arr[ppos]; + heap->arr[ppos] = lprops; + lprops->hpos = ppos; + hpos = ppos; + if (!hpos) + return; + ppos = (hpos - 1) / 2; + val2 = get_heap_comp_val(heap->arr[ppos], cat); + if (val1 <= val2) + return; + /* Still greater than parent so keep going */ + } + } + } + /* Not greater than parent, so compare to children */ + while (1) { + /* Compare to left child */ + cpos = hpos * 2 + 1; + if (cpos >= heap->cnt) + return; + val2 = get_heap_comp_val(heap->arr[cpos], cat); + if (val1 < val2) { + /* Less than left child, so promote biggest child */ + if (cpos + 1 < heap->cnt) { + val3 = get_heap_comp_val(heap->arr[cpos + 1], + cat); + if (val3 > val2) + cpos += 1; /* Right child is bigger */ + } + heap->arr[cpos]->hpos = hpos; + heap->arr[hpos] = heap->arr[cpos]; + heap->arr[cpos] = lprops; + lprops->hpos = cpos; + hpos = cpos; + continue; + } + /* Compare to right child */ + cpos += 1; + if (cpos >= heap->cnt) + return; + val3 = get_heap_comp_val(heap->arr[cpos], cat); + if (val1 < val3) { + /* Less than right child, so promote right child */ + heap->arr[cpos]->hpos = hpos; + heap->arr[hpos] = heap->arr[cpos]; + heap->arr[cpos] = lprops; + lprops->hpos = cpos; + hpos = cpos; + continue; + } + return; + } +} + +/** + * add_to_lpt_heap - add LEB properties to a LEB category heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to add + * @cat: LEB category + * + * This function returns %1 if @lprops is added to the heap for LEB category + * @cat, otherwise %0 is returned because the heap is full. + */ +static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops, + int cat) +{ + struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + + if (heap->cnt >= heap->max_cnt) { + const int b = LPT_HEAP_SZ / 2 - 1; + int cpos, val1, val2; + + /* Compare to some other LEB on the bottom of heap */ + /* Pick a position kind of randomly */ + cpos = (((size_t)lprops >> 4) & b) + b; + ubifs_assert(cpos >= b); + ubifs_assert(cpos < LPT_HEAP_SZ); + ubifs_assert(cpos < heap->cnt); + + val1 = get_heap_comp_val(lprops, cat); + val2 = get_heap_comp_val(heap->arr[cpos], cat); + if (val1 > val2) { + struct ubifs_lprops *lp; + + lp = heap->arr[cpos]; + lp->flags &= ~LPROPS_CAT_MASK; + lp->flags |= LPROPS_UNCAT; + list_add(&lp->list, &c->uncat_list); + lprops->hpos = cpos; + heap->arr[cpos] = lprops; + move_up_lpt_heap(c, heap, lprops, cat); + dbg_check_heap(c, heap, cat, lprops->hpos); + return 1; /* Added to heap */ + } + dbg_check_heap(c, heap, cat, -1); + return 0; /* Not added to heap */ + } else { + lprops->hpos = heap->cnt++; + heap->arr[lprops->hpos] = lprops; + move_up_lpt_heap(c, heap, lprops, cat); + dbg_check_heap(c, heap, cat, lprops->hpos); + return 1; /* Added to heap */ + } +} + +/** + * remove_from_lpt_heap - remove LEB properties from a LEB category heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to remove + * @cat: LEB category + */ +static void remove_from_lpt_heap(struct ubifs_info *c, + struct ubifs_lprops *lprops, int cat) +{ + struct ubifs_lpt_heap *heap; + int hpos = lprops->hpos; + + heap = &c->lpt_heap[cat - 1]; + ubifs_assert(hpos >= 0 && hpos < heap->cnt); + ubifs_assert(heap->arr[hpos] == lprops); + heap->cnt -= 1; + if (hpos < heap->cnt) { + heap->arr[hpos] = heap->arr[heap->cnt]; + heap->arr[hpos]->hpos = hpos; + adjust_lpt_heap(c, heap, heap->arr[hpos], hpos, cat); + } + dbg_check_heap(c, heap, cat, -1); +} + +/** + * lpt_heap_replace - replace lprops in a category heap. + * @c: UBIFS file-system description object + * @old_lprops: LEB properties to replace + * @new_lprops: LEB properties with which to replace + * @cat: LEB category + * + * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) + * and the lprops that the pnode contains. When that happens, references in + * the category heaps to those lprops must be updated to point to the new + * lprops. This function does that. + */ +static void lpt_heap_replace(struct ubifs_info *c, + struct ubifs_lprops *old_lprops, + struct ubifs_lprops *new_lprops, int cat) +{ + struct ubifs_lpt_heap *heap; + int hpos = new_lprops->hpos; + + heap = &c->lpt_heap[cat - 1]; + heap->arr[hpos] = new_lprops; +} + +/** + * ubifs_add_to_cat - add LEB properties to a category list or heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to add + * @cat: LEB category to which to add + * + * LEB properties are categorized to enable fast find operations. + */ +void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, + int cat) +{ + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + if (add_to_lpt_heap(c, lprops, cat)) + break; + /* No more room on heap so make it uncategorized */ + cat = LPROPS_UNCAT; + /* Fall through */ + case LPROPS_UNCAT: + list_add(&lprops->list, &c->uncat_list); + break; + case LPROPS_EMPTY: + list_add(&lprops->list, &c->empty_list); + break; + case LPROPS_FREEABLE: + list_add(&lprops->list, &c->freeable_list); + c->freeable_cnt += 1; + break; + case LPROPS_FRDI_IDX: + list_add(&lprops->list, &c->frdi_idx_list); + break; + default: + ubifs_assert(0); + } + lprops->flags &= ~LPROPS_CAT_MASK; + lprops->flags |= cat; +} + +/** + * ubifs_remove_from_cat - remove LEB properties from a category list or heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to remove + * @cat: LEB category from which to remove + * + * LEB properties are categorized to enable fast find operations. + */ +static void ubifs_remove_from_cat(struct ubifs_info *c, + struct ubifs_lprops *lprops, int cat) +{ + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + remove_from_lpt_heap(c, lprops, cat); + break; + case LPROPS_FREEABLE: + c->freeable_cnt -= 1; + ubifs_assert(c->freeable_cnt >= 0); + /* Fall through */ + case LPROPS_UNCAT: + case LPROPS_EMPTY: + case LPROPS_FRDI_IDX: + ubifs_assert(!list_empty(&lprops->list)); + list_del(&lprops->list); + break; + default: + ubifs_assert(0); + } +} + +/** + * ubifs_replace_cat - replace lprops in a category list or heap. + * @c: UBIFS file-system description object + * @old_lprops: LEB properties to replace + * @new_lprops: LEB properties with which to replace + * + * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) + * and the lprops that the pnode contains. When that happens, references in + * category lists and heaps must be replaced. This function does that. + */ +void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, + struct ubifs_lprops *new_lprops) +{ + int cat; + + cat = new_lprops->flags & LPROPS_CAT_MASK; + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + lpt_heap_replace(c, old_lprops, new_lprops, cat); + break; + case LPROPS_UNCAT: + case LPROPS_EMPTY: + case LPROPS_FREEABLE: + case LPROPS_FRDI_IDX: + list_replace(&old_lprops->list, &new_lprops->list); + break; + default: + ubifs_assert(0); + } +} + +/** + * ubifs_ensure_cat - ensure LEB properties are categorized. + * @c: UBIFS file-system description object + * @lprops: LEB properties + * + * A LEB may have fallen off of the bottom of a heap, and ended up as + * uncategorized even though it has enough space for us now. If that is the case + * this function will put the LEB back onto a heap. + */ +void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ + int cat = lprops->flags & LPROPS_CAT_MASK; + + if (cat != LPROPS_UNCAT) + return; + cat = ubifs_categorize_lprops(c, lprops); + if (cat == LPROPS_UNCAT) + return; + ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT); + ubifs_add_to_cat(c, lprops, cat); +} + +/** + * ubifs_categorize_lprops - categorize LEB properties. + * @c: UBIFS file-system description object + * @lprops: LEB properties to categorize + * + * LEB properties are categorized to enable fast find operations. This function + * returns the LEB category to which the LEB properties belong. Note however + * that if the LEB category is stored as a heap and the heap is full, the + * LEB properties may have their category changed to %LPROPS_UNCAT. + */ +int ubifs_categorize_lprops(const struct ubifs_info *c, + const struct ubifs_lprops *lprops) +{ + if (lprops->flags & LPROPS_TAKEN) + return LPROPS_UNCAT; + + if (lprops->free == c->leb_size) { + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return LPROPS_EMPTY; + } + + if (lprops->free + lprops->dirty == c->leb_size) { + if (lprops->flags & LPROPS_INDEX) + return LPROPS_FRDI_IDX; + else + return LPROPS_FREEABLE; + } + + if (lprops->flags & LPROPS_INDEX) { + if (lprops->dirty + lprops->free >= c->min_idx_node_sz) + return LPROPS_DIRTY_IDX; + } else { + if (lprops->dirty >= c->dead_wm && + lprops->dirty > lprops->free) + return LPROPS_DIRTY; + if (lprops->free > 0) + return LPROPS_FREE; + } + + return LPROPS_UNCAT; +} + +/** + * change_category - change LEB properties category. + * @c: UBIFS file-system description object + * @lprops: LEB properties to recategorize + * + * LEB properties are categorized to enable fast find operations. When the LEB + * properties change they must be recategorized. + */ +static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ + int old_cat = lprops->flags & LPROPS_CAT_MASK; + int new_cat = ubifs_categorize_lprops(c, lprops); + + if (old_cat == new_cat) { + struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1]; + + /* lprops on a heap now must be moved up or down */ + if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT) + return; /* Not on a heap */ + heap = &c->lpt_heap[new_cat - 1]; + adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat); + } else { + ubifs_remove_from_cat(c, lprops, old_cat); + ubifs_add_to_cat(c, lprops, new_cat); + } +} + +/** + * ubifs_get_lprops - get reference to LEB properties. + * @c: the UBIFS file-system description object + * + * This function locks lprops. Lprops have to be unlocked by + * 'ubifs_release_lprops()'. + */ +void ubifs_get_lprops(struct ubifs_info *c) +{ + mutex_lock(&c->lp_mutex); +} + +/** + * calc_dark - calculate LEB dark space size. + * @c: the UBIFS file-system description object + * @spc: amount of free and dirty space in the LEB + * + * This function calculates amount of dark space in an LEB which has @spc bytes + * of free and dirty space. Returns the calculations result. + * + * Dark space is the space which is not always usable - it depends on which + * nodes are written in which order. E.g., if an LEB has only 512 free bytes, + * it is dark space, because it cannot fit a large data node. So UBIFS cannot + * count on this LEB and treat these 512 bytes as usable because it is not true + * if, for example, only big chunks of uncompressible data will be written to + * the FS. + */ +static int calc_dark(struct ubifs_info *c, int spc) +{ + ubifs_assert(!(spc & 7)); + + if (spc < c->dark_wm) + return spc; + + /* + * If we have slightly more space then the dark space watermark, we can + * anyway safely assume it we'll be able to write a node of the + * smallest size there. + */ + if (spc - c->dark_wm < MIN_WRITE_SZ) + return spc - MIN_WRITE_SZ; + + return c->dark_wm; +} + +/** + * is_lprops_dirty - determine if LEB properties are dirty. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to test + */ +static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ + struct ubifs_pnode *pnode; + int pos; + + pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1); + pnode = (struct ubifs_pnode *)container_of(lprops - pos, + struct ubifs_pnode, + lprops[0]); + return !test_bit(COW_ZNODE, &pnode->flags) && + test_bit(DIRTY_CNODE, &pnode->flags); +} + +/** + * ubifs_change_lp - change LEB properties. + * @c: the UBIFS file-system description object + * @lp: LEB properties to change + * @free: new free space amount + * @dirty: new dirty space amount + * @flags: new flags + * @idx_gc_cnt: change to the count of idx_gc list + * + * This function changes LEB properties. This function does not change a LEB + * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC. + * + * This function returns a pointer to the updated LEB properties on success + * and a negative error code on failure. N.B. the LEB properties may have had to + * be copied (due to COW) and consequently the pointer returned may not be the + * same as the pointer passed. + */ +const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, + const struct ubifs_lprops *lp, + int free, int dirty, int flags, + int idx_gc_cnt) +{ + /* + * This is the only function that is allowed to change lprops, so we + * discard the const qualifier. + */ + struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; + + dbg_lp("LEB %d, free %d, dirty %d, flags %d", + lprops->lnum, free, dirty, flags); + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + ubifs_assert(c->lst.empty_lebs >= 0 && + c->lst.empty_lebs <= c->main_lebs); + ubifs_assert(c->freeable_cnt >= 0); + ubifs_assert(c->freeable_cnt <= c->main_lebs); + ubifs_assert(c->lst.taken_empty_lebs >= 0); + ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs); + ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7)); + ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7)); + ubifs_assert(!(c->lst.total_used & 7)); + ubifs_assert(free == LPROPS_NC || free >= 0); + ubifs_assert(dirty == LPROPS_NC || dirty >= 0); + + if (!is_lprops_dirty(c, lprops)) { + lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum); + if (IS_ERR(lprops)) + return lprops; + } else + ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum)); + + ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); + + spin_lock(&c->space_lock); + + if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) + c->lst.taken_empty_lebs -= 1; + + if (!(lprops->flags & LPROPS_INDEX)) { + int old_spc; + + old_spc = lprops->free + lprops->dirty; + if (old_spc < c->dead_wm) + c->lst.total_dead -= old_spc; + else + c->lst.total_dark -= calc_dark(c, old_spc); + + c->lst.total_used -= c->leb_size - old_spc; + } + + if (free != LPROPS_NC) { + free = ALIGN(free, 8); + c->lst.total_free += free - lprops->free; + + /* Increase or decrease empty LEBs counter if needed */ + if (free == c->leb_size) { + if (lprops->free != c->leb_size) + c->lst.empty_lebs += 1; + } else if (lprops->free == c->leb_size) + c->lst.empty_lebs -= 1; + lprops->free = free; + } + + if (dirty != LPROPS_NC) { + dirty = ALIGN(dirty, 8); + c->lst.total_dirty += dirty - lprops->dirty; + lprops->dirty = dirty; + } + + if (flags != LPROPS_NC) { + /* Take care about indexing LEBs counter if needed */ + if ((lprops->flags & LPROPS_INDEX)) { + if (!(flags & LPROPS_INDEX)) + c->lst.idx_lebs -= 1; + } else if (flags & LPROPS_INDEX) + c->lst.idx_lebs += 1; + lprops->flags = flags; + } + + if (!(lprops->flags & LPROPS_INDEX)) { + int new_spc; + + new_spc = lprops->free + lprops->dirty; + if (new_spc < c->dead_wm) + c->lst.total_dead += new_spc; + else + c->lst.total_dark += calc_dark(c, new_spc); + + c->lst.total_used += c->leb_size - new_spc; + } + + if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) + c->lst.taken_empty_lebs += 1; + + change_category(c, lprops); + + c->idx_gc_cnt += idx_gc_cnt; + + spin_unlock(&c->space_lock); + + return lprops; +} + +/** + * ubifs_release_lprops - release lprops lock. + * @c: the UBIFS file-system description object + * + * This function has to be called after each 'ubifs_get_lprops()' call to + * unlock lprops. + */ +void ubifs_release_lprops(struct ubifs_info *c) +{ + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + ubifs_assert(c->lst.empty_lebs >= 0 && + c->lst.empty_lebs <= c->main_lebs); + + mutex_unlock(&c->lp_mutex); +} + +/** + * ubifs_get_lp_stats - get lprops statistics. + * @c: UBIFS file-system description object + * @st: return statistics + */ +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st) +{ + spin_lock(&c->space_lock); + memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats)); + spin_unlock(&c->space_lock); +} + +/** + * ubifs_change_one_lp - change LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to change properties for + * @free: amount of free space + * @dirty: amount of dirty space + * @flags_set: flags to set + * @flags_clean: flags to clean + * @idx_gc_cnt: change to the count of idx_gc list + * + * This function changes properties of LEB @lnum. It is a helper wrapper over + * 'ubifs_change_lp()' which hides lprops get/release. The arguments are the + * same as in case of 'ubifs_change_lp()'. Returns zero in case of success and + * a negative error code in case of failure. + */ +int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean, int idx_gc_cnt) +{ + int err = 0, flags; + const struct ubifs_lprops *lp; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + flags = (lp->flags | flags_set) & ~flags_clean; + lp = ubifs_change_lp(c, lp, free, dirty, flags, idx_gc_cnt); + if (IS_ERR(lp)) + err = PTR_ERR(lp); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_update_one_lp - update LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to change properties for + * @free: amount of free space + * @dirty: amount of dirty space to add + * @flags_set: flags to set + * @flags_clean: flags to clean + * + * This function is the same as 'ubifs_change_one_lp()' but @dirty is added to + * current dirty space, not substitutes it. + */ +int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean) +{ + int err = 0, flags; + const struct ubifs_lprops *lp; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + flags = (lp->flags | flags_set) & ~flags_clean; + lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty, flags, 0); + if (IS_ERR(lp)) + err = PTR_ERR(lp); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_read_one_lp - read LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to read properties for + * @lp: where to store read properties + * + * This helper function reads properties of a LEB @lnum and stores them in @lp. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) +{ + int err = 0; + const struct ubifs_lprops *lpp; + + ubifs_get_lprops(c); + + lpp = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lpp)) { + err = PTR_ERR(lpp); + goto out; + } + + memcpy(lp, lpp, sizeof(struct ubifs_lprops)); + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_fast_find_free - try to find a LEB with free space quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a LEB with free space or %NULL if + * the function is unable to find a LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + heap = &c->lpt_heap[LPROPS_FREE - 1]; + if (heap->cnt == 0) + return NULL; + + lprops = heap->arr[0]; + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + return lprops; +} + +/** + * ubifs_fast_find_empty - try to find an empty LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for an empty LEB or %NULL if the + * function is unable to find an empty LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + if (list_empty(&c->empty_list)) + return NULL; + + lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + ubifs_assert(lprops->free == c->leb_size); + return lprops; +} + +/** + * ubifs_fast_find_freeable - try to find a freeable LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a freeable LEB or %NULL if the + * function is unable to find a freeable LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + if (list_empty(&c->freeable_list)) + return NULL; + + lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert(!(lprops->flags & LPROPS_INDEX)); + ubifs_assert(lprops->free + lprops->dirty == c->leb_size); + ubifs_assert(c->freeable_cnt > 0); + return lprops; +} + +/** + * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a freeable index LEB or %NULL if the + * function is unable to find a freeable index LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + + ubifs_assert(mutex_is_locked(&c->lp_mutex)); + + if (list_empty(&c->frdi_idx_list)) + return NULL; + + lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list); + ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); + ubifs_assert((lprops->flags & LPROPS_INDEX)); + ubifs_assert(lprops->free + lprops->dirty == c->leb_size); + return lprops; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_check_cats - check category heaps and lists. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_cats(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct list_head *pos; + int i, cat; + + if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + return 0; + + list_for_each_entry(lprops, &c->empty_list, list) { + if (lprops->free != c->leb_size) { + ubifs_err("non-empty LEB %d on empty list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB %d on empty list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + } + + i = 0; + list_for_each_entry(lprops, &c->freeable_list, list) { + if (lprops->free + lprops->dirty != c->leb_size) { + ubifs_err("non-freeable LEB %d on freeable list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB %d on freeable list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + i += 1; + } + if (i != c->freeable_cnt) { + ubifs_err("freeable list count %d expected %d", i, + c->freeable_cnt); + return -EINVAL; + } + + i = 0; + list_for_each(pos, &c->idx_gc) + i += 1; + if (i != c->idx_gc_cnt) { + ubifs_err("idx_gc list count %d expected %d", i, + c->idx_gc_cnt); + return -EINVAL; + } + + list_for_each_entry(lprops, &c->frdi_idx_list, list) { + if (lprops->free + lprops->dirty != c->leb_size) { + ubifs_err("non-freeable LEB %d on frdi_idx list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB %d on frdi_idx list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + if (!(lprops->flags & LPROPS_INDEX)) { + ubifs_err("non-index LEB %d on frdi_idx list " + "(free %d dirty %d flags %d)", lprops->lnum, + lprops->free, lprops->dirty, lprops->flags); + return -EINVAL; + } + } + + for (cat = 1; cat <= LPROPS_HEAP_CNT; cat++) { + struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + + for (i = 0; i < heap->cnt; i++) { + lprops = heap->arr[i]; + if (!lprops) { + ubifs_err("null ptr in LPT heap cat %d", cat); + return -EINVAL; + } + if (lprops->hpos != i) { + ubifs_err("bad ptr in LPT heap cat %d", cat); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + ubifs_err("taken LEB in LPT heap cat %d", cat); + return -EINVAL; + } + } + } + + return 0; +} + +void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + int add_pos) +{ + int i = 0, j, err = 0; + + if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + return; + + for (i = 0; i < heap->cnt; i++) { + struct ubifs_lprops *lprops = heap->arr[i]; + struct ubifs_lprops *lp; + + if (i != add_pos) + if ((lprops->flags & LPROPS_CAT_MASK) != cat) { + err = 1; + goto out; + } + if (lprops->hpos != i) { + err = 2; + goto out; + } + lp = ubifs_lpt_lookup(c, lprops->lnum); + if (IS_ERR(lp)) { + err = 3; + goto out; + } + if (lprops != lp) { + dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", + (size_t)lprops, (size_t)lp, lprops->lnum, + lp->lnum); + err = 4; + goto out; + } + for (j = 0; j < i; j++) { + lp = heap->arr[j]; + if (lp == lprops) { + err = 5; + goto out; + } + if (lp->lnum == lprops->lnum) { + err = 6; + goto out; + } + } + } +out: + if (err) { + dbg_msg("failed cat %d hpos %d err %d", cat, i, err); + dbg_dump_stack(); + dbg_dump_heap(c, heap, cat); + } +} + +/** + * struct scan_check_data - data provided to scan callback function. + * @lst: LEB properties statistics + * @err: error code + */ +struct scan_check_data { + struct ubifs_lp_stats lst; + int err; +}; + +/** + * scan_check_cb - scan callback. + * @c: the UBIFS file-system description object + * @lp: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory + * @data: information passed to and from the caller of the scan + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree + * in main memory (%LPT_SCAN_ADD), or whether the scan should stop + * (%LPT_SCAN_STOP). + */ +static int scan_check_cb(struct ubifs_info *c, + const struct ubifs_lprops *lp, int in_tree, + struct scan_check_data *data) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + struct ubifs_lp_stats *lst = &data->lst; + int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; + + cat = lp->flags & LPROPS_CAT_MASK; + if (cat != LPROPS_UNCAT) { + cat = ubifs_categorize_lprops(c, lp); + if (cat != (lp->flags & LPROPS_CAT_MASK)) { + ubifs_err("bad LEB category %d expected %d", + (lp->flags & LPROPS_CAT_MASK), cat); + goto out; + } + } + + /* Check lp is on its category list (if it has one) */ + if (in_tree) { + struct list_head *list = NULL; + + switch (cat) { + case LPROPS_EMPTY: + list = &c->empty_list; + break; + case LPROPS_FREEABLE: + list = &c->freeable_list; + break; + case LPROPS_FRDI_IDX: + list = &c->frdi_idx_list; + break; + case LPROPS_UNCAT: + list = &c->uncat_list; + break; + } + if (list) { + struct ubifs_lprops *lprops; + int found = 0; + + list_for_each_entry(lprops, list, list) { + if (lprops == lp) { + found = 1; + break; + } + } + if (!found) { + ubifs_err("bad LPT list (category %d)", cat); + goto out; + } + } + } + + /* Check lp is on its category heap (if it has one) */ + if (in_tree && cat > 0 && cat <= LPROPS_HEAP_CNT) { + struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + + if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || + lp != heap->arr[lp->hpos]) { + ubifs_err("bad LPT heap (category %d)", cat); + goto out; + } + } + + sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + if (IS_ERR(sleb)) { + /* + * After an unclean unmount, empty and freeable LEBs + * may contain garbage. + */ + if (lp->free == c->leb_size) { + ubifs_err("scan errors were in empty LEB " + "- continuing checking"); + lst->empty_lebs += 1; + lst->total_free += c->leb_size; + lst->total_dark += calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } + + if (lp->free + lp->dirty == c->leb_size && + !(lp->flags & LPROPS_INDEX)) { + ubifs_err("scan errors were in freeable LEB " + "- continuing checking"); + lst->total_free += lp->free; + lst->total_dirty += lp->dirty; + lst->total_dark += calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } + data->err = PTR_ERR(sleb); + return LPT_SCAN_STOP; + } + + is_idx = -1; + list_for_each_entry(snod, &sleb->nodes, list) { + int found, level = 0; + + cond_resched(); + + if (is_idx == -1) + is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0; + + if (is_idx && snod->type != UBIFS_IDX_NODE) { + ubifs_err("indexing node in data LEB %d:%d", + lnum, snod->offs); + goto out_destroy; + } + + if (snod->type == UBIFS_IDX_NODE) { + struct ubifs_idx_node *idx = snod->node; + + key_read(c, ubifs_idx_key(c, idx), &snod->key); + level = le16_to_cpu(idx->level); + } + + found = ubifs_tnc_has_node(c, &snod->key, level, lnum, + snod->offs, is_idx); + if (found) { + if (found < 0) + goto out_destroy; + used += ALIGN(snod->len, 8); + } + } + + free = c->leb_size - sleb->endpt; + dirty = sleb->endpt - used; + + if (free > c->leb_size || free < 0 || dirty > c->leb_size || + dirty < 0) { + ubifs_err("bad calculated accounting for LEB %d: " + "free %d, dirty %d", lnum, free, dirty); + goto out_destroy; + } + + if (lp->free + lp->dirty == c->leb_size && + free + dirty == c->leb_size) + if ((is_idx && !(lp->flags & LPROPS_INDEX)) || + (!is_idx && free == c->leb_size) || + lp->free == c->leb_size) { + /* + * Empty or freeable LEBs could contain index + * nodes from an uncompleted commit due to an + * unclean unmount. Or they could be empty for + * the same reason. Or it may simply not have been + * unmapped. + */ + free = lp->free; + dirty = lp->dirty; + is_idx = 0; + } + + if (is_idx && lp->free + lp->dirty == free + dirty && + lnum != c->ihead_lnum) { + /* + * After an unclean unmount, an index LEB could have a different + * amount of free space than the value recorded by lprops. That + * is because the in-the-gaps method may use free space or + * create free space (as a side-effect of using ubi_leb_change + * and not writing the whole LEB). The incorrect free space + * value is not a problem because the index is only ever + * allocated empty LEBs, so there will never be an attempt to + * write to the free space at the end of an index LEB - except + * by the in-the-gaps method for which it is not a problem. + */ + free = lp->free; + dirty = lp->dirty; + } + + if (lp->free != free || lp->dirty != dirty) + goto out_print; + + if (is_idx && !(lp->flags & LPROPS_INDEX)) { + if (free == c->leb_size) + /* Free but not unmapped LEB, it's fine */ + is_idx = 0; + else { + ubifs_err("indexing node without indexing " + "flag"); + goto out_print; + } + } + + if (!is_idx && (lp->flags & LPROPS_INDEX)) { + ubifs_err("data node with indexing flag"); + goto out_print; + } + + if (free == c->leb_size) + lst->empty_lebs += 1; + + if (is_idx) + lst->idx_lebs += 1; + + if (!(lp->flags & LPROPS_INDEX)) + lst->total_used += c->leb_size - free - dirty; + lst->total_free += free; + lst->total_dirty += dirty; + + if (!(lp->flags & LPROPS_INDEX)) { + int spc = free + dirty; + + if (spc < c->dead_wm) + lst->total_dead += spc; + else + lst->total_dark += calc_dark(c, spc); + } + + ubifs_scan_destroy(sleb); + + return LPT_SCAN_CONTINUE; + +out_print: + ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " + "should be free %d, dirty %d", + lnum, lp->free, lp->dirty, lp->flags, free, dirty); + dbg_dump_leb(c, lnum); +out_destroy: + ubifs_scan_destroy(sleb); +out: + data->err = -EINVAL; + return LPT_SCAN_STOP; +} + +/** + * dbg_check_lprops - check all LEB properties. + * @c: UBIFS file-system description object + * + * This function checks all LEB properties and makes sure they are all correct. + * It returns zero if everything is fine, %-EINVAL if there is an inconsistency + * and other negative error codes in case of other errors. This function is + * called while the file system is locked (because of commit start), so no + * additional locking is required. Note that locking the LPT mutex would cause + * a circular lock dependency with the TNC mutex. + */ +int dbg_check_lprops(struct ubifs_info *c) +{ + int i, err; + struct scan_check_data data; + struct ubifs_lp_stats *lst = &data.lst; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + + /* + * As we are going to scan the media, the write buffers have to be + * synchronized. + */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; + } + + memset(lst, 0, sizeof(struct ubifs_lp_stats)); + + data.err = 0; + err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, + (ubifs_lpt_scan_callback)scan_check_cb, + &data); + if (err && err != -ENOSPC) + goto out; + if (data.err) { + err = data.err; + goto out; + } + + if (lst->empty_lebs != c->lst.empty_lebs || + lst->idx_lebs != c->lst.idx_lebs || + lst->total_free != c->lst.total_free || + lst->total_dirty != c->lst.total_dirty || + lst->total_used != c->lst.total_used) { + ubifs_err("bad overall accounting"); + ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", + lst->empty_lebs, lst->idx_lebs, lst->total_free, + lst->total_dirty, lst->total_used); + ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", + c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, + c->lst.total_dirty, c->lst.total_used); + err = -EINVAL; + goto out; + } + + if (lst->total_dead != c->lst.total_dead || + lst->total_dark != c->lst.total_dark) { + ubifs_err("bad dead/dark space accounting"); + ubifs_err("calculated: total_dead %lld, total_dark %lld", + lst->total_dead, lst->total_dark); + ubifs_err("read from lprops: total_dead %lld, total_dark %lld", + c->lst.total_dead, c->lst.total_dark); + err = -EINVAL; + goto out; + } + + err = dbg_check_cats(c); +out: + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c new file mode 100644 index 000000000000..9ff2463177e5 --- /dev/null +++ b/fs/ubifs/lpt.c @@ -0,0 +1,2243 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the LEB properties tree (LPT) area. The LPT area + * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and + * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits + * between the log and the orphan area. + * + * The LPT area is like a miniature self-contained file system. It is required + * that it never runs out of space, is fast to access and update, and scales + * logarithmically. The LEB properties tree is implemented as a wandering tree + * much like the TNC, and the LPT area has its own garbage collection. + * + * The LPT has two slightly different forms called the "small model" and the + * "big model". The small model is used when the entire LEB properties table + * can be written into a single eraseblock. In that case, garbage collection + * consists of just writing the whole table, which therefore makes all other + * eraseblocks reusable. In the case of the big model, dirty eraseblocks are + * selected for garbage collection, which consists are marking the nodes in + * that LEB as dirty, and then only the dirty nodes are written out. Also, in + * the case of the big model, a table of LEB numbers is saved so that the entire + * LPT does not to be scanned looking for empty eraseblocks when UBIFS is first + * mounted. + */ + +#include +#include "ubifs.h" + +/** + * do_calc_lpt_geom - calculate sizes for the LPT area. + * @c: the UBIFS file-system description object + * + * Calculate the sizes of LPT bit fields, nodes, and tree, based on the + * properties of the flash and whether LPT is "big" (c->big_lpt). + */ +static void do_calc_lpt_geom(struct ubifs_info *c) +{ + int i, n, bits, per_leb_wastage, max_pnode_cnt; + long long sz, tot_wastage; + + n = c->main_lebs + c->max_leb_cnt - c->leb_cnt; + max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); + + c->lpt_hght = 1; + n = UBIFS_LPT_FANOUT; + while (n < max_pnode_cnt) { + c->lpt_hght += 1; + n <<= UBIFS_LPT_FANOUT_SHIFT; + } + + c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + + n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT); + c->nnode_cnt = n; + for (i = 1; i < c->lpt_hght; i++) { + n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); + c->nnode_cnt += n; + } + + c->space_bits = fls(c->leb_size) - 3; + c->lpt_lnum_bits = fls(c->lpt_lebs); + c->lpt_offs_bits = fls(c->leb_size - 1); + c->lpt_spc_bits = fls(c->leb_size); + + n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT); + c->pcnt_bits = fls(n - 1); + + c->lnum_bits = fls(c->max_leb_cnt - 1); + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + (c->big_lpt ? c->pcnt_bits : 0) + + (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT; + c->pnode_sz = (bits + 7) / 8; + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + (c->big_lpt ? c->pcnt_bits : 0) + + (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT; + c->nnode_sz = (bits + 7) / 8; + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + c->lpt_lebs * c->lpt_spc_bits * 2; + c->ltab_sz = (bits + 7) / 8; + + bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + + c->lnum_bits * c->lsave_cnt; + c->lsave_sz = (bits + 7) / 8; + + /* Calculate the minimum LPT size */ + c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; + c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; + c->lpt_sz += c->ltab_sz; + c->lpt_sz += c->lsave_sz; + + /* Add wastage */ + sz = c->lpt_sz; + per_leb_wastage = max_t(int, c->pnode_sz, c->nnode_sz); + sz += per_leb_wastage; + tot_wastage = per_leb_wastage; + while (sz > c->leb_size) { + sz += per_leb_wastage; + sz -= c->leb_size; + tot_wastage += per_leb_wastage; + } + tot_wastage += ALIGN(sz, c->min_io_size) - sz; + c->lpt_sz += tot_wastage; +} + +/** + * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area. + * @c: the UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_calc_lpt_geom(struct ubifs_info *c) +{ + int lebs_needed; + uint64_t sz; + + do_calc_lpt_geom(c); + + /* Verify that lpt_lebs is big enough */ + sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ + sz += c->leb_size - 1; + do_div(sz, c->leb_size); + lebs_needed = sz; + if (lebs_needed > c->lpt_lebs) { + ubifs_err("too few LPT LEBs"); + return -EINVAL; + } + + /* Verify that ltab fits in a single LEB (since ltab is a single node */ + if (c->ltab_sz > c->leb_size) { + ubifs_err("LPT ltab too big"); + return -EINVAL; + } + + c->check_lpt_free = c->big_lpt; + + return 0; +} + +/** + * calc_dflt_lpt_geom - calculate default LPT geometry. + * @c: the UBIFS file-system description object + * @main_lebs: number of main area LEBs is passed and returned here + * @big_lpt: whether the LPT area is "big" is returned here + * + * The size of the LPT area depends on parameters that themselves are dependent + * on the size of the LPT area. This function, successively recalculates the LPT + * area geometry until the parameters and resultant geometry are consistent. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, + int *big_lpt) +{ + int i, lebs_needed; + uint64_t sz; + + /* Start by assuming the minimum number of LPT LEBs */ + c->lpt_lebs = UBIFS_MIN_LPT_LEBS; + c->main_lebs = *main_lebs - c->lpt_lebs; + if (c->main_lebs <= 0) + return -EINVAL; + + /* And assume we will use the small LPT model */ + c->big_lpt = 0; + + /* + * Calculate the geometry based on assumptions above and then see if it + * makes sense + */ + do_calc_lpt_geom(c); + + /* Small LPT model must have lpt_sz < leb_size */ + if (c->lpt_sz > c->leb_size) { + /* Nope, so try again using big LPT model */ + c->big_lpt = 1; + do_calc_lpt_geom(c); + } + + /* Now check there are enough LPT LEBs */ + for (i = 0; i < 64 ; i++) { + sz = c->lpt_sz * 4; /* Allow 4 times the size */ + sz += c->leb_size - 1; + do_div(sz, c->leb_size); + lebs_needed = sz; + if (lebs_needed > c->lpt_lebs) { + /* Not enough LPT LEBs so try again with more */ + c->lpt_lebs = lebs_needed; + c->main_lebs = *main_lebs - c->lpt_lebs; + if (c->main_lebs <= 0) + return -EINVAL; + do_calc_lpt_geom(c); + continue; + } + if (c->ltab_sz > c->leb_size) { + ubifs_err("LPT ltab too big"); + return -EINVAL; + } + *main_lebs = c->main_lebs; + *big_lpt = c->big_lpt; + return 0; + } + return -EINVAL; +} + +/** + * pack_bits - pack bit fields end-to-end. + * @addr: address at which to pack (passed and next address returned) + * @pos: bit position at which to pack (passed and next position returned) + * @val: value to pack + * @nrbits: number of bits of value to pack (1-32) + */ +static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits) +{ + uint8_t *p = *addr; + int b = *pos; + + ubifs_assert(nrbits > 0); + ubifs_assert(nrbits <= 32); + ubifs_assert(*pos >= 0); + ubifs_assert(*pos < 8); + ubifs_assert((val >> nrbits) == 0 || nrbits == 32); + if (b) { + *p |= ((uint8_t)val) << b; + nrbits += b; + if (nrbits > 8) { + *++p = (uint8_t)(val >>= (8 - b)); + if (nrbits > 16) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 24) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 32) + *++p = (uint8_t)(val >>= 8); + } + } + } + } else { + *p = (uint8_t)val; + if (nrbits > 8) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 16) { + *++p = (uint8_t)(val >>= 8); + if (nrbits > 24) + *++p = (uint8_t)(val >>= 8); + } + } + } + b = nrbits & 7; + if (b == 0) + p++; + *addr = p; + *pos = b; +} + +/** + * ubifs_unpack_bits - unpack bit fields. + * @addr: address at which to unpack (passed and next address returned) + * @pos: bit position at which to unpack (passed and next position returned) + * @nrbits: number of bits of value to unpack (1-32) + * + * This functions returns the value unpacked. + */ +uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits) +{ + const int k = 32 - nrbits; + uint8_t *p = *addr; + int b = *pos; + uint32_t val; + + ubifs_assert(nrbits > 0); + ubifs_assert(nrbits <= 32); + ubifs_assert(*pos >= 0); + ubifs_assert(*pos < 8); + if (b) { + val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) | + ((uint32_t)p[4] << 24); + val <<= (8 - b); + val |= *p >> b; + nrbits += b; + } else + val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | + ((uint32_t)p[3] << 24); + val <<= k; + val >>= k; + b = nrbits & 7; + p += nrbits / 8; + *addr = p; + *pos = b; + ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); + return val; +} + +/** + * ubifs_pack_pnode - pack all the bit fields of a pnode. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @pnode: pnode to pack + */ +void ubifs_pack_pnode(struct ubifs_info *c, void *buf, + struct ubifs_pnode *pnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS); + if (c->big_lpt) + pack_bits(&addr, &pos, pnode->num, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + pack_bits(&addr, &pos, pnode->lprops[i].free >> 3, + c->space_bits); + pack_bits(&addr, &pos, pnode->lprops[i].dirty >> 3, + c->space_bits); + if (pnode->lprops[i].flags & LPROPS_INDEX) + pack_bits(&addr, &pos, 1, 1); + else + pack_bits(&addr, &pos, 0, 1); + } + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->pnode_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_pack_nnode - pack all the bit fields of a nnode. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @nnode: nnode to pack + */ +void ubifs_pack_nnode(struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS); + if (c->big_lpt) + pack_bits(&addr, &pos, nnode->num, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int lnum = nnode->nbranch[i].lnum; + + if (lnum == 0) + lnum = c->lpt_last + 1; + pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits); + pack_bits(&addr, &pos, nnode->nbranch[i].offs, + c->lpt_offs_bits); + } + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->nnode_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_pack_ltab - pack the LPT's own lprops table. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @ltab: LPT's own lprops table to pack + */ +void ubifs_pack_ltab(struct ubifs_info *c, void *buf, + struct ubifs_lpt_lprops *ltab) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS); + for (i = 0; i < c->lpt_lebs; i++) { + pack_bits(&addr, &pos, ltab[i].free, c->lpt_spc_bits); + pack_bits(&addr, &pos, ltab[i].dirty, c->lpt_spc_bits); + } + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->ltab_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_pack_lsave - pack the LPT's save table. + * @c: UBIFS file-system description object + * @buf: buffer into which to pack + * @lsave: LPT's save table to pack + */ +void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0; + uint16_t crc; + + pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS); + for (i = 0; i < c->lsave_cnt; i++) + pack_bits(&addr, &pos, lsave[i], c->lnum_bits); + crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + c->lsave_sz - UBIFS_LPT_CRC_BYTES); + addr = buf; + pos = 0; + pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); +} + +/** + * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @lnum: LEB number to which to add dirty space + * @dirty: amount of dirty space to add + */ +void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty) +{ + if (!dirty || !lnum) + return; + dbg_lp("LEB %d add %d to %d", + lnum, dirty, c->ltab[lnum - c->lpt_first].dirty); + ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); + c->ltab[lnum - c->lpt_first].dirty += dirty; +} + +/** + * set_ltab - set LPT LEB properties. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @free: amount of free space + * @dirty: amount of dirty space + */ +static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty) +{ + dbg_lp("LEB %d free %d dirty %d to %d %d", + lnum, c->ltab[lnum - c->lpt_first].free, + c->ltab[lnum - c->lpt_first].dirty, free, dirty); + ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); + c->ltab[lnum - c->lpt_first].free = free; + c->ltab[lnum - c->lpt_first].dirty = dirty; +} + +/** + * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @nnode: nnode for which to add dirt + */ +void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode) +{ + struct ubifs_nnode *np = nnode->parent; + + if (np) + ubifs_add_lpt_dirt(c, np->nbranch[nnode->iip].lnum, + c->nnode_sz); + else { + ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz); + if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { + c->lpt_drty_flgs |= LTAB_DIRTY; + ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); + } + } +} + +/** + * add_pnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @pnode: pnode for which to add dirt + */ +static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, + c->pnode_sz); +} + +/** + * calc_nnode_num - calculate nnode number. + * @row: the row in the tree (root is zero) + * @col: the column in the row (leftmost is zero) + * + * The nnode number is a number that uniquely identifies a nnode and can be used + * easily to traverse the tree from the root to that nnode. + * + * This function calculates and returns the nnode number for the nnode at @row + * and @col. + */ +static int calc_nnode_num(int row, int col) +{ + int num, bits; + + num = 1; + while (row--) { + bits = (col & (UBIFS_LPT_FANOUT - 1)); + col >>= UBIFS_LPT_FANOUT_SHIFT; + num <<= UBIFS_LPT_FANOUT_SHIFT; + num |= bits; + } + return num; +} + +/** + * calc_nnode_num_from_parent - calculate nnode number. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * The nnode number is a number that uniquely identifies a nnode and can be used + * easily to traverse the tree from the root to that nnode. + * + * This function calculates and returns the nnode number based on the parent's + * nnode number and the index in parent. + */ +static int calc_nnode_num_from_parent(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + int num, shft; + + if (!parent) + return 1; + shft = (c->lpt_hght - parent->level) * UBIFS_LPT_FANOUT_SHIFT; + num = parent->num ^ (1 << shft); + num |= (UBIFS_LPT_FANOUT + iip) << shft; + return num; +} + +/** + * calc_pnode_num_from_parent - calculate pnode number. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * The pnode number is a number that uniquely identifies a pnode and can be used + * easily to traverse the tree from the root to that pnode. + * + * This function calculates and returns the pnode number based on the parent's + * nnode number and the index in parent. + */ +static int calc_pnode_num_from_parent(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; + + for (i = 0; i < n; i++) { + num <<= UBIFS_LPT_FANOUT_SHIFT; + num |= pnum & (UBIFS_LPT_FANOUT - 1); + pnum >>= UBIFS_LPT_FANOUT_SHIFT; + } + num <<= UBIFS_LPT_FANOUT_SHIFT; + num |= iip; + return num; +} + +/** + * ubifs_create_dflt_lpt - create default LPT. + * @c: UBIFS file-system description object + * @main_lebs: number of main area LEBs is passed and returned here + * @lpt_first: LEB number of first LPT LEB + * @lpt_lebs: number of LEBs for LPT is passed and returned here + * @big_lpt: use big LPT model is passed and returned here + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + int *lpt_lebs, int *big_lpt) +{ + int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row; + int blnum, boffs, bsz, bcnt; + struct ubifs_pnode *pnode = NULL; + struct ubifs_nnode *nnode = NULL; + void *buf = NULL, *p; + struct ubifs_lpt_lprops *ltab = NULL; + int *lsave = NULL; + + err = calc_dflt_lpt_geom(c, main_lebs, big_lpt); + if (err) + return err; + *lpt_lebs = c->lpt_lebs; + + /* Needed by 'ubifs_pack_nnode()' and 'set_ltab()' */ + c->lpt_first = lpt_first; + /* Needed by 'set_ltab()' */ + c->lpt_last = lpt_first + c->lpt_lebs - 1; + /* Needed by 'ubifs_pack_lsave()' */ + c->main_first = c->leb_cnt - *main_lebs; + + lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_KERNEL); + pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL); + nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL); + buf = vmalloc(c->leb_size); + ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + if (!pnode || !nnode || !buf || !ltab || !lsave) { + err = -ENOMEM; + goto out; + } + + ubifs_assert(!c->ltab); + c->ltab = ltab; /* Needed by set_ltab */ + + /* Initialize LPT's own lprops */ + for (i = 0; i < c->lpt_lebs; i++) { + ltab[i].free = c->leb_size; + ltab[i].dirty = 0; + ltab[i].tgc = 0; + ltab[i].cmt = 0; + } + + lnum = lpt_first; + p = buf; + /* Number of leaf nodes (pnodes) */ + cnt = c->pnode_cnt; + + /* + * The first pnode contains the LEB properties for the LEBs that contain + * the root inode node and the root index node of the index tree. + */ + node_sz = ALIGN(ubifs_idx_node_sz(c, 1), 8); + iopos = ALIGN(node_sz, c->min_io_size); + pnode->lprops[0].free = c->leb_size - iopos; + pnode->lprops[0].dirty = iopos - node_sz; + pnode->lprops[0].flags = LPROPS_INDEX; + + node_sz = UBIFS_INO_NODE_SZ; + iopos = ALIGN(node_sz, c->min_io_size); + pnode->lprops[1].free = c->leb_size - iopos; + pnode->lprops[1].dirty = iopos - node_sz; + + for (i = 2; i < UBIFS_LPT_FANOUT; i++) + pnode->lprops[i].free = c->leb_size; + + /* Add first pnode */ + ubifs_pack_pnode(c, p, pnode); + p += c->pnode_sz; + len = c->pnode_sz; + pnode->num += 1; + + /* Reset pnode values for remaining pnodes */ + pnode->lprops[0].free = c->leb_size; + pnode->lprops[0].dirty = 0; + pnode->lprops[0].flags = 0; + + pnode->lprops[1].free = c->leb_size; + pnode->lprops[1].dirty = 0; + + /* + * To calculate the internal node branches, we keep information about + * the level below. + */ + blnum = lnum; /* LEB number of level below */ + boffs = 0; /* Offset of level below */ + bcnt = cnt; /* Number of nodes in level below */ + bsz = c->pnode_sz; /* Size of nodes in level below */ + + /* Add all remaining pnodes */ + for (i = 1; i < cnt; i++) { + if (len + c->pnode_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); + if (err) + goto out; + p = buf; + len = 0; + } + ubifs_pack_pnode(c, p, pnode); + p += c->pnode_sz; + len += c->pnode_sz; + /* + * pnodes are simply numbered left to right starting at zero, + * which means the pnode number can be used easily to traverse + * down the tree to the corresponding pnode. + */ + pnode->num += 1; + } + + row = 0; + for (i = UBIFS_LPT_FANOUT; cnt > i; i <<= UBIFS_LPT_FANOUT_SHIFT) + row += 1; + /* Add all nnodes, one level at a time */ + while (1) { + /* Number of internal nodes (nnodes) at next level */ + cnt = DIV_ROUND_UP(cnt, UBIFS_LPT_FANOUT); + for (i = 0; i < cnt; i++) { + if (len + c->nnode_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, + alen - len); + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); + if (err) + goto out; + p = buf; + len = 0; + } + /* Only 1 nnode at this level, so it is the root */ + if (cnt == 1) { + c->lpt_lnum = lnum; + c->lpt_offs = len; + } + /* Set branches to the level below */ + for (j = 0; j < UBIFS_LPT_FANOUT; j++) { + if (bcnt) { + if (boffs + bsz > c->leb_size) { + blnum += 1; + boffs = 0; + } + nnode->nbranch[j].lnum = blnum; + nnode->nbranch[j].offs = boffs; + boffs += bsz; + bcnt--; + } else { + nnode->nbranch[j].lnum = 0; + nnode->nbranch[j].offs = 0; + } + } + nnode->num = calc_nnode_num(row, i); + ubifs_pack_nnode(c, p, nnode); + p += c->nnode_sz; + len += c->nnode_sz; + } + /* Only 1 nnode at this level, so it is the root */ + if (cnt == 1) + break; + /* Update the information about the level below */ + bcnt = cnt; + bsz = c->nnode_sz; + row -= 1; + } + + if (*big_lpt) { + /* Need to add LPT's save table */ + if (len + c->lsave_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, + UBI_SHORTTERM); + if (err) + goto out; + p = buf; + len = 0; + } + + c->lsave_lnum = lnum; + c->lsave_offs = len; + + for (i = 0; i < c->lsave_cnt && i < *main_lebs; i++) + lsave[i] = c->main_first + i; + for (; i < c->lsave_cnt; i++) + lsave[i] = c->main_first; + + ubifs_pack_lsave(c, p, lsave); + p += c->lsave_sz; + len += c->lsave_sz; + } + + /* Need to add LPT's own LEB properties table */ + if (len + c->ltab_sz > c->leb_size) { + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); + if (err) + goto out; + p = buf; + len = 0; + } + + c->ltab_lnum = lnum; + c->ltab_offs = len; + + /* Update ltab before packing it */ + len += c->ltab_sz; + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + + ubifs_pack_ltab(c, p, ltab); + p += c->ltab_sz; + + /* Write remaining buffer */ + memset(p, 0xff, alen - len); + err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); + if (err) + goto out; + + c->nhead_lnum = lnum; + c->nhead_offs = ALIGN(len, c->min_io_size); + + dbg_lp("space_bits %d", c->space_bits); + dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); + dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); + dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); + dbg_lp("pcnt_bits %d", c->pcnt_bits); + dbg_lp("lnum_bits %d", c->lnum_bits); + dbg_lp("pnode_sz %d", c->pnode_sz); + dbg_lp("nnode_sz %d", c->nnode_sz); + dbg_lp("ltab_sz %d", c->ltab_sz); + dbg_lp("lsave_sz %d", c->lsave_sz); + dbg_lp("lsave_cnt %d", c->lsave_cnt); + dbg_lp("lpt_hght %d", c->lpt_hght); + dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); + dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); + dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); +out: + c->ltab = NULL; + kfree(lsave); + vfree(ltab); + vfree(buf); + kfree(nnode); + kfree(pnode); + return err; +} + +/** + * update_cats - add LEB properties of a pnode to LEB category lists and heaps. + * @c: UBIFS file-system description object + * @pnode: pnode + * + * When a pnode is loaded into memory, the LEB properties it contains are added, + * by this function, to the LEB category lists and heaps. + */ +static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + int i; + + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK; + int lnum = pnode->lprops[i].lnum; + + if (!lnum) + return; + ubifs_add_to_cat(c, &pnode->lprops[i], cat); + } +} + +/** + * replace_cats - add LEB properties of a pnode to LEB category lists and heaps. + * @c: UBIFS file-system description object + * @old_pnode: pnode copied + * @new_pnode: pnode copy + * + * During commit it is sometimes necessary to copy a pnode + * (see dirty_cow_pnode). When that happens, references in + * category lists and heaps must be replaced. This function does that. + */ +static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode, + struct ubifs_pnode *new_pnode) +{ + int i; + + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (!new_pnode->lprops[i].lnum) + return; + ubifs_replace_cat(c, &old_pnode->lprops[i], + &new_pnode->lprops[i]); + } +} + +/** + * check_lpt_crc - check LPT node crc is correct. + * @c: UBIFS file-system description object + * @buf: buffer containing node + * @len: length of node + * + * This function returns %0 on success and a negative error code on failure. + */ +static int check_lpt_crc(void *buf, int len) +{ + int pos = 0; + uint8_t *addr = buf; + uint16_t crc, calc_crc; + + crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); + calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + len - UBIFS_LPT_CRC_BYTES); + if (crc != calc_crc) { + ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, + calc_crc); + dbg_dump_stack(); + return -EINVAL; + } + return 0; +} + +/** + * check_lpt_type - check LPT node type is correct. + * @c: UBIFS file-system description object + * @addr: address of type bit field is passed and returned updated here + * @pos: position of type bit field is passed and returned updated here + * @type: expected type + * + * This function returns %0 on success and a negative error code on failure. + */ +static int check_lpt_type(uint8_t **addr, int *pos, int type) +{ + int node_type; + + node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); + if (node_type != type) { + ubifs_err("invalid type (%d) in LPT node type %d", node_type, + type); + dbg_dump_stack(); + return -EINVAL; + } + return 0; +} + +/** + * unpack_pnode - unpack a pnode. + * @c: UBIFS file-system description object + * @buf: buffer containing packed pnode to unpack + * @pnode: pnode structure to fill + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_pnode(struct ubifs_info *c, void *buf, + struct ubifs_pnode *pnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); + if (err) + return err; + if (c->big_lpt) + pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops * const lprops = &pnode->lprops[i]; + + lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits); + lprops->free <<= 3; + lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits); + lprops->dirty <<= 3; + + if (ubifs_unpack_bits(&addr, &pos, 1)) + lprops->flags = LPROPS_INDEX; + else + lprops->flags = 0; + lprops->flags |= ubifs_categorize_lprops(c, lprops); + } + err = check_lpt_crc(buf, c->pnode_sz); + return err; +} + +/** + * unpack_nnode - unpack a nnode. + * @c: UBIFS file-system description object + * @buf: buffer containing packed nnode to unpack + * @nnode: nnode structure to fill + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_nnode(struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); + if (err) + return err; + if (c->big_lpt) + nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int lnum; + + lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) + + c->lpt_first; + if (lnum == c->lpt_last + 1) + lnum = 0; + nnode->nbranch[i].lnum = lnum; + nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, + c->lpt_offs_bits); + } + err = check_lpt_crc(buf, c->nnode_sz); + return err; +} + +/** + * unpack_ltab - unpack the LPT's own lprops table. + * @c: UBIFS file-system description object + * @buf: buffer from which to unpack + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_ltab(struct ubifs_info *c, void *buf) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); + if (err) + return err; + for (i = 0; i < c->lpt_lebs; i++) { + int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); + int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); + + if (free < 0 || free > c->leb_size || dirty < 0 || + dirty > c->leb_size || free + dirty > c->leb_size) + return -EINVAL; + + c->ltab[i].free = free; + c->ltab[i].dirty = dirty; + c->ltab[i].tgc = 0; + c->ltab[i].cmt = 0; + } + err = check_lpt_crc(buf, c->ltab_sz); + return err; +} + +/** + * unpack_lsave - unpack the LPT's save table. + * @c: UBIFS file-system description object + * @buf: buffer from which to unpack + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_lsave(struct ubifs_info *c, void *buf) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int i, pos = 0, err; + + err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE); + if (err) + return err; + for (i = 0; i < c->lsave_cnt; i++) { + int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits); + + if (lnum < c->main_first || lnum >= c->leb_cnt) + return -EINVAL; + c->lsave[i] = lnum; + } + err = check_lpt_crc(buf, c->lsave_sz); + return err; +} + +/** + * validate_nnode - validate a nnode. + * @c: UBIFS file-system description object + * @nnode: nnode to validate + * @parent: parent nnode (or NULL for the root nnode) + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode, + struct ubifs_nnode *parent, int iip) +{ + int i, lvl, max_offs; + + if (c->big_lpt) { + int num = calc_nnode_num_from_parent(c, parent, iip); + + if (nnode->num != num) + return -EINVAL; + } + lvl = parent ? parent->level - 1 : c->lpt_hght; + if (lvl < 1) + return -EINVAL; + if (lvl == 1) + max_offs = c->leb_size - c->pnode_sz; + else + max_offs = c->leb_size - c->nnode_sz; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int lnum = nnode->nbranch[i].lnum; + int offs = nnode->nbranch[i].offs; + + if (lnum == 0) { + if (offs != 0) + return -EINVAL; + continue; + } + if (lnum < c->lpt_first || lnum > c->lpt_last) + return -EINVAL; + if (offs < 0 || offs > max_offs) + return -EINVAL; + } + return 0; +} + +/** + * validate_pnode - validate a pnode. + * @c: UBIFS file-system description object + * @pnode: pnode to validate + * @parent: parent nnode + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip) +{ + int i; + + if (c->big_lpt) { + int num = calc_pnode_num_from_parent(c, parent, iip); + + if (pnode->num != num) + return -EINVAL; + } + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + int free = pnode->lprops[i].free; + int dirty = pnode->lprops[i].dirty; + + if (free < 0 || free > c->leb_size || free % c->min_io_size || + (free & 7)) + return -EINVAL; + if (dirty < 0 || dirty > c->leb_size || (dirty & 7)) + return -EINVAL; + if (dirty + free > c->leb_size) + return -EINVAL; + } + return 0; +} + +/** + * set_pnode_lnum - set LEB numbers on a pnode. + * @c: UBIFS file-system description object + * @pnode: pnode to update + * + * This function calculates the LEB numbers for the LEB properties it contains + * based on the pnode number. + */ +static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + int i, lnum; + + lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (lnum >= c->leb_cnt) + return; + pnode->lprops[i].lnum = lnum++; + } +} + +/** + * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory. + * @c: UBIFS file-system description object + * @parent: parent nnode (or NULL for the root) + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch = NULL; + struct ubifs_nnode *nnode = NULL; + void *buf = c->lpt_nod_buf; + int err, lnum, offs; + + if (parent) { + branch = &parent->nbranch[iip]; + lnum = branch->lnum; + offs = branch->offs; + } else { + lnum = c->lpt_lnum; + offs = c->lpt_offs; + } + nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_NOFS); + if (!nnode) { + err = -ENOMEM; + goto out; + } + if (lnum == 0) { + /* + * This nnode was not written which just means that the LEB + * properties in the subtree below it describe empty LEBs. We + * make the nnode as though we had read it, which in fact means + * doing almost nothing. + */ + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { + err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); + if (err) + goto out; + err = unpack_nnode(c, buf, nnode); + if (err) + goto out; + } + err = validate_nnode(c, nnode, parent, iip); + if (err) + goto out; + if (!c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + if (parent) { + branch->nnode = nnode; + nnode->level = parent->level - 1; + } else { + c->nroot = nnode; + nnode->level = c->lpt_hght; + } + nnode->parent = parent; + nnode->iip = iip; + return 0; + +out: + ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); + kfree(nnode); + return err; +} + +/** + * read_pnode - read a pnode from flash and link it to the tree in memory. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_pnode *pnode = NULL; + void *buf = c->lpt_nod_buf; + int err, lnum, offs; + + branch = &parent->nbranch[iip]; + lnum = branch->lnum; + offs = branch->offs; + pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); + if (!pnode) { + err = -ENOMEM; + goto out; + } + if (lnum == 0) { + /* + * This pnode was not written which just means that the LEB + * properties in it describe empty LEBs. We make the pnode as + * though we had read it. + */ + int i; + + if (c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops * const lprops = &pnode->lprops[i]; + + lprops->free = c->leb_size; + lprops->flags = ubifs_categorize_lprops(c, lprops); + } + } else { + err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); + if (err) + goto out; + err = unpack_pnode(c, buf, pnode); + if (err) + goto out; + } + err = validate_pnode(c, pnode, parent, iip); + if (err) + goto out; + if (!c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + branch->pnode = pnode; + pnode->parent = parent; + pnode->iip = iip; + set_pnode_lnum(c, pnode); + c->pnodes_have += 1; + return 0; + +out: + ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); + dbg_dump_pnode(c, pnode, parent, iip); + dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); + kfree(pnode); + return err; +} + +/** + * read_ltab - read LPT's own lprops table. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_ltab(struct ubifs_info *c) +{ + int err; + void *buf; + + buf = vmalloc(c->ltab_sz); + if (!buf) + return -ENOMEM; + err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); + if (err) + goto out; + err = unpack_ltab(c, buf); +out: + vfree(buf); + return err; +} + +/** + * read_lsave - read LPT's save table. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_lsave(struct ubifs_info *c) +{ + int err, i; + void *buf; + + buf = vmalloc(c->lsave_sz); + if (!buf) + return -ENOMEM; + err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); + if (err) + goto out; + err = unpack_lsave(c, buf); + if (err) + goto out; + for (i = 0; i < c->lsave_cnt; i++) { + int lnum = c->lsave[i]; + + /* + * Due to automatic resizing, the values in the lsave table + * could be beyond the volume size - just ignore them. + */ + if (lnum >= c->leb_cnt) + continue; + ubifs_lpt_lookup(c, lnum); + } +out: + vfree(buf); + return err; +} + +/** + * ubifs_get_nnode - get a nnode. + * @c: UBIFS file-system description object + * @parent: parent nnode (or NULL for the root) + * @iip: index in parent + * + * This function returns a pointer to the nnode on success or a negative error + * code on failure. + */ +struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_nnode *nnode; + int err; + + branch = &parent->nbranch[iip]; + nnode = branch->nnode; + if (nnode) + return nnode; + err = ubifs_read_nnode(c, parent, iip); + if (err) + return ERR_PTR(err); + return branch->nnode; +} + +/** + * ubifs_get_pnode - get a pnode. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * This function returns a pointer to the pnode on success or a negative error + * code on failure. + */ +struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_pnode *pnode; + int err; + + branch = &parent->nbranch[iip]; + pnode = branch->pnode; + if (pnode) + return pnode; + err = read_pnode(c, parent, iip); + if (err) + return ERR_PTR(err); + update_cats(c, branch->pnode); + return branch->pnode; +} + +/** + * ubifs_lpt_lookup - lookup LEB properties in the LPT. + * @c: UBIFS file-system description object + * @lnum: LEB number to lookup + * + * This function returns a pointer to the LEB properties on success or a + * negative error code on failure. + */ +struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) +{ + int err, i, h, iip, shft; + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + nnode = c->nroot; + i = lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = ubifs_get_pnode(c, nnode, iip); + if (IS_ERR(pnode)) + return ERR_PTR(PTR_ERR(pnode)); + iip = (i & (UBIFS_LPT_FANOUT - 1)); + dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, + pnode->lprops[iip].free, pnode->lprops[iip].dirty, + pnode->lprops[iip].flags); + return &pnode->lprops[iip]; +} + +/** + * dirty_cow_nnode - ensure a nnode is not being committed. + * @c: UBIFS file-system description object + * @nnode: nnode to check + * + * Returns dirtied nnode on success or negative error code on failure. + */ +static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c, + struct ubifs_nnode *nnode) +{ + struct ubifs_nnode *n; + int i; + + if (!test_bit(COW_CNODE, &nnode->flags)) { + /* nnode is not being committed */ + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + } + return nnode; + } + + /* nnode is being committed, so copy it */ + n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS); + if (unlikely(!n)) + return ERR_PTR(-ENOMEM); + + memcpy(n, nnode, sizeof(struct ubifs_nnode)); + n->cnext = NULL; + __set_bit(DIRTY_CNODE, &n->flags); + __clear_bit(COW_CNODE, &n->flags); + + /* The children now have new parent */ + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_nbranch *branch = &n->nbranch[i]; + + if (branch->cnode) + branch->cnode->parent = n; + } + + ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags)); + __set_bit(OBSOLETE_CNODE, &nnode->flags); + + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + if (nnode->parent) + nnode->parent->nbranch[n->iip].nnode = n; + else + c->nroot = n; + return n; +} + +/** + * dirty_cow_pnode - ensure a pnode is not being committed. + * @c: UBIFS file-system description object + * @pnode: pnode to check + * + * Returns dirtied pnode on success or negative error code on failure. + */ +static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c, + struct ubifs_pnode *pnode) +{ + struct ubifs_pnode *p; + + if (!test_bit(COW_CNODE, &pnode->flags)) { + /* pnode is not being committed */ + if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { + c->dirty_pn_cnt += 1; + add_pnode_dirt(c, pnode); + } + return pnode; + } + + /* pnode is being committed, so copy it */ + p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS); + if (unlikely(!p)) + return ERR_PTR(-ENOMEM); + + memcpy(p, pnode, sizeof(struct ubifs_pnode)); + p->cnext = NULL; + __set_bit(DIRTY_CNODE, &p->flags); + __clear_bit(COW_CNODE, &p->flags); + replace_cats(c, pnode, p); + + ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags)); + __set_bit(OBSOLETE_CNODE, &pnode->flags); + + c->dirty_pn_cnt += 1; + add_pnode_dirt(c, pnode); + pnode->parent->nbranch[p->iip].pnode = p; + return p; +} + +/** + * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT. + * @c: UBIFS file-system description object + * @lnum: LEB number to lookup + * + * This function returns a pointer to the LEB properties on success or a + * negative error code on failure. + */ +struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) +{ + int err, i, h, iip, shft; + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + nnode = c->nroot; + nnode = dirty_cow_nnode(c, nnode); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + i = lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + nnode = dirty_cow_nnode(c, nnode); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = ubifs_get_pnode(c, nnode, iip); + if (IS_ERR(pnode)) + return ERR_PTR(PTR_ERR(pnode)); + pnode = dirty_cow_pnode(c, pnode); + if (IS_ERR(pnode)) + return ERR_PTR(PTR_ERR(pnode)); + iip = (i & (UBIFS_LPT_FANOUT - 1)); + dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, + pnode->lprops[iip].free, pnode->lprops[iip].dirty, + pnode->lprops[iip].flags); + ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags)); + return &pnode->lprops[iip]; +} + +/** + * lpt_init_rd - initialize the LPT for reading. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int lpt_init_rd(struct ubifs_info *c) +{ + int err, i; + + c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + if (!c->ltab) + return -ENOMEM; + + i = max_t(int, c->nnode_sz, c->pnode_sz); + c->lpt_nod_buf = kmalloc(i, GFP_KERNEL); + if (!c->lpt_nod_buf) + return -ENOMEM; + + for (i = 0; i < LPROPS_HEAP_CNT; i++) { + c->lpt_heap[i].arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, + GFP_KERNEL); + if (!c->lpt_heap[i].arr) + return -ENOMEM; + c->lpt_heap[i].cnt = 0; + c->lpt_heap[i].max_cnt = LPT_HEAP_SZ; + } + + c->dirty_idx.arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, GFP_KERNEL); + if (!c->dirty_idx.arr) + return -ENOMEM; + c->dirty_idx.cnt = 0; + c->dirty_idx.max_cnt = LPT_HEAP_SZ; + + err = read_ltab(c); + if (err) + return err; + + dbg_lp("space_bits %d", c->space_bits); + dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); + dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); + dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); + dbg_lp("pcnt_bits %d", c->pcnt_bits); + dbg_lp("lnum_bits %d", c->lnum_bits); + dbg_lp("pnode_sz %d", c->pnode_sz); + dbg_lp("nnode_sz %d", c->nnode_sz); + dbg_lp("ltab_sz %d", c->ltab_sz); + dbg_lp("lsave_sz %d", c->lsave_sz); + dbg_lp("lsave_cnt %d", c->lsave_cnt); + dbg_lp("lpt_hght %d", c->lpt_hght); + dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); + dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); + dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); + + return 0; +} + +/** + * lpt_init_wr - initialize the LPT for writing. + * @c: UBIFS file-system description object + * + * 'lpt_init_rd()' must have been called already. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int lpt_init_wr(struct ubifs_info *c) +{ + int err, i; + + c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + if (!c->ltab_cmt) + return -ENOMEM; + + c->lpt_buf = vmalloc(c->leb_size); + if (!c->lpt_buf) + return -ENOMEM; + + if (c->big_lpt) { + c->lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_NOFS); + if (!c->lsave) + return -ENOMEM; + err = read_lsave(c); + if (err) + return err; + } + + for (i = 0; i < c->lpt_lebs; i++) + if (c->ltab[i].free == c->leb_size) { + err = ubifs_leb_unmap(c, i + c->lpt_first); + if (err) + return err; + } + + return 0; +} + +/** + * ubifs_lpt_init - initialize the LPT. + * @c: UBIFS file-system description object + * @rd: whether to initialize lpt for reading + * @wr: whether to initialize lpt for writing + * + * For mounting 'rw', @rd and @wr are both true. For mounting 'ro', @rd is true + * and @wr is false. For mounting from 'ro' to 'rw', @rd is false and @wr is + * true. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) +{ + int err; + + if (rd) { + err = lpt_init_rd(c); + if (err) + return err; + } + + if (wr) { + err = lpt_init_wr(c); + if (err) + return err; + } + + return 0; +} + +/** + * struct lpt_scan_node - somewhere to put nodes while we scan LPT. + * @nnode: where to keep a nnode + * @pnode: where to keep a pnode + * @cnode: where to keep a cnode + * @in_tree: is the node in the tree in memory + * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in + * the tree + * @ptr.pnode: ditto for pnode + * @ptr.cnode: ditto for cnode + */ +struct lpt_scan_node { + union { + struct ubifs_nnode nnode; + struct ubifs_pnode pnode; + struct ubifs_cnode cnode; + }; + int in_tree; + union { + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + struct ubifs_cnode *cnode; + } ptr; +}; + +/** + * scan_get_nnode - for the scan, get a nnode from either the tree or flash. + * @c: the UBIFS file-system description object + * @path: where to put the nnode + * @parent: parent of the nnode + * @iip: index in parent of the nnode + * + * This function returns a pointer to the nnode on success or a negative error + * code on failure. + */ +static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, + struct lpt_scan_node *path, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_nnode *nnode; + void *buf = c->lpt_nod_buf; + int err; + + branch = &parent->nbranch[iip]; + nnode = branch->nnode; + if (nnode) { + path->in_tree = 1; + path->ptr.nnode = nnode; + return nnode; + } + nnode = &path->nnode; + path->in_tree = 0; + path->ptr.nnode = nnode; + memset(nnode, 0, sizeof(struct ubifs_nnode)); + if (branch->lnum == 0) { + /* + * This nnode was not written which just means that the LEB + * properties in the subtree below it describe empty LEBs. We + * make the nnode as though we had read it, which in fact means + * doing almost nothing. + */ + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { + err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, + c->nnode_sz); + if (err) + return ERR_PTR(err); + err = unpack_nnode(c, buf, nnode); + if (err) + return ERR_PTR(err); + } + err = validate_nnode(c, nnode, parent, iip); + if (err) + return ERR_PTR(err); + if (!c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + nnode->level = parent->level - 1; + nnode->parent = parent; + nnode->iip = iip; + return nnode; +} + +/** + * scan_get_pnode - for the scan, get a pnode from either the tree or flash. + * @c: the UBIFS file-system description object + * @path: where to put the pnode + * @parent: parent of the pnode + * @iip: index in parent of the pnode + * + * This function returns a pointer to the pnode on success or a negative error + * code on failure. + */ +static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, + struct lpt_scan_node *path, + struct ubifs_nnode *parent, int iip) +{ + struct ubifs_nbranch *branch; + struct ubifs_pnode *pnode; + void *buf = c->lpt_nod_buf; + int err; + + branch = &parent->nbranch[iip]; + pnode = branch->pnode; + if (pnode) { + path->in_tree = 1; + path->ptr.pnode = pnode; + return pnode; + } + pnode = &path->pnode; + path->in_tree = 0; + path->ptr.pnode = pnode; + memset(pnode, 0, sizeof(struct ubifs_pnode)); + if (branch->lnum == 0) { + /* + * This pnode was not written which just means that the LEB + * properties in it describe empty LEBs. We make the pnode as + * though we had read it. + */ + int i; + + if (c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops * const lprops = &pnode->lprops[i]; + + lprops->free = c->leb_size; + lprops->flags = ubifs_categorize_lprops(c, lprops); + } + } else { + ubifs_assert(branch->lnum >= c->lpt_first && + branch->lnum <= c->lpt_last); + ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); + err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, + c->pnode_sz); + if (err) + return ERR_PTR(err); + err = unpack_pnode(c, buf, pnode); + if (err) + return ERR_PTR(err); + } + err = validate_pnode(c, pnode, parent, iip); + if (err) + return ERR_PTR(err); + if (!c->big_lpt) + pnode->num = calc_pnode_num_from_parent(c, parent, iip); + pnode->parent = parent; + pnode->iip = iip; + set_pnode_lnum(c, pnode); + return pnode; +} + +/** + * ubifs_lpt_scan_nolock - scan the LPT. + * @c: the UBIFS file-system description object + * @start_lnum: LEB number from which to start scanning + * @end_lnum: LEB number at which to stop scanning + * @scan_cb: callback function called for each lprops + * @data: data to be passed to the callback function + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, + ubifs_lpt_scan_callback scan_cb, void *data) +{ + int err = 0, i, h, iip, shft; + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + struct lpt_scan_node *path; + + if (start_lnum == -1) { + start_lnum = end_lnum + 1; + if (start_lnum >= c->leb_cnt) + start_lnum = c->main_first; + } + + ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt); + ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt); + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return err; + } + + path = kmalloc(sizeof(struct lpt_scan_node) * (c->lpt_hght + 1), + GFP_NOFS); + if (!path) + return -ENOMEM; + + path[0].ptr.nnode = c->nroot; + path[0].in_tree = 1; +again: + /* Descend to the pnode containing start_lnum */ + nnode = c->nroot; + i = start_lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = scan_get_nnode(c, path + h, nnode, iip); + if (IS_ERR(nnode)) { + err = PTR_ERR(nnode); + goto out; + } + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = scan_get_pnode(c, path + h, nnode, iip); + if (IS_ERR(pnode)) { + err = PTR_ERR(pnode); + goto out; + } + iip = (i & (UBIFS_LPT_FANOUT - 1)); + + /* Loop for each lprops */ + while (1) { + struct ubifs_lprops *lprops = &pnode->lprops[iip]; + int ret, lnum = lprops->lnum; + + ret = scan_cb(c, lprops, path[h].in_tree, data); + if (ret < 0) { + err = ret; + goto out; + } + if (ret & LPT_SCAN_ADD) { + /* Add all the nodes in path to the tree in memory */ + for (h = 1; h < c->lpt_hght; h++) { + const size_t sz = sizeof(struct ubifs_nnode); + struct ubifs_nnode *parent; + + if (path[h].in_tree) + continue; + nnode = kmalloc(sz, GFP_NOFS); + if (!nnode) { + err = -ENOMEM; + goto out; + } + memcpy(nnode, &path[h].nnode, sz); + parent = nnode->parent; + parent->nbranch[nnode->iip].nnode = nnode; + path[h].ptr.nnode = nnode; + path[h].in_tree = 1; + path[h + 1].cnode.parent = nnode; + } + if (path[h].in_tree) + ubifs_ensure_cat(c, lprops); + else { + const size_t sz = sizeof(struct ubifs_pnode); + struct ubifs_nnode *parent; + + pnode = kmalloc(sz, GFP_NOFS); + if (!pnode) { + err = -ENOMEM; + goto out; + } + memcpy(pnode, &path[h].pnode, sz); + parent = pnode->parent; + parent->nbranch[pnode->iip].pnode = pnode; + path[h].ptr.pnode = pnode; + path[h].in_tree = 1; + update_cats(c, pnode); + c->pnodes_have += 1; + } + err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *) + c->nroot, 0, 0); + if (err) + goto out; + err = dbg_check_cats(c); + if (err) + goto out; + } + if (ret & LPT_SCAN_STOP) { + err = 0; + break; + } + /* Get the next lprops */ + if (lnum == end_lnum) { + /* + * We got to the end without finding what we were + * looking for + */ + err = -ENOSPC; + goto out; + } + if (lnum + 1 >= c->leb_cnt) { + /* Wrap-around to the beginning */ + start_lnum = c->main_first; + goto again; + } + if (iip + 1 < UBIFS_LPT_FANOUT) { + /* Next lprops is in the same pnode */ + iip += 1; + continue; + } + /* We need to get the next pnode. Go up until we can go right */ + iip = pnode->iip; + while (1) { + h -= 1; + ubifs_assert(h >= 0); + nnode = path[h].ptr.nnode; + if (iip + 1 < UBIFS_LPT_FANOUT) + break; + iip = nnode->iip; + } + /* Go right */ + iip += 1; + /* Descend to the pnode */ + h += 1; + for (; h < c->lpt_hght; h++) { + nnode = scan_get_nnode(c, path + h, nnode, iip); + if (IS_ERR(nnode)) { + err = PTR_ERR(nnode); + goto out; + } + iip = 0; + } + pnode = scan_get_pnode(c, path + h, nnode, iip); + if (IS_ERR(pnode)) { + err = PTR_ERR(pnode); + goto out; + } + iip = 0; + } +out: + kfree(path); + return err; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_chk_pnode - check a pnode. + * @c: the UBIFS file-system description object + * @pnode: pnode to check + * @col: pnode column + * + * This function returns %0 on success and a negative error code on failure. + */ +static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + int col) +{ + int i; + + if (pnode->num != col) { + dbg_err("pnode num %d expected %d parent num %d iip %d", + pnode->num, col, pnode->parent->num, pnode->iip); + return -EINVAL; + } + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops *lp, *lprops = &pnode->lprops[i]; + int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + i + + c->main_first; + int found, cat = lprops->flags & LPROPS_CAT_MASK; + struct ubifs_lpt_heap *heap; + struct list_head *list = NULL; + + if (lnum >= c->leb_cnt) + continue; + if (lprops->lnum != lnum) { + dbg_err("bad LEB number %d expected %d", + lprops->lnum, lnum); + return -EINVAL; + } + if (lprops->flags & LPROPS_TAKEN) { + if (cat != LPROPS_UNCAT) { + dbg_err("LEB %d taken but not uncat %d", + lprops->lnum, cat); + return -EINVAL; + } + continue; + } + if (lprops->flags & LPROPS_INDEX) { + switch (cat) { + case LPROPS_UNCAT: + case LPROPS_DIRTY_IDX: + case LPROPS_FRDI_IDX: + break; + default: + dbg_err("LEB %d index but cat %d", + lprops->lnum, cat); + return -EINVAL; + } + } else { + switch (cat) { + case LPROPS_UNCAT: + case LPROPS_DIRTY: + case LPROPS_FREE: + case LPROPS_EMPTY: + case LPROPS_FREEABLE: + break; + default: + dbg_err("LEB %d not index but cat %d", + lprops->lnum, cat); + return -EINVAL; + } + } + switch (cat) { + case LPROPS_UNCAT: + list = &c->uncat_list; + break; + case LPROPS_EMPTY: + list = &c->empty_list; + break; + case LPROPS_FREEABLE: + list = &c->freeable_list; + break; + case LPROPS_FRDI_IDX: + list = &c->frdi_idx_list; + break; + } + found = 0; + switch (cat) { + case LPROPS_DIRTY: + case LPROPS_DIRTY_IDX: + case LPROPS_FREE: + heap = &c->lpt_heap[cat - 1]; + if (lprops->hpos < heap->cnt && + heap->arr[lprops->hpos] == lprops) + found = 1; + break; + case LPROPS_UNCAT: + case LPROPS_EMPTY: + case LPROPS_FREEABLE: + case LPROPS_FRDI_IDX: + list_for_each_entry(lp, list, list) + if (lprops == lp) { + found = 1; + break; + } + break; + } + if (!found) { + dbg_err("LEB %d cat %d not found in cat heap/list", + lprops->lnum, cat); + return -EINVAL; + } + switch (cat) { + case LPROPS_EMPTY: + if (lprops->free != c->leb_size) { + dbg_err("LEB %d cat %d free %d dirty %d", + lprops->lnum, cat, lprops->free, + lprops->dirty); + return -EINVAL; + } + case LPROPS_FREEABLE: + case LPROPS_FRDI_IDX: + if (lprops->free + lprops->dirty != c->leb_size) { + dbg_err("LEB %d cat %d free %d dirty %d", + lprops->lnum, cat, lprops->free, + lprops->dirty); + return -EINVAL; + } + } + } + return 0; +} + +/** + * dbg_check_lpt_nodes - check nnodes and pnodes. + * @c: the UBIFS file-system description object + * @cnode: next cnode (nnode or pnode) to check + * @row: row of cnode (root is zero) + * @col: column of cnode (leftmost is zero) + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + int row, int col) +{ + struct ubifs_nnode *nnode, *nn; + struct ubifs_cnode *cn; + int num, iip = 0, err; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + + while (cnode) { + ubifs_assert(row >= 0); + nnode = cnode->parent; + if (cnode->level) { + /* cnode is a nnode */ + num = calc_nnode_num(row, col); + if (cnode->num != num) { + dbg_err("nnode num %d expected %d " + "parent num %d iip %d", cnode->num, num, + (nnode ? nnode->num : 0), cnode->iip); + return -EINVAL; + } + nn = (struct ubifs_nnode *)cnode; + while (iip < UBIFS_LPT_FANOUT) { + cn = nn->nbranch[iip].cnode; + if (cn) { + /* Go down */ + row += 1; + col <<= UBIFS_LPT_FANOUT_SHIFT; + col += iip; + iip = 0; + cnode = cn; + break; + } + /* Go right */ + iip += 1; + } + if (iip < UBIFS_LPT_FANOUT) + continue; + } else { + struct ubifs_pnode *pnode; + + /* cnode is a pnode */ + pnode = (struct ubifs_pnode *)cnode; + err = dbg_chk_pnode(c, pnode, col); + if (err) + return err; + } + /* Go up and to the right */ + row -= 1; + col >>= UBIFS_LPT_FANOUT_SHIFT; + iip = cnode->iip + 1; + cnode = (struct ubifs_cnode *)nnode; + } + return 0; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c new file mode 100644 index 000000000000..5f0b83e20af6 --- /dev/null +++ b/fs/ubifs/lpt_commit.c @@ -0,0 +1,1648 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements commit-related functionality of the LEB properties + * subsystem. + */ + +#include +#include "ubifs.h" + +/** + * first_dirty_cnode - find first dirty cnode. + * @c: UBIFS file-system description object + * @nnode: nnode at which to start + * + * This function returns the first dirty cnode or %NULL if there is not one. + */ +static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode) +{ + ubifs_assert(nnode); + while (1) { + int i, cont = 0; + + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_cnode *cnode; + + cnode = nnode->nbranch[i].cnode; + if (cnode && + test_bit(DIRTY_CNODE, &cnode->flags)) { + if (cnode->level == 0) + return cnode; + nnode = (struct ubifs_nnode *)cnode; + cont = 1; + break; + } + } + if (!cont) + return (struct ubifs_cnode *)nnode; + } +} + +/** + * next_dirty_cnode - find next dirty cnode. + * @cnode: cnode from which to begin searching + * + * This function returns the next dirty cnode or %NULL if there is not one. + */ +static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode) +{ + struct ubifs_nnode *nnode; + int i; + + ubifs_assert(cnode); + nnode = cnode->parent; + if (!nnode) + return NULL; + for (i = cnode->iip + 1; i < UBIFS_LPT_FANOUT; i++) { + cnode = nnode->nbranch[i].cnode; + if (cnode && test_bit(DIRTY_CNODE, &cnode->flags)) { + if (cnode->level == 0) + return cnode; /* cnode is a pnode */ + /* cnode is a nnode */ + return first_dirty_cnode((struct ubifs_nnode *)cnode); + } + } + return (struct ubifs_cnode *)nnode; +} + +/** + * get_cnodes_to_commit - create list of dirty cnodes to commit. + * @c: UBIFS file-system description object + * + * This function returns the number of cnodes to commit. + */ +static int get_cnodes_to_commit(struct ubifs_info *c) +{ + struct ubifs_cnode *cnode, *cnext; + int cnt = 0; + + if (!c->nroot) + return 0; + + if (!test_bit(DIRTY_CNODE, &c->nroot->flags)) + return 0; + + c->lpt_cnext = first_dirty_cnode(c->nroot); + cnode = c->lpt_cnext; + if (!cnode) + return 0; + cnt += 1; + while (1) { + ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); + __set_bit(COW_ZNODE, &cnode->flags); + cnext = next_dirty_cnode(cnode); + if (!cnext) { + cnode->cnext = c->lpt_cnext; + break; + } + cnode->cnext = cnext; + cnode = cnext; + cnt += 1; + } + dbg_cmt("committing %d cnodes", cnt); + dbg_lp("committing %d cnodes", cnt); + ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt); + return cnt; +} + +/** + * upd_ltab - update LPT LEB properties. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @free: amount of free space + * @dirty: amount of dirty space to add + */ +static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty) +{ + dbg_lp("LEB %d free %d dirty %d to %d +%d", + lnum, c->ltab[lnum - c->lpt_first].free, + c->ltab[lnum - c->lpt_first].dirty, free, dirty); + ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); + c->ltab[lnum - c->lpt_first].free = free; + c->ltab[lnum - c->lpt_first].dirty += dirty; +} + +/** + * alloc_lpt_leb - allocate an LPT LEB that is empty. + * @c: UBIFS file-system description object + * @lnum: LEB number is passed and returned here + * + * This function finds the next empty LEB in the ltab starting from @lnum. If a + * an empty LEB is found it is returned in @lnum and the function returns %0. + * Otherwise the function returns -ENOSPC. Note however, that LPT is designed + * never to run out of space. + */ +static int alloc_lpt_leb(struct ubifs_info *c, int *lnum) +{ + int i, n; + + n = *lnum - c->lpt_first + 1; + for (i = n; i < c->lpt_lebs; i++) { + if (c->ltab[i].tgc || c->ltab[i].cmt) + continue; + if (c->ltab[i].free == c->leb_size) { + c->ltab[i].cmt = 1; + *lnum = i + c->lpt_first; + return 0; + } + } + + for (i = 0; i < n; i++) { + if (c->ltab[i].tgc || c->ltab[i].cmt) + continue; + if (c->ltab[i].free == c->leb_size) { + c->ltab[i].cmt = 1; + *lnum = i + c->lpt_first; + return 0; + } + } + dbg_err("last LEB %d", *lnum); + dump_stack(); + return -ENOSPC; +} + +/** + * layout_cnodes - layout cnodes for commit. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int layout_cnodes(struct ubifs_info *c) +{ + int lnum, offs, len, alen, done_lsave, done_ltab, err; + struct ubifs_cnode *cnode; + + cnode = c->lpt_cnext; + if (!cnode) + return 0; + lnum = c->nhead_lnum; + offs = c->nhead_offs; + /* Try to place lsave and ltab nicely */ + done_lsave = !c->big_lpt; + done_ltab = 0; + if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { + done_lsave = 1; + c->lsave_lnum = lnum; + c->lsave_offs = offs; + offs += c->lsave_sz; + } + + if (offs + c->ltab_sz <= c->leb_size) { + done_ltab = 1; + c->ltab_lnum = lnum; + c->ltab_offs = offs; + offs += c->ltab_sz; + } + + do { + if (cnode->level) { + len = c->nnode_sz; + c->dirty_nn_cnt -= 1; + } else { + len = c->pnode_sz; + c->dirty_pn_cnt -= 1; + } + while (offs + len > c->leb_size) { + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + err = alloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + /* Try to place lsave and ltab nicely */ + if (!done_lsave) { + done_lsave = 1; + c->lsave_lnum = lnum; + c->lsave_offs = offs; + offs += c->lsave_sz; + continue; + } + if (!done_ltab) { + done_ltab = 1; + c->ltab_lnum = lnum; + c->ltab_offs = offs; + offs += c->ltab_sz; + continue; + } + break; + } + if (cnode->parent) { + cnode->parent->nbranch[cnode->iip].lnum = lnum; + cnode->parent->nbranch[cnode->iip].offs = offs; + } else { + c->lpt_lnum = lnum; + c->lpt_offs = offs; + } + offs += len; + cnode = cnode->cnext; + } while (cnode && cnode != c->lpt_cnext); + + /* Make sure to place LPT's save table */ + if (!done_lsave) { + if (offs + c->lsave_sz > c->leb_size) { + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + err = alloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + } + done_lsave = 1; + c->lsave_lnum = lnum; + c->lsave_offs = offs; + offs += c->lsave_sz; + } + + /* Make sure to place LPT's own lprops table */ + if (!done_ltab) { + if (offs + c->ltab_sz > c->leb_size) { + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + err = alloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + } + done_ltab = 1; + c->ltab_lnum = lnum; + c->ltab_offs = offs; + offs += c->ltab_sz; + } + + alen = ALIGN(offs, c->min_io_size); + upd_ltab(c, lnum, c->leb_size - alen, alen - offs); + return 0; +} + +/** + * realloc_lpt_leb - allocate an LPT LEB that is empty. + * @c: UBIFS file-system description object + * @lnum: LEB number is passed and returned here + * + * This function duplicates exactly the results of the function alloc_lpt_leb. + * It is used during end commit to reallocate the same LEB numbers that were + * allocated by alloc_lpt_leb during start commit. + * + * This function finds the next LEB that was allocated by the alloc_lpt_leb + * function starting from @lnum. If a LEB is found it is returned in @lnum and + * the function returns %0. Otherwise the function returns -ENOSPC. + * Note however, that LPT is designed never to run out of space. + */ +static int realloc_lpt_leb(struct ubifs_info *c, int *lnum) +{ + int i, n; + + n = *lnum - c->lpt_first + 1; + for (i = n; i < c->lpt_lebs; i++) + if (c->ltab[i].cmt) { + c->ltab[i].cmt = 0; + *lnum = i + c->lpt_first; + return 0; + } + + for (i = 0; i < n; i++) + if (c->ltab[i].cmt) { + c->ltab[i].cmt = 0; + *lnum = i + c->lpt_first; + return 0; + } + dbg_err("last LEB %d", *lnum); + dump_stack(); + return -ENOSPC; +} + +/** + * write_cnodes - write cnodes for commit. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int write_cnodes(struct ubifs_info *c) +{ + int lnum, offs, len, from, err, wlen, alen, done_ltab, done_lsave; + struct ubifs_cnode *cnode; + void *buf = c->lpt_buf; + + cnode = c->lpt_cnext; + if (!cnode) + return 0; + lnum = c->nhead_lnum; + offs = c->nhead_offs; + from = offs; + /* Ensure empty LEB is unmapped */ + if (offs == 0) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + /* Try to place lsave and ltab nicely */ + done_lsave = !c->big_lpt; + done_ltab = 0; + if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { + done_lsave = 1; + ubifs_pack_lsave(c, buf + offs, c->lsave); + offs += c->lsave_sz; + } + + if (offs + c->ltab_sz <= c->leb_size) { + done_ltab = 1; + ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); + offs += c->ltab_sz; + } + + /* Loop for each cnode */ + do { + if (cnode->level) + len = c->nnode_sz; + else + len = c->pnode_sz; + while (offs + len > c->leb_size) { + wlen = offs - from; + if (wlen) { + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, + alen, UBI_SHORTTERM); + if (err) + return err; + } + err = realloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + from = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + /* Try to place lsave and ltab nicely */ + if (!done_lsave) { + done_lsave = 1; + ubifs_pack_lsave(c, buf + offs, c->lsave); + offs += c->lsave_sz; + continue; + } + if (!done_ltab) { + done_ltab = 1; + ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); + offs += c->ltab_sz; + continue; + } + break; + } + if (cnode->level) + ubifs_pack_nnode(c, buf + offs, + (struct ubifs_nnode *)cnode); + else + ubifs_pack_pnode(c, buf + offs, + (struct ubifs_pnode *)cnode); + /* + * The reason for the barriers is the same as in case of TNC. + * See comment in 'write_index()'. 'dirty_cow_nnode()' and + * 'dirty_cow_pnode()' are the functions for which this is + * important. + */ + clear_bit(DIRTY_CNODE, &cnode->flags); + smp_mb__before_clear_bit(); + clear_bit(COW_ZNODE, &cnode->flags); + smp_mb__after_clear_bit(); + offs += len; + cnode = cnode->cnext; + } while (cnode && cnode != c->lpt_cnext); + + /* Make sure to place LPT's save table */ + if (!done_lsave) { + if (offs + c->lsave_sz > c->leb_size) { + wlen = offs - from; + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, alen, + UBI_SHORTTERM); + if (err) + return err; + err = realloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + done_lsave = 1; + ubifs_pack_lsave(c, buf + offs, c->lsave); + offs += c->lsave_sz; + } + + /* Make sure to place LPT's own lprops table */ + if (!done_ltab) { + if (offs + c->ltab_sz > c->leb_size) { + wlen = offs - from; + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, alen, + UBI_SHORTTERM); + if (err) + return err; + err = realloc_lpt_leb(c, &lnum); + if (err) + return err; + offs = 0; + ubifs_assert(lnum >= c->lpt_first && + lnum <= c->lpt_last); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + done_ltab = 1; + ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); + offs += c->ltab_sz; + } + + /* Write remaining data in buffer */ + wlen = offs - from; + alen = ALIGN(wlen, c->min_io_size); + memset(buf + offs, 0xff, alen - wlen); + err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); + if (err) + return err; + c->nhead_lnum = lnum; + c->nhead_offs = ALIGN(offs, c->min_io_size); + + dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); + dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); + dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) + dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); + return 0; +} + +/** + * next_pnode - find next pnode. + * @c: UBIFS file-system description object + * @pnode: pnode + * + * This function returns the next pnode or %NULL if there are no more pnodes. + */ +static struct ubifs_pnode *next_pnode(struct ubifs_info *c, + struct ubifs_pnode *pnode) +{ + struct ubifs_nnode *nnode; + int iip; + + /* Try to go right */ + nnode = pnode->parent; + iip = pnode->iip + 1; + if (iip < UBIFS_LPT_FANOUT) { + /* We assume here that LEB zero is never an LPT LEB */ + if (nnode->nbranch[iip].lnum) + return ubifs_get_pnode(c, nnode, iip); + else + return NULL; + } + + /* Go up while can't go right */ + do { + iip = nnode->iip + 1; + nnode = nnode->parent; + if (!nnode) + return NULL; + /* We assume here that LEB zero is never an LPT LEB */ + } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum); + + /* Go right */ + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return (void *)nnode; + + /* Go down to level 1 */ + while (nnode->level > 1) { + nnode = ubifs_get_nnode(c, nnode, 0); + if (IS_ERR(nnode)) + return (void *)nnode; + } + + return ubifs_get_pnode(c, nnode, 0); +} + +/** + * pnode_lookup - lookup a pnode in the LPT. + * @c: UBIFS file-system description object + * @i: pnode number (0 to main_lebs - 1) + * + * This function returns a pointer to the pnode on success or a negative + * error code on failure. + */ +static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i) +{ + int err, h, iip, shft; + struct ubifs_nnode *nnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + i <<= UBIFS_LPT_FANOUT_SHIFT; + nnode = c->nroot; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < c->lpt_hght; h++) { + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return ERR_PTR(PTR_ERR(nnode)); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + return ubifs_get_pnode(c, nnode, iip); +} + +/** + * add_pnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @pnode: pnode for which to add dirt + */ +static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, + c->pnode_sz); +} + +/** + * do_make_pnode_dirty - mark a pnode dirty. + * @c: UBIFS file-system description object + * @pnode: pnode to mark dirty + */ +static void do_make_pnode_dirty(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ + /* Assumes cnext list is empty i.e. not called during commit */ + if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { + struct ubifs_nnode *nnode; + + c->dirty_pn_cnt += 1; + add_pnode_dirt(c, pnode); + /* Mark parent and ancestors dirty too */ + nnode = pnode->parent; + while (nnode) { + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + nnode = nnode->parent; + } else + break; + } + } +} + +/** + * make_tree_dirty - mark the entire LEB properties tree dirty. + * @c: UBIFS file-system description object + * + * This function is used by the "small" LPT model to cause the entire LEB + * properties tree to be written. The "small" LPT model does not use LPT + * garbage collection because it is more efficient to write the entire tree + * (because it is small). + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_tree_dirty(struct ubifs_info *c) +{ + struct ubifs_pnode *pnode; + + pnode = pnode_lookup(c, 0); + while (pnode) { + do_make_pnode_dirty(c, pnode); + pnode = next_pnode(c, pnode); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + } + return 0; +} + +/** + * need_write_all - determine if the LPT area is running out of free space. + * @c: UBIFS file-system description object + * + * This function returns %1 if the LPT area is running out of free space and %0 + * if it is not. + */ +static int need_write_all(struct ubifs_info *c) +{ + long long free = 0; + int i; + + for (i = 0; i < c->lpt_lebs; i++) { + if (i + c->lpt_first == c->nhead_lnum) + free += c->leb_size - c->nhead_offs; + else if (c->ltab[i].free == c->leb_size) + free += c->leb_size; + else if (c->ltab[i].free + c->ltab[i].dirty == c->leb_size) + free += c->leb_size; + } + /* Less than twice the size left */ + if (free <= c->lpt_sz * 2) + return 1; + return 0; +} + +/** + * lpt_tgc_start - start trivial garbage collection of LPT LEBs. + * @c: UBIFS file-system description object + * + * LPT trivial garbage collection is where a LPT LEB contains only dirty and + * free space and so may be reused as soon as the next commit is completed. + * This function is called during start commit to mark LPT LEBs for trivial GC. + */ +static void lpt_tgc_start(struct ubifs_info *c) +{ + int i; + + for (i = 0; i < c->lpt_lebs; i++) { + if (i + c->lpt_first == c->nhead_lnum) + continue; + if (c->ltab[i].dirty > 0 && + c->ltab[i].free + c->ltab[i].dirty == c->leb_size) { + c->ltab[i].tgc = 1; + c->ltab[i].free = c->leb_size; + c->ltab[i].dirty = 0; + dbg_lp("LEB %d", i + c->lpt_first); + } + } +} + +/** + * lpt_tgc_end - end trivial garbage collection of LPT LEBs. + * @c: UBIFS file-system description object + * + * LPT trivial garbage collection is where a LPT LEB contains only dirty and + * free space and so may be reused as soon as the next commit is completed. + * This function is called after the commit is completed (master node has been + * written) and unmaps LPT LEBs that were marked for trivial GC. + */ +static int lpt_tgc_end(struct ubifs_info *c) +{ + int i, err; + + for (i = 0; i < c->lpt_lebs; i++) + if (c->ltab[i].tgc) { + err = ubifs_leb_unmap(c, i + c->lpt_first); + if (err) + return err; + c->ltab[i].tgc = 0; + dbg_lp("LEB %d", i + c->lpt_first); + } + return 0; +} + +/** + * populate_lsave - fill the lsave array with important LEB numbers. + * @c: the UBIFS file-system description object + * + * This function is only called for the "big" model. It records a small number + * of LEB numbers of important LEBs. Important LEBs are ones that are (from + * most important to least important): empty, freeable, freeable index, dirty + * index, dirty or free. Upon mount, we read this list of LEB numbers and bring + * their pnodes into memory. That will stop us from having to scan the LPT + * straight away. For the "small" model we assume that scanning the LPT is no + * big deal. + */ +static void populate_lsave(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + int i, cnt = 0; + + ubifs_assert(c->big_lpt); + if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { + c->lpt_drty_flgs |= LSAVE_DIRTY; + ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); + } + list_for_each_entry(lprops, &c->empty_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) + return; + } + list_for_each_entry(lprops, &c->freeable_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) + return; + } + list_for_each_entry(lprops, &c->frdi_idx_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) + return; + } + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + for (i = 0; i < heap->cnt; i++) { + c->lsave[cnt++] = heap->arr[i]->lnum; + if (cnt >= c->lsave_cnt) + return; + } + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + for (i = 0; i < heap->cnt; i++) { + c->lsave[cnt++] = heap->arr[i]->lnum; + if (cnt >= c->lsave_cnt) + return; + } + heap = &c->lpt_heap[LPROPS_FREE - 1]; + for (i = 0; i < heap->cnt; i++) { + c->lsave[cnt++] = heap->arr[i]->lnum; + if (cnt >= c->lsave_cnt) + return; + } + /* Fill it up completely */ + while (cnt < c->lsave_cnt) + c->lsave[cnt++] = c->main_first; +} + +/** + * nnode_lookup - lookup a nnode in the LPT. + * @c: UBIFS file-system description object + * @i: nnode number + * + * This function returns a pointer to the nnode on success or a negative + * error code on failure. + */ +static struct ubifs_nnode *nnode_lookup(struct ubifs_info *c, int i) +{ + int err, iip; + struct ubifs_nnode *nnode; + + if (!c->nroot) { + err = ubifs_read_nnode(c, NULL, 0); + if (err) + return ERR_PTR(err); + } + nnode = c->nroot; + while (1) { + iip = i & (UBIFS_LPT_FANOUT - 1); + i >>= UBIFS_LPT_FANOUT_SHIFT; + if (!i) + break; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) + return nnode; + } + return nnode; +} + +/** + * make_nnode_dirty - find a nnode and, if found, make it dirty. + * @c: UBIFS file-system description object + * @node_num: nnode number of nnode to make dirty + * @lnum: LEB number where nnode was written + * @offs: offset where nnode was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_nnode_dirty(struct ubifs_info *c, int node_num, int lnum, + int offs) +{ + struct ubifs_nnode *nnode; + + nnode = nnode_lookup(c, node_num); + if (IS_ERR(nnode)) + return PTR_ERR(nnode); + if (nnode->parent) { + struct ubifs_nbranch *branch; + + branch = &nnode->parent->nbranch[nnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + return 0; /* nnode is obsolete */ + } else if (c->lpt_lnum != lnum || c->lpt_offs != offs) + return 0; /* nnode is obsolete */ + /* Assumes cnext list is empty i.e. not called during commit */ + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + /* Mark parent and ancestors dirty too */ + nnode = nnode->parent; + while (nnode) { + if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { + c->dirty_nn_cnt += 1; + ubifs_add_nnode_dirt(c, nnode); + nnode = nnode->parent; + } else + break; + } + } + return 0; +} + +/** + * make_pnode_dirty - find a pnode and, if found, make it dirty. + * @c: UBIFS file-system description object + * @node_num: pnode number of pnode to make dirty + * @lnum: LEB number where pnode was written + * @offs: offset where pnode was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum, + int offs) +{ + struct ubifs_pnode *pnode; + struct ubifs_nbranch *branch; + + pnode = pnode_lookup(c, node_num); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + branch = &pnode->parent->nbranch[pnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + return 0; + do_make_pnode_dirty(c, pnode); + return 0; +} + +/** + * make_ltab_dirty - make ltab node dirty. + * @c: UBIFS file-system description object + * @lnum: LEB number where ltab was written + * @offs: offset where ltab was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_ltab_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->ltab_lnum || offs != c->ltab_offs) + return 0; /* This ltab node is obsolete */ + if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { + c->lpt_drty_flgs |= LTAB_DIRTY; + ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); + } + return 0; +} + +/** + * make_lsave_dirty - make lsave node dirty. + * @c: UBIFS file-system description object + * @lnum: LEB number where lsave was written + * @offs: offset where lsave was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_lsave_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->lsave_lnum || offs != c->lsave_offs) + return 0; /* This lsave node is obsolete */ + if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { + c->lpt_drty_flgs |= LSAVE_DIRTY; + ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); + } + return 0; +} + +/** + * make_node_dirty - make node dirty. + * @c: UBIFS file-system description object + * @node_type: LPT node type + * @node_num: node number + * @lnum: LEB number where node was written + * @offs: offset where node was written + * + * This function is used by LPT garbage collection. LPT garbage collection is + * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection + * simply involves marking all the nodes in the LEB being garbage-collected as + * dirty. The dirty nodes are written next commit, after which the LEB is free + * to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num, + int lnum, int offs) +{ + switch (node_type) { + case UBIFS_LPT_NNODE: + return make_nnode_dirty(c, node_num, lnum, offs); + case UBIFS_LPT_PNODE: + return make_pnode_dirty(c, node_num, lnum, offs); + case UBIFS_LPT_LTAB: + return make_ltab_dirty(c, lnum, offs); + case UBIFS_LPT_LSAVE: + return make_lsave_dirty(c, lnum, offs); + } + return -EINVAL; +} + +/** + * get_lpt_node_len - return the length of a node based on its type. + * @c: UBIFS file-system description object + * @node_type: LPT node type + */ +static int get_lpt_node_len(struct ubifs_info *c, int node_type) +{ + switch (node_type) { + case UBIFS_LPT_NNODE: + return c->nnode_sz; + case UBIFS_LPT_PNODE: + return c->pnode_sz; + case UBIFS_LPT_LTAB: + return c->ltab_sz; + case UBIFS_LPT_LSAVE: + return c->lsave_sz; + } + return 0; +} + +/** + * get_pad_len - return the length of padding in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer + * @len: length of buffer + */ +static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len) +{ + int offs, pad_len; + + if (c->min_io_size == 1) + return 0; + offs = c->leb_size - len; + pad_len = ALIGN(offs, c->min_io_size) - offs; + return pad_len; +} + +/** + * get_lpt_node_type - return type (and node number) of a node in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer + * @node_num: node number is returned here + */ +static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int pos = 0, node_type; + + node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); + *node_num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); + return node_type; +} + +/** + * is_a_node - determine if a buffer contains a node. + * @c: UBIFS file-system description object + * @buf: buffer + * @len: length of buffer + * + * This function returns %1 if the buffer contains a node or %0 if it does not. + */ +static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) +{ + uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; + int pos = 0, node_type, node_len; + uint16_t crc, calc_crc; + + node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); + if (node_type == UBIFS_LPT_NOT_A_NODE) + return 0; + node_len = get_lpt_node_len(c, node_type); + if (!node_len || node_len > len) + return 0; + pos = 0; + addr = buf; + crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); + calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, + node_len - UBIFS_LPT_CRC_BYTES); + if (crc != calc_crc) + return 0; + return 1; +} + + +/** + * lpt_gc_lnum - garbage collect a LPT LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to garbage collect + * + * LPT garbage collection is used only for the "big" LPT model + * (c->big_lpt == 1). Garbage collection simply involves marking all the nodes + * in the LEB being garbage-collected as dirty. The dirty nodes are written + * next commit, after which the LEB is free to be reused. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int lpt_gc_lnum(struct ubifs_info *c, int lnum) +{ + int err, len = c->leb_size, node_type, node_num, node_len, offs; + void *buf = c->lpt_buf; + + dbg_lp("LEB %d", lnum); + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + ubifs_err("cannot read LEB %d, error %d", lnum, err); + return err; + } + while (1) { + if (!is_a_node(c, buf, len)) { + int pad_len; + + pad_len = get_pad_len(c, buf, len); + if (pad_len) { + buf += pad_len; + len -= pad_len; + continue; + } + return 0; + } + node_type = get_lpt_node_type(c, buf, &node_num); + node_len = get_lpt_node_len(c, node_type); + offs = c->leb_size - len; + ubifs_assert(node_len != 0); + mutex_lock(&c->lp_mutex); + err = make_node_dirty(c, node_type, node_num, lnum, offs); + mutex_unlock(&c->lp_mutex); + if (err) + return err; + buf += node_len; + len -= node_len; + } + return 0; +} + +/** + * lpt_gc - LPT garbage collection. + * @c: UBIFS file-system description object + * + * Select a LPT LEB for LPT garbage collection and call 'lpt_gc_lnum()'. + * Returns %0 on success and a negative error code on failure. + */ +static int lpt_gc(struct ubifs_info *c) +{ + int i, lnum = -1, dirty = 0; + + mutex_lock(&c->lp_mutex); + for (i = 0; i < c->lpt_lebs; i++) { + ubifs_assert(!c->ltab[i].tgc); + if (i + c->lpt_first == c->nhead_lnum || + c->ltab[i].free + c->ltab[i].dirty == c->leb_size) + continue; + if (c->ltab[i].dirty > dirty) { + dirty = c->ltab[i].dirty; + lnum = i + c->lpt_first; + } + } + mutex_unlock(&c->lp_mutex); + if (lnum == -1) + return -ENOSPC; + return lpt_gc_lnum(c, lnum); +} + +/** + * ubifs_lpt_start_commit - UBIFS commit starts. + * @c: the UBIFS file-system description object + * + * This function has to be called when UBIFS starts the commit operation. + * This function "freezes" all currently dirty LEB properties and does not + * change them anymore. Further changes are saved and tracked separately + * because they are not part of this commit. This function returns zero in case + * of success and a negative error code in case of failure. + */ +int ubifs_lpt_start_commit(struct ubifs_info *c) +{ + int err, cnt; + + dbg_lp(""); + + mutex_lock(&c->lp_mutex); + err = dbg_check_ltab(c); + if (err) + goto out; + + if (c->check_lpt_free) { + /* + * We ensure there is enough free space in + * ubifs_lpt_post_commit() by marking nodes dirty. That + * information is lost when we unmount, so we also need + * to check free space once after mounting also. + */ + c->check_lpt_free = 0; + while (need_write_all(c)) { + mutex_unlock(&c->lp_mutex); + err = lpt_gc(c); + if (err) + return err; + mutex_lock(&c->lp_mutex); + } + } + + lpt_tgc_start(c); + + if (!c->dirty_pn_cnt) { + dbg_cmt("no cnodes to commit"); + err = 0; + goto out; + } + + if (!c->big_lpt && need_write_all(c)) { + /* If needed, write everything */ + err = make_tree_dirty(c); + if (err) + goto out; + lpt_tgc_start(c); + } + + if (c->big_lpt) + populate_lsave(c); + + cnt = get_cnodes_to_commit(c); + ubifs_assert(cnt != 0); + + err = layout_cnodes(c); + if (err) + goto out; + + /* Copy the LPT's own lprops for end commit to write */ + memcpy(c->ltab_cmt, c->ltab, + sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); + c->lpt_drty_flgs &= ~(LTAB_DIRTY | LSAVE_DIRTY); + +out: + mutex_unlock(&c->lp_mutex); + return err; +} + +/** + * free_obsolete_cnodes - free obsolete cnodes for commit end. + * @c: UBIFS file-system description object + */ +static void free_obsolete_cnodes(struct ubifs_info *c) +{ + struct ubifs_cnode *cnode, *cnext; + + cnext = c->lpt_cnext; + if (!cnext) + return; + do { + cnode = cnext; + cnext = cnode->cnext; + if (test_bit(OBSOLETE_CNODE, &cnode->flags)) + kfree(cnode); + else + cnode->cnext = NULL; + } while (cnext != c->lpt_cnext); + c->lpt_cnext = NULL; +} + +/** + * ubifs_lpt_end_commit - finish the commit operation. + * @c: the UBIFS file-system description object + * + * This function has to be called when the commit operation finishes. It + * flushes the changes which were "frozen" by 'ubifs_lprops_start_commit()' to + * the media. Returns zero in case of success and a negative error code in case + * of failure. + */ +int ubifs_lpt_end_commit(struct ubifs_info *c) +{ + int err; + + dbg_lp(""); + + if (!c->lpt_cnext) + return 0; + + err = write_cnodes(c); + if (err) + return err; + + mutex_lock(&c->lp_mutex); + free_obsolete_cnodes(c); + mutex_unlock(&c->lp_mutex); + + return 0; +} + +/** + * ubifs_lpt_post_commit - post commit LPT trivial GC and LPT GC. + * @c: UBIFS file-system description object + * + * LPT trivial GC is completed after a commit. Also LPT GC is done after a + * commit for the "big" LPT model. + */ +int ubifs_lpt_post_commit(struct ubifs_info *c) +{ + int err; + + mutex_lock(&c->lp_mutex); + err = lpt_tgc_end(c); + if (err) + goto out; + if (c->big_lpt) + while (need_write_all(c)) { + mutex_unlock(&c->lp_mutex); + err = lpt_gc(c); + if (err) + return err; + mutex_lock(&c->lp_mutex); + } +out: + mutex_unlock(&c->lp_mutex); + return err; +} + +/** + * first_nnode - find the first nnode in memory. + * @c: UBIFS file-system description object + * @hght: height of tree where nnode found is returned here + * + * This function returns a pointer to the nnode found or %NULL if no nnode is + * found. This function is a helper to 'ubifs_lpt_free()'. + */ +static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght) +{ + struct ubifs_nnode *nnode; + int h, i, found; + + nnode = c->nroot; + *hght = 0; + if (!nnode) + return NULL; + for (h = 1; h < c->lpt_hght; h++) { + found = 0; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (nnode->nbranch[i].nnode) { + found = 1; + nnode = nnode->nbranch[i].nnode; + *hght = h; + break; + } + } + if (!found) + break; + } + return nnode; +} + +/** + * next_nnode - find the next nnode in memory. + * @c: UBIFS file-system description object + * @nnode: nnode from which to start. + * @hght: height of tree where nnode is, is passed and returned here + * + * This function returns a pointer to the nnode found or %NULL if no nnode is + * found. This function is a helper to 'ubifs_lpt_free()'. + */ +static struct ubifs_nnode *next_nnode(struct ubifs_info *c, + struct ubifs_nnode *nnode, int *hght) +{ + struct ubifs_nnode *parent; + int iip, h, i, found; + + parent = nnode->parent; + if (!parent) + return NULL; + if (nnode->iip == UBIFS_LPT_FANOUT - 1) { + *hght -= 1; + return parent; + } + for (iip = nnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { + nnode = parent->nbranch[iip].nnode; + if (nnode) + break; + } + if (!nnode) { + *hght -= 1; + return parent; + } + for (h = *hght + 1; h < c->lpt_hght; h++) { + found = 0; + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + if (nnode->nbranch[i].nnode) { + found = 1; + nnode = nnode->nbranch[i].nnode; + *hght = h; + break; + } + } + if (!found) + break; + } + return nnode; +} + +/** + * ubifs_lpt_free - free resources owned by the LPT. + * @c: UBIFS file-system description object + * @wr_only: free only resources used for writing + */ +void ubifs_lpt_free(struct ubifs_info *c, int wr_only) +{ + struct ubifs_nnode *nnode; + int i, hght; + + /* Free write-only things first */ + + free_obsolete_cnodes(c); /* Leftover from a failed commit */ + + vfree(c->ltab_cmt); + c->ltab_cmt = NULL; + vfree(c->lpt_buf); + c->lpt_buf = NULL; + kfree(c->lsave); + c->lsave = NULL; + + if (wr_only) + return; + + /* Now free the rest */ + + nnode = first_nnode(c, &hght); + while (nnode) { + for (i = 0; i < UBIFS_LPT_FANOUT; i++) + kfree(nnode->nbranch[i].nnode); + nnode = next_nnode(c, nnode, &hght); + } + for (i = 0; i < LPROPS_HEAP_CNT; i++) + kfree(c->lpt_heap[i].arr); + kfree(c->dirty_idx.arr); + kfree(c->nroot); + vfree(c->ltab); + kfree(c->lpt_nod_buf); +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_is_all_ff - determine if a buffer contains only 0xff bytes. + * @buf: buffer + * @len: buffer length + */ +static int dbg_is_all_ff(uint8_t *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) + if (buf[i] != 0xff) + return 0; + return 1; +} + +/** + * dbg_is_nnode_dirty - determine if a nnode is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where nnode was written + * @offs: offset where nnode was written + */ +static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs) +{ + struct ubifs_nnode *nnode; + int hght; + + /* Entire tree is in memory so first_nnode / next_nnode are ok */ + nnode = first_nnode(c, &hght); + for (; nnode; nnode = next_nnode(c, nnode, &hght)) { + struct ubifs_nbranch *branch; + + cond_resched(); + if (nnode->parent) { + branch = &nnode->parent->nbranch[nnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + continue; + if (test_bit(DIRTY_CNODE, &nnode->flags)) + return 1; + return 0; + } else { + if (c->lpt_lnum != lnum || c->lpt_offs != offs) + continue; + if (test_bit(DIRTY_CNODE, &nnode->flags)) + return 1; + return 0; + } + } + return 1; +} + +/** + * dbg_is_pnode_dirty - determine if a pnode is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where pnode was written + * @offs: offset where pnode was written + */ +static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs) +{ + int i, cnt; + + cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + for (i = 0; i < cnt; i++) { + struct ubifs_pnode *pnode; + struct ubifs_nbranch *branch; + + cond_resched(); + pnode = pnode_lookup(c, i); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + branch = &pnode->parent->nbranch[pnode->iip]; + if (branch->lnum != lnum || branch->offs != offs) + continue; + if (test_bit(DIRTY_CNODE, &pnode->flags)) + return 1; + return 0; + } + return 1; +} + +/** + * dbg_is_ltab_dirty - determine if a ltab node is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where ltab node was written + * @offs: offset where ltab node was written + */ +static int dbg_is_ltab_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->ltab_lnum || offs != c->ltab_offs) + return 1; + return (c->lpt_drty_flgs & LTAB_DIRTY) != 0; +} + +/** + * dbg_is_lsave_dirty - determine if a lsave node is dirty. + * @c: the UBIFS file-system description object + * @lnum: LEB number where lsave node was written + * @offs: offset where lsave node was written + */ +static int dbg_is_lsave_dirty(struct ubifs_info *c, int lnum, int offs) +{ + if (lnum != c->lsave_lnum || offs != c->lsave_offs) + return 1; + return (c->lpt_drty_flgs & LSAVE_DIRTY) != 0; +} + +/** + * dbg_is_node_dirty - determine if a node is dirty. + * @c: the UBIFS file-system description object + * @node_type: node type + * @lnum: LEB number where node was written + * @offs: offset where node was written + */ +static int dbg_is_node_dirty(struct ubifs_info *c, int node_type, int lnum, + int offs) +{ + switch (node_type) { + case UBIFS_LPT_NNODE: + return dbg_is_nnode_dirty(c, lnum, offs); + case UBIFS_LPT_PNODE: + return dbg_is_pnode_dirty(c, lnum, offs); + case UBIFS_LPT_LTAB: + return dbg_is_ltab_dirty(c, lnum, offs); + case UBIFS_LPT_LSAVE: + return dbg_is_lsave_dirty(c, lnum, offs); + } + return 1; +} + +/** + * dbg_check_ltab_lnum - check the ltab for a LPT LEB number. + * @c: the UBIFS file-system description object + * @lnum: LEB number where node was written + * @offs: offset where node was written + * + * This function returns %0 on success and a negative error code on failure. + */ +static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) +{ + int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; + int ret; + void *buf = c->dbg_buf; + + dbg_lp("LEB %d", lnum); + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); + return err; + } + while (1) { + if (!is_a_node(c, buf, len)) { + int i, pad_len; + + pad_len = get_pad_len(c, buf, len); + if (pad_len) { + buf += pad_len; + len -= pad_len; + dirty += pad_len; + continue; + } + if (!dbg_is_all_ff(buf, len)) { + dbg_msg("invalid empty space in LEB %d at %d", + lnum, c->leb_size - len); + err = -EINVAL; + } + i = lnum - c->lpt_first; + if (len != c->ltab[i].free) { + dbg_msg("invalid free space in LEB %d " + "(free %d, expected %d)", + lnum, len, c->ltab[i].free); + err = -EINVAL; + } + if (dirty != c->ltab[i].dirty) { + dbg_msg("invalid dirty space in LEB %d " + "(dirty %d, expected %d)", + lnum, dirty, c->ltab[i].dirty); + err = -EINVAL; + } + return err; + } + node_type = get_lpt_node_type(c, buf, &node_num); + node_len = get_lpt_node_len(c, node_type); + ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); + if (ret == 1) + dirty += node_len; + buf += node_len; + len -= node_len; + } +} + +/** + * dbg_check_ltab - check the free and dirty space in the ltab. + * @c: the UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int dbg_check_ltab(struct ubifs_info *c) +{ + int lnum, err, i, cnt; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + + /* Bring the entire tree into memory */ + cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + for (i = 0; i < cnt; i++) { + struct ubifs_pnode *pnode; + + pnode = pnode_lookup(c, i); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + cond_resched(); + } + + /* Check nodes */ + err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)c->nroot, 0, 0); + if (err) + return err; + + /* Check each LEB */ + for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { + err = dbg_check_ltab_lnum(c, lnum); + if (err) { + dbg_err("failed at LEB %d", lnum); + return err; + } + } + + dbg_lp("succeeded"); + return 0; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c new file mode 100644 index 000000000000..71d5493bf565 --- /dev/null +++ b/fs/ubifs/master.c @@ -0,0 +1,387 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* This file implements reading and writing the master node */ + +#include "ubifs.h" + +/** + * scan_for_master - search the valid master node. + * @c: UBIFS file-system description object + * + * This function scans the master node LEBs and search for the latest master + * node. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int scan_for_master(struct ubifs_info *c) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + int lnum, offs = 0, nodes_cnt; + + lnum = UBIFS_MST_LNUM; + + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + nodes_cnt = sleb->nodes_cnt; + if (nodes_cnt > 0) { + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + if (snod->type != UBIFS_MST_NODE) + goto out; + memcpy(c->mst_node, snod->node, snod->len); + offs = snod->offs; + } + ubifs_scan_destroy(sleb); + + lnum += 1; + + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + if (sleb->nodes_cnt != nodes_cnt) + goto out; + if (!sleb->nodes_cnt) + goto out; + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); + if (snod->type != UBIFS_MST_NODE) + goto out; + if (snod->offs != offs) + goto out; + if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, + (void *)snod->node + UBIFS_CH_SZ, + UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) + goto out; + c->mst_offs = offs; + ubifs_scan_destroy(sleb); + return 0; + +out: + ubifs_scan_destroy(sleb); + return -EINVAL; +} + +/** + * validate_master - validate master node. + * @c: UBIFS file-system description object + * + * This function validates data which was read from master node. Returns zero + * if the data is all right and %-EINVAL if not. + */ +static int validate_master(const struct ubifs_info *c) +{ + long long main_sz; + int err; + + if (c->max_sqnum >= SQNUM_WATERMARK) { + err = 1; + goto out; + } + + if (c->cmt_no >= c->max_sqnum) { + err = 2; + goto out; + } + + if (c->highest_inum >= INUM_WATERMARK) { + err = 3; + goto out; + } + + if (c->lhead_lnum < UBIFS_LOG_LNUM || + c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs || + c->lhead_offs < 0 || c->lhead_offs >= c->leb_size || + c->lhead_offs & (c->min_io_size - 1)) { + err = 4; + goto out; + } + + if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first || + c->zroot.offs >= c->leb_size || c->zroot.offs & 7) { + err = 5; + goto out; + } + + if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len || + c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) { + err = 6; + goto out; + } + + if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) { + err = 7; + goto out; + } + + if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first || + c->ihead_offs % c->min_io_size || c->ihead_offs < 0 || + c->ihead_offs > c->leb_size || c->ihead_offs & 7) { + err = 8; + goto out; + } + + main_sz = (long long)c->main_lebs * c->leb_size; + if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { + err = 9; + goto out; + } + + if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last || + c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) { + err = 10; + goto out; + } + + if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last || + c->nhead_offs < 0 || c->nhead_offs % c->min_io_size || + c->nhead_offs > c->leb_size) { + err = 11; + goto out; + } + + if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last || + c->ltab_offs < 0 || + c->ltab_offs + c->ltab_sz > c->leb_size) { + err = 12; + goto out; + } + + if (c->big_lpt && (c->lsave_lnum < c->lpt_first || + c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 || + c->lsave_offs + c->lsave_sz > c->leb_size)) { + err = 13; + goto out; + } + + if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) { + err = 14; + goto out; + } + + if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) { + err = 15; + goto out; + } + + if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) { + err = 16; + goto out; + } + + if (c->lst.total_free < 0 || c->lst.total_free > main_sz || + c->lst.total_free & 7) { + err = 17; + goto out; + } + + if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) { + err = 18; + goto out; + } + + if (c->lst.total_used < 0 || (c->lst.total_used & 7)) { + err = 19; + goto out; + } + + if (c->lst.total_free + c->lst.total_dirty + + c->lst.total_used > main_sz) { + err = 20; + goto out; + } + + if (c->lst.total_dead + c->lst.total_dark + + c->lst.total_used + c->old_idx_sz > main_sz) { + err = 21; + goto out; + } + + if (c->lst.total_dead < 0 || + c->lst.total_dead > c->lst.total_free + c->lst.total_dirty || + c->lst.total_dead & 7) { + err = 22; + goto out; + } + + if (c->lst.total_dark < 0 || + c->lst.total_dark > c->lst.total_free + c->lst.total_dirty || + c->lst.total_dark & 7) { + err = 23; + goto out; + } + + return 0; + +out: + ubifs_err("bad master node at offset %d error %d", c->mst_offs, err); + dbg_dump_node(c, c->mst_node); + return -EINVAL; +} + +/** + * ubifs_read_master - read master node. + * @c: UBIFS file-system description object + * + * This function finds and reads the master node during file-system mount. If + * the flash is empty, it creates default master node as well. Returns zero in + * case of success and a negative error code in case of failure. + */ +int ubifs_read_master(struct ubifs_info *c) +{ + int err, old_leb_cnt; + + c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL); + if (!c->mst_node) + return -ENOMEM; + + err = scan_for_master(c); + if (err) { + err = ubifs_recover_master_node(c); + if (err) + /* + * Note, we do not free 'c->mst_node' here because the + * unmount routine will take care of this. + */ + return err; + } + + /* Make sure that the recovery flag is clear */ + c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY); + + c->max_sqnum = le64_to_cpu(c->mst_node->ch.sqnum); + c->highest_inum = le64_to_cpu(c->mst_node->highest_inum); + c->cmt_no = le64_to_cpu(c->mst_node->cmt_no); + c->zroot.lnum = le32_to_cpu(c->mst_node->root_lnum); + c->zroot.offs = le32_to_cpu(c->mst_node->root_offs); + c->zroot.len = le32_to_cpu(c->mst_node->root_len); + c->lhead_lnum = le32_to_cpu(c->mst_node->log_lnum); + c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); + c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); + c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); + c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); + c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); + c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); + c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); + c->nhead_offs = le32_to_cpu(c->mst_node->nhead_offs); + c->ltab_lnum = le32_to_cpu(c->mst_node->ltab_lnum); + c->ltab_offs = le32_to_cpu(c->mst_node->ltab_offs); + c->lsave_lnum = le32_to_cpu(c->mst_node->lsave_lnum); + c->lsave_offs = le32_to_cpu(c->mst_node->lsave_offs); + c->lscan_lnum = le32_to_cpu(c->mst_node->lscan_lnum); + c->lst.empty_lebs = le32_to_cpu(c->mst_node->empty_lebs); + c->lst.idx_lebs = le32_to_cpu(c->mst_node->idx_lebs); + old_leb_cnt = le32_to_cpu(c->mst_node->leb_cnt); + c->lst.total_free = le64_to_cpu(c->mst_node->total_free); + c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty); + c->lst.total_used = le64_to_cpu(c->mst_node->total_used); + c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); + c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); + + c->calc_idx_sz = c->old_idx_sz; + + if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) + c->no_orphs = 1; + + if (old_leb_cnt != c->leb_cnt) { + /* The file system has been resized */ + int growth = c->leb_cnt - old_leb_cnt; + + if (c->leb_cnt < old_leb_cnt || + c->leb_cnt < UBIFS_MIN_LEB_CNT) { + ubifs_err("bad leb_cnt on master node"); + dbg_dump_node(c, c->mst_node); + return -EINVAL; + } + + dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs", + old_leb_cnt, c->leb_cnt); + c->lst.empty_lebs += growth; + c->lst.total_free += growth * (long long)c->leb_size; + c->lst.total_dark += growth * (long long)c->dark_wm; + + /* + * Reflect changes back onto the master node. N.B. the master + * node gets written immediately whenever mounting (or + * remounting) in read-write mode, so we do not need to write it + * here. + */ + c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt); + c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs); + c->mst_node->total_free = cpu_to_le64(c->lst.total_free); + c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark); + } + + err = validate_master(c); + if (err) + return err; + + err = dbg_old_index_check_init(c, &c->zroot); + + return err; +} + +/** + * ubifs_write_master - write master node. + * @c: UBIFS file-system description object + * + * This function writes the master node. The caller has to take the + * @c->mst_mutex lock before calling this function. Returns zero in case of + * success and a negative error code in case of failure. The master node is + * written twice to enable recovery. + */ +int ubifs_write_master(struct ubifs_info *c) +{ + int err, lnum, offs, len; + + if (c->ro_media) + return -EINVAL; + + lnum = UBIFS_MST_LNUM; + offs = c->mst_offs + c->mst_node_alsz; + len = UBIFS_MST_NODE_SZ; + + if (offs + UBIFS_MST_NODE_SZ > c->leb_size) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + offs = 0; + } + + c->mst_offs = offs; + c->mst_node->highest_inum = cpu_to_le64(c->highest_inum); + + err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); + if (err) + return err; + + lnum += 1; + + if (offs == 0) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); + + return err; +} diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h new file mode 100644 index 000000000000..4beccfc256d2 --- /dev/null +++ b/fs/ubifs/misc.h @@ -0,0 +1,342 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file contains miscellaneous helper functions. + */ + +#ifndef __UBIFS_MISC_H__ +#define __UBIFS_MISC_H__ + +/** + * ubifs_zn_dirty - check if znode is dirty. + * @znode: znode to check + * + * This helper function returns %1 if @znode is dirty and %0 otherwise. + */ +static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) +{ + return !!test_bit(DIRTY_ZNODE, &znode->flags); +} + +/** + * ubifs_wake_up_bgt - wake up background thread. + * @c: UBIFS file-system description object + */ +static inline void ubifs_wake_up_bgt(struct ubifs_info *c) +{ + if (c->bgt && !c->need_bgt) { + c->need_bgt = 1; + wake_up_process(c->bgt); + } +} + +/** + * ubifs_tnc_find_child - find next child in znode. + * @znode: znode to search at + * @start: the zbranch index to start at + * + * This helper function looks for znode child starting at index @start. Returns + * the child or %NULL if no children were found. + */ +static inline struct ubifs_znode * +ubifs_tnc_find_child(struct ubifs_znode *znode, int start) +{ + while (start < znode->child_cnt) { + if (znode->zbranch[start].znode) + return znode->zbranch[start].znode; + start += 1; + } + + return NULL; +} + +/** + * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object. + * @inode: the VFS 'struct inode' pointer + */ +static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) +{ + return container_of(inode, struct ubifs_inode, vfs_inode); +} + +/** + * ubifs_ro_mode - switch UBIFS to read read-only mode. + * @c: UBIFS file-system description object + * @err: error code which is the reason of switching to R/O mode + */ +static inline void ubifs_ro_mode(struct ubifs_info *c, int err) +{ + if (!c->ro_media) { + c->ro_media = 1; + ubifs_warn("switched to read-only mode, error %d", err); + dbg_dump_stack(); + } +} + +/** + * ubifs_compr_present - check if compressor was compiled in. + * @compr_type: compressor type to check + * + * This function returns %1 of compressor of type @compr_type is present, and + * %0 if not. + */ +static inline int ubifs_compr_present(int compr_type) +{ + ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); + return !!ubifs_compressors[compr_type]->capi_name; +} + +/** + * ubifs_compr_name - get compressor name string by its type. + * @compr_type: compressor type + * + * This function returns compressor type string. + */ +static inline const char *ubifs_compr_name(int compr_type) +{ + ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); + return ubifs_compressors[compr_type]->name; +} + +/** + * ubifs_wbuf_sync - synchronize write-buffer. + * @wbuf: write-buffer to synchronize + * + * This is the same as as 'ubifs_wbuf_sync_nolock()' but it does not assume + * that the write-buffer is already locked. + */ +static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) +{ + int err; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + err = ubifs_wbuf_sync_nolock(wbuf); + mutex_unlock(&wbuf->io_mutex); + return err; +} + +/** + * ubifs_leb_unmap - unmap an LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to unmap + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) +{ + int err; + + if (c->ro_media) + return -EROFS; + err = ubi_leb_unmap(c->ubi, lnum); + if (err) { + ubifs_err("unmap LEB %d failed, error %d", lnum, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_write - write to a LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @offs: offset within LEB to write to + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, + const void *buf, int offs, int len, int dtype) +{ + int err; + + if (c->ro_media) + return -EROFS; + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + if (err) { + ubifs_err("writing %d bytes at %d:%d, error %d", + len, lnum, offs, err); + return err; + } + + return 0; +} + +/** + * ubifs_leb_change - atomic LEB change. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, + const void *buf, int len, int dtype) +{ + int err; + + if (c->ro_media) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + if (err) { + ubifs_err("changing %d bytes in LEB %d, error %d", + len, lnum, err); + return err; + } + + return 0; +} + +/** + * ubifs_encode_dev - encode device node IDs. + * @dev: UBIFS device node information + * @rdev: device IDs to encode + * + * This is a helper function which encodes major/minor numbers of a device node + * into UBIFS device node description. We use standard Linux "new" and "huge" + * encodings. + */ +static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev) +{ + if (new_valid_dev(rdev)) { + dev->new = cpu_to_le32(new_encode_dev(rdev)); + return sizeof(dev->new); + } else { + dev->huge = cpu_to_le64(huge_encode_dev(rdev)); + return sizeof(dev->huge); + } +} + +/** + * ubifs_add_dirt - add dirty space to LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to add dirty space for + * @dirty: dirty space to add + * + * This is a helper function which increased amount of dirty LEB space. Returns + * zero in case of success and a negative error code in case of failure. + */ +static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty) +{ + return ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0); +} + +/** + * ubifs_return_leb - return LEB to lprops. + * @c: the UBIFS file-system description object + * @lnum: LEB to return + * + * This helper function cleans the "taken" flag of a logical eraseblock in the + * lprops. Returns zero in case of success and a negative error code in case of + * failure. + */ +static inline int ubifs_return_leb(struct ubifs_info *c, int lnum) +{ + return ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_TAKEN, 0); +} + +/** + * ubifs_idx_node_sz - return index node size. + * @c: the UBIFS file-system description object + * @child_cnt: number of children of this index node + */ +static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt) +{ + return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt; +} + +/** + * ubifs_idx_branch - return pointer to an index branch. + * @c: the UBIFS file-system description object + * @idx: index node + * @bnum: branch number + */ +static inline +struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c, + const struct ubifs_idx_node *idx, + int bnum) +{ + return (struct ubifs_branch *)((void *)idx->branches + + (UBIFS_BRANCH_SZ + c->key_len) * bnum); +} + +/** + * ubifs_idx_key - return pointer to an index key. + * @c: the UBIFS file-system description object + * @idx: index node + */ +static inline void *ubifs_idx_key(const struct ubifs_info *c, + const struct ubifs_idx_node *idx) +{ + return (void *)((struct ubifs_branch *)idx->branches)->key; +} + +/** + * ubifs_reported_space - calculate reported free space. + * @c: the UBIFS file-system description object + * @free: amount of free space + * + * This function calculates amount of free space which will be reported to + * user-space. User-space application tend to expect that if the file-system + * (e.g., via the 'statfs()' call) reports that it has N bytes available, they + * are able to write a file of size N. UBIFS attaches node headers to each data + * node and it has to write indexind nodes as well. This introduces additional + * overhead, and UBIFS it has to report sligtly less free space to meet the + * above expectetion. + * + * This function assumes free space is made up of uncompressed data nodes and + * full index nodes (one per data node, doubled because we always allow enough + * space to write the index twice). + * + * Note, the calculation is pessimistic, which means that most of the time + * UBIFS reports less space than it actually has. + */ +static inline long long ubifs_reported_space(const struct ubifs_info *c, + uint64_t free) +{ + int divisor, factor; + + divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1); + factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; + do_div(free, divisor); + + return free * factor; +} + +/** + * ubifs_current_time - round current time to time granularity. + * @inode: inode + */ +static inline struct timespec ubifs_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; +} + +#endif /* __UBIFS_MISC_H__ */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c new file mode 100644 index 000000000000..3afeb9242c6a --- /dev/null +++ b/fs/ubifs/orphan.c @@ -0,0 +1,958 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: Adrian Hunter + */ + +#include "ubifs.h" + +/* + * An orphan is an inode number whose inode node has been committed to the index + * with a link count of zero. That happens when an open file is deleted + * (unlinked) and then a commit is run. In the normal course of events the inode + * would be deleted when the file is closed. However in the case of an unclean + * unmount, orphans need to be accounted for. After an unclean unmount, the + * orphans' inodes must be deleted which means either scanning the entire index + * looking for them, or keeping a list on flash somewhere. This unit implements + * the latter approach. + * + * The orphan area is a fixed number of LEBs situated between the LPT area and + * the main area. The number of orphan area LEBs is specified when the file + * system is created. The minimum number is 1. The size of the orphan area + * should be so that it can hold the maximum number of orphans that are expected + * to ever exist at one time. + * + * The number of orphans that can fit in a LEB is: + * + * (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64) + * + * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough. + * + * Orphans are accumulated in a rb-tree. When an inode's link count drops to + * zero, the inode number is added to the rb-tree. It is removed from the tree + * when the inode is deleted. Any new orphans that are in the orphan tree when + * the commit is run, are written to the orphan area in 1 or more orph nodes. + * If the orphan area is full, it is consolidated to make space. There is + * always enough space because validation prevents the user from creating more + * than the maximum number of orphans allowed. + */ + +#ifdef CONFIG_UBIFS_FS_DEBUG +static int dbg_check_orphans(struct ubifs_info *c); +#else +#define dbg_check_orphans(c) 0 +#endif + +/** + * ubifs_add_orphan - add an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Add an orphan. This function is called when an inodes link count drops to + * zero. + */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *orphan, *o; + struct rb_node **p, *parent = NULL; + + orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS); + if (!orphan) + return -ENOMEM; + orphan->inum = inum; + orphan->new = 1; + + spin_lock(&c->orphan_lock); + if (c->tot_orphans >= c->max_orphans) { + spin_unlock(&c->orphan_lock); + kfree(orphan); + return -ENFILE; + } + p = &c->orph_tree.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_orphan, rb); + if (inum < o->inum) + p = &(*p)->rb_left; + else if (inum > o->inum) + p = &(*p)->rb_right; + else { + dbg_err("orphaned twice"); + spin_unlock(&c->orphan_lock); + kfree(orphan); + return 0; + } + } + c->tot_orphans += 1; + c->new_orphans += 1; + rb_link_node(&orphan->rb, parent, p); + rb_insert_color(&orphan->rb, &c->orph_tree); + list_add_tail(&orphan->list, &c->orph_list); + list_add_tail(&orphan->new_list, &c->orph_new); + spin_unlock(&c->orphan_lock); + dbg_gen("ino %lu", inum); + return 0; +} + +/** + * ubifs_delete_orphan - delete an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Delete an orphan. This function is called when an inode is deleted. + */ +void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *o; + struct rb_node *p; + + spin_lock(&c->orphan_lock); + p = c->orph_tree.rb_node; + while (p) { + o = rb_entry(p, struct ubifs_orphan, rb); + if (inum < o->inum) + p = p->rb_left; + else if (inum > o->inum) + p = p->rb_right; + else { + if (o->dnext) { + spin_unlock(&c->orphan_lock); + dbg_gen("deleted twice ino %lu", inum); + return; + } + if (o->cnext) { + o->dnext = c->orph_dnext; + c->orph_dnext = o; + spin_unlock(&c->orphan_lock); + dbg_gen("delete later ino %lu", inum); + return; + } + rb_erase(p, &c->orph_tree); + list_del(&o->list); + c->tot_orphans -= 1; + if (o->new) { + list_del(&o->new_list); + c->new_orphans -= 1; + } + spin_unlock(&c->orphan_lock); + kfree(o); + dbg_gen("inum %lu", inum); + return; + } + } + spin_unlock(&c->orphan_lock); + dbg_err("missing orphan ino %lu", inum); + dbg_dump_stack(); +} + +/** + * ubifs_orphan_start_commit - start commit of orphans. + * @c: UBIFS file-system description object + * + * Start commit of orphans. + */ +int ubifs_orphan_start_commit(struct ubifs_info *c) +{ + struct ubifs_orphan *orphan, **last; + + spin_lock(&c->orphan_lock); + last = &c->orph_cnext; + list_for_each_entry(orphan, &c->orph_new, new_list) { + ubifs_assert(orphan->new); + orphan->new = 0; + *last = orphan; + last = &orphan->cnext; + } + *last = orphan->cnext; + c->cmt_orphans = c->new_orphans; + c->new_orphans = 0; + dbg_cmt("%d orphans to commit", c->cmt_orphans); + INIT_LIST_HEAD(&c->orph_new); + if (c->tot_orphans == 0) + c->no_orphs = 1; + else + c->no_orphs = 0; + spin_unlock(&c->orphan_lock); + return 0; +} + +/** + * avail_orphs - calculate available space. + * @c: UBIFS file-system description object + * + * This function returns the number of orphans that can be written in the + * available space. + */ +static int avail_orphs(struct ubifs_info *c) +{ + int avail_lebs, avail, gap; + + avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1; + avail = avail_lebs * + ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); + gap = c->leb_size - c->ohead_offs; + if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64)) + avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); + return avail; +} + +/** + * tot_avail_orphs - calculate total space. + * @c: UBIFS file-system description object + * + * This function returns the number of orphans that can be written in half + * the total space. That leaves half the space for adding new orphans. + */ +static int tot_avail_orphs(struct ubifs_info *c) +{ + int avail_lebs, avail; + + avail_lebs = c->orph_lebs; + avail = avail_lebs * + ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); + return avail / 2; +} + +/** + * do_write_orph_node - write a node + * @c: UBIFS file-system description object + * @len: length of node + * @atomic: write atomically + * + * This function writes a node to the orphan head from the orphan buffer. If + * %atomic is not zero, then the write is done atomically. On success, %0 is + * returned, otherwise a negative error code is returned. + */ +static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) +{ + int err = 0; + + if (atomic) { + ubifs_assert(c->ohead_offs == 0); + ubifs_prepare_node(c, c->orph_buf, len, 1); + len = ALIGN(len, c->min_io_size); + err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len, + UBI_SHORTTERM); + } else { + if (c->ohead_offs == 0) { + /* Ensure LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->ohead_lnum); + if (err) + return err; + } + err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum, + c->ohead_offs, UBI_SHORTTERM); + } + return err; +} + +/** + * write_orph_node - write an orph node + * @c: UBIFS file-system description object + * @atomic: write atomically + * + * This function builds an orph node from the cnext list and writes it to the + * orphan head. On success, %0 is returned, otherwise a negative error code + * is returned. + */ +static int write_orph_node(struct ubifs_info *c, int atomic) +{ + struct ubifs_orphan *orphan, *cnext; + struct ubifs_orph_node *orph; + int gap, err, len, cnt, i; + + ubifs_assert(c->cmt_orphans > 0); + gap = c->leb_size - c->ohead_offs; + if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) { + c->ohead_lnum += 1; + c->ohead_offs = 0; + gap = c->leb_size; + if (c->ohead_lnum > c->orph_last) { + /* + * We limit the number of orphans so that this should + * never happen. + */ + ubifs_err("out of space in orphan area"); + return -EINVAL; + } + } + cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); + if (cnt > c->cmt_orphans) + cnt = c->cmt_orphans; + len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64); + ubifs_assert(c->orph_buf); + orph = c->orph_buf; + orph->ch.node_type = UBIFS_ORPH_NODE; + spin_lock(&c->orphan_lock); + cnext = c->orph_cnext; + for (i = 0; i < cnt; i++) { + orphan = cnext; + orph->inos[i] = cpu_to_le64(orphan->inum); + cnext = orphan->cnext; + orphan->cnext = NULL; + } + c->orph_cnext = cnext; + c->cmt_orphans -= cnt; + spin_unlock(&c->orphan_lock); + if (c->cmt_orphans) + orph->cmt_no = cpu_to_le64(c->cmt_no + 1); + else + /* Mark the last node of the commit */ + orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63)); + ubifs_assert(c->ohead_offs + len <= c->leb_size); + ubifs_assert(c->ohead_lnum >= c->orph_first); + ubifs_assert(c->ohead_lnum <= c->orph_last); + err = do_write_orph_node(c, len, atomic); + c->ohead_offs += ALIGN(len, c->min_io_size); + c->ohead_offs = ALIGN(c->ohead_offs, 8); + return err; +} + +/** + * write_orph_nodes - write orph nodes until there are no more to commit + * @c: UBIFS file-system description object + * @atomic: write atomically + * + * This function writes orph nodes for all the orphans to commit. On success, + * %0 is returned, otherwise a negative error code is returned. + */ +static int write_orph_nodes(struct ubifs_info *c, int atomic) +{ + int err; + + while (c->cmt_orphans > 0) { + err = write_orph_node(c, atomic); + if (err) + return err; + } + if (atomic) { + int lnum; + + /* Unmap any unused LEBs after consolidation */ + lnum = c->ohead_lnum + 1; + for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + } + return 0; +} + +/** + * consolidate - consolidate the orphan area. + * @c: UBIFS file-system description object + * + * This function enables consolidation by putting all the orphans into the list + * to commit. The list is in the order that the orphans were added, and the + * LEBs are written atomically in order, so at no time can orphans be lost by + * an unclean unmount. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int consolidate(struct ubifs_info *c) +{ + int tot_avail = tot_avail_orphs(c), err = 0; + + spin_lock(&c->orphan_lock); + dbg_cmt("there is space for %d orphans and there are %d", + tot_avail, c->tot_orphans); + if (c->tot_orphans - c->new_orphans <= tot_avail) { + struct ubifs_orphan *orphan, **last; + int cnt = 0; + + /* Change the cnext list to include all non-new orphans */ + last = &c->orph_cnext; + list_for_each_entry(orphan, &c->orph_list, list) { + if (orphan->new) + continue; + *last = orphan; + last = &orphan->cnext; + cnt += 1; + } + *last = orphan->cnext; + ubifs_assert(cnt == c->tot_orphans - c->new_orphans); + c->cmt_orphans = cnt; + c->ohead_lnum = c->orph_first; + c->ohead_offs = 0; + } else { + /* + * We limit the number of orphans so that this should + * never happen. + */ + ubifs_err("out of space in orphan area"); + err = -EINVAL; + } + spin_unlock(&c->orphan_lock); + return err; +} + +/** + * commit_orphans - commit orphans. + * @c: UBIFS file-system description object + * + * This function commits orphans to flash. On success, %0 is returned, + * otherwise a negative error code is returned. + */ +static int commit_orphans(struct ubifs_info *c) +{ + int avail, atomic = 0, err; + + ubifs_assert(c->cmt_orphans > 0); + avail = avail_orphs(c); + if (avail < c->cmt_orphans) { + /* Not enough space to write new orphans, so consolidate */ + err = consolidate(c); + if (err) + return err; + atomic = 1; + } + err = write_orph_nodes(c, atomic); + return err; +} + +/** + * erase_deleted - erase the orphans marked for deletion. + * @c: UBIFS file-system description object + * + * During commit, the orphans being committed cannot be deleted, so they are + * marked for deletion and deleted by this function. Also, the recovery + * adds killed orphans to the deletion list, and therefore they are deleted + * here too. + */ +static void erase_deleted(struct ubifs_info *c) +{ + struct ubifs_orphan *orphan, *dnext; + + spin_lock(&c->orphan_lock); + dnext = c->orph_dnext; + while (dnext) { + orphan = dnext; + dnext = orphan->dnext; + ubifs_assert(!orphan->new); + rb_erase(&orphan->rb, &c->orph_tree); + list_del(&orphan->list); + c->tot_orphans -= 1; + dbg_gen("deleting orphan ino %lu", orphan->inum); + kfree(orphan); + } + c->orph_dnext = NULL; + spin_unlock(&c->orphan_lock); +} + +/** + * ubifs_orphan_end_commit - end commit of orphans. + * @c: UBIFS file-system description object + * + * End commit of orphans. + */ +int ubifs_orphan_end_commit(struct ubifs_info *c) +{ + int err; + + if (c->cmt_orphans != 0) { + err = commit_orphans(c); + if (err) + return err; + } + erase_deleted(c); + err = dbg_check_orphans(c); + return err; +} + +/** + * clear_orphans - erase all LEBs used for orphans. + * @c: UBIFS file-system description object + * + * If recovery is not required, then the orphans from the previous session + * are not needed. This function locates the LEBs used to record + * orphans, and un-maps them. + */ +static int clear_orphans(struct ubifs_info *c) +{ + int lnum, err; + + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } + c->ohead_lnum = c->orph_first; + c->ohead_offs = 0; + return 0; +} + +/** + * insert_dead_orphan - insert an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * This function is a helper to the 'do_kill_orphans()' function. The orphan + * must be kept until the next commit, so it is added to the rb-tree and the + * deletion list. + */ +static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *orphan, *o; + struct rb_node **p, *parent = NULL; + + orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL); + if (!orphan) + return -ENOMEM; + orphan->inum = inum; + + p = &c->orph_tree.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_orphan, rb); + if (inum < o->inum) + p = &(*p)->rb_left; + else if (inum > o->inum) + p = &(*p)->rb_right; + else { + /* Already added - no problem */ + kfree(orphan); + return 0; + } + } + c->tot_orphans += 1; + rb_link_node(&orphan->rb, parent, p); + rb_insert_color(&orphan->rb, &c->orph_tree); + list_add_tail(&orphan->list, &c->orph_list); + orphan->dnext = c->orph_dnext; + c->orph_dnext = orphan; + dbg_mnt("ino %lu, new %d, tot %d", + inum, c->new_orphans, c->tot_orphans); + return 0; +} + +/** + * do_kill_orphans - remove orphan inodes from the index. + * @c: UBIFS file-system description object + * @sleb: scanned LEB + * @last_cmt_no: cmt_no of last orph node read is passed and returned here + * @outofdate: whether the LEB is out of date is returned here + * @last_flagged: whether the end orph node is encountered + * + * This function is a helper to the 'kill_orphans()' function. It goes through + * every orphan node in a LEB and for every inode number recorded, removes + * all keys for that inode from the TNC. + */ +static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + unsigned long long *last_cmt_no, int *outofdate, + int *last_flagged) +{ + struct ubifs_scan_node *snod; + struct ubifs_orph_node *orph; + unsigned long long cmt_no; + ino_t inum; + int i, n, err, first = 1; + + list_for_each_entry(snod, &sleb->nodes, list) { + if (snod->type != UBIFS_ORPH_NODE) { + ubifs_err("invalid node type %d in orphan area at " + "%d:%d", snod->type, sleb->lnum, snod->offs); + dbg_dump_node(c, snod->node); + return -EINVAL; + } + + orph = snod->node; + + /* Check commit number */ + cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX; + /* + * The commit number on the master node may be less, because + * of a failed commit. If there are several failed commits in a + * row, the commit number written on orph nodes will continue to + * increase (because the commit number is adjusted here) even + * though the commit number on the master node stays the same + * because the master node has not been re-written. + */ + if (cmt_no > c->cmt_no) + c->cmt_no = cmt_no; + if (cmt_no < *last_cmt_no && *last_flagged) { + /* + * The last orph node had a higher commit number and was + * flagged as the last written for that commit number. + * That makes this orph node, out of date. + */ + if (!first) { + ubifs_err("out of order commit number %llu in " + "orphan node at %d:%d", + cmt_no, sleb->lnum, snod->offs); + dbg_dump_node(c, snod->node); + return -EINVAL; + } + dbg_rcvry("out of date LEB %d", sleb->lnum); + *outofdate = 1; + return 0; + } + + if (first) + first = 0; + + n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; + for (i = 0; i < n; i++) { + inum = le64_to_cpu(orph->inos[i]); + dbg_rcvry("deleting orphaned inode %lu", inum); + err = ubifs_tnc_remove_ino(c, inum); + if (err) + return err; + err = insert_dead_orphan(c, inum); + if (err) + return err; + } + + *last_cmt_no = cmt_no; + if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) { + dbg_rcvry("last orph node for commit %llu at %d:%d", + cmt_no, sleb->lnum, snod->offs); + *last_flagged = 1; + } else + *last_flagged = 0; + } + + return 0; +} + +/** + * kill_orphans - remove all orphan inodes from the index. + * @c: UBIFS file-system description object + * + * If recovery is required, then orphan inodes recorded during the previous + * session (which ended with an unclean unmount) must be deleted from the index. + * This is done by updating the TNC, but since the index is not updated until + * the next commit, the LEBs where the orphan information is recorded are not + * erased until the next commit. + */ +static int kill_orphans(struct ubifs_info *c) +{ + unsigned long long last_cmt_no = 0; + int lnum, err = 0, outofdate = 0, last_flagged = 0; + + c->ohead_lnum = c->orph_first; + c->ohead_offs = 0; + /* Check no-orphans flag and skip this if no orphans */ + if (c->no_orphs) { + dbg_rcvry("no orphans"); + return 0; + } + /* + * Orph nodes always start at c->orph_first and are written to each + * successive LEB in turn. Generally unused LEBs will have been unmapped + * but may contain out of date orph nodes if the unmap didn't go + * through. In addition, the last orph node written for each commit is + * marked (top bit of orph->cmt_no is set to 1). It is possible that + * there are orph nodes from the next commit (i.e. the commit did not + * complete successfully). In that case, no orphans will have been lost + * due to the way that orphans are written, and any orphans added will + * be valid orphans anyway and so can be deleted. + */ + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + struct ubifs_scan_leb *sleb; + + dbg_rcvry("LEB %d", lnum); + sleb = ubifs_scan(c, lnum, 0, c->sbuf); + if (IS_ERR(sleb)) { + sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; + } + } + err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate, + &last_flagged); + if (err || outofdate) { + ubifs_scan_destroy(sleb); + break; + } + if (sleb->endpt) { + c->ohead_lnum = lnum; + c->ohead_offs = sleb->endpt; + } + ubifs_scan_destroy(sleb); + } + return err; +} + +/** + * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them. + * @c: UBIFS file-system description object + * @unclean: indicates recovery from unclean unmount + * @read_only: indicates read only mount + * + * This function is called when mounting to erase orphans from the previous + * session. If UBIFS was not unmounted cleanly, then the inodes recorded as + * orphans are deleted. + */ +int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) +{ + int err = 0; + + c->max_orphans = tot_avail_orphs(c); + + if (!read_only) { + c->orph_buf = vmalloc(c->leb_size); + if (!c->orph_buf) + return -ENOMEM; + } + + if (unclean) + err = kill_orphans(c); + else if (!read_only) + err = clear_orphans(c); + + return err; +} + +#ifdef CONFIG_UBIFS_FS_DEBUG + +struct check_orphan { + struct rb_node rb; + ino_t inum; +}; + +struct check_info { + unsigned long last_ino; + unsigned long tot_inos; + unsigned long missing; + unsigned long long leaf_cnt; + struct ubifs_ino_node *node; + struct rb_root root; +}; + +static int dbg_find_orphan(struct ubifs_info *c, ino_t inum) +{ + struct ubifs_orphan *o; + struct rb_node *p; + + spin_lock(&c->orphan_lock); + p = c->orph_tree.rb_node; + while (p) { + o = rb_entry(p, struct ubifs_orphan, rb); + if (inum < o->inum) + p = p->rb_left; + else if (inum > o->inum) + p = p->rb_right; + else { + spin_unlock(&c->orphan_lock); + return 1; + } + } + spin_unlock(&c->orphan_lock); + return 0; +} + +static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum) +{ + struct check_orphan *orphan, *o; + struct rb_node **p, *parent = NULL; + + orphan = kzalloc(sizeof(struct check_orphan), GFP_NOFS); + if (!orphan) + return -ENOMEM; + orphan->inum = inum; + + p = &root->rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct check_orphan, rb); + if (inum < o->inum) + p = &(*p)->rb_left; + else if (inum > o->inum) + p = &(*p)->rb_right; + else { + kfree(orphan); + return 0; + } + } + rb_link_node(&orphan->rb, parent, p); + rb_insert_color(&orphan->rb, root); + return 0; +} + +static int dbg_find_check_orphan(struct rb_root *root, ino_t inum) +{ + struct check_orphan *o; + struct rb_node *p; + + p = root->rb_node; + while (p) { + o = rb_entry(p, struct check_orphan, rb); + if (inum < o->inum) + p = p->rb_left; + else if (inum > o->inum) + p = p->rb_right; + else + return 1; + } + return 0; +} + +static void dbg_free_check_tree(struct rb_root *root) +{ + struct rb_node *this = root->rb_node; + struct check_orphan *o; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + o = rb_entry(this, struct check_orphan, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &o->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(o); + } +} + +static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *priv) +{ + struct check_info *ci = priv; + ino_t inum; + int err; + + inum = key_inum(c, &zbr->key); + if (inum != ci->last_ino) { + /* Lowest node type is the inode node, so it comes first */ + if (key_type(c, &zbr->key) != UBIFS_INO_KEY) + ubifs_err("found orphan node ino %lu, type %d", inum, + key_type(c, &zbr->key)); + ci->last_ino = inum; + ci->tot_inos += 1; + err = ubifs_tnc_read_node(c, zbr, ci->node); + if (err) { + ubifs_err("node read failed, error %d", err); + return err; + } + if (ci->node->nlink == 0) + /* Must be recorded as an orphan */ + if (!dbg_find_check_orphan(&ci->root, inum) && + !dbg_find_orphan(c, inum)) { + ubifs_err("missing orphan, ino %lu", inum); + ci->missing += 1; + } + } + ci->leaf_cnt += 1; + return 0; +} + +static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) +{ + struct ubifs_scan_node *snod; + struct ubifs_orph_node *orph; + ino_t inum; + int i, n, err; + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); + if (snod->type != UBIFS_ORPH_NODE) + continue; + orph = snod->node; + n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; + for (i = 0; i < n; i++) { + inum = le64_to_cpu(orph->inos[i]); + err = dbg_ins_check_orphan(&ci->root, inum); + if (err) + return err; + } + } + return 0; +} + +static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) +{ + int lnum, err = 0; + + /* Check no-orphans flag and skip this if no orphans */ + if (c->no_orphs) + return 0; + + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + struct ubifs_scan_leb *sleb; + + sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; + } + + err = dbg_read_orphans(ci, sleb); + ubifs_scan_destroy(sleb); + if (err) + break; + } + + return err; +} + +static int dbg_check_orphans(struct ubifs_info *c) +{ + struct check_info ci; + int err; + + if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) + return 0; + + ci.last_ino = 0; + ci.tot_inos = 0; + ci.missing = 0; + ci.leaf_cnt = 0; + ci.root = RB_ROOT; + ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); + if (!ci.node) { + ubifs_err("out of memory"); + return -ENOMEM; + } + + err = dbg_scan_orphans(c, &ci); + if (err) + goto out; + + err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci); + if (err) { + ubifs_err("cannot scan TNC, error %d", err); + goto out; + } + + if (ci.missing) { + ubifs_err("%lu missing orphan(s)", ci.missing); + err = -EINVAL; + goto out; + } + + dbg_cmt("last inode number is %lu", ci.last_ino); + dbg_cmt("total number of inodes is %lu", ci.tot_inos); + dbg_cmt("total number of leaf nodes is %llu", ci.leaf_cnt); + +out: + dbg_free_check_tree(&ci.root); + kfree(ci.node); + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c new file mode 100644 index 000000000000..77d26c141cf6 --- /dev/null +++ b/fs/ubifs/recovery.c @@ -0,0 +1,1519 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements functions needed to recover from unclean un-mounts. + * When UBIFS is mounted, it checks a flag on the master node to determine if + * an un-mount was completed sucessfully. If not, the process of mounting + * incorparates additional checking and fixing of on-flash data structures. + * UBIFS always cleans away all remnants of an unclean un-mount, so that + * errors do not accumulate. However UBIFS defers recovery if it is mounted + * read-only, and the flash is not modified in that case. + */ + +#include +#include "ubifs.h" + +/** + * is_empty - determine whether a buffer is empty (contains all 0xff). + * @buf: buffer to clean + * @len: length of buffer + * + * This function returns %1 if the buffer is empty (contains all 0xff) otherwise + * %0 is returned. + */ +static int is_empty(void *buf, int len) +{ + uint8_t *p = buf; + int i; + + for (i = 0; i < len; i++) + if (*p++ != 0xff) + return 0; + return 1; +} + +/** + * get_master_node - get the last valid master node allowing for corruption. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @pbuf: buffer containing the LEB read, is returned here + * @mst: master node, if found, is returned here + * @cor: corruption, if found, is returned here + * + * This function allocates a buffer, reads the LEB into it, and finds and + * returns the last valid master node allowing for one area of corruption. + * The corrupt area, if there is one, must be consistent with the assumption + * that it is the result of an unclean unmount while the master node was being + * written. Under those circumstances, it is valid to use the previously written + * master node. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, + struct ubifs_mst_node **mst, void **cor) +{ + const int sz = c->mst_node_alsz; + int err, offs, len; + void *sbuf, *buf; + + sbuf = vmalloc(c->leb_size); + if (!sbuf) + return -ENOMEM; + + err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); + if (err && err != -EBADMSG) + goto out_free; + + /* Find the first position that is definitely not a node */ + offs = 0; + buf = sbuf; + len = c->leb_size; + while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) { + struct ubifs_ch *ch = buf; + + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) + break; + offs += sz; + buf += sz; + len -= sz; + } + /* See if there was a valid master node before that */ + if (offs) { + int ret; + + offs -= sz; + buf -= sz; + len += sz; + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret != SCANNED_A_NODE && offs) { + /* Could have been corruption so check one place back */ + offs -= sz; + buf -= sz; + len += sz; + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret != SCANNED_A_NODE) + /* + * We accept only one area of corruption because + * we are assuming that it was caused while + * trying to write a master node. + */ + goto out_err; + } + if (ret == SCANNED_A_NODE) { + struct ubifs_ch *ch = buf; + + if (ch->node_type != UBIFS_MST_NODE) + goto out_err; + dbg_rcvry("found a master node at %d:%d", lnum, offs); + *mst = buf; + offs += sz; + buf += sz; + len -= sz; + } + } + /* Check for corruption */ + if (offs < c->leb_size) { + if (!is_empty(buf, min_t(int, len, sz))) { + *cor = buf; + dbg_rcvry("found corruption at %d:%d", lnum, offs); + } + offs += sz; + buf += sz; + len -= sz; + } + /* Check remaining empty space */ + if (offs < c->leb_size) + if (!is_empty(buf, len)) + goto out_err; + *pbuf = sbuf; + return 0; + +out_err: + err = -EINVAL; +out_free: + vfree(sbuf); + *mst = NULL; + *cor = NULL; + return err; +} + +/** + * write_rcvrd_mst_node - write recovered master node. + * @c: UBIFS file-system description object + * @mst: master node + * + * This function returns %0 on success and a negative error code on failure. + */ +static int write_rcvrd_mst_node(struct ubifs_info *c, + struct ubifs_mst_node *mst) +{ + int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz; + uint32_t save_flags; + + dbg_rcvry("recovery"); + + save_flags = mst->flags; + mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY); + + ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); + err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); + if (err) + goto out; + err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); + if (err) + goto out; +out: + mst->flags = save_flags; + return err; +} + +/** + * ubifs_recover_master_node - recover the master node. + * @c: UBIFS file-system description object + * + * This function recovers the master node from corruption that may occur due to + * an unclean unmount. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_recover_master_node(struct ubifs_info *c) +{ + void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL; + struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst; + const int sz = c->mst_node_alsz; + int err, offs1, offs2; + + dbg_rcvry("recovery"); + + err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1); + if (err) + goto out_free; + + err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2); + if (err) + goto out_free; + + if (mst1) { + offs1 = (void *)mst1 - buf1; + if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) && + (offs1 == 0 && !cor1)) { + /* + * mst1 was written by recovery at offset 0 with no + * corruption. + */ + dbg_rcvry("recovery recovery"); + mst = mst1; + } else if (mst2) { + offs2 = (void *)mst2 - buf2; + if (offs1 == offs2) { + /* Same offset, so must be the same */ + if (memcmp((void *)mst1 + UBIFS_CH_SZ, + (void *)mst2 + UBIFS_CH_SZ, + UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) + goto out_err; + mst = mst1; + } else if (offs2 + sz == offs1) { + /* 1st LEB was written, 2nd was not */ + if (cor1) + goto out_err; + mst = mst1; + } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { + /* 1st LEB was unmapped and written, 2nd not */ + if (cor1) + goto out_err; + mst = mst1; + } else + goto out_err; + } else { + /* + * 2nd LEB was unmapped and about to be written, so + * there must be only one master node in the first LEB + * and no corruption. + */ + if (offs1 != 0 || cor1) + goto out_err; + mst = mst1; + } + } else { + if (!mst2) + goto out_err; + /* + * 1st LEB was unmapped and about to be written, so there must + * be no room left in 2nd LEB. + */ + offs2 = (void *)mst2 - buf2; + if (offs2 + sz + sz <= c->leb_size) + goto out_err; + mst = mst2; + } + + dbg_rcvry("recovered master node from LEB %d", + (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); + + memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); + + if ((c->vfs_sb->s_flags & MS_RDONLY)) { + /* Read-only mode. Keep a copy for switching to rw mode */ + c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); + if (!c->rcvrd_mst_node) { + err = -ENOMEM; + goto out_free; + } + memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); + } else { + /* Write the recovered master node */ + c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; + err = write_rcvrd_mst_node(c, c->mst_node); + if (err) + goto out_free; + } + + vfree(buf2); + vfree(buf1); + + return 0; + +out_err: + err = -EINVAL; +out_free: + ubifs_err("failed to recover master node"); + if (mst1) { + dbg_err("dumping first master node"); + dbg_dump_node(c, mst1); + } + if (mst2) { + dbg_err("dumping second master node"); + dbg_dump_node(c, mst2); + } + vfree(buf2); + vfree(buf1); + return err; +} + +/** + * ubifs_write_rcvrd_mst_node - write the recovered master node. + * @c: UBIFS file-system description object + * + * This function writes the master node that was recovered during mounting in + * read-only mode and must now be written because we are remounting rw. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) +{ + int err; + + if (!c->rcvrd_mst_node) + return 0; + c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = write_rcvrd_mst_node(c, c->rcvrd_mst_node); + if (err) + return err; + kfree(c->rcvrd_mst_node); + c->rcvrd_mst_node = NULL; + return 0; +} + +/** + * is_last_write - determine if an offset was in the last write to a LEB. + * @c: UBIFS file-system description object + * @buf: buffer to check + * @offs: offset to check + * + * This function returns %1 if @offs was in the last write to the LEB whose data + * is in @buf, otherwise %0 is returned. The determination is made by checking + * for subsequent empty space starting from the next min_io_size boundary (or a + * bit less than the common header size if min_io_size is one). + */ +static int is_last_write(const struct ubifs_info *c, void *buf, int offs) +{ + int empty_offs; + int check_len; + uint8_t *p; + + if (c->min_io_size == 1) { + check_len = c->leb_size - offs; + p = buf + check_len; + for (; check_len > 0; check_len--) + if (*--p != 0xff) + break; + /* + * 'check_len' is the size of the corruption which cannot be + * more than the size of 1 node if it was caused by an unclean + * unmount. + */ + if (check_len > UBIFS_MAX_NODE_SZ) + return 0; + return 1; + } + + /* + * Round up to the next c->min_io_size boundary i.e. 'offs' is in the + * last wbuf written. After that should be empty space. + */ + empty_offs = ALIGN(offs + 1, c->min_io_size); + check_len = c->leb_size - empty_offs; + p = buf + empty_offs - offs; + + for (; check_len > 0; check_len--) + if (*p++ != 0xff) + return 0; + return 1; +} + +/** + * clean_buf - clean the data from an LEB sitting in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer to clean + * @lnum: LEB number to clean + * @offs: offset from which to clean + * @len: length of buffer + * + * This function pads up to the next min_io_size boundary (if there is one) and + * sets empty space to all 0xff. @buf, @offs and @len are updated to the next + * min_io_size boundary (if there is one). + */ +static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, + int *offs, int *len) +{ + int empty_offs, pad_len; + + lnum = lnum; + dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); + + if (c->min_io_size == 1) { + memset(*buf, 0xff, c->leb_size - *offs); + return; + } + + ubifs_assert(!(*offs & 7)); + empty_offs = ALIGN(*offs, c->min_io_size); + pad_len = empty_offs - *offs; + ubifs_pad(c, *buf, pad_len); + *offs += pad_len; + *buf += pad_len; + *len -= pad_len; + memset(*buf, 0xff, c->leb_size - empty_offs); +} + +/** + * no_more_nodes - determine if there are no more nodes in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer to check + * @len: length of buffer + * @lnum: LEB number of the LEB from which @buf was read + * @offs: offset from which @buf was read + * + * This function scans @buf for more nodes and returns %0 is a node is found and + * %1 if no more nodes are found. + */ +static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, + int lnum, int offs) +{ + int skip, next_offs = 0; + + if (len > UBIFS_DATA_NODE_SZ) { + struct ubifs_ch *ch = buf; + int dlen = le32_to_cpu(ch->len); + + if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ && + dlen <= UBIFS_MAX_DATA_NODE_SZ) + /* The corrupt node looks like a data node */ + next_offs = ALIGN(offs + dlen, 8); + } + + if (c->min_io_size == 1) + skip = 8; + else + skip = ALIGN(offs + 1, c->min_io_size) - offs; + + offs += skip; + buf += skip; + len -= skip; + while (len > 8) { + struct ubifs_ch *ch = buf; + uint32_t magic = le32_to_cpu(ch->magic); + int ret; + + if (magic == UBIFS_NODE_MAGIC) { + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret == SCANNED_A_NODE || ret > 0) { + /* + * There is a small chance this is just data in + * a data node, so check that possibility. e.g. + * this is part of a file that itself contains + * a UBIFS image. + */ + if (next_offs && offs + le32_to_cpu(ch->len) <= + next_offs) + continue; + dbg_rcvry("unexpected node at %d:%d", lnum, + offs); + return 0; + } + } + offs += 8; + buf += 8; + len -= 8; + } + return 1; +} + +/** + * fix_unclean_leb - fix an unclean LEB. + * @c: UBIFS file-system description object + * @sleb: scanned LEB information + * @start: offset where scan started + */ +static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int start) +{ + int lnum = sleb->lnum, endpt = start; + + /* Get the end offset of the last node we are keeping */ + if (!list_empty(&sleb->nodes)) { + struct ubifs_scan_node *snod; + + snod = list_entry(sleb->nodes.prev, + struct ubifs_scan_node, list); + endpt = snod->offs + snod->len; + } + + if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { + /* Add to recovery list */ + struct ubifs_unclean_leb *ucleb; + + dbg_rcvry("need to fix LEB %d start %d endpt %d", + lnum, start, sleb->endpt); + ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS); + if (!ucleb) + return -ENOMEM; + ucleb->lnum = lnum; + ucleb->endpt = endpt; + list_add_tail(&ucleb->list, &c->unclean_leb_list); + } else { + /* Write the fixed LEB back to flash */ + int err; + + dbg_rcvry("fixing LEB %d start %d endpt %d", + lnum, start, sleb->endpt); + if (endpt == 0) { + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + } else { + int len = ALIGN(endpt, c->min_io_size); + + if (start) { + err = ubi_read(c->ubi, lnum, sleb->buf, 0, + start); + if (err) + return err; + } + /* Pad to min_io_size */ + if (len > endpt) { + int pad_len = len - ALIGN(endpt, 8); + + if (pad_len > 0) { + void *buf = sleb->buf + len - pad_len; + + ubifs_pad(c, buf, pad_len); + } + } + err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, + UBI_UNKNOWN); + if (err) + return err; + } + } + return 0; +} + +/** + * drop_incomplete_group - drop nodes from an incomplete group. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * + * This function returns %1 if nodes are dropped and %0 otherwise. + */ +static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) +{ + int dropped = 0; + + while (!list_empty(&sleb->nodes)) { + struct ubifs_scan_node *snod; + struct ubifs_ch *ch; + + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + ch = snod->node; + if (ch->group_type != UBIFS_IN_NODE_GROUP) + return dropped; + dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; + dropped = 1; + } + return dropped; +} + +/** + * ubifs_recover_leb - scan and recover a LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @offs: offset + * @sbuf: LEB-sized buffer to use + * @grouped: nodes may be grouped for recovery + * + * This function does a scan of a LEB, but caters for errors that might have + * been caused by the unclean unmount from which we are attempting to recover. + * + * This function returns %0 on success and a negative error code on failure. + */ +struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf, int grouped) +{ + int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; + int empty_chkd = 0, start = offs; + struct ubifs_scan_leb *sleb; + void *buf = sbuf + offs; + + dbg_rcvry("%d:%d", lnum, offs); + + sleb = ubifs_start_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return sleb; + + if (sleb->ecc) + need_clean = 1; + + while (len >= 8) { + int ret; + + dbg_scan("look at LEB %d:%d (%d bytes left)", + lnum, offs, len); + + cond_resched(); + + /* + * Scan quietly until there is an error from which we cannot + * recover + */ + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); + + if (ret == SCANNED_A_NODE) { + /* A valid node, and not a padding node */ + struct ubifs_ch *ch = buf; + int node_len; + + err = ubifs_add_snod(c, sleb, buf, offs); + if (err) + goto error; + node_len = ALIGN(le32_to_cpu(ch->len), 8); + offs += node_len; + buf += node_len; + len -= node_len; + continue; + } + + if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; + continue; + } + + if (ret == SCANNED_EMPTY_SPACE) { + if (!is_empty(buf, len)) { + if (!is_last_write(c, buf, offs)) + break; + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } + empty_chkd = 1; + break; + } + + if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) + if (is_last_write(c, buf, offs)) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + empty_chkd = 1; + break; + } + + if (ret == SCANNED_A_CORRUPT_NODE) + if (no_more_nodes(c, buf, len, lnum, offs)) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + empty_chkd = 1; + break; + } + + if (quiet) { + /* Redo the last scan but noisily */ + quiet = 0; + continue; + } + + switch (ret) { + case SCANNED_GARBAGE: + dbg_err("garbage"); + goto corrupted; + case SCANNED_A_CORRUPT_NODE: + case SCANNED_A_BAD_PAD_NODE: + dbg_err("bad node"); + goto corrupted; + default: + dbg_err("unknown"); + goto corrupted; + } + } + + if (!empty_chkd && !is_empty(buf, len)) { + if (is_last_write(c, buf, offs)) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } else { + ubifs_err("corrupt empty space at LEB %d:%d", + lnum, offs); + goto corrupted; + } + } + + /* Drop nodes from incomplete group */ + if (grouped && drop_incomplete_group(sleb, &offs)) { + buf = sbuf + offs; + len = c->leb_size - offs; + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } + + if (offs % c->min_io_size) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } + + ubifs_end_scan(c, sleb, lnum, offs); + + if (need_clean) { + err = fix_unclean_leb(c, sleb, start); + if (err) + goto error; + } + + return sleb; + +corrupted: + ubifs_scanned_corruption(c, lnum, offs, buf); + err = -EUCLEAN; +error: + ubifs_err("LEB %d scanning failed", lnum); + ubifs_scan_destroy(sleb); + return ERR_PTR(err); +} + +/** + * get_cs_sqnum - get commit start sequence number. + * @c: UBIFS file-system description object + * @lnum: LEB number of commit start node + * @offs: offset of commit start node + * @cs_sqnum: commit start sequence number is returned here + * + * This function returns %0 on success and a negative error code on failure. + */ +static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, + unsigned long long *cs_sqnum) +{ + struct ubifs_cs_node *cs_node = NULL; + int err, ret; + + dbg_rcvry("at %d:%d", lnum, offs); + cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL); + if (!cs_node) + return -ENOMEM; + if (c->leb_size - offs < UBIFS_CS_NODE_SZ) + goto out_err; + err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); + if (err && err != -EBADMSG) + goto out_free; + ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); + if (ret != SCANNED_A_NODE) { + dbg_err("Not a valid node"); + goto out_err; + } + if (cs_node->ch.node_type != UBIFS_CS_NODE) { + dbg_err("Node a CS node, type is %d", cs_node->ch.node_type); + goto out_err; + } + if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { + dbg_err("CS node cmt_no %llu != current cmt_no %llu", + (unsigned long long)le64_to_cpu(cs_node->cmt_no), + c->cmt_no); + goto out_err; + } + *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum); + dbg_rcvry("commit start sqnum %llu", *cs_sqnum); + kfree(cs_node); + return 0; + +out_err: + err = -EINVAL; +out_free: + ubifs_err("failed to get CS sqnum"); + kfree(cs_node); + return err; +} + +/** + * ubifs_recover_log_leb - scan and recover a log LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @offs: offset + * @sbuf: LEB-sized buffer to use + * + * This function does a scan of a LEB, but caters for errors that might have + * been caused by the unclean unmount from which we are attempting to recover. + * + * This function returns %0 on success and a negative error code on failure. + */ +struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf) +{ + struct ubifs_scan_leb *sleb; + int next_lnum; + + dbg_rcvry("LEB %d", lnum); + next_lnum = lnum + 1; + if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs) + next_lnum = UBIFS_LOG_LNUM; + if (next_lnum != c->ltail_lnum) { + /* + * We can only recover at the end of the log, so check that the + * next log LEB is empty or out of date. + */ + sleb = ubifs_scan(c, next_lnum, 0, sbuf); + if (IS_ERR(sleb)) + return sleb; + if (sleb->nodes_cnt) { + struct ubifs_scan_node *snod; + unsigned long long cs_sqnum = c->cs_sqnum; + + snod = list_entry(sleb->nodes.next, + struct ubifs_scan_node, list); + if (cs_sqnum == 0) { + int err; + + err = get_cs_sqnum(c, lnum, offs, &cs_sqnum); + if (err) { + ubifs_scan_destroy(sleb); + return ERR_PTR(err); + } + } + if (snod->sqnum > cs_sqnum) { + ubifs_err("unrecoverable log corruption " + "in LEB %d", lnum); + ubifs_scan_destroy(sleb); + return ERR_PTR(-EUCLEAN); + } + } + ubifs_scan_destroy(sleb); + } + return ubifs_recover_leb(c, lnum, offs, sbuf, 0); +} + +/** + * recover_head - recover a head. + * @c: UBIFS file-system description object + * @lnum: LEB number of head to recover + * @offs: offset of head to recover + * @sbuf: LEB-sized buffer to use + * + * This function ensures that there is no data on the flash at a head location. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int recover_head(const struct ubifs_info *c, int lnum, int offs, + void *sbuf) +{ + int len, err, need_clean = 0; + + if (c->min_io_size > 1) + len = c->min_io_size; + else + len = 512; + if (offs + len > c->leb_size) + len = c->leb_size - offs; + + if (!len) + return 0; + + /* Read at the head location and check it is empty flash */ + err = ubi_read(c->ubi, lnum, sbuf, offs, len); + if (err) + need_clean = 1; + else { + uint8_t *p = sbuf; + + while (len--) + if (*p++ != 0xff) { + need_clean = 1; + break; + } + } + + if (need_clean) { + dbg_rcvry("cleaning head at %d:%d", lnum, offs); + if (offs == 0) + return ubifs_leb_unmap(c, lnum); + err = ubi_read(c->ubi, lnum, sbuf, 0, offs); + if (err) + return err; + return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); + } + + return 0; +} + +/** + * ubifs_recover_inl_heads - recover index and LPT heads. + * @c: UBIFS file-system description object + * @sbuf: LEB-sized buffer to use + * + * This function ensures that there is no data on the flash at the index and + * LPT head locations. + * + * This deals with the recovery of a half-completed journal commit. UBIFS is + * careful never to overwrite the last version of the index or the LPT. Because + * the index and LPT are wandering trees, data from a half-completed commit will + * not be referenced anywhere in UBIFS. The data will be either in LEBs that are + * assumed to be empty and will be unmapped anyway before use, or in the index + * and LPT heads. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) +{ + int err; + + ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); + + dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); + err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); + if (err) + return err; + + dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs); + err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf); + if (err) + return err; + + return 0; +} + +/** + * clean_an_unclean_leb - read and write a LEB to remove corruption. + * @c: UBIFS file-system description object + * @ucleb: unclean LEB information + * @sbuf: LEB-sized buffer to use + * + * This function reads a LEB up to a point pre-determined by the mount recovery, + * checks the nodes, and writes the result back to the flash, thereby cleaning + * off any following corruption, or non-fatal ECC errors. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int clean_an_unclean_leb(const struct ubifs_info *c, + struct ubifs_unclean_leb *ucleb, void *sbuf) +{ + int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; + void *buf = sbuf; + + dbg_rcvry("LEB %d len %d", lnum, len); + + if (len == 0) { + /* Nothing to read, just unmap it */ + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + return 0; + } + + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err && err != -EBADMSG) + return err; + + while (len >= 8) { + int ret; + + cond_resched(); + + /* Scan quietly until there is an error */ + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); + + if (ret == SCANNED_A_NODE) { + /* A valid node, and not a padding node */ + struct ubifs_ch *ch = buf; + int node_len; + + node_len = ALIGN(le32_to_cpu(ch->len), 8); + offs += node_len; + buf += node_len; + len -= node_len; + continue; + } + + if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; + continue; + } + + if (ret == SCANNED_EMPTY_SPACE) { + ubifs_err("unexpected empty space at %d:%d", + lnum, offs); + return -EUCLEAN; + } + + if (quiet) { + /* Redo the last scan but noisily */ + quiet = 0; + continue; + } + + ubifs_scanned_corruption(c, lnum, offs, buf); + return -EUCLEAN; + } + + /* Pad to min_io_size */ + len = ALIGN(ucleb->endpt, c->min_io_size); + if (len > ucleb->endpt) { + int pad_len = len - ALIGN(ucleb->endpt, 8); + + if (pad_len > 0) { + buf = c->sbuf + len - pad_len; + ubifs_pad(c, buf, pad_len); + } + } + + /* Write back the LEB atomically */ + err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); + if (err) + return err; + + dbg_rcvry("cleaned LEB %d", lnum); + + return 0; +} + +/** + * ubifs_clean_lebs - clean LEBs recovered during read-only mount. + * @c: UBIFS file-system description object + * @sbuf: LEB-sized buffer to use + * + * This function cleans a LEB identified during recovery that needs to be + * written but was not because UBIFS was mounted read-only. This happens when + * remounting to read-write mode. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) +{ + dbg_rcvry("recovery"); + while (!list_empty(&c->unclean_leb_list)) { + struct ubifs_unclean_leb *ucleb; + int err; + + ucleb = list_entry(c->unclean_leb_list.next, + struct ubifs_unclean_leb, list); + err = clean_an_unclean_leb(c, ucleb, sbuf); + if (err) + return err; + list_del(&ucleb->list); + kfree(ucleb); + } + return 0; +} + +/** + * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. + * @c: UBIFS file-system description object + * + * Out-of-place garbage collection requires always one empty LEB with which to + * start garbage collection. The LEB number is recorded in c->gc_lnum and is + * written to the master node on unmounting. In the case of an unclean unmount + * the value of gc_lnum recorded in the master node is out of date and cannot + * be used. Instead, recovery must allocate an empty LEB for this purpose. + * However, there may not be enough empty space, in which case it must be + * possible to GC the dirtiest LEB into the GC head LEB. + * + * This function also runs the commit which causes the TNC updates from + * size-recovery and orphans to be written to the flash. That is important to + * ensure correct replay order for subsequent mounts. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_rcvry_gc_commit(struct ubifs_info *c) +{ + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + struct ubifs_lprops lp; + int lnum, err; + + c->gc_lnum = -1; + if (wbuf->lnum == -1) { + dbg_rcvry("no GC head LEB"); + goto find_free; + } + /* + * See whether the used space in the dirtiest LEB fits in the GC head + * LEB. + */ + if (wbuf->offs == c->leb_size) { + dbg_rcvry("no room in GC head LEB"); + goto find_free; + } + err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); + if (err) { + if (err == -ENOSPC) + dbg_err("could not find a dirty LEB"); + return err; + } + ubifs_assert(!(lp.flags & LPROPS_INDEX)); + lnum = lp.lnum; + if (lp.free + lp.dirty == c->leb_size) { + /* An empty LEB was returned */ + if (lp.free != c->leb_size) { + err = ubifs_change_one_lp(c, lnum, c->leb_size, + 0, 0, 0, 0); + if (err) + return err; + } + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; + c->gc_lnum = lnum; + dbg_rcvry("allocated LEB %d for GC", lnum); + /* Run the commit */ + dbg_rcvry("committing"); + return ubifs_run_commit(c); + } + /* + * There was no empty LEB so the used space in the dirtiest LEB must fit + * in the GC head LEB. + */ + if (lp.free + lp.dirty < wbuf->offs) { + dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", + lnum, wbuf->lnum, wbuf->offs); + err = ubifs_return_leb(c, lnum); + if (err) + return err; + goto find_free; + } + /* + * We run the commit before garbage collection otherwise subsequent + * mounts will see the GC and orphan deletion in a different order. + */ + dbg_rcvry("committing"); + err = ubifs_run_commit(c); + if (err) + return err; + /* + * The data in the dirtiest LEB fits in the GC head LEB, so do the GC + * - use locking to keep 'ubifs_assert()' happy. + */ + dbg_rcvry("GC'ing LEB %d", lnum); + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + err = ubifs_garbage_collect_leb(c, &lp); + if (err >= 0) { + int err2 = ubifs_wbuf_sync_nolock(wbuf); + + if (err2) + err = err2; + } + mutex_unlock(&wbuf->io_mutex); + if (err < 0) { + dbg_err("GC failed, error %d", err); + if (err == -EAGAIN) + err = -EINVAL; + return err; + } + if (err != LEB_RETAINED) { + dbg_err("GC returned %d", err); + return -EINVAL; + } + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; + dbg_rcvry("allocated LEB %d for GC", lnum); + return 0; + +find_free: + /* + * There is no GC head LEB or the free space in the GC head LEB is too + * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so + * GC is not run. + */ + lnum = ubifs_find_free_leb_for_idx(c); + if (lnum < 0) { + dbg_err("could not find an empty LEB"); + return lnum; + } + /* And reset the index flag */ + err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_INDEX, 0); + if (err) + return err; + c->gc_lnum = lnum; + dbg_rcvry("allocated LEB %d for GC", lnum); + /* Run the commit */ + dbg_rcvry("committing"); + return ubifs_run_commit(c); +} + +/** + * struct size_entry - inode size information for recovery. + * @rb: link in the RB-tree of sizes + * @inum: inode number + * @i_size: size on inode + * @d_size: maximum size based on data nodes + * @exists: indicates whether the inode exists + * @inode: inode if pinned in memory awaiting rw mode to fix it + */ +struct size_entry { + struct rb_node rb; + ino_t inum; + loff_t i_size; + loff_t d_size; + int exists; + struct inode *inode; +}; + +/** + * add_ino - add an entry to the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + * @i_size: size on inode + * @d_size: maximum size based on data nodes + * @exists: indicates whether the inode exists + */ +static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size, + loff_t d_size, int exists) +{ + struct rb_node **p = &c->size_tree.rb_node, *parent = NULL; + struct size_entry *e; + + while (*p) { + parent = *p; + e = rb_entry(parent, struct size_entry, rb); + if (inum < e->inum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + e = kzalloc(sizeof(struct size_entry), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->inum = inum; + e->i_size = i_size; + e->d_size = d_size; + e->exists = exists; + + rb_link_node(&e->rb, parent, p); + rb_insert_color(&e->rb, &c->size_tree); + + return 0; +} + +/** + * find_ino - find an entry on the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + */ +static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum) +{ + struct rb_node *p = c->size_tree.rb_node; + struct size_entry *e; + + while (p) { + e = rb_entry(p, struct size_entry, rb); + if (inum < e->inum) + p = p->rb_left; + else if (inum > e->inum) + p = p->rb_right; + else + return e; + } + return NULL; +} + +/** + * remove_ino - remove an entry from the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + */ +static void remove_ino(struct ubifs_info *c, ino_t inum) +{ + struct size_entry *e = find_ino(c, inum); + + if (!e) + return; + rb_erase(&e->rb, &c->size_tree); + kfree(e); +} + +/** + * ubifs_destroy_size_tree - free resources related to the size tree. + * @c: UBIFS file-system description object + */ +void ubifs_destroy_size_tree(struct ubifs_info *c) +{ + struct rb_node *this = c->size_tree.rb_node; + struct size_entry *e; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + e = rb_entry(this, struct size_entry, rb); + if (e->inode) + iput(e->inode); + this = rb_parent(this); + if (this) { + if (this->rb_left == &e->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(e); + } + c->size_tree = RB_ROOT; +} + +/** + * ubifs_recover_size_accum - accumulate inode sizes for recovery. + * @c: UBIFS file-system description object + * @key: node key + * @deletion: node is for a deletion + * @new_size: inode size + * + * This function has two purposes: + * 1) to ensure there are no data nodes that fall outside the inode size + * 2) to ensure there are no data nodes for inodes that do not exist + * To accomplish those purposes, a rb-tree is constructed containing an entry + * for each inode number in the journal that has not been deleted, and recording + * the size from the inode node, the maximum size of any data node (also altered + * by truncations) and a flag indicating a inode number for which no inode node + * was present in the journal. + * + * Note that there is still the possibility that there are data nodes that have + * been committed that are beyond the inode size, however the only way to find + * them would be to scan the entire index. Alternatively, some provision could + * be made to record the size of inodes at the start of commit, which would seem + * very cumbersome for a scenario that is quite unlikely and the only negative + * consequence of which is wasted space. + * + * This functions returns %0 on success and a negative error code on failure. + */ +int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, + int deletion, loff_t new_size) +{ + ino_t inum = key_inum(c, key); + struct size_entry *e; + int err; + + switch (key_type(c, key)) { + case UBIFS_INO_KEY: + if (deletion) + remove_ino(c, inum); + else { + e = find_ino(c, inum); + if (e) { + e->i_size = new_size; + e->exists = 1; + } else { + err = add_ino(c, inum, new_size, 0, 1); + if (err) + return err; + } + } + break; + case UBIFS_DATA_KEY: + e = find_ino(c, inum); + if (e) { + if (new_size > e->d_size) + e->d_size = new_size; + } else { + err = add_ino(c, inum, 0, new_size, 0); + if (err) + return err; + } + break; + case UBIFS_TRUN_KEY: + e = find_ino(c, inum); + if (e) + e->d_size = new_size; + break; + } + return 0; +} + +/** + * fix_size_in_place - fix inode size in place on flash. + * @c: UBIFS file-system description object + * @e: inode size information for recovery + */ +static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) +{ + struct ubifs_ino_node *ino = c->sbuf; + unsigned char *p; + union ubifs_key key; + int err, lnum, offs, len; + loff_t i_size; + uint32_t crc; + + /* Locate the inode node LEB number and offset */ + ino_key_init(c, &key, e->inum); + err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs); + if (err) + goto out; + /* + * If the size recorded on the inode node is greater than the size that + * was calculated from nodes in the journal then don't change the inode. + */ + i_size = le64_to_cpu(ino->size); + if (i_size >= e->d_size) + return 0; + /* Read the LEB */ + err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); + if (err) + goto out; + /* Change the size field and recalculate the CRC */ + ino = c->sbuf + offs; + ino->size = cpu_to_le64(e->d_size); + len = le32_to_cpu(ino->ch.len); + crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8); + ino->ch.crc = cpu_to_le32(crc); + /* Work out where data in the LEB ends and free space begins */ + p = c->sbuf; + len = c->leb_size - 1; + while (p[len] == 0xff) + len -= 1; + len = ALIGN(len + 1, c->min_io_size); + /* Atomically write the fixed LEB back again */ + err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); + if (err) + goto out; + dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs, + i_size, e->d_size); + return 0; + +out: + ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d", + e->inum, e->i_size, e->d_size, err); + return err; +} + +/** + * ubifs_recover_size - recover inode size. + * @c: UBIFS file-system description object + * + * This function attempts to fix inode size discrepancies identified by the + * 'ubifs_recover_size_accum()' function. + * + * This functions returns %0 on success and a negative error code on failure. + */ +int ubifs_recover_size(struct ubifs_info *c) +{ + struct rb_node *this = rb_first(&c->size_tree); + + while (this) { + struct size_entry *e; + int err; + + e = rb_entry(this, struct size_entry, rb); + if (!e->exists) { + union ubifs_key key; + + ino_key_init(c, &key, e->inum); + err = ubifs_tnc_lookup(c, &key, c->sbuf); + if (err && err != -ENOENT) + return err; + if (err == -ENOENT) { + /* Remove data nodes that have no inode */ + dbg_rcvry("removing ino %lu", e->inum); + err = ubifs_tnc_remove_ino(c, e->inum); + if (err) + return err; + } else { + struct ubifs_ino_node *ino = c->sbuf; + + e->exists = 1; + e->i_size = le64_to_cpu(ino->size); + } + } + if (e->exists && e->i_size < e->d_size) { + if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { + /* Fix the inode size and pin it in memory */ + struct inode *inode; + + inode = ubifs_iget(c->vfs_sb, e->inum); + if (IS_ERR(inode)) + return PTR_ERR(inode); + if (inode->i_size < e->d_size) { + dbg_rcvry("ino %lu size %lld -> %lld", + e->inum, e->d_size, + inode->i_size); + inode->i_size = e->d_size; + ubifs_inode(inode)->ui_size = e->d_size; + e->inode = inode; + this = rb_next(this); + continue; + } + iput(inode); + } else { + /* Fix the size in place */ + err = fix_size_in_place(c, e); + if (err) + return err; + if (e->inode) + iput(e->inode); + } + } + this = rb_next(this); + rb_erase(&e->rb, &c->size_tree); + kfree(e); + } + return 0; +} diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c new file mode 100644 index 000000000000..7399692af859 --- /dev/null +++ b/fs/ubifs/replay.c @@ -0,0 +1,1075 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains journal replay code. It runs when the file-system is being + * mounted and requires no locking. + * + * The larger is the journal, the longer it takes to scan it, so the longer it + * takes to mount UBIFS. This is why the journal has limited size which may be + * changed depending on the system requirements. But a larger journal gives + * faster I/O speed because it writes the index less frequently. So this is a + * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the + * larger is the journal, the more memory its index may consume. + */ + +#include "ubifs.h" + +/* + * Replay flags. + * + * REPLAY_DELETION: node was deleted + * REPLAY_REF: node is a reference node + */ +enum { + REPLAY_DELETION = 1, + REPLAY_REF = 2, +}; + +/** + * struct replay_entry - replay tree entry. + * @lnum: logical eraseblock number of the node + * @offs: node offset + * @len: node length + * @sqnum: node sequence number + * @flags: replay flags + * @rb: links the replay tree + * @key: node key + * @nm: directory entry name + * @old_size: truncation old size + * @new_size: truncation new size + * @free: amount of free space in a bud + * @dirty: amount of dirty space in a bud from padding and deletion nodes + * + * UBIFS journal replay must compare node sequence numbers, which means it must + * build a tree of node information to insert into the TNC. + */ +struct replay_entry { + int lnum; + int offs; + int len; + unsigned long long sqnum; + int flags; + struct rb_node rb; + union ubifs_key key; + union { + struct qstr nm; + struct { + loff_t old_size; + loff_t new_size; + }; + struct { + int free; + int dirty; + }; + }; +}; + +/** + * struct bud_entry - entry in the list of buds to replay. + * @list: next bud in the list + * @bud: bud description object + * @free: free bytes in the bud + * @sqnum: reference node sequence number + */ +struct bud_entry { + struct list_head list; + struct ubifs_bud *bud; + int free; + unsigned long long sqnum; +}; + +/** + * set_bud_lprops - set free and dirty space used by a bud. + * @c: UBIFS file-system description object + * @r: replay entry of bud + */ +static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) +{ + const struct ubifs_lprops *lp; + int err = 0, dirty; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, r->lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + dirty = lp->dirty; + if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { + /* + * The LEB was added to the journal with a starting offset of + * zero which means the LEB must have been empty. The LEB + * property values should be lp->free == c->leb_size and + * lp->dirty == 0, but that is not the case. The reason is that + * the LEB was garbage collected. The garbage collector resets + * the free and dirty space without recording it anywhere except + * lprops, so if there is not a commit then lprops does not have + * that information next time the file system is mounted. + * + * We do not need to adjust free space because the scan has told + * us the exact value which is recorded in the replay entry as + * r->free. + * + * However we do need to subtract from the dirty space the + * amount of space that the garbage collector reclaimed, which + * is the whole LEB minus the amount of space that was free. + */ + dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + lp->free, lp->dirty); + dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + lp->free, lp->dirty); + dirty -= c->leb_size - lp->free; + /* + * If the replay order was perfect the dirty space would now be + * zero. The order is not perfect because the the journal heads + * race with eachother. This is not a problem but is does mean + * that the dirty space may temporarily exceed c->leb_size + * during the replay. + */ + if (dirty != 0) + dbg_msg("LEB %d lp: %d free %d dirty " + "replay: %d free %d dirty", r->lnum, lp->free, + lp->dirty, r->free, r->dirty); + } + lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } +out: + ubifs_release_lprops(c); + return err; +} + +/** + * trun_remove_range - apply a replay entry for a truncation to the TNC. + * @c: UBIFS file-system description object + * @r: replay entry of truncation + */ +static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) +{ + unsigned min_blk, max_blk; + union ubifs_key min_key, max_key; + ino_t ino; + + min_blk = r->new_size / UBIFS_BLOCK_SIZE; + if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) + min_blk += 1; + + max_blk = r->old_size / UBIFS_BLOCK_SIZE; + if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) + max_blk -= 1; + + ino = key_inum(c, &r->key); + + data_key_init(c, &min_key, ino, min_blk); + data_key_init(c, &max_key, ino, max_blk); + + return ubifs_tnc_remove_range(c, &min_key, &max_key); +} + +/** + * apply_replay_entry - apply a replay entry to the TNC. + * @c: UBIFS file-system description object + * @r: replay entry to apply + * + * Apply a replay entry to the TNC. + */ +static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) +{ + int err, deletion = ((r->flags & REPLAY_DELETION) != 0); + + dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, + r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); + + /* Set c->replay_sqnum to help deal with dangling branches. */ + c->replay_sqnum = r->sqnum; + + if (r->flags & REPLAY_REF) + err = set_bud_lprops(c, r); + else if (is_hash_key(c, &r->key)) { + if (deletion) + err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); + else + err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, + r->len, &r->nm); + } else { + if (deletion) + switch (key_type(c, &r->key)) { + case UBIFS_INO_KEY: + { + ino_t inum = key_inum(c, &r->key); + + err = ubifs_tnc_remove_ino(c, inum); + break; + } + case UBIFS_TRUN_KEY: + err = trun_remove_range(c, r); + break; + default: + err = ubifs_tnc_remove(c, &r->key); + break; + } + else + err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, + r->len); + if (err) + return err; + + if (c->need_recovery) + err = ubifs_recover_size_accum(c, &r->key, deletion, + r->new_size); + } + + return err; +} + +/** + * destroy_replay_tree - destroy the replay. + * @c: UBIFS file-system description object + * + * Destroy the replay tree. + */ +static void destroy_replay_tree(struct ubifs_info *c) +{ + struct rb_node *this = c->replay_tree.rb_node; + struct replay_entry *r; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + r = rb_entry(this, struct replay_entry, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &r->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + if (is_hash_key(c, &r->key)) + kfree(r->nm.name); + kfree(r); + } + c->replay_tree = RB_ROOT; +} + +/** + * apply_replay_tree - apply the replay tree to the TNC. + * @c: UBIFS file-system description object + * + * Apply the replay tree. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int apply_replay_tree(struct ubifs_info *c) +{ + struct rb_node *this = rb_first(&c->replay_tree); + + while (this) { + struct replay_entry *r; + int err; + + cond_resched(); + + r = rb_entry(this, struct replay_entry, rb); + err = apply_replay_entry(c, r); + if (err) + return err; + this = rb_next(this); + } + return 0; +} + +/** + * insert_node - insert a node to the replay tree. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset + * @len: node length + * @key: node key + * @sqnum: sequence number + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * @old_size: truncation old size + * @new_size: truncation new size + * + * This function inserts a scanned non-direntry node to the replay tree. The + * replay tree is an RB-tree containing @struct replay_entry elements which are + * indexed by the sequence number. The replay tree is applied at the very end + * of the replay process. Since the tree is sorted in sequence number order, + * the older modifications are applied first. This function returns zero in + * case of success and a negative error code in case of failure. + */ +static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, unsigned long long sqnum, + int deletion, int *used, loff_t old_size, + loff_t new_size) +{ + struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + while (*p) { + parent = *p; + r = rb_entry(parent, struct replay_entry, rb); + if (sqnum < r->sqnum) { + p = &(*p)->rb_left; + continue; + } else if (sqnum > r->sqnum) { + p = &(*p)->rb_right; + continue; + } + ubifs_err("duplicate sqnum in replay"); + return -EINVAL; + } + + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; + + if (!deletion) + *used += ALIGN(len, 8); + r->lnum = lnum; + r->offs = offs; + r->len = len; + r->sqnum = sqnum; + r->flags = (deletion ? REPLAY_DELETION : 0); + r->old_size = old_size; + r->new_size = new_size; + key_copy(c, key, &r->key); + + rb_link_node(&r->rb, parent, p); + rb_insert_color(&r->rb, &c->replay_tree); + return 0; +} + +/** + * insert_dent - insert a directory entry node into the replay tree. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset + * @len: node length + * @key: node key + * @name: directory entry name + * @nlen: directory entry name length + * @sqnum: sequence number + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * + * This function inserts a scanned directory entry node to the replay tree. + * Returns zero in case of success and a negative error code in case of + * failure. + * + * This function is also used for extended attribute entries because they are + * implemented as directory entry nodes. + */ +static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, const char *name, int nlen, + unsigned long long sqnum, int deletion, int *used) +{ + struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + char *nbuf; + + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + while (*p) { + parent = *p; + r = rb_entry(parent, struct replay_entry, rb); + if (sqnum < r->sqnum) { + p = &(*p)->rb_left; + continue; + } + if (sqnum > r->sqnum) { + p = &(*p)->rb_right; + continue; + } + ubifs_err("duplicate sqnum in replay"); + return -EINVAL; + } + + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; + nbuf = kmalloc(nlen + 1, GFP_KERNEL); + if (!nbuf) { + kfree(r); + return -ENOMEM; + } + + if (!deletion) + *used += ALIGN(len, 8); + r->lnum = lnum; + r->offs = offs; + r->len = len; + r->sqnum = sqnum; + r->nm.len = nlen; + memcpy(nbuf, name, nlen); + nbuf[nlen] = '\0'; + r->nm.name = nbuf; + r->flags = (deletion ? REPLAY_DELETION : 0); + key_copy(c, key, &r->key); + + ubifs_assert(!*p); + rb_link_node(&r->rb, parent, p); + rb_insert_color(&r->rb, &c->replay_tree); + return 0; +} + +/** + * ubifs_validate_entry - validate directory or extended attribute entry node. + * @c: UBIFS file-system description object + * @dent: the node to validate + * + * This function validates directory or extended attribute entry node @dent. + * Returns zero if the node is all right and a %-EINVAL if not. + */ +int ubifs_validate_entry(struct ubifs_info *c, + const struct ubifs_dent_node *dent) +{ + int key_type = key_type_flash(c, dent->key); + int nlen = le16_to_cpu(dent->nlen); + + if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || + dent->type >= UBIFS_ITYPES_CNT || + nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || + strnlen(dent->name, nlen) != nlen || + le64_to_cpu(dent->inum) > MAX_INUM) { + ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? + "directory entry" : "extended attribute entry"); + return -EINVAL; + } + + if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { + ubifs_err("bad key type %d", key_type); + return -EINVAL; + } + + return 0; +} + +/** + * replay_bud - replay a bud logical eraseblock. + * @c: UBIFS file-system description object + * @lnum: bud logical eraseblock number to replay + * @offs: bud start offset + * @jhead: journal head to which this bud belongs + * @free: amount of free space in the bud is returned here + * @dirty: amount of dirty space from padding and deletion nodes is returned + * here + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, + int *free, int *dirty) +{ + int err = 0, used = 0; + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + struct ubifs_bud *bud; + + dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); + if (c->need_recovery) + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); + else + sleb = ubifs_scan(c, lnum, offs, c->sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + + /* + * The bud does not have to start from offset zero - the beginning of + * the 'lnum' LEB may contain previously committed data. One of the + * things we have to do in replay is to correctly update lprops with + * newer information about this LEB. + * + * At this point lprops thinks that this LEB has 'c->leb_size - offs' + * bytes of free space because it only contain information about + * committed data. + * + * But we know that real amount of free space is 'c->leb_size - + * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and + * 'sleb->endpt' is used by bud data. We have to correctly calculate + * how much of these data are dirty and update lprops with this + * information. + * + * The dirt in that LEB region is comprised of padding nodes, deletion + * nodes, truncation nodes and nodes which are obsoleted by subsequent + * nodes in this LEB. So instead of calculating clean space, we + * calculate used space ('used' variable). + */ + + list_for_each_entry(snod, &sleb->nodes, list) { + int deletion = 0; + + cond_resched(); + + if (snod->sqnum >= SQNUM_WATERMARK) { + ubifs_err("file system's life ended"); + goto out_dump; + } + + if (snod->sqnum > c->max_sqnum) + c->max_sqnum = snod->sqnum; + + switch (snod->type) { + case UBIFS_INO_NODE: + { + struct ubifs_ino_node *ino = snod->node; + loff_t new_size = le64_to_cpu(ino->size); + + if (le32_to_cpu(ino->nlink) == 0) + deletion = 1; + err = insert_node(c, lnum, snod->offs, snod->len, + &snod->key, snod->sqnum, deletion, + &used, 0, new_size); + break; + } + case UBIFS_DATA_NODE: + { + struct ubifs_data_node *dn = snod->node; + loff_t new_size = le32_to_cpu(dn->size) + + key_block(c, &snod->key) * + UBIFS_BLOCK_SIZE; + + err = insert_node(c, lnum, snod->offs, snod->len, + &snod->key, snod->sqnum, deletion, + &used, 0, new_size); + break; + } + case UBIFS_DENT_NODE: + case UBIFS_XENT_NODE: + { + struct ubifs_dent_node *dent = snod->node; + + err = ubifs_validate_entry(c, dent); + if (err) + goto out_dump; + + err = insert_dent(c, lnum, snod->offs, snod->len, + &snod->key, dent->name, + le16_to_cpu(dent->nlen), snod->sqnum, + !le64_to_cpu(dent->inum), &used); + break; + } + case UBIFS_TRUN_NODE: + { + struct ubifs_trun_node *trun = snod->node; + loff_t old_size = le64_to_cpu(trun->old_size); + loff_t new_size = le64_to_cpu(trun->new_size); + union ubifs_key key; + + /* Validate truncation node */ + if (old_size < 0 || old_size > c->max_inode_sz || + new_size < 0 || new_size > c->max_inode_sz || + old_size <= new_size) { + ubifs_err("bad truncation node"); + goto out_dump; + } + + /* + * Create a fake truncation key just to use the same + * functions which expect nodes to have keys. + */ + trun_key_init(c, &key, le32_to_cpu(trun->inum)); + err = insert_node(c, lnum, snod->offs, snod->len, + &key, snod->sqnum, 1, &used, + old_size, new_size); + break; + } + default: + ubifs_err("unexpected node type %d in bud LEB %d:%d", + snod->type, lnum, snod->offs); + err = -EINVAL; + goto out_dump; + } + if (err) + goto out; + } + + bud = ubifs_search_bud(c, lnum); + if (!bud) + BUG(); + + ubifs_assert(sleb->endpt - offs >= used); + ubifs_assert(sleb->endpt % c->min_io_size == 0); + + if (sleb->endpt + c->min_io_size <= c->leb_size && + !(c->vfs_sb->s_flags & MS_RDONLY)) + err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, + sleb->endpt, UBI_SHORTTERM); + + *dirty = sleb->endpt - offs - used; + *free = c->leb_size - sleb->endpt; + +out: + ubifs_scan_destroy(sleb); + return err; + +out_dump: + ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); + dbg_dump_node(c, snod->node); + ubifs_scan_destroy(sleb); + return -EINVAL; +} + +/** + * insert_ref_node - insert a reference node to the replay tree. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset + * @sqnum: sequence number + * @free: amount of free space in bud + * @dirty: amount of dirty space from padding and deletion nodes + * + * This function inserts a reference node to the replay tree and returns zero + * in case of success ort a negative error code in case of failure. + */ +static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, + unsigned long long sqnum, int free, int dirty) +{ + struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + + dbg_mnt("add ref LEB %d:%d", lnum, offs); + while (*p) { + parent = *p; + r = rb_entry(parent, struct replay_entry, rb); + if (sqnum < r->sqnum) { + p = &(*p)->rb_left; + continue; + } else if (sqnum > r->sqnum) { + p = &(*p)->rb_right; + continue; + } + ubifs_err("duplicate sqnum in replay tree"); + return -EINVAL; + } + + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; + + r->lnum = lnum; + r->offs = offs; + r->sqnum = sqnum; + r->flags = REPLAY_REF; + r->free = free; + r->dirty = dirty; + + rb_link_node(&r->rb, parent, p); + rb_insert_color(&r->rb, &c->replay_tree); + return 0; +} + +/** + * replay_buds - replay all buds. + * @c: UBIFS file-system description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int replay_buds(struct ubifs_info *c) +{ + struct bud_entry *b; + int err, uninitialized_var(free), uninitialized_var(dirty); + + list_for_each_entry(b, &c->replay_buds, list) { + err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, + &free, &dirty); + if (err) + return err; + err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, + free, dirty); + if (err) + return err; + } + + return 0; +} + +/** + * destroy_bud_list - destroy the list of buds to replay. + * @c: UBIFS file-system description object + */ +static void destroy_bud_list(struct ubifs_info *c) +{ + struct bud_entry *b; + + while (!list_empty(&c->replay_buds)) { + b = list_entry(c->replay_buds.next, struct bud_entry, list); + list_del(&b->list); + kfree(b); + } +} + +/** + * add_replay_bud - add a bud to the list of buds to replay. + * @c: UBIFS file-system description object + * @lnum: bud logical eraseblock number to replay + * @offs: bud start offset + * @jhead: journal head to which this bud belongs + * @sqnum: reference node sequence number + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, + unsigned long long sqnum) +{ + struct ubifs_bud *bud; + struct bud_entry *b; + + dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); + + bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); + if (!bud) + return -ENOMEM; + + b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); + if (!b) { + kfree(bud); + return -ENOMEM; + } + + bud->lnum = lnum; + bud->start = offs; + bud->jhead = jhead; + ubifs_add_bud(c, bud); + + b->bud = bud; + b->sqnum = sqnum; + list_add_tail(&b->list, &c->replay_buds); + + return 0; +} + +/** + * validate_ref - validate a reference node. + * @c: UBIFS file-system description object + * @ref: the reference node to validate + * @ref_lnum: LEB number of the reference node + * @ref_offs: reference node offset + * + * This function returns %1 if a bud reference already exists for the LEB. %0 is + * returned if the reference node is new, otherwise %-EINVAL is returned if + * validation failed. + */ +static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) +{ + struct ubifs_bud *bud; + int lnum = le32_to_cpu(ref->lnum); + unsigned int offs = le32_to_cpu(ref->offs); + unsigned int jhead = le32_to_cpu(ref->jhead); + + /* + * ref->offs may point to the end of LEB when the journal head points + * to the end of LEB and we write reference node for it during commit. + * So this is why we require 'offs > c->leb_size'. + */ + if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || + lnum < c->main_first || offs > c->leb_size || + offs & (c->min_io_size - 1)) + return -EINVAL; + + /* Make sure we have not already looked at this bud */ + bud = ubifs_search_bud(c, lnum); + if (bud) { + if (bud->jhead == jhead && bud->start <= offs) + return 1; + ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); + return -EINVAL; + } + + return 0; +} + +/** + * replay_log_leb - replay a log logical eraseblock. + * @c: UBIFS file-system description object + * @lnum: log logical eraseblock to replay + * @offs: offset to start replaying from + * @sbuf: scan buffer + * + * This function replays a log LEB and returns zero in case of success, %1 if + * this is the last LEB in the log, and a negative error code in case of + * failure. + */ +static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) +{ + int err; + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + const struct ubifs_cs_node *node; + + dbg_mnt("replay log LEB %d:%d", lnum, offs); + sleb = ubifs_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) { + if (c->need_recovery) + sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + } + + if (sleb->nodes_cnt == 0) { + err = 1; + goto out; + } + + node = sleb->buf; + + snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); + if (c->cs_sqnum == 0) { + /* + * This is the first log LEB we are looking at, make sure that + * the first node is a commit start node. Also record its + * sequence number so that UBIFS can determine where the log + * ends, because all nodes which were have higher sequence + * numbers. + */ + if (snod->type != UBIFS_CS_NODE) { + dbg_err("first log node at LEB %d:%d is not CS node", + lnum, offs); + goto out_dump; + } + if (le64_to_cpu(node->cmt_no) != c->cmt_no) { + dbg_err("first CS node at LEB %d:%d has wrong " + "commit number %llu expected %llu", + lnum, offs, + (unsigned long long)le64_to_cpu(node->cmt_no), + c->cmt_no); + goto out_dump; + } + + c->cs_sqnum = le64_to_cpu(node->ch.sqnum); + dbg_mnt("commit start sqnum %llu", c->cs_sqnum); + } + + if (snod->sqnum < c->cs_sqnum) { + /* + * This means that we reached end of log and now + * look to the older log data, which was already + * committed but the eraseblock was not erased (UBIFS + * only unmaps it). So this basically means we have to + * exit with "end of log" code. + */ + err = 1; + goto out; + } + + /* Make sure the first node sits at offset zero of the LEB */ + if (snod->offs != 0) { + dbg_err("first node is not at zero offset"); + goto out_dump; + } + + list_for_each_entry(snod, &sleb->nodes, list) { + + cond_resched(); + + if (snod->sqnum >= SQNUM_WATERMARK) { + ubifs_err("file system's life ended"); + goto out_dump; + } + + if (snod->sqnum < c->cs_sqnum) { + dbg_err("bad sqnum %llu, commit sqnum %llu", + snod->sqnum, c->cs_sqnum); + goto out_dump; + } + + if (snod->sqnum > c->max_sqnum) + c->max_sqnum = snod->sqnum; + + switch (snod->type) { + case UBIFS_REF_NODE: { + const struct ubifs_ref_node *ref = snod->node; + + err = validate_ref(c, ref); + if (err == 1) + break; /* Already have this bud */ + if (err) + goto out_dump; + + err = add_replay_bud(c, le32_to_cpu(ref->lnum), + le32_to_cpu(ref->offs), + le32_to_cpu(ref->jhead), + snod->sqnum); + if (err) + goto out; + + break; + } + case UBIFS_CS_NODE: + /* Make sure it sits at the beginning of LEB */ + if (snod->offs != 0) { + ubifs_err("unexpected node in log"); + goto out_dump; + } + break; + default: + ubifs_err("unexpected node in log"); + goto out_dump; + } + } + + if (sleb->endpt || c->lhead_offs >= c->leb_size) { + c->lhead_lnum = lnum; + c->lhead_offs = sleb->endpt; + } + + err = !sleb->endpt; +out: + ubifs_scan_destroy(sleb); + return err; + +out_dump: + ubifs_err("log error detected while replying the log at LEB %d:%d", + lnum, offs + snod->offs); + dbg_dump_node(c, snod->node); + ubifs_scan_destroy(sleb); + return -EINVAL; +} + +/** + * take_ihead - update the status of the index head in lprops to 'taken'. + * @c: UBIFS file-system description object + * + * This function returns the amount of free space in the index head LEB or a + * negative error code. + */ +static int take_ihead(struct ubifs_info *c) +{ + const struct ubifs_lprops *lp; + int err, free; + + ubifs_get_lprops(c); + + lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + free = lp->free; + + lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + err = free; +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_replay_journal - replay journal. + * @c: UBIFS file-system description object + * + * This function scans the journal, replays and cleans it up. It makes sure all + * memory data structures related to uncommitted journal are built (dirty TNC + * tree, tree of buds, modified lprops, etc). + */ +int ubifs_replay_journal(struct ubifs_info *c) +{ + int err, i, lnum, offs, free; + void *sbuf = NULL; + + BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); + + /* Update the status of the index head in lprops to 'taken' */ + free = take_ihead(c); + if (free < 0) + return free; /* Error code */ + + if (c->ihead_offs != c->leb_size - free) { + ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, + c->ihead_offs); + return -EINVAL; + } + + sbuf = vmalloc(c->leb_size); + if (!sbuf) + return -ENOMEM; + + dbg_mnt("start replaying the journal"); + + c->replaying = 1; + + lnum = c->ltail_lnum = c->lhead_lnum; + offs = c->lhead_offs; + + for (i = 0; i < c->log_lebs; i++, lnum++) { + if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { + /* + * The log is logically circular, we reached the last + * LEB, switch to the first one. + */ + lnum = UBIFS_LOG_LNUM; + offs = 0; + } + err = replay_log_leb(c, lnum, offs, sbuf); + if (err == 1) + /* We hit the end of the log */ + break; + if (err) + goto out; + offs = 0; + } + + err = replay_buds(c); + if (err) + goto out; + + err = apply_replay_tree(c); + if (err) + goto out; + + ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); + dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " + "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, + c->highest_inum); +out: + destroy_replay_tree(c); + destroy_bud_list(c); + vfree(sbuf); + c->replaying = 0; + return err; +} diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c new file mode 100644 index 000000000000..2bf753b38889 --- /dev/null +++ b/fs/ubifs/sb.c @@ -0,0 +1,629 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS superblock. The superblock is stored at the first + * LEB of the volume and is never changed by UBIFS. Only user-space tools may + * change it. The superblock node mostly contains geometry information. + */ + +#include "ubifs.h" +#include + +/* + * Default journal size in logical eraseblocks as a percent of total + * flash size. + */ +#define DEFAULT_JNL_PERCENT 5 + +/* Default maximum journal size in bytes */ +#define DEFAULT_MAX_JNL (32*1024*1024) + +/* Default indexing tree fanout */ +#define DEFAULT_FANOUT 8 + +/* Default number of data journal heads */ +#define DEFAULT_JHEADS_CNT 1 + +/* Default positions of different LEBs in the main area */ +#define DEFAULT_IDX_LEB 0 +#define DEFAULT_DATA_LEB 1 +#define DEFAULT_GC_LEB 2 + +/* Default number of LEB numbers in LPT's save table */ +#define DEFAULT_LSAVE_CNT 256 + +/* Default reserved pool size as a percent of maximum free space */ +#define DEFAULT_RP_PERCENT 5 + +/* The default maximum size of reserved pool in bytes */ +#define DEFAULT_MAX_RP_SIZE (5*1024*1024) + +/* Default time granularity in nanoseconds */ +#define DEFAULT_TIME_GRAN 1000000000 + +/** + * create_default_filesystem - format empty UBI volume. + * @c: UBIFS file-system description object + * + * This function creates default empty file-system. Returns zero in case of + * success and a negative error code in case of failure. + */ +static int create_default_filesystem(struct ubifs_info *c) +{ + struct ubifs_sb_node *sup; + struct ubifs_mst_node *mst; + struct ubifs_idx_node *idx; + struct ubifs_branch *br; + struct ubifs_ino_node *ino; + struct ubifs_cs_node *cs; + union ubifs_key key; + int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; + int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; + int min_leb_cnt = UBIFS_MIN_LEB_CNT; + uint64_t tmp64, main_bytes; + + /* Some functions called from here depend on the @c->key_len filed */ + c->key_len = UBIFS_SK_LEN; + + /* + * First of all, we have to calculate default file-system geometry - + * log size, journal size, etc. + */ + if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT) + /* We can first multiply then divide and have no overflow */ + jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100; + else + jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT; + + if (jnl_lebs < UBIFS_MIN_JNL_LEBS) + jnl_lebs = UBIFS_MIN_JNL_LEBS; + if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL) + jnl_lebs = DEFAULT_MAX_JNL / c->leb_size; + + /* + * The log should be large enough to fit reference nodes for all bud + * LEBs. Because buds do not have to start from the beginning of LEBs + * (half of the LEB may contain committed data), the log should + * generally be larger, make it twice as large. + */ + tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1; + log_lebs = tmp / c->leb_size; + /* Plus one LEB reserved for commit */ + log_lebs += 1; + if (c->leb_cnt - min_leb_cnt > 8) { + /* And some extra space to allow writes while committing */ + log_lebs += 1; + min_leb_cnt += 1; + } + + max_buds = jnl_lebs - log_lebs; + if (max_buds < UBIFS_MIN_BUD_LEBS) + max_buds = UBIFS_MIN_BUD_LEBS; + + /* + * Orphan nodes are stored in a separate area. One node can store a lot + * of orphan inode numbers, but when new orphan comes we just add a new + * orphan node. At some point the nodes are consolidated into one + * orphan node. + */ + orph_lebs = UBIFS_MIN_ORPH_LEBS; +#ifdef CONFIG_UBIFS_FS_DEBUG + if (c->leb_cnt - min_leb_cnt > 1) + /* + * For debugging purposes it is better to have at least 2 + * orphan LEBs, because the orphan subsystem would need to do + * consolidations and would be stressed more. + */ + orph_lebs += 1; +#endif + + main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs; + main_lebs -= orph_lebs; + + lpt_first = UBIFS_LOG_LNUM + log_lebs; + c->lsave_cnt = DEFAULT_LSAVE_CNT; + c->max_leb_cnt = c->leb_cnt; + err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs, + &big_lpt); + if (err) + return err; + + dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first, + lpt_first + lpt_lebs - 1); + + main_first = c->leb_cnt - main_lebs; + + /* Create default superblock */ + tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); + sup = kzalloc(tmp, GFP_KERNEL); + if (!sup) + return -ENOMEM; + + tmp64 = (uint64_t)max_buds * c->leb_size; + if (big_lpt) + sup_flags |= UBIFS_FLG_BIGLPT; + + sup->ch.node_type = UBIFS_SB_NODE; + sup->key_hash = UBIFS_KEY_HASH_R5; + sup->flags = cpu_to_le32(sup_flags); + sup->min_io_size = cpu_to_le32(c->min_io_size); + sup->leb_size = cpu_to_le32(c->leb_size); + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + sup->max_leb_cnt = cpu_to_le32(c->max_leb_cnt); + sup->max_bud_bytes = cpu_to_le64(tmp64); + sup->log_lebs = cpu_to_le32(log_lebs); + sup->lpt_lebs = cpu_to_le32(lpt_lebs); + sup->orph_lebs = cpu_to_le32(orph_lebs); + sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT); + sup->fanout = cpu_to_le32(DEFAULT_FANOUT); + sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); + sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); + sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); + sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); + + generate_random_uuid(sup->uuid); + + main_bytes = (uint64_t)main_lebs * c->leb_size; + tmp64 = main_bytes * DEFAULT_RP_PERCENT; + do_div(tmp64, 100); + if (tmp64 > DEFAULT_MAX_RP_SIZE) + tmp64 = DEFAULT_MAX_RP_SIZE; + sup->rp_size = cpu_to_le64(tmp64); + + err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); + kfree(sup); + if (err) + return err; + + dbg_gen("default superblock created at LEB 0:0"); + + /* Create default master node */ + mst = kzalloc(c->mst_node_alsz, GFP_KERNEL); + if (!mst) + return -ENOMEM; + + mst->ch.node_type = UBIFS_MST_NODE; + mst->log_lnum = cpu_to_le32(UBIFS_LOG_LNUM); + mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO); + mst->cmt_no = 0; + mst->root_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); + mst->root_offs = 0; + tmp = ubifs_idx_node_sz(c, 1); + mst->root_len = cpu_to_le32(tmp); + mst->gc_lnum = cpu_to_le32(main_first + DEFAULT_GC_LEB); + mst->ihead_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); + mst->ihead_offs = cpu_to_le32(ALIGN(tmp, c->min_io_size)); + mst->index_size = cpu_to_le64(ALIGN(tmp, 8)); + mst->lpt_lnum = cpu_to_le32(c->lpt_lnum); + mst->lpt_offs = cpu_to_le32(c->lpt_offs); + mst->nhead_lnum = cpu_to_le32(c->nhead_lnum); + mst->nhead_offs = cpu_to_le32(c->nhead_offs); + mst->ltab_lnum = cpu_to_le32(c->ltab_lnum); + mst->ltab_offs = cpu_to_le32(c->ltab_offs); + mst->lsave_lnum = cpu_to_le32(c->lsave_lnum); + mst->lsave_offs = cpu_to_le32(c->lsave_offs); + mst->lscan_lnum = cpu_to_le32(main_first); + mst->empty_lebs = cpu_to_le32(main_lebs - 2); + mst->idx_lebs = cpu_to_le32(1); + mst->leb_cnt = cpu_to_le32(c->leb_cnt); + + /* Calculate lprops statistics */ + tmp64 = main_bytes; + tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); + tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); + mst->total_free = cpu_to_le64(tmp64); + + tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); + ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) - + UBIFS_INO_NODE_SZ; + tmp64 += ino_waste; + tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8); + mst->total_dirty = cpu_to_le64(tmp64); + + /* The indexing LEB does not contribute to dark space */ + tmp64 = (c->main_lebs - 1) * c->dark_wm; + mst->total_dark = cpu_to_le64(tmp64); + + mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); + + err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0, + UBI_UNKNOWN); + if (err) { + kfree(mst); + return err; + } + err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0, + UBI_UNKNOWN); + kfree(mst); + if (err) + return err; + + dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM); + + /* Create the root indexing node */ + tmp = ubifs_idx_node_sz(c, 1); + idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL); + if (!idx) + return -ENOMEM; + + c->key_fmt = UBIFS_SIMPLE_KEY_FMT; + c->key_hash = key_r5_hash; + + idx->ch.node_type = UBIFS_IDX_NODE; + idx->child_cnt = cpu_to_le16(1); + ino_key_init(c, &key, UBIFS_ROOT_INO); + br = ubifs_idx_branch(c, idx, 0); + key_write_idx(c, &key, &br->key); + br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB); + br->len = cpu_to_le32(UBIFS_INO_NODE_SZ); + err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0, + UBI_UNKNOWN); + kfree(idx); + if (err) + return err; + + dbg_gen("default root indexing node created LEB %d:0", + main_first + DEFAULT_IDX_LEB); + + /* Create default root inode */ + tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); + ino = kzalloc(tmp, GFP_KERNEL); + if (!ino) + return -ENOMEM; + + ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO); + ino->ch.node_type = UBIFS_INO_NODE; + ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); + ino->nlink = cpu_to_le32(2); + tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); + ino->atime_sec = tmp; + ino->ctime_sec = tmp; + ino->mtime_sec = tmp; + ino->atime_nsec = 0; + ino->ctime_nsec = 0; + ino->mtime_nsec = 0; + ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO); + ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ); + + /* Set compression enabled by default */ + ino->flags = cpu_to_le32(UBIFS_COMPR_FL); + + err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ, + main_first + DEFAULT_DATA_LEB, 0, + UBI_UNKNOWN); + kfree(ino); + if (err) + return err; + + dbg_gen("root inode created at LEB %d:0", + main_first + DEFAULT_DATA_LEB); + + /* + * The first node in the log has to be the commit start node. This is + * always the case during normal file-system operation. Write a fake + * commit start node to the log. + */ + tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size); + cs = kzalloc(tmp, GFP_KERNEL); + if (!cs) + return -ENOMEM; + + cs->ch.node_type = UBIFS_CS_NODE; + err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, + 0, UBI_UNKNOWN); + kfree(cs); + + ubifs_msg("default file-system created"); + return 0; +} + +/** + * validate_sb - validate superblock node. + * @c: UBIFS file-system description object + * @sup: superblock node + * + * This function validates superblock node @sup. Since most of data was read + * from the superblock and stored in @c, the function validates fields in @c + * instead. Returns zero in case of success and %-EINVAL in case of validation + * failure. + */ +static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) +{ + long long max_bytes; + int err = 1, min_leb_cnt; + + if (!c->key_hash) { + err = 2; + goto failed; + } + + if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) { + err = 3; + goto failed; + } + + if (le32_to_cpu(sup->min_io_size) != c->min_io_size) { + ubifs_err("min. I/O unit mismatch: %d in superblock, %d real", + le32_to_cpu(sup->min_io_size), c->min_io_size); + goto failed; + } + + if (le32_to_cpu(sup->leb_size) != c->leb_size) { + ubifs_err("LEB size mismatch: %d in superblock, %d real", + le32_to_cpu(sup->leb_size), c->leb_size); + goto failed; + } + + if (c->log_lebs < UBIFS_MIN_LOG_LEBS || + c->lpt_lebs < UBIFS_MIN_LPT_LEBS || + c->orph_lebs < UBIFS_MIN_ORPH_LEBS || + c->main_lebs < UBIFS_MIN_MAIN_LEBS) { + err = 4; + goto failed; + } + + /* + * Calculate minimum allowed amount of main area LEBs. This is very + * similar to %UBIFS_MIN_LEB_CNT, but we take into account real what we + * have just read from the superblock. + */ + min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs; + min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; + + if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { + ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " + "%d minimum required", c->leb_cnt, c->vi.size, + min_leb_cnt); + goto failed; + } + + if (c->max_leb_cnt < c->leb_cnt) { + ubifs_err("max. LEB count %d less than LEB count %d", + c->max_leb_cnt, c->leb_cnt); + goto failed; + } + + if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { + err = 7; + goto failed; + } + + if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || + c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { + err = 8; + goto failed; + } + + if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 || + c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) { + err = 9; + goto failed; + } + + if (c->fanout < UBIFS_MIN_FANOUT || + ubifs_idx_node_sz(c, c->fanout) > c->leb_size) { + err = 10; + goto failed; + } + + if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT && + c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - + c->log_lebs - c->lpt_lebs - c->orph_lebs)) { + err = 11; + goto failed; + } + + if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs + + c->orph_lebs + c->main_lebs != c->leb_cnt) { + err = 12; + goto failed; + } + + if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) { + err = 13; + goto failed; + } + + max_bytes = c->main_lebs * (long long)c->leb_size; + if (c->rp_size < 0 || max_bytes < c->rp_size) { + err = 14; + goto failed; + } + + if (le32_to_cpu(sup->time_gran) > 1000000000 || + le32_to_cpu(sup->time_gran) < 1) { + err = 15; + goto failed; + } + + return 0; + +failed: + ubifs_err("bad superblock, error %d", err); + dbg_dump_node(c, sup); + return -EINVAL; +} + +/** + * ubifs_read_sb_node - read superblock node. + * @c: UBIFS file-system description object + * + * This function returns a pointer to the superblock node or a negative error + * code. + */ +struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) +{ + struct ubifs_sb_node *sup; + int err; + + sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS); + if (!sup) + return ERR_PTR(-ENOMEM); + + err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ, + UBIFS_SB_LNUM, 0); + if (err) { + kfree(sup); + return ERR_PTR(err); + } + + return sup; +} + +/** + * ubifs_write_sb_node - write superblock node. + * @c: UBIFS file-system description object + * @sup: superblock node read with 'ubifs_read_sb_node()' + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup) +{ + int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); + + ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1); + return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len, UBI_LONGTERM); +} + +/** + * ubifs_read_superblock - read superblock. + * @c: UBIFS file-system description object + * + * This function finds, reads and checks the superblock. If an empty UBI volume + * is being mounted, this function creates default superblock. Returns zero in + * case of success, and a negative error code in case of failure. + */ +int ubifs_read_superblock(struct ubifs_info *c) +{ + int err, sup_flags; + struct ubifs_sb_node *sup; + + if (c->empty) { + err = create_default_filesystem(c); + if (err) + return err; + } + + sup = ubifs_read_sb_node(c); + if (IS_ERR(sup)) + return PTR_ERR(sup); + + /* + * The software supports all previous versions but not future versions, + * due to the unavailability of time-travelling equipment. + */ + c->fmt_version = le32_to_cpu(sup->fmt_version); + if (c->fmt_version > UBIFS_FORMAT_VERSION) { + ubifs_err("on-flash format version is %d, but software only " + "supports up to version %d", c->fmt_version, + UBIFS_FORMAT_VERSION); + err = -EINVAL; + goto out; + } + + if (c->fmt_version < 3) { + ubifs_err("on-flash format version %d is not supported", + c->fmt_version); + err = -EINVAL; + goto out; + } + + switch (sup->key_hash) { + case UBIFS_KEY_HASH_R5: + c->key_hash = key_r5_hash; + c->key_hash_type = UBIFS_KEY_HASH_R5; + break; + + case UBIFS_KEY_HASH_TEST: + c->key_hash = key_test_hash; + c->key_hash_type = UBIFS_KEY_HASH_TEST; + break; + }; + + c->key_fmt = sup->key_fmt; + + switch (c->key_fmt) { + case UBIFS_SIMPLE_KEY_FMT: + c->key_len = UBIFS_SK_LEN; + break; + default: + ubifs_err("unsupported key format"); + err = -EINVAL; + goto out; + } + + c->leb_cnt = le32_to_cpu(sup->leb_cnt); + c->max_leb_cnt = le32_to_cpu(sup->max_leb_cnt); + c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes); + c->log_lebs = le32_to_cpu(sup->log_lebs); + c->lpt_lebs = le32_to_cpu(sup->lpt_lebs); + c->orph_lebs = le32_to_cpu(sup->orph_lebs); + c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; + c->fanout = le32_to_cpu(sup->fanout); + c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); + c->default_compr = le16_to_cpu(sup->default_compr); + c->rp_size = le64_to_cpu(sup->rp_size); + c->rp_uid = le32_to_cpu(sup->rp_uid); + c->rp_gid = le32_to_cpu(sup->rp_gid); + sup_flags = le32_to_cpu(sup->flags); + + c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); + + memcpy(&c->uuid, &sup->uuid, 16); + + c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); + + /* Automatically increase file system size to the maximum size */ + c->old_leb_cnt = c->leb_cnt; + if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { + c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); + if (c->vfs_sb->s_flags & MS_RDONLY) + dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", + c->old_leb_cnt, c->leb_cnt); + else { + dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs", + c->old_leb_cnt, c->leb_cnt); + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + err = ubifs_write_sb_node(c, sup); + if (err) + goto out; + c->old_leb_cnt = c->leb_cnt; + } + } + + c->log_bytes = (long long)c->log_lebs * c->leb_size; + c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1; + c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs; + c->lpt_last = c->lpt_first + c->lpt_lebs - 1; + c->orph_first = c->lpt_last + 1; + c->orph_last = c->orph_first + c->orph_lebs - 1; + c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; + c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; + c->main_first = c->leb_cnt - c->main_lebs; + c->report_rp_size = ubifs_reported_space(c, c->rp_size); + + err = validate_sb(c, sup); +out: + kfree(sup); + return err; +} diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c new file mode 100644 index 000000000000..acf5c5fffc60 --- /dev/null +++ b/fs/ubifs/scan.c @@ -0,0 +1,362 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the scan which is a general-purpose function for + * determining what nodes are in an eraseblock. The scan is used to replay the + * journal, to do garbage collection. for the TNC in-the-gaps method, and by + * debugging functions. + */ + +#include "ubifs.h" + +/** + * scan_padding_bytes - scan for padding bytes. + * @buf: buffer to scan + * @len: length of buffer + * + * This function returns the number of padding bytes on success and + * %SCANNED_GARBAGE on failure. + */ +static int scan_padding_bytes(void *buf, int len) +{ + int pad_len = 0, max_pad_len = min_t(int, UBIFS_PAD_NODE_SZ, len); + uint8_t *p = buf; + + dbg_scan("not a node"); + + while (pad_len < max_pad_len && *p++ == UBIFS_PADDING_BYTE) + pad_len += 1; + + if (!pad_len || (pad_len & 7)) + return SCANNED_GARBAGE; + + dbg_scan("%d padding bytes", pad_len); + + return pad_len; +} + +/** + * ubifs_scan_a_node - scan for a node or padding. + * @c: UBIFS file-system description object + * @buf: buffer to scan + * @len: length of buffer + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * @quiet: print no messages + * + * This function returns a scanning code to indicate what was scanned. + */ +int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, + int offs, int quiet) +{ + struct ubifs_ch *ch = buf; + uint32_t magic; + + magic = le32_to_cpu(ch->magic); + + if (magic == 0xFFFFFFFF) { + dbg_scan("hit empty space"); + return SCANNED_EMPTY_SPACE; + } + + if (magic != UBIFS_NODE_MAGIC) + return scan_padding_bytes(buf, len); + + if (len < UBIFS_CH_SZ) + return SCANNED_GARBAGE; + + dbg_scan("scanning %s", dbg_ntype(ch->node_type)); + + if (ubifs_check_node(c, buf, lnum, offs, quiet)) + return SCANNED_A_CORRUPT_NODE; + + if (ch->node_type == UBIFS_PAD_NODE) { + struct ubifs_pad_node *pad = buf; + int pad_len = le32_to_cpu(pad->pad_len); + int node_len = le32_to_cpu(ch->len); + + /* Validate the padding node */ + if (pad_len < 0 || + offs + node_len + pad_len > c->leb_size) { + if (!quiet) { + ubifs_err("bad pad node at LEB %d:%d", + lnum, offs); + dbg_dump_node(c, pad); + } + return SCANNED_A_BAD_PAD_NODE; + } + + /* Make the node pads to 8-byte boundary */ + if ((node_len + pad_len) & 7) { + if (!quiet) { + dbg_err("bad padding length %d - %d", + offs, offs + node_len + pad_len); + } + return SCANNED_A_BAD_PAD_NODE; + } + + dbg_scan("%d bytes padded, offset now %d", + pad_len, ALIGN(offs + node_len + pad_len, 8)); + + return node_len + pad_len; + } + + return SCANNED_A_NODE; +} + +/** + * ubifs_start_scan - create LEB scanning information at start of scan. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * @sbuf: scan buffer (must be c->leb_size) + * + * This function returns %0 on success and a negative error code on failure. + */ +struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf) +{ + struct ubifs_scan_leb *sleb; + int err; + + dbg_scan("scan LEB %d:%d", lnum, offs); + + sleb = kzalloc(sizeof(struct ubifs_scan_leb), GFP_NOFS); + if (!sleb) + return ERR_PTR(-ENOMEM); + + sleb->lnum = lnum; + INIT_LIST_HEAD(&sleb->nodes); + sleb->buf = sbuf; + + err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); + if (err && err != -EBADMSG) { + ubifs_err("cannot read %d bytes from LEB %d:%d," + " error %d", c->leb_size - offs, lnum, offs, err); + kfree(sleb); + return ERR_PTR(err); + } + + if (err == -EBADMSG) + sleb->ecc = 1; + + return sleb; +} + +/** + * ubifs_end_scan - update LEB scanning information at end of scan. + * @c: UBIFS file-system description object + * @sleb: scanning information + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * + * This function returns %0 on success and a negative error code on failure. + */ +void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int lnum, int offs) +{ + lnum = lnum; + dbg_scan("stop scanning LEB %d at offset %d", lnum, offs); + ubifs_assert(offs % c->min_io_size == 0); + + sleb->endpt = ALIGN(offs, c->min_io_size); +} + +/** + * ubifs_add_snod - add a scanned node to LEB scanning information. + * @c: UBIFS file-system description object + * @sleb: scanning information + * @buf: buffer containing node + * @offs: offset of node on flash + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + void *buf, int offs) +{ + struct ubifs_ch *ch = buf; + struct ubifs_ino_node *ino = buf; + struct ubifs_scan_node *snod; + + snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); + if (!snod) + return -ENOMEM; + + snod->sqnum = le64_to_cpu(ch->sqnum); + snod->type = ch->node_type; + snod->offs = offs; + snod->len = le32_to_cpu(ch->len); + snod->node = buf; + + switch (ch->node_type) { + case UBIFS_INO_NODE: + case UBIFS_DENT_NODE: + case UBIFS_XENT_NODE: + case UBIFS_DATA_NODE: + case UBIFS_TRUN_NODE: + /* + * The key is in the same place in all keyed + * nodes. + */ + key_read(c, &ino->key, &snod->key); + break; + } + list_add_tail(&snod->list, &sleb->nodes); + sleb->nodes_cnt += 1; + return 0; +} + +/** + * ubifs_scanned_corruption - print information after UBIFS scanned corruption. + * @c: UBIFS file-system description object + * @lnum: LEB number of corruption + * @offs: offset of corruption + * @buf: buffer containing corruption + */ +void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, + void *buf) +{ + int len; + + ubifs_err("corrupted data at LEB %d:%d", lnum, offs); + if (dbg_failure_mode) + return; + len = c->leb_size - offs; + if (len > 4096) + len = 4096; + dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); +} + +/** + * ubifs_scan - scan a logical eraseblock. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * @sbuf: scan buffer (must be c->leb_size) + * + * This function scans LEB number @lnum and returns complete information about + * its contents. Returns an error code in case of failure. + */ +struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf) +{ + void *buf = sbuf + offs; + int err, len = c->leb_size - offs; + struct ubifs_scan_leb *sleb; + + sleb = ubifs_start_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return sleb; + + while (len >= 8) { + struct ubifs_ch *ch = buf; + int node_len, ret; + + dbg_scan("look at LEB %d:%d (%d bytes left)", + lnum, offs, len); + + cond_resched(); + + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); + + if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; + continue; + } + + if (ret == SCANNED_EMPTY_SPACE) + /* Empty space is checked later */ + break; + + switch (ret) { + case SCANNED_GARBAGE: + dbg_err("garbage"); + goto corrupted; + case SCANNED_A_NODE: + break; + case SCANNED_A_CORRUPT_NODE: + case SCANNED_A_BAD_PAD_NODE: + dbg_err("bad node"); + goto corrupted; + default: + dbg_err("unknown"); + goto corrupted; + } + + err = ubifs_add_snod(c, sleb, buf, offs); + if (err) + goto error; + + node_len = ALIGN(le32_to_cpu(ch->len), 8); + offs += node_len; + buf += node_len; + len -= node_len; + } + + if (offs % c->min_io_size) + goto corrupted; + + ubifs_end_scan(c, sleb, lnum, offs); + + for (; len > 4; offs += 4, buf = buf + 4, len -= 4) + if (*(uint32_t *)buf != 0xffffffff) + break; + for (; len; offs++, buf++, len--) + if (*(uint8_t *)buf != 0xff) { + ubifs_err("corrupt empty space at LEB %d:%d", + lnum, offs); + goto corrupted; + } + + return sleb; + +corrupted: + ubifs_scanned_corruption(c, lnum, offs, buf); + err = -EUCLEAN; +error: + ubifs_err("LEB %d scanning failed", lnum); + ubifs_scan_destroy(sleb); + return ERR_PTR(err); +} + +/** + * ubifs_scan_destroy - destroy LEB scanning information. + * @sleb: scanning information to free + */ +void ubifs_scan_destroy(struct ubifs_scan_leb *sleb) +{ + struct ubifs_scan_node *node; + struct list_head *head; + + head = &sleb->nodes; + while (!list_empty(head)) { + node = list_entry(head->next, struct ubifs_scan_node, list); + list_del(&node->list); + kfree(node); + } + kfree(sleb); +} diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c new file mode 100644 index 000000000000..f248533841a2 --- /dev/null +++ b/fs/ubifs/shrinker.c @@ -0,0 +1,322 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS shrinker which evicts clean znodes from the TNC + * tree when Linux VM needs more RAM. + * + * We do not implement any LRU lists to find oldest znodes to free because it + * would add additional overhead to the file system fast paths. So the shrinker + * just walks the TNC tree when searching for znodes to free. + * + * If the root of a TNC sub-tree is clean and old enough, then the children are + * also clean and old enough. So the shrinker walks the TNC in level order and + * dumps entire sub-trees. + * + * The age of znodes is just the time-stamp when they were last looked at. + * The current shrinker first tries to evict old znodes, then young ones. + * + * Since the shrinker is global, it has to protect against races with FS + * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. + */ + +#include "ubifs.h" + +/* List of all UBIFS file-system instances */ +LIST_HEAD(ubifs_infos); + +/* + * We number each shrinker run and record the number on the ubifs_info structure + * so that we can easily work out which ubifs_info structures have already been + * done by the current run. + */ +static unsigned int shrinker_run_no; + +/* Protects 'ubifs_infos' list */ +DEFINE_SPINLOCK(ubifs_infos_lock); + +/* Global clean znode counter (for all mounted UBIFS instances) */ +atomic_long_t ubifs_clean_zn_cnt; + +/** + * shrink_tnc - shrink TNC tree. + * @c: UBIFS file-system description object + * @nr: number of znodes to free + * @age: the age of znodes to free + * @contention: if any contention, this is set to %1 + * + * This function traverses TNC tree and frees clean znodes. It does not free + * clean znodes which younger then @age. Returns number of freed znodes. + */ +static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) +{ + int total_freed = 0; + struct ubifs_znode *znode, *zprev; + int time = get_seconds(); + + ubifs_assert(mutex_is_locked(&c->umount_mutex)); + ubifs_assert(mutex_is_locked(&c->tnc_mutex)); + + if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) + return 0; + + /* + * Traverse the TNC tree in levelorder manner, so that it is possible + * to destroy large sub-trees. Indeed, if a znode is old, then all its + * children are older or of the same age. + * + * Note, we are holding 'c->tnc_mutex', so we do not have to lock the + * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is + * changed only when the 'c->tnc_mutex' is held. + */ + zprev = NULL; + znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); + while (znode && total_freed < nr && + atomic_long_read(&c->clean_zn_cnt) > 0) { + int freed; + + /* + * If the znode is clean, but it is in the 'c->cnext' list, this + * means that this znode has just been written to flash as a + * part of commit and was marked clean. They will be removed + * from the list at end commit. We cannot change the list, + * because it is not protected by any mutex (design decision to + * make commit really independent and parallel to main I/O). So + * we just skip these znodes. + * + * Note, the 'clean_zn_cnt' counters are not updated until + * after the commit, so the UBIFS shrinker does not report + * the znodes which are in the 'c->cnext' list as freeable. + * + * Also note, if the root of a sub-tree is not in 'c->cnext', + * then the whole sub-tree is not in 'c->cnext' as well, so it + * is safe to dump whole sub-tree. + */ + + if (znode->cnext) { + /* + * Very soon these znodes will be removed from the list + * and become freeable. + */ + *contention = 1; + } else if (!ubifs_zn_dirty(znode) && + abs(time - znode->time) >= age) { + if (znode->parent) + znode->parent->zbranch[znode->iip].znode = NULL; + else + c->zroot.znode = NULL; + + freed = ubifs_destroy_tnc_subtree(znode); + atomic_long_sub(freed, &ubifs_clean_zn_cnt); + atomic_long_sub(freed, &c->clean_zn_cnt); + ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); + total_freed += freed; + znode = zprev; + } + + if (unlikely(!c->zroot.znode)) + break; + + zprev = znode; + znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); + cond_resched(); + } + + return total_freed; +} + +/** + * shrink_tnc_trees - shrink UBIFS TNC trees. + * @nr: number of znodes to free + * @age: the age of znodes to free + * @contention: if any contention, this is set to %1 + * + * This function walks the list of mounted UBIFS file-systems and frees clean + * znodes which are older then @age, until at least @nr znodes are freed. + * Returns the number of freed znodes. + */ +static int shrink_tnc_trees(int nr, int age, int *contention) +{ + struct ubifs_info *c; + struct list_head *p; + unsigned int run_no; + int freed = 0; + + spin_lock(&ubifs_infos_lock); + do { + run_no = ++shrinker_run_no; + } while (run_no == 0); + /* Iterate over all mounted UBIFS file-systems and try to shrink them */ + p = ubifs_infos.next; + while (p != &ubifs_infos) { + c = list_entry(p, struct ubifs_info, infos_list); + /* + * We move the ones we do to the end of the list, so we stop + * when we see one we have already done. + */ + if (c->shrinker_run_no == run_no) + break; + if (!mutex_trylock(&c->umount_mutex)) { + /* Some un-mount is in progress, try next FS */ + *contention = 1; + p = p->next; + continue; + } + /* + * We're holding 'c->umount_mutex', so the file-system won't go + * away. + */ + if (!mutex_trylock(&c->tnc_mutex)) { + mutex_unlock(&c->umount_mutex); + *contention = 1; + p = p->next; + continue; + } + spin_unlock(&ubifs_infos_lock); + /* + * OK, now we have TNC locked, the file-system cannot go away - + * it is safe to reap the cache. + */ + c->shrinker_run_no = run_no; + freed += shrink_tnc(c, nr, age, contention); + mutex_unlock(&c->tnc_mutex); + spin_lock(&ubifs_infos_lock); + /* Get the next list element before we move this one */ + p = p->next; + /* + * Move this one to the end of the list to provide some + * fairness. + */ + list_del(&c->infos_list); + list_add_tail(&c->infos_list, &ubifs_infos); + mutex_unlock(&c->umount_mutex); + if (freed >= nr) + break; + } + spin_unlock(&ubifs_infos_lock); + return freed; +} + +/** + * kick_a_thread - kick a background thread to start commit. + * + * This function kicks a background thread to start background commit. Returns + * %-1 if a thread was kicked or there is another reason to assume the memory + * will soon be freed or become freeable. If there are no dirty znodes, returns + * %0. + */ +static int kick_a_thread(void) +{ + int i; + struct ubifs_info *c; + + /* + * Iterate over all mounted UBIFS file-systems and find out if there is + * already an ongoing commit operation there. If no, then iterate for + * the second time and initiate background commit. + */ + spin_lock(&ubifs_infos_lock); + for (i = 0; i < 2; i++) { + list_for_each_entry(c, &ubifs_infos, infos_list) { + long dirty_zn_cnt; + + if (!mutex_trylock(&c->umount_mutex)) { + /* + * Some un-mount is in progress, it will + * certainly free memory, so just return. + */ + spin_unlock(&ubifs_infos_lock); + return -1; + } + + dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); + + if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || + c->ro_media) { + mutex_unlock(&c->umount_mutex); + continue; + } + + if (c->cmt_state != COMMIT_RESTING) { + spin_unlock(&ubifs_infos_lock); + mutex_unlock(&c->umount_mutex); + return -1; + } + + if (i == 1) { + list_del(&c->infos_list); + list_add_tail(&c->infos_list, &ubifs_infos); + spin_unlock(&ubifs_infos_lock); + + ubifs_request_bg_commit(c); + mutex_unlock(&c->umount_mutex); + return -1; + } + mutex_unlock(&c->umount_mutex); + } + } + spin_unlock(&ubifs_infos_lock); + + return 0; +} + +int ubifs_shrinker(int nr, gfp_t gfp_mask) +{ + int freed, contention = 0; + long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); + + if (nr == 0) + return clean_zn_cnt; + + if (!clean_zn_cnt) { + /* + * No clean znodes, nothing to reap. All we can do in this case + * is to kick background threads to start commit, which will + * probably make clean znodes which, in turn, will be freeable. + * And we return -1 which means will make VM call us again + * later. + */ + dbg_tnc("no clean znodes, kick a thread"); + return kick_a_thread(); + } + + freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); + if (freed >= nr) + goto out; + + dbg_tnc("not enough old znodes, try to free young ones"); + freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); + if (freed >= nr) + goto out; + + dbg_tnc("not enough young znodes, free all"); + freed += shrink_tnc_trees(nr - freed, 0, &contention); + + if (!freed && contention) { + dbg_tnc("freed nothing, but contention"); + return -1; + } + +out: + dbg_tnc("%d znodes were freed, requested %d", freed, nr); + return freed; +} diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c new file mode 100644 index 000000000000..00eb9c68ad03 --- /dev/null +++ b/fs/ubifs/super.c @@ -0,0 +1,1951 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS initialization and VFS superblock operations. Some + * initialization stuff which is rather large and complex is placed at + * corresponding subsystems, but most of it is here. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ubifs.h" + +/* Slab cache for UBIFS inodes */ +struct kmem_cache *ubifs_inode_slab; + +/* UBIFS TNC shrinker description */ +static struct shrinker ubifs_shrinker_info = { + .shrink = ubifs_shrinker, + .seeks = DEFAULT_SEEKS, +}; + +/** + * validate_inode - validate inode. + * @c: UBIFS file-system description object + * @inode: the inode to validate + * + * This is a helper function for 'ubifs_iget()' which validates various fields + * of a newly built inode to make sure they contain sane values and prevent + * possible vulnerabilities. Returns zero if the inode is all right and + * a non-zero error code if not. + */ +static int validate_inode(struct ubifs_info *c, const struct inode *inode) +{ + int err; + const struct ubifs_inode *ui = ubifs_inode(inode); + + if (inode->i_size > c->max_inode_sz) { + ubifs_err("inode is too large (%lld)", + (long long)inode->i_size); + return 1; + } + + if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { + ubifs_err("unknown compression type %d", ui->compr_type); + return 2; + } + + if (ui->xattr_names + ui->xattr_cnt > XATTR_LIST_MAX) + return 3; + + if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) + return 4; + + if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) + return 5; + + if (!ubifs_compr_present(ui->compr_type)) { + ubifs_warn("inode %lu uses '%s' compression, but it was not " + "compiled in", inode->i_ino, + ubifs_compr_name(ui->compr_type)); + } + + err = dbg_check_dir_size(c, inode); + return err; +} + +struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) +{ + int err; + union ubifs_key key; + struct ubifs_ino_node *ino; + struct ubifs_info *c = sb->s_fs_info; + struct inode *inode; + struct ubifs_inode *ui; + + dbg_gen("inode %lu", inum); + + inode = iget_locked(sb, inum); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + ui = ubifs_inode(inode); + + ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); + if (!ino) { + err = -ENOMEM; + goto out; + } + + ino_key_init(c, &key, inode->i_ino); + + err = ubifs_tnc_lookup(c, &key, ino); + if (err) + goto out_ino; + + inode->i_flags |= (S_NOCMTIME | S_NOATIME); + inode->i_nlink = le32_to_cpu(ino->nlink); + inode->i_uid = le32_to_cpu(ino->uid); + inode->i_gid = le32_to_cpu(ino->gid); + inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); + inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); + inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); + inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec); + inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec); + inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec); + inode->i_mode = le32_to_cpu(ino->mode); + inode->i_size = le64_to_cpu(ino->size); + + ui->data_len = le32_to_cpu(ino->data_len); + ui->flags = le32_to_cpu(ino->flags); + ui->compr_type = le16_to_cpu(ino->compr_type); + ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum); + ui->xattr_cnt = le32_to_cpu(ino->xattr_cnt); + ui->xattr_size = le32_to_cpu(ino->xattr_size); + ui->xattr_names = le32_to_cpu(ino->xattr_names); + ui->synced_i_size = ui->ui_size = inode->i_size; + + ui->xattr = (ui->flags & UBIFS_XATTR_FL) ? 1 : 0; + + err = validate_inode(c, inode); + if (err) + goto out_invalid; + + /* Disable readahead */ + inode->i_mapping->backing_dev_info = &c->bdi; + + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_mapping->a_ops = &ubifs_file_address_operations; + inode->i_op = &ubifs_file_inode_operations; + inode->i_fop = &ubifs_file_operations; + if (ui->xattr) { + ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + memcpy(ui->data, ino->data, ui->data_len); + ((char *)ui->data)[ui->data_len] = '\0'; + } else if (ui->data_len != 0) { + err = 10; + goto out_invalid; + } + break; + case S_IFDIR: + inode->i_op = &ubifs_dir_inode_operations; + inode->i_fop = &ubifs_dir_operations; + if (ui->data_len != 0) { + err = 11; + goto out_invalid; + } + break; + case S_IFLNK: + inode->i_op = &ubifs_symlink_inode_operations; + if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) { + err = 12; + goto out_invalid; + } + ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + memcpy(ui->data, ino->data, ui->data_len); + ((char *)ui->data)[ui->data_len] = '\0'; + break; + case S_IFBLK: + case S_IFCHR: + { + dev_t rdev; + union ubifs_dev_desc *dev; + + ui->data = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_ino; + } + + dev = (union ubifs_dev_desc *)ino->data; + if (ui->data_len == sizeof(dev->new)) + rdev = new_decode_dev(le32_to_cpu(dev->new)); + else if (ui->data_len == sizeof(dev->huge)) + rdev = huge_decode_dev(le64_to_cpu(dev->huge)); + else { + err = 13; + goto out_invalid; + } + memcpy(ui->data, ino->data, ui->data_len); + inode->i_op = &ubifs_file_inode_operations; + init_special_inode(inode, inode->i_mode, rdev); + break; + } + case S_IFSOCK: + case S_IFIFO: + inode->i_op = &ubifs_file_inode_operations; + init_special_inode(inode, inode->i_mode, 0); + if (ui->data_len != 0) { + err = 14; + goto out_invalid; + } + break; + default: + err = 15; + goto out_invalid; + } + + kfree(ino); + ubifs_set_inode_flags(inode); + unlock_new_inode(inode); + return inode; + +out_invalid: + ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); + dbg_dump_node(c, ino); + dbg_dump_inode(c, inode); + err = -EINVAL; +out_ino: + kfree(ino); +out: + ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); + iget_failed(inode); + return ERR_PTR(err); +} + +static struct inode *ubifs_alloc_inode(struct super_block *sb) +{ + struct ubifs_inode *ui; + + ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS); + if (!ui) + return NULL; + + memset((void *)ui + sizeof(struct inode), 0, + sizeof(struct ubifs_inode) - sizeof(struct inode)); + mutex_init(&ui->ui_mutex); + spin_lock_init(&ui->ui_lock); + return &ui->vfs_inode; +}; + +static void ubifs_destroy_inode(struct inode *inode) +{ + struct ubifs_inode *ui = ubifs_inode(inode); + + kfree(ui->data); + kmem_cache_free(ubifs_inode_slab, inode); +} + +/* + * Note, Linux write-back code calls this without 'i_mutex'. + */ +static int ubifs_write_inode(struct inode *inode, int wait) +{ + int err; + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_inode *ui = ubifs_inode(inode); + + ubifs_assert(!ui->xattr); + if (is_bad_inode(inode)) + return 0; + + mutex_lock(&ui->ui_mutex); + /* + * Due to races between write-back forced by budgeting + * (see 'sync_some_inodes()') and pdflush write-back, the inode may + * have already been synchronized, do not do this again. This might + * also happen if it was synchronized in an VFS operation, e.g. + * 'ubifs_link()'. + */ + if (!ui->dirty) { + mutex_unlock(&ui->ui_mutex); + return 0; + } + + dbg_gen("inode %lu", inode->i_ino); + err = ubifs_jnl_write_inode(c, inode, 0); + if (err) + ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); + + ui->dirty = 0; + mutex_unlock(&ui->ui_mutex); + ubifs_release_dirty_inode_budget(c, ui); + return err; +} + +static void ubifs_delete_inode(struct inode *inode) +{ + int err; + struct ubifs_info *c = inode->i_sb->s_fs_info; + + if (ubifs_inode(inode)->xattr) + /* + * Extended attribute inode deletions are fully handled in + * 'ubifs_removexattr()'. These inodes are special and have + * limited usage, so there is nothing to do here. + */ + goto out; + + dbg_gen("inode %lu", inode->i_ino); + ubifs_assert(!atomic_read(&inode->i_count)); + ubifs_assert(inode->i_nlink == 0); + + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) + goto out; + + ubifs_inode(inode)->ui_size = inode->i_size = 0; + err = ubifs_jnl_write_inode(c, inode, 1); + if (err) + /* + * Worst case we have a lost orphan inode wasting space, so a + * simple error message is ok here. + */ + ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); +out: + clear_inode(inode); +} + +static void ubifs_dirty_inode(struct inode *inode) +{ + struct ubifs_inode *ui = ubifs_inode(inode); + + ubifs_assert(mutex_is_locked(&ui->ui_mutex)); + if (!ui->dirty) { + ui->dirty = 1; + dbg_gen("inode %lu", inode->i_ino); + } +} + +static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct ubifs_info *c = dentry->d_sb->s_fs_info; + unsigned long long free; + + free = ubifs_budg_get_free_space(c); + dbg_gen("free space %lld bytes (%lld blocks)", + free, free >> UBIFS_BLOCK_SHIFT); + + buf->f_type = UBIFS_SUPER_MAGIC; + buf->f_bsize = UBIFS_BLOCK_SIZE; + buf->f_blocks = c->block_cnt; + buf->f_bfree = free >> UBIFS_BLOCK_SHIFT; + if (free > c->report_rp_size) + buf->f_bavail = (free - c->report_rp_size) >> UBIFS_BLOCK_SHIFT; + else + buf->f_bavail = 0; + buf->f_files = 0; + buf->f_ffree = 0; + buf->f_namelen = UBIFS_MAX_NLEN; + + return 0; +} + +static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) +{ + struct ubifs_info *c = mnt->mnt_sb->s_fs_info; + + if (c->mount_opts.unmount_mode == 2) + seq_printf(s, ",fast_unmount"); + else if (c->mount_opts.unmount_mode == 1) + seq_printf(s, ",norm_unmount"); + + return 0; +} + +static int ubifs_sync_fs(struct super_block *sb, int wait) +{ + struct ubifs_info *c = sb->s_fs_info; + int i, ret = 0, err; + + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err && !ret) + ret = err; + } + /* + * We ought to call sync for c->ubi but it does not have one. If it had + * it would in turn call mtd->sync, however mtd operations are + * synchronous anyway, so we don't lose any sleep here. + */ + return ret; +} + +/** + * init_constants_early - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This function initialize UBIFS constants which do not need the superblock to + * be read. It also checks that the UBI volume satisfies basic UBIFS + * requirements. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int init_constants_early(struct ubifs_info *c) +{ + if (c->vi.corrupted) { + ubifs_warn("UBI volume is corrupted - read-only mode"); + c->ro_media = 1; + } + + if (c->di.ro_mode) { + ubifs_msg("read-only UBI device"); + c->ro_media = 1; + } + + if (c->vi.vol_type == UBI_STATIC_VOLUME) { + ubifs_msg("static UBI volume - read-only mode"); + c->ro_media = 1; + } + + c->leb_cnt = c->vi.size; + c->leb_size = c->vi.usable_leb_size; + c->half_leb_size = c->leb_size / 2; + c->min_io_size = c->di.min_io_size; + c->min_io_shift = fls(c->min_io_size) - 1; + + if (c->leb_size < UBIFS_MIN_LEB_SZ) { + ubifs_err("too small LEBs (%d bytes), min. is %d bytes", + c->leb_size, UBIFS_MIN_LEB_SZ); + return -EINVAL; + } + + if (c->leb_cnt < UBIFS_MIN_LEB_CNT) { + ubifs_err("too few LEBs (%d), min. is %d", + c->leb_cnt, UBIFS_MIN_LEB_CNT); + return -EINVAL; + } + + if (!is_power_of_2(c->min_io_size)) { + ubifs_err("bad min. I/O size %d", c->min_io_size); + return -EINVAL; + } + + /* + * UBIFS aligns all node to 8-byte boundary, so to make function in + * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is + * less than 8. + */ + if (c->min_io_size < 8) { + c->min_io_size = 8; + c->min_io_shift = 3; + } + + c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); + c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size); + + /* + * Initialize node length ranges which are mostly needed for node + * length validation. + */ + c->ranges[UBIFS_PAD_NODE].len = UBIFS_PAD_NODE_SZ; + c->ranges[UBIFS_SB_NODE].len = UBIFS_SB_NODE_SZ; + c->ranges[UBIFS_MST_NODE].len = UBIFS_MST_NODE_SZ; + c->ranges[UBIFS_REF_NODE].len = UBIFS_REF_NODE_SZ; + c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ; + c->ranges[UBIFS_CS_NODE].len = UBIFS_CS_NODE_SZ; + + c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ; + c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ; + c->ranges[UBIFS_ORPH_NODE].min_len = + UBIFS_ORPH_NODE_SZ + sizeof(__le64); + c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size; + c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ; + c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ; + c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ; + c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ; + c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ; + c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ; + /* + * Minimum indexing node size is amended later when superblock is + * read and the key length is known. + */ + c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ; + /* + * Maximum indexing node size is amended later when superblock is + * read and the fanout is known. + */ + c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; + + /* + * Initialize dead and dark LEB space watermarks. + * + * Dead space is the space which cannot be used. Its watermark is + * equivalent to min. I/O unit or minimum node size if it is greater + * then min. I/O unit. + * + * Dark space is the space which might be used, or might not, depending + * on which node should be written to the LEB. Its watermark is + * equivalent to maximum UBIFS node size. + */ + c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); + c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); + + return 0; +} + +/** + * bud_wbuf_callback - bud LEB write-buffer synchronization call-back. + * @c: UBIFS file-system description object + * @lnum: LEB the write-buffer was synchronized to + * @free: how many free bytes left in this LEB + * @pad: how many bytes were padded + * + * This is a callback function which is called by the I/O unit when the + * write-buffer is synchronized. We need this to correctly maintain space + * accounting in bud logical eraseblocks. This function returns zero in case of + * success and a negative error code in case of failure. + * + * This function actually belongs to the journal, but we keep it here because + * we want to keep it static. + */ +static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad) +{ + return ubifs_update_one_lp(c, lnum, free, pad, 0, 0); +} + +/* + * init_constants_late - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This is a helper function which initializes various UBIFS constants after + * the superblock has been read. It also checks various UBIFS parameters and + * makes sure they are all right. Returns zero in case of success and a + * negative error code in case of failure. + */ +static int init_constants_late(struct ubifs_info *c) +{ + int tmp, err; + uint64_t tmp64; + + c->main_bytes = (long long)c->main_lebs * c->leb_size; + c->max_znode_sz = sizeof(struct ubifs_znode) + + c->fanout * sizeof(struct ubifs_zbranch); + + tmp = ubifs_idx_node_sz(c, 1); + c->ranges[UBIFS_IDX_NODE].min_len = tmp; + c->min_idx_node_sz = ALIGN(tmp, 8); + + tmp = ubifs_idx_node_sz(c, c->fanout); + c->ranges[UBIFS_IDX_NODE].max_len = tmp; + c->max_idx_node_sz = ALIGN(tmp, 8); + + /* Make sure LEB size is large enough to fit full commit */ + tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; + tmp = ALIGN(tmp, c->min_io_size); + if (tmp > c->leb_size) { + dbg_err("too small LEB size %d, at least %d needed", + c->leb_size, tmp); + return -EINVAL; + } + + /* + * Make sure that the log is large enough to fit reference nodes for + * all buds plus one reserved LEB. + */ + tmp64 = c->max_bud_bytes; + tmp = do_div(tmp64, c->leb_size); + c->max_bud_cnt = tmp64 + !!tmp; + tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); + tmp /= c->leb_size; + tmp += 1; + if (c->log_lebs < tmp) { + dbg_err("too small log %d LEBs, required min. %d LEBs", + c->log_lebs, tmp); + return -EINVAL; + } + + /* + * When budgeting we assume worst-case scenarios when the pages are not + * be compressed and direntries are of the maximum size. + * + * Note, data, which may be stored in inodes is budgeted separately, so + * it is not included into 'c->inode_budget'. + */ + c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; + c->inode_budget = UBIFS_INO_NODE_SZ; + c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; + + /* + * When the amount of flash space used by buds becomes + * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit. + * The writers are unblocked when the commit is finished. To avoid + * writers to be blocked UBIFS initiates background commit in advance, + * when number of bud bytes becomes above the limit defined below. + */ + c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4; + + /* + * Ensure minimum journal size. All the bytes in the journal heads are + * considered to be used, when calculating the current journal usage. + * Consequently, if the journal is too small, UBIFS will treat it as + * always full. + */ + tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1; + if (c->bg_bud_bytes < tmp64) + c->bg_bud_bytes = tmp64; + if (c->max_bud_bytes < tmp64 + c->leb_size) + c->max_bud_bytes = tmp64 + c->leb_size; + + err = ubifs_calc_lpt_geom(c); + if (err) + return err; + + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + /* + * Calculate total amount of FS blocks. This number is not used + * internally because it does not make much sense for UBIFS, but it is + * necessary to report something for the 'statfs()' call. + * + * Subtract the LEB reserved for GC and the LEB which is reserved for + * deletions. + * + * Review 'ubifs_calc_available()' if changing this calculation. + */ + tmp64 = c->main_lebs - 2; + tmp64 *= (uint64_t)c->leb_size - c->dark_wm; + tmp64 = ubifs_reported_space(c, tmp64); + c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; + + return 0; +} + +/** + * take_gc_lnum - reserve GC LEB. + * @c: UBIFS file-system description object + * + * This function ensures that the LEB reserved for garbage collection is + * unmapped and is marked as "taken" in lprops. We also have to set free space + * to LEB size and dirty space to zero, because lprops may contain out-of-date + * information if the file-system was un-mounted before it has been committed. + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int take_gc_lnum(struct ubifs_info *c) +{ + int err; + + if (c->gc_lnum == -1) { + ubifs_err("no LEB for GC"); + return -EINVAL; + } + + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; + + /* And we have to tell lprops that this LEB is taken */ + err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, + LPROPS_TAKEN, 0, 0); + return err; +} + +/** + * alloc_wbufs - allocate write-buffers. + * @c: UBIFS file-system description object + * + * This helper function allocates and initializes UBIFS write-buffers. Returns + * zero in case of success and %-ENOMEM in case of failure. + */ +static int alloc_wbufs(struct ubifs_info *c) +{ + int i, err; + + c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead), + GFP_KERNEL); + if (!c->jheads) + return -ENOMEM; + + /* Initialize journal heads */ + for (i = 0; i < c->jhead_cnt; i++) { + INIT_LIST_HEAD(&c->jheads[i].buds_list); + err = ubifs_wbuf_init(c, &c->jheads[i].wbuf); + if (err) + return err; + + c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; + c->jheads[i].wbuf.jhead = i; + } + + c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; + /* + * Garbage Collector head likely contains long-term data and + * does not need to be synchronized by timer. + */ + c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; + c->jheads[GCHD].wbuf.timeout = 0; + + return 0; +} + +/** + * free_wbufs - free write-buffers. + * @c: UBIFS file-system description object + */ +static void free_wbufs(struct ubifs_info *c) +{ + int i; + + if (c->jheads) { + for (i = 0; i < c->jhead_cnt; i++) { + kfree(c->jheads[i].wbuf.buf); + kfree(c->jheads[i].wbuf.inodes); + } + kfree(c->jheads); + c->jheads = NULL; + } +} + +/** + * free_orphans - free orphans. + * @c: UBIFS file-system description object + */ +static void free_orphans(struct ubifs_info *c) +{ + struct ubifs_orphan *orph; + + while (c->orph_dnext) { + orph = c->orph_dnext; + c->orph_dnext = orph->dnext; + list_del(&orph->list); + kfree(orph); + } + + while (!list_empty(&c->orph_list)) { + orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); + list_del(&orph->list); + kfree(orph); + dbg_err("orphan list not empty at unmount"); + } + + vfree(c->orph_buf); + c->orph_buf = NULL; +} + +/** + * free_buds - free per-bud objects. + * @c: UBIFS file-system description object + */ +static void free_buds(struct ubifs_info *c) +{ + struct rb_node *this = c->buds.rb_node; + struct ubifs_bud *bud; + + while (this) { + if (this->rb_left) + this = this->rb_left; + else if (this->rb_right) + this = this->rb_right; + else { + bud = rb_entry(this, struct ubifs_bud, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &bud->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(bud); + } + } +} + +/** + * check_volume_empty - check if the UBI volume is empty. + * @c: UBIFS file-system description object + * + * This function checks if the UBIFS volume is empty by looking if its LEBs are + * mapped or not. The result of checking is stored in the @c->empty variable. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int check_volume_empty(struct ubifs_info *c) +{ + int lnum, err; + + c->empty = 1; + for (lnum = 0; lnum < c->leb_cnt; lnum++) { + err = ubi_is_mapped(c->ubi, lnum); + if (unlikely(err < 0)) + return err; + if (err == 1) { + c->empty = 0; + break; + } + + cond_resched(); + } + + return 0; +} + +/* + * UBIFS mount options. + * + * Opt_fast_unmount: do not run a journal commit before un-mounting + * Opt_norm_unmount: run a journal commit before un-mounting + * Opt_err: just end of array marker + */ +enum { + Opt_fast_unmount, + Opt_norm_unmount, + Opt_err, +}; + +static match_table_t tokens = { + {Opt_fast_unmount, "fast_unmount"}, + {Opt_norm_unmount, "norm_unmount"}, + {Opt_err, NULL}, +}; + +/** + * ubifs_parse_options - parse mount parameters. + * @c: UBIFS file-system description object + * @options: parameters to parse + * @is_remount: non-zero if this is FS re-mount + * + * This function parses UBIFS mount options and returns zero in case success + * and a negative error code in case of failure. + */ +static int ubifs_parse_options(struct ubifs_info *c, char *options, + int is_remount) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + if (!options) + return 0; + + while ((p = strsep(&options, ","))) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_fast_unmount: + c->mount_opts.unmount_mode = 2; + c->fast_unmount = 1; + break; + case Opt_norm_unmount: + c->mount_opts.unmount_mode = 1; + c->fast_unmount = 0; + break; + default: + ubifs_err("unrecognized mount option \"%s\" " + "or missing value", p); + return -EINVAL; + } + } + + return 0; +} + +/** + * destroy_journal - destroy journal data structures. + * @c: UBIFS file-system description object + * + * This function destroys journal data structures including those that may have + * been created by recovery functions. + */ +static void destroy_journal(struct ubifs_info *c) +{ + while (!list_empty(&c->unclean_leb_list)) { + struct ubifs_unclean_leb *ucleb; + + ucleb = list_entry(c->unclean_leb_list.next, + struct ubifs_unclean_leb, list); + list_del(&ucleb->list); + kfree(ucleb); + } + while (!list_empty(&c->old_buds)) { + struct ubifs_bud *bud; + + bud = list_entry(c->old_buds.next, struct ubifs_bud, list); + list_del(&bud->list); + kfree(bud); + } + ubifs_destroy_idx_gc(c); + ubifs_destroy_size_tree(c); + ubifs_tnc_close(c); + free_buds(c); +} + +/** + * mount_ubifs - mount UBIFS file-system. + * @c: UBIFS file-system description object + * + * This function mounts UBIFS file system. Returns zero in case of success and + * a negative error code in case of failure. + * + * Note, the function does not de-allocate resources it it fails half way + * through, and the caller has to do this instead. + */ +static int mount_ubifs(struct ubifs_info *c) +{ + struct super_block *sb = c->vfs_sb; + int err, mounted_read_only = (sb->s_flags & MS_RDONLY); + long long x; + size_t sz; + + err = init_constants_early(c); + if (err) + return err; + +#ifdef CONFIG_UBIFS_FS_DEBUG + c->dbg_buf = vmalloc(c->leb_size); + if (!c->dbg_buf) + return -ENOMEM; +#endif + + err = check_volume_empty(c); + if (err) + goto out_free; + + if (c->empty && (mounted_read_only || c->ro_media)) { + /* + * This UBI volume is empty, and read-only, or the file system + * is mounted read-only - we cannot format it. + */ + ubifs_err("can't format empty UBI volume: read-only %s", + c->ro_media ? "UBI volume" : "mount"); + err = -EROFS; + goto out_free; + } + + if (c->ro_media && !mounted_read_only) { + ubifs_err("cannot mount read-write - read-only media"); + err = -EROFS; + goto out_free; + } + + /* + * The requirement for the buffer is that it should fit indexing B-tree + * height amount of integers. We assume the height if the TNC tree will + * never exceed 64. + */ + err = -ENOMEM; + c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL); + if (!c->bottom_up_buf) + goto out_free; + + c->sbuf = vmalloc(c->leb_size); + if (!c->sbuf) + goto out_free; + + if (!mounted_read_only) { + c->ileb_buf = vmalloc(c->leb_size); + if (!c->ileb_buf) + goto out_free; + } + + err = ubifs_read_superblock(c); + if (err) + goto out_free; + + /* + * Make sure the compressor which is set as the default on in the + * superblock was actually compiled in. + */ + if (!ubifs_compr_present(c->default_compr)) { + ubifs_warn("'%s' compressor is set by superblock, but not " + "compiled in", ubifs_compr_name(c->default_compr)); + c->default_compr = UBIFS_COMPR_NONE; + } + + dbg_failure_mode_registration(c); + + err = init_constants_late(c); + if (err) + goto out_dereg; + + sz = ALIGN(c->max_idx_node_sz, c->min_io_size); + sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); + c->cbuf = kmalloc(sz, GFP_NOFS); + if (!c->cbuf) { + err = -ENOMEM; + goto out_dereg; + } + + if (!mounted_read_only) { + err = alloc_wbufs(c); + if (err) + goto out_cbuf; + + /* Create background thread */ + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, + c->vi.vol_id); + c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); + if (!c->bgt) + c->bgt = ERR_PTR(-EINVAL); + if (IS_ERR(c->bgt)) { + err = PTR_ERR(c->bgt); + c->bgt = NULL; + ubifs_err("cannot spawn \"%s\", error %d", + c->bgt_name, err); + goto out_wbufs; + } + wake_up_process(c->bgt); + } + + err = ubifs_read_master(c); + if (err) + goto out_master; + + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { + ubifs_msg("recovery needed"); + c->need_recovery = 1; + if (!mounted_read_only) { + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out_master; + } + } else if (!mounted_read_only) { + /* + * Set the "dirty" flag so that if we reboot uncleanly we + * will notice this immediately on the next mount. + */ + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = ubifs_write_master(c); + if (err) + goto out_master; + } + + err = ubifs_lpt_init(c, 1, !mounted_read_only); + if (err) + goto out_lpt; + + err = dbg_check_idx_size(c, c->old_idx_sz); + if (err) + goto out_lpt; + + err = ubifs_replay_journal(c); + if (err) + goto out_journal; + + err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); + if (err) + goto out_orphans; + + if (!mounted_read_only) { + int lnum; + + /* Check for enough free space */ + if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { + ubifs_err("insufficient available space"); + err = -EINVAL; + goto out_orphans; + } + + /* Check for enough log space */ + lnum = c->lhead_lnum + 1; + if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) + lnum = UBIFS_LOG_LNUM; + if (lnum == c->ltail_lnum) { + err = ubifs_consolidate_log(c); + if (err) + goto out_orphans; + } + + if (c->need_recovery) { + err = ubifs_recover_size(c); + if (err) + goto out_orphans; + err = ubifs_rcvry_gc_commit(c); + } else + err = take_gc_lnum(c); + if (err) + goto out_orphans; + + err = dbg_check_lprops(c); + if (err) + goto out_orphans; + } else if (c->need_recovery) { + err = ubifs_recover_size(c); + if (err) + goto out_orphans; + } + + spin_lock(&ubifs_infos_lock); + list_add_tail(&c->infos_list, &ubifs_infos); + spin_unlock(&ubifs_infos_lock); + + if (c->need_recovery) { + if (mounted_read_only) + ubifs_msg("recovery deferred"); + else { + c->need_recovery = 0; + ubifs_msg("recovery completed"); + } + } + + err = dbg_check_filesystem(c); + if (err) + goto out_infos; + + ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num, + c->vi.vol_id); + if (mounted_read_only) + ubifs_msg("mounted read-only"); + x = (long long)c->main_lebs * c->leb_size; + ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", + x, x >> 10, x >> 20, c->main_lebs); + x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; + ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", + x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); + ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); + ubifs_msg("media format %d, latest format %d", + c->fmt_version, UBIFS_FORMAT_VERSION); + + dbg_msg("compiled on: " __DATE__ " at " __TIME__); + dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); + dbg_msg("LEB size: %d bytes (%d KiB)", + c->leb_size, c->leb_size / 1024); + dbg_msg("data journal heads: %d", + c->jhead_cnt - NONDATA_JHEADS_CNT); + dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" + "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", + c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3], + c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], + c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], + c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); + dbg_msg("fast unmount: %d", c->fast_unmount); + dbg_msg("big_lpt %d", c->big_lpt); + dbg_msg("log LEBs: %d (%d - %d)", + c->log_lebs, UBIFS_LOG_LNUM, c->log_last); + dbg_msg("LPT area LEBs: %d (%d - %d)", + c->lpt_lebs, c->lpt_first, c->lpt_last); + dbg_msg("orphan area LEBs: %d (%d - %d)", + c->orph_lebs, c->orph_first, c->orph_last); + dbg_msg("main area LEBs: %d (%d - %d)", + c->main_lebs, c->main_first, c->leb_cnt - 1); + dbg_msg("index LEBs: %d", c->lst.idx_lebs); + dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", + c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); + dbg_msg("key hash type: %d", c->key_hash_type); + dbg_msg("tree fanout: %d", c->fanout); + dbg_msg("reserved GC LEB: %d", c->gc_lnum); + dbg_msg("first main LEB: %d", c->main_first); + dbg_msg("dead watermark: %d", c->dead_wm); + dbg_msg("dark watermark: %d", c->dark_wm); + x = (long long)c->main_lebs * c->dark_wm; + dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", + x, x >> 10, x >> 20); + dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", + c->max_bud_bytes, c->max_bud_bytes >> 10, + c->max_bud_bytes >> 20); + dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", + c->bg_bud_bytes, c->bg_bud_bytes >> 10, + c->bg_bud_bytes >> 20); + dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", + c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); + dbg_msg("max. seq. number: %llu", c->max_sqnum); + dbg_msg("commit number: %llu", c->cmt_no); + + return 0; + +out_infos: + spin_lock(&ubifs_infos_lock); + list_del(&c->infos_list); + spin_unlock(&ubifs_infos_lock); +out_orphans: + free_orphans(c); +out_journal: + destroy_journal(c); +out_lpt: + ubifs_lpt_free(c, 0); +out_master: + kfree(c->mst_node); + kfree(c->rcvrd_mst_node); + if (c->bgt) + kthread_stop(c->bgt); +out_wbufs: + free_wbufs(c); +out_cbuf: + kfree(c->cbuf); +out_dereg: + dbg_failure_mode_deregistration(c); +out_free: + vfree(c->ileb_buf); + vfree(c->sbuf); + kfree(c->bottom_up_buf); + UBIFS_DBG(vfree(c->dbg_buf)); + return err; +} + +/** + * ubifs_umount - un-mount UBIFS file-system. + * @c: UBIFS file-system description object + * + * Note, this function is called to free allocated resourced when un-mounting, + * as well as free resources when an error occurred while we were half way + * through mounting (error path cleanup function). So it has to make sure the + * resource was actually allocated before freeing it. + */ +static void ubifs_umount(struct ubifs_info *c) +{ + dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, + c->vi.vol_id); + + spin_lock(&ubifs_infos_lock); + list_del(&c->infos_list); + spin_unlock(&ubifs_infos_lock); + + if (c->bgt) + kthread_stop(c->bgt); + + destroy_journal(c); + free_wbufs(c); + free_orphans(c); + ubifs_lpt_free(c, 0); + + kfree(c->cbuf); + kfree(c->rcvrd_mst_node); + kfree(c->mst_node); + vfree(c->sbuf); + kfree(c->bottom_up_buf); + UBIFS_DBG(vfree(c->dbg_buf)); + vfree(c->ileb_buf); + dbg_failure_mode_deregistration(c); +} + +/** + * ubifs_remount_rw - re-mount in read-write mode. + * @c: UBIFS file-system description object + * + * UBIFS avoids allocating many unnecessary resources when mounted in read-only + * mode. This function allocates the needed resources and re-mounts UBIFS in + * read-write mode. + */ +static int ubifs_remount_rw(struct ubifs_info *c) +{ + int err, lnum; + + if (c->ro_media) + return -EINVAL; + + mutex_lock(&c->umount_mutex); + c->remounting_rw = 1; + + /* Check for enough free space */ + if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { + ubifs_err("insufficient available space"); + err = -EINVAL; + goto out; + } + + if (c->old_leb_cnt != c->leb_cnt) { + struct ubifs_sb_node *sup; + + sup = ubifs_read_sb_node(c); + if (IS_ERR(sup)) { + err = PTR_ERR(sup); + goto out; + } + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + err = ubifs_write_sb_node(c, sup); + if (err) + goto out; + } + + if (c->need_recovery) { + ubifs_msg("completing deferred recovery"); + err = ubifs_write_rcvrd_mst_node(c); + if (err) + goto out; + err = ubifs_recover_size(c); + if (err) + goto out; + err = ubifs_clean_lebs(c, c->sbuf); + if (err) + goto out; + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out; + } + + if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) { + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = ubifs_write_master(c); + if (err) + goto out; + } + + c->ileb_buf = vmalloc(c->leb_size); + if (!c->ileb_buf) { + err = -ENOMEM; + goto out; + } + + err = ubifs_lpt_init(c, 0, 1); + if (err) + goto out; + + err = alloc_wbufs(c); + if (err) + goto out; + + ubifs_create_buds_lists(c); + + /* Create background thread */ + c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); + if (!c->bgt) + c->bgt = ERR_PTR(-EINVAL); + if (IS_ERR(c->bgt)) { + err = PTR_ERR(c->bgt); + c->bgt = NULL; + ubifs_err("cannot spawn \"%s\", error %d", + c->bgt_name, err); + return err; + } + wake_up_process(c->bgt); + + c->orph_buf = vmalloc(c->leb_size); + if (!c->orph_buf) + return -ENOMEM; + + /* Check for enough log space */ + lnum = c->lhead_lnum + 1; + if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) + lnum = UBIFS_LOG_LNUM; + if (lnum == c->ltail_lnum) { + err = ubifs_consolidate_log(c); + if (err) + goto out; + } + + if (c->need_recovery) + err = ubifs_rcvry_gc_commit(c); + else + err = take_gc_lnum(c); + if (err) + goto out; + + if (c->need_recovery) { + c->need_recovery = 0; + ubifs_msg("deferred recovery completed"); + } + + dbg_gen("re-mounted read-write"); + c->vfs_sb->s_flags &= ~MS_RDONLY; + c->remounting_rw = 0; + mutex_unlock(&c->umount_mutex); + return 0; + +out: + vfree(c->orph_buf); + c->orph_buf = NULL; + if (c->bgt) { + kthread_stop(c->bgt); + c->bgt = NULL; + } + free_wbufs(c); + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); + c->remounting_rw = 0; + mutex_unlock(&c->umount_mutex); + return err; +} + +/** + * commit_on_unmount - commit the journal when un-mounting. + * @c: UBIFS file-system description object + * + * This function is called during un-mounting and it commits the journal unless + * the "fast unmount" mode is enabled. It also avoids committing the journal if + * it contains too few data. + * + * Sometimes recovery requires the journal to be committed at least once, and + * this function takes care about this. + */ +static void commit_on_unmount(struct ubifs_info *c) +{ + if (!c->fast_unmount) { + long long bud_bytes; + + spin_lock(&c->buds_lock); + bud_bytes = c->bud_bytes; + spin_unlock(&c->buds_lock); + if (bud_bytes > c->leb_size) + ubifs_run_commit(c); + } +} + +/** + * ubifs_remount_ro - re-mount in read-only mode. + * @c: UBIFS file-system description object + * + * We rely on VFS to have stopped writing. Possibly the background thread could + * be running a commit, however kthread_stop will wait in that case. + */ +static void ubifs_remount_ro(struct ubifs_info *c) +{ + int i, err; + + ubifs_assert(!c->need_recovery); + commit_on_unmount(c); + + mutex_lock(&c->umount_mutex); + if (c->bgt) { + kthread_stop(c->bgt); + c->bgt = NULL; + } + + for (i = 0; i < c->jhead_cnt; i++) { + ubifs_wbuf_sync(&c->jheads[i].wbuf); + del_timer_sync(&c->jheads[i].wbuf.timer); + } + + if (!c->ro_media) { + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); + err = ubifs_write_master(c); + if (err) + ubifs_ro_mode(c, err); + } + + ubifs_destroy_idx_gc(c); + free_wbufs(c); + vfree(c->orph_buf); + c->orph_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); + mutex_unlock(&c->umount_mutex); +} + +static void ubifs_put_super(struct super_block *sb) +{ + int i; + struct ubifs_info *c = sb->s_fs_info; + + ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, + c->vi.vol_id); + /* + * The following asserts are only valid if there has not been a failure + * of the media. For example, there will be dirty inodes if we failed + * to write them back because of I/O errors. + */ + ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); + ubifs_assert(c->budg_idx_growth == 0); + ubifs_assert(c->budg_data_growth == 0); + + /* + * The 'c->umount_lock' prevents races between UBIFS memory shrinker + * and file system un-mount. Namely, it prevents the shrinker from + * picking this superblock for shrinking - it will be just skipped if + * the mutex is locked. + */ + mutex_lock(&c->umount_mutex); + if (!(c->vfs_sb->s_flags & MS_RDONLY)) { + /* + * First of all kill the background thread to make sure it does + * not interfere with un-mounting and freeing resources. + */ + if (c->bgt) { + kthread_stop(c->bgt); + c->bgt = NULL; + } + + /* Synchronize write-buffers */ + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) { + ubifs_wbuf_sync(&c->jheads[i].wbuf); + del_timer_sync(&c->jheads[i].wbuf.timer); + } + + /* + * On fatal errors c->ro_media is set to 1, in which case we do + * not write the master node. + */ + if (!c->ro_media) { + /* + * We are being cleanly unmounted which means the + * orphans were killed - indicate this in the master + * node. Also save the reserved GC LEB number. + */ + int err; + + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); + err = ubifs_write_master(c); + if (err) + /* + * Recovery will attempt to fix the master area + * next mount, so we just print a message and + * continue to unmount normally. + */ + ubifs_err("failed to write master node, " + "error %d", err); + } + } + + ubifs_umount(c); + bdi_destroy(&c->bdi); + ubi_close_volume(c->ubi); + mutex_unlock(&c->umount_mutex); + kfree(c); +} + +static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) +{ + int err; + struct ubifs_info *c = sb->s_fs_info; + + dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); + + err = ubifs_parse_options(c, data, 1); + if (err) { + ubifs_err("invalid or unknown remount parameter"); + return err; + } + if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + err = ubifs_remount_rw(c); + if (err) + return err; + } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + ubifs_remount_ro(c); + + return 0; +} + +struct super_operations ubifs_super_operations = { + .alloc_inode = ubifs_alloc_inode, + .destroy_inode = ubifs_destroy_inode, + .put_super = ubifs_put_super, + .write_inode = ubifs_write_inode, + .delete_inode = ubifs_delete_inode, + .statfs = ubifs_statfs, + .dirty_inode = ubifs_dirty_inode, + .remount_fs = ubifs_remount_fs, + .show_options = ubifs_show_options, + .sync_fs = ubifs_sync_fs, +}; + +/** + * open_ubi - parse UBI device name string and open the UBI device. + * @name: UBI volume name + * @mode: UBI volume open mode + * + * There are several ways to specify UBI volumes when mounting UBIFS: + * o ubiX_Y - UBI device number X, volume Y; + * o ubiY - UBI device number 0, volume Y; + * o ubiX:NAME - mount UBI device X, volume with name NAME; + * o ubi:NAME - mount UBI device 0, volume with name NAME. + * + * Alternative '!' separator may be used instead of ':' (because some shells + * like busybox may interpret ':' as an NFS host name separator). This function + * returns ubi volume object in case of success and a negative error code in + * case of failure. + */ +static struct ubi_volume_desc *open_ubi(const char *name, int mode) +{ + int dev, vol; + char *endptr; + + if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') + return ERR_PTR(-EINVAL); + + /* ubi:NAME method */ + if ((name[3] == ':' || name[3] == '!') && name[4] != '\0') + return ubi_open_volume_nm(0, name + 4, mode); + + if (!isdigit(name[3])) + return ERR_PTR(-EINVAL); + + dev = simple_strtoul(name + 3, &endptr, 0); + + /* ubiY method */ + if (*endptr == '\0') + return ubi_open_volume(0, dev, mode); + + /* ubiX_Y method */ + if (*endptr == '_' && isdigit(endptr[1])) { + vol = simple_strtoul(endptr + 1, &endptr, 0); + if (*endptr != '\0') + return ERR_PTR(-EINVAL); + return ubi_open_volume(dev, vol, mode); + } + + /* ubiX:NAME method */ + if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0') + return ubi_open_volume_nm(dev, ++endptr, mode); + + return ERR_PTR(-EINVAL); +} + +static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct ubi_volume_desc *ubi = sb->s_fs_info; + struct ubifs_info *c; + struct inode *root; + int err; + + c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); + if (!c) + return -ENOMEM; + + spin_lock_init(&c->cnt_lock); + spin_lock_init(&c->cs_lock); + spin_lock_init(&c->buds_lock); + spin_lock_init(&c->space_lock); + spin_lock_init(&c->orphan_lock); + init_rwsem(&c->commit_sem); + mutex_init(&c->lp_mutex); + mutex_init(&c->tnc_mutex); + mutex_init(&c->log_mutex); + mutex_init(&c->mst_mutex); + mutex_init(&c->umount_mutex); + init_waitqueue_head(&c->cmt_wq); + c->buds = RB_ROOT; + c->old_idx = RB_ROOT; + c->size_tree = RB_ROOT; + c->orph_tree = RB_ROOT; + INIT_LIST_HEAD(&c->infos_list); + INIT_LIST_HEAD(&c->idx_gc); + INIT_LIST_HEAD(&c->replay_list); + INIT_LIST_HEAD(&c->replay_buds); + INIT_LIST_HEAD(&c->uncat_list); + INIT_LIST_HEAD(&c->empty_list); + INIT_LIST_HEAD(&c->freeable_list); + INIT_LIST_HEAD(&c->frdi_idx_list); + INIT_LIST_HEAD(&c->unclean_leb_list); + INIT_LIST_HEAD(&c->old_buds); + INIT_LIST_HEAD(&c->orph_list); + INIT_LIST_HEAD(&c->orph_new); + + c->highest_inum = UBIFS_FIRST_INO; + get_random_bytes(&c->vfs_gen, sizeof(int)); + c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; + + ubi_get_volume_info(ubi, &c->vi); + ubi_get_device_info(c->vi.ubi_num, &c->di); + + /* Re-open the UBI device in read-write mode */ + c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); + if (IS_ERR(c->ubi)) { + err = PTR_ERR(c->ubi); + goto out_free; + } + + /* + * UBIFS provids 'backing_dev_info' in order to disable readahead. For + * UBIFS, I/O is not deferred, it is done immediately in readpage, + * which means the user would have to wait not just for their own I/O + * but the readahead I/O as well i.e. completely pointless. + * + * Read-ahead will be disabled because @c->bdi.ra_pages is 0. + */ + c->bdi.capabilities = BDI_CAP_MAP_COPY; + c->bdi.unplug_io_fn = default_unplug_io_fn; + err = bdi_init(&c->bdi); + if (err) + goto out_close; + + err = ubifs_parse_options(c, data, 0); + if (err) + goto out_bdi; + + c->vfs_sb = sb; + + sb->s_fs_info = c; + sb->s_magic = UBIFS_SUPER_MAGIC; + sb->s_blocksize = UBIFS_BLOCK_SIZE; + sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; + sb->s_dev = c->vi.cdev; + sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); + if (c->max_inode_sz > MAX_LFS_FILESIZE) + sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; + sb->s_op = &ubifs_super_operations; + + mutex_lock(&c->umount_mutex); + err = mount_ubifs(c); + if (err) { + ubifs_assert(err < 0); + goto out_unlock; + } + + /* Read the root inode */ + root = ubifs_iget(sb, UBIFS_ROOT_INO); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto out_umount; + } + + sb->s_root = d_alloc_root(root); + if (!sb->s_root) + goto out_iput; + + mutex_unlock(&c->umount_mutex); + + return 0; + +out_iput: + iput(root); +out_umount: + ubifs_umount(c); +out_unlock: + mutex_unlock(&c->umount_mutex); +out_bdi: + bdi_destroy(&c->bdi); +out_close: + ubi_close_volume(c->ubi); +out_free: + kfree(c); + return err; +} + +static int sb_test(struct super_block *sb, void *data) +{ + dev_t *dev = data; + + return sb->s_dev == *dev; +} + +static int sb_set(struct super_block *sb, void *data) +{ + dev_t *dev = data; + + sb->s_dev = *dev; + return 0; +} + +static int ubifs_get_sb(struct file_system_type *fs_type, int flags, + const char *name, void *data, struct vfsmount *mnt) +{ + struct ubi_volume_desc *ubi; + struct ubi_volume_info vi; + struct super_block *sb; + int err; + + dbg_gen("name %s, flags %#x", name, flags); + + /* + * Get UBI device number and volume ID. Mount it read-only so far + * because this might be a new mount point, and UBI allows only one + * read-write user at a time. + */ + ubi = open_ubi(name, UBI_READONLY); + if (IS_ERR(ubi)) { + ubifs_err("cannot open \"%s\", error %d", + name, (int)PTR_ERR(ubi)); + return PTR_ERR(ubi); + } + ubi_get_volume_info(ubi, &vi); + + dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); + + sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); + if (IS_ERR(sb)) { + err = PTR_ERR(sb); + goto out_close; + } + + if (sb->s_root) { + /* A new mount point for already mounted UBIFS */ + dbg_gen("this ubi volume is already mounted"); + if ((flags ^ sb->s_flags) & MS_RDONLY) { + err = -EBUSY; + goto out_deact; + } + } else { + sb->s_flags = flags; + /* + * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is + * replaced by 'c'. + */ + sb->s_fs_info = ubi; + err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); + if (err) + goto out_deact; + /* We do not support atime */ + sb->s_flags |= MS_ACTIVE | MS_NOATIME; + } + + /* 'fill_super()' opens ubi again so we must close it here */ + ubi_close_volume(ubi); + + return simple_set_mnt(mnt, sb); + +out_deact: + up_write(&sb->s_umount); + deactivate_super(sb); +out_close: + ubi_close_volume(ubi); + return err; +} + +static void ubifs_kill_sb(struct super_block *sb) +{ + struct ubifs_info *c = sb->s_fs_info; + + /* + * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' + * in order to be outside BKL. + */ + if (sb->s_root && !(sb->s_flags & MS_RDONLY)) + commit_on_unmount(c); + /* The un-mount routine is actually done in put_super() */ + generic_shutdown_super(sb); +} + +static struct file_system_type ubifs_fs_type = { + .name = "ubifs", + .owner = THIS_MODULE, + .get_sb = ubifs_get_sb, + .kill_sb = ubifs_kill_sb +}; + +/* + * Inode slab cache constructor. + */ +static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) +{ + struct ubifs_inode *ui = obj; + inode_init_once(&ui->vfs_inode); +} + +static int __init ubifs_init(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); + + /* Make sure node sizes are 8-byte aligned */ + BUILD_BUG_ON(UBIFS_CH_SZ & 7); + BUILD_BUG_ON(UBIFS_INO_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_SB_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MST_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_REF_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_CS_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7); + + BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ & 7); + BUILD_BUG_ON(UBIFS_MAX_NODE_SZ & 7); + BUILD_BUG_ON(MIN_WRITE_SZ & 7); + + /* Check min. node size */ + BUILD_BUG_ON(UBIFS_INO_NODE_SZ < MIN_WRITE_SZ); + BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ); + BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ); + BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ); + + BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ); + BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ); + BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ); + BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ > UBIFS_MAX_NODE_SZ); + + /* Defined node sizes */ + BUILD_BUG_ON(UBIFS_SB_NODE_SZ != 4096); + BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512); + BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160); + BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); + + /* + * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to + * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. + */ + if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { + ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" + " at least 4096 bytes", + (unsigned int)PAGE_CACHE_SIZE); + return -EINVAL; + } + + err = register_filesystem(&ubifs_fs_type); + if (err) { + ubifs_err("cannot register file system, error %d", err); + return err; + } + + err = -ENOMEM; + ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", + sizeof(struct ubifs_inode), 0, + SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, + &inode_slab_ctor); + if (!ubifs_inode_slab) + goto out_reg; + + register_shrinker(&ubifs_shrinker_info); + + err = ubifs_compressors_init(); + if (err) + goto out_compr; + + return 0; + +out_compr: + unregister_shrinker(&ubifs_shrinker_info); + kmem_cache_destroy(ubifs_inode_slab); +out_reg: + unregister_filesystem(&ubifs_fs_type); + return err; +} +/* late_initcall to let compressors initialize first */ +late_initcall(ubifs_init); + +static void __exit ubifs_exit(void) +{ + ubifs_assert(list_empty(&ubifs_infos)); + ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); + + ubifs_compressors_exit(); + unregister_shrinker(&ubifs_shrinker_info); + kmem_cache_destroy(ubifs_inode_slab); + unregister_filesystem(&ubifs_fs_type); +} +module_exit(ubifs_exit); + +MODULE_LICENSE("GPL"); +MODULE_VERSION(__stringify(UBIFS_VERSION)); +MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter"); +MODULE_DESCRIPTION("UBIFS - UBI File System"); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c new file mode 100644 index 000000000000..e909f4a96443 --- /dev/null +++ b/fs/ubifs/tnc.c @@ -0,0 +1,2956 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements TNC (Tree Node Cache) which caches indexing nodes of + * the UBIFS B-tree. + * + * At the moment the locking rules of the TNC tree are quite simple and + * straightforward. We just have a mutex and lock it when we traverse the + * tree. If a znode is not in memory, we read it from flash while still having + * the mutex locked. + */ + +#include +#include "ubifs.h" + +/* + * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions. + * @NAME_LESS: name corresponding to the first argument is less than second + * @NAME_MATCHES: names match + * @NAME_GREATER: name corresponding to the second argument is greater than + * first + * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media + * + * These constants were introduce to improve readability. + */ +enum { + NAME_LESS = 0, + NAME_MATCHES = 1, + NAME_GREATER = 2, + NOT_ON_MEDIA = 3, +}; + +/** + * insert_old_idx - record an index node obsoleted since the last commit start. + * @c: UBIFS file-system description object + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. + * + * For recovery, there must always be a complete intact version of the index on + * flash at all times. That is called the "old index". It is the index as at the + * time of the last successful commit. Many of the index nodes in the old index + * may be dirty, but they must not be erased until the next successful commit + * (at which point that index becomes the old index). + * + * That means that the garbage collection and the in-the-gaps method of + * committing must be able to determine if an index node is in the old index. + * Most of the old index nodes can be found by looking up the TNC using the + * 'lookup_znode()' function. However, some of the old index nodes may have + * been deleted from the current index or may have been changed so much that + * they cannot be easily found. In those cases, an entry is added to an RB-tree. + * That is what this function does. The RB-tree is ordered by LEB number and + * offset because they uniquely identify the old index node. + */ +static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) +{ + struct ubifs_old_idx *old_idx, *o; + struct rb_node **p, *parent = NULL; + + old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); + if (unlikely(!old_idx)) + return -ENOMEM; + old_idx->lnum = lnum; + old_idx->offs = offs; + + p = &c->old_idx.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_old_idx, rb); + if (lnum < o->lnum) + p = &(*p)->rb_left; + else if (lnum > o->lnum) + p = &(*p)->rb_right; + else if (offs < o->offs) + p = &(*p)->rb_left; + else if (offs > o->offs) + p = &(*p)->rb_right; + else { + ubifs_err("old idx added twice!"); + kfree(old_idx); + return 0; + } + } + rb_link_node(&old_idx->rb, parent, p); + rb_insert_color(&old_idx->rb, &c->old_idx); + return 0; +} + +/** + * insert_old_idx_znode - record a znode obsoleted since last commit start. + * @c: UBIFS file-system description object + * @znode: znode of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. + */ +int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode) +{ + if (znode->parent) { + struct ubifs_zbranch *zbr; + + zbr = &znode->parent->zbranch[znode->iip]; + if (zbr->len) + return insert_old_idx(c, zbr->lnum, zbr->offs); + } else + if (c->zroot.len) + return insert_old_idx(c, c->zroot.lnum, + c->zroot.offs); + return 0; +} + +/** + * ins_clr_old_idx_znode - record a znode obsoleted since last commit start. + * @c: UBIFS file-system description object + * @znode: znode of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. + */ +static int ins_clr_old_idx_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + int err; + + if (znode->parent) { + struct ubifs_zbranch *zbr; + + zbr = &znode->parent->zbranch[znode->iip]; + if (zbr->len) { + err = insert_old_idx(c, zbr->lnum, zbr->offs); + if (err) + return err; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + } + } else + if (c->zroot.len) { + err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs); + if (err) + return err; + c->zroot.lnum = 0; + c->zroot.offs = 0; + c->zroot.len = 0; + } + return 0; +} + +/** + * destroy_old_idx - destroy the old_idx RB-tree. + * @c: UBIFS file-system description object + * + * During start commit, the old_idx RB-tree is used to avoid overwriting index + * nodes that were in the index last commit but have since been deleted. This + * is necessary for recovery i.e. the old index must be kept intact until the + * new index is successfully written. The old-idx RB-tree is used for the + * in-the-gaps method of writing index nodes and is destroyed every commit. + */ +void destroy_old_idx(struct ubifs_info *c) +{ + struct rb_node *this = c->old_idx.rb_node; + struct ubifs_old_idx *old_idx; + + while (this) { + if (this->rb_left) { + this = this->rb_left; + continue; + } else if (this->rb_right) { + this = this->rb_right; + continue; + } + old_idx = rb_entry(this, struct ubifs_old_idx, rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &old_idx->rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + kfree(old_idx); + } + c->old_idx = RB_ROOT; +} + +/** + * copy_znode - copy a dirty znode. + * @c: UBIFS file-system description object + * @znode: znode to copy + * + * A dirty znode being committed may not be changed, so it is copied. + */ +static struct ubifs_znode *copy_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + struct ubifs_znode *zn; + + zn = kmalloc(c->max_znode_sz, GFP_NOFS); + if (unlikely(!zn)) + return ERR_PTR(-ENOMEM); + + memcpy(zn, znode, c->max_znode_sz); + zn->cnext = NULL; + __set_bit(DIRTY_ZNODE, &zn->flags); + __clear_bit(COW_ZNODE, &zn->flags); + + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + __set_bit(OBSOLETE_ZNODE, &znode->flags); + + if (znode->level != 0) { + int i; + const int n = zn->child_cnt; + + /* The children now have new parent */ + for (i = 0; i < n; i++) { + struct ubifs_zbranch *zbr = &zn->zbranch[i]; + + if (zbr->znode) + zbr->znode->parent = zn; + } + } + + atomic_long_inc(&c->dirty_zn_cnt); + return zn; +} + +/** + * add_idx_dirt - add dirt due to a dirty znode. + * @c: UBIFS file-system description object + * @lnum: LEB number of index node + * @dirt: size of index node + * + * This function updates lprops dirty space and the new size of the index. + */ +static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt) +{ + c->calc_idx_sz -= ALIGN(dirt, 8); + return ubifs_add_dirt(c, lnum, dirt); +} + +/** + * dirty_cow_znode - ensure a znode is not being committed. + * @c: UBIFS file-system description object + * @zbr: branch of znode to check + * + * Returns dirtied znode on success or negative error code on failure. + */ +static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, + struct ubifs_zbranch *zbr) +{ + struct ubifs_znode *znode = zbr->znode; + struct ubifs_znode *zn; + int err; + + if (!test_bit(COW_ZNODE, &znode->flags)) { + /* znode is not being committed */ + if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { + atomic_long_inc(&c->dirty_zn_cnt); + atomic_long_dec(&c->clean_zn_cnt); + atomic_long_dec(&ubifs_clean_zn_cnt); + err = add_idx_dirt(c, zbr->lnum, zbr->len); + if (unlikely(err)) + return ERR_PTR(err); + } + return znode; + } + + zn = copy_znode(c, znode); + if (unlikely(IS_ERR(zn))) + return zn; + + if (zbr->len) { + err = insert_old_idx(c, zbr->lnum, zbr->offs); + if (unlikely(err)) + return ERR_PTR(err); + err = add_idx_dirt(c, zbr->lnum, zbr->len); + } else + err = 0; + + zbr->znode = zn; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + + if (unlikely(err)) + return ERR_PTR(err); + return zn; +} + +/** + * lnc_add - add a leaf node to the leaf node cache. + * @c: UBIFS file-system description object + * @zbr: zbranch of leaf node + * @node: leaf node + * + * Leaf nodes are non-index nodes directory entry nodes or data nodes. The + * purpose of the leaf node cache is to save re-reading the same leaf node over + * and over again. Most things are cached by VFS, however the file system must + * cache directory entries for readdir and for resolving hash collisions. The + * present implementation of the leaf node cache is extremely simple, and + * allows for error returns that are not used but that may be needed if a more + * complex implementation is created. + * + * Note, this function does not add the @node object to LNC directly, but + * allocates a copy of the object and adds the copy to LNC. The reason for this + * is that @node has been allocated outside of the TNC subsystem and will be + * used with @c->tnc_mutex unlock upon return from the TNC subsystem. But LNC + * may be changed at any time, e.g. freed by the shrinker. + */ +static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, + const void *node) +{ + int err; + void *lnc_node; + const struct ubifs_dent_node *dent = node; + + ubifs_assert(!zbr->leaf); + ubifs_assert(zbr->len != 0); + ubifs_assert(is_hash_key(c, &zbr->key)); + + err = ubifs_validate_entry(c, dent); + if (err) { + dbg_dump_stack(); + dbg_dump_node(c, dent); + return err; + } + + lnc_node = kmalloc(zbr->len, GFP_NOFS); + if (!lnc_node) + /* We don't have to have the cache, so no error */ + return 0; + + memcpy(lnc_node, node, zbr->len); + zbr->leaf = lnc_node; + return 0; +} + + /** + * lnc_add_directly - add a leaf node to the leaf-node-cache. + * @c: UBIFS file-system description object + * @zbr: zbranch of leaf node + * @node: leaf node + * + * This function is similar to 'lnc_add()', but it does not create a copy of + * @node but inserts @node to TNC directly. + */ +static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node) +{ + int err; + + ubifs_assert(!zbr->leaf); + ubifs_assert(zbr->len != 0); + + err = ubifs_validate_entry(c, node); + if (err) { + dbg_dump_stack(); + dbg_dump_node(c, node); + return err; + } + + zbr->leaf = node; + return 0; +} + +/** + * lnc_free - remove a leaf node from the leaf node cache. + * @zbr: zbranch of leaf node + * @node: leaf node + */ +static void lnc_free(struct ubifs_zbranch *zbr) +{ + if (!zbr->leaf) + return; + kfree(zbr->leaf); + zbr->leaf = NULL; +} + +/** + * tnc_read_node_nm - read a "hashed" leaf node. + * @c: UBIFS file-system description object + * @zbr: key and position of the node + * @node: node is returned here + * + * This function reads a "hashed" node defined by @zbr from the leaf node cache + * (in it is there) or from the hash media, in which case the node is also + * added to LNC. Returns zero in case of success or a negative negative error + * code in case of failure. + */ +static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node) +{ + int err; + + ubifs_assert(is_hash_key(c, &zbr->key)); + + if (zbr->leaf) { + /* Read from the leaf node cache */ + ubifs_assert(zbr->len != 0); + memcpy(node, zbr->leaf, zbr->len); + return 0; + } + + err = ubifs_tnc_read_node(c, zbr, node); + if (err) + return err; + + /* Add the node to the leaf node cache */ + err = lnc_add(c, zbr, node); + return err; +} + +/** + * try_read_node - read a node if it is a node. + * @c: UBIFS file-system description object + * @buf: buffer to read to + * @type: node type + * @len: node length (not aligned) + * @lnum: LEB number of node to read + * @offs: offset of node to read + * + * This function tries to read a node of known type and length, checks it and + * stores it in @buf. This function returns %1 if a node is present and %0 if + * a node is not present. A negative error code is returned for I/O errors. + * This function performs that same function as ubifs_read_node except that + * it does not require that there is actually a node present and instead + * the return code indicates if a node was read. + */ +static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs) +{ + int err, node_len; + struct ubifs_ch *ch = buf; + uint32_t crc, node_crc; + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + + err = ubi_read(c->ubi, lnum, buf, offs, len); + if (err) { + ubifs_err("cannot read node type %d from LEB %d:%d, error %d", + type, lnum, offs, err); + return err; + } + + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) + return 0; + + if (ch->node_type != type) + return 0; + + node_len = le32_to_cpu(ch->len); + if (node_len != len) + return 0; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); + node_crc = le32_to_cpu(ch->crc); + if (crc != node_crc) + return 0; + + return 1; +} + +/** + * fallible_read_node - try to read a leaf node. + * @c: UBIFS file-system description object + * @key: key of node to read + * @zbr: position of node + * @node: node returned + * + * This function tries to read a node and returns %1 if the node is read, %0 + * if the node is not present, and a negative error code in the case of error. + */ +static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_zbranch *zbr, void *node) +{ + int ret; + + dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); + + ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, + zbr->offs); + if (ret == 1) { + union ubifs_key node_key; + struct ubifs_dent_node *dent = node; + + /* All nodes have key in the same place */ + key_read(c, &dent->key, &node_key); + if (keys_cmp(c, key, &node_key) != 0) + ret = 0; + } + if (ret == 0) + dbg_mnt("dangling branch LEB %d:%d len %d, key %s", + zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); + return ret; +} + +/** + * matches_name - determine if a direntry or xattr entry matches a given name. + * @c: UBIFS file-system description object + * @zbr: zbranch of dent + * @nm: name to match + * + * This function checks if xentry/direntry referred by zbranch @zbr matches name + * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by + * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case + * of failure, a negative error code is returned. + */ +static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr, + const struct qstr *nm) +{ + struct ubifs_dent_node *dent; + int nlen, err; + + /* If possible, match against the dent in the leaf node cache */ + if (!zbr->leaf) { + dent = kmalloc(zbr->len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + err = ubifs_tnc_read_node(c, zbr, dent); + if (err) + goto out_free; + + /* Add the node to the leaf node cache */ + err = lnc_add_directly(c, zbr, dent); + if (err) + goto out_free; + } else + dent = zbr->leaf; + + nlen = le16_to_cpu(dent->nlen); + err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); + if (err == 0) { + if (nlen == nm->len) + return NAME_MATCHES; + else if (nlen < nm->len) + return NAME_LESS; + else + return NAME_GREATER; + } else if (err < 0) + return NAME_LESS; + else + return NAME_GREATER; + +out_free: + kfree(dent); + return err; +} + +/** + * get_znode - get a TNC znode that may not be loaded yet. + * @c: UBIFS file-system description object + * @znode: parent znode + * @n: znode branch slot number + * + * This function returns the znode or a negative error code. + */ +static struct ubifs_znode *get_znode(struct ubifs_info *c, + struct ubifs_znode *znode, int n) +{ + struct ubifs_zbranch *zbr; + + zbr = &znode->zbranch[n]; + if (zbr->znode) + znode = zbr->znode; + else + znode = ubifs_load_znode(c, zbr, znode, n); + return znode; +} + +/** + * tnc_next - find next TNC entry. + * @c: UBIFS file-system description object + * @zn: znode is passed and returned here + * @n: znode branch slot number is passed and returned here + * + * This function returns %0 if the next TNC entry is found, %-ENOENT if there is + * no next entry, or a negative error code otherwise. + */ +static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n) +{ + struct ubifs_znode *znode = *zn; + int nn = *n; + + nn += 1; + if (nn < znode->child_cnt) { + *n = nn; + return 0; + } + while (1) { + struct ubifs_znode *zp; + + zp = znode->parent; + if (!zp) + return -ENOENT; + nn = znode->iip + 1; + znode = zp; + if (nn < znode->child_cnt) { + znode = get_znode(c, znode, nn); + if (IS_ERR(znode)) + return PTR_ERR(znode); + while (znode->level != 0) { + znode = get_znode(c, znode, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + nn = 0; + break; + } + } + *zn = znode; + *n = nn; + return 0; +} + +/** + * tnc_prev - find previous TNC entry. + * @c: UBIFS file-system description object + * @zn: znode is returned here + * @n: znode branch slot number is passed and returned here + * + * This function returns %0 if the previous TNC entry is found, %-ENOENT if + * there is no next entry, or a negative error code otherwise. + */ +static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n) +{ + struct ubifs_znode *znode = *zn; + int nn = *n; + + if (nn > 0) { + *n = nn - 1; + return 0; + } + while (1) { + struct ubifs_znode *zp; + + zp = znode->parent; + if (!zp) + return -ENOENT; + nn = znode->iip - 1; + znode = zp; + if (nn >= 0) { + znode = get_znode(c, znode, nn); + if (IS_ERR(znode)) + return PTR_ERR(znode); + while (znode->level != 0) { + nn = znode->child_cnt - 1; + znode = get_znode(c, znode, nn); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + nn = znode->child_cnt - 1; + break; + } + } + *zn = znode; + *n = nn; + return 0; +} + +/** + * resolve_collision - resolve a collision. + * @c: UBIFS file-system description object + * @key: key of a directory or extended attribute entry + * @zn: znode is returned here + * @n: zbranch number is passed and returned here + * @nm: name of the entry + * + * This function is called for "hashed" keys to make sure that the found key + * really corresponds to the looked up node (directory or extended attribute + * entry). It returns %1 and sets @zn and @n if the collision is resolved. + * %0 is returned if @nm is not found and @zn and @n are set to the previous + * entry, i.e. to the entry after which @nm could follow if it were in TNC. + * This means that @n may be set to %-1 if the leftmost key in @zn is the + * previous one. A negative error code is returned on failures. + */ +static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n, + const struct qstr *nm) +{ + int err; + + err = matches_name(c, &(*zn)->zbranch[*n], nm); + if (unlikely(err < 0)) + return err; + if (err == NAME_MATCHES) + return 1; + + if (err == NAME_GREATER) { + /* Look left */ + while (1) { + err = tnc_prev(c, zn, n); + if (err == -ENOENT) { + ubifs_assert(*n == 0); + *n = -1; + return 0; + } + if (err < 0) + return err; + if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { + /* + * We have found the branch after which we would + * like to insert, but inserting in this znode + * may still be wrong. Consider the following 3 + * znodes, in the case where we are resolving a + * collision with Key2. + * + * znode zp + * ---------------------- + * level 1 | Key0 | Key1 | + * ----------------------- + * | | + * znode za | | znode zb + * ------------ ------------ + * level 0 | Key0 | | Key2 | + * ------------ ------------ + * + * The lookup finds Key2 in znode zb. Lets say + * there is no match and the name is greater so + * we look left. When we find Key0, we end up + * here. If we return now, we will insert into + * znode za at slot n = 1. But that is invalid + * according to the parent's keys. Key2 must + * be inserted into znode zb. + * + * Note, this problem is not relevant for the + * case when we go right, because + * 'tnc_insert()' would correct the parent key. + */ + if (*n == (*zn)->child_cnt - 1) { + err = tnc_next(c, zn, n); + if (err) { + /* Should be impossible */ + ubifs_assert(0); + if (err == -ENOENT) + err = -EINVAL; + return err; + } + ubifs_assert(*n == 0); + *n = -1; + } + return 0; + } + err = matches_name(c, &(*zn)->zbranch[*n], nm); + if (err < 0) + return err; + if (err == NAME_LESS) + return 0; + if (err == NAME_MATCHES) + return 1; + ubifs_assert(err == NAME_GREATER); + } + } else { + int nn = *n; + struct ubifs_znode *znode = *zn; + + /* Look right */ + while (1) { + err = tnc_next(c, &znode, &nn); + if (err == -ENOENT) + return 0; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + return 0; + err = matches_name(c, &znode->zbranch[nn], nm); + if (err < 0) + return err; + if (err == NAME_GREATER) + return 0; + *zn = znode; + *n = nn; + if (err == NAME_MATCHES) + return 1; + ubifs_assert(err == NAME_LESS); + } + } +} + +/** + * fallible_matches_name - determine if a dent matches a given name. + * @c: UBIFS file-system description object + * @zbr: zbranch of dent + * @nm: name to match + * + * This is a "fallible" version of 'matches_name()' function which does not + * panic if the direntry/xentry referred by @zbr does not exist on the media. + * + * This function checks if xentry/direntry referred by zbranch @zbr matches name + * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr + * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA + * if xentry/direntry referred by @zbr does not exist on the media. A negative + * error code is returned in case of failure. + */ +static int fallible_matches_name(struct ubifs_info *c, + struct ubifs_zbranch *zbr, + const struct qstr *nm) +{ + struct ubifs_dent_node *dent; + int nlen, err; + + /* If possible, match against the dent in the leaf node cache */ + if (!zbr->leaf) { + dent = kmalloc(zbr->len, GFP_NOFS); + if (!dent) + return -ENOMEM; + + err = fallible_read_node(c, &zbr->key, zbr, dent); + if (err < 0) + goto out_free; + if (err == 0) { + /* The node was not present */ + err = NOT_ON_MEDIA; + goto out_free; + } + ubifs_assert(err == 1); + + err = lnc_add_directly(c, zbr, dent); + if (err) + goto out_free; + } else + dent = zbr->leaf; + + nlen = le16_to_cpu(dent->nlen); + err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); + if (err == 0) { + if (nlen == nm->len) + return NAME_MATCHES; + else if (nlen < nm->len) + return NAME_LESS; + else + return NAME_GREATER; + } else if (err < 0) + return NAME_LESS; + else + return NAME_GREATER; + +out_free: + kfree(dent); + return err; +} + +/** + * fallible_resolve_collision - resolve a collision even if nodes are missing. + * @c: UBIFS file-system description object + * @key: key + * @zn: znode is returned here + * @n: branch number is passed and returned here + * @nm: name of directory entry + * @adding: indicates caller is adding a key to the TNC + * + * This is a "fallible" version of the 'resolve_collision()' function which + * does not panic if one of the nodes referred to by TNC does not exist on the + * media. This may happen when replaying the journal if a deleted node was + * Garbage-collected and the commit was not done. A branch that refers to a node + * that is not present is called a dangling branch. The following are the return + * codes for this function: + * o if @nm was found, %1 is returned and @zn and @n are set to the found + * branch; + * o if we are @adding and @nm was not found, %0 is returned; + * o if we are not @adding and @nm was not found, but a dangling branch was + * found, then %1 is returned and @zn and @n are set to the dangling branch; + * o a negative error code is returned in case of failure. + */ +static int fallible_resolve_collision(struct ubifs_info *c, + const union ubifs_key *key, + struct ubifs_znode **zn, int *n, + const struct qstr *nm, int adding) +{ + struct ubifs_znode *o_znode = NULL, *znode = *zn; + int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n; + + cmp = fallible_matches_name(c, &znode->zbranch[nn], nm); + if (unlikely(cmp < 0)) + return cmp; + if (cmp == NAME_MATCHES) + return 1; + if (cmp == NOT_ON_MEDIA) { + o_znode = znode; + o_n = nn; + /* + * We are unlucky and hit a dangling branch straight away. + * Now we do not really know where to go to find the needed + * branch - to the left or to the right. Well, let's try left. + */ + unsure = 1; + } else if (!adding) + unsure = 1; /* Remove a dangling branch wherever it is */ + + if (cmp == NAME_GREATER || unsure) { + /* Look left */ + while (1) { + err = tnc_prev(c, zn, n); + if (err == -ENOENT) { + ubifs_assert(*n == 0); + *n = -1; + break; + } + if (err < 0) + return err; + if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { + /* See comments in 'resolve_collision()' */ + if (*n == (*zn)->child_cnt - 1) { + err = tnc_next(c, zn, n); + if (err) { + /* Should be impossible */ + ubifs_assert(0); + if (err == -ENOENT) + err = -EINVAL; + return err; + } + ubifs_assert(*n == 0); + *n = -1; + } + break; + } + err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm); + if (err < 0) + return err; + if (err == NAME_MATCHES) + return 1; + if (err == NOT_ON_MEDIA) { + o_znode = *zn; + o_n = *n; + continue; + } + if (!adding) + continue; + if (err == NAME_LESS) + break; + else + unsure = 0; + } + } + + if (cmp == NAME_LESS || unsure) { + /* Look right */ + *zn = znode; + *n = nn; + while (1) { + err = tnc_next(c, &znode, &nn); + if (err == -ENOENT) + break; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + break; + err = fallible_matches_name(c, &znode->zbranch[nn], nm); + if (err < 0) + return err; + if (err == NAME_GREATER) + break; + *zn = znode; + *n = nn; + if (err == NAME_MATCHES) + return 1; + if (err == NOT_ON_MEDIA) { + o_znode = znode; + o_n = nn; + } + } + } + + /* Never match a dangling branch when adding */ + if (adding || !o_znode) + return 0; + + dbg_mnt("dangling match LEB %d:%d len %d %s", + o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, + o_znode->zbranch[o_n].len, DBGKEY(key)); + *zn = o_znode; + *n = o_n; + return 1; +} + +/** + * matches_position - determine if a zbranch matches a given position. + * @zbr: zbranch of dent + * @lnum: LEB number of dent to match + * @offs: offset of dent to match + * + * This function returns %1 if @lnum:@offs matches, and %0 otherwise. + */ +static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs) +{ + if (zbr->lnum == lnum && zbr->offs == offs) + return 1; + else + return 0; +} + +/** + * resolve_collision_directly - resolve a collision directly. + * @c: UBIFS file-system description object + * @key: key of directory entry + * @zn: znode is passed and returned here + * @n: zbranch number is passed and returned here + * @lnum: LEB number of dent node to match + * @offs: offset of dent node to match + * + * This function is used for "hashed" keys to make sure the found directory or + * extended attribute entry node is what was looked for. It is used when the + * flash address of the right node is known (@lnum:@offs) which makes it much + * easier to resolve collisions (no need to read entries and match full + * names). This function returns %1 and sets @zn and @n if the collision is + * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the + * previous directory entry. Otherwise a negative error code is returned. + */ +static int resolve_collision_directly(struct ubifs_info *c, + const union ubifs_key *key, + struct ubifs_znode **zn, int *n, + int lnum, int offs) +{ + struct ubifs_znode *znode; + int nn, err; + + znode = *zn; + nn = *n; + if (matches_position(&znode->zbranch[nn], lnum, offs)) + return 1; + + /* Look left */ + while (1) { + err = tnc_prev(c, &znode, &nn); + if (err == -ENOENT) + break; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + break; + if (matches_position(&znode->zbranch[nn], lnum, offs)) { + *zn = znode; + *n = nn; + return 1; + } + } + + /* Look right */ + znode = *zn; + nn = *n; + while (1) { + err = tnc_next(c, &znode, &nn); + if (err == -ENOENT) + return 0; + if (err < 0) + return err; + if (keys_cmp(c, &znode->zbranch[nn].key, key)) + return 0; + *zn = znode; + *n = nn; + if (matches_position(&znode->zbranch[nn], lnum, offs)) + return 1; + } +} + +/** + * dirty_cow_bottom_up - dirty a znode and its ancestors. + * @c: UBIFS file-system description object + * @znode: znode to dirty + * + * If we do not have a unique key that resides in a znode, then we cannot + * dirty that znode from the top down (i.e. by using lookup_level0_dirty) + * This function records the path back to the last dirty ancestor, and then + * dirties the znodes on that path. + */ +static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + struct ubifs_znode *zp; + int *path = c->bottom_up_buf, p = 0; + + ubifs_assert(c->zroot.znode); + ubifs_assert(znode); + if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) { + kfree(c->bottom_up_buf); + c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int), + GFP_NOFS); + if (!c->bottom_up_buf) + return ERR_PTR(-ENOMEM); + path = c->bottom_up_buf; + } + if (c->zroot.znode->level) { + /* Go up until parent is dirty */ + while (1) { + int n; + + zp = znode->parent; + if (!zp) + break; + n = znode->iip; + ubifs_assert(p < c->zroot.znode->level); + path[p++] = n; + if (!zp->cnext && ubifs_zn_dirty(znode)) + break; + znode = zp; + } + } + + /* Come back down, dirtying as we go */ + while (1) { + struct ubifs_zbranch *zbr; + + zp = znode->parent; + if (zp) { + ubifs_assert(path[p - 1] >= 0); + ubifs_assert(path[p - 1] < zp->child_cnt); + zbr = &zp->zbranch[path[--p]]; + znode = dirty_cow_znode(c, zbr); + } else { + ubifs_assert(znode == c->zroot.znode); + znode = dirty_cow_znode(c, &c->zroot); + } + if (unlikely(IS_ERR(znode)) || !p) + break; + ubifs_assert(path[p - 1] >= 0); + ubifs_assert(path[p - 1] < znode->child_cnt); + znode = znode->zbranch[path[p - 1]].znode; + } + + return znode; +} + +/** + * ubifs_lookup_level0 - search for zero-level znode. + * @c: UBIFS file-system description object + * @key: key to lookup + * @zn: znode is returned here + * @n: znode branch slot number is returned here + * + * This function looks up the TNC tree and search for zero-level znode which + * refers key @key. The found zero-level znode is returned in @zn. There are 3 + * cases: + * o exact match, i.e. the found zero-level znode contains key @key, then %1 + * is returned and slot number of the matched branch is stored in @n; + * o not exact match, which means that zero-level znode does not contain + * @key, then %0 is returned and slot number of the closed branch is stored + * in @n; + * o @key is so small that it is even less than the lowest key of the + * leftmost zero-level node, then %0 is returned and %0 is stored in @n. + * + * Note, when the TNC tree is traversed, some znodes may be absent, then this + * function reads corresponding indexing nodes and inserts them to TNC. In + * case of failure, a negative error code is returned. + */ +int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n) +{ + int err, exact; + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + + dbg_tnc("search key %s", DBGKEY(key)); + + znode = c->zroot.znode; + if (unlikely(!znode)) { + znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + znode->time = time; + + while (1) { + struct ubifs_zbranch *zbr; + + exact = ubifs_search_zbranch(c, znode, key, n); + + if (znode->level == 0) + break; + + if (*n < 0) + *n = 0; + zbr = &znode->zbranch[*n]; + + if (zbr->znode) { + znode->time = time; + znode = zbr->znode; + continue; + } + + /* znode is not in TNC cache, load it from the media */ + znode = ubifs_load_znode(c, zbr, znode, *n); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + *zn = znode; + if (exact || !is_hash_key(c, key) || *n != -1) { + dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); + return exact; + } + + /* + * Here is a tricky place. We have not found the key and this is a + * "hashed" key, which may collide. The rest of the code deals with + * situations like this: + * + * | 3 | 5 | + * / \ + * | 3 | 5 | | 6 | 7 | (x) + * + * Or more a complex example: + * + * | 1 | 5 | + * / \ + * | 1 | 3 | | 5 | 8 | + * \ / + * | 5 | 5 | | 6 | 7 | (x) + * + * In the examples, if we are looking for key "5", we may reach nodes + * marked with "(x)". In this case what we have do is to look at the + * left and see if there is "5" key there. If there is, we have to + * return it. + * + * Note, this whole situation is possible because we allow to have + * elements which are equivalent to the next key in the parent in the + * children of current znode. For example, this happens if we split a + * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something + * like this: + * | 3 | 5 | + * / \ + * | 3 | 5 | | 5 | 6 | 7 | + * ^ + * And this becomes what is at the first "picture" after key "5" marked + * with "^" is removed. What could be done is we could prohibit + * splitting in the middle of the colliding sequence. Also, when + * removing the leftmost key, we would have to correct the key of the + * parent node, which would introduce additional complications. Namely, + * if we changed the the leftmost key of the parent znode, the garbage + * collector would be unable to find it (GC is doing this when GC'ing + * indexing LEBs). Although we already have an additional RB-tree where + * we save such changed znodes (see 'ins_clr_old_idx_znode()') until + * after the commit. But anyway, this does not look easy to implement + * so we did not try this. + */ + err = tnc_prev(c, &znode, n); + if (err == -ENOENT) { + dbg_tnc("found 0, lvl %d, n -1", znode->level); + *n = -1; + return 0; + } + if (unlikely(err < 0)) + return err; + if (keys_cmp(c, key, &znode->zbranch[*n].key)) { + dbg_tnc("found 0, lvl %d, n -1", znode->level); + *n = -1; + return 0; + } + + dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); + *zn = znode; + return 1; +} + +/** + * lookup_level0_dirty - search for zero-level znode dirtying. + * @c: UBIFS file-system description object + * @key: key to lookup + * @zn: znode is returned here + * @n: znode branch slot number is returned here + * + * This function looks up the TNC tree and search for zero-level znode which + * refers key @key. The found zero-level znode is returned in @zn. There are 3 + * cases: + * o exact match, i.e. the found zero-level znode contains key @key, then %1 + * is returned and slot number of the matched branch is stored in @n; + * o not exact match, which means that zero-level znode does not contain @key + * then %0 is returned and slot number of the closed branch is stored in + * @n; + * o @key is so small that it is even less than the lowest key of the + * leftmost zero-level node, then %0 is returned and %-1 is stored in @n. + * + * Additionally all znodes in the path from the root to the located zero-level + * znode are marked as dirty. + * + * Note, when the TNC tree is traversed, some znodes may be absent, then this + * function reads corresponding indexing nodes and inserts them to TNC. In + * case of failure, a negative error code is returned. + */ +static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n) +{ + int err, exact; + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + + dbg_tnc("search and dirty key %s", DBGKEY(key)); + + znode = c->zroot.znode; + if (unlikely(!znode)) { + znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + znode = dirty_cow_znode(c, &c->zroot); + if (IS_ERR(znode)) + return PTR_ERR(znode); + + znode->time = time; + + while (1) { + struct ubifs_zbranch *zbr; + + exact = ubifs_search_zbranch(c, znode, key, n); + + if (znode->level == 0) + break; + + if (*n < 0) + *n = 0; + zbr = &znode->zbranch[*n]; + + if (zbr->znode) { + znode->time = time; + znode = dirty_cow_znode(c, zbr); + if (IS_ERR(znode)) + return PTR_ERR(znode); + continue; + } + + /* znode is not in TNC cache, load it from the media */ + znode = ubifs_load_znode(c, zbr, znode, *n); + if (IS_ERR(znode)) + return PTR_ERR(znode); + znode = dirty_cow_znode(c, zbr); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + *zn = znode; + if (exact || !is_hash_key(c, key) || *n != -1) { + dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); + return exact; + } + + /* + * See huge comment at 'lookup_level0_dirty()' what is the rest of the + * code. + */ + err = tnc_prev(c, &znode, n); + if (err == -ENOENT) { + *n = -1; + dbg_tnc("found 0, lvl %d, n -1", znode->level); + return 0; + } + if (unlikely(err < 0)) + return err; + if (keys_cmp(c, key, &znode->zbranch[*n].key)) { + *n = -1; + dbg_tnc("found 0, lvl %d, n -1", znode->level); + return 0; + } + + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) + return PTR_ERR(znode); + } + + dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); + *zn = znode; + return 1; +} + +/** + * ubifs_tnc_lookup - look up a file-system node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * + * This function look up and reads node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. + */ +int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, + void *node) +{ + int found, n, err; + struct ubifs_znode *znode; + struct ubifs_zbranch zbr, *zt; + + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { + err = -ENOENT; + goto out; + } else if (found < 0) { + err = found; + goto out; + } + zt = &znode->zbranch[n]; + if (is_hash_key(c, key)) { + /* + * In this case the leaf node cache gets used, so we pass the + * address of the zbranch and keep the mutex locked + */ + err = tnc_read_node_nm(c, zt, node); + goto out; + } + zbr = znode->zbranch[n]; + mutex_unlock(&c->tnc_mutex); + + err = ubifs_tnc_read_node(c, &zbr, node); + return err; + +out: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_locate - look up a file-system node and return it and its location. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @lnum: LEB number is returned here + * @offs: offset is returned here + * + * This function is the same as 'ubifs_tnc_lookup()' but it returns the node + * location also. See 'ubifs_tnc_lookup()'. + */ +int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, + void *node, int *lnum, int *offs) +{ + int found, n, err; + struct ubifs_znode *znode; + struct ubifs_zbranch zbr, *zt; + + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { + err = -ENOENT; + goto out; + } else if (found < 0) { + err = found; + goto out; + } + zt = &znode->zbranch[n]; + if (is_hash_key(c, key)) { + /* + * In this case the leaf node cache gets used, so we pass the + * address of the zbranch and keep the mutex locked + */ + *lnum = zt->lnum; + *offs = zt->offs; + err = tnc_read_node_nm(c, zt, node); + goto out; + } + zbr = znode->zbranch[n]; + mutex_unlock(&c->tnc_mutex); + + *lnum = zbr.lnum; + *offs = zbr.offs; + + err = ubifs_tnc_read_node(c, &zbr, node); + return err; + +out: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * do_lookup_nm- look up a "hashed" node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @nm: node name + * + * This function look up and reads a node which contains name hash in the key. + * Since the hash may have collisions, there may be many nodes with the same + * key, so we have to sequentially look to all of them until the needed one is + * found. This function returns zero in case of success, %-ENOENT if the node + * was not found, and a negative error code in case of failure. + */ +static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + void *node, const struct qstr *nm) +{ + int found, n, err; + struct ubifs_znode *znode; + struct ubifs_zbranch zbr; + + dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { + err = -ENOENT; + goto out_unlock; + } else if (found < 0) { + err = found; + goto out_unlock; + } + + ubifs_assert(n >= 0); + + err = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); + if (unlikely(err < 0)) + goto out_unlock; + if (err == 0) { + err = -ENOENT; + goto out_unlock; + } + + zbr = znode->zbranch[n]; + mutex_unlock(&c->tnc_mutex); + + err = tnc_read_node_nm(c, &zbr, node); + return err; + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_lookup_nm - look up a "hashed" node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @nm: node name + * + * This function look up and reads a node which contains name hash in the key. + * Since the hash may have collisions, there may be many nodes with the same + * key, so we have to sequentially look to all of them until the needed one is + * found. This function returns zero in case of success, %-ENOENT if the node + * was not found, and a negative error code in case of failure. + */ +int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + void *node, const struct qstr *nm) +{ + int err, len; + const struct ubifs_dent_node *dent = node; + + /* + * We assume that in most of the cases there are no name collisions and + * 'ubifs_tnc_lookup()' returns us the right direntry. + */ + err = ubifs_tnc_lookup(c, key, node); + if (err) + return err; + + len = le16_to_cpu(dent->nlen); + if (nm->len == len && !memcmp(dent->name, nm->name, len)) + return 0; + + /* + * Unluckily, there are hash collisions and we have to iterate over + * them look at each direntry with colliding name hash sequentially. + */ + return do_lookup_nm(c, key, node, nm); +} + +/** + * correct_parent_keys - correct parent znodes' keys. + * @c: UBIFS file-system description object + * @znode: znode to correct parent znodes for + * + * This is a helper function for 'tnc_insert()'. When the key of the leftmost + * zbranch changes, keys of parent znodes have to be corrected. This helper + * function is called in such situations and corrects the keys if needed. + */ +static void correct_parent_keys(const struct ubifs_info *c, + struct ubifs_znode *znode) +{ + union ubifs_key *key, *key1; + + ubifs_assert(znode->parent); + ubifs_assert(znode->iip == 0); + + key = &znode->zbranch[0].key; + key1 = &znode->parent->zbranch[0].key; + + while (keys_cmp(c, key, key1) < 0) { + key_copy(c, key, key1); + znode = znode->parent; + znode->alt = 1; + if (!znode->parent || znode->iip) + break; + key1 = &znode->parent->zbranch[0].key; + } +} + +/** + * insert_zbranch - insert a zbranch into a znode. + * @znode: znode into which to insert + * @zbr: zbranch to insert + * @n: slot number to insert to + * + * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in + * znode's array of zbranches and keeps zbranches consolidated, so when a new + * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th + * slot, zbranches starting from @n have to be moved right. + */ +static void insert_zbranch(struct ubifs_znode *znode, + const struct ubifs_zbranch *zbr, int n) +{ + int i; + + ubifs_assert(ubifs_zn_dirty(znode)); + + if (znode->level) { + for (i = znode->child_cnt; i > n; i--) { + znode->zbranch[i] = znode->zbranch[i - 1]; + if (znode->zbranch[i].znode) + znode->zbranch[i].znode->iip = i; + } + if (zbr->znode) + zbr->znode->iip = n; + } else + for (i = znode->child_cnt; i > n; i--) + znode->zbranch[i] = znode->zbranch[i - 1]; + + znode->zbranch[n] = *zbr; + znode->child_cnt += 1; + + /* + * After inserting at slot zero, the lower bound of the key range of + * this znode may have changed. If this znode is subsequently split + * then the upper bound of the key range may change, and furthermore + * it could change to be lower than the original lower bound. If that + * happens, then it will no longer be possible to find this znode in the + * TNC using the key from the index node on flash. That is bad because + * if it is not found, we will assume it is obsolete and may overwrite + * it. Then if there is an unclean unmount, we will start using the + * old index which will be broken. + * + * So we first mark znodes that have insertions at slot zero, and then + * if they are split we add their lnum/offs to the old_idx tree. + */ + if (n == 0) + znode->alt = 1; +} + +/** + * tnc_insert - insert a node into TNC. + * @c: UBIFS file-system description object + * @znode: znode to insert into + * @zbr: branch to insert + * @n: slot number to insert new zbranch to + * + * This function inserts a new node described by @zbr into znode @znode. If + * znode does not have a free slot for new zbranch, it is split. Parent znodes + * are splat as well if needed. Returns zero in case of success or a negative + * error code in case of failure. + */ +static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode, + struct ubifs_zbranch *zbr, int n) +{ + struct ubifs_znode *zn, *zi, *zp; + int i, keep, move, appending = 0; + union ubifs_key *key = &zbr->key; + + ubifs_assert(n >= 0 && n <= c->fanout); + + /* Implement naive insert for now */ +again: + zp = znode->parent; + if (znode->child_cnt < c->fanout) { + ubifs_assert(n != c->fanout); + dbg_tnc("inserted at %d level %d, key %s", n, znode->level, + DBGKEY(key)); + + insert_zbranch(znode, zbr, n); + + /* Ensure parent's key is correct */ + if (n == 0 && zp && znode->iip == 0) + correct_parent_keys(c, znode); + + return 0; + } + + /* + * Unfortunately, @znode does not have more empty slots and we have to + * split it. + */ + dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); + + if (znode->alt) + /* + * We can no longer be sure of finding this znode by key, so we + * record it in the old_idx tree. + */ + ins_clr_old_idx_znode(c, znode); + + zn = kzalloc(c->max_znode_sz, GFP_NOFS); + if (!zn) + return -ENOMEM; + zn->parent = zp; + zn->level = znode->level; + + /* Decide where to split */ + if (znode->level == 0 && n == c->fanout && + key_type(c, key) == UBIFS_DATA_KEY) { + union ubifs_key *key1; + + /* + * If this is an inode which is being appended - do not split + * it because no other zbranches can be inserted between + * zbranches of consecutive data nodes anyway. + */ + key1 = &znode->zbranch[n - 1].key; + if (key_inum(c, key1) == key_inum(c, key) && + key_type(c, key1) == UBIFS_DATA_KEY && + key_block(c, key1) == key_block(c, key) - 1) + appending = 1; + } + + if (appending) { + keep = c->fanout; + move = 0; + } else { + keep = (c->fanout + 1) / 2; + move = c->fanout - keep; + } + + /* + * Although we don't at present, we could look at the neighbors and see + * if we can move some zbranches there. + */ + + if (n < keep) { + /* Insert into existing znode */ + zi = znode; + move += 1; + keep -= 1; + } else { + /* Insert into new znode */ + zi = zn; + n -= keep; + /* Re-parent */ + if (zn->level != 0) + zbr->znode->parent = zn; + } + + __set_bit(DIRTY_ZNODE, &zn->flags); + atomic_long_inc(&c->dirty_zn_cnt); + + zn->child_cnt = move; + znode->child_cnt = keep; + + dbg_tnc("moving %d, keeping %d", move, keep); + + /* Move zbranch */ + for (i = 0; i < move; i++) { + zn->zbranch[i] = znode->zbranch[keep + i]; + /* Re-parent */ + if (zn->level != 0) + if (zn->zbranch[i].znode) { + zn->zbranch[i].znode->parent = zn; + zn->zbranch[i].znode->iip = i; + } + } + + /* Insert new key and branch */ + dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); + + insert_zbranch(zi, zbr, n); + + /* Insert new znode (produced by spitting) into the parent */ + if (zp) { + i = n; + /* Locate insertion point */ + n = znode->iip + 1; + if (appending && n != c->fanout) + appending = 0; + + if (i == 0 && zi == znode && znode->iip == 0) + correct_parent_keys(c, znode); + + /* Tail recursion */ + zbr->key = zn->zbranch[0].key; + zbr->znode = zn; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + znode = zp; + + goto again; + } + + /* We have to split root znode */ + dbg_tnc("creating new zroot at level %d", znode->level + 1); + + zi = kzalloc(c->max_znode_sz, GFP_NOFS); + if (!zi) + return -ENOMEM; + + zi->child_cnt = 2; + zi->level = znode->level + 1; + + __set_bit(DIRTY_ZNODE, &zi->flags); + atomic_long_inc(&c->dirty_zn_cnt); + + zi->zbranch[0].key = znode->zbranch[0].key; + zi->zbranch[0].znode = znode; + zi->zbranch[0].lnum = c->zroot.lnum; + zi->zbranch[0].offs = c->zroot.offs; + zi->zbranch[0].len = c->zroot.len; + zi->zbranch[1].key = zn->zbranch[0].key; + zi->zbranch[1].znode = zn; + + c->zroot.lnum = 0; + c->zroot.offs = 0; + c->zroot.len = 0; + c->zroot.znode = zi; + + zn->parent = zi; + zn->iip = 1; + znode->parent = zi; + znode->iip = 0; + + return 0; +} + +/** + * ubifs_tnc_add - add a node to TNC. + * @c: UBIFS file-system description object + * @key: key to add + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * + * This function adds a node with key @key to TNC. The node may be new or it may + * obsolete some existing one. Returns %0 on success or negative error code on + * failure. + */ +int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, + int offs, int len) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (!found) { + struct ubifs_zbranch zbr; + + zbr.znode = NULL; + zbr.lnum = lnum; + zbr.offs = offs; + zbr.len = len; + key_copy(c, key, &zbr.key); + err = tnc_insert(c, znode, &zbr, n + 1); + } else if (found == 1) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } else + err = found; + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + + return err; +} + +/** + * ubifs_tnc_replace - replace a node in the TNC only if the old node is found. + * @c: UBIFS file-system description object + * @key: key to add + * @old_lnum: LEB number of old node + * @old_offs: old node offset + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * + * This function replaces a node with key @key in the TNC only if the old node + * is found. This function is called by garbage collection when node are moved. + * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, + int old_lnum, int old_offs, int lnum, int offs, int len) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, + old_offs, lnum, offs, len, DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; + goto out_unlock; + } + + if (found == 1) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + found = 0; + if (zbr->lnum == old_lnum && zbr->offs == old_offs) { + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + if (err) + goto out_unlock; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + found = 1; + } else if (is_hash_key(c, key)) { + found = resolve_collision_directly(c, key, &znode, &n, + old_lnum, old_offs); + dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d", + found, znode, n, old_lnum, old_offs); + if (found < 0) { + err = found; + goto out_unlock; + } + + if (found) { + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, + znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + zbr = &znode->zbranch[n]; + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, + zbr->len); + if (err) + goto out_unlock; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } + } + } + + if (!found) + err = ubifs_add_dirt(c, lnum, len); + + if (!err) + err = dbg_check_tnc(c, 0); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_add_nm - add a "hashed" node to TNC. + * @c: UBIFS file-system description object + * @key: key to add + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * @nm: node name + * + * This is the same as 'ubifs_tnc_add()' but it should be used with keys which + * may have collisions, like directory entry keys. + */ +int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, + int lnum, int offs, int len, const struct qstr *nm) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, + DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; + goto out_unlock; + } + + if (found == 1) { + if (c->replaying) + found = fallible_resolve_collision(c, key, &znode, &n, + nm, 1); + else + found = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n); + if (found < 0) { + err = found; + goto out_unlock; + } + + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + + if (found == 1) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + lnc_free(zbr); + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + goto out_unlock; + } + } + + if (!found) { + struct ubifs_zbranch zbr; + + zbr.znode = NULL; + zbr.lnum = lnum; + zbr.offs = offs; + zbr.len = len; + key_copy(c, key, &zbr.key); + err = tnc_insert(c, znode, &zbr, n + 1); + if (err) + goto out_unlock; + if (c->replaying) { + /* + * We did not find it in the index so there may be a + * dangling branch still in the index. So we remove it + * by passing 'ubifs_tnc_remove_nm()' the same key but + * an unmatchable name. + */ + struct qstr noname = { .len = 0, .name = "" }; + + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + if (err) + return err; + return ubifs_tnc_remove_nm(c, key, &noname); + } + } + +out_unlock: + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * tnc_delete - delete a znode form TNC. + * @c: UBIFS file-system description object + * @znode: znode to delete from + * @n: zbranch slot number to delete + * + * This function deletes a leaf node from @n-th slot of @znode. Returns zero in + * case of success and a negative error code in case of failure. + */ +static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) +{ + struct ubifs_zbranch *zbr; + struct ubifs_znode *zp; + int i, err; + + /* Delete without merge for now */ + ubifs_assert(znode->level == 0); + ubifs_assert(n >= 0 && n < c->fanout); + dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); + + zbr = &znode->zbranch[n]; + lnc_free(zbr); + + err = ubifs_add_dirt(c, zbr->lnum, zbr->len); + if (err) { + dbg_dump_znode(c, znode); + return err; + } + + /* We do not "gap" zbranch slots */ + for (i = n; i < znode->child_cnt - 1; i++) + znode->zbranch[i] = znode->zbranch[i + 1]; + znode->child_cnt -= 1; + + if (znode->child_cnt > 0) + return 0; + + /* + * This was the last zbranch, we have to delete this znode from the + * parent. + */ + + do { + ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(ubifs_zn_dirty(znode)); + + zp = znode->parent; + n = znode->iip; + + atomic_long_dec(&c->dirty_zn_cnt); + + err = insert_old_idx_znode(c, znode); + if (err) + return err; + + if (znode->cnext) { + __set_bit(OBSOLETE_ZNODE, &znode->flags); + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } else + kfree(znode); + znode = zp; + } while (znode->child_cnt == 1); /* while removing last child */ + + /* Remove from znode, entry n - 1 */ + znode->child_cnt -= 1; + ubifs_assert(znode->level != 0); + for (i = n; i < znode->child_cnt; i++) { + znode->zbranch[i] = znode->zbranch[i + 1]; + if (znode->zbranch[i].znode) + znode->zbranch[i].znode->iip = i; + } + + /* + * If this is the root and it has only 1 child then + * collapse the tree. + */ + if (!znode->parent) { + while (znode->child_cnt == 1 && znode->level != 0) { + zp = znode; + zbr = &znode->zbranch[0]; + znode = get_znode(c, znode, 0); + if (IS_ERR(znode)) + return PTR_ERR(znode); + znode = dirty_cow_znode(c, zbr); + if (IS_ERR(znode)) + return PTR_ERR(znode); + znode->parent = NULL; + znode->iip = 0; + if (c->zroot.len) { + err = insert_old_idx(c, c->zroot.lnum, + c->zroot.offs); + if (err) + return err; + } + c->zroot.lnum = zbr->lnum; + c->zroot.offs = zbr->offs; + c->zroot.len = zbr->len; + c->zroot.znode = znode; + ubifs_assert(!test_bit(OBSOLETE_ZNODE, + &zp->flags)); + ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); + atomic_long_dec(&c->dirty_zn_cnt); + + if (zp->cnext) { + __set_bit(OBSOLETE_ZNODE, &zp->flags); + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } else + kfree(zp); + } + } + + return 0; +} + +/** + * ubifs_tnc_remove - remove an index entry of a node. + * @c: UBIFS file-system description object + * @key: key of node + * + * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) +{ + int found, n, err = 0; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("key %s", DBGKEY(key)); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; + goto out_unlock; + } + if (found == 1) + err = tnc_delete(c, znode, n); + if (!err) + err = dbg_check_tnc(c, 0); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node. + * @c: UBIFS file-system description object + * @key: key of node + * @nm: directory entry name + * + * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + const struct qstr *nm) +{ + int n, err; + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); + dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); + err = lookup_level0_dirty(c, key, &znode, &n); + if (err < 0) + goto out_unlock; + + if (err) { + if (c->replaying) + err = fallible_resolve_collision(c, key, &znode, &n, + nm, 0); + else + err = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); + if (err < 0) + goto out_unlock; + if (err) { + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + err = tnc_delete(c, znode, n); + } + } + +out_unlock: + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * key_in_range - determine if a key falls within a range of keys. + * @c: UBIFS file-system description object + * @key: key to check + * @from_key: lowest key in range + * @to_key: highest key in range + * + * This function returns %1 if the key is in range and %0 otherwise. + */ +static int key_in_range(struct ubifs_info *c, union ubifs_key *key, + union ubifs_key *from_key, union ubifs_key *to_key) +{ + if (keys_cmp(c, key, from_key) < 0) + return 0; + if (keys_cmp(c, key, to_key) > 0) + return 0; + return 1; +} + +/** + * ubifs_tnc_remove_range - remove index entries in range. + * @c: UBIFS file-system description object + * @from_key: lowest key to remove + * @to_key: highest key to remove + * + * This function removes index entries starting at @from_key and ending at + * @to_key. This function returns zero in case of success and a negative error + * code in case of failure. + */ +int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, + union ubifs_key *to_key) +{ + int i, n, k, err = 0; + struct ubifs_znode *znode; + union ubifs_key *key; + + mutex_lock(&c->tnc_mutex); + while (1) { + /* Find first level 0 znode that contains keys to remove */ + err = ubifs_lookup_level0(c, from_key, &znode, &n); + if (err < 0) + goto out_unlock; + + if (err) + key = from_key; + else { + err = tnc_next(c, &znode, &n); + if (err == -ENOENT) { + err = 0; + goto out_unlock; + } + if (err < 0) + goto out_unlock; + key = &znode->zbranch[n].key; + if (!key_in_range(c, key, from_key, to_key)) { + err = 0; + goto out_unlock; + } + } + + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + } + + /* Remove all keys in range except the first */ + for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) { + key = &znode->zbranch[i].key; + if (!key_in_range(c, key, from_key, to_key)) + break; + lnc_free(&znode->zbranch[i]); + err = ubifs_add_dirt(c, znode->zbranch[i].lnum, + znode->zbranch[i].len); + if (err) { + dbg_dump_znode(c, znode); + goto out_unlock; + } + dbg_tnc("removing %s", DBGKEY(key)); + } + if (k) { + for (i = n + 1 + k; i < znode->child_cnt; i++) + znode->zbranch[i - k] = znode->zbranch[i]; + znode->child_cnt -= k; + } + + /* Now delete the first */ + err = tnc_delete(c, znode, n); + if (err) + goto out_unlock; + } + +out_unlock: + if (!err) + err = dbg_check_tnc(c, 0); + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_tnc_remove_ino - remove an inode from TNC. + * @c: UBIFS file-system description object + * @inum: inode number to remove + * + * This function remove inode @inum and all the extended attributes associated + * with the anode from TNC and returns zero in case of success or a negative + * error code in case of failure. + */ +int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) +{ + union ubifs_key key1, key2; + struct ubifs_dent_node *xent, *pxent = NULL; + struct qstr nm = { .name = NULL }; + + dbg_tnc("ino %lu", inum); + + /* + * Walk all extended attribute entries and remove them together with + * corresponding extended attribute inodes. + */ + lowest_xent_key(c, &key1, inum); + while (1) { + ino_t xattr_inum; + int err; + + xent = ubifs_tnc_next_ent(c, &key1, &nm); + if (IS_ERR(xent)) { + err = PTR_ERR(xent); + if (err == -ENOENT) + break; + return err; + } + + xattr_inum = le64_to_cpu(xent->inum); + dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum); + + nm.name = xent->name; + nm.len = le16_to_cpu(xent->nlen); + err = ubifs_tnc_remove_nm(c, &key1, &nm); + if (err) { + kfree(xent); + return err; + } + + lowest_ino_key(c, &key1, xattr_inum); + highest_ino_key(c, &key2, xattr_inum); + err = ubifs_tnc_remove_range(c, &key1, &key2); + if (err) { + kfree(xent); + return err; + } + + kfree(pxent); + pxent = xent; + key_read(c, &xent->key, &key1); + } + + kfree(pxent); + lowest_ino_key(c, &key1, inum); + highest_ino_key(c, &key2, inum); + + return ubifs_tnc_remove_range(c, &key1, &key2); +} + +/** + * ubifs_tnc_next_ent - walk directory or extended attribute entries. + * @c: UBIFS file-system description object + * @key: key of last entry + * @nm: name of last entry found or %NULL + * + * This function finds and reads the next directory or extended attribute entry + * after the given key (@key) if there is one. @nm is used to resolve + * collisions. + * + * If the name of the current entry is not known and only the key is known, + * @nm->name has to be %NULL. In this case the semantics of this function is a + * little bit different and it returns the entry corresponding to this key, not + * the next one. If the key was not found, the closest "right" entry is + * returned. + * + * If the fist entry has to be found, @key has to contain the lowest possible + * key value for this inode and @name has to be %NULL. + * + * This function returns the found directory or extended attribute entry node + * in case of success, %-ENOENT is returned if no entry was found, and a + * negative error code is returned in case of failure. + */ +struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, + union ubifs_key *key, + const struct qstr *nm) +{ + int n, err, type = key_type(c, key); + struct ubifs_znode *znode; + struct ubifs_dent_node *dent; + struct ubifs_zbranch *zbr; + union ubifs_key *dkey; + + dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); + ubifs_assert(is_hash_key(c, key)); + + mutex_lock(&c->tnc_mutex); + err = ubifs_lookup_level0(c, key, &znode, &n); + if (unlikely(err < 0)) + goto out_unlock; + + if (nm->name) { + if (err) { + /* Handle collisions */ + err = resolve_collision(c, key, &znode, &n, nm); + dbg_tnc("rc returned %d, znode %p, n %d", + err, znode, n); + if (unlikely(err < 0)) + goto out_unlock; + } + + /* Now find next entry */ + err = tnc_next(c, &znode, &n); + if (unlikely(err)) + goto out_unlock; + } else { + /* + * The full name of the entry was not given, in which case the + * behavior of this function is a little different and it + * returns current entry, not the next one. + */ + if (!err) { + /* + * However, the given key does not exist in the TNC + * tree and @znode/@n variables contain the closest + * "preceding" element. Switch to the next one. + */ + err = tnc_next(c, &znode, &n); + if (err) + goto out_unlock; + } + } + + zbr = &znode->zbranch[n]; + dent = kmalloc(zbr->len, GFP_NOFS); + if (unlikely(!dent)) { + err = -ENOMEM; + goto out_unlock; + } + + /* + * The above 'tnc_next()' call could lead us to the next inode, check + * this. + */ + dkey = &zbr->key; + if (key_inum(c, dkey) != key_inum(c, key) || + key_type(c, dkey) != type) { + err = -ENOENT; + goto out_free; + } + + err = tnc_read_node_nm(c, zbr, dent); + if (unlikely(err)) + goto out_free; + + mutex_unlock(&c->tnc_mutex); + return dent; + +out_free: + kfree(dent); +out_unlock: + mutex_unlock(&c->tnc_mutex); + return ERR_PTR(err); +} + +/** + * tnc_destroy_cnext - destroy left-over obsolete znodes from a failed commit. + * @c: UBIFS file-system description object + * + * Destroy left-over obsolete znodes from a failed commit. + */ +static void tnc_destroy_cnext(struct ubifs_info *c) +{ + struct ubifs_znode *cnext; + + if (!c->cnext) + return; + ubifs_assert(c->cmt_state == COMMIT_BROKEN); + cnext = c->cnext; + do { + struct ubifs_znode *znode = cnext; + + cnext = cnext->cnext; + if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + kfree(znode); + } while (cnext && cnext != c->cnext); +} + +/** + * ubifs_tnc_close - close TNC subsystem and free all related resources. + * @c: UBIFS file-system description object + */ +void ubifs_tnc_close(struct ubifs_info *c) +{ + long clean_freed; + + tnc_destroy_cnext(c); + if (c->zroot.znode) { + clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); + atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); + } + kfree(c->gap_lebs); + kfree(c->ilebs); + destroy_old_idx(c); +} + +/** + * left_znode - get the znode to the left. + * @c: UBIFS file-system description object + * @znode: znode + * + * This function returns a pointer to the znode to the left of @znode or NULL if + * there is not one. A negative error code is returned on failure. + */ +static struct ubifs_znode *left_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + int level = znode->level; + + while (1) { + int n = znode->iip - 1; + + /* Go up until we can go left */ + znode = znode->parent; + if (!znode) + return NULL; + if (n >= 0) { + /* Now go down the rightmost branch to 'level' */ + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + while (znode->level != level) { + n = znode->child_cnt - 1; + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + } + break; + } + } + return znode; +} + +/** + * right_znode - get the znode to the right. + * @c: UBIFS file-system description object + * @znode: znode + * + * This function returns a pointer to the znode to the right of @znode or NULL + * if there is not one. A negative error code is returned on failure. + */ +static struct ubifs_znode *right_znode(struct ubifs_info *c, + struct ubifs_znode *znode) +{ + int level = znode->level; + + while (1) { + int n = znode->iip + 1; + + /* Go up until we can go right */ + znode = znode->parent; + if (!znode) + return NULL; + if (n < znode->child_cnt) { + /* Now go down the leftmost branch to 'level' */ + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + while (znode->level != level) { + znode = get_znode(c, znode, 0); + if (IS_ERR(znode)) + return znode; + } + break; + } + } + return znode; +} + +/** + * lookup_znode - find a particular indexing node from TNC. + * @c: UBIFS file-system description object + * @key: index node key to lookup + * @level: index node level + * @lnum: index node LEB number + * @offs: index node offset + * + * This function searches an indexing node by its first key @key and its + * address @lnum:@offs. It looks up the indexing tree by pulling all indexing + * nodes it traverses to TNC. This function is called fro indexing nodes which + * were found on the media by scanning, for example when garbage-collecting or + * when doing in-the-gaps commit. This means that the indexing node which is + * looked for does not have to have exactly the same leftmost key @key, because + * the leftmost key may have been changed, in which case TNC will contain a + * dirty znode which still refers the same @lnum:@offs. This function is clever + * enough to recognize such indexing nodes. + * + * Note, if a znode was deleted or changed too much, then this function will + * not find it. For situations like this UBIFS has the old index RB-tree + * (indexed by @lnum:@offs). + * + * This function returns a pointer to the znode found or %NULL if it is not + * found. A negative error code is returned on failure. + */ +static struct ubifs_znode *lookup_znode(struct ubifs_info *c, + union ubifs_key *key, int level, + int lnum, int offs) +{ + struct ubifs_znode *znode, *zn; + int n, nn; + + /* + * The arguments have probably been read off flash, so don't assume + * they are valid. + */ + if (level < 0) + return ERR_PTR(-EINVAL); + + /* Get the root znode */ + znode = c->zroot.znode; + if (!znode) { + znode = ubifs_load_znode(c, &c->zroot, NULL, 0); + if (IS_ERR(znode)) + return znode; + } + /* Check if it is the one we are looking for */ + if (c->zroot.lnum == lnum && c->zroot.offs == offs) + return znode; + /* Descend to the parent level i.e. (level + 1) */ + if (level >= znode->level) + return NULL; + while (1) { + ubifs_search_zbranch(c, znode, key, &n); + if (n < 0) { + /* + * We reached a znode where the leftmost key is greater + * than the key we are searching for. This is the same + * situation as the one described in a huge comment at + * the end of the 'ubifs_lookup_level0()' function. And + * for exactly the same reasons we have to try to look + * left before giving up. + */ + znode = left_znode(c, znode); + if (!znode) + return NULL; + if (IS_ERR(znode)) + return znode; + ubifs_search_zbranch(c, znode, key, &n); + ubifs_assert(n >= 0); + } + if (znode->level == level + 1) + break; + znode = get_znode(c, znode, n); + if (IS_ERR(znode)) + return znode; + } + /* Check if the child is the one we are looking for */ + if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs) + return get_znode(c, znode, n); + /* If the key is unique, there is nowhere else to look */ + if (!is_hash_key(c, key)) + return NULL; + /* + * The key is not unique and so may be also in the znodes to either + * side. + */ + zn = znode; + nn = n; + /* Look left */ + while (1) { + /* Move one branch to the left */ + if (n) + n -= 1; + else { + znode = left_znode(c, znode); + if (!znode) + break; + if (IS_ERR(znode)) + return znode; + n = znode->child_cnt - 1; + } + /* Check it */ + if (znode->zbranch[n].lnum == lnum && + znode->zbranch[n].offs == offs) + return get_znode(c, znode, n); + /* Stop if the key is less than the one we are looking for */ + if (keys_cmp(c, &znode->zbranch[n].key, key) < 0) + break; + } + /* Back to the middle */ + znode = zn; + n = nn; + /* Look right */ + while (1) { + /* Move one branch to the right */ + if (++n >= znode->child_cnt) { + znode = right_znode(c, znode); + if (!znode) + break; + if (IS_ERR(znode)) + return znode; + n = 0; + } + /* Check it */ + if (znode->zbranch[n].lnum == lnum && + znode->zbranch[n].offs == offs) + return get_znode(c, znode, n); + /* Stop if the key is greater than the one we are looking for */ + if (keys_cmp(c, &znode->zbranch[n].key, key) > 0) + break; + } + return NULL; +} + +/** + * is_idx_node_in_tnc - determine if an index node is in the TNC. + * @c: UBIFS file-system description object + * @key: key of index node + * @level: index node level + * @lnum: LEB number of index node + * @offs: offset of index node + * + * This function returns %0 if the index node is not referred to in the TNC, %1 + * if the index node is referred to in the TNC and the corresponding znode is + * dirty, %2 if an index node is referred to in the TNC and the corresponding + * znode is clean, and a negative error code in case of failure. + * + * Note, the @key argument has to be the key of the first child. Also note, + * this function relies on the fact that 0:0 is never a valid LEB number and + * offset for a main-area node. + */ +int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs) +{ + struct ubifs_znode *znode; + + znode = lookup_znode(c, key, level, lnum, offs); + if (!znode) + return 0; + if (IS_ERR(znode)) + return PTR_ERR(znode); + + return ubifs_zn_dirty(znode) ? 1 : 2; +} + +/** + * is_leaf_node_in_tnc - determine if a non-indexing not is in the TNC. + * @c: UBIFS file-system description object + * @key: node key + * @lnum: node LEB number + * @offs: node offset + * + * This function returns %1 if the node is referred to in the TNC, %0 if it is + * not, and a negative error code in case of failure. + * + * Note, this function relies on the fact that 0:0 is never a valid LEB number + * and offset for a main-area node. + */ +static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, + int lnum, int offs) +{ + struct ubifs_zbranch *zbr; + struct ubifs_znode *znode, *zn; + int n, found, err, nn; + const int unique = !is_hash_key(c, key); + + found = ubifs_lookup_level0(c, key, &znode, &n); + if (found < 0) + return found; /* Error code */ + if (!found) + return 0; + zbr = &znode->zbranch[n]; + if (lnum == zbr->lnum && offs == zbr->offs) + return 1; /* Found it */ + if (unique) + return 0; + /* + * Because the key is not unique, we have to look left + * and right as well + */ + zn = znode; + nn = n; + /* Look left */ + while (1) { + err = tnc_prev(c, &znode, &n); + if (err == -ENOENT) + break; + if (err) + return err; + if (keys_cmp(c, key, &znode->zbranch[n].key)) + break; + zbr = &znode->zbranch[n]; + if (lnum == zbr->lnum && offs == zbr->offs) + return 1; /* Found it */ + } + /* Look right */ + znode = zn; + n = nn; + while (1) { + err = tnc_next(c, &znode, &n); + if (err) { + if (err == -ENOENT) + return 0; + return err; + } + if (keys_cmp(c, key, &znode->zbranch[n].key)) + break; + zbr = &znode->zbranch[n]; + if (lnum == zbr->lnum && offs == zbr->offs) + return 1; /* Found it */ + } + return 0; +} + +/** + * ubifs_tnc_has_node - determine whether a node is in the TNC. + * @c: UBIFS file-system description object + * @key: node key + * @level: index node level (if it is an index node) + * @lnum: node LEB number + * @offs: node offset + * @is_idx: non-zero if the node is an index node + * + * This function returns %1 if the node is in the TNC, %0 if it is not, and a + * negative error code in case of failure. For index nodes, @key has to be the + * key of the first child. An index node is considered to be in the TNC only if + * the corresponding znode is clean or has not been loaded. + */ +int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs, int is_idx) +{ + int err; + + mutex_lock(&c->tnc_mutex); + if (is_idx) { + err = is_idx_node_in_tnc(c, key, level, lnum, offs); + if (err < 0) + goto out_unlock; + if (err == 1) + /* The index node was found but it was dirty */ + err = 0; + else if (err == 2) + /* The index node was found and it was clean */ + err = 1; + else + BUG_ON(err != 0); + } else + err = is_leaf_node_in_tnc(c, key, lnum, offs); + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * ubifs_dirty_idx_node - dirty an index node. + * @c: UBIFS file-system description object + * @key: index node key + * @level: index node level + * @lnum: index node LEB number + * @offs: index node offset + * + * This function loads and dirties an index node so that it can be garbage + * collected. The @key argument has to be the key of the first child. This + * function relies on the fact that 0:0 is never a valid LEB number and offset + * for a main-area node. Returns %0 on success and a negative error code on + * failure. + */ +int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs) +{ + struct ubifs_znode *znode; + int err = 0; + + mutex_lock(&c->tnc_mutex); + znode = lookup_znode(c, key, level, lnum, offs); + if (!znode) + goto out_unlock; + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c new file mode 100644 index 000000000000..8117e65ba2e9 --- /dev/null +++ b/fs/ubifs/tnc_commit.c @@ -0,0 +1,1103 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* This file implements TNC functions for committing */ + +#include "ubifs.h" + +/** + * make_idx_node - make an index node for fill-the-gaps method of TNC commit. + * @c: UBIFS file-system description object + * @idx: buffer in which to place new index node + * @znode: znode from which to make new index node + * @lnum: LEB number where new index node will be written + * @offs: offset where new index node will be written + * @len: length of new index node + */ +static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, + struct ubifs_znode *znode, int lnum, int offs, int len) +{ + struct ubifs_znode *zp; + int i, err; + + /* Make index node */ + idx->ch.node_type = UBIFS_IDX_NODE; + idx->child_cnt = cpu_to_le16(znode->child_cnt); + idx->level = cpu_to_le16(znode->level); + for (i = 0; i < znode->child_cnt; i++) { + struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); + struct ubifs_zbranch *zbr = &znode->zbranch[i]; + + key_write_idx(c, &zbr->key, &br->key); + br->lnum = cpu_to_le32(zbr->lnum); + br->offs = cpu_to_le32(zbr->offs); + br->len = cpu_to_le32(zbr->len); + if (!zbr->lnum || !zbr->len) { + ubifs_err("bad ref in znode"); + dbg_dump_znode(c, znode); + if (zbr->znode) + dbg_dump_znode(c, zbr->znode); + } + } + ubifs_prepare_node(c, idx, len, 0); + +#ifdef CONFIG_UBIFS_FS_DEBUG + znode->lnum = lnum; + znode->offs = offs; + znode->len = len; +#endif + + err = insert_old_idx_znode(c, znode); + + /* Update the parent */ + zp = znode->parent; + if (zp) { + struct ubifs_zbranch *zbr; + + zbr = &zp->zbranch[znode->iip]; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } else { + c->zroot.lnum = lnum; + c->zroot.offs = offs; + c->zroot.len = len; + } + c->calc_idx_sz += ALIGN(len, 8); + + atomic_long_dec(&c->dirty_zn_cnt); + + ubifs_assert(ubifs_zn_dirty(znode)); + ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + + __clear_bit(DIRTY_ZNODE, &znode->flags); + __clear_bit(COW_ZNODE, &znode->flags); + + return err; +} + +/** + * fill_gap - make index nodes in gaps in dirty index LEBs. + * @c: UBIFS file-system description object + * @lnum: LEB number that gap appears in + * @gap_start: offset of start of gap + * @gap_end: offset of end of gap + * @dirt: adds dirty space to this + * + * This function returns the number of index nodes written into the gap. + */ +static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end, + int *dirt) +{ + int len, gap_remains, gap_pos, written, pad_len; + + ubifs_assert((gap_start & 7) == 0); + ubifs_assert((gap_end & 7) == 0); + ubifs_assert(gap_end >= gap_start); + + gap_remains = gap_end - gap_start; + if (!gap_remains) + return 0; + gap_pos = gap_start; + written = 0; + while (c->enext) { + len = ubifs_idx_node_sz(c, c->enext->child_cnt); + if (len < gap_remains) { + struct ubifs_znode *znode = c->enext; + const int alen = ALIGN(len, 8); + int err; + + ubifs_assert(alen <= gap_remains); + err = make_idx_node(c, c->ileb_buf + gap_pos, znode, + lnum, gap_pos, len); + if (err) + return err; + gap_remains -= alen; + gap_pos += alen; + c->enext = znode->cnext; + if (c->enext == c->cnext) + c->enext = NULL; + written += 1; + } else + break; + } + if (gap_end == c->leb_size) { + c->ileb_len = ALIGN(gap_pos, c->min_io_size); + /* Pad to end of min_io_size */ + pad_len = c->ileb_len - gap_pos; + } else + /* Pad to end of gap */ + pad_len = gap_remains; + dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d", + lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len); + ubifs_pad(c, c->ileb_buf + gap_pos, pad_len); + *dirt += pad_len; + return written; +} + +/** + * find_old_idx - find an index node obsoleted since the last commit start. + * @c: UBIFS file-system description object + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + * + * Returns %1 if found and %0 otherwise. + */ +static int find_old_idx(struct ubifs_info *c, int lnum, int offs) +{ + struct ubifs_old_idx *o; + struct rb_node *p; + + p = c->old_idx.rb_node; + while (p) { + o = rb_entry(p, struct ubifs_old_idx, rb); + if (lnum < o->lnum) + p = p->rb_left; + else if (lnum > o->lnum) + p = p->rb_right; + else if (offs < o->offs) + p = p->rb_left; + else if (offs > o->offs) + p = p->rb_right; + else + return 1; + } + return 0; +} + +/** + * is_idx_node_in_use - determine if an index node can be overwritten. + * @c: UBIFS file-system description object + * @key: key of index node + * @level: index node level + * @lnum: LEB number of index node + * @offs: offset of index node + * + * If @key / @lnum / @offs identify an index node that was not part of the old + * index, then this function returns %0 (obsolete). Else if the index node was + * part of the old index but is now dirty %1 is returned, else if it is clean %2 + * is returned. A negative error code is returned on failure. + */ +static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key, + int level, int lnum, int offs) +{ + int ret; + + ret = is_idx_node_in_tnc(c, key, level, lnum, offs); + if (ret < 0) + return ret; /* Error code */ + if (ret == 0) + if (find_old_idx(c, lnum, offs)) + return 1; + return ret; +} + +/** + * layout_leb_in_gaps - layout index nodes using in-the-gaps method. + * @c: UBIFS file-system description object + * @p: return LEB number here + * + * This function lays out new index nodes for dirty znodes using in-the-gaps + * method of TNC commit. + * This function merely puts the next znode into the next gap, making no attempt + * to try to maximise the number of znodes that fit. + * This function returns the number of index nodes written into the gaps, or a + * negative error code on failure. + */ +static int layout_leb_in_gaps(struct ubifs_info *c, int *p) +{ + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written; + + tot_written = 0; + /* Get an index LEB with lots of obsolete index nodes */ + lnum = ubifs_find_dirty_idx_leb(c); + if (lnum < 0) + /* + * There also may be dirt in the index head that could be + * filled, however we do not check there at present. + */ + return lnum; /* Error code */ + *p = lnum; + dbg_gc("LEB %d", lnum); + /* + * Scan the index LEB. We use the generic scan for this even though + * it is more comprehensive and less efficient than is needed for this + * purpose. + */ + sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); + c->ileb_len = 0; + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + gap_start = 0; + list_for_each_entry(snod, &sleb->nodes, list) { + struct ubifs_idx_node *idx; + int in_use, level; + + ubifs_assert(snod->type == UBIFS_IDX_NODE); + idx = snod->node; + key_read(c, ubifs_idx_key(c, idx), &snod->key); + level = le16_to_cpu(idx->level); + /* Determine if the index node is in use (not obsolete) */ + in_use = is_idx_node_in_use(c, &snod->key, level, lnum, + snod->offs); + if (in_use < 0) { + ubifs_scan_destroy(sleb); + return in_use; /* Error code */ + } + if (in_use) { + if (in_use == 1) + dirt += ALIGN(snod->len, 8); + /* + * The obsolete index nodes form gaps that can be + * overwritten. This gap has ended because we have + * found an index node that is still in use + * i.e. not obsolete + */ + gap_end = snod->offs; + /* Try to fill gap */ + written = fill_gap(c, lnum, gap_start, gap_end, &dirt); + if (written < 0) { + ubifs_scan_destroy(sleb); + return written; /* Error code */ + } + tot_written += written; + gap_start = ALIGN(snod->offs + snod->len, 8); + } + } + ubifs_scan_destroy(sleb); + c->ileb_len = c->leb_size; + gap_end = c->leb_size; + /* Try to fill gap */ + written = fill_gap(c, lnum, gap_start, gap_end, &dirt); + if (written < 0) + return written; /* Error code */ + tot_written += written; + if (tot_written == 0) { + struct ubifs_lprops lp; + + dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); + err = ubifs_read_one_lp(c, lnum, &lp); + if (err) + return err; + if (lp.free == c->leb_size) { + /* + * We must have snatched this LEB from the idx_gc list + * so we need to correct the free and dirty space. + */ + err = ubifs_change_one_lp(c, lnum, + c->leb_size - c->ileb_len, + dirt, 0, 0, 0); + if (err) + return err; + } + return 0; + } + err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt, + 0, 0, 0); + if (err) + return err; + err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len, + UBI_SHORTTERM); + if (err) + return err; + dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); + return tot_written; +} + +/** + * get_leb_cnt - calculate the number of empty LEBs needed to commit. + * @c: UBIFS file-system description object + * @cnt: number of znodes to commit + * + * This function returns the number of empty LEBs needed to commit @cnt znodes + * to the current index head. The number is not exact and may be more than + * needed. + */ +static int get_leb_cnt(struct ubifs_info *c, int cnt) +{ + int d; + + /* Assume maximum index node size (i.e. overestimate space needed) */ + cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz; + if (cnt < 0) + cnt = 0; + d = c->leb_size / c->max_idx_node_sz; + return DIV_ROUND_UP(cnt, d); +} + +/** + * layout_in_gaps - in-the-gaps method of committing TNC. + * @c: UBIFS file-system description object + * @cnt: number of dirty znodes to commit. + * + * This function lays out new index nodes for dirty znodes using in-the-gaps + * method of TNC commit. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int layout_in_gaps(struct ubifs_info *c, int cnt) +{ + int err, leb_needed_cnt, written, *p; + + dbg_gc("%d znodes to write", cnt); + + c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS); + if (!c->gap_lebs) + return -ENOMEM; + + p = c->gap_lebs; + do { + ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); + written = layout_leb_in_gaps(c, p); + if (written < 0) { + err = written; + if (err == -ENOSPC) { + if (!dbg_force_in_the_gaps_enabled) { + /* + * Do not print scary warnings if the + * debugging option which forces + * in-the-gaps is enabled. + */ + ubifs_err("out of space"); + spin_lock(&c->space_lock); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + dbg_dump_lprops(c); + } + /* Try to commit anyway */ + err = 0; + break; + } + kfree(c->gap_lebs); + c->gap_lebs = NULL; + return err; + } + p++; + cnt -= written; + leb_needed_cnt = get_leb_cnt(c, cnt); + dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt, + leb_needed_cnt, c->ileb_cnt); + } while (leb_needed_cnt > c->ileb_cnt); + + *p = -1; + return 0; +} + +/** + * layout_in_empty_space - layout index nodes in empty space. + * @c: UBIFS file-system description object + * + * This function lays out new index nodes for dirty znodes using empty LEBs. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int layout_in_empty_space(struct ubifs_info *c) +{ + struct ubifs_znode *znode, *cnext, *zp; + int lnum, offs, len, next_len, buf_len, buf_offs, used, avail; + int wlen, blen, err; + + cnext = c->enext; + if (!cnext) + return 0; + + lnum = c->ihead_lnum; + buf_offs = c->ihead_offs; + + buf_len = ubifs_idx_node_sz(c, c->fanout); + buf_len = ALIGN(buf_len, c->min_io_size); + used = 0; + avail = buf_len; + + /* Ensure there is enough room for first write */ + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + if (buf_offs + next_len > c->leb_size) + lnum = -1; + + while (1) { + znode = cnext; + + len = ubifs_idx_node_sz(c, znode->child_cnt); + + /* Determine the index node position */ + if (lnum == -1) { + if (c->ileb_nxt >= c->ileb_cnt) { + ubifs_err("out of space"); + return -ENOSPC; + } + lnum = c->ilebs[c->ileb_nxt++]; + buf_offs = 0; + used = 0; + avail = buf_len; + } + + offs = buf_offs + used; + +#ifdef CONFIG_UBIFS_FS_DEBUG + znode->lnum = lnum; + znode->offs = offs; + znode->len = len; +#endif + + /* Update the parent */ + zp = znode->parent; + if (zp) { + struct ubifs_zbranch *zbr; + int i; + + i = znode->iip; + zbr = &zp->zbranch[i]; + zbr->lnum = lnum; + zbr->offs = offs; + zbr->len = len; + } else { + c->zroot.lnum = lnum; + c->zroot.offs = offs; + c->zroot.len = len; + } + c->calc_idx_sz += ALIGN(len, 8); + + /* + * Once lprops is updated, we can decrease the dirty znode count + * but it is easier to just do it here. + */ + atomic_long_dec(&c->dirty_zn_cnt); + + /* + * Calculate the next index node length to see if there is + * enough room for it + */ + cnext = znode->cnext; + if (cnext == c->cnext) + next_len = 0; + else + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + + if (c->min_io_size == 1) { + buf_offs += ALIGN(len, 8); + if (next_len) { + if (buf_offs + next_len <= c->leb_size) + continue; + err = ubifs_update_one_lp(c, lnum, 0, + c->leb_size - buf_offs, 0, 0); + if (err) + return err; + lnum = -1; + continue; + } + err = ubifs_update_one_lp(c, lnum, + c->leb_size - buf_offs, 0, 0, 0); + if (err) + return err; + break; + } + + /* Update buffer positions */ + wlen = used + len; + used += ALIGN(len, 8); + avail -= ALIGN(len, 8); + + if (next_len != 0 && + buf_offs + used + next_len <= c->leb_size && + avail > 0) + continue; + + if (avail <= 0 && next_len && + buf_offs + used + next_len <= c->leb_size) + blen = buf_len; + else + blen = ALIGN(wlen, c->min_io_size); + + /* The buffer is full or there are no more znodes to do */ + buf_offs += blen; + if (next_len) { + if (buf_offs + next_len > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + c->leb_size - buf_offs, blen - used, + 0, 0); + if (err) + return err; + lnum = -1; + } + used -= blen; + if (used < 0) + used = 0; + avail = buf_len - used; + continue; + } + err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs, + blen - used, 0, 0); + if (err) + return err; + break; + } + +#ifdef CONFIG_UBIFS_FS_DEBUG + c->new_ihead_lnum = lnum; + c->new_ihead_offs = buf_offs; +#endif + + return 0; +} + +/** + * layout_commit - determine positions of index nodes to commit. + * @c: UBIFS file-system description object + * @no_space: indicates that insufficient empty LEBs were allocated + * @cnt: number of znodes to commit + * + * Calculate and update the positions of index nodes to commit. If there were + * an insufficient number of empty LEBs allocated, then index nodes are placed + * into the gaps created by obsolete index nodes in non-empty index LEBs. For + * this purpose, an obsolete index node is one that was not in the index as at + * the end of the last commit. To write "in-the-gaps" requires that those index + * LEBs are updated atomically in-place. + */ +static int layout_commit(struct ubifs_info *c, int no_space, int cnt) +{ + int err; + + if (no_space) { + err = layout_in_gaps(c, cnt); + if (err) + return err; + } + err = layout_in_empty_space(c); + return err; +} + +/** + * find_first_dirty - find first dirty znode. + * @znode: znode to begin searching from + */ +static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode) +{ + int i, cont; + + if (!znode) + return NULL; + + while (1) { + if (znode->level == 0) { + if (ubifs_zn_dirty(znode)) + return znode; + return NULL; + } + cont = 0; + for (i = 0; i < znode->child_cnt; i++) { + struct ubifs_zbranch *zbr = &znode->zbranch[i]; + + if (zbr->znode && ubifs_zn_dirty(zbr->znode)) { + znode = zbr->znode; + cont = 1; + break; + } + } + if (!cont) { + if (ubifs_zn_dirty(znode)) + return znode; + return NULL; + } + } +} + +/** + * find_next_dirty - find next dirty znode. + * @znode: znode to begin searching from + */ +static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode) +{ + int n = znode->iip + 1; + + znode = znode->parent; + if (!znode) + return NULL; + for (; n < znode->child_cnt; n++) { + struct ubifs_zbranch *zbr = &znode->zbranch[n]; + + if (zbr->znode && ubifs_zn_dirty(zbr->znode)) + return find_first_dirty(zbr->znode); + } + return znode; +} + +/** + * get_znodes_to_commit - create list of dirty znodes to commit. + * @c: UBIFS file-system description object + * + * This function returns the number of znodes to commit. + */ +static int get_znodes_to_commit(struct ubifs_info *c) +{ + struct ubifs_znode *znode, *cnext; + int cnt = 0; + + c->cnext = find_first_dirty(c->zroot.znode); + znode = c->enext = c->cnext; + if (!znode) { + dbg_cmt("no znodes to commit"); + return 0; + } + cnt += 1; + while (1) { + ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); + __set_bit(COW_ZNODE, &znode->flags); + znode->alt = 0; + cnext = find_next_dirty(znode); + if (!cnext) { + znode->cnext = c->cnext; + break; + } + znode->cnext = cnext; + znode = cnext; + cnt += 1; + } + dbg_cmt("committing %d znodes", cnt); + ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt)); + return cnt; +} + +/** + * alloc_idx_lebs - allocate empty LEBs to be used to commit. + * @c: UBIFS file-system description object + * @cnt: number of znodes to commit + * + * This function returns %-ENOSPC if it cannot allocate a sufficient number of + * empty LEBs. %0 is returned on success, otherwise a negative error code + * is returned. + */ +static int alloc_idx_lebs(struct ubifs_info *c, int cnt) +{ + int i, leb_cnt, lnum; + + c->ileb_cnt = 0; + c->ileb_nxt = 0; + leb_cnt = get_leb_cnt(c, cnt); + dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt); + if (!leb_cnt) + return 0; + c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS); + if (!c->ilebs) + return -ENOMEM; + for (i = 0; i < leb_cnt; i++) { + lnum = ubifs_find_free_leb_for_idx(c); + if (lnum < 0) + return lnum; + c->ilebs[c->ileb_cnt++] = lnum; + dbg_cmt("LEB %d", lnum); + } + if (dbg_force_in_the_gaps()) + return -ENOSPC; + return 0; +} + +/** + * free_unused_idx_lebs - free unused LEBs that were allocated for the commit. + * @c: UBIFS file-system description object + * + * It is possible that we allocate more empty LEBs for the commit than we need. + * This functions frees the surplus. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int free_unused_idx_lebs(struct ubifs_info *c) +{ + int i, err = 0, lnum, er; + + for (i = c->ileb_nxt; i < c->ileb_cnt; i++) { + lnum = c->ilebs[i]; + dbg_cmt("LEB %d", lnum); + er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_INDEX | LPROPS_TAKEN, 0); + if (!err) + err = er; + } + return err; +} + +/** + * free_idx_lebs - free unused LEBs after commit end. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int free_idx_lebs(struct ubifs_info *c) +{ + int err; + + err = free_unused_idx_lebs(c); + kfree(c->ilebs); + c->ilebs = NULL; + return err; +} + +/** + * ubifs_tnc_start_commit - start TNC commit. + * @c: UBIFS file-system description object + * @zroot: new index root position is returned here + * + * This function prepares the list of indexing nodes to commit and lays out + * their positions on flash. If there is not enough free space it uses the + * in-gap commit method. Returns zero in case of success and a negative error + * code in case of failure. + */ +int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) +{ + int err = 0, cnt; + + mutex_lock(&c->tnc_mutex); + err = dbg_check_tnc(c, 1); + if (err) + goto out; + cnt = get_znodes_to_commit(c); + if (cnt != 0) { + int no_space = 0; + + err = alloc_idx_lebs(c, cnt); + if (err == -ENOSPC) + no_space = 1; + else if (err) + goto out_free; + err = layout_commit(c, no_space, cnt); + if (err) + goto out_free; + ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); + err = free_unused_idx_lebs(c); + if (err) + goto out; + } + destroy_old_idx(c); + memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch)); + + err = ubifs_save_dirty_idx_lnums(c); + if (err) + goto out; + + spin_lock(&c->space_lock); + /* + * Although we have not finished committing yet, update size of the + * committed index ('c->old_idx_sz') and zero out the index growth + * budget. It is OK to do this now, because we've reserved all the + * space which is needed to commit the index, and it is save for the + * budgeting subsystem to assume the index is already committed, + * even though it is not. + */ + c->old_idx_sz = c->calc_idx_sz; + c->budg_uncommitted_idx = 0; + spin_unlock(&c->space_lock); + mutex_unlock(&c->tnc_mutex); + + dbg_cmt("number of index LEBs %d", c->lst.idx_lebs); + dbg_cmt("size of index %llu", c->calc_idx_sz); + return err; + +out_free: + free_idx_lebs(c); +out: + mutex_unlock(&c->tnc_mutex); + return err; +} + +/** + * write_index - write index nodes. + * @c: UBIFS file-system description object + * + * This function writes the index nodes whose positions were laid out in the + * layout_in_empty_space function. + */ +static int write_index(struct ubifs_info *c) +{ + struct ubifs_idx_node *idx; + struct ubifs_znode *znode, *cnext; + int i, lnum, offs, len, next_len, buf_len, buf_offs, used; + int avail, wlen, err, lnum_pos = 0; + + cnext = c->enext; + if (!cnext) + return 0; + + /* + * Always write index nodes to the index head so that index nodes and + * other types of nodes are never mixed in the same erase block. + */ + lnum = c->ihead_lnum; + buf_offs = c->ihead_offs; + + /* Allocate commit buffer */ + buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size); + used = 0; + avail = buf_len; + + /* Ensure there is enough room for first write */ + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + if (buf_offs + next_len > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0, + LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } + + while (1) { + cond_resched(); + + znode = cnext; + idx = c->cbuf + used; + + /* Make index node */ + idx->ch.node_type = UBIFS_IDX_NODE; + idx->child_cnt = cpu_to_le16(znode->child_cnt); + idx->level = cpu_to_le16(znode->level); + for (i = 0; i < znode->child_cnt; i++) { + struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); + struct ubifs_zbranch *zbr = &znode->zbranch[i]; + + key_write_idx(c, &zbr->key, &br->key); + br->lnum = cpu_to_le32(zbr->lnum); + br->offs = cpu_to_le32(zbr->offs); + br->len = cpu_to_le32(zbr->len); + if (!zbr->lnum || !zbr->len) { + ubifs_err("bad ref in znode"); + dbg_dump_znode(c, znode); + if (zbr->znode) + dbg_dump_znode(c, zbr->znode); + } + } + len = ubifs_idx_node_sz(c, znode->child_cnt); + ubifs_prepare_node(c, idx, len, 0); + + /* Determine the index node position */ + if (lnum == -1) { + lnum = c->ilebs[lnum_pos++]; + buf_offs = 0; + used = 0; + avail = buf_len; + } + offs = buf_offs + used; + +#ifdef CONFIG_UBIFS_FS_DEBUG + if (lnum != znode->lnum || offs != znode->offs || + len != znode->len) { + ubifs_err("inconsistent znode posn"); + return -EINVAL; + } +#endif + + /* Grab some stuff from znode while we still can */ + cnext = znode->cnext; + + ubifs_assert(ubifs_zn_dirty(znode)); + ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + + /* + * It is important that other threads should see %DIRTY_ZNODE + * flag cleared before %COW_ZNODE. Specifically, it matters in + * the 'dirty_cow_znode()' function. This is the reason for the + * first barrier. Also, we want the bit changes to be seen to + * other threads ASAP, to avoid unnecesarry copying, which is + * the reason for the second barrier. + */ + clear_bit(DIRTY_ZNODE, &znode->flags); + smp_mb__before_clear_bit(); + clear_bit(COW_ZNODE, &znode->flags); + smp_mb__after_clear_bit(); + + /* Do not access znode from this point on */ + + /* Update buffer positions */ + wlen = used + len; + used += ALIGN(len, 8); + avail -= ALIGN(len, 8); + + /* + * Calculate the next index node length to see if there is + * enough room for it + */ + if (cnext == c->cnext) + next_len = 0; + else + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + + if (c->min_io_size == 1) { + /* + * Write the prepared index node immediately if there is + * no minimum IO size + */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, + wlen, UBI_SHORTTERM); + if (err) + return err; + buf_offs += ALIGN(wlen, 8); + if (next_len) { + used = 0; + avail = buf_len; + if (buf_offs + next_len > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + LPROPS_NC, 0, 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } + continue; + } + } else { + int blen, nxt_offs = buf_offs + used + next_len; + + if (next_len && nxt_offs <= c->leb_size) { + if (avail > 0) + continue; + else + blen = buf_len; + } else { + wlen = ALIGN(wlen, 8); + blen = ALIGN(wlen, c->min_io_size); + ubifs_pad(c, c->cbuf + wlen, blen - wlen); + } + /* + * The buffer is full or there are no more znodes + * to do + */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, + blen, UBI_SHORTTERM); + if (err) + return err; + buf_offs += blen; + if (next_len) { + if (nxt_offs > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, + LPROPS_NC, 0, 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; + } + used -= blen; + if (used < 0) + used = 0; + avail = buf_len - used; + memmove(c->cbuf, c->cbuf + blen, used); + continue; + } + } + break; + } + +#ifdef CONFIG_UBIFS_FS_DEBUG + if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) { + ubifs_err("inconsistent ihead"); + return -EINVAL; + } +#endif + + c->ihead_lnum = lnum; + c->ihead_offs = buf_offs; + + return 0; +} + +/** + * free_obsolete_znodes - free obsolete znodes. + * @c: UBIFS file-system description object + * + * At the end of commit end, obsolete znodes are freed. + */ +static void free_obsolete_znodes(struct ubifs_info *c) +{ + struct ubifs_znode *znode, *cnext; + + cnext = c->cnext; + do { + znode = cnext; + cnext = znode->cnext; + if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + kfree(znode); + else { + znode->cnext = NULL; + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } + } while (cnext != c->cnext); +} + +/** + * return_gap_lebs - return LEBs used by the in-gap commit method. + * @c: UBIFS file-system description object + * + * This function clears the "taken" flag for the LEBs which were used by the + * "commit in-the-gaps" method. + */ +static int return_gap_lebs(struct ubifs_info *c) +{ + int *p, err; + + if (!c->gap_lebs) + return 0; + + dbg_cmt(""); + for (p = c->gap_lebs; *p != -1; p++) { + err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0, + LPROPS_TAKEN, 0); + if (err) + return err; + } + + kfree(c->gap_lebs); + c->gap_lebs = NULL; + return 0; +} + +/** + * ubifs_tnc_end_commit - update the TNC for commit end. + * @c: UBIFS file-system description object + * + * Write the dirty znodes. + */ +int ubifs_tnc_end_commit(struct ubifs_info *c) +{ + int err; + + if (!c->cnext) + return 0; + + err = return_gap_lebs(c); + if (err) + return err; + + err = write_index(c); + if (err) + return err; + + mutex_lock(&c->tnc_mutex); + + dbg_cmt("TNC height is %d", c->zroot.znode->level + 1); + + free_obsolete_znodes(c); + + c->cnext = NULL; + kfree(c->ilebs); + c->ilebs = NULL; + + mutex_unlock(&c->tnc_mutex); + + return 0; +} diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c new file mode 100644 index 000000000000..a25c1cc1f8d9 --- /dev/null +++ b/fs/ubifs/tnc_misc.c @@ -0,0 +1,494 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + * Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains miscelanious TNC-related functions shared betweend + * different files. This file does not form any logically separate TNC + * sub-system. The file was created because there is a lot of TNC code and + * putting it all in one file would make that file too big and unreadable. + */ + +#include "ubifs.h" + +/** + * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal. + * @zr: root of the subtree to traverse + * @znode: previous znode + * + * This function implements levelorder TNC traversal. The LNC is ignored. + * Returns the next element or %NULL if @znode is already the last one. + */ +struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, + struct ubifs_znode *znode) +{ + int level, iip, level_search = 0; + struct ubifs_znode *zn; + + ubifs_assert(zr); + + if (unlikely(!znode)) + return zr; + + if (unlikely(znode == zr)) { + if (znode->level == 0) + return NULL; + return ubifs_tnc_find_child(zr, 0); + } + + level = znode->level; + + iip = znode->iip; + while (1) { + ubifs_assert(znode->level <= zr->level); + + /* + * First walk up until there is a znode with next branch to + * look at. + */ + while (znode->parent != zr && iip >= znode->parent->child_cnt) { + znode = znode->parent; + iip = znode->iip; + } + + if (unlikely(znode->parent == zr && + iip >= znode->parent->child_cnt)) { + /* This level is done, switch to the lower one */ + level -= 1; + if (level_search || level < 0) + /* + * We were already looking for znode at lower + * level ('level_search'). As we are here + * again, it just does not exist. Or all levels + * were finished ('level < 0'). + */ + return NULL; + + level_search = 1; + iip = -1; + znode = ubifs_tnc_find_child(zr, 0); + ubifs_assert(znode); + } + + /* Switch to the next index */ + zn = ubifs_tnc_find_child(znode->parent, iip + 1); + if (!zn) { + /* No more children to look at, we have walk up */ + iip = znode->parent->child_cnt; + continue; + } + + /* Walk back down to the level we came from ('level') */ + while (zn->level != level) { + znode = zn; + zn = ubifs_tnc_find_child(zn, 0); + if (!zn) { + /* + * This path is not too deep so it does not + * reach 'level'. Try next path. + */ + iip = znode->iip; + break; + } + } + + if (zn) { + ubifs_assert(zn->level >= 0); + return zn; + } + } +} + +/** + * ubifs_search_zbranch - search znode branch. + * @c: UBIFS file-system description object + * @znode: znode to search in + * @key: key to search for + * @n: znode branch slot number is returned here + * + * This is a helper function which search branch with key @key in @znode using + * binary search. The result of the search may be: + * o exact match, then %1 is returned, and the slot number of the branch is + * stored in @n; + * o no exact match, then %0 is returned and the slot number of the left + * closest branch is returned in @n; the slot if all keys in this znode are + * greater than @key, then %-1 is returned in @n. + */ +int ubifs_search_zbranch(const struct ubifs_info *c, + const struct ubifs_znode *znode, + const union ubifs_key *key, int *n) +{ + int beg = 0, end = znode->child_cnt, uninitialized_var(mid); + int uninitialized_var(cmp); + const struct ubifs_zbranch *zbr = &znode->zbranch[0]; + + ubifs_assert(end > beg); + + while (end > beg) { + mid = (beg + end) >> 1; + cmp = keys_cmp(c, key, &zbr[mid].key); + if (cmp > 0) + beg = mid + 1; + else if (cmp < 0) + end = mid; + else { + *n = mid; + return 1; + } + } + + *n = end - 1; + + /* The insert point is after *n */ + ubifs_assert(*n >= -1 && *n < znode->child_cnt); + if (*n == -1) + ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0); + else + ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0); + if (*n + 1 < znode->child_cnt) + ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0); + + return 0; +} + +/** + * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal. + * @znode: znode to start at (root of the sub-tree to traverse) + * + * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is + * ignored. + */ +struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode) +{ + if (unlikely(!znode)) + return NULL; + + while (znode->level > 0) { + struct ubifs_znode *child; + + child = ubifs_tnc_find_child(znode, 0); + if (!child) + return znode; + znode = child; + } + + return znode; +} + +/** + * ubifs_tnc_postorder_next - next TNC tree element in postorder traversal. + * @znode: previous znode + * + * This function implements postorder TNC traversal. The LNC is ignored. + * Returns the next element or %NULL if @znode is already the last one. + */ +struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode) +{ + struct ubifs_znode *zn; + + ubifs_assert(znode); + if (unlikely(!znode->parent)) + return NULL; + + /* Switch to the next index in the parent */ + zn = ubifs_tnc_find_child(znode->parent, znode->iip + 1); + if (!zn) + /* This is in fact the last child, return parent */ + return znode->parent; + + /* Go to the first znode in this new subtree */ + return ubifs_tnc_postorder_first(zn); +} + +/** + * ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree. + * @znode: znode defining subtree to destroy + * + * This function destroys subtree of the TNC tree. Returns number of clean + * znodes in the subtree. + */ +long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode) +{ + struct ubifs_znode *zn = ubifs_tnc_postorder_first(znode); + long clean_freed = 0; + int n; + + ubifs_assert(zn); + while (1) { + for (n = 0; n < zn->child_cnt; n++) { + if (!zn->zbranch[n].znode) + continue; + + if (zn->level > 0 && + !ubifs_zn_dirty(zn->zbranch[n].znode)) + clean_freed += 1; + + cond_resched(); + kfree(zn->zbranch[n].znode); + } + + if (zn == znode) { + if (!ubifs_zn_dirty(zn)) + clean_freed += 1; + kfree(zn); + return clean_freed; + } + + zn = ubifs_tnc_postorder_next(zn); + } +} + +/** + * read_znode - read an indexing node from flash and fill znode. + * @c: UBIFS file-system description object + * @lnum: LEB of the indexing node to read + * @offs: node offset + * @len: node length + * @znode: znode to read to + * + * This function reads an indexing node from the flash media and fills znode + * with the read data. Returns zero in case of success and a negative error + * code in case of failure. The read indexing node is validated and if anything + * is wrong with it, this function prints complaint messages and returns + * %-EINVAL. + */ +static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, + struct ubifs_znode *znode) +{ + int i, err, type, cmp; + struct ubifs_idx_node *idx; + + idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); + if (!idx) + return -ENOMEM; + + err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); + if (err < 0) { + kfree(idx); + return err; + } + + znode->child_cnt = le16_to_cpu(idx->child_cnt); + znode->level = le16_to_cpu(idx->level); + + dbg_tnc("LEB %d:%d, level %d, %d branch", + lnum, offs, znode->level, znode->child_cnt); + + if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { + dbg_err("current fanout %d, branch count %d", + c->fanout, znode->child_cnt); + dbg_err("max levels %d, znode level %d", + UBIFS_MAX_LEVELS, znode->level); + err = 1; + goto out_dump; + } + + for (i = 0; i < znode->child_cnt; i++) { + const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); + struct ubifs_zbranch *zbr = &znode->zbranch[i]; + + key_read(c, &br->key, &zbr->key); + zbr->lnum = le32_to_cpu(br->lnum); + zbr->offs = le32_to_cpu(br->offs); + zbr->len = le32_to_cpu(br->len); + zbr->znode = NULL; + + /* Validate branch */ + + if (zbr->lnum < c->main_first || + zbr->lnum >= c->leb_cnt || zbr->offs < 0 || + zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { + dbg_err("bad branch %d", i); + err = 2; + goto out_dump; + } + + switch (key_type(c, &zbr->key)) { + case UBIFS_INO_KEY: + case UBIFS_DATA_KEY: + case UBIFS_DENT_KEY: + case UBIFS_XENT_KEY: + break; + default: + dbg_msg("bad key type at slot %d: %s", i, + DBGKEY(&zbr->key)); + err = 3; + goto out_dump; + } + + if (znode->level) + continue; + + type = key_type(c, &zbr->key); + if (c->ranges[type].max_len == 0) { + if (zbr->len != c->ranges[type].len) { + dbg_err("bad target node (type %d) length (%d)", + type, zbr->len); + dbg_err("have to be %d", c->ranges[type].len); + err = 4; + goto out_dump; + } + } else if (zbr->len < c->ranges[type].min_len || + zbr->len > c->ranges[type].max_len) { + dbg_err("bad target node (type %d) length (%d)", + type, zbr->len); + dbg_err("have to be in range of %d-%d", + c->ranges[type].min_len, + c->ranges[type].max_len); + err = 5; + goto out_dump; + } + } + + /* + * Ensure that the next key is greater or equivalent to the + * previous one. + */ + for (i = 0; i < znode->child_cnt - 1; i++) { + const union ubifs_key *key1, *key2; + + key1 = &znode->zbranch[i].key; + key2 = &znode->zbranch[i + 1].key; + + cmp = keys_cmp(c, key1, key2); + if (cmp > 0) { + dbg_err("bad key order (keys %d and %d)", i, i + 1); + err = 6; + goto out_dump; + } else if (cmp == 0 && !is_hash_key(c, key1)) { + /* These can only be keys with colliding hash */ + dbg_err("keys %d and %d are not hashed but equivalent", + i, i + 1); + err = 7; + goto out_dump; + } + } + + kfree(idx); + return 0; + +out_dump: + ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); + dbg_dump_node(c, idx); + kfree(idx); + return -EINVAL; +} + +/** + * ubifs_load_znode - load znode to TNC cache. + * @c: UBIFS file-system description object + * @zbr: znode branch + * @parent: znode's parent + * @iip: index in parent + * + * This function loads znode pointed to by @zbr into the TNC cache and + * returns pointer to it in case of success and a negative error code in case + * of failure. + */ +struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, + struct ubifs_zbranch *zbr, + struct ubifs_znode *parent, int iip) +{ + int err; + struct ubifs_znode *znode; + + ubifs_assert(!zbr->znode); + /* + * A slab cache is not presently used for znodes because the znode size + * depends on the fanout which is stored in the superblock. + */ + znode = kzalloc(c->max_znode_sz, GFP_NOFS); + if (!znode) + return ERR_PTR(-ENOMEM); + + err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode); + if (err) + goto out; + + atomic_long_inc(&c->clean_zn_cnt); + + /* + * Increment the global clean znode counter as well. It is OK that + * global and per-FS clean znode counters may be inconsistent for some + * short time (because we might be preempted at this point), the global + * one is only used in shrinker. + */ + atomic_long_inc(&ubifs_clean_zn_cnt); + + zbr->znode = znode; + znode->parent = parent; + znode->time = get_seconds(); + znode->iip = iip; + + return znode; + +out: + kfree(znode); + return ERR_PTR(err); +} + +/** + * ubifs_tnc_read_node - read a leaf node from the flash media. + * @c: UBIFS file-system description object + * @zbr: key and position of the node + * @node: node is returned here + * + * This function reads a node defined by @zbr from the flash media. Returns + * zero in case of success or a negative negative error code in case of + * failure. + */ +int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node) +{ + union ubifs_key key1, *key = &zbr->key; + int err, type = key_type(c, key); + struct ubifs_wbuf *wbuf; + + /* + * 'zbr' has to point to on-flash node. The node may sit in a bud and + * may even be in a write buffer, so we have to take care about this. + */ + wbuf = ubifs_get_wbuf(c, zbr->lnum); + if (wbuf) + err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len, + zbr->lnum, zbr->offs); + else + err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum, + zbr->offs); + + if (err) { + dbg_tnc("key %s", DBGKEY(key)); + return err; + } + + /* Make sure the key of the read node is correct */ + key_read(c, key, &key1); + if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) { + ubifs_err("bad key in node at LEB %d:%d", + zbr->lnum, zbr->offs); + dbg_tnc("looked for key %s found node's key %s", + DBGKEY(key), DBGKEY1(&key1)); + dbg_dump_node(c, node); + return -EINVAL; + } + + return 0; +} diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h new file mode 100644 index 000000000000..0cc7da9bed47 --- /dev/null +++ b/fs/ubifs/ubifs-media.h @@ -0,0 +1,745 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file describes UBIFS on-flash format and contains definitions of all the + * relevant data structures and constants. + * + * All UBIFS on-flash objects are stored in the form of nodes. All nodes start + * with the UBIFS node magic number and have the same common header. Nodes + * always sit at 8-byte aligned positions on the media and node header sizes are + * also 8-byte aligned (except for the indexing node and the padding node). + */ + +#ifndef __UBIFS_MEDIA_H__ +#define __UBIFS_MEDIA_H__ + +/* UBIFS node magic number (must not have the padding byte first or last) */ +#define UBIFS_NODE_MAGIC 0x06101831 + +/* UBIFS on-flash format version */ +#define UBIFS_FORMAT_VERSION 4 + +/* Minimum logical eraseblock size in bytes */ +#define UBIFS_MIN_LEB_SZ (15*1024) + +/* Initial CRC32 value used when calculating CRC checksums */ +#define UBIFS_CRC32_INIT 0xFFFFFFFFU + +/* + * UBIFS does not try to compress data if its length is less than the below + * constant. + */ +#define UBIFS_MIN_COMPR_LEN 128 + +/* Root inode number */ +#define UBIFS_ROOT_INO 1 + +/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */ +#define UBIFS_FIRST_INO 64 + +/* + * Maximum file name and extended attribute length (must be a multiple of 8, + * minus 1). + */ +#define UBIFS_MAX_NLEN 255 + +/* Maximum number of data journal heads */ +#define UBIFS_MAX_JHEADS 1 + +/* + * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system, + * which means that it does not treat the underlying media as consisting of + * blocks like in case of hard drives. Do not be confused. UBIFS block is just + * the maximum amount of data which one data node can have or which can be + * attached to an inode node. + */ +#define UBIFS_BLOCK_SIZE 4096 +#define UBIFS_BLOCK_SHIFT 12 +#define UBIFS_BLOCK_MASK 0x00000FFF + +/* UBIFS padding byte pattern (must not be first or last byte of node magic) */ +#define UBIFS_PADDING_BYTE 0xCE + +/* Maximum possible key length */ +#define UBIFS_MAX_KEY_LEN 16 + +/* Key length ("simple" format) */ +#define UBIFS_SK_LEN 8 + +/* Minimum index tree fanout */ +#define UBIFS_MIN_FANOUT 2 + +/* Maximum number of levels in UBIFS indexing B-tree */ +#define UBIFS_MAX_LEVELS 512 + +/* Maximum amount of data attached to an inode in bytes */ +#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE + +/* LEB Properties Tree fanout (must be power of 2) and fanout shift */ +#define UBIFS_LPT_FANOUT 4 +#define UBIFS_LPT_FANOUT_SHIFT 2 + +/* LEB Properties Tree bit field sizes */ +#define UBIFS_LPT_CRC_BITS 16 +#define UBIFS_LPT_CRC_BYTES 2 +#define UBIFS_LPT_TYPE_BITS 4 + +/* The key is always at the same position in all keyed nodes */ +#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) + +/* + * LEB Properties Tree node types. + * + * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties) + * UBIFS_LPT_NNODE: LPT internal node + * UBIFS_LPT_LTAB: LPT's own lprops table + * UBIFS_LPT_LSAVE: LPT's save table (big model only) + * UBIFS_LPT_NODE_CNT: count of LPT node types + * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type + */ +enum { + UBIFS_LPT_PNODE, + UBIFS_LPT_NNODE, + UBIFS_LPT_LTAB, + UBIFS_LPT_LSAVE, + UBIFS_LPT_NODE_CNT, + UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1, +}; + +/* + * UBIFS inode types. + * + * UBIFS_ITYPE_REG: regular file + * UBIFS_ITYPE_DIR: directory + * UBIFS_ITYPE_LNK: soft link + * UBIFS_ITYPE_BLK: block device node + * UBIFS_ITYPE_CHR: character device node + * UBIFS_ITYPE_FIFO: fifo + * UBIFS_ITYPE_SOCK: socket + * UBIFS_ITYPES_CNT: count of supported file types + */ +enum { + UBIFS_ITYPE_REG, + UBIFS_ITYPE_DIR, + UBIFS_ITYPE_LNK, + UBIFS_ITYPE_BLK, + UBIFS_ITYPE_CHR, + UBIFS_ITYPE_FIFO, + UBIFS_ITYPE_SOCK, + UBIFS_ITYPES_CNT, +}; + +/* + * Supported key hash functions. + * + * UBIFS_KEY_HASH_R5: R5 hash + * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name + */ +enum { + UBIFS_KEY_HASH_R5, + UBIFS_KEY_HASH_TEST, +}; + +/* + * Supported key formats. + * + * UBIFS_SIMPLE_KEY_FMT: simple key format + */ +enum { + UBIFS_SIMPLE_KEY_FMT, +}; + +/* + * The simple key format uses 29 bits for storing UBIFS block number and hash + * value. + */ +#define UBIFS_S_KEY_BLOCK_BITS 29 +#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF +#define UBIFS_S_KEY_HASH_BITS UBIFS_S_KEY_BLOCK_BITS +#define UBIFS_S_KEY_HASH_MASK UBIFS_S_KEY_BLOCK_MASK + +/* + * Key types. + * + * UBIFS_INO_KEY: inode node key + * UBIFS_DATA_KEY: data node key + * UBIFS_DENT_KEY: directory entry node key + * UBIFS_XENT_KEY: extended attribute entry key + * UBIFS_KEY_TYPES_CNT: number of supported key types + */ +enum { + UBIFS_INO_KEY, + UBIFS_DATA_KEY, + UBIFS_DENT_KEY, + UBIFS_XENT_KEY, + UBIFS_KEY_TYPES_CNT, +}; + +/* Count of LEBs reserved for the superblock area */ +#define UBIFS_SB_LEBS 1 +/* Count of LEBs reserved for the master area */ +#define UBIFS_MST_LEBS 2 + +/* First LEB of the superblock area */ +#define UBIFS_SB_LNUM 0 +/* First LEB of the master area */ +#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS) +/* First LEB of the log area */ +#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS) + +/* + * The below constants define the absolute minimum values for various UBIFS + * media areas. Many of them actually depend of flash geometry and the FS + * configuration (number of journal heads, orphan LEBs, etc). This means that + * the smallest volume size which can be used for UBIFS cannot be pre-defined + * by these constants. The file-system that meets the below limitation will not + * necessarily mount. UBIFS does run-time calculations and validates the FS + * size. + */ + +/* Minimum number of logical eraseblocks in the log */ +#define UBIFS_MIN_LOG_LEBS 2 +/* Minimum number of bud logical eraseblocks (one for each head) */ +#define UBIFS_MIN_BUD_LEBS 3 +/* Minimum number of journal logical eraseblocks */ +#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS) +/* Minimum number of LPT area logical eraseblocks */ +#define UBIFS_MIN_LPT_LEBS 2 +/* Minimum number of orphan area logical eraseblocks */ +#define UBIFS_MIN_ORPH_LEBS 1 +/* + * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1 + * for GC, 1 for deletions, and at least 1 for committed data). + */ +#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5) + +/* Minimum number of logical eraseblocks */ +#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ + UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \ + UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS) + +/* Node sizes (N.B. these are guaranteed to be multiples of 8) */ +#define UBIFS_CH_SZ sizeof(struct ubifs_ch) +#define UBIFS_INO_NODE_SZ sizeof(struct ubifs_ino_node) +#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node) +#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node) +#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node) +#define UBIFS_PAD_NODE_SZ sizeof(struct ubifs_pad_node) +#define UBIFS_SB_NODE_SZ sizeof(struct ubifs_sb_node) +#define UBIFS_MST_NODE_SZ sizeof(struct ubifs_mst_node) +#define UBIFS_REF_NODE_SZ sizeof(struct ubifs_ref_node) +#define UBIFS_IDX_NODE_SZ sizeof(struct ubifs_idx_node) +#define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node) +#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node) +/* Extended attribute entry nodes are identical to directory entry nodes */ +#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ +/* Only this does not have to be multiple of 8 bytes */ +#define UBIFS_BRANCH_SZ sizeof(struct ubifs_branch) + +/* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */ +#define UBIFS_MAX_DATA_NODE_SZ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE) +#define UBIFS_MAX_INO_NODE_SZ (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA) +#define UBIFS_MAX_DENT_NODE_SZ (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1) +#define UBIFS_MAX_XENT_NODE_SZ UBIFS_MAX_DENT_NODE_SZ + +/* The largest UBIFS node */ +#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ + +/* + * On-flash inode flags. + * + * UBIFS_COMPR_FL: use compression for this inode + * UBIFS_SYNC_FL: I/O on this inode has to be synchronous + * UBIFS_IMMUTABLE_FL: inode is immutable + * UBIFS_APPEND_FL: writes to the inode may only append data + * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous + * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value + * + * Note, these are on-flash flags which correspond to ioctl flags + * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not + * have to be the same. + */ +enum { + UBIFS_COMPR_FL = 0x01, + UBIFS_SYNC_FL = 0x02, + UBIFS_IMMUTABLE_FL = 0x04, + UBIFS_APPEND_FL = 0x08, + UBIFS_DIRSYNC_FL = 0x10, + UBIFS_XATTR_FL = 0x20, +}; + +/* Inode flag bits used by UBIFS */ +#define UBIFS_FL_MASK 0x0000001F + +/* + * UBIFS compression algorithms. + * + * UBIFS_COMPR_NONE: no compression + * UBIFS_COMPR_LZO: LZO compression + * UBIFS_COMPR_ZLIB: ZLIB compression + * UBIFS_COMPR_TYPES_CNT: count of supported compression types + */ +enum { + UBIFS_COMPR_NONE, + UBIFS_COMPR_LZO, + UBIFS_COMPR_ZLIB, + UBIFS_COMPR_TYPES_CNT, +}; + +/* + * UBIFS node types. + * + * UBIFS_INO_NODE: inode node + * UBIFS_DATA_NODE: data node + * UBIFS_DENT_NODE: directory entry node + * UBIFS_XENT_NODE: extended attribute node + * UBIFS_TRUN_NODE: truncation node + * UBIFS_PAD_NODE: padding node + * UBIFS_SB_NODE: superblock node + * UBIFS_MST_NODE: master node + * UBIFS_REF_NODE: LEB reference node + * UBIFS_IDX_NODE: index node + * UBIFS_CS_NODE: commit start node + * UBIFS_ORPH_NODE: orphan node + * UBIFS_NODE_TYPES_CNT: count of supported node types + * + * Note, we index arrays by these numbers, so keep them low and contiguous. + * Node type constants for inodes, direntries and so on have to be the same as + * corresponding key type constants. + */ +enum { + UBIFS_INO_NODE, + UBIFS_DATA_NODE, + UBIFS_DENT_NODE, + UBIFS_XENT_NODE, + UBIFS_TRUN_NODE, + UBIFS_PAD_NODE, + UBIFS_SB_NODE, + UBIFS_MST_NODE, + UBIFS_REF_NODE, + UBIFS_IDX_NODE, + UBIFS_CS_NODE, + UBIFS_ORPH_NODE, + UBIFS_NODE_TYPES_CNT, +}; + +/* + * Master node flags. + * + * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty + * UBIFS_MST_NO_ORPHS: no orphan inodes present + * UBIFS_MST_RCVRY: written by recovery + */ +enum { + UBIFS_MST_DIRTY = 1, + UBIFS_MST_NO_ORPHS = 2, + UBIFS_MST_RCVRY = 4, +}; + +/* + * Node group type (used by recovery to recover whole group or none). + * + * UBIFS_NO_NODE_GROUP: this node is not part of a group + * UBIFS_IN_NODE_GROUP: this node is a part of a group + * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group + */ +enum { + UBIFS_NO_NODE_GROUP = 0, + UBIFS_IN_NODE_GROUP, + UBIFS_LAST_OF_NODE_GROUP, +}; + +/* + * Superblock flags. + * + * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set + */ +enum { + UBIFS_FLG_BIGLPT = 0x02, +}; + +/** + * struct ubifs_ch - common header node. + * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC) + * @crc: CRC-32 checksum of the node header + * @sqnum: sequence number + * @len: full node length + * @node_type: node type + * @group_type: node group type + * @padding: reserved for future, zeroes + * + * Every UBIFS node starts with this common part. If the node has a key, the + * key always goes next. + */ +struct ubifs_ch { + __le32 magic; + __le32 crc; + __le64 sqnum; + __le32 len; + __u8 node_type; + __u8 group_type; + __u8 padding[2]; +} __attribute__ ((packed)); + +/** + * union ubifs_dev_desc - device node descriptor. + * @new: new type device descriptor + * @huge: huge type device descriptor + * + * This data structure describes major/minor numbers of a device node. In an + * inode is a device node then its data contains an object of this type. UBIFS + * uses standard Linux "new" and "huge" device node encodings. + */ +union ubifs_dev_desc { + __le32 new; + __le64 huge; +} __attribute__ ((packed)); + +/** + * struct ubifs_ino_node - inode node. + * @ch: common header + * @key: node key + * @creat_sqnum: sequence number at time of creation + * @size: inode size in bytes (amount of uncompressed data) + * @atime_sec: access time seconds + * @ctime_sec: creation time seconds + * @mtime_sec: modification time seconds + * @atime_nsec: access time nanoseconds + * @ctime_nsec: creation time nanoseconds + * @mtime_nsec: modification time nanoseconds + * @nlink: number of hard links + * @uid: owner ID + * @gid: group ID + * @mode: access flags + * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc) + * @data_len: inode data length + * @xattr_cnt: count of extended attributes this inode has + * @xattr_size: summarized size of all extended attributes in bytes + * @padding1: reserved for future, zeroes + * @xattr_names: sum of lengths of all extended attribute names belonging to + * this inode + * @compr_type: compression type used for this inode + * @padding2: reserved for future, zeroes + * @data: data attached to the inode + * + * Note, even though inode compression type is defined by @compr_type, some + * nodes of this inode may be compressed with different compressor - this + * happens if compression type is changed while the inode already has data + * nodes. But @compr_type will be use for further writes to the inode. + * + * Note, do not forget to amend 'zero_ino_node_unused()' function when changing + * the padding fields. + */ +struct ubifs_ino_node { + struct ubifs_ch ch; + __u8 key[UBIFS_MAX_KEY_LEN]; + __le64 creat_sqnum; + __le64 size; + __le64 atime_sec; + __le64 ctime_sec; + __le64 mtime_sec; + __le32 atime_nsec; + __le32 ctime_nsec; + __le32 mtime_nsec; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 mode; + __le32 flags; + __le32 data_len; + __le32 xattr_cnt; + __le32 xattr_size; + __u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */ + __le32 xattr_names; + __le16 compr_type; + __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ + __u8 data[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_dent_node - directory entry node. + * @ch: common header + * @key: node key + * @inum: target inode number + * @padding1: reserved for future, zeroes + * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc) + * @nlen: name length + * @padding2: reserved for future, zeroes + * @name: zero-terminated name + * + * Note, do not forget to amend 'zero_dent_node_unused()' function when + * changing the padding fields. + */ +struct ubifs_dent_node { + struct ubifs_ch ch; + __u8 key[UBIFS_MAX_KEY_LEN]; + __le64 inum; + __u8 padding1; + __u8 type; + __le16 nlen; + __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ + __u8 name[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_data_node - data node. + * @ch: common header + * @key: node key + * @size: uncompressed data size in bytes + * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc) + * @padding: reserved for future, zeroes + * @data: data + * + * Note, do not forget to amend 'zero_data_node_unused()' function when + * changing the padding fields. + */ +struct ubifs_data_node { + struct ubifs_ch ch; + __u8 key[UBIFS_MAX_KEY_LEN]; + __le32 size; + __le16 compr_type; + __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ + __u8 data[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_trun_node - truncation node. + * @ch: common header + * @inum: truncated inode number + * @padding: reserved for future, zeroes + * @old_size: size before truncation + * @new_size: size after truncation + * + * This node exists only in the journal and never goes to the main area. Note, + * do not forget to amend 'zero_trun_node_unused()' function when changing the + * padding fields. + */ +struct ubifs_trun_node { + struct ubifs_ch ch; + __le32 inum; + __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ + __le64 old_size; + __le64 new_size; +} __attribute__ ((packed)); + +/** + * struct ubifs_pad_node - padding node. + * @ch: common header + * @pad_len: how many bytes after this node are unused (because padded) + * @padding: reserved for future, zeroes + */ +struct ubifs_pad_node { + struct ubifs_ch ch; + __le32 pad_len; +} __attribute__ ((packed)); + +/** + * struct ubifs_sb_node - superblock node. + * @ch: common header + * @padding: reserved for future, zeroes + * @key_hash: type of hash function used in keys + * @key_fmt: format of the key + * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc) + * @min_io_size: minimal input/output unit size + * @leb_size: logical eraseblock size in bytes + * @leb_cnt: count of LEBs used by file-system + * @max_leb_cnt: maximum count of LEBs used by file-system + * @max_bud_bytes: maximum amount of data stored in buds + * @log_lebs: log size in logical eraseblocks + * @lpt_lebs: number of LEBs used for lprops table + * @orph_lebs: number of LEBs used for recording orphans + * @jhead_cnt: count of journal heads + * @fanout: tree fanout (max. number of links per indexing node) + * @lsave_cnt: number of LEB numbers in LPT's save table + * @fmt_version: UBIFS on-flash format version + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * @padding1: reserved for future, zeroes + * @rp_uid: reserve pool UID + * @rp_gid: reserve pool GID + * @rp_size: size of the reserved pool in bytes + * @padding2: reserved for future, zeroes + * @time_gran: time granularity in nanoseconds + * @uuid: UUID generated when the file system image was created + */ +struct ubifs_sb_node { + struct ubifs_ch ch; + __u8 padding[2]; + __u8 key_hash; + __u8 key_fmt; + __le32 flags; + __le32 min_io_size; + __le32 leb_size; + __le32 leb_cnt; + __le32 max_leb_cnt; + __le64 max_bud_bytes; + __le32 log_lebs; + __le32 lpt_lebs; + __le32 orph_lebs; + __le32 jhead_cnt; + __le32 fanout; + __le32 lsave_cnt; + __le32 fmt_version; + __le16 default_compr; + __u8 padding1[2]; + __le32 rp_uid; + __le32 rp_gid; + __le64 rp_size; + __le32 time_gran; + __u8 uuid[16]; + __u8 padding2[3972]; +} __attribute__ ((packed)); + +/** + * struct ubifs_mst_node - master node. + * @ch: common header + * @highest_inum: highest inode number in the committed index + * @cmt_no: commit number + * @flags: various flags (%UBIFS_MST_DIRTY, etc) + * @log_lnum: start of the log + * @root_lnum: LEB number of the root indexing node + * @root_offs: offset within @root_lnum + * @root_len: root indexing node length + * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was + * not reserved and should be reserved on mount) + * @ihead_lnum: LEB number of index head + * @ihead_offs: offset of index head + * @index_size: size of index on flash + * @total_free: total free space in bytes + * @total_dirty: total dirty space in bytes + * @total_used: total used space in bytes (includes only data LEBs) + * @total_dead: total dead space in bytes (includes only data LEBs) + * @total_dark: total dark space in bytes (includes only data LEBs) + * @lpt_lnum: LEB number of LPT root nnode + * @lpt_offs: offset of LPT root nnode + * @nhead_lnum: LEB number of LPT head + * @nhead_offs: offset of LPT head + * @ltab_lnum: LEB number of LPT's own lprops table + * @ltab_offs: offset of LPT's own lprops table + * @lsave_lnum: LEB number of LPT's save table (big model only) + * @lsave_offs: offset of LPT's save table (big model only) + * @lscan_lnum: LEB number of last LPT scan + * @empty_lebs: number of empty logical eraseblocks + * @idx_lebs: number of indexing logical eraseblocks + * @leb_cnt: count of LEBs used by file-system + * @padding: reserved for future, zeroes + */ +struct ubifs_mst_node { + struct ubifs_ch ch; + __le64 highest_inum; + __le64 cmt_no; + __le32 flags; + __le32 log_lnum; + __le32 root_lnum; + __le32 root_offs; + __le32 root_len; + __le32 gc_lnum; + __le32 ihead_lnum; + __le32 ihead_offs; + __le64 index_size; + __le64 total_free; + __le64 total_dirty; + __le64 total_used; + __le64 total_dead; + __le64 total_dark; + __le32 lpt_lnum; + __le32 lpt_offs; + __le32 nhead_lnum; + __le32 nhead_offs; + __le32 ltab_lnum; + __le32 ltab_offs; + __le32 lsave_lnum; + __le32 lsave_offs; + __le32 lscan_lnum; + __le32 empty_lebs; + __le32 idx_lebs; + __le32 leb_cnt; + __u8 padding[344]; +} __attribute__ ((packed)); + +/** + * struct ubifs_ref_node - logical eraseblock reference node. + * @ch: common header + * @lnum: the referred logical eraseblock number + * @offs: start offset in the referred LEB + * @jhead: journal head number + * @padding: reserved for future, zeroes + */ +struct ubifs_ref_node { + struct ubifs_ch ch; + __le32 lnum; + __le32 offs; + __le32 jhead; + __u8 padding[28]; +} __attribute__ ((packed)); + +/** + * struct ubifs_branch - key/reference/length branch + * @lnum: LEB number of the target node + * @offs: offset within @lnum + * @len: target node length + * @key: key + */ +struct ubifs_branch { + __le32 lnum; + __le32 offs; + __le32 len; + __u8 key[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_idx_node - indexing node. + * @ch: common header + * @child_cnt: number of child index nodes + * @level: tree level + * @branches: LEB number / offset / length / key branches + */ +struct ubifs_idx_node { + struct ubifs_ch ch; + __le16 child_cnt; + __le16 level; + __u8 branches[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_cs_node - commit start node. + * @ch: common header + * @cmt_no: commit number + */ +struct ubifs_cs_node { + struct ubifs_ch ch; + __le64 cmt_no; +} __attribute__ ((packed)); + +/** + * struct ubifs_orph_node - orphan node. + * @ch: common header + * @cmt_no: commit number (also top bit is set on the last node of the commit) + * @inos: inode numbers of orphans + */ +struct ubifs_orph_node { + struct ubifs_ch ch; + __le64 cmt_no; + __le64 inos[]; +} __attribute__ ((packed)); + +#endif /* __UBIFS_MEDIA_H__ */ diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h new file mode 100644 index 000000000000..e4f89f271827 --- /dev/null +++ b/fs/ubifs/ubifs.h @@ -0,0 +1,1649 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* Implementation version 0.7 */ + +#ifndef __UBIFS_H__ +#define __UBIFS_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ubifs-media.h" + +/* Version of this UBIFS implementation */ +#define UBIFS_VERSION 1 + +/* Normal UBIFS messages */ +#define ubifs_msg(fmt, ...) \ + printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) +/* UBIFS error messages */ +#define ubifs_err(fmt, ...) \ + printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ + __func__, ##__VA_ARGS__) +/* UBIFS warning messages */ +#define ubifs_warn(fmt, ...) \ + printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) + +/* UBIFS file system VFS magic number */ +#define UBIFS_SUPER_MAGIC 0x24051905 + +/* Number of UBIFS blocks per VFS page */ +#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE) +#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT) + +/* "File system end of life" sequence number watermark */ +#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL +#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL + +/* Minimum amount of data UBIFS writes to the flash */ +#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) + +/* + * Currently we do not support inode number overlapping and re-using, so this + * watermark defines dangerous inode number level. This should be fixed later, + * although it is difficult to exceed current limit. Another option is to use + * 64-bit inode numbers, but this means more overhead. + */ +#define INUM_WARN_WATERMARK 0xFFF00000 +#define INUM_WATERMARK 0xFFFFFF00 + +/* Largest key size supported in this implementation */ +#define CUR_MAX_KEY_LEN UBIFS_SK_LEN + +/* Maximum number of entries in each LPT (LEB category) heap */ +#define LPT_HEAP_SZ 256 + +/* + * Background thread name pattern. The numbers are UBI device and volume + * numbers. + */ +#define BGT_NAME_PATTERN "ubifs_bgt%d_%d" + +/* Default write-buffer synchronization timeout (5 secs) */ +#define DEFAULT_WBUF_TIMEOUT (5 * HZ) + +/* Maximum possible inode number (only 32-bit inodes are supported now) */ +#define MAX_INUM 0xFFFFFFFF + +/* Number of non-data journal heads */ +#define NONDATA_JHEADS_CNT 2 + +/* Garbage collector head */ +#define GCHD 0 +/* Base journal head number */ +#define BASEHD 1 +/* First "general purpose" journal head */ +#define DATAHD 2 + +/* 'No change' value for 'ubifs_change_lp()' */ +#define LPROPS_NC 0x80000001 + +/* + * There is no notion of truncation key because truncation nodes do not exist + * in TNC. However, when replaying, it is handy to introduce fake "truncation" + * keys for truncation nodes because the code becomes simpler. So we define + * %UBIFS_TRUN_KEY type. + */ +#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT + +/* + * How much a directory entry/extended attribute entry adds to the parent/host + * inode. + */ +#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8) + +/* How much an extended attribute adds to the host inode */ +#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8) + +/* + * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered + * "old", and znode which were touched last 'YOUNG_ZNODE_AGE' seconds ago are + * considered "young". This is used by shrinker when selecting znode to trim + * off. + */ +#define OLD_ZNODE_AGE 20 +#define YOUNG_ZNODE_AGE 5 + +/* + * Some compressors, like LZO, may end up with more data then the input buffer. + * So UBIFS always allocates larger output buffer, to be sure the compressor + * will not corrupt memory in case of worst case compression. + */ +#define WORST_COMPR_FACTOR 2 + +/* Maximum expected tree height for use by bottom_up_buf */ +#define BOTTOM_UP_HEIGHT 64 + +/* + * Lockdep classes for UBIFS inode @ui_mutex. + */ +enum { + WB_MUTEX_1 = 0, + WB_MUTEX_2 = 1, + WB_MUTEX_3 = 2, +}; + +/* + * Znode flags (actually, bit numbers which store the flags). + * + * DIRTY_ZNODE: znode is dirty + * COW_ZNODE: znode is being committed and a new instance of this znode has to + * be created before changing this znode + * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is + * still in the commit list and the ongoing commit operation + * will commit it, and delete this znode after it is done + */ +enum { + DIRTY_ZNODE = 0, + COW_ZNODE = 1, + OBSOLETE_ZNODE = 2, +}; + +/* + * Commit states. + * + * COMMIT_RESTING: commit is not wanted + * COMMIT_BACKGROUND: background commit has been requested + * COMMIT_REQUIRED: commit is required + * COMMIT_RUNNING_BACKGROUND: background commit is running + * COMMIT_RUNNING_REQUIRED: commit is running and it is required + * COMMIT_BROKEN: commit failed + */ +enum { + COMMIT_RESTING = 0, + COMMIT_BACKGROUND, + COMMIT_REQUIRED, + COMMIT_RUNNING_BACKGROUND, + COMMIT_RUNNING_REQUIRED, + COMMIT_BROKEN, +}; + +/* + * 'ubifs_scan_a_node()' return values. + * + * SCANNED_GARBAGE: scanned garbage + * SCANNED_EMPTY_SPACE: scanned empty space + * SCANNED_A_NODE: scanned a valid node + * SCANNED_A_CORRUPT_NODE: scanned a corrupted node + * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length + * + * Greater than zero means: 'scanned that number of padding bytes' + */ +enum { + SCANNED_GARBAGE = 0, + SCANNED_EMPTY_SPACE = -1, + SCANNED_A_NODE = -2, + SCANNED_A_CORRUPT_NODE = -3, + SCANNED_A_BAD_PAD_NODE = -4, +}; + +/* + * LPT cnode flag bits. + * + * DIRTY_CNODE: cnode is dirty + * COW_CNODE: cnode is being committed and must be copied before writing + * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), + * so it can (and must) be freed when the commit is finished + */ +enum { + DIRTY_CNODE = 0, + COW_CNODE = 1, + OBSOLETE_CNODE = 2, +}; + +/* + * Dirty flag bits (lpt_drty_flgs) for LPT special nodes. + * + * LTAB_DIRTY: ltab node is dirty + * LSAVE_DIRTY: lsave node is dirty + */ +enum { + LTAB_DIRTY = 1, + LSAVE_DIRTY = 2, +}; + +/* + * Return codes used by the garbage collector. + * @LEB_FREED: the logical eraseblock was freed and is ready to use + * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit + * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes + */ +enum { + LEB_FREED, + LEB_FREED_IDX, + LEB_RETAINED, +}; + +/** + * struct ubifs_old_idx - index node obsoleted since last commit start. + * @rb: rb-tree node + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + */ +struct ubifs_old_idx { + struct rb_node rb; + int lnum; + int offs; +}; + +/* The below union makes it easier to deal with keys */ +union ubifs_key { + uint8_t u8[CUR_MAX_KEY_LEN]; + uint32_t u32[CUR_MAX_KEY_LEN/4]; + uint64_t u64[CUR_MAX_KEY_LEN/8]; + __le32 j32[CUR_MAX_KEY_LEN/4]; +}; + +/** + * struct ubifs_scan_node - UBIFS scanned node information. + * @list: list of scanned nodes + * @key: key of node scanned (if it has one) + * @sqnum: sequence number + * @type: type of node scanned + * @offs: offset with LEB of node scanned + * @len: length of node scanned + * @node: raw node + */ +struct ubifs_scan_node { + struct list_head list; + union ubifs_key key; + unsigned long long sqnum; + int type; + int offs; + int len; + void *node; +}; + +/** + * struct ubifs_scan_leb - UBIFS scanned LEB information. + * @lnum: logical eraseblock number + * @nodes_cnt: number of nodes scanned + * @nodes: list of struct ubifs_scan_node + * @endpt: end point (and therefore the start of empty space) + * @ecc: read returned -EBADMSG + * @buf: buffer containing entire LEB scanned + */ +struct ubifs_scan_leb { + int lnum; + int nodes_cnt; + struct list_head nodes; + int endpt; + int ecc; + void *buf; +}; + +/** + * struct ubifs_gced_idx_leb - garbage-collected indexing LEB. + * @list: list + * @lnum: LEB number + * @unmap: OK to unmap this LEB + * + * This data structure is used to temporary store garbage-collected indexing + * LEBs - they are not released immediately, but only after the next commit. + * This is needed to guarantee recoverability. + */ +struct ubifs_gced_idx_leb { + struct list_head list; + int lnum; + int unmap; +}; + +/** + * struct ubifs_inode - UBIFS in-memory inode description. + * @vfs_inode: VFS inode description object + * @creat_sqnum: sequence number at time of creation + * @xattr_size: summarized size of all extended attributes in bytes + * @xattr_cnt: count of extended attributes this inode has + * @xattr_names: sum of lengths of all extended attribute names belonging to + * this inode + * @dirty: non-zero if the inode is dirty + * @xattr: non-zero if this is an extended attribute inode + * @ui_mutex: serializes inode write-back with the rest of VFS operations, + * serializes "clean <-> dirty" state changes, protects @dirty, + * @ui_size, and @xattr_size + * @ui_lock: protects @synced_i_size + * @synced_i_size: synchronized size of inode, i.e. the value of inode size + * currently stored on the flash; used only for regular file + * inodes + * @ui_size: inode size used by UBIFS when writing to flash + * @flags: inode flags (@UBIFS_COMPR_FL, etc) + * @compr_type: default compression type used for this inode + * @data_len: length of the data attached to the inode + * @data: inode's data + * + * @ui_mutex exists for two main reasons. At first it prevents inodes from + * being written back while UBIFS changing them, being in the middle of an VFS + * operation. This way UBIFS makes sure the inode fields are consistent. For + * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and + * write-back must not write any of them before we have finished. + * + * The second reason is budgeting - UBIFS has to budget all operations. If an + * operation is going to mark an inode dirty, it has to allocate budget for + * this. It cannot just mark it dirty because there is no guarantee there will + * be enough flash space to write the inode back later. This means UBIFS has + * to have full control over inode "clean <-> dirty" transitions (and pages + * actually). But unfortunately, VFS marks inodes dirty in many places, and it + * does not ask the file-system if it is allowed to do so (there is a notifier, + * but it is not enough), i.e., there is no mechanism to synchronize with this. + * So UBIFS has its own inode dirty flag and its own mutex to serialize + * "clean <-> dirty" transitions. + * + * The @synced_i_size field is used to make sure we never write pages which are + * beyond last synchronized inode size. See 'ubifs_writepage()' for more + * information. + * + * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses + * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot + * make sure @inode->i_size is always changed under @ui_mutex, because it + * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock + * with 'ubifs_writepage()' (see file.c). All the other inode fields are + * changed under @ui_mutex, so they do not need "shadow" fields. Note, one + * could consider to rework locking and base it on "shadow" fields. + */ +struct ubifs_inode { + struct inode vfs_inode; + unsigned long long creat_sqnum; + unsigned int xattr_size; + unsigned int xattr_cnt; + unsigned int xattr_names; + unsigned int dirty:1; + unsigned int xattr:1; + struct mutex ui_mutex; + spinlock_t ui_lock; + loff_t synced_i_size; + loff_t ui_size; + int flags; + int compr_type; + int data_len; + void *data; +}; + +/** + * struct ubifs_unclean_leb - records a LEB recovered under read-only mode. + * @list: list + * @lnum: LEB number of recovered LEB + * @endpt: offset where recovery ended + * + * This structure records a LEB identified during recovery that needs to be + * cleaned but was not because UBIFS was mounted read-only. The information + * is used to clean the LEB when remounting to read-write mode. + */ +struct ubifs_unclean_leb { + struct list_head list; + int lnum; + int endpt; +}; + +/* + * LEB properties flags. + * + * LPROPS_UNCAT: not categorized + * LPROPS_DIRTY: dirty > 0, not index + * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index + * LPROPS_FREE: free > 0, not empty, not index + * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs + * LPROPS_EMPTY: LEB is empty, not taken + * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken + * LPROPS_FRDI_IDX: free + dirty == leb_size and index, may be taken + * LPROPS_CAT_MASK: mask for the LEB categories above + * LPROPS_TAKEN: LEB was taken (this flag is not saved on the media) + * LPROPS_INDEX: LEB contains indexing nodes (this flag also exists on flash) + */ +enum { + LPROPS_UNCAT = 0, + LPROPS_DIRTY = 1, + LPROPS_DIRTY_IDX = 2, + LPROPS_FREE = 3, + LPROPS_HEAP_CNT = 3, + LPROPS_EMPTY = 4, + LPROPS_FREEABLE = 5, + LPROPS_FRDI_IDX = 6, + LPROPS_CAT_MASK = 15, + LPROPS_TAKEN = 16, + LPROPS_INDEX = 32, +}; + +/** + * struct ubifs_lprops - logical eraseblock properties. + * @free: amount of free space in bytes + * @dirty: amount of dirty space in bytes + * @flags: LEB properties flags (see above) + * @lnum: LEB number + * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE) + * @hpos: heap position in heap of same-category lprops (other categories) + */ +struct ubifs_lprops { + int free; + int dirty; + int flags; + int lnum; + union { + struct list_head list; + int hpos; + }; +}; + +/** + * struct ubifs_lpt_lprops - LPT logical eraseblock properties. + * @free: amount of free space in bytes + * @dirty: amount of dirty space in bytes + * @tgc: trivial GC flag (1 => unmap after commit end) + * @cmt: commit flag (1 => reserved for commit) + */ +struct ubifs_lpt_lprops { + int free; + int dirty; + unsigned tgc : 1; + unsigned cmt : 1; +}; + +/** + * struct ubifs_lp_stats - statistics of eraseblocks in the main area. + * @empty_lebs: number of empty LEBs + * @taken_empty_lebs: number of taken LEBs + * @idx_lebs: number of indexing LEBs + * @total_free: total free space in bytes + * @total_dirty: total dirty space in bytes + * @total_used: total used space in bytes (includes only data LEBs) + * @total_dead: total dead space in bytes (includes only data LEBs) + * @total_dark: total dark space in bytes (includes only data LEBs) + * + * N.B. total_dirty and total_used are different to other total_* fields, + * because they account _all_ LEBs, not just data LEBs. + * + * 'taken_empty_lebs' counts the LEBs that are in the transient state of having + * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed + * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used + * by itself (in which case 'unused_lebs' would be a better name). In the case + * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC, + * but unlike other empty LEBs that are 'taken', it may not be written straight + * away (i.e. before the next commit start or unmount), so either gc_lnum must + * be specially accounted for, or the current approach followed i.e. count it + * under 'taken_empty_lebs'. + */ +struct ubifs_lp_stats { + int empty_lebs; + int taken_empty_lebs; + int idx_lebs; + long long total_free; + long long total_dirty; + long long total_used; + long long total_dead; + long long total_dark; +}; + +struct ubifs_nnode; + +/** + * struct ubifs_cnode - LEB Properties Tree common node. + * @parent: parent nnode + * @cnext: next cnode to commit + * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) + * @iip: index in parent + * @level: level in the tree (zero for pnodes, greater than zero for nnodes) + * @num: node number + */ +struct ubifs_cnode { + struct ubifs_nnode *parent; + struct ubifs_cnode *cnext; + unsigned long flags; + int iip; + int level; + int num; +}; + +/** + * struct ubifs_pnode - LEB Properties Tree leaf node. + * @parent: parent nnode + * @cnext: next cnode to commit + * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) + * @iip: index in parent + * @level: level in the tree (always zero for pnodes) + * @num: node number + * @lprops: LEB properties array + */ +struct ubifs_pnode { + struct ubifs_nnode *parent; + struct ubifs_cnode *cnext; + unsigned long flags; + int iip; + int level; + int num; + struct ubifs_lprops lprops[UBIFS_LPT_FANOUT]; +}; + +/** + * struct ubifs_nbranch - LEB Properties Tree internal node branch. + * @lnum: LEB number of child + * @offs: offset of child + * @nnode: nnode child + * @pnode: pnode child + * @cnode: cnode child + */ +struct ubifs_nbranch { + int lnum; + int offs; + union { + struct ubifs_nnode *nnode; + struct ubifs_pnode *pnode; + struct ubifs_cnode *cnode; + }; +}; + +/** + * struct ubifs_nnode - LEB Properties Tree internal node. + * @parent: parent nnode + * @cnext: next cnode to commit + * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) + * @iip: index in parent + * @level: level in the tree (always greater than zero for nnodes) + * @num: node number + * @nbranch: branches to child nodes + */ +struct ubifs_nnode { + struct ubifs_nnode *parent; + struct ubifs_cnode *cnext; + unsigned long flags; + int iip; + int level; + int num; + struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT]; +}; + +/** + * struct ubifs_lpt_heap - heap of categorized lprops. + * @arr: heap array + * @cnt: number in heap + * @max_cnt: maximum number allowed in heap + * + * There are %LPROPS_HEAP_CNT heaps. + */ +struct ubifs_lpt_heap { + struct ubifs_lprops **arr; + int cnt; + int max_cnt; +}; + +/* + * Return codes for LPT scan callback function. + * + * LPT_SCAN_CONTINUE: continue scanning + * LPT_SCAN_ADD: add the LEB properties scanned to the tree in memory + * LPT_SCAN_STOP: stop scanning + */ +enum { + LPT_SCAN_CONTINUE = 0, + LPT_SCAN_ADD = 1, + LPT_SCAN_STOP = 2, +}; + +struct ubifs_info; + +/* Callback used by the 'ubifs_lpt_scan_nolock()' function */ +typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, + const struct ubifs_lprops *lprops, + int in_tree, void *data); + +/** + * struct ubifs_wbuf - UBIFS write-buffer. + * @c: UBIFS file-system description object + * @buf: write-buffer (of min. flash I/O unit size) + * @lnum: logical eraseblock number the write-buffer points to + * @offs: write-buffer offset in this logical eraseblock + * @avail: number of bytes available in the write-buffer + * @used: number of used bytes in the write-buffer + * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, + * %UBI_UNKNOWN) + * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep + * up by 'mutex_lock_nested()). + * @sync_callback: write-buffer synchronization callback + * @io_mutex: serializes write-buffer I/O + * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes + * fields + * @timer: write-buffer timer + * @timeout: timer expire interval in jiffies + * @need_sync: it is set if its timer expired and needs sync + * @next_ino: points to the next position of the following inode number + * @inodes: stores the inode numbers of the nodes which are in wbuf + * + * The write-buffer synchronization callback is called when the write-buffer is + * synchronized in order to notify how much space was wasted due to + * write-buffer padding and how much free space is left in the LEB. + * + * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under + * spin-lock or mutex because they are written under both mutex and spin-lock. + * @buf is appended to under mutex but overwritten under both mutex and + * spin-lock. Thus the data between @buf and @buf + @used can be read under + * spinlock. + */ +struct ubifs_wbuf { + struct ubifs_info *c; + void *buf; + int lnum; + int offs; + int avail; + int used; + int dtype; + int jhead; + int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); + struct mutex io_mutex; + spinlock_t lock; + struct timer_list timer; + int timeout; + int need_sync; + int next_ino; + ino_t *inodes; +}; + +/** + * struct ubifs_bud - bud logical eraseblock. + * @lnum: logical eraseblock number + * @start: where the (uncommitted) bud data starts + * @jhead: journal head number this bud belongs to + * @list: link in the list buds belonging to the same journal head + * @rb: link in the tree of all buds + */ +struct ubifs_bud { + int lnum; + int start; + int jhead; + struct list_head list; + struct rb_node rb; +}; + +/** + * struct ubifs_jhead - journal head. + * @wbuf: head's write-buffer + * @buds_list: list of bud LEBs belonging to this journal head + * + * Note, the @buds list is protected by the @c->buds_lock. + */ +struct ubifs_jhead { + struct ubifs_wbuf wbuf; + struct list_head buds_list; +}; + +/** + * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. + * @key: key + * @znode: znode address in memory + * @lnum: LEB number of the indexing node + * @offs: offset of the indexing node within @lnum + * @len: target node length + */ +struct ubifs_zbranch { + union ubifs_key key; + union { + struct ubifs_znode *znode; + void *leaf; + }; + int lnum; + int offs; + int len; +}; + +/** + * struct ubifs_znode - in-memory representation of an indexing node. + * @parent: parent znode or NULL if it is the root + * @cnext: next znode to commit + * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE) + * @time: last access time (seconds) + * @level: level of the entry in the TNC tree + * @child_cnt: count of child znodes + * @iip: index in parent's zbranch array + * @alt: lower bound of key range has altered i.e. child inserted at slot 0 + * @lnum: LEB number of the corresponding indexing node + * @offs: offset of the corresponding indexing node + * @len: length of the corresponding indexing node + * @zbranch: array of znode branches (@c->fanout elements) + */ +struct ubifs_znode { + struct ubifs_znode *parent; + struct ubifs_znode *cnext; + unsigned long flags; + unsigned long time; + int level; + int child_cnt; + int iip; + int alt; +#ifdef CONFIG_UBIFS_FS_DEBUG + int lnum, offs, len; +#endif + struct ubifs_zbranch zbranch[]; +}; + +/** + * struct ubifs_node_range - node length range description data structure. + * @len: fixed node length + * @min_len: minimum possible node length + * @max_len: maximum possible node length + * + * If @max_len is %0, the node has fixed length @len. + */ +struct ubifs_node_range { + union { + int len; + int min_len; + }; + int max_len; +}; + +/** + * struct ubifs_compressor - UBIFS compressor description structure. + * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc) + * @cc: cryptoapi compressor handle + * @comp_mutex: mutex used during compression + * @decomp_mutex: mutex used during decompression + * @name: compressor name + * @capi_name: cryptoapi compressor name + */ +struct ubifs_compressor { + int compr_type; + struct crypto_comp *cc; + struct mutex *comp_mutex; + struct mutex *decomp_mutex; + const char *name; + const char *capi_name; +}; + +/** + * struct ubifs_budget_req - budget requirements of an operation. + * + * @fast: non-zero if the budgeting should try to aquire budget quickly and + * should not try to call write-back + * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields + * have to be re-calculated + * @new_page: non-zero if the operation adds a new page + * @dirtied_page: non-zero if the operation makes a page dirty + * @new_dent: non-zero if the operation adds a new directory entry + * @mod_dent: non-zero if the operation removes or modifies an existing + * directory entry + * @new_ino: non-zero if the operation adds a new inode + * @new_ino_d: now much data newly created inode contains + * @dirtied_ino: how many inodes the operation makes dirty + * @dirtied_ino_d: now much data dirtied inode contains + * @idx_growth: how much the index will supposedly grow + * @data_growth: how much new data the operation will supposedly add + * @dd_growth: how much data that makes other data dirty the operation will + * supposedly add + * + * @idx_growth, @data_growth and @dd_growth are not used in budget request. The + * budgeting subsystem caches index and data growth values there to avoid + * re-calculating them when the budget is released. However, if @idx_growth is + * %-1, it is calculated by the release function using other fields. + * + * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d + * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made + * dirty by the re-name operation. + */ +struct ubifs_budget_req { + unsigned int fast:1; + unsigned int recalculate:1; + unsigned int new_page:1; + unsigned int dirtied_page:1; + unsigned int new_dent:1; + unsigned int mod_dent:1; + unsigned int new_ino:1; + unsigned int new_ino_d:13; +#ifndef UBIFS_DEBUG + unsigned int dirtied_ino:4; + unsigned int dirtied_ino_d:15; +#else + /* Not bit-fields to check for overflows */ + unsigned int dirtied_ino; + unsigned int dirtied_ino_d; +#endif + int idx_growth; + int data_growth; + int dd_growth; +}; + +/** + * struct ubifs_orphan - stores the inode number of an orphan. + * @rb: rb-tree node of rb-tree of orphans sorted by inode number + * @list: list head of list of orphans in order added + * @new_list: list head of list of orphans added since the last commit + * @cnext: next orphan to commit + * @dnext: next orphan to delete + * @inum: inode number + * @new: %1 => added since the last commit, otherwise %0 + */ +struct ubifs_orphan { + struct rb_node rb; + struct list_head list; + struct list_head new_list; + struct ubifs_orphan *cnext; + struct ubifs_orphan *dnext; + ino_t inum; + int new; +}; + +/** + * struct ubifs_mount_opts - UBIFS-specific mount options information. + * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) + */ +struct ubifs_mount_opts { + unsigned int unmount_mode:2; +}; + +/** + * struct ubifs_info - UBIFS file-system description data structure + * (per-superblock). + * @vfs_sb: VFS @struct super_block object + * @bdi: backing device info object to make VFS happy and disable readahead + * + * @highest_inum: highest used inode number + * @vfs_gen: VFS inode generation counter + * @max_sqnum: current global sequence number + * @cmt_no: commit number (last successfully completed commit) + * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters + * @fmt_version: UBIFS on-flash format version + * @uuid: UUID from super block + * + * @lhead_lnum: log head logical eraseblock number + * @lhead_offs: log head offset + * @ltail_lnum: log tail logical eraseblock number (offset is always 0) + * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and + * @bud_bytes + * @min_log_bytes: minimum required number of bytes in the log + * @cmt_bud_bytes: used during commit to temporarily amount of bytes in + * committed buds + * + * @buds: tree of all buds indexed by bud LEB number + * @bud_bytes: how many bytes of flash is used by buds + * @buds_lock: protects the @buds tree, @bud_bytes, and per-journal head bud + * lists + * @jhead_cnt: count of journal heads + * @jheads: journal heads (head zero is base head) + * @max_bud_bytes: maximum number of bytes allowed in buds + * @bg_bud_bytes: number of bud bytes when background commit is initiated + * @old_buds: buds to be released after commit ends + * @max_bud_cnt: maximum number of buds + * + * @commit_sem: synchronizes committer with other processes + * @cmt_state: commit state + * @cs_lock: commit state lock + * @cmt_wq: wait queue to sleep on if the log is full and a commit is running + * @fast_unmount: do not run journal commit before un-mounting + * @big_lpt: flag that LPT is too big to write whole during commit + * @check_lpt_free: flag that indicates LPT GC may be needed + * @nospace: non-zero if the file-system does not have flash space (used as + * optimization) + * @nospace_rp: the same as @nospace, but additionally means that even reserved + * pool is full + * + * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and + * @calc_idx_sz + * @zroot: zbranch which points to the root index node and znode + * @cnext: next znode to commit + * @enext: next znode to commit to empty space + * @gap_lebs: array of LEBs used by the in-gaps commit method + * @cbuf: commit buffer + * @ileb_buf: buffer for commit in-the-gaps method + * @ileb_len: length of data in ileb_buf + * @ihead_lnum: LEB number of index head + * @ihead_offs: offset of index head + * @ilebs: pre-allocated index LEBs + * @ileb_cnt: number of pre-allocated index LEBs + * @ileb_nxt: next pre-allocated index LEBs + * @old_idx: tree of index nodes obsoleted since the last commit start + * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c + * @new_ihead_lnum: used by debugging to check ihead_lnum + * @new_ihead_offs: used by debugging to check ihead_offs + * + * @mst_node: master node + * @mst_offs: offset of valid master node + * @mst_mutex: protects the master node area, @mst_node, and @mst_offs + * + * @log_lebs: number of logical eraseblocks in the log + * @log_bytes: log size in bytes + * @log_last: last LEB of the log + * @lpt_lebs: number of LEBs used for lprops table + * @lpt_first: first LEB of the lprops table area + * @lpt_last: last LEB of the lprops table area + * @orph_lebs: number of LEBs used for the orphan area + * @orph_first: first LEB of the orphan area + * @orph_last: last LEB of the orphan area + * @main_lebs: count of LEBs in the main area + * @main_first: first LEB of the main area + * @main_bytes: main area size in bytes + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * + * @key_hash_type: type of the key hash + * @key_hash: direntry key hash function + * @key_fmt: key format + * @key_len: key length + * @fanout: fanout of the index tree (number of links per indexing node) + * + * @min_io_size: minimal input/output unit size + * @min_io_shift: number of bits in @min_io_size minus one + * @leb_size: logical eraseblock size in bytes + * @half_leb_size: half LEB size + * @leb_cnt: count of logical eraseblocks + * @max_leb_cnt: maximum count of logical eraseblocks + * @old_leb_cnt: count of logical eraseblocks before re-size + * @ro_media: the underlying UBI volume is read-only + * + * @dirty_pg_cnt: number of dirty pages (not used) + * @dirty_zn_cnt: number of dirty znodes + * @clean_zn_cnt: number of clean znodes + * + * @budg_idx_growth: amount of bytes budgeted for index growth + * @budg_data_growth: amount of bytes budgeted for cached data + * @budg_dd_growth: amount of bytes budgeted for cached data that will make + * other data dirty + * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, + * but which still have to be taken into account because + * the index has not been committed so far + * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, + * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst; + * @min_idx_lebs: minimum number of LEBs required for the index + * @old_idx_sz: size of index on flash + * @calc_idx_sz: temporary variable which is used to calculate new index size + * (contains accurate new index size at end of TNC commit start) + * @lst: lprops statistics + * + * @page_budget: budget for a page + * @inode_budget: budget for an inode + * @dent_budget: budget for a directory entry + * + * @ref_node_alsz: size of the LEB reference node aligned to the min. flash + * I/O unit + * @mst_node_alsz: master node aligned size + * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary + * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary + * @max_inode_sz: maximum possible inode size in bytes + * @max_znode_sz: size of znode in bytes + * @dead_wm: LEB dead space watermark + * @dark_wm: LEB dark space watermark + * @block_cnt: count of 4KiB blocks on the FS + * + * @ranges: UBIFS node length ranges + * @ubi: UBI volume descriptor + * @di: UBI device information + * @vi: UBI volume information + * + * @orph_tree: rb-tree of orphan inode numbers + * @orph_list: list of orphan inode numbers in order added + * @orph_new: list of orphan inode numbers added since last commit + * @orph_cnext: next orphan to commit + * @orph_dnext: next orphan to delete + * @orphan_lock: lock for orph_tree and orph_new + * @orph_buf: buffer for orphan nodes + * @new_orphans: number of orphans since last commit + * @cmt_orphans: number of orphans being committed + * @tot_orphans: number of orphans in the rb_tree + * @max_orphans: maximum number of orphans allowed + * @ohead_lnum: orphan head LEB number + * @ohead_offs: orphan head offset + * @no_orphs: non-zero if there are no orphans + * + * @bgt: UBIFS background thread + * @bgt_name: background thread name + * @need_bgt: if background thread should run + * @need_wbuf_sync: if write-buffers have to be synchronized + * + * @gc_lnum: LEB number used for garbage collection + * @sbuf: a buffer of LEB size used by GC and replay for scanning + * @idx_gc: list of index LEBs that have been garbage collected + * @idx_gc_cnt: number of elements on the idx_gc list + * + * @infos_list: links all 'ubifs_info' objects + * @umount_mutex: serializes shrinker and un-mount + * @shrinker_run_no: shrinker run number + * + * @space_bits: number of bits needed to record free or dirty space + * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT + * @lpt_offs_bits: number of bits needed to record an offset in the LPT + * @lpt_spc_bits: number of bits needed to space in the LPT + * @pcnt_bits: number of bits needed to record pnode or nnode number + * @lnum_bits: number of bits needed to record LEB number + * @nnode_sz: size of on-flash nnode + * @pnode_sz: size of on-flash pnode + * @ltab_sz: size of on-flash LPT lprops table + * @lsave_sz: size of on-flash LPT save table + * @pnode_cnt: number of pnodes + * @nnode_cnt: number of nnodes + * @lpt_hght: height of the LPT + * @pnodes_have: number of pnodes in memory + * + * @lp_mutex: protects lprops table and all the other lprops-related fields + * @lpt_lnum: LEB number of the root nnode of the LPT + * @lpt_offs: offset of the root nnode of the LPT + * @nhead_lnum: LEB number of LPT head + * @nhead_offs: offset of LPT head + * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab + * @dirty_nn_cnt: number of dirty nnodes + * @dirty_pn_cnt: number of dirty pnodes + * @lpt_sz: LPT size + * @lpt_nod_buf: buffer for an on-flash nnode or pnode + * @lpt_buf: buffer of LEB size used by LPT + * @nroot: address in memory of the root nnode of the LPT + * @lpt_cnext: next LPT node to commit + * @lpt_heap: array of heaps of categorized lprops + * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at + * previous commit start + * @uncat_list: list of un-categorized LEBs + * @empty_list: list of empty LEBs + * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) + * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) + * @freeable_cnt: number of freeable LEBs in @freeable_list + * + * @ltab_lnum: LEB number of LPT's own lprops table + * @ltab_offs: offset of LPT's own lprops table + * @ltab: LPT's own lprops table + * @ltab_cmt: LPT's own lprops table (commit copy) + * @lsave_cnt: number of LEB numbers in LPT's save table + * @lsave_lnum: LEB number of LPT's save table + * @lsave_offs: offset of LPT's save table + * @lsave: LPT's save table + * @lscan_lnum: LEB number of last LPT scan + * + * @rp_size: size of the reserved pool in bytes + * @report_rp_size: size of the reserved pool reported to user-space + * @rp_uid: reserved pool user ID + * @rp_gid: reserved pool group ID + * + * @empty: if the UBI device is empty + * @replay_tree: temporary tree used during journal replay + * @replay_list: temporary list used during journal replay + * @replay_buds: list of buds to replay + * @cs_sqnum: sequence number of first node in the log (commit start node) + * @replay_sqnum: sequence number of node currently being replayed + * @need_recovery: file-system needs recovery + * @replaying: set to %1 during journal replay + * @unclean_leb_list: LEBs to recover when mounting ro to rw + * @rcvrd_mst_node: recovered master node to write when mounting ro to rw + * @size_tree: inode size information for recovery + * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) + * @mount_opts: UBIFS-specific mount options + * + * @dbg_buf: a buffer of LEB size used for debugging purposes + * @old_zroot: old index root - used by 'dbg_check_old_index()' + * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' + * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' + * @failure_mode: failure mode for recovery testing + * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @fail_timeout: time in jiffies when delay of failure mode expires + * @fail_cnt: current number of calls to failure mode I/O functions + * @fail_cnt_max: number of calls by which to delay failure mode + */ +struct ubifs_info { + struct super_block *vfs_sb; + struct backing_dev_info bdi; + + ino_t highest_inum; + unsigned int vfs_gen; + unsigned long long max_sqnum; + unsigned long long cmt_no; + spinlock_t cnt_lock; + int fmt_version; + unsigned char uuid[16]; + + int lhead_lnum; + int lhead_offs; + int ltail_lnum; + struct mutex log_mutex; + int min_log_bytes; + long long cmt_bud_bytes; + + struct rb_root buds; + long long bud_bytes; + spinlock_t buds_lock; + int jhead_cnt; + struct ubifs_jhead *jheads; + long long max_bud_bytes; + long long bg_bud_bytes; + struct list_head old_buds; + int max_bud_cnt; + + struct rw_semaphore commit_sem; + int cmt_state; + spinlock_t cs_lock; + wait_queue_head_t cmt_wq; + unsigned int fast_unmount:1; + unsigned int big_lpt:1; + unsigned int check_lpt_free:1; + unsigned int nospace:1; + unsigned int nospace_rp:1; + + struct mutex tnc_mutex; + struct ubifs_zbranch zroot; + struct ubifs_znode *cnext; + struct ubifs_znode *enext; + int *gap_lebs; + void *cbuf; + void *ileb_buf; + int ileb_len; + int ihead_lnum; + int ihead_offs; + int *ilebs; + int ileb_cnt; + int ileb_nxt; + struct rb_root old_idx; + int *bottom_up_buf; +#ifdef CONFIG_UBIFS_FS_DEBUG + int new_ihead_lnum; + int new_ihead_offs; +#endif + + struct ubifs_mst_node *mst_node; + int mst_offs; + struct mutex mst_mutex; + + int log_lebs; + long long log_bytes; + int log_last; + int lpt_lebs; + int lpt_first; + int lpt_last; + int orph_lebs; + int orph_first; + int orph_last; + int main_lebs; + int main_first; + long long main_bytes; + int default_compr; + + uint8_t key_hash_type; + uint32_t (*key_hash)(const char *str, int len); + int key_fmt; + int key_len; + int fanout; + + int min_io_size; + int min_io_shift; + int leb_size; + int half_leb_size; + int leb_cnt; + int max_leb_cnt; + int old_leb_cnt; + int ro_media; + + atomic_long_t dirty_pg_cnt; + atomic_long_t dirty_zn_cnt; + atomic_long_t clean_zn_cnt; + + long long budg_idx_growth; + long long budg_data_growth; + long long budg_dd_growth; + long long budg_uncommitted_idx; + spinlock_t space_lock; + int min_idx_lebs; + unsigned long long old_idx_sz; + unsigned long long calc_idx_sz; + struct ubifs_lp_stats lst; + + int page_budget; + int inode_budget; + int dent_budget; + + int ref_node_alsz; + int mst_node_alsz; + int min_idx_node_sz; + int max_idx_node_sz; + long long max_inode_sz; + int max_znode_sz; + int dead_wm; + int dark_wm; + int block_cnt; + + struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT]; + struct ubi_volume_desc *ubi; + struct ubi_device_info di; + struct ubi_volume_info vi; + + struct rb_root orph_tree; + struct list_head orph_list; + struct list_head orph_new; + struct ubifs_orphan *orph_cnext; + struct ubifs_orphan *orph_dnext; + spinlock_t orphan_lock; + void *orph_buf; + int new_orphans; + int cmt_orphans; + int tot_orphans; + int max_orphans; + int ohead_lnum; + int ohead_offs; + int no_orphs; + + struct task_struct *bgt; + char bgt_name[sizeof(BGT_NAME_PATTERN) + 9]; + int need_bgt; + int need_wbuf_sync; + + int gc_lnum; + void *sbuf; + struct list_head idx_gc; + int idx_gc_cnt; + + struct list_head infos_list; + struct mutex umount_mutex; + unsigned int shrinker_run_no; + + int space_bits; + int lpt_lnum_bits; + int lpt_offs_bits; + int lpt_spc_bits; + int pcnt_bits; + int lnum_bits; + int nnode_sz; + int pnode_sz; + int ltab_sz; + int lsave_sz; + int pnode_cnt; + int nnode_cnt; + int lpt_hght; + int pnodes_have; + + struct mutex lp_mutex; + int lpt_lnum; + int lpt_offs; + int nhead_lnum; + int nhead_offs; + int lpt_drty_flgs; + int dirty_nn_cnt; + int dirty_pn_cnt; + long long lpt_sz; + void *lpt_nod_buf; + void *lpt_buf; + struct ubifs_nnode *nroot; + struct ubifs_cnode *lpt_cnext; + struct ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT]; + struct ubifs_lpt_heap dirty_idx; + struct list_head uncat_list; + struct list_head empty_list; + struct list_head freeable_list; + struct list_head frdi_idx_list; + int freeable_cnt; + + int ltab_lnum; + int ltab_offs; + struct ubifs_lpt_lprops *ltab; + struct ubifs_lpt_lprops *ltab_cmt; + int lsave_cnt; + int lsave_lnum; + int lsave_offs; + int *lsave; + int lscan_lnum; + + long long rp_size; + long long report_rp_size; + uid_t rp_uid; + gid_t rp_gid; + + /* The below fields are used only during mounting and re-mounting */ + int empty; + struct rb_root replay_tree; + struct list_head replay_list; + struct list_head replay_buds; + unsigned long long cs_sqnum; + unsigned long long replay_sqnum; + int need_recovery; + int replaying; + struct list_head unclean_leb_list; + struct ubifs_mst_node *rcvrd_mst_node; + struct rb_root size_tree; + int remounting_rw; + struct ubifs_mount_opts mount_opts; + +#ifdef CONFIG_UBIFS_FS_DEBUG + void *dbg_buf; + struct ubifs_zbranch old_zroot; + int old_zroot_level; + unsigned long long old_zroot_sqnum; + int failure_mode; + int fail_delay; + unsigned long fail_timeout; + unsigned int fail_cnt; + unsigned int fail_cnt_max; +#endif +}; + +extern struct list_head ubifs_infos; +extern spinlock_t ubifs_infos_lock; +extern atomic_long_t ubifs_clean_zn_cnt; +extern struct kmem_cache *ubifs_inode_slab; +extern struct super_operations ubifs_super_operations; +extern struct address_space_operations ubifs_file_address_operations; +extern struct file_operations ubifs_file_operations; +extern struct inode_operations ubifs_file_inode_operations; +extern struct file_operations ubifs_dir_operations; +extern struct inode_operations ubifs_dir_inode_operations; +extern struct inode_operations ubifs_symlink_inode_operations; +extern struct backing_dev_info ubifs_backing_dev_info; +extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + +/* io.c */ +int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + int dtype); +int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf); +int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, + int lnum, int offs); +int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, + int lnum, int offs); +int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, + int offs, int dtype); +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, + int offs, int quiet); +void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); +void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); +int ubifs_io_init(struct ubifs_info *c); +void ubifs_pad(const struct ubifs_info *c, void *buf, int pad); +int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf); +int ubifs_bg_wbufs_sync(struct ubifs_info *c); +void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum); +int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); + +/* scan.c */ +struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf); +void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); +int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, + int offs, int quiet); +struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, + int offs, void *sbuf); +void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int lnum, int offs); +int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, + void *buf, int offs); +void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, + void *buf); + +/* log.c */ +void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud); +void ubifs_create_buds_lists(struct ubifs_info *c); +int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs); +struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum); +struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum); +int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum); +int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum); +int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum); +int ubifs_consolidate_log(struct ubifs_info *c); + +/* journal.c */ +int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, + const struct qstr *nm, const struct inode *inode, + int deletion, int xent); +int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, + const union ubifs_key *key, const void *buf, int len); +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, + int last_reference); +int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + const struct dentry *old_dentry, + const struct inode *new_dir, + const struct dentry *new_dentry, int sync); +int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, + loff_t old_size, loff_t new_size); +int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, + const struct inode *inode, const struct qstr *nm); +int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1, + const struct inode *inode2); + +/* budget.c */ +int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req); +void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req); +void ubifs_release_dirty_inode_budget(struct ubifs_info *c, + struct ubifs_inode *ui); +int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode, + struct ubifs_budget_req *req); +void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, + struct ubifs_budget_req *req); +void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, + struct ubifs_budget_req *req); +long long ubifs_budg_get_free_space(struct ubifs_info *c); +int ubifs_calc_min_idx_lebs(struct ubifs_info *c); +void ubifs_convert_page_budget(struct ubifs_info *c); +long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); + +/* find.c */ +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, + int squeeze); +int ubifs_find_free_leb_for_idx(struct ubifs_info *c); +int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, + int min_space, int pick_free); +int ubifs_find_dirty_idx_leb(struct ubifs_info *c); +int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); + +/* tnc.c */ +int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode **zn, int *n); +int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, + void *node); +int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + void *node, const struct qstr *nm); +int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, + void *node, int *lnum, int *offs); +int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, + int offs, int len); +int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, + int old_lnum, int old_offs, int lnum, int offs, int len); +int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, + int lnum, int offs, int len, const struct qstr *nm); +int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key); +int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + const struct qstr *nm); +int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, + union ubifs_key *to_key); +int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum); +struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, + union ubifs_key *key, + const struct qstr *nm); +void ubifs_tnc_close(struct ubifs_info *c); +int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs, int is_idx); +int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs); +/* Shared by tnc.c for tnc_commit.c */ +void destroy_old_idx(struct ubifs_info *c); +int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, + int lnum, int offs); +int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); + +/* tnc_misc.c */ +struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, + struct ubifs_znode *znode); +int ubifs_search_zbranch(const struct ubifs_info *c, + const struct ubifs_znode *znode, + const union ubifs_key *key, int *n); +struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode); +struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode); +long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr); +struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, + struct ubifs_zbranch *zbr, + struct ubifs_znode *parent, int iip); +int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + void *node); + +/* tnc_commit.c */ +int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); +int ubifs_tnc_end_commit(struct ubifs_info *c); + +/* shrinker.c */ +int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); + +/* commit.c */ +int ubifs_bg_thread(void *info); +void ubifs_commit_required(struct ubifs_info *c); +void ubifs_request_bg_commit(struct ubifs_info *c); +int ubifs_run_commit(struct ubifs_info *c); +void ubifs_recovery_commit(struct ubifs_info *c); +int ubifs_gc_should_commit(struct ubifs_info *c); +void ubifs_wait_for_commit(struct ubifs_info *c); + +/* master.c */ +int ubifs_read_master(struct ubifs_info *c); +int ubifs_write_master(struct ubifs_info *c); + +/* sb.c */ +int ubifs_read_superblock(struct ubifs_info *c); +struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); +int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); + +/* replay.c */ +int ubifs_validate_entry(struct ubifs_info *c, + const struct ubifs_dent_node *dent); +int ubifs_replay_journal(struct ubifs_info *c); + +/* gc.c */ +int ubifs_garbage_collect(struct ubifs_info *c, int anyway); +int ubifs_gc_start_commit(struct ubifs_info *c); +int ubifs_gc_end_commit(struct ubifs_info *c); +void ubifs_destroy_idx_gc(struct ubifs_info *c); +int ubifs_get_idx_gc_leb(struct ubifs_info *c); +int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp); + +/* orphan.c */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum); +void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); +int ubifs_orphan_start_commit(struct ubifs_info *c); +int ubifs_orphan_end_commit(struct ubifs_info *c); +int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); + +/* lpt.c */ +int ubifs_calc_lpt_geom(struct ubifs_info *c); +int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + int *lpt_lebs, int *big_lpt); +int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr); +struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum); +struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum); +int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, + ubifs_lpt_scan_callback scan_cb, void *data); + +/* Shared by lpt.c for lpt_commit.c */ +void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave); +void ubifs_pack_ltab(struct ubifs_info *c, void *buf, + struct ubifs_lpt_lprops *ltab); +void ubifs_pack_pnode(struct ubifs_info *c, void *buf, + struct ubifs_pnode *pnode); +void ubifs_pack_nnode(struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode); +struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip); +struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, + struct ubifs_nnode *parent, int iip); +int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip); +void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty); +void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); +uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); +struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); + +/* lpt_commit.c */ +int ubifs_lpt_start_commit(struct ubifs_info *c); +int ubifs_lpt_end_commit(struct ubifs_info *c); +int ubifs_lpt_post_commit(struct ubifs_info *c); +void ubifs_lpt_free(struct ubifs_info *c, int wr_only); + +/* lprops.c */ +void ubifs_get_lprops(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, + const struct ubifs_lprops *lp, + int free, int dirty, int flags, + int idx_gc_cnt); +void ubifs_release_lprops(struct ubifs_info *c); +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); +void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, + int cat); +void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, + struct ubifs_lprops *new_lprops); +void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops); +int ubifs_categorize_lprops(const struct ubifs_info *c, + const struct ubifs_lprops *lprops); +int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean, int idx_gc_cnt); +int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, + int flags_set, int flags_clean); +int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp); +const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); + +/* file.c */ +int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +int ubifs_setattr(struct dentry *dentry, struct iattr *attr); + +/* dir.c */ +struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, + int mode); +int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* xattr.c */ +int ubifs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size); +ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); +int ubifs_removexattr(struct dentry *dentry, const char *name); + +/* super.c */ +struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); + +/* recovery.c */ +int ubifs_recover_master_node(struct ubifs_info *c); +int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); +struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf, int grouped); +struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf); +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); +int ubifs_rcvry_gc_commit(struct ubifs_info *c); +int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, + int deletion, loff_t new_size); +int ubifs_recover_size(struct ubifs_info *c); +void ubifs_destroy_size_tree(struct ubifs_info *c); + +/* ioctl.c */ +long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +void ubifs_set_inode_flags(struct inode *inode); +#ifdef CONFIG_COMPAT +long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +#endif + +/* compressor.c */ +int __init ubifs_compressors_init(void); +void __exit ubifs_compressors_exit(void); +void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, + int *compr_type); +int ubifs_decompress(const void *buf, int len, void *out, int *out_len, + int compr_type); + +#include "debug.h" +#include "misc.h" +#include "key.h" + +#endif /* !__UBIFS_H__ */ diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c new file mode 100644 index 000000000000..1388a078e1a9 --- /dev/null +++ b/fs/ubifs/xattr.c @@ -0,0 +1,581 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + * Adrian Hunter + */ + +/* + * This file implements UBIFS extended attributes support. + * + * Extended attributes are implemented as regular inodes with attached data, + * which limits extended attribute size to UBIFS block size (4KiB). Names of + * extended attributes are described by extended attribute entries (xentries), + * which are almost identical to directory entries, but have different key type. + * + * In other words, the situation with extended attributes is very similar to + * directories. Indeed, any inode (but of course not xattr inodes) may have a + * number of associated xentries, just like directory inodes have associated + * directory entries. Extended attribute entries store the name of the extended + * attribute, the host inode number, and the extended attribute inode number. + * Similarly, direntries store the name, the parent and the target inode + * numbers. Thus, most of the common UBIFS mechanisms may be re-used for + * extended attributes. + * + * The number of extended attributes is not limited, but there is Linux + * limitation on the maximum possible size of the list of all extended + * attributes associated with an inode (%XATTR_LIST_MAX), so UBIFS makes sure + * the sum of all extended attribute names of the inode does not exceed that + * limit. + * + * Extended attributes are synchronous, which means they are written to the + * flash media synchronously and there is no write-back for extended attribute + * inodes. The extended attribute values are not stored in compressed form on + * the media. + * + * Since extended attributes are represented by regular inodes, they are cached + * in the VFS inode cache. The xentries are cached in the LNC cache (see + * tnc.c). + * + * ACL support is not implemented. + */ + +#include +#include +#include "ubifs.h" + +/* + * Limit the number of extended attributes per inode so that the total size + * (xattr_size) is guaranteeded to fit in an 'unsigned int'. + */ +#define MAX_XATTRS_PER_INODE 65535 + +/* + * Extended attribute type constants. + * + * USER_XATTR: user extended attribute ("user.*") + * TRUSTED_XATTR: trusted extended attribute ("trusted.*) + * SECURITY_XATTR: security extended attribute ("security.*") + */ +enum { + USER_XATTR, + TRUSTED_XATTR, + SECURITY_XATTR, +}; + +static struct inode_operations none_inode_operations; +static struct address_space_operations none_address_operations; +static struct file_operations none_file_operations; + +/** + * create_xattr - create an extended attribute. + * @c: UBIFS file-system description object + * @host: host inode + * @nm: extended attribute name + * @value: extended attribute value + * @size: size of extended attribute value + * + * This is a helper function which creates an extended attribute of name @nm + * and value @value for inode @host. The host inode is also updated on flash + * because the ctime and extended attribute accounting data changes. This + * function returns zero in case of success and a negative error code in case + * of failure. + */ +static int create_xattr(struct ubifs_info *c, struct inode *host, + const struct qstr *nm, const void *value, int size) +{ + int err; + struct inode *inode; + struct ubifs_inode *ui, *host_ui = ubifs_inode(host); + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .new_ino_d = size, .dirtied_ino = 1, + .dirtied_ino_d = host_ui->data_len}; + + if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) + return -ENOSPC; + /* + * Linux limits the maximum size of the extended attribute names list + * to %XATTR_LIST_MAX. This means we should not allow creating more* + * extended attributes if the name list becomes larger. This limitation + * is artificial for UBIFS, though. + */ + if (host_ui->xattr_names + host_ui->xattr_cnt + + nm->len + 1 > XATTR_LIST_MAX) + return -ENOSPC; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_budg; + } + + mutex_lock(&host_ui->ui_mutex); + /* Re-define all operations to be "nothing" */ + inode->i_mapping->a_ops = &none_address_operations; + inode->i_op = &none_inode_operations; + inode->i_fop = &none_file_operations; + + inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; + ui = ubifs_inode(inode); + ui->xattr = 1; + ui->flags |= UBIFS_XATTR_FL; + ui->data = kmalloc(size, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_unlock; + } + + memcpy(ui->data, value, size); + host->i_ctime = ubifs_current_time(host); + host_ui->xattr_cnt += 1; + host_ui->xattr_size += CALC_DENT_SIZE(nm->len); + host_ui->xattr_size += CALC_XATTR_BYTES(size); + host_ui->xattr_names += nm->len; + + /* + * We do not use i_size_write() because nobody can race with us as we + * are holding host @host->i_mutex - every xattr operation for this + * inode is serialized by it. + */ + inode->i_size = ui->ui_size = size; + ui->data_len = size; + err = ubifs_jnl_update(c, host, nm, inode, 0, 1); + if (err) + goto out_cancel; + mutex_unlock(&host_ui->ui_mutex); + + ubifs_release_budget(c, &req); + insert_inode_hash(inode); + iput(inode); + return 0; + +out_cancel: + host_ui->xattr_cnt -= 1; + host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); + host_ui->xattr_size -= CALC_XATTR_BYTES(size); +out_unlock: + mutex_unlock(&host_ui->ui_mutex); + make_bad_inode(inode); + iput(inode); +out_budg: + ubifs_release_budget(c, &req); + return err; +} + +/** + * change_xattr - change an extended attribute. + * @c: UBIFS file-system description object + * @host: host inode + * @inode: extended attribute inode + * @value: extended attribute value + * @size: size of extended attribute value + * + * This helper function changes the value of extended attribute @inode with new + * data from @value. Returns zero in case of success and a negative error code + * in case of failure. + */ +static int change_xattr(struct ubifs_info *c, struct inode *host, + struct inode *inode, const void *value, int size) +{ + int err; + struct ubifs_inode *host_ui = ubifs_inode(host); + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_budget_req req = { .dirtied_ino = 2, + .dirtied_ino_d = size + host_ui->data_len }; + + ubifs_assert(ui->data_len == inode->i_size); + err = ubifs_budget_space(c, &req); + if (err) + return err; + + mutex_lock(&host_ui->ui_mutex); + host->i_ctime = ubifs_current_time(host); + host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); + host_ui->xattr_size += CALC_XATTR_BYTES(size); + + kfree(ui->data); + ui->data = kmalloc(size, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_unlock; + } + + memcpy(ui->data, value, size); + inode->i_size = ui->ui_size = size; + ui->data_len = size; + + /* + * It is important to write the host inode after the xattr inode + * because if the host inode gets synchronized (via 'fsync()'), then + * the extended attribute inode gets synchronized, because it goes + * before the host inode in the write-buffer. + */ + err = ubifs_jnl_change_xattr(c, inode, host); + if (err) + goto out_cancel; + mutex_unlock(&host_ui->ui_mutex); + + ubifs_release_budget(c, &req); + return 0; + +out_cancel: + host_ui->xattr_size -= CALC_XATTR_BYTES(size); + host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); + make_bad_inode(inode); +out_unlock: + mutex_unlock(&host_ui->ui_mutex); + ubifs_release_budget(c, &req); + return err; +} + +/** + * check_namespace - check extended attribute name-space. + * @nm: extended attribute name + * + * This function makes sure the extended attribute name belongs to one of the + * supported extended attribute name-spaces. Returns name-space index in case + * of success and a negative error code in case of failure. + */ +static int check_namespace(const struct qstr *nm) +{ + int type; + + if (nm->len > UBIFS_MAX_NLEN) + return -ENAMETOOLONG; + + if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX, + XATTR_TRUSTED_PREFIX_LEN)) { + if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0') + return -EINVAL; + type = TRUSTED_XATTR; + } else if (!strncmp(nm->name, XATTR_USER_PREFIX, + XATTR_USER_PREFIX_LEN)) { + if (nm->name[XATTR_USER_PREFIX_LEN] == '\0') + return -EINVAL; + type = USER_XATTR; + } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN)) { + if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0') + return -EINVAL; + type = SECURITY_XATTR; + } else + return -EOPNOTSUPP; + + return type; +} + +static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) +{ + struct inode *inode; + + inode = ubifs_iget(c->vfs_sb, inum); + if (IS_ERR(inode)) { + ubifs_err("dead extended attribute entry, error %d", + (int)PTR_ERR(inode)); + return inode; + } + if (ubifs_inode(inode)->xattr) + return inode; + ubifs_err("corrupt extended attribute entry"); + iput(inode); + return ERR_PTR(-EINVAL); +} + +int ubifs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode, *host = dentry->d_inode; + struct ubifs_info *c = host->i_sb->s_fs_info; + struct qstr nm = { .name = name, .len = strlen(name) }; + struct ubifs_dent_node *xent; + union ubifs_key key; + int err, type; + + dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, + host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + + if (size > UBIFS_MAX_INO_DATA) + return -ERANGE; + + type = check_namespace(&nm); + if (type < 0) + return type; + + xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); + if (!xent) + return -ENOMEM; + + /* + * The extended attribute entries are stored in LNC, so multiple + * look-ups do not involve reading the flash. + */ + xent_key_init(c, &key, host->i_ino, &nm); + err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); + if (err) { + if (err != -ENOENT) + goto out_free; + + if (flags & XATTR_REPLACE) + /* We are asked not to create the xattr */ + err = -ENODATA; + else + err = create_xattr(c, host, &nm, value, size); + goto out_free; + } + + if (flags & XATTR_CREATE) { + /* We are asked not to replace the xattr */ + err = -EEXIST; + goto out_free; + } + + inode = iget_xattr(c, le64_to_cpu(xent->inum)); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_free; + } + + err = change_xattr(c, host, inode, value, size); + iput(inode); + +out_free: + kfree(xent); + return err; +} + +ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size) +{ + struct inode *inode, *host = dentry->d_inode; + struct ubifs_info *c = host->i_sb->s_fs_info; + struct qstr nm = { .name = name, .len = strlen(name) }; + struct ubifs_inode *ui; + struct ubifs_dent_node *xent; + union ubifs_key key; + int err; + + dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, + host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + + err = check_namespace(&nm); + if (err < 0) + return err; + + xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); + if (!xent) + return -ENOMEM; + + mutex_lock(&host->i_mutex); + xent_key_init(c, &key, host->i_ino, &nm); + err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); + if (err) { + if (err == -ENOENT) + err = -ENODATA; + goto out_unlock; + } + + inode = iget_xattr(c, le64_to_cpu(xent->inum)); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_unlock; + } + + ui = ubifs_inode(inode); + ubifs_assert(inode->i_size == ui->data_len); + ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len); + + if (buf) { + /* If @buf is %NULL we are supposed to return the length */ + if (ui->data_len > size) { + dbg_err("buffer size %zd, xattr len %d", + size, ui->data_len); + err = -ERANGE; + goto out_iput; + } + + memcpy(buf, ui->data, ui->data_len); + } + err = ui->data_len; + +out_iput: + iput(inode); +out_unlock: + mutex_unlock(&host->i_mutex); + kfree(xent); + return err; +} + +ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + union ubifs_key key; + struct inode *host = dentry->d_inode; + struct ubifs_info *c = host->i_sb->s_fs_info; + struct ubifs_inode *host_ui = ubifs_inode(host); + struct ubifs_dent_node *xent, *pxent = NULL; + int err, len, written = 0; + struct qstr nm = { .name = NULL }; + + dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, + dentry->d_name.len, dentry->d_name.name, size); + + len = host_ui->xattr_names + host_ui->xattr_cnt; + if (!buffer) + /* + * We should return the minimum buffer size which will fit a + * null-terminated list of all the extended attribute names. + */ + return len; + + if (len > size) + return -ERANGE; + + lowest_xent_key(c, &key, host->i_ino); + + mutex_lock(&host->i_mutex); + while (1) { + int type; + + xent = ubifs_tnc_next_ent(c, &key, &nm); + if (unlikely(IS_ERR(xent))) { + err = PTR_ERR(xent); + break; + } + + nm.name = xent->name; + nm.len = le16_to_cpu(xent->nlen); + + type = check_namespace(&nm); + if (unlikely(type < 0)) { + err = type; + break; + } + + /* Show trusted namespace only for "power" users */ + if (type != TRUSTED_XATTR || capable(CAP_SYS_ADMIN)) { + memcpy(buffer + written, nm.name, nm.len + 1); + written += nm.len + 1; + } + + kfree(pxent); + pxent = xent; + key_read(c, &xent->key, &key); + } + mutex_unlock(&host->i_mutex); + + kfree(pxent); + if (err != -ENOENT) { + ubifs_err("cannot find next direntry, error %d", err); + return err; + } + + ubifs_assert(written <= size); + return written; +} + +static int remove_xattr(struct ubifs_info *c, struct inode *host, + struct inode *inode, const struct qstr *nm) +{ + int err; + struct ubifs_inode *host_ui = ubifs_inode(host); + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1, + .dirtied_ino_d = host_ui->data_len }; + + ubifs_assert(ui->data_len == inode->i_size); + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + mutex_lock(&host_ui->ui_mutex); + host->i_ctime = ubifs_current_time(host); + host_ui->xattr_cnt -= 1; + host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); + host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); + host_ui->xattr_names -= nm->len; + + err = ubifs_jnl_delete_xattr(c, host, inode, nm); + if (err) + goto out_cancel; + mutex_unlock(&host_ui->ui_mutex); + + ubifs_release_budget(c, &req); + return 0; + +out_cancel: + host_ui->xattr_cnt += 1; + host_ui->xattr_size += CALC_DENT_SIZE(nm->len); + host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); + mutex_unlock(&host_ui->ui_mutex); + ubifs_release_budget(c, &req); + make_bad_inode(inode); + return err; +} + +int ubifs_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode, *host = dentry->d_inode; + struct ubifs_info *c = host->i_sb->s_fs_info; + struct qstr nm = { .name = name, .len = strlen(name) }; + struct ubifs_dent_node *xent; + union ubifs_key key; + int err; + + dbg_gen("xattr '%s', ino %lu ('%.*s')", name, + host->i_ino, dentry->d_name.len, dentry->d_name.name); + ubifs_assert(mutex_is_locked(&host->i_mutex)); + + err = check_namespace(&nm); + if (err < 0) + return err; + + xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); + if (!xent) + return -ENOMEM; + + xent_key_init(c, &key, host->i_ino, &nm); + err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); + if (err) { + if (err == -ENOENT) + err = -ENODATA; + goto out_free; + } + + inode = iget_xattr(c, le64_to_cpu(xent->inum)); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_free; + } + + ubifs_assert(inode->i_nlink == 1); + inode->i_nlink = 0; + err = remove_xattr(c, host, inode, &nm); + if (err) + inode->i_nlink = 1; + + /* If @i_nlink is 0, 'iput()' will delete the inode */ + iput(inode); + +out_free: + kfree(xent); + return err; +} diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index 28fe8bae1037..4eb75a88795a 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -63,7 +63,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20080321 +#define ACPI_CA_VERSION 0x20080609 /* * OS name, used for the _OS object. The _OS object is essentially obsolete, diff --git a/include/acpi/acdisasm.h b/include/acpi/acdisasm.h index 788f88782012..f53faca8ec80 100644 --- a/include/acpi/acdisasm.h +++ b/include/acpi/acdisasm.h @@ -162,6 +162,7 @@ extern struct acpi_dmtable_info acpi_dm_table_info_dmar_hdr[]; extern struct acpi_dmtable_info acpi_dm_table_info_dmar_scope[]; extern struct acpi_dmtable_info acpi_dm_table_info_dmar0[]; extern struct acpi_dmtable_info acpi_dm_table_info_dmar1[]; +extern struct acpi_dmtable_info acpi_dm_table_info_dmar2[]; extern struct acpi_dmtable_info acpi_dm_table_info_ecdt[]; extern struct acpi_dmtable_info acpi_dm_table_info_einj[]; extern struct acpi_dmtable_info acpi_dm_table_info_einj0[]; diff --git a/include/acpi/acdispat.h b/include/acpi/acdispat.h index 910f018d92c7..21a73a105d0a 100644 --- a/include/acpi/acdispat.h +++ b/include/acpi/acdispat.h @@ -221,7 +221,7 @@ acpi_ds_method_error(acpi_status status, struct acpi_walk_state *walk_state); * dsinit */ acpi_status -acpi_ds_initialize_objects(acpi_native_uint table_index, +acpi_ds_initialize_objects(u32 table_index, struct acpi_namespace_node *start_node); /* diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index 1f591171bf31..e5a890ffeb02 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -108,8 +108,9 @@ #define AE_BAD_HEX_CONSTANT (acpi_status) (0x0007 | AE_CODE_PROGRAMMER) #define AE_BAD_OCTAL_CONSTANT (acpi_status) (0x0008 | AE_CODE_PROGRAMMER) #define AE_BAD_DECIMAL_CONSTANT (acpi_status) (0x0009 | AE_CODE_PROGRAMMER) +#define AE_MISSING_ARGUMENTS (acpi_status) (0x000A | AE_CODE_PROGRAMMER) -#define AE_CODE_PGM_MAX 0x0009 +#define AE_CODE_PGM_MAX 0x000A /* * Acpi table exceptions @@ -225,6 +226,7 @@ char const *acpi_gbl_exception_names_env[] = { }; char const *acpi_gbl_exception_names_pgm[] = { + NULL, "AE_BAD_PARAMETER", "AE_BAD_CHARACTER", "AE_BAD_PATHNAME", @@ -233,10 +235,12 @@ char const *acpi_gbl_exception_names_pgm[] = { "AE_ALIGNMENT", "AE_BAD_HEX_CONSTANT", "AE_BAD_OCTAL_CONSTANT", - "AE_BAD_DECIMAL_CONSTANT" + "AE_BAD_DECIMAL_CONSTANT", + "AE_MISSING_ARGUMENTS" }; char const *acpi_gbl_exception_names_tbl[] = { + NULL, "AE_BAD_SIGNATURE", "AE_BAD_HEADER", "AE_BAD_CHECKSUM", @@ -246,6 +250,7 @@ char const *acpi_gbl_exception_names_tbl[] = { }; char const *acpi_gbl_exception_names_aml[] = { + NULL, "AE_AML_ERROR", "AE_AML_PARSE", "AE_AML_BAD_OPCODE", @@ -283,6 +288,7 @@ char const *acpi_gbl_exception_names_aml[] = { }; char const *acpi_gbl_exception_names_ctrl[] = { + NULL, "AE_CTRL_RETURN_VALUE", "AE_CTRL_PENDING", "AE_CTRL_TERMINATE", diff --git a/include/acpi/acglobal.h b/include/acpi/acglobal.h index 74ad971241db..15dda46b70d1 100644 --- a/include/acpi/acglobal.h +++ b/include/acpi/acglobal.h @@ -140,7 +140,7 @@ ACPI_EXTERN u32 acpi_gbl_trace_flags; */ ACPI_EXTERN struct acpi_internal_rsdt acpi_gbl_root_table_list; ACPI_EXTERN struct acpi_table_fadt acpi_gbl_FADT; -extern acpi_native_uint acpi_gbl_permanent_mmap; +extern u8 acpi_gbl_permanent_mmap; /* These addresses are calculated from FADT address values */ diff --git a/include/acpi/achware.h b/include/acpi/achware.h index d4fb9bbc903c..97a72b193276 100644 --- a/include/acpi/achware.h +++ b/include/acpi/achware.h @@ -87,6 +87,8 @@ acpi_status acpi_hw_clear_acpi_status(void); /* * hwgpe - GPE support */ +acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info); + acpi_status acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info *gpe_event_info); @@ -100,11 +102,9 @@ acpi_status acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, struct acpi_gpe_block_info *gpe_block); -#ifdef ACPI_FUTURE_USAGE acpi_status acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info, acpi_event_status * event_status); -#endif /* ACPI_FUTURE_USAGE */ acpi_status acpi_hw_disable_all_gpes(void); diff --git a/include/acpi/acinterp.h b/include/acpi/acinterp.h index e249ce5d3300..e8db7a3143a5 100644 --- a/include/acpi/acinterp.h +++ b/include/acpi/acinterp.h @@ -366,10 +366,7 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth); void acpi_ex_dump_operands(union acpi_operand_object **operands, - acpi_interpreter_mode interpreter_mode, - char *ident, - u32 num_levels, - char *note, char *module_name, u32 line_number); + const char *opcode_name, u32 num_opcodes); #ifdef ACPI_FUTURE_USAGE void diff --git a/include/acpi/aclocal.h b/include/acpi/aclocal.h index c5cdc32ac2f8..b221c8583ddd 100644 --- a/include/acpi/aclocal.h +++ b/include/acpi/aclocal.h @@ -98,8 +98,8 @@ union acpi_parse_object; static char *acpi_gbl_mutex_names[ACPI_NUM_MUTEX] = { "ACPI_MTX_Interpreter", - "ACPI_MTX_Tables", "ACPI_MTX_Namespace", + "ACPI_MTX_Tables", "ACPI_MTX_Events", "ACPI_MTX_Caches", "ACPI_MTX_Memory", @@ -282,8 +282,8 @@ struct acpi_predefined_names { /* Info structure used to convert external<->internal namestrings */ struct acpi_namestring_info { - char *external_name; - char *next_external_char; + const char *external_name; + const char *next_external_char; char *internal_name; u32 length; u32 num_segments; diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h index fb41a3b802fc..57ab9e9d7593 100644 --- a/include/acpi/acmacros.h +++ b/include/acpi/acmacros.h @@ -80,12 +80,12 @@ */ #define ACPI_CAST_PTR(t, p) ((t *) (acpi_uintptr_t) (p)) #define ACPI_CAST_INDIRECT_PTR(t, p) ((t **) (acpi_uintptr_t) (p)) -#define ACPI_ADD_PTR(t,a,b) ACPI_CAST_PTR (t, (ACPI_CAST_PTR (u8,(a)) + (acpi_native_uint)(b))) -#define ACPI_PTR_DIFF(a,b) (acpi_native_uint) (ACPI_CAST_PTR (u8,(a)) - ACPI_CAST_PTR (u8,(b))) +#define ACPI_ADD_PTR(t, a, b) ACPI_CAST_PTR (t, (ACPI_CAST_PTR (u8,(a)) + (acpi_size)(b))) +#define ACPI_PTR_DIFF(a, b) (acpi_size) (ACPI_CAST_PTR (u8,(a)) - ACPI_CAST_PTR (u8,(b))) /* Pointer/Integer type conversions */ -#define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void,(void *) NULL,(acpi_native_uint) i) +#define ACPI_TO_POINTER(i) ACPI_ADD_PTR (void, (void *) NULL, (acpi_size) i) #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p,(void *) NULL) #define ACPI_OFFSET(d,f) (acpi_size) ACPI_PTR_DIFF (&(((d *)0)->f),(void *) NULL) #define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i) @@ -296,22 +296,22 @@ struct acpi_integer_overlay { /* * Rounding macros (Power of two boundaries only) */ -#define ACPI_ROUND_DOWN(value,boundary) (((acpi_native_uint)(value)) & \ - (~(((acpi_native_uint) boundary)-1))) +#define ACPI_ROUND_DOWN(value, boundary) (((acpi_size)(value)) & \ + (~(((acpi_size) boundary)-1))) -#define ACPI_ROUND_UP(value,boundary) ((((acpi_native_uint)(value)) + \ - (((acpi_native_uint) boundary)-1)) & \ - (~(((acpi_native_uint) boundary)-1))) +#define ACPI_ROUND_UP(value, boundary) ((((acpi_size)(value)) + \ + (((acpi_size) boundary)-1)) & \ + (~(((acpi_size) boundary)-1))) -/* Note: sizeof(acpi_native_uint) evaluates to either 2, 4, or 8 */ +/* Note: sizeof(acpi_size) evaluates to either 4 or 8 (32- vs 64-bit mode) */ #define ACPI_ROUND_DOWN_TO_32BIT(a) ACPI_ROUND_DOWN(a,4) #define ACPI_ROUND_DOWN_TO_64BIT(a) ACPI_ROUND_DOWN(a,8) -#define ACPI_ROUND_DOWN_TO_NATIVE_WORD(a) ACPI_ROUND_DOWN(a,sizeof(acpi_native_uint)) +#define ACPI_ROUND_DOWN_TO_NATIVE_WORD(a) ACPI_ROUND_DOWN(a,sizeof(acpi_size)) #define ACPI_ROUND_UP_TO_32BIT(a) ACPI_ROUND_UP(a,4) #define ACPI_ROUND_UP_TO_64BIT(a) ACPI_ROUND_UP(a,8) -#define ACPI_ROUND_UP_TO_NATIVE_WORD(a) ACPI_ROUND_UP(a,sizeof(acpi_native_uint)) +#define ACPI_ROUND_UP_TO_NATIVE_WORD(a) ACPI_ROUND_UP(a,sizeof(acpi_size)) #define ACPI_ROUND_BITS_UP_TO_BYTES(a) ACPI_DIV_8((a) + 7) #define ACPI_ROUND_BITS_DOWN_TO_BYTES(a) ACPI_DIV_8((a)) @@ -322,7 +322,7 @@ struct acpi_integer_overlay { #define ACPI_ROUND_UP_TO(value,boundary) (((value) + ((boundary)-1)) / (boundary)) -#define ACPI_IS_MISALIGNED(value) (((acpi_native_uint)value) & (sizeof(acpi_native_uint)-1)) +#define ACPI_IS_MISALIGNED(value) (((acpi_size)value) & (sizeof(acpi_size)-1)) /* * Bitmask creation @@ -414,7 +414,7 @@ struct acpi_integer_overlay { * error messages. The __FILE__ macro is not very useful for this, because it * often includes the entire pathname to the module */ -#define ACPI_MODULE_NAME(name) static char ACPI_UNUSED_VAR *_acpi_module_name = name; +#define ACPI_MODULE_NAME(name) static const char ACPI_UNUSED_VAR _acpi_module_name[] = name; #else #define ACPI_MODULE_NAME(name) #endif @@ -467,19 +467,17 @@ struct acpi_integer_overlay { /* * If ACPI_GET_FUNCTION_NAME was not defined in the compiler-dependent header, * define it now. This is the case where there the compiler does not support - * a __FUNCTION__ macro or equivalent. We save the function name on the - * local stack. + * a __FUNCTION__ macro or equivalent. */ #ifndef ACPI_GET_FUNCTION_NAME #define ACPI_GET_FUNCTION_NAME _acpi_function_name /* * The Name parameter should be the procedure name as a quoted string. - * This is declared as a local string ("MyFunctionName") so that it can - * be also used by the function exit macros below. + * The function name is also used by the function exit macros below. * Note: (const char) is used to be compatible with the debug interfaces * and macros such as __FUNCTION__. */ -#define ACPI_FUNCTION_NAME(name) const char *_acpi_function_name = #name; +#define ACPI_FUNCTION_NAME(name) static const char _acpi_function_name[] = #name; #else /* Compiler supports __FUNCTION__ (or equivalent) -- Ignore this macro */ @@ -599,7 +597,7 @@ struct acpi_integer_overlay { /* Stack and buffer dumping */ #define ACPI_DUMP_STACK_ENTRY(a) acpi_ex_dump_operand((a),0) -#define ACPI_DUMP_OPERANDS(a,b,c,d,e) acpi_ex_dump_operands(a,b,c,d,e,_acpi_module_name,__LINE__) +#define ACPI_DUMP_OPERANDS(a,b,c) acpi_ex_dump_operands(a,b,c) #define ACPI_DUMP_ENTRY(a,b) acpi_ns_dump_entry (a,b) #define ACPI_DUMP_PATHNAME(a,b,c,d) acpi_ns_dump_pathname(a,b,c,d) @@ -635,7 +633,7 @@ struct acpi_integer_overlay { #define ACPI_FUNCTION_VALUE_EXIT(s) do { } while(0) #define ACPI_FUNCTION_ENTRY() do { } while(0) #define ACPI_DUMP_STACK_ENTRY(a) do { } while(0) -#define ACPI_DUMP_OPERANDS(a,b,c,d,e) do { } while(0) +#define ACPI_DUMP_OPERANDS(a,b,c) do { } while(0) #define ACPI_DUMP_ENTRY(a,b) do { } while(0) #define ACPI_DUMP_TABLES(a,b) do { } while(0) #define ACPI_DUMP_PATHNAME(a,b,c,d) do { } while(0) diff --git a/include/acpi/acnamesp.h b/include/acpi/acnamesp.h index 713b30903fe5..9ed70a050580 100644 --- a/include/acpi/acnamesp.h +++ b/include/acpi/acnamesp.h @@ -86,8 +86,7 @@ acpi_status acpi_ns_initialize_devices(void); acpi_status acpi_ns_load_namespace(void); acpi_status -acpi_ns_load_table(acpi_native_uint table_index, - struct acpi_namespace_node *node); +acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node); /* * nswalk - walk the namespace @@ -108,12 +107,11 @@ struct acpi_namespace_node *acpi_ns_get_next_node(acpi_object_type type, struct * nsparse - table parsing */ acpi_status -acpi_ns_parse_table(acpi_native_uint table_index, - struct acpi_namespace_node *start_node); +acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node); acpi_status -acpi_ns_one_complete_parse(acpi_native_uint pass_number, - acpi_native_uint table_index, +acpi_ns_one_complete_parse(u32 pass_number, + u32 table_index, struct acpi_namespace_node *start_node); /* @@ -201,7 +199,7 @@ acpi_ns_pattern_match(struct acpi_namespace_node *obj_node, char *search_for); acpi_status acpi_ns_get_node(struct acpi_namespace_node *prefix_node, - char *external_pathname, + const char *external_pathname, u32 flags, struct acpi_namespace_node **out_node); acpi_size acpi_ns_get_pathname_length(struct acpi_namespace_node *node); @@ -265,28 +263,30 @@ acpi_object_type acpi_ns_get_type(struct acpi_namespace_node *node); u32 acpi_ns_local(acpi_object_type type); void -acpi_ns_report_error(char *module_name, +acpi_ns_report_error(const char *module_name, u32 line_number, - char *internal_name, acpi_status lookup_status); + const char *internal_name, acpi_status lookup_status); void -acpi_ns_report_method_error(char *module_name, +acpi_ns_report_method_error(const char *module_name, u32 line_number, - char *message, + const char *message, struct acpi_namespace_node *node, - char *path, acpi_status lookup_status); + const char *path, acpi_status lookup_status); -void acpi_ns_print_node_pathname(struct acpi_namespace_node *node, char *msg); +void +acpi_ns_print_node_pathname(struct acpi_namespace_node *node, const char *msg); acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info); void acpi_ns_get_internal_name_length(struct acpi_namestring_info *info); -acpi_status acpi_ns_internalize_name(char *dotted_name, char **converted_name); +acpi_status +acpi_ns_internalize_name(const char *dotted_name, char **converted_name); acpi_status acpi_ns_externalize_name(u32 internal_name_length, - char *internal_name, + const char *internal_name, u32 * converted_name_length, char **converted_name); struct acpi_namespace_node *acpi_ns_map_handle_to_node(acpi_handle handle); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 2f1c68c7a727..a5ac0bc7f52e 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -259,6 +259,7 @@ struct acpi_device_perf { /* Wakeup Management */ struct acpi_device_wakeup_flags { u8 valid:1; /* Can successfully enable wakeup? */ + u8 prepared:1; /* Has the wake-up capability been enabled? */ u8 run_wake:1; /* Run-Wake GPE devices */ }; @@ -335,6 +336,8 @@ void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context); int acpi_bus_get_status(struct acpi_device *device); int acpi_bus_get_power(acpi_handle handle, int *state); int acpi_bus_set_power(acpi_handle handle, int state); +bool acpi_bus_power_manageable(acpi_handle handle); +bool acpi_bus_can_wakeup(acpi_handle handle); #ifdef CONFIG_ACPI_PROC_EVENT int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data); int acpi_bus_generate_proc_event4(const char *class, const char *bid, u8 type, int data); @@ -376,14 +379,19 @@ acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int); #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle)) #ifdef CONFIG_PM_SLEEP -int acpi_pm_device_sleep_state(struct device *, int, int *); +int acpi_pm_device_sleep_state(struct device *, int *); +int acpi_pm_device_sleep_wake(struct device *, bool); #else /* !CONFIG_PM_SLEEP */ -static inline int acpi_pm_device_sleep_state(struct device *d, int w, int *p) +static inline int acpi_pm_device_sleep_state(struct device *d, int *p) { if (p) *p = ACPI_STATE_D0; return ACPI_STATE_D3; } +static inline int acpi_pm_device_sleep_wake(struct device *dev, bool enable) +{ + return -ENODEV; +} #endif /* !CONFIG_PM_SLEEP */ #endif /* CONFIG_ACPI */ diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 9757a040a505..e5f38e5ce86f 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -87,7 +87,9 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_device *device, int domain, -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI_POWER -int acpi_enable_wakeup_device_power(struct acpi_device *dev); +int acpi_device_sleep_wake(struct acpi_device *dev, + int enable, int sleep_state, int dev_state); +int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); int acpi_power_get_inferred_state(struct acpi_device *device); int acpi_power_transition(struct acpi_device *device, int state); diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index d4a560d2deb6..3f93a6b4e17f 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -144,7 +144,7 @@ void acpi_os_release_mutex(acpi_mutex handle); void *acpi_os_allocate(acpi_size size); void __iomem *acpi_os_map_memory(acpi_physical_address where, - acpi_native_uint length); + acpi_size length); void acpi_os_unmap_memory(void __iomem * logical_address, acpi_size size); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 2c3806e6546f..94d94e126e9f 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -98,7 +98,7 @@ void acpi_free(void *address); */ acpi_status acpi_reallocate_root_table(void); -acpi_status acpi_find_root_pointer(acpi_native_uint * rsdp_address); +acpi_status acpi_find_root_pointer(acpi_size *rsdp_address); acpi_status acpi_load_tables(void); @@ -108,15 +108,15 @@ acpi_status acpi_unload_table_id(acpi_owner_id id); acpi_status acpi_get_table_header(acpi_string signature, - acpi_native_uint instance, + u32 instance, struct acpi_table_header *out_table_header); acpi_status acpi_get_table(acpi_string signature, - acpi_native_uint instance, struct acpi_table_header **out_table); + u32 instance, struct acpi_table_header **out_table); acpi_status -acpi_get_table_by_index(acpi_native_uint table_index, +acpi_get_table_by_index(u32 table_index, struct acpi_table_header **out_table); acpi_status @@ -248,9 +248,7 @@ acpi_status acpi_disable_event(u32 event, u32 flags); acpi_status acpi_clear_event(u32 event); -#ifdef ACPI_FUTURE_USAGE acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status); -#endif /* ACPI_FUTURE_USAGE */ acpi_status acpi_set_gpe_type(acpi_handle gpe_device, u32 gpe_number, u8 type); @@ -260,12 +258,10 @@ acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u32 flags); acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number, u32 flags); -#ifdef ACPI_FUTURE_USAGE acpi_status acpi_get_gpe_status(acpi_handle gpe_device, u32 gpe_number, u32 flags, acpi_event_status * event_status); -#endif /* ACPI_FUTURE_USAGE */ acpi_status acpi_install_gpe_block(acpi_handle gpe_device, diff --git a/include/acpi/acstruct.h b/include/acpi/acstruct.h index a907c67d651e..7980a26bad35 100644 --- a/include/acpi/acstruct.h +++ b/include/acpi/acstruct.h @@ -108,7 +108,6 @@ struct acpi_walk_state { union acpi_operand_object **caller_return_desc; union acpi_generic_state *control_state; /* List of control states (nested IFs) */ struct acpi_namespace_node *deferred_node; /* Used when executing deferred opcodes */ - struct acpi_gpe_event_info *gpe_event_info; /* Info for GPE (_Lxx/_Exx methods only */ union acpi_operand_object *implicit_return_obj; struct acpi_namespace_node *method_call_node; /* Called method Node */ union acpi_parse_object *method_call_op; /* method_call Op if running a method */ @@ -143,7 +142,7 @@ struct acpi_init_walk_info { u16 package_init; u16 object_count; acpi_owner_id owner_id; - acpi_native_uint table_index; + u32 table_index; }; struct acpi_get_devices_info { @@ -189,17 +188,12 @@ struct acpi_evaluate_info { union acpi_operand_object **parameters; struct acpi_namespace_node *resolved_node; union acpi_operand_object *return_object; + u8 param_count; u8 pass_number; - u8 parameter_type; u8 return_object_type; u8 flags; }; -/* Types for parameter_type above */ - -#define ACPI_PARAM_ARGS 0 -#define ACPI_PARAM_GPE 1 - /* Values for Flags above */ #define ACPI_IGNORE_RETURN_VALUE 1 diff --git a/include/acpi/actables.h b/include/acpi/actables.h index 4b36a55b0b3b..0cbe1b9ab522 100644 --- a/include/acpi/actables.h +++ b/include/acpi/actables.h @@ -49,7 +49,7 @@ acpi_status acpi_allocate_root_table(u32 initial_table_count); /* * tbfadt - FADT parse/convert/validate */ -void acpi_tb_parse_fadt(acpi_native_uint table_index, u8 flags); +void acpi_tb_parse_fadt(u32 table_index, u8 flags); void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length); @@ -58,8 +58,7 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length); */ acpi_status acpi_tb_find_table(char *signature, - char *oem_id, - char *oem_table_id, acpi_native_uint * table_index); + char *oem_id, char *oem_table_id, u32 *table_index); /* * tbinstal - Table removal and deletion @@ -69,30 +68,28 @@ acpi_status acpi_tb_resize_root_table_list(void); acpi_status acpi_tb_verify_table(struct acpi_table_desc *table_desc); acpi_status -acpi_tb_add_table(struct acpi_table_desc *table_desc, - acpi_native_uint * table_index); +acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index); acpi_status acpi_tb_store_table(acpi_physical_address address, struct acpi_table_header *table, - u32 length, u8 flags, acpi_native_uint * table_index); + u32 length, u8 flags, u32 *table_index); void acpi_tb_delete_table(struct acpi_table_desc *table_desc); void acpi_tb_terminate(void); -void acpi_tb_delete_namespace_by_owner(acpi_native_uint table_index); +void acpi_tb_delete_namespace_by_owner(u32 table_index); -acpi_status acpi_tb_allocate_owner_id(acpi_native_uint table_index); +acpi_status acpi_tb_allocate_owner_id(u32 table_index); -acpi_status acpi_tb_release_owner_id(acpi_native_uint table_index); +acpi_status acpi_tb_release_owner_id(u32 table_index); -acpi_status -acpi_tb_get_owner_id(acpi_native_uint table_index, acpi_owner_id * owner_id); +acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id *owner_id); -u8 acpi_tb_is_table_loaded(acpi_native_uint table_index); +u8 acpi_tb_is_table_loaded(u32 table_index); -void acpi_tb_set_table_loaded_flag(acpi_native_uint table_index, u8 is_loaded); +void acpi_tb_set_table_loaded_flag(u32 table_index, u8 is_loaded); /* * tbutils - table manager utilities @@ -103,14 +100,14 @@ void acpi_tb_print_table_header(acpi_physical_address address, struct acpi_table_header *header); -u8 acpi_tb_checksum(u8 * buffer, acpi_native_uint length); +u8 acpi_tb_checksum(u8 *buffer, u32 length); acpi_status acpi_tb_verify_checksum(struct acpi_table_header *table, u32 length); void acpi_tb_install_table(acpi_physical_address address, - u8 flags, char *signature, acpi_native_uint table_index); + u8 flags, char *signature, u32 table_index); acpi_status acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags); diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 9af239bd1153..d38f9be2f6ee 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -300,6 +300,7 @@ struct acpi_table_dbgp { /******************************************************************************* * * DMAR - DMA Remapping table + * From "Intel Virtualization Technology for Directed I/O", Sept. 2007 * ******************************************************************************/ @@ -310,6 +311,10 @@ struct acpi_table_dmar { u8 reserved[10]; }; +/* Flags */ + +#define ACPI_DMAR_INTR_REMAP (1) + /* DMAR subtable header */ struct acpi_dmar_header { @@ -382,6 +387,20 @@ struct acpi_dmar_reserved_memory { #define ACPI_DMAR_ALLOW_ALL (1) + +/* 2: Root Port ATS Capability Reporting Structure */ + +struct acpi_dmar_atsr { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; +}; + +/* Flags */ + +#define ACPI_DMAR_ALL_PORTS (1) + /******************************************************************************* * * ECDT - Embedded Controller Boot Resources Table @@ -1156,9 +1175,9 @@ struct acpi_srat_mem_affinity { u16 reserved; /* Reserved, must be zero */ u64 base_address; u64 length; - u32 memory_type; /* See acpi_address_range_id */ + u32 reserved1; u32 flags; - u64 reserved1; /* Reserved, must be zero */ + u64 reserved2; /* Reserved, must be zero */ }; /* Flags */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index dfea2d440488..4ea4f40bf894 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -110,10 +110,10 @@ * usually used for memory allocation, efficient loop counters, and array * indexes. The types are similar to the size_t type in the C library and are * required because there is no C type that consistently represents the native - * data width. + * data width. ACPI_SIZE is needed because there is no guarantee that a + * kernel-level C library is present. * * ACPI_SIZE 16/32/64-bit unsigned value - * ACPI_NATIVE_UINT 16/32/64-bit unsigned value * ACPI_NATIVE_INT 16/32/64-bit signed value * */ @@ -147,9 +147,9 @@ typedef int INT32; /*! [End] no source code translation !*/ -typedef u64 acpi_native_uint; typedef s64 acpi_native_int; +typedef u64 acpi_size; typedef u64 acpi_io_address; typedef u64 acpi_physical_address; @@ -186,9 +186,9 @@ typedef int INT32; /*! [End] no source code translation !*/ -typedef u32 acpi_native_uint; typedef s32 acpi_native_int; +typedef u32 acpi_size; typedef u32 acpi_io_address; typedef u32 acpi_physical_address; @@ -202,10 +202,6 @@ typedef u32 acpi_physical_address; #error unknown ACPI_MACHINE_WIDTH #endif -/* Variable-width type, used instead of clib size_t */ - -typedef acpi_native_uint acpi_size; - /******************************************************************************* * * OS-dependent and compiler-dependent types @@ -219,7 +215,7 @@ typedef acpi_native_uint acpi_size; /* Value returned by acpi_os_get_thread_id */ #ifndef acpi_thread_id -#define acpi_thread_id acpi_native_uint +#define acpi_thread_id acpi_size #endif /* Object returned from acpi_os_create_lock */ @@ -231,7 +227,7 @@ typedef acpi_native_uint acpi_size; /* Flags for acpi_os_acquire_lock/acpi_os_release_lock */ #ifndef acpi_cpu_flags -#define acpi_cpu_flags acpi_native_uint +#define acpi_cpu_flags acpi_size #endif /* Object returned from acpi_os_create_cache */ diff --git a/include/acpi/acutils.h b/include/acpi/acutils.h index b42cadf07302..69f8888771ff 100644 --- a/include/acpi/acutils.h +++ b/include/acpi/acutils.h @@ -172,7 +172,7 @@ char *acpi_ut_strstr(char *string1, char *string2); void *acpi_ut_memcpy(void *dest, const void *src, acpi_size count); -void *acpi_ut_memset(void *dest, acpi_native_uint value, acpi_size count); +void *acpi_ut_memset(void *dest, u8 value, acpi_size count); int acpi_ut_to_upper(int c); @@ -245,41 +245,45 @@ void acpi_ut_track_stack_ptr(void); void acpi_ut_trace(u32 line_number, - const char *function_name, char *module_name, u32 component_id); + const char *function_name, + const char *module_name, u32 component_id); void acpi_ut_trace_ptr(u32 line_number, const char *function_name, - char *module_name, u32 component_id, void *pointer); + const char *module_name, u32 component_id, void *pointer); void acpi_ut_trace_u32(u32 line_number, const char *function_name, - char *module_name, u32 component_id, u32 integer); + const char *module_name, u32 component_id, u32 integer); void acpi_ut_trace_str(u32 line_number, const char *function_name, - char *module_name, u32 component_id, char *string); + const char *module_name, u32 component_id, char *string); void acpi_ut_exit(u32 line_number, - const char *function_name, char *module_name, u32 component_id); + const char *function_name, + const char *module_name, u32 component_id); void acpi_ut_status_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, acpi_status status); + const char *module_name, + u32 component_id, acpi_status status); void acpi_ut_value_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, acpi_integer value); + const char *module_name, + u32 component_id, acpi_integer value); void acpi_ut_ptr_exit(u32 line_number, const char *function_name, - char *module_name, u32 component_id, u8 * ptr); + const char *module_name, u32 component_id, u8 *ptr); void acpi_ut_dump_buffer(u8 * buffer, u32 count, u32 display, u32 component_id); @@ -297,33 +301,35 @@ void ACPI_INTERNAL_VAR_XFACE acpi_ut_debug_print(u32 requested_debug_level, u32 line_number, const char *function_name, - char *module_name, - u32 component_id, char *format, ...) ACPI_PRINTF_LIKE(6); + const char *module_name, + u32 component_id, + const char *format, ...) ACPI_PRINTF_LIKE(6); void ACPI_INTERNAL_VAR_XFACE acpi_ut_debug_print_raw(u32 requested_debug_level, u32 line_number, const char *function_name, - char *module_name, + const char *module_name, u32 component_id, - char *format, ...) ACPI_PRINTF_LIKE(6); + const char *format, ...) ACPI_PRINTF_LIKE(6); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_error(char *module_name, - u32 line_number, char *format, ...) ACPI_PRINTF_LIKE(3); +acpi_ut_error(const char *module_name, + u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_exception(char *module_name, +acpi_ut_exception(const char *module_name, u32 line_number, - acpi_status status, char *format, ...) ACPI_PRINTF_LIKE(4); + acpi_status status, + const char *format, ...) ACPI_PRINTF_LIKE(4); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_warning(char *module_name, - u32 line_number, char *format, ...) ACPI_PRINTF_LIKE(3); +acpi_ut_warning(const char *module_name, + u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3); void ACPI_INTERNAL_VAR_XFACE -acpi_ut_info(char *module_name, - u32 line_number, char *format, ...) ACPI_PRINTF_LIKE(3); +acpi_ut_info(const char *module_name, + u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3); /* * utdelete - Object deletion and reference counts @@ -376,13 +382,14 @@ acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest); /* * utobject - internal object create/delete/cache routines */ -union acpi_operand_object *acpi_ut_create_internal_object_dbg(char *module_name, +union acpi_operand_object *acpi_ut_create_internal_object_dbg(const char + *module_name, u32 line_number, u32 component_id, acpi_object_type type); -void *acpi_ut_allocate_object_desc_dbg(char *module_name, +void *acpi_ut_allocate_object_desc_dbg(const char *module_name, u32 line_number, u32 component_id); #define acpi_ut_create_internal_object(t) acpi_ut_create_internal_object_dbg (_acpi_module_name,__LINE__,_COMPONENT,t) @@ -476,7 +483,7 @@ u8 acpi_ut_valid_acpi_name(u32 name); acpi_name acpi_ut_repair_name(char *name); -u8 acpi_ut_valid_acpi_char(char character, acpi_native_uint position); +u8 acpi_ut_valid_acpi_char(char character, u32 position); acpi_status acpi_ut_strtoul64(char *string, u32 base, acpi_integer * ret_integer); @@ -543,26 +550,29 @@ acpi_status acpi_ut_initialize_buffer(struct acpi_buffer *buffer, acpi_size required_length); -void *acpi_ut_allocate(acpi_size size, u32 component, char *module, u32 line); +void *acpi_ut_allocate(acpi_size size, + u32 component, const char *module, u32 line); void *acpi_ut_allocate_zeroed(acpi_size size, - u32 component, char *module, u32 line); + u32 component, const char *module, u32 line); #ifdef ACPI_DBG_TRACK_ALLOCATIONS void *acpi_ut_allocate_and_track(acpi_size size, - u32 component, char *module, u32 line); + u32 component, const char *module, u32 line); void *acpi_ut_allocate_zeroed_and_track(acpi_size size, - u32 component, char *module, u32 line); + u32 component, + const char *module, u32 line); void -acpi_ut_free_and_track(void *address, u32 component, char *module, u32 line); +acpi_ut_free_and_track(void *address, + u32 component, const char *module, u32 line); #ifdef ACPI_FUTURE_USAGE void acpi_ut_dump_allocation_info(void); #endif /* ACPI_FUTURE_USAGE */ -void acpi_ut_dump_allocations(u32 component, char *module); +void acpi_ut_dump_allocations(u32 component, const char *module); acpi_status acpi_ut_create_list(char *list_name, diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 06ebb6ef72aa..3795590e152a 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -255,7 +255,7 @@ extern void acpi_processor_unregister_performance(struct int acpi_processor_notify_smm(struct module *calling_module); /* for communication between multiple parts of the processor kernel module */ -extern struct acpi_processor *processors[NR_CPUS]; +DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; void arch_acpi_processor_init_pdc(struct acpi_processor *pr); diff --git a/include/acpi/reboot.h b/include/acpi/reboot.h index 8857f57e0b78..0419184ce886 100644 --- a/include/acpi/reboot.h +++ b/include/acpi/reboot.h @@ -1,9 +1,11 @@ +#ifndef __ACPI_REBOOT_H +#define __ACPI_REBOOT_H + +#ifdef CONFIG_ACPI +extern void acpi_reboot(void); +#else +static inline void acpi_reboot(void) { } +#endif -/* - * Dummy placeholder to make the EFI patches apply to the x86 tree. - * Andrew/Len, please just kill this file if you encounter it. - */ -#ifndef acpi_reboot -# define acpi_reboot() do { } while (0) #endif diff --git a/include/asm-arm/arch-at91/at91_mci.h b/include/asm-arm/arch-at91/at91_mci.h index 1551fc24eb43..400ec10014b4 100644 --- a/include/asm-arm/arch-at91/at91_mci.h +++ b/include/asm-arm/arch-at91/at91_mci.h @@ -75,6 +75,10 @@ #define AT91_MCI_TRTYP_MULTIPLE (1 << 19) #define AT91_MCI_TRTYP_STREAM (2 << 19) +#define AT91_MCI_BLKR 0x18 /* Block Register */ +#define AT91_MCI_BLKR_BCNT(n) ((0xffff & (n)) << 0) /* Block count */ +#define AT91_MCI_BLKR_BLKLEN(n) ((0xffff & (n)) << 16) /* Block lenght */ + #define AT91_MCI_RSPR(n) (0x20 + ((n) * 4)) /* Response Registers 0-3 */ #define AT91_MCR_RDR 0x30 /* Receive Data Register */ #define AT91_MCR_TDR 0x34 /* Transmit Data Register */ diff --git a/include/asm-arm/arch-s3c2410/regs-sdi.h b/include/asm-arm/arch-s3c2410/regs-sdi.h index bb9d30b72952..bfb222fa4abb 100644 --- a/include/asm-arm/arch-s3c2410/regs-sdi.h +++ b/include/asm-arm/arch-s3c2410/regs-sdi.h @@ -28,9 +28,15 @@ #define S3C2410_SDIDCNT (0x30) #define S3C2410_SDIDSTA (0x34) #define S3C2410_SDIFSTA (0x38) + #define S3C2410_SDIDATA (0x3C) #define S3C2410_SDIIMSK (0x40) +#define S3C2440_SDIDATA (0x40) +#define S3C2440_SDIIMSK (0x3C) + +#define S3C2440_SDICON_SDRESET (1<<8) +#define S3C2440_SDICON_MMCCLOCK (1<<5) #define S3C2410_SDICON_BYTEORDER (1<<4) #define S3C2410_SDICON_SDIOIRQ (1<<3) #define S3C2410_SDICON_RWAITEN (1<<2) @@ -42,7 +48,8 @@ #define S3C2410_SDICMDCON_LONGRSP (1<<10) #define S3C2410_SDICMDCON_WAITRSP (1<<9) #define S3C2410_SDICMDCON_CMDSTART (1<<8) -#define S3C2410_SDICMDCON_INDEX (0xff) +#define S3C2410_SDICMDCON_SENDERHOST (1<<6) +#define S3C2410_SDICMDCON_INDEX (0x3f) #define S3C2410_SDICMDSTAT_CRCFAIL (1<<12) #define S3C2410_SDICMDSTAT_CMDSENT (1<<11) @@ -51,6 +58,9 @@ #define S3C2410_SDICMDSTAT_XFERING (1<<8) #define S3C2410_SDICMDSTAT_INDEX (0xff) +#define S3C2440_SDIDCON_DS_BYTE (0<<22) +#define S3C2440_SDIDCON_DS_HALFWORD (1<<22) +#define S3C2440_SDIDCON_DS_WORD (2<<22) #define S3C2410_SDIDCON_IRQPERIOD (1<<21) #define S3C2410_SDIDCON_TXAFTERRESP (1<<20) #define S3C2410_SDIDCON_RXAFTERCMD (1<<19) @@ -59,6 +69,7 @@ #define S3C2410_SDIDCON_WIDEBUS (1<<16) #define S3C2410_SDIDCON_DMAEN (1<<15) #define S3C2410_SDIDCON_STOP (1<<14) +#define S3C2440_SDIDCON_DATSTART (1<<14) #define S3C2410_SDIDCON_DATMODE (3<<12) #define S3C2410_SDIDCON_BLKNUM (0x7ff) @@ -68,6 +79,7 @@ #define S3C2410_SDIDCON_XFER_RXSTART (2<<12) #define S3C2410_SDIDCON_XFER_TXSTART (3<<12) +#define S3C2410_SDIDCON_BLKNUM_MASK (0xFFF) #define S3C2410_SDIDCNT_BLKNUM_SHIFT (12) #define S3C2410_SDIDSTA_RDYWAITREQ (1<<10) @@ -82,10 +94,12 @@ #define S3C2410_SDIDSTA_TXDATAON (1<<1) #define S3C2410_SDIDSTA_RXDATAON (1<<0) +#define S3C2440_SDIFSTA_FIFORESET (1<<16) +#define S3C2440_SDIFSTA_FIFOFAIL (3<<14) /* 3 is correct (2 bits) */ #define S3C2410_SDIFSTA_TFDET (1<<13) #define S3C2410_SDIFSTA_RFDET (1<<12) -#define S3C2410_SDIFSTA_TXHALF (1<<11) -#define S3C2410_SDIFSTA_TXEMPTY (1<<10) +#define S3C2410_SDIFSTA_TFHALF (1<<11) +#define S3C2410_SDIFSTA_TFEMPTY (1<<10) #define S3C2410_SDIFSTA_RFLAST (1<<9) #define S3C2410_SDIFSTA_RFFULL (1<<8) #define S3C2410_SDIFSTA_RFHALF (1<<7) diff --git a/include/asm-arm/plat-s3c24xx/mci.h b/include/asm-arm/plat-s3c24xx/mci.h new file mode 100644 index 000000000000..2d0852ac3b27 --- /dev/null +++ b/include/asm-arm/plat-s3c24xx/mci.h @@ -0,0 +1,15 @@ +#ifndef _ARCH_MCI_H +#define _ARCH_MCI_H + +struct s3c24xx_mci_pdata { + unsigned int wprotect_invert : 1; + unsigned int detect_invert : 1; /* set => detect active high. */ + + unsigned int gpio_detect; + unsigned int gpio_wprotect; + unsigned long ocr_avail; + void (*set_power)(unsigned char power_mode, + unsigned short vdd); +}; + +#endif /* _ARCH_NCI_H */ diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h index b4cddfaca90e..a3783861cdd2 100644 --- a/include/asm-avr32/arch-at32ap/board.h +++ b/include/asm-avr32/arch-at32ap/board.h @@ -77,7 +77,11 @@ struct i2c_board_info; struct platform_device *at32_add_device_twi(unsigned int id, struct i2c_board_info *b, unsigned int n); -struct platform_device *at32_add_device_mci(unsigned int id); + +struct mci_platform_data; +struct platform_device * +at32_add_device_mci(unsigned int id, struct mci_platform_data *data); + struct platform_device *at32_add_device_ac97c(unsigned int id); struct platform_device *at32_add_device_abdac(unsigned int id); struct platform_device *at32_add_device_psif(unsigned int id); diff --git a/include/asm-avr32/atmel-mci.h b/include/asm-avr32/atmel-mci.h new file mode 100644 index 000000000000..c2ea6e1c9aa1 --- /dev/null +++ b/include/asm-avr32/atmel-mci.h @@ -0,0 +1,9 @@ +#ifndef __ASM_AVR32_ATMEL_MCI_H +#define __ASM_AVR32_ATMEL_MCI_H + +struct mci_platform_data { + int detect_pin; + int wp_pin; +}; + +#endif /* __ASM_AVR32_ATMEL_MCI_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b20409404c7d..729f6b0a60e9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -86,6 +86,12 @@ VMLINUX_SYMBOL(__start_pci_fixups_resume) = .; \ *(.pci_fixup_resume) \ VMLINUX_SYMBOL(__end_pci_fixups_resume) = .; \ + VMLINUX_SYMBOL(__start_pci_fixups_resume_early) = .; \ + *(.pci_fixup_resume_early) \ + VMLINUX_SYMBOL(__end_pci_fixups_resume_early) = .; \ + VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .; \ + *(.pci_fixup_suspend) \ + VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .; \ } \ \ /* Built-in firmware blobs */ \ diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 6aff126fc07e..f88fa054d01d 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -763,6 +763,8 @@ prefetchw (const void *x) #define spin_lock_prefetch(x) prefetchw(x) extern unsigned long boot_option_idle_override; +extern unsigned long idle_halt; +extern unsigned long idle_nomwait; #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-mips/mach-au1x00/au1100_mmc.h b/include/asm-mips/mach-au1x00/au1100_mmc.h index 9e0028f60a43..c35e20918490 100644 --- a/include/asm-mips/mach-au1x00/au1100_mmc.h +++ b/include/asm-mips/mach-au1x00/au1100_mmc.h @@ -38,15 +38,15 @@ #ifndef __ASM_AU1100_MMC_H #define __ASM_AU1100_MMC_H - -#define NUM_AU1100_MMC_CONTROLLERS 2 - -#if defined(CONFIG_SOC_AU1100) -#define AU1100_SD_IRQ AU1100_SD_INT -#elif defined(CONFIG_SOC_AU1200) -#define AU1100_SD_IRQ AU1200_SD_INT -#endif - +#include + +struct au1xmmc_platform_data { + int(*cd_setup)(void *mmc_host, int on); + int(*card_inserted)(void *mmc_host); + int(*card_readonly)(void *mmc_host); + void(*set_power)(void *mmc_host, int state); + struct led_classdev *led; +}; #define SD0_BASE 0xB0600000 #define SD1_BASE 0xB0680000 diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild index 09f312501eb5..bb5e9edb9825 100644 --- a/include/asm-s390/Kbuild +++ b/include/asm-s390/Kbuild @@ -8,9 +8,9 @@ header-y += ucontext.h header-y += vtoc.h header-y += zcrypt.h header-y += kvm.h -header-y += schid.h header-y += chsc.h unifdef-y += cmb.h unifdef-y += debug.h unifdef-y += chpid.h +unifdef-y += schid.h diff --git a/include/asm-s390/chpid.h b/include/asm-s390/chpid.h index 606844d0a5c3..dfe3c7f3439a 100644 --- a/include/asm-s390/chpid.h +++ b/include/asm-s390/chpid.h @@ -20,6 +20,9 @@ struct chp_id { u8 id; } __attribute__((packed)); +#ifdef __KERNEL__ +#include + static inline void chp_id_init(struct chp_id *chpid) { memset(chpid, 0, sizeof(struct chp_id)); @@ -40,9 +43,6 @@ static inline void chp_id_next(struct chp_id *chpid) } } -#ifdef __KERNEL__ -#include - static inline int chp_id_is_valid(struct chp_id *chpid) { return (chpid->cssid <= __MAX_CSSID); diff --git a/include/asm-s390/qdio.h b/include/asm-s390/qdio.h index 11240342a0f4..6813772171f2 100644 --- a/include/asm-s390/qdio.h +++ b/include/asm-s390/qdio.h @@ -1,404 +1,382 @@ /* * linux/include/asm-s390/qdio.h * - * Linux for S/390 QDIO base support, Hipersocket base support - * version 2 - * - * Copyright 2000,2002 IBM Corporation + * Copyright 2000,2008 IBM Corp. * Author(s): Utz Bacher + * Jan Glauber * */ #ifndef __QDIO_H__ #define __QDIO_H__ -/* note, that most of the typedef's are from ingo. */ - #include #include #include -#define QDIO_NAME "qdio " - -#ifndef __s390x__ -#define QDIO_32_BIT -#endif /* __s390x__ */ - -/**** CONSTANTS, that are relied on without using these symbols *****/ -#define QDIO_MAX_QUEUES_PER_IRQ 32 /* used in width of unsigned int */ -/************************ END of CONSTANTS **************************/ -#define QDIO_MAX_BUFFERS_PER_Q 128 /* must be a power of 2 (%x=&(x-1)*/ -#define QDIO_BUF_ORDER 7 /* 2**this == number of pages used for sbals in 1 q */ -#define QDIO_MAX_ELEMENTS_PER_BUFFER 16 -#define SBAL_SIZE 256 - -#define QDIO_QETH_QFMT 0 -#define QDIO_ZFCP_QFMT 1 -#define QDIO_IQDIO_QFMT 2 -#define QDIO_IQDIO_QFMT_ASYNCH 3 - -struct qdio_buffer_element{ - unsigned int flags; - unsigned int length; -#ifdef QDIO_32_BIT - void *reserved; -#endif /* QDIO_32_BIT */ - void *addr; -} __attribute__ ((packed,aligned(16))); - -struct qdio_buffer{ - volatile struct qdio_buffer_element element[16]; -} __attribute__ ((packed,aligned(256))); - - -/* params are: ccw_device, status, qdio_error, siga_error, - queue_number, first element processed, number of elements processed, - int_parm */ -typedef void qdio_handler_t(struct ccw_device *,unsigned int,unsigned int, - unsigned int,unsigned int,int,int,unsigned long); - - -#define QDIO_STATUS_INBOUND_INT 0x01 -#define QDIO_STATUS_OUTBOUND_INT 0x02 -#define QDIO_STATUS_LOOK_FOR_ERROR 0x04 -#define QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR 0x08 -#define QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR 0x10 -#define QDIO_STATUS_ACTIVATE_CHECK_CONDITION 0x20 - -#define QDIO_SIGA_ERROR_ACCESS_EXCEPTION 0x10 -#define QDIO_SIGA_ERROR_B_BIT_SET 0x20 - -/* for qdio_initialize */ -#define QDIO_INBOUND_0COPY_SBALS 0x01 -#define QDIO_OUTBOUND_0COPY_SBALS 0x02 -#define QDIO_USE_OUTBOUND_PCIS 0x04 - -/* for qdio_cleanup */ -#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01 -#define QDIO_FLAG_CLEANUP_USING_HALT 0x02 - -struct qdio_initialize { - struct ccw_device *cdev; - unsigned char q_format; - unsigned char adapter_name[8]; - unsigned int qib_param_field_format; /*adapter dependent*/ - /* pointer to 128 bytes or NULL, if no param field */ - unsigned char *qib_param_field; /* adapter dependent */ - /* pointer to no_queues*128 words of data or NULL */ - unsigned long *input_slib_elements; - unsigned long *output_slib_elements; - unsigned int min_input_threshold; - unsigned int max_input_threshold; - unsigned int min_output_threshold; - unsigned int max_output_threshold; - unsigned int no_input_qs; - unsigned int no_output_qs; - qdio_handler_t *input_handler; - qdio_handler_t *output_handler; - unsigned long int_parm; - unsigned long flags; - void **input_sbal_addr_array; /* addr of n*128 void ptrs */ - void **output_sbal_addr_array; /* addr of n*128 void ptrs */ -}; - -extern int qdio_initialize(struct qdio_initialize *init_data); -extern int qdio_allocate(struct qdio_initialize *init_data); -extern int qdio_establish(struct qdio_initialize *init_data); - -extern int qdio_activate(struct ccw_device *,int flags); - -#define QDIO_STATE_MUST_USE_OUTB_PCI 0x00000001 -#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ -#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_initialize */ -#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ -#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */ -extern unsigned long qdio_get_status(int irq); - - -#define QDIO_FLAG_SYNC_INPUT 0x01 -#define QDIO_FLAG_SYNC_OUTPUT 0x02 -#define QDIO_FLAG_UNDER_INTERRUPT 0x04 -#define QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT 0x08 /* no effect on - adapter interrupts */ -#define QDIO_FLAG_DONT_SIGA 0x10 -#define QDIO_FLAG_PCI_OUT 0x20 - -extern int do_QDIO(struct ccw_device*, unsigned int flags, - unsigned int queue_number, - unsigned int qidx,unsigned int count, - struct qdio_buffer *buffers); - -extern int qdio_get_ssqd_pct(struct ccw_device*); -extern int qdio_synchronize(struct ccw_device*, unsigned int flags, - unsigned int queue_number); - -extern int qdio_cleanup(struct ccw_device*, int how); -extern int qdio_shutdown(struct ccw_device*, int how); -extern int qdio_free(struct ccw_device*); - -unsigned char qdio_get_slsb_state(struct ccw_device*, unsigned int flag, - unsigned int queue_number, - unsigned int qidx); - -extern void qdio_init_scrubber(void); - +#define QDIO_MAX_QUEUES_PER_IRQ 32 +#define QDIO_MAX_BUFFERS_PER_Q 128 +#define QDIO_MAX_BUFFERS_MASK (QDIO_MAX_BUFFERS_PER_Q - 1) +#define QDIO_MAX_ELEMENTS_PER_BUFFER 16 +#define QDIO_SBAL_SIZE 256 + +#define QDIO_QETH_QFMT 0 +#define QDIO_ZFCP_QFMT 1 +#define QDIO_IQDIO_QFMT 2 + +/** + * struct qdesfmt0 - queue descriptor, format 0 + * @sliba: storage list information block address + * @sla: storage list address + * @slsba: storage list state block address + * @akey: access key for DLIB + * @bkey: access key for SL + * @ckey: access key for SBALs + * @dkey: access key for SLSB + */ struct qdesfmt0 { -#ifdef QDIO_32_BIT - unsigned long res1; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long sliba; /* storage-list-information-block - address */ -#ifdef QDIO_32_BIT - unsigned long res2; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long sla; /* storage-list address */ -#ifdef QDIO_32_BIT - unsigned long res3; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long slsba; /* storage-list-state-block address */ - unsigned int res4; /* reserved */ - unsigned int akey : 4; /* access key for DLIB */ - unsigned int bkey : 4; /* access key for SL */ - unsigned int ckey : 4; /* access key for SBALs */ - unsigned int dkey : 4; /* access key for SLSB */ - unsigned int res5 : 16; /* reserved */ + u64 sliba; + u64 sla; + u64 slsba; + u32 : 32; + u32 akey : 4; + u32 bkey : 4; + u32 ckey : 4; + u32 dkey : 4; + u32 : 16; } __attribute__ ((packed)); -/* - * Queue-Description record (QDR) +/** + * struct qdr - queue description record (QDR) + * @qfmt: queue format + * @pfmt: implementation dependent parameter format + * @ac: adapter characteristics + * @iqdcnt: input queue descriptor count + * @oqdcnt: output queue descriptor count + * @iqdsz: inpout queue descriptor size + * @oqdsz: output queue descriptor size + * @qiba: queue information block address + * @qkey: queue information block key + * @qdf0: queue descriptions */ struct qdr { - unsigned int qfmt : 8; /* queue format */ - unsigned int pfmt : 8; /* impl. dep. parameter format */ - unsigned int res1 : 8; /* reserved */ - unsigned int ac : 8; /* adapter characteristics */ - unsigned int res2 : 8; /* reserved */ - unsigned int iqdcnt : 8; /* input-queue-descriptor count */ - unsigned int res3 : 8; /* reserved */ - unsigned int oqdcnt : 8; /* output-queue-descriptor count */ - unsigned int res4 : 8; /* reserved */ - unsigned int iqdsz : 8; /* input-queue-descriptor size */ - unsigned int res5 : 8; /* reserved */ - unsigned int oqdsz : 8; /* output-queue-descriptor size */ - unsigned int res6[9]; /* reserved */ -#ifdef QDIO_32_BIT - unsigned long res7; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long qiba; /* queue-information-block address */ - unsigned int res8; /* reserved */ - unsigned int qkey : 4; /* queue-information-block key */ - unsigned int res9 : 28; /* reserved */ -/* union _qd {*/ /* why this? */ - struct qdesfmt0 qdf0[126]; -/* } qd;*/ -} __attribute__ ((packed,aligned(4096))); - - -/* - * queue information block (QIB) - */ -#define QIB_AC_INBOUND_PCI_SUPPORTED 0x80 -#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40 + u32 qfmt : 8; + u32 pfmt : 8; + u32 : 8; + u32 ac : 8; + u32 : 8; + u32 iqdcnt : 8; + u32 : 8; + u32 oqdcnt : 8; + u32 : 8; + u32 iqdsz : 8; + u32 : 8; + u32 oqdsz : 8; + /* private: */ + u32 res[9]; + /* public: */ + u64 qiba; + u32 : 32; + u32 qkey : 4; + u32 : 28; + struct qdesfmt0 qdf0[126]; +} __attribute__ ((packed, aligned(4096))); + +#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40 #define QIB_RFLAGS_ENABLE_QEBSM 0x80 +/** + * struct qib - queue information block (QIB) + * @qfmt: queue format + * @pfmt: implementation dependent parameter format + * @rflags: QEBSM + * @ac: adapter characteristics + * @isliba: absolute address of first input SLIB + * @osliba: absolute address of first output SLIB + * @ebcnam: adapter identifier in EBCDIC + * @parm: implementation dependent parameters + */ struct qib { - unsigned int qfmt : 8; /* queue format */ - unsigned int pfmt : 8; /* impl. dep. parameter format */ - unsigned int rflags : 8; /* QEBSM */ - unsigned int ac : 8; /* adapter characteristics */ - unsigned int res2; /* reserved */ -#ifdef QDIO_32_BIT - unsigned long res3; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long isliba; /* absolute address of 1st - input SLIB */ -#ifdef QDIO_32_BIT - unsigned long res4; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long osliba; /* absolute address of 1st - output SLIB */ - unsigned int res5; /* reserved */ - unsigned int res6; /* reserved */ - unsigned char ebcnam[8]; /* adapter identifier in EBCDIC */ - unsigned char res7[88]; /* reserved */ - unsigned char parm[QDIO_MAX_BUFFERS_PER_Q]; - /* implementation dependent - parameters */ -} __attribute__ ((packed,aligned(256))); - - -/* - * storage-list-information block element (SLIBE) + u32 qfmt : 8; + u32 pfmt : 8; + u32 rflags : 8; + u32 ac : 8; + u32 : 32; + u64 isliba; + u64 osliba; + u32 : 32; + u32 : 32; + u8 ebcnam[8]; + /* private: */ + u8 res[88]; + /* public: */ + u8 parm[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(256))); + +/** + * struct slibe - storage list information block element (SLIBE) + * @parms: implementation dependent parameters */ struct slibe { -#ifdef QDIO_32_BIT - unsigned long res; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long parms; /* implementation dependent - parameters */ + u64 parms; }; -/* - * storage-list-information block (SLIB) +/** + * struct slib - storage list information block (SLIB) + * @nsliba: next SLIB address (if any) + * @sla: SL address + * @slsba: SLSB address + * @slibe: SLIB elements */ struct slib { -#ifdef QDIO_32_BIT - unsigned long res1; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long nsliba; /* next SLIB address (if any) */ -#ifdef QDIO_32_BIT - unsigned long res2; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long sla; /* SL address */ -#ifdef QDIO_32_BIT - unsigned long res3; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long slsba; /* SLSB address */ - unsigned char res4[1000]; /* reserved */ - struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q]; /* SLIB elements */ -} __attribute__ ((packed,aligned(2048))); - + u64 nsliba; + u64 sla; + u64 slsba; + /* private: */ + u8 res[1000]; + /* public: */ + struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(2048))); + +/** + * struct sbal_flags - storage block address list flags + * @last: last entry + * @cont: contiguous storage + * @frag: fragmentation + */ struct sbal_flags { - unsigned char res1 : 1; /* reserved */ - unsigned char last : 1; /* last entry */ - unsigned char cont : 1; /* contiguous storage */ - unsigned char res2 : 1; /* reserved */ - unsigned char frag : 2; /* fragmentation (s.below) */ - unsigned char res3 : 2; /* reserved */ + u8 : 1; + u8 last : 1; + u8 cont : 1; + u8 : 1; + u8 frag : 2; + u8 : 2; } __attribute__ ((packed)); -#define SBAL_FLAGS_FIRST_FRAG 0x04000000UL -#define SBAL_FLAGS_MIDDLE_FRAG 0x08000000UL -#define SBAL_FLAGS_LAST_FRAG 0x0c000000UL -#define SBAL_FLAGS_LAST_ENTRY 0x40000000UL -#define SBAL_FLAGS_CONTIGUOUS 0x20000000UL +#define SBAL_FLAGS_FIRST_FRAG 0x04000000UL +#define SBAL_FLAGS_MIDDLE_FRAG 0x08000000UL +#define SBAL_FLAGS_LAST_FRAG 0x0c000000UL +#define SBAL_FLAGS_LAST_ENTRY 0x40000000UL +#define SBAL_FLAGS_CONTIGUOUS 0x20000000UL -#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL +#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL /* Awesome OpenFCP extensions */ -#define SBAL_FLAGS0_TYPE_STATUS 0x00UL -#define SBAL_FLAGS0_TYPE_WRITE 0x08UL -#define SBAL_FLAGS0_TYPE_READ 0x10UL -#define SBAL_FLAGS0_TYPE_WRITE_READ 0x18UL -#define SBAL_FLAGS0_MORE_SBALS 0x04UL -#define SBAL_FLAGS0_COMMAND 0x02UL -#define SBAL_FLAGS0_LAST_SBAL 0x00UL -#define SBAL_FLAGS0_ONLY_SBAL SBAL_FLAGS0_COMMAND -#define SBAL_FLAGS0_MIDDLE_SBAL SBAL_FLAGS0_MORE_SBALS -#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND -/* Naught of interest beyond this point */ - -#define SBAL_FLAGS0_PCI 0x40 +#define SBAL_FLAGS0_TYPE_STATUS 0x00UL +#define SBAL_FLAGS0_TYPE_WRITE 0x08UL +#define SBAL_FLAGS0_TYPE_READ 0x10UL +#define SBAL_FLAGS0_TYPE_WRITE_READ 0x18UL +#define SBAL_FLAGS0_MORE_SBALS 0x04UL +#define SBAL_FLAGS0_COMMAND 0x02UL +#define SBAL_FLAGS0_LAST_SBAL 0x00UL +#define SBAL_FLAGS0_ONLY_SBAL SBAL_FLAGS0_COMMAND +#define SBAL_FLAGS0_MIDDLE_SBAL SBAL_FLAGS0_MORE_SBALS +#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND +#define SBAL_FLAGS0_PCI 0x40 + +/** + * struct sbal_sbalf_0 - sbal flags for sbale 0 + * @pci: PCI indicator + * @cont: data continuation + * @sbtype: storage-block type (FCP) + */ struct sbal_sbalf_0 { - unsigned char res1 : 1; /* reserved */ - unsigned char pci : 1; /* PCI indicator */ - unsigned char cont : 1; /* data continuation */ - unsigned char sbtype: 2; /* storage-block type (OpenFCP) */ - unsigned char res2 : 3; /* reserved */ + u8 : 1; + u8 pci : 1; + u8 cont : 1; + u8 sbtype : 2; + u8 : 3; } __attribute__ ((packed)); +/** + * struct sbal_sbalf_1 - sbal flags for sbale 1 + * @key: storage key + */ struct sbal_sbalf_1 { - unsigned char res1 : 4; /* reserved */ - unsigned char key : 4; /* storage key */ + u8 : 4; + u8 key : 4; } __attribute__ ((packed)); +/** + * struct sbal_sbalf_14 - sbal flags for sbale 14 + * @erridx: error index + */ struct sbal_sbalf_14 { - unsigned char res1 : 4; /* reserved */ - unsigned char erridx : 4; /* error index */ + u8 : 4; + u8 erridx : 4; } __attribute__ ((packed)); +/** + * struct sbal_sbalf_15 - sbal flags for sbale 15 + * @reason: reason for error state + */ struct sbal_sbalf_15 { - unsigned char reason; /* reserved */ + u8 reason; } __attribute__ ((packed)); +/** + * union sbal_sbalf - storage block address list flags + * @i0: sbalf0 + * @i1: sbalf1 + * @i14: sbalf14 + * @i15: sblaf15 + * @value: raw value + */ union sbal_sbalf { struct sbal_sbalf_0 i0; struct sbal_sbalf_1 i1; struct sbal_sbalf_14 i14; struct sbal_sbalf_15 i15; - unsigned char value; + u8 value; }; -struct sbal_element { - union { - struct sbal_flags bits; /* flags */ - unsigned char value; - } flags; - unsigned int res1 : 16; /* reserved */ - union sbal_sbalf sbalf; /* SBAL flags */ - unsigned int res2 : 16; /* reserved */ - unsigned int count : 16; /* data count */ -#ifdef QDIO_32_BIT - unsigned long res3; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long addr; /* absolute data address */ -} __attribute__ ((packed,aligned(16))); +/** + * struct qdio_buffer_element - SBAL entry + * @flags: flags + * @length: length + * @addr: address +*/ +struct qdio_buffer_element { + u32 flags; + u32 length; +#ifdef CONFIG_32BIT + /* private: */ + void *reserved; + /* public: */ +#endif + void *addr; +} __attribute__ ((packed, aligned(16))); -/* - * strorage-block access-list (SBAL) +/** + * struct qdio_buffer - storage block address list (SBAL) + * @element: SBAL entries */ -struct sbal { - struct sbal_element element[QDIO_MAX_ELEMENTS_PER_BUFFER]; -} __attribute__ ((packed,aligned(256))); +struct qdio_buffer { + struct qdio_buffer_element element[QDIO_MAX_ELEMENTS_PER_BUFFER]; +} __attribute__ ((packed, aligned(256))); -/* - * storage-list (SL) +/** + * struct sl_element - storage list entry + * @sbal: absolute SBAL address */ struct sl_element { -#ifdef QDIO_32_BIT - unsigned long res; /* reserved */ -#endif /* QDIO_32_BIT */ - unsigned long sbal; /* absolute SBAL address */ +#ifdef CONFIG_32BIT + /* private: */ + unsigned long reserved; + /* public: */ +#endif + unsigned long sbal; } __attribute__ ((packed)); +/** + * struct sl - storage list (SL) + * @element: SL entries + */ struct sl { struct sl_element element[QDIO_MAX_BUFFERS_PER_Q]; -} __attribute__ ((packed,aligned(1024))); +} __attribute__ ((packed, aligned(1024))); -/* - * storage-list-state block (SLSB) +/** + * struct slsb - storage list state block (SLSB) + * @val: state per buffer */ -struct slsb_flags { - unsigned char owner : 2; /* SBAL owner */ - unsigned char type : 1; /* buffer type */ - unsigned char state : 5; /* processing state */ +struct slsb { + u8 val[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(256))); + +struct qdio_ssqd_desc { + u8 flags; + u8:8; + u16 sch; + u8 qfmt; + u8 parm; + u8 qdioac1; + u8 sch_class; + u8 pcnt; + u8 icnt; + u8:8; + u8 ocnt; + u8:8; + u8 mbccnt; + u16 qdioac2; + u64 sch_token; + u64:64; } __attribute__ ((packed)); +/* params are: ccw_device, qdio_error, queue_number, + first element processed, number of elements processed, int_parm */ +typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, + int, int, unsigned long); -struct slsb { - union { - unsigned char val[QDIO_MAX_BUFFERS_PER_Q]; - struct slsb_flags flags[QDIO_MAX_BUFFERS_PER_Q]; - } acc; -} __attribute__ ((packed,aligned(256))); +/* qdio errors reported to the upper-layer program */ +#define QDIO_ERROR_SIGA_ACCESS_EXCEPTION 0x10 +#define QDIO_ERROR_SIGA_BUSY 0x20 +#define QDIO_ERROR_ACTIVATE_CHECK_CONDITION 0x40 +#define QDIO_ERROR_SLSB_STATE 0x80 -/* - * SLSB values +/* for qdio_initialize */ +#define QDIO_INBOUND_0COPY_SBALS 0x01 +#define QDIO_OUTBOUND_0COPY_SBALS 0x02 +#define QDIO_USE_OUTBOUND_PCIS 0x04 + +/* for qdio_cleanup */ +#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01 +#define QDIO_FLAG_CLEANUP_USING_HALT 0x02 + +/** + * struct qdio_initialize - qdio initalization data + * @cdev: associated ccw device + * @q_format: queue format + * @adapter_name: name for the adapter + * @qib_param_field_format: format for qib_parm_field + * @qib_param_field: pointer to 128 bytes or NULL, if no param field + * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL + * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL + * @no_input_qs: number of input queues + * @no_output_qs: number of output queues + * @input_handler: handler to be called for input queues + * @output_handler: handler to be called for output queues + * @int_parm: interruption parameter + * @flags: initialization flags + * @input_sbal_addr_array: address of no_input_qs * 128 pointers + * @output_sbal_addr_array: address of no_output_qs * 128 pointers */ -#define SLSB_OWNER_PROG 1 -#define SLSB_OWNER_CU 2 - -#define SLSB_TYPE_INPUT 0 -#define SLSB_TYPE_OUTPUT 1 - -#define SLSB_STATE_NOT_INIT 0 -#define SLSB_STATE_EMPTY 1 -#define SLSB_STATE_PRIMED 2 -#define SLSB_STATE_HALTED 0xe -#define SLSB_STATE_ERROR 0xf - -#define SLSB_P_INPUT_NOT_INIT 0x80 -#define SLSB_P_INPUT_PROCESSING 0x81 -#define SLSB_CU_INPUT_EMPTY 0x41 -#define SLSB_P_INPUT_PRIMED 0x82 -#define SLSB_P_INPUT_HALTED 0x8E -#define SLSB_P_INPUT_ERROR 0x8F - -#define SLSB_P_OUTPUT_NOT_INIT 0xA0 -#define SLSB_P_OUTPUT_EMPTY 0xA1 -#define SLSB_CU_OUTPUT_PRIMED 0x62 -#define SLSB_P_OUTPUT_HALTED 0xAE -#define SLSB_P_OUTPUT_ERROR 0xAF - -#define SLSB_ERROR_DURING_LOOKUP 0xFF +struct qdio_initialize { + struct ccw_device *cdev; + unsigned char q_format; + unsigned char adapter_name[8]; + unsigned int qib_param_field_format; + unsigned char *qib_param_field; + unsigned long *input_slib_elements; + unsigned long *output_slib_elements; + unsigned int no_input_qs; + unsigned int no_output_qs; + qdio_handler_t *input_handler; + qdio_handler_t *output_handler; + unsigned long int_parm; + unsigned long flags; + void **input_sbal_addr_array; + void **output_sbal_addr_array; +}; + +#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ +#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ +#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ +#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */ + +#define QDIO_FLAG_SYNC_INPUT 0x01 +#define QDIO_FLAG_SYNC_OUTPUT 0x02 +#define QDIO_FLAG_PCI_OUT 0x10 + +extern int qdio_initialize(struct qdio_initialize *init_data); +extern int qdio_allocate(struct qdio_initialize *init_data); +extern int qdio_establish(struct qdio_initialize *init_data); +extern int qdio_activate(struct ccw_device *); + +extern int do_QDIO(struct ccw_device*, unsigned int flags, + int q_nr, int qidx, int count); +extern int qdio_cleanup(struct ccw_device*, int how); +extern int qdio_shutdown(struct ccw_device*, int how); +extern int qdio_free(struct ccw_device *); +extern struct qdio_ssqd_desc *qdio_get_ssqd_desc(struct ccw_device *cdev); #endif /* __QDIO_H__ */ diff --git a/include/asm-s390/schid.h b/include/asm-s390/schid.h index 5017ffa78e04..7bdc0fe15691 100644 --- a/include/asm-s390/schid.h +++ b/include/asm-s390/schid.h @@ -10,6 +10,7 @@ struct subchannel_id { __u32 sch_no : 16; } __attribute__ ((packed, aligned(4))); +#ifdef __KERNEL__ /* Helper function for sane state of pre-allocated subchannel_id. */ static inline void @@ -25,4 +26,6 @@ schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2) return !memcmp(schid1, schid2, sizeof(struct subchannel_id)); } +#endif /* __KERNEL__ */ + #endif /* ASM_SCHID_H */ diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index f09ee3f72977..4ba14e463e83 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -65,6 +65,7 @@ extern unsigned long machine_flags; #define MACHINE_FLAG_VM (1UL << 0) #define MACHINE_FLAG_IEEE (1UL << 1) +#define MACHINE_FLAG_P390 (1UL << 2) #define MACHINE_FLAG_CSP (1UL << 3) #define MACHINE_FLAG_MVPG (1UL << 4) #define MACHINE_FLAG_DIAG44 (1UL << 5) @@ -77,7 +78,6 @@ extern unsigned long machine_flags; #define MACHINE_IS_VM (machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (machine_flags & MACHINE_FLAG_KVM) -#define MACHINE_IS_P390 (machine_flags & MACHINE_FLAG_P390) #define MACHINE_HAS_DIAG9C (machine_flags & MACHINE_FLAG_DIAG9C) #ifndef __s390x__ diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h index 5b21485be573..80c775d9fe20 100644 --- a/include/asm-x86/pci-direct.h +++ b/include/asm-x86/pci-direct.h @@ -11,7 +11,11 @@ extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); +extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); extern int early_pci_allowed(void); +extern unsigned int pci_early_dump_regs; +extern void early_dump_pci_device(u8 bus, u8 slot, u8 func); +extern void early_dump_pci_devices(void); #endif diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 7f7382704592..55402d2ab938 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -727,6 +727,8 @@ extern int force_mwait; extern void select_idle_routine(const struct cpuinfo_x86 *c); extern unsigned long boot_option_idle_override; +extern unsigned long idle_halt; +extern unsigned long idle_nomwait; extern void enable_sep_cpu(void); extern int sysenter_setup(void); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0601075d09a1..a17177639376 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -235,6 +235,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_check_mem_region(resource_size_t start, resource_size_t n, const char *name); +#ifdef CONFIG_PM_SLEEP +void __init acpi_old_suspend_ordering(void); +#endif /* CONFIG_PM_SLEEP */ #else /* CONFIG_ACPI */ static inline int early_acpi_boot_init(void) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32a441b05fd5..88d68081a0f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -985,6 +985,9 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) static inline int blk_integrity_rq(struct request *rq) { + if (rq->bio == NULL) + return 0; + return bio_integrity(rq->bio); } diff --git a/include/linux/device.h b/include/linux/device.h index 6a2d04c011bc..f71a78d123ae 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -68,6 +68,8 @@ struct bus_type { int (*resume_early)(struct device *dev); int (*resume)(struct device *dev); + struct pm_ext_ops *pm; + struct bus_type_private *p; }; @@ -131,6 +133,8 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; + struct pm_ops *pm; + struct driver_private *p; }; @@ -197,6 +201,8 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + + struct pm_ops *pm; }; extern int __must_check class_register(struct class *class); @@ -248,8 +254,11 @@ struct device_type { struct attribute_group **groups; int (*uevent)(struct device *dev, struct kobj_uevent_env *env); void (*release)(struct device *dev); + int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + + struct pm_ops *pm; }; /* interface for exporting device attributes */ diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 08934995c7ab..deddeedf3257 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -127,6 +127,15 @@ static inline void set_freezable(void) current->flags &= ~PF_NOFREEZE; } +/* + * Tell the freezer that the current task should be frozen by it and that it + * should send a fake signal to the task to freeze it. + */ +static inline void set_freezable_with_signal(void) +{ + current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG); +} + /* * Freezer-friendly wrappers around wait_event_interruptible() and * wait_event_interruptible_timeout(), originally defined in @@ -174,6 +183,7 @@ static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} +static inline void set_freezable_with_signal(void) {} #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/include/linux/fs.h b/include/linux/fs.h index 52e510a0aec2..c6455dadb21b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1729,6 +1729,8 @@ static inline void invalidate_remote_inode(struct inode *inode) extern int invalidate_inode_pages2(struct address_space *mapping); extern int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); +extern void generic_sync_sb_inodes(struct super_block *sb, + struct writeback_control *wbc); extern int write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); diff --git a/include/linux/ide.h b/include/linux/ide.h index ac4eeb2932ef..4726126f5a59 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -138,6 +138,12 @@ struct ide_io_ports { #define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */ #define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ +/* + * Op codes for special requests to be handled by ide_special_rq(). + * Values should be in the range of 0x20 to 0x3f. + */ +#define REQ_DRIVE_RESET 0x20 + /* * Check for an interrupt and acknowledge the interrupt status */ @@ -171,7 +177,7 @@ typedef struct hw_regs_s { int irq; /* our irq number */ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ hwif_chipset_t chipset; - struct device *dev; + struct device *dev, *parent; } hw_regs_t; void ide_init_port_data(struct hwif_s *, unsigned int); @@ -405,8 +411,8 @@ typedef struct ide_drive_s { struct ide_port_info; struct ide_port_ops { - /* host specific initialization of devices on a port */ - void (*port_init_devs)(struct hwif_s *); + /* host specific initialization of a device */ + void (*init_dev)(ide_drive_t *); /* routine to program host for PIO mode */ void (*set_pio_mode)(ide_drive_t *, const u8); /* routine to program host for DMA mode */ @@ -565,8 +571,6 @@ typedef struct hwgroup_s { unsigned int sleeping : 1; /* BOOL: polling active & poll_timeout field valid */ unsigned int polling : 1; - /* BOOL: in a polling reset situation. Must not trigger another reset yet */ - unsigned int resetting : 1; /* current drive */ ide_drive_t *drive; @@ -786,7 +790,6 @@ struct ide_driver_s { ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); - ide_startstop_t (*abort)(ide_drive_t *, struct request *rq); struct device_driver gen_driver; int (*probe)(ide_drive_t *); void (*remove)(ide_drive_t *); @@ -801,18 +804,6 @@ struct ide_driver_s { int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long); -/* - * ide_hwifs[] is the master data structure used to keep track - * of just about everything in ide.c. Whenever possible, routines - * should be using pointers to a drive (ide_drive_t *) or - * pointers to a hwif (ide_hwif_t *), rather than indexing this - * structure directly (the allocation/layout may change!). - * - */ -#ifndef _IDE_C -extern ide_hwif_t ide_hwifs[]; /* master data repository */ -#endif - extern int ide_vlb_clk; extern int ide_pci_clk; @@ -840,10 +831,6 @@ ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); -ide_startstop_t __ide_abort(ide_drive_t *, struct request *); - -extern ide_startstop_t ide_abort(ide_drive_t *, const char *); - extern void ide_fix_driveid(struct hd_driveid *); extern void ide_fixstring(u8 *, const int, const int); @@ -1271,16 +1258,43 @@ static inline int ide_dev_is_sata(struct hd_driveid *id) u64 ide_get_lba_addr(struct ide_taskfile *, int); u8 ide_dump_status(ide_drive_t *, const char *, u8); -typedef struct ide_pio_timings_s { - int setup_time; /* Address setup (ns) minimum */ - int active_time; /* Active pulse (ns) minimum */ - int cycle_time; /* Cycle time (ns) minimum = */ - /* active + recovery (+ setup for some chips) */ -} ide_pio_timings_t; +struct ide_timing { + u8 mode; + u8 setup; /* t1 */ + u16 act8b; /* t2 for 8-bit io */ + u16 rec8b; /* t2i for 8-bit io */ + u16 cyc8b; /* t0 for 8-bit io */ + u16 active; /* t2 or tD */ + u16 recover; /* t2i or tK */ + u16 cycle; /* t0 */ + u16 udma; /* t2CYCTYP/2 */ +}; + +enum { + IDE_TIMING_SETUP = (1 << 0), + IDE_TIMING_ACT8B = (1 << 1), + IDE_TIMING_REC8B = (1 << 2), + IDE_TIMING_CYC8B = (1 << 3), + IDE_TIMING_8BIT = IDE_TIMING_ACT8B | IDE_TIMING_REC8B | + IDE_TIMING_CYC8B, + IDE_TIMING_ACTIVE = (1 << 4), + IDE_TIMING_RECOVER = (1 << 5), + IDE_TIMING_CYCLE = (1 << 6), + IDE_TIMING_UDMA = (1 << 7), + IDE_TIMING_ALL = IDE_TIMING_SETUP | IDE_TIMING_8BIT | + IDE_TIMING_ACTIVE | IDE_TIMING_RECOVER | + IDE_TIMING_CYCLE | IDE_TIMING_UDMA, +}; + +struct ide_timing *ide_timing_find_mode(u8); +u16 ide_pio_cycle_time(ide_drive_t *, u8); +void ide_timing_merge(struct ide_timing *, struct ide_timing *, + struct ide_timing *, unsigned int); +int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); + +int ide_scan_pio_blacklist(char *); -unsigned int ide_pio_cycle_time(ide_drive_t *, u8); u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); -extern const ide_pio_timings_t ide_pio_timings[6]; int ide_set_pio_mode(ide_drive_t *, u8); int ide_set_dma_mode(ide_drive_t *, u8); diff --git a/include/linux/inet.h b/include/linux/inet.h index 1354080cf8cf..4cca05c9678e 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -44,6 +44,13 @@ #include +/* + * These mimic similar macros defined in user-space for inet_ntop(3). + * See /usr/include/netinet/in.h . + */ +#define INET_ADDRSTRLEN (16) +#define INET6_ADDRSTRLEN (48) + extern __be32 in_aton(const char *str); extern int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9927a88674a3..93c45acf249a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -140,8 +140,8 @@ extern struct group_info init_groups; .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ - .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ - .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ + .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ + .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &tsk, \ .parent = &tsk, \ .children = LIST_HEAD_INIT(tsk.children), \ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index c6801bffe76d..2cd07cc29687 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -59,6 +59,7 @@ struct resource_list { #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) #define IORESOURCE_IRQ_SHAREABLE (1<<4) +#define IORESOURCE_IRQ_OPTIONAL (1<<5) /* PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) @@ -88,6 +89,10 @@ struct resource_list { #define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ #define IORESOURCE_MEM_EXPANSIONROM (1<<6) +/* PnP I/O specific bits (IORESOURCE_BITS) */ +#define IORESOURCE_IO_16BIT_ADDR (1<<0) +#define IORESOURCE_IO_FIXED (1<<1) + /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ #define IORESOURCE_ROM_SHADOW (1<<1) /* ROM is copy at C000:0 */ diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index d0c3abed74c2..143cebf0586f 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -135,6 +135,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *); +extern unsigned int mmc_align_data_size(struct mmc_card *, unsigned int); extern int __mmc_claim_host(struct mmc_host *host, atomic_t *abort); extern void mmc_release_host(struct mmc_host *host); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 7ab962fa1d73..10a2080086ca 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -51,8 +51,30 @@ struct mmc_ios { struct mmc_host_ops { void (*request)(struct mmc_host *host, struct mmc_request *req); + /* + * Avoid calling these three functions too often or in a "fast path", + * since underlaying controller might implement them in an expensive + * and/or slow way. + * + * Also note that these functions might sleep, so don't call them + * in the atomic contexts! + * + * Return values for the get_ro callback should be: + * 0 for a read/write card + * 1 for a read-only card + * -ENOSYS when not supported (equal to NULL callback) + * or a negative errno value when something bad happened + * + * Return values for the get_ro callback should be: + * 0 for a absent card + * 1 for a present card + * -ENOSYS when not supported (equal to NULL callback) + * or a negative errno value when something bad happened + */ void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios); int (*get_ro)(struct mmc_host *host); + int (*get_cd)(struct mmc_host *host); + void (*enable_sdio_irq)(struct mmc_host *host, int enable); }; @@ -89,11 +111,11 @@ struct mmc_host { unsigned long caps; /* Host capabilities */ #define MMC_CAP_4_BIT_DATA (1 << 0) /* Can the host do 4 bit transfers */ -#define MMC_CAP_MULTIWRITE (1 << 1) /* Can accurately report bytes sent to card on error */ -#define MMC_CAP_MMC_HIGHSPEED (1 << 2) /* Can do MMC high-speed timing */ -#define MMC_CAP_SD_HIGHSPEED (1 << 3) /* Can do SD high-speed timing */ -#define MMC_CAP_SDIO_IRQ (1 << 4) /* Can signal pending SDIO IRQs */ -#define MMC_CAP_SPI (1 << 5) /* Talks only SPI protocols */ +#define MMC_CAP_MMC_HIGHSPEED (1 << 1) /* Can do MMC high-speed timing */ +#define MMC_CAP_SD_HIGHSPEED (1 << 2) /* Can do SD high-speed timing */ +#define MMC_CAP_SDIO_IRQ (1 << 3) /* Can signal pending SDIO IRQs */ +#define MMC_CAP_SPI (1 << 4) /* Talks only SPI protocols */ +#define MMC_CAP_NEEDS_POLL (1 << 5) /* Needs polling for card-detection */ /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 4236fbf0b6fb..14b81f3e5232 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -16,7 +16,6 @@ * Based strongly on code by: * * Author: Yong-iL Joh - * Date : $Date: 2002/06/18 12:37:30 $ * * Author: Andrew Christian * 15 May 2002 diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index b050f4d7b41f..07bee4a0d457 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -1,7 +1,7 @@ /* * include/linux/mmc/sdio_func.h * - * Copyright 2007 Pierre Ossman + * Copyright 2007-2008 Pierre Ossman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,6 +46,8 @@ struct sdio_func { unsigned max_blksize; /* maximum block size */ unsigned cur_blksize; /* current block size */ + unsigned enable_timeout; /* max enable timeout in msec */ + unsigned int state; /* function state */ #define SDIO_STATE_PRESENT (1<<0) /* present in sysfs */ @@ -120,23 +122,22 @@ extern int sdio_set_block_size(struct sdio_func *func, unsigned blksz); extern int sdio_claim_irq(struct sdio_func *func, sdio_irq_handler_t *handler); extern int sdio_release_irq(struct sdio_func *func); -extern unsigned char sdio_readb(struct sdio_func *func, - unsigned int addr, int *err_ret); -extern unsigned short sdio_readw(struct sdio_func *func, - unsigned int addr, int *err_ret); -extern unsigned long sdio_readl(struct sdio_func *func, - unsigned int addr, int *err_ret); +extern unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz); + +extern u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret); +extern u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret); +extern u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret); extern int sdio_memcpy_fromio(struct sdio_func *func, void *dst, unsigned int addr, int count); extern int sdio_readsb(struct sdio_func *func, void *dst, unsigned int addr, int count); -extern void sdio_writeb(struct sdio_func *func, unsigned char b, +extern void sdio_writeb(struct sdio_func *func, u8 b, unsigned int addr, int *err_ret); -extern void sdio_writew(struct sdio_func *func, unsigned short b, +extern void sdio_writew(struct sdio_func *func, u16 b, unsigned int addr, int *err_ret); -extern void sdio_writel(struct sdio_func *func, unsigned long b, +extern void sdio_writel(struct sdio_func *func, u32 b, unsigned int addr, int *err_ret); extern int sdio_memcpy_toio(struct sdio_func *func, unsigned int addr, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 27d6a8d98cef..29d261918734 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -12,9 +12,19 @@ #include /* Default timeout values */ +#define NFS_DEF_UDP_TIMEO (11) +#define NFS_DEF_UDP_RETRANS (3) +#define NFS_DEF_TCP_TIMEO (600) +#define NFS_DEF_TCP_RETRANS (2) + #define NFS_MAX_UDP_TIMEOUT (60*HZ) #define NFS_MAX_TCP_TIMEOUT (600*HZ) +#define NFS_DEF_ACREGMIN (3) +#define NFS_DEF_ACREGMAX (60) +#define NFS_DEF_ACDIRMIN (30) +#define NFS_DEF_ACDIRMAX (60) + /* * When flushing a cluster of dirty pages, there can be different * strategies: diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h new file mode 100644 index 000000000000..1cb9a3fed2b3 --- /dev/null +++ b/include/linux/nfs_iostat.h @@ -0,0 +1,119 @@ +/* + * User-space visible declarations for NFS client per-mount + * point statistics + * + * Copyright (C) 2005, 2006 Chuck Lever + * + * NFS client per-mount statistics provide information about the + * health of the NFS client and the health of each NFS mount point. + * Generally these are not for detailed problem diagnosis, but + * simply to indicate that there is a problem. + * + * These counters are not meant to be human-readable, but are meant + * to be integrated into system monitoring tools such as "sar" and + * "iostat". As such, the counters are sampled by the tools over + * time, and are never zeroed after a file system is mounted. + * Moving averages can be computed by the tools by taking the + * difference between two instantaneous samples and dividing that + * by the time between the samples. + */ + +#ifndef _LINUX_NFS_IOSTAT +#define _LINUX_NFS_IOSTAT + +#define NFS_IOSTAT_VERS "1.0" + +/* + * NFS byte counters + * + * 1. SERVER - the number of payload bytes read from or written + * to the server by the NFS client via an NFS READ or WRITE + * request. + * + * 2. NORMAL - the number of bytes read or written by applications + * via the read(2) and write(2) system call interfaces. + * + * 3. DIRECT - the number of bytes read or written from files + * opened with the O_DIRECT flag. + * + * These counters give a view of the data throughput into and out + * of the NFS client. Comparing the number of bytes requested by + * an application with the number of bytes the client requests from + * the server can provide an indication of client efficiency + * (per-op, cache hits, etc). + * + * These counters can also help characterize which access methods + * are in use. DIRECT by itself shows whether there is any O_DIRECT + * traffic. NORMAL + DIRECT shows how much data is going through + * the system call interface. A large amount of SERVER traffic + * without much NORMAL or DIRECT traffic shows that applications + * are using mapped files. + * + * NFS page counters + * + * These count the number of pages read or written via nfs_readpage(), + * nfs_readpages(), or their write equivalents. + * + * NB: When adding new byte counters, please include the measured + * units in the name of each byte counter to help users of this + * interface determine what exactly is being counted. + */ +enum nfs_stat_bytecounters { + NFSIOS_NORMALREADBYTES = 0, + NFSIOS_NORMALWRITTENBYTES, + NFSIOS_DIRECTREADBYTES, + NFSIOS_DIRECTWRITTENBYTES, + NFSIOS_SERVERREADBYTES, + NFSIOS_SERVERWRITTENBYTES, + NFSIOS_READPAGES, + NFSIOS_WRITEPAGES, + __NFSIOS_BYTESMAX, +}; + +/* + * NFS event counters + * + * These counters provide a low-overhead way of monitoring client + * activity without enabling NFS trace debugging. The counters + * show the rate at which VFS requests are made, and how often the + * client invalidates its data and attribute caches. This allows + * system administrators to monitor such things as how close-to-open + * is working, and answer questions such as "why are there so many + * GETATTR requests on the wire?" + * + * They also count anamolous events such as short reads and writes, + * silly renames due to close-after-delete, and operations that + * change the size of a file (such operations can often be the + * source of data corruption if applications aren't using file + * locking properly). + */ +enum nfs_stat_eventcounters { + NFSIOS_INODEREVALIDATE = 0, + NFSIOS_DENTRYREVALIDATE, + NFSIOS_DATAINVALIDATE, + NFSIOS_ATTRINVALIDATE, + NFSIOS_VFSOPEN, + NFSIOS_VFSLOOKUP, + NFSIOS_VFSACCESS, + NFSIOS_VFSUPDATEPAGE, + NFSIOS_VFSREADPAGE, + NFSIOS_VFSREADPAGES, + NFSIOS_VFSWRITEPAGE, + NFSIOS_VFSWRITEPAGES, + NFSIOS_VFSGETDENTS, + NFSIOS_VFSSETATTR, + NFSIOS_VFSFLUSH, + NFSIOS_VFSFSYNC, + NFSIOS_VFSLOCK, + NFSIOS_VFSRELEASE, + NFSIOS_CONGESTIONWAIT, + NFSIOS_SETATTRTRUNC, + NFSIOS_EXTENDWRITE, + NFSIOS_SILLYRENAME, + NFSIOS_SHORTREAD, + NFSIOS_SHORTWRITE, + NFSIOS_DELAY, + __NFSIOS_COUNTSMAX, +}; + +#endif /* _LINUX_NFS_IOSTAT */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index a1676e19e491..3c60685d972b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -27,9 +27,12 @@ /* * Valid flags for a dirty buffer */ -#define PG_BUSY 0 -#define PG_NEED_COMMIT 1 -#define PG_NEED_RESCHED 2 +enum { + PG_BUSY = 0, + PG_CLEAN, + PG_NEED_COMMIT, + PG_NEED_RESCHED, +}; struct nfs_inode; struct nfs_page { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 24263bb8e0be..8c77c11224d1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -829,9 +829,8 @@ struct nfs_rpc_ops { int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, struct rpc_message *); int (*commit_done) (struct rpc_task *, struct nfs_write_data *); - int (*file_open) (struct inode *, struct file *); - int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct file_lock *); + int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); }; diff --git a/include/linux/pci.h b/include/linux/pci.h index d18b1dd49fab..a6a088e1a804 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -17,8 +17,7 @@ #ifndef LINUX_PCI_H #define LINUX_PCI_H -/* Include the pci register defines */ -#include +#include /* The pci register defines */ /* * The PCI interface treats multi-function devices as independent @@ -49,12 +48,22 @@ #include #include #include +#include #include #include /* Include the ID list */ #include +/* pci_slot represents a physical slot */ +struct pci_slot { + struct pci_bus *bus; /* The bus this slot is on */ + struct list_head list; /* node in list of slots on this bus */ + struct hotplug_slot *hotplug; /* Hotplug info (migrate over time) */ + unsigned char number; /* PCI_SLOT(pci_dev->devfn) */ + struct kobject kobj; +}; + /* File state for mmap()s on /proc/bus/pci/X/Y */ enum pci_mmap_state { pci_mmap_io, @@ -142,6 +151,7 @@ struct pci_dev { void *sysdata; /* hook for sys-specific extension */ struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */ + struct pci_slot *slot; /* Physical slot this device is in */ unsigned int devfn; /* encoded device & function index */ unsigned short vendor; @@ -167,6 +177,13 @@ struct pci_dev { pci_power_t current_state; /* Current operating state. In ACPI-speak, this is D0-D3, D0 being fully functional, and D3 being off. */ + int pm_cap; /* PM capability offset in the + configuration space */ + unsigned int pme_support:5; /* Bitmask of states from which PME# + can be generated */ + unsigned int d1_support:1; /* Low power state D1 is supported */ + unsigned int d2_support:1; /* Low power state D2 is supported */ + unsigned int no_d1d2:1; /* Only allow D0 and D3 */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state. */ @@ -191,7 +208,6 @@ struct pci_dev { unsigned int is_added:1; unsigned int is_busmaster:1; /* device is busmaster */ unsigned int no_msi:1; /* device may not use msi */ - unsigned int no_d1d2:1; /* only allow d0 or d3 */ unsigned int block_ucfg_access:1; /* userspace config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ unsigned int msi_enabled:1; @@ -267,6 +283,7 @@ struct pci_bus { struct list_head children; /* list of child buses */ struct list_head devices; /* list of devices on this bus */ struct pci_dev *self; /* bridge device as seen by parent */ + struct list_head slots; /* list of slots on this bus */ struct resource *resource[PCI_BUS_NUM_RESOURCES]; /* address space routed to this bus */ @@ -328,7 +345,7 @@ struct pci_bus_region { struct pci_dynids { spinlock_t lock; /* protects list, index */ struct list_head list; /* for IDs added at runtime */ - unsigned int use_driver_data:1; /* pci_driver->driver_data is used */ + unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */ }; /* ---------------------------------------------------------------- */ @@ -390,7 +407,7 @@ struct pci_driver { int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); - + struct pm_ext_ops *pm; struct pci_error_handlers *err_handler; struct device_driver driver; struct pci_dynids dynids; @@ -489,6 +506,10 @@ struct pci_bus *pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr); +struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, + const char *name); +void pci_destroy_slot(struct pci_slot *slot); +void pci_update_slot_number(struct pci_slot *slot, int slot_nr); int pci_scan_slot(struct pci_bus *bus, int devfn); struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn); void pci_device_add(struct pci_dev *dev, struct pci_bus *bus); @@ -618,6 +639,8 @@ int pci_restore_state(struct pci_dev *dev); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable); +int pci_prepare_to_sleep(struct pci_dev *dev); +int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); @@ -839,6 +862,11 @@ static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) return -EIO; } +static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) +{ + return -EIO; +} + static inline int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size) { @@ -977,9 +1005,9 @@ static inline void pci_set_drvdata(struct pci_dev *pdev, void *data) /* If you want to know what to call your pci_dev, ask this function. * Again, it's a wrapper around the generic device. */ -static inline char *pci_name(struct pci_dev *pdev) +static inline const char *pci_name(struct pci_dev *pdev) { - return pdev->dev.bus_id; + return dev_name(&pdev->dev); } @@ -1014,7 +1042,9 @@ enum pci_fixup_pass { pci_fixup_header, /* After reading configuration header */ pci_fixup_final, /* Final phase of device fixups */ pci_fixup_enable, /* pci_enable_device() time */ - pci_fixup_resume, /* pci_enable_device() time */ + pci_fixup_resume, /* pci_device_resume() */ + pci_fixup_suspend, /* pci_device_suspend */ + pci_fixup_resume_early, /* pci_device_resume_early() */ }; /* Anonymous variables would be nice... */ @@ -1036,6 +1066,12 @@ enum pci_fixup_pass { #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume, \ resume##vendor##device##hook, vendor, device, hook) +#define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook) \ + DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early, \ + resume_early##vendor##device##hook, vendor, device, hook) +#define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook) \ + DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ + suspend##vendor##device##hook, vendor, device, hook) void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); @@ -1060,7 +1096,10 @@ extern int pci_pci_problems; extern unsigned long pci_cardbus_io_size; extern unsigned long pci_cardbus_mem_size; -extern int pcibios_add_platform_entries(struct pci_dev *dev); +int pcibios_add_platform_entries(struct pci_dev *dev); +void pcibios_disable_device(struct pci_dev *dev); +int pcibios_set_pcie_reset_state(struct pci_dev *dev, + enum pcie_reset_state state); #ifdef CONFIG_PCI_MMCONFIG extern void __init pci_mmcfg_early_init(void); diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 8f67e8f2a3cc..a08cd06b541a 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -95,9 +95,6 @@ struct hotplug_slot_attribute { * @get_adapter_status: Called to get see if an adapter is present in the slot or not. * If this field is NULL, the value passed in the struct hotplug_slot_info * will be used when this value is requested by a user. - * @get_address: Called to get pci address of a slot. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. * @get_max_bus_speed: Called to get the max bus speed for a slot. * If this field is NULL, the value passed in the struct hotplug_slot_info * will be used when this value is requested by a user. @@ -120,7 +117,6 @@ struct hotplug_slot_ops { int (*get_attention_status) (struct hotplug_slot *slot, u8 *value); int (*get_latch_status) (struct hotplug_slot *slot, u8 *value); int (*get_adapter_status) (struct hotplug_slot *slot, u8 *value); - int (*get_address) (struct hotplug_slot *slot, u32 *value); int (*get_max_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); int (*get_cur_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); }; @@ -140,7 +136,6 @@ struct hotplug_slot_info { u8 attention_status; u8 latch_status; u8 adapter_status; - u32 address; enum pci_bus_speed max_bus_speed; enum pci_bus_speed cur_bus_speed; }; @@ -166,15 +161,14 @@ struct hotplug_slot { /* Variables below this are for use only by the hotplug pci core. */ struct list_head slot_list; - struct kobject kobj; + struct pci_slot *pci_slot; }; #define to_hotplug_slot(n) container_of(n, struct hotplug_slot, kobj) -extern int pci_hp_register (struct hotplug_slot *slot); -extern int pci_hp_deregister (struct hotplug_slot *slot); +extern int pci_hp_register(struct hotplug_slot *, struct pci_bus *, int nr); +extern int pci_hp_deregister(struct hotplug_slot *slot); extern int __must_check pci_hp_change_slot_info (struct hotplug_slot *slot, struct hotplug_slot_info *info); -extern struct kset *pci_hotplug_slots_kset; /* PCI Setting Record (Type 0) */ struct hpp_type0 { @@ -227,9 +221,9 @@ struct hotplug_params { #include #include #include -extern acpi_status acpi_run_oshp(acpi_handle handle); extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, struct hotplug_params *hpp); +int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); int acpi_root_bridge(acpi_handle handle); #endif #endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1cf4084b51e8..6be6a7943d8b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2190,6 +2190,7 @@ #define PCI_DEVICE_ID_JMICRON_JMB366 0x2366 #define PCI_DEVICE_ID_JMICRON_JMB368 0x2368 #define PCI_DEVICE_ID_JMICRON_JMB38X_SD 0x2381 +#define PCI_DEVICE_ID_JMICRON_JMB38X_MMC 0x2382 #define PCI_DEVICE_ID_JMICRON_JMB38X_MS 0x2383 #define PCI_VENDOR_ID_KORENIX 0x1982 diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index c0c1223c9194..19958b929905 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -231,6 +231,7 @@ #define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ #define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ #define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ +#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ #define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 3261681c82a4..95ac21ab3a09 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -53,6 +53,7 @@ struct platform_driver { int (*suspend_late)(struct platform_device *, pm_message_t state); int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); + struct pm_ext_ops *pm; struct device_driver driver; }; diff --git a/include/linux/pm.h b/include/linux/pm.h index 39a7ee859b67..4ad9de94449a 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -112,7 +112,9 @@ typedef struct pm_message { int event; } pm_message_t; -/* +/** + * struct pm_ops - device PM callbacks + * * Several driver power state transitions are externally visible, affecting * the state of pending I/O queues and (for drivers that touch hardware) * interrupts, wakeups, DMA, and other hardware state. There may also be @@ -120,6 +122,284 @@ typedef struct pm_message { * to the rest of the driver stack (such as a driver that's ON gating off * clocks which are not in active use). * + * The externally visible transitions are handled with the help of the following + * callbacks included in this structure: + * + * @prepare: Prepare the device for the upcoming transition, but do NOT change + * its hardware state. Prevent new children of the device from being + * registered after @prepare() returns (the driver's subsystem and + * generally the rest of the kernel is supposed to prevent new calls to the + * probe method from being made too once @prepare() has succeeded). If + * @prepare() detects a situation it cannot handle (e.g. registration of a + * child already in progress), it may return -EAGAIN, so that the PM core + * can execute it once again (e.g. after the new child has been registered) + * to recover from the race condition. This method is executed for all + * kinds of suspend transitions and is followed by one of the suspend + * callbacks: @suspend(), @freeze(), or @poweroff(). + * The PM core executes @prepare() for all devices before starting to + * execute suspend callbacks for any of them, so drivers may assume all of + * the other devices to be present and functional while @prepare() is being + * executed. In particular, it is safe to make GFP_KERNEL memory + * allocations from within @prepare(). However, drivers may NOT assume + * anything about the availability of the user space at that time and it + * is not correct to request firmware from within @prepare() (it's too + * late to do that). [To work around this limitation, drivers may + * register suspend and hibernation notifiers that are executed before the + * freezing of tasks.] + * + * @complete: Undo the changes made by @prepare(). This method is executed for + * all kinds of resume transitions, following one of the resume callbacks: + * @resume(), @thaw(), @restore(). Also called if the state transition + * fails before the driver's suspend callback (@suspend(), @freeze(), + * @poweroff()) can be executed (e.g. if the suspend callback fails for one + * of the other devices that the PM core has unsuccessfully attempted to + * suspend earlier). + * The PM core executes @complete() after it has executed the appropriate + * resume callback for all devices. + * + * @suspend: Executed before putting the system into a sleep state in which the + * contents of main memory are preserved. Quiesce the device, put it into + * a low power state appropriate for the upcoming system state (such as + * PCI_D3hot), and enable wakeup events as appropriate. + * + * @resume: Executed after waking the system up from a sleep state in which the + * contents of main memory were preserved. Put the device into the + * appropriate state, according to the information saved in memory by the + * preceding @suspend(). The driver starts working again, responding to + * hardware events and software requests. The hardware may have gone + * through a power-off reset, or it may have maintained state from the + * previous suspend() which the driver may rely on while resuming. On most + * platforms, there are no restrictions on availability of resources like + * clocks during @resume(). + * + * @freeze: Hibernation-specific, executed before creating a hibernation image. + * Quiesce operations so that a consistent image can be created, but do NOT + * otherwise put the device into a low power device state and do NOT emit + * system wakeup events. Save in main memory the device settings to be + * used by @restore() during the subsequent resume from hibernation or by + * the subsequent @thaw(), if the creation of the image or the restoration + * of main memory contents from it fails. + * + * @thaw: Hibernation-specific, executed after creating a hibernation image OR + * if the creation of the image fails. Also executed after a failing + * attempt to restore the contents of main memory from such an image. + * Undo the changes made by the preceding @freeze(), so the device can be + * operated in the same way as immediately before the call to @freeze(). + * + * @poweroff: Hibernation-specific, executed after saving a hibernation image. + * Quiesce the device, put it into a low power state appropriate for the + * upcoming system state (such as PCI_D3hot), and enable wakeup events as + * appropriate. + * + * @restore: Hibernation-specific, executed after restoring the contents of main + * memory from a hibernation image. Driver starts working again, + * responding to hardware events and software requests. Drivers may NOT + * make ANY assumptions about the hardware state right prior to @restore(). + * On most platforms, there are no restrictions on availability of + * resources like clocks during @restore(). + * + * All of the above callbacks, except for @complete(), return error codes. + * However, the error codes returned by the resume operations, @resume(), + * @thaw(), and @restore(), do not cause the PM core to abort the resume + * transition during which they are returned. The error codes returned in + * that cases are only printed by the PM core to the system logs for debugging + * purposes. Still, it is recommended that drivers only return error codes + * from their resume methods in case of an unrecoverable failure (i.e. when the + * device being handled refuses to resume and becomes unusable) to allow us to + * modify the PM core in the future, so that it can avoid attempting to handle + * devices that failed to resume and their children. + * + * It is allowed to unregister devices while the above callbacks are being + * executed. However, it is not allowed to unregister a device from within any + * of its own callbacks. + */ + +struct pm_ops { + int (*prepare)(struct device *dev); + void (*complete)(struct device *dev); + int (*suspend)(struct device *dev); + int (*resume)(struct device *dev); + int (*freeze)(struct device *dev); + int (*thaw)(struct device *dev); + int (*poweroff)(struct device *dev); + int (*restore)(struct device *dev); +}; + +/** + * struct pm_ext_ops - extended device PM callbacks + * + * Some devices require certain operations related to suspend and hibernation + * to be carried out with interrupts disabled. Thus, 'struct pm_ext_ops' below + * is defined, adding callbacks to be executed with interrupts disabled to + * 'struct pm_ops'. + * + * The following callbacks included in 'struct pm_ext_ops' are executed with + * the nonboot CPUs switched off and with interrupts disabled on the only + * functional CPU. They also are executed with the PM core list of devices + * locked, so they must NOT unregister any devices. + * + * @suspend_noirq: Complete the operations of ->suspend() by carrying out any + * actions required for suspending the device that need interrupts to be + * disabled + * + * @resume_noirq: Prepare for the execution of ->resume() by carrying out any + * actions required for resuming the device that need interrupts to be + * disabled + * + * @freeze_noirq: Complete the operations of ->freeze() by carrying out any + * actions required for freezing the device that need interrupts to be + * disabled + * + * @thaw_noirq: Prepare for the execution of ->thaw() by carrying out any + * actions required for thawing the device that need interrupts to be + * disabled + * + * @poweroff_noirq: Complete the operations of ->poweroff() by carrying out any + * actions required for handling the device that need interrupts to be + * disabled + * + * @restore_noirq: Prepare for the execution of ->restore() by carrying out any + * actions required for restoring the operations of the device that need + * interrupts to be disabled + * + * All of the above callbacks return error codes, but the error codes returned + * by the resume operations, @resume_noirq(), @thaw_noirq(), and + * @restore_noirq(), do not cause the PM core to abort the resume transition + * during which they are returned. The error codes returned in that cases are + * only printed by the PM core to the system logs for debugging purposes. + * Still, as stated above, it is recommended that drivers only return error + * codes from their resume methods if the device being handled fails to resume + * and is not usable any more. + */ + +struct pm_ext_ops { + struct pm_ops base; + int (*suspend_noirq)(struct device *dev); + int (*resume_noirq)(struct device *dev); + int (*freeze_noirq)(struct device *dev); + int (*thaw_noirq)(struct device *dev); + int (*poweroff_noirq)(struct device *dev); + int (*restore_noirq)(struct device *dev); +}; + +/** + * PM_EVENT_ messages + * + * The following PM_EVENT_ messages are defined for the internal use of the PM + * core, in order to provide a mechanism allowing the high level suspend and + * hibernation code to convey the necessary information to the device PM core + * code: + * + * ON No transition. + * + * FREEZE System is going to hibernate, call ->prepare() and ->freeze() + * for all devices. + * + * SUSPEND System is going to suspend, call ->prepare() and ->suspend() + * for all devices. + * + * HIBERNATE Hibernation image has been saved, call ->prepare() and + * ->poweroff() for all devices. + * + * QUIESCE Contents of main memory are going to be restored from a (loaded) + * hibernation image, call ->prepare() and ->freeze() for all + * devices. + * + * RESUME System is resuming, call ->resume() and ->complete() for all + * devices. + * + * THAW Hibernation image has been created, call ->thaw() and + * ->complete() for all devices. + * + * RESTORE Contents of main memory have been restored from a hibernation + * image, call ->restore() and ->complete() for all devices. + * + * RECOVER Creation of a hibernation image or restoration of the main + * memory contents from a hibernation image has failed, call + * ->thaw() and ->complete() for all devices. + */ + +#define PM_EVENT_ON 0x0000 +#define PM_EVENT_FREEZE 0x0001 +#define PM_EVENT_SUSPEND 0x0002 +#define PM_EVENT_HIBERNATE 0x0004 +#define PM_EVENT_QUIESCE 0x0008 +#define PM_EVENT_RESUME 0x0010 +#define PM_EVENT_THAW 0x0020 +#define PM_EVENT_RESTORE 0x0040 +#define PM_EVENT_RECOVER 0x0080 + +#define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE) + +#define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) +#define PMSG_QUIESCE ((struct pm_message){ .event = PM_EVENT_QUIESCE, }) +#define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) +#define PMSG_HIBERNATE ((struct pm_message){ .event = PM_EVENT_HIBERNATE, }) +#define PMSG_RESUME ((struct pm_message){ .event = PM_EVENT_RESUME, }) +#define PMSG_THAW ((struct pm_message){ .event = PM_EVENT_THAW, }) +#define PMSG_RESTORE ((struct pm_message){ .event = PM_EVENT_RESTORE, }) +#define PMSG_RECOVER ((struct pm_message){ .event = PM_EVENT_RECOVER, }) +#define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) + +/** + * Device power management states + * + * These state labels are used internally by the PM core to indicate the current + * status of a device with respect to the PM core operations. + * + * DPM_ON Device is regarded as operational. Set this way + * initially and when ->complete() is about to be called. + * Also set when ->prepare() fails. + * + * DPM_PREPARING Device is going to be prepared for a PM transition. Set + * when ->prepare() is about to be called. + * + * DPM_RESUMING Device is going to be resumed. Set when ->resume(), + * ->thaw(), or ->restore() is about to be called. + * + * DPM_SUSPENDING Device has been prepared for a power transition. Set + * when ->prepare() has just succeeded. + * + * DPM_OFF Device is regarded as inactive. Set immediately after + * ->suspend(), ->freeze(), or ->poweroff() has succeeded. + * Also set when ->resume()_noirq, ->thaw_noirq(), or + * ->restore_noirq() is about to be called. + * + * DPM_OFF_IRQ Device is in a "deep sleep". Set immediately after + * ->suspend_noirq(), ->freeze_noirq(), or + * ->poweroff_noirq() has just succeeded. + */ + +enum dpm_state { + DPM_INVALID, + DPM_ON, + DPM_PREPARING, + DPM_RESUMING, + DPM_SUSPENDING, + DPM_OFF, + DPM_OFF_IRQ, +}; + +struct dev_pm_info { + pm_message_t power_state; + unsigned can_wakeup:1; + unsigned should_wakeup:1; + enum dpm_state status; /* Owned by the PM core */ +#ifdef CONFIG_PM_SLEEP + struct list_head entry; +#endif +}; + +/* + * The PM_EVENT_ messages are also used by drivers implementing the legacy + * suspend framework, based on the ->suspend() and ->resume() callbacks common + * for suspend and hibernation transitions, according to the rules below. + */ + +/* Necessary, because several drivers use PM_EVENT_PRETHAW */ +#define PM_EVENT_PRETHAW PM_EVENT_QUIESCE + +/* * One transition is triggered by resume(), after a suspend() call; the * message is implicit: * @@ -164,35 +444,13 @@ typedef struct pm_message { * or from system low-power states such as standby or suspend-to-RAM. */ -#define PM_EVENT_ON 0 -#define PM_EVENT_FREEZE 1 -#define PM_EVENT_SUSPEND 2 -#define PM_EVENT_HIBERNATE 4 -#define PM_EVENT_PRETHAW 8 - -#define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE) - -#define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) -#define PMSG_PRETHAW ((struct pm_message){ .event = PM_EVENT_PRETHAW, }) -#define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) -#define PMSG_HIBERNATE ((struct pm_message){ .event = PM_EVENT_HIBERNATE, }) -#define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) - -struct dev_pm_info { - pm_message_t power_state; - unsigned can_wakeup:1; - unsigned should_wakeup:1; - bool sleeping:1; /* Owned by the PM core */ -#ifdef CONFIG_PM_SLEEP - struct list_head entry; -#endif -}; +#ifdef CONFIG_PM_SLEEP +extern void device_pm_lock(void); +extern void device_power_up(pm_message_t state); +extern void device_resume(pm_message_t state); +extern void device_pm_unlock(void); extern int device_power_down(pm_message_t state); -extern void device_power_up(void); -extern void device_resume(void); - -#ifdef CONFIG_PM_SLEEP extern int device_suspend(pm_message_t state); extern int device_prepare_suspend(pm_message_t state); diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index f0d0b2cb8d20..0aae7776185e 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -35,6 +35,11 @@ static inline void device_init_wakeup(struct device *dev, int val) dev->power.can_wakeup = dev->power.should_wakeup = !!val; } +static inline void device_set_wakeup_capable(struct device *dev, int val) +{ + dev->power.can_wakeup = !!val; +} + static inline int device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; @@ -47,21 +52,7 @@ static inline void device_set_wakeup_enable(struct device *dev, int val) static inline int device_may_wakeup(struct device *dev) { - return dev->power.can_wakeup & dev->power.should_wakeup; -} - -/* - * Platform hook to activate device wakeup capability, if that's not already - * handled by enable_irq_wake() etc. - * Returns zero on success, else negative errno - */ -extern int (*platform_enable_wakeup)(struct device *dev, int is_on); - -static inline int call_platform_enable_wakeup(struct device *dev, int is_on) -{ - if (platform_enable_wakeup) - return (*platform_enable_wakeup)(dev, is_on); - return 0; + return dev->power.can_wakeup && dev->power.should_wakeup; } #else /* !CONFIG_PM */ @@ -72,6 +63,8 @@ static inline void device_init_wakeup(struct device *dev, int val) dev->power.can_wakeup = !!val; } +static inline void device_set_wakeup_capable(struct device *dev, int val) { } + static inline int device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; @@ -80,11 +73,6 @@ static inline int device_can_wakeup(struct device *dev) #define device_set_wakeup_enable(dev, val) do {} while (0) #define device_may_wakeup(dev) 0 -static inline int call_platform_enable_wakeup(struct device *dev, int is_on) -{ - return 0; -} - #endif /* !CONFIG_PM */ #endif /* _LINUX_PM_WAKEUP_H */ diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 63b128d512fb..1ce54b63085d 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -1,6 +1,8 @@ /* * Linux Plug and Play Support * Copyright by Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas */ #ifndef _LINUX_PNP_H @@ -15,7 +17,6 @@ struct pnp_protocol; struct pnp_dev; -struct pnp_resource_table; /* * Resource Management @@ -24,7 +25,14 @@ struct resource *pnp_get_resource(struct pnp_dev *, unsigned int, unsigned int); static inline int pnp_resource_valid(struct resource *res) { - if (res && !(res->flags & IORESOURCE_UNSET)) + if (res) + return 1; + return 0; +} + +static inline int pnp_resource_enabled(struct resource *res) +{ + if (res && !(res->flags & IORESOURCE_DISABLED)) return 1; return 0; } @@ -40,19 +48,31 @@ static inline resource_size_t pnp_resource_len(struct resource *res) static inline resource_size_t pnp_port_start(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IO, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return res->start; + return 0; } static inline resource_size_t pnp_port_end(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IO, bar)->end; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return res->end; + return 0; } static inline unsigned long pnp_port_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IO, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_IO | IORESOURCE_AUTO; } static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar) @@ -63,25 +83,41 @@ static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar) static inline resource_size_t pnp_port_len(struct pnp_dev *dev, unsigned int bar) { - return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_IO, bar)); + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return pnp_resource_len(res); + return 0; } static inline resource_size_t pnp_mem_start(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_MEM, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return res->start; + return 0; } static inline resource_size_t pnp_mem_end(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_MEM, bar)->end; + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return res->end; + return 0; } static inline unsigned long pnp_mem_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_MEM, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_MEM | IORESOURCE_AUTO; } static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar) @@ -92,18 +128,30 @@ static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar) static inline resource_size_t pnp_mem_len(struct pnp_dev *dev, unsigned int bar) { - return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_MEM, bar)); + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return pnp_resource_len(res); + return 0; } static inline resource_size_t pnp_irq(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IRQ, bar); + + if (pnp_resource_valid(res)) + return res->start; + return -1; } static inline unsigned long pnp_irq_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IRQ, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_IRQ | IORESOURCE_AUTO; } static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar) @@ -114,12 +162,20 @@ static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar) static inline resource_size_t pnp_dma(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_DMA, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_DMA, bar); + + if (pnp_resource_valid(res)) + return res->start; + return -1; } static inline unsigned long pnp_dma_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_DMA, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_DMA, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_DMA | IORESOURCE_AUTO; } static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) @@ -128,57 +184,6 @@ static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) } -#define PNP_PORT_FLAG_16BITADDR (1<<0) -#define PNP_PORT_FLAG_FIXED (1<<1) - -struct pnp_port { - unsigned short min; /* min base number */ - unsigned short max; /* max base number */ - unsigned char align; /* align boundary */ - unsigned char size; /* size of range */ - unsigned char flags; /* port flags */ - unsigned char pad; /* pad */ - struct pnp_port *next; /* next port */ -}; - -#define PNP_IRQ_NR 256 -struct pnp_irq { - DECLARE_BITMAP(map, PNP_IRQ_NR); /* bitmask for IRQ lines */ - unsigned char flags; /* IRQ flags */ - unsigned char pad; /* pad */ - struct pnp_irq *next; /* next IRQ */ -}; - -struct pnp_dma { - unsigned char map; /* bitmask for DMA channels */ - unsigned char flags; /* DMA flags */ - struct pnp_dma *next; /* next port */ -}; - -struct pnp_mem { - unsigned int min; /* min base number */ - unsigned int max; /* max base number */ - unsigned int align; /* align boundary */ - unsigned int size; /* size of range */ - unsigned char flags; /* memory flags */ - unsigned char pad; /* pad */ - struct pnp_mem *next; /* next memory resource */ -}; - -#define PNP_RES_PRIORITY_PREFERRED 0 -#define PNP_RES_PRIORITY_ACCEPTABLE 1 -#define PNP_RES_PRIORITY_FUNCTIONAL 2 -#define PNP_RES_PRIORITY_INVALID 65535 - -struct pnp_option { - unsigned short priority; /* priority */ - struct pnp_port *port; /* first port */ - struct pnp_irq *irq; /* first IRQ */ - struct pnp_dma *dma; /* first DMA */ - struct pnp_mem *mem; /* first memory resource */ - struct pnp_option *next; /* used to chain dependent resources */ -}; - /* * Device Management */ @@ -246,9 +251,9 @@ struct pnp_dev { int active; int capabilities; - struct pnp_option *independent; - struct pnp_option *dependent; - struct pnp_resource_table *res; + unsigned int num_dependent_sets; + struct list_head resources; + struct list_head options; char name[PNP_NAME_LEN]; /* contains a human-readable name */ int flags; /* used by protocols */ @@ -425,6 +430,8 @@ void pnp_unregister_card_driver(struct pnp_card_driver *drv); extern struct list_head pnp_cards; /* resource management */ +int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t base, + resource_size_t size); int pnp_auto_config_dev(struct pnp_dev *dev); int pnp_start_dev(struct pnp_dev *dev); int pnp_stop_dev(struct pnp_dev *dev); @@ -452,6 +459,9 @@ static inline int pnp_register_card_driver(struct pnp_card_driver *drv) { return static inline void pnp_unregister_card_driver(struct pnp_card_driver *drv) { } /* resource management */ +static inline int pnp_possible_config(struct pnp_dev *dev, int type, + resource_size_t base, + resource_size_t size) { return 0; } static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 21349173d148..1941d8b5cf11 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1062,12 +1062,6 @@ struct task_struct { #endif struct list_head tasks; - /* - * ptrace_list/ptrace_children forms the list of my children - * that were stolen by a ptracer. - */ - struct list_head ptrace_children; - struct list_head ptrace_list; struct mm_struct *mm, *active_mm; @@ -1089,18 +1083,25 @@ struct task_struct { /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with - * p->parent->pid) + * p->real_parent->pid) */ - struct task_struct *real_parent; /* real parent process (when being debugged) */ - struct task_struct *parent; /* parent process */ + struct task_struct *real_parent; /* real parent process */ + struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ /* - * children/sibling forms the list of my children plus the - * tasks I'm ptracing. + * children/sibling forms the list of my natural children */ struct list_head children; /* list of my children */ struct list_head sibling; /* linkage in my parent's children list */ struct task_struct *group_leader; /* threadgroup leader */ + /* + * ptraced is the list of tasks this task is using ptrace on. + * This includes both natural children and PTRACE_ATTACH targets. + * p->ptrace_entry is p's link on the p->parent->ptraced list. + */ + struct list_head ptraced; + struct list_head ptrace_entry; + /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; @@ -1494,6 +1495,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other @@ -1875,9 +1877,6 @@ extern void wait_task_inactive(struct task_struct * p); #define wait_task_inactive(p) do { } while (0) #endif -#define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) - #define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) #define for_each_process(p) \ diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h index d5ca78b93a3b..a3626aedaec9 100644 --- a/include/linux/spi/mmc_spi.h +++ b/include/linux/spi/mmc_spi.h @@ -23,6 +23,15 @@ struct mmc_spi_platform_data { /* sense switch on sd cards */ int (*get_ro)(struct device *); + /* + * If board does not use CD interrupts, driver can optimize polling + * using this function. + */ + int (*get_cd)(struct device *); + + /* Capabilities to pass into mmc core (e.g. MMC_CAP_NEEDS_POLL). */ + unsigned long caps; + /* how long to debounce card detect, in msecs */ u16 detect_delay; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6fff7f82ef12..e5bfe01ee305 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -42,7 +42,8 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_discrtry : 1,/* disconnect before retry */ - cl_autobind : 1;/* use getport() */ + cl_autobind : 1,/* use getport() */ + cl_chatty : 1;/* be verbose */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ @@ -114,6 +115,7 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) #define RPC_CLNT_CREATE_NOPING (1UL << 4) #define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) +#define RPC_CLNT_CREATE_QUIET (1UL << 6) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, @@ -123,6 +125,9 @@ void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); +int rpcb_v4_register(const u32 program, const u32 version, + const struct sockaddr *address, + const char *netid, int *result); int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); void rpcb_getport_async(struct rpc_task *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d1a5c8c1a0f1..64981a2f1cae 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -135,7 +135,6 @@ struct rpc_task_setup { #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) -#define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_TASK_RUNNING 0 diff --git a/include/linux/suspend.h b/include/linux/suspend.h index a6977423baf7..e8e69159af71 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -86,6 +86,11 @@ typedef int __bitwise suspend_state_t; * that implement @begin(), but platforms implementing @begin() should * also provide a @end() which cleans up transitions aborted before * @enter(). + * + * @recover: Recover the platform from a suspend failure. + * Called by the PM core if the suspending of devices fails. + * This callback is optional and should only be implemented by platforms + * which require special recovery actions in that situation. */ struct platform_suspend_ops { int (*valid)(suspend_state_t state); @@ -94,6 +99,7 @@ struct platform_suspend_ops { int (*enter)(suspend_state_t state); void (*finish)(void); void (*end)(void); + void (*recover)(void); }; #ifdef CONFIG_SUSPEND @@ -149,7 +155,7 @@ extern void mark_free_pages(struct zone *zone); * The methods in this structure allow a platform to carry out special * operations required by it during a hibernation transition. * - * All the methods below must be implemented. + * All the methods below, except for @recover(), must be implemented. * * @begin: Tell the platform driver that we're starting hibernation. * Called right after shrinking memory and before freezing devices. @@ -189,6 +195,11 @@ extern void mark_free_pages(struct zone *zone); * @restore_cleanup: Clean up after a failing image restoration. * Called right after the nonboot CPUs have been enabled and before * thawing devices (runs with IRQs on). + * + * @recover: Recover the platform from a failure to suspend devices. + * Called by the PM core if the suspending of devices during hibernation + * fails. This callback is optional and should only be implemented by + * platforms which require special recovery actions in that situation. */ struct platform_hibernation_ops { int (*begin)(void); @@ -200,6 +211,7 @@ struct platform_hibernation_ops { void (*leave)(void); int (*pre_restore)(void); void (*restore_cleanup)(void); + void (*recover)(void); }; #ifdef CONFIG_HIBERNATION diff --git a/init/do_mounts.c b/init/do_mounts.c index 660c1e50c91b..a1de1bf3d6b9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -372,7 +372,8 @@ void __init prepare_namespace(void) if (saved_root_name[0]) { root_device_name = saved_root_name; - if (!strncmp(root_device_name, "mtd", 3)) { + if (!strncmp(root_device_name, "mtd", 3) || + !strncmp(root_device_name, "ubi", 3)) { mount_block_root(root_device_name, root_mountflags); goto out; } diff --git a/kernel/Makefile b/kernel/Makefile index 0a7ed838984b..985ddb7da4d0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o -CFLAGS_REMOVE_sched.o = -mno-spe - ifdef CONFIG_FTRACE # Do not trace debug files and internal ftrace files CFLAGS_REMOVE_lockdep.o = -pg @@ -21,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg +CFLAGS_REMOVE_sched.o = -mno-spe -pg endif obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o diff --git a/kernel/cpu.c b/kernel/cpu.c index b11f06dc149a..cfb1d43ab801 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -299,6 +299,7 @@ int __ref cpu_down(unsigned int cpu) cpu_maps_update_done(); return err; } +EXPORT_SYMBOL(cpu_down); #endif /*CONFIG_HOTPLUG_CPU*/ /* Requires cpu_add_remove_lock to be held */ diff --git a/kernel/exit.c b/kernel/exit.c index ceb258782835..93d2711b9381 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -71,7 +71,7 @@ static void __unhash_process(struct task_struct *p) __get_cpu_var(process_counts)--; } list_del_rcu(&p->thread_group); - remove_parent(p); + list_del_init(&p->sibling); } /* @@ -152,6 +152,18 @@ static void delayed_put_task_struct(struct rcu_head *rhp) put_task_struct(container_of(rhp, struct task_struct, rcu)); } +/* + * Do final ptrace-related cleanup of a zombie being reaped. + * + * Called with write_lock(&tasklist_lock) held. + */ +static void ptrace_release_task(struct task_struct *p) +{ + BUG_ON(!list_empty(&p->ptraced)); + ptrace_unlink(p); + BUG_ON(!list_empty(&p->ptrace_entry)); +} + void release_task(struct task_struct * p) { struct task_struct *leader; @@ -160,8 +172,7 @@ repeat: atomic_dec(&p->user->processes); proc_flush_task(p); write_lock_irq(&tasklist_lock); - ptrace_unlink(p); - BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); + ptrace_release_task(p); __exit_signal(p); /* @@ -315,9 +326,8 @@ static void reparent_to_kthreadd(void) ptrace_unlink(current); /* Reparent to init */ - remove_parent(current); current->real_parent = current->parent = kthreadd_task; - add_parent(current); + list_move_tail(¤t->sibling, ¤t->real_parent->children); /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; @@ -692,37 +702,97 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } -static void -reparent_thread(struct task_struct *p, struct task_struct *father, int traced) +/* + * Return nonzero if @parent's children should reap themselves. + * + * Called with write_lock_irq(&tasklist_lock) held. + */ +static int ignoring_children(struct task_struct *parent) { - if (p->pdeath_signal) - /* We already hold the tasklist_lock here. */ - group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); + int ret; + struct sighand_struct *psig = parent->sighand; + unsigned long flags; + spin_lock_irqsave(&psig->siglock, flags); + ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || + (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT)); + spin_unlock_irqrestore(&psig->siglock, flags); + return ret; +} - /* Move the child from its dying parent to the new one. */ - if (unlikely(traced)) { - /* Preserve ptrace links if someone else is tracing this child. */ - list_del_init(&p->ptrace_list); - if (ptrace_reparented(p)) - list_add(&p->ptrace_list, &p->real_parent->ptrace_children); - } else { - /* If this child is being traced, then we're the one tracing it - * anyway, so let go of it. +/* + * Detach all tasks we were using ptrace on. + * Any that need to be release_task'd are put on the @dead list. + * + * Called with write_lock(&tasklist_lock) held. + */ +static void ptrace_exit(struct task_struct *parent, struct list_head *dead) +{ + struct task_struct *p, *n; + int ign = -1; + + list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { + __ptrace_unlink(p); + + if (p->exit_state != EXIT_ZOMBIE) + continue; + + /* + * If it's a zombie, our attachedness prevented normal + * parent notification or self-reaping. Do notification + * now if it would have happened earlier. If it should + * reap itself, add it to the @dead list. We can't call + * release_task() here because we already hold tasklist_lock. + * + * If it's our own child, there is no notification to do. + * But if our normal children self-reap, then this child + * was prevented by ptrace and we must reap it now. */ - p->ptrace = 0; - remove_parent(p); - p->parent = p->real_parent; - add_parent(p); + if (!task_detached(p) && thread_group_empty(p)) { + if (!same_thread_group(p->real_parent, parent)) + do_notify_parent(p, p->exit_signal); + else { + if (ign < 0) + ign = ignoring_children(parent); + if (ign) + p->exit_signal = -1; + } + } - if (task_is_traced(p)) { + if (task_detached(p)) { /* - * If it was at a trace stop, turn it into - * a normal stop since it's no longer being - * traced. + * Mark it as in the process of being reaped. */ - ptrace_untrace(p); + p->exit_state = EXIT_DEAD; + list_add(&p->ptrace_entry, dead); } } +} + +/* + * Finish up exit-time ptrace cleanup. + * + * Called without locks. + */ +static void ptrace_exit_finish(struct task_struct *parent, + struct list_head *dead) +{ + struct task_struct *p, *n; + + BUG_ON(!list_empty(&parent->ptraced)); + + list_for_each_entry_safe(p, n, dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } +} + +static void reparent_thread(struct task_struct *p, struct task_struct *father) +{ + if (p->pdeath_signal) + /* We already hold the tasklist_lock here. */ + group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); + + list_move_tail(&p->sibling, &p->real_parent->children); /* If this is a threaded reparent there is no need to * notify anyone anything has happened. @@ -737,7 +807,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) /* If we'd notified the old parent about this child's death, * also notify the new parent. */ - if (!traced && p->exit_state == EXIT_ZOMBIE && + if (!ptrace_reparented(p) && + p->exit_state == EXIT_ZOMBIE && !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); @@ -754,12 +825,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) static void forget_original_parent(struct task_struct *father) { struct task_struct *p, *n, *reaper = father; - struct list_head ptrace_dead; - - INIT_LIST_HEAD(&ptrace_dead); + LIST_HEAD(ptrace_dead); write_lock_irq(&tasklist_lock); + /* + * First clean up ptrace if we were using it. + */ + ptrace_exit(father, &ptrace_dead); + do { reaper = next_thread(reaper); if (reaper == father) { @@ -768,58 +842,19 @@ static void forget_original_parent(struct task_struct *father) } } while (reaper->flags & PF_EXITING); - /* - * There are only two places where our children can be: - * - * - in our child list - * - in our ptraced child list - * - * Search them and reparent children. - */ list_for_each_entry_safe(p, n, &father->children, sibling) { - int ptrace; - - ptrace = p->ptrace; - - /* if father isn't the real parent, then ptrace must be enabled */ - BUG_ON(father != p->real_parent && !ptrace); - - if (father == p->real_parent) { - /* reparent with a reaper, real father it's us */ - p->real_parent = reaper; - reparent_thread(p, father, 0); - } else { - /* reparent ptraced task to its real parent */ - __ptrace_unlink (p); - if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) && - thread_group_empty(p)) - do_notify_parent(p, p->exit_signal); - } - - /* - * if the ptraced child is a detached zombie we must collect - * it before we exit, or it will remain zombie forever since - * we prevented it from self-reap itself while it was being - * traced by us, to be able to see it in wait4. - */ - if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p))) - list_add(&p->ptrace_list, &ptrace_dead); - } - - list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) { p->real_parent = reaper; - reparent_thread(p, father, 1); + if (p->parent == father) { + BUG_ON(p->ptrace); + p->parent = p->real_parent; + } + reparent_thread(p, father); } write_unlock_irq(&tasklist_lock); BUG_ON(!list_empty(&father->children)); - BUG_ON(!list_empty(&father->ptrace_children)); - - list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) { - list_del_init(&p->ptrace_list); - release_task(p); - } + ptrace_exit_finish(father, &ptrace_dead); } /* @@ -1180,13 +1215,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, return 0; } - /* - * Do not consider detached threads that are - * not ptraced: - */ - if (task_detached(p) && !p->ptrace) - return 0; - /* Wait for all children (clone and not) if __WALL is set; * otherwise, wait for clone children *only* if __WCLONE is * set; otherwise, wait for non-clone children *only*. (Note: @@ -1197,14 +1225,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, return 0; err = security_task_wait(p); - if (likely(!err)) - return 1; + if (err) + return err; - if (type != PIDTYPE_PID) - return 0; - /* This child was explicitly requested, abort */ - read_unlock(&tasklist_lock); - return err; + return 1; } static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, @@ -1238,7 +1262,7 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_zombie(struct task_struct *p, int noreap, +static int wait_task_zombie(struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1246,7 +1270,10 @@ static int wait_task_zombie(struct task_struct *p, int noreap, int retval, status, traced; pid_t pid = task_pid_vnr(p); - if (unlikely(noreap)) { + if (!likely(options & WEXITED)) + return 0; + + if (unlikely(options & WNOWAIT)) { uid_t uid = p->uid; int exit_code = p->exit_code; int why, status; @@ -1396,21 +1423,24 @@ static int wait_task_zombie(struct task_struct *p, int noreap, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_stopped(struct task_struct *p, - int noreap, struct siginfo __user *infop, +static int wait_task_stopped(int ptrace, struct task_struct *p, + int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { int retval, exit_code, why; uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; + if (!(options & WUNTRACED)) + return 0; + exit_code = 0; spin_lock_irq(&p->sighand->siglock); if (unlikely(!task_is_stopped_or_traced(p))) goto unlock_sig; - if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0) + if (!ptrace && p->signal->group_stop_count > 0) /* * A group stop is in progress and this is the group leader. * We won't report until all threads have stopped. @@ -1421,7 +1451,7 @@ static int wait_task_stopped(struct task_struct *p, if (!exit_code) goto unlock_sig; - if (!noreap) + if (!unlikely(options & WNOWAIT)) p->exit_code = 0; uid = p->uid; @@ -1439,10 +1469,10 @@ unlock_sig: */ get_task_struct(p); pid = task_pid_vnr(p); - why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; + why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); - if (unlikely(noreap)) + if (unlikely(options & WNOWAIT)) return wait_noreap_copyout(p, pid, uid, why, exit_code, infop, ru); @@ -1476,7 +1506,7 @@ unlock_sig: * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_continued(struct task_struct *p, int noreap, +static int wait_task_continued(struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1484,6 +1514,9 @@ static int wait_task_continued(struct task_struct *p, int noreap, pid_t pid; uid_t uid; + if (!unlikely(options & WCONTINUED)) + return 0; + if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) return 0; @@ -1493,7 +1526,7 @@ static int wait_task_continued(struct task_struct *p, int noreap, spin_unlock_irq(&p->sighand->siglock); return 0; } - if (!noreap) + if (!unlikely(options & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; spin_unlock_irq(&p->sighand->siglock); @@ -1519,89 +1552,161 @@ static int wait_task_continued(struct task_struct *p, int noreap, return retval; } +/* + * Consider @p for a wait by @parent. + * + * -ECHILD should be in *@notask_error before the first call. + * Returns nonzero for a final return, when we have unlocked tasklist_lock. + * Returns zero if the search for a child should continue; + * then *@notask_error is 0 if @p is an eligible child, + * or another error from security_task_wait(), or still -ECHILD. + */ +static int wait_consider_task(struct task_struct *parent, int ptrace, + struct task_struct *p, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, + int __user *stat_addr, struct rusage __user *ru) +{ + int ret = eligible_child(type, pid, options, p); + if (!ret) + return ret; + + if (unlikely(ret < 0)) { + /* + * If we have not yet seen any eligible child, + * then let this error code replace -ECHILD. + * A permission error will give the user a clue + * to look for security policy problems, rather + * than for mysterious wait bugs. + */ + if (*notask_error) + *notask_error = ret; + } + + if (likely(!ptrace) && unlikely(p->ptrace)) { + /* + * This child is hidden by ptrace. + * We aren't allowed to see it now, but eventually we will. + */ + *notask_error = 0; + return 0; + } + + if (p->exit_state == EXIT_DEAD) + return 0; + + /* + * We don't reap group leaders with subthreads. + */ + if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) + return wait_task_zombie(p, options, infop, stat_addr, ru); + + /* + * It's stopped or running now, so it might + * later continue, exit, or stop again. + */ + *notask_error = 0; + + if (task_is_stopped_or_traced(p)) + return wait_task_stopped(ptrace, p, options, + infop, stat_addr, ru); + + return wait_task_continued(p, options, infop, stat_addr, ru); +} + +/* + * Do the work of do_wait() for one thread in the group, @tsk. + * + * -ECHILD should be in *@notask_error before the first call. + * Returns nonzero for a final return, when we have unlocked tasklist_lock. + * Returns zero if the search for a child should continue; then + * *@notask_error is 0 if there were any eligible children, + * or another error from security_task_wait(), or still -ECHILD. + */ +static int do_wait_thread(struct task_struct *tsk, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) +{ + struct task_struct *p; + + list_for_each_entry(p, &tsk->children, sibling) { + /* + * Do not consider detached threads. + */ + if (!task_detached(p)) { + int ret = wait_consider_task(tsk, 0, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) + return ret; + } + } + + return 0; +} + +static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) +{ + struct task_struct *p; + + /* + * Traditionally we see ptrace'd stopped tasks regardless of options. + */ + options |= WUNTRACED; + + list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { + int ret = wait_consider_task(tsk, 1, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) + return ret; + } + + return 0; +} + static long do_wait(enum pid_type type, struct pid *pid, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; - int flag, retval; + int retval; add_wait_queue(¤t->signal->wait_chldexit,&wait); repeat: - /* If there is nothing that can match our critier just get out */ + /* + * If there is nothing that can match our critiera just get out. + * We will clear @retval to zero if we see any child that might later + * match our criteria, even if we are not able to reap it yet. + */ retval = -ECHILD; if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) goto end; - /* - * We will set this flag if we see any child that might later - * match our criteria, even if we are not able to reap it yet. - */ - flag = retval = 0; current->state = TASK_INTERRUPTIBLE; read_lock(&tasklist_lock); tsk = current; do { - struct task_struct *p; - - list_for_each_entry(p, &tsk->children, sibling) { - int ret = eligible_child(type, pid, options, p); - if (!ret) - continue; - - if (unlikely(ret < 0)) { - retval = ret; - } else if (task_is_stopped_or_traced(p)) { - /* - * It's stopped now, so it might later - * continue, exit, or stop again. - */ - flag = 1; - if (!(p->ptrace & PT_PTRACED) && - !(options & WUNTRACED)) - continue; - - retval = wait_task_stopped(p, - (options & WNOWAIT), infop, - stat_addr, ru); - } else if (p->exit_state == EXIT_ZOMBIE && - !delay_group_leader(p)) { - /* - * We don't reap group leaders with subthreads. - */ - if (!likely(options & WEXITED)) - continue; - retval = wait_task_zombie(p, - (options & WNOWAIT), infop, - stat_addr, ru); - } else if (p->exit_state != EXIT_DEAD) { - /* - * It's running now, so it might later - * exit, stop, or stop and then continue. - */ - flag = 1; - if (!unlikely(options & WCONTINUED)) - continue; - retval = wait_task_continued(p, - (options & WNOWAIT), infop, - stat_addr, ru); - } - if (retval != 0) /* tasklist_lock released */ - goto end; - } - if (!flag) { - list_for_each_entry(p, &tsk->ptrace_children, - ptrace_list) { - flag = eligible_child(type, pid, options, p); - if (!flag) - continue; - if (likely(flag > 0)) - break; - retval = flag; - goto end; - } + int tsk_result = do_wait_thread(tsk, &retval, + type, pid, options, + infop, stat_addr, ru); + if (!tsk_result) + tsk_result = ptrace_do_wait(tsk, &retval, + type, pid, options, + infop, stat_addr, ru); + if (tsk_result) { + /* + * tasklist_lock is unlocked and we have a final result. + */ + retval = tsk_result; + goto end; } + if (options & __WNOTHREAD) break; tsk = next_thread(tsk); @@ -1609,16 +1714,14 @@ repeat: } while (tsk != current); read_unlock(&tasklist_lock); - if (flag) { - if (options & WNOHANG) - goto end; + if (!retval && !(options & WNOHANG)) { retval = -ERESTARTSYS; - if (signal_pending(current)) - goto end; - schedule(); - goto repeat; + if (!signal_pending(current)) { + schedule(); + goto repeat; + } } - retval = -ECHILD; + end: current->state = TASK_RUNNING; remove_wait_queue(¤t->signal->wait_chldexit,&wait); diff --git a/kernel/fork.c b/kernel/fork.c index 4bd2f516401f..adefc1131f27 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1125,8 +1125,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); - INIT_LIST_HEAD(&p->ptrace_children); - INIT_LIST_HEAD(&p->ptrace_list); + INIT_LIST_HEAD(&p->ptrace_entry); + INIT_LIST_HEAD(&p->ptraced); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible @@ -1198,7 +1198,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, } if (likely(p->pid)) { - add_parent(p); + list_add_tail(&p->sibling, &p->real_parent->children); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); diff --git a/kernel/kthread.c b/kernel/kthread.c index 97747cdd37c9..ac3fb7326641 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -235,7 +235,7 @@ int kthreadd(void *unused) set_user_nice(tsk, KTHREAD_NICE_LEVEL); set_cpus_allowed(tsk, CPU_MASK_ALL); - current->flags |= PF_NOFREEZE; + current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; for (;;) { set_current_state(TASK_INTERRUPTIBLE); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 14a656cdc652..f011e0870b52 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -179,6 +179,17 @@ static void platform_restore_cleanup(int platform_mode) hibernation_ops->restore_cleanup(); } +/** + * platform_recover - recover the platform from a failure to suspend + * devices. + */ + +static void platform_recover(int platform_mode) +{ + if (platform_mode && hibernation_ops && hibernation_ops->recover) + hibernation_ops->recover(); +} + /** * create_image - freeze devices that need to be frozen with interrupts * off, create the hibernation image and thaw those devices. Control @@ -193,6 +204,7 @@ static int create_image(int platform_mode) if (error) return error; + device_pm_lock(); local_irq_disable(); /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* call device_power_down() now. @@ -224,9 +236,11 @@ static int create_image(int platform_mode) /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ - device_power_up(); + device_power_up(in_suspend ? + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); Enable_irqs: local_irq_enable(); + device_pm_unlock(); return error; } @@ -255,10 +269,10 @@ int hibernation_snapshot(int platform_mode) suspend_console(); error = device_suspend(PMSG_FREEZE); if (error) - goto Resume_console; + goto Recover_platform; if (hibernation_test(TEST_DEVICES)) - goto Resume_devices; + goto Recover_platform; error = platform_pre_snapshot(platform_mode); if (error || hibernation_test(TEST_PLATFORM)) @@ -280,12 +294,16 @@ int hibernation_snapshot(int platform_mode) Finish: platform_finish(platform_mode); Resume_devices: - device_resume(); - Resume_console: + device_resume(in_suspend ? + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); resume_console(); Close: platform_end(platform_mode); return error; + + Recover_platform: + platform_recover(platform_mode); + goto Resume_devices; } /** @@ -300,8 +318,9 @@ static int resume_target_kernel(void) { int error; + device_pm_lock(); local_irq_disable(); - error = device_power_down(PMSG_PRETHAW); + error = device_power_down(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); @@ -329,9 +348,10 @@ static int resume_target_kernel(void) swsusp_free(); restore_processor_state(); touch_softlockup_watchdog(); - device_power_up(); + device_power_up(PMSG_RECOVER); Enable_irqs: local_irq_enable(); + device_pm_unlock(); return error; } @@ -350,7 +370,7 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); suspend_console(); - error = device_suspend(PMSG_PRETHAW); + error = device_suspend(PMSG_QUIESCE); if (error) goto Finish; @@ -362,7 +382,7 @@ int hibernation_restore(int platform_mode) enable_nonboot_cpus(); } platform_restore_cleanup(platform_mode); - device_resume(); + device_resume(PMSG_RECOVER); Finish: resume_console(); pm_restore_console(); @@ -392,8 +412,11 @@ int hibernation_platform_enter(void) suspend_console(); error = device_suspend(PMSG_HIBERNATE); - if (error) - goto Resume_console; + if (error) { + if (hibernation_ops->recover) + hibernation_ops->recover(); + goto Resume_devices; + } error = hibernation_ops->prepare(); if (error) @@ -403,6 +426,7 @@ int hibernation_platform_enter(void) if (error) goto Finish; + device_pm_lock(); local_irq_disable(); error = device_power_down(PMSG_HIBERNATE); if (!error) { @@ -411,6 +435,7 @@ int hibernation_platform_enter(void) while (1); } local_irq_enable(); + device_pm_unlock(); /* * We don't need to reenable the nonboot CPUs or resume consoles, since @@ -419,8 +444,7 @@ int hibernation_platform_enter(void) Finish: hibernation_ops->finish(); Resume_devices: - device_resume(); - Resume_console: + device_resume(PMSG_RESTORE); resume_console(); Close: hibernation_ops->end(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 6a6d5eb3524e..3398f4651aa1 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -228,6 +228,7 @@ static int suspend_enter(suspend_state_t state) { int error = 0; + device_pm_lock(); arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -239,10 +240,11 @@ static int suspend_enter(suspend_state_t state) if (!suspend_test(TEST_CORE)) error = suspend_ops->enter(state); - device_power_up(); + device_power_up(PMSG_RESUME); Done: arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); + device_pm_unlock(); return error; } @@ -267,11 +269,11 @@ int suspend_devices_and_enter(suspend_state_t state) error = device_suspend(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to suspend\n"); - goto Resume_console; + goto Recover_platform; } if (suspend_test(TEST_DEVICES)) - goto Resume_devices; + goto Recover_platform; if (suspend_ops->prepare) { error = suspend_ops->prepare(); @@ -291,13 +293,17 @@ int suspend_devices_and_enter(suspend_state_t state) if (suspend_ops->finish) suspend_ops->finish(); Resume_devices: - device_resume(); - Resume_console: + device_resume(PMSG_RESUME); resume_console(); Close: if (suspend_ops->end) suspend_ops->end(); return error; + + Recover_platform: + if (suspend_ops->recover) + suspend_ops->recover(); + goto Resume_devices; } /** diff --git a/kernel/power/process.c b/kernel/power/process.c index f1d0b345c9ba..5fb87652f214 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -19,9 +19,6 @@ */ #define TIMEOUT (20 * HZ) -#define FREEZER_KERNEL_THREADS 0 -#define FREEZER_USER_SPACE 1 - static inline int freezeable(struct task_struct * p) { if ((p == current) || @@ -84,63 +81,53 @@ static void fake_signal_wake_up(struct task_struct *p) spin_unlock_irqrestore(&p->sighand->siglock, flags); } -static int has_mm(struct task_struct *p) +static inline bool should_send_signal(struct task_struct *p) { - return (p->mm && !(p->flags & PF_BORROWED_MM)); + return !(p->flags & PF_FREEZER_NOSIG); } /** * freeze_task - send a freeze request to given task * @p: task to send the request to - * @with_mm_only: if set, the request will only be sent if the task has its - * own mm - * Return value: 0, if @with_mm_only is set and the task has no mm of its - * own or the task is frozen, 1, otherwise + * @sig_only: if set, the request will only be sent if the task has the + * PF_FREEZER_NOSIG flag unset + * Return value: 'false', if @sig_only is set and the task has + * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise * - * The freeze request is sent by seting the tasks's TIF_FREEZE flag and + * The freeze request is sent by setting the tasks's TIF_FREEZE flag and * either sending a fake signal to it or waking it up, depending on whether - * or not it has its own mm (ie. it is a user land task). If @with_mm_only - * is set and the task has no mm of its own (ie. it is a kernel thread), - * its TIF_FREEZE flag should not be set. - * - * The task_lock() is necessary to prevent races with exit_mm() or - * use_mm()/unuse_mm() from occuring. + * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task + * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its + * TIF_FREEZE flag will not be set. */ -static int freeze_task(struct task_struct *p, int with_mm_only) +static bool freeze_task(struct task_struct *p, bool sig_only) { - int ret = 1; + /* + * We first check if the task is freezing and next if it has already + * been frozen to avoid the race with frozen_process() which first marks + * the task as frozen and next clears its TIF_FREEZE. + */ + if (!freezing(p)) { + rmb(); + if (frozen(p)) + return false; - task_lock(p); - if (freezing(p)) { - if (has_mm(p)) { - if (!signal_pending(p)) - fake_signal_wake_up(p); - } else { - if (with_mm_only) - ret = 0; - else - wake_up_state(p, TASK_INTERRUPTIBLE); - } + if (!sig_only || should_send_signal(p)) + set_freeze_flag(p); + else + return false; + } + + if (should_send_signal(p)) { + if (!signal_pending(p)) + fake_signal_wake_up(p); + } else if (sig_only) { + return false; } else { - rmb(); - if (frozen(p)) { - ret = 0; - } else { - if (has_mm(p)) { - set_freeze_flag(p); - fake_signal_wake_up(p); - } else { - if (with_mm_only) { - ret = 0; - } else { - set_freeze_flag(p); - wake_up_state(p, TASK_INTERRUPTIBLE); - } - } - } + wake_up_state(p, TASK_INTERRUPTIBLE); } - task_unlock(p); - return ret; + + return true; } static void cancel_freezing(struct task_struct *p) @@ -156,7 +143,7 @@ static void cancel_freezing(struct task_struct *p) } } -static int try_to_freeze_tasks(int freeze_user_space) +static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; unsigned long end_time; @@ -175,7 +162,7 @@ static int try_to_freeze_tasks(int freeze_user_space) if (frozen(p) || !freezeable(p)) continue; - if (!freeze_task(p, freeze_user_space)) + if (!freeze_task(p, sig_only)) continue; /* @@ -235,13 +222,13 @@ int freeze_processes(void) int error; printk("Freezing user space processes ... "); - error = try_to_freeze_tasks(FREEZER_USER_SPACE); + error = try_to_freeze_tasks(true); if (error) goto Exit; printk("done.\n"); printk("Freezing remaining freezable tasks ... "); - error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); + error = try_to_freeze_tasks(false); if (error) goto Exit; printk("done."); @@ -251,7 +238,7 @@ int freeze_processes(void) return error; } -static void thaw_tasks(int thaw_user_space) +static void thaw_tasks(bool nosig_only) { struct task_struct *g, *p; @@ -260,7 +247,7 @@ static void thaw_tasks(int thaw_user_space) if (!freezeable(p)) continue; - if (!p->mm == thaw_user_space) + if (nosig_only && should_send_signal(p)) continue; thaw_process(p); @@ -271,8 +258,8 @@ static void thaw_tasks(int thaw_user_space) void thaw_processes(void) { printk("Restarting tasks ... "); - thaw_tasks(FREEZER_KERNEL_THREADS); - thaw_tasks(FREEZER_USER_SPACE); + thaw_tasks(true); + thaw_tasks(false); schedule(); printk("done.\n"); } diff --git a/kernel/power/user.c b/kernel/power/user.c index f5512cb3aa86..a6332a313262 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -69,16 +70,22 @@ static int snapshot_open(struct inode *inode, struct file *filp) struct snapshot_data *data; int error; - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) - return -EBUSY; + mutex_lock(&pm_mutex); + + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } if ((filp->f_flags & O_ACCMODE) == O_RDWR) { atomic_inc(&snapshot_device_available); - return -ENOSYS; + error = -ENOSYS; + goto Unlock; } if(create_basic_memory_bitmaps()) { atomic_inc(&snapshot_device_available); - return -ENOMEM; + error = -ENOMEM; + goto Unlock; } nonseekable_open(inode, filp); data = &snapshot_state; @@ -98,33 +105,36 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (error) pm_notifier_call_chain(PM_POST_HIBERNATION); } - if (error) { + if (error) atomic_inc(&snapshot_device_available); - return error; - } data->frozen = 0; data->ready = 0; data->platform_support = 0; - return 0; + Unlock: + mutex_unlock(&pm_mutex); + + return error; } static int snapshot_release(struct inode *inode, struct file *filp) { struct snapshot_data *data; + mutex_lock(&pm_mutex); + swsusp_free(); free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); - if (data->frozen) { - mutex_lock(&pm_mutex); + if (data->frozen) thaw_processes(); - mutex_unlock(&pm_mutex); - } pm_notifier_call_chain(data->mode == O_WRONLY ? PM_POST_HIBERNATION : PM_POST_RESTORE); atomic_inc(&snapshot_device_available); + + mutex_unlock(&pm_mutex); + return 0; } @@ -134,9 +144,13 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, struct snapshot_data *data; ssize_t res; + mutex_lock(&pm_mutex); + data = filp->private_data; - if (!data->ready) - return -ENODATA; + if (!data->ready) { + res = -ENODATA; + goto Unlock; + } res = snapshot_read_next(&data->handle, count); if (res > 0) { if (copy_to_user(buf, data_of(data->handle), res)) @@ -144,6 +158,10 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, else *offp = data->handle.offset; } + + Unlock: + mutex_unlock(&pm_mutex); + return res; } @@ -153,6 +171,8 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, struct snapshot_data *data; ssize_t res; + mutex_lock(&pm_mutex); + data = filp->private_data; res = snapshot_write_next(&data->handle, count); if (res > 0) { @@ -161,11 +181,14 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, else *offp = data->handle.offset; } + + mutex_unlock(&pm_mutex); + return res; } -static int snapshot_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static long snapshot_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { int error = 0; struct snapshot_data *data; @@ -179,6 +202,9 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!mutex_trylock(&pm_mutex)) + return -EBUSY; + data = filp->private_data; switch (cmd) { @@ -186,7 +212,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, case SNAPSHOT_FREEZE: if (data->frozen) break; - mutex_lock(&pm_mutex); printk("Syncing filesystems ... "); sys_sync(); printk("done.\n"); @@ -194,7 +219,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = freeze_processes(); if (error) thaw_processes(); - mutex_unlock(&pm_mutex); if (!error) data->frozen = 1; break; @@ -202,9 +226,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, case SNAPSHOT_UNFREEZE: if (!data->frozen || data->ready) break; - mutex_lock(&pm_mutex); thaw_processes(); - mutex_unlock(&pm_mutex); data->frozen = 0; break; @@ -307,16 +329,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - if (!mutex_trylock(&pm_mutex)) { - error = -EBUSY; - break; - } /* * Tasks are frozen and the notifiers have been called with * PM_HIBERNATION_PREPARE */ error = suspend_devices_and_enter(PM_SUSPEND_MEM); - mutex_unlock(&pm_mutex); break; case SNAPSHOT_PLATFORM_SUPPORT: @@ -390,6 +407,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, } + mutex_unlock(&pm_mutex); + return error; } @@ -399,7 +418,7 @@ static const struct file_operations snapshot_fops = { .read = snapshot_read, .write = snapshot_write, .llseek = no_llseek, - .ioctl = snapshot_ioctl, + .unlocked_ioctl = snapshot_ioctl, }; static struct miscdevice snapshot_device = { diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e337390fce01..8392a9da6450 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -33,13 +33,9 @@ */ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - BUG_ON(!list_empty(&child->ptrace_list)); - if (child->parent == new_parent) - return; - list_add(&child->ptrace_list, &child->parent->ptrace_children); - remove_parent(child); + BUG_ON(!list_empty(&child->ptrace_entry)); + list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; - add_parent(child); } /* @@ -73,12 +69,8 @@ void __ptrace_unlink(struct task_struct *child) BUG_ON(!child->ptrace); child->ptrace = 0; - if (ptrace_reparented(child)) { - list_del_init(&child->ptrace_list); - remove_parent(child); - child->parent = child->real_parent; - add_parent(child); - } + child->parent = child->real_parent; + list_del_init(&child->ptrace_entry); if (task_is_traced(child)) ptrace_untrace(child); @@ -492,15 +484,34 @@ int ptrace_traceme(void) /* * Are we already being traced? */ +repeat: task_lock(current); if (!(current->ptrace & PT_PTRACED)) { + /* + * See ptrace_attach() comments about the locking here. + */ + unsigned long flags; + if (!write_trylock_irqsave(&tasklist_lock, flags)) { + task_unlock(current); + do { + cpu_relax(); + } while (!write_can_lock(&tasklist_lock)); + goto repeat; + } + ret = security_ptrace(current->parent, current, PTRACE_MODE_ATTACH); + /* * Set the ptrace bit in the process ptrace flags. + * Then link us on our parent's ptraced list. */ - if (!ret) + if (!ret) { current->ptrace |= PT_PTRACED; + __ptrace_link(current, current->real_parent); + } + + write_unlock_irqrestore(&tasklist_lock, flags); } task_unlock(current); return ret; diff --git a/lib/Makefile b/lib/Makefile index 2c62a9c06fbe..818c4d455518 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -2,21 +2,17 @@ # Makefile for some libs needed in the kernel. # +ifdef CONFIG_FTRACE +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +endif + lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o -ifdef CONFIG_FTRACE -# Do not profile string.o, since it may be used in early boot or vdso -CFLAGS_REMOVE_string.o = -pg -# Also do not profile any debug utilities -CFLAGS_REMOVE_spinlock_debug.o = -pg -CFLAGS_REMOVE_list_debug.o = -pg -CFLAGS_REMOVE_debugobjects.o = -pg -endif - lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/kobject.c b/lib/kobject.c index 718e5101c263..dcade0543bd2 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -439,6 +439,7 @@ out: return error; } +EXPORT_SYMBOL_GPL(kobject_rename); /** * kobject_move - move object to another parent diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index cc12d5f5d5da..834a83199bdf 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -63,22 +63,11 @@ static const struct rpc_credops gss_nullops; # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define NFS_NGROUPS 16 - -#define GSS_CRED_SLACK 1024 /* XXX: unused */ +#define GSS_CRED_SLACK 1024 /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 -/* XXX this define must match the gssd define -* as it is passed to gssd to signal the use of -* machine creds should be part of the shared rpc interface */ - -#define CA_RUN_AS_MACHINE 0x00000200 - -/* dump the buffer in `emacs-hexl' style */ -#define isprint(c) ((c > 0x1f) && (c < 0x7f)) - struct gss_auth { struct kref kref; struct rpc_auth rpc_auth; @@ -146,7 +135,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_KERNEL); + dest->data = kmemdup(p, len, GFP_NOFS); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); dest->len = len; @@ -171,7 +160,7 @@ gss_alloc_context(void) { struct gss_cl_ctx *ctx; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_NOFS); if (ctx != NULL) { ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ @@ -272,7 +261,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) return NULL; } -/* Try to add a upcall to the pipefs queue. +/* Try to add an upcall to the pipefs queue. * If an upcall owned by our uid already exists, then we return a reference * to that upcall instead of adding the new upcall. */ @@ -341,7 +330,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) { struct gss_upcall_msg *gss_msg; - gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); + gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg != NULL) { INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); @@ -493,7 +482,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { const void *p, *end; void *buf; - struct rpc_clnt *clnt; struct gss_upcall_msg *gss_msg; struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; @@ -503,11 +491,10 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (mlen > MSG_BUF_MAXSIZE) goto out; err = -ENOMEM; - buf = kmalloc(mlen, GFP_KERNEL); + buf = kmalloc(mlen, GFP_NOFS); if (!buf) goto out; - clnt = RPC_I(inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; @@ -806,7 +793,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", acred->uid, auth->au_flavor); - if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) goto out_err; rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 60c3dba545d7..ef45eba22485 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -70,7 +70,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); res->len = len; @@ -131,7 +131,7 @@ gss_import_sec_context_kerberos(const void *p, struct krb5_ctx *ctx; int tmp; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 5deb4b6e4514..035e1dd6af1b 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -76,7 +76,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); return q; @@ -90,7 +90,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len, struct spkm3_ctx *ctx; int version; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &version, sizeof(version)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 6cdd241ad267..3308157436d2 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -90,7 +90,7 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) { - if (!(out->data = kzalloc(explen,GFP_KERNEL))) + if (!(out->data = kzalloc(explen,GFP_NOFS))) return 0; out->len = explen; memcpy(out->data, in, enclen); diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 44920b90bdc4..46b2647c5bd2 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", acred->uid, acred->gid); - if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS))) return ERR_PTR(-ENOMEM); rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8945307556ec..76739e928d0d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -58,7 +59,6 @@ static void call_start(struct rpc_task *task); static void call_reserve(struct rpc_task *task); static void call_reserveresult(struct rpc_task *task); static void call_allocate(struct rpc_task *task); -static void call_encode(struct rpc_task *task); static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); @@ -70,9 +70,9 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static __be32 * call_header(struct rpc_task *task); -static __be32 * call_verify(struct rpc_task *task); +static __be32 *rpc_encode_header(struct rpc_task *task); +static __be32 *rpc_verify_header(struct rpc_task *task); static int rpc_ping(struct rpc_clnt *clnt, int flags); static void rpc_register_client(struct rpc_clnt *clnt) @@ -324,6 +324,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) clnt->cl_autobind = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; + if (!(args->flags & RPC_CLNT_CREATE_QUIET)) + clnt->cl_chatty = 1; return clnt; } @@ -690,6 +692,21 @@ rpc_restart_call(struct rpc_task *task) } EXPORT_SYMBOL_GPL(rpc_restart_call); +#ifdef RPC_DEBUG +static const char *rpc_proc_name(const struct rpc_task *task) +{ + const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; + + if (proc) { + if (proc->p_name) + return proc->p_name; + else + return "NULL"; + } else + return "no proc"; +} +#endif + /* * 0. Initial state * @@ -701,9 +718,9 @@ call_start(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - dprintk("RPC: %5u call_start %s%d proc %d (%s)\n", task->tk_pid, + dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, clnt->cl_protname, clnt->cl_vers, - task->tk_msg.rpc_proc->p_proc, + rpc_proc_name(task), (RPC_IS_ASYNC(task) ? "async" : "sync")); /* Increment call count */ @@ -861,7 +878,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) * 3. Encode arguments of an RPC call */ static void -call_encode(struct rpc_task *task) +rpc_xdr_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; kxdrproc_t encode; @@ -876,23 +893,19 @@ call_encode(struct rpc_task *task) (char *)req->rq_buffer + req->rq_callsize, req->rq_rcvsize); - /* Encode header and provided arguments */ - encode = task->tk_msg.rpc_proc->p_encode; - if (!(p = call_header(task))) { - printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); + p = rpc_encode_header(task); + if (p == NULL) { + printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n"); rpc_exit(task, -EIO); return; } + + encode = task->tk_msg.rpc_proc->p_encode; if (encode == NULL) return; task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - if (task->tk_status == -ENOMEM) { - /* XXX: Is this sane? */ - rpc_delay(task, 3*HZ); - task->tk_status = -EAGAIN; - } } /* @@ -929,11 +942,9 @@ call_bind_status(struct rpc_task *task) } switch (task->tk_status) { - case -EAGAIN: - dprintk("RPC: %5u rpcbind waiting for another request " - "to finish\n", task->tk_pid); - /* avoid busy-waiting here -- could be a network outage. */ - rpc_delay(task, 5*HZ); + case -ENOMEM: + dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); + rpc_delay(task, HZ >> 2); goto retry_timeout; case -EACCES: dprintk("RPC: %5u remote rpcbind: RPC program/version " @@ -1046,10 +1057,16 @@ call_transmit(struct rpc_task *task) /* Encode here so that rpcsec_gss can use correct sequence number. */ if (rpc_task_need_encode(task)) { BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); - call_encode(task); + rpc_xdr_encode(task); /* Did the encode result in an error condition? */ - if (task->tk_status != 0) + if (task->tk_status != 0) { + /* Was the error nonfatal? */ + if (task->tk_status == -EAGAIN) + rpc_delay(task, HZ >> 4); + else + rpc_exit(task, task->tk_status); return; + } } xprt_transmit(task); if (task->tk_status < 0) @@ -1132,7 +1149,8 @@ call_status(struct rpc_task *task) rpc_exit(task, status); break; default: - printk("%s: RPC call returned error %d\n", + if (clnt->cl_chatty) + printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); } @@ -1157,7 +1175,8 @@ call_timeout(struct rpc_task *task) task->tk_timeouts++; if (RPC_IS_SOFT(task)) { - printk(KERN_NOTICE "%s: server %s not responding, timed out\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); rpc_exit(task, -EIO); return; @@ -1165,7 +1184,8 @@ call_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); } rpc_force_rebind(clnt); @@ -1196,8 +1216,9 @@ call_decode(struct rpc_task *task) task->tk_pid, task->tk_status); if (task->tk_flags & RPC_CALL_MAJORSEEN) { - printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s OK\n", + clnt->cl_protname, clnt->cl_server); task->tk_flags &= ~RPC_CALL_MAJORSEEN; } @@ -1224,8 +1245,7 @@ call_decode(struct rpc_task *task) goto out_retry; } - /* Verify the RPC header */ - p = call_verify(task); + p = rpc_verify_header(task); if (IS_ERR(p)) { if (p == ERR_PTR(-EAGAIN)) goto out_retry; @@ -1243,7 +1263,7 @@ call_decode(struct rpc_task *task) return; out_retry: task->tk_status = 0; - /* Note: call_verify() may have freed the RPC slot */ + /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { req->rq_received = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) @@ -1290,11 +1310,8 @@ call_refreshresult(struct rpc_task *task) return; } -/* - * Call header serialization - */ static __be32 * -call_header(struct rpc_task *task) +rpc_encode_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; @@ -1314,11 +1331,8 @@ call_header(struct rpc_task *task) return p; } -/* - * Reply header verification - */ static __be32 * -call_verify(struct rpc_task *task) +rpc_verify_header(struct rpc_task *task) { struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; @@ -1392,7 +1406,7 @@ call_verify(struct rpc_task *task) task->tk_action = call_bind; goto out_retry; case RPC_AUTH_TOOWEAK: - printk(KERN_NOTICE "call_verify: server %s requires stronger " + printk(KERN_NOTICE "RPC: server %s requires stronger " "authentication.\n", task->tk_client->cl_server); break; default: @@ -1431,10 +1445,10 @@ call_verify(struct rpc_task *task) error = -EPROTONOSUPPORT; goto out_err; case RPC_PROC_UNAVAIL: - dprintk("RPC: %5u %s: proc %p unsupported by program %u, " + dprintk("RPC: %5u %s: proc %s unsupported by program %u, " "version %u on server %s\n", task->tk_pid, __func__, - task->tk_msg.rpc_proc, + rpc_proc_name(task), task->tk_client->cl_prog, task->tk_client->cl_vers, task->tk_client->cl_server); @@ -1517,44 +1531,53 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int EXPORT_SYMBOL_GPL(rpc_call_null); #ifdef RPC_DEBUG +static void rpc_show_header(void) +{ + printk(KERN_INFO "-pid- flgs status -client- --rqstp- " + "-timeout ---ops--\n"); +} + +static void rpc_show_task(const struct rpc_clnt *clnt, + const struct rpc_task *task) +{ + const char *rpc_waitq = "none"; + char *p, action[KSYM_SYMBOL_LEN]; + + if (RPC_IS_QUEUED(task)) + rpc_waitq = rpc_qname(task->tk_waitqueue); + + /* map tk_action pointer to a function name; then trim off + * the "+0x0 [sunrpc]" */ + sprint_symbol(action, (unsigned long)task->tk_action); + p = strchr(action, '+'); + if (p) + *p = '\0'; + + printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n", + task->tk_pid, task->tk_flags, task->tk_status, + clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, + clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), + action, rpc_waitq); +} + void rpc_show_tasks(void) { struct rpc_clnt *clnt; - struct rpc_task *t; + struct rpc_task *task; + int header = 0; spin_lock(&rpc_client_lock); - if (list_empty(&all_clients)) - goto out; - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); list_for_each_entry(clnt, &all_clients, cl_clients) { - if (list_empty(&clnt->cl_tasks)) - continue; spin_lock(&clnt->cl_lock); - list_for_each_entry(t, &clnt->cl_tasks, tk_task) { - const char *rpc_waitq = "none"; - int proc; - - if (t->tk_msg.rpc_proc) - proc = t->tk_msg.rpc_proc->p_proc; - else - proc = -1; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->tk_waitqueue); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, proc, - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); + list_for_each_entry(task, &clnt->cl_tasks, tk_task) { + if (!header) { + rpc_show_header(); + header++; + } + rpc_show_task(clnt, task); } spin_unlock(&clnt->cl_lock); } -out: spin_unlock(&rpc_client_lock); } #endif diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e6fb21b19b86..24db2b4d12d3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -32,6 +32,10 @@ #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) +#define RPCBVERS_2 (2u) +#define RPCBVERS_3 (3u) +#define RPCBVERS_4 (4u) + enum { RPCBPROC_NULL, RPCBPROC_SET, @@ -64,6 +68,7 @@ enum { #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); +static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; struct rpcbind_args { @@ -76,41 +81,73 @@ struct rpcbind_args { const char * r_netid; const char * r_addr; const char * r_owner; + + int r_status; }; static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; +static struct rpc_procinfo rpcb_procedures4[]; struct rpcb_info { - int rpc_vers; + u32 rpc_vers; struct rpc_procinfo * rpc_proc; }; static struct rpcb_info rpcb_next_version[]; static struct rpcb_info rpcb_next_version6[]; +static const struct rpc_call_ops rpcb_getport_ops = { + .rpc_call_done = rpcb_getport_done, + .rpc_release = rpcb_map_release, +}; + +static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +{ + xprt_clear_binding(xprt); + rpc_wake_up_status(&xprt->binding, status); +} + static void rpcb_map_release(void *data) { struct rpcbind_args *map = data; + rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); xprt_put(map->r_xprt); kfree(map); } -static const struct rpc_call_ops rpcb_getport_ops = { - .rpc_call_done = rpcb_getport_done, - .rpc_release = rpcb_map_release, +static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), }; -static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +static const struct sockaddr_in6 rpcb_in6addr_loopback = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, + .sin6_port = htons(RPCBIND_PORT), +}; + +static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, + size_t addrlen, u32 version) { - xprt_clear_binding(xprt); - rpc_wake_up_status(&xprt->binding, status); + struct rpc_create_args args = { + .protocol = XPRT_TRANSPORT_UDP, + .address = addr, + .addrsize = addrlen, + .servername = "localhost", + .program = &rpcb_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_NOPING, + }; + + return rpc_create(&args); } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, - size_t salen, int proto, u32 version, - int privileged) + size_t salen, int proto, u32 version) { struct rpc_create_args args = { .protocol = proto, @@ -120,7 +157,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_NOPING, + .flags = (RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_NONPRIVPORT), }; switch (srvaddr->sa_family) { @@ -134,29 +172,72 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return NULL; } - if (!privileged) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); } +static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, + u32 version, struct rpc_message *msg, + int *result) +{ + struct rpc_clnt *rpcb_clnt; + int error = 0; + + *result = 0; + + rpcb_clnt = rpcb_create_local(addr, addrlen, version); + if (!IS_ERR(rpcb_clnt)) { + error = rpc_call_sync(rpcb_clnt, msg, 0); + rpc_shutdown_client(rpcb_clnt); + } else + error = PTR_ERR(rpcb_clnt); + + if (error < 0) + printk(KERN_WARNING "RPC: failed to contact local rpcbind " + "server (errno %d).\n", -error); + dprintk("RPC: registration status %d/%d\n", error, *result); + + return error; +} + /** * rpcb_register - set or unset a port registration with the local rpcbind svc * @prog: RPC program number to bind * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request + * @prot: transport protocol to register * @port: port value to register - * @okay: result code + * @okay: OUT: result code + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, transport] + * tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the passed-in port to zero. This removes + * all registered transports for [program, version] from the local + * rpcbind database. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received. The rpcbind daemon's + * boolean result code is stored in *okay. + * + * Returns an errno value and sets *result to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout. * - * port == 0 means unregister, port != 0 means register. + * This function uses rpcbind protocol version 2 to contact the + * local rpcbind daemon. * - * This routine supports only rpcbind version 2. + * Registration works over both AF_INET and AF_INET6, and services + * registered via this function are advertised as available for any + * address. If the local rpcbind daemon is listening on AF_INET6, + * services registered via this function will be advertised on + * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 + * addresses). */ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; struct rpcbind_args map = { .r_prog = prog, .r_vers = vers, @@ -164,32 +245,159 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) .r_port = port, }; struct rpc_message msg = { - .rpc_proc = &rpcb_procedures2[port ? - RPCBPROC_SET : RPCBPROC_UNSET], .rpc_argp = &map, .rpc_resp = okay, }; - struct rpc_clnt *rpcb_clnt; - int error = 0; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); - rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1); - if (IS_ERR(rpcb_clnt)) - return PTR_ERR(rpcb_clnt); + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; + if (port) + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - error = rpc_call_sync(rpcb_clnt, &msg, 0); + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_2, &msg, okay); +} - rpc_shutdown_client(rpcb_clnt); - if (error < 0) - printk(KERN_WARNING "RPC: failed to contact local rpcbind " - "server (errno %d).\n", -error); - dprintk("RPC: registration status %d/%d\n", error, *okay); +/* + * Fill in AF_INET family-specific arguments to register + */ +static int rpcb_register_netid4(struct sockaddr_in *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin_port); + char buf[32]; + + /* Construct AF_INET universal address */ + snprintf(buf, sizeof(buf), + NIPQUAD_FMT".%u.%u", + NIPQUAD(address_to_register->sin_addr.s_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? "" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_4, msg, msg->rpc_resp); +} - return error; +/* + * Fill in AF_INET6 family-specific arguments to register + */ +static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin6_port); + char buf[64]; + + /* Construct AF_INET6 universal address */ + snprintf(buf, sizeof(buf), + NIP6_FMT".%u.%u", + NIP6(address_to_register->sin6_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? "" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, + sizeof(rpcb_in6addr_loopback), + RPCBVERS_4, msg, msg->rpc_resp); +} + +/** + * rpcb_v4_register - set or unset a port registration with the local rpcbind + * @program: RPC program number of service to (un)register + * @version: RPC version number of service to (un)register + * @address: address family, IP address, and port to (un)register + * @netid: netid of transport protocol to (un)register + * @result: result code from rpcbind RPC call + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, address, + * netid] tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the port number in the passed-in address + * to zero. Callers pass a netid of "" to unregister all + * transport netids associated with [program, version, address]. + * + * Returns zero if the registration request was dispatched + * successfully and a reply was received. The rpcbind daemon's + * result code is stored in *result. + * + * Returns an errno value and sets *result to zero if there was + * some problem that prevented the rpcbind request from being + * dispatched, or if the rpcbind daemon did not respond within + * the timeout. + * + * This function uses rpcbind protocol version 4 to contact the + * local rpcbind daemon. The local rpcbind daemon must support + * version 4 of the rpcbind protocol in order for these functions + * to register a service successfully. + * + * Supported netids include "udp" and "tcp" for UDP and TCP over + * IPv4, and "udp6" and "tcp6" for UDP and TCP over IPv6, + * respectively. + * + * The contents of @address determine the address family and the + * port to be registered. The usual practice is to pass INADDR_ANY + * as the raw address, but specifying a non-zero address is also + * supported by this API if the caller wishes to advertise an RPC + * service on a specific network interface. + * + * Note that passing in INADDR_ANY does not create the same service + * registration as IN6ADDR_ANY. The former advertises an RPC + * service on any IPv4 address, but not on IPv6. The latter + * advertises the service on all IPv4 and IPv6 addresses. + */ +int rpcb_v4_register(const u32 program, const u32 version, + const struct sockaddr *address, const char *netid, + int *result) +{ + struct rpcbind_args map = { + .r_prog = program, + .r_vers = version, + .r_netid = netid, + .r_owner = RPCB_OWNER_STRING, + }; + struct rpc_message msg = { + .rpc_argp = &map, + .rpc_resp = result, + }; + + *result = 0; + + switch (address->sa_family) { + case AF_INET: + return rpcb_register_netid4((struct sockaddr_in *)address, + &msg); + case AF_INET6: + return rpcb_register_netid6((struct sockaddr_in6 *)address, + &msg); + } + + return -EAFNOSUPPORT; } /** @@ -227,7 +435,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, - sizeof(*sin), prot, 2, 0); + sizeof(*sin), prot, RPCBVERS_2); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -289,17 +497,16 @@ void rpcb_getport_async(struct rpc_task *task) /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); + /* Put self on the wait queue to ensure we get notified if + * some other task is already attempting to bind the port */ + rpc_sleep_on(&xprt->binding, task, NULL); + if (xprt_test_and_set_binding(xprt)) { - status = -EAGAIN; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __func__); - goto bailout_nowake; + return; } - /* Put self on queue before sending rpcbind request, in case - * rpcb_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL); - /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; @@ -338,7 +545,7 @@ void rpcb_getport_async(struct rpc_task *task) task->tk_pid, __func__, bind_version); rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, - bind_version, 0); + bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", @@ -361,15 +568,15 @@ void rpcb_getport_async(struct rpc_task *task) map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + map->r_status = -EIO; child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { - status = -EIO; /* rpcb_map_release() has freed the arguments */ dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __func__); - goto bailout_nofree; + return; } rpc_put_task(child); @@ -378,7 +585,6 @@ void rpcb_getport_async(struct rpc_task *task) bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); -bailout_nowake: task->tk_status = status; } EXPORT_SYMBOL_GPL(rpcb_getport_async); @@ -417,9 +623,13 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", child->tk_pid, status, map->r_port); - rpcb_wake_rpcbind_waiters(xprt, status); + map->r_status = status; } +/* + * XDR functions for rpcbind + */ + static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { @@ -438,7 +648,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); - dprintk("RPC: rpcb_decode_getport result %u\n", + dprintk("RPC: rpcb_decode_getport result %u\n", *portp); return 0; } @@ -447,8 +657,8 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); - dprintk("RPC: rpcb_decode_set result %u\n", - *boolp); + dprintk("RPC: rpcb_decode_set: call %s\n", + (*boolp ? "succeeded" : "failed")); return 0; } @@ -571,52 +781,60 @@ out_err: static struct rpc_procinfo rpcb_procedures2[] = { PROC(SET, mapping, set), PROC(UNSET, mapping, set), - PROC(GETADDR, mapping, getport), + PROC(GETPORT, mapping, getport), }; static struct rpc_procinfo rpcb_procedures3[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), PROC(GETADDR, getaddr, getaddr), }; static struct rpc_procinfo rpcb_procedures4[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), + PROC(GETADDR, getaddr, getaddr), PROC(GETVERSADDR, getaddr, getaddr), }; static struct rpcb_info rpcb_next_version[] = { -#ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, -#endif - { 2, &rpcb_procedures2[RPCBPROC_GETPORT] }, - { 0, NULL }, + { + .rpc_vers = RPCBVERS_2, + .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], + }, + { + .rpc_proc = NULL, + }, }; static struct rpcb_info rpcb_next_version6[] = { -#ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, -#endif - { 0, NULL }, + { + .rpc_vers = RPCBVERS_4, + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], + }, + { + .rpc_vers = RPCBVERS_3, + .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], + }, + { + .rpc_proc = NULL, + }, }; static struct rpc_version rpcb_version2 = { - .number = 2, + .number = RPCBVERS_2, .nrprocs = RPCB_HIGHPROC_2, .procs = rpcb_procedures2 }; static struct rpc_version rpcb_version3 = { - .number = 3, + .number = RPCBVERS_3, .nrprocs = RPCB_HIGHPROC_3, .procs = rpcb_procedures3 }; static struct rpc_version rpcb_version4 = { - .number = 4, + .number = RPCBVERS_4, .nrprocs = RPCB_HIGHPROC_4, .procs = rpcb_procedures4 }; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6eab9bf94baf..385f427bedad 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -576,9 +576,7 @@ EXPORT_SYMBOL_GPL(rpc_delay); */ static void rpc_prepare_task(struct rpc_task *task) { - lock_kernel(); task->tk_ops->rpc_call_prepare(task, task->tk_calldata); - unlock_kernel(); } /* @@ -588,9 +586,7 @@ void rpc_exit_task(struct rpc_task *task) { task->tk_action = NULL; if (task->tk_ops->rpc_call_done != NULL) { - lock_kernel(); task->tk_ops->rpc_call_done(task, task->tk_calldata); - unlock_kernel(); if (task->tk_action != NULL) { WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ @@ -602,11 +598,8 @@ EXPORT_SYMBOL_GPL(rpc_exit_task); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { - if (ops->rpc_release != NULL) { - lock_kernel(); + if (ops->rpc_release != NULL) ops->rpc_release(calldata); - unlock_kernel(); - } } /* @@ -626,19 +619,15 @@ static void __rpc_execute(struct rpc_task *task) /* * Execute any pending callback. */ - if (RPC_DO_CALLBACK(task)) { - /* Define a callback save pointer */ + if (task->tk_callback) { void (*save_callback)(struct rpc_task *); /* - * If a callback exists, save it, reset it, - * call it. - * The save is needed to stop from resetting - * another callback set within the callback handler - * - Dave + * We set tk_callback to NULL before calling it, + * in case it sets the tk_callback field itself: */ - save_callback=task->tk_callback; - task->tk_callback=NULL; + save_callback = task->tk_callback; + task->tk_callback = NULL; save_callback(task); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e1770f7ba0b3..99a52aabe332 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -690,7 +690,7 @@ static void xprt_connect_status(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - if (task->tk_status >= 0) { + if (task->tk_status == 0) { xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; dprintk("RPC: %5u xprt_connect_status: connection established\n", @@ -699,12 +699,6 @@ static void xprt_connect_status(struct rpc_task *task) } switch (task->tk_status) { - case -ECONNREFUSED: - case -ECONNRESET: - dprintk("RPC: %5u xprt_connect_status: server %s refused " - "connection\n", task->tk_pid, - task->tk_client->cl_server); - break; case -ENOTCONN: dprintk("RPC: %5u xprt_connect_status: connection broken\n", task->tk_pid); @@ -878,6 +872,7 @@ void xprt_transmit(struct rpc_task *task) return; req->rq_connect_cookie = xprt->connect_cookie; + req->rq_xtime = jiffies; status = xprt->ops->send_request(task); if (status == 0) { dprintk("RPC: %5u xmit complete\n", task->tk_pid); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ddbe981ab516..4486c59c3aca 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -579,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, @@ -671,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task) * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ while (1) { - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent); diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst index 3d2f4609578f..c972c0f54ce0 100644 --- a/scripts/Makefile.fwinst +++ b/scripts/Makefile.fwinst @@ -28,18 +28,39 @@ endif installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all)) installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/. +# Workaround for make < 3.81, where .SECONDEXPANSION doesn't work. +PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs +$(INSTALL_FW_PATH)/$$(%): install-all-dirs + @true +install-all-dirs: $(installed-fw-dirs) + @true + quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@) cmd_install = $(INSTALL) -m0644 $< $@ $(installed-fw-dirs): $(call cmd,mkdir) -$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)/ +$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %) $(call cmd,install) -.PHONY: __fw_install __fw_modinst FORCE +PHONY += __fw_install __fw_modinst FORCE + +.PHONY: $(PHONY) __fw_install: $(installed-fw) __fw_modinst: $(mod-fw) FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building using $(if_changed{,_dep}). As an optimization, we +# don't need to read them if the target does not exist; we will rebuild +# anyway in that case. + +targets := $(wildcard $(sort $(targets))) +cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) + +ifneq ($(cmd_files),) + include $(cmd_files) +endif