Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 17 Jul 2008 17:38:59 +0000 (10:38 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 17 Jul 2008 17:38:59 +0000 (10:38 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 17 Jul 2008 17:38:59 +0000 (10:38 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 17 Jul 2008 17:38:59 +0000 (10:38 -0700)
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi

index 9470ed9afcc07238077440312c24af3d5ef88ad0..f27be7d1a49f4038951827c583734b1cf1be3125 100644 (file)
--- a/Documentation/ABI/testing/sysfs-firmware-acpi
+++ b/Documentation/ABI/testing/sysfs-firmware-acpi
@@ -29,46 +29,46 @@ Description:
  
                 $ cd /sys/firmware/acpi/interrupts
                 $ grep . *
-               error:0
-               ff_gbl_lock:0
-               ff_pmtimer:0
-               ff_pwr_btn:0
-               ff_rt_clk:0
-               ff_slp_btn:0
-               gpe00:0
-               gpe01:0
-               gpe02:0
-               gpe03:0
-               gpe04:0
-               gpe05:0
-               gpe06:0
-               gpe07:0
-               gpe08:0
-               gpe09:174
-               gpe0A:0
-               gpe0B:0
-               gpe0C:0
-               gpe0D:0
-               gpe0E:0
-               gpe0F:0
-               gpe10:0
-               gpe11:60
-               gpe12:0
-               gpe13:0
-               gpe14:0
-               gpe15:0
-               gpe16:0
-               gpe17:0
-               gpe18:0
-               gpe19:7
-               gpe1A:0
-               gpe1B:0
-               gpe1C:0
-               gpe1D:0
-               gpe1E:0
-               gpe1F:0
-               gpe_all:241
-               sci:241
+               error:       0
+               ff_gbl_lock:       0   enable
+               ff_pmtimer:       0  invalid
+               ff_pwr_btn:       0   enable
+               ff_rt_clk:       2  disable
+               ff_slp_btn:       0  invalid
+               gpe00:       0  invalid
+               gpe01:       0   enable
+               gpe02:     108   enable
+               gpe03:       0  invalid
+               gpe04:       0  invalid
+               gpe05:       0  invalid
+               gpe06:       0   enable
+               gpe07:       0   enable
+               gpe08:       0  invalid
+               gpe09:       0  invalid
+               gpe0A:       0  invalid
+               gpe0B:       0  invalid
+               gpe0C:       0  invalid
+               gpe0D:       0  invalid
+               gpe0E:       0  invalid
+               gpe0F:       0  invalid
+               gpe10:       0  invalid
+               gpe11:       0  invalid
+               gpe12:       0  invalid
+               gpe13:       0  invalid
+               gpe14:       0  invalid
+               gpe15:       0  invalid
+               gpe16:       0  invalid
+               gpe17:    1084   enable
+               gpe18:       0   enable
+               gpe19:       0  invalid
+               gpe1A:       0  invalid
+               gpe1B:       0  invalid
+               gpe1C:       0  invalid
+               gpe1D:       0  invalid
+               gpe1E:       0  invalid
+               gpe1F:       0  invalid
+               gpe_all:    1192
+               sci:    1194
  
                 sci - The total number of times the ACPI SCI
                 has claimed an interrupt.
@@ -89,6 +89,13 @@ Description:
  
                 error - an interrupt that can't be accounted for above.
  
+               invalid: it's either a wakeup GPE or a GPE/Fixed Event that
+                       doesn't have an event handler.
+
+               disable: the GPE/Fixed Event is valid but disabled.
+
+               enable: the GPE/Fixed Event is valid and enabled.
+
                 Root has permission to clear any of these counters.  Eg.
                 # echo 0 > gpe11
  
@@ -97,3 +104,43 @@ Description:
  
                 None of these counters has an effect on the function
                 of the system, they are simply statistics.
+
+               Besides this, user can also write specific strings to these files
+               to enable/disable/clear ACPI interrupts in user space, which can be
+               used to debug some ACPI interrupt storm issues.
+
+               Note that only writing to VALID GPE/Fixed Event is allowed,
+               i.e. user can only change the status of runtime GPE and
+               Fixed Event with event handler installed.
+
+               Let's take the power button fixed event as an example. Please kill acpid
+               and other user space applications so that the machine won't shutdown
+               when pressing the power button.
+               # cat ff_pwr_btn
+               0
+               # press the power button for 3 times;
+               # cat ff_pwr_btn
+               3
+               # echo disable > ff_pwr_btn
+               # cat ff_pwr_btn
+               disable
+               # press the power button for 3 times;
+               # cat ff_pwr_btn
+               disable
+               # echo enable > ff_pwr_btn
+               # cat ff_pwr_btn
+               4
+               /*
+                * this is because the status bit is set even if the enable bit is cleared,
+                * and it triggers an ACPI fixed event when the enable bit is set again
+                */
+               # press the power button for 3 times;
+               # cat ff_pwr_btn
+               7
+               # echo disable > ff_pwr_btn
+               # press the power button for 3 times;
+               # echo clear > ff_pwr_btn       /* clear the status bit */
+               # echo disable > ff_pwr_btn
+               # cat ff_pwr_btn
+               7
+
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt

new file mode 100644 (file)

index 0000000..540e9e7
--- /dev/null
+++ b/Documentation/filesystems/ubifs.txt
@@ -0,0 +1,164 @@
+Introduction
+=============
+
+UBIFS file-system stands for UBI File System. UBI stands for "Unsorted
+Block Images". UBIFS is a flash file system, which means it is designed
+to work with flash devices. It is important to understand, that UBIFS
+is completely different to any traditional file-system in Linux, like
+Ext2, XFS, JFS, etc. UBIFS represents a separate class of file-systems
+which work with MTD devices, not block devices. The other Linux
+file-system of this class is JFFS2.
+
+To make it more clear, here is a small comparison of MTD devices and
+block devices.
+
+1 MTD devices represent flash devices and they consist of eraseblocks of
+  rather large size, typically about 128KiB. Block devices consist of
+  small blocks, typically 512 bytes.
+2 MTD devices support 3 main operations - read from some offset within an
+  eraseblock, write to some offset within an eraseblock, and erase a whole
+  eraseblock. Block  devices support 2 main operations - read a whole
+  block and write a whole block.
+3 The whole eraseblock has to be erased before it becomes possible to
+  re-write its contents. Blocks may be just re-written.
+4 Eraseblocks become worn out after some number of erase cycles -
+  typically 100K-1G for SLC NAND and NOR flashes, and 1K-10K for MLC
+  NAND flashes. Blocks do not have the wear-out property.
+5 Eraseblocks may become bad (only on NAND flashes) and software should
+  deal with this. Blocks on hard drives typically do not become bad,
+  because hardware has mechanisms to substitute bad blocks, at least in
+  modern LBA disks.
+
+It should be quite obvious why UBIFS is very different to traditional
+file-systems.
+
+UBIFS works on top of UBI. UBI is a separate software layer which may be
+found in drivers/mtd/ubi. UBI is basically a volume management and
+wear-leveling layer. It provides so-called UBI volumes, which are a higher
+level abstraction than an MTD device. The programming model of UBI devices
+is very similar to MTD devices - they still consist of large eraseblocks,
+they have read/write/erase operations, but UBI devices are devoid of
+limitations like wear and bad blocks (items 4 and 5 in the above list).
+
+In a sense, UBIFS is a next generation of JFFS2 file-system, but it is
+very different and incompatible to JFFS2. The following are the main
+differences.
+
+* JFFS2 works on top of MTD devices, UBIFS depends on UBI and works on
+  top of UBI volumes.
+* JFFS2 does not have on-media index and has to build it while mounting,
+  which requires full media scan. UBIFS maintains the FS indexing
+  information on the flash media and does not require full media scan,
+  so it mounts many times faster than JFFS2.
+* JFFS2 is a write-through file-system, while UBIFS supports write-back,
+  which makes UBIFS much faster on writes.
+
+Similarly to JFFS2, UBIFS supports on-the-fly compression, which makes
+it possible to fit quite a lot of data to the flash.
+
+Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts.
+It does not need stuff like fsck.ext2. UBIFS automatically replays its
+journal and recovers from crashes, ensuring that the on-flash data
+structures are consistent.
+
+UBIFS scales logarithmically (most of the data structures it uses are
+trees), so the mount time and memory consumption do not linearly depend
+on the flash size, like in case of JFFS2. This is because UBIFS
+maintains the FS index on the flash media. However, UBIFS depends on
+UBI, which scales linearly. So overall UBI/UBIFS stack scales linearly.
+Nevertheless, UBI/UBIFS scales considerably better than JFFS2.
+
+The authors of UBIFS believe, that it is possible to develop UBI2 which
+would scale logarithmically as well. UBI2 would support the same API as UBI,
+but it would be binary incompatible to UBI. So UBIFS would not need to be
+changed to use UBI2.
+
+
+Mount options
+=============
+
+(*) == default.
+
+norm_unmount (*)       commit on unmount; the journal is committed
+                       when the file-system is unmounted so that the
+                       next mount does not have to replay the journal
+                       and it becomes very fast;
+fast_unmount           do not commit on unmount; this option makes
+                       unmount faster, but the next mount slower
+                       because of the need to replay the journal.
+
+
+Quick usage instructions
+========================
+
+The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax,
+where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is
+UBI volume name.
+
+Mount volume 0 on UBI device 0 to /mnt/ubifs:
+$ mount -t ubifs ubi0_0 /mnt/ubifs
+
+Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume
+name):
+$ mount -t ubifs ubi0:rootfs /mnt/ubifs
+
+The following is an example of the kernel boot arguments to attach mtd0
+to UBI and mount volume "rootfs":
+ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
+
+
+Module Parameters for Debugging
+===============================
+
+When UBIFS has been compiled with debugging enabled, there are 3 module
+parameters that are available to control aspects of testing and debugging.
+The parameters are unsigned integers where each bit controls an option.
+The parameters are:
+
+debug_msgs     Selects which debug messages to display, as follows:
+
+               Message Type                            Flag value
+
+               General messages                        1
+               Journal messages                        2
+               Mount messages                          4
+               Commit messages                         8
+               LEB search messages                     16
+               Budgeting messages                      32
+               Garbage collection messages             64
+               Tree Node Cache (TNC) messages          128
+               LEB properties (lprops) messages        256
+               Input/output messages                   512
+               Log messages                            1024
+               Scan messages                           2048
+               Recovery messages                       4096
+
+debug_chks     Selects extra checks that UBIFS can do while running:
+
+               Check                                   Flag value
+
+               General checks                          1
+               Check Tree Node Cache (TNC)             2
+               Check indexing tree size                4
+               Check orphan area                       8
+               Check old indexing tree                 16
+               Check LEB properties (lprops)           32
+               Check leaf nodes and inodes             64
+
+debug_tsts     Selects a mode of testing, as follows:
+
+               Test mode                               Flag value
+
+               Force in-the-gaps method                2
+               Failure mode for recovery testing       4
+
+For example, set debug_msgs to 5 to display General messages and Mount
+messages.
+
+
+References
+==========
+
+UBIFS documentation and FAQ/HOWTO at the MTD web site:
+http://www.linux-mtd.infradead.org/doc/ubifs.html
+http://www.linux-mtd.infradead.org/faq/ubifs.html
diff --git a/Documentation/ioctl/hdio.txt b/Documentation/ioctl/hdio.txt

index c19efdeace2cd220a73554b66f5c0fbef1d81d0b..91a6ecbae0bb9601b7d50e084af1eeb96869f730 100644 (file)
--- a/Documentation/ioctl/hdio.txt
+++ b/Documentation/ioctl/hdio.txt
@@ -508,12 +508,13 @@ HDIO_DRIVE_RESET          execute a device reset
  
         error returns:
           EACCES        Access denied:  requires CAP_SYS_ADMIN
+         ENXIO         No such device: phy dead or ctl_addr == 0
+         EIO           I/O error:      reset timed out or hardware error
  
         notes:
  
-         Abort any current command, prevent anything else from being
-         queued, execute a reset on the device, and issue BLKRRPART
-         ioctl on the block device.
+         Execute a reset on the device as soon as the current IO
+         operation has completed.
  
           Executes an ATAPI soft reset if applicable, otherwise
           executes an ATA soft reset on the controller.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index 312fe77764a48cba9fb04e000fc2dffeba7fa978..09ad7450647bc81dff32a3eaf7ea3c0858f4a896 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -147,10 +147,14 @@ and is between 256 and 4096 characters. It is defined in the file
                         default: 0
  
         acpi_sleep=     [HW,ACPI] Sleep options
-                       Format: { s3_bios, s3_mode, s3_beep }
+                       Format: { s3_bios, s3_mode, s3_beep, old_ordering }
                         See Documentation/power/video.txt for s3_bios and s3_mode.
                         s3_beep is for debugging; it makes the PC's speaker beep
                         as soon as the kernel's real-mode entry point is called.
+                       old_ordering causes the ACPI 1.0 ordering of the _PTS
+                       control method, wrt putting devices into low power
+                       states, to be enforced (the ACPI 2.0 ordering of _PTS is
+                       used by default).
  
         acpi_sci=       [HW,ACPI] ACPI System Control Interrupt trigger mode
                         Format: { level | edge | high | low }
@@ -818,7 +822,7 @@ and is between 256 and 4096 characters. It is defined in the file
                         See Documentation/ide/ide.txt.
  
         idle=           [X86]
-                       Format: idle=poll or idle=mwait
+                       Format: idle=poll, idle=mwait, idle=halt or idle=nomwait
                         Poll forces a polling idle loop that can slightly improves the performance
                         of waking up a idle CPU, but will use a lot of power and make the system
                         run hot. Not recommended.
@@ -826,6 +830,9 @@ and is between 256 and 4096 characters. It is defined in the file
                         to not use it because it doesn't save as much power as a normal idle
                         loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
                         as idle=poll.
+                       idle=halt: Halt is forced to be used for CPU idle;
+                       in that case C2/C3 won't be used any more.
+                       idle=nomwait: Disable mwait for CPU C-states.
  
         ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
                         Claim all unknown PCI IDE storage controllers.
@@ -1534,6 +1541,9 @@ and is between 256 and 4096 characters. It is defined in the file
                                 Use with caution as certain devices share
                                 address decoders between ROMs and other
                                 resources.
+               norom           [X86-32,X86_64] Do not assign address space to
+                               expansion ROMs that do not already have
+                               BIOS assigned address ranges.
                 irqmask=0xMMMM  [X86-32] Set a bit mask of IRQs allowed to be
                                 assigned automatically to PCI devices. You can
                                 make the kernel exclude IRQs of your ISA cards
diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt

index 79b7dbd2214190ae9bf9bce988faa176491c5bce..69b5dd4e5a59bd74550397266122c465a8a12b5f 100644 (file)
--- a/Documentation/laptops/acer-wmi.txt
+++ b/Documentation/laptops/acer-wmi.txt
@@ -174,8 +174,6 @@ The LED is exposed through the LED subsystem, and can be found in:
  The mail LED is autodetected, so if you don't have one, the LED device won't
  be registered.
  
-If you have a mail LED that is not green, please report this to me.
-
  Backlight
  *********
  
diff --git a/MAINTAINERS b/MAINTAINERS

index ee1c56a20750699c3e797629169a8a748dcf7f33..93fd6b2efeee80c4948e0f33f02d8081a26c637c 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -216,8 +216,8 @@ W:  http://code.google.com/p/aceracpi
  S:     Maintained
  
  ACPI
-P:     Len Brown
-M:     len.brown@intel.com
+P:     Andi Kleen
+M:     ak@linux.intel.com
  M:     lenb@kernel.org
  L:     linux-acpi@vger.kernel.org
  W:     http://www.lesswatts.org/projects/acpi/
@@ -239,8 +239,8 @@ W:  http://www.lesswatts.org/projects/acpi/
  S:     Supported
  
  ACPI FAN DRIVER
-P:     Len Brown
-M:     len.brown@intel.com
+P:     Zhang Rui
+M:     rui.zhang@intel.com
  L:     linux-acpi@vger.kernel.org
  W:     http://www.lesswatts.org/projects/acpi/
  S:     Supported
@@ -248,18 +248,18 @@ S:        Supported
  ACPI PCI HOTPLUG DRIVER
  P:     Kristen Carlson Accardi
  M:     kristen.c.accardi@intel.com
-L:     pcihpd-discuss@lists.sourceforge.net
+L:     linux-pci@vger.kernel.org
  S:     Supported
  
  ACPI THERMAL DRIVER
-P:     Len Brown
-M:     len.brown@intel.com
+P:     Zhang Rui
+M:     rui.zhang@intel.com
  L:     linux-acpi@vger.kernel.org
  W:     http://www.lesswatts.org/projects/acpi/
  S:     Supported
  
  ACPI VIDEO DRIVER
-P:     Rui Zhang
+P:     Zhang Rui
  M:     rui.zhang@intel.com
  L:     linux-acpi@vger.kernel.org
  W:     http://www.lesswatts.org/projects/acpi/
@@ -348,7 +348,9 @@ W:  http://www.linux-usb.org/SpeedTouch/
  S:     Maintained
  
  ALCHEMY AU1XX0 MMC DRIVER
-S:     Orphan
+P:     Manuel Lauss
+M:     manuel.lauss@gmail.com
+S:     Maintained
  
  ALI1563 I2C DRIVER
  P:     Rudolf Marek
@@ -1143,23 +1145,28 @@ COMPACTPCI HOTPLUG CORE
  P:     Scott Murray
  M:     scottm@somanetworks.com
  M:     scott@spiteful.org
-L:     pcihpd-discuss@lists.sourceforge.net
+L:     linux-pci@vger.kernel.org
  S:     Supported
  
  COMPACTPCI HOTPLUG ZIATECH ZT5550 DRIVER
  P:     Scott Murray
  M:     scottm@somanetworks.com
  M:     scott@spiteful.org
-L:     pcihpd-discuss@lists.sourceforge.net
+L:     linux-pci@vger.kernel.org
  S:     Supported
  
  COMPACTPCI HOTPLUG GENERIC DRIVER
  P:     Scott Murray
  M:     scottm@somanetworks.com
  M:     scott@spiteful.org
-L:     pcihpd-discuss@lists.sourceforge.net
+L:     linux-pci@vger.kernel.org
  S:     Supported
  
+COMPAL LAPTOP SUPPORT
+P:     Cezary Jackiewicz
+M:     cezary.jackiewicz@gmail.com
+S:     Maintained
+
  COMPUTONE INTELLIPORT MULTIPORT CARD
  P:     Michael H. Warfield
  M:     mhw@wittsend.com
@@ -1787,6 +1794,12 @@ P:       David Howells
  M:     dhowells@redhat.com
  S:     Maintained
  
+FUJITSU LAPTOP EXTRAS
+P:     Jonathan Woithe
+M:     jwoithe@physics.adelaide.edu.au
+L:     linux-acpi@vger.kernel.org
+S:     Maintained
+
  FUSE: FILESYSTEM IN USERSPACE
  P:     Miklos Szeredi
  M:     miklos@szeredi.hu
@@ -2325,6 +2338,16 @@ L:       linux-mtd@lists.infradead.org
  W:     http://www.linux-mtd.infradead.org/doc/jffs2.html
  S:     Maintained
  
+UBI FILE SYSTEM (UBIFS)
+P:     Artem Bityutskiy
+M:     dedekind@infradead.org
+P:     Adrian Hunter
+M:     ext-adrian.hunter@nokia.com
+L:     linux-mtd@lists.infradead.org
+T:     git git://git.infradead.org/~dedekind/ubifs-2.6.git
+W:     http://www.linux-mtd.infradead.org/doc/ubifs.html
+S:     Maintained
+
  JFS FILESYSTEM
  P:     Dave Kleikamp
  M:     shaggy@austin.ibm.com
@@ -3196,7 +3219,7 @@ S:        Supported
  PCIE HOTPLUG DRIVER
  P:     Kristen Carlson Accardi
  M:     kristen.c.accardi@intel.com
-L:     pcihpd-discuss@lists.sourceforge.net
+L:     linux-pci@vger.kernel.org
  S:     Supported
  
  PCMCIA SUBSYSTEM
@@ -3538,6 +3561,13 @@ L:       linux-s390@vger.kernel.org
  W:     http://www.ibm.com/developerworks/linux/linux390/
  S:     Supported
  
+S3C24XX SD/MMC Driver
+P:     Ben Dooks
+M:     ben-linux@fluff.org
+L:     linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L:     linux-kernel@vger.kernel.org
+S:     Supported
+
  SAA7146 VIDEO4LINUX-2 DRIVER
  P:     Michael Hunold
  M:     michael@mihu.de
@@ -3610,6 +3640,12 @@ P:       Jim Cromie
  M:     jim.cromie@gmail.com
  S:     Maintained
  
+SDRICOH_CS MMC/SD HOST CONTROLLER INTERFACE DRIVER
+P:     Sascha Sommer
+M:     saschasommer@freenet.de
+L:     sdricohcs-devel@lists.sourceforge.net (subscribers-only)
+S:     Maintained
+
  SECURITY CONTACT
  P:     Security Officers
  M:     security@kernel.org
@@ -3829,7 +3865,7 @@ S:        Maintained
  SHPC HOTPLUG DRIVER
  P:     Kristen Carlson Accardi
  M:     kristen.c.accardi@intel.com
-L:     pcihpd-discuss@lists.sourceforge.net
+L:     linux-pci@vger.kernel.org
  S:     Supported
  
  SECURE DIGITAL HOST CONTROLLER INTERFACE DRIVER
diff --git a/Makefile b/Makefile

index 1564577bdc5386dd9a9f2856ef244303ac302436..6192922de9c06e7d6b648944ebbde0db96d9c596 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,3 @@
-FRED=42
  VERSION = 2
  PATCHLEVEL = 6
  SUBLEVEL = 26
diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c

index a51bb9fb3c89d2047151a5da22d14c7582a43643..c7fe94d03a1eb1358a2d77b2a663bc6a81fba757 100644 (file)
--- a/arch/avr32/boards/atngw100/setup.c
+++ b/arch/avr32/boards/atngw100/setup.c
@@ -19,6 +19,7 @@
  #include <linux/leds.h>
  #include <linux/spi/spi.h>
  
+#include <asm/atmel-mci.h>
  #include <asm/io.h>
  #include <asm/setup.h>
  
@@ -51,6 +52,11 @@ static struct spi_board_info spi0_board_info[] __initdata = {
         },
  };
  
+static struct mci_platform_data __initdata mci0_data = {
+       .detect_pin     = GPIO_PIN_PC(25),
+       .wp_pin         = GPIO_PIN_PE(0),
+};
+
  /*
   * The next two functions should go away as the boot loader is
   * supposed to initialize the macb address registers with a valid
@@ -170,6 +176,7 @@ static int __init atngw100_init(void)
         set_hw_addr(at32_add_device_eth(1, &eth_data[1]));
  
         at32_add_device_spi(0, spi0_board_info, ARRAY_SIZE(spi0_board_info));
+       at32_add_device_mci(0, &mci0_data);
         at32_add_device_usba(0, NULL);
  
         for (i = 0; i < ARRAY_SIZE(ngw_leds); i++) {
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c

index 86b363c1c25bea00dfe011425c044d6d91edd0f9..e11659b732fab61d743f5e72b3487b298c5f9646 100644 (file)
--- a/arch/avr32/boards/atstk1000/atstk1002.c
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -234,6 +234,9 @@ static int __init atstk1002_init(void)
  #ifdef CONFIG_BOARD_ATSTK100X_SPI1
         at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
  #endif
+#ifndef CONFIG_BOARD_ATSTK1002_SW2_CUSTOM
+       at32_add_device_mci(0, NULL);
+#endif
  #ifdef CONFIG_BOARD_ATSTK1002_SW5_CUSTOM
         set_hw_addr(at32_add_device_eth(1, &eth_data[1]));
  #else
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c

index 07b21b121eef995726a6e8a981f3c416497e997e..021d5121718469387fc1c4d95dd797a12f40bb3b 100644 (file)
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -14,6 +14,7 @@
  #include <linux/spi/spi.h>
  #include <linux/usb/atmel_usba_udc.h>
  
+#include <asm/atmel-mci.h>
  #include <asm/io.h>
  #include <asm/irq.h>
  
@@ -1278,20 +1279,32 @@ static struct clk atmel_mci0_pclk = {
         .index          = 9,
  };
  
-struct platform_device *__init at32_add_device_mci(unsigned int id)
+struct platform_device *__init
+at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
  {
-       struct platform_device *pdev;
+       struct mci_platform_data        _data;
+       struct platform_device          *pdev;
+       struct dw_dma_slave             *dws;
  
         if (id != 0)
                 return NULL;
  
         pdev = platform_device_alloc("atmel_mci", id);
         if (!pdev)
-               return NULL;
+               goto fail;
  
         if (platform_device_add_resources(pdev, atmel_mci0_resource,
                                 ARRAY_SIZE(atmel_mci0_resource)))
-               goto err_add_resources;
+               goto fail;
+
+       if (!data) {
+               data = &_data;
+               memset(data, 0, sizeof(struct mci_platform_data));
+       }
+
+       if (platform_device_add_data(pdev, data,
+                               sizeof(struct mci_platform_data)))
+               goto fail;
  
         select_peripheral(PA(10), PERIPH_A, 0); /* CLK   */
         select_peripheral(PA(11), PERIPH_A, 0); /* CMD   */
@@ -1300,12 +1313,19 @@ struct platform_device *__init at32_add_device_mci(unsigned int id)
         select_peripheral(PA(14), PERIPH_A, 0); /* DATA2 */
         select_peripheral(PA(15), PERIPH_A, 0); /* DATA3 */
  
+       if (data) {
+               if (data->detect_pin != GPIO_PIN_NONE)
+                       at32_select_gpio(data->detect_pin, 0);
+               if (data->wp_pin != GPIO_PIN_NONE)
+                       at32_select_gpio(data->wp_pin, 0);
+       }
+
         atmel_mci0_pclk.dev = &pdev->dev;
  
         platform_device_add(pdev);
         return pdev;
  
-err_add_resources:
+fail:
         platform_device_put(pdev);
         return NULL;
  }
diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c

index 4f165c93be424545ebcad65ee1d2b10be477f7fe..edae117fcc2bcc93ef81a8d1d8a167e6a2050a68 100644 (file)
--- a/arch/frv/mb93090-mb00/pci-frv.c
+++ b/arch/frv/mb93090-mb00/pci-frv.c
@@ -19,36 +19,6 @@
  
  #include "pci-frv.h"
  
-#if 0
-void
-pcibios_update_resource(struct pci_dev *dev, struct resource *root,
-                       struct resource *res, int resource)
-{
-       u32 new, check;
-       int reg;
-
-       new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
-       if (resource < 6) {
-               reg = PCI_BASE_ADDRESS_0 + 4*resource;
-       } else if (resource == PCI_ROM_RESOURCE) {
-               res->flags |= IORESOURCE_ROM_ENABLE;
-               new |= PCI_ROM_ADDRESS_ENABLE;
-               reg = dev->rom_base_reg;
-       } else {
-               /* Somebody might have asked allocation of a non-standard resource */
-               return;
-       }
-
-       pci_write_config_dword(dev, reg, new);
-       pci_read_config_dword(dev, reg, &check);
-       if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
-               printk(KERN_ERR "PCI: Error while updating region "
-                      "%s/%d (%08x != %08x)\n", pci_name(dev), resource,
-                      new, check);
-       }
-}
-#endif
-
  /*
   * We need to avoid collisions with `mirrored' VGA ports
   * and other strange ISA hardware, so we always want the
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c

index fabaf08d9a695bbb088951da7e96af4a9b640443..3ab8373103ecf9252f1ceeff4f2076e8da471113 100644 (file)
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -55,6 +55,10 @@ void (*ia64_mark_idle)(int);
  
  unsigned long boot_option_idle_override = 0;
  EXPORT_SYMBOL(boot_option_idle_override);
+unsigned long idle_halt;
+EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
  
  void
  ia64_do_show_stack (struct unw_frame_info *info, void *arg)
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c

index 318b811006236fe08b7508e3a16f6ccaa326a28d..68c978be9a5189b4eccc0f782124a8e752abd3c9 100644 (file)
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -395,7 +395,7 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                 if (kvm->vcpus[i]->cpu != -1) {
                         call_data.vcpu = kvm->vcpus[i];
                         smp_call_function_single(kvm->vcpus[i]->cpu,
-                                       vcpu_global_purge, &call_data, 0, 1);
+                                       vcpu_global_purge, &call_data, 1);
                 } else
                         printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
  
@@ -1693,7 +1693,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
                 wake_up_interruptible(&vcpu->wq);
  
         if (vcpu->guest_mode)
-               smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+               smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
  }
  
  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c

index 091f936c44853d83dab753d37d1d930eaee6740d..0c69d9ec92d49dcb04edacb29a6e46b3e1857027 100644 (file)
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -130,7 +130,7 @@ static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
         args.cache_type = gr29;
         args.operation = gr30;
         smp_call_function(remote_pal_cache_flush,
-                               (void *)&args, 1, 1);
+                               (void *)&args, 1);
         if (args.status != 0)
                 printk(KERN_ERR"pal_cache_flush error!,"
                                 "status:0x%lx\n", args.status);
diff --git a/arch/m68knommu/kernel/comempci.c b/arch/m68knommu/kernel/comempci.c

index 6ee00effbad28e4549383ff4ebb27afe440f37ee..0a68b5a85f86837b54b912328944c079a4222a24 100644 (file)
--- a/arch/m68knommu/kernel/comempci.c
+++ b/arch/m68knommu/kernel/comempci.c
@@ -373,15 +373,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
         return(0);
  }
  
-/*****************************************************************************/
-
-void pcibios_update_resource(struct pci_dev *dev, struct resource *root, struct resource *r, int resource)
-{
-       printk(KERN_WARNING "%s(%d): no support for changing PCI resources...\n",
-               __FILE__, __LINE__);
-}
-
-
  /*****************************************************************************/
  
  /*
diff --git a/arch/mips/pmc-sierra/yosemite/ht.c b/arch/mips/pmc-sierra/yosemite/ht.c

index 6380662bbf3ceeb29ac054446372293117751e51..678388fd34b1a9e521d812c307a0aec42478fbc4 100644 (file)
--- a/arch/mips/pmc-sierra/yosemite/ht.c
+++ b/arch/mips/pmc-sierra/yosemite/ht.c
@@ -345,42 +345,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
          return pcibios_enable_resources(dev);
  }
  
-
-
-void pcibios_update_resource(struct pci_dev *dev, struct resource *root,
-                             struct resource *res, int resource)
-{
-        u32 new, check;
-        int reg;
-
-        return;
-
-        new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
-        if (resource < 6) {
-                reg = PCI_BASE_ADDRESS_0 + 4 * resource;
-        } else if (resource == PCI_ROM_RESOURCE) {
-               res->flags |= IORESOURCE_ROM_ENABLE;
-                reg = dev->rom_base_reg;
-        } else {
-                /*
-                 * Somebody might have asked allocation of a non-standard
-                 * resource
-                 */
-                return;
-        }
-
-        pci_write_config_dword(dev, reg, new);
-        pci_read_config_dword(dev, reg, &check);
-        if ((new ^ check) &
-            ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK :
-             PCI_BASE_ADDRESS_MEM_MASK)) {
-                printk(KERN_ERR "PCI: Error while updating region "
-                       "%s/%d (%08x != %08x)\n", pci_name(dev), resource,
-                       new, check);
-        }
-}
-
-
  void pcibios_align_resource(void *data, struct resource *res,
                              resource_size_t size, resource_size_t align)
  {
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig

index 5dc8f8028d520b6c0a09dd4e0a6ffb70d4891d84..eb530b4128ba9353a9fa6d397102b0e52de77cd7 100644 (file)
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -289,7 +289,7 @@ config WARN_STACK_SIZE
         int "Maximum frame size considered safe (128-2048)"
         range 128 2048
         depends on WARN_STACK
-       default "256"
+       default "2048"
         help
           This allows you to specify the maximum frame size a function may
           have without the compiler complaining about it.
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c

index 57571f10270cb3f8f6187a1a5a132db23181057d..8841919ef7e64b9417f8fc8934403970a47644f8 100644 (file)
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -10,6 +10,7 @@
  #include <linux/sched.h>
  #include <linux/stacktrace.h>
  #include <linux/kallsyms.h>
+#include <linux/module.h>
  
  static unsigned long save_context_stack(struct stack_trace *trace,
                                         unsigned long sp,
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c

index 08d2e7325252a19071fe446055417bd4d5368dd3..f57095a2617c064700fa0ec7123322eddcfadd80 100644 (file)
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -76,38 +76,6 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
         pci_read_bridge_bases(bus);
  }
  
-void
-pcibios_update_resource(struct pci_dev *dev, struct resource *root,
-                       struct resource *res, int resource)
-{
-       u32 new, check;
-       int reg;
-
-       new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
-       if (resource < 6) {
-               reg = PCI_BASE_ADDRESS_0 + 4*resource;
-       } else if (resource == PCI_ROM_RESOURCE) {
-               res->flags |= IORESOURCE_ROM_ENABLE;
-               new |= PCI_ROM_ADDRESS_ENABLE;
-               reg = dev->rom_base_reg;
-       } else {
-               /*
-                * Somebody might have asked allocation of a non-standard
-                * resource
-                */
-               return;
-       }
-
-       pci_write_config_dword(dev, reg, new);
-       pci_read_config_dword(dev, reg, &check);
-       if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ?
-               PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
-               printk(KERN_ERR "PCI: Error while updating region "
-                      "%s/%d (%08x != %08x)\n", pci_name(dev), resource,
-                      new, check);
-       }
-}
-
  void pcibios_align_resource(void *data, struct resource *res,
                             resource_size_t size, resource_size_t align)
                             __attribute__ ((weak));
diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c

index 112b09f16f367ffe788e2623a91debb118454c16..d00a3656c287fed07c13b4609997e1ff09f8e375 100644 (file)
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -408,7 +408,7 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
         dev->class = class >> 8;
         dev->revision = class & 0xff;
  
-       sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+       sprintf(dev->dev.bus_id, "%04x:%02x:%02x.%d", pci_domain_nr(bus),
                 dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
  
         if (ofpci_verbose)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile

index 5112c84f542164e37a74ed9785451d127cd8da95..da140611bb57593ed401a5de6ee63a84cc408349 100644 (file)
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -8,8 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
  
  ifdef CONFIG_FTRACE
  # Do not profile debug utilities
-CFLAGS_REMOVE_tsc_64.o = -pg
-CFLAGS_REMOVE_tsc_32.o = -pg
+CFLAGS_REMOVE_tsc.o = -pg
  CFLAGS_REMOVE_rtc.o = -pg
  endif
  
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c

index de2d2e4ebad97217be93de05b32886cfa09d0526..7c074eec39fb56ebff16c26f3351b6eae79cca50 100644 (file)
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
         if (cpu_has(c, X86_FEATURE_ACPI))
                 buf[2] |= ACPI_PDC_T_FFH;
  
+       /*
+        * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
+        */
+       if (!cpu_has(c, X86_FEATURE_MWAIT))
+               buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
+
         obj->type = ACPI_TYPE_BUFFER;
         obj->buffer.length = 12;
         obj->buffer.pointer = (u8 *) buf;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c

index 793ad2045f585acceb8501d5c08c86f20803f871..868de3d5c39de9144bfc42574bcb8dd0d24cca3e 100644 (file)
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -158,6 +158,8 @@ static int __init acpi_sleep_setup(char *str)
                         acpi_realmode_flags |= 2;
                 if (strncmp(str, "s3_beep", 7) == 0)
                         acpi_realmode_flags |= 4;
+               if (strncmp(str, "old_ordering", 12) == 0)
+                       acpi_old_suspend_ordering();
                 str = strchr(str, ',');
                 if (str != NULL)
                         str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c

index 75cb5da4ea0ad1880d6c25f02f8b510cb5d9ac97..bf9b441331e9fb2f42ec1a89fcc683d48c6af839 100644 (file)
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1213,9 +1213,9 @@ static int suspend(int vetoable)
         if (err != APM_SUCCESS)
                 apm_error("suspend", err);
         err = (err == APM_SUCCESS) ? 0 : -EIO;
-       device_power_up();
+       device_power_up(PMSG_RESUME);
         local_irq_enable();
-       device_resume();
+       device_resume(PMSG_RESUME);
         queue_event(APM_NORMAL_RESUME, NULL);
         spin_lock(&user_list_lock);
         for (as = user_list; as != NULL; as = as->next) {
@@ -1240,7 +1240,7 @@ static void standby(void)
                 apm_error("standby", err);
  
         local_irq_disable();
-       device_power_up();
+       device_power_up(PMSG_RESUME);
         local_irq_enable();
  }
  
@@ -1326,7 +1326,7 @@ static void check_events(void)
                         ignore_bounce = 1;
                         if ((event != APM_NORMAL_RESUME)
                             || (ignore_normal_resume == 0)) {
-                               device_resume();
+                               device_resume(PMSG_RESUME);
                                 queue_event(event, NULL);
                         }
                         ignore_normal_resume = 0;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c

index a4665f37cfc5dc9f3d3b7097c08166d061643de6..a0e11c0cc872f03b1ca9b85fd9521f19fa8a3269 100644 (file)
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -120,7 +120,18 @@ static struct chipset early_qrk[] __initdata = {
         {}
  };
  
-static void __init check_dev_quirk(int num, int slot, int func)
+/**
+ * check_dev_quirk - apply early quirks to a given PCI device
+ * @num: bus number
+ * @slot: slot number
+ * @func: PCI function
+ *
+ * Check the vendor & device ID against the early quirks table.
+ *
+ * If the device is single function, let early_quirks() know so we don't
+ * poke at this device again.
+ */
+static int __init check_dev_quirk(int num, int slot, int func)
  {
         u16 class;
         u16 vendor;
@@ -131,7 +142,7 @@ static void __init check_dev_quirk(int num, int slot, int func)
         class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
  
         if (class == 0xffff)
-               return;
+               return -1; /* no class, treat as single function */
  
         vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID);
  
@@ -154,7 +165,9 @@ static void __init check_dev_quirk(int num, int slot, int func)
         type = read_pci_config_byte(num, slot, func,
                                     PCI_HEADER_TYPE);
         if (!(type & 0x80))
-               return;
+               return -1;
+
+       return 0;
  }
  
  void __init early_quirks(void)
@@ -167,6 +180,9 @@ void __init early_quirks(void)
         /* Poor man's PCI discovery */
         for (num = 0; num < 32; num++)
                 for (slot = 0; slot < 32; slot++)
-                       for (func = 0; func < 8; func++)
-                               check_dev_quirk(num, slot, func);
+                       for (func = 0; func < 8; func++) {
+                               /* Only probe function 0 on single fn devices */
+                               if (check_dev_quirk(num, slot, func))
+                                       break;
+                       }
  }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c

index 7dceea947232f5739b7844c10ba9c2b22f336e95..4d629c62f4f8fbb993a49c0d5f12d7e88bbd94b5 100644 (file)
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,12 @@
  #include <linux/module.h>
  #include <linux/pm.h>
  #include <linux/clockchips.h>
+#include <asm/system.h>
+
+unsigned long idle_halt;
+EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
  
  struct kmem_cache *task_xstate_cachep;
  
@@ -325,7 +331,27 @@ static int __init idle_setup(char *str)
                 pm_idle = poll_idle;
         } else if (!strcmp(str, "mwait"))
                 force_mwait = 1;
-       else
+       else if (!strcmp(str, "halt")) {
+               /*
+                * When the boot option of idle=halt is added, halt is
+                * forced to be used for CPU idle. In such case CPU C2/C3
+                * won't be used again.
+                * To continue to load the CPU idle driver, don't touch
+                * the boot_option_idle_override.
+                */
+               pm_idle = default_idle;
+               idle_halt = 1;
+               return 0;
+       } else if (!strcmp(str, "nomwait")) {
+               /*
+                * If the boot option of "idle=nomwait" is added,
+                * it means that mwait will be disabled for CPU C2/C3
+                * states. In such case it won't touch the variable
+                * of boot_option_idle_override.
+                */
+               idle_nomwait = 1;
+               return 0;
+       } else
                 return -1;
  
         boot_option_idle_override = 1;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c

index 36c540d4ac4b6164dbd02d6b23951ea1315f134f..531b55b8e81a1de1827eac5691d5f8aef1d8d10c 100644 (file)
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -684,6 +684,11 @@ void __init setup_arch(char **cmdline_p)
                 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
         }
  
+#ifdef CONFIG_PCI
+       if (pci_early_dump_regs)
+               early_dump_pci_devices();
+#endif
+
         finish_e820_parsing();
  
  #ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c

index f41d67f8f83153b7ed079b05131ad878ac7235be..1eb2973a301ce4552e931fb1968a27ae46771fd0 100644 (file)
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -156,10 +156,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
  
         num_memory_chunks++;
  
-       printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)"
+       printk(KERN_DEBUG "Memory range %08lx to %08lx"
                           " in proximity domain %02x %s\n",
                 start_pfn, end_pfn,
-               memory_affinity->memory_type,
                 pxm,
                 ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
                  "enabled and removable" : "enabled" ) );
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c

index 20b9f59f95df85203009cce11d688f2510cbeab3..b67732bbb85a3a562063c36e579ee769ccdf86c1 100644 (file)
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -20,6 +20,7 @@
  unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
                                 PCI_PROBE_MMCONF;
  
+unsigned int pci_early_dump_regs;
  static int pci_bf_sort;
  int pci_routeirq;
  int pcibios_last_bus = -1;
@@ -31,7 +32,7 @@ struct pci_raw_ops *raw_pci_ext_ops;
  int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
                                                 int reg, int len, u32 *val)
  {
-       if (reg < 256 && raw_pci_ops)
+       if (domain == 0 && reg < 256 && raw_pci_ops)
                 return raw_pci_ops->read(domain, bus, devfn, reg, len, val);
         if (raw_pci_ext_ops)
                 return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val);
@@ -41,7 +42,7 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
  int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
                                                 int reg, int len, u32 val)
  {
-       if (reg < 256 && raw_pci_ops)
+       if (domain == 0 && reg < 256 && raw_pci_ops)
                 return raw_pci_ops->write(domain, bus, devfn, reg, len, val);
         if (raw_pci_ext_ops)
                 return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val);
@@ -121,6 +122,21 @@ void __init dmi_check_skip_isa_align(void)
         dmi_check_system(can_skip_pciprobe_dmi_table);
  }
  
+static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+{
+       struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
+
+       if (pci_probe & PCI_NOASSIGN_ROMS) {
+               if (rom_r->parent)
+                       return;
+               if (rom_r->start) {
+                       /* we deal with BIOS assigned ROM later */
+                       return;
+               }
+               rom_r->start = rom_r->end = rom_r->flags = 0;
+       }
+}
+
  /*
   *  Called after each bus is probed, but before its children
   *  are examined.
@@ -128,7 +144,11 @@ void __init dmi_check_skip_isa_align(void)
  
  void __devinit  pcibios_fixup_bus(struct pci_bus *b)
  {
+       struct pci_dev *dev;
+
         pci_read_bridge_bases(b);
+       list_for_each_entry(dev, &b->devices, bus_list)
+               pcibios_fixup_device_resources(dev);
  }
  
  /*
@@ -481,12 +501,18 @@ char * __devinit  pcibios_setup(char *str)
         else if (!strcmp(str, "rom")) {
                 pci_probe |= PCI_ASSIGN_ROMS;
                 return NULL;
+       } else if (!strcmp(str, "norom")) {
+               pci_probe |= PCI_NOASSIGN_ROMS;
+               return NULL;
         } else if (!strcmp(str, "assign-busses")) {
                 pci_probe |= PCI_ASSIGN_ALL_BUSSES;
                 return NULL;
         } else if (!strcmp(str, "use_crs")) {
                 pci_probe |= PCI_USE__CRS;
                 return NULL;
+       } else if (!strcmp(str, "earlydump")) {
+               pci_early_dump_regs = 1;
+               return NULL;
         } else if (!strcmp(str, "routeirq")) {
                 pci_routeirq = 1;
                 return NULL;
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c

index 42df4b6606dfdbc6f21d2cce0cbb6e8ae3e3ea42..858dbe3399f9f79d5b318bef2767ffcb37153960 100644 (file)
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -49,7 +49,14 @@ void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val)
  {
         PDprintk("%x writing to %x: %x\n", slot, offset, val);
         outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
-       outb(val, 0xcfc);
+       outb(val, 0xcfc + (offset&3));
+}
+
+void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val)
+{
+       PDprintk("%x writing to %x: %x\n", slot, offset, val);
+       outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+       outw(val, 0xcfc + (offset&2));
  }
  
  int early_pci_allowed(void)
@@ -57,3 +64,54 @@ int early_pci_allowed(void)
         return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
                         PCI_PROBE_CONF1;
  }
+
+void early_dump_pci_device(u8 bus, u8 slot, u8 func)
+{
+       int i;
+       int j;
+       u32 val;
+
+       printk("PCI: %02x:%02x:%02x", bus, slot, func);
+
+       for (i = 0; i < 256; i += 4) {
+               if (!(i & 0x0f))
+                       printk("\n%04x:",i);
+
+               val = read_pci_config(bus, slot, func, i);
+               for (j = 0; j < 4; j++) {
+                       printk(" %02x", val & 0xff);
+                       val >>= 8;
+               }
+       }
+       printk("\n");
+}
+
+void early_dump_pci_devices(void)
+{
+       unsigned bus, slot, func;
+
+       if (!early_pci_allowed())
+               return;
+
+       for (bus = 0; bus < 256; bus++) {
+               for (slot = 0; slot < 32; slot++) {
+                       for (func = 0; func < 8; func++) {
+                               u32 class;
+                               u8 type;
+                               class = read_pci_config(bus, slot, func,
+                                                       PCI_CLASS_REVISION);
+                               if (class == 0xffffffff)
+                                       break;
+
+                               early_dump_pci_device(bus, slot, func);
+
+                               /* No multi-function device? */
+                               type = read_pci_config_byte(bus, slot, func,
+                                                              PCI_HEADER_TYPE);
+                               if (!(type & 0x80))
+                                       break;
+                       }
+               }
+       }
+}
+
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c

index dc568c6b83f8b5a5203bc0452c06427ae9b97b2a..6a06a2eb05973c5e2f129de7a9d1bbcdefc6564a 100644 (file)
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -45,7 +45,8 @@ struct irq_router {
         char *name;
         u16 vendor, device;
         int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
-       int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+       int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq,
+               int new);
  };
  
  struct irq_router_handler {
@@ -77,7 +78,8 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
         for (i = 0; i < rt->size; i++)
                 sum += addr[i];
         if (!sum) {
-               DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt);
+               DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n",
+                       rt);
                 return rt;
         }
         return NULL;
@@ -183,7 +185,8 @@ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset,
         return (nr & 1) ? (x >> 4) : (x & 0xf);
  }
  
-static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+static void write_config_nybble(struct pci_dev *router, unsigned offset,
+       unsigned nr, unsigned int val)
  {
         u8 x;
         unsigned reg = offset + (nr >> 1);
@@ -467,7 +470,8 @@ static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int
         return inb(0xc01) & 0xf;
  }
  
-static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev,
+       int pirq, int irq)
  {
         outb(pirq, 0xc00);
         outb(irq, 0xc01);
@@ -660,7 +664,8 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router
  }
  
  
-static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+static __init int serverworks_router_probe(struct irq_router *r,
+               struct pci_dev *router, u16 device)
  {
         switch (device) {
         case PCI_DEVICE_ID_SERVERWORKS_OSB4:
@@ -827,10 +832,12 @@ static void __init pirq_find_router(struct irq_router *r)
  
         for (h = pirq_routers; h->vendor; h++) {
                 /* First look for a router match */
-               if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
+               if (rt->rtr_vendor == h->vendor &&
+                       h->probe(r, pirq_router_dev, rt->rtr_device))
                         break;
                 /* Fall back to a device match */
-               if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
+               if (pirq_router_dev->vendor == h->vendor &&
+                       h->probe(r, pirq_router_dev, pirq_router_dev->device))
                         break;
         }
         printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
@@ -845,11 +852,13 @@ static void __init pirq_find_router(struct irq_router *r)
  static struct irq_info *pirq_get_info(struct pci_dev *dev)
  {
         struct irq_routing_table *rt = pirq_table;
-       int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+       int entries = (rt->size - sizeof(struct irq_routing_table)) /
+               sizeof(struct irq_info);
         struct irq_info *info;
  
         for (info = rt->slots; entries--; info++)
-               if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+               if (info->bus == dev->bus->number &&
+                       PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
                         return info;
         return NULL;
  }
@@ -890,7 +899,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                 DBG(" -> not routed\n" KERN_DEBUG);
                 return 0;
         }
-       DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+       DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask,
+               pirq_table->exclusive_irqs);
         mask &= pcibios_irq_mask;
  
         /* Work around broken HP Pavilion Notebooks which assign USB to
@@ -903,7 +913,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
         }
  
         /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */
-       if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) {
+       if (acer_tm360_irqrouting && dev->irq == 11 &&
+               dev->vendor == PCI_VENDOR_ID_O2) {
                 pirq = 0x68;
                 mask = 0x400;
                 dev->irq = r->get(pirq_router_dev, dev, pirq);
@@ -920,15 +931,16 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                         newirq = 0;
                 else
                         printk("\n" KERN_WARNING
-                       "PCI: IRQ %i for device %s doesn't match PIRQ mask "
-                       "- try pci=usepirqmask\n" KERN_DEBUG, newirq,
-                       pci_name(dev));
+                               "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n"
+                               KERN_DEBUG, newirq,
+                               pci_name(dev));
         }
         if (!newirq && assign) {
                 for (i = 0; i < 16; i++) {
                         if (!(mask & (1 << i)))
                                 continue;
-                       if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED))
+                       if (pirq_penalty[i] < pirq_penalty[newirq] &&
+                               can_request_irq(i, IRQF_SHARED))
                                 newirq = i;
                 }
         }
@@ -944,7 +956,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                 DBG(" -> got IRQ %d\n", irq);
                 msg = "Found";
                 eisa_set_level_irq(irq);
-       } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+       } else if (newirq && r->set &&
+               (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
                 DBG(" -> assigning IRQ %d", newirq);
                 if (r->set(pirq_router_dev, dev, pirq, newirq)) {
                         eisa_set_level_irq(newirq);
@@ -962,7 +975,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                 } else
                         return 0;
         }
-       printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev));
+       printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq,
+               pci_name(dev));
  
         /* Update IRQ for all devices with the same pirq value */
         while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
@@ -974,7 +988,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                 if (!info)
                         continue;
                 if (info->irq[pin].link == pirq) {
-                       /* We refuse to override the dev->irq information. Give a warning! */
+                       /*
+                        * We refuse to override the dev->irq
+                        * information. Give a warning!
+                        */
                         if (dev2->irq && dev2->irq != irq && \
                         (!(pci_probe & PCI_USE_PIRQ_MASK) || \
                         ((1 << dev2->irq) & mask))) {
@@ -987,7 +1004,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                         dev2->irq = irq;
                         pirq_penalty[irq]++;
                         if (dev != dev2)
-                               printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2));
+                               printk(KERN_INFO
+                                       "PCI: Sharing IRQ %d with %s\n",
+                                       irq, pci_name(dev2));
                 }
         }
         return 1;
@@ -1001,15 +1020,21 @@ static void __init pcibios_fixup_irqs(void)
         DBG(KERN_DEBUG "PCI: IRQ fixup\n");
         while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
                 /*
-                * If the BIOS has set an out of range IRQ number, just ignore it.
-                * Also keep track of which IRQ's are already in use.
+                * If the BIOS has set an out of range IRQ number, just
+                * ignore it.  Also keep track of which IRQ's are
+                * already in use.
                  */
                 if (dev->irq >= 16) {
-                       DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq);
+                       DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n",
+                               pci_name(dev), dev->irq);
                         dev->irq = 0;
                 }
-               /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
-               if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+               /*
+                * If the IRQ is already assigned to a PCI device,
+                * ignore its ISA use penalty
+                */
+               if (pirq_penalty[dev->irq] >= 100 &&
+                               pirq_penalty[dev->irq] < 100000)
                         pirq_penalty[dev->irq] = 0;
                 pirq_penalty[dev->irq]++;
         }
@@ -1025,8 +1050,13 @@ static void __init pcibios_fixup_irqs(void)
                         int irq;
  
                         if (pin) {
-                               pin--;          /* interrupt pins are numbered starting from 1 */
-                               irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+                               /*
+                                * interrupt pins are numbered starting
+                                * from 1
+                                */
+                               pin--;
+                               irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+                                       PCI_SLOT(dev->devfn), pin);
         /*
          * Busses behind bridges are typically not listed in the MP-table.
          * In this case we have to look up the IRQ based on the parent bus,
@@ -1067,7 +1097,8 @@ static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d)
  {
         if (!broken_hp_bios_irq9) {
                 broken_hp_bios_irq9 = 1;
-               printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+               printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+                       d->ident);
         }
         return 0;
  }
@@ -1080,7 +1111,8 @@ static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d)
  {
         if (!acer_tm360_irqrouting) {
                 acer_tm360_irqrouting = 1;
-               printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+               printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+                       d->ident);
         }
         return 0;
  }
@@ -1092,7 +1124,8 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = {
                 .matches = {
                         DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
                         DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"),
-                       DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION,
+                               "HP Pavilion Notebook Model GE"),
                         DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
                 },
         },
@@ -1131,7 +1164,10 @@ int __init pcibios_irq_init(void)
                                 if (!(pirq_table->exclusive_irqs & (1 << i)))
                                         pirq_penalty[i] += 100;
                 }
-               /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+               /*
+                * If we're using the I/O APIC, avoid using the PCI IRQ
+                * routing table
+                */
                 if (io_apic_assign_pci_irqs)
                         pirq_table = NULL;
         }
@@ -1175,7 +1211,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
         if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
                 char *msg = "";
  
-               pin--;          /* interrupt pins are numbered starting from 1 */
+               pin--; /* interrupt pins are numbered starting from 1 */
  
                 if (io_apic_assign_pci_irqs) {
                         int irq;
@@ -1195,13 +1231,16 @@ static int pirq_enable_irq(struct pci_dev *dev)
                                 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
                                                 PCI_SLOT(bridge->devfn), pin);
                                 if (irq >= 0)
-                                       printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
-                                               pci_name(bridge), 'A' + pin, irq);
+                                       printk(KERN_WARNING
+                                               "PCI: using PPB %s[%c] to get irq %d\n",
+                                               pci_name(bridge),
+                                               'A' + pin, irq);
                                 dev = bridge;
                         }
                         dev = temp_dev;
                         if (irq >= 0) {
-                               printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+                               printk(KERN_INFO
+                                       "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
                                         pci_name(dev), 'A' + pin, irq);
                                 dev->irq = irq;
                                 return 0;
@@ -1212,12 +1251,17 @@ static int pirq_enable_irq(struct pci_dev *dev)
                 else
                         msg = " Please try using pci=biosirq.";
  
-               /* With IDE legacy devices the IRQ lookup failure is not a problem.. */
-               if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+               /*
+                * With IDE legacy devices the IRQ lookup failure is not
+                * a problem..
+                */
+               if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE &&
+                               !(dev->class & 0x5))
                         return 0;
  
-               printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
-                      'A' + pin, pci_name(dev), msg);
+               printk(KERN_WARNING
+                       "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+                       'A' + pin, pci_name(dev), msg);
         }
         return 0;
  }
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h

index b2270a55b0cf48ed934913d48a6c297cee38e1c8..3e25deb821ac9152feb484db2414565c541f18de 100644 (file)
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -28,6 +28,7 @@
  #define PCI_USE__CRS           0x10000
  #define PCI_CHECK_ENABLE_AMD_MMCONF    0x20000
  #define PCI_HAS_IO_ECS         0x40000
+#define PCI_NOASSIGN_ROMS      0x80000
  
  extern unsigned int pci_probe;
  extern unsigned long pirq_table_addr;
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig

index bba867391a85bff4c08f9e61632026084cf59cea..735f5ea17473152fc9560a49f15de97cb1dd71b4 100644 (file)
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -336,6 +336,15 @@ config ACPI_EC
           the battery and thermal drivers.  If you are compiling for a 
           mobile system, say Y.
  
+config ACPI_PCI_SLOT
+       tristate "PCI slot detection driver"
+       default n
+       help
+         This driver will attempt to discover all PCI slots in your system,
+         and creates entries in /sys/bus/pci/slots/. This feature can
+         help you correlate PCI bus addresses with the physical geography
+         of your slots. If you are unsure, say N.
+
  config ACPI_POWER
         bool
         default y
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile

index 40b0fcae4c78a71ac083119c96f6396e6e3bab07..52a4cd4b81d0735e916f4ab6ce037265106c27d5 100644 (file)
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -21,7 +21,7 @@ obj-$(CONFIG_X86)             += blacklist.o
  #
  # ACPI Core Subsystem (Interpreter)
  #
-obj-y                          += osl.o utils.o \
+obj-y                          += osl.o utils.o reboot.o\
                                    dispatcher/ events/ executer/ hardware/ \
                                    namespace/ parser/ resources/ tables/ \
                                    utilities/
@@ -48,6 +48,7 @@ obj-$(CONFIG_ACPI_DOCK)               += dock.o
  obj-$(CONFIG_ACPI_BAY)         += bay.o
  obj-$(CONFIG_ACPI_VIDEO)       += video.o
  obj-y                          += pci_root.o pci_link.o pci_irq.o pci_bind.o
+obj-$(CONFIG_ACPI_PCI_SLOT)    += pci_slot.o
  obj-$(CONFIG_ACPI_POWER)       += power.o
  obj-$(CONFIG_ACPI_PROCESSOR)   += processor.o
  obj-$(CONFIG_ACPI_CONTAINER)   += container.o
diff --git a/drivers/acpi/bay.c b/drivers/acpi/bay.c

index 61b6c5beb2d347d32d1fe1e3ee1ff8195e5f731d..e6caf5d42e0eb72a7f49a9b44fa00c290937dbcf 100644 (file)
--- a/drivers/acpi/bay.c
+++ b/drivers/acpi/bay.c
@@ -377,6 +377,9 @@ static int __init bay_init(void)
  
         INIT_LIST_HEAD(&drive_bays);
  
+       if (acpi_disabled)
+               return -ENODEV;
+
         if (acpi_disabled)
                 return -ENODEV;
  
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c

index a6dbcf4d9ef57dcdfa93ecc045e51fd60a4b20d1..ccae305ee55dcb554c441d908c20b43d509b3709 100644 (file)
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -295,6 +295,28 @@ int acpi_bus_set_power(acpi_handle handle, int state)
  
  EXPORT_SYMBOL(acpi_bus_set_power);
  
+bool acpi_bus_power_manageable(acpi_handle handle)
+{
+       struct acpi_device *device;
+       int result;
+
+       result = acpi_bus_get_device(handle, &device);
+       return result ? false : device->flags.power_manageable;
+}
+
+EXPORT_SYMBOL(acpi_bus_power_manageable);
+
+bool acpi_bus_can_wakeup(acpi_handle handle)
+{
+       struct acpi_device *device;
+       int result;
+
+       result = acpi_bus_get_device(handle, &device);
+       return result ? false : device->wakeup.flags.valid;
+}
+
+EXPORT_SYMBOL(acpi_bus_can_wakeup);
+
  /* --------------------------------------------------------------------------
                                  Event Management
     -------------------------------------------------------------------------- */
@@ -612,7 +634,7 @@ static int __init acpi_bus_init_irq(void)
         return 0;
  }
  
-acpi_native_uint acpi_gbl_permanent_mmap;
+u8 acpi_gbl_permanent_mmap;
  
  
  void __init acpi_early_init(void)
diff --git a/drivers/acpi/dispatcher/dsinit.c b/drivers/acpi/dispatcher/dsinit.c

index 610b1ee102b02c9b9e3bdb6f3a5fea8599a7cb1b..949f7c75029ee05380cc23e01f17c6e2d43e6fe1 100644 (file)
--- a/drivers/acpi/dispatcher/dsinit.c
+++ b/drivers/acpi/dispatcher/dsinit.c
@@ -151,7 +151,7 @@ acpi_ds_init_one_object(acpi_handle obj_handle,
   ******************************************************************************/
  
  acpi_status
-acpi_ds_initialize_objects(acpi_native_uint table_index,
+acpi_ds_initialize_objects(u32 table_index,
                            struct acpi_namespace_node * start_node)
  {
         acpi_status status;
diff --git a/drivers/acpi/dispatcher/dsmethod.c b/drivers/acpi/dispatcher/dsmethod.c

index 2509809a36cf3bb4ac269bb2b09c80fb869a1454..4613b9ca57929f5c4ba867c6f8aa496a887d3dbe 100644 (file)
--- a/drivers/acpi/dispatcher/dsmethod.c
+++ b/drivers/acpi/dispatcher/dsmethod.c
@@ -377,7 +377,6 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread,
         }
  
         info->parameters = &this_walk_state->operands[0];
-       info->parameter_type = ACPI_PARAM_ARGS;
  
         status = acpi_ds_init_aml_walk(next_walk_state, NULL, method_node,
                                        obj_desc->method.aml_start,
diff --git a/drivers/acpi/dispatcher/dsopcode.c b/drivers/acpi/dispatcher/dsopcode.c

index a818e0ddb996c0bf124a6060adc25cf74f5f2cd4..6a81c4400edf4767b770fcec8cfe807d8ce36ade 100644 (file)
--- a/drivers/acpi/dispatcher/dsopcode.c
+++ b/drivers/acpi/dispatcher/dsopcode.c
@@ -691,12 +691,6 @@ acpi_ds_eval_buffer_field_operands(struct acpi_walk_state *walk_state,
  
         status = acpi_ex_resolve_operands(op->common.aml_opcode,
                                           ACPI_WALK_OPERANDS, walk_state);
-
-       ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE,
-                          acpi_ps_get_opcode_name(op->common.aml_opcode),
-                          walk_state->num_operands,
-                          "after AcpiExResolveOperands");
-
         if (ACPI_FAILURE(status)) {
                 ACPI_ERROR((AE_INFO, "(%s) bad operand(s) (%X)",
                             acpi_ps_get_opcode_name(op->common.aml_opcode),
@@ -785,10 +779,6 @@ acpi_ds_eval_region_operands(struct acpi_walk_state *walk_state,
                 return_ACPI_STATUS(status);
         }
  
-       ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE,
-                          acpi_ps_get_opcode_name(op->common.aml_opcode),
-                          1, "after AcpiExResolveOperands");
-
         obj_desc = acpi_ns_get_attached_object(node);
         if (!obj_desc) {
                 return_ACPI_STATUS(AE_NOT_EXIST);
@@ -848,7 +838,7 @@ acpi_ds_eval_table_region_operands(struct acpi_walk_state *walk_state,
         union acpi_operand_object **operand;
         struct acpi_namespace_node *node;
         union acpi_parse_object *next_op;
-       acpi_native_uint table_index;
+       u32 table_index;
         struct acpi_table_header *table;
  
         ACPI_FUNCTION_TRACE_PTR(ds_eval_table_region_operands, op);
@@ -882,10 +872,6 @@ acpi_ds_eval_table_region_operands(struct acpi_walk_state *walk_state,
                 return_ACPI_STATUS(status);
         }
  
-       ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE,
-                          acpi_ps_get_opcode_name(op->common.aml_opcode),
-                          1, "after AcpiExResolveOperands");
-
         operand = &walk_state->operands[0];
  
         /* Find the ACPI table */
@@ -1091,10 +1077,8 @@ acpi_ds_eval_bank_field_operands(struct acpi_walk_state *walk_state,
                 return_ACPI_STATUS(status);
         }
  
-       ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS, ACPI_IMODE_EXECUTE,
-                          acpi_ps_get_opcode_name(op->common.aml_opcode),
-                          1, "after AcpiExResolveOperands");
-
+       ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS,
+                          acpi_ps_get_opcode_name(op->common.aml_opcode), 1);
         /*
          * Get the bank_value operand and save it
          * (at Top of stack)
diff --git a/drivers/acpi/dispatcher/dswexec.c b/drivers/acpi/dispatcher/dswexec.c

index b246b9657eada3b0c2f7e2fd55eb156ce78ed7a9..b5072fa9c9205210ac731f45237a22cfd52c0d8a 100644 (file)
--- a/drivers/acpi/dispatcher/dswexec.c
+++ b/drivers/acpi/dispatcher/dswexec.c
@@ -408,14 +408,6 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state)
                                                             [walk_state->
                                                              num_operands - 1]),
                                                           walk_state);
-                       if (ACPI_SUCCESS(status)) {
-                               ACPI_DUMP_OPERANDS(ACPI_WALK_OPERANDS,
-                                                  ACPI_IMODE_EXECUTE,
-                                                  acpi_ps_get_opcode_name
-                                                  (walk_state->opcode),
-                                                  walk_state->num_operands,
-                                                  "after ExResolveOperands");
-                       }
                 }
  
                 if (ACPI_SUCCESS(status)) {
diff --git a/drivers/acpi/dispatcher/dswstate.c b/drivers/acpi/dispatcher/dswstate.c

index 1386ced332ecedcacdd0a7c3d61fdcf89f409b20..b00d4af791aaa7829e4c020fdc19680dba1b1433 100644 (file)
--- a/drivers/acpi/dispatcher/dswstate.c
+++ b/drivers/acpi/dispatcher/dswstate.c
@@ -70,7 +70,7 @@ acpi_status
  acpi_ds_result_pop(union acpi_operand_object **object,
                    struct acpi_walk_state *walk_state)
  {
-       acpi_native_uint index;
+       u32 index;
         union acpi_generic_state *state;
         acpi_status status;
  
@@ -122,7 +122,7 @@ acpi_ds_result_pop(union acpi_operand_object **object,
         ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
                           "Obj=%p [%s] Index=%X State=%p Num=%X\n", *object,
                           acpi_ut_get_object_type_name(*object),
-                         (u32) index, walk_state, walk_state->result_count));
+                         index, walk_state, walk_state->result_count));
  
         return (AE_OK);
  }
@@ -146,7 +146,7 @@ acpi_ds_result_push(union acpi_operand_object * object,
  {
         union acpi_generic_state *state;
         acpi_status status;
-       acpi_native_uint index;
+       u32 index;
  
         ACPI_FUNCTION_NAME(ds_result_push);
  
@@ -400,7 +400,7 @@ void
  acpi_ds_obj_stack_pop_and_delete(u32 pop_count,
                                  struct acpi_walk_state *walk_state)
  {
-       acpi_native_int i;
+       s32 i;
         union acpi_operand_object *obj_desc;
  
         ACPI_FUNCTION_NAME(ds_obj_stack_pop_and_delete);
@@ -409,7 +409,7 @@ acpi_ds_obj_stack_pop_and_delete(u32 pop_count,
                 return;
         }
  
-       for (i = (acpi_native_int) (pop_count - 1); i >= 0; i--) {
+       for (i = (s32) pop_count - 1; i >= 0; i--) {
                 if (walk_state->num_operands == 0) {
                         return;
                 }
@@ -615,14 +615,8 @@ acpi_ds_init_aml_walk(struct acpi_walk_state *walk_state,
         walk_state->pass_number = pass_number;
  
         if (info) {
-               if (info->parameter_type == ACPI_PARAM_GPE) {
-                       walk_state->gpe_event_info =
-                           ACPI_CAST_PTR(struct acpi_gpe_event_info,
-                                         info->parameters);
-               } else {
-                       walk_state->params = info->parameters;
-                       walk_state->caller_return_desc = &info->return_object;
-               }
+               walk_state->params = info->parameters;
+               walk_state->caller_return_desc = &info->return_object;
         }
  
         status = acpi_ps_init_scope(&walk_state->parser_state, op);
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c

index bb7c51f712bd6cde630e4e8e5a771ca9d9e5fc55..1e872e79db33bdeaf35cdf4fd049afc0e31acd14 100644 (file)
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -917,6 +917,9 @@ static int __init dock_init(void)
  
         dock_station = NULL;
  
+       if (acpi_disabled)
+               return 0;
+
         if (acpi_disabled)
                 return 0;
  
diff --git a/drivers/acpi/events/evevent.c b/drivers/acpi/events/evevent.c

index 5d30e5be1b1c73edad8df547dfa1f7ce9eb55809..c56c5c6ea77b9d5c8cd0d35cc794aad5ed3ba16b 100644 (file)
--- a/drivers/acpi/events/evevent.c
+++ b/drivers/acpi/events/evevent.c
@@ -188,7 +188,7 @@ acpi_status acpi_ev_install_xrupt_handlers(void)
  
  static acpi_status acpi_ev_fixed_event_initialize(void)
  {
-       acpi_native_uint i;
+       u32 i;
         acpi_status status;
  
         /*
@@ -231,7 +231,7 @@ u32 acpi_ev_fixed_event_detect(void)
         u32 int_status = ACPI_INTERRUPT_NOT_HANDLED;
         u32 fixed_status;
         u32 fixed_enable;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_NAME(ev_fixed_event_detect);
  
@@ -260,7 +260,7 @@ u32 acpi_ev_fixed_event_detect(void)
  
                         /* Found an active (signalled) event */
                         acpi_os_fixed_event_count(i);
-                       int_status |= acpi_ev_fixed_event_dispatch((u32) i);
+                       int_status |= acpi_ev_fixed_event_dispatch(i);
                 }
         }
  
diff --git a/drivers/acpi/events/evgpe.c b/drivers/acpi/events/evgpe.c

index 5354be44f87678beda3531645527a8e95a02edaf..c5e53aae86f7c53385f48ec2b30bc47178ed8d45 100644 (file)
--- a/drivers/acpi/events/evgpe.c
+++ b/drivers/acpi/events/evgpe.c
@@ -256,7 +256,7 @@ acpi_status acpi_ev_disable_gpe(struct acpi_gpe_event_info *gpe_event_info)
                 return_ACPI_STATUS(status);
         }
  
-       /* Mark wake-disabled or HW disable, or both */
+       /* Clear the appropriate enabled flags for this GPE */
  
         switch (gpe_event_info->flags & ACPI_GPE_TYPE_MASK) {
         case ACPI_GPE_TYPE_WAKE:
@@ -273,13 +273,23 @@ acpi_status acpi_ev_disable_gpe(struct acpi_gpe_event_info *gpe_event_info)
                 /* Disable the requested runtime GPE */
  
                 ACPI_CLEAR_BIT(gpe_event_info->flags, ACPI_GPE_RUN_ENABLED);
-
-               /* fallthrough */
+               break;
  
         default:
-               acpi_hw_write_gpe_enable_reg(gpe_event_info);
+               break;
         }
  
+       /*
+        * Even if we don't know the GPE type, make sure that we always
+        * disable it. low_disable_gpe will just clear the enable bit for this
+        * GPE and write it. It will not write out the current GPE enable mask,
+        * since this may inadvertently enable GPEs too early, if a rogue GPE has
+        * come in during ACPICA initialization - possibly as a result of AML or
+        * other code that has enabled the GPE.
+        */
+       status = acpi_hw_low_disable_gpe(gpe_event_info);
+       return_ACPI_STATUS(status);
+
         return_ACPI_STATUS(AE_OK);
  }
  
@@ -305,7 +315,7 @@ struct acpi_gpe_event_info *acpi_ev_get_gpe_event_info(acpi_handle gpe_device,
  {
         union acpi_operand_object *obj_desc;
         struct acpi_gpe_block_info *gpe_block;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_ENTRY();
  
@@ -379,8 +389,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
         u32 status_reg;
         u32 enable_reg;
         acpi_cpu_flags flags;
-       acpi_native_uint i;
-       acpi_native_uint j;
+       u32 i;
+       u32 j;
  
         ACPI_FUNCTION_NAME(ev_gpe_detect);
  
@@ -462,13 +472,7 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
                                          */
                                         int_status |=
                                             acpi_ev_gpe_dispatch(&gpe_block->
-                                                                event_info[(i *
-                                                                            ACPI_GPE_REGISTER_WIDTH)
-                                                                           +
-                                                                           j],
-                                                                (u32) j +
-                                                                gpe_register_info->
-                                                                base_gpe_number);
+                                               event_info[((acpi_size) i * ACPI_GPE_REGISTER_WIDTH) + j], j + gpe_register_info->base_gpe_number);
                                 }
                         }
                 }
@@ -555,10 +559,6 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
                          */
                         info->prefix_node =
                             local_gpe_event_info.dispatch.method_node;
-                       info->parameters =
-                           ACPI_CAST_PTR(union acpi_operand_object *,
-                                         gpe_event_info);
-                       info->parameter_type = ACPI_PARAM_GPE;
                         info->flags = ACPI_IGNORE_RETURN_VALUE;
  
                         status = acpi_ns_evaluate(info);
diff --git a/drivers/acpi/events/evgpeblk.c b/drivers/acpi/events/evgpeblk.c

index e6c4d4c49e79ee1f1fb4d409e60b949b2cff5e05..73c058e2f5c213b9aa3b8e01f958bc7f81b52d26 100644 (file)
--- a/drivers/acpi/events/evgpeblk.c
+++ b/drivers/acpi/events/evgpeblk.c
@@ -189,8 +189,8 @@ acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
                             struct acpi_gpe_block_info *gpe_block)
  {
         struct acpi_gpe_event_info *gpe_event_info;
-       acpi_native_uint i;
-       acpi_native_uint j;
+       u32 i;
+       u32 j;
  
         ACPI_FUNCTION_TRACE(ev_delete_gpe_handlers);
  
@@ -203,7 +203,8 @@ acpi_ev_delete_gpe_handlers(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
                 for (j = 0; j < ACPI_GPE_REGISTER_WIDTH; j++) {
                         gpe_event_info =
                             &gpe_block->
-                           event_info[(i * ACPI_GPE_REGISTER_WIDTH) + j];
+                           event_info[((acpi_size) i *
+                                       ACPI_GPE_REGISTER_WIDTH) + j];
  
                         if ((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) ==
                             ACPI_GPE_DISPATCH_HANDLER) {
@@ -744,8 +745,8 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block)
         struct acpi_gpe_event_info *gpe_event_info = NULL;
         struct acpi_gpe_event_info *this_event;
         struct acpi_gpe_register_info *this_register;
-       acpi_native_uint i;
-       acpi_native_uint j;
+       u32 i;
+       u32 j;
         acpi_status status;
  
         ACPI_FUNCTION_TRACE(ev_create_gpe_info_blocks);
@@ -983,8 +984,8 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device,
         struct acpi_gpe_walk_info gpe_info;
         u32 wake_gpe_count;
         u32 gpe_enabled_count;
-       acpi_native_uint i;
-       acpi_native_uint j;
+       u32 i;
+       u32 j;
  
         ACPI_FUNCTION_TRACE(ev_initialize_gpe_block);
  
@@ -1033,7 +1034,8 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device,
  
                         gpe_event_info =
                             &gpe_block->
-                           event_info[(i * ACPI_GPE_REGISTER_WIDTH) + j];
+                           event_info[((acpi_size) i *
+                                       ACPI_GPE_REGISTER_WIDTH) + j];
  
                         if (((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) ==
                              ACPI_GPE_DISPATCH_METHOD)
diff --git a/drivers/acpi/events/evmisc.c b/drivers/acpi/events/evmisc.c

index 2113e58e2221520b6091155b84ac3ab8b9956197..1d5670be729ab3cc13e8d57c639463adce8788dc 100644 (file)
--- a/drivers/acpi/events/evmisc.c
+++ b/drivers/acpi/events/evmisc.c
@@ -575,7 +575,7 @@ acpi_status acpi_ev_release_global_lock(void)
  
  void acpi_ev_terminate(void)
  {
-       acpi_native_uint i;
+       u32 i;
         acpi_status status;
  
         ACPI_FUNCTION_TRACE(ev_terminate);
@@ -589,7 +589,7 @@ void acpi_ev_terminate(void)
                 /* Disable all fixed events */
  
                 for (i = 0; i < ACPI_NUM_FIXED_EVENTS; i++) {
-                       status = acpi_disable_event((u32) i, 0);
+                       status = acpi_disable_event(i, 0);
                         if (ACPI_FAILURE(status)) {
                                 ACPI_ERROR((AE_INFO,
                                             "Could not disable fixed event %d",
diff --git a/drivers/acpi/events/evregion.c b/drivers/acpi/events/evregion.c

index 1628f59347524c9bbf49bc800581b93a224fb5c2..236fbd1ca438ae35c1cff68c76cc0df69556b82f 100644 (file)
--- a/drivers/acpi/events/evregion.c
+++ b/drivers/acpi/events/evregion.c
@@ -81,7 +81,7 @@ acpi_ev_install_handler(acpi_handle obj_handle,
  acpi_status acpi_ev_install_region_handlers(void)
  {
         acpi_status status;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_TRACE(ev_install_region_handlers);
  
@@ -151,7 +151,7 @@ acpi_status acpi_ev_install_region_handlers(void)
  acpi_status acpi_ev_initialize_op_regions(void)
  {
         acpi_status status;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_TRACE(ev_initialize_op_regions);
  
@@ -219,7 +219,6 @@ acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function)
         info->prefix_node = region_obj2->extra.method_REG;
         info->pathname = NULL;
         info->parameters = args;
-       info->parameter_type = ACPI_PARAM_ARGS;
         info->flags = ACPI_IGNORE_RETURN_VALUE;
  
         /*
diff --git a/drivers/acpi/events/evrgnini.c b/drivers/acpi/events/evrgnini.c

index 2e3d2c5e4f4d2a7a7b4571475646fe970733f1e3..6b94b38df07dd30b77580af0459c81e640776d29 100644 (file)
--- a/drivers/acpi/events/evrgnini.c
+++ b/drivers/acpi/events/evrgnini.c
@@ -380,7 +380,7 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node)
         acpi_status status;
         struct acpica_device_id hid;
         struct acpi_compatible_id_list *cid;
-       acpi_native_uint i;
+       u32 i;
  
         /*
          * Get the _HID and check for a PCI Root Bridge
diff --git a/drivers/acpi/events/evxfevnt.c b/drivers/acpi/events/evxfevnt.c

index 99a7502e6a87465894e262b566b99f2a8dec35bf..73bfd6bf962ff9758e910dda2a073b892d76f1cf 100644 (file)
--- a/drivers/acpi/events/evxfevnt.c
+++ b/drivers/acpi/events/evxfevnt.c
@@ -472,7 +472,6 @@ acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number, u32 flags)
  }
  
  ACPI_EXPORT_SYMBOL(acpi_clear_gpe)
-#ifdef ACPI_FUTURE_USAGE
  /*******************************************************************************
   *
   * FUNCTION:    acpi_get_event_status
@@ -489,6 +488,7 @@ ACPI_EXPORT_SYMBOL(acpi_clear_gpe)
  acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status)
  {
         acpi_status status = AE_OK;
+       u32 value;
  
         ACPI_FUNCTION_TRACE(acpi_get_event_status);
  
@@ -506,7 +506,20 @@ acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status)
  
         status =
             acpi_get_register(acpi_gbl_fixed_event_info[event].
-                             status_register_id, event_status);
+                             enable_register_id, &value);
+       if (ACPI_FAILURE(status))
+               return_ACPI_STATUS(status);
+
+       *event_status = value;
+
+       status =
+           acpi_get_register(acpi_gbl_fixed_event_info[event].
+                             status_register_id, &value);
+       if (ACPI_FAILURE(status))
+               return_ACPI_STATUS(status);
+
+       if (value)
+               *event_status |= ACPI_EVENT_FLAG_SET;
  
         return_ACPI_STATUS(status);
  }
@@ -566,7 +579,6 @@ acpi_get_gpe_status(acpi_handle gpe_device,
  }
  
  ACPI_EXPORT_SYMBOL(acpi_get_gpe_status)
-#endif                         /*  ACPI_FUTURE_USAGE  */
  /*******************************************************************************
   *
   * FUNCTION:    acpi_install_gpe_block
diff --git a/drivers/acpi/executer/exconfig.c b/drivers/acpi/executer/exconfig.c

index 39d7421905840f1437c5408d2e4fe47e31f8b68d..2a32c843cb4a384a3557d80293cc4abef5a000ee 100644 (file)
--- a/drivers/acpi/executer/exconfig.c
+++ b/drivers/acpi/executer/exconfig.c
@@ -53,7 +53,7 @@ ACPI_MODULE_NAME("exconfig")
  
  /* Local prototypes */
  static acpi_status
-acpi_ex_add_table(acpi_native_uint table_index,
+acpi_ex_add_table(u32 table_index,
                   struct acpi_namespace_node *parent_node,
                   union acpi_operand_object **ddb_handle);
  
@@ -73,7 +73,7 @@ acpi_ex_add_table(acpi_native_uint table_index,
   ******************************************************************************/
  
  static acpi_status
-acpi_ex_add_table(acpi_native_uint table_index,
+acpi_ex_add_table(u32 table_index,
                   struct acpi_namespace_node *parent_node,
                   union acpi_operand_object **ddb_handle)
  {
@@ -96,7 +96,8 @@ acpi_ex_add_table(acpi_native_uint table_index,
  
         /* Install the new table into the local data structures */
  
-       obj_desc->reference.object = ACPI_CAST_PTR(void, table_index);
+       obj_desc->reference.object = ACPI_CAST_PTR(void,
+                       (unsigned long)table_index);
  
         /* Add the table to the namespace */
  
@@ -128,12 +129,12 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state,
  {
         acpi_status status;
         union acpi_operand_object **operand = &walk_state->operands[0];
-       acpi_native_uint table_index;
         struct acpi_namespace_node *parent_node;
         struct acpi_namespace_node *start_node;
         struct acpi_namespace_node *parameter_node = NULL;
         union acpi_operand_object *ddb_handle;
         struct acpi_table_header *table;
+       u32 table_index;
  
         ACPI_FUNCTION_TRACE(ex_load_table_op);
  
@@ -280,7 +281,7 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc,
  {
         union acpi_operand_object *ddb_handle;
         struct acpi_table_desc table_desc;
-       acpi_native_uint table_index;
+       u32 table_index;
         acpi_status status;
         u32 length;
  
@@ -437,7 +438,7 @@ acpi_status acpi_ex_unload_table(union acpi_operand_object *ddb_handle)
  {
         acpi_status status = AE_OK;
         union acpi_operand_object *table_desc = ddb_handle;
-       acpi_native_uint table_index;
+       u32 table_index;
         struct acpi_table_header *table;
  
         ACPI_FUNCTION_TRACE(ex_unload_table);
@@ -454,9 +455,9 @@ acpi_status acpi_ex_unload_table(union acpi_operand_object *ddb_handle)
                 return_ACPI_STATUS(AE_BAD_PARAMETER);
         }
  
-       /* Get the table index from the ddb_handle */
+       /* Get the table index from the ddb_handle (acpi_size for 64-bit case) */
  
-       table_index = (acpi_native_uint) table_desc->reference.object;
+       table_index = (u32) (acpi_size) table_desc->reference.object;
  
         /* Invoke table handler if present */
  
diff --git a/drivers/acpi/executer/exconvrt.c b/drivers/acpi/executer/exconvrt.c

index fd954b4ed83d7fc1cfdbaaa4a42c41e9fbc89473..261d97516d9b3cf69451f0559560b846b265a875 100644 (file)
--- a/drivers/acpi/executer/exconvrt.c
+++ b/drivers/acpi/executer/exconvrt.c
@@ -288,11 +288,11 @@ acpi_ex_convert_to_ascii(acpi_integer integer,
                          u16 base, u8 * string, u8 data_width)
  {
         acpi_integer digit;
-       acpi_native_uint i;
-       acpi_native_uint j;
-       acpi_native_uint k = 0;
-       acpi_native_uint hex_length;
-       acpi_native_uint decimal_length;
+       u32 i;
+       u32 j;
+       u32 k = 0;
+       u32 hex_length;
+       u32 decimal_length;
         u32 remainder;
         u8 supress_zeros;
  
@@ -348,7 +348,7 @@ acpi_ex_convert_to_ascii(acpi_integer integer,
  
                 /* hex_length: 2 ascii hex chars per data byte */
  
-               hex_length = (acpi_native_uint) ACPI_MUL_2(data_width);
+               hex_length = ACPI_MUL_2(data_width);
                 for (i = 0, j = (hex_length - 1); i < hex_length; i++, j--) {
  
                         /* Get one hex digit, most significant digits first */
diff --git a/drivers/acpi/executer/excreate.c b/drivers/acpi/executer/excreate.c

index 60e62c4f0577aae22c5171500bfc6236b0b75285..ad09696d50690c7116a3eb741dd3d92cb9a49f4d 100644 (file)
--- a/drivers/acpi/executer/excreate.c
+++ b/drivers/acpi/executer/excreate.c
@@ -45,8 +45,6 @@
  #include <acpi/acinterp.h>
  #include <acpi/amlcode.h>
  #include <acpi/acnamesp.h>
-#include <acpi/acevents.h>
-#include <acpi/actables.h>
  
  #define _COMPONENT          ACPI_EXECUTER
  ACPI_MODULE_NAME("excreate")
diff --git a/drivers/acpi/executer/exdump.c b/drivers/acpi/executer/exdump.c

index 74f1b22601b39b4b5106966d211c12fdd62946ee..2be2e2bf95bf1447aa031be9d1cf3a33a46f39a9 100644 (file)
--- a/drivers/acpi/executer/exdump.c
+++ b/drivers/acpi/executer/exdump.c
@@ -580,25 +580,22 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth)
  
         case ACPI_TYPE_BUFFER:
  
-               acpi_os_printf("Buffer len %X @ %p\n",
+               acpi_os_printf("Buffer length %.2X @ %p\n",
                                obj_desc->buffer.length,
                                obj_desc->buffer.pointer);
  
-               length = obj_desc->buffer.length;
-               if (length > 64) {
-                       length = 64;
-               }
-
                 /* Debug only -- dump the buffer contents */
  
                 if (obj_desc->buffer.pointer) {
-                       acpi_os_printf("Buffer Contents: ");
-
-                       for (index = 0; index < length; index++) {
-                               acpi_os_printf(" %02x",
-                                              obj_desc->buffer.pointer[index]);
+                       length = obj_desc->buffer.length;
+                       if (length > 128) {
+                               length = 128;
                         }
-                       acpi_os_printf("\n");
+
+                       acpi_os_printf
+                           ("Buffer Contents: (displaying length 0x%.2X)\n",
+                            length);
+                       ACPI_DUMP_BUFFER(obj_desc->buffer.pointer, length);
                 }
                 break;
  
@@ -756,54 +753,42 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth)
   *
   * FUNCTION:    acpi_ex_dump_operands
   *
- * PARAMETERS:  Operands            - Operand list
- *              interpreter_mode    - Load or Exec
- *              Ident               - Identification
- *              num_levels          - # of stack entries to dump above line
- *              Note                - Output notation
- *              module_name         - Caller's module name
- *              line_number         - Caller's invocation line number
+ * PARAMETERS: Operands            - A list of Operand objects
+ *             opcode_name         - AML opcode name
+ *             num_operands        - Operand count for this opcode
   *
- * DESCRIPTION: Dump the object stack
+ * DESCRIPTION: Dump the operands associated with the opcode
   *
   ******************************************************************************/
  
  void
  acpi_ex_dump_operands(union acpi_operand_object **operands,
-                     acpi_interpreter_mode interpreter_mode,
-                     char *ident,
-                     u32 num_levels,
-                     char *note, char *module_name, u32 line_number)
+                     const char *opcode_name, u32 num_operands)
  {
-       acpi_native_uint i;
-
         ACPI_FUNCTION_NAME(ex_dump_operands);
  
-       if (!ident) {
-               ident = "?";
-       }
-
-       if (!note) {
-               note = "?";
+       if (!opcode_name) {
+               opcode_name = "UNKNOWN";
         }
  
         ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
-                         "************* Operand Stack Contents (Opcode [%s], %d Operands)\n",
-                         ident, num_levels));
+                         "**** Start operand dump for opcode [%s], %d operands\n",
+                         opcode_name, num_operands));
  
-       if (num_levels == 0) {
-               num_levels = 1;
+       if (num_operands == 0) {
+               num_operands = 1;
         }
  
-       /* Dump the operand stack starting at the top */
+       /* Dump each operand in list order; a zero count was raised to 1 above */
  
-       for (i = 0; num_levels > 0; i--, num_levels--) {
-               acpi_ex_dump_operand(operands[i], 0);
+       while (num_operands) {
+               acpi_ex_dump_operand(*operands, 0);
+               operands++;
+               num_operands--;
         }
  
         ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
-                         "************* Operand Stack dump from %s(%d), %s\n",
-                         module_name, line_number, note));
+                         "**** End operand dump for [%s]\n", opcode_name));
         return;
  }
  
diff --git a/drivers/acpi/executer/exfldio.c b/drivers/acpi/executer/exfldio.c

index e336b5dc7a50431720fa4aa437b85c5f1fd0ff94..9ff9d1f4615d8841eac8650bf4605e7e3cfb24fa 100644 (file)
--- a/drivers/acpi/executer/exfldio.c
+++ b/drivers/acpi/executer/exfldio.c
@@ -153,14 +153,15 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc,
                         /*
                          * Slack mode only:  We will go ahead and allow access to this
                          * field if it is within the region length rounded up to the next
-                        * access width boundary.
+                        * access width boundary. acpi_size cast for 64-bit compile.
                          */
                         if (ACPI_ROUND_UP(rgn_desc->region.length,
                                           obj_desc->common_field.
                                           access_byte_width) >=
-                           (obj_desc->common_field.base_byte_offset +
-                            (acpi_native_uint) obj_desc->common_field.
-                            access_byte_width + field_datum_byte_offset)) {
+                           ((acpi_size) obj_desc->common_field.
+                            base_byte_offset +
+                            obj_desc->common_field.access_byte_width +
+                            field_datum_byte_offset)) {
                                 return_ACPI_STATUS(AE_OK);
                         }
                 }
diff --git a/drivers/acpi/executer/exmisc.c b/drivers/acpi/executer/exmisc.c

index cc956a5b5267b5e0564b2836b235803bfc170182..731414a581a6226f1d8c53fed278123465b6253c 100644 (file)
--- a/drivers/acpi/executer/exmisc.c
+++ b/drivers/acpi/executer/exmisc.c
@@ -329,8 +329,8 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0,
  
                 /* Result of two Strings is a String */
  
-               return_desc = acpi_ut_create_string_object((acpi_size)
-                                                          (operand0->string.
+               return_desc = acpi_ut_create_string_object(((acpi_size)
+                                                           operand0->string.
                                                             length +
                                                             local_operand1->
                                                             string.length));
@@ -352,8 +352,8 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0,
  
                 /* Result of two Buffers is a Buffer */
  
-               return_desc = acpi_ut_create_buffer_object((acpi_size)
-                                                          (operand0->buffer.
+               return_desc = acpi_ut_create_buffer_object(((acpi_size)
+                                                           operand0->buffer.
                                                             length +
                                                             local_operand1->
                                                             buffer.length));
diff --git a/drivers/acpi/executer/exprep.c b/drivers/acpi/executer/exprep.c

index 3a2f8cd4c62a9bffc6d27c25ed2b4bff3c112c0c..5d438c32989d892be3d41b08e738c167ff1ad3c2 100644 (file)
--- a/drivers/acpi/executer/exprep.c
+++ b/drivers/acpi/executer/exprep.c
@@ -503,11 +503,11 @@ acpi_status acpi_ex_prep_field_value(struct acpi_create_field_info *info)
                  */
                 second_desc = obj_desc->common.next_object;
                 second_desc->extra.aml_start =
-                   ((union acpi_parse_object *)(info->data_register_node))->
-                   named.data;
+                   ACPI_CAST_PTR(union acpi_parse_object,
+                                 info->data_register_node)->named.data;
                 second_desc->extra.aml_length =
-                   ((union acpi_parse_object *)(info->data_register_node))->
-                   named.length;
+                   ACPI_CAST_PTR(union acpi_parse_object,
+                                 info->data_register_node)->named.length;
  
                 break;
  
diff --git a/drivers/acpi/executer/exregion.c b/drivers/acpi/executer/exregion.c

index 7cd8bb54fa016ba38aa07a7f1109e8304a7a8c05..7a41c409ae4d89e14165c8f28bdc131d88d1b5e1 100644 (file)
--- a/drivers/acpi/executer/exregion.c
+++ b/drivers/acpi/executer/exregion.c
@@ -156,7 +156,7 @@ acpi_ex_system_memory_space_handler(u32 function,
                 /* Create a new mapping starting at the address given */
  
                 mem_info->mapped_logical_address =
-                   acpi_os_map_memory((acpi_native_uint) address, window_size);
+                       acpi_os_map_memory((acpi_physical_address) address, window_size);
                 if (!mem_info->mapped_logical_address) {
                         ACPI_ERROR((AE_INFO,
                                     "Could not map memory at %8.8X%8.8X, size %X",
diff --git a/drivers/acpi/executer/exresop.c b/drivers/acpi/executer/exresop.c

index 73e29e566a70450eca74348cbfa3a723316999f6..54085f16ec28360d4f252c3f8e34dc230da3cbff 100644 (file)
--- a/drivers/acpi/executer/exresop.c
+++ b/drivers/acpi/executer/exresop.c
@@ -698,5 +698,9 @@ acpi_ex_resolve_operands(u16 opcode,
                 }
         }
  
+       ACPI_DUMP_OPERANDS(walk_state->operands,
+                          acpi_ps_get_opcode_name(opcode),
+                          walk_state->num_operands);
+
         return_ACPI_STATUS(status);
  }
diff --git a/drivers/acpi/executer/exstore.c b/drivers/acpi/executer/exstore.c

index 76c875bc3154cf8ff799efd80fc6ae3d047aab0c..38b55e352495df4ed3808ffe713995dafe05e8db 100644 (file)
--- a/drivers/acpi/executer/exstore.c
+++ b/drivers/acpi/executer/exstore.c
@@ -343,12 +343,6 @@ acpi_ex_store(union acpi_operand_object *source_desc,
                             acpi_ut_get_object_type_name(dest_desc),
                             dest_desc));
  
-               ACPI_DUMP_STACK_ENTRY(source_desc);
-               ACPI_DUMP_STACK_ENTRY(dest_desc);
-               ACPI_DUMP_OPERANDS(&dest_desc, ACPI_IMODE_EXECUTE, "ExStore",
-                                  2,
-                                  "Target is not a Reference or Constant object");
-
                 return_ACPI_STATUS(AE_AML_OPERAND_TYPE);
         }
  
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c

index 6cf10cbc1eee41687e462d14168827d6830b4cb0..55c17afbe669c86cc6da590994a143fe6d543e77 100644 (file)
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -148,7 +148,7 @@ acpi_fan_write_state(struct file *file, const char __user * buffer,
         int result = 0;
         struct seq_file *m = file->private_data;
         struct acpi_device *device = m->private;
-       char state_string[12] = { '\0' };
+       char state_string[3] = { '\0' };
  
         if (count > sizeof(state_string) - 1)
                 return -EINVAL;
@@ -157,6 +157,12 @@ acpi_fan_write_state(struct file *file, const char __user * buffer,
                 return -EFAULT;
  
         state_string[count] = '\0';
+       if ((state_string[0] < '0') || (state_string[0] > '3'))
+               return -EINVAL;
+       if (state_string[1] == '\n')
+               state_string[1] = '\0';
+       if (state_string[1] != '\0')
+               return -EINVAL;
  
         result = acpi_bus_set_power(device->handle,
                                     simple_strtoul(state_string, NULL, 0));
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c

index 9b227d4dc9c9bbd43b7649b66b7ee311371f7eaf..0f2dd81736bda796c8ace56f1f216edca65f88fd 100644 (file)
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -166,6 +166,8 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle)
                                 "firmware_node");
                 ret = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
                                 "physical_node");
+               if (acpi_dev->wakeup.flags.valid)
+                       device_set_wakeup_capable(dev, true);
         }
  
         return 0;
@@ -333,6 +335,9 @@ static int __init acpi_rtc_init(void)
  {
         struct device *dev = get_rtc_dev();
  
+       if (acpi_disabled)
+               return 0;
+
         if (acpi_disabled)
                 return 0;
  
diff --git a/drivers/acpi/hardware/hwgpe.c b/drivers/acpi/hardware/hwgpe.c

index 14bc4f456ae8c3762889fef4c3badaf812f9977b..0b80db9d91979e17ce418f840a56d3096afb5f2d 100644 (file)
--- a/drivers/acpi/hardware/hwgpe.c
+++ b/drivers/acpi/hardware/hwgpe.c
@@ -53,6 +53,54 @@ static acpi_status
  acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
                                 struct acpi_gpe_block_info *gpe_block);
  
+/******************************************************************************
+ *
+ * FUNCTION:   acpi_hw_low_disable_gpe
+ *
+ * PARAMETERS: gpe_event_info      - Info block for the GPE to be disabled
+ *
+ * RETURN:     Status (AE_NOT_EXIST if the GPE has no register info block)
+ *
+ * DESCRIPTION: Read-modify-write the enable register to clear this GPE's bit.
+ *
+ ******************************************************************************/
+
+acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info)
+{
+       struct acpi_gpe_register_info *gpe_register_info;
+       acpi_status status;
+       u32 enable_mask;
+
+       /* Get the info block for the register holding this GPE; fail if none */
+
+       gpe_register_info = gpe_event_info->register_info;
+       if (!gpe_register_info) {
+               return (AE_NOT_EXIST);
+       }
+
+       /* Read the current enable mask so the other GPE bits are written back */
+
+       status = acpi_hw_low_level_read(ACPI_GPE_REGISTER_WIDTH, &enable_mask,
+                                       &gpe_register_info->enable_address);
+       if (ACPI_FAILURE(status)) {
+               return (status);
+       }
+
+       /* Clear one bit: position is the GPE number minus the register's base */
+
+       ACPI_CLEAR_BIT(enable_mask,
+                      ((u32) 1 <<
+                       (gpe_event_info->gpe_number -
+                        gpe_register_info->base_gpe_number)));
+
+       /* Write the updated mask back; the write status is the return value */
+
+       status = acpi_hw_low_level_write(ACPI_GPE_REGISTER_WIDTH, enable_mask,
+                                        &gpe_register_info->enable_address);
+
+       return (status);
+}
+
  /******************************************************************************
   *
   * FUNCTION:    acpi_hw_write_gpe_enable_reg
@@ -68,7 +116,7 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
   ******************************************************************************/
  
  acpi_status
-acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info *gpe_event_info)
+acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info * gpe_event_info)
  {
         struct acpi_gpe_register_info *gpe_register_info;
         acpi_status status;
@@ -138,7 +186,6 @@ acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info)
   *
   ******************************************************************************/
  
-#ifdef ACPI_FUTURE_USAGE
  acpi_status
  acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
                        acpi_event_status * event_status)
@@ -198,7 +245,6 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
        unlock_and_exit:
         return (status);
  }
-#endif                         /*  ACPI_FUTURE_USAGE  */
  
  /******************************************************************************
   *
diff --git a/drivers/acpi/namespace/nsdump.c b/drivers/acpi/namespace/nsdump.c

index 5445751b8a3e7ee6e9f3eae788fc297995d36d43..0ab22004728a9aae08f7697cb8230456177d0c9f 100644 (file)
--- a/drivers/acpi/namespace/nsdump.c
+++ b/drivers/acpi/namespace/nsdump.c
@@ -73,7 +73,7 @@ acpi_ns_dump_one_device(acpi_handle obj_handle,
  
  void acpi_ns_print_pathname(u32 num_segments, char *pathname)
  {
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_NAME(ns_print_pathname);
  
@@ -515,12 +515,12 @@ acpi_ns_dump_one_object(acpi_handle obj_handle,
  
                         if (obj_type > ACPI_TYPE_LOCAL_MAX) {
                                 acpi_os_printf
-                                   ("(Ptr to ACPI Object type %X [UNKNOWN])\n",
+                                   ("(Pointer to ACPI Object type %.2X [UNKNOWN])\n",
                                      obj_type);
                                 bytes_to_dump = 32;
                         } else {
                                 acpi_os_printf
-                                   ("(Ptr to ACPI Object type %X [%s])\n",
+                                   ("(Pointer to ACPI Object type %.2X [%s])\n",
                                      obj_type, acpi_ut_get_type_name(obj_type));
                                 bytes_to_dump =
                                     sizeof(union acpi_operand_object);
diff --git a/drivers/acpi/namespace/nseval.c b/drivers/acpi/namespace/nseval.c

index 14bdfa92bea04cc9da1bfde6a0344ad57a29e2f4..d369164e00b0bc16d594e8617e93be3ada99d95f 100644 (file)
--- a/drivers/acpi/namespace/nseval.c
+++ b/drivers/acpi/namespace/nseval.c
@@ -138,6 +138,41 @@ acpi_status acpi_ns_evaluate(struct acpi_evaluate_info * info)
                         return_ACPI_STATUS(AE_NULL_OBJECT);
                 }
  
+               /*
+                * Calculate the number of arguments being passed to the method
+                */
+
+               info->param_count = 0;
+               if (info->parameters) {
+                       while (info->parameters[info->param_count])
+                               info->param_count++;
+               }
+
+               /* Error if too few arguments were passed in */
+
+               if (info->param_count < info->obj_desc->method.param_count) {
+                       ACPI_ERROR((AE_INFO,
+                                   "Insufficient arguments - "
+                                   "method [%4.4s] needs %d, found %d",
+                                   acpi_ut_get_node_name(info->resolved_node),
+                                   info->obj_desc->method.param_count,
+                                   info->param_count));
+                       return_ACPI_STATUS(AE_MISSING_ARGUMENTS);
+               }
+
+               /* Just a warning if too many arguments */
+
+               else if (info->param_count >
+                               info->obj_desc->method.param_count) {
+                       ACPI_WARNING((AE_INFO,
+                                     "Excess arguments - "
+                                     "method [%4.4s] needs %d, found %d",
+                                     acpi_ut_get_node_name(info->
+                                                           resolved_node),
+                                     info->obj_desc->method.param_count,
+                                     info->param_count));
+               }
+
                 ACPI_DUMP_PATHNAME(info->resolved_node, "Execute Method:",
                                    ACPI_LV_INFO, _COMPONENT);
  
diff --git a/drivers/acpi/namespace/nsinit.c b/drivers/acpi/namespace/nsinit.c

index 6d6d930c8e1820d21d0bc40441065dddf9ba0ef9..e4c57510d798753cd8366f4a2fbbaecb4d545450 100644 (file)
--- a/drivers/acpi/namespace/nsinit.c
+++ b/drivers/acpi/namespace/nsinit.c
@@ -542,7 +542,6 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
         info->prefix_node = device_node;
         info->pathname = METHOD_NAME__INI;
         info->parameters = NULL;
-       info->parameter_type = ACPI_PARAM_ARGS;
         info->flags = ACPI_IGNORE_RETURN_VALUE;
  
         /*
diff --git a/drivers/acpi/namespace/nsload.c b/drivers/acpi/namespace/nsload.c

index 2c92f6cf5ce173c94a4a2aeaf64294e036089ded..a4a412b7c029526faabd771b3742fd65db3ba7a3 100644 (file)
--- a/drivers/acpi/namespace/nsload.c
+++ b/drivers/acpi/namespace/nsload.c
@@ -71,8 +71,7 @@ static acpi_status acpi_ns_delete_subtree(acpi_handle start_handle);
   ******************************************************************************/
  
  acpi_status
-acpi_ns_load_table(acpi_native_uint table_index,
-                  struct acpi_namespace_node *node)
+acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node)
  {
         acpi_status status;
  
diff --git a/drivers/acpi/namespace/nsparse.c b/drivers/acpi/namespace/nsparse.c

index 46a79b0103b676ef5edf0008f40f70f1eb0ad80a..a82271a9dbb3d5b42495f7f10a809313730fc1c3 100644 (file)
--- a/drivers/acpi/namespace/nsparse.c
+++ b/drivers/acpi/namespace/nsparse.c
@@ -63,13 +63,13 @@ ACPI_MODULE_NAME("nsparse")
   *
   ******************************************************************************/
  acpi_status
-acpi_ns_one_complete_parse(acpi_native_uint pass_number,
-                          acpi_native_uint table_index,
-                          struct acpi_namespace_node * start_node)
+acpi_ns_one_complete_parse(u32 pass_number,
+                          u32 table_index,
+                          struct acpi_namespace_node *start_node)
  {
         union acpi_parse_object *parse_root;
         acpi_status status;
-       acpi_native_uint aml_length;
+       u32 aml_length;
         u8 *aml_start;
         struct acpi_walk_state *walk_state;
         struct acpi_table_header *table;
@@ -112,8 +112,8 @@ acpi_ns_one_complete_parse(acpi_native_uint pass_number,
                 aml_start = (u8 *) table + sizeof(struct acpi_table_header);
                 aml_length = table->length - sizeof(struct acpi_table_header);
                 status = acpi_ds_init_aml_walk(walk_state, parse_root, NULL,
-                                              aml_start, (u32) aml_length,
-                                              NULL, (u8) pass_number);
+                                              aml_start, aml_length, NULL,
+                                              (u8) pass_number);
         }
  
         if (ACPI_FAILURE(status)) {
@@ -158,8 +158,7 @@ acpi_ns_one_complete_parse(acpi_native_uint pass_number,
   ******************************************************************************/
  
  acpi_status
-acpi_ns_parse_table(acpi_native_uint table_index,
-                   struct acpi_namespace_node *start_node)
+acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
  {
         acpi_status status;
  
diff --git a/drivers/acpi/namespace/nsutils.c b/drivers/acpi/namespace/nsutils.c

index 64c039843ed2ba9efb6da4df7978c5cce674811d..b0817e1127b105c15108ae5055240008b3a0ec6d 100644 (file)
--- a/drivers/acpi/namespace/nsutils.c
+++ b/drivers/acpi/namespace/nsutils.c
@@ -73,9 +73,9 @@ acpi_name acpi_ns_find_parent_name(struct acpi_namespace_node *node_to_search);
   ******************************************************************************/
  
  void
-acpi_ns_report_error(char *module_name,
+acpi_ns_report_error(const char *module_name,
                      u32 line_number,
-                    char *internal_name, acpi_status lookup_status)
+                    const char *internal_name, acpi_status lookup_status)
  {
         acpi_status status;
         u32 bad_name;
@@ -130,11 +130,11 @@ acpi_ns_report_error(char *module_name,
   ******************************************************************************/
  
  void
-acpi_ns_report_method_error(char *module_name,
+acpi_ns_report_method_error(const char *module_name,
                             u32 line_number,
-                           char *message,
+                           const char *message,
                             struct acpi_namespace_node *prefix_node,
-                           char *path, acpi_status method_status)
+                           const char *path, acpi_status method_status)
  {
         acpi_status status;
         struct acpi_namespace_node *node = prefix_node;
@@ -167,7 +167,8 @@ acpi_ns_report_method_error(char *module_name,
   ******************************************************************************/
  
  void
-acpi_ns_print_node_pathname(struct acpi_namespace_node *node, char *message)
+acpi_ns_print_node_pathname(struct acpi_namespace_node *node,
+                           const char *message)
  {
         struct acpi_buffer buffer;
         acpi_status status;
@@ -296,7 +297,7 @@ u32 acpi_ns_local(acpi_object_type type)
  
  void acpi_ns_get_internal_name_length(struct acpi_namestring_info *info)
  {
-       char *next_external_char;
+       const char *next_external_char;
         u32 i;
  
         ACPI_FUNCTION_ENTRY();
@@ -363,9 +364,9 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
  {
         u32 num_segments = info->num_segments;
         char *internal_name = info->internal_name;
-       char *external_name = info->next_external_char;
+       const char *external_name = info->next_external_char;
         char *result = NULL;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_TRACE(ns_build_internal_name);
  
@@ -400,12 +401,11 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
                         result = &internal_name[i];
                 } else if (num_segments == 2) {
                         internal_name[i] = AML_DUAL_NAME_PREFIX;
-                       result = &internal_name[(acpi_native_uint) (i + 1)];
+                       result = &internal_name[(acpi_size) i + 1];
                 } else {
                         internal_name[i] = AML_MULTI_NAME_PREFIX_OP;
-                       internal_name[(acpi_native_uint) (i + 1)] =
-                           (char)num_segments;
-                       result = &internal_name[(acpi_native_uint) (i + 2)];
+                       internal_name[(acpi_size) i + 1] = (char)num_segments;
+                       result = &internal_name[(acpi_size) i + 2];
                 }
         }
  
@@ -472,7 +472,8 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
   *
   *******************************************************************************/
  
-acpi_status acpi_ns_internalize_name(char *external_name, char **converted_name)
+acpi_status
+acpi_ns_internalize_name(const char *external_name, char **converted_name)
  {
         char *internal_name;
         struct acpi_namestring_info info;
@@ -528,15 +529,15 @@ acpi_status acpi_ns_internalize_name(char *external_name, char **converted_name)
  
  acpi_status
  acpi_ns_externalize_name(u32 internal_name_length,
-                        char *internal_name,
+                        const char *internal_name,
                          u32 * converted_name_length, char **converted_name)
  {
-       acpi_native_uint names_index = 0;
-       acpi_native_uint num_segments = 0;
-       acpi_native_uint required_length;
-       acpi_native_uint prefix_length = 0;
-       acpi_native_uint i = 0;
-       acpi_native_uint j = 0;
+       u32 names_index = 0;
+       u32 num_segments = 0;
+       u32 required_length;
+       u32 prefix_length = 0;
+       u32 i = 0;
+       u32 j = 0;
  
         ACPI_FUNCTION_TRACE(ns_externalize_name);
  
@@ -582,9 +583,8 @@ acpi_ns_externalize_name(u32 internal_name_length,
                         /* <count> 4-byte names */
  
                         names_index = prefix_length + 2;
-                       num_segments = (acpi_native_uint) (u8)
-                           internal_name[(acpi_native_uint)
-                                         (prefix_length + 1)];
+                       num_segments = (u8)
+                           internal_name[(acpi_size) prefix_length + 1];
                         break;
  
                 case AML_DUAL_NAME_PREFIX:
@@ -823,7 +823,7 @@ u32 acpi_ns_opens_scope(acpi_object_type type)
  
  acpi_status
  acpi_ns_get_node(struct acpi_namespace_node *prefix_node,
-                char *pathname,
+                const char *pathname,
                  u32 flags, struct acpi_namespace_node **return_node)
  {
         union acpi_generic_state scope_info;
diff --git a/drivers/acpi/namespace/nsxfeval.c b/drivers/acpi/namespace/nsxfeval.c

index a8d549187c84262d5ae7ca9bd34ceee6d4a358dd..38be5865d95df30e382f21b2adb8d2b6837aa2f9 100644 (file)
--- a/drivers/acpi/namespace/nsxfeval.c
+++ b/drivers/acpi/namespace/nsxfeval.c
@@ -182,7 +182,6 @@ acpi_evaluate_object(acpi_handle handle,
         }
  
         info->pathname = pathname;
-       info->parameter_type = ACPI_PARAM_ARGS;
  
         /* Convert and validate the device handle */
  
@@ -442,7 +441,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle,
         u32 flags;
         struct acpica_device_id hid;
         struct acpi_compatible_id_list *cid;
-       acpi_native_uint i;
+       u32 i;
         int found;
  
         status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c

index 658e5f3abae0dda28d73395955d8e91c7126c846..cb9864e39bae743f0943c41075e557dd696ac3ac 100644 (file)
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -120,10 +120,10 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header)
                         struct acpi_srat_mem_affinity *p =
                             (struct acpi_srat_mem_affinity *)header;
                         ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                         "SRAT Memory (0x%lx length 0x%lx type 0x%x) in proximity domain %d %s%s\n",
+                                         "SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s\n",
                                           (unsigned long)p->base_address,
                                           (unsigned long)p->length,
-                                         p->memory_type, p->proximity_domain,
+                                         p->proximity_domain,
                                           (p->flags & ACPI_SRAT_MEM_ENABLED)?
                                           "enabled" : "disabled",
                                           (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)?
diff --git a/drivers/acpi/parser/psargs.c b/drivers/acpi/parser/psargs.c

index e94463778845567ce02f6494fb9580bf2da85512..d830b29b85b120ceb87f793f4ddde61db98fbd4f 100644 (file)
--- a/drivers/acpi/parser/psargs.c
+++ b/drivers/acpi/parser/psargs.c
@@ -76,7 +76,7 @@ acpi_ps_get_next_package_length(struct acpi_parse_state *parser_state)
  {
         u8 *aml = parser_state->aml;
         u32 package_length = 0;
-       acpi_native_uint byte_count;
+       u32 byte_count;
         u8 byte_zero_mask = 0x3F;       /* Default [0:5] */
  
         ACPI_FUNCTION_TRACE(ps_get_next_package_length);
@@ -86,7 +86,7 @@ acpi_ps_get_next_package_length(struct acpi_parse_state *parser_state)
          * used to encode the package length, either 0,1,2, or 3
          */
         byte_count = (aml[0] >> 6);
-       parser_state->aml += (byte_count + 1);
+       parser_state->aml += ((acpi_size) byte_count + 1);
  
         /* Get bytes 3, 2, 1 as needed */
  
diff --git a/drivers/acpi/parser/psxface.c b/drivers/acpi/parser/psxface.c

index 52581454c47c3a21ca071df64818d19f173f25f0..270469aae8429201df4edfae11db91d8c5628133 100644 (file)
--- a/drivers/acpi/parser/psxface.c
+++ b/drivers/acpi/parser/psxface.c
@@ -333,9 +333,9 @@ acpi_status acpi_ps_execute_method(struct acpi_evaluate_info *info)
  static void
  acpi_ps_update_parameter_list(struct acpi_evaluate_info *info, u16 action)
  {
-       acpi_native_uint i;
+       u32 i;
  
-       if ((info->parameter_type == ACPI_PARAM_ARGS) && (info->parameters)) {
+       if (info->parameters) {
  
                 /* Update reference count for each parameter */
  
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c

index 89022a74faeeda9d9ed9b11f431c2e4c4eecb4d7..11acaee14d66ddb1752f9432107f46bf0fc1b146 100644 (file)
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -162,7 +162,7 @@ do_prt_fixups(struct acpi_prt_entry *entry, struct acpi_pci_routing_table *prt)
                     !strcmp(prt->source, quirk->source) &&
                     strlen(prt->source) >= strlen(quirk->actual_source)) {
                         printk(KERN_WARNING PREFIX "firmware reports "
-                               "%04x:%02x:%02x[%c] connected to %s; "
+                               "%04x:%02x:%02x PCI INT %c connected to %s; "
                                 "changing to %s\n",
                                 entry->id.segment, entry->id.bus,
                                 entry->id.device, 'A' + entry->pin,
@@ -429,7 +429,7 @@ acpi_pci_irq_derive(struct pci_dev *dev,
  {
         struct pci_dev *bridge = dev;
         int irq = -1;
-       u8 bridge_pin = 0;
+       u8 bridge_pin = 0, orig_pin = pin;
  
  
         if (!dev)
@@ -463,8 +463,8 @@ acpi_pci_irq_derive(struct pci_dev *dev,
         }
  
         if (irq < 0) {
-               printk(KERN_WARNING PREFIX "Unable to derive IRQ for device %s\n",
-                             pci_name(dev));
+               dev_warn(&dev->dev, "can't derive routing for PCI INT %c\n",
+                        'A' + orig_pin);
                 return -1;
         }
  
@@ -487,6 +487,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
         int triggering = ACPI_LEVEL_SENSITIVE;
         int polarity = ACPI_ACTIVE_LOW;
         char *link = NULL;
+       char link_desc[16];
         int rc;
  
  
@@ -503,7 +504,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
         pin--;
  
         if (!dev->bus) {
-               printk(KERN_ERR PREFIX "Invalid (NULL) 'bus' field\n");
+               dev_err(&dev->dev, "invalid (NULL) 'bus' field\n");
                 return -ENODEV;
         }
  
@@ -538,8 +539,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
          * driver reported one, then use it. Exit in any case.
          */
         if (irq < 0) {
-               printk(KERN_WARNING PREFIX "PCI Interrupt %s[%c]: no GSI",
-                      pci_name(dev), ('A' + pin));
+               dev_warn(&dev->dev, "PCI INT %c: no GSI", 'A' + pin);
                 /* Interrupt Line values above 0xF are forbidden */
                 if (dev->irq > 0 && (dev->irq <= 0xF)) {
                         printk(" - using IRQ %d\n", dev->irq);
@@ -554,21 +554,21 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
  
         rc = acpi_register_gsi(irq, triggering, polarity);
         if (rc < 0) {
-               printk(KERN_WARNING PREFIX "PCI Interrupt %s[%c]: failed "
-                      "to register GSI\n", pci_name(dev), ('A' + pin));
+               dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n",
+                        'A' + pin);
                 return rc;
         }
         dev->irq = rc;
  
-       printk(KERN_INFO PREFIX "PCI Interrupt %s[%c] -> ",
-              pci_name(dev), 'A' + pin);
-
         if (link)
-               printk("Link [%s] -> ", link);
+               snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
+       else
+               link_desc[0] = '\0';
  
-       printk("GSI %u (%s, %s) -> IRQ %d\n", irq,
-              (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge",
-              (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq);
+       dev_info(&dev->dev, "PCI INT %c%s -> GSI %u (%s, %s) -> IRQ %d\n",
+                'A' + pin, link_desc, irq,
+                (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge",
+                (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq);
  
         return 0;
  }
@@ -616,10 +616,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
          * (e.g. PCI_UNDEFINED_IRQ).
          */
  
-       printk(KERN_INFO PREFIX "PCI interrupt for device %s disabled\n",
-              pci_name(dev));
-
+       dev_info(&dev->dev, "PCI INT %c disabled\n", 'A' + pin);
         acpi_unregister_gsi(gsi);
-
-       return;
  }
diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c

new file mode 100644 (file)

index 0000000..b9ab030
--- /dev/null
+++ b/drivers/acpi/pci_slot.c
@@ -0,0 +1,368 @@
+/*
+ *  pci_slot.c - ACPI PCI Slot Driver
+ *
+ *  The code here is heavily leveraged from the acpiphp module.
+ *  Thanks to Matthew Wilcox <matthew@wil.cx> for much guidance.
+ *  Thanks to Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> for code
+ *  review and fixes.
+ *
+ *  Copyright (C) 2007 Alex Chiang <achiang@hp.com>
+ *  Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+
+static int debug;
+static int check_sta_before_sun;
+
+#define DRIVER_VERSION         "0.1"
+#define DRIVER_AUTHOR  "Alex Chiang <achiang@hp.com>"
+#define DRIVER_DESC    "ACPI PCI Slot Detection Driver"
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
+module_param(debug, bool, 0644);
+
+#define _COMPONENT             ACPI_PCI_COMPONENT
+ACPI_MODULE_NAME("pci_slot");
+
+/* Prefix placed in front of every message printed by the helpers below. */
+#define MY_NAME "pci_slot"
+/* err() and info() always print, at KERN_ERR and KERN_INFO respectively. */
+#define err(format, arg...) printk(KERN_ERR "%s: " format , MY_NAME , ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format , MY_NAME , ## arg)
+/* dbg() prints at KERN_DEBUG, and only when the 'debug' variable is non-zero. */
+#define dbg(format, arg...)                                    \
+       do {                                                    \
+               if (debug)                                      \
+                       printk(KERN_DEBUG "%s: " format,        \
+                               MY_NAME , ## arg);              \
+       } while (0)
+
+#define SLOT_NAME_SIZE 20              /* Inspired by #define in acpiphp.h */
+
+/*
+ * Bookkeeping record for one slot created by register_slot().  Records are
+ * linked on slot_list and released again by acpi_pci_slot_remove(), which
+ * selects them by root_handle.
+ */
+struct acpi_pci_slot {
+       acpi_handle root_handle;        /* handle of the root bridge */
+       struct pci_slot *pci_slot;      /* corresponding pci_slot */
+       struct list_head list;          /* node in the list of slots */
+};
+
+/* Forward declarations: both callbacks are referenced by the driver below. */
+static int acpi_pci_slot_add(acpi_handle handle);
+static void acpi_pci_slot_remove(acpi_handle handle);
+
+/*
+ * slot_list holds every struct acpi_pci_slot created by register_slot();
+ * slot_list_lock is held around each insertion and removal.
+ */
+static LIST_HEAD(slot_list);
+static DEFINE_MUTEX(slot_list_lock);
+/* Registered in acpi_pci_slot_init(), unregistered in acpi_pci_slot_exit(). */
+static struct acpi_pci_driver acpi_pci_slot_driver = {
+       .add = acpi_pci_slot_add,
+       .remove = acpi_pci_slot_remove,
+};
+
+/*
+ * check_slot - decide whether an ACPI object describes a slot
+ * @handle: ACPI object to examine
+ * @device: receives bits 16..31 of the _ADR value
+ * @sun: receives the value returned by _SUN
+ *
+ * Returns 0 when both _ADR and _SUN evaluate successfully, -1 otherwise.
+ * When check_sta_before_sun is set, an object whose _STA evaluates
+ * successfully but lacks ACPI_STA_DEVICE_PRESENT is rejected as well.
+ * Note that *device is written as soon as _ADR succeeds, even if _SUN then
+ * fails.
+ */
+static int
+check_slot(acpi_handle handle, int *device, unsigned long *sun)
+{
+       struct acpi_buffer path = { ACPI_ALLOCATE_BUFFER, NULL };
+       unsigned long adr, sta;
+       acpi_status st;
+       int rc = -1;            /* pessimistic default, cleared on success */
+
+       /* The path name is only used for the debug messages below */
+       acpi_get_name(handle, ACPI_FULL_PATHNAME, &path);
+       dbg("Checking slot on path: %s\n", (char *)path.pointer);
+
+       if (check_sta_before_sun) {
+               /* If SxFy doesn't have _STA, we just assume it's there */
+               st = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
+               if (ACPI_SUCCESS(st) && !(sta & ACPI_STA_DEVICE_PRESENT))
+                       goto done;
+       }
+
+       st = acpi_evaluate_integer(handle, "_ADR", NULL, &adr);
+       if (ACPI_FAILURE(st)) {
+               dbg("_ADR returned %d on %s\n", st, (char *)path.pointer);
+               goto done;
+       }
+
+       *device = (adr >> 16) & 0xffff;
+
+       /* No _SUN == not a slot == bail */
+       st = acpi_evaluate_integer(handle, "_SUN", NULL, sun);
+       if (ACPI_FAILURE(st)) {
+               dbg("_SUN returned %d on %s\n", st, (char *)path.pointer);
+               goto done;
+       }
+
+       rc = 0;
+done:
+       kfree(path.pointer);
+       return rc;
+}
+
+/*
+ * Context handed to the namespace-walk callbacks (register_slot and
+ * walk_p2p_bridge) through their 'context' argument.
+ */
+struct callback_args {
+       acpi_walk_callback      user_function;  /* only for walk_p2p_bridge */
+       struct pci_bus          *pci_bus;       /* bus the visited objects sit on */
+       acpi_handle             root_handle;    /* copied into each acpi_pci_slot */
+};
+
+/*
+ * register_slot
+ * @handle: ACPI object visited by the namespace walk
+ * @lvl: unused
+ * @context: struct callback_args pointer (pci_bus and root_handle are read)
+ * @rv: unused
+ *
+ * Called once for each SxFy object in the namespace. Don't worry about
+ * calling pci_create_slot multiple times for the same pci_bus:device,
+ * since each subsequent call simply bumps the refcount on the pci_slot.
+ *
+ * The number of calls to pci_destroy_slot from unregister_slot is
+ * symmetrical.
+ *
+ * Always returns AE_OK so that the namespace walk continues; objects that
+ * are not slots, allocation failures and pci_create_slot failures are
+ * skipped without adding anything to slot_list.
+ */
+static acpi_status
+register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
+{
+       int device;
+       unsigned long sun;
+       char name[SLOT_NAME_SIZE];
+       struct acpi_pci_slot *slot;
+       struct pci_slot *pci_slot;
+       struct callback_args *parent_context = context;
+       struct pci_bus *pci_bus = parent_context->pci_bus;
+
+       /* Not a slot object: nothing to register */
+       if (check_slot(handle, &device, &sun))
+               return AE_OK;
+
+       slot = kmalloc(sizeof(*slot), GFP_KERNEL);
+       if (!slot) {
+               err("%s: cannot allocate memory\n", __func__);
+               return AE_OK;
+       }
+
+       /* The slot is named after its _SUN value */
+       snprintf(name, sizeof(name), "%u", (u32)sun);
+       pci_slot = pci_create_slot(pci_bus, device, name);
+       if (IS_ERR(pci_slot)) {
+               err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot));
+               kfree(slot);
+               /*
+                * slot has been freed and pci_slot is an error pointer:
+                * neither may be touched or put on slot_list.
+                */
+               return AE_OK;
+       }
+
+       slot->root_handle = parent_context->root_handle;
+       slot->pci_slot = pci_slot;
+       INIT_LIST_HEAD(&slot->list);
+       mutex_lock(&slot_list_lock);
+       list_add(&slot->list, &slot_list);
+       mutex_unlock(&slot_list_lock);
+
+       dbg("pci_slot: %p, pci_bus: %x, device: %d, name: %s\n",
+               pci_slot, pci_bus->number, device, name);
+
+       return AE_OK;
+}
+
+/*
+ * walk_p2p_bridge - discover and walk p2p bridges
+ * @handle: ACPI device object visited by the namespace walk
+ * @lvl: unused
+ * @context: struct callback_args pointer supplied by the caller of the walk
+ * @rv: unused
+ *
+ * Note that when we call ourselves recursively, we pass a different
+ * value of pci_bus in the child_context.
+ *
+ * Always returns AE_OK, whatever the outcome of the nested walks.
+ */
+static acpi_status
+walk_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv)
+{
+       int device, function;
+       unsigned long adr;
+       acpi_status status;
+       acpi_handle dummy_handle;
+       acpi_walk_callback user_function;
+
+       struct pci_dev *dev;
+       struct pci_bus *pci_bus;
+       struct callback_args child_context;
+       struct callback_args *parent_context = context;
+
+       pci_bus = parent_context->pci_bus;
+       user_function = parent_context->user_function;
+
+       /* Objects without an _ADR are skipped */
+       status = acpi_get_handle(handle, "_ADR", &dummy_handle);
+       if (ACPI_FAILURE(status))
+               return AE_OK;
+
+       status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr);
+       if (ACPI_FAILURE(status))
+               return AE_OK;
+
+       /* _ADR: device number in the high 16 bits, function in the low 16 */
+       device = (adr >> 16) & 0xffff;
+       function = adr & 0xffff;
+
+       /* Only devices that have a subordinate bus are walked further */
+       dev = pci_get_slot(pci_bus, PCI_DEVFN(device, function));
+       if (!dev || !dev->subordinate)
+               goto out;
+
+       child_context.pci_bus = dev->subordinate;
+       child_context.user_function = user_function;
+       child_context.root_handle = parent_context->root_handle;
+
+       dbg("p2p bridge walk, pci_bus = %x\n", dev->subordinate->number);
+       /* First run the user callback on the objects directly below us ... */
+       status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1,
+                                    user_function, &child_context, NULL);
+       if (ACPI_FAILURE(status))
+               goto out;
+
+       /* ... then recurse to find bridges among those objects */
+       status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1,
+                                    walk_p2p_bridge, &child_context, NULL);
+out:
+       /* NOTE(review): dev may be NULL here; presumably pci_dev_put() accepts that - confirm */
+       pci_dev_put(dev);
+       return AE_OK;
+}
+
+/*
+ * walk_root_bridge - generic root bridge walker
+ * @handle: points to an acpi_pci_root
+ * @user_function: user callback for slot objects
+ *
+ * Call user_function for all objects underneath this root bridge.
+ * Walk p2p bridges underneath us and call user_function on those too.
+ *
+ * Returns 0 when the bridge is skipped (_STA evaluation fails, the bridge
+ * is not functioning, or no pci_bus is found); otherwise returns the
+ * acpi_status of the last namespace walk that was performed.
+ */
+static int
+walk_root_bridge(acpi_handle handle, acpi_walk_callback user_function)
+{
+       int seg, bus;
+       unsigned long tmp;
+       acpi_status status;
+       acpi_handle dummy_handle;
+       struct pci_bus *pci_bus;
+       struct callback_args context;
+
+       /* If the bridge doesn't have _STA, we assume it is always there */
+       status = acpi_get_handle(handle, "_STA", &dummy_handle);
+       if (ACPI_SUCCESS(status)) {
+               status = acpi_evaluate_integer(handle, "_STA", NULL, &tmp);
+               if (ACPI_FAILURE(status)) {
+                       info("%s: _STA evaluation failure\n", __func__);
+                       return 0;
+               }
+               if ((tmp & ACPI_STA_DEVICE_FUNCTIONING) == 0)
+                       /* don't register this object */
+                       return 0;
+       }
+
+       /* Segment and bus number both default to 0 when not provided */
+       status = acpi_evaluate_integer(handle, "_SEG", NULL, &tmp);
+       seg = ACPI_SUCCESS(status) ? tmp : 0;
+
+       status = acpi_evaluate_integer(handle, "_BBN", NULL, &tmp);
+       bus = ACPI_SUCCESS(status) ? tmp : 0;
+
+       pci_bus = pci_find_bus(seg, bus);
+       if (!pci_bus)
+               return 0;
+
+       context.pci_bus = pci_bus;
+       context.user_function = user_function;
+       context.root_handle = handle;
+
+       dbg("root bridge walk, pci_bus = %x\n", pci_bus->number);
+       /* Objects directly below the root bridge go to the user callback */
+       status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1,
+                                    user_function, &context, NULL);
+       if (ACPI_FAILURE(status))
+               return status;
+
+       /* Then descend through any p2p bridges among those objects */
+       status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1,
+                                    walk_p2p_bridge, &context, NULL);
+       if (ACPI_FAILURE(status))
+               err("%s: walk_p2p_bridge failure - %d\n", __func__, status);
+
+       return status;
+}
+
+/*
+ * acpi_pci_slot_add
+ * @handle: points to an acpi_pci_root
+ *
+ * Walks the root bridge with register_slot as the per-object callback and
+ * hands back whatever walk_root_bridge() returned, logging it on failure.
+ */
+static int
+acpi_pci_slot_add(acpi_handle handle)
+{
+       acpi_status rc = walk_root_bridge(handle, register_slot);
+
+       if (!ACPI_FAILURE(rc))
+               return rc;
+
+       err("%s: register_slot failure - %d\n", __func__, rc);
+       return rc;
+}
+
+/*
+ * acpi_pci_slot_remove
+ * @handle: points to an acpi_pci_root
+ *
+ * Destroys and frees every slot on slot_list that was registered under
+ * this root bridge.  The list is walked with slot_list_lock held.
+ */
+static void
+acpi_pci_slot_remove(acpi_handle handle)
+{
+       struct acpi_pci_slot *cur, *next;
+
+       mutex_lock(&slot_list_lock);
+       list_for_each_entry_safe(cur, next, &slot_list, list) {
+               /* Slots belonging to other root bridges stay in place */
+               if (cur->root_handle != handle)
+                       continue;
+
+               list_del(&cur->list);
+               pci_destroy_slot(cur->pci_slot);
+               kfree(cur);
+       }
+       mutex_unlock(&slot_list_lock);
+}
+
+/*
+ * DMI callback used by acpi_pci_slot_dmi_table: logs the matched system's
+ * ident and sets check_sta_before_sun, which makes check_slot() evaluate
+ * _STA before _ADR and _SUN.  Always returns 0.
+ */
+static int do_sta_before_sun(const struct dmi_system_id *d)
+{
+       info("%s detected: will evaluate _STA before calling _SUN\n", d->ident);
+       check_sta_before_sun = 1;
+       return 0;
+}
+
+/* Systems needing special handling; checked once in acpi_pci_slot_init(). */
+static struct dmi_system_id acpi_pci_slot_dmi_table[] __initdata = {
+       /*
+        * Fujitsu Primequest machines will return 1023 to indicate an
+        * error if the _SUN method is evaluated on SxFy objects that
+        * are not present (as indicated by _STA), so for those machines,
+        * we want to check _STA before evaluating _SUN.
+        */
+       {
+        .callback = do_sta_before_sun,
+        .ident = "Fujitsu PRIMEQUEST",
+        .matches = {
+               DMI_MATCH(DMI_BIOS_VENDOR, "FUJITSU LIMITED"),
+               DMI_MATCH(DMI_BIOS_VERSION, "PRIMEQUEST"),
+               },
+       },
+       {}      /* empty terminating entry */
+};
+
+/*
+ * Module entry point: apply the DMI quirks first so check_sta_before_sun is
+ * set before the driver is registered, then register the driver.
+ * Always returns 0; the result of acpi_pci_register_driver() is not checked.
+ */
+static int __init
+acpi_pci_slot_init(void)
+{
+       dmi_check_system(acpi_pci_slot_dmi_table);
+       acpi_pci_register_driver(&acpi_pci_slot_driver);
+       return 0;
+}
+
+/* Module exit point: unregisters the driver registered by acpi_pci_slot_init(). */
+static void __exit
+acpi_pci_slot_exit(void)
+{
+       acpi_pci_unregister_driver(&acpi_pci_slot_driver);
+}
+
+module_init(acpi_pci_slot_init);
+module_exit(acpi_pci_slot_exit);
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c

index 81e4f081a4aefd2d53d3004fae15d0935610bb90..4ab21cb1c8c7ebde037b51256de4c326efcfd7f6 100644 (file)
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -292,69 +292,135 @@ static int acpi_power_off_device(acpi_handle handle, struct acpi_device *dev)
         return 0;
  }
  
+/**
+ * acpi_device_sleep_wake - execute _DSW (Device Sleep Wake) or (deprecated in
+ *                          ACPI 3.0) _PSW (Power State Wake)
+ * @dev: Device to handle.
+ * @enable: 0 - disable, 1 - enable the wake capabilities of the device.
+ * @sleep_state: Target sleep state of the system.
+ * @dev_state: Target power state of the device.
+ *
+ * Execute _DSW (Device Sleep Wake) for the device.  If _DSW is not found,
+ * fall back to _PSW (Power State Wake), deprecated in ACPI 3.0.  On failure
+ * reset the device's wakeup.flags.valid flag.
+ *
+ * RETURN VALUE:
+ * 0 if either _DSW or _PSW has been successfully executed
+ * 0 if neither _DSW nor _PSW has been found
+ * -ENODEV if the execution of either _DSW or _PSW has failed
+ */
+int acpi_device_sleep_wake(struct acpi_device *dev,
+                           int enable, int sleep_state, int dev_state)
+{
+       union acpi_object in_arg[3];
+       struct acpi_object_list arg_list = { 3, in_arg };
+       acpi_status status = AE_OK;
+
+       /*
+        * Try to execute _DSW first.
+        *
+        * Three arguments are needed for the _DSW object:
+        * Argument 0: enable/disable the wake capabilities
+        * Argument 1: target system state
+        * Argument 2: target device state
+        * When the _DSW object is called to disable the wake capabilities,
+        * only the first argument is meaningful. The values of the other two
+        * arguments are ignored.
+        */
+       in_arg[0].type = ACPI_TYPE_INTEGER;
+       in_arg[0].integer.value = enable;
+       in_arg[1].type = ACPI_TYPE_INTEGER;
+       in_arg[1].integer.value = sleep_state;
+       in_arg[2].type = ACPI_TYPE_INTEGER;
+       in_arg[2].integer.value = dev_state;
+       status = acpi_evaluate_object(dev->handle, "_DSW", &arg_list, NULL);
+       if (ACPI_SUCCESS(status))
+               return 0;
+
+       /* Any failure other than "no such method" is fatal for wakeup */
+       if (status != AE_NOT_FOUND) {
+               printk(KERN_ERR PREFIX "_DSW execution failed\n");
+               dev->wakeup.flags.valid = 0;
+               return -ENODEV;
+       }
+
+       /* No _DSW: execute _PSW, which takes the enable argument only */
+       arg_list.count = 1;
+       in_arg[0].integer.value = enable;
+       status = acpi_evaluate_object(dev->handle, "_PSW", &arg_list, NULL);
+       if (ACPI_SUCCESS(status) || status == AE_NOT_FOUND)
+               return 0;
+
+       printk(KERN_ERR PREFIX "_PSW execution failed\n");
+       dev->wakeup.flags.valid = 0;
+       return -ENODEV;
+}
+
  /*
   * Prepare a wakeup device, two steps (Ref ACPI 2.0:P229):
   * 1. Power on the power resources required for the wakeup device 
- * 2. Enable _PSW (power state wake) for the device if present
+ * 2. Execute _DSW (Device Sleep Wake) or (deprecated in ACPI 3.0) _PSW (Power
+ *    State Wake) for the device, if present
   */
-int acpi_enable_wakeup_device_power(struct acpi_device *dev)
+int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state)
  {
-       union acpi_object arg = { ACPI_TYPE_INTEGER };
-       struct acpi_object_list arg_list = { 1, &arg };
-       acpi_status status = AE_OK;
-       int i;
-       int ret = 0;
+       int i, err;
  
         if (!dev || !dev->wakeup.flags.valid)
-               return -1;
+               return -EINVAL;
+
+       /*
+        * Do not execute the code below twice in a row without calling
+        * acpi_disable_wakeup_device_power() in between for the same device
+        */
+       if (dev->wakeup.flags.prepared)
+               return 0;
  
-       arg.integer.value = 1;
         /* Open power resource */
         for (i = 0; i < dev->wakeup.resources.count; i++) {
-               ret = acpi_power_on(dev->wakeup.resources.handles[i], dev);
+               int ret = acpi_power_on(dev->wakeup.resources.handles[i], dev);
                 if (ret) {
                         printk(KERN_ERR PREFIX "Transition power state\n");
                         dev->wakeup.flags.valid = 0;
-                       return -1;
+                       return -ENODEV;
                 }
         }
  
-       /* Execute PSW */
-       status = acpi_evaluate_object(dev->handle, "_PSW", &arg_list, NULL);
-       if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) {
-               printk(KERN_ERR PREFIX "Evaluate _PSW\n");
-               dev->wakeup.flags.valid = 0;
-               ret = -1;
-       }
+       /*
+        * Passing 3 as the third argument below means the device may be placed
+        * in arbitrary power state afterwards.
+        */
+       err = acpi_device_sleep_wake(dev, 1, sleep_state, 3);
+       if (!err)
+               dev->wakeup.flags.prepared = 1;
  
-       return ret;
+       return err;
  }
  
  /*
   * Shutdown a wakeup device, counterpart of above method
- * 1. Disable _PSW (power state wake)
+ * 1. Execute _DSW (Device Sleep Wake) or (deprecated in ACPI 3.0) _PSW (Power
+ *    State Wake) for the device, if present
   * 2. Shutdown down the power resources
   */
  int acpi_disable_wakeup_device_power(struct acpi_device *dev)
  {
-       union acpi_object arg = { ACPI_TYPE_INTEGER };
-       struct acpi_object_list arg_list = { 1, &arg };
-       acpi_status status = AE_OK;
-       int i;
-       int ret = 0;
-
+       int i, ret;
  
         if (!dev || !dev->wakeup.flags.valid)
-               return -1;
+               return -EINVAL;
  
-       arg.integer.value = 0;
-       /* Execute PSW */
-       status = acpi_evaluate_object(dev->handle, "_PSW", &arg_list, NULL);
-       if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) {
-               printk(KERN_ERR PREFIX "Evaluate _PSW\n");
-               dev->wakeup.flags.valid = 0;
-               return -1;
-       }
+       /*
+        * Do not execute the code below twice in a row without calling
+        * acpi_enable_wakeup_device_power() in between for the same device
+        */
+       if (!dev->wakeup.flags.prepared)
+               return 0;
+
+       dev->wakeup.flags.prepared = 0;
+
+       ret = acpi_device_sleep_wake(dev, 0, 0, 0);
+       if (ret)
+               return ret;
  
         /* Close power resource */
         for (i = 0; i < dev->wakeup.resources.count; i++) {
@@ -362,7 +428,7 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev)
                 if (ret) {
                         printk(KERN_ERR PREFIX "Transition power state\n");
                         dev->wakeup.flags.valid = 0;
-                       return -1;
+                       return -ENODEV;
                 }
         }
  
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c

index 9dd0fa93b9e1c1e2a9bd459638918ba47c25125e..ec0f2d581ece851d67e98f51f4107552f9146f50 100644 (file)
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -118,8 +118,31 @@ static const struct file_operations acpi_processor_info_fops = {
         .release = single_release,
  };
  
-struct acpi_processor *processors[NR_CPUS];
+DEFINE_PER_CPU(struct acpi_processor *, processors);
  struct acpi_processor_errata errata __read_mostly;
+/*
+ * DMI callback for processor_idle_dmi_table: reports the matched system's
+ * ident and sets idle_nomwait so that mwait is not used for CPU C-states.
+ * Always returns 0.
+ */
+static int set_no_mwait(const struct dmi_system_id *id)
+{
+       printk(KERN_NOTICE PREFIX "%s detected - "
+               "disable mwait for CPU C-states\n", id->ident);
+       idle_nomwait = 1;
+       return 0;
+}
+
+/*
+ * Systems on which mwait must not be used for CPU C-states; every entry
+ * invokes set_no_mwait.  Checked from acpi_processor_init().
+ * NOTE(review): the initializers are positional - presumably callback,
+ * ident, match list, driver data, in struct dmi_system_id order; confirm.
+ */
+static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = {
+       {
+       set_no_mwait, "IFL91 board", {
+       DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"),
+       DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"),
+       DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"),
+       DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL},
+       {
+       set_no_mwait, "Extensa 5220", {
+       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+       DMI_MATCH(DMI_SYS_VENDOR, "ACER"),
+       DMI_MATCH(DMI_PRODUCT_VERSION, "0100"),
+       DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL},
+       {},     /* empty terminating entry */
+};
  
  /* --------------------------------------------------------------------------
                                  Errata Handling
@@ -265,7 +288,20 @@ static int acpi_processor_set_pdc(struct acpi_processor *pr)
  
         if (!pdc_in)
                 return status;
+       if (idle_nomwait) {
+               /*
+                * If mwait is disabled for CPU C-states, the C2C3_FFH access
+                * mode will be disabled in the parameter of _PDC object.
+                * Of course C1_FFH access mode will also be disabled.
+                */
+               union acpi_object *obj;
+               u32 *buffer = NULL;
  
+               obj = pdc_in->pointer;
+               buffer = (u32 *)(obj->buffer.pointer);
+               buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH);
+
+       }
         status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL);
  
         if (ACPI_FAILURE(status))
@@ -614,14 +650,14 @@ static int acpi_processor_get_info(struct acpi_processor *pr, unsigned has_uid)
         return 0;
  }
  
-static void *processor_device_array[NR_CPUS];
+static DEFINE_PER_CPU(void *, processor_device_array);
  
  static int __cpuinit acpi_processor_start(struct acpi_device *device)
  {
         int result = 0;
         acpi_status status = AE_OK;
         struct acpi_processor *pr;
-
+       struct sys_device *sysdev;
  
         pr = acpi_driver_data(device);
  
@@ -638,20 +674,24 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
          * ACPI id of processors can be reported wrongly by the BIOS.
          * Don't trust it blindly
          */
-       if (processor_device_array[pr->id] != NULL &&
-           processor_device_array[pr->id] != device) {
+       if (per_cpu(processor_device_array, pr->id) != NULL &&
+           per_cpu(processor_device_array, pr->id) != device) {
                 printk(KERN_WARNING "BIOS reported wrong ACPI id "
                         "for the processor\n");
                 return -ENODEV;
         }
-       processor_device_array[pr->id] = device;
+       per_cpu(processor_device_array, pr->id) = device;
  
-       processors[pr->id] = pr;
+       per_cpu(processors, pr->id) = pr;
  
         result = acpi_processor_add_fs(device);
         if (result)
                 goto end;
  
+       sysdev = get_cpu_sysdev(pr->id);
+       if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev"))
+               return -EFAULT;
+
         status = acpi_install_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY,
                                              acpi_processor_notify, pr);
  
@@ -749,7 +789,7 @@ static int acpi_cpu_soft_notify(struct notifier_block *nfb,
                 unsigned long action, void *hcpu)
  {
         unsigned int cpu = (unsigned long)hcpu;
-       struct acpi_processor *pr = processors[cpu];
+       struct acpi_processor *pr = per_cpu(processors, cpu);
  
         if (action == CPU_ONLINE && pr) {
                 acpi_processor_ppc_has_changed(pr);
@@ -810,6 +850,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type)
         status = acpi_remove_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY,
                                             acpi_processor_notify);
  
+       sysfs_remove_link(&device->dev.kobj, "sysdev");
+
         acpi_processor_remove_fs(device);
  
         if (pr->cdev) {
@@ -819,8 +861,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type)
                 pr->cdev = NULL;
         }
  
-       processors[pr->id] = NULL;
-       processor_device_array[pr->id] = NULL;
+       per_cpu(processors, pr->id) = NULL;
+       per_cpu(processor_device_array, pr->id) = NULL;
         kfree(pr);
  
         return 0;
@@ -1014,9 +1056,9 @@ static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu)
  
  static int acpi_processor_handle_eject(struct acpi_processor *pr)
  {
-       if (cpu_online(pr->id)) {
-               return (-EINVAL);
-       }
+       if (cpu_online(pr->id))
+               cpu_down(pr->id);
+
         arch_unregister_cpu(pr->id);
         acpi_unmap_lsapic(pr->id);
         return (0);
@@ -1068,8 +1110,6 @@ static int __init acpi_processor_init(void)
  {
         int result = 0;
  
-
-       memset(&processors, 0, sizeof(processors));
         memset(&errata, 0, sizeof(errata));
  
  #ifdef CONFIG_SMP
@@ -1083,6 +1123,11 @@ static int __init acpi_processor_init(void)
                 return -ENOMEM;
         acpi_processor_dir->owner = THIS_MODULE;
  
+       /*
+        * Check whether the system matches an entry in the DMI table.
+        * If it does, OSPM should not use mwait for CPU C-states.
+        */
+       dmi_check_system(processor_idle_dmi_table);
         result = cpuidle_register_driver(&acpi_idle_driver);
         if (result < 0)
                 goto out_proc;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c

index 4976e5db2b3f3436026d7a5c341f1abec92f300c..d592dbb1d12acb04b029c1357b1516715890cefe 100644 (file)
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -41,6 +41,7 @@
  #include <linux/pm_qos_params.h>
  #include <linux/clockchips.h>
  #include <linux/cpuidle.h>
+#include <linux/cpuidle.h>
  
  /*
   * Include the apic definitions for x86 to have the APIC timer related defines
@@ -57,6 +58,7 @@
  
  #include <acpi/acpi_bus.h>
  #include <acpi/processor.h>
+#include <asm/processor.h>
  
  #define ACPI_PROCESSOR_COMPONENT        0x01000000
  #define ACPI_PROCESSOR_CLASS            "processor"
@@ -401,7 +403,7 @@ static void acpi_processor_idle(void)
          */
         local_irq_disable();
  
-       pr = processors[smp_processor_id()];
+       pr = __get_cpu_var(processors);
         if (!pr) {
                 local_irq_enable();
                 return;
@@ -955,6 +957,21 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
                         } else {
                                 continue;
                         }
+                       if (cx.type == ACPI_STATE_C1 &&
+                                       (idle_halt || idle_nomwait)) {
+                               /*
+                                * In most cases the C1 space_id obtained from
+                                * _CST object is FIXED_HARDWARE access mode.
+                                * But when the option of idle=halt is added,
+                                * the entry_method type should be changed from
+                                * CSTATE_FFH to CSTATE_HALT.
+                                * When the option of idle=nomwait is added,
+                                * the C1 entry_method type should be
+                                * CSTATE_HALT.
+                                */
+                               cx.entry_method = ACPI_CSTATE_HALT;
+                               snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
+                       }
                 } else {
                         snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
                                  cx.address);
@@ -1431,7 +1448,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
         struct acpi_processor *pr;
         struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
  
-       pr = processors[smp_processor_id()];
+       pr = __get_cpu_var(processors);
  
         if (unlikely(!pr))
                 return 0;
@@ -1471,7 +1488,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
         u32 t1, t2;
         int sleep_ticks = 0;
  
-       pr = processors[smp_processor_id()];
+       pr = __get_cpu_var(processors);
  
         if (unlikely(!pr))
                 return 0;
@@ -1549,7 +1566,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
         u32 t1, t2;
         int sleep_ticks = 0;
  
-       pr = processors[smp_processor_id()];
+       pr = __get_cpu_var(processors);
  
         if (unlikely(!pr))
                 return 0;
@@ -1780,6 +1797,15 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
                 return 0;
  
         if (!first_run) {
+               if (idle_halt) {
+                       /*
+                        * When the boot option of "idle=halt" is added, halt
+                        * is used for CPU IDLE.
+                        * In such case C2/C3 is meaningless. So the max_cstate
+                        * is set to one.
+                        */
+                       max_cstate = 1;
+               }
                 dmi_check_system(processor_power_dmi_table);
                 max_cstate = acpi_processor_cstate_check(max_cstate);
                 if (max_cstate < ACPI_C_STATES_MAX)
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c

index d80b2d1441afbadb561cf58abfdb135dd20aeb8e..b4749969c6b40ab10ac7d9f624adfdac4ac1d637 100644 (file)
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -89,7 +89,7 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
         if (event != CPUFREQ_INCOMPATIBLE)
                 goto out;
  
-       pr = processors[policy->cpu];
+       pr = per_cpu(processors, policy->cpu);
         if (!pr || !pr->performance)
                 goto out;
  
@@ -572,7 +572,7 @@ int acpi_processor_preregister_performance(
  
         /* Call _PSD for all CPUs */
         for_each_possible_cpu(i) {
-               pr = processors[i];
+               pr = per_cpu(processors, i);
                 if (!pr) {
                         /* Look only at processors in ACPI namespace */
                         continue;
@@ -603,7 +603,7 @@ int acpi_processor_preregister_performance(
          * domain info.
          */
         for_each_possible_cpu(i) {
-               pr = processors[i];
+               pr = per_cpu(processors, i);
                 if (!pr)
                         continue;
  
@@ -624,7 +624,7 @@ int acpi_processor_preregister_performance(
  
         cpus_clear(covered_cpus);
         for_each_possible_cpu(i) {
-               pr = processors[i];
+               pr = per_cpu(processors, i);
                 if (!pr)
                         continue;
  
@@ -651,7 +651,7 @@ int acpi_processor_preregister_performance(
                         if (i == j)
                                 continue;
  
-                       match_pr = processors[j];
+                       match_pr = per_cpu(processors, j);
                         if (!match_pr)
                                 continue;
  
@@ -680,7 +680,7 @@ int acpi_processor_preregister_performance(
                         if (i == j)
                                 continue;
  
-                       match_pr = processors[j];
+                       match_pr = per_cpu(processors, j);
                         if (!match_pr)
                                 continue;
  
@@ -697,7 +697,7 @@ int acpi_processor_preregister_performance(
  
  err_ret:
         for_each_possible_cpu(i) {
-               pr = processors[i];
+               pr = per_cpu(processors, i);
                 if (!pr || !pr->performance)
                         continue;
  
@@ -728,7 +728,7 @@ acpi_processor_register_performance(struct acpi_processor_performance
  
         mutex_lock(&performance_mutex);
  
-       pr = processors[cpu];
+       pr = per_cpu(processors, cpu);
         if (!pr) {
                 mutex_unlock(&performance_mutex);
                 return -ENODEV;
@@ -766,7 +766,7 @@ acpi_processor_unregister_performance(struct acpi_processor_performance
  
         mutex_lock(&performance_mutex);
  
-       pr = processors[cpu];
+       pr = per_cpu(processors, cpu);
         if (!pr) {
                 mutex_unlock(&performance_mutex);
                 return;
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c

index bb06738860c4113599ab185435834253d8af4afb..0622ace05220b26d02ff6b72c49155a67e71213c 100644 (file)
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -71,7 +71,7 @@ static int acpi_processor_update_tsd_coord(void)
          * coordination between all CPUs.
          */
         for_each_possible_cpu(i) {
-               pr = processors[i];
+               pr = per_cpu(processors, i);
                 if (!pr)
                         continue;
  
@@ -93,7 +93,7 @@ static int acpi_processor_update_tsd_coord(void)
  
         cpus_clear(covered_cpus);
         for_each_possible_cpu(i) {
-               pr = processors[i];
+               pr = per_cpu(processors, i);
                 if (!pr)
                         continue;
  
@@ -119,7 +119,7 @@ static int acpi_processor_update_tsd_coord(void)
                         if (i == j)
                                 continue;
  
-                       match_pr = processors[j];
+                       match_pr = per_cpu(processors, j);
                         if (!match_pr)
                                 continue;
  
@@ -152,7 +152,7 @@ static int acpi_processor_update_tsd_coord(void)
                         if (i == j)
                                 continue;
  
-                       match_pr = processors[j];
+                       match_pr = per_cpu(processors, j);
                         if (!match_pr)
                                 continue;
  
@@ -172,7 +172,7 @@ static int acpi_processor_update_tsd_coord(void)
  
  err_ret:
         for_each_possible_cpu(i) {
-               pr = processors[i];
+               pr = per_cpu(processors, i);
                 if (!pr)
                         continue;
  
@@ -214,7 +214,7 @@ static int acpi_processor_throttling_notifier(unsigned long event, void *data)
         struct acpi_processor_throttling *p_throttling;
  
         cpu = p_tstate->cpu;
-       pr = processors[cpu];
+       pr = per_cpu(processors, cpu);
         if (!pr) {
                 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Invalid pr pointer\n"));
                 return 0;
@@ -1035,7 +1035,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
                  * cpus.
                  */
                 for_each_cpu_mask(i, online_throttling_cpus) {
-                       match_pr = processors[i];
+                       match_pr = per_cpu(processors, i);
                         /*
                          * If the pointer is invalid, we will report the
                          * error message and continue.
@@ -1232,7 +1232,10 @@ static ssize_t acpi_processor_write_throttling(struct file *file,
         int result = 0;
         struct seq_file *m = file->private_data;
         struct acpi_processor *pr = m->private;
-       char state_string[12] = { '\0' };
+       char state_string[5] = "";
+       char *charp = NULL;
+       size_t state_val = 0;
+       char tmpbuf[5] = "";
  
         if (!pr || (count > sizeof(state_string) - 1))
                 return -EINVAL;
@@ -1241,10 +1244,23 @@ static ssize_t acpi_processor_write_throttling(struct file *file,
                 return -EFAULT;
  
         state_string[count] = '\0';
+       if ((count > 0) && (state_string[count-1] == '\n'))
+               state_string[count-1] = '\0';
  
-       result = acpi_processor_set_throttling(pr,
-                                              simple_strtoul(state_string,
-                                                             NULL, 0));
+       charp = state_string;
+       if ((state_string[0] == 't') || (state_string[0] == 'T'))
+               charp++;
+
+       state_val = simple_strtoul(charp, NULL, 0);
+       if (state_val >= pr->throttling.state_count)
+               return -EINVAL;
+
+       snprintf(tmpbuf, 5, "%zu", state_val);
+
+       if (strcmp(tmpbuf, charp) != 0)
+               return -EINVAL;
+
+       result = acpi_processor_set_throttling(pr, state_val);
         if (result)
                 return result;
  
diff --git a/drivers/acpi/reboot.c b/drivers/acpi/reboot.c

new file mode 100644 (file)

index 0000000..a6b662c
--- /dev/null
+++ b/drivers/acpi/reboot.c
@@ -0,0 +1,62 @@
+
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <acpi/reboot.h>
+
+/*
+ * acpi_reboot - reset the machine through the ACPI FADT reset register.
+ *
+ * Does nothing when ACPI is disabled, when the FADT flags do not advertise
+ * the reset register, or when the register is not 8 bits wide at bit offset
+ * 0.  Otherwise writes the FADT reset_value to the register and stalls; if
+ * this function returns, the reset did not take effect.
+ */
+void acpi_reboot(void)
+{
+       struct acpi_generic_address *rr;
+       struct pci_bus *bus0;
+       u8 reset_value;
+       unsigned int devfn;
+
+       if (acpi_disabled)
+               return;
+
+       rr = &acpi_gbl_FADT.reset_register;
+
+       /* Is the reset register supported? */
+       if (!(acpi_gbl_FADT.flags & ACPI_FADT_RESET_REGISTER) ||
+           rr->bit_width != 8 || rr->bit_offset != 0)
+               return;
+
+       reset_value = acpi_gbl_FADT.reset_value;
+
+       /* The reset register can only exist in I/O, Memory or PCI config space
+        * on a device on bus 0. */
+       switch (rr->space_id) {
+       case ACPI_ADR_SPACE_PCI_CONFIG:
+               /* The reset register can only live on bus 0. */
+               bus0 = pci_find_bus(0, 0);
+               if (!bus0)
+                       return;
+               /*
+                * Form PCI device/function pair: the device number is taken
+                * from address bits 32-47, the function from bits 16-31, and
+                * the config-space offset (used below) from bits 0-15.
+                */
+               devfn = PCI_DEVFN((rr->address >> 32) & 0xffff,
+                                 (rr->address >> 16) & 0xffff);
+               printk(KERN_DEBUG "Resetting with ACPI PCI RESET_REG.\n");
+               /* Write the value that resets us. */
+               pci_bus_write_config_byte(bus0, devfn,
+                               (rr->address & 0xffff), reset_value);
+               break;
+
+       case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+       case ACPI_ADR_SPACE_SYSTEM_IO:
+               printk(KERN_DEBUG "ACPI MEMORY or I/O RESET_REG.\n");
+               acpi_hw_low_level_write(8, reset_value, rr);
+               break;
+       }
+       /* Wait ten seconds for the reset to take effect */
+       acpi_os_stall(10000000);
+}
diff --git a/drivers/acpi/resources/rscalc.c b/drivers/acpi/resources/rscalc.c

index 8a112d11d491eebac39708e4d0d1cc59e3f4fc96..f61ebc679e6645514525a1f5d632c87acbdfd751 100644 (file)
--- a/drivers/acpi/resources/rscalc.c
+++ b/drivers/acpi/resources/rscalc.c
@@ -73,7 +73,7 @@ acpi_rs_stream_option_length(u32 resource_length, u32 minimum_total_length);
  
  static u8 acpi_rs_count_set_bits(u16 bit_field)
  {
-       acpi_native_uint bits_set;
+       u8 bits_set;
  
         ACPI_FUNCTION_ENTRY();
  
@@ -84,7 +84,7 @@ static u8 acpi_rs_count_set_bits(u16 bit_field)
                 bit_field &= (u16) (bit_field - 1);
         }
  
-       return ((u8) bits_set);
+       return bits_set;
  }
  
  /*******************************************************************************
diff --git a/drivers/acpi/resources/rscreate.c b/drivers/acpi/resources/rscreate.c

index faddaee1bc07529c38e5ce075a88e53077d6ab72..7804a8c40e7a36e0d1505ee7b10994f1ef1805f3 100644 (file)
--- a/drivers/acpi/resources/rscreate.c
+++ b/drivers/acpi/resources/rscreate.c
@@ -181,9 +181,9 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object,
         }
  
         /*
-        * Loop through the ACPI_INTERNAL_OBJECTS - Each object
-        * should be a package that in turn contains an
-        * acpi_integer Address, a u8 Pin, a Name and a u8 source_index.
+        * Loop through the ACPI_INTERNAL_OBJECTS - Each object should be a
+        * package that in turn contains an acpi_integer Address, a u8 Pin,
+        * a Name, and a u8 source_index.
          */
         top_object_list = package_object->package.elements;
         number_of_elements = package_object->package.count;
@@ -240,9 +240,7 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object,
                 /* 1) First subobject: Dereference the PRT.Address */
  
                 obj_desc = sub_object_list[0];
-               if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) {
-                       user_prt->address = obj_desc->integer.value;
-               } else {
+               if (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER) {
                         ACPI_ERROR((AE_INFO,
                                     "(PRT[%X].Address) Need Integer, found %s",
                                     index,
@@ -250,12 +248,12 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object,
                         return_ACPI_STATUS(AE_BAD_DATA);
                 }
  
+               user_prt->address = obj_desc->integer.value;
+
                 /* 2) Second subobject: Dereference the PRT.Pin */
  
                 obj_desc = sub_object_list[1];
-               if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) {
-                       user_prt->pin = (u32) obj_desc->integer.value;
-               } else {
+               if (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER) {
                         ACPI_ERROR((AE_INFO,
                                     "(PRT[%X].Pin) Need Integer, found %s",
                                     index,
@@ -284,6 +282,25 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object,
                         }
                 }
  
+               user_prt->pin = (u32) obj_desc->integer.value;
+
+               /*
+                * If the BIOS has erroneously reversed the _PRT source_name (index 2)
+                * and the source_index (index 3), fix it. _PRT is important enough to
+                * workaround this BIOS error. This also provides compatibility with
+                * other ACPI implementations.
+                */
+               obj_desc = sub_object_list[3];
+               if (!obj_desc
+                   || (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER)) {
+                       sub_object_list[3] = sub_object_list[2];
+                       sub_object_list[2] = obj_desc;
+
+                       ACPI_WARNING((AE_INFO,
+                                     "(PRT[%X].Source) SourceName and SourceIndex are reversed, fixed",
+                                     index));
+               }
+
                 /*
                  * 3) Third subobject: Dereference the PRT.source_name
                  * The name may be unresolved (slack mode), so allow a null object
@@ -364,9 +381,7 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object,
                 /* 4) Fourth subobject: Dereference the PRT.source_index */
  
                 obj_desc = sub_object_list[source_index_index];
-               if (ACPI_GET_OBJECT_TYPE(obj_desc) == ACPI_TYPE_INTEGER) {
-                       user_prt->source_index = (u32) obj_desc->integer.value;
-               } else {
+               if (ACPI_GET_OBJECT_TYPE(obj_desc) != ACPI_TYPE_INTEGER) {
                         ACPI_ERROR((AE_INFO,
                                     "(PRT[%X].SourceIndex) Need Integer, found %s",
                                     index,
@@ -374,6 +389,8 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object,
                         return_ACPI_STATUS(AE_BAD_DATA);
                 }
  
+               user_prt->source_index = (u32) obj_desc->integer.value;
+
                 /* Point to the next union acpi_operand_object in the top level package */
  
                 top_object_list++;
diff --git a/drivers/acpi/resources/rsmisc.c b/drivers/acpi/resources/rsmisc.c

index de1ac3881b224ab1121a5edd97409aecf7831071..96a6c0353255fed289e059dab481624f651681a3 100644 (file)
--- a/drivers/acpi/resources/rsmisc.c
+++ b/drivers/acpi/resources/rsmisc.c
@@ -82,7 +82,7 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource,
  
         ACPI_FUNCTION_TRACE(rs_convert_aml_to_resource);
  
-       if (((acpi_native_uint) resource) & 0x3) {
+       if (((acpi_size) resource) & 0x3) {
  
                 /* Each internal resource struct is expected to be 32-bit aligned */
  
diff --git a/drivers/acpi/resources/rsutils.c b/drivers/acpi/resources/rsutils.c

index befe2302f41b31b622dec454924a68ab5b847dd7..f7b3bcd59ba788e341c9b5aca199a108aeed873b 100644 (file)
--- a/drivers/acpi/resources/rsutils.c
+++ b/drivers/acpi/resources/rsutils.c
@@ -62,7 +62,7 @@ ACPI_MODULE_NAME("rsutils")
   ******************************************************************************/
  u8 acpi_rs_decode_bitmask(u16 mask, u8 * list)
  {
-       acpi_native_uint i;
+       u8 i;
         u8 bit_count;
  
         ACPI_FUNCTION_ENTRY();
@@ -71,7 +71,7 @@ u8 acpi_rs_decode_bitmask(u16 mask, u8 * list)
  
         for (i = 0, bit_count = 0; mask; i++) {
                 if (mask & 0x0001) {
-                       list[bit_count] = (u8) i;
+                       list[bit_count] = i;
                         bit_count++;
                 }
  
@@ -96,8 +96,8 @@ u8 acpi_rs_decode_bitmask(u16 mask, u8 * list)
  
  u16 acpi_rs_encode_bitmask(u8 * list, u8 count)
  {
-       acpi_native_uint i;
-       acpi_native_uint mask;
+       u32 i;
+       u16 mask;
  
         ACPI_FUNCTION_ENTRY();
  
@@ -107,7 +107,7 @@ u16 acpi_rs_encode_bitmask(u8 * list, u8 count)
                 mask |= (0x1 << list[i]);
         }
  
-       return ((u16) mask);
+       return mask;
  }
  
  /*******************************************************************************
@@ -130,7 +130,7 @@ u16 acpi_rs_encode_bitmask(u8 * list, u8 count)
  void
  acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type)
  {
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_ENTRY();
  
@@ -679,7 +679,6 @@ acpi_rs_set_srs_method_data(struct acpi_namespace_node *node,
         info->prefix_node = node;
         info->pathname = METHOD_NAME__SRS;
         info->parameters = args;
-       info->parameter_type = ACPI_PARAM_ARGS;
         info->flags = ACPI_IGNORE_RETURN_VALUE;
  
         /*
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c

index 6d85289f1c12ad50fdf46ddd72a863c00d1070db..f3132aa47a69d7b0656d9fe4fa40e9c5a9f8edcf 100644 (file)
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -6,6 +6,8 @@
  #include <linux/init.h>
  #include <linux/kernel.h>
  #include <linux/acpi.h>
+#include <linux/signal.h>
+#include <linux/kthread.h>
  
  #include <acpi/acpi_drivers.h>
  #include <acpi/acinterp.h>     /* for acpi_ex_eisa_id_to_string() */
@@ -92,17 +94,37 @@ acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, cha
  }
  static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL);
  
-static int acpi_eject_operation(acpi_handle handle, int lockable)
+static int acpi_bus_hot_remove_device(void *context)
  {
+       struct acpi_device *device;
+       acpi_handle handle = context;
         struct acpi_object_list arg_list;
         union acpi_object arg;
         acpi_status status = AE_OK;
  
-       /*
-        * TBD: evaluate _PS3?
-        */
+       if (acpi_bus_get_device(handle, &device))
+               return 0;
+
+       if (!device)
+               return 0;
+
+       ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+               "Hot-removing device %s...\n", device->dev.bus_id));
+
  
-       if (lockable) {
+       if (acpi_bus_trim(device, 1)) {
+               ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+                               "Removing device failed\n"));
+               return -1;
+       }
+
+       /* power off device */
+       status = acpi_evaluate_object(handle, "_PS3", NULL, NULL);
+       if (ACPI_FAILURE(status) && status != AE_NOT_FOUND)
+               ACPI_DEBUG_PRINT((ACPI_DB_WARN,
+                               "Power-off device failed\n"));
+
+       if (device->flags.lockable) {
                 arg_list.count = 1;
                 arg_list.pointer = &arg;
                 arg.type = ACPI_TYPE_INTEGER;
@@ -118,26 +140,22 @@ static int acpi_eject_operation(acpi_handle handle, int lockable)
         /*
          * TBD: _EJD support.
          */
-
         status = acpi_evaluate_object(handle, "_EJ0", &arg_list, NULL);
-       if (ACPI_FAILURE(status)) {
-               return (-ENODEV);
-       }
+       if (ACPI_FAILURE(status))
+               return -ENODEV;
  
-       return (0);
+       return 0;
  }
  
  static ssize_t
  acpi_eject_store(struct device *d, struct device_attribute *attr,
                 const char *buf, size_t count)
  {
-       int result;
         int ret = count;
-       int islockable;
         acpi_status status;
-       acpi_handle handle;
         acpi_object_type type = 0;
         struct acpi_device *acpi_device = to_acpi_device(d);
+       struct task_struct *task;
  
         if ((!count) || (buf[0] != '1')) {
                 return -EINVAL;
@@ -154,18 +172,12 @@ acpi_eject_store(struct device *d, struct device_attribute *attr,
                 goto err;
         }
  
-       islockable = acpi_device->flags.lockable;
-       handle = acpi_device->handle;
-
-       result = acpi_bus_trim(acpi_device, 1);
-
-       if (!result)
-               result = acpi_eject_operation(handle, islockable);
-
-       if (result) {
-               ret = -EBUSY;
-       }
-      err:
+       /* remove the device in another thread to fix the deadlock issue */
+       task = kthread_run(acpi_bus_hot_remove_device,
+                               acpi_device->handle, "acpi_hot_remove_device");
+       if (IS_ERR(task))
+               ret = PTR_ERR(task);
+err:
         return ret;
  }
  
@@ -691,9 +703,7 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
         acpi_status status = 0;
         struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
         union acpi_object *package = NULL;
-       union acpi_object in_arg[3];
-       struct acpi_object_list arg_list = { 3, in_arg };
-       acpi_status psw_status = AE_OK;
+       int psw_error;
  
         struct acpi_device_id button_device_ids[] = {
                 {"PNP0C0D", 0},
@@ -725,39 +735,11 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
          * So it is necessary to call _DSW object first. Only when it is not
          * present will the _PSW object used.
          */
-       /*
-        * Three agruments are needed for the _DSW object.
-        * Argument 0: enable/disable the wake capabilities
-        * When _DSW object is called to disable the wake capabilities, maybe
-        * the first argument is filled. The value of the other two agruments
-        * is meaningless.
-        */
-       in_arg[0].type = ACPI_TYPE_INTEGER;
-       in_arg[0].integer.value = 0;
-       in_arg[1].type = ACPI_TYPE_INTEGER;
-       in_arg[1].integer.value = 0;
-       in_arg[2].type = ACPI_TYPE_INTEGER;
-       in_arg[2].integer.value = 0;
-       psw_status = acpi_evaluate_object(device->handle, "_DSW",
-                                               &arg_list, NULL);
-       if (ACPI_FAILURE(psw_status) && (psw_status != AE_NOT_FOUND))
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO, "error in evaluate _DSW\n"));
-       /*
-        * When the _DSW object is not present, OSPM will call _PSW object.
-        */
-       if (psw_status == AE_NOT_FOUND) {
-               /*
-                * Only one agruments is required for the _PSW object.
-                * agrument 0: enable/disable the wake capabilities
-                */
-               arg_list.count = 1;
-               in_arg[0].integer.value = 0;
-               psw_status = acpi_evaluate_object(device->handle, "_PSW",
-                                               &arg_list, NULL);
-               if (ACPI_FAILURE(psw_status) && (psw_status != AE_NOT_FOUND))
-                       ACPI_DEBUG_PRINT((ACPI_DB_INFO, "error in "
-                                               "evaluate _PSW\n"));
-       }
+       psw_error = acpi_device_sleep_wake(device, 0, 0, 0);
+       if (psw_error)
+               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+                               "error in _DSW or _PSW evaluation\n"));
+
         /* Power button, Lid switch always enable wakeup */
         if (!acpi_match_device_ids(device, button_device_ids))
                 device->wakeup.flags.run_wake = 1;
diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c

index 495c63a3e0afb1fce2e4fcb6801020ad2f8413ad..0489a7d1d42c91dcf404c50c56e07c722a41ea1f 100644 (file)
--- a/drivers/acpi/sleep/main.c
+++ b/drivers/acpi/sleep/main.c
@@ -24,10 +24,6 @@
  
  u8 sleep_states[ACPI_S_STATE_COUNT];
  
-#ifdef CONFIG_PM_SLEEP
-static u32 acpi_target_sleep_state = ACPI_STATE_S0;
-#endif
-
  static int acpi_sleep_prepare(u32 acpi_state)
  {
  #ifdef CONFIG_ACPI_SLEEP
@@ -49,9 +45,96 @@ static int acpi_sleep_prepare(u32 acpi_state)
         return 0;
  }
  
-#ifdef CONFIG_SUSPEND
-static struct platform_suspend_ops acpi_suspend_ops;
+#ifdef CONFIG_PM_SLEEP
+static u32 acpi_target_sleep_state = ACPI_STATE_S0;
+
+/*
+ * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the
+ * user to request that behavior by using the 'acpi_old_suspend_ordering'
+ * kernel command line option that causes the following variable to be set.
+ */
+static bool old_suspend_ordering;
  
+void __init acpi_old_suspend_ordering(void)
+{
+       old_suspend_ordering = true;
+}
+
+/**
+ *     acpi_pm_disable_gpes - Disable the GPEs.
+ */
+static int acpi_pm_disable_gpes(void)
+{
+       acpi_hw_disable_all_gpes();
+       return 0;
+}
+
+/**
+ *     __acpi_pm_prepare - Prepare the platform to enter the target state.
+ *
+ *     If necessary, set the firmware waking vector and do arch-specific
+ *     nastiness to get the wakeup code to the waking vector.
+ */
+static int __acpi_pm_prepare(void)
+{
+       int error = acpi_sleep_prepare(acpi_target_sleep_state);
+
+       if (error)
+               acpi_target_sleep_state = ACPI_STATE_S0;
+       return error;
+}
+
+/**
+ *     acpi_pm_prepare - Prepare the platform to enter the target sleep
+ *             state and disable the GPEs.
+ */
+static int acpi_pm_prepare(void)
+{
+       int error = __acpi_pm_prepare();
+
+       if (!error)
+               acpi_hw_disable_all_gpes();
+       return error;
+}
+
+/**
+ *     acpi_pm_finish - Instruct the platform to leave a sleep state.
+ *
+ *     This is called after we wake back up (or if entering the sleep state
+ *     failed).
+ */
+static void acpi_pm_finish(void)
+{
+       u32 acpi_state = acpi_target_sleep_state;
+
+       if (acpi_state == ACPI_STATE_S0)
+               return;
+
+       printk(KERN_INFO PREFIX "Waking up from system sleep state S%d\n",
+               acpi_state);
+       acpi_disable_wakeup_device(acpi_state);
+       acpi_leave_sleep_state(acpi_state);
+
+       /* reset firmware waking vector */
+       acpi_set_firmware_waking_vector((acpi_physical_address) 0);
+
+       acpi_target_sleep_state = ACPI_STATE_S0;
+}
+
+/**
+ *     acpi_pm_end - Finish up suspend sequence.
+ */
+static void acpi_pm_end(void)
+{
+       /*
+        * This is necessary in case acpi_pm_finish() is not called during a
+        * failing transition to a sleep state.
+        */
+       acpi_target_sleep_state = ACPI_STATE_S0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_SUSPEND
  extern void do_suspend_lowlevel(void);
  
  static u32 acpi_suspend_states[] = {
@@ -61,13 +144,10 @@ static u32 acpi_suspend_states[] = {
         [PM_SUSPEND_MAX] = ACPI_STATE_S5
  };
  
-static int init_8259A_after_S1;
-
  /**
   *     acpi_suspend_begin - Set the target system sleep state to the state
   *             associated with given @pm_state, if supported.
   */
-
  static int acpi_suspend_begin(suspend_state_t pm_state)
  {
         u32 acpi_state = acpi_suspend_states[pm_state];
@@ -83,25 +163,6 @@ static int acpi_suspend_begin(suspend_state_t pm_state)
         return error;
  }
  
-/**
- *     acpi_suspend_prepare - Do preliminary suspend work.
- *
- *     If necessary, set the firmware waking vector and do arch-specific
- *     nastiness to get the wakeup code to the waking vector.
- */
-
-static int acpi_suspend_prepare(void)
-{
-       int error = acpi_sleep_prepare(acpi_target_sleep_state);
-
-       if (error) {
-               acpi_target_sleep_state = ACPI_STATE_S0;
-               return error;
-       }
-
-       return ACPI_SUCCESS(acpi_hw_disable_all_gpes()) ? 0 : -EFAULT;
-}
-
  /**
   *     acpi_suspend_enter - Actually enter a sleep state.
   *     @pm_state: ignored
@@ -110,7 +171,6 @@ static int acpi_suspend_prepare(void)
   *     assembly, which in turn call acpi_enter_sleep_state().
   *     It's unfortunate, but it works. Please fix if you're feeling frisky.
   */
-
  static int acpi_suspend_enter(suspend_state_t pm_state)
  {
         acpi_status status = AE_OK;
@@ -167,46 +227,6 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
         return ACPI_SUCCESS(status) ? 0 : -EFAULT;
  }
  
-/**
- *     acpi_suspend_finish - Instruct the platform to leave a sleep state.
- *
- *     This is called after we wake back up (or if entering the sleep state
- *     failed). 
- */
-
-static void acpi_suspend_finish(void)
-{
-       u32 acpi_state = acpi_target_sleep_state;
-
-       acpi_disable_wakeup_device(acpi_state);
-       acpi_leave_sleep_state(acpi_state);
-
-       /* reset firmware waking vector */
-       acpi_set_firmware_waking_vector((acpi_physical_address) 0);
-
-       acpi_target_sleep_state = ACPI_STATE_S0;
-
-#ifdef CONFIG_X86
-       if (init_8259A_after_S1) {
-               printk("Broken toshiba laptop -> kicking interrupts\n");
-               init_8259A(0);
-       }
-#endif
-}
-
-/**
- *     acpi_suspend_end - Finish up suspend sequence.
- */
-
-static void acpi_suspend_end(void)
-{
-       /*
-        * This is necessary in case acpi_suspend_finish() is not called during a
-        * failing transition to a sleep state.
-        */
-       acpi_target_sleep_state = ACPI_STATE_S0;
-}
-
  static int acpi_suspend_state_valid(suspend_state_t pm_state)
  {
         u32 acpi_state;
@@ -226,30 +246,39 @@ static int acpi_suspend_state_valid(suspend_state_t pm_state)
  static struct platform_suspend_ops acpi_suspend_ops = {
         .valid = acpi_suspend_state_valid,
         .begin = acpi_suspend_begin,
-       .prepare = acpi_suspend_prepare,
+       .prepare = acpi_pm_prepare,
         .enter = acpi_suspend_enter,
-       .finish = acpi_suspend_finish,
-       .end = acpi_suspend_end,
+       .finish = acpi_pm_finish,
+       .end = acpi_pm_end,
  };
  
-/*
- * Toshiba fails to preserve interrupts over S1, reinitialization
- * of 8259 is needed after S1 resume.
+/**
+ *     acpi_suspend_begin_old - Set the target system sleep state to the
+ *             state associated with given @pm_state, if supported, and
+ *             execute the _PTS control method.  This function is used if the
+ *             pre-ACPI 2.0 suspend ordering has been requested.
   */
-static int __init init_ints_after_s1(const struct dmi_system_id *d)
+static int acpi_suspend_begin_old(suspend_state_t pm_state)
  {
-       printk(KERN_WARNING "%s with broken S1 detected.\n", d->ident);
-       init_8259A_after_S1 = 1;
-       return 0;
+       int error = acpi_suspend_begin(pm_state);
+
+       if (!error)
+               error = __acpi_pm_prepare();
+       return error;
  }
  
-static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
-       {
-        .callback = init_ints_after_s1,
-        .ident = "Toshiba Satellite 4030cdt",
-        .matches = {DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),},
-        },
-       {},
+/*
+ * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has
+ * been requested.
+ */
+static struct platform_suspend_ops acpi_suspend_ops_old = {
+       .valid = acpi_suspend_state_valid,
+       .begin = acpi_suspend_begin_old,
+       .prepare = acpi_pm_disable_gpes,
+       .enter = acpi_suspend_enter,
+       .finish = acpi_pm_finish,
+       .end = acpi_pm_end,
+       .recover = acpi_pm_finish,
  };
  #endif /* CONFIG_SUSPEND */
  
@@ -257,22 +286,9 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
  static int acpi_hibernation_begin(void)
  {
         acpi_target_sleep_state = ACPI_STATE_S4;
-
         return 0;
  }
  
-static int acpi_hibernation_prepare(void)
-{
-       int error = acpi_sleep_prepare(ACPI_STATE_S4);
-
-       if (error) {
-               acpi_target_sleep_state = ACPI_STATE_S0;
-               return error;
-       }
-
-       return ACPI_SUCCESS(acpi_hw_disable_all_gpes()) ? 0 : -EFAULT;
-}
-
  static int acpi_hibernation_enter(void)
  {
         acpi_status status = AE_OK;
@@ -302,52 +318,55 @@ static void acpi_hibernation_leave(void)
         acpi_leave_sleep_state_prep(ACPI_STATE_S4);
  }
  
-static void acpi_hibernation_finish(void)
+static void acpi_pm_enable_gpes(void)
  {
-       acpi_disable_wakeup_device(ACPI_STATE_S4);
-       acpi_leave_sleep_state(ACPI_STATE_S4);
-
-       /* reset firmware waking vector */
-       acpi_set_firmware_waking_vector((acpi_physical_address) 0);
-
-       acpi_target_sleep_state = ACPI_STATE_S0;
+       acpi_hw_enable_all_runtime_gpes();
  }
  
-static void acpi_hibernation_end(void)
-{
-       /*
-        * This is necessary in case acpi_hibernation_finish() is not called
-        * during a failing transition to the sleep state.
-        */
-       acpi_target_sleep_state = ACPI_STATE_S0;
-}
+static struct platform_hibernation_ops acpi_hibernation_ops = {
+       .begin = acpi_hibernation_begin,
+       .end = acpi_pm_end,
+       .pre_snapshot = acpi_pm_prepare,
+       .finish = acpi_pm_finish,
+       .prepare = acpi_pm_prepare,
+       .enter = acpi_hibernation_enter,
+       .leave = acpi_hibernation_leave,
+       .pre_restore = acpi_pm_disable_gpes,
+       .restore_cleanup = acpi_pm_enable_gpes,
+};
  
-static int acpi_hibernation_pre_restore(void)
+/**
+ *     acpi_hibernation_begin_old - Set the target system sleep state to
+ *             ACPI_STATE_S4 and execute the _PTS control method.  This
+ *             function is used if the pre-ACPI 2.0 suspend ordering has been
+ *             requested.
+ */
+static int acpi_hibernation_begin_old(void)
  {
-       acpi_status status;
-
-       status = acpi_hw_disable_all_gpes();
-
-       return ACPI_SUCCESS(status) ? 0 : -EFAULT;
-}
+       int error = acpi_sleep_prepare(ACPI_STATE_S4);
  
-static void acpi_hibernation_restore_cleanup(void)
-{
-       acpi_hw_enable_all_runtime_gpes();
+       if (!error)
+               acpi_target_sleep_state = ACPI_STATE_S4;
+       return error;
  }
  
-static struct platform_hibernation_ops acpi_hibernation_ops = {
-       .begin = acpi_hibernation_begin,
-       .end = acpi_hibernation_end,
-       .pre_snapshot = acpi_hibernation_prepare,
-       .finish = acpi_hibernation_finish,
-       .prepare = acpi_hibernation_prepare,
+/*
+ * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has
+ * been requested.
+ */
+static struct platform_hibernation_ops acpi_hibernation_ops_old = {
+       .begin = acpi_hibernation_begin_old,
+       .end = acpi_pm_end,
+       .pre_snapshot = acpi_pm_disable_gpes,
+       .finish = acpi_pm_finish,
+       .prepare = acpi_pm_disable_gpes,
         .enter = acpi_hibernation_enter,
         .leave = acpi_hibernation_leave,
-       .pre_restore = acpi_hibernation_pre_restore,
-       .restore_cleanup = acpi_hibernation_restore_cleanup,
+       .pre_restore = acpi_pm_disable_gpes,
+       .restore_cleanup = acpi_pm_enable_gpes,
+       .recover = acpi_pm_finish,
  };
-#endif                         /* CONFIG_HIBERNATION */
+#endif /* CONFIG_HIBERNATION */
  
  int acpi_suspend(u32 acpi_state)
  {
@@ -368,8 +387,8 @@ int acpi_suspend(u32 acpi_state)
  /**
   *     acpi_pm_device_sleep_state - return preferred power state of ACPI device
   *             in the system sleep state given by %acpi_target_sleep_state
- *     @dev: device to examine
- *     @wake: if set, the device should be able to wake up the system
+ *     @dev: device to examine; its driver model wakeup flags control
+ *             whether it should be able to wake up the system
   *     @d_min_p: used to store the upper limit of allowed states range
   *     Return value: preferred power state of the device on success, -ENODEV on
   *             failure (ie. if there's no 'struct acpi_device' for @dev)
@@ -387,7 +406,7 @@ int acpi_suspend(u32 acpi_state)
   *     via @wake.
   */
  
-int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p)
+int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
  {
         acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
         struct acpi_device *adev;
@@ -426,7 +445,7 @@ int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p)
          * can wake the system.  _S0W may be valid, too.
          */
         if (acpi_target_sleep_state == ACPI_STATE_S0 ||
-           (wake && adev->wakeup.state.enabled &&
+           (device_may_wakeup(dev) && adev->wakeup.state.enabled &&
              adev->wakeup.sleep_state <= acpi_target_sleep_state)) {
                 acpi_status status;
  
@@ -448,6 +467,31 @@ int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p)
                 *d_min_p = d_min;
         return d_max;
  }
+
+/**
+ *     acpi_pm_device_sleep_wake - switch the system wake-up capability of
+ *                                  given device on or off
+ *     @dev: device to handle; must be allowed to wake up the system
+ *     @enable: 'true' - enable, 'false' - disable the wake-up capability
+ */
+int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
+{
+       struct acpi_device *adev;
+       acpi_handle handle;
+
+       if (!device_may_wakeup(dev))
+               return -EINVAL;
+
+       handle = DEVICE_ACPI_HANDLE(dev);
+       if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &adev))) {
+               printk(KERN_DEBUG "ACPI handle has no context!\n");
+               return -ENODEV;
+       }
+
+       if (!enable)
+               return acpi_disable_wakeup_device_power(adev);
+       return acpi_enable_wakeup_device_power(adev, acpi_target_sleep_state);
+}
  #endif
  
  static void acpi_power_off_prepare(void)
@@ -472,8 +516,6 @@ int __init acpi_sleep_init(void)
         u8 type_a, type_b;
  #ifdef CONFIG_SUSPEND
         int i = 0;
-
-       dmi_check_system(acpisleep_dmi_table);
  #endif
  
         if (acpi_disabled)
@@ -491,13 +533,15 @@ int __init acpi_sleep_init(void)
                 }
         }
  
-       suspend_set_ops(&acpi_suspend_ops);
+       suspend_set_ops(old_suspend_ordering ?
+               &acpi_suspend_ops_old : &acpi_suspend_ops);
  #endif
  
  #ifdef CONFIG_HIBERNATION
         status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b);
         if (ACPI_SUCCESS(status)) {
-               hibernation_set_ops(&acpi_hibernation_ops);
+               hibernation_set_ops(old_suspend_ordering ?
+                       &acpi_hibernation_ops_old : &acpi_hibernation_ops);
                 sleep_states[ACPI_STATE_S4] = 1;
                 printk(" S4");
         }
diff --git a/drivers/acpi/sleep/wakeup.c b/drivers/acpi/sleep/wakeup.c

index ed8e41becf0c4997177697157ac8ee8a368d9ab1..38655eb132dcca3a42ac2db56f2daf79d3b10286 100644 (file)
--- a/drivers/acpi/sleep/wakeup.c
+++ b/drivers/acpi/sleep/wakeup.c
@@ -42,7 +42,7 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state)
                         continue;
  
                 spin_unlock(&acpi_device_lock);
-               acpi_enable_wakeup_device_power(dev);
+               acpi_enable_wakeup_device_power(dev, sleep_state);
                 spin_lock(&acpi_device_lock);
         }
         spin_unlock(&acpi_device_lock);
@@ -66,13 +66,15 @@ void acpi_enable_wakeup_device(u8 sleep_state)
         list_for_each_safe(node, next, &acpi_wakeup_device_list) {
                 struct acpi_device *dev =
                         container_of(node, struct acpi_device, wakeup_list);
+
                 if (!dev->wakeup.flags.valid)
                         continue;
+
                 /* If users want to disable run-wake GPE,
                  * we only disable it for wake and leave it for runtime
                  */
-               if (!dev->wakeup.state.enabled ||
-                   sleep_state > (u32) dev->wakeup.sleep_state) {
+               if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared)
+                   || sleep_state > (u32) dev->wakeup.sleep_state) {
                         if (dev->wakeup.flags.run_wake) {
                                 spin_unlock(&acpi_device_lock);
                                 /* set_gpe_type will disable GPE, leave it like that */
@@ -110,8 +112,9 @@ void acpi_disable_wakeup_device(u8 sleep_state)
  
                 if (!dev->wakeup.flags.valid)
                         continue;
-               if (!dev->wakeup.state.enabled ||
-                   sleep_state > (u32) dev->wakeup.sleep_state) {
+
+               if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared)
+                   || sleep_state > (u32) dev->wakeup.sleep_state) {
                         if (dev->wakeup.flags.run_wake) {
                                 spin_unlock(&acpi_device_lock);
                                 acpi_set_gpe_type(dev->wakeup.gpe_device,
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c

index 5bd2dec9a7ac34848be5c5a5fe4a74a411a06391..d8e3f153b29587dc144fc1e77f639242b2aa9a00 100644 (file)
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -167,7 +167,13 @@ static int acpi_system_sysfs_init(void)
  #define COUNT_ERROR 2  /* other */
  #define NUM_COUNTERS_EXTRA 3
  
-static u32 *all_counters;
+#define ACPI_EVENT_VALID       0x01    /* flags bit: event was seen enabled */
+struct event_counter {
+       u32 count;      /* event count; shown and settable via counter attrs */
+       u32 flags;      /* ACPI_EVENT_VALID or 0 */
+};
+
+static struct event_counter *all_counters;
  static u32 num_gpes;
  static u32 num_counters;
  static struct attribute **all_attrs;
@@ -202,9 +208,44 @@ static int count_num_gpes(void)
         return count;
  }
  
+/*
+ * Find the GPE block that covers GPE number @index and return its device in
+ * *handle (NULL for a FADT GPE).  Returns 0, or -ENODEV if no block matches.
+ */
+static int get_gpe_device(int index, acpi_handle *handle)
+{
+       struct acpi_gpe_xrupt_info *gpe_xrupt_info;
+       struct acpi_gpe_block_info *gpe_block;
+       acpi_cpu_flags flags;
+       struct acpi_namespace_node *node;
+
+       flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
+       gpe_xrupt_info = acpi_gbl_gpe_xrupt_list_head;
+       while (gpe_xrupt_info) {
+               gpe_block = gpe_xrupt_info->gpe_block_list_head;
+               while (gpe_block) {
+                       index -= gpe_block->register_count *
+                           ACPI_GPE_REGISTER_WIDTH;
+                       if (index < 0) {
+                               /* node of the block that matched */
+                               node = gpe_block->node;
+                               acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
+                               *handle = (node->type == ACPI_TYPE_DEVICE) ?
+                                               node : NULL;
+                               return 0;
+                       }
+                       gpe_block = gpe_block->next;
+               }
+               gpe_xrupt_info = gpe_xrupt_info->next;
+       }
+       acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
+
+       return -ENODEV;
+}
+
  static void delete_gpe_attr_array(void)
  {
-       u32 *tmp = all_counters;
+       struct event_counter *tmp = all_counters;
  
         all_counters = NULL;
         kfree(tmp);
@@ -230,9 +271,10 @@ void acpi_os_gpe_count(u32 gpe_number)
                 return;
  
         if (gpe_number < num_gpes)
-               all_counters[gpe_number]++;
+               all_counters[gpe_number].count++;
         else
-               all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]++;
+               all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR].
+                                       count++;
  
         return;
  }
@@ -243,44 +285,144 @@ void acpi_os_fixed_event_count(u32 event_number)
                 return;
  
         if (event_number < ACPI_NUM_FIXED_EVENTS)
-               all_counters[num_gpes + event_number]++;
+               all_counters[num_gpes + event_number].count++;
         else
-               all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR]++;
+               all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR].
+                               count++;
  
         return;
  }
  
+static int get_status(u32 index, acpi_event_status *status, acpi_handle *handle)
+{
+       int rc;
+
+       /* nothing is queried past the GPE and Fixed Event index ranges */
+       if (index >= num_gpes + ACPI_NUM_FIXED_EVENTS)
+               return 0;
+
+       if (index >= num_gpes) {
+               rc = acpi_get_event_status(index - num_gpes, status);
+       } else {
+               rc = get_gpe_device(index, handle);
+               if (rc) {
+                       ACPI_EXCEPTION((AE_INFO, AE_NOT_FOUND,
+                               "Invalid GPE 0x%x\n", index));
+                       return rc;
+               }
+               rc = acpi_get_gpe_status(*handle, index, ACPI_NOT_ISR, status);
+       }
+
+       /*
+        * The sleep/power button events are enabled after acpi_system_init,
+        * so check at runtime and mark the event valid once it is enabled.
+        */
+       if (!rc && (*status & ACPI_EVENT_FLAG_ENABLED))
+               all_counters[index].flags |= ACPI_EVENT_VALID;
+       return rc;
+}
+
  static ssize_t counter_show(struct kobject *kobj,
         struct kobj_attribute *attr, char *buf)
  {
-       all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI] =
+       int index = attr - counter_attrs;
+       int size;
+       acpi_handle handle;
+       acpi_event_status status;
+       int result = 0;
+
+       all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI].count =
                 acpi_irq_handled;
-       all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE] =
+       all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE].count =
                 acpi_gpe_count;
  
-       return sprintf(buf, "%d\n", all_counters[attr - counter_attrs]);
+       size = sprintf(buf, "%8d", all_counters[index].count);
+
+       /* "gpe_all" or "sci" */
+       if (index >= num_gpes + ACPI_NUM_FIXED_EVENTS)
+               goto end;
+
+       result = get_status(index, &status, &handle);
+       if (result)
+               goto end;
+
+       if (!(all_counters[index].flags & ACPI_EVENT_VALID))
+               size += sprintf(buf + size, "  invalid");
+       else if (status & ACPI_EVENT_FLAG_ENABLED)
+               size += sprintf(buf + size, "   enable");
+       else
+               size += sprintf(buf + size, "  disable");
+
+end:
+       size += sprintf(buf + size, "\n");
+       return result ? result : size;
  }
  
  /*
   * counter_set() sets the specified counter.
   * setting the total "sci" file to any value clears all counters.
+ * enable/disable/clear a gpe/fixed event in user space.
   */
  static ssize_t counter_set(struct kobject *kobj,
         struct kobj_attribute *attr, const char *buf, size_t size)
  {
         int index = attr - counter_attrs;
+       acpi_event_status status;
+       acpi_handle handle;
+       int result = 0;
  
         if (index == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI) {
                 int i;
                 for (i = 0; i < num_counters; ++i)
-                       all_counters[i] = 0;
+                       all_counters[i].count = 0;
                 acpi_gpe_count = 0;
                 acpi_irq_handled = 0;
+               goto end;
+       }
  
+       /* fetch the event status for both GPEs and Fixed Events */
+       result = get_status(index, &status, &handle);
+       if (result)
+               goto end;
+
+       if (!(all_counters[index].flags & ACPI_EVENT_VALID)) {
+               ACPI_DEBUG_PRINT((ACPI_DB_WARN,
+                       "Can not change Invalid GPE/Fixed Event status\n"));
+               return -EINVAL;
+       }
+
+       if (index < num_gpes) {
+               if (!strcmp(buf, "disable\n") &&
+                               (status & ACPI_EVENT_FLAG_ENABLED))
+                       result = acpi_disable_gpe(handle, index, ACPI_NOT_ISR);
+               else if (!strcmp(buf, "enable\n") &&
+                               !(status & ACPI_EVENT_FLAG_ENABLED))
+                       result = acpi_enable_gpe(handle, index, ACPI_NOT_ISR);
+               else if (!strcmp(buf, "clear\n") &&
+                               (status & ACPI_EVENT_FLAG_SET))
+                       result = acpi_clear_gpe(handle, index, ACPI_NOT_ISR);
+               else
+                       all_counters[index].count = strtoul(buf, NULL, 0);
+       } else if (index < num_gpes + ACPI_NUM_FIXED_EVENTS) {
+               int event = index - num_gpes;
+               if (!strcmp(buf, "disable\n") &&
+                               (status & ACPI_EVENT_FLAG_ENABLED))
+                       result = acpi_disable_event(event, ACPI_NOT_ISR);
+               else if (!strcmp(buf, "enable\n") &&
+                               !(status & ACPI_EVENT_FLAG_ENABLED))
+                       result = acpi_enable_event(event, ACPI_NOT_ISR);
+               else if (!strcmp(buf, "clear\n") &&
+                               (status & ACPI_EVENT_FLAG_SET))
+                       result = acpi_clear_event(event);
+               else
+                       all_counters[index].count = strtoul(buf, NULL, 0);
         } else
-               all_counters[index] = strtoul(buf, NULL, 0);
+               all_counters[index].count = strtoul(buf, NULL, 0);
  
-       return size;
+       if (ACPI_FAILURE(result))
+               result = -EINVAL;
+end:
+       return result ? result : size;
  }
  
  void acpi_irq_stats_init(void)
@@ -298,7 +440,8 @@ void acpi_irq_stats_init(void)
         if (all_attrs == NULL)
                 return;
  
-       all_counters = kzalloc(sizeof(u32) * (num_counters), GFP_KERNEL);
+       all_counters = kzalloc(sizeof(struct event_counter) * (num_counters),
+                               GFP_KERNEL);
         if (all_counters == NULL)
                 goto fail;
  
diff --git a/drivers/acpi/tables/tbfadt.c b/drivers/acpi/tables/tbfadt.c

index 949d4114eb9feb92ae17ff7183d4cc4b314d0d45..ccb5b64bbef3396036796509da86ba4b52efc6cd 100644 (file)
--- a/drivers/acpi/tables/tbfadt.c
+++ b/drivers/acpi/tables/tbfadt.c
@@ -124,7 +124,7 @@ static struct acpi_fadt_info fadt_info_table[] = {
  
  static void inline
  acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
-                            u8 bit_width, u64 address)
+                            u8 byte_width, u64 address)
  {
  
         /*
@@ -136,7 +136,7 @@ acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
         /* All other fields are byte-wide */
  
         generic_address->space_id = ACPI_ADR_SPACE_SYSTEM_IO;
-       generic_address->bit_width = bit_width;
+       generic_address->bit_width = byte_width << 3;
         generic_address->bit_offset = 0;
         generic_address->access_width = 0;
  }
@@ -155,7 +155,7 @@ acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
   *
   ******************************************************************************/
  
-void acpi_tb_parse_fadt(acpi_native_uint table_index, u8 flags)
+void acpi_tb_parse_fadt(u32 table_index, u8 flags)
  {
         u32 length;
         struct acpi_table_header *table;
@@ -280,7 +280,7 @@ static void acpi_tb_convert_fadt(void)
  {
         u8 pm1_register_length;
         struct acpi_generic_address *target;
-       acpi_native_uint i;
+       u32 i;
  
         /* Update the local FADT table header length */
  
@@ -343,9 +343,11 @@ static void acpi_tb_convert_fadt(void)
          *
          * The PM event blocks are split into two register blocks, first is the
          * PM Status Register block, followed immediately by the PM Enable Register
-        * block. Each is of length (pm1_event_length/2)
+        * block. Each is xpm1x_event_block.bit_width/2 bits (bit_width/16 bytes)
          */
-       pm1_register_length = (u8) ACPI_DIV_2(acpi_gbl_FADT.pm1_event_length);
+       WARN_ON(ACPI_MOD_16(acpi_gbl_FADT.xpm1a_event_block.bit_width));
+       pm1_register_length = (u8) ACPI_DIV_16(acpi_gbl_FADT
+                                              .xpm1a_event_block.bit_width);
  
         /* The PM1A register block is required */
  
@@ -360,14 +362,17 @@ static void acpi_tb_convert_fadt(void)
         /* The PM1B register block is optional, ignore if not present */
  
         if (acpi_gbl_FADT.xpm1b_event_block.address) {
+               WARN_ON(ACPI_MOD_16(acpi_gbl_FADT.xpm1b_event_block.bit_width));
+               pm1_register_length = (u8) ACPI_DIV_16(acpi_gbl_FADT
+                                                      .xpm1b_event_block
+                                                      .bit_width);
                 acpi_tb_init_generic_address(&acpi_gbl_xpm1b_enable,
                                              pm1_register_length,
                                              (acpi_gbl_FADT.xpm1b_event_block.
                                               address + pm1_register_length));
                 /* Don't forget to copy space_id of the GAS */
                 acpi_gbl_xpm1b_enable.space_id =
-                   acpi_gbl_FADT.xpm1a_event_block.space_id;
-
+                   acpi_gbl_FADT.xpm1b_event_block.space_id;
         }
  }
  
@@ -396,7 +401,7 @@ static void acpi_tb_validate_fadt(void)
         u32 *address32;
         struct acpi_generic_address *address64;
         u8 length;
-       acpi_native_uint i;
+       u32 i;
  
         /* Examine all of the 64-bit extended address fields (X fields) */
  
diff --git a/drivers/acpi/tables/tbfind.c b/drivers/acpi/tables/tbfind.c

index 9ca3afc98c80027fe46fc037678520b33f3126ad..531584defbb826596c742acb2b5c9a90a3b45fc1 100644 (file)
--- a/drivers/acpi/tables/tbfind.c
+++ b/drivers/acpi/tables/tbfind.c
@@ -65,10 +65,9 @@ ACPI_MODULE_NAME("tbfind")
   ******************************************************************************/
  acpi_status
  acpi_tb_find_table(char *signature,
-                  char *oem_id,
-                  char *oem_table_id, acpi_native_uint * table_index)
+                  char *oem_id, char *oem_table_id, u32 *table_index)
  {
-       acpi_native_uint i;
+       u32 i;
         acpi_status status;
         struct acpi_table_header header;
  
diff --git a/drivers/acpi/tables/tbinstal.c b/drivers/acpi/tables/tbinstal.c

index 5336ce88f89f74b47790be39ceadabd8a5b345b3..b22185f55a16acf80e9814059acff89a4a1e508b 100644 (file)
--- a/drivers/acpi/tables/tbinstal.c
+++ b/drivers/acpi/tables/tbinstal.c
@@ -107,11 +107,10 @@ acpi_status acpi_tb_verify_table(struct acpi_table_desc *table_desc)
   ******************************************************************************/
  
  acpi_status
-acpi_tb_add_table(struct acpi_table_desc *table_desc,
-                 acpi_native_uint * table_index)
+acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index)
  {
-       acpi_native_uint i;
-       acpi_native_uint length;
+       u32 i;
+       u32 length;
         acpi_status status = AE_OK;
  
         ACPI_FUNCTION_TRACE(tb_add_table);
@@ -207,8 +206,8 @@ acpi_status acpi_tb_resize_root_table_list(void)
  
         /* Increase the Table Array size */
  
-       tables = ACPI_ALLOCATE_ZEROED((acpi_gbl_root_table_list.size +
-                                      ACPI_ROOT_TABLE_SIZE_INCREMENT)
+       tables = ACPI_ALLOCATE_ZEROED(((acpi_size) acpi_gbl_root_table_list.
+                                      size + ACPI_ROOT_TABLE_SIZE_INCREMENT)
                                       * sizeof(struct acpi_table_desc));
         if (!tables) {
                 ACPI_ERROR((AE_INFO,
@@ -220,7 +219,7 @@ acpi_status acpi_tb_resize_root_table_list(void)
  
         if (acpi_gbl_root_table_list.tables) {
                 ACPI_MEMCPY(tables, acpi_gbl_root_table_list.tables,
-                           acpi_gbl_root_table_list.size *
+                           (acpi_size) acpi_gbl_root_table_list.size *
                             sizeof(struct acpi_table_desc));
  
                 if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) {
@@ -253,7 +252,7 @@ acpi_status acpi_tb_resize_root_table_list(void)
  acpi_status
  acpi_tb_store_table(acpi_physical_address address,
                     struct acpi_table_header *table,
-                   u32 length, u8 flags, acpi_native_uint * table_index)
+                   u32 length, u8 flags, u32 *table_index)
  {
         acpi_status status = AE_OK;
  
@@ -334,7 +333,7 @@ void acpi_tb_delete_table(struct acpi_table_desc *table_desc)
  
  void acpi_tb_terminate(void)
  {
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_TRACE(tb_terminate);
  
@@ -374,7 +373,7 @@ void acpi_tb_terminate(void)
   *
   ******************************************************************************/
  
-void acpi_tb_delete_namespace_by_owner(acpi_native_uint table_index)
+void acpi_tb_delete_namespace_by_owner(u32 table_index)
  {
         acpi_owner_id owner_id;
  
@@ -403,7 +402,7 @@ void acpi_tb_delete_namespace_by_owner(acpi_native_uint table_index)
   *
   ******************************************************************************/
  
-acpi_status acpi_tb_allocate_owner_id(acpi_native_uint table_index)
+acpi_status acpi_tb_allocate_owner_id(u32 table_index)
  {
         acpi_status status = AE_BAD_PARAMETER;
  
@@ -431,7 +430,7 @@ acpi_status acpi_tb_allocate_owner_id(acpi_native_uint table_index)
   *
   ******************************************************************************/
  
-acpi_status acpi_tb_release_owner_id(acpi_native_uint table_index)
+acpi_status acpi_tb_release_owner_id(u32 table_index)
  {
         acpi_status status = AE_BAD_PARAMETER;
  
@@ -462,8 +461,7 @@ acpi_status acpi_tb_release_owner_id(acpi_native_uint table_index)
   *
   ******************************************************************************/
  
-acpi_status
-acpi_tb_get_owner_id(acpi_native_uint table_index, acpi_owner_id * owner_id)
+acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id *owner_id)
  {
         acpi_status status = AE_BAD_PARAMETER;
  
@@ -490,7 +488,7 @@ acpi_tb_get_owner_id(acpi_native_uint table_index, acpi_owner_id * owner_id)
   *
   ******************************************************************************/
  
-u8 acpi_tb_is_table_loaded(acpi_native_uint table_index)
+u8 acpi_tb_is_table_loaded(u32 table_index)
  {
         u8 is_loaded = FALSE;
  
@@ -518,7 +516,7 @@ u8 acpi_tb_is_table_loaded(acpi_native_uint table_index)
   *
   ******************************************************************************/
  
-void acpi_tb_set_table_loaded_flag(acpi_native_uint table_index, u8 is_loaded)
+void acpi_tb_set_table_loaded_flag(u32 table_index, u8 is_loaded)
  {
  
         (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
diff --git a/drivers/acpi/tables/tbutils.c b/drivers/acpi/tables/tbutils.c

index bc019b9b6a68ec7e690c17da8aa5751e62cdbe30..0cc92ef5236fc261a2167a5a6aa8acb9fbe43d55 100644 (file)
--- a/drivers/acpi/tables/tbutils.c
+++ b/drivers/acpi/tables/tbutils.c
@@ -49,8 +49,8 @@ ACPI_MODULE_NAME("tbutils")
  
  /* Local prototypes */
  static acpi_physical_address
-acpi_tb_get_root_table_entry(u8 * table_entry,
-                            acpi_native_uint table_entry_size);
+acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size);
+
  /*******************************************************************************
   *
   * FUNCTION:    acpi_tb_check_xsdt
@@ -238,7 +238,7 @@ acpi_status acpi_tb_verify_checksum(struct acpi_table_header *table, u32 length)
   *
   ******************************************************************************/
  
-u8 acpi_tb_checksum(u8 * buffer, acpi_native_uint length)
+u8 acpi_tb_checksum(u8 *buffer, u32 length)
  {
         u8 sum = 0;
         u8 *end = buffer + length;
@@ -268,7 +268,7 @@ u8 acpi_tb_checksum(u8 * buffer, acpi_native_uint length)
  
  void
  acpi_tb_install_table(acpi_physical_address address,
-                     u8 flags, char *signature, acpi_native_uint table_index)
+                     u8 flags, char *signature, u32 table_index)
  {
         struct acpi_table_header *table;
  
@@ -336,8 +336,7 @@ acpi_tb_install_table(acpi_physical_address address,
   ******************************************************************************/
  
  static acpi_physical_address
-acpi_tb_get_root_table_entry(u8 * table_entry,
-                            acpi_native_uint table_entry_size)
+acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size)
  {
         u64 address64;
  
@@ -395,8 +394,8 @@ acpi_status __init
  acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags)
  {
         struct acpi_table_rsdp *rsdp;
-       acpi_native_uint table_entry_size;
-       acpi_native_uint i;
+       u32 table_entry_size;
+       u32 i;
         u32 table_count;
         struct acpi_table_header *table;
         acpi_physical_address address;
diff --git a/drivers/acpi/tables/tbxface.c b/drivers/acpi/tables/tbxface.c

index 0e319604d3e78fe5e197a2fcd4f6140cf0a1dcd3..fd7770aa10618e256816350fe2638b78bb9af450 100644 (file)
--- a/drivers/acpi/tables/tbxface.c
+++ b/drivers/acpi/tables/tbxface.c
@@ -125,7 +125,7 @@ acpi_initialize_tables(struct acpi_table_desc * initial_table_array,
                 /* Root Table Array has been statically allocated by the host */
  
                 ACPI_MEMSET(initial_table_array, 0,
-                           initial_table_count *
+                           (acpi_size) initial_table_count *
                             sizeof(struct acpi_table_desc));
  
                 acpi_gbl_root_table_list.tables = initial_table_array;
@@ -183,9 +183,9 @@ acpi_status acpi_reallocate_root_table(void)
                 return_ACPI_STATUS(AE_SUPPORT);
         }
  
-       new_size =
-           (acpi_gbl_root_table_list.count +
-            ACPI_ROOT_TABLE_SIZE_INCREMENT) * sizeof(struct acpi_table_desc);
+       new_size = ((acpi_size) acpi_gbl_root_table_list.count +
+                   ACPI_ROOT_TABLE_SIZE_INCREMENT) *
+           sizeof(struct acpi_table_desc);
  
         /* Create new array and copy the old array */
  
@@ -222,7 +222,7 @@ acpi_status acpi_reallocate_root_table(void)
  acpi_status acpi_load_table(struct acpi_table_header *table_ptr)
  {
         acpi_status status;
-       acpi_native_uint table_index;
+       u32 table_index;
         struct acpi_table_desc table_desc;
  
         if (!table_ptr)
@@ -264,11 +264,10 @@ ACPI_EXPORT_SYMBOL(acpi_load_table)
   *****************************************************************************/
  acpi_status
  acpi_get_table_header(char *signature,
-                     acpi_native_uint instance,
-                     struct acpi_table_header * out_table_header)
+                     u32 instance, struct acpi_table_header *out_table_header)
  {
-       acpi_native_uint i;
-       acpi_native_uint j;
+       u32 i;
+       u32 j;
         struct acpi_table_header *header;
  
         /* Parameter validation */
@@ -378,10 +377,10 @@ ACPI_EXPORT_SYMBOL(acpi_unload_table_id)
   *****************************************************************************/
  acpi_status
  acpi_get_table(char *signature,
-              acpi_native_uint instance, struct acpi_table_header **out_table)
+              u32 instance, struct acpi_table_header **out_table)
  {
-       acpi_native_uint i;
-       acpi_native_uint j;
+       u32 i;
+       u32 j;
         acpi_status status;
  
         /* Parameter validation */
@@ -435,8 +434,7 @@ ACPI_EXPORT_SYMBOL(acpi_get_table)
   *
   ******************************************************************************/
  acpi_status
-acpi_get_table_by_index(acpi_native_uint table_index,
-                       struct acpi_table_header ** table)
+acpi_get_table_by_index(u32 table_index, struct acpi_table_header **table)
  {
         acpi_status status;
  
@@ -493,7 +491,7 @@ static acpi_status acpi_tb_load_namespace(void)
  {
         acpi_status status;
         struct acpi_table_header *table;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_TRACE(tb_load_namespace);
  
diff --git a/drivers/acpi/tables/tbxfroot.c b/drivers/acpi/tables/tbxfroot.c

index b8c0dfa084f621e9c9731d75245cfe19fd06fc12..2d157e0f98d2716d3965f4390fcf6db1ae60edba 100644 (file)
--- a/drivers/acpi/tables/tbxfroot.c
+++ b/drivers/acpi/tables/tbxfroot.c
@@ -118,7 +118,7 @@ static acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp)
   *
   ******************************************************************************/
  
-acpi_status acpi_find_root_pointer(acpi_native_uint * table_address)
+acpi_status acpi_find_root_pointer(acpi_size *table_address)
  {
         u8 *table_ptr;
         u8 *mem_rover;
@@ -153,7 +153,7 @@ acpi_status acpi_find_root_pointer(acpi_native_uint * table_address)
                  * 1b) Search EBDA paragraphs (EBDA is required to be a
                  *     minimum of 1_k length)
                  */
-               table_ptr = acpi_os_map_memory((acpi_native_uint)
+               table_ptr = acpi_os_map_memory((acpi_physical_address)
                                                physical_address,
                                                ACPI_EBDA_WINDOW_SIZE);
                 if (!table_ptr) {
diff --git a/drivers/acpi/utilities/utalloc.c b/drivers/acpi/utilities/utalloc.c

index ede084829a705a70209641d4f899ae95d8ab5612..3dfb8a442b2604e4186c2dd901efb4ef67bfd2b3 100644 (file)
--- a/drivers/acpi/utilities/utalloc.c
+++ b/drivers/acpi/utilities/utalloc.c
@@ -309,7 +309,8 @@ acpi_ut_initialize_buffer(struct acpi_buffer * buffer,
   *
   ******************************************************************************/
  
-void *acpi_ut_allocate(acpi_size size, u32 component, char *module, u32 line)
+void *acpi_ut_allocate(acpi_size size,
+                      u32 component, const char *module, u32 line)
  {
         void *allocation;
  
@@ -353,7 +354,7 @@ void *acpi_ut_allocate(acpi_size size, u32 component, char *module, u32 line)
   ******************************************************************************/
  
  void *acpi_ut_allocate_zeroed(acpi_size size,
-                             u32 component, char *module, u32 line)
+                             u32 component, const char *module, u32 line)
  {
         void *allocation;
  
diff --git a/drivers/acpi/utilities/utcopy.c b/drivers/acpi/utilities/utcopy.c

index 655c290aca7b6bb95c69b7147ff4fda7b0d0ca13..53499ac90988a19477b39bd9c0113c14594cbf5f 100644 (file)
--- a/drivers/acpi/utilities/utcopy.c
+++ b/drivers/acpi/utilities/utcopy.c
@@ -572,7 +572,7 @@ acpi_ut_copy_epackage_to_ipackage(union acpi_object *external_object,
         acpi_status status = AE_OK;
         union acpi_operand_object *package_object;
         union acpi_operand_object **package_elements;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_TRACE(ut_copy_epackage_to_ipackage);
  
@@ -599,7 +599,7 @@ acpi_ut_copy_epackage_to_ipackage(union acpi_object *external_object,
  
                         /* Truncate package and delete it */
  
-                       package_object->package.count = (u32) i;
+                       package_object->package.count = i;
                         package_elements[i] = NULL;
                         acpi_ut_remove_reference(package_object);
                         return_ACPI_STATUS(status);
diff --git a/drivers/acpi/utilities/utdebug.c b/drivers/acpi/utilities/utdebug.c

index f938f465efa4df6d0c2bd03f9dab870b7ec7e0f7..fd66ecb6741e3a2675a0e2c0358192807c8b2e5e 100644 (file)
--- a/drivers/acpi/utilities/utdebug.c
+++ b/drivers/acpi/utilities/utdebug.c
@@ -157,7 +157,8 @@ void ACPI_INTERNAL_VAR_XFACE
  acpi_ut_debug_print(u32 requested_debug_level,
                     u32 line_number,
                     const char *function_name,
-                   char *module_name, u32 component_id, char *format, ...)
+                   const char *module_name,
+                   u32 component_id, const char *format, ...)
  {
         acpi_thread_id thread_id;
         va_list args;
@@ -228,7 +229,8 @@ void ACPI_INTERNAL_VAR_XFACE
  acpi_ut_debug_print_raw(u32 requested_debug_level,
                         u32 line_number,
                         const char *function_name,
-                       char *module_name, u32 component_id, char *format, ...)
+                       const char *module_name,
+                       u32 component_id, const char *format, ...)
  {
         va_list args;
  
@@ -261,7 +263,8 @@ ACPI_EXPORT_SYMBOL(acpi_ut_debug_print_raw)
   ******************************************************************************/
  void
  acpi_ut_trace(u32 line_number,
-             const char *function_name, char *module_name, u32 component_id)
+             const char *function_name,
+             const char *module_name, u32 component_id)
  {
  
         acpi_gbl_nesting_level++;
@@ -293,7 +296,7 @@ ACPI_EXPORT_SYMBOL(acpi_ut_trace)
  void
  acpi_ut_trace_ptr(u32 line_number,
                   const char *function_name,
-                 char *module_name, u32 component_id, void *pointer)
+                 const char *module_name, u32 component_id, void *pointer)
  {
         acpi_gbl_nesting_level++;
         acpi_ut_track_stack_ptr();
@@ -324,7 +327,7 @@ acpi_ut_trace_ptr(u32 line_number,
  void
  acpi_ut_trace_str(u32 line_number,
                   const char *function_name,
-                 char *module_name, u32 component_id, char *string)
+                 const char *module_name, u32 component_id, char *string)
  {
  
         acpi_gbl_nesting_level++;
@@ -356,7 +359,7 @@ acpi_ut_trace_str(u32 line_number,
  void
  acpi_ut_trace_u32(u32 line_number,
                   const char *function_name,
-                 char *module_name, u32 component_id, u32 integer)
+                 const char *module_name, u32 component_id, u32 integer)
  {
  
         acpi_gbl_nesting_level++;
@@ -386,7 +389,8 @@ acpi_ut_trace_u32(u32 line_number,
  
  void
  acpi_ut_exit(u32 line_number,
-            const char *function_name, char *module_name, u32 component_id)
+            const char *function_name,
+            const char *module_name, u32 component_id)
  {
  
         acpi_ut_debug_print(ACPI_LV_FUNCTIONS,
@@ -417,7 +421,8 @@ ACPI_EXPORT_SYMBOL(acpi_ut_exit)
  void
  acpi_ut_status_exit(u32 line_number,
                     const char *function_name,
-                   char *module_name, u32 component_id, acpi_status status)
+                   const char *module_name,
+                   u32 component_id, acpi_status status)
  {
  
         if (ACPI_SUCCESS(status)) {
@@ -458,7 +463,8 @@ ACPI_EXPORT_SYMBOL(acpi_ut_status_exit)
  void
  acpi_ut_value_exit(u32 line_number,
                    const char *function_name,
-                  char *module_name, u32 component_id, acpi_integer value)
+                  const char *module_name,
+                  u32 component_id, acpi_integer value)
  {
  
         acpi_ut_debug_print(ACPI_LV_FUNCTIONS,
@@ -490,7 +496,7 @@ ACPI_EXPORT_SYMBOL(acpi_ut_value_exit)
  void
  acpi_ut_ptr_exit(u32 line_number,
                  const char *function_name,
-                char *module_name, u32 component_id, u8 * ptr)
+                const char *module_name, u32 component_id, u8 *ptr)
  {
  
         acpi_ut_debug_print(ACPI_LV_FUNCTIONS,
@@ -519,8 +525,8 @@ acpi_ut_ptr_exit(u32 line_number,
  
  void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display)
  {
-       acpi_native_uint i = 0;
-       acpi_native_uint j;
+       u32 i = 0;
+       u32 j;
         u32 temp32;
         u8 buf_char;
  
@@ -539,7 +545,7 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display)
  
                 /* Print current offset */
  
-               acpi_os_printf("%6.4X: ", (u32) i);
+               acpi_os_printf("%6.4X: ", i);
  
                 /* Print 16 hex chars */
  
@@ -549,7 +555,7 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display)
                                 /* Dump fill spaces */
  
                                 acpi_os_printf("%*s", ((display * 2) + 1), " ");
-                               j += (acpi_native_uint) display;
+                               j += display;
                                 continue;
                         }
  
@@ -557,32 +563,38 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display)
                         case DB_BYTE_DISPLAY:
                         default:        /* Default is BYTE display */
  
-                               acpi_os_printf("%02X ", buffer[i + j]);
+                               acpi_os_printf("%02X ",
+                                              buffer[(acpi_size) i + j]);
                                 break;
  
                         case DB_WORD_DISPLAY:
  
-                               ACPI_MOVE_16_TO_32(&temp32, &buffer[i + j]);
+                               ACPI_MOVE_16_TO_32(&temp32,
+                                                  &buffer[(acpi_size) i + j]);
                                 acpi_os_printf("%04X ", temp32);
                                 break;
  
                         case DB_DWORD_DISPLAY:
  
-                               ACPI_MOVE_32_TO_32(&temp32, &buffer[i + j]);
+                               ACPI_MOVE_32_TO_32(&temp32,
+                                                  &buffer[(acpi_size) i + j]);
                                 acpi_os_printf("%08X ", temp32);
                                 break;
  
                         case DB_QWORD_DISPLAY:
  
-                               ACPI_MOVE_32_TO_32(&temp32, &buffer[i + j]);
+                               ACPI_MOVE_32_TO_32(&temp32,
+                                                  &buffer[(acpi_size) i + j]);
                                 acpi_os_printf("%08X", temp32);
  
-                               ACPI_MOVE_32_TO_32(&temp32, &buffer[i + j + 4]);
+                               ACPI_MOVE_32_TO_32(&temp32,
+                                                  &buffer[(acpi_size) i + j +
+                                                          4]);
                                 acpi_os_printf("%08X ", temp32);
                                 break;
                         }
  
-                       j += (acpi_native_uint) display;
+                       j += display;
                 }
  
                 /*
@@ -596,7 +608,7 @@ void acpi_ut_dump_buffer2(u8 * buffer, u32 count, u32 display)
                                 return;
                         }
  
-                       buf_char = buffer[i + j];
+                       buf_char = buffer[(acpi_size) i + j];
                         if (ACPI_IS_PRINT(buf_char)) {
                                 acpi_os_printf("%c", buf_char);
                         } else {
diff --git a/drivers/acpi/utilities/utdelete.c b/drivers/acpi/utilities/utdelete.c

index 1fbc35139e84ebdd85d1c5de4b89c24f04c8e549..c5c791a575c9203f63c0a44ad07ad93487282643 100644 (file)
--- a/drivers/acpi/utilities/utdelete.c
+++ b/drivers/acpi/utilities/utdelete.c
@@ -442,7 +442,7 @@ acpi_ut_update_object_reference(union acpi_operand_object *object, u16 action)
         union acpi_generic_state *state_list = NULL;
         union acpi_operand_object *next_object = NULL;
         union acpi_generic_state *state;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_TRACE_PTR(ut_update_object_reference, object);
  
diff --git a/drivers/acpi/utilities/uteval.c b/drivers/acpi/utilities/uteval.c

index 05e61be267d5b09450f9299f4a1e1e73c91c62f5..352747e49c7a623930fd17491533a436394099ad 100644 (file)
--- a/drivers/acpi/utilities/uteval.c
+++ b/drivers/acpi/utilities/uteval.c
@@ -97,7 +97,7 @@ acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state)
         acpi_status status;
         union acpi_operand_object *string_desc;
         union acpi_operand_object *return_desc;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_TRACE(ut_osi_implementation);
  
@@ -217,7 +217,6 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
  
         info->prefix_node = prefix_node;
         info->pathname = path;
-       info->parameter_type = ACPI_PARAM_ARGS;
  
         /* Evaluate the object/method */
  
@@ -514,7 +513,7 @@ acpi_ut_execute_CID(struct acpi_namespace_node * device_node,
         u32 count;
         u32 size;
         struct acpi_compatible_id_list *cid_list;
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_TRACE(ut_execute_CID);
  
diff --git a/drivers/acpi/utilities/utmisc.c b/drivers/acpi/utilities/utmisc.c

index 1f057b71db1a4aca5c68f271ff9c50d4a04acec5..f34be6773556beebd53f3dde5f63bc03a78e07cf 100644 (file)
--- a/drivers/acpi/utilities/utmisc.c
+++ b/drivers/acpi/utilities/utmisc.c
@@ -64,7 +64,7 @@ ACPI_MODULE_NAME("utmisc")
   ******************************************************************************/
  const char *acpi_ut_validate_exception(acpi_status status)
  {
-       acpi_status sub_status;
+       u32 sub_status;
         const char *exception = NULL;
  
         ACPI_FUNCTION_ENTRY();
@@ -85,32 +85,28 @@ const char *acpi_ut_validate_exception(acpi_status status)
         case AE_CODE_PROGRAMMER:
  
                 if (sub_status <= AE_CODE_PGM_MAX) {
-                       exception =
-                           acpi_gbl_exception_names_pgm[sub_status - 1];
+                       exception = acpi_gbl_exception_names_pgm[sub_status];
                 }
                 break;
  
         case AE_CODE_ACPI_TABLES:
  
                 if (sub_status <= AE_CODE_TBL_MAX) {
-                       exception =
-                           acpi_gbl_exception_names_tbl[sub_status - 1];
+                       exception = acpi_gbl_exception_names_tbl[sub_status];
                 }
                 break;
  
         case AE_CODE_AML:
  
                 if (sub_status <= AE_CODE_AML_MAX) {
-                       exception =
-                           acpi_gbl_exception_names_aml[sub_status - 1];
+                       exception = acpi_gbl_exception_names_aml[sub_status];
                 }
                 break;
  
         case AE_CODE_CONTROL:
  
                 if (sub_status <= AE_CODE_CTRL_MAX) {
-                       exception =
-                           acpi_gbl_exception_names_ctrl[sub_status - 1];
+                       exception = acpi_gbl_exception_names_ctrl[sub_status];
                 }
                 break;
  
@@ -165,9 +161,9 @@ u8 acpi_ut_is_aml_table(struct acpi_table_header *table)
  
  acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id)
  {
-       acpi_native_uint i;
-       acpi_native_uint j;
-       acpi_native_uint k;
+       u32 i;
+       u32 j;
+       u32 k;
         acpi_status status;
  
         ACPI_FUNCTION_TRACE(ut_allocate_owner_id);
@@ -273,7 +269,7 @@ void acpi_ut_release_owner_id(acpi_owner_id * owner_id_ptr)
  {
         acpi_owner_id owner_id = *owner_id_ptr;
         acpi_status status;
-       acpi_native_uint index;
+       u32 index;
         u32 bit;
  
         ACPI_FUNCTION_TRACE_U32(ut_release_owner_id, owner_id);
@@ -593,7 +589,7 @@ acpi_ut_display_init_pathname(u8 type,
   *
   ******************************************************************************/
  
-u8 acpi_ut_valid_acpi_char(char character, acpi_native_uint position)
+u8 acpi_ut_valid_acpi_char(char character, u32 position)
  {
  
         if (!((character >= 'A' && character <= 'Z') ||
@@ -628,7 +624,7 @@ u8 acpi_ut_valid_acpi_char(char character, acpi_native_uint position)
  
  u8 acpi_ut_valid_acpi_name(u32 name)
  {
-       acpi_native_uint i;
+       u32 i;
  
         ACPI_FUNCTION_ENTRY();
  
@@ -657,7 +653,7 @@ u8 acpi_ut_valid_acpi_name(u32 name)
  
  acpi_name acpi_ut_repair_name(char *name)
  {
-       acpi_native_uint i;
+       u32 i;
         char new_name[ACPI_NAME_SIZE];
  
         for (i = 0; i < ACPI_NAME_SIZE; i++) {
@@ -1024,7 +1020,7 @@ acpi_ut_walk_package_tree(union acpi_operand_object * source_object,
   ******************************************************************************/
  
  void ACPI_INTERNAL_VAR_XFACE
-acpi_ut_error(char *module_name, u32 line_number, char *format, ...)
+acpi_ut_error(const char *module_name, u32 line_number, const char *format, ...)
  {
         va_list args;
  
@@ -1037,8 +1033,8 @@ acpi_ut_error(char *module_name, u32 line_number, char *format, ...)
  }
  
  void ACPI_INTERNAL_VAR_XFACE
-acpi_ut_exception(char *module_name,
-                 u32 line_number, acpi_status status, char *format, ...)
+acpi_ut_exception(const char *module_name,
+                 u32 line_number, acpi_status status, const char *format, ...)
  {
         va_list args;
  
@@ -1054,7 +1050,8 @@ acpi_ut_exception(char *module_name,
  EXPORT_SYMBOL(acpi_ut_exception);
  
  void ACPI_INTERNAL_VAR_XFACE
-acpi_ut_warning(char *module_name, u32 line_number, char *format, ...)
+acpi_ut_warning(const char *module_name,
+               u32 line_number, const char *format, ...)
  {
         va_list args;
  
@@ -1067,7 +1064,7 @@ acpi_ut_warning(char *module_name, u32 line_number, char *format, ...)
  }
  
  void ACPI_INTERNAL_VAR_XFACE
-acpi_ut_info(char *module_name, u32 line_number, char *format, ...)
+acpi_ut_info(const char *module_name, u32 line_number, const char *format, ...)
  {
         va_list args;
  
diff --git a/drivers/acpi/utilities/utmutex.c b/drivers/acpi/utilities/utmutex.c

index f7d602b1a89467bedd215b222899c9ca47880faa..7331dde9e1b3d9dd9b15677cfa410ed82ed2d07e 100644 (file)
--- a/drivers/acpi/utilities/utmutex.c
+++ b/drivers/acpi/utilities/utmutex.c
@@ -218,7 +218,7 @@ acpi_status acpi_ut_acquire_mutex(acpi_mutex_handle mutex_id)
                  * the mutex ordering rule.  This indicates a coding error somewhere in
                  * the ACPI subsystem code.
                  */
-               for (i = mutex_id; i < ACPI_MAX_MUTEX; i++) {
+               for (i = mutex_id; i < ACPI_NUM_MUTEX; i++) {
                         if (acpi_gbl_mutex_info[i].thread_id == this_thread_id) {
                                 if (i == mutex_id) {
                                         ACPI_ERROR((AE_INFO,
@@ -315,7 +315,7 @@ acpi_status acpi_ut_release_mutex(acpi_mutex_handle mutex_id)
                  * ordering rule.  This indicates a coding error somewhere in
                  * the ACPI subsystem code.
                  */
-               for (i = mutex_id; i < ACPI_MAX_MUTEX; i++) {
+               for (i = mutex_id; i < ACPI_NUM_MUTEX; i++) {
                         if (acpi_gbl_mutex_info[i].thread_id == this_thread_id) {
                                 if (i == mutex_id) {
                                         continue;
diff --git a/drivers/acpi/utilities/utobject.c b/drivers/acpi/utilities/utobject.c

index e68466de80447c7068ffbd740ff9f265a22421fd..e25484495e652b026bda9c2dade35c0a07648ba1 100644 (file)
--- a/drivers/acpi/utilities/utobject.c
+++ b/drivers/acpi/utilities/utobject.c
@@ -83,7 +83,8 @@ acpi_ut_get_element_length(u8 object_type,
   *
   ******************************************************************************/
  
-union acpi_operand_object *acpi_ut_create_internal_object_dbg(char *module_name,
+union acpi_operand_object *acpi_ut_create_internal_object_dbg(const char
+                                                             *module_name,
                                                               u32 line_number,
                                                               u32 component_id,
                                                               acpi_object_type
@@ -175,8 +176,8 @@ union acpi_operand_object *acpi_ut_create_package_object(u32 count)
          * Create the element array. Count+1 allows the array to be null
          * terminated.
          */
-       package_elements = ACPI_ALLOCATE_ZEROED((acpi_size)
-                                               (count + 1) * sizeof(void *));
+       package_elements = ACPI_ALLOCATE_ZEROED(((acpi_size) count +
+                                                1) * sizeof(void *));
         if (!package_elements) {
                 acpi_ut_remove_reference(package_desc);
                 return_PTR(NULL);
@@ -347,7 +348,7 @@ u8 acpi_ut_valid_internal_object(void *object)
   *
   ******************************************************************************/
  
-void *acpi_ut_allocate_object_desc_dbg(char *module_name,
+void *acpi_ut_allocate_object_desc_dbg(const char *module_name,
                                        u32 line_number, u32 component_id)
  {
         union acpi_operand_object *object;
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c

index d089c4519d456a796287ae9bfd77ad08d5f753f6..64c889331f3bd97be35ca4eeb0153e50aae74111 100644 (file)
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -631,6 +631,76 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag)
   *     device  : video output device (LCD, CRT, ..)
   *
   *  Return Value:
+ *     Maximum brightness level
+ *
+ *  Allocate and initialize device->brightness.
+ */
+
+static int
+acpi_video_init_brightness(struct acpi_video_device *device)
+{
+       struct acpi_video_device_brightness *br = NULL;
+       union acpi_object *obj = NULL, *o;
+       int i, max_level = 0, count = 0;
+
+       if (!ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) {
+               ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available "
+                                               "LCD brightness level\n"));
+               goto out;
+       }
+
+       if (obj->package.count < 2)
+               goto out;
+
+       br = kzalloc(sizeof(*br), GFP_KERNEL);
+       if (!br) {
+               printk(KERN_ERR "can't allocate memory\n");
+               goto out;
+       }
+
+       /* kcalloc() fails cleanly if count * element size would overflow */
+       br->levels = kcalloc(obj->package.count, sizeof(*br->levels),
+                            GFP_KERNEL);
+       if (!br->levels)
+               goto out_free;
+
+       /* Keep only the integer elements and remember the largest value */
+       for (i = 0; i < obj->package.count; i++) {
+               o = (union acpi_object *)&obj->package.elements[i];
+               if (o->type != ACPI_TYPE_INTEGER) {
+                       printk(KERN_ERR PREFIX "Invalid data\n");
+                       continue;
+               }
+               br->levels[count] = (u32) o->integer.value;
+               if (br->levels[count] > max_level)
+                       max_level = br->levels[count];
+               count++;
+       }
+
+       if (count < 2)
+               goto out_free_levels;
+
+       br->count = count;
+       device->brightness = br;
+       ACPI_DEBUG_PRINT((ACPI_DB_INFO, "found %d brightness levels\n", count));
+       kfree(obj);
+       return max_level;
+
+out_free_levels:
+       kfree(br->levels);
+out_free:
+       kfree(br);
+out:
+       device->brightness = NULL;
+       kfree(obj);
+       return 0;
+}
+
+/*
+ *  Arg:
+ *     device  : video output device (LCD, CRT, ..)
+ *
+ *  Return Value:
   *     None
   *
   *  Find out all required AML methods defined under the output
@@ -640,10 +710,7 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag)
  static void acpi_video_device_find_cap(struct acpi_video_device *device)
  {
         acpi_handle h_dummy1;
-       int i;
         u32 max_level = 0;
-       union acpi_object *obj = NULL;
-       struct acpi_video_device_brightness *br = NULL;
  
  
         memset(&device->cap, 0, sizeof(device->cap));
@@ -672,53 +739,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
                 device->cap._DSS = 1;
         }
  
-       if (ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) {
-
-               if (obj->package.count >= 2) {
-                       int count = 0;
-                       union acpi_object *o;
-
-                       br = kzalloc(sizeof(*br), GFP_KERNEL);
-                       if (!br) {
-                               printk(KERN_ERR "can't allocate memory\n");
-                       } else {
-                               br->levels = kmalloc(obj->package.count *
-                                                    sizeof *(br->levels), GFP_KERNEL);
-                               if (!br->levels)
-                                       goto out;
-
-                               for (i = 0; i < obj->package.count; i++) {
-                                       o = (union acpi_object *)&obj->package.
-                                           elements[i];
-                                       if (o->type != ACPI_TYPE_INTEGER) {
-                                               printk(KERN_ERR PREFIX "Invalid data\n");
-                                               continue;
-                                       }
-                                       br->levels[count] = (u32) o->integer.value;
-
-                                       if (br->levels[count] > max_level)
-                                               max_level = br->levels[count];
-                                       count++;
-                               }
-                             out:
-                               if (count < 2) {
-                                       kfree(br->levels);
-                                       kfree(br);
-                               } else {
-                                       br->count = count;
-                                       device->brightness = br;
-                                       ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-                                                         "found %d brightness levels\n",
-                                                         count));
-                               }
-                       }
-               }
-
-       } else {
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available LCD brightness level\n"));
-       }
-
-       kfree(obj);
+       max_level = acpi_video_init_brightness(device);
  
         if (device->cap._BCL && device->cap._BCM && device->cap._BQC && max_level > 0){
                 int result;
@@ -1695,6 +1716,8 @@ static void
  acpi_video_switch_brightness(struct acpi_video_device *device, int event)
  {
         unsigned long level_current, level_next;
+       if (!device->brightness)
+               return;
         acpi_video_device_lcd_get_level_current(device, &level_current);
         level_next = acpi_video_get_next_level(device, level_current, event);
         acpi_video_device_lcd_set_level(device, level_next);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c

index 911ec600fe71dec287cf922441a8cf53855ecf3f..3f940393d6c7904cdd68a5db804a3c0365ac696a 100644 (file)
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -453,6 +453,8 @@ int platform_driver_register(struct platform_driver *drv)
                 drv->driver.suspend = platform_drv_suspend;
         if (drv->resume)
                 drv->driver.resume = platform_drv_resume;
+       if (drv->pm)
+               drv->driver.pm = &drv->pm->base;
         return driver_register(&drv->driver);
  }
  EXPORT_SYMBOL_GPL(platform_driver_register);
@@ -560,7 +562,9 @@ static int platform_match(struct device *dev, struct device_driver *drv)
         return (strncmp(pdev->name, drv->name, BUS_ID_SIZE) == 0);
  }
  
-static int platform_suspend(struct device *dev, pm_message_t mesg)
+#ifdef CONFIG_PM_SLEEP
+
+static int platform_legacy_suspend(struct device *dev, pm_message_t mesg)
  {
         int ret = 0;
  
@@ -570,7 +574,7 @@ static int platform_suspend(struct device *dev, pm_message_t mesg)
         return ret;
  }
  
-static int platform_suspend_late(struct device *dev, pm_message_t mesg)
+static int platform_legacy_suspend_late(struct device *dev, pm_message_t mesg)
  {
         struct platform_driver *drv = to_platform_driver(dev->driver);
         struct platform_device *pdev;
@@ -583,7 +587,7 @@ static int platform_suspend_late(struct device *dev, pm_message_t mesg)
         return ret;
  }
  
-static int platform_resume_early(struct device *dev)
+static int platform_legacy_resume_early(struct device *dev)
  {
         struct platform_driver *drv = to_platform_driver(dev->driver);
         struct platform_device *pdev;
@@ -596,7 +600,7 @@ static int platform_resume_early(struct device *dev)
         return ret;
  }
  
-static int platform_resume(struct device *dev)
+static int platform_legacy_resume(struct device *dev)
  {
         int ret = 0;
  
@@ -606,15 +610,291 @@ static int platform_resume(struct device *dev)
         return ret;
  }
  
+static int platform_pm_prepare(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int ret = 0;
+
+       if (drv && drv->pm && drv->pm->prepare)
+               ret = drv->pm->prepare(dev);
+
+       return ret;
+}
+
+static void platform_pm_complete(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+
+       if (drv && drv->pm && drv->pm->complete)
+               drv->pm->complete(dev);
+}
+
+#ifdef CONFIG_SUSPEND
+
+static int platform_pm_suspend(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int ret = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->suspend)
+                       ret = drv->pm->suspend(dev);
+       } else {
+               ret = platform_legacy_suspend(dev, PMSG_SUSPEND);
+       }
+
+       return ret;
+}
+
+static int platform_pm_suspend_noirq(struct device *dev)
+{
+       struct platform_driver *pdrv;
+       int ret = 0;
+
+       if (!dev->driver)
+               return 0;
+
+       pdrv = to_platform_driver(dev->driver);
+       if (pdrv->pm) {
+               if (pdrv->pm->suspend_noirq)
+                       ret = pdrv->pm->suspend_noirq(dev);
+       } else {
+               ret = platform_legacy_suspend_late(dev, PMSG_SUSPEND);
+       }
+
+       return ret;
+}
+
+static int platform_pm_resume(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int ret = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->resume)
+                       ret = drv->pm->resume(dev);
+       } else {
+               ret = platform_legacy_resume(dev);
+       }
+
+       return ret;
+}
+
+static int platform_pm_resume_noirq(struct device *dev)
+{
+       struct platform_driver *pdrv;
+       int ret = 0;
+
+       if (!dev->driver)
+               return 0;
+
+       pdrv = to_platform_driver(dev->driver);
+       if (pdrv->pm) {
+               if (pdrv->pm->resume_noirq)
+                       ret = pdrv->pm->resume_noirq(dev);
+       } else {
+               ret = platform_legacy_resume_early(dev);
+       }
+
+       return ret;
+}
+
+#else /* !CONFIG_SUSPEND */
+
+#define platform_pm_suspend            NULL
+#define platform_pm_resume             NULL
+#define platform_pm_suspend_noirq      NULL
+#define platform_pm_resume_noirq       NULL
+
+#endif /* !CONFIG_SUSPEND */
+
+#ifdef CONFIG_HIBERNATION
+
+static int platform_pm_freeze(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int ret = 0;
+
+       if (!drv)
+               return 0;
+
+       if (drv->pm) {
+               if (drv->pm->freeze)
+                       ret = drv->pm->freeze(dev);
+       } else {
+               ret = platform_legacy_suspend(dev, PMSG_FREEZE);
+       }
+
+       return ret;
+}
+
+static int platform_pm_freeze_noirq(struct device *dev)
+{
+       struct platform_driver *pdrv;
+       int ret = 0;
+
+       if (!dev->driver)
+               return 0;
+
+       pdrv = to_platform_driver(dev->driver);
+       if (pdrv->pm) {
+               if (pdrv->pm->freeze_noirq)
+                       ret = pdrv->pm->freeze_noirq(dev);
+       } else {
+               ret = platform_legacy_suspend_late(dev, PMSG_FREEZE);
+       }
+
+       return ret;
+}
+
+static int platform_pm_thaw(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int ret = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->thaw)
+                       ret = drv->pm->thaw(dev);
+       } else {
+               ret = platform_legacy_resume(dev);
+       }
+
+       return ret;
+}
+
+static int platform_pm_thaw_noirq(struct device *dev)
+{
+       struct platform_driver *pdrv;
+       int ret = 0;
+
+       if (!dev->driver)
+               return 0;
+
+       pdrv = to_platform_driver(dev->driver);
+       if (pdrv->pm) {
+               if (pdrv->pm->thaw_noirq)
+                       ret = pdrv->pm->thaw_noirq(dev);
+       } else {
+               ret = platform_legacy_resume_early(dev);
+       }
+
+       return ret;
+}
+
+static int platform_pm_poweroff(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int ret = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->poweroff)
+                       ret = drv->pm->poweroff(dev);
+       } else {
+               ret = platform_legacy_suspend(dev, PMSG_HIBERNATE);
+       }
+
+       return ret;
+}
+
+static int platform_pm_poweroff_noirq(struct device *dev)
+{
+       struct platform_driver *pdrv;
+       int ret = 0;
+
+       if (!dev->driver)
+               return 0;
+
+       pdrv = to_platform_driver(dev->driver);
+       if (pdrv->pm) {
+               if (pdrv->pm->poweroff_noirq)
+                       ret = pdrv->pm->poweroff_noirq(dev);
+       } else {
+               ret = platform_legacy_suspend_late(dev, PMSG_HIBERNATE);
+       }
+
+       return ret;
+}
+
+static int platform_pm_restore(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int ret = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->restore)
+                       ret = drv->pm->restore(dev);
+       } else {
+               ret = platform_legacy_resume(dev);
+       }
+
+       return ret;
+}
+
+static int platform_pm_restore_noirq(struct device *dev)
+{
+       struct platform_driver *pdrv;
+       int ret = 0;
+
+       if (!dev->driver)
+               return 0;
+
+       pdrv = to_platform_driver(dev->driver);
+       if (pdrv->pm) {
+               if (pdrv->pm->restore_noirq)
+                       ret = pdrv->pm->restore_noirq(dev);
+       } else {
+               ret = platform_legacy_resume_early(dev);
+       }
+
+       return ret;
+}
+
+#else /* !CONFIG_HIBERNATION */
+
+#define platform_pm_freeze             NULL
+#define platform_pm_thaw               NULL
+#define platform_pm_poweroff           NULL
+#define platform_pm_restore            NULL
+#define platform_pm_freeze_noirq       NULL
+#define platform_pm_thaw_noirq         NULL
+#define platform_pm_poweroff_noirq     NULL
+#define platform_pm_restore_noirq      NULL
+
+#endif /* !CONFIG_HIBERNATION */
+
+struct pm_ext_ops platform_pm_ops = {
+       .base = {
+               .prepare = platform_pm_prepare,
+               .complete = platform_pm_complete,
+               .suspend = platform_pm_suspend,
+               .resume = platform_pm_resume,
+               .freeze = platform_pm_freeze,
+               .thaw = platform_pm_thaw,
+               .poweroff = platform_pm_poweroff,
+               .restore = platform_pm_restore,
+       },
+       .suspend_noirq = platform_pm_suspend_noirq,
+       .resume_noirq = platform_pm_resume_noirq,
+       .freeze_noirq = platform_pm_freeze_noirq,
+       .thaw_noirq = platform_pm_thaw_noirq,
+       .poweroff_noirq = platform_pm_poweroff_noirq,
+       .restore_noirq = platform_pm_restore_noirq,
+};
+
+#define PLATFORM_PM_OPS_PTR    &platform_pm_ops
+
+#else /* !CONFIG_PM_SLEEP */
+
+#define PLATFORM_PM_OPS_PTR    NULL
+
+#endif /* !CONFIG_PM_SLEEP */
+
  struct bus_type platform_bus_type = {
         .name           = "platform",
         .dev_attrs      = platform_dev_attrs,
         .match          = platform_match,
         .uevent         = platform_uevent,
-       .suspend        = platform_suspend,
-       .suspend_late   = platform_suspend_late,
-       .resume_early   = platform_resume_early,
-       .resume         = platform_resume,
+       .pm             = PLATFORM_PM_OPS_PTR,
  };
  EXPORT_SYMBOL_GPL(platform_bus_type);
  
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c

index 45cc3d9eacb874db52f663fe4cc3dea83b41d92c..3250c5257b748286cfec992558a7590f0ab915e9 100644 (file)
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -12,11 +12,9 @@
   * and add it to the list of power-controlled devices. sysfs entries for
   * controlling device power management will also be added.
   *
- * A different set of lists than the global subsystem list are used to
- * keep track of power info because we use different lists to hold
- * devices based on what stage of the power management process they
- * are in. The power domain dependencies may also differ from the
- * ancestral dependencies that the subsystem list maintains.
+ * A separate list is used for keeping track of power info, because the power
+ * domain dependencies may differ from the ancestral dependencies that the
+ * subsystem list maintains.
   */
  
  #include <linux/device.h>
@@ -30,31 +28,40 @@
  #include "power.h"
  
  /*
- * The entries in the dpm_active list are in a depth first order, simply
+ * The entries in the dpm_list list are in a depth first order, simply
   * because children are guaranteed to be discovered after parents, and
   * are inserted at the back of the list on discovery.
   *
- * All the other lists are kept in the same order, for consistency.
- * However the lists aren't always traversed in the same order.
- * Semaphores must be acquired from the top (i.e., front) down
- * and released in the opposite order.  Devices must be suspended
- * from the bottom (i.e., end) up and resumed in the opposite order.
- * That way no parent will be suspended while it still has an active
- * child.
- *
   * Since device_pm_add() may be called with a device semaphore held,
   * we must never try to acquire a device semaphore while holding
   * dpm_list_mutex.
   */
  
-LIST_HEAD(dpm_active);
-static LIST_HEAD(dpm_off);
-static LIST_HEAD(dpm_off_irq);
+LIST_HEAD(dpm_list);
  
  static DEFINE_MUTEX(dpm_list_mtx);
  
-/* 'true' if all devices have been suspended, protected by dpm_list_mtx */
-static bool all_sleeping;
+/*
+ * Set once the preparation of devices for a PM transition has started, reset
+ * before starting to resume devices.  Protected by dpm_list_mtx.
+ */
+static bool transition_started;
+
+/**
+ *     device_pm_lock - lock the list of active devices used by the PM core
+ */
+void device_pm_lock(void)
+{
+       mutex_lock(&dpm_list_mtx);
+}
+
+/**
+ *     device_pm_unlock - unlock the list of active devices used by the PM core
+ */
+void device_pm_unlock(void)
+{
+       mutex_unlock(&dpm_list_mtx);
+}
  
  /**
   *     device_pm_add - add a device to the list of active devices
@@ -68,17 +75,25 @@ int device_pm_add(struct device *dev)
                  dev->bus ? dev->bus->name : "No Bus",
                  kobject_name(&dev->kobj));
         mutex_lock(&dpm_list_mtx);
-       if ((dev->parent && dev->parent->power.sleeping) || all_sleeping) {
-               if (dev->parent->power.sleeping)
-                       dev_warn(dev, "parent %s is sleeping\n",
+       if (dev->parent) {
+               if (dev->parent->power.status >= DPM_SUSPENDING) {
+                       dev_warn(dev, "parent %s is sleeping, will not add\n",
                                 dev->parent->bus_id);
-               else
-                       dev_warn(dev, "all devices are sleeping\n");
+                       WARN_ON(true);
+               }
+       } else if (transition_started) {
+               /*
+                * We refuse to register parentless devices while a PM
+                * transition is in progress in order to avoid leaving them
+                * unhandled down the road
+                */
                 WARN_ON(true);
         }
         error = dpm_sysfs_add(dev);
-       if (!error)
-               list_add_tail(&dev->power.entry, &dpm_active);
+       if (!error) {
+               dev->power.status = DPM_ON;
+               list_add_tail(&dev->power.entry, &dpm_list);
+       }
         mutex_unlock(&dpm_list_mtx);
         return error;
  }
@@ -100,73 +115,243 @@ void device_pm_remove(struct device *dev)
         mutex_unlock(&dpm_list_mtx);
  }
  
+/**
+ *     pm_op - execute the PM operation appropriate for given PM event
+ *     @dev:   Device.
+ *     @ops:   PM operations to choose from.
+ *     @state: PM transition of the system being carried out.
+ */
+static int pm_op(struct device *dev, struct pm_ops *ops, pm_message_t state)
+{
+       int error = 0;
+
+       switch (state.event) {
+#ifdef CONFIG_SUSPEND
+       case PM_EVENT_SUSPEND:
+               if (ops->suspend) {
+                       error = ops->suspend(dev);
+                       suspend_report_result(ops->suspend, error);
+               }
+               break;
+       case PM_EVENT_RESUME:
+               if (ops->resume) {
+                       error = ops->resume(dev);
+                       suspend_report_result(ops->resume, error);
+               }
+               break;
+#endif /* CONFIG_SUSPEND */
+#ifdef CONFIG_HIBERNATION
+       case PM_EVENT_FREEZE:
+       case PM_EVENT_QUIESCE:
+               if (ops->freeze) {
+                       error = ops->freeze(dev);
+                       suspend_report_result(ops->freeze, error);
+               }
+               break;
+       case PM_EVENT_HIBERNATE:
+               if (ops->poweroff) {
+                       error = ops->poweroff(dev);
+                       suspend_report_result(ops->poweroff, error);
+               }
+               break;
+       case PM_EVENT_THAW:
+       case PM_EVENT_RECOVER:
+               if (ops->thaw) {
+                       error = ops->thaw(dev);
+                       suspend_report_result(ops->thaw, error);
+               }
+               break;
+       case PM_EVENT_RESTORE:
+               if (ops->restore) {
+                       error = ops->restore(dev);
+                       suspend_report_result(ops->restore, error);
+               }
+               break;
+#endif /* CONFIG_HIBERNATION */
+       default:
+               error = -EINVAL;
+       }
+       return error;
+}
+
+/**
+ *     pm_noirq_op - execute the PM operation appropriate for given PM event
+ *     @dev:   Device.
+ *     @ops:   PM operations to choose from.
+ *     @state: PM transition of the system being carried out.
+ *
+ *     The operation is executed with interrupts disabled by the only remaining
+ *     functional CPU in the system.
+ */
+static int pm_noirq_op(struct device *dev, struct pm_ext_ops *ops,
+                       pm_message_t state)
+{
+       int error = 0;
+
+       switch (state.event) {
+#ifdef CONFIG_SUSPEND
+       case PM_EVENT_SUSPEND:
+               if (ops->suspend_noirq) {
+                       error = ops->suspend_noirq(dev);
+                       suspend_report_result(ops->suspend_noirq, error);
+               }
+               break;
+       case PM_EVENT_RESUME:
+               if (ops->resume_noirq) {
+                       error = ops->resume_noirq(dev);
+                       suspend_report_result(ops->resume_noirq, error);
+               }
+               break;
+#endif /* CONFIG_SUSPEND */
+#ifdef CONFIG_HIBERNATION
+       case PM_EVENT_FREEZE:
+       case PM_EVENT_QUIESCE:
+               if (ops->freeze_noirq) {
+                       error = ops->freeze_noirq(dev);
+                       suspend_report_result(ops->freeze_noirq, error);
+               }
+               break;
+       case PM_EVENT_HIBERNATE:
+               if (ops->poweroff_noirq) {
+                       error = ops->poweroff_noirq(dev);
+                       suspend_report_result(ops->poweroff_noirq, error);
+               }
+               break;
+       case PM_EVENT_THAW:
+       case PM_EVENT_RECOVER:
+               if (ops->thaw_noirq) {
+                       error = ops->thaw_noirq(dev);
+                       suspend_report_result(ops->thaw_noirq, error);
+               }
+               break;
+       case PM_EVENT_RESTORE:
+               if (ops->restore_noirq) {
+                       error = ops->restore_noirq(dev);
+                       suspend_report_result(ops->restore_noirq, error);
+               }
+               break;
+#endif /* CONFIG_HIBERNATION */
+       default:
+               error = -EINVAL;
+       }
+       return error;
+}
+
+static char *pm_verb(int event)
+{
+       switch (event) {
+       case PM_EVENT_SUSPEND:
+               return "suspend";
+       case PM_EVENT_RESUME:
+               return "resume";
+       case PM_EVENT_FREEZE:
+               return "freeze";
+       case PM_EVENT_QUIESCE:
+               return "quiesce";
+       case PM_EVENT_HIBERNATE:
+               return "hibernate";
+       case PM_EVENT_THAW:
+               return "thaw";
+       case PM_EVENT_RESTORE:
+               return "restore";
+       case PM_EVENT_RECOVER:
+               return "recover";
+       default:
+               return "(unknown PM event)";
+       }
+}
+
+static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info)
+{
+       dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event),
+               ((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ?
+               ", may wakeup" : "");
+}
+
+static void pm_dev_err(struct device *dev, pm_message_t state, char *info,
+                       int error)
+{
+       printk(KERN_ERR "PM: Device %s failed to %s%s: error %d\n",
+               kobject_name(&dev->kobj), pm_verb(state.event), info, error);
+}
+
  /*------------------------- Resume routines -------------------------*/
  
  /**
- *     resume_device_early - Power on one device (early resume).
+ *     resume_device_noirq - Power on one device (early resume).
   *     @dev:   Device.
+ *     @state: PM transition of the system being carried out.
   *
   *     Must be called with interrupts disabled.
   */
-static int resume_device_early(struct device *dev)
+static int resume_device_noirq(struct device *dev, pm_message_t state)
  {
         int error = 0;
  
         TRACE_DEVICE(dev);
         TRACE_RESUME(0);
  
-       if (dev->bus && dev->bus->resume_early) {
-               dev_dbg(dev, "EARLY resume\n");
+       if (!dev->bus)
+               goto End;
+
+       if (dev->bus->pm) {
+               pm_dev_dbg(dev, state, "EARLY ");
+               error = pm_noirq_op(dev, dev->bus->pm, state);
+       } else if (dev->bus->resume_early) {
+               pm_dev_dbg(dev, state, "legacy EARLY ");
                 error = dev->bus->resume_early(dev);
         }
-
+ End:
         TRACE_RESUME(error);
         return error;
  }
  
  /**
   *     dpm_power_up - Power on all regular (non-sysdev) devices.
+ *     @state: PM transition of the system being carried out.
   *
- *     Walk the dpm_off_irq list and power each device up. This
- *     is used for devices that required they be powered down with
- *     interrupts disabled. As devices are powered on, they are moved
- *     to the dpm_off list.
+ *     Execute the appropriate "noirq resume" callback for all devices marked
+ *     as DPM_OFF_IRQ.
   *
   *     Must be called with interrupts disabled and only one CPU running.
   */
-static void dpm_power_up(void)
+static void dpm_power_up(pm_message_t state)
  {
+       struct device *dev;
  
-       while (!list_empty(&dpm_off_irq)) {
-               struct list_head *entry = dpm_off_irq.next;
-               struct device *dev = to_device(entry);
+       list_for_each_entry(dev, &dpm_list, power.entry)
+               if (dev->power.status > DPM_OFF) {
+                       int error;
  
-               list_move_tail(entry, &dpm_off);
-               resume_device_early(dev);
-       }
+                       dev->power.status = DPM_OFF;
+                       error = resume_device_noirq(dev, state);
+                       if (error)
+                               pm_dev_err(dev, state, " early", error);
+               }
  }
  
  /**
   *     device_power_up - Turn on all devices that need special attention.
+ *     @state: PM transition of the system being carried out.
   *
   *     Power on system devices, then devices that required we shut them down
   *     with interrupts disabled.
   *
   *     Must be called with interrupts disabled.
   */
-void device_power_up(void)
+void device_power_up(pm_message_t state)
  {
         sysdev_resume();
-       dpm_power_up();
+       dpm_power_up(state);
  }
  EXPORT_SYMBOL_GPL(device_power_up);
  
  /**
   *     resume_device - Restore state for one device.
   *     @dev:   Device.
- *
+ *     @state: PM transition of the system being carried out.
   */
-static int resume_device(struct device *dev)
+static int resume_device(struct device *dev, pm_message_t state)
  {
         int error = 0;
  
@@ -175,21 +360,40 @@ static int resume_device(struct device *dev)
  
         down(&dev->sem);
  
-       if (dev->bus && dev->bus->resume) {
-               dev_dbg(dev,"resuming\n");
-               error = dev->bus->resume(dev);
+       if (dev->bus) {
+               if (dev->bus->pm) {
+                       pm_dev_dbg(dev, state, "");
+                       error = pm_op(dev, &dev->bus->pm->base, state);
+               } else if (dev->bus->resume) {
+                       pm_dev_dbg(dev, state, "legacy ");
+                       error = dev->bus->resume(dev);
+               }
+               if (error)
+                       goto End;
         }
  
-       if (!error && dev->type && dev->type->resume) {
-               dev_dbg(dev,"resuming\n");
-               error = dev->type->resume(dev);
+       if (dev->type) {
+               if (dev->type->pm) {
+                       pm_dev_dbg(dev, state, "type ");
+                       error = pm_op(dev, dev->type->pm, state);
+               } else if (dev->type->resume) {
+                       pm_dev_dbg(dev, state, "legacy type ");
+                       error = dev->type->resume(dev);
+               }
+               if (error)
+                       goto End;
         }
  
-       if (!error && dev->class && dev->class->resume) {
-               dev_dbg(dev,"class resume\n");
-               error = dev->class->resume(dev);
+       if (dev->class) {
+               if (dev->class->pm) {
+                       pm_dev_dbg(dev, state, "class ");
+                       error = pm_op(dev, dev->class->pm, state);
+               } else if (dev->class->resume) {
+                       pm_dev_dbg(dev, state, "legacy class ");
+                       error = dev->class->resume(dev);
+               }
         }
-
+ End:
         up(&dev->sem);
  
         TRACE_RESUME(error);
@@ -198,78 +402,161 @@ static int resume_device(struct device *dev)
  
  /**
   *     dpm_resume - Resume every device.
+ *     @state: PM transition of the system being carried out.
   *
- *     Resume the devices that have either not gone through
- *     the late suspend, or that did go through it but also
- *     went through the early resume.
+ *     Execute the appropriate "resume" callback for all devices the status of
+ *     which indicates that they are inactive.
+ */
+static void dpm_resume(pm_message_t state)
+{
+       struct list_head list;
+
+       INIT_LIST_HEAD(&list);
+       mutex_lock(&dpm_list_mtx);
+       transition_started = false;
+       while (!list_empty(&dpm_list)) {
+               struct device *dev = to_device(dpm_list.next);
+
+               get_device(dev);
+               if (dev->power.status >= DPM_OFF) {
+                       int error;
+
+                       dev->power.status = DPM_RESUMING;
+                       mutex_unlock(&dpm_list_mtx);
+
+                       error = resume_device(dev, state);
+
+                       mutex_lock(&dpm_list_mtx);
+                       if (error)
+                               pm_dev_err(dev, state, "", error);
+               } else if (dev->power.status == DPM_SUSPENDING) {
+                       /* Allow new children of the device to be registered */
+                       dev->power.status = DPM_RESUMING;
+               }
+               if (!list_empty(&dev->power.entry))
+                       list_move_tail(&dev->power.entry, &list);
+               put_device(dev);
+       }
+       list_splice(&list, &dpm_list);
+       mutex_unlock(&dpm_list_mtx);
+}
+
+/**
+ *     complete_device - Complete a PM transition for given device
+ *     @dev:   Device.
+ *     @state: PM transition of the system being carried out.
+ */
+static void complete_device(struct device *dev, pm_message_t state)
+{
+       down(&dev->sem);
+
+       if (dev->class && dev->class->pm && dev->class->pm->complete) {
+               pm_dev_dbg(dev, state, "completing class ");
+               dev->class->pm->complete(dev);
+       }
+
+       if (dev->type && dev->type->pm && dev->type->pm->complete) {
+               pm_dev_dbg(dev, state, "completing type ");
+               dev->type->pm->complete(dev);
+       }
+
+       if (dev->bus && dev->bus->pm && dev->bus->pm->base.complete) {
+               pm_dev_dbg(dev, state, "completing ");
+               dev->bus->pm->base.complete(dev);
+       }
+
+       up(&dev->sem);
+}
+
+/**
+ *     dpm_complete - Complete a PM transition for all devices.
+ *     @state: PM transition of the system being carried out.
   *
- *     Take devices from the dpm_off_list, resume them,
- *     and put them on the dpm_locked list.
+ *     Execute the ->complete() callbacks for all devices that are not marked
+ *     as DPM_ON.
   */
-static void dpm_resume(void)
+static void dpm_complete(pm_message_t state)
  {
+       struct list_head list;
+
+       INIT_LIST_HEAD(&list);
         mutex_lock(&dpm_list_mtx);
-       all_sleeping = false;
-       while(!list_empty(&dpm_off)) {
-               struct list_head *entry = dpm_off.next;
-               struct device *dev = to_device(entry);
+       while (!list_empty(&dpm_list)) {
+               struct device *dev = to_device(dpm_list.prev);
  
-               list_move_tail(entry, &dpm_active);
-               dev->power.sleeping = false;
-               mutex_unlock(&dpm_list_mtx);
-               resume_device(dev);
-               mutex_lock(&dpm_list_mtx);
+               get_device(dev);
+               if (dev->power.status > DPM_ON) {
+                       dev->power.status = DPM_ON;
+                       mutex_unlock(&dpm_list_mtx);
+
+                       complete_device(dev, state);
+
+                       mutex_lock(&dpm_list_mtx);
+               }
+               if (!list_empty(&dev->power.entry))
+                       list_move(&dev->power.entry, &list);
+               put_device(dev);
         }
+       list_splice(&list, &dpm_list);
         mutex_unlock(&dpm_list_mtx);
  }
  
  /**
   *     device_resume - Restore state of each device in system.
+ *     @state: PM transition of the system being carried out.
   *
   *     Resume all the devices, unlock them all, and allow new
   *     devices to be registered once again.
   */
-void device_resume(void)
+void device_resume(pm_message_t state)
  {
         might_sleep();
-       dpm_resume();
+       dpm_resume(state);
+       dpm_complete(state);
  }
  EXPORT_SYMBOL_GPL(device_resume);
  
  
  /*------------------------- Suspend routines -------------------------*/
  
-static inline char *suspend_verb(u32 event)
+/**
+ *     resume_event - return a PM message representing the resume event
+ *                    corresponding to given sleep state.
+ *     @sleep_state: PM message representing a sleep state.
+ */
+static pm_message_t resume_event(pm_message_t sleep_state)
  {
-       switch (event) {
-       case PM_EVENT_SUSPEND:  return "suspend";
-       case PM_EVENT_FREEZE:   return "freeze";
-       case PM_EVENT_PRETHAW:  return "prethaw";
-       default:                return "(unknown suspend event)";
+       switch (sleep_state.event) {
+       case PM_EVENT_SUSPEND:
+               return PMSG_RESUME;
+       case PM_EVENT_FREEZE:
+       case PM_EVENT_QUIESCE:
+               return PMSG_RECOVER;
+       case PM_EVENT_HIBERNATE:
+               return PMSG_RESTORE;
         }
-}
-
-static void
-suspend_device_dbg(struct device *dev, pm_message_t state, char *info)
-{
-       dev_dbg(dev, "%s%s%s\n", info, suspend_verb(state.event),
-               ((state.event == PM_EVENT_SUSPEND) && device_may_wakeup(dev)) ?
-               ", may wakeup" : "");
+       return PMSG_ON;
  }
  
  /**
- *     suspend_device_late - Shut down one device (late suspend).
+ *     suspend_device_noirq - Shut down one device (late suspend).
   *     @dev:   Device.
- *     @state: Power state device is entering.
+ *     @state: PM transition of the system being carried out.
   *
   *     This is called with interrupts off and only a single CPU running.
   */
-static int suspend_device_late(struct device *dev, pm_message_t state)
+static int suspend_device_noirq(struct device *dev, pm_message_t state)
  {
         int error = 0;
  
-       if (dev->bus && dev->bus->suspend_late) {
-               suspend_device_dbg(dev, state, "LATE ");
+       if (!dev->bus)
+               return 0;
+
+       if (dev->bus->pm) {
+               pm_dev_dbg(dev, state, "LATE ");
+               error = pm_noirq_op(dev, dev->bus->pm, state);
+       } else if (dev->bus->suspend_late) {
+               pm_dev_dbg(dev, state, "legacy LATE ");
                 error = dev->bus->suspend_late(dev, state);
                 suspend_report_result(dev->bus->suspend_late, error);
         }
@@ -278,37 +565,30 @@ static int suspend_device_late(struct device *dev, pm_message_t state)
  
  /**
   *     device_power_down - Shut down special devices.
- *     @state:         Power state to enter.
+ *     @state: PM transition of the system being carried out.
   *
- *     Power down devices that require interrupts to be disabled
- *     and move them from the dpm_off list to the dpm_off_irq list.
+ *     Power down devices that require interrupts to be disabled.
   *     Then power down system devices.
   *
   *     Must be called with interrupts disabled and only one CPU running.
   */
  int device_power_down(pm_message_t state)
  {
+       struct device *dev;
         int error = 0;
  
-       while (!list_empty(&dpm_off)) {
-               struct list_head *entry = dpm_off.prev;
-               struct device *dev = to_device(entry);
-
-               error = suspend_device_late(dev, state);
+       list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
+               error = suspend_device_noirq(dev, state);
                 if (error) {
-                       printk(KERN_ERR "Could not power down device %s: "
-                                       "error %d\n",
-                                       kobject_name(&dev->kobj), error);
+                       pm_dev_err(dev, state, " late", error);
                         break;
                 }
-               if (!list_empty(&dev->power.entry))
-                       list_move(&dev->power.entry, &dpm_off_irq);
+               dev->power.status = DPM_OFF_IRQ;
         }
-
         if (!error)
                 error = sysdev_suspend(state);
         if (error)
-               dpm_power_up();
+               dpm_power_up(resume_event(state));
         return error;
  }
  EXPORT_SYMBOL_GPL(device_power_down);
@@ -316,7 +596,7 @@ EXPORT_SYMBOL_GPL(device_power_down);
  /**
   *     suspend_device - Save state of one device.
   *     @dev:   Device.
- *     @state: Power state device is entering.
+ *     @state: PM transition of the system being carried out.
   */
  static int suspend_device(struct device *dev, pm_message_t state)
  {
@@ -324,24 +604,43 @@ static int suspend_device(struct device *dev, pm_message_t state)
  
         down(&dev->sem);
  
-       if (dev->class && dev->class->suspend) {
-               suspend_device_dbg(dev, state, "class ");
-               error = dev->class->suspend(dev, state);
-               suspend_report_result(dev->class->suspend, error);
+       if (dev->class) {
+               if (dev->class->pm) {
+                       pm_dev_dbg(dev, state, "class ");
+                       error = pm_op(dev, dev->class->pm, state);
+               } else if (dev->class->suspend) {
+                       pm_dev_dbg(dev, state, "legacy class ");
+                       error = dev->class->suspend(dev, state);
+                       suspend_report_result(dev->class->suspend, error);
+               }
+               if (error)
+                       goto End;
         }
  
-       if (!error && dev->type && dev->type->suspend) {
-               suspend_device_dbg(dev, state, "type ");
-               error = dev->type->suspend(dev, state);
-               suspend_report_result(dev->type->suspend, error);
+       if (dev->type) {
+               if (dev->type->pm) {
+                       pm_dev_dbg(dev, state, "type ");
+                       error = pm_op(dev, dev->type->pm, state);
+               } else if (dev->type->suspend) {
+                       pm_dev_dbg(dev, state, "legacy type ");
+                       error = dev->type->suspend(dev, state);
+                       suspend_report_result(dev->type->suspend, error);
+               }
+               if (error)
+                       goto End;
         }
  
-       if (!error && dev->bus && dev->bus->suspend) {
-               suspend_device_dbg(dev, state, "");
-               error = dev->bus->suspend(dev, state);
-               suspend_report_result(dev->bus->suspend, error);
+       if (dev->bus) {
+               if (dev->bus->pm) {
+                       pm_dev_dbg(dev, state, "");
+                       error = pm_op(dev, &dev->bus->pm->base, state);
+               } else if (dev->bus->suspend) {
+                       pm_dev_dbg(dev, state, "legacy ");
+                       error = dev->bus->suspend(dev, state);
+                       suspend_report_result(dev->bus->suspend, error);
+               }
         }
-
+ End:
         up(&dev->sem);
  
         return error;
@@ -349,67 +648,139 @@ static int suspend_device(struct device *dev, pm_message_t state)
  
  /**
   *     dpm_suspend - Suspend every device.
- *     @state: Power state to put each device in.
- *
- *     Walk the dpm_locked list.  Suspend each device and move it
- *     to the dpm_off list.
+ *     @state: PM transition of the system being carried out.
   *
- *     (For historical reasons, if it returns -EAGAIN, that used to mean
- *     that the device would be called again with interrupts disabled.
- *     These days, we use the "suspend_late()" callback for that, so we
- *     print a warning and consider it an error).
+ *     Execute the appropriate "suspend" callbacks for all devices.
   */
  static int dpm_suspend(pm_message_t state)
  {
+       struct list_head list;
         int error = 0;
  
+       INIT_LIST_HEAD(&list);
         mutex_lock(&dpm_list_mtx);
-       while (!list_empty(&dpm_active)) {
-               struct list_head *entry = dpm_active.prev;
-               struct device *dev = to_device(entry);
+       while (!list_empty(&dpm_list)) {
+               struct device *dev = to_device(dpm_list.prev);
  
-               WARN_ON(dev->parent && dev->parent->power.sleeping);
-
-               dev->power.sleeping = true;
+               get_device(dev);
                 mutex_unlock(&dpm_list_mtx);
+
                 error = suspend_device(dev, state);
+
                 mutex_lock(&dpm_list_mtx);
                 if (error) {
-                       printk(KERN_ERR "Could not suspend device %s: "
-                                       "error %d%s\n",
-                                       kobject_name(&dev->kobj),
-                                       error,
-                                       (error == -EAGAIN ?
-                                       " (please convert to suspend_late)" :
-                                       ""));
-                       dev->power.sleeping = false;
+                       pm_dev_err(dev, state, "", error);
+                       put_device(dev);
                         break;
                 }
+               dev->power.status = DPM_OFF;
                 if (!list_empty(&dev->power.entry))
-                       list_move(&dev->power.entry, &dpm_off);
+                       list_move(&dev->power.entry, &list);
+               put_device(dev);
         }
-       if (!error)
-               all_sleeping = true;
+       list_splice(&list, dpm_list.prev);
         mutex_unlock(&dpm_list_mtx);
+       return error;
+}
+
+/**
+ *     prepare_device - Execute the ->prepare() callback(s) for given device.
+ *     @dev:   Device.
+ *     @state: PM transition of the system being carried out.
+ */
+static int prepare_device(struct device *dev, pm_message_t state)
+{
+       int error = 0;
+
+       down(&dev->sem);
+
+       if (dev->bus && dev->bus->pm && dev->bus->pm->base.prepare) {
+               pm_dev_dbg(dev, state, "preparing ");
+               error = dev->bus->pm->base.prepare(dev);
+               suspend_report_result(dev->bus->pm->base.prepare, error);
+               if (error)
+                       goto End;
+       }
+
+       if (dev->type && dev->type->pm && dev->type->pm->prepare) {
+               pm_dev_dbg(dev, state, "preparing type ");
+               error = dev->type->pm->prepare(dev);
+               suspend_report_result(dev->type->pm->prepare, error);
+               if (error)
+                       goto End;
+       }
+
+       if (dev->class && dev->class->pm && dev->class->pm->prepare) {
+               pm_dev_dbg(dev, state, "preparing class ");
+               error = dev->class->pm->prepare(dev);
+               suspend_report_result(dev->class->pm->prepare, error);
+       }
+ End:
+       up(&dev->sem);
+
+       return error;
+}
+
+/**
+ *     dpm_prepare - Prepare all devices for a PM transition.
+ *     @state: PM transition of the system being carried out.
+ *
+ *     Execute the ->prepare() callback for all devices.
+ */
+static int dpm_prepare(pm_message_t state)
+{
+       struct list_head list;
+       int error = 0;
+
+       INIT_LIST_HEAD(&list);
+       mutex_lock(&dpm_list_mtx);
+       transition_started = true;
+       while (!list_empty(&dpm_list)) {
+               struct device *dev = to_device(dpm_list.next);
+
+               get_device(dev);
+               dev->power.status = DPM_PREPARING;
+               mutex_unlock(&dpm_list_mtx);
  
+               error = prepare_device(dev, state);
+
+               mutex_lock(&dpm_list_mtx);
+               if (error) {
+                       dev->power.status = DPM_ON;
+                       if (error == -EAGAIN) {
+                               put_device(dev);
+                               continue;
+                       }
+                       printk(KERN_ERR "PM: Failed to prepare device %s "
+                               "for power transition: error %d\n",
+                               kobject_name(&dev->kobj), error);
+                       put_device(dev);
+                       break;
+               }
+               dev->power.status = DPM_SUSPENDING;
+               if (!list_empty(&dev->power.entry))
+                       list_move_tail(&dev->power.entry, &list);
+               put_device(dev);
+       }
+       list_splice(&list, &dpm_list);
+       mutex_unlock(&dpm_list_mtx);
         return error;
  }
  
  /**
   *     device_suspend - Save state and stop all devices in system.
- *     @state: new power management state
+ *     @state: PM transition of the system being carried out.
   *
- *     Prevent new devices from being registered, then lock all devices
- *     and suspend them.
+ *     Prepare and suspend all devices.
   */
  int device_suspend(pm_message_t state)
  {
         int error;
  
         might_sleep();
-       error = dpm_suspend(state);
-       if (error)
-               device_resume();
+       error = dpm_prepare(state);
+       if (!error)
+               error = dpm_suspend(state);
         return error;
  }
  EXPORT_SYMBOL_GPL(device_suspend);
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h

index a6894f2a4b9960d1a19094fa8b4b7890b9bd0e6d..a3252c0e28878fde258ebac8c10ad7c10cafbb86 100644 (file)
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -4,7 +4,7 @@
   * main.c
   */
  
-extern struct list_head dpm_active;    /* The active device list */
+extern struct list_head dpm_list;      /* The active device list */
  
  static inline struct device *to_device(struct list_head *entry)
  {
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c

index d11f74b038db2039026a6f4a6b1a490e994b51f6..596aeecfdffe4be62c7e6773046e9c23d4dd37bb 100644 (file)
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -6,9 +6,6 @@
  #include <linux/string.h>
  #include "power.h"
  
-int (*platform_enable_wakeup)(struct device *dev, int is_on);
-
-
  /*
   *     wakeup - Report/change current wakeup option for device
   *
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c

index 87a7f1d02578f194c821eb26049cbaafcf46a54c..9b1b20b59e0a7b2dcc61bad05f078813e65f92fe 100644 (file)
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -188,9 +188,9 @@ static int show_file_hash(unsigned int value)
  static int show_dev_hash(unsigned int value)
  {
         int match = 0;
-       struct list_head * entry = dpm_active.prev;
+       struct list_head *entry = dpm_list.prev;
  
-       while (entry != &dpm_active) {
+       while (entry != &dpm_list) {
                 struct device * dev = to_device(entry);
                 unsigned int hash = hash_string(DEVSEED, dev->bus_id, DEVHASH);
                 if (hash == value) {
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig

index 0d1d2133d9bc239fe800a3e64c05ceef6c1e898a..61ad8d639ba39830ee96687d2ef18d3fcba8cad4 100644 (file)
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -433,4 +433,16 @@ config VIRTIO_BLK
           This is the virtual block driver for virtio.  It can be used with
            lguest or QEMU based VMMs (like KVM or Xen).  Say Y or M.
  
+config BLK_DEV_HD
+       bool "Very old hard disk (MFM/RLL/IDE) driver"
+       depends on HAVE_IDE
+       depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN
+       help
+         This is a very old hard disk driver that lacks the enhanced
+         functionality of the newer ones.
+
+         It is required for systems with ancient MFM/RLL/ESDI drives.
+
+         If unsure, say N.
+
  endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile

index 5e584306be9992aae29c39a0c7221bdc82f053c8..204332b29578f7911ba4b73eb808ddfe21ff1246 100644 (file)
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -29,5 +29,6 @@ obj-$(CONFIG_VIRTIO_BLK)      += virtio_blk.o
  obj-$(CONFIG_VIODASD)          += viodasd.o
  obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
  obj-$(CONFIG_BLK_DEV_UB)       += ub.o
+obj-$(CONFIG_BLK_DEV_HD)       += hd.o
  
  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += xen-blkfront.o
diff --git a/drivers/block/hd.c b/drivers/block/hd.c

new file mode 100644 (file)

index 0000000..682243b
--- /dev/null
+++ b/drivers/block/hd.c
@@ -0,0 +1,814 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This is the low-level hd interrupt support. It traverses the
+ * request-list, using interrupts to jump between functions. As
+ * all the functions are called within interrupts, we may not
+ * sleep. Special care is recommended.
+ *
+ *  modified by Drew Eckhardt to check nr of hd's from the CMOS.
+ *
+ *  Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
+ *  in the early extended-partition checks and added DM partitions
+ *
+ *  IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
+ *  and general streamlining by Mark Lord.
+ *
+ *  Removed 99% of above. Use Mark's ide driver for those options.
+ *  This is now a lightweight ST-506 driver. (Paul Gortmaker)
+ *
+ *  Modified 1995 Russell King for ARM processor.
+ *
+ *  Bugfix: max_sectors must be <= 255 or the wheels tend to come
+ *  off in a hurry once you queue things up - Paul G. 02/2001
+ */
+
+/* Uncomment the following if you want verbose error reports. */
+/* #define VERBOSE_ERRORS */
+
+#include <linux/blkdev.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/genhd.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/blkpg.h>
+#include <linux/hdreg.h>
+
+#define REALLY_SLOW_IO
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#ifdef __arm__
+#undef  HD_IRQ
+#endif
+#include <asm/irq.h>
+#ifdef __arm__
+#define HD_IRQ IRQ_HARDDISK
+#endif
+
+/* Hd controller register ports */
+
+#define HD_DATA                0x1f0           /* _CTL when writing */
+#define HD_ERROR       0x1f1           /* see err-bits */
+#define HD_NSECTOR     0x1f2           /* nr of sectors to read/write */
+#define HD_SECTOR      0x1f3           /* starting sector */
+#define HD_LCYL                0x1f4           /* starting cylinder */
+#define HD_HCYL                0x1f5           /* high byte of starting cyl */
+#define HD_CURRENT     0x1f6           /* 101dhhhh , d=drive, hhhh=head */
+#define HD_STATUS      0x1f7           /* see status-bits */
+#define HD_FEATURE     HD_ERROR        /* same io address, read=error, write=feature */
+#define HD_PRECOMP     HD_FEATURE      /* obsolete use of this port - predates IDE */
+#define HD_COMMAND     HD_STATUS       /* same io address, read=status, write=cmd */
+
+#define HD_CMD         0x3f6           /* used for resets */
+#define HD_ALTSTATUS   0x3f6           /* same as HD_STATUS but doesn't clear irq */
+
+/* Bits of HD_STATUS */
+#define ERR_STAT               0x01
+#define INDEX_STAT             0x02
+#define ECC_STAT               0x04    /* Corrected error */
+#define DRQ_STAT               0x08
+#define SEEK_STAT              0x10
+#define SERVICE_STAT           SEEK_STAT
+#define WRERR_STAT             0x20
+#define READY_STAT             0x40
+#define BUSY_STAT              0x80
+
+/* Bits for HD_ERROR */
+#define MARK_ERR               0x01    /* Bad address mark */
+#define TRK0_ERR               0x02    /* couldn't find track 0 */
+#define ABRT_ERR               0x04    /* Command aborted */
+#define MCR_ERR                        0x08    /* media change request */
+#define ID_ERR                 0x10    /* ID field not found */
+#define MC_ERR                 0x20    /* media changed */
+#define ECC_ERR                        0x40    /* Uncorrectable ECC error */
+#define BBD_ERR                        0x80    /* pre-EIDE meaning:  block marked bad */
+#define ICRC_ERR               0x80    /* new meaning:  CRC error during transfer */
+
+static DEFINE_SPINLOCK(hd_lock);
+static struct request_queue *hd_queue;
+
+#define MAJOR_NR HD_MAJOR
+#define QUEUE (hd_queue)
+#define CURRENT elv_next_request(hd_queue)
+
+#define TIMEOUT_VALUE  (6*HZ)
+#define        HD_DELAY        0
+
+#define MAX_ERRORS     16      /* Max read/write errors/sector */
+#define RESET_FREQ      8      /* Reset controller every 8th retry */
+#define RECAL_FREQ      4      /* Recalibrate every 4th retry */
+#define MAX_HD         2
+
+#define STAT_OK                (READY_STAT|SEEK_STAT)
+#define OK_STATUS(s)   (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK)
+
+static void recal_intr(void);
+static void bad_rw_intr(void);
+
+static int reset;
+static int hd_error;
+
+/*
+ *  This struct defines the HD's and their types.
+ */
+struct hd_i_struct {
+       unsigned int head, sect, cyl, wpcom, lzone, ctl;
+       int unit;
+       int recalibrate;
+       int special_op;
+};
+
+#ifdef HD_TYPE
+static struct hd_i_struct hd_info[] = { HD_TYPE };
+static int NR_HD = ARRAY_SIZE(hd_info);
+#else
+static struct hd_i_struct hd_info[MAX_HD];
+static int NR_HD;
+#endif
+
+static struct gendisk *hd_gendisk[MAX_HD];
+
+static struct timer_list device_timer;
+
+#define TIMEOUT_VALUE (6*HZ)
+
+#define SET_TIMER                                                      \
+       do {                                                            \
+               mod_timer(&device_timer, jiffies + TIMEOUT_VALUE);      \
+       } while (0)
+
+static void (*do_hd)(void) = NULL;
+/*
+ * Record x in do_hd, the handler that hd_interrupt() dispatches to.
+ * A non-NULL handler also (re)arms the timeout timer via SET_TIMER;
+ * a NULL handler cancels the timer instead.
+ *
+ * The body is wrapped in do { } while (0) so that "SET_HANDLER(x);"
+ * expands to exactly one statement: it is then safe inside an unbraced
+ * if/else and leaves no stray empty statement behind.
+ */
+#define SET_HANDLER(x)                                                 \
+       do {                                                            \
+               if ((do_hd = (x)) != NULL)                              \
+                       SET_TIMER;                                      \
+               else                                                    \
+                       del_timer(&device_timer);                       \
+       } while (0)
+
+
+#if (HD_DELAY > 0)
+
+#include <asm/i8253.h>
+
+unsigned long last_req;
+
+unsigned long read_timer(void)
+{
+       unsigned long t, flags;
+       int i;
+
+       spin_lock_irqsave(&i8253_lock, flags);
+       t = jiffies * 11932;
+       outb_p(0, 0x43);
+       i = inb_p(0x40);
+       i |= inb(0x40) << 8;
+       spin_unlock_irqrestore(&i8253_lock, flags);
+       return(t - i);
+}
+#endif
+
+static void __init hd_setup(char *str, int *ints)
+{
+       int hdind = 0;
+
+       if (ints[0] != 3)
+               return;
+       if (hd_info[0].head != 0)
+               hdind = 1;
+       hd_info[hdind].head = ints[2];
+       hd_info[hdind].sect = ints[3];
+       hd_info[hdind].cyl = ints[1];
+       hd_info[hdind].wpcom = 0;
+       hd_info[hdind].lzone = ints[1];
+       hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0);
+       NR_HD = hdind+1;
+}
+
+static void dump_status(const char *msg, unsigned int stat)
+{
+       char *name = "hd?";
+       if (CURRENT)
+               name = CURRENT->rq_disk->disk_name;
+
+#ifdef VERBOSE_ERRORS
+       printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
+       if (stat & BUSY_STAT)   printk("Busy ");
+       if (stat & READY_STAT)  printk("DriveReady ");
+       if (stat & WRERR_STAT)  printk("WriteFault ");
+       if (stat & SEEK_STAT)   printk("SeekComplete ");
+       if (stat & DRQ_STAT)    printk("DataRequest ");
+       if (stat & ECC_STAT)    printk("CorrectedError ");
+       if (stat & INDEX_STAT)  printk("Index ");
+       if (stat & ERR_STAT)    printk("Error ");
+       printk("}\n");
+       if ((stat & ERR_STAT) == 0) {
+               hd_error = 0;
+       } else {
+               hd_error = inb(HD_ERROR);
+               printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff);
+               if (hd_error & BBD_ERR)         printk("BadSector ");
+               if (hd_error & ECC_ERR)         printk("UncorrectableError ");
+               if (hd_error & ID_ERR)          printk("SectorIdNotFound ");
+               if (hd_error & ABRT_ERR)        printk("DriveStatusError ");
+               if (hd_error & TRK0_ERR)        printk("TrackZeroNotFound ");
+               if (hd_error & MARK_ERR)        printk("AddrMarkNotFound ");
+               printk("}");
+               if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) {
+                       printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
+                               inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
+                       if (CURRENT)
+                               printk(", sector=%ld", CURRENT->sector);
+               }
+               printk("\n");
+       }
+#else
+       printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff);
+       if ((stat & ERR_STAT) == 0) {
+               hd_error = 0;
+       } else {
+               hd_error = inb(HD_ERROR);
+               printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff);
+       }
+#endif
+}
+
+static void check_status(void)
+{
+       int i = inb_p(HD_STATUS);
+
+       if (!OK_STATUS(i)) {
+               dump_status("check_status", i);
+               bad_rw_intr();
+       }
+}
+
+static int controller_busy(void)
+{
+       int retries = 100000;
+       unsigned char status;
+
+       do {
+               status = inb_p(HD_STATUS);
+       } while ((status & BUSY_STAT) && --retries);
+       return status;
+}
+
+static int status_ok(void)
+{
+       unsigned char status = inb_p(HD_STATUS);
+
+       if (status & BUSY_STAT)
+               return 1;       /* Ancient, but does it make sense??? */
+       if (status & WRERR_STAT)
+               return 0;
+       if (!(status & READY_STAT))
+               return 0;
+       if (!(status & SEEK_STAT))
+               return 0;
+       return 1;
+}
+
+static int controller_ready(unsigned int drive, unsigned int head)
+{
+       int retry = 100;
+
+       do {
+               if (controller_busy() & BUSY_STAT)
+                       return 0;
+               outb_p(0xA0 | (drive<<4) | head, HD_CURRENT);
+               if (status_ok())
+                       return 1;
+       } while (--retry);
+       return 0;
+}
+
+static void hd_out(struct hd_i_struct *disk,
+                  unsigned int nsect,
+                  unsigned int sect,
+                  unsigned int head,
+                  unsigned int cyl,
+                  unsigned int cmd,
+                  void (*intr_addr)(void))
+{
+       unsigned short port;
+
+#if (HD_DELAY > 0)
+       while (read_timer() - last_req < HD_DELAY)
+               /* nothing */;
+#endif
+       if (reset)
+               return;
+       if (!controller_ready(disk->unit, head)) {
+               reset = 1;
+               return;
+       }
+       SET_HANDLER(intr_addr);
+       outb_p(disk->ctl, HD_CMD);
+       port = HD_DATA;
+       outb_p(disk->wpcom >> 2, ++port);
+       outb_p(nsect, ++port);
+       outb_p(sect, ++port);
+       outb_p(cyl, ++port);
+       outb_p(cyl >> 8, ++port);
+       outb_p(0xA0 | (disk->unit << 4) | head, ++port);
+       outb_p(cmd, ++port);
+}
+
+static void hd_request (void);
+
+static int drive_busy(void)
+{
+       unsigned int i;
+       unsigned char c;
+
+       for (i = 0; i < 500000 ; i++) {
+               c = inb_p(HD_STATUS);
+               if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK)
+                       return 0;
+       }
+       dump_status("reset timed out", c);
+       return 1;
+}
+
+static void reset_controller(void)
+{
+       int     i;
+
+       outb_p(4, HD_CMD);
+       for (i = 0; i < 1000; i++) barrier();
+       outb_p(hd_info[0].ctl & 0x0f, HD_CMD);
+       for (i = 0; i < 1000; i++) barrier();
+       if (drive_busy())
+               printk("hd: controller still busy\n");
+       else if ((hd_error = inb(HD_ERROR)) != 1)
+               printk("hd: controller reset failed: %02x\n", hd_error);
+}
+
+static void reset_hd(void)
+{
+       static int i;
+
+repeat:
+       if (reset) {
+               reset = 0;
+               i = -1;
+               reset_controller();
+       } else {
+               check_status();
+               if (reset)
+                       goto repeat;
+       }
+       if (++i < NR_HD) {
+               struct hd_i_struct *disk = &hd_info[i];
+               disk->special_op = disk->recalibrate = 1;
+               hd_out(disk, disk->sect, disk->sect, disk->head-1,
+                       disk->cyl, WIN_SPECIFY, &reset_hd);
+               if (reset)
+                       goto repeat;
+       } else
+               hd_request();
+}
+
+/*
+ * Ok, don't know what to do with the unexpected interrupts: on some machines
+ * doing a reset and a retry seems to result in an eternal loop. Right now I
+ * ignore it, and just set the timeout.
+ *
+ * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the
+ * drive enters "idle", "standby", or "sleep" mode, so if the status looks
+ * "good", we just ignore the interrupt completely.
+ */
+static void unexpected_hd_interrupt(void)
+{
+       unsigned int stat = inb_p(HD_STATUS);
+
+       if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) {
+               dump_status("unexpected interrupt", stat);
+               SET_TIMER;
+       }
+}
+
+/*
+ * bad_rw_intr() now tries to be a bit smarter and does things
+ * according to the error returned by the controller.
+ * -Mika Liljeberg (liljeber@cs.Helsinki.FI)
+ */
+static void bad_rw_intr(void)
+{
+       struct request *req = CURRENT;
+       if (req != NULL) {
+               struct hd_i_struct *disk = req->rq_disk->private_data;
+               if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
+                       end_request(req, 0);
+                       disk->special_op = disk->recalibrate = 1;
+               } else if (req->errors % RESET_FREQ == 0)
+                       reset = 1;
+               else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0)
+                       disk->special_op = disk->recalibrate = 1;
+               /* Otherwise just retry */
+       }
+}
+
+static inline int wait_DRQ(void)
+{
+       int retries;
+       int stat;
+
+       for (retries = 0; retries < 100000; retries++) {
+               stat = inb_p(HD_STATUS);
+               if (stat & DRQ_STAT)
+                       return 0;
+       }
+       dump_status("wait_DRQ", stat);
+       return -1;
+}
+
+static void read_intr(void)
+{
+       struct request *req;
+       int i, retries = 100000;
+
+       do {
+               i = (unsigned) inb_p(HD_STATUS);
+               if (i & BUSY_STAT)
+                       continue;
+               if (!OK_STATUS(i))
+                       break;
+               if (i & DRQ_STAT)
+                       goto ok_to_read;
+       } while (--retries > 0);
+       dump_status("read_intr", i);
+       bad_rw_intr();
+       hd_request();
+       return;
+ok_to_read:
+       req = CURRENT;
+       insw(HD_DATA, req->buffer, 256);
+       req->sector++;
+       req->buffer += 512;
+       req->errors = 0;
+       i = --req->nr_sectors;
+       --req->current_nr_sectors;
+#ifdef DEBUG
+       printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n",
+               req->rq_disk->disk_name, req->sector, req->nr_sectors,
+               req->buffer+512);
+#endif
+       if (req->current_nr_sectors <= 0)
+               end_request(req, 1);
+       if (i > 0) {
+               SET_HANDLER(&read_intr);
+               return;
+       }
+       (void) inb_p(HD_STATUS);
+#if (HD_DELAY > 0)
+       last_req = read_timer();
+#endif
+       if (elv_next_request(QUEUE))
+               hd_request();
+       return;
+}
+
+static void write_intr(void)
+{
+       struct request *req = CURRENT;
+       int i;
+       int retries = 100000;
+
+       do {
+               i = (unsigned) inb_p(HD_STATUS);
+               if (i & BUSY_STAT)
+                       continue;
+               if (!OK_STATUS(i))
+                       break;
+               if ((req->nr_sectors <= 1) || (i & DRQ_STAT))
+                       goto ok_to_write;
+       } while (--retries > 0);
+       dump_status("write_intr", i);
+       bad_rw_intr();
+       hd_request();
+       return;
+ok_to_write:
+       req->sector++;
+       i = --req->nr_sectors;
+       --req->current_nr_sectors;
+       req->buffer += 512;
+       if (!i || (req->bio && req->current_nr_sectors <= 0))
+               end_request(req, 1);
+       if (i > 0) {
+               SET_HANDLER(&write_intr);
+               outsw(HD_DATA, req->buffer, 256);
+               local_irq_enable();
+       } else {
+#if (HD_DELAY > 0)
+               last_req = read_timer();
+#endif
+               hd_request();
+       }
+       return;
+}
+
+static void recal_intr(void)
+{
+       check_status();
+#if (HD_DELAY > 0)
+       last_req = read_timer();
+#endif
+       hd_request();
+}
+
+/*
+ * This is another of the error-routines I don't know what to do with. The
+ * best idea seems to just set reset, and start all over again.
+ */
+static void hd_times_out(unsigned long dummy)
+{
+       char *name;
+
+       do_hd = NULL;
+
+       if (!CURRENT)
+               return;
+
+       disable_irq(HD_IRQ);
+       local_irq_enable();
+       reset = 1;
+       name = CURRENT->rq_disk->disk_name;
+       printk("%s: timeout\n", name);
+       if (++CURRENT->errors >= MAX_ERRORS) {
+#ifdef DEBUG
+               printk("%s: too many errors\n", name);
+#endif
+               end_request(CURRENT, 0);
+       }
+       local_irq_disable();
+       hd_request();
+       enable_irq(HD_IRQ);
+}
+
+static int do_special_op(struct hd_i_struct *disk, struct request *req)
+{
+       if (disk->recalibrate) {
+               disk->recalibrate = 0;
+               hd_out(disk, disk->sect, 0, 0, 0, WIN_RESTORE, &recal_intr);
+               return reset;
+       }
+       if (disk->head > 16) {
+               printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name);
+               end_request(req, 0);
+       }
+       disk->special_op = 0;
+       return 1;
+}
+
+/*
+ * The driver enables interrupts as much as possible.  In order to do this,
+ * (a) the device-interrupt is disabled before entering hd_request(),
+ * and (b) the timeout-interrupt is disabled before the sti().
+ *
+ * Interrupts are still masked (by default) whenever we are exchanging
+ * data/cmds with a drive, because some drives seem to have very poor
+ * tolerance for latency during I/O. The IDE driver has support to unmask
+ * interrupts for non-broken hardware, so use that driver if required.
+ */
+static void hd_request(void)
+{
+       unsigned int block, nsect, sec, track, head, cyl;
+       struct hd_i_struct *disk;
+       struct request *req;
+
+       /* do_hd is non-NULL while a handler is still pending: nothing to do. */
+       if (do_hd)
+               return;
+repeat:
+       del_timer(&device_timer);
+       local_irq_enable();
+
+       req = CURRENT;
+       if (!req) {
+               do_hd = NULL;
+               return;
+       }
+
+       /* A pending reset takes priority; reset_hd() calls back in here. */
+       if (reset) {
+               local_irq_disable();
+               reset_hd();
+               return;
+       }
+       disk = req->rq_disk->private_data;
+       block = req->sector;
+       nsect = req->nr_sectors;
+       /* Fail requests that start or end beyond the disk's capacity. */
+       if (block >= get_capacity(req->rq_disk) ||
+           ((block+nsect) > get_capacity(req->rq_disk))) {
+               printk("%s: bad access: block=%d, count=%d\n",
+                       req->rq_disk->disk_name, block, nsect);
+               end_request(req, 0);
+               goto repeat;
+       }
+
+       if (disk->special_op) {
+               if (do_special_op(disk, req))
+                       goto repeat;
+               return;
+       }
+       /* Linear block number -> cylinder/head/sector (sectors count from 1). */
+       sec   = block % disk->sect + 1;
+       track = block / disk->sect;
+       head  = track % disk->head;
+       cyl   = track / disk->head;
+#ifdef DEBUG
+       printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n",
+               req->rq_disk->disk_name,
+               rq_data_dir(req) == READ ? "read" : "writ",
+               cyl, head, sec, nsect, req->buffer);
+#endif
+       if (blk_fs_request(req)) {
+               switch (rq_data_dir(req)) {
+               case READ:
+                       hd_out(disk, nsect, sec, head, cyl, WIN_READ,
+                               &read_intr);
+                       /* hd_out() sets reset when the controller is not ready. */
+                       if (reset)
+                               goto repeat;
+                       break;
+               case WRITE:
+                       hd_out(disk, nsect, sec, head, cyl, WIN_WRITE,
+                               &write_intr);
+                       if (reset)
+                               goto repeat;
+                       /* The first sector is pushed here once DRQ is raised. */
+                       if (wait_DRQ()) {
+                               bad_rw_intr();
+                               goto repeat;
+                       }
+                       outsw(HD_DATA, req->buffer, 256);
+                       break;
+               default:
+                       printk("unknown hd-command\n");
+                       end_request(req, 0);
+                       break;
+               }
+       }
+}
+
+static void do_hd_request(struct request_queue *q)
+{
+       disable_irq(HD_IRQ);
+       hd_request();
+       enable_irq(HD_IRQ);
+}
+
+static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct hd_i_struct *disk = bdev->bd_disk->private_data;
+
+       geo->heads = disk->head;
+       geo->sectors = disk->sect;
+       geo->cylinders = disk->cyl;
+       return 0;
+}
+
+/*
+ * Interrupt handler registered for HD_IRQ.
+ *
+ * The handler pointer held in do_hd is one-shot: it is read and cleared
+ * here, the device timer is cancelled, and the saved handler is then
+ * called.  If no handler was armed, unexpected_hd_interrupt() is called
+ * instead.  Local interrupts are re-enabled before returning IRQ_HANDLED.
+ */
+
+static irqreturn_t hd_interrupt(int irq, void *dev_id)
+{
+       void (*handler)(void) = do_hd;
+
+       do_hd = NULL;
+       del_timer(&device_timer);
+       if (!handler)
+               handler = unexpected_hd_interrupt;
+       handler();
+       local_irq_enable();
+       return IRQ_HANDLED;
+}
+
+static struct block_device_operations hd_fops = {
+       .getgeo =       hd_getgeo,
+};
+
+/*
+ * This is the hard disk IRQ description. The IRQF_DISABLED flag passed
+ * to request_irq() means we run the IRQ-handler with interrupts
+ * disabled: this is bad for interrupt latency, but anything else has
+ * led to problems on some machines.
+ *
+ * We enable interrupts in some of the routines after making sure it's
+ * safe.
+ */
+
+/*
+ * hd_init - set up the legacy hard disk driver.
+ *
+ * Registers the block major and request queue, allocates one gendisk per
+ * configured drive (NR_HD of them), then claims the IRQ and the I/O port
+ * regions before publishing the disks with add_disk().
+ *
+ * Returns 0 on success or a negative errno value.  On failure everything
+ * acquired up to that point is released again.
+ */
+static int __init hd_init(void)
+{
+       int drive;
+       int err;
+
+       /* Any non-zero return means the major could not be claimed. */
+       if (register_blkdev(MAJOR_NR, "hd"))
+               return -EBUSY;
+
+       hd_queue = blk_init_queue(do_hd_request, &hd_lock);
+       if (!hd_queue) {
+               unregister_blkdev(MAJOR_NR, "hd");
+               return -ENOMEM;
+       }
+
+       blk_queue_max_sectors(hd_queue, 255);
+       init_timer(&device_timer);
+       device_timer.function = hd_times_out;
+       blk_queue_hardsect_size(hd_queue, 512);
+
+       if (!NR_HD) {
+               /*
+                * We don't know anything about the drive.  This means
+                * that you *MUST* specify the drive parameters to the
+                * kernel yourself.
+                *
+                * If we were on an i386, we used to read this info from
+                * the BIOS or CMOS.  This doesn't work all that well,
+                * since this assumes that this is a primary or secondary
+                * drive, and if we're using this legacy driver, it's
+                * probably an auxiliary controller added to recover
+                * legacy data off an ST-506 drive.  Either way, it's
+                * definitely safest to have the user explicitly specify
+                * the information.
+                */
+               printk("hd: no drives specified - use hd=cyl,head,sectors"
+                       " on kernel command line\n");
+               err = -ENODEV;
+               goto out;
+       }
+
+       for (drive = 0 ; drive < NR_HD ; drive++) {
+               struct gendisk *disk = alloc_disk(64);
+               struct hd_i_struct *p = &hd_info[drive];
+               if (!disk)
+                       goto Enomem;
+               disk->major = MAJOR_NR;
+               disk->first_minor = drive << 6;
+               disk->fops = &hd_fops;
+               sprintf(disk->disk_name, "hd%c", 'a'+drive);
+               disk->private_data = p;
+               set_capacity(disk, p->head * p->sect * p->cyl);
+               disk->queue = hd_queue;
+               p->unit = drive;
+               hd_gendisk[drive] = disk;
+               printk("%s: %luMB, CHS=%d/%d/%d\n",
+                       disk->disk_name, (unsigned long)get_capacity(disk)/2048,
+                       p->cyl, p->head, p->sect);
+       }
+
+       /* Keep the error code from request_irq() so the caller sees why. */
+       err = request_irq(HD_IRQ, hd_interrupt, IRQF_DISABLED, "hd", NULL);
+       if (err) {
+               printk("hd: unable to get IRQ%d for the hard disk driver\n",
+                       HD_IRQ);
+               goto out1;
+       }
+       if (!request_region(HD_DATA, 8, "hd")) {
+               printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA);
+               err = -EBUSY;
+               goto out2;
+       }
+       if (!request_region(HD_CMD, 1, "hd(cmd)")) {
+               printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD);
+               err = -EBUSY;
+               goto out3;
+       }
+
+       /* Let them fly */
+       for (drive = 0; drive < NR_HD; drive++)
+               add_disk(hd_gendisk[drive]);
+
+       return 0;
+
+       /* Error unwinding: each label undoes one more acquisition step. */
+out3:
+       release_region(HD_DATA, 8);
+out2:
+       free_irq(HD_IRQ, NULL);
+out1:
+       for (drive = 0; drive < NR_HD; drive++)
+               put_disk(hd_gendisk[drive]);
+       NR_HD = 0;
+out:
+       del_timer(&device_timer);
+       unregister_blkdev(MAJOR_NR, "hd");
+       blk_cleanup_queue(hd_queue);
+       return err;
+Enomem:
+       /* alloc_disk() failed: drop only the disks allocated so far. */
+       while (drive--)
+               put_disk(hd_gendisk[drive]);
+       err = -ENOMEM;
+       goto out;
+}
+
+static int __init parse_hd_setup(char *line)
+{
+       int ints[6];
+
+       (void) get_options(line, ARRAY_SIZE(ints), ints);
+       hd_setup(NULL, ints);
+
+       return 1;
+}
+__setup("hd=", parse_hd_setup);
+
+late_initcall(hd_init);
diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c

index da8a1658a273b6253172b0622631796c4af137f0..aaca40283be932811cefe4ed3c1109100d990f81 100644 (file)
--- a/drivers/char/apm-emulation.c
+++ b/drivers/char/apm-emulation.c
@@ -58,6 +58,55 @@ struct apm_queue {
         apm_event_t             events[APM_MAX_EVENTS];
  };
  
+/*
+ * thread states (for threads using a writable /dev/apm_bios fd):
+ *
+ * SUSPEND_NONE:       nothing happening
+ * SUSPEND_PENDING:    suspend event queued for thread and pending to be read
+ * SUSPEND_READ:       suspend event read, pending acknowledgement
+ * SUSPEND_ACKED:      acknowledgement received from thread (via ioctl),
+ *                     waiting for resume
+ * SUSPEND_ACKTO:      acknowledgement timeout
+ * SUSPEND_DONE:       thread had acked suspend and is now notified of
+ *                     resume
+ *
+ * SUSPEND_WAIT:       this thread invoked suspend and is waiting for resume
+ *
+ * A thread migrates in one of three paths:
+ *     NONE -1-> PENDING -2-> READ -3-> ACKED -4-> DONE -5-> NONE
+ *                                 -6-> ACKTO -7-> NONE
+ *     NONE -8-> WAIT -9-> NONE
+ *
+ * While in PENDING or READ, the thread is accounted for in the
+ * suspend_acks_pending counter.
+ *
+ * The transitions are invoked as follows:
+ *     1: suspend event is signalled from the core PM code
+ *     2: the suspend event is read from the fd by the userspace thread
+ *     3: userspace thread issues the APM_IOC_SUSPEND ioctl (as ack)
+ *     4: core PM code signals that we have resumed
+ *     5: APM_IOC_SUSPEND ioctl returns
+ *
+ *     6: the notifier invoked from the core PM code timed out waiting
+ *        for all relevant threads to enter ACKED state and puts those
+ *        that haven't into ACKTO
+ *     7: those threads issue APM_IOC_SUSPEND ioctl too late,
+ *        get an error
+ *
+ *     8: userspace thread issues the APM_IOC_SUSPEND ioctl (to suspend),
+ *        ioctl code invokes pm_suspend()
+ *     9: pm_suspend() returns indicating resume
+ */
+enum apm_suspend_state {
+       SUSPEND_NONE,
+       SUSPEND_PENDING,
+       SUSPEND_READ,
+       SUSPEND_ACKED,
+       SUSPEND_ACKTO,
+       SUSPEND_WAIT,
+       SUSPEND_DONE,
+};
+
  /*
   * The per-file APM data
   */
@@ -69,13 +118,7 @@ struct apm_user {
         unsigned int            reader: 1;
  
         int                     suspend_result;
-       unsigned int            suspend_state;
-#define SUSPEND_NONE   0               /* no suspend pending */
-#define SUSPEND_PENDING        1               /* suspend pending read */
-#define SUSPEND_READ   2               /* suspend read, pending ack */
-#define SUSPEND_ACKED  3               /* suspend acked */
-#define SUSPEND_WAIT   4               /* waiting for suspend */
-#define SUSPEND_DONE   5               /* suspend completed */
+       enum apm_suspend_state  suspend_state;
  
         struct apm_queue        queue;
  };
@@ -83,7 +126,8 @@ struct apm_user {
  /*
   * Local variables
   */
-static int suspends_pending;
+static atomic_t suspend_acks_pending = ATOMIC_INIT(0);
+static atomic_t userspace_notification_inhibit = ATOMIC_INIT(0);
  static int apm_disabled;
  static struct task_struct *kapmd_tsk;
  
@@ -166,78 +210,6 @@ static void queue_event(apm_event_t event)
         wake_up_interruptible(&apm_waitqueue);
  }
  
-/*
- * queue_suspend_event - queue an APM suspend event.
- *
- * Check that we're in a state where we can suspend.  If not,
- * return -EBUSY.  Otherwise, queue an event to all "writer"
- * users.  If there are no "writer" users, return '1' to
- * indicate that we can immediately suspend.
- */
-static int queue_suspend_event(apm_event_t event, struct apm_user *sender)
-{
-       struct apm_user *as;
-       int ret = 1;
-
-       mutex_lock(&state_lock);
-       down_read(&user_list_lock);
-
-       /*
-        * If a thread is still processing, we can't suspend, so reject
-        * the request.
-        */
-       list_for_each_entry(as, &apm_user_list, list) {
-               if (as != sender && as->reader && as->writer && as->suser &&
-                   as->suspend_state != SUSPEND_NONE) {
-                       ret = -EBUSY;
-                       goto out;
-               }
-       }
-
-       list_for_each_entry(as, &apm_user_list, list) {
-               if (as != sender && as->reader && as->writer && as->suser) {
-                       as->suspend_state = SUSPEND_PENDING;
-                       suspends_pending++;
-                       queue_add_event(&as->queue, event);
-                       ret = 0;
-               }
-       }
- out:
-       up_read(&user_list_lock);
-       mutex_unlock(&state_lock);
-       wake_up_interruptible(&apm_waitqueue);
-       return ret;
-}
-
-static void apm_suspend(void)
-{
-       struct apm_user *as;
-       int err = pm_suspend(PM_SUSPEND_MEM);
-
-       /*
-        * Anyone on the APM queues will think we're still suspended.
-        * Send a message so everyone knows we're now awake again.
-        */
-       queue_event(APM_NORMAL_RESUME);
-
-       /*
-        * Finally, wake up anyone who is sleeping on the suspend.
-        */
-       mutex_lock(&state_lock);
-       down_read(&user_list_lock);
-       list_for_each_entry(as, &apm_user_list, list) {
-               if (as->suspend_state == SUSPEND_WAIT ||
-                   as->suspend_state == SUSPEND_ACKED) {
-                       as->suspend_result = err;
-                       as->suspend_state = SUSPEND_DONE;
-               }
-       }
-       up_read(&user_list_lock);
-       mutex_unlock(&state_lock);
-
-       wake_up(&apm_suspend_waitqueue);
-}
-
  static ssize_t apm_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
  {
         struct apm_user *as = fp->private_data;
@@ -308,25 +280,22 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg)
  
                 as->suspend_result = -EINTR;
  
-               if (as->suspend_state == SUSPEND_READ) {
-                       int pending;
-
+               switch (as->suspend_state) {
+               case SUSPEND_READ:
                         /*
                          * If we read a suspend command from /dev/apm_bios,
                          * then the corresponding APM_IOC_SUSPEND ioctl is
                          * interpreted as an acknowledge.
                          */
                         as->suspend_state = SUSPEND_ACKED;
-                       suspends_pending--;
-                       pending = suspends_pending == 0;
+                       atomic_dec(&suspend_acks_pending);
                         mutex_unlock(&state_lock);
  
                         /*
-                        * If there are no further acknowledges required,
-                        * suspend the system.
+                        * suspend_acks_pending changed, the notifier needs to
+                        * be woken up for this
                          */
-                       if (pending)
-                               apm_suspend();
+                       wake_up(&apm_suspend_waitqueue);
  
                         /*
                          * Wait for the suspend/resume to complete.  If there
@@ -342,35 +311,21 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg)
                          * try_to_freeze() in freezer_count() will not trigger
                          */
                         freezer_count();
-               } else {
+                       break;
+               case SUSPEND_ACKTO:
+                       as->suspend_result = -ETIMEDOUT;
+                       mutex_unlock(&state_lock);
+                       break;
+               default:
                         as->suspend_state = SUSPEND_WAIT;
                         mutex_unlock(&state_lock);
  
                         /*
                          * Otherwise it is a request to suspend the system.
-                        * Queue an event for all readers, and expect an
-                        * acknowledge from all writers who haven't already
-                        * acknowledged.
-                        */
-                       err = queue_suspend_event(APM_USER_SUSPEND, as);
-                       if (err < 0) {
-                               /*
-                                * Avoid taking the lock here - this
-                                * should be fine.
-                                */
-                               as->suspend_state = SUSPEND_NONE;
-                               break;
-                       }
-
-                       if (err > 0)
-                               apm_suspend();
-
-                       /*
-                        * Wait for the suspend/resume to complete.  If there
-                        * are pending acknowledges, we wait here for them.
+                        * Just invoke pm_suspend(), we'll handle it from
+                        * there via the notifier.
                          */
-                       wait_event_freezable(apm_suspend_waitqueue,
-                                        as->suspend_state == SUSPEND_DONE);
+                       as->suspend_result = pm_suspend(PM_SUSPEND_MEM);
                 }
  
                 mutex_lock(&state_lock);
@@ -386,7 +341,6 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg)
  static int apm_release(struct inode * inode, struct file * filp)
  {
         struct apm_user *as = filp->private_data;
-       int pending = 0;
  
         filp->private_data = NULL;
  
@@ -396,18 +350,15 @@ static int apm_release(struct inode * inode, struct file * filp)
  
         /*
          * We are now unhooked from the chain.  As far as new
-        * events are concerned, we no longer exist.  However, we
-        * need to balance suspends_pending, which means the
-        * possibility of sleeping.
+        * events are concerned, we no longer exist.
          */
         mutex_lock(&state_lock);
-       if (as->suspend_state != SUSPEND_NONE) {
-               suspends_pending -= 1;
-               pending = suspends_pending == 0;
-       }
+       if (as->suspend_state == SUSPEND_PENDING ||
+           as->suspend_state == SUSPEND_READ)
+               atomic_dec(&suspend_acks_pending);
         mutex_unlock(&state_lock);
-       if (pending)
-               apm_suspend();
+
+       wake_up(&apm_suspend_waitqueue);
  
         kfree(as);
         return 0;
@@ -545,7 +496,6 @@ static int kapmd(void *arg)
  {
         do {
                 apm_event_t event;
-               int ret;
  
                 wait_event_interruptible(kapmd_wait,
                                 !queue_empty(&kapmd_queue) || kthread_should_stop());
@@ -570,20 +520,13 @@ static int kapmd(void *arg)
  
                 case APM_USER_SUSPEND:
                 case APM_SYS_SUSPEND:
-                       ret = queue_suspend_event(event, NULL);
-                       if (ret < 0) {
-                               /*
-                                * We were busy.  Try again in 50ms.
-                                */
-                               queue_add_event(&kapmd_queue, event);
-                               msleep(50);
-                       }
-                       if (ret > 0)
-                               apm_suspend();
+                       pm_suspend(PM_SUSPEND_MEM);
                         break;
  
                 case APM_CRITICAL_SUSPEND:
-                       apm_suspend();
+                       atomic_inc(&userspace_notification_inhibit);
+                       pm_suspend(PM_SUSPEND_MEM);
+                       atomic_dec(&userspace_notification_inhibit);
                         break;
                 }
         } while (1);
@@ -591,6 +534,120 @@ static int kapmd(void *arg)
         return 0;
  }
  
+static int apm_suspend_notifier(struct notifier_block *nb,
+                               unsigned long event,
+                               void *dummy)
+{
+       struct apm_user *as;
+       int err;
+
+       /* short-cut emergency suspends */
+       if (atomic_read(&userspace_notification_inhibit))
+               return NOTIFY_DONE;
+
+       switch (event) {
+       case PM_SUSPEND_PREPARE:
+               /*
+                * Queue an event to all "writer" users that we want
+                * to suspend and need their ack.
+                */
+               mutex_lock(&state_lock);
+               down_read(&user_list_lock);
+
+               list_for_each_entry(as, &apm_user_list, list) {
+                       if (as->suspend_state != SUSPEND_WAIT && as->reader &&
+                           as->writer && as->suser) {
+                               as->suspend_state = SUSPEND_PENDING;
+                               atomic_inc(&suspend_acks_pending);
+                               queue_add_event(&as->queue, APM_USER_SUSPEND);
+                       }
+               }
+
+               up_read(&user_list_lock);
+               mutex_unlock(&state_lock);
+               wake_up_interruptible(&apm_waitqueue);
+
+               /*
+                * Wait for the suspend_acks_pending variable to drop to
+                * zero, meaning everybody acked the suspend event (or the
+                * process was killed.)
+                *
+                * If the app won't answer within a short while we assume it
+                * locked up and ignore it.
+                */
+               err = wait_event_interruptible_timeout(
+                       apm_suspend_waitqueue,
+                       atomic_read(&suspend_acks_pending) == 0,
+                       5*HZ);
+
+               /* timed out */
+               if (err == 0) {
+                       /*
+                        * Move anybody who timed out to "ack timeout" state.
+                        *
+                        * We could time out and the userspace does the ACK
+                        * right after we time out but before we enter the
+                        * locked section here, but that's fine.
+                        */
+                       mutex_lock(&state_lock);
+                       down_read(&user_list_lock);
+                       list_for_each_entry(as, &apm_user_list, list) {
+                               if (as->suspend_state == SUSPEND_PENDING ||
+                                   as->suspend_state == SUSPEND_READ) {
+                                       as->suspend_state = SUSPEND_ACKTO;
+                                       atomic_dec(&suspend_acks_pending);
+                               }
+                       }
+                       up_read(&user_list_lock);
+                       mutex_unlock(&state_lock);
+               }
+
+               /* let suspend proceed */
+               if (err >= 0)
+                       return NOTIFY_OK;
+
+               /* interrupted by signal */
+               return NOTIFY_BAD;
+
+       case PM_POST_SUSPEND:
+               /*
+                * Anyone on the APM queues will think we're still suspended.
+                * Send a message so everyone knows we're now awake again.
+                */
+               queue_event(APM_NORMAL_RESUME);
+
+               /*
+                * Finally, wake up anyone who is sleeping on the suspend.
+                */
+               mutex_lock(&state_lock);
+               down_read(&user_list_lock);
+               list_for_each_entry(as, &apm_user_list, list) {
+                       if (as->suspend_state == SUSPEND_ACKED) {
+                               /*
+                                * TODO: maybe grab error code, needs core
+                                * changes to push the error to the notifier
+                                * chain (could use the second parameter if
+                                * implemented)
+                                */
+                               as->suspend_result = 0;
+                               as->suspend_state = SUSPEND_DONE;
+                       }
+               }
+               up_read(&user_list_lock);
+               mutex_unlock(&state_lock);
+
+               wake_up(&apm_suspend_waitqueue);
+               return NOTIFY_OK;
+
+       default:
+               return NOTIFY_DONE;
+       }
+}
+
+static struct notifier_block apm_notif_block = {
+       .notifier_call = apm_suspend_notifier,
+};
+
  static int __init apm_init(void)
  {
         int ret;
@@ -604,7 +661,7 @@ static int __init apm_init(void)
         if (IS_ERR(kapmd_tsk)) {
                 ret = PTR_ERR(kapmd_tsk);
                 kapmd_tsk = NULL;
-               return ret;
+               goto out;
         }
         wake_up_process(kapmd_tsk);
  
@@ -613,16 +670,27 @@ static int __init apm_init(void)
  #endif
  
         ret = misc_register(&apm_device);
-       if (ret != 0) {
-               remove_proc_entry("apm", NULL);
-               kthread_stop(kapmd_tsk);
-       }
+       if (ret)
+               goto out_stop;
  
+       ret = register_pm_notifier(&apm_notif_block);
+       if (ret)
+               goto out_unregister;
+
+       return 0;
+
+ out_unregister:
+       misc_deregister(&apm_device);
+ out_stop:
+       remove_proc_entry("apm", NULL);
+       kthread_stop(kapmd_tsk);
+ out:
         return ret;
  }
  
  static void __exit apm_exit(void)
  {
+       unregister_pm_notifier(&apm_notif_block);
         misc_deregister(&apm_device);
         remove_proc_entry("apm", NULL);
  
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig

index cf707c8f08d4c09028e926dd17e2d6335dec9a47..15b09b89588a033b1e6f967b05f5fe8615286e08 100644 (file)
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -98,6 +98,9 @@ if BLK_DEV_IDE
  
  comment "Please see Documentation/ide/ide.txt for help/info on IDE drives"
  
+config IDE_TIMINGS
+       bool
+
  config IDE_ATAPI
         bool
  
@@ -326,6 +329,7 @@ config BLK_DEV_PLATFORM
  config BLK_DEV_CMD640
         tristate "CMD640 chipset bugfix/support"
         depends on X86
+       select IDE_TIMINGS
         ---help---
           The CMD-Technologies CMD640 IDE chip is used on many common 486 and
           Pentium motherboards, usually in combination with a "Neptune" or
@@ -455,6 +459,7 @@ config BLK_DEV_AEC62XX
  
  config BLK_DEV_ALI15X3
         tristate "ALI M15x3 chipset support"
+       select IDE_TIMINGS
         select BLK_DEV_IDEDMA_PCI
         help
           This driver ensures (U)DMA support for ALI 1533, 1543 and 1543C
@@ -469,6 +474,7 @@ config BLK_DEV_ALI15X3
  config BLK_DEV_AMD74XX
         tristate "AMD and nVidia IDE support"
         depends on !ARM
+       select IDE_TIMINGS
         select BLK_DEV_IDEDMA_PCI
         help
           This driver adds explicit support for AMD-7xx and AMD-8111 chips
@@ -489,6 +495,7 @@ config BLK_DEV_ATIIXP
  
  config BLK_DEV_CMD64X
         tristate "CMD64{3|6|8|9} chipset support"
+       select IDE_TIMINGS
         select BLK_DEV_IDEDMA_PCI
         help
           Say Y here if you have an IDE controller which uses any of these
@@ -503,6 +510,7 @@ config BLK_DEV_TRIFLEX
  
  config BLK_DEV_CY82C693
         tristate "CY82C693 chipset support"
+       select IDE_TIMINGS
         select BLK_DEV_IDEDMA_PCI
         help
           This driver adds detection and support for the CY82C693 chipset
@@ -695,6 +703,7 @@ config BLK_DEV_SIS5513
  config BLK_DEV_SL82C105
         tristate "Winbond SL82c105 support"
         depends on (PPC || ARM)
+       select IDE_TIMINGS
         select BLK_DEV_IDEDMA_PCI
         help
           If you have a Winbond SL82c105 IDE controller, say Y here to enable
@@ -725,6 +734,7 @@ config BLK_DEV_TRM290
  
  config BLK_DEV_VIA82CXXX
         tristate "VIA82CXXX chipset support"
+       select IDE_TIMINGS
         select BLK_DEV_IDEDMA_PCI
         help
           This driver adds explicit support for VIA BusMastering IDE chips.
@@ -751,6 +761,7 @@ endif
  config BLK_DEV_IDE_PMAC
         tristate "PowerMac on-board IDE support"
         depends on PPC_PMAC && IDE=y && BLK_DEV_IDE=y
+       select IDE_TIMINGS
         help
           This driver provides support for the on-board IDE controller on
           most of the recent Apple Power Macintoshes and PowerBooks.
@@ -829,13 +840,6 @@ config BLK_DEV_IDE_RAPIDE
           Say Y here if you want to support the Yellowstone RapIDE controller
           manufactured for use with Acorn computers.
  
-config BLK_DEV_IDE_BAST
-       tristate "Simtec BAST / Thorcom VR1000 IDE support"
-       depends on ARM && (ARCH_BAST || MACH_VR1000)
-       help
-         Say Y here if you want to support the onboard IDE channels on the
-         Simtec BAST or the Thorcom VR1000
-
  config IDE_H8300
         tristate "H8300 IDE support"
         depends on H8300
@@ -919,51 +923,12 @@ config BLK_DEV_Q40IDE
  config BLK_DEV_PALMCHIP_BK3710
         tristate "Palmchip bk3710 IDE controller support"
         depends on ARCH_DAVINCI
+       select IDE_TIMINGS
         select BLK_DEV_IDEDMA_SFF
         help
           Say Y here if you want to support the onchip IDE controller on the
           TI DaVinci SoC
  
-
-config BLK_DEV_MPC8xx_IDE
-       tristate "MPC8xx IDE support"
-       depends on 8xx && (LWMON || IVMS8 || IVML24 || TQM8xxL) && IDE=y && BLK_DEV_IDE=y && !PPC_MERGE
-       help
-         This option provides support for IDE on Motorola MPC8xx Systems.
-         Please see 'Type of MPC8xx IDE interface' for details.
-
-         If unsure, say N.
-
-choice
-       prompt "Type of MPC8xx IDE interface"
-       depends on BLK_DEV_MPC8xx_IDE
-       default IDE_8xx_PCCARD
-
-config IDE_8xx_PCCARD
-       bool "8xx_PCCARD"
-       ---help---
-         Select how the IDE devices are connected to the MPC8xx system:
-
-         8xx_PCCARD uses the 8xx internal PCMCIA interface in combination
-         with a PC Card (e.g. ARGOSY portable Hard Disk Adapter),
-         ATA PC Card HDDs or ATA PC Flash Cards (example: TQM8xxL
-         systems)
-
-         8xx_DIRECT is used for directly connected IDE devices using the 8xx
-         internal PCMCIA interface (example: IVMS8 systems)
-
-         EXT_DIRECT is used for IDE devices directly connected to the 8xx
-         bus using some glue logic, but _not_ the 8xx internal
-         PCMCIA interface (example: IDIF860 systems)
-
-config IDE_8xx_DIRECT
-       bool "8xx_DIRECT"
-
-config IDE_EXT_DIRECT
-       bool "EXT_DIRECT"
-
-endchoice
-
  # no isa -> no vlb
  if ISA && (ALPHA || X86 || MIPS)
  
@@ -981,6 +946,7 @@ config BLK_DEV_4DRIVES
  
  config BLK_DEV_ALI14XX
         tristate "ALI M14xx support"
+       select IDE_TIMINGS
         help
           This driver is enabled at runtime using the "ali14xx.probe" kernel
           boot parameter.  It enables support for the secondary IDE interface
@@ -1000,6 +966,7 @@ config BLK_DEV_DTC2278
  
  config BLK_DEV_HT6560B
         tristate "Holtek HT6560B support"
+       select IDE_TIMINGS
         help
           This driver is enabled at runtime using the "ht6560b.probe" kernel
           boot parameter. It enables support for the secondary IDE interface
@@ -1009,6 +976,7 @@ config BLK_DEV_HT6560B
  
  config BLK_DEV_QD65XX
         tristate "QDI QD65xx support"
+       select IDE_TIMINGS
         help
           This driver is enabled at runtime using the "qd65xx.probe" kernel
           boot parameter.  It permits faster I/O speeds to be set.  See the
@@ -1032,30 +1000,4 @@ config BLK_DEV_IDEDMA
  
  endif
  
-config BLK_DEV_HD_ONLY
-       bool "Old hard disk (MFM/RLL/IDE) driver"
-       depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN
-       help
-         There are two drivers for MFM/RLL/IDE hard disks. Most people use
-         the newer enhanced driver, but this old one is still around for two
-         reasons. Some older systems have strange timing problems and seem to
-         work only with the old driver (which itself does not work with some
-         newer systems). The other reason is that the old driver is smaller,
-         since it lacks the enhanced functionality of the new one. This makes
-         it a good choice for systems with very tight memory restrictions, or
-         for systems with only older MFM/RLL/ESDI drives. Choosing the old
-         driver can save 13 KB or so of kernel memory.
-
-         If you want to use this driver together with the new one you have
-         to use "hda=noprobe hdb=noprobe" kernel parameters to prevent the new
-         driver from probing the primary interface.
-
-         If you are unsure, then just choose the Enhanced IDE/MFM/RLL driver
-         instead of this one. For more detailed information, read the
-         Disk-HOWTO, available from
-         <http://www.tldp.org/docs.html#howto>.
-
-config BLK_DEV_HD
-       def_bool BLK_DEV_HD_ONLY
-
  endif # IDE
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile

index a2b3f84d710d3a7c22e152b731ea4c0f3b9f71f5..5d414e301a5a9c319a23e9da0463ca0231035e5c 100644 (file)
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -11,9 +11,11 @@
  
  EXTRA_CFLAGS                           += -Idrivers/ide
  
-ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o
+ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o \
+             ide-pio-blacklist.o
  
  # core IDE code
+ide-core-$(CONFIG_IDE_TIMINGS)         += ide-timings.o
  ide-core-$(CONFIG_IDE_ATAPI)           += ide-atapi.o
  ide-core-$(CONFIG_BLK_DEV_IDEPCI)      += setup-pci.o
  ide-core-$(CONFIG_BLK_DEV_IDEDMA)      += ide-dma.o
@@ -59,9 +61,3 @@ ifeq ($(CONFIG_BLK_DEV_PLATFORM), y)
  endif
  
  obj-$(CONFIG_BLK_DEV_IDE)              += arm/ mips/
-
-# old hd driver must be last
-ifeq ($(CONFIG_BLK_DEV_HD), y)
-       hd-core-y += legacy/hd.o
-       obj-y += hd-core.o
-endif
diff --git a/drivers/ide/arm/Makefile b/drivers/ide/arm/Makefile

index 936e7b0237f5499fe0305845679c12a96d025d16..5bc26053afa609c83e05426edc3d999cdef8d3a7 100644 (file)
--- a/drivers/ide/arm/Makefile
+++ b/drivers/ide/arm/Makefile
@@ -1,7 +1,6 @@
  
  obj-$(CONFIG_BLK_DEV_IDE_ICSIDE)       += icside.o
  obj-$(CONFIG_BLK_DEV_IDE_RAPIDE)       += rapide.o
-obj-$(CONFIG_BLK_DEV_IDE_BAST)         += bast-ide.o
  obj-$(CONFIG_BLK_DEV_PALMCHIP_BK3710)  += palm_bk3710.o
  
  ifeq ($(CONFIG_IDE_ARM), m)
diff --git a/drivers/ide/arm/bast-ide.c b/drivers/ide/arm/bast-ide.c

deleted file mode 100644 (file)

index 8e8c281..0000000
--- a/drivers/ide/arm/bast-ide.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2003-2004 Simtec Electronics
- *  Ben Dooks <ben@simtec.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
-*/
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/mach-types.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/arch/map.h>
-#include <asm/arch/bast-map.h>
-#include <asm/arch/bast-irq.h>
-
-#define DRV_NAME "bast-ide"
-
-static int __init bastide_register(unsigned int base, unsigned int aux, int irq)
-{
-       ide_hwif_t *hwif;
-       hw_regs_t hw;
-       int i;
-       u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
-
-       memset(&hw, 0, sizeof(hw));
-
-       base += BAST_IDE_CS;
-       aux  += BAST_IDE_CS;
-
-       for (i = 0; i <= 7; i++) {
-               hw.io_ports_array[i] = (unsigned long)base;
-               base += 0x20;
-       }
-
-       hw.io_ports.ctl_addr = aux + (6 * 0x20);
-       hw.irq = irq;
-       hw.chipset = ide_generic;
-
-       hwif = ide_find_port();
-       if (hwif == NULL)
-               goto out;
-
-       i = hwif->index;
-
-       ide_init_port_data(hwif, i);
-       ide_init_port_hw(hwif, &hw);
-       hwif->port_ops = NULL;
-
-       idx[0] = i;
-
-       ide_device_add(idx, NULL);
-out:
-       return 0;
-}
-
-static int __init bastide_init(void)
-{
-       unsigned long base = BAST_VA_IDEPRI + BAST_IDE_CS;
-
-       /* we can treat the VR1000 and the BAST the same */
-
-       if (!(machine_is_bast() || machine_is_vr1000()))
-               return 0;
-
-       printk("BAST: IDE driver, (c) 2003-2004 Simtec Electronics\n");
-
-       if (!request_mem_region(base, 0x400000, DRV_NAME)) {
-               printk(KERN_ERR "%s: resources busy\n", DRV_NAME);
-               return -EBUSY;
-       }
-
-       bastide_register(BAST_VA_IDEPRI, BAST_VA_IDEPRIAUX, IRQ_IDE0);
-       bastide_register(BAST_VA_IDESEC, BAST_VA_IDESECAUX, IRQ_IDE1);
-
-       return 0;
-}
-
-module_init(bastide_init);
-
-MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Simtec BAST / Thorcom VR1000 IDE driver");
diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c

index 061456914ca352469ca644df2e83e311ac92d9fb..52f58c88578337a1029265250ca8775b89bf90e4 100644 (file)
--- a/drivers/ide/arm/icside.c
+++ b/drivers/ide/arm/icside.c
@@ -21,6 +21,8 @@
  #include <asm/dma.h>
  #include <asm/ecard.h>
  
+#define DRV_NAME "icside"
+
  #define ICS_IDENT_OFFSET               0x2280
  
  #define ICS_ARCIN_V5_INTRSTAT          0x0000
@@ -68,6 +70,7 @@ struct icside_state {
         unsigned int enabled;
         void __iomem *irq_port;
         void __iomem *ioc_base;
+       unsigned int sel;
         unsigned int type;
         ide_hwif_t *hwif[2];
  };
@@ -165,7 +168,8 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = {
  static void icside_maskproc(ide_drive_t *drive, int mask)
  {
         ide_hwif_t *hwif = HWIF(drive);
-       struct icside_state *state = hwif->hwif_data;
+       struct expansion_card *ec = ECARD_DEV(hwif->dev);
+       struct icside_state *state = ecard_get_drvdata(ec);
         unsigned long flags;
  
         local_irq_save(flags);
@@ -308,6 +312,7 @@ static int icside_dma_setup(ide_drive_t *drive)
  {
         ide_hwif_t *hwif = HWIF(drive);
         struct expansion_card *ec = ECARD_DEV(hwif->dev);
+       struct icside_state *state = ecard_get_drvdata(ec);
         struct request *rq = hwif->hwgroup->rq;
         unsigned int dma_mode;
  
@@ -331,7 +336,7 @@ static int icside_dma_setup(ide_drive_t *drive)
         /*
          * Route the DMA signals to the correct interface.
          */
-       writeb(hwif->select_data, hwif->config_data);
+       writeb(state->sel | hwif->channel, state->ioc_base);
  
         /*
          * Select the correct timing for this drive.
@@ -359,7 +364,8 @@ static void icside_dma_exec_cmd(ide_drive_t *drive, u8 cmd)
  static int icside_dma_test_irq(ide_drive_t *drive)
  {
         ide_hwif_t *hwif = HWIF(drive);
-       struct icside_state *state = hwif->hwif_data;
+       struct expansion_card *ec = ECARD_DEV(hwif->dev);
+       struct icside_state *state = ecard_get_drvdata(ec);
  
         return readb(state->irq_port +
                      (hwif->channel ?
@@ -411,36 +417,24 @@ static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d)
         return -EOPNOTSUPP;
  }
  
-static ide_hwif_t *
-icside_setup(void __iomem *base, struct cardinfo *info, struct expansion_card *ec)
+static void icside_setup_ports(hw_regs_t *hw, void __iomem *base,
+                              struct cardinfo *info, struct expansion_card *ec)
  {
         unsigned long port = (unsigned long)base + info->dataoffset;
-       ide_hwif_t *hwif;
  
-       hwif = ide_find_port();
-       if (hwif) {
-               /*
-                * Ensure we're using MMIO
-                */
-               default_hwif_mmiops(hwif);
-
-               hwif->io_ports.data_addr = port;
-               hwif->io_ports.error_addr = port + (1 << info->stepping);
-               hwif->io_ports.nsect_addr = port + (2 << info->stepping);
-               hwif->io_ports.lbal_addr = port + (3 << info->stepping);
-               hwif->io_ports.lbam_addr = port + (4 << info->stepping);
-               hwif->io_ports.lbah_addr = port + (5 << info->stepping);
-               hwif->io_ports.device_addr = port + (6 << info->stepping);
-               hwif->io_ports.status_addr = port + (7 << info->stepping);
-               hwif->io_ports.ctl_addr =
-                       (unsigned long)base + info->ctrloffset;
-               hwif->irq     = ec->irq;
-               hwif->chipset = ide_acorn;
-               hwif->gendev.parent = &ec->dev;
-               hwif->dev = &ec->dev;
-       }
-
-       return hwif;
+       hw->io_ports.data_addr   = port;
+       hw->io_ports.error_addr  = port + (1 << info->stepping);
+       hw->io_ports.nsect_addr  = port + (2 << info->stepping);
+       hw->io_ports.lbal_addr   = port + (3 << info->stepping);
+       hw->io_ports.lbam_addr   = port + (4 << info->stepping);
+       hw->io_ports.lbah_addr   = port + (5 << info->stepping);
+       hw->io_ports.device_addr = port + (6 << info->stepping);
+       hw->io_ports.status_addr = port + (7 << info->stepping);
+       hw->io_ports.ctl_addr    = (unsigned long)base + info->ctrloffset;
+
+       hw->irq = ec->irq;
+       hw->dev = &ec->dev;
+       hw->chipset = ide_acorn;
  }
  
  static int __init
@@ -449,6 +443,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
         ide_hwif_t *hwif;
         void __iomem *base;
         u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
+       hw_regs_t hw;
  
         base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
         if (!base)
@@ -466,12 +461,19 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
          */
         icside_irqdisable_arcin_v5(ec, 0);
  
-       hwif = icside_setup(base, &icside_cardinfo_v5, ec);
+       icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec);
+
+       hwif = ide_find_port();
         if (!hwif)
                 return -ENODEV;
  
+       ide_init_port_hw(hwif, &hw);
+       default_hwif_mmiops(hwif);
+
         state->hwif[0] = hwif;
  
+       ecard_set_drvdata(ec, state);
+
         idx[0] = hwif->index;
  
         ide_device_add(idx, NULL);
@@ -497,6 +499,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
         int ret;
         u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
         struct ide_port_info d = icside_v6_port_info;
+       hw_regs_t hw[2];
  
         ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
         if (!ioc_base) {
@@ -525,43 +528,47 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
  
         state->irq_port   = easi_base;
         state->ioc_base   = ioc_base;
+       state->sel        = sel;
  
         /*
          * Be on the safe side - disable interrupts
          */
         icside_irqdisable_arcin_v6(ec, 0);
  
+       icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec);
+       icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec);
+
         /*
          * Find and register the interfaces.
          */
-       hwif = icside_setup(easi_base, &icside_cardinfo_v6_1, ec);
-       mate = icside_setup(easi_base, &icside_cardinfo_v6_2, ec);
+       hwif = ide_find_port();
+       if (hwif == NULL)
+               return -ENODEV;
  
-       if (!hwif || !mate) {
-               ret = -ENODEV;
-               goto out;
+       ide_init_port_hw(hwif, &hw[0]);
+       default_hwif_mmiops(hwif);
+
+       idx[0] = hwif->index;
+
+       mate = ide_find_port();
+       if (mate) {
+               ide_init_port_hw(mate, &hw[1]);
+               default_hwif_mmiops(mate);
+
+               idx[1] = mate->index;
         }
  
         state->hwif[0]    = hwif;
         state->hwif[1]    = mate;
  
-       hwif->hwif_data   = state;
-       hwif->config_data = (unsigned long)ioc_base;
-       hwif->select_data = sel;
-
-       mate->hwif_data   = state;
-       mate->config_data = (unsigned long)ioc_base;
-       mate->select_data = sel | 1;
+       ecard_set_drvdata(ec, state);
  
-       if (ec->dma != NO_DMA && !request_dma(ec->dma, hwif->name)) {
+       if (ec->dma != NO_DMA && !request_dma(ec->dma, DRV_NAME)) {
                 d.init_dma = icside_dma_init;
                 d.port_ops = &icside_v6_port_ops;
                 d.dma_ops = NULL;
         }
  
-       idx[0] = hwif->index;
-       idx[1] = mate->index;
-
         ide_device_add(idx, &d);
  
         return 0;
@@ -627,10 +634,8 @@ icside_probe(struct expansion_card *ec, const struct ecard_id *id)
                 break;
         }
  
-       if (ret == 0) {
-               ecard_set_drvdata(ec, state);
+       if (ret == 0)
                 goto out;
-       }
  
         kfree(state);
   release:
diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c

index 3839f5722985128bcce2ded6ef7ba1dd15597560..c79b85b6e4a34e82b135cee906842656bc3652a9 100644 (file)
--- a/drivers/ide/arm/palm_bk3710.c
+++ b/drivers/ide/arm/palm_bk3710.c
@@ -74,8 +74,6 @@ struct palm_bk3710_udmatiming {
  #define BK3710_IORDYTMP                0x78
  #define BK3710_IORDYTMS                0x7C
  
-#include "../ide-timing.h"
-
  static unsigned ideclk_period; /* in nanoseconds */
  
  static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = {
@@ -402,7 +400,6 @@ static int __devinit palm_bk3710_probe(struct platform_device *pdev)
  
         i = hwif->index;
  
-       ide_init_port_data(hwif, i);
         ide_init_port_hw(hwif, &hw);
  
         default_hwif_mmiops(hwif);
diff --git a/drivers/ide/arm/rapide.c b/drivers/ide/arm/rapide.c

index 1747b23587758f93c1fc44f69b264f8198ad6def..43057e0303c89016e1098f6b410d6870c8cd5bf0 100644 (file)
--- a/drivers/ide/arm/rapide.c
+++ b/drivers/ide/arm/rapide.c
@@ -11,6 +11,10 @@
  
  #include <asm/ecard.h>
  
+static const struct ide_port_info rapide_port_info = {
+       .host_flags             = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
+};
+
  static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base,
                                void __iomem *ctrl, unsigned int sz, int irq)
  {
@@ -44,25 +48,26 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
                 goto release;
         }
  
-       hwif = ide_find_port();
-       if (hwif) {
-               memset(&hw, 0, sizeof(hw));
-               rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
-               hw.chipset = ide_generic;
-               hw.dev = &ec->dev;
+       memset(&hw, 0, sizeof(hw));
+       rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
+       hw.chipset = ide_generic;
+       hw.dev = &ec->dev;
  
-               ide_init_port_hw(hwif, &hw);
+       hwif = ide_find_port();
+       if (hwif == NULL) {
+               ret = -ENOENT;
+               goto release;
+       }
  
-               hwif->host_flags = IDE_HFLAG_MMIO;
-               default_hwif_mmiops(hwif);
+       ide_init_port_hw(hwif, &hw);
+       default_hwif_mmiops(hwif);
  
-               idx[0] = hwif->index;
+       idx[0] = hwif->index;
  
-               ide_device_add(idx, NULL);
+       ide_device_add(idx, &rapide_port_info);
  
-               ecard_set_drvdata(ec, hwif);
-               goto out;
-       }
+       ecard_set_drvdata(ec, hwif);
+       goto out;
  
   release:
         ecard_release_resources(ec);
diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c

index ae37ee58bae27a474f80fc3e72edbafc6528cad2..20fad6d542cc75178dee5cd0629ad6d890f8a88a 100644 (file)
--- a/drivers/ide/h8300/ide-h8300.c
+++ b/drivers/ide/h8300/ide-h8300.c
@@ -8,6 +8,8 @@
  #include <asm/io.h>
  #include <asm/irq.h>
  
+#define DRV_NAME "ide-h8300"
+
  #define bswap(d) \
  ({                                     \
         u16 r;                          \
@@ -176,6 +178,10 @@ static inline void hwif_setup(ide_hwif_t *hwif)
         hwif->output_data = h8300_output_data;
  }
  
+static const struct ide_port_info h8300_port_info = {
+       .host_flags             = IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA,
+};
+
  static int __init h8300_ide_init(void)
  {
         hw_regs_t hw;
@@ -183,6 +189,8 @@ static int __init h8300_ide_init(void)
         int index;
         u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
  
+       printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n");
+
         if (!request_region(CONFIG_H8300_IDE_BASE, H8300_IDE_GAP*8, "ide-h8300"))
                 goto out_busy;
         if (!request_region(CONFIG_H8300_IDE_ALT, H8300_IDE_GAP, "ide-h8300")) {
@@ -192,22 +200,17 @@ static int __init h8300_ide_init(void)
  
         hw_setup(&hw);
  
-       hwif = ide_find_port();
-       if (hwif == NULL) {
-               printk(KERN_ERR "ide-h8300: IDE I/F register failed\n");
+       hwif = ide_find_port_slot(&h8300_port_info);
+       if (hwif == NULL)
                 return -ENOENT;
-       }
  
         index = hwif->index;
-       ide_init_port_data(hwif, index);
         ide_init_port_hw(hwif, &hw);
         hwif_setup(hwif);
-       hwif->host_flags = IDE_HFLAG_NO_IO_32BIT;
-       printk(KERN_INFO "ide%d: H8/300 generic IDE interface\n", index);
  
         idx[0] = index;
  
-       ide_device_add(idx, NULL);
+       ide_device_add(idx, &h8300_port_info);
  
         return 0;
  
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c

index d99847157186e9669efd3f56bfdb5eac54de7042..6e29dd5320901c65cfe9f9b985cf28c8997832ac 100644 (file)
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -517,14 +517,9 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
                                                   int xferlen,
                                                   ide_handler_t *handler)
  {
-       ide_startstop_t startstop;
         struct cdrom_info *info = drive->driver_data;
         ide_hwif_t *hwif = drive->hwif;
  
-       /* wait for the controller to be idle */
-       if (ide_wait_stat(&startstop, drive, 0, BUSY_STAT, WAIT_READY))
-               return startstop;
-
         /* FIXME: for Virtual DMA we must check harder */
         if (info->dma)
                 info->dma = !hwif->dma_ops->dma_setup(drive);
@@ -603,28 +598,6 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive,
         return ide_started;
  }
  
-/*
- * Block read functions.
- */
-static void ide_cd_pad_transfer(ide_drive_t *drive, xfer_func_t *xf, int len)
-{
-       while (len > 0) {
-               int dum = 0;
-               xf(drive, NULL, &dum, sizeof(dum));
-               len -= sizeof(dum);
-       }
-}
-
-static void ide_cd_drain_data(ide_drive_t *drive, int nsects)
-{
-       while (nsects > 0) {
-               static char dum[SECTOR_SIZE];
-
-               drive->hwif->input_data(drive, NULL, dum, sizeof(dum));
-               nsects--;
-       }
-}
-
  /*
   * Check the contents of the interrupt reason register from the cdrom
   * and attempt to recover if there are problems.  Returns  0 if everything's
@@ -640,15 +613,12 @@ static int ide_cd_check_ireason(ide_drive_t *drive, struct request *rq,
         if (ireason == (!rw << 1))
                 return 0;
         else if (ireason == (rw << 1)) {
-               ide_hwif_t *hwif = drive->hwif;
-               xfer_func_t *xf;
  
                 /* whoops... */
                 printk(KERN_ERR "%s: %s: wrong transfer direction!\n",
                                 drive->name, __func__);
  
-               xf = rw ? hwif->output_data : hwif->input_data;
-               ide_cd_pad_transfer(drive, xf, len);
+               ide_pad_transfer(drive, rw, len);
         } else  if (rw == 0 && ireason == 1) {
                 /*
                  * Some drives (ASUS) seem to tell us that status info is
@@ -696,16 +666,9 @@ static int ide_cd_check_transfer_size(ide_drive_t *drive, int len)
  
  static ide_startstop_t cdrom_newpc_intr(ide_drive_t *);
  
-/*
- * Routine to send a read/write packet command to the drive. This is usually
- * called directly from cdrom_start_{read,write}(). However, for drq_interrupt
- * devices, it is called from an interrupt when the drive is ready to accept
- * the command.
- */
-static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive)
+static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive,
+                                                struct request *rq)
  {
-       struct request *rq = HWGROUP(drive)->rq;
-
         if (rq_data_dir(rq) == READ) {
                 unsigned short sectors_per_frame =
                         queue_hardsect_size(drive->queue) >> SECTOR_BITS;
@@ -742,6 +705,19 @@ static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive)
         /* set up the command */
         rq->timeout = ATAPI_WAIT_PC;
  
+       return ide_started;
+}
+
+/*
+ * Routine to send a read/write packet command to the drive. This is usually
+ * called directly from cdrom_start_{read,write}(). However, for drq_interrupt
+ * devices, it is called from an interrupt when the drive is ready to accept
+ * the command.
+ */
+static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive)
+{
+       struct request *rq = drive->hwif->hwgroup->rq;
+
         /* send the command to the drive and return */
         return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr);
  }
@@ -768,9 +744,8 @@ static ide_startstop_t cdrom_seek_intr(ide_drive_t *drive)
         return ide_stopped;
  }
  
-static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive)
+static void ide_cd_prepare_seek_request(ide_drive_t *drive, struct request *rq)
  {
-       struct request *rq = HWGROUP(drive)->rq;
         sector_t frame = rq->sector;
  
         sector_div(frame, queue_hardsect_size(drive->queue) >> SECTOR_BITS);
@@ -780,17 +755,13 @@ static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive)
         put_unaligned(cpu_to_be32(frame), (unsigned int *) &rq->cmd[2]);
  
         rq->timeout = ATAPI_WAIT_PC;
-       return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr);
  }
  
-static ide_startstop_t cdrom_start_seek(ide_drive_t *drive, unsigned int block)
+static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive)
  {
-       struct cdrom_info *info = drive->driver_data;
+       struct request *rq = drive->hwif->hwgroup->rq;
  
-       info->dma = 0;
-       info->start_seek = jiffies;
-       return cdrom_start_packet_command(drive, 0,
-                                         cdrom_start_seek_continuation);
+       return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr);
  }
  
  /*
@@ -1011,7 +982,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
                                            - bio_cur_sectors(rq->bio),
                                            thislen >> 9);
                         if (nskip > 0) {
-                               ide_cd_drain_data(drive, nskip);
+                               ide_pad_transfer(drive, write, nskip << 9);
                                 rq->current_nr_sectors -= nskip;
                                 thislen -= (nskip << 9);
                         }
@@ -1048,7 +1019,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
                                  * If the buffers are full, pipe the rest into
                                  * oblivion.
                                  */
-                               ide_cd_drain_data(drive, thislen >> 9);
+                               ide_pad_transfer(drive, 0, thislen);
                         else {
                                 printk(KERN_ERR "%s: confused, missing data\n",
                                                 drive->name);
@@ -1096,7 +1067,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
  
         /* pad, if necessary */
         if (!blk_fs_request(rq) && len > 0)
-               ide_cd_pad_transfer(drive, xferfunc, len);
+               ide_pad_transfer(drive, write, len);
  
         if (blk_pc_request(rq)) {
                 timeout = rq->timeout;
@@ -1165,21 +1136,17 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
         if (write)
                 cd->devinfo.media_written = 1;
  
-       /* start sending the read/write request to the drive */
-       return cdrom_start_packet_command(drive, 32768, cdrom_start_rw_cont);
+       return ide_started;
  }
  
  static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive)
  {
         struct request *rq = HWGROUP(drive)->rq;
  
-       if (!rq->timeout)
-               rq->timeout = ATAPI_WAIT_PC;
-
         return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr);
  }
  
-static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
+static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
  {
         struct cdrom_info *info = drive->driver_data;
  
@@ -1191,10 +1158,16 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
         info->dma = 0;
  
         /* sg request */
-       if (rq->bio) {
-               int mask = drive->queue->dma_alignment;
-               unsigned long addr =
-                       (unsigned long)page_address(bio_page(rq->bio));
+       if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) {
+               struct request_queue *q = drive->queue;
+               unsigned int alignment;
+               unsigned long addr;
+               unsigned long stack_mask = ~(THREAD_SIZE - 1);
+
+               if (rq->bio)
+                       addr = (unsigned long)bio_data(rq->bio);
+               else
+                       addr = (unsigned long)rq->data;
  
                 info->dma = drive->using_dma;
  
@@ -1204,23 +1177,25 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
                  * NOTE! The "len" and "addr" checks should possibly have
                  * separate masks.
                  */
-               if ((rq->data_len & 15) || (addr & mask))
+               alignment = queue_dma_alignment(q) | q->dma_pad_mask;
+               if (addr & alignment || rq->data_len & alignment)
                         info->dma = 0;
-       }
  
-       /* start sending the command to the drive */
-       return cdrom_start_packet_command(drive, rq->data_len,
-                                         cdrom_do_newpc_cont);
+               if (!((addr & stack_mask) ^
+                     ((unsigned long)current->stack & stack_mask)))
+                       info->dma = 0;
+       }
  }
  
  /*
   * cdrom driver request routine.
   */
-static ide_startstop_t ide_do_rw_cdrom(ide_drive_t *drive, struct request *rq,
+static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
                                         sector_t block)
  {
-       ide_startstop_t action;
         struct cdrom_info *info = drive->driver_data;
+       ide_handler_t *fn;
+       int xferlen;
  
         if (blk_fs_request(rq)) {
                 if (info->cd_flags & IDE_CD_FLAG_SEEKING) {
@@ -1240,29 +1215,48 @@ static ide_startstop_t ide_do_rw_cdrom(ide_drive_t *drive, struct request *rq,
                 }
                 if (rq_data_dir(rq) == READ &&
                     IDE_LARGE_SEEK(info->last_block, block,
-                                  IDECD_SEEK_THRESHOLD) &&
-                   drive->dsc_overlap)
-                       action = cdrom_start_seek(drive, block);
-               else
-                       action = cdrom_start_rw(drive, rq);
+                           IDECD_SEEK_THRESHOLD) &&
+                   drive->dsc_overlap) {
+                       xferlen = 0;
+                       fn = cdrom_start_seek_continuation;
+
+                       info->dma = 0;
+                       info->start_seek = jiffies;
+
+                       ide_cd_prepare_seek_request(drive, rq);
+               } else {
+                       xferlen = 32768;
+                       fn = cdrom_start_rw_cont;
+
+                       if (cdrom_start_rw(drive, rq) == ide_stopped)
+                               return ide_stopped;
+
+                       if (ide_cd_prepare_rw_request(drive, rq) == ide_stopped)
+                               return ide_stopped;
+               }
                 info->last_block = block;
-               return action;
         } else if (blk_sense_request(rq) || blk_pc_request(rq) ||
                    rq->cmd_type == REQ_TYPE_ATA_PC) {
-               return cdrom_do_block_pc(drive, rq);
+               xferlen = rq->data_len;
+               fn = cdrom_do_newpc_cont;
+
+               if (!rq->timeout)
+                       rq->timeout = ATAPI_WAIT_PC;
+
+               cdrom_do_block_pc(drive, rq);
         } else if (blk_special_request(rq)) {
                 /* right now this can only be a reset... */
                 cdrom_end_request(drive, 1);
                 return ide_stopped;
+       } else {
+               blk_dump_rq_flags(rq, "ide-cd bad flags");
+               cdrom_end_request(drive, 0);
+               return ide_stopped;
         }
  
-       blk_dump_rq_flags(rq, "ide-cd bad flags");
-       cdrom_end_request(drive, 0);
-       return ide_stopped;
+       return cdrom_start_packet_command(drive, xferlen, fn);
  }
  
-
-
  /*
   * Ioctl handling.
   *
@@ -1872,6 +1866,7 @@ static int ide_cdrom_setup(ide_drive_t *drive)
  
         blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn);
         blk_queue_dma_alignment(drive->queue, 31);
+       blk_queue_update_dma_pad(drive->queue, 15);
         drive->queue->unplug_delay = (1 * HZ) / 1000;
         if (!drive->queue->unplug_delay)
                 drive->queue->unplug_delay = 1;
@@ -1954,10 +1949,9 @@ static ide_driver_t ide_cdrom_driver = {
         .version                = IDECD_VERSION,
         .media                  = ide_cdrom,
         .supports_dsc_overlap   = 1,
-       .do_request             = ide_do_rw_cdrom,
+       .do_request             = ide_cd_do_request,
         .end_request            = ide_end_request,
         .error                  = __ide_error,
-       .abort                  = __ide_abort,
  #ifdef CONFIG_IDE_PROC_FS
         .proc                   = idecd_proc,
  #endif
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c

index 5f49a4ae9dd80f773ad913eaaa2adf91d6316682..3a2e80237c10f5d6daa1a0f377769428af2ba17d 100644 (file)
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -985,7 +985,6 @@ static ide_driver_t idedisk_driver = {
         .do_request             = ide_do_rw_disk,
         .end_request            = ide_end_request,
         .error                  = __ide_error,
-       .abort                  = __ide_abort,
  #ifdef CONFIG_IDE_PROC_FS
         .proc                   = idedisk_proc,
  #endif
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c

index b3689437269f0586ebdfdebfa3ca53c2cd83a182..011d72011cc45524fcad2d1098de83a0c786b59d 100644 (file)
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -351,10 +351,7 @@ static void ide_floppy_callback(ide_drive_t *drive)
  
  static void idefloppy_init_pc(struct ide_atapi_pc *pc)
  {
-       memset(pc->c, 0, 12);
-       pc->retries = 0;
-       pc->flags = 0;
-       pc->req_xfer = 0;
+       memset(pc, 0, sizeof(*pc));
         pc->buf = pc->pc_buf;
         pc->buf_size = IDEFLOPPY_PC_BUFFER_SIZE;
         pc->callback = ide_floppy_callback;
@@ -561,12 +558,6 @@ static void idefloppy_create_start_stop_cmd(struct ide_atapi_pc *pc, int start)
         pc->c[4] = start;
  }
  
-static void idefloppy_create_test_unit_ready_cmd(struct ide_atapi_pc *pc)
-{
-       idefloppy_init_pc(pc);
-       pc->c[0] = GPCMD_TEST_UNIT_READY;
-}
-
  static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy,
                                     struct ide_atapi_pc *pc, struct request *rq,
                                     unsigned long sector)
@@ -711,10 +702,10 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
         set_disk_ro(floppy->disk, floppy->wp);
         page = &pc.buf[8];
  
-       transfer_rate = be16_to_cpu(*(u16 *)&pc.buf[8 + 2]);
-       sector_size   = be16_to_cpu(*(u16 *)&pc.buf[8 + 6]);
-       cyls          = be16_to_cpu(*(u16 *)&pc.buf[8 + 8]);
-       rpm           = be16_to_cpu(*(u16 *)&pc.buf[8 + 28]);
+       transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]);
+       sector_size   = be16_to_cpup((__be16 *)&pc.buf[8 + 6]);
+       cyls          = be16_to_cpup((__be16 *)&pc.buf[8 + 8]);
+       rpm           = be16_to_cpup((__be16 *)&pc.buf[8 + 28]);
         heads         = pc.buf[8 + 4];
         sectors       = pc.buf[8 + 5];
  
@@ -789,8 +780,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
         for (i = 0; i < desc_cnt; i++) {
                 unsigned int desc_start = 4 + i*8;
  
-               blocks = be32_to_cpu(*(u32 *)&pc.buf[desc_start]);
-               length = be16_to_cpu(*(u16 *)&pc.buf[desc_start + 6]);
+               blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]);
+               length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]);
  
                 debug_log("Descriptor %d: %dkB, %d blocks, %d sector size\n",
                                 i, blocks * length / 1024, blocks, length);
@@ -911,8 +902,8 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
                 if (u_index >= u_array_size)
                         break;  /* User-supplied buffer too small */
  
-               blocks = be32_to_cpu(*(u32 *)&pc.buf[desc_start]);
-               length = be16_to_cpu(*(u16 *)&pc.buf[desc_start + 6]);
+               blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]);
+               length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]);
  
                 if (put_user(blocks, argp))
                         return(-EFAULT);
@@ -1138,7 +1129,6 @@ static ide_driver_t idefloppy_driver = {
         .do_request             = idefloppy_do_request,
         .end_request            = idefloppy_end_request,
         .error                  = __ide_error,
-       .abort                  = __ide_abort,
  #ifdef CONFIG_IDE_PROC_FS
         .proc                   = idefloppy_proc,
  #endif
@@ -1166,7 +1156,9 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
                 floppy->flags &= ~IDEFLOPPY_FLAG_FORMAT_IN_PROGRESS;
                 /* Just in case */
  
-               idefloppy_create_test_unit_ready_cmd(&pc);
+               idefloppy_init_pc(&pc);
+               pc.c[0] = GPCMD_TEST_UNIT_READY;
+
                 if (idefloppy_queue_pc_tail(drive, &pc)) {
                         idefloppy_create_start_stop_cmd(&pc, 1);
                         (void) idefloppy_queue_pc_tail(drive, &pc);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c

index 28057747c1f8fcf91b34dfd2aac2a375c3ab09d9..661b75a89d4dc4d2227a09d3ce69819a6132e42e 100644 (file)
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -504,55 +504,6 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
  
  EXPORT_SYMBOL_GPL(ide_error);
  
-ide_startstop_t __ide_abort(ide_drive_t *drive, struct request *rq)
-{
-       if (drive->media != ide_disk)
-               rq->errors |= ERROR_RESET;
-
-       ide_kill_rq(drive, rq);
-
-       return ide_stopped;
-}
-
-EXPORT_SYMBOL_GPL(__ide_abort);
-
-/**
- *     ide_abort       -       abort pending IDE operations
- *     @drive: drive the error occurred on
- *     @msg: message to report
- *
- *     ide_abort kills and cleans up when we are about to do a 
- *     host initiated reset on active commands. Longer term we
- *     want handlers to have sensible abort handling themselves
- *
- *     This differs fundamentally from ide_error because in 
- *     this case the command is doing just fine when we
- *     blow it away.
- */
- 
-ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg)
-{
-       struct request *rq;
-
-       if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
-               return ide_stopped;
-
-       /* retry only "normal" I/O: */
-       if (!blk_fs_request(rq)) {
-               rq->errors = 1;
-               ide_end_drive_cmd(drive, BUSY_STAT, 0);
-               return ide_stopped;
-       }
-
-       if (rq->rq_disk) {
-               ide_driver_t *drv;
-
-               drv = *(ide_driver_t **)rq->rq_disk->private_data;
-               return drv->abort(drive, rq);
-       } else
-               return __ide_abort(drive, rq);
-}
-
  static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
  {
         tf->nsect   = drive->sect;
@@ -766,6 +717,18 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
         return ide_stopped;
  }
  
+static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq)
+{      /* Handle a special request by dispatching on its opcode, rq->cmd[0]. */
+       switch (rq->cmd[0]) {
+       case REQ_DRIVE_RESET:   /* reset request: result comes from ide_do_reset() */
+               return ide_do_reset(drive);
+       default:                /* any other opcode: dump the request, then end it */
+               blk_dump_rq_flags(rq, "ide_special_rq - bad request");
+               ide_end_request(drive, 0, 0);   /* NOTE(review): presumably completes rq as failed - confirm */
+               return ide_stopped;
+       }
+}
+
  static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
  {
         struct request_pm_state *pm = rq->data;
@@ -869,7 +832,16 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
                             pm->pm_step == ide_pm_state_completed)
                                 ide_complete_pm_request(drive, rq);
                         return startstop;
-               }
+               } else if (!rq->rq_disk && blk_special_request(rq))
+                       /*
+                        * TODO: Once all ULDs have been modified to
+                        * check for specific op codes rather than
+                        * blindly accepting any special request, the
+                        * check for ->rq_disk above may be replaced
+                        * by a more suitable mechanism or even
+                        * dropped entirely.
+                        */
+                       return ide_special_rq(drive, rq);
  
                 drv = *(ide_driver_t **)rq->rq_disk->private_data;
                 return drv->do_request(drive, rq, block);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c

index 80ad4f234f3f015a18537c4f65bf891c293bd1ea..44aaec256a30d94b3d07f8a8b39a4283873c60a5 100644 (file)
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -905,6 +905,14 @@ void ide_execute_pkt_cmd(ide_drive_t *drive)
  }
  EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd);
  
+static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
+{      /* End the hwgroup's current request if it is a REQ_DRIVE_RESET special request; otherwise do nothing. */
+       struct request *rq = drive->hwif->hwgroup->rq;
+
+       if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET)
+               ide_end_request(drive, err ? err : 1, 0);      /* second argument: err if non-zero, else 1 */
+}
+
  /* needed below */
  static ide_startstop_t do_reset1 (ide_drive_t *, int);
  
@@ -940,7 +948,7 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
         }
         /* done polling */
         hwgroup->polling = 0;
-       hwgroup->resetting = 0;
+       ide_complete_drive_reset(drive, 0);
         return ide_stopped;
  }
  
@@ -956,12 +964,14 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
         ide_hwif_t *hwif        = HWIF(drive);
         const struct ide_port_ops *port_ops = hwif->port_ops;
         u8 tmp;
+       int err = 0;
  
         if (port_ops && port_ops->reset_poll) {
-               if (port_ops->reset_poll(drive)) {
+               err = port_ops->reset_poll(drive);
+               if (err) {
                         printk(KERN_ERR "%s: host reset_poll failure for %s.\n",
                                 hwif->name, drive->name);
-                       return ide_stopped;
+                       goto out;
                 }
         }
  
@@ -975,6 +985,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
                 }
                 printk("%s: reset timed-out, status=0x%02x\n", hwif->name, tmp);
                 drive->failures++;
+               err = -EIO;
         } else  {
                 printk("%s: reset: ", hwif->name);
                 tmp = ide_read_error(drive);
@@ -1001,10 +1012,12 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
                         if (tmp & 0x80)
                                 printk("; slave: failed");
                         printk("\n");
+                       err = -EIO;
                 }
         }
+out:
         hwgroup->polling = 0;   /* done polling */
-       hwgroup->resetting = 0; /* done reset attempt */
+       ide_complete_drive_reset(drive, err);
         return ide_stopped;
  }
  
@@ -1090,7 +1103,6 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
  
         /* For an ATAPI device, first try an ATAPI SRST. */
         if (drive->media != ide_disk && !do_not_try_atapi) {
-               hwgroup->resetting = 1;
                 pre_reset(drive);
                 SELECT_DRIVE(drive);
                 udelay (20);
@@ -1112,10 +1124,10 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
  
         if (io_ports->ctl_addr == 0) {
                 spin_unlock_irqrestore(&ide_lock, flags);
+               ide_complete_drive_reset(drive, -ENXIO);
                 return ide_stopped;
         }
  
-       hwgroup->resetting = 1;
         /*
          * Note that we also set nIEN while resetting the device,
          * to mask unwanted interrupts from the interface during the reset.
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c

index 47af80df68728c1ec9885fec5ced8ecd38826c3c..13af72f09ec499571d0125ce3784cc8b26baabdd 100644 (file)
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -1,26 +1,11 @@
-#include <linux/module.h>
  #include <linux/types.h>
  #include <linux/string.h>
  #include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
  #include <linux/interrupt.h>
-#include <linux/major.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/blkpg.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
  #include <linux/hdreg.h>
  #include <linux/ide.h>
  #include <linux/bitops.h>
  
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
  static const char *udma_str[] =
          { "UDMA/16", "UDMA/25",  "UDMA/33",  "UDMA/44",
            "UDMA/66", "UDMA/100", "UDMA/133", "UDMA7" };
@@ -90,142 +75,6 @@ static u8 ide_rate_filter(ide_drive_t *drive, u8 speed)
         return min(speed, mode);
  }
  
-/*
- * Standard (generic) timings for PIO modes, from ATA2 specification.
- * These timings are for access to the IDE data port register *only*.
- * Some drives may specify a mode, while also specifying a different
- * value for cycle_time (from drive identification data).
- */
-const ide_pio_timings_t ide_pio_timings[6] = {
-       { 70,   165,    600 },  /* PIO Mode 0 */
-       { 50,   125,    383 },  /* PIO Mode 1 */
-       { 30,   100,    240 },  /* PIO Mode 2 */
-       { 30,   80,     180 },  /* PIO Mode 3 with IORDY */
-       { 25,   70,     120 },  /* PIO Mode 4 with IORDY */
-       { 20,   50,     100 }   /* PIO Mode 5 with IORDY (nonstandard) */
-};
-
-EXPORT_SYMBOL_GPL(ide_pio_timings);
-
-/*
- * Shared data/functions for determining best PIO mode for an IDE drive.
- * Most of this stuff originally lived in cmd640.c, and changes to the
- * ide_pio_blacklist[] table should be made with EXTREME CAUTION to avoid
- * breaking the fragile cmd640.c support.
- */
-
-/*
- * Black list. Some drives incorrectly report their maximal PIO mode,
- * at least in respect to CMD640. Here we keep info on some known drives.
- */
-static struct ide_pio_info {
-       const char      *name;
-       int             pio;
-} ide_pio_blacklist [] = {
-       { "Conner Peripherals 540MB - CFS540A", 3 },
-
-       { "WDC AC2700",  3 },
-       { "WDC AC2540",  3 },
-       { "WDC AC2420",  3 },
-       { "WDC AC2340",  3 },
-       { "WDC AC2250",  0 },
-       { "WDC AC2200",  0 },
-       { "WDC AC21200", 4 },
-       { "WDC AC2120",  0 },
-       { "WDC AC2850",  3 },
-       { "WDC AC1270",  3 },
-       { "WDC AC1170",  1 },
-       { "WDC AC1210",  1 },
-       { "WDC AC280",   0 },
-       { "WDC AC31000", 3 },
-       { "WDC AC31200", 3 },
-
-       { "Maxtor 7131 AT", 1 },
-       { "Maxtor 7171 AT", 1 },
-       { "Maxtor 7213 AT", 1 },
-       { "Maxtor 7245 AT", 1 },
-       { "Maxtor 7345 AT", 1 },
-       { "Maxtor 7546 AT", 3 },
-       { "Maxtor 7540 AV", 3 },
-
-       { "SAMSUNG SHD-3121A", 1 },
-       { "SAMSUNG SHD-3122A", 1 },
-       { "SAMSUNG SHD-3172A", 1 },
-
-       { "ST5660A",  3 },
-       { "ST3660A",  3 },
-       { "ST3630A",  3 },
-       { "ST3655A",  3 },
-       { "ST3391A",  3 },
-       { "ST3390A",  1 },
-       { "ST3600A",  1 },
-       { "ST3290A",  0 },
-       { "ST3144A",  0 },
-       { "ST3491A",  1 },      /* reports 3, should be 1 or 2 (depending on */ 
-                               /* drive) according to Seagates FIND-ATA program */
-
-       { "QUANTUM ELS127A", 0 },
-       { "QUANTUM ELS170A", 0 },
-       { "QUANTUM LPS240A", 0 },
-       { "QUANTUM LPS210A", 3 },
-       { "QUANTUM LPS270A", 3 },
-       { "QUANTUM LPS365A", 3 },
-       { "QUANTUM LPS540A", 3 },
-       { "QUANTUM LIGHTNING 540A", 3 },
-       { "QUANTUM LIGHTNING 730A", 3 },
-
-        { "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */
-        { "QUANTUM FIREBALL_640", 3 }, 
-        { "QUANTUM FIREBALL_1080", 3 },
-        { "QUANTUM FIREBALL_1280", 3 },
-       { NULL, 0 }
-};
-
-/**
- *     ide_scan_pio_blacklist  -       check for a blacklisted drive
- *     @model: Drive model string
- *
- *     This routine searches the ide_pio_blacklist for an entry
- *     matching the start/whole of the supplied model name.
- *
- *     Returns -1 if no match found.
- *     Otherwise returns the recommended PIO mode from ide_pio_blacklist[].
- */
-
-static int ide_scan_pio_blacklist (char *model)
-{
-       struct ide_pio_info *p;
-
-       for (p = ide_pio_blacklist; p->name != NULL; p++) {
-               if (strncmp(p->name, model, strlen(p->name)) == 0)
-                       return p->pio;
-       }
-       return -1;
-}
-
-unsigned int ide_pio_cycle_time(ide_drive_t *drive, u8 pio)
-{
-       struct hd_driveid *id = drive->id;
-       int cycle_time = 0;
-
-       if (id->field_valid & 2) {
-               if (id->capability & 8)
-                       cycle_time = id->eide_pio_iordy;
-               else
-                       cycle_time = id->eide_pio;
-       }
-
-       /* conservative "downgrade" for all pre-ATA2 drives */
-       if (pio < 3) {
-               if (cycle_time && cycle_time < ide_pio_timings[pio].cycle_time)
-                       cycle_time = 0; /* use standard timing */
-       }
-
-       return cycle_time ? cycle_time : ide_pio_timings[pio].cycle_time;
-}
-
-EXPORT_SYMBOL_GPL(ide_pio_cycle_time);
-
  /**
   *     ide_get_best_pio_mode   -       get PIO mode from drive
   *     @drive: drive to consider
diff --git a/drivers/ide/ide-pio-blacklist.c b/drivers/ide/ide-pio-blacklist.c

new file mode 100644 (file)

index 0000000..a8c2c8f
--- /dev/null
+++ b/drivers/ide/ide-pio-blacklist.c
@@ -0,0 +1,94 @@
+/*
+ * PIO blacklist.  Some drives incorrectly report their maximal PIO mode,
+ * at least with respect to CMD640.  Here we keep info on some known drives.
+ *
+ * Changes to the ide_pio_blacklist[] should be made with EXTREME CAUTION
+ * to avoid breaking the fragile cmd640.c support.
+ */
+
+#include <linux/string.h>
+
+static struct ide_pio_info {   /* one blacklist entry */
+       const char      *name;  /* model-name prefix the scan compares against */
+       int             pio;    /* PIO mode returned for a matching drive */
+} ide_pio_blacklist [] = {
+       { "Conner Peripherals 540MB - CFS540A", 3 },
+
+       { "WDC AC2700",  3 },
+       { "WDC AC2540",  3 },
+       { "WDC AC2420",  3 },
+       { "WDC AC2340",  3 },
+       { "WDC AC2250",  0 },
+       { "WDC AC2200",  0 },
+       { "WDC AC21200", 4 },   /* keep ahead of "WDC AC2120": the scan is first-match on a prefix */
+       { "WDC AC2120",  0 },
+       { "WDC AC2850",  3 },
+       { "WDC AC1270",  3 },
+       { "WDC AC1170",  1 },
+       { "WDC AC1210",  1 },
+       { "WDC AC280",   0 },
+       { "WDC AC31000", 3 },
+       { "WDC AC31200", 3 },
+
+       { "Maxtor 7131 AT", 1 },
+       { "Maxtor 7171 AT", 1 },
+       { "Maxtor 7213 AT", 1 },
+       { "Maxtor 7245 AT", 1 },
+       { "Maxtor 7345 AT", 1 },
+       { "Maxtor 7546 AT", 3 },
+       { "Maxtor 7540 AV", 3 },
+
+       { "SAMSUNG SHD-3121A", 1 },
+       { "SAMSUNG SHD-3122A", 1 },
+       { "SAMSUNG SHD-3172A", 1 },
+
+       { "ST5660A",  3 },
+       { "ST3660A",  3 },
+       { "ST3630A",  3 },
+       { "ST3655A",  3 },
+       { "ST3391A",  3 },
+       { "ST3390A",  1 },
+       { "ST3600A",  1 },
+       { "ST3290A",  0 },
+       { "ST3144A",  0 },
+       { "ST3491A",  1 }, /* reports 3, should be 1 or 2 (depending on drive)
+                             according to Seagate's FIND-ATA program */
+
+       { "QUANTUM ELS127A", 0 },
+       { "QUANTUM ELS170A", 0 },
+       { "QUANTUM LPS240A", 0 },
+       { "QUANTUM LPS210A", 3 },
+       { "QUANTUM LPS270A", 3 },
+       { "QUANTUM LPS365A", 3 },
+       { "QUANTUM LPS540A", 3 },
+       { "QUANTUM LIGHTNING 540A", 3 },
+       { "QUANTUM LIGHTNING 730A", 3 },
+
+       { "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */
+       { "QUANTUM FIREBALL_640", 3 },
+       { "QUANTUM FIREBALL_1080", 3 },
+       { "QUANTUM FIREBALL_1280", 3 },
+       { NULL, 0 }     /* NULL name terminates the table */
+};
+
+/**
+ *     ide_scan_pio_blacklist  -       check for a blacklisted drive
+ *     @model: drive model string to look up
+ *
+ *     Walks ide_pio_blacklist[] in order and stops at the first entry whose
+ *     name is a prefix of (or equal to) @model, so a longer name must precede
+ *     any shorter name it starts with ("WDC AC21200" before "WDC AC2120").
+ *
+ *     Returns the PIO mode of the first matching entry, or -1 if none matches.
+ */
+
+int ide_scan_pio_blacklist(char *model)
+{
+       struct ide_pio_info *p;
+
+       for (p = ide_pio_blacklist; p->name != NULL; p++) {     /* NULL name ends the table */
+               if (strncmp(p->name, model, strlen(p->name)) == 0)      /* compare only strlen(p->name) chars: prefix match */
+                       return p->pio;
+       }
+       return -1;
+}
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c

index adbd01784162453968ce3a4c33a035591d97b679..03f2ef5470a3756cfac07a093110ef4dd234a9e0 100644 (file)
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -33,6 +33,8 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
         ide_hwif_t *hwif;
         unsigned long base, ctl;
  
+       printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n");
+
         if (!(pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) && pnp_irq_valid(dev, 0)))
                 return -1;
  
@@ -62,10 +64,8 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
                 u8 index = hwif->index;
                 u8 idx[4] = { index, 0xff, 0xff, 0xff };
  
-               ide_init_port_data(hwif, index);
                 ide_init_port_hw(hwif, &hw);
  
-               printk(KERN_INFO "ide%d: generic PnP IDE interface\n", index);
                 pnp_set_drvdata(dev, hwif);
  
                 ide_device_add(idx, NULL);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c

index d21e51a02c3e9c17579e64cf7a886b66dc77207c..235ebdb29b28a75d76ef3eaac651216c98c382ce 100644 (file)
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -39,6 +39,8 @@
  #include <asm/uaccess.h>
  #include <asm/io.h>
  
+static ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */
+
  /**
   *     generic_id              -       add a generic drive id
   *     @drive: drive to make an ID block for
@@ -1318,10 +1320,10 @@ static void ide_port_init_devices(ide_hwif_t *hwif)
                         drive->unmask = 1;
                 if (hwif->host_flags & IDE_HFLAG_NO_UNMASK_IRQS)
                         drive->no_unmask = 1;
-       }
  
-       if (port_ops && port_ops->port_init_devs)
-               port_ops->port_init_devs(hwif);
+               if (port_ops && port_ops->init_dev)
+                       port_ops->init_dev(drive);
+       }
  }
  
  static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
@@ -1473,22 +1475,29 @@ ide_hwif_t *ide_find_port_slot(const struct ide_port_info *d)
                 for (; i < MAX_HWIFS; i++) {
                         hwif = &ide_hwifs[i];
                         if (hwif->chipset == ide_unknown)
-                               return hwif;
+                               goto out_found;
                 }
         } else {
                 for (i = 2; i < MAX_HWIFS; i++) {
                         hwif = &ide_hwifs[i];
                         if (hwif->chipset == ide_unknown)
-                               return hwif;
+                               goto out_found;
                 }
                 for (i = 0; i < 2 && i < MAX_HWIFS; i++) {
                         hwif = &ide_hwifs[i];
                         if (hwif->chipset == ide_unknown)
-                               return hwif;
+                               goto out_found;
                 }
         }
  
+       printk(KERN_ERR "%s: no free slot for interface\n",
+                       d ? d->name : "ide");
+
         return NULL;
+
+out_found:
+       ide_init_port_data(hwif, i);
+       return hwif;
  }
  EXPORT_SYMBOL_GPL(ide_find_port_slot);
  
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c

index f9cf1670e4e18d7a2b55d1c171ff3bbe859db31f..b711ab96e28751c053c105a05371ba073808d0fe 100644 (file)
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2591,7 +2591,6 @@ static ide_driver_t idetape_driver = {
         .do_request             = idetape_do_request,
         .end_request            = idetape_end_request,
         .error                  = __ide_error,
-       .abort                  = __ide_abort,
  #ifdef CONFIG_IDE_PROC_FS
         .proc                   = idetape_proc,
  #endif
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c

index cf55a48a7dd25e5e33f62f5d49da320174c37a30..1fbdb746dc88a761a7edf9a66d49e7f68e66b179 100644 (file)
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -8,28 +8,18 @@
   *  The big the bad and the ugly.
   */
  
-#include <linux/module.h>
  #include <linux/types.h>
  #include <linux/string.h>
  #include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
  #include <linux/sched.h>
  #include <linux/interrupt.h>
-#include <linux/major.h>
  #include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/blkpg.h>
  #include <linux/slab.h>
-#include <linux/pci.h>
  #include <linux/delay.h>
  #include <linux/hdreg.h>
  #include <linux/ide.h>
-#include <linux/bitops.h>
  #include <linux/scatterlist.h>
  
-#include <asm/byteorder.h>
-#include <asm/irq.h>
  #include <asm/uaccess.h>
  #include <asm/io.h>
  
@@ -62,25 +52,6 @@ int taskfile_lib_get_identify (ide_drive_t *drive, u8 *buf)
         return ide_raw_taskfile(drive, &args, buf, 1);
  }
  
-static int inline task_dma_ok(ide_task_t *task)
-{
-       if (blk_fs_request(task->rq) || (task->tf_flags & IDE_TFLAG_FLAGGED))
-               return 1;
-
-       switch (task->tf.command) {
-               case WIN_WRITEDMA_ONCE:
-               case WIN_WRITEDMA:
-               case WIN_WRITEDMA_EXT:
-               case WIN_READDMA_ONCE:
-               case WIN_READDMA:
-               case WIN_READDMA_EXT:
-               case WIN_IDENTIFY_DMA:
-                       return 1;
-       }
-
-       return 0;
-}
-
  static ide_startstop_t task_no_data_intr(ide_drive_t *);
  static ide_startstop_t set_geometry_intr(ide_drive_t *);
  static ide_startstop_t recal_intr(ide_drive_t *);
@@ -139,8 +110,7 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
                                     WAIT_WORSTCASE, NULL);
                 return ide_started;
         default:
-               if (task_dma_ok(task) == 0 || drive->using_dma == 0 ||
-                   dma_ops->dma_setup(drive))
+               if (drive->using_dma == 0 || dma_ops->dma_setup(drive))
                         return ide_stopped;
                 dma_ops->dma_exec_cmd(drive, tf->command);
                 dma_ops->dma_start(drive);
@@ -183,7 +153,6 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive)
         if (stat & (ERR_STAT|DRQ_STAT))
                 return ide_error(drive, "set_geometry_intr", stat);
  
-       BUG_ON(HWGROUP(drive)->handler != NULL);
         ide_set_handler(drive, &set_geometry_intr, WAIT_WORSTCASE, NULL);
         return ide_started;
  }
diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h

deleted file mode 100644 (file)

index 2e91c58..0000000
--- a/drivers/ide/ide-timing.h
+++ /dev/null
@@ -1,217 +0,0 @@
-#ifndef _IDE_TIMING_H
-#define _IDE_TIMING_H
-
-/*
- *  Copyright (c) 1999-2001 Vojtech Pavlik
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
- */
-
-#include <linux/kernel.h>
-#include <linux/hdreg.h>
-
-#define XFER_PIO_5             0x0d
-#define XFER_UDMA_SLOW         0x4f
-
-struct ide_timing {
-       short mode;
-       short setup;    /* t1 */
-       short act8b;    /* t2 for 8-bit io */
-       short rec8b;    /* t2i for 8-bit io */
-       short cyc8b;    /* t0 for 8-bit io */
-       short active;   /* t2 or tD */
-       short recover;  /* t2i or tK */
-       short cycle;    /* t0 */
-       short udma;     /* t2CYCTYP/2 */
-};
-
-/*
- * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds).
- * These were taken from ATA/ATAPI-6 standard, rev 0a, except
- * for PIO 5, which is a nonstandard extension and UDMA6, which
- * is currently supported only by Maxtor drives. 
- */
-
-static struct ide_timing ide_timing[] = {
-
-       { XFER_UDMA_6,     0,   0,   0,   0,   0,   0,   0,  15 },
-       { XFER_UDMA_5,     0,   0,   0,   0,   0,   0,   0,  20 },
-       { XFER_UDMA_4,     0,   0,   0,   0,   0,   0,   0,  30 },
-       { XFER_UDMA_3,     0,   0,   0,   0,   0,   0,   0,  45 },
-
-       { XFER_UDMA_2,     0,   0,   0,   0,   0,   0,   0,  60 },
-       { XFER_UDMA_1,     0,   0,   0,   0,   0,   0,   0,  80 },
-       { XFER_UDMA_0,     0,   0,   0,   0,   0,   0,   0, 120 },
-
-       { XFER_UDMA_SLOW,  0,   0,   0,   0,   0,   0,   0, 150 },
-                                          
-       { XFER_MW_DMA_2,  25,   0,   0,   0,  70,  25, 120,   0 },
-       { XFER_MW_DMA_1,  45,   0,   0,   0,  80,  50, 150,   0 },
-       { XFER_MW_DMA_0,  60,   0,   0,   0, 215, 215, 480,   0 },
-                                          
-       { XFER_SW_DMA_2,  60,   0,   0,   0, 120, 120, 240,   0 },
-       { XFER_SW_DMA_1,  90,   0,   0,   0, 240, 240, 480,   0 },
-       { XFER_SW_DMA_0, 120,   0,   0,   0, 480, 480, 960,   0 },
-
-       { XFER_PIO_5,     20,  50,  30, 100,  50,  30, 100,   0 },
-       { XFER_PIO_4,     25,  70,  25, 120,  70,  25, 120,   0 },
-       { XFER_PIO_3,     30,  80,  70, 180,  80,  70, 180,   0 },
-
-       { XFER_PIO_2,     30, 290,  40, 330, 100,  90, 240,   0 },
-       { XFER_PIO_1,     50, 290,  93, 383, 125, 100, 383,   0 },
-       { XFER_PIO_0,     70, 290, 240, 600, 165, 150, 600,   0 },
-
-       { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960,   0 },
-
-       { -1 }
-};
-
-#define IDE_TIMING_SETUP       0x01
-#define IDE_TIMING_ACT8B       0x02
-#define IDE_TIMING_REC8B       0x04
-#define IDE_TIMING_CYC8B       0x08
-#define IDE_TIMING_8BIT                0x0e
-#define IDE_TIMING_ACTIVE      0x10
-#define IDE_TIMING_RECOVER     0x20
-#define IDE_TIMING_CYCLE       0x40
-#define IDE_TIMING_UDMA                0x80
-#define IDE_TIMING_ALL         0xff
-
-#define ENOUGH(v,unit)         (((v)-1)/(unit)+1)
-#define EZ(v,unit)             ((v)?ENOUGH(v,unit):0)
-
-#define XFER_MODE      0xf0
-#define XFER_MWDMA     0x20
-#define XFER_EPIO      0x01
-#define XFER_PIO       0x00
-
-static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q, int T, int UT)
-{
-       q->setup   = EZ(t->setup   * 1000,  T);
-       q->act8b   = EZ(t->act8b   * 1000,  T);
-       q->rec8b   = EZ(t->rec8b   * 1000,  T);
-       q->cyc8b   = EZ(t->cyc8b   * 1000,  T);
-       q->active  = EZ(t->active  * 1000,  T);
-       q->recover = EZ(t->recover * 1000,  T);
-       q->cycle   = EZ(t->cycle   * 1000,  T);
-       q->udma    = EZ(t->udma    * 1000, UT);
-}
-
-static void ide_timing_merge(struct ide_timing *a, struct ide_timing *b, struct ide_timing *m, unsigned int what)
-{
-       if (what & IDE_TIMING_SETUP  ) m->setup   = max(a->setup,   b->setup);
-       if (what & IDE_TIMING_ACT8B  ) m->act8b   = max(a->act8b,   b->act8b);
-       if (what & IDE_TIMING_REC8B  ) m->rec8b   = max(a->rec8b,   b->rec8b);
-       if (what & IDE_TIMING_CYC8B  ) m->cyc8b   = max(a->cyc8b,   b->cyc8b);
-       if (what & IDE_TIMING_ACTIVE ) m->active  = max(a->active,  b->active);
-       if (what & IDE_TIMING_RECOVER) m->recover = max(a->recover, b->recover);
-       if (what & IDE_TIMING_CYCLE  ) m->cycle   = max(a->cycle,   b->cycle);
-       if (what & IDE_TIMING_UDMA   ) m->udma    = max(a->udma,    b->udma);
-}
-
-static struct ide_timing* ide_timing_find_mode(short speed)
-{
-       struct ide_timing *t;
-
-       for (t = ide_timing; t->mode != speed; t++)
-               if (t->mode < 0)
-                       return NULL;
-       return t; 
-}
-
-static int ide_timing_compute(ide_drive_t *drive, short speed, struct ide_timing *t, int T, int UT)
-{
-       struct hd_driveid *id = drive->id;
-       struct ide_timing *s, p;
-
-/*
- * Find the mode.
- */
-
-       if (!(s = ide_timing_find_mode(speed)))
-               return -EINVAL;
-
-/*
- * Copy the timing from the table.
- */
-
-       *t = *s;
-
-/*
- * If the drive is an EIDE drive, it can tell us it needs extended
- * PIO/MWDMA cycle timing.
- */
-
-       if (id && id->field_valid & 2) {        /* EIDE drive */
-
-               memset(&p, 0, sizeof(p));
-
-               switch (speed & XFER_MODE) {
-
-                       case XFER_PIO:
-                               if (speed <= XFER_PIO_2) p.cycle = p.cyc8b = id->eide_pio;
-                                                   else p.cycle = p.cyc8b = id->eide_pio_iordy;
-                               break;
-
-                       case XFER_MWDMA:
-                               p.cycle = id->eide_dma_min;
-                               break;
-               }
-
-               ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B);
-       }
-
-/*
- * Convert the timing to bus clock counts.
- */
-
-       ide_timing_quantize(t, t, T, UT);
-
-/*
- * Even in DMA/UDMA modes we still use PIO access for IDENTIFY, S.M.A.R.T
- * and some other commands. We have to ensure that the DMA cycle timing is
- * slower/equal than the fastest PIO timing.
- */
-
-       if ((speed & XFER_MODE) != XFER_PIO) {
-               u8 pio = ide_get_best_pio_mode(drive, 255, 5);
-               ide_timing_compute(drive, XFER_PIO_0 + pio, &p, T, UT);
-               ide_timing_merge(&p, t, t, IDE_TIMING_ALL);
-       }
-
-/*
- * Lengthen active & recovery time so that cycle time is correct.
- */
-
-       if (t->act8b + t->rec8b < t->cyc8b) {
-               t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2;
-               t->rec8b = t->cyc8b - t->act8b;
-       }
-
-       if (t->active + t->recover < t->cycle) {
-               t->active += (t->cycle - (t->active + t->recover)) / 2;
-               t->recover = t->cycle - t->active;
-       }
-
-       return 0;
-}
-
-#endif
diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c

new file mode 100644 (file)

index 0000000..8c2f832
--- /dev/null
+++ b/drivers/ide/ide-timings.c
@@ -0,0 +1,205 @@
+/*
+ *  Copyright (c) 1999-2001 Vojtech Pavlik
+ *  Copyright (c) 2007-2008 Bartlomiej Zolnierkiewicz
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Should you need to contact me, the author, you can do so either by
+ * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
+ * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
+ */
+
+#include <linux/kernel.h>
+#include <linux/hdreg.h>
+#include <linux/ide.h>
+#include <linux/module.h>
+
+/*
+ * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds).
+ * These were taken from ATA/ATAPI-6 standard, rev 0a, except
+ * for PIO 5, which is a nonstandard extension and UDMA6, which
+ * is currently supported only by Maxtor drives.
+ *
+ * The table also carries SWDMA 0-2 and XFER_PIO_SLOW rows and ends with a
+ * { 0xff } terminator that ide_timing_find_mode() treats as end-of-table.
+ *
+ * NOTE(review): columns presumably follow struct ide_timing field order
+ * (mode, setup, act8b, rec8b, cyc8b, active, recover, cycle, udma) --
+ * confirm against the structure definition.
+ */
+
+static struct ide_timing ide_timing[] = {
+
+       { XFER_UDMA_6,     0,   0,   0,   0,   0,   0,   0,  15 },
+       { XFER_UDMA_5,     0,   0,   0,   0,   0,   0,   0,  20 },
+       { XFER_UDMA_4,     0,   0,   0,   0,   0,   0,   0,  30 },
+       { XFER_UDMA_3,     0,   0,   0,   0,   0,   0,   0,  45 },
+
+       { XFER_UDMA_2,     0,   0,   0,   0,   0,   0,   0,  60 },
+       { XFER_UDMA_1,     0,   0,   0,   0,   0,   0,   0,  80 },
+       { XFER_UDMA_0,     0,   0,   0,   0,   0,   0,   0, 120 },
+
+       { XFER_MW_DMA_2,  25,   0,   0,   0,  70,  25, 120,   0 },
+       { XFER_MW_DMA_1,  45,   0,   0,   0,  80,  50, 150,   0 },
+       { XFER_MW_DMA_0,  60,   0,   0,   0, 215, 215, 480,   0 },
+
+       { XFER_SW_DMA_2,  60,   0,   0,   0, 120, 120, 240,   0 },
+       { XFER_SW_DMA_1,  90,   0,   0,   0, 240, 240, 480,   0 },
+       { XFER_SW_DMA_0, 120,   0,   0,   0, 480, 480, 960,   0 },
+
+       { XFER_PIO_5,     20,  50,  30, 100,  50,  30, 100,   0 },
+       { XFER_PIO_4,     25,  70,  25, 120,  70,  25, 120,   0 },
+       { XFER_PIO_3,     30,  80,  70, 180,  80,  70, 180,   0 },
+
+       { XFER_PIO_2,     30, 290,  40, 330, 100,  90, 240,   0 },
+       { XFER_PIO_1,     50, 290,  93, 383, 125, 100, 383,   0 },
+       { XFER_PIO_0,     70, 290, 240, 600, 165, 150, 600,   0 },
+
+       { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960,   0 },
+
+       { 0xff }        /* terminator: mode 0xff marks the end of the table */
+};
+
+/**
+ * ide_timing_find_mode - look up the timing table entry for a transfer mode
+ * @speed: XFER_* mode value to search for
+ *
+ * Scans ide_timing[] from its first row.  Returns the first entry whose
+ * ->mode equals @speed, or NULL if the 0xff terminator is reached first.
+ * (For @speed == 0xff the terminator entry itself is returned.)
+ */
+struct ide_timing *ide_timing_find_mode(u8 speed)
+{
+       struct ide_timing *entry = ide_timing;
+
+       while (entry->mode != speed) {
+               /* ran off the end of the table without a match */
+               if (entry->mode == 0xff)
+                       return NULL;
+               entry++;
+       }
+
+       return entry;
+}
+EXPORT_SYMBOL_GPL(ide_timing_find_mode);
+
+/**
+ * ide_pio_cycle_time - pick the PIO cycle time for a drive
+ * @drive: drive whose identify data (drive->id) is consulted
+ * @pio: PIO mode number; XFER_PIO_0 + @pio selects the timing table row
+ *
+ * When id->field_valid has bit 1 set, the drive-reported cycle time is
+ * used: eide_pio_iordy if id->capability has bit 3 set, eide_pio otherwise.
+ * For @pio < 3 a reported value shorter than the table's ->cycle is
+ * discarded.  A zero or discarded value falls back to the table's ->cycle.
+ *
+ * NOTE(review): the table lookup result is dereferenced without a NULL
+ * check, so @pio must map to a row that exists in ide_timing[].
+ */
+u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio)
+{
+       struct ide_timing *std = ide_timing_find_mode(XFER_PIO_0 + pio);
+       struct hd_driveid *id = drive->id;
+       u16 reported;
+
+       /* no drive-supplied timing words: standard timing only */
+       if (!(id->field_valid & 2))
+               return std->cycle;
+
+       reported = (id->capability & 8) ? id->eide_pio_iordy : id->eide_pio;
+
+       /* conservative "downgrade" for all pre-ATA2 drives */
+       if (pio < 3 && reported < std->cycle)
+               return std->cycle;      /* use standard timing */
+
+       return reported ? reported : std->cycle;
+}
+EXPORT_SYMBOL_GPL(ide_pio_cycle_time);
+
+/* Round-up division for positive @v: number of @unit steps covering @v. */
+#define ENOUGH(v, unit)                (((v) - 1) / (unit) + 1)
+/* Same as ENOUGH(), except that a zero @v stays zero. */
+#define EZ(v, unit)            ((v) ? ENOUGH(v, unit) : 0)
+
+/*
+ * Convert each timing field of @in into a count and store it in @out.
+ * Every field is scaled by 1000 and divided, rounding up, by @T; only the
+ * udma field is divided by @UT instead.  Each field is read before it is
+ * written, so @in and @out may be the same structure (as the caller in
+ * this file does).
+ *
+ * NOTE(review): presumably @T and @UT are clock periods in picoseconds
+ * (the table is in nanoseconds, scaled by 1000 here) -- confirm with callers.
+ */
+static void ide_timing_quantize(struct ide_timing *in, struct ide_timing *out,
+                               int T, int UT)
+{
+       /* 8-bit transfer timings */
+       out->act8b   = EZ(in->act8b   * 1000,  T);
+       out->rec8b   = EZ(in->rec8b   * 1000,  T);
+       out->cyc8b   = EZ(in->cyc8b   * 1000,  T);
+
+       /* address setup and 16-bit transfer timings */
+       out->setup   = EZ(in->setup   * 1000,  T);
+       out->active  = EZ(in->active  * 1000,  T);
+       out->recover = EZ(in->recover * 1000,  T);
+       out->cycle   = EZ(in->cycle   * 1000,  T);
+
+       /* the UDMA cycle is the only field quantized against UT */
+       out->udma    = EZ(in->udma    * 1000, UT);
+}
+
+/**
+ * ide_timing_merge - combine two timings, keeping the larger value per field
+ * @a: first input timing
+ * @b: second input timing
+ * @m: output timing; may be the same structure as @a or @b
+ * @what: mask of IDE_TIMING_* bits selecting which fields to merge
+ *
+ * For every field selected by @what, @m gets max(@a field, @b field).
+ * Fields that are not selected are left untouched in @m.
+ */
+void ide_timing_merge(struct ide_timing *a, struct ide_timing *b,
+                     struct ide_timing *m, unsigned int what)
+{
+/* Take the larger of a->field and b->field when @flag is set in @what. */
+#define MERGE_FIELD(flag, field)                                       \
+       do {                                                            \
+               if (what & (flag))                                      \
+                       m->field = max(a->field, b->field);             \
+       } while (0)
+
+       MERGE_FIELD(IDE_TIMING_SETUP,   setup);
+       MERGE_FIELD(IDE_TIMING_ACT8B,   act8b);
+       MERGE_FIELD(IDE_TIMING_REC8B,   rec8b);
+       MERGE_FIELD(IDE_TIMING_CYC8B,   cyc8b);
+       MERGE_FIELD(IDE_TIMING_ACTIVE,  active);
+       MERGE_FIELD(IDE_TIMING_RECOVER, recover);
+       MERGE_FIELD(IDE_TIMING_CYCLE,   cycle);
+       MERGE_FIELD(IDE_TIMING_UDMA,    udma);
+
+#undef MERGE_FIELD
+}
+EXPORT_SYMBOL_GPL(ide_timing_merge);
+
+/**
+ * ide_timing_compute - build quantized timings for a drive and transfer mode
+ * @drive: drive whose identify data (drive->id, may be NULL) is consulted
+ * @speed: XFER_* transfer mode to compute timings for
+ * @t: output; receives the resulting timing
+ * @T: divisor handed to ide_timing_quantize() for all fields except udma
+ * @UT: divisor handed to ide_timing_quantize() for the udma field
+ *
+ * Returns 0 on success, or -EINVAL if @speed has no row in the timing table.
+ */
+int ide_timing_compute(ide_drive_t *drive, u8 speed,
+                      struct ide_timing *t, int T, int UT)
+{
+       struct hd_driveid *id = drive->id;
+       struct ide_timing *std, tmp;
+
+       /* Start from the standard table timing for this mode. */
+       std = ide_timing_find_mode(speed);
+       if (!std)
+               return -EINVAL;
+       *t = *std;
+
+       /*
+        * If the drive is an EIDE drive, it can tell us it needs extended
+        * PIO/MWDMA cycle timing.
+        */
+       if (id && (id->field_valid & 2)) {      /* EIDE drive */
+               memset(&tmp, 0, sizeof(tmp));
+
+               if (speed <= XFER_PIO_2)
+                       tmp.cycle = tmp.cyc8b = id->eide_pio;
+               else if (speed <= XFER_PIO_5)
+                       tmp.cycle = tmp.cyc8b = id->eide_pio_iordy;
+               else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
+                       tmp.cycle = id->eide_dma_min;
+
+               /* never go faster than what the drive asked for */
+               ide_timing_merge(&tmp, t, t,
+                                IDE_TIMING_CYCLE | IDE_TIMING_CYC8B);
+       }
+
+       /* Convert the timing to bus clock counts. */
+       ide_timing_quantize(t, t, T, UT);
+
+       /*
+        * Even in DMA/UDMA modes we still use PIO access for IDENTIFY,
+        * S.M.A.R.T and some other commands. We have to ensure that the
+        * DMA cycle timing is slower/equal than the fastest PIO timing.
+        *
+        * NOTE(review): the return value of the nested call is ignored.
+        */
+       if (speed >= XFER_SW_DMA_0) {
+               u8 best_pio = ide_get_best_pio_mode(drive, 255, 5);
+
+               ide_timing_compute(drive, XFER_PIO_0 + best_pio, &tmp, T, UT);
+               ide_timing_merge(&tmp, t, t, IDE_TIMING_ALL);
+       }
+
+       /*
+        * Lengthen active & recovery time so that cycle time is correct:
+        * half of the shortfall goes to the active time, recovery takes
+        * whatever is left of the cycle.
+        */
+       if (t->act8b + t->rec8b < t->cyc8b) {
+               t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2;
+               t->rec8b = t->cyc8b - t->act8b;
+       }
+
+       if (t->active + t->recover < t->cycle) {
+               t->active += (t->cycle - (t->active + t->recover)) / 2;
+               t->recover = t->cycle - t->active;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ide_timing_compute);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c

index 2b8453510e0900211b21ad6d742070fac1556151..d4a6b102a77227d29b4052281704f4053700156d 100644 (file)
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -50,29 +50,16 @@
  #include <linux/types.h>
  #include <linux/string.h>
  #include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
  #include <linux/interrupt.h>
  #include <linux/major.h>
  #include <linux/errno.h>
  #include <linux/genhd.h>
-#include <linux/blkpg.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/pci.h>
-#include <linux/delay.h>
  #include <linux/ide.h>
  #include <linux/completion.h>
-#include <linux/reboot.h>
-#include <linux/cdrom.h>
-#include <linux/seq_file.h>
  #include <linux/device.h>
-#include <linux/bitops.h>
-
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
  
  
  /* default maximum number of failures */
@@ -91,8 +78,6 @@ DEFINE_MUTEX(ide_cfg_mtx);
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock);
  EXPORT_SYMBOL(ide_lock);
  
-ide_hwif_t ide_hwifs[MAX_HWIFS];       /* master data repository */
-
  static void ide_port_init_devices_data(ide_hwif_t *);
  
  /*
@@ -121,7 +106,6 @@ void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
  
         ide_port_init_devices_data(hwif);
  }
-EXPORT_SYMBOL_GPL(ide_init_port_data);
  
  static void ide_port_init_devices_data(ide_hwif_t *hwif)
  {
@@ -150,18 +134,6 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
         }
  }
  
-static void __init init_ide_data (void)
-{
-       unsigned int index;
-
-       /* Initialise all interface structures */
-       for (index = 0; index < MAX_HWIFS; ++index) {
-               ide_hwif_t *hwif = &ide_hwifs[index];
-
-               ide_init_port_data(hwif, index);
-       }
-}
-
  void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
  {
         ide_hwgroup_t *hwgroup = hwif->hwgroup;
@@ -312,7 +284,8 @@ void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
         memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
         hwif->irq = hw->irq;
         hwif->chipset = hw->chipset;
-       hwif->gendev.parent = hw->dev;
+       hwif->dev = hw->dev;
+       hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
         hwif->ack_intr = hw->ack_intr;
  }
  EXPORT_SYMBOL_GPL(ide_init_port_hw);
@@ -556,6 +529,22 @@ static int generic_ide_resume(struct device *dev)
         return err;
  }
  
+/*
+ * Build a one-byte REQ_DRIVE_RESET special request (flagged
+ * REQ_SOFTBARRIER) on the drive's queue and run it via blk_execute_rq().
+ * Returns 0 when blk_execute_rq() returns zero, otherwise rq->errors.
+ */
+static int generic_drive_reset(ide_drive_t *drive)
+{
+       struct request *rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+       int err;
+
+       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_len = 1;
+       rq->cmd[0] = REQ_DRIVE_RESET;
+       rq->cmd_flags |= REQ_SOFTBARRIER;
+
+       /* read the error count before the request is released */
+       err = blk_execute_rq(drive->queue, NULL, rq, 1) ? rq->errors : 0;
+       blk_put_request(rq);
+
+       return err;
+}
+
  int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device *bdev,
                         unsigned int cmd, unsigned long arg)
  {
@@ -630,33 +619,8 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device
                         if (!capable(CAP_SYS_ADMIN))
                                 return -EACCES;
  
-                       /*
-                        *      Abort the current command on the
-                        *      group if there is one, taking
-                        *      care not to allow anything else
-                        *      to be queued and to die on the
-                        *      spot if we miss one somehow
-                        */
-
-                       spin_lock_irqsave(&ide_lock, flags);
-
-                       if (HWGROUP(drive)->resetting) {
-                               spin_unlock_irqrestore(&ide_lock, flags);
-                               return -EBUSY;
-                       }
+                       return generic_drive_reset(drive);
  
-                       ide_abort(drive, "drive reset");
-
-                       BUG_ON(HWGROUP(drive)->handler);
-
-                       /* Ensure nothing gets queued after we
-                          drop the lock. Reset will clear the busy */
-
-                       HWGROUP(drive)->busy = 1;
-                       spin_unlock_irqrestore(&ide_lock, flags);
-                       (void) ide_do_reset(drive);
-
-                       return 0;
                 case HDIO_GET_BUSSTATE:
                         if (!capable(CAP_SYS_ADMIN))
                                 return -EACCES;
@@ -1021,8 +985,6 @@ static int __init ide_init(void)
                 goto out_port_class;
         }
  
-       init_ide_data();
-
         proc_ide_create();
  
         return 0;
diff --git a/drivers/ide/legacy/ali14xx.c b/drivers/ide/legacy/ali14xx.c

index 052125fafcfa1cb320481c600e5b6129b3e06891..4ec19737f3c5356e88e00403714e0afa482bc3f4 100644 (file)
--- a/drivers/ide/legacy/ali14xx.c
+++ b/drivers/ide/legacy/ali14xx.c
@@ -117,10 +117,11 @@ static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
         u8 param1, param2, param3, param4;
         unsigned long flags;
         int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
+       struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
  
         /* calculate timing, according to PIO mode */
         time1 = ide_pio_cycle_time(drive, pio);
-       time2 = ide_pio_timings[pio].active_time;
+       time2 = t->active;
         param3 = param1 = (time2 * bus_speed + 999) / 1000;
         param4 = param2 = (time1 * bus_speed + 999) / 1000 - param1;
         if (pio < 3) {
diff --git a/drivers/ide/legacy/buddha.c b/drivers/ide/legacy/buddha.c

index 9a1d27ef3f8a0c3766df51e3c8444c7a8dcb295c..0497e7f85b09112c5fb39c1e5901e3d4231cb6f4 100644 (file)
--- a/drivers/ide/legacy/buddha.c
+++ b/drivers/ide/legacy/buddha.c
@@ -227,7 +227,6 @@ fail_base2:
                         if (hwif) {
                                 u8 index = hwif->index;
  
-                               ide_init_port_data(hwif, index);
                                 ide_init_port_hw(hwif, &hw);
  
                                 idx[i] = index;
diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c

index af11028b47949afbb207946831f4c2bba78bfd0d..129a812bb57f5f124e4f3d7ef8f43b3356501c9f 100644 (file)
--- a/drivers/ide/legacy/falconide.c
+++ b/drivers/ide/legacy/falconide.c
@@ -111,7 +111,6 @@ static int __init falconide_init(void)
                 u8 index = hwif->index;
                 u8 idx[4] = { index, 0xff, 0xff, 0xff };
  
-               ide_init_port_data(hwif, index);
                 ide_init_port_hw(hwif, &hw);
  
                 /* Atari has a byte-swapped IDE interface */
diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c

index b78941680c320b4542a75d0d894c72e9e53e9098..7e74b20202dff34fc1edfe79c1503e85bb28a13d 100644 (file)
--- a/drivers/ide/legacy/gayle.c
+++ b/drivers/ide/legacy/gayle.c
@@ -185,7 +185,6 @@ found:
         if (hwif) {
             u8 index = hwif->index;
  
-           ide_init_port_data(hwif, index);
             ide_init_port_hw(hwif, &hw);
  
             idx[i] = index;
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c

deleted file mode 100644 (file)

index abdedf5..0000000
--- a/drivers/ide/legacy/hd.c
+++ /dev/null
@@ -1,815 +0,0 @@
-/*
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- * This is the low-level hd interrupt support. It traverses the
- * request-list, using interrupts to jump between functions. As
- * all the functions are called within interrupts, we may not
- * sleep. Special care is recommended.
- *
- *  modified by Drew Eckhardt to check nr of hd's from the CMOS.
- *
- *  Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
- *  in the early extended-partition checks and added DM partitions
- *
- *  IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
- *  and general streamlining by Mark Lord.
- *
- *  Removed 99% of above. Use Mark's ide driver for those options.
- *  This is now a lightweight ST-506 driver. (Paul Gortmaker)
- *
- *  Modified 1995 Russell King for ARM processor.
- *
- *  Bugfix: max_sectors must be <= 255 or the wheels tend to come
- *  off in a hurry once you queue things up - Paul G. 02/2001
- */
-
-/* Uncomment the following if you want verbose error reports. */
-/* #define VERBOSE_ERRORS */
-
-#include <linux/blkdev.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/interrupt.h>
-#include <linux/timer.h>
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/genhd.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/ioport.h>
-#include <linux/mc146818rtc.h> /* CMOS defines */
-#include <linux/init.h>
-#include <linux/blkpg.h>
-#include <linux/hdreg.h>
-
-#define REALLY_SLOW_IO
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-
-#ifdef __arm__
-#undef  HD_IRQ
-#endif
-#include <asm/irq.h>
-#ifdef __arm__
-#define HD_IRQ IRQ_HARDDISK
-#endif
-
-/* Hd controller regster ports */
-
-#define HD_DATA                0x1f0           /* _CTL when writing */
-#define HD_ERROR       0x1f1           /* see err-bits */
-#define HD_NSECTOR     0x1f2           /* nr of sectors to read/write */
-#define HD_SECTOR      0x1f3           /* starting sector */
-#define HD_LCYL                0x1f4           /* starting cylinder */
-#define HD_HCYL                0x1f5           /* high byte of starting cyl */
-#define HD_CURRENT     0x1f6           /* 101dhhhh , d=drive, hhhh=head */
-#define HD_STATUS      0x1f7           /* see status-bits */
-#define HD_FEATURE     HD_ERROR        /* same io address, read=error, write=feature */
-#define HD_PRECOMP     HD_FEATURE      /* obsolete use of this port - predates IDE */
-#define HD_COMMAND     HD_STATUS       /* same io address, read=status, write=cmd */
-
-#define HD_CMD         0x3f6           /* used for resets */
-#define HD_ALTSTATUS   0x3f6           /* same as HD_STATUS but doesn't clear irq */
-
-/* Bits of HD_STATUS */
-#define ERR_STAT               0x01
-#define INDEX_STAT             0x02
-#define ECC_STAT               0x04    /* Corrected error */
-#define DRQ_STAT               0x08
-#define SEEK_STAT              0x10
-#define SERVICE_STAT           SEEK_STAT
-#define WRERR_STAT             0x20
-#define READY_STAT             0x40
-#define BUSY_STAT              0x80
-
-/* Bits for HD_ERROR */
-#define MARK_ERR               0x01    /* Bad address mark */
-#define TRK0_ERR               0x02    /* couldn't find track 0 */
-#define ABRT_ERR               0x04    /* Command aborted */
-#define MCR_ERR                        0x08    /* media change request */
-#define ID_ERR                 0x10    /* ID field not found */
-#define MC_ERR                 0x20    /* media changed */
-#define ECC_ERR                        0x40    /* Uncorrectable ECC error */
-#define BBD_ERR                        0x80    /* pre-EIDE meaning:  block marked bad */
-#define ICRC_ERR               0x80    /* new meaning:  CRC error during transfer */
-
-static DEFINE_SPINLOCK(hd_lock);
-static struct request_queue *hd_queue;
-
-#define MAJOR_NR HD_MAJOR
-#define QUEUE (hd_queue)
-#define CURRENT elv_next_request(hd_queue)
-
-#define TIMEOUT_VALUE  (6*HZ)
-#define        HD_DELAY        0
-
-#define MAX_ERRORS     16      /* Max read/write errors/sector */
-#define RESET_FREQ      8      /* Reset controller every 8th retry */
-#define RECAL_FREQ      4      /* Recalibrate every 4th retry */
-#define MAX_HD         2
-
-#define STAT_OK                (READY_STAT|SEEK_STAT)
-#define OK_STATUS(s)   (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK)
-
-static void recal_intr(void);
-static void bad_rw_intr(void);
-
-static int reset;
-static int hd_error;
-
-/*
- *  This struct defines the HD's and their types.
- */
-struct hd_i_struct {
-       unsigned int head, sect, cyl, wpcom, lzone, ctl;
-       int unit;
-       int recalibrate;
-       int special_op;
-};
-
-#ifdef HD_TYPE
-static struct hd_i_struct hd_info[] = { HD_TYPE };
-static int NR_HD = ARRAY_SIZE(hd_info);
-#else
-static struct hd_i_struct hd_info[MAX_HD];
-static int NR_HD;
-#endif
-
-static struct gendisk *hd_gendisk[MAX_HD];
-
-static struct timer_list device_timer;
-
-#define TIMEOUT_VALUE (6*HZ)
-
-#define SET_TIMER                                                      \
-       do {                                                            \
-               mod_timer(&device_timer, jiffies + TIMEOUT_VALUE);      \
-       } while (0)
-
-static void (*do_hd)(void) = NULL;
-#define SET_HANDLER(x) \
-if ((do_hd = (x)) != NULL) \
-       SET_TIMER; \
-else \
-       del_timer(&device_timer);
-
-
-#if (HD_DELAY > 0)
-
-#include <asm/i8253.h>
-
-unsigned long last_req;
-
-unsigned long read_timer(void)
-{
-       unsigned long t, flags;
-       int i;
-
-       spin_lock_irqsave(&i8253_lock, flags);
-       t = jiffies * 11932;
-       outb_p(0, 0x43);
-       i = inb_p(0x40);
-       i |= inb(0x40) << 8;
-       spin_unlock_irqrestore(&i8253_lock, flags);
-       return(t - i);
-}
-#endif
-
-static void __init hd_setup(char *str, int *ints)
-{
-       int hdind = 0;
-
-       if (ints[0] != 3)
-               return;
-       if (hd_info[0].head != 0)
-               hdind = 1;
-       hd_info[hdind].head = ints[2];
-       hd_info[hdind].sect = ints[3];
-       hd_info[hdind].cyl = ints[1];
-       hd_info[hdind].wpcom = 0;
-       hd_info[hdind].lzone = ints[1];
-       hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0);
-       NR_HD = hdind+1;
-}
-
-static void dump_status(const char *msg, unsigned int stat)
-{
-       char *name = "hd?";
-       if (CURRENT)
-               name = CURRENT->rq_disk->disk_name;
-
-#ifdef VERBOSE_ERRORS
-       printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
-       if (stat & BUSY_STAT)   printk("Busy ");
-       if (stat & READY_STAT)  printk("DriveReady ");
-       if (stat & WRERR_STAT)  printk("WriteFault ");
-       if (stat & SEEK_STAT)   printk("SeekComplete ");
-       if (stat & DRQ_STAT)    printk("DataRequest ");
-       if (stat & ECC_STAT)    printk("CorrectedError ");
-       if (stat & INDEX_STAT)  printk("Index ");
-       if (stat & ERR_STAT)    printk("Error ");
-       printk("}\n");
-       if ((stat & ERR_STAT) == 0) {
-               hd_error = 0;
-       } else {
-               hd_error = inb(HD_ERROR);
-               printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff);
-               if (hd_error & BBD_ERR)         printk("BadSector ");
-               if (hd_error & ECC_ERR)         printk("UncorrectableError ");
-               if (hd_error & ID_ERR)          printk("SectorIdNotFound ");
-               if (hd_error & ABRT_ERR)        printk("DriveStatusError ");
-               if (hd_error & TRK0_ERR)        printk("TrackZeroNotFound ");
-               if (hd_error & MARK_ERR)        printk("AddrMarkNotFound ");
-               printk("}");
-               if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) {
-                       printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
-                               inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
-                       if (CURRENT)
-                               printk(", sector=%ld", CURRENT->sector);
-               }
-               printk("\n");
-       }
-#else
-       printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff);
-       if ((stat & ERR_STAT) == 0) {
-               hd_error = 0;
-       } else {
-               hd_error = inb(HD_ERROR);
-               printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff);
-       }
-#endif
-}
-
-static void check_status(void)
-{
-       int i = inb_p(HD_STATUS);
-
-       if (!OK_STATUS(i)) {
-               dump_status("check_status", i);
-               bad_rw_intr();
-       }
-}
-
-static int controller_busy(void)
-{
-       int retries = 100000;
-       unsigned char status;
-
-       do {
-               status = inb_p(HD_STATUS);
-       } while ((status & BUSY_STAT) && --retries);
-       return status;
-}
-
-static int status_ok(void)
-{
-       unsigned char status = inb_p(HD_STATUS);
-
-       if (status & BUSY_STAT)
-               return 1;       /* Ancient, but does it make sense??? */
-       if (status & WRERR_STAT)
-               return 0;
-       if (!(status & READY_STAT))
-               return 0;
-       if (!(status & SEEK_STAT))
-               return 0;
-       return 1;
-}
-
-static int controller_ready(unsigned int drive, unsigned int head)
-{
-       int retry = 100;
-
-       do {
-               if (controller_busy() & BUSY_STAT)
-                       return 0;
-               outb_p(0xA0 | (drive<<4) | head, HD_CURRENT);
-               if (status_ok())
-                       return 1;
-       } while (--retry);
-       return 0;
-}
-
-static void hd_out(struct hd_i_struct *disk,
-                  unsigned int nsect,
-                  unsigned int sect,
-                  unsigned int head,
-                  unsigned int cyl,
-                  unsigned int cmd,
-                  void (*intr_addr)(void))
-{
-       unsigned short port;
-
-#if (HD_DELAY > 0)
-       while (read_timer() - last_req < HD_DELAY)
-               /* nothing */;
-#endif
-       if (reset)
-               return;
-       if (!controller_ready(disk->unit, head)) {
-               reset = 1;
-               return;
-       }
-       SET_HANDLER(intr_addr);
-       outb_p(disk->ctl, HD_CMD);
-       port = HD_DATA;
-       outb_p(disk->wpcom >> 2, ++port);
-       outb_p(nsect, ++port);
-       outb_p(sect, ++port);
-       outb_p(cyl, ++port);
-       outb_p(cyl >> 8, ++port);
-       outb_p(0xA0 | (disk->unit << 4) | head, ++port);
-       outb_p(cmd, ++port);
-}
-
-static void hd_request (void);
-
-static int drive_busy(void)
-{
-       unsigned int i;
-       unsigned char c;
-
-       for (i = 0; i < 500000 ; i++) {
-               c = inb_p(HD_STATUS);
-               if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK)
-                       return 0;
-       }
-       dump_status("reset timed out", c);
-       return 1;
-}
-
-static void reset_controller(void)
-{
-       int     i;
-
-       outb_p(4, HD_CMD);
-       for (i = 0; i < 1000; i++) barrier();
-       outb_p(hd_info[0].ctl & 0x0f, HD_CMD);
-       for (i = 0; i < 1000; i++) barrier();
-       if (drive_busy())
-               printk("hd: controller still busy\n");
-       else if ((hd_error = inb(HD_ERROR)) != 1)
-               printk("hd: controller reset failed: %02x\n", hd_error);
-}
-
-static void reset_hd(void)
-{
-       static int i;
-
-repeat:
-       if (reset) {
-               reset = 0;
-               i = -1;
-               reset_controller();
-       } else {
-               check_status();
-               if (reset)
-                       goto repeat;
-       }
-       if (++i < NR_HD) {
-               struct hd_i_struct *disk = &hd_info[i];
-               disk->special_op = disk->recalibrate = 1;
-               hd_out(disk, disk->sect, disk->sect, disk->head-1,
-                       disk->cyl, WIN_SPECIFY, &reset_hd);
-               if (reset)
-                       goto repeat;
-       } else
-               hd_request();
-}
-
-/*
- * Ok, don't know what to do with the unexpected interrupts: on some machines
- * doing a reset and a retry seems to result in an eternal loop. Right now I
- * ignore it, and just set the timeout.
- *
- * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the
- * drive enters "idle", "standby", or "sleep" mode, so if the status looks
- * "good", we just ignore the interrupt completely.
- */
-static void unexpected_hd_interrupt(void)
-{
-       unsigned int stat = inb_p(HD_STATUS);
-
-       if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) {
-               dump_status("unexpected interrupt", stat);
-               SET_TIMER;
-       }
-}
-
-/*
- * bad_rw_intr() now tries to be a bit smarter and does things
- * according to the error returned by the controller.
- * -Mika Liljeberg (liljeber@cs.Helsinki.FI)
- */
-static void bad_rw_intr(void)
-{
-       struct request *req = CURRENT;
-       if (req != NULL) {
-               struct hd_i_struct *disk = req->rq_disk->private_data;
-               if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
-                       end_request(req, 0);
-                       disk->special_op = disk->recalibrate = 1;
-               } else if (req->errors % RESET_FREQ == 0)
-                       reset = 1;
-               else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0)
-                       disk->special_op = disk->recalibrate = 1;
-               /* Otherwise just retry */
-       }
-}
-
-static inline int wait_DRQ(void)
-{
-       int retries;
-       int stat;
-
-       for (retries = 0; retries < 100000; retries++) {
-               stat = inb_p(HD_STATUS);
-               if (stat & DRQ_STAT)
-                       return 0;
-       }
-       dump_status("wait_DRQ", stat);
-       return -1;
-}
-
-static void read_intr(void)
-{
-       struct request *req;
-       int i, retries = 100000;
-
-       do {
-               i = (unsigned) inb_p(HD_STATUS);
-               if (i & BUSY_STAT)
-                       continue;
-               if (!OK_STATUS(i))
-                       break;
-               if (i & DRQ_STAT)
-                       goto ok_to_read;
-       } while (--retries > 0);
-       dump_status("read_intr", i);
-       bad_rw_intr();
-       hd_request();
-       return;
-ok_to_read:
-       req = CURRENT;
-       insw(HD_DATA, req->buffer, 256);
-       req->sector++;
-       req->buffer += 512;
-       req->errors = 0;
-       i = --req->nr_sectors;
-       --req->current_nr_sectors;
-#ifdef DEBUG
-       printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n",
-               req->rq_disk->disk_name, req->sector, req->nr_sectors,
-               req->buffer+512);
-#endif
-       if (req->current_nr_sectors <= 0)
-               end_request(req, 1);
-       if (i > 0) {
-               SET_HANDLER(&read_intr);
-               return;
-       }
-       (void) inb_p(HD_STATUS);
-#if (HD_DELAY > 0)
-       last_req = read_timer();
-#endif
-       if (elv_next_request(QUEUE))
-               hd_request();
-       return;
-}
-
-static void write_intr(void)
-{
-       struct request *req = CURRENT;
-       int i;
-       int retries = 100000;
-
-       do {
-               i = (unsigned) inb_p(HD_STATUS);
-               if (i & BUSY_STAT)
-                       continue;
-               if (!OK_STATUS(i))
-                       break;
-               if ((req->nr_sectors <= 1) || (i & DRQ_STAT))
-                       goto ok_to_write;
-       } while (--retries > 0);
-       dump_status("write_intr", i);
-       bad_rw_intr();
-       hd_request();
-       return;
-ok_to_write:
-       req->sector++;
-       i = --req->nr_sectors;
-       --req->current_nr_sectors;
-       req->buffer += 512;
-       if (!i || (req->bio && req->current_nr_sectors <= 0))
-               end_request(req, 1);
-       if (i > 0) {
-               SET_HANDLER(&write_intr);
-               outsw(HD_DATA, req->buffer, 256);
-               local_irq_enable();
-       } else {
-#if (HD_DELAY > 0)
-               last_req = read_timer();
-#endif
-               hd_request();
-       }
-       return;
-}
-
-static void recal_intr(void)
-{
-       check_status();
-#if (HD_DELAY > 0)
-       last_req = read_timer();
-#endif
-       hd_request();
-}
-
-/*
- * This is another of the error-routines I don't know what to do with. The
- * best idea seems to just set reset, and start all over again.
- */
-static void hd_times_out(unsigned long dummy)
-{
-       char *name;
-
-       do_hd = NULL;
-
-       if (!CURRENT)
-               return;
-
-       disable_irq(HD_IRQ);
-       local_irq_enable();
-       reset = 1;
-       name = CURRENT->rq_disk->disk_name;
-       printk("%s: timeout\n", name);
-       if (++CURRENT->errors >= MAX_ERRORS) {
-#ifdef DEBUG
-               printk("%s: too many errors\n", name);
-#endif
-               end_request(CURRENT, 0);
-       }
-       local_irq_disable();
-       hd_request();
-       enable_irq(HD_IRQ);
-}
-
-static int do_special_op(struct hd_i_struct *disk, struct request *req)
-{
-       if (disk->recalibrate) {
-               disk->recalibrate = 0;
-               hd_out(disk, disk->sect, 0, 0, 0, WIN_RESTORE, &recal_intr);
-               return reset;
-       }
-       if (disk->head > 16) {
-               printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name);
-               end_request(req, 0);
-       }
-       disk->special_op = 0;
-       return 1;
-}
-
-/*
- * The driver enables interrupts as much as possible.  In order to do this,
- * (a) the device-interrupt is disabled before entering hd_request(),
- * and (b) the timeout-interrupt is disabled before the sti().
- *
- * Interrupts are still masked (by default) whenever we are exchanging
- * data/cmds with a drive, because some drives seem to have very poor
- * tolerance for latency during I/O. The IDE driver has support to unmask
- * interrupts for non-broken hardware, so use that driver if required.
- */
-static void hd_request(void)
-{
-       unsigned int block, nsect, sec, track, head, cyl;
-       struct hd_i_struct *disk;
-       struct request *req;
-
-       if (do_hd)
-               return;
-repeat:
-       del_timer(&device_timer);
-       local_irq_enable();
-
-       req = CURRENT;
-       if (!req) {
-               do_hd = NULL;
-               return;
-       }
-
-       if (reset) {
-               local_irq_disable();
-               reset_hd();
-               return;
-       }
-       disk = req->rq_disk->private_data;
-       block = req->sector;
-       nsect = req->nr_sectors;
-       if (block >= get_capacity(req->rq_disk) ||
-           ((block+nsect) > get_capacity(req->rq_disk))) {
-               printk("%s: bad access: block=%d, count=%d\n",
-                       req->rq_disk->disk_name, block, nsect);
-               end_request(req, 0);
-               goto repeat;
-       }
-
-       if (disk->special_op) {
-               if (do_special_op(disk, req))
-                       goto repeat;
-               return;
-       }
-       sec   = block % disk->sect + 1;
-       track = block / disk->sect;
-       head  = track % disk->head;
-       cyl   = track / disk->head;
-#ifdef DEBUG
-       printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n",
-               req->rq_disk->disk_name,
-               req_data_dir(req) == READ ? "read" : "writ",
-               cyl, head, sec, nsect, req->buffer);
-#endif
-       if (blk_fs_request(req)) {
-               switch (rq_data_dir(req)) {
-               case READ:
-                       hd_out(disk, nsect, sec, head, cyl, WIN_READ,
-                               &read_intr);
-                       if (reset)
-                               goto repeat;
-                       break;
-               case WRITE:
-                       hd_out(disk, nsect, sec, head, cyl, WIN_WRITE,
-                               &write_intr);
-                       if (reset)
-                               goto repeat;
-                       if (wait_DRQ()) {
-                               bad_rw_intr();
-                               goto repeat;
-                       }
-                       outsw(HD_DATA, req->buffer, 256);
-                       break;
-               default:
-                       printk("unknown hd-command\n");
-                       end_request(req, 0);
-                       break;
-               }
-       }
-}
-
-static void do_hd_request(struct request_queue *q)
-{
-       disable_irq(HD_IRQ);
-       hd_request();
-       enable_irq(HD_IRQ);
-}
-
-static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-       struct hd_i_struct *disk = bdev->bd_disk->private_data;
-
-       geo->heads = disk->head;
-       geo->sectors = disk->sect;
-       geo->cylinders = disk->cyl;
-       return 0;
-}
-
-/*
- * Releasing a block device means we sync() it, so that it can safely
- * be forgotten about...
- */
-
-static irqreturn_t hd_interrupt(int irq, void *dev_id)
-{
-       void (*handler)(void) = do_hd;
-
-       do_hd = NULL;
-       del_timer(&device_timer);
-       if (!handler)
-               handler = unexpected_hd_interrupt;
-       handler();
-       local_irq_enable();
-       return IRQ_HANDLED;
-}
-
-static struct block_device_operations hd_fops = {
-       .getgeo =       hd_getgeo,
-};
-
-/*
- * This is the hard disk IRQ description. The IRQF_DISABLED in sa_flags
- * means we run the IRQ-handler with interrupts disabled:  this is bad for
- * interrupt latency, but anything else has led to problems on some
- * machines.
- *
- * We enable interrupts in some of the routines after making sure it's
- * safe.
- */
-
-static int __init hd_init(void)
-{
-       int drive;
-
-       if (register_blkdev(MAJOR_NR, "hd"))
-               return -1;
-
-       hd_queue = blk_init_queue(do_hd_request, &hd_lock);
-       if (!hd_queue) {
-               unregister_blkdev(MAJOR_NR, "hd");
-               return -ENOMEM;
-       }
-
-       blk_queue_max_sectors(hd_queue, 255);
-       init_timer(&device_timer);
-       device_timer.function = hd_times_out;
-       blk_queue_hardsect_size(hd_queue, 512);
-
-       if (!NR_HD) {
-               /*
-                * We don't know anything about the drive.  This means
-                * that you *MUST* specify the drive parameters to the
-                * kernel yourself.
-                *
-                * If we were on an i386, we used to read this info from
-                * the BIOS or CMOS.  This doesn't work all that well,
-                * since this assumes that this is a primary or secondary
-                * drive, and if we're using this legacy driver, it's
-                * probably an auxilliary controller added to recover
-                * legacy data off an ST-506 drive.  Either way, it's
-                * definitely safest to have the user explicitly specify
-                * the information.
-                */
-               printk("hd: no drives specified - use hd=cyl,head,sectors"
-                       " on kernel command line\n");
-               goto out;
-       }
-
-       for (drive = 0 ; drive < NR_HD ; drive++) {
-               struct gendisk *disk = alloc_disk(64);
-               struct hd_i_struct *p = &hd_info[drive];
-               if (!disk)
-                       goto Enomem;
-               disk->major = MAJOR_NR;
-               disk->first_minor = drive << 6;
-               disk->fops = &hd_fops;
-               sprintf(disk->disk_name, "hd%c", 'a'+drive);
-               disk->private_data = p;
-               set_capacity(disk, p->head * p->sect * p->cyl);
-               disk->queue = hd_queue;
-               p->unit = drive;
-               hd_gendisk[drive] = disk;
-               printk("%s: %luMB, CHS=%d/%d/%d\n",
-                       disk->disk_name, (unsigned long)get_capacity(disk)/2048,
-                       p->cyl, p->head, p->sect);
-       }
-
-       if (request_irq(HD_IRQ, hd_interrupt, IRQF_DISABLED, "hd", NULL)) {
-               printk("hd: unable to get IRQ%d for the hard disk driver\n",
-                       HD_IRQ);
-               goto out1;
-       }
-       if (!request_region(HD_DATA, 8, "hd")) {
-               printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA);
-               goto out2;
-       }
-       if (!request_region(HD_CMD, 1, "hd(cmd)")) {
-               printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD);
-               goto out3;
-       }
-
-       /* Let them fly */
-       for (drive = 0; drive < NR_HD; drive++)
-               add_disk(hd_gendisk[drive]);
-
-       return 0;
-
-out3:
-       release_region(HD_DATA, 8);
-out2:
-       free_irq(HD_IRQ, NULL);
-out1:
-       for (drive = 0; drive < NR_HD; drive++)
-               put_disk(hd_gendisk[drive]);
-       NR_HD = 0;
-out:
-       del_timer(&device_timer);
-       unregister_blkdev(MAJOR_NR, "hd");
-       blk_cleanup_queue(hd_queue);
-       return -1;
-Enomem:
-       while (drive--)
-               put_disk(hd_gendisk[drive]);
-       goto out;
-}
-
-static int __init parse_hd_setup(char *line)
-{
-       int ints[6];
-
-       (void) get_options(line, ARRAY_SIZE(ints), ints);
-       hd_setup(NULL, ints);
-
-       return 1;
-}
-__setup("hd=", parse_hd_setup);
-
-module_init(hd_init);
diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c

index dd6dfb32e85303b4342be96afddddd4584cf33f4..7bc8fd59ea9ee336dbcab2a5981c0bf696df6401 100644 (file)
--- a/drivers/ide/legacy/ht6560b.c
+++ b/drivers/ide/legacy/ht6560b.c
@@ -216,6 +216,7 @@ static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio)
  
          if (pio) {
                 unsigned int cycle_time;
+               struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
  
                 cycle_time = ide_pio_cycle_time(drive, pio);
  
@@ -224,10 +225,8 @@ static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio)
                  *  actual cycle time for recovery and activity
                  *  according system bus speed.
                  */
-               active_time = ide_pio_timings[pio].active_time;
-               recovery_time = cycle_time
-                       - active_time
-                       - ide_pio_timings[pio].setup_time;
+               active_time = t->active;
+               recovery_time = cycle_time - active_time - t->setup;
                 /*
                  *  Cycle times should be Vesa bus cycles
                  */
@@ -311,16 +310,16 @@ static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio)
  #endif
  }
  
-static void __init ht6560b_port_init_devs(ide_hwif_t *hwif)
+static void __init ht6560b_init_dev(ide_drive_t *drive)
  {
+       ide_hwif_t *hwif = drive->hwif;
         /* Setting default configurations for drives. */
         int t = (HT_CONFIG_DEFAULT << 8) | HT_TIMING_DEFAULT;
  
         if (hwif->channel)
                 t |= (HT_SECONDARY_IF << 8);
  
-       hwif->drives[0].drive_data = t;
-       hwif->drives[1].drive_data = t;
+       drive->drive_data = t;
  }
  
  static int probe_ht6560b;
@@ -329,7 +328,7 @@ module_param_named(probe, probe_ht6560b, bool, 0);
  MODULE_PARM_DESC(probe, "probe for HT6560B chipset");
  
  static const struct ide_port_ops ht6560b_port_ops = {
-       .port_init_devs         = ht6560b_port_init_devs,
+       .init_dev               = ht6560b_init_dev,
         .set_pio_mode           = ht6560b_set_pio_mode,
         .selectproc             = ht6560b_selectproc,
  };
diff --git a/drivers/ide/legacy/ide-4drives.c b/drivers/ide/legacy/ide-4drives.c

index ecae916a3385f15c8725199a3eb18fe3733a79e4..89c8ff0a4d085f81e89bffba2fc3ba12a89f97c2 100644 (file)
--- a/drivers/ide/legacy/ide-4drives.c
+++ b/drivers/ide/legacy/ide-4drives.c
@@ -11,6 +11,21 @@ static int probe_4drives;
  module_param_named(probe, probe_4drives, bool, 0);
  MODULE_PARM_DESC(probe, "probe for generic IDE chipset with 4 drives/port");
  
+static void ide_4drives_init_dev(ide_drive_t *drive)
+{
+       if (drive->hwif->channel)
+               drive->select.all ^= 0x20;
+}
+
+static const struct ide_port_ops ide_4drives_port_ops = {
+       .init_dev               = ide_4drives_init_dev,
+};
+
+static const struct ide_port_info ide_4drives_port_info = {
+       .port_ops               = &ide_4drives_port_ops,
+       .host_flags             = IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA,
+};
+
  static int __init ide_4drives_init(void)
  {
         ide_hwif_t *hwif, *mate;
@@ -49,18 +64,10 @@ static int __init ide_4drives_init(void)
         mate = ide_find_port();
         if (mate) {
                 ide_init_port_hw(mate, &hw);
-               mate->drives[0].select.all ^= 0x20;
-               mate->drives[1].select.all ^= 0x20;
                 idx[1] = mate->index;
-
-               if (hwif) {
-                       hwif->mate = mate;
-                       mate->mate = hwif;
-                       hwif->serialized = mate->serialized = 1;
-               }
         }
  
-       ide_device_add(idx, NULL);
+       ide_device_add(idx, &ide_4drives_port_info);
  
         return 0;
  }
diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c

index 8dbf4d9b6447cdd6e6bf2add89382b1f55ec0213..fc53dcfbfe38a9d108a7ed06b73cc9eaa5b7b6af 100644 (file)
--- a/drivers/ide/legacy/ide-cs.c
+++ b/drivers/ide/legacy/ide-cs.c
@@ -154,6 +154,11 @@ static const struct ide_port_ops idecs_port_ops = {
         .quirkproc              = ide_undecoded_slave,
  };
  
+static const struct ide_port_info idecs_port_info = {
+       .port_ops               = &idecs_port_ops,
+       .host_flags             = IDE_HFLAG_NO_DMA,
+};
+
  static ide_hwif_t *idecs_register(unsigned long io, unsigned long ctl,
                                 unsigned long irq, struct pcmcia_device *handle)
  {
@@ -187,13 +192,11 @@ static ide_hwif_t *idecs_register(unsigned long io, unsigned long ctl,
  
      i = hwif->index;
  
-    ide_init_port_data(hwif, i);
      ide_init_port_hw(hwif, &hw);
-    hwif->port_ops = &idecs_port_ops;
  
      idx[0] = i;
  
-    ide_device_add(idx, NULL);
+    ide_device_add(idx, &idecs_port_info);
  
      if (hwif->present)
         return hwif;
diff --git a/drivers/ide/legacy/ide_platform.c b/drivers/ide/legacy/ide_platform.c

index d3bc3f24e05d07e52f079121117a672b11de0146..a249562b34b52b1ddfb8401136dd5b9c062fd0f6 100644 (file)
--- a/drivers/ide/legacy/ide_platform.c
+++ b/drivers/ide/legacy/ide_platform.c
@@ -44,6 +44,10 @@ static void __devinit plat_ide_setup_ports(hw_regs_t *hw,
         hw->chipset = ide_generic;
  }
  
+static const struct ide_port_info platform_ide_port_info = {
+       .host_flags             = IDE_HFLAG_NO_DMA,
+};
+
  static int __devinit plat_ide_probe(struct platform_device *pdev)
  {
         struct resource *res_base, *res_alt, *res_irq;
@@ -54,6 +58,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
         int ret = 0;
         int mmio = 0;
         hw_regs_t hw;
+       struct ide_port_info d = platform_ide_port_info;
  
         pdata = pdev->dev.platform_data;
  
@@ -102,13 +107,13 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
         ide_init_port_hw(hwif, &hw);
  
         if (mmio) {
-               hwif->host_flags = IDE_HFLAG_MMIO;
+               d.host_flags |= IDE_HFLAG_MMIO;
                 default_hwif_mmiops(hwif);
         }
  
         idx[0] = hwif->index;
  
-       ide_device_add(idx, NULL);
+       ide_device_add(idx, &d);
  
         platform_set_drvdata(pdev, hwif);
  
diff --git a/drivers/ide/legacy/macide.c b/drivers/ide/legacy/macide.c

index 2e84290d0bcc6bacb68142ab7c1e4756568ca876..0a6195bcfeda40681da03a0e53dfe3d769a802be 100644 (file)
--- a/drivers/ide/legacy/macide.c
+++ b/drivers/ide/legacy/macide.c
@@ -130,7 +130,6 @@ static int __init macide_init(void)
                 u8 index = hwif->index;
                 u8 idx[4] = { index, 0xff, 0xff, 0xff };
  
-               ide_init_port_data(hwif, index);
                 ide_init_port_hw(hwif, &hw);
  
                 ide_device_add(idx, NULL);
diff --git a/drivers/ide/legacy/q40ide.c b/drivers/ide/legacy/q40ide.c

index 8ff6e2d208340198c60e788b4f8a73e47062fd73..9c2b9d078f69e5bc993e5d9fc88a0b0f03d9669d 100644 (file)
--- a/drivers/ide/legacy/q40ide.c
+++ b/drivers/ide/legacy/q40ide.c
@@ -142,7 +142,6 @@ static int __init q40ide_init(void)
  
         hwif = ide_find_port();
         if (hwif) {
-               ide_init_port_data(hwif, hwif->index);
                 ide_init_port_hw(hwif, &hw);
  
                 /* Q40 has a byte-swapped IDE interface */
diff --git a/drivers/ide/legacy/qd65xx.c b/drivers/ide/legacy/qd65xx.c

index 51dba82f88127bba661472e69d210027fd5735ea..2338f344ea243809b7fb5954d7e09e494303ada9 100644 (file)
--- a/drivers/ide/legacy/qd65xx.c
+++ b/drivers/ide/legacy/qd65xx.c
@@ -207,6 +207,7 @@ static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio)
  static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
  {
         ide_hwif_t *hwif = drive->hwif;
+       struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
         unsigned int cycle_time;
         int active_time   = 175;
         int recovery_time = 415; /* worst case values from the dos driver */
@@ -236,7 +237,7 @@ static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
                                         active_time = 110;
                                         recovery_time = cycle_time - 120;
                                 } else {
-                                       active_time = ide_pio_timings[pio].active_time;
+                                       active_time = t->active;
                                         recovery_time = cycle_time - active_time;
                                 }
                 }
@@ -281,17 +282,18 @@ static int __init qd_testreg(int port)
         return (readreg != QD_TESTVAL);
  }
  
-static void __init qd6500_port_init_devs(ide_hwif_t *hwif)
+static void __init qd6500_init_dev(ide_drive_t *drive)
  {
+       ide_hwif_t *hwif = drive->hwif;
         u8 base = (hwif->config_data & 0xff00) >> 8;
         u8 config = QD_CONFIG(hwif);
  
-       hwif->drives[0].drive_data = QD6500_DEF_DATA;
-       hwif->drives[1].drive_data = QD6500_DEF_DATA;
+       drive->drive_data = QD6500_DEF_DATA;
  }
  
-static void __init qd6580_port_init_devs(ide_hwif_t *hwif)
+static void __init qd6580_init_dev(ide_drive_t *drive)
  {
+       ide_hwif_t *hwif = drive->hwif;
         u16 t1, t2;
         u8 base = (hwif->config_data & 0xff00) >> 8;
         u8 config = QD_CONFIG(hwif);
@@ -302,18 +304,17 @@ static void __init qd6580_port_init_devs(ide_hwif_t *hwif)
         } else
                 t2 = t1 = hwif->channel ? QD6580_DEF_DATA2 : QD6580_DEF_DATA;
  
-       hwif->drives[0].drive_data = t1;
-       hwif->drives[1].drive_data = t2;
+       drive->drive_data = drive->select.b.unit ? t2 : t1;
  }
  
  static const struct ide_port_ops qd6500_port_ops = {
-       .port_init_devs         = qd6500_port_init_devs,
+       .init_dev               = qd6500_init_dev,
         .set_pio_mode           = qd6500_set_pio_mode,
         .selectproc             = qd65xx_select,
  };
  
  static const struct ide_port_ops qd6580_port_ops = {
-       .port_init_devs         = qd6580_port_init_devs,
+       .init_dev               = qd6580_init_dev,
         .set_pio_mode           = qd6580_set_pio_mode,
         .selectproc             = qd65xx_select,
  };
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c

index 1a6c27b324983cc989e4c228b2be6cbd6e9a3027..48d57cae63c69c267f42d08339e8ec42d13fa85e 100644 (file)
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -213,10 +213,8 @@ static int auide_build_dmatable(ide_drive_t *drive)
  {
         int i, iswrite, count = 0;
         ide_hwif_t *hwif = HWIF(drive);
-
         struct request *rq = HWGROUP(drive)->rq;
-
-       _auide_hwif *ahwif = (_auide_hwif*)hwif->hwif_data;
+       _auide_hwif *ahwif = &auide_hwif;
         struct scatterlist *sg;
  
         iswrite = (rq_data_dir(rq) == WRITE);
@@ -402,7 +400,7 @@ static const struct ide_dma_ops au1xxx_dma_ops = {
  
  static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
  {
-       _auide_hwif *auide = (_auide_hwif *)hwif->hwif_data;
+       _auide_hwif *auide = &auide_hwif;
         dbdev_tab_t source_dev_tab, target_dev_tab;
         u32 dev_id, tsize, devwidth, flags;
  
@@ -463,7 +461,7 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
  #else
  static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
  {
-       _auide_hwif *auide = (_auide_hwif *)hwif->hwif_data;
+       _auide_hwif *auide = &auide_hwif;
         dbdev_tab_t source_dev_tab;
         int flags;
  
@@ -600,8 +598,6 @@ static int au_ide_probe(struct device *dev)
  
         ide_init_port_hw(hwif, &hw);
  
-       hwif->dev = dev;
-
         /* If the user has selected DDMA assisted copies,
            then set up a few local I/O function entry points 
         */
@@ -610,11 +606,8 @@ static int au_ide_probe(struct device *dev)
         hwif->input_data  = au1xxx_input_data;
         hwif->output_data = au1xxx_output_data;
  #endif
-       hwif->select_data               = 0;    /* no chipset-specific code */
-       hwif->config_data               = 0;    /* no chipset-specific code */
  
         auide_hwif.hwif                 = hwif;
-       hwif->hwif_data                 = &auide_hwif;
  
         idx[0] = hwif->index;
  
diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c

index 52fee3d2771a7ce7bfdb95592e44b898d9288417..9f1212cc4aed3e33152a1ff0c32eebf62ee0f987 100644 (file)
--- a/drivers/ide/mips/swarm.c
+++ b/drivers/ide/mips/swarm.c
@@ -61,6 +61,11 @@ static struct resource swarm_ide_resource = {
  
  static struct platform_device *swarm_ide_dev;
  
+static const struct ide_port_info swarm_port_info = {
+       .name                   = DRV_NAME,
+       .host_flags             = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
+};
+
  /*
   * swarm_ide_probe - if the board header indicates the existence of
   * Generic Bus IDE, allocate a HWIF for it.
@@ -77,12 +82,6 @@ static int __devinit swarm_ide_probe(struct device *dev)
         if (!SIBYTE_HAVE_IDE)
                 return -ENODEV;
  
-       hwif = ide_find_port();
-       if (hwif == NULL) {
-               printk(KERN_ERR DRV_NAME ": no free slot for interface\n");
-               return -ENOMEM;
-       }
-
         base = ioremap(A_IO_EXT_BASE, 0x800);
         offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS));
         size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS));
@@ -109,10 +108,6 @@ static int __devinit swarm_ide_probe(struct device *dev)
  
         base = ioremap(offset, size);
  
-       /* Setup MMIO ops.  */
-       hwif->host_flags = IDE_HFLAG_MMIO;
-       default_hwif_mmiops(hwif);
-
         for (i = 0; i <= 7; i++)
                 hw.io_ports_array[i] =
                                 (unsigned long)(base + ((0x1f0 + i) << 5));
@@ -121,15 +116,26 @@ static int __devinit swarm_ide_probe(struct device *dev)
         hw.irq = K_INT_GB_IDE;
         hw.chipset = ide_generic;
  
+       hwif = ide_find_port_slot(&swarm_port_info);
+       if (hwif == NULL)
+               goto err;
+
         ide_init_port_hw(hwif, &hw);
  
+       /* Setup MMIO ops. */
+       default_hwif_mmiops(hwif);
+
         idx[0] = hwif->index;
  
-       ide_device_add(idx, NULL);
+       ide_device_add(idx, &swarm_port_info);
  
         dev_set_drvdata(dev, hwif);
  
         return 0;
+err:
+       release_resource(&swarm_ide_resource);
+       iounmap(base);
+       return -ENOMEM;
  }
  
  static struct device_driver swarm_ide_driver = {
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c

index f2de00adf147e596448c06913e8790ced832c746..80d19c0eb78097d09cc822cb0ed27273563ce1cf 100644 (file)
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -69,7 +69,8 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
  {
         ide_hwif_t *hwif = HWIF(drive);
         struct pci_dev *dev = to_pci_dev(hwif->dev);
-       int s_time, a_time, c_time;
+       struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
+       int s_time = t->setup, a_time = t->active, c_time = t->cycle;
         u8 s_clc, a_clc, r_clc;
         unsigned long flags;
         int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
@@ -78,13 +79,10 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
         u8 cd_dma_fifo = 0;
         int unit = drive->select.b.unit & 1;
  
-       s_time = ide_pio_timings[pio].setup_time;
-       a_time = ide_pio_timings[pio].active_time;
         if ((s_clc = (s_time * bus_speed + 999) / 1000) >= 8)
                 s_clc = 0;
         if ((a_clc = (a_time * bus_speed + 999) / 1000) >= 8)
                 a_clc = 0;
-       c_time = ide_pio_timings[pio].cycle_time;
  
         if (!(r_clc = (c_time * bus_speed + 999) / 1000 - a_clc - s_clc)) {
                 r_clc = 1;
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c

index ad222206a429ea303e4f528142ee251fdc143ab2..0bfcdd0e77b318a94691c69da0f7a6bdb8d261a6 100644 (file)
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -21,8 +21,6 @@
  #include <linux/init.h>
  #include <linux/ide.h>
  
-#include "ide-timing.h"
-
  enum {
         AMD_IDE_CONFIG          = 0x41,
         AMD_CABLE_DETECT        = 0x42,
diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c

index cd1ba14984ab1197ac571190f132bb6236f10786..1ad1e23e310577c2875994fe421e969d6c61ab55 100644 (file)
--- a/drivers/ide/pci/cmd640.c
+++ b/drivers/ide/pci/cmd640.c
@@ -521,6 +521,7 @@ static void program_drive_counts(ide_drive_t *drive, unsigned int index)
  static void cmd640_set_mode(ide_drive_t *drive, unsigned int index,
                             u8 pio_mode, unsigned int cycle_time)
  {
+       struct ide_timing *t;
         int setup_time, active_time, recovery_time, clock_time;
         u8 setup_count, active_count, recovery_count, recovery_count2, cycle_count;
         int bus_speed;
@@ -532,8 +533,11 @@ static void cmd640_set_mode(ide_drive_t *drive, unsigned int index,
  
         if (pio_mode > 5)
                 pio_mode = 5;
-       setup_time  = ide_pio_timings[pio_mode].setup_time;
-       active_time = ide_pio_timings[pio_mode].active_time;
+
+       t = ide_timing_find_mode(XFER_PIO_0 + pio_mode);
+       setup_time  = t->setup;
+       active_time = t->active;
+
         recovery_time = cycle_time - (setup_time + active_time);
         clock_time = 1000 / bus_speed;
         cycle_count = DIV_ROUND_UP(cycle_time, clock_time);
@@ -607,11 +611,40 @@ static void cmd640_set_pio_mode(ide_drive_t *drive, const u8 pio)
  
         display_clocks(index);
  }
+#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
+
+static void cmd640_init_dev(ide_drive_t *drive)
+{
+       unsigned int i = drive->hwif->channel * 2 + drive->select.b.unit;
+
+#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED
+       /*
+        * Reset timing to the slowest speed and turn off prefetch.
+        * This way, the drive identify code has a better chance.
+        */
+       setup_counts[i]    =  4;        /* max possible */
+       active_counts[i]   = 16;        /* max possible */
+       recovery_counts[i] = 16;        /* max possible */
+       program_drive_counts(drive, i);
+       set_prefetch_mode(drive, i, 0);
+       printk(KERN_INFO DRV_NAME ": drive%d timings/prefetch cleared\n", i);
+#else
+       /*
+        * Set the drive unmask flags to match the prefetch setting.
+        */
+       check_prefetch(drive, i);
+       printk(KERN_INFO DRV_NAME ": drive%d timings/prefetch(%s) preserved\n",
+                                 i, drive->no_io_32bit ? "off" : "on");
+#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
+}
+
  
  static const struct ide_port_ops cmd640_port_ops = {
+       .init_dev               = cmd640_init_dev,
+#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED
         .set_pio_mode           = cmd640_set_pio_mode,
+#endif
  };
-#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
  
  static int pci_conf1(void)
  {
@@ -654,10 +687,8 @@ static const struct ide_port_info cmd640_port_info __initdata = {
                                   IDE_HFLAG_NO_DMA |
                                   IDE_HFLAG_ABUSE_PREFETCH |
                                   IDE_HFLAG_ABUSE_FAST_DEVSEL,
-#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED
         .port_ops               = &cmd640_port_ops,
         .pio_mask               = ATA_PIO5,
-#endif
  };
  
  static int cmd640x_init_one(unsigned long base, unsigned long ctl)
@@ -683,12 +714,8 @@ static int cmd640x_init_one(unsigned long base, unsigned long ctl)
   */
  static int __init cmd640x_init(void)
  {
-#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED
-       int second_port_toggled = 0;
-#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
         int second_port_cmd640 = 0, rc;
         const char *bus_type, *port2;
-       unsigned int index;
         u8 b, cfr;
         u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
         hw_regs_t hw[2];
@@ -774,88 +801,44 @@ static int __init cmd640x_init(void)
         put_cmd640_reg(CMDTIM, 0);
         put_cmd640_reg(BRST, 0x40);
  
-       cmd_hwif1 = ide_find_port();
+       b = get_cmd640_reg(CNTRL);
  
         /*
          * Try to enable the secondary interface, if not already enabled
          */
-       if (cmd_hwif1 &&
-           cmd_hwif1->drives[0].noprobe && cmd_hwif1->drives[1].noprobe) {
-               port2 = "not probed";
+       if (secondary_port_responding()) {
+               if ((b & CNTRL_ENA_2ND)) {
+                       second_port_cmd640 = 1;
+                       port2 = "okay";
+               } else if (cmd640_vlb) {
+                       second_port_cmd640 = 1;
+                       port2 = "alive";
+               } else
+                       port2 = "not cmd640";
         } else {
-               b = get_cmd640_reg(CNTRL);
+               put_cmd640_reg(CNTRL, b ^ CNTRL_ENA_2ND); /* toggle the bit */
                 if (secondary_port_responding()) {
-                       if ((b & CNTRL_ENA_2ND)) {
-                               second_port_cmd640 = 1;
-                               port2 = "okay";
-                       } else if (cmd640_vlb) {
-                               second_port_cmd640 = 1;
-                               port2 = "alive";
-                       } else
-                               port2 = "not cmd640";
+                       second_port_cmd640 = 1;
+                       port2 = "enabled";
                 } else {
-                       put_cmd640_reg(CNTRL, b ^ CNTRL_ENA_2ND); /* toggle the bit */
-                       if (secondary_port_responding()) {
-                               second_port_cmd640 = 1;
-#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED
-                               second_port_toggled = 1;
-#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
-                               port2 = "enabled";
-                       } else {
-                               put_cmd640_reg(CNTRL, b); /* restore original setting */
-                               port2 = "not responding";
-                       }
+                       put_cmd640_reg(CNTRL, b); /* restore original setting */
+                       port2 = "not responding";
                 }
         }
  
         /*
          * Initialize data for secondary cmd640 port, if enabled
          */
-       if (second_port_cmd640 && cmd_hwif1) {
-               ide_init_port_hw(cmd_hwif1, &hw[1]);
-               idx[1] = cmd_hwif1->index;
+       if (second_port_cmd640) {
+               cmd_hwif1 = ide_find_port();
+               if (cmd_hwif1) {
+                       ide_init_port_hw(cmd_hwif1, &hw[1]);
+                       idx[1] = cmd_hwif1->index;
+               }
         }
         printk(KERN_INFO "cmd640: %sserialized, secondary interface %s\n",
                          second_port_cmd640 ? "" : "not ", port2);
  
-       /*
-        * Establish initial timings/prefetch for all drives.
-        * Do not unnecessarily disturb any prior BIOS setup of these.
-        */
-       for (index = 0; index < (2 + (second_port_cmd640 << 1)); index++) {
-               ide_drive_t *drive;
-
-               if (index > 1) {
-                       if (cmd_hwif1 == NULL)
-                               continue;
-                       drive = &cmd_hwif1->drives[index & 1];
-               } else  {
-                       if (cmd_hwif0 == NULL)
-                               continue;
-                       drive = &cmd_hwif0->drives[index & 1];
-               }
-
-#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED
-               /*
-                * Reset timing to the slowest speed and turn off prefetch.
-                * This way, the drive identify code has a better chance.
-                */
-               setup_counts    [index] = 4;    /* max possible */
-               active_counts   [index] = 16;   /* max possible */
-               recovery_counts [index] = 16;   /* max possible */
-               program_drive_counts(drive, index);
-               set_prefetch_mode(drive, index, 0);
-               printk("cmd640: drive%d timings/prefetch cleared\n", index);
-#else
-               /*
-                * Set the drive unmask flags to match the prefetch setting
-                */
-               check_prefetch(drive, index);
-               printk("cmd640: drive%d timings/prefetch(%s) preserved\n",
-                       index, drive->no_io_32bit ? "off" : "on");
-#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
-       }
-
  #ifdef CMD640_DUMP_REGS
         cmd640_dump_regs();
  #endif
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c

index ca4774aa27eeb56f33d2708d18ac1666b20d3224..cfa784bacf48678f712e169775a4f10293a89cc0 100644 (file)
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -116,6 +116,7 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
  {
         ide_hwif_t *hwif        = HWIF(drive);
         struct pci_dev *dev     = to_pci_dev(hwif->dev);
+       struct ide_timing *t    = ide_timing_find_mode(XFER_PIO_0 + pio);
         unsigned int cycle_time;
         u8 setup_count, arttim = 0;
  
@@ -124,10 +125,9 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
  
         cycle_time = ide_pio_cycle_time(drive, pio);
  
-       program_cycle_times(drive, cycle_time,
-                           ide_pio_timings[pio].active_time);
+       program_cycle_times(drive, cycle_time, t->active);
  
-       setup_count = quantize_timing(ide_pio_timings[pio].setup_time,
+       setup_count = quantize_timing(t->setup,
                         1000 / (ide_pci_clk ? ide_pci_clk : 33));
  
         /*
diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c

index 99fe91a191b8f556abb865876c2e3fcaf0e8a97a..dc97c48623f3fc438fb2237d259d55eb2046d985 100644 (file)
--- a/drivers/ide/pci/cs5535.c
+++ b/drivers/ide/pci/cs5535.c
@@ -26,8 +26,6 @@
  #include <linux/pci.h>
  #include <linux/ide.h>
  
-#include "ide-timing.h"
-
  #define MSR_ATAC_BASE          0x51300000
  #define ATAC_GLD_MSR_CAP       (MSR_ATAC_BASE+0)
  #define ATAC_GLD_MSR_CONFIG    (MSR_ATAC_BASE+0x01)
@@ -75,13 +73,11 @@ static unsigned int cs5535_udma_timings[5] =
   */
  static void cs5535_set_speed(ide_drive_t *drive, const u8 speed)
  {
-
         u32 reg = 0, dummy;
         int unit = drive->select.b.unit;
  
-
         /* Set the PIO timings */
-       if ((speed & XFER_MODE) == XFER_PIO) {
+       if (speed < XFER_SW_DMA_0) {
                 ide_drive_t *pair = ide_get_paired_drive(drive);
                 u8 cmd, pioa;
  
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c

index 8c534afcb6c89ede68e98248c784ec230191103b..e14ad5530fa4b0c1e1369387d43e4e73d8ac8b7b 100644 (file)
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -133,6 +133,7 @@ static int calc_clk(int time, int bus_speed)
   */
  static void compute_clocks(u8 pio, pio_clocks_t *p_pclk)
  {
+       struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
         int clk1, clk2;
         int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
  
@@ -141,15 +142,13 @@ static void compute_clocks(u8 pio, pio_clocks_t *p_pclk)
          */
  
         /* let's calc the address setup time clocks */
-       p_pclk->address_time = (u8)calc_clk(ide_pio_timings[pio].setup_time, bus_speed);
+       p_pclk->address_time = (u8)calc_clk(t->setup, bus_speed);
  
         /* let's calc the active and recovery time clocks */
-       clk1 = calc_clk(ide_pio_timings[pio].active_time, bus_speed);
+       clk1 = calc_clk(t->active, bus_speed);
  
         /* calc recovery timing */
-       clk2 =  ide_pio_timings[pio].cycle_time -
-               ide_pio_timings[pio].active_time -
-               ide_pio_timings[pio].setup_time;
+       clk2 = t->cycle - t->active - t->setup;
  
         clk2 = calc_clk(clk2, bus_speed);
  
diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c

index af0f30051d5adbbbc39ead132b30e974779fdd98..0106e2a2df77a3ab8c42fd9092e440c3a309a9b9 100644 (file)
--- a/drivers/ide/pci/delkin_cb.c
+++ b/drivers/ide/pci/delkin_cb.c
@@ -93,7 +93,6 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
  
         i = hwif->index;
  
-       ide_init_port_data(hwif, i);
         ide_init_port_hw(hwif, &hw);
  
         idx[0] = i;
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c

index 6ab04115286be677e3b550caf201fdf7f12bd3a1..cbf647202994d2c273fd8a3d9d6cf38eac29fe54 100644 (file)
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -512,8 +512,14 @@ static void __devinit it821x_quirkproc(ide_drive_t *drive)
  }
  
  static struct ide_dma_ops it821x_pass_through_dma_ops = {
+       .dma_host_set           = ide_dma_host_set,
+       .dma_setup              = ide_dma_setup,
+       .dma_exec_cmd           = ide_dma_exec_cmd,
         .dma_start              = it821x_dma_start,
         .dma_end                = it821x_dma_end,
+       .dma_test_irq           = ide_dma_test_irq,
+       .dma_timeout            = ide_dma_timeout,
+       .dma_lost_irq           = ide_dma_lost_irq,
  };
  
  /**
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c

index 1584ebb6a185b6d8da78fa3b205c909c7d52464e..789c66dfbde5a424f841cb8e5874a1e887f6959f 100644 (file)
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -558,12 +558,9 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
         u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
         int i;
  
-       hwif = ide_find_port();
-       if (hwif == NULL) {
-               printk(KERN_ERR "%s: too many IDE interfaces, "
-                               "no room in table\n", SCC_PATA_NAME);
+       hwif = ide_find_port_slot(d);
+       if (hwif == NULL)
                 return -ENOMEM;
-       }
  
         memset(&hw, 0, sizeof(hw));
         for (i = 0; i <= 8; i++)
@@ -572,7 +569,6 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
         hw.dev = &dev->dev;
         hw.chipset = ide_pci;
         ide_init_port_hw(hwif, &hw);
-       hwif->dev = &dev->dev;
  
         idx[0] = hwif->index;
  
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c

index 24513e3dcd6b81c0c1a159a5f5f37200f221268f..c79ff5b41088e40d09a124de94785cf1959408c1 100644 (file)
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -568,6 +568,7 @@ static const struct ide_dma_ops sgiioc4_dma_ops = {
  };
  
  static const struct ide_port_info sgiioc4_port_info __devinitdata = {
+       .name                   = DRV_NAME,
         .chipset                = ide_pci,
         .init_dma               = ide_dma_sgiioc4,
         .port_ops               = &sgiioc4_port_ops,
@@ -587,13 +588,6 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
         hw_regs_t hw;
         struct ide_port_info d = sgiioc4_port_info;
  
-       hwif = ide_find_port();
-       if (hwif == NULL) {
-               printk(KERN_ERR "%s: too many IDE interfaces, no room in table\n",
-                               DRV_NAME);
-               return -ENOMEM;
-       }
-
         /*  Get the CmdBlk and CtrlBlk Base Registers */
         bar0 = pci_resource_start(dev, 0);
         virt_base = ioremap(bar0, pci_resource_len(dev, 0));
@@ -608,11 +602,11 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
  
         cmd_phys_base = bar0 + IOC4_CMD_OFFSET;
         if (!request_mem_region(cmd_phys_base, IOC4_CMD_CTL_BLK_SIZE,
-           hwif->name)) {
+           DRV_NAME)) {
                 printk(KERN_ERR
                         "%s : %s -- ERROR, Addresses "
                         "0x%p to 0x%p ALREADY in use\n",
-                      __func__, hwif->name, (void *) cmd_phys_base,
+                      __func__, DRV_NAME, (void *) cmd_phys_base,
                        (void *) cmd_phys_base + IOC4_CMD_CTL_BLK_SIZE);
                 return -ENOMEM;
         }
@@ -623,9 +617,12 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
         hw.irq = dev->irq;
         hw.chipset = ide_pci;
         hw.dev = &dev->dev;
-       ide_init_port_hw(hwif, &hw);
  
-       hwif->dev = &dev->dev;
+       hwif = ide_find_port_slot(&d);
+       if (hwif == NULL)
+               goto err;
+
+       ide_init_port_hw(hwif, &hw);
  
         /* The IOC4 uses MMIO rather than Port IO. */
         default_hwif_mmiops(hwif);
@@ -641,6 +638,10 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
                 return -EIO;
  
         return 0;
+err:
+       release_mem_region(cmd_phys_base, IOC4_CMD_CTL_BLK_SIZE);
+       iounmap(virt_base);
+       return -ENOMEM;
  }
  
  static unsigned int __devinit
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c

index b75e9bb390a73410c90e82dad9a059a673b4eee5..6e9d7655d89c02d1f6e426cd681428981fa66200 100644 (file)
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -421,8 +421,7 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
                 if ((sata_stat & 0x03) != 0x03) {
                         printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
                                             hwif->name, sata_stat);
-                       HWGROUP(drive)->polling = 0;
-                       return ide_started;
+                       return -ENXIO;
                 }
         }
  
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c

index e127eb25ab6330e3e08d9f148e5d445f05029450..2389945ca95d0c2a86b22ef90d5b8a2743fc6585 100644 (file)
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -52,8 +52,6 @@
  #include <linux/init.h>
  #include <linux/ide.h>
  
-#include "ide-timing.h"
-
  /* registers layout and init values are chipset family dependant */
  
  #define ATA_16         0x01
@@ -616,7 +614,6 @@ MODULE_LICENSE("GPL");
  /*
   * TODO:
   *     - CLEANUP
- *     - Use drivers/ide/ide-timing.h !
   *     - More checks in the config registers (force values instead of
   *       relying on the BIOS setting them correctly).
   *     - Further optimisations ?
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c

index ce84fa045d39186a05b08f41a643a00428ff4407..6efbde297174b9b32a8405b227e9acf6abb69e77 100644 (file)
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -47,10 +47,11 @@
   */
  static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio)
  {
+       struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
         unsigned int cmd_on, cmd_off;
         u8 iordy = 0;
  
-       cmd_on  = (ide_pio_timings[pio].active_time + 29) / 30;
+       cmd_on  = (t->active + 29) / 30;
         cmd_off = (ide_pio_cycle_time(drive, pio) - 30 * cmd_on + 29) / 30;
  
         if (cmd_on == 0)
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c

index 3ed9728abd2408c53edebd05a7677b4a6b026830..e47384c70c40bee808d79ef4fc2f127418bb8e47 100644 (file)
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -35,8 +35,6 @@
  #include <asm/processor.h>
  #endif
  
-#include "ide-timing.h"
-
  #define VIA_IDE_ENABLE         0x40
  #define VIA_IDE_CONFIG         0x41
  #define VIA_FIFO_CONFIG                0x43
diff --git a/drivers/ide/ppc/Makefile b/drivers/ide/ppc/Makefile

index 65af5848b28c7e7715e3c9ab962a7d32dcfc6f26..74e52adcdf4bf6adafa89041b7343f656b1e2647 100644 (file)
--- a/drivers/ide/ppc/Makefile
+++ b/drivers/ide/ppc/Makefile
@@ -1,3 +1,2 @@
  
  obj-$(CONFIG_BLK_DEV_IDE_PMAC)         += pmac.o
-obj-$(CONFIG_BLK_DEV_MPC8xx_IDE)       += mpc8xx.o
diff --git a/drivers/ide/ppc/mpc8xx.c b/drivers/ide/ppc/mpc8xx.c

deleted file mode 100644 (file)

index 236f9c3..0000000
--- a/drivers/ide/ppc/mpc8xx.c
+++ /dev/null
@@ -1,851 +0,0 @@
-/*
- *  Copyright (C) 2000, 2001 Wolfgang Denk, wd@denx.de
- *  Modified for direct IDE interface
- *     by Thomas Lange, thomas@corelatus.com
- *  Modified for direct IDE interface on 8xx without using the PCMCIA
- *  controller
- *     by Steven.Scholz@imc-berlin.de
- *  Moved out of arch/ppc/kernel/m8xx_setup.c, other minor cleanups
- *     by Mathew Locke <mattl@mvista.com>
- */
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/tty.h>
-#include <linux/major.h>
-#include <linux/interrupt.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/ide.h>
-#include <linux/bootmem.h>
-
-#include <asm/mpc8xx.h>
-#include <asm/mmu.h>
-#include <asm/processor.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/ide.h>
-#include <asm/8xx_immap.h>
-#include <asm/machdep.h>
-#include <asm/irq.h>
-
-#define DRV_NAME "ide-mpc8xx"
-
-static int identify  (volatile u8 *p);
-static void print_fixed (volatile u8 *p);
-static void print_funcid (int func);
-static int check_ide_device (unsigned long base);
-
-static void ide_interrupt_ack (void *dev);
-static void m8xx_ide_set_pio_mode(ide_drive_t *drive, const u8 pio);
-
-typedef        struct ide_ioport_desc {
-       unsigned long   base_off;               /* Offset to PCMCIA memory      */
-       unsigned long   reg_off[IDE_NR_PORTS];  /* controller register offsets  */
-       int             irq;                    /* IRQ                          */
-} ide_ioport_desc_t;
-
-ide_ioport_desc_t ioport_dsc[MAX_HWIFS] = {
-#ifdef IDE0_BASE_OFFSET
-       { IDE0_BASE_OFFSET,
-           {
-               IDE0_DATA_REG_OFFSET,
-               IDE0_ERROR_REG_OFFSET,
-               IDE0_NSECTOR_REG_OFFSET,
-               IDE0_SECTOR_REG_OFFSET,
-               IDE0_LCYL_REG_OFFSET,
-               IDE0_HCYL_REG_OFFSET,
-               IDE0_SELECT_REG_OFFSET,
-               IDE0_STATUS_REG_OFFSET,
-               IDE0_CONTROL_REG_OFFSET,
-               IDE0_IRQ_REG_OFFSET,
-           },
-           IDE0_INTERRUPT,
-       },
-#ifdef IDE1_BASE_OFFSET
-       { IDE1_BASE_OFFSET,
-           {
-               IDE1_DATA_REG_OFFSET,
-               IDE1_ERROR_REG_OFFSET,
-               IDE1_NSECTOR_REG_OFFSET,
-               IDE1_SECTOR_REG_OFFSET,
-               IDE1_LCYL_REG_OFFSET,
-               IDE1_HCYL_REG_OFFSET,
-               IDE1_SELECT_REG_OFFSET,
-               IDE1_STATUS_REG_OFFSET,
-               IDE1_CONTROL_REG_OFFSET,
-               IDE1_IRQ_REG_OFFSET,
-           },
-           IDE1_INTERRUPT,
-       },
-#endif /* IDE1_BASE_OFFSET */
-#endif /* IDE0_BASE_OFFSET */
-};
-
-ide_pio_timings_t ide_pio_clocks[6];
-int hold_time[6] =  {30, 20, 15, 10, 10, 10 };   /* PIO Mode 5 with IORDY (nonstandard) */
-
-/*
- * Warning: only 1 (ONE) PCMCIA slot supported here,
- * which must be correctly initialized by the firmware (PPCBoot).
- */
-static int _slot_ = -1;                        /* will be read from PCMCIA registers   */
-
-/* Make clock cycles and always round up */
-#define PCMCIA_MK_CLKS( t, T ) (( (t) * ((T)/1000000) + 999U ) / 1000U )
-
-#define M8XX_PCMCIA_CD2(slot)      (0x10000000 >> (slot << 4))
-#define M8XX_PCMCIA_CD1(slot)      (0x08000000 >> (slot << 4))
-
-/*
- * The TQM850L hardware has two pins swapped! Grrrrgh!
- */
-#ifdef CONFIG_TQM850L
-#define __MY_PCMCIA_GCRX_CXRESET       PCMCIA_GCRX_CXOE
-#define __MY_PCMCIA_GCRX_CXOE          PCMCIA_GCRX_CXRESET
-#else
-#define __MY_PCMCIA_GCRX_CXRESET       PCMCIA_GCRX_CXRESET
-#define __MY_PCMCIA_GCRX_CXOE          PCMCIA_GCRX_CXOE
-#endif
-
-#if defined(CONFIG_BLK_DEV_MPC8xx_IDE) && defined(CONFIG_IDE_8xx_PCCARD)
-#define PCMCIA_SCHLVL IDE0_INTERRUPT   /* Status Change Interrupt Level        */
-static int pcmcia_schlvl = PCMCIA_SCHLVL;
-#endif
-
-/*
- * See include/linux/ide.h for definition of hw_regs_t (p, base)
- */
-
-/*
- * m8xx_ide_init_ports() for a direct IDE interface _using_
- * MPC8xx's internal PCMCIA interface
- */
-#if defined(CONFIG_IDE_8xx_PCCARD) || defined(CONFIG_IDE_8xx_DIRECT)
-static int __init m8xx_ide_init_ports(hw_regs_t *hw, unsigned long data_port)
-{
-       unsigned long *p = hw->io_ports_array;
-       int i;
-
-       typedef struct {
-               ulong br;
-               ulong or;
-       } pcmcia_win_t;
-       volatile pcmcia_win_t *win;
-       volatile pcmconf8xx_t *pcmp;
-
-       uint *pgcrx;
-       u32 pcmcia_phy_base;
-       u32 pcmcia_phy_end;
-       static unsigned long pcmcia_base = 0;
-       unsigned long base;
-
-       *p = 0;
-
-       pcmp = (pcmconf8xx_t *)(&(((immap_t *)IMAP_ADDR)->im_pcmcia));
-
-       if (!pcmcia_base) {
-                /*
-                 * Read out PCMCIA registers. Since the reset values
-                 * are undefined, we sure hope that they have been
-                 * set up by firmware
-                */
-
-               /* Scan all registers for valid settings */
-               pcmcia_phy_base = 0xFFFFFFFF;
-               pcmcia_phy_end = 0;
-               /* br0 is start of brX and orX regs */
-               win = (pcmcia_win_t *) \
-                       (&(((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pbr0));
-               for (i = 0; i < 8; i++) {
-                       if (win->or & 1) {      /* This bank is marked as valid */
-                               if (win->br < pcmcia_phy_base) {
-                                       pcmcia_phy_base = win->br;
-                               }
-                               if ((win->br + PCMCIA_MEM_SIZE) > pcmcia_phy_end) {
-                                       pcmcia_phy_end  = win->br + PCMCIA_MEM_SIZE;
-                               }
-                               /* Check which slot that has been defined */
-                               _slot_ = (win->or >> 2) & 1;
-
-                       }                                       /* Valid bank */
-                       win++;
-               }                                               /* for */
-
-               printk ("PCMCIA slot %c: phys mem %08x...%08x (size %08x)\n",
-                       'A' + _slot_,
-                       pcmcia_phy_base, pcmcia_phy_end,
-                       pcmcia_phy_end - pcmcia_phy_base);
-
-               if (!request_mem_region(pcmcia_phy_base,
-                                       pcmcia_phy_end - pcmcia_phy_base,
-                                       DRV_NAME)) {
-                       printk(KERN_ERR "%s: resources busy\n", DRV_NAME);
-                       return -EBUSY;
-               }
-
-               pcmcia_base=(unsigned long)ioremap(pcmcia_phy_base,
-                                                  pcmcia_phy_end-pcmcia_phy_base);
-
-#ifdef DEBUG
-               printk ("PCMCIA virt base: %08lx\n", pcmcia_base);
-#endif
-               /* Compute clock cycles for PIO timings */
-               for (i=0; i<6; ++i) {
-                       bd_t    *binfo = (bd_t *)__res;
-
-                       hold_time[i]   =
-                               PCMCIA_MK_CLKS (hold_time[i],
-                                               binfo->bi_busfreq);
-                       ide_pio_clocks[i].setup_time  =
-                               PCMCIA_MK_CLKS (ide_pio_timings[i].setup_time,
-                                               binfo->bi_busfreq);
-                       ide_pio_clocks[i].active_time =
-                               PCMCIA_MK_CLKS (ide_pio_timings[i].active_time,
-                                               binfo->bi_busfreq);
-                       ide_pio_clocks[i].cycle_time  =
-                               PCMCIA_MK_CLKS (ide_pio_timings[i].cycle_time,
-                                               binfo->bi_busfreq);
-#if 0
-                       printk ("PIO mode %d timings: %d/%d/%d => %d/%d/%d\n",
-                               i,
-                               ide_pio_clocks[i].setup_time,
-                               ide_pio_clocks[i].active_time,
-                               ide_pio_clocks[i].hold_time,
-                               ide_pio_clocks[i].cycle_time,
-                               ide_pio_timings[i].setup_time,
-                               ide_pio_timings[i].active_time,
-                               ide_pio_timings[i].hold_time,
-                               ide_pio_timings[i].cycle_time);
-#endif
-               }
-       }
-
-       if (_slot_ == -1) {
-               printk ("PCMCIA slot has not been defined! Using A as default\n");
-               _slot_ = 0;
-       }
-
-#ifdef CONFIG_IDE_8xx_PCCARD
-
-#ifdef DEBUG
-       printk ("PIPR = 0x%08X  slot %c ==> mask = 0x%X\n",
-               pcmp->pcmc_pipr,
-               'A' + _slot_,
-               M8XX_PCMCIA_CD1(_slot_) | M8XX_PCMCIA_CD2(_slot_) );
-#endif /* DEBUG */
-
-       if (pcmp->pcmc_pipr & (M8XX_PCMCIA_CD1(_slot_)|M8XX_PCMCIA_CD2(_slot_))) {
-               printk ("No card in slot %c: PIPR=%08x\n",
-                       'A' + _slot_, (u32) pcmp->pcmc_pipr);
-               return -ENODEV;         /* No card in slot */
-       }
-
-       check_ide_device (pcmcia_base);
-
-#endif /* CONFIG_IDE_8xx_PCCARD */
-
-       base = pcmcia_base + ioport_dsc[data_port].base_off;
-#ifdef DEBUG
-       printk ("base: %08x + %08x = %08x\n",
-                       pcmcia_base, ioport_dsc[data_port].base_off, base);
-#endif
-
-       for (i = 0; i < IDE_NR_PORTS; ++i) {
-#ifdef DEBUG
-               printk ("port[%d]: %08x + %08x = %08x\n",
-                       i,
-                       base,
-                       ioport_dsc[data_port].reg_off[i],
-                       i, base + ioport_dsc[data_port].reg_off[i]);
-#endif
-               *p++ = base + ioport_dsc[data_port].reg_off[i];
-       }
-
-       hw->irq = ioport_dsc[data_port].irq;
-       hw->ack_intr = (ide_ack_intr_t *)ide_interrupt_ack;
-
-#ifdef CONFIG_IDE_8xx_PCCARD
-       {
-               unsigned int reg;
-
-               if (_slot_)
-                       pgcrx = &((immap_t *) IMAP_ADDR)->im_pcmcia.pcmc_pgcrb;
-               else
-                       pgcrx = &((immap_t *) IMAP_ADDR)->im_pcmcia.pcmc_pgcra;
-
-               reg = *pgcrx;
-               reg |= mk_int_int_mask (pcmcia_schlvl) << 24;
-               reg |= mk_int_int_mask (pcmcia_schlvl) << 16;
-               *pgcrx = reg;
-       }
-#endif /* CONFIG_IDE_8xx_PCCARD */
-
-       /* Enable Harddisk Interrupt,
-        * and make it edge sensitive
-        */
-       /* (11-18) Set edge detect for irq, no wakeup from low power mode */
-       ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_siel |=
-                                       (0x80000000 >> ioport_dsc[data_port].irq);
-
-#ifdef CONFIG_IDE_8xx_PCCARD
-       /* Make sure we don't get garbage irq */
-       ((immap_t *) IMAP_ADDR)->im_pcmcia.pcmc_pscr = 0xFFFF;
-
-       /* Enable falling edge irq */
-       pcmp->pcmc_per = 0x100000 >> (16 * _slot_);
-#endif /* CONFIG_IDE_8xx_PCCARD */
-
-       hw->chipset = ide_generic;
-
-       return 0;
-}
-#endif /* CONFIG_IDE_8xx_PCCARD || CONFIG_IDE_8xx_DIRECT */
-
-/*
- * m8xx_ide_init_ports() for a direct IDE interface _not_ using
- * MPC8xx's internal PCMCIA interface
- */
-#if defined(CONFIG_IDE_EXT_DIRECT)
-static int __init m8xx_ide_init_ports(hw_regs_t *hw, unsigned long data_port)
-{
-       unsigned long *p = hw->io_ports_array;
-       int i;
-
-       u32 ide_phy_base;
-       u32 ide_phy_end;
-       static unsigned long ide_base = 0;
-       unsigned long base;
-
-       *p = 0;
-
-       if (!ide_base) {
-
-               /* TODO:
-                * - add code to read ORx, BRx
-                */
-               ide_phy_base = CFG_ATA_BASE_ADDR;
-               ide_phy_end  = CFG_ATA_BASE_ADDR + 0x200;
-
-               printk ("IDE phys mem : %08x...%08x (size %08x)\n",
-                       ide_phy_base, ide_phy_end,
-                       ide_phy_end - ide_phy_base);
-
-               if (!request_mem_region(ide_phy_base, 0x200, DRV_NAME)) {
-                       printk(KERN_ERR "%s: resources busy\n", DRV_NAME);
-                       return -EBUSY;
-               }
-
-               ide_base=(unsigned long)ioremap(ide_phy_base,
-                                               ide_phy_end-ide_phy_base);
-
-#ifdef DEBUG
-               printk ("IDE virt base: %08lx\n", ide_base);
-#endif
-       }
-
-       base = ide_base + ioport_dsc[data_port].base_off;
-#ifdef DEBUG
-       printk ("base: %08x + %08x = %08x\n",
-               ide_base, ioport_dsc[data_port].base_off, base);
-#endif
-
-       for (i = 0; i < IDE_NR_PORTS; ++i) {
-#ifdef DEBUG
-               printk ("port[%d]: %08x + %08x = %08x\n",
-                       i,
-                       base,
-                       ioport_dsc[data_port].reg_off[i],
-                       i, base + ioport_dsc[data_port].reg_off[i]);
-#endif
-               *p++ = base + ioport_dsc[data_port].reg_off[i];
-       }
-
-       /* direct connected IDE drive, i.e. external IRQ */
-       hw->irq = ioport_dsc[data_port].irq;
-       hw->ack_intr = (ide_ack_intr_t *)ide_interrupt_ack;
-
-       /* Enable Harddisk Interrupt,
-        * and make it edge sensitive
-        */
-       /* (11-18) Set edge detect for irq, no wakeup from low power mode */
-       ((immap_t *) IMAP_ADDR)->im_siu_conf.sc_siel |=
-                       (0x80000000 >> ioport_dsc[data_port].irq);
-
-       hw->chipset = ide_generic;
-
-       return 0;
-}
-#endif /* CONFIG_IDE_8xx_DIRECT */
-
-
-/* -------------------------------------------------------------------- */
-
-
-/* PCMCIA Timing */
-#ifndef        PCMCIA_SHT
-#define PCMCIA_SHT(t)  ((t & 0x0F)<<16)        /* Strobe Hold  Time    */
-#define PCMCIA_SST(t)  ((t & 0x0F)<<12)        /* Strobe Setup Time    */
-#define PCMCIA_SL(t) ((t==32) ? 0 : ((t & 0x1F)<<7)) /* Strobe Length  */
-#endif
-
-/* Calculate PIO timings */
-static void m8xx_ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
-{
-#if defined(CONFIG_IDE_8xx_PCCARD) || defined(CONFIG_IDE_8xx_DIRECT)
-       volatile pcmconf8xx_t   *pcmp;
-       ulong timing, mask, reg;
-
-       pcmp = (pcmconf8xx_t *)(&(((immap_t *)IMAP_ADDR)->im_pcmcia));
-
-       mask = ~(PCMCIA_SHT(0xFF) | PCMCIA_SST(0xFF) | PCMCIA_SL(0xFF));
-
-       timing  = PCMCIA_SHT(hold_time[pio]  )
-               | PCMCIA_SST(ide_pio_clocks[pio].setup_time )
-               | PCMCIA_SL (ide_pio_clocks[pio].active_time)
-               ;
-
-#if 1
-       printk ("Setting timing bits 0x%08lx in PCMCIA controller\n", timing);
-#endif
-       if ((reg = pcmp->pcmc_por0 & mask) != 0)
-               pcmp->pcmc_por0 = reg | timing;
-
-       if ((reg = pcmp->pcmc_por1 & mask) != 0)
-               pcmp->pcmc_por1 = reg | timing;
-
-       if ((reg = pcmp->pcmc_por2 & mask) != 0)
-               pcmp->pcmc_por2 = reg | timing;
-
-       if ((reg = pcmp->pcmc_por3 & mask) != 0)
-               pcmp->pcmc_por3 = reg | timing;
-
-       if ((reg = pcmp->pcmc_por4 & mask) != 0)
-               pcmp->pcmc_por4 = reg | timing;
-
-       if ((reg = pcmp->pcmc_por5 & mask) != 0)
-               pcmp->pcmc_por5 = reg | timing;
-
-       if ((reg = pcmp->pcmc_por6 & mask) != 0)
-               pcmp->pcmc_por6 = reg | timing;
-
-       if ((reg = pcmp->pcmc_por7 & mask) != 0)
-               pcmp->pcmc_por7 = reg | timing;
-
-#elif defined(CONFIG_IDE_EXT_DIRECT)
-
-       printk("%s[%d] %s: not implemented yet!\n",
-               __FILE__, __LINE__, __func__);
-#endif /* defined(CONFIG_IDE_8xx_PCCARD) || defined(CONFIG_IDE_8xx_PCMCIA */
-}
-
-static const struct ide_port_ops m8xx_port_ops = {
-       .set_pio_mode           = m8xx_ide_set_pio_mode,
-};
-
-static void
-ide_interrupt_ack (void *dev)
-{
-#ifdef CONFIG_IDE_8xx_PCCARD
-       u_int pscr, pipr;
-
-#if (PCMCIA_SOCKETS_NO == 2)
-       u_int _slot_;
-#endif
-
-       /* get interrupt sources */
-
-       pscr = ((volatile immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr;
-       pipr = ((volatile immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pipr;
-
-       /*
-        * report only if both card detect signals are the same
-        * not too nice done,
-        * we depend on that CD2 is the bit to the left of CD1...
-        */
-
-       if(_slot_==-1){
-         printk("PCMCIA slot has not been defined! Using A as default\n");
-         _slot_=0;
-       }
-
-       if(((pipr & M8XX_PCMCIA_CD2(_slot_)) >> 1) ^
-          (pipr & M8XX_PCMCIA_CD1(_slot_))         ) {
-         printk ("card detect interrupt\n");
-       }
-       /* clear the interrupt sources */
-       ((immap_t *)IMAP_ADDR)->im_pcmcia.pcmc_pscr = pscr;
-
-#else /* ! CONFIG_IDE_8xx_PCCARD */
-       /*
-        * Only CONFIG_IDE_8xx_PCCARD is using the interrupt of the
-        * MPC8xx's PCMCIA controller, so there is nothing to be done here
-        * for CONFIG_IDE_8xx_DIRECT and CONFIG_IDE_EXT_DIRECT.
-        * The interrupt is handled somewhere else.     -- Steven
-        */
-#endif /* CONFIG_IDE_8xx_PCCARD */
-}
-
-
-
-/*
- * CIS Tupel codes
- */
-#define CISTPL_NULL            0x00
-#define CISTPL_DEVICE          0x01
-#define CISTPL_LONGLINK_CB     0x02
-#define CISTPL_INDIRECT                0x03
-#define CISTPL_CONFIG_CB       0x04
-#define CISTPL_CFTABLE_ENTRY_CB 0x05
-#define CISTPL_LONGLINK_MFC    0x06
-#define CISTPL_BAR             0x07
-#define CISTPL_PWR_MGMNT       0x08
-#define CISTPL_EXTDEVICE       0x09
-#define CISTPL_CHECKSUM                0x10
-#define CISTPL_LONGLINK_A      0x11
-#define CISTPL_LONGLINK_C      0x12
-#define CISTPL_LINKTARGET      0x13
-#define CISTPL_NO_LINK         0x14
-#define CISTPL_VERS_1          0x15
-#define CISTPL_ALTSTR          0x16
-#define CISTPL_DEVICE_A                0x17
-#define CISTPL_JEDEC_C         0x18
-#define CISTPL_JEDEC_A         0x19
-#define CISTPL_CONFIG          0x1a
-#define CISTPL_CFTABLE_ENTRY   0x1b
-#define CISTPL_DEVICE_OC       0x1c
-#define CISTPL_DEVICE_OA       0x1d
-#define CISTPL_DEVICE_GEO      0x1e
-#define CISTPL_DEVICE_GEO_A    0x1f
-#define CISTPL_MANFID          0x20
-#define CISTPL_FUNCID          0x21
-#define CISTPL_FUNCE           0x22
-#define CISTPL_SWIL            0x23
-#define CISTPL_END             0xff
-
-/*
- * CIS Function ID codes
- */
-#define CISTPL_FUNCID_MULTI    0x00
-#define CISTPL_FUNCID_MEMORY   0x01
-#define CISTPL_FUNCID_SERIAL   0x02
-#define CISTPL_FUNCID_PARALLEL 0x03
-#define CISTPL_FUNCID_FIXED    0x04
-#define CISTPL_FUNCID_VIDEO    0x05
-#define CISTPL_FUNCID_NETWORK  0x06
-#define CISTPL_FUNCID_AIMS     0x07
-#define CISTPL_FUNCID_SCSI     0x08
-
-/*
- * Fixed Disk FUNCE codes
- */
-#define CISTPL_IDE_INTERFACE   0x01
-
-#define CISTPL_FUNCE_IDE_IFACE 0x01
-#define CISTPL_FUNCE_IDE_MASTER        0x02
-#define CISTPL_FUNCE_IDE_SLAVE 0x03
-
-/* First feature byte */
-#define CISTPL_IDE_SILICON     0x04
-#define CISTPL_IDE_UNIQUE      0x08
-#define CISTPL_IDE_DUAL                0x10
-
-/* Second feature byte */
-#define CISTPL_IDE_HAS_SLEEP   0x01
-#define CISTPL_IDE_HAS_STANDBY 0x02
-#define CISTPL_IDE_HAS_IDLE    0x04
-#define CISTPL_IDE_LOW_POWER   0x08
-#define CISTPL_IDE_REG_INHIBIT 0x10
-#define CISTPL_IDE_HAS_INDEX   0x20
-#define CISTPL_IDE_IOIS16      0x40
-
-
-/* -------------------------------------------------------------------- */
-
-
-#define        MAX_TUPEL_SZ    512
-#define MAX_FEATURES   4
-
-static int check_ide_device (unsigned long base)
-{
-       volatile u8 *ident = NULL;
-       volatile u8 *feature_p[MAX_FEATURES];
-       volatile u8 *p, *start;
-       int n_features = 0;
-       u8 func_id = ~0;
-       u8 code, len;
-       unsigned short config_base = 0;
-       int found = 0;
-       int i;
-
-#ifdef DEBUG
-       printk ("PCMCIA MEM: %08lX\n", base);
-#endif
-       start = p = (volatile u8 *) base;
-
-       while ((p - start) < MAX_TUPEL_SZ) {
-
-               code = *p; p += 2;
-
-               if (code == 0xFF) { /* End of chain */
-                       break;
-               }
-
-               len = *p; p += 2;
-#ifdef DEBUG_PCMCIA
-               { volatile u8 *q = p;
-                       printk ("\nTuple code %02x  length %d\n\tData:",
-                               code, len);
-
-                       for (i = 0; i < len; ++i) {
-                               printk (" %02x", *q);
-                               q+= 2;
-                       }
-               }
-#endif /* DEBUG_PCMCIA */
-               switch (code) {
-               case CISTPL_VERS_1:
-                       ident = p + 4;
-                       break;
-               case CISTPL_FUNCID:
-                       func_id = *p;
-                       break;
-               case CISTPL_FUNCE:
-                       if (n_features < MAX_FEATURES)
-                               feature_p[n_features++] = p;
-                       break;
-               case CISTPL_CONFIG:
-                       config_base = (*(p+6) << 8) + (*(p+4));
-               default:
-                       break;
-               }
-               p += 2 * len;
-       }
-
-       found = identify (ident);
-
-       if (func_id != ((u8)~0)) {
-               print_funcid (func_id);
-
-               if (func_id == CISTPL_FUNCID_FIXED)
-                       found = 1;
-               else
-                       return (1);     /* no disk drive */
-       }
-
-       for (i=0; i<n_features; ++i) {
-               print_fixed (feature_p[i]);
-       }
-
-       if (!found) {
-               printk ("unknown card type\n");
-               return (1);
-       }
-
-       /* set level mode irq and I/O mapped device in config reg*/
-       *((u8 *)(base + config_base)) = 0x41;
-
-       return (0);
-}
-
-/* ------------------------------------------------------------------------- */
-
-static void print_funcid (int func)
-{
-       switch (func) {
-       case CISTPL_FUNCID_MULTI:
-               printk (" Multi-Function");
-               break;
-       case CISTPL_FUNCID_MEMORY:
-               printk (" Memory");
-               break;
-       case CISTPL_FUNCID_SERIAL:
-               printk (" Serial Port");
-               break;
-       case CISTPL_FUNCID_PARALLEL:
-               printk (" Parallel Port");
-               break;
-       case CISTPL_FUNCID_FIXED:
-               printk (" Fixed Disk");
-               break;
-       case CISTPL_FUNCID_VIDEO:
-               printk (" Video Adapter");
-               break;
-       case CISTPL_FUNCID_NETWORK:
-               printk (" Network Adapter");
-               break;
-       case CISTPL_FUNCID_AIMS:
-               printk (" AIMS Card");
-               break;
-       case CISTPL_FUNCID_SCSI:
-               printk (" SCSI Adapter");
-               break;
-       default:
-               printk (" Unknown");
-               break;
-       }
-       printk (" Card\n");
-}
-
-/* ------------------------------------------------------------------------- */
-
-static void print_fixed (volatile u8 *p)
-{
-       if (p == NULL)
-               return;
-
-       switch (*p) {
-       case CISTPL_FUNCE_IDE_IFACE:
-           {   u8 iface = *(p+2);
-
-               printk ((iface == CISTPL_IDE_INTERFACE) ? " IDE" : " unknown");
-               printk (" interface ");
-               break;
-           }
-       case CISTPL_FUNCE_IDE_MASTER:
-       case CISTPL_FUNCE_IDE_SLAVE:
-           {   u8 f1 = *(p+2);
-               u8 f2 = *(p+4);
-
-               printk ((f1 & CISTPL_IDE_SILICON) ? " [silicon]" : " [rotating]");
-
-               if (f1 & CISTPL_IDE_UNIQUE)
-                       printk (" [unique]");
-
-               printk ((f1 & CISTPL_IDE_DUAL) ? " [dual]" : " [single]");
-
-               if (f2 & CISTPL_IDE_HAS_SLEEP)
-                       printk (" [sleep]");
-
-               if (f2 & CISTPL_IDE_HAS_STANDBY)
-                       printk (" [standby]");
-
-               if (f2 & CISTPL_IDE_HAS_IDLE)
-                       printk (" [idle]");
-
-               if (f2 & CISTPL_IDE_LOW_POWER)
-                       printk (" [low power]");
-
-               if (f2 & CISTPL_IDE_REG_INHIBIT)
-                       printk (" [reg inhibit]");
-
-               if (f2 & CISTPL_IDE_HAS_INDEX)
-                       printk (" [index]");
-
-               if (f2 & CISTPL_IDE_IOIS16)
-                       printk (" [IOis16]");
-
-               break;
-           }
-       }
-       printk ("\n");
-}
-
-/* ------------------------------------------------------------------------- */
-
-
-#define MAX_IDENT_CHARS                64
-#define        MAX_IDENT_FIELDS        4
-
-static u8 *known_cards[] = {
-       "ARGOSY PnPIDE D5",
-       NULL
-};
-
-static int identify  (volatile u8 *p)
-{
-       u8 id_str[MAX_IDENT_CHARS];
-       u8 data;
-       u8 *t;
-       u8 **card;
-       int i, done;
-
-       if (p == NULL)
-               return (0);     /* Don't know */
-
-       t = id_str;
-       done =0;
-
-       for (i=0; i<=4 && !done; ++i, p+=2) {
-               while ((data = *p) != '\0') {
-                       if (data == 0xFF) {
-                               done = 1;
-                               break;
-                       }
-                       *t++ = data;
-                       if (t == &id_str[MAX_IDENT_CHARS-1]) {
-                               done = 1;
-                               break;
-                       }
-                       p += 2;
-               }
-               if (!done)
-                       *t++ = ' ';
-       }
-       *t = '\0';
-       while (--t > id_str) {
-               if (*t == ' ')
-                       *t = '\0';
-               else
-                       break;
-       }
-       printk ("Card ID: %s\n", id_str);
-
-       for (card=known_cards; *card; ++card) {
-               if (strcmp(*card, id_str) == 0) {       /* found! */
-                       return (1);
-               }
-       }
-
-       return (0);     /* don't know */
-}
-
-static int __init mpc8xx_ide_probe(void)
-{
-       hw_regs_t hw;
-       u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
-
-#ifdef IDE0_BASE_OFFSET
-       memset(&hw, 0, sizeof(hw));
-       if (!m8xx_ide_init_ports(&hw, 0)) {
-               ide_hwif_t *hwif = ide_find_port();
-
-               if (hwif) {
-                       ide_init_port_hw(hwif, &hw);
-                       hwif->pio_mask = ATA_PIO4;
-                       hwif->port_ops = &m8xx_port_ops;
-
-                       idx[0] = hwif->index;
-               }
-       }
-#ifdef IDE1_BASE_OFFSET
-       memset(&hw, 0, sizeof(hw));
-       if (!m8xx_ide_init_ports(&hw, 1)) {
-               ide_hwif_t *mate = ide_find_port();
-
-               if (mate) {
-                       ide_init_port_hw(mate, &hw);
-                       mate->pio_mask = ATA_PIO4;
-                       mate->port_ops = &m8xx_port_ops;
-
-                       idx[1] = mate->index;
-               }
-       }
-#endif
-#endif
-
-       ide_device_add(idx, NULL);
-
-       return 0;
-}
-
-module_init(mpc8xx_ide_probe);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c

index dcb2c466bb979efd7977437a2a9dfb969d7274e2..93fb9067c0430f7add87b99fa41f39f31ba0d4c5 100644 (file)
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -5,7 +5,7 @@
   * for doing DMA.
   *
   *  Copyright (C) 1998-2003 Paul Mackerras & Ben. Herrenschmidt
- *  Copyright (C)      2007 Bartlomiej Zolnierkiewicz
+ *  Copyright (C) 2007-2008 Bartlomiej Zolnierkiewicz
   *
   *  This program is free software; you can redistribute it and/or
   *  modify it under the terms of the GNU General Public License
@@ -48,8 +48,6 @@
  #include <asm/mediabay.h>
  #endif
  
-#include "../ide-timing.h"
-
  #undef IDE_PMAC_DEBUG
  
  #define DMA_WAIT_TIMEOUT       50
@@ -495,6 +493,7 @@ static void pmac_outbsync(ide_hwif_t *hwif, u8 value, unsigned long port)
  static void
  pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
  {
+       struct ide_timing *tim = ide_timing_find_mode(XFER_PIO_0 + pio);
         u32 *timings, t;
         unsigned accessTicks, recTicks;
         unsigned accessTime, recTime;
@@ -526,10 +525,9 @@ pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
                 }
         case controller_kl_ata4:
                 /* 66Mhz cell */
-               recTime = cycle_time - ide_pio_timings[pio].active_time
-                               - ide_pio_timings[pio].setup_time;
+               recTime = cycle_time - tim->active - tim->setup;
                 recTime = max(recTime, 150U);
-               accessTime = ide_pio_timings[pio].active_time;
+               accessTime = tim->active;
                 accessTime = max(accessTime, 150U);
                 accessTicks = SYSCLK_TICKS_66(accessTime);
                 accessTicks = min(accessTicks, 0x1fU);
@@ -542,10 +540,9 @@ pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
         default: {
                 /* 33Mhz cell */
                 int ebit = 0;
-               recTime = cycle_time - ide_pio_timings[pio].active_time
-                               - ide_pio_timings[pio].setup_time;
+               recTime = cycle_time - tim->active - tim->setup;
                 recTime = max(recTime, 150U);
-               accessTime = ide_pio_timings[pio].active_time;
+               accessTime = tim->active;
                 accessTime = max(accessTime, 150U);
                 accessTicks = SYSCLK_TICKS(accessTime);
                 accessTicks = min(accessTicks, 0x1fU);
@@ -1151,8 +1148,6 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match)
         base = ioremap(macio_resource_start(mdev, 0), 0x400);
         regbase = (unsigned long) base;
  
-       hwif->dev = &mdev->bus->pdev->dev;
-
         pmif->mdev = mdev;
         pmif->node = mdev->ofdev.node;
         pmif->regbase = regbase;
@@ -1174,7 +1169,8 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match)
         memset(&hw, 0, sizeof(hw));
         pmac_ide_init_ports(&hw, pmif->regbase);
         hw.irq = irq;
-       hw.dev = &mdev->ofdev.dev;
+       hw.dev = &mdev->bus->pdev->dev;
+       hw.parent = &mdev->ofdev.dev;
  
         rc = pmac_ide_setup_device(pmif, hwif, &hw);
         if (rc != 0) {
@@ -1274,7 +1270,6 @@ pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id)
                 goto out_free_pmif;
         }
  
-       hwif->dev = &pdev->dev;
         pmif->mdev = NULL;
         pmif->node = np;
  
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c

index abcfb1739d4d3a2089d580a1f7ffe5583ed3ff6a..65fc08b6b6d0e813f4eed2e5594c1149a5233d7b 100644 (file)
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -6,19 +6,15 @@
   *  May be copied or modified under the terms of the GNU General Public License
   */
  
-#include <linux/module.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/pci.h>
  #include <linux/init.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
  #include <linux/interrupt.h>
  #include <linux/ide.h>
  #include <linux/dma-mapping.h>
  
  #include <asm/io.h>
-#include <asm/irq.h>
  
  /**
   *     ide_setup_pci_baseregs  -       place a PCI IDE controller native
@@ -319,25 +315,22 @@ static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev,
  
                 ctl  = pci_resource_start(dev, 2*port+1);
                 base = pci_resource_start(dev, 2*port);
-               if ((ctl && !base) || (base && !ctl)) {
-                       printk(KERN_ERR "%s: inconsistent baseregs (BIOS) "
-                               "for port %d, skipping\n", d->name, port);
-                       return NULL;
-               }
-       }
-       if (!ctl) {
+       } else {
                 /* Use default values */
                 ctl = port ? 0x374 : 0x3f4;
                 base = port ? 0x170 : 0x1f0;
         }
  
-       hwif = ide_find_port_slot(d);
-       if (hwif == NULL) {
-               printk(KERN_ERR "%s: too many IDE interfaces, no room in "
-                               "table\n", d->name);
+       if (!base || !ctl) {
+               printk(KERN_ERR "%s: bad PCI BARs for port %d, skipping\n",
+                               d->name, port);
                 return NULL;
         }
  
+       hwif = ide_find_port_slot(d);
+       if (hwif == NULL)
+               return NULL;
+
         memset(&hw, 0, sizeof(hw));
         hw.irq = irq;
         hw.dev = &dev->dev;
@@ -346,8 +339,6 @@ static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev,
  
         ide_init_port_hw(hwif, &hw);
  
-       hwif->dev = &dev->dev;
-
         return hwif;
  }
  
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig

index 636af2862308fab614b4f9f9b2395ce0a24c7e4f..1921b8dbb2427f94d1403c24e0330c6dbad7d70b 100644 (file)
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -179,17 +179,29 @@ config FUJITSU_LAPTOP
          tristate "Fujitsu Laptop Extras"
          depends on X86
          depends on ACPI
+       depends on INPUT
          depends on BACKLIGHT_CLASS_DEVICE
          ---help---
           This is a driver for laptops built by Fujitsu:
  
             * P2xxx/P5xxx/S6xxx/S7xxx series Lifebooks
             * Possibly other Fujitsu laptop models
+           * Tested with S6410 and S7020
  
-         It adds support for LCD brightness control.
+         It adds support for LCD brightness control and some hotkeys.
  
           If you have a Fujitsu laptop, say Y or M here.
  
+config FUJITSU_LAPTOP_DEBUG
+       bool "Verbose debug mode for Fujitsu Laptop Extras"
+       depends on FUJITSU_LAPTOP
+       default n
+       ---help---
+         Enables extra debug output from the fujitsu extras driver, at the
+         expense of a slight increase in driver size.
+
+         If you are not sure, say N here.
+
  config TC1100_WMI
         tristate "HP Compaq TC1100 Tablet WMI Extras (EXPERIMENTAL)"
         depends on X86 && !X86_64
@@ -219,6 +231,23 @@ config MSI_LAPTOP
  
           If you have an MSI S270 laptop, say Y or M here.
  
+config COMPAL_LAPTOP
+       tristate "Compal Laptop Extras"
+       depends on X86
+       depends on ACPI_EC
+       depends on BACKLIGHT_CLASS_DEVICE
+       ---help---
+         This is a driver for laptops built by Compal:
+
+         Compal FL90/IFL90
+         Compal FL91/IFL91
+         Compal FL92/JFL92
+         Compal FT00/IFT00
+
+         It adds support for Bluetooth, WLAN and LCD brightness control.
+
+         If you have a Compal FL9x/IFL9x/FT00 laptop, say Y or M here.
+
  config SONY_LAPTOP
         tristate "Sony Laptop Extras"
         depends on X86 && ACPI
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile

index 1952875a272ec406dfcbc8566a606002eebee128..a6dac6a2e7e59c485fcd5a65f9f4ed752768a2af 100644 (file)
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -5,10 +5,11 @@ obj- := misc.o        # Dummy rule to force built-in.o to be made
  
  obj-$(CONFIG_IBM_ASM)          += ibmasm/
  obj-$(CONFIG_HDPU_FEATURES)    += hdpuftrs/
-obj-$(CONFIG_MSI_LAPTOP)     += msi-laptop.o
-obj-$(CONFIG_ACER_WMI)     += acer-wmi.o
  obj-$(CONFIG_ASUS_LAPTOP)      += asus-laptop.o
  obj-$(CONFIG_EEEPC_LAPTOP)     += eeepc-laptop.o
+obj-$(CONFIG_MSI_LAPTOP)       += msi-laptop.o
+obj-$(CONFIG_COMPAL_LAPTOP)    += compal-laptop.o
+obj-$(CONFIG_ACER_WMI)         += acer-wmi.o
  obj-$(CONFIG_ATMEL_PWM)                += atmel_pwm.o
  obj-$(CONFIG_ATMEL_SSC)                += atmel-ssc.o
  obj-$(CONFIG_ATMEL_TCLIB)      += atmel_tclib.o
diff --git a/drivers/misc/acer-wmi.c b/drivers/misc/acer-wmi.c

index dd13a3749927494804aa4566c59df652d963f40b..e7a3fe508dff00c9cd9d0e8db8a5227e487f3737 100644 (file)
--- a/drivers/misc/acer-wmi.c
+++ b/drivers/misc/acer-wmi.c
@@ -22,18 +22,18 @@
   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   */
  
-#define ACER_WMI_VERSION       "0.1"
-
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/init.h>
  #include <linux/types.h>
  #include <linux/dmi.h>
+#include <linux/fb.h>
  #include <linux/backlight.h>
  #include <linux/leds.h>
  #include <linux/platform_device.h>
  #include <linux/acpi.h>
  #include <linux/i8042.h>
+#include <linux/debugfs.h>
  
  #include <acpi/acpi_drivers.h>
  
@@ -87,6 +87,7 @@ struct acer_quirks {
   * Acer ACPI method GUIDs
   */
  #define AMW0_GUID1             "67C3371D-95A3-4C37-BB61-DD47B491DAAB"
+#define AMW0_GUID2             "431F16ED-0C2B-444C-B267-27DEB140CF9C"
  #define WMID_GUID1             "6AF4F258-B401-42fd-BE91-3D4AC2D7C0D3"
  #define WMID_GUID2             "95764E09-FB56-4e83-B31A-37761F60994A"
  
@@ -150,6 +151,12 @@ struct acer_data {
         int brightness;
  };
  
+struct acer_debug {
+       struct dentry *root;
+       struct dentry *devices;
+       u32 wmid_devices;
+};
+
  /* Each low-level interface must define at least some of the following */
  struct wmi_interface {
         /* The WMI device type */
@@ -160,6 +167,9 @@ struct wmi_interface {
  
         /* Private data for the current interface */
         struct acer_data data;
+
+       /* debugfs entries associated with this interface */
+       struct acer_debug debug;
  };
  
  /* The static interface pointer, points to the currently detected interface */
@@ -174,7 +184,7 @@ static struct wmi_interface *interface;
  struct quirk_entry {
         u8 wireless;
         u8 mailled;
-       u8 brightness;
+       s8 brightness;
         u8 bluetooth;
  };
  
@@ -198,6 +208,10 @@ static int dmi_matched(const struct dmi_system_id *dmi)
  static struct quirk_entry quirk_unknown = {
  };
  
+static struct quirk_entry quirk_acer_aspire_1520 = {
+       .brightness = -1,
+};
+
  static struct quirk_entry quirk_acer_travelmate_2490 = {
         .mailled = 1,
  };
@@ -207,7 +221,29 @@ static struct quirk_entry quirk_medion_md_98300 = {
         .wireless = 1,
  };
  
+static struct quirk_entry quirk_fujitsu_amilo_li_1718 = {
+       .wireless = 2,
+};
+
  static struct dmi_system_id acer_quirks[] = {
+       {
+               .callback = dmi_matched,
+               .ident = "Acer Aspire 1360",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1360"),
+               },
+               .driver_data = &quirk_acer_aspire_1520,
+       },
+       {
+               .callback = dmi_matched,
+               .ident = "Acer Aspire 1520",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1520"),
+               },
+               .driver_data = &quirk_acer_aspire_1520,
+       },
         {
                 .callback = dmi_matched,
                 .ident = "Acer Aspire 3100",
@@ -298,6 +334,15 @@ static struct dmi_system_id acer_quirks[] = {
                 },
                 .driver_data = &quirk_acer_travelmate_2490,
         },
+       {
+               .callback = dmi_matched,
+               .ident = "Fujitsu Siemens Amilo Li 1718",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Li 1718"),
+               },
+               .driver_data = &quirk_fujitsu_amilo_li_1718,
+       },
         {
                 .callback = dmi_matched,
                 .ident = "Medion MD 98300",
@@ -393,6 +438,12 @@ struct wmi_interface *iface)
                                 return AE_ERROR;
                         *value = result & 0x1;
                         return AE_OK;
+               case 2:
+                       err = ec_read(0x71, &result);
+                       if (err)
+                               return AE_ERROR;
+                       *value = result & 0x1;
+                       return AE_OK;
                 default:
                         err = ec_read(0xA, &result);
                         if (err)
@@ -506,6 +557,15 @@ static acpi_status AMW0_set_capabilities(void)
         struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL };
         union acpi_object *obj;
  
+       /*
+        * On laptops with this strange GUID (non Acer), normal probing doesn't
+        * work.
+        */
+       if (wmi_has_guid(AMW0_GUID2)) {
+               interface->capability |= ACER_CAP_WIRELESS;
+               return AE_OK;
+       }
+
         args.eax = ACER_AMW0_WRITE;
         args.ecx = args.edx = 0;
  
@@ -552,7 +612,8 @@ static acpi_status AMW0_set_capabilities(void)
          * appear to use the same EC register for brightness, even if they
          * differ for wireless, etc
          */
-       interface->capability |= ACER_CAP_BRIGHTNESS;
+       if (quirks->brightness >= 0)
+               interface->capability |= ACER_CAP_BRIGHTNESS;
  
         return AE_OK;
  }
@@ -807,7 +868,15 @@ static int read_brightness(struct backlight_device *bd)
  
  static int update_bl_status(struct backlight_device *bd)
  {
-       set_u32(bd->props.brightness, ACER_CAP_BRIGHTNESS);
+       int intensity = bd->props.brightness;
+
+       if (bd->props.power != FB_BLANK_UNBLANK)
+               intensity = 0;
+       if (bd->props.fb_blank != FB_BLANK_UNBLANK)
+               intensity = 0;
+
+       set_u32(intensity, ACER_CAP_BRIGHTNESS);
+
         return 0;
  }
  
@@ -829,8 +898,9 @@ static int __devinit acer_backlight_init(struct device *dev)
  
         acer_backlight_device = bd;
  
+       bd->props.power = FB_BLANK_UNBLANK;
+       bd->props.brightness = max_brightness;
         bd->props.max_brightness = max_brightness;
-       bd->props.brightness = read_brightness(NULL);
         backlight_update_status(bd);
         return 0;
  }
@@ -893,6 +963,28 @@ static ssize_t show_interface(struct device *dev, struct device_attribute *attr,
  static DEVICE_ATTR(interface, S_IWUGO | S_IRUGO | S_IWUSR,
         show_interface, NULL);
  
+/*
+ * debugfs functions
+ */
+/* Return the u32 from WMID_GUID2 instance 1; 0 on failure or bad reply. */
+static u32 get_wmid_devices(void)
+{
+       struct acpi_buffer out = {ACPI_ALLOCATE_BUFFER, NULL};
+       union acpi_object *obj;
+       u32 devices = 0;
+
+       if (ACPI_FAILURE(wmi_query_block(WMID_GUID2, 1, &out)))
+               return 0;
+
+       obj = (union acpi_object *) out.pointer;
+       if (obj && obj->type == ACPI_TYPE_BUFFER &&
+               obj->buffer.length == sizeof(u32))
+               devices = *((u32 *) obj->buffer.pointer);
+
+       /* ACPI_ALLOCATE_BUFFER: the reply buffer is ours to release. */
+       kfree(out.pointer);
+       return devices;
+}
+
  /*
   * Platform device
   */
@@ -1052,12 +1144,40 @@ error_sysfs:
         return retval;
  }
  
+static void remove_debugfs(void)
+{
+       debugfs_remove(interface->debug.devices);
+       debugfs_remove(interface->debug.root);
+}
+
+/* Create debugfs dir "acer-wmi" and its read-only u32 "devices" file. */
+static int create_debugfs(void)
+{
+       interface->debug.root = debugfs_create_dir("acer-wmi", NULL);
+       if (!interface->debug.root) {
+               printk(ACER_ERR "Failed to create debugfs directory\n");
+               return -ENOMEM;
+       }
+
+       interface->debug.devices = debugfs_create_u32("devices", S_IRUGO,
+                                       interface->debug.root,
+                                       &interface->debug.wmid_devices);
+       if (!interface->debug.devices) {
+               /* Undo the directory created above before bailing out. */
+               remove_debugfs();
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
  static int __init acer_wmi_init(void)
  {
         int err;
  
-       printk(ACER_INFO "Acer Laptop ACPI-WMI Extras version %s\n",
-                       ACER_WMI_VERSION);
+       printk(ACER_INFO "Acer Laptop ACPI-WMI Extras\n");
+
+       find_quirks();
  
         /*
          * Detect which ACPI-WMI interface we're using.
@@ -1092,8 +1212,6 @@ static int __init acer_wmi_init(void)
         if (wmi_has_guid(AMW0_GUID1))
                 AMW0_find_mailled();
  
-       find_quirks();
-
         if (!interface) {
                 printk(ACER_ERR "No or unsupported WMI interface, unable to "
                                 "load\n");
@@ -1111,6 +1229,13 @@ static int __init acer_wmi_init(void)
         if (err)
                 return err;
  
+       if (wmi_has_guid(WMID_GUID2)) {
+               interface->debug.wmid_devices = get_wmid_devices();
+               err = create_debugfs();
+               if (err)
+                       return err;
+       }
+
         /* Override any initial settings with values from the commandline */
         acer_commandline_init();
  
diff --git a/drivers/misc/compal-laptop.c b/drivers/misc/compal-laptop.c

new file mode 100644 (file)

index 0000000..344b790
--- /dev/null
+++ b/drivers/misc/compal-laptop.c
@@ -0,0 +1,404 @@
+/*-*-linux-c-*-*/
+
+/*
+  Copyright (C) 2008 Cezary Jackiewicz <cezary.jackiewicz (at) gmail.com>
+
+  based on MSI driver
+
+  Copyright (C) 2006 Lennart Poettering <mzxreary (at) 0pointer (dot) de>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+  02110-1301, USA.
+ */
+
+/*
+ * compal-laptop.c - Compal laptop support.
+ *
+ * This driver exports a few files in /sys/devices/platform/compal-laptop/:
+ *
+ *   wlan - wlan subsystem state: contains 0 or 1 (rw)
+ *
+ *   bluetooth - Bluetooth subsystem state: contains 0 or 1 (rw)
+ *
+ *   raw - raw value taken from embedded controller register (ro)
+ *
+ * In addition to these platform device attributes the driver
+ * registers itself in the Linux backlight control subsystem and is
+ * available to userspace under /sys/class/backlight/compal-laptop/.
+ *
+ * This driver might work on other laptops produced by Compal. If you
+ * want to try it you can pass force=1 as argument to the module which
+ * will force it to load even when the DMI data doesn't identify the
+ * laptop as FL9x.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+#include <linux/backlight.h>
+#include <linux/platform_device.h>
+#include <linux/autoconf.h>
+
+#define COMPAL_DRIVER_VERSION "0.2.6"
+
+#define COMPAL_LCD_LEVEL_MAX 8
+
+#define COMPAL_EC_COMMAND_WIRELESS 0xBB
+#define COMPAL_EC_COMMAND_LCD_LEVEL 0xB9
+
+#define KILLSWITCH_MASK 0x10
+#define WLAN_MASK      0x01
+#define BT_MASK        0x02
+
+static int force;
+module_param(force, bool, 0);
+MODULE_PARM_DESC(force, "Force driver load, ignore DMI data");
+
+/* Hardware access */
+
+static int set_lcd_level(int level)
+{
+       if (level < 0 || level >= COMPAL_LCD_LEVEL_MAX)
+               return -EINVAL;
+
+       ec_write(COMPAL_EC_COMMAND_LCD_LEVEL, level);
+
+       return 0;
+}
+
+static int get_lcd_level(void)
+{
+       u8 result;
+
+       ec_read(COMPAL_EC_COMMAND_LCD_LEVEL, &result);
+
+       return (int) result;
+}
+
+/*
+ * Set (state != 0) or clear the WLAN_MASK bit in the wireless EC register.
+ * Returns -EINVAL while the KILLSWITCH_MASK bit is clear.
+ */
+static int set_wlan_state(int state)
+{
+       u8 result, value;
+       int err = ec_read(COMPAL_EC_COMMAND_WIRELESS, &result);
+
+       if (err)
+               return err;     /* result is unset; don't write it back */
+       if ((result & KILLSWITCH_MASK) == 0)
+               return -EINVAL;
+
+       value = state ? (u8) (result | WLAN_MASK) : (u8) (result & ~WLAN_MASK);
+       ec_write(COMPAL_EC_COMMAND_WIRELESS, value);
+       return 0;
+}
+
+/*
+ * Set (state != 0) or clear the BT_MASK bit in the wireless EC register.
+ * Returns -EINVAL while the KILLSWITCH_MASK bit is clear.
+ */
+static int set_bluetooth_state(int state)
+{
+       u8 result, value;
+       int err = ec_read(COMPAL_EC_COMMAND_WIRELESS, &result);
+
+       if (err)
+               return err;     /* result is unset; don't write it back */
+       if ((result & KILLSWITCH_MASK) == 0)
+               return -EINVAL;
+
+       value = state ? (u8) (result | BT_MASK) : (u8) (result & ~BT_MASK);
+       ec_write(COMPAL_EC_COMMAND_WIRELESS, value);
+       return 0;
+}
+
+/*
+ * Read the wireless EC register and report WLAN/Bluetooth state (0 or 1)
+ * through whichever of @wlan / @bluetooth is non-NULL.  Both read as 0
+ * while the KILLSWITCH_MASK bit is clear.  Returns 0, or the non-zero
+ * ec_read() status (presumably a negative errno -- callers test ret < 0).
+ */
+static int get_wireless_state(int *wlan, int *bluetooth)
+{
+       u8 result;
+       int err, enabled;
+
+       err = ec_read(COMPAL_EC_COMMAND_WIRELESS, &result);
+       if (err)
+               return err;     /* result was not filled in */
+
+       enabled = (result & KILLSWITCH_MASK) != 0;
+       if (wlan)
+               *wlan = enabled ? (result & WLAN_MASK) : 0;
+       if (bluetooth)
+               *bluetooth = enabled ? ((result & BT_MASK) >> 1) : 0;
+       return 0;
+}
+
+/* Backlight device stuff */
+
+static int bl_get_brightness(struct backlight_device *b)
+{
+       return get_lcd_level();
+}
+
+
+static int bl_update_status(struct backlight_device *b)
+{
+       return set_lcd_level(b->props.brightness);
+}
+
+static struct backlight_ops compalbl_ops = {
+       .get_brightness = bl_get_brightness,
+       .update_status  = bl_update_status,
+};
+
+static struct backlight_device *compalbl_device;
+
+/* Platform device */
+
+static ssize_t show_wlan(struct device *dev,
+       struct device_attribute *attr, char *buf)
+{
+       int ret, enabled;
+
+       ret = get_wireless_state(&enabled, NULL);
+       if (ret < 0)
+               return ret;
+
+       return sprintf(buf, "%i\n", enabled);
+}
+
+/* sysfs "raw": print the wireless EC register; fail if it can't be read. */
+static ssize_t show_raw(struct device *dev,
+       struct device_attribute *attr, char *buf)
+{
+       u8 result;
+       int err = ec_read(COMPAL_EC_COMMAND_WIRELESS, &result);
+
+       return err ? err : sprintf(buf, "%i\n", result);
+}
+
+static ssize_t show_bluetooth(struct device *dev,
+       struct device_attribute *attr, char *buf)
+{
+       int ret, enabled;
+
+       ret = get_wireless_state(NULL, &enabled);
+       if (ret < 0)
+               return ret;
+
+       return sprintf(buf, "%i\n", enabled);
+}
+
+static ssize_t store_wlan_state(struct device *dev,
+       struct device_attribute *attr, const char *buf, size_t count)
+{
+       int state, ret;
+
+       if (sscanf(buf, "%i", &state) != 1 || (state < 0 || state > 1))
+               return -EINVAL;
+
+       ret = set_wlan_state(state);
+       if (ret < 0)
+               return ret;
+
+       return count;
+}
+
+static ssize_t store_bluetooth_state(struct device *dev,
+       struct device_attribute *attr, const char *buf, size_t count)
+{
+       int state, ret;
+
+       if (sscanf(buf, "%i", &state) != 1 || (state < 0 || state > 1))
+               return -EINVAL;
+
+       ret = set_bluetooth_state(state);
+       if (ret < 0)
+               return ret;
+
+       return count;
+}
+
+static DEVICE_ATTR(bluetooth, 0644, show_bluetooth, store_bluetooth_state);
+static DEVICE_ATTR(wlan, 0644, show_wlan, store_wlan_state);
+static DEVICE_ATTR(raw, 0444, show_raw, NULL);
+
+static struct attribute *compal_attributes[] = {
+       &dev_attr_bluetooth.attr,
+       &dev_attr_wlan.attr,
+       &dev_attr_raw.attr,
+       NULL
+};
+
+static struct attribute_group compal_attribute_group = {
+       .attrs = compal_attributes
+};
+
+static struct platform_driver compal_driver = {
+       .driver = {
+               .name = "compal-laptop",
+               .owner = THIS_MODULE,
+       }
+};
+
+static struct platform_device *compal_device;
+
+/* Initialization */
+
+static int dmi_check_cb(const struct dmi_system_id *id)
+{
+       printk(KERN_INFO "compal-laptop: Identified laptop model '%s'.\n",
+               id->ident);
+
+       return 0;
+}
+
+static struct dmi_system_id __initdata compal_dmi_table[] = {
+       {
+               .ident = "FL90/IFL90",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "IFL90"),
+                       DMI_MATCH(DMI_BOARD_VERSION, "IFT00"),
+               },
+               .callback = dmi_check_cb
+       },
+       {
+               .ident = "FL90/IFL90",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "IFL90"),
+                       DMI_MATCH(DMI_BOARD_VERSION, "REFERENCE"),
+               },
+               .callback = dmi_check_cb
+       },
+       {
+               .ident = "FL91/IFL91",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "IFL91"),
+                       DMI_MATCH(DMI_BOARD_VERSION, "IFT00"),
+               },
+               .callback = dmi_check_cb
+       },
+       {
+               .ident = "FL92/JFL92",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "JFL92"),
+                       DMI_MATCH(DMI_BOARD_VERSION, "IFT00"),
+               },
+               .callback = dmi_check_cb
+       },
+       {
+               .ident = "FT00/IFT00",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "IFT00"),
+                       DMI_MATCH(DMI_BOARD_VERSION, "IFT00"),
+               },
+               .callback = dmi_check_cb
+       },
+       { }
+};
+
+static int __init compal_init(void)
+{
+       int ret;
+
+       if (acpi_disabled)
+               return -ENODEV;
+
+       if (!force && !dmi_check_system(compal_dmi_table))
+               return -ENODEV;
+
+       /* Register backlight stuff */
+
+       compalbl_device = backlight_device_register("compal-laptop", NULL, NULL,
+                                               &compalbl_ops);
+       if (IS_ERR(compalbl_device))
+               return PTR_ERR(compalbl_device);
+
+       compalbl_device->props.max_brightness = COMPAL_LCD_LEVEL_MAX-1;
+
+       ret = platform_driver_register(&compal_driver);
+       if (ret)
+               goto fail_backlight;
+
+       /* Register platform stuff */
+
+       compal_device = platform_device_alloc("compal-laptop", -1);
+       if (!compal_device) {
+               ret = -ENOMEM;
+               goto fail_platform_driver;
+       }
+
+       ret = platform_device_add(compal_device);
+       if (ret)
+               goto fail_platform_device1;
+
+       ret = sysfs_create_group(&compal_device->dev.kobj,
+               &compal_attribute_group);
+       if (ret)
+               goto fail_platform_device2;
+
+       printk(KERN_INFO "compal-laptop: driver "COMPAL_DRIVER_VERSION
+               " successfully loaded.\n");
+
+       return 0;
+
+fail_platform_device2:
+
+       platform_device_del(compal_device);
+
+fail_platform_device1:
+
+       platform_device_put(compal_device);
+
+fail_platform_driver:
+
+       platform_driver_unregister(&compal_driver);
+
+fail_backlight:
+
+       backlight_device_unregister(compalbl_device);
+
+       return ret;
+}
+
+static void __exit compal_cleanup(void)
+{
+
+       sysfs_remove_group(&compal_device->dev.kobj, &compal_attribute_group);
+       platform_device_unregister(compal_device);
+       platform_driver_unregister(&compal_driver);
+       backlight_device_unregister(compalbl_device);
+
+       printk(KERN_INFO "compal-laptop: driver unloaded.\n");
+}
+
+module_init(compal_init);
+module_exit(compal_cleanup);
+
+MODULE_AUTHOR("Cezary Jackiewicz");
+MODULE_DESCRIPTION("Compal Laptop Support");
+MODULE_VERSION(COMPAL_DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS("dmi:*:rnIFL90:rvrIFT00:*");
+MODULE_ALIAS("dmi:*:rnIFL90:rvrREFERENCE:*");
+MODULE_ALIAS("dmi:*:rnIFL91:rvrIFT00:*");
+MODULE_ALIAS("dmi:*:rnJFL92:rvrIFT00:*");
+MODULE_ALIAS("dmi:*:rnIFT00:rvrIFT00:*");
diff --git a/drivers/misc/eeepc-laptop.c b/drivers/misc/eeepc-laptop.c

index 6d727609097fc4676e65cc381403b511197eec0b..9e8d79e7e9f4cb4b105840ce71420e435820edd5 100644 (file)
--- a/drivers/misc/eeepc-laptop.c
+++ b/drivers/misc/eeepc-laptop.c
@@ -87,7 +87,7 @@ enum {
         CM_ASL_LID
  };
  
-const char *cm_getv[] = {
+static const char *cm_getv[] = {
         "WLDG", NULL, NULL, NULL,
         "CAMG", NULL, NULL, NULL,
         NULL, "PBLG", NULL, NULL,
@@ -96,7 +96,7 @@ const char *cm_getv[] = {
         "CRDG", "LIDG"
  };
  
-const char *cm_setv[] = {
+static const char *cm_setv[] = {
         "WLDS", NULL, NULL, NULL,
         "CAMS", NULL, NULL, NULL,
         "SDSP", "PBLS", "HDPS", NULL,
diff --git a/drivers/misc/fujitsu-laptop.c b/drivers/misc/fujitsu-laptop.c

index 6d14e8fe153706f69cb7b08c40b01f9c388baa54..7a1ef6c262defaf338bbfcbb7dff8c56f3c03c99 100644 (file)
--- a/drivers/misc/fujitsu-laptop.c
+++ b/drivers/misc/fujitsu-laptop.c
@@ -1,12 +1,14 @@
  /*-*-linux-c-*-*/
  
  /*
-  Copyright (C) 2007 Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
+  Copyright (C) 2007,2008 Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
+  Copyright (C) 2008 Peter Gruber <nokos@gmx.net>
    Based on earlier work:
      Copyright (C) 2003 Shane Spencer <shane@bogomip.com>
      Adrian Yee <brewt-fujitsu@brewt.org>
  
-  Templated from msi-laptop.c which is copyright by its respective authors.
+  Templated from msi-laptop.c and thinkpad_acpi.c which are copyright
+  by their respective authors.
  
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -39,8 +41,17 @@
   * registers itself in the Linux backlight control subsystem and is
   * available to userspace under /sys/class/backlight/fujitsu-laptop/.
   *
- * This driver has been tested on a Fujitsu Lifebook S7020.  It should
- * work on most P-series and S-series Lifebooks, but YMMV.
+ * Hotkeys present on certain Fujitsu laptops (eg: the S6xxx series) are
+ * also supported by this driver.
+ *
+ * This driver has been tested on a Fujitsu Lifebook S6410 and S7020.  It
+ * should work on most P-series and S-series Lifebooks, but YMMV.
+ *
+ * The module parameter use_alt_lcd_levels switches between different ACPI
+ * brightness controls which are used by different Fujitsu laptops.  In most
+ * cases the correct method is automatically detected. "use_alt_lcd_levels=1"
+ * is applicable for a Fujitsu Lifebook S6410 if autodetection fails.
+ *
   */
  
  #include <linux/module.h>
@@ -49,30 +60,105 @@
  #include <linux/acpi.h>
  #include <linux/dmi.h>
  #include <linux/backlight.h>
+#include <linux/input.h>
+#include <linux/kfifo.h>
+#include <linux/video_output.h>
  #include <linux/platform_device.h>
  
-#define FUJITSU_DRIVER_VERSION "0.3"
+#define FUJITSU_DRIVER_VERSION "0.4.2"
  
  #define FUJITSU_LCD_N_LEVELS 8
  
  #define ACPI_FUJITSU_CLASS              "fujitsu"
  #define ACPI_FUJITSU_HID                "FUJ02B1"
-#define ACPI_FUJITSU_DRIVER_NAME        "Fujitsu laptop FUJ02B1 ACPI extras driver"
+#define ACPI_FUJITSU_DRIVER_NAME       "Fujitsu laptop FUJ02B1 ACPI brightness driver"
  #define ACPI_FUJITSU_DEVICE_NAME        "Fujitsu FUJ02B1"
-
+#define ACPI_FUJITSU_HOTKEY_HID        "FUJ02E3"
+#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
+#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3"
+
+#define ACPI_FUJITSU_NOTIFY_CODE1     0x80
+
+#define ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS     0x86
+#define ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS     0x87
+
+/* Hotkey details: codes for the keys as read from the GIRB register */
+#define LOCK_KEY       0x410   /* mapped to KEY_SCREENLOCK (the key with the key symbol) */
+#define DISPLAY_KEY    0x411   /* mapped to KEY_MEDIA (the key with the laptop symbol) */
+#define ENERGY_KEY     0x412   /* mapped to KEY_EMAIL (E key) */
+#define REST_KEY       0x413   /* mapped to KEY_SUSPEND (R key) */
+
+#define MAX_HOTKEY_RINGBUFFER_SIZE 100
+#define RINGBUFFERSIZE 40
+
+/* Debugging */
+#define FUJLAPTOP_LOG     ACPI_FUJITSU_HID ": "
+#define FUJLAPTOP_ERR     KERN_ERR FUJLAPTOP_LOG
+#define FUJLAPTOP_NOTICE   KERN_NOTICE FUJLAPTOP_LOG
+#define FUJLAPTOP_INFO    KERN_INFO FUJLAPTOP_LOG
+#define FUJLAPTOP_DEBUG    KERN_DEBUG FUJLAPTOP_LOG
+
+#define FUJLAPTOP_DBG_ALL        0xffff
+#define FUJLAPTOP_DBG_ERROR      0x0001
+#define FUJLAPTOP_DBG_WARN       0x0002
+#define FUJLAPTOP_DBG_INFO       0x0004
+#define FUJLAPTOP_DBG_TRACE      0x0008
+
+#define dbg_printk(a_dbg_level, format, arg...) \
+       do { if (dbg_level & a_dbg_level) \
+               printk(FUJLAPTOP_DEBUG "%s: " format, __func__ , ## arg); \
+       } while (0)
+#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG
+#define vdbg_printk(a_dbg_level, format, arg...) \
+       dbg_printk(a_dbg_level, format, ## arg)
+#else
+#define vdbg_printk(a_dbg_level, format, arg...)
+#endif
+
+/* Device controlling the backlight and associated keys */
  struct fujitsu_t {
         acpi_handle acpi_handle;
+       struct acpi_device *dev;
+       struct input_dev *input;
+       char phys[32];
         struct backlight_device *bl_device;
         struct platform_device *pf_device;
  
-       unsigned long fuj02b1_state;
+       unsigned int max_brightness;
         unsigned int brightness_changed;
         unsigned int brightness_level;
  };
  
  static struct fujitsu_t *fujitsu;
+static int use_alt_lcd_levels = -1;
+static int disable_brightness_keys = -1;
+static int disable_brightness_adjust = -1;
+
+/* Device used to access other hotkeys on the laptop */
+struct fujitsu_hotkey_t {
+       /* ACPI handle of the hotkey device; GIRB is evaluated on it */
+       acpi_handle acpi_handle;
+       /* ACPI device this state is bound to */
+       struct acpi_device *dev;
+       /* input device through which hotkey presses/releases are reported */
+       struct input_dev *input;
+       /* backing storage for the input device's phys string */
+       char phys[32];
+       struct platform_device *pf_device;
+       /* keycodes reported as pressed and not yet reported as released */
+       struct kfifo *fifo;
+       /* lock handed to kfifo_alloc() for the fifo above */
+       spinlock_t fifo_lock;
+
+       unsigned int irb;       /* info about the pressed buttons */
+};
  
-/* Hardware access */
+static struct fujitsu_hotkey_t *fujitsu_hotkey;
+
+static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event,
+                                      void *data);
+
+#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG
+static u32 dbg_level = 0x03;
+#endif
+
+static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data);
+
+/* Hardware access for LCD brightness control */
  
  static int set_lcd_level(int level)
  {
@@ -81,7 +167,10 @@ static int set_lcd_level(int level)
         struct acpi_object_list arg_list = { 1, &arg0 };
         acpi_handle handle = NULL;
  
-       if (level < 0 || level >= FUJITSU_LCD_N_LEVELS)
+       vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n",
+                   level);
+
+       if (level < 0 || level >= fujitsu->max_brightness)
                 return -EINVAL;
  
         if (!fujitsu)
@@ -89,7 +178,38 @@ static int set_lcd_level(int level)
  
         status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle);
         if (ACPI_FAILURE(status)) {
-               ACPI_DEBUG_PRINT((ACPI_DB_INFO, "SBLL not present\n"));
+               vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n");
+               return -ENODEV;
+       }
+
+       arg0.integer.value = level;
+
+       status = acpi_evaluate_object(handle, NULL, &arg_list, NULL);
+       if (ACPI_FAILURE(status))
+               return -ENODEV;
+
+       return 0;
+}
+
+static int set_lcd_level_alt(int level)
+{
+       acpi_status status = AE_OK;
+       union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+       struct acpi_object_list arg_list = { 1, &arg0 };
+       acpi_handle handle = NULL;
+
+       vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n",
+                   level);
+
+       if (level < 0 || level >= fujitsu->max_brightness)
+               return -EINVAL;
+
+       if (!fujitsu)
+               return -EINVAL;
+
+       status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle);
+       if (ACPI_FAILURE(status)) {
+               vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n");
                 return -ENODEV;
         }
  
@@ -107,13 +227,52 @@ static int get_lcd_level(void)
         unsigned long state = 0;
         acpi_status status = AE_OK;
  
-       // Get the Brightness
+       vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n");
+
         status =
             acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state);
         if (status < 0)
                 return status;
  
-       fujitsu->fuj02b1_state = state;
+       fujitsu->brightness_level = state & 0x0fffffff;
+
+       if (state & 0x80000000)
+               fujitsu->brightness_changed = 1;
+       else
+               fujitsu->brightness_changed = 0;
+
+       return fujitsu->brightness_level;
+}
+
+/*
+ * Read the upper brightness limit from the ACPI method RBLL.
+ *
+ * On success the value is cached in fujitsu->max_brightness and returned.
+ * On failure -ENODEV is returned and the cached value is left untouched.
+ */
+static int get_max_brightness(void)
+{
+       unsigned long state = 0;
+       acpi_status status = AE_OK;
+
+       vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n");
+
+       status =
+           acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state);
+       /*
+        * acpi_status is an unsigned type, so a "status < 0" test can never
+        * fire; ACPI_FAILURE() is the way to detect an error here.
+        */
+       if (ACPI_FAILURE(status))
+               return -ENODEV;
+
+       fujitsu->max_brightness = state;
+
+       return fujitsu->max_brightness;
+}
+
+static int get_lcd_level_alt(void)
+{
+       unsigned long state = 0;
+       acpi_status status = AE_OK;
+
+       vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLS\n");
+
+       status =
+           acpi_evaluate_integer(fujitsu->acpi_handle, "GBLS", NULL, &state);
+       if (status < 0)
+               return status;
+
         fujitsu->brightness_level = state & 0x0fffffff;
  
         if (state & 0x80000000)
@@ -128,12 +287,18 @@ static int get_lcd_level(void)
  
  static int bl_get_brightness(struct backlight_device *b)
  {
-       return get_lcd_level();
+       if (use_alt_lcd_levels)
+               return get_lcd_level_alt();
+       else
+               return get_lcd_level();
  }
  
  static int bl_update_status(struct backlight_device *b)
  {
-       return set_lcd_level(b->props.brightness);
+       if (use_alt_lcd_levels)
+               return set_lcd_level_alt(b->props.brightness);
+       else
+               return set_lcd_level(b->props.brightness);
  }
  
  static struct backlight_ops fujitsubl_ops = {
@@ -141,7 +306,35 @@ static struct backlight_ops fujitsubl_ops = {
         .update_status = bl_update_status,
  };
  
-/* Platform device */
+/* Platform LCD brightness device */
+
+/*
+ * sysfs "max_brightness" show handler: re-query the limit and print it as
+ * a decimal number; a negative query result is handed back as the error.
+ */
+static ssize_t
+show_max_brightness(struct device *dev,
+                   struct device_attribute *attr, char *buf)
+{
+       int max = get_max_brightness();
+
+       return (max < 0) ? max : sprintf(buf, "%i\n", max);
+}
+
+/*
+ * sysfs "brightness_changed" show handler: print the cached changed flag
+ * as a decimal number (no hardware access is performed here).
+ */
+static ssize_t
+show_brightness_changed(struct device *dev,
+                       struct device_attribute *attr, char *buf)
+{
+       int changed = fujitsu->brightness_changed;
+
+       if (changed < 0)
+               return changed;
+
+       return sprintf(buf, "%i\n", changed);
+}
  
  static ssize_t show_lcd_level(struct device *dev,
                               struct device_attribute *attr, char *buf)
@@ -149,7 +342,10 @@ static ssize_t show_lcd_level(struct device *dev,
  
         int ret;
  
-       ret = get_lcd_level();
+       if (use_alt_lcd_levels)
+               ret = get_lcd_level_alt();
+       else
+               ret = get_lcd_level();
         if (ret < 0)
                 return ret;
  
@@ -164,19 +360,61 @@ static ssize_t store_lcd_level(struct device *dev,
         int level, ret;
  
         if (sscanf(buf, "%i", &level) != 1
-           || (level < 0 || level >= FUJITSU_LCD_N_LEVELS))
+           || (level < 0 || level >= fujitsu->max_brightness))
                 return -EINVAL;
  
-       ret = set_lcd_level(level);
+       if (use_alt_lcd_levels)
+               ret = set_lcd_level_alt(level);
+       else
+               ret = set_lcd_level(level);
+       if (ret < 0)
+               return ret;
+
+       if (use_alt_lcd_levels)
+               ret = get_lcd_level_alt();
+       else
+               ret = get_lcd_level();
         if (ret < 0)
                 return ret;
  
         return count;
  }
  
+/* Hardware access for hotkey device */
+
+/*
+ * Fetch the next entry from the hotkey ring buffer via the ACPI method
+ * GIRB.
+ *
+ * On success the value is cached in fujitsu_hotkey->irb and returned.
+ * If the ACPI call fails, 0 is returned, which callers already treat as
+ * "no more entries", so their polling loops terminate.
+ */
+static int get_irb(void)
+{
+       unsigned long state = 0;
+       acpi_status status = AE_OK;
+
+       vdbg_printk(FUJLAPTOP_DBG_TRACE, "Get irb\n");
+
+       status =
+           acpi_evaluate_integer(fujitsu_hotkey->acpi_handle, "GIRB", NULL,
+                                 &state);
+       /*
+        * acpi_status is an unsigned type, so a "status < 0" test can never
+        * fire; ACPI_FAILURE() is the way to detect an error here.
+        */
+       if (ACPI_FAILURE(status))
+               return 0;
+
+       fujitsu_hotkey->irb = state;
+
+       return fujitsu_hotkey->irb;
+}
+
+/*
+ * Store handler that discards whatever was written and reports the whole
+ * buffer as consumed.
+ */
+static ssize_t
+ignore_store(struct device *dev,
+            struct device_attribute *attr, const char *buf, size_t count)
+{
+       return count;
+}
+
+static DEVICE_ATTR(max_brightness, 0444, show_max_brightness, ignore_store);
+static DEVICE_ATTR(brightness_changed, 0444, show_brightness_changed,
+                  ignore_store);
  static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level);
  
  static struct attribute *fujitsupf_attributes[] = {
+       &dev_attr_brightness_changed.attr,
+       &dev_attr_max_brightness.attr,
         &dev_attr_lcd_level.attr,
         NULL
  };
@@ -192,14 +430,52 @@ static struct platform_driver fujitsupf_driver = {
                    }
  };
  
-/* ACPI device */
+/*
+ * DMI match callback: log the identified model and give every setting
+ * that is still at its "auto" value (-1) a default.  The key/adjust
+ * defaults depend on whether the ACPI object BLNF can be looked up.
+ * Always returns 0.
+ */
+static int dmi_check_cb_s6410(const struct dmi_system_id *id)
+{
+       acpi_handle blnf;
+       acpi_status status;
+       int blnf_found;
+
+       printk(KERN_INFO "fujitsu-laptop: Identified laptop model '%s'.\n",
+              id->ident);
+
+       status = acpi_get_handle(NULL, "\\_SB.PCI0.GFX0.LCD.BLNF", &blnf);
+       blnf_found = ACPI_SUCCESS(status);
+
+       if (use_alt_lcd_levels == -1) {
+               vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detecting usealt\n");
+               use_alt_lcd_levels = 1;
+       }
+
+       if (disable_brightness_keys == -1) {
+               vdbg_printk(FUJLAPTOP_DBG_TRACE,
+                           "auto-detecting disable_keys\n");
+               disable_brightness_keys = !!blnf_found;
+       }
+
+       if (disable_brightness_adjust == -1) {
+               vdbg_printk(FUJLAPTOP_DBG_TRACE,
+                           "auto-detecting disable_adjust\n");
+               disable_brightness_adjust = !blnf_found;
+       }
+
+       return 0;
+}
+
+/*
+ * DMI quirk table.  Deliberately not __initdata: it is passed to
+ * dmi_check_system() from acpi_fujitsu_add(), which is not an __init
+ * function, so the table must stay valid after init memory is released.
+ */
+static struct dmi_system_id fujitsu_dmi_table[] = {
+       {
+        .ident = "Fujitsu Siemens",
+        .matches = {
+                    DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK S6410"),
+                    },
+        .callback = dmi_check_cb_s6410},
+       {}
+};
+
+/* ACPI device for LCD brightness control */
  
  static int acpi_fujitsu_add(struct acpi_device *device)
  {
+       acpi_status status;
+       acpi_handle handle;
         int result = 0;
         int state = 0;
-
-       ACPI_FUNCTION_TRACE("acpi_fujitsu_add");
+       struct input_dev *input;
+       int error;
  
         if (!device)
                 return -EINVAL;
@@ -209,10 +485,42 @@ static int acpi_fujitsu_add(struct acpi_device *device)
         sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
         acpi_driver_data(device) = fujitsu;
  
+       status = acpi_install_notify_handler(device->handle,
+                                            ACPI_DEVICE_NOTIFY,
+                                            acpi_fujitsu_notify, fujitsu);
+
+       if (ACPI_FAILURE(status)) {
+               printk(KERN_ERR "Error installing notify handler\n");
+               error = -ENODEV;
+               goto err_stop;
+       }
+
+       fujitsu->input = input = input_allocate_device();
+       if (!input) {
+               error = -ENOMEM;
+               goto err_uninstall_notify;
+       }
+
+       snprintf(fujitsu->phys, sizeof(fujitsu->phys),
+                "%s/video/input0", acpi_device_hid(device));
+
+       input->name = acpi_device_name(device);
+       input->phys = fujitsu->phys;
+       input->id.bustype = BUS_HOST;
+       input->id.product = 0x06;
+       input->dev.parent = &device->dev;
+       input->evbit[0] = BIT(EV_KEY);
+       set_bit(KEY_BRIGHTNESSUP, input->keybit);
+       set_bit(KEY_BRIGHTNESSDOWN, input->keybit);
+       set_bit(KEY_UNKNOWN, input->keybit);
+
+       error = input_register_device(input);
+       if (error)
+               goto err_free_input_dev;
+
         result = acpi_bus_get_power(fujitsu->acpi_handle, &state);
         if (result) {
-               ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
-                                 "Error reading power state\n"));
+               printk(KERN_ERR "Error reading power state\n");
                 goto end;
         }
  
@@ -220,22 +528,373 @@ static int acpi_fujitsu_add(struct acpi_device *device)
                acpi_device_name(device), acpi_device_bid(device),
                !device->power.state ? "on" : "off");
  
-      end:
+       fujitsu->dev = device;
+
+       if (ACPI_SUCCESS
+           (acpi_get_handle(device->handle, METHOD_NAME__INI, &handle))) {
+               vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
+               if (ACPI_FAILURE
+                   (acpi_evaluate_object
+                    (device->handle, METHOD_NAME__INI, NULL, NULL)))
+                       printk(KERN_ERR "_INI Method failed\n");
+       }
+
+       /* do config (detect defaults) */
+       dmi_check_system(fujitsu_dmi_table);
+       use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0;
+       disable_brightness_keys = disable_brightness_keys == 1 ? 1 : 0;
+       disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0;
+       vdbg_printk(FUJLAPTOP_DBG_INFO,
+                   "config: [alt interface: %d], [key disable: %d], [adjust disable: %d]\n",
+                   use_alt_lcd_levels, disable_brightness_keys,
+                   disable_brightness_adjust);
+
+       if (get_max_brightness() <= 0)
+               fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS;
+       if (use_alt_lcd_levels)
+               get_lcd_level_alt();
+       else
+               get_lcd_level();
+
+       return result;
+
+end:
+err_free_input_dev:
+       input_free_device(input);
+err_uninstall_notify:
+       acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+                                  acpi_fujitsu_notify);
+err_stop:
  
         return result;
  }
  
  static int acpi_fujitsu_remove(struct acpi_device *device, int type)
  {
-       ACPI_FUNCTION_TRACE("acpi_fujitsu_remove");
+       acpi_status status;
+       struct fujitsu_t *fujitsu = NULL;
  
         if (!device || !acpi_driver_data(device))
                 return -EINVAL;
+
+       fujitsu = acpi_driver_data(device);
+
+       status = acpi_remove_notify_handler(fujitsu->acpi_handle,
+                                           ACPI_DEVICE_NOTIFY,
+                                           acpi_fujitsu_notify);
+
+       if (!device || !acpi_driver_data(device))
+               return -EINVAL;
+
         fujitsu->acpi_handle = NULL;
  
         return 0;
  }
  
+/* Brightness notify */
+
+/*
+ * ACPI notify handler of the brightness device.
+ *
+ * For ACPI_FUJITSU_NOTIFY_CODE1 the brightness level is re-read and
+ * compared with the previously cached one:
+ *  - level moved: unless adjusting is disabled the new level is written
+ *    back, and unless keys are disabled an up/down event is generated;
+ *  - level unchanged but the "changed" flag is set: an up/down event is
+ *    generated only when the level sits at the bottom or top of the range;
+ *  - otherwise KEY_UNKNOWN is reported.
+ * Any other event is reported as KEY_UNKNOWN.  A non-zero keycode is sent
+ * as a press immediately followed by a release.
+ */
+static void acpi_fujitsu_notify(acpi_handle handle, u32 event, void *data)
+{
+       struct input_dev *input = fujitsu->input;
+       int keycode = 0;
+       int proc_event = 0;
+       int prev, cur;
+
+       if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
+               keycode = KEY_UNKNOWN;
+               vdbg_printk(FUJLAPTOP_DBG_WARN,
+                           "unsupported event [0x%x]\n", event);
+               goto report;
+       }
+
+       prev = fujitsu->brightness_level;
+       get_lcd_level();  /* the alt version always yields changed */
+       cur = fujitsu->brightness_level;
+
+       vdbg_printk(FUJLAPTOP_DBG_TRACE,
+                   "brightness button event [%i -> %i (%i)]\n",
+                   prev, cur, fujitsu->brightness_changed);
+
+       if (prev != cur) {
+               /* The level moved: apply it if allowed, then pick a direction */
+               if (disable_brightness_adjust != 1) {
+                       if (use_alt_lcd_levels)
+                               set_lcd_level_alt(cur);
+                       else
+                               set_lcd_level(cur);
+               }
+               if (disable_brightness_keys != 1) {
+                       if (prev < cur) {
+                               proc_event = ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS;
+                               keycode = KEY_BRIGHTNESSUP;
+                       } else {
+                               proc_event = ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS;
+                               keycode = KEY_BRIGHTNESSDOWN;
+                       }
+               }
+       } else if (fujitsu->brightness_changed) {
+               /* Same level but flagged as changed: only act at the range ends */
+               if (disable_brightness_keys != 1) {
+                       if (prev == 0) {
+                               proc_event = ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS;
+                               keycode = KEY_BRIGHTNESSDOWN;
+                       } else if (prev == (fujitsu->max_brightness) - 1) {
+                               proc_event = ACPI_VIDEO_NOTIFY_INC_BRIGHTNESS;
+                               keycode = KEY_BRIGHTNESSUP;
+                       }
+               }
+       } else {
+               keycode = KEY_UNKNOWN;
+       }
+
+       if (proc_event)
+               acpi_bus_generate_proc_event(fujitsu->dev, proc_event, 0);
+
+report:
+       if (keycode != 0) {
+               input_report_key(input, keycode, 1);
+               input_sync(input);
+               input_report_key(input, keycode, 0);
+               input_sync(input);
+       }
+}
+
+/* ACPI device for hotkey handling */
+
+/*
+ * Bind the hotkey ACPI device: allocate the keycode fifo, install the
+ * notify handler, register an input device for the hotkeys, run _INI if
+ * the device has one and drain stale entries from the hotkey ring buffer.
+ *
+ * Returns 0 on success or a negative errno.  On failure everything that
+ * was set up so far is torn down again in reverse order.
+ */
+static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
+{
+       acpi_status status;
+       acpi_handle handle;
+       int state = 0;
+       struct input_dev *input;
+       int error;
+       int i;
+
+       if (!device)
+               return -EINVAL;
+
+       fujitsu_hotkey->acpi_handle = device->handle;
+       sprintf(acpi_device_name(device), "%s",
+               ACPI_FUJITSU_HOTKEY_DEVICE_NAME);
+       sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
+       acpi_driver_data(device) = fujitsu_hotkey;
+
+       /*
+        * kfifo: allocated before the notify handler is installed so that
+        * the unwind path can remove the handler first and free the fifo
+        * afterwards.
+        */
+       spin_lock_init(&fujitsu_hotkey->fifo_lock);
+       fujitsu_hotkey->fifo =
+           kfifo_alloc(RINGBUFFERSIZE * sizeof(int), GFP_KERNEL,
+                       &fujitsu_hotkey->fifo_lock);
+       if (IS_ERR(fujitsu_hotkey->fifo)) {
+               printk(KERN_ERR "kfifo_alloc failed\n");
+               error = PTR_ERR(fujitsu_hotkey->fifo);
+               goto err_stop;
+       }
+
+       status = acpi_install_notify_handler(device->handle,
+                                            ACPI_DEVICE_NOTIFY,
+                                            acpi_fujitsu_hotkey_notify,
+                                            fujitsu_hotkey);
+
+       if (ACPI_FAILURE(status)) {
+               printk(KERN_ERR "Error installing notify handler\n");
+               error = -ENODEV;
+               goto err_free_fifo;
+       }
+
+       fujitsu_hotkey->input = input = input_allocate_device();
+       if (!input) {
+               error = -ENOMEM;
+               goto err_uninstall_notify;
+       }
+
+       snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys),
+                "%s/video/input0", acpi_device_hid(device));
+
+       input->name = acpi_device_name(device);
+       input->phys = fujitsu_hotkey->phys;
+       input->id.bustype = BUS_HOST;
+       input->id.product = 0x06;
+       input->dev.parent = &device->dev;
+       input->evbit[0] = BIT(EV_KEY);
+       set_bit(KEY_SCREENLOCK, input->keybit);
+       set_bit(KEY_MEDIA, input->keybit);
+       set_bit(KEY_EMAIL, input->keybit);
+       set_bit(KEY_SUSPEND, input->keybit);
+       set_bit(KEY_UNKNOWN, input->keybit);
+
+       error = input_register_device(input);
+       if (error)
+               goto err_free_input_dev;
+
+       error = acpi_bus_get_power(fujitsu_hotkey->acpi_handle, &state);
+       if (error) {
+               printk(KERN_ERR "Error reading power state\n");
+               goto err_unregister_input_dev;
+       }
+
+       printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
+              acpi_device_name(device), acpi_device_bid(device),
+              !device->power.state ? "on" : "off");
+
+       fujitsu_hotkey->dev = device;
+
+       if (ACPI_SUCCESS
+           (acpi_get_handle(device->handle, METHOD_NAME__INI, &handle))) {
+               vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
+               if (ACPI_FAILURE
+                   (acpi_evaluate_object
+                    (device->handle, METHOD_NAME__INI, NULL, NULL)))
+                       printk(KERN_ERR "_INI Method failed\n");
+       }
+
+       i = 0;                  /* Discard hotkey ringbuffer */
+       while (get_irb() != 0 && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) ;
+       vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i);
+
+       return 0;
+
+err_unregister_input_dev:
+       /* A registered input device must be unregistered, never just freed */
+       input_unregister_device(input);
+       goto err_uninstall_notify;
+err_free_input_dev:
+       input_free_device(input);
+err_uninstall_notify:
+       acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+                                  acpi_fujitsu_hotkey_notify);
+err_free_fifo:
+       kfifo_free(fujitsu_hotkey->fifo);
+err_stop:
+
+       /* Report the real failure; the success code must not leak out here */
+       return error;
+}
+
+/*
+ * Unbind the hotkey ACPI device: remove the notify handler, clear the
+ * cached ACPI handle and free the keycode fifo.
+ *
+ * Returns -EINVAL if there is no device or no driver data, 0 otherwise.
+ *
+ * NOTE(review): the input device registered by the add callback is not
+ * unregistered here - confirm that it is released elsewhere.
+ */
+static int acpi_fujitsu_hotkey_remove(struct acpi_device *device, int type)
+{
+       acpi_status status;
+       /* local pointer shadows the file-scope fujitsu_hotkey */
+       struct fujitsu_hotkey_t *fujitsu_hotkey = NULL;
+
+       if (!device || !acpi_driver_data(device))
+               return -EINVAL;
+
+       fujitsu_hotkey = acpi_driver_data(device);
+
+       /* the returned status is stored but not examined */
+       status = acpi_remove_notify_handler(fujitsu_hotkey->acpi_handle,
+                                           ACPI_DEVICE_NOTIFY,
+                                           acpi_fujitsu_hotkey_notify);
+
+       fujitsu_hotkey->acpi_handle = NULL;
+
+       kfifo_free(fujitsu_hotkey->fifo);
+
+       return 0;
+}
+
+/*
+ * ACPI notify handler of the hotkey device.
+ *
+ * For ACPI_FUJITSU_NOTIFY_CODE1 the GIRB ring buffer is read until it
+ * returns 0 or MAX_HOTKEY_RINGBUFFER_SIZE reads were accepted.  A known
+ * key code is translated to an input keycode, pushed into the fifo and
+ * reported as pressed; an entry whose masked value is 0 drains the fifo
+ * and reports every stored keycode as released.  Unknown codes are only
+ * logged.  Any other event is reported as a press and release of
+ * KEY_UNKNOWN.
+ */
+static void acpi_fujitsu_hotkey_notify(acpi_handle handle, u32 event,
+                                      void *data)
+{
+       struct input_dev *input;
+       int keycode, keycode_r;
+       unsigned int irb = 1;
+       int i, status;
+
+       input = fujitsu_hotkey->input;
+
+       vdbg_printk(FUJLAPTOP_DBG_TRACE, "Hotkey event\n");
+
+       switch (event) {
+       case ACPI_FUJITSU_NOTIFY_CODE1:
+               i = 0;
+               while ((irb = get_irb()) != 0
+                      && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) {
+                       vdbg_printk(FUJLAPTOP_DBG_TRACE, "GIRB result [%x]\n",
+                                   irb);
+
+                       /* only the bits selected by 0x4ff identify the key */
+                       switch (irb & 0x4ff) {
+                       case LOCK_KEY:
+                               keycode = KEY_SCREENLOCK;
+                               break;
+                       case DISPLAY_KEY:
+                               keycode = KEY_MEDIA;
+                               break;
+                       case ENERGY_KEY:
+                               keycode = KEY_EMAIL;
+                               break;
+                       case REST_KEY:
+                               keycode = KEY_SUSPEND;
+                               break;
+                       case 0:
+                               keycode = 0;
+                               break;
+                       default:
+                               vdbg_printk(FUJLAPTOP_DBG_WARN,
+                                       "Unknown GIRB result [%x]\n", irb);
+                               keycode = -1;
+                               break;
+                       }
+                       if (keycode > 0) {
+                               /* press: remember the key, report it only if stored */
+                               vdbg_printk(FUJLAPTOP_DBG_TRACE,
+                                       "Push keycode into ringbuffer [%d]\n",
+                                       keycode);
+                               status = kfifo_put(fujitsu_hotkey->fifo,
+                                               (unsigned char *)&keycode,
+                                               sizeof(keycode));
+                               if (status != sizeof(keycode)) {
+                                       vdbg_printk(FUJLAPTOP_DBG_WARN,
+                                               "Could not push keycode [0x%x]\n",
+                                               keycode);
+                               } else {
+                                       input_report_key(input, keycode, 1);
+                                       input_sync(input);
+                               }
+                       } else if (keycode == 0) {
+                               /* release: report every remembered key as released */
+                               while ((status =
+                                       kfifo_get
+                                       (fujitsu_hotkey->fifo, (unsigned char *)
+                                        &keycode_r,
+                                        sizeof
+                                        (keycode_r))) == sizeof(keycode_r)) {
+                                       input_report_key(input, keycode_r, 0);
+                                       input_sync(input);
+                                       vdbg_printk(FUJLAPTOP_DBG_TRACE,
+                                                   "Pop keycode from ringbuffer [%d]\n",
+                                                   keycode_r);
+                               }
+                       }
+               }
+
+               break;
+       default:
+               keycode = KEY_UNKNOWN;
+               vdbg_printk(FUJLAPTOP_DBG_WARN,
+                           "Unsupported event [0x%x]\n", event);
+               input_report_key(input, keycode, 1);
+               input_sync(input);
+               input_report_key(input, keycode, 0);
+               input_sync(input);
+               break;
+       }
+
+       return;
+}
+
+/* Initialization */
+
  static const struct acpi_device_id fujitsu_device_ids[] = {
         {ACPI_FUJITSU_HID, 0},
         {"", 0},
@@ -251,11 +910,24 @@ static struct acpi_driver acpi_fujitsu_driver = {
                 },
  };
  
-/* Initialization */
+/* ACPI hardware IDs handled by the hotkey driver; "" terminates the list */
+static const struct acpi_device_id fujitsu_hotkey_device_ids[] = {
+       {ACPI_FUJITSU_HOTKEY_HID, 0},
+       {"", 0},
+};
+
+/* ACPI driver for the hotkey device, wiring up its add/remove callbacks */
+static struct acpi_driver acpi_fujitsu_hotkey_driver = {
+       .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME,
+       .class = ACPI_FUJITSU_CLASS,
+       .ids = fujitsu_hotkey_device_ids,
+       .ops = {
+               .add = acpi_fujitsu_hotkey_add,
+               .remove = acpi_fujitsu_hotkey_remove,
+               },
+};
  
  static int __init fujitsu_init(void)
  {
-       int ret, result;
+       int ret, result, max_brightness;
  
         if (acpi_disabled)
                 return -ENODEV;
@@ -271,19 +943,6 @@ static int __init fujitsu_init(void)
                 goto fail_acpi;
         }
  
-       /* Register backlight stuff */
-
-       fujitsu->bl_device =
-           backlight_device_register("fujitsu-laptop", NULL, NULL,
-                                     &fujitsubl_ops);
-       if (IS_ERR(fujitsu->bl_device))
-               return PTR_ERR(fujitsu->bl_device);
-
-       fujitsu->bl_device->props.max_brightness = FUJITSU_LCD_N_LEVELS - 1;
-       ret = platform_driver_register(&fujitsupf_driver);
-       if (ret)
-               goto fail_backlight;
-
         /* Register platform stuff */
  
         fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1);
@@ -302,28 +961,68 @@ static int __init fujitsu_init(void)
         if (ret)
                 goto fail_platform_device2;
  
+       /* Register backlight stuff */
+       fujitsu->bl_device =
+           backlight_device_register("fujitsu-laptop", NULL, NULL,
+                                     &fujitsubl_ops);
+       if (IS_ERR(fujitsu->bl_device)) {
+               /* Unwind earlier registrations instead of leaking them */
+               ret = PTR_ERR(fujitsu->bl_device);
+               goto fail_platform_device2;
+       }
+       max_brightness = fujitsu->max_brightness;
+       fujitsu->bl_device->props.max_brightness = max_brightness - 1;
+       fujitsu->bl_device->props.brightness = fujitsu->brightness_level;
+
+       ret = platform_driver_register(&fujitsupf_driver);
+       if (ret)
+               goto fail_backlight;
+
+       /* Register hotkey driver */
+
+       fujitsu_hotkey = kmalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL);
+       if (!fujitsu_hotkey) {
+               ret = -ENOMEM;
+               goto fail_hotkey;
+       }
+       memset(fujitsu_hotkey, 0, sizeof(struct fujitsu_hotkey_t));
+
+       result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver);
+       if (result < 0) {
+               ret = -ENODEV;
+               goto fail_hotkey1;
+       }
+
         printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION
                " successfully loaded.\n");
  
         return 0;
  
-      fail_platform_device2:
+fail_hotkey1:
  
-       platform_device_del(fujitsu->pf_device);
-
-      fail_platform_device1:
-
-       platform_device_put(fujitsu->pf_device);
+       kfree(fujitsu_hotkey);
  
-      fail_platform_driver:
+fail_hotkey:
  
         platform_driver_unregister(&fujitsupf_driver);
  
-      fail_backlight:
+fail_backlight:
  
         backlight_device_unregister(fujitsu->bl_device);
  
-      fail_acpi:
+fail_platform_device2:
+
+       platform_device_del(fujitsu->pf_device);
+
+fail_platform_device1:
+
+       platform_device_put(fujitsu->pf_device);
+
+fail_platform_driver:
+
+       acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+
+fail_acpi:
  
         kfree(fujitsu);
  
@@ -342,19 +1041,43 @@ static void __exit fujitsu_cleanup(void)
  
         kfree(fujitsu);
  
+       acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver);
+
+       kfree(fujitsu_hotkey);
+
         printk(KERN_INFO "fujitsu-laptop: driver unloaded.\n");
  }
  
  module_init(fujitsu_init);
  module_exit(fujitsu_cleanup);
  
-MODULE_AUTHOR("Jonathan Woithe");
+module_param(use_alt_lcd_levels, uint, 0644);
+MODULE_PARM_DESC(use_alt_lcd_levels,
+                "Use alternative interface for lcd_levels (needed for Lifebook s6410).");
+module_param(disable_brightness_keys, uint, 0644);
+MODULE_PARM_DESC(disable_brightness_keys,
+                "Disable brightness keys (eg. if they are already handled by the generic ACPI_VIDEO device).");
+module_param(disable_brightness_adjust, uint, 0644);
+MODULE_PARM_DESC(disable_brightness_adjust, "Disable brightness adjustment .");
+#ifdef CONFIG_FUJITSU_LAPTOP_DEBUG
+module_param_named(debug, dbg_level, uint, 0644);
+MODULE_PARM_DESC(debug, "Sets debug level bit-mask");
+#endif
+
+MODULE_AUTHOR("Jonathan Woithe, Peter Gruber");
  MODULE_DESCRIPTION("Fujitsu laptop extras support");
  MODULE_VERSION(FUJITSU_DRIVER_VERSION);
  MODULE_LICENSE("GPL");
  
+MODULE_ALIAS
+    ("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*");
+MODULE_ALIAS
+    ("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*");
+
  static struct pnp_device_id pnp_ids[] = {
         { .id = "FUJ02bf" },
+       { .id = "FUJ02B1" },
+       { .id = "FUJ02E3" },
         { .id = "" }
  };
  MODULE_DEVICE_TABLE(pnp, pnp_ids);
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c

index f9ad960d7c1a61305e6f3f1841bc0e8437048a18..66e5a5487c20a228c67793e90d2f95b5dc36e53b 100644 (file)
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -2,7 +2,7 @@
   * Block driver for media (i.e., flash cards)
   *
   * Copyright 2002 Hewlett-Packard Company
- * Copyright 2005-2007 Pierre Ossman
+ * Copyright 2005-2008 Pierre Ossman
   *
   * Use consistent with the GNU GPL is permitted,
   * provided that this copyright notice is
@@ -237,17 +237,6 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
                 if (brq.data.blocks > card->host->max_blk_count)
                         brq.data.blocks = card->host->max_blk_count;
  
-               /*
-                * If the host doesn't support multiple block writes, force
-                * block writes to single block. SD cards are excepted from
-                * this rule as they support querying the number of
-                * successfully written sectors.
-                */
-               if (rq_data_dir(req) != READ &&
-                   !(card->host->caps & MMC_CAP_MULTIWRITE) &&
-                   !mmc_card_sd(card))
-                       brq.data.blocks = 1;
-
                 if (brq.data.blocks > 1) {
                         /* SPI multiblock writes terminate using a special
                          * token, not a STOP_TRANSMISSION request.
@@ -296,22 +285,24 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
  
                 mmc_queue_bounce_post(mq);
  
+               /*
+                * Check for errors here, but don't jump to cmd_err
+                * until later as we need to wait for the card to leave
+                * programming mode even when things go wrong.
+                */
                 if (brq.cmd.error) {
                         printk(KERN_ERR "%s: error %d sending read/write command\n",
                                req->rq_disk->disk_name, brq.cmd.error);
-                       goto cmd_err;
                 }
  
                 if (brq.data.error) {
                         printk(KERN_ERR "%s: error %d transferring data\n",
                                req->rq_disk->disk_name, brq.data.error);
-                       goto cmd_err;
                 }
  
                 if (brq.stop.error) {
                         printk(KERN_ERR "%s: error %d sending stop command\n",
                                req->rq_disk->disk_name, brq.stop.error);
-                       goto cmd_err;
                 }
  
                 if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
@@ -344,6 +335,9 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
  #endif
                 }
  
+               if (brq.cmd.error || brq.data.error || brq.stop.error)
+                       goto cmd_err;
+
                 /*
                  * A block was successfully transferred.
                  */
@@ -362,30 +356,32 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
          * mark the known good sectors as ok.
          *
          * If the card is not SD, we can still ok written sectors
-        * if the controller can do proper error reporting.
+        * as reported by the controller (which might be less than
+        * the real number of written sectors, but never more).
          *
          * For reads we just fail the entire chunk as that should
          * be safe in all cases.
          */
-       if (rq_data_dir(req) != READ && mmc_card_sd(card)) {
-               u32 blocks;
-               unsigned int bytes;
-
-               blocks = mmc_sd_num_wr_blocks(card);
-               if (blocks != (u32)-1) {
-                       if (card->csd.write_partial)
-                               bytes = blocks << md->block_bits;
-                       else
-                               bytes = blocks << 9;
+       if (rq_data_dir(req) != READ) {
+               if (mmc_card_sd(card)) {
+                       u32 blocks;
+                       unsigned int bytes;
+
+                       blocks = mmc_sd_num_wr_blocks(card);
+                       if (blocks != (u32)-1) {
+                               if (card->csd.write_partial)
+                                       bytes = blocks << md->block_bits;
+                               else
+                                       bytes = blocks << 9;
+                               spin_lock_irq(&md->lock);
+                               ret = __blk_end_request(req, 0, bytes);
+                               spin_unlock_irq(&md->lock);
+                       }
+               } else {
                         spin_lock_irq(&md->lock);
-                       ret = __blk_end_request(req, 0, bytes);
+                       ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
                         spin_unlock_irq(&md->lock);
                 }
-       } else if (rq_data_dir(req) != READ &&
-                  (card->host->caps & MMC_CAP_MULTIWRITE)) {
-               spin_lock_irq(&md->lock);
-               ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
-               spin_unlock_irq(&md->lock);
         }
  
         mmc_release_host(card->host);
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c

index ffadee549a414cd99a8f3a5c5db71f3554d019c8..d6b9b486417cc702acb18bba2fd413baedf592d9 100644 (file)
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -1,7 +1,7 @@
  /*
   *  linux/drivers/mmc/card/mmc_test.c
   *
- *  Copyright 2007 Pierre Ossman
+ *  Copyright 2007-2008 Pierre Ossman
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
@@ -26,13 +26,17 @@
  struct mmc_test_card {
         struct mmc_card *card;
  
+       u8              scratch[BUFFER_SIZE];
         u8              *buffer;
  };
  
  /*******************************************************************/
-/*  Helper functions                                               */
+/*  General helper functions                                       */
  /*******************************************************************/
  
+/*
+ * Configure correct block size in card
+ */
  static int mmc_test_set_blksize(struct mmc_test_card *test, unsigned size)
  {
         struct mmc_command cmd;
@@ -48,117 +52,61 @@ static int mmc_test_set_blksize(struct mmc_test_card *test, unsigned size)
         return 0;
  }
  
-static int __mmc_test_transfer(struct mmc_test_card *test, int write,
-       unsigned broken_xfer, u8 *buffer, unsigned addr,
-       unsigned blocks, unsigned blksz)
+/*
+ * Fill in the mmc_request structure given a set of transfer parameters.
+ */
+static void mmc_test_prepare_mrq(struct mmc_test_card *test,
+       struct mmc_request *mrq, struct scatterlist *sg, unsigned sg_len,
+       unsigned dev_addr, unsigned blocks, unsigned blksz, int write)
  {
-       int ret, busy;
-
-       struct mmc_request mrq;
-       struct mmc_command cmd;
-       struct mmc_command stop;
-       struct mmc_data data;
-
-       struct scatterlist sg;
-
-       memset(&mrq, 0, sizeof(struct mmc_request));
-
-       mrq.cmd = &cmd;
-       mrq.data = &data;
-
-       memset(&cmd, 0, sizeof(struct mmc_command));
+       BUG_ON(!mrq || !mrq->cmd || !mrq->data || !mrq->stop);
  
-       if (broken_xfer) {
-               if (blocks > 1) {
-                       cmd.opcode = write ?
-                               MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK;
-               } else {
-                       cmd.opcode = MMC_SEND_STATUS;
-               }
+       if (blocks > 1) {
+               mrq->cmd->opcode = write ?
+                       MMC_WRITE_MULTIPLE_BLOCK : MMC_READ_MULTIPLE_BLOCK;
         } else {
-               if (blocks > 1) {
-                       cmd.opcode = write ?
-                               MMC_WRITE_MULTIPLE_BLOCK : MMC_READ_MULTIPLE_BLOCK;
-               } else {
-                       cmd.opcode = write ?
-                               MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK;
-               }
+               mrq->cmd->opcode = write ?
+                       MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK;
         }
  
-       if (broken_xfer && blocks == 1)
-               cmd.arg = test->card->rca << 16;
-       else
-               cmd.arg = addr;
-       cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
+       mrq->cmd->arg = dev_addr;
+       mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC;
  
-       memset(&stop, 0, sizeof(struct mmc_command));
-
-       if (!broken_xfer && (blocks > 1)) {
-               stop.opcode = MMC_STOP_TRANSMISSION;
-               stop.arg = 0;
-               stop.flags = MMC_RSP_R1B | MMC_CMD_AC;
-
-               mrq.stop = &stop;
+       if (blocks == 1)
+               mrq->stop = NULL;
+       else {
+               mrq->stop->opcode = MMC_STOP_TRANSMISSION;
+               mrq->stop->arg = 0;
+               mrq->stop->flags = MMC_RSP_R1B | MMC_CMD_AC;
         }
  
-       memset(&data, 0, sizeof(struct mmc_data));
-
-       data.blksz = blksz;
-       data.blocks = blocks;
-       data.flags = write ? MMC_DATA_WRITE : MMC_DATA_READ;
-       data.sg = &sg;
-       data.sg_len = 1;
-
-       sg_init_one(&sg, buffer, blocks * blksz);
-
-       mmc_set_data_timeout(&data, test->card);
+       mrq->data->blksz = blksz;
+       mrq->data->blocks = blocks;
+       mrq->data->flags = write ? MMC_DATA_WRITE : MMC_DATA_READ;
+       mrq->data->sg = sg;
+       mrq->data->sg_len = sg_len;
  
-       mmc_wait_for_req(test->card->host, &mrq);
-
-       ret = 0;
-
-       if (broken_xfer) {
-               if (!ret && cmd.error)
-                       ret = cmd.error;
-               if (!ret && data.error == 0)
-                       ret = RESULT_FAIL;
-               if (!ret && data.error != -ETIMEDOUT)
-                       ret = data.error;
-               if (!ret && stop.error)
-                       ret = stop.error;
-               if (blocks > 1) {
-                       if (!ret && data.bytes_xfered > blksz)
-                               ret = RESULT_FAIL;
-               } else {
-                       if (!ret && data.bytes_xfered > 0)
-                               ret = RESULT_FAIL;
-               }
-       } else {
-               if (!ret && cmd.error)
-                       ret = cmd.error;
-               if (!ret && data.error)
-                       ret = data.error;
-               if (!ret && stop.error)
-                       ret = stop.error;
-               if (!ret && data.bytes_xfered != blocks * blksz)
-                       ret = RESULT_FAIL;
-       }
+       mmc_set_data_timeout(mrq->data, test->card);
+}
  
-       if (ret == -EINVAL)
-               ret = RESULT_UNSUP_HOST;
+/*
+ * Wait for the card to finish the busy state
+ */
+static int mmc_test_wait_busy(struct mmc_test_card *test)
+{
+       int ret, busy;
+       struct mmc_command cmd;
  
         busy = 0;
         do {
-               int ret2;
-
                 memset(&cmd, 0, sizeof(struct mmc_command));
  
                 cmd.opcode = MMC_SEND_STATUS;
                 cmd.arg = test->card->rca << 16;
                 cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
  
-               ret2 = mmc_wait_for_cmd(test->card->host, &cmd, 0);
-               if (ret2)
+               ret = mmc_wait_for_cmd(test->card->host, &cmd, 0);
+               if (ret)
                         break;
  
                 if (!busy && !(cmd.resp[0] & R1_READY_FOR_DATA)) {
@@ -172,14 +120,57 @@ static int __mmc_test_transfer(struct mmc_test_card *test, int write,
         return ret;
  }
  
-static int mmc_test_transfer(struct mmc_test_card *test, int write,
-       u8 *buffer, unsigned addr, unsigned blocks, unsigned blksz)
+/*
+ * Transfer a single sector of kernel addressable data
+ */
+static int mmc_test_buffer_transfer(struct mmc_test_card *test,
+       u8 *buffer, unsigned addr, unsigned blksz, int write)
  {
-       return __mmc_test_transfer(test, write, 0, buffer,
-                       addr, blocks, blksz);
+       int ret;
+
+       struct mmc_request mrq;
+       struct mmc_command cmd;
+       struct mmc_command stop;
+       struct mmc_data data;
+
+       struct scatterlist sg;
+
+       memset(&mrq, 0, sizeof(struct mmc_request));
+       memset(&cmd, 0, sizeof(struct mmc_command));
+       memset(&data, 0, sizeof(struct mmc_data));
+       memset(&stop, 0, sizeof(struct mmc_command));
+
+       mrq.cmd = &cmd;
+       mrq.data = &data;
+       mrq.stop = &stop;
+
+       sg_init_one(&sg, buffer, blksz);
+
+       mmc_test_prepare_mrq(test, &mrq, &sg, 1, addr, 1, blksz, write);
+
+       mmc_wait_for_req(test->card->host, &mrq);
+
+       if (cmd.error)
+               return cmd.error;
+       if (data.error)
+               return data.error;
+
+       ret = mmc_test_wait_busy(test);
+       if (ret)
+               return ret;
+
+       return 0;
  }
  
-static int mmc_test_prepare_verify(struct mmc_test_card *test, int write)
+/*******************************************************************/
+/*  Test preparation and cleanup                                   */
+/*******************************************************************/
+
+/*
+ * Fill the first couple of sectors of the card with known data
+ * so that bad reads/writes can be detected
+ */
+static int __mmc_test_prepare(struct mmc_test_card *test, int write)
  {
         int ret, i;
  
@@ -188,15 +179,14 @@ static int mmc_test_prepare_verify(struct mmc_test_card *test, int write)
                 return ret;
  
         if (write)
-               memset(test->buffer, 0xDF, BUFFER_SIZE);
+               memset(test->buffer, 0xDF, 512);
         else {
-               for (i = 0;i < BUFFER_SIZE;i++)
+               for (i = 0;i < 512;i++)
                         test->buffer[i] = i;
         }
  
         for (i = 0;i < BUFFER_SIZE / 512;i++) {
-               ret = mmc_test_transfer(test, 1, test->buffer + i * 512,
-                       i * 512, 1, 512);
+               ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1);
                 if (ret)
                         return ret;
         }
@@ -204,41 +194,218 @@ static int mmc_test_prepare_verify(struct mmc_test_card *test, int write)
         return 0;
  }
  
-static int mmc_test_prepare_verify_write(struct mmc_test_card *test)
+static int mmc_test_prepare_write(struct mmc_test_card *test)
+{
+       return __mmc_test_prepare(test, 1);
+}
+
+static int mmc_test_prepare_read(struct mmc_test_card *test)
+{
+       return __mmc_test_prepare(test, 0);
+}
+
+static int mmc_test_cleanup(struct mmc_test_card *test)
  {
-       return mmc_test_prepare_verify(test, 1);
+       int ret, i;
+
+       ret = mmc_test_set_blksize(test, 512);
+       if (ret)
+               return ret;
+
+       memset(test->buffer, 0, 512);
+
+       for (i = 0;i < BUFFER_SIZE / 512;i++) {
+               ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
  }
  
-static int mmc_test_prepare_verify_read(struct mmc_test_card *test)
+/*******************************************************************/
+/*  Test execution helpers                                         */
+/*******************************************************************/
+
+/*
+ * Modifies the mmc_request to perform the "short transfer" tests
+ */
+static void mmc_test_prepare_broken_mrq(struct mmc_test_card *test,
+       struct mmc_request *mrq, int write)
  {
-       return mmc_test_prepare_verify(test, 0);
+       BUG_ON(!mrq || !mrq->cmd || !mrq->data);
+
+       if (mrq->data->blocks > 1) {
+               mrq->cmd->opcode = write ?
+                       MMC_WRITE_BLOCK : MMC_READ_SINGLE_BLOCK;
+               mrq->stop = NULL;
+       } else {
+               mrq->cmd->opcode = MMC_SEND_STATUS;
+               mrq->cmd->arg = test->card->rca << 16;
+       }
  }
  
-static int mmc_test_verified_transfer(struct mmc_test_card *test, int write,
-       u8 *buffer, unsigned addr, unsigned blocks, unsigned blksz)
+/*
+ * Checks that a normal transfer didn't have any errors
+ */
+static int mmc_test_check_result(struct mmc_test_card *test,
+       struct mmc_request *mrq)
  {
-       int ret, i, sectors;
+       int ret;
  
-       /*
-        * It is assumed that the above preparation has been done.
-        */
+       BUG_ON(!mrq || !mrq->cmd || !mrq->data);
+
+       ret = 0;
  
-       memset(test->buffer, 0, BUFFER_SIZE);
+       if (!ret && mrq->cmd->error)
+               ret = mrq->cmd->error;
+       if (!ret && mrq->data->error)
+               ret = mrq->data->error;
+       if (!ret && mrq->stop && mrq->stop->error)
+               ret = mrq->stop->error;
+       if (!ret && mrq->data->bytes_xfered !=
+               mrq->data->blocks * mrq->data->blksz)
+               ret = RESULT_FAIL;
+
+       if (ret == -EINVAL)
+               ret = RESULT_UNSUP_HOST;
+
+       return ret;
+}
+
+/*
+ * Checks that a "short transfer" behaved as expected
+ */
+static int mmc_test_check_broken_result(struct mmc_test_card *test,
+       struct mmc_request *mrq)
+{
+       int ret;
+
+       BUG_ON(!mrq || !mrq->cmd || !mrq->data);
+
+       ret = 0;
+
+       if (!ret && mrq->cmd->error)
+               ret = mrq->cmd->error;
+       if (!ret && mrq->data->error == 0)
+               ret = RESULT_FAIL;
+       if (!ret && mrq->data->error != -ETIMEDOUT)
+               ret = mrq->data->error;
+       if (!ret && mrq->stop && mrq->stop->error)
+               ret = mrq->stop->error;
+       if (mrq->data->blocks > 1) {
+               if (!ret && mrq->data->bytes_xfered > mrq->data->blksz)
+                       ret = RESULT_FAIL;
+       } else {
+               if (!ret && mrq->data->bytes_xfered > 0)
+                       ret = RESULT_FAIL;
+       }
+
+       if (ret == -EINVAL)
+               ret = RESULT_UNSUP_HOST;
+
+       return ret;
+}
+
+/*
+ * Tests a basic transfer with certain parameters
+ */
+static int mmc_test_simple_transfer(struct mmc_test_card *test,
+       struct scatterlist *sg, unsigned sg_len, unsigned dev_addr,
+       unsigned blocks, unsigned blksz, int write)
+{
+       struct mmc_request mrq;
+       struct mmc_command cmd;
+       struct mmc_command stop;
+       struct mmc_data data;
+
+       memset(&mrq, 0, sizeof(struct mmc_request));
+       memset(&cmd, 0, sizeof(struct mmc_command));
+       memset(&data, 0, sizeof(struct mmc_data));
+       memset(&stop, 0, sizeof(struct mmc_command));
+
+       mrq.cmd = &cmd;
+       mrq.data = &data;
+       mrq.stop = &stop;
+
+       mmc_test_prepare_mrq(test, &mrq, sg, sg_len, dev_addr,
+               blocks, blksz, write);
+
+       mmc_wait_for_req(test->card->host, &mrq);
+
+       mmc_test_wait_busy(test);
+
+       return mmc_test_check_result(test, &mrq);
+}
+
+/*
+ * Tests a transfer where the card will fail completely or partly
+ */
+static int mmc_test_broken_transfer(struct mmc_test_card *test,
+       unsigned blocks, unsigned blksz, int write)
+{
+       struct mmc_request mrq;
+       struct mmc_command cmd;
+       struct mmc_command stop;
+       struct mmc_data data;
+
+       struct scatterlist sg;
+
+       memset(&mrq, 0, sizeof(struct mmc_request));
+       memset(&cmd, 0, sizeof(struct mmc_command));
+       memset(&data, 0, sizeof(struct mmc_data));
+       memset(&stop, 0, sizeof(struct mmc_command));
+
+       mrq.cmd = &cmd;
+       mrq.data = &data;
+       mrq.stop = &stop;
+
+       sg_init_one(&sg, test->buffer, blocks * blksz);
+
+       mmc_test_prepare_mrq(test, &mrq, &sg, 1, 0, blocks, blksz, write);
+       mmc_test_prepare_broken_mrq(test, &mrq, write);
+
+       mmc_wait_for_req(test->card->host, &mrq);
+
+       mmc_test_wait_busy(test);
+
+       return mmc_test_check_broken_result(test, &mrq);
+}
+
+/*
+ * Does a complete transfer test where data is also validated
+ *
+ * Note: mmc_test_prepare_{read,write}() must have been done before this call
+ */
+static int mmc_test_transfer(struct mmc_test_card *test,
+       struct scatterlist *sg, unsigned sg_len, unsigned dev_addr,
+       unsigned blocks, unsigned blksz, int write)
+{
+       int ret, i;
+       unsigned long flags;
  
         if (write) {
                 for (i = 0;i < blocks * blksz;i++)
-                       buffer[i] = i;
+                       test->scratch[i] = i;
+       } else {
+               memset(test->scratch, 0, BUFFER_SIZE);
         }
+       local_irq_save(flags);
+       sg_copy_from_buffer(sg, sg_len, test->scratch, BUFFER_SIZE);
+       local_irq_restore(flags);
  
         ret = mmc_test_set_blksize(test, blksz);
         if (ret)
                 return ret;
  
-       ret = mmc_test_transfer(test, write, buffer, addr, blocks, blksz);
+       ret = mmc_test_simple_transfer(test, sg, sg_len, dev_addr,
+               blocks, blksz, write);
         if (ret)
                 return ret;
  
         if (write) {
+               int sectors;
+
                 ret = mmc_test_set_blksize(test, 512);
                 if (ret)
                         return ret;
@@ -253,9 +420,9 @@ static int mmc_test_verified_transfer(struct mmc_test_card *test, int write,
                 memset(test->buffer, 0, sectors * 512);
  
                 for (i = 0;i < sectors;i++) {
-                       ret = mmc_test_transfer(test, 0,
+                       ret = mmc_test_buffer_transfer(test,
                                 test->buffer + i * 512,
-                               addr + i * 512, 1, 512);
+                               dev_addr + i * 512, 512, 0);
                         if (ret)
                                 return ret;
                 }
@@ -270,8 +437,11 @@ static int mmc_test_verified_transfer(struct mmc_test_card *test, int write,
                                 return RESULT_FAIL;
                 }
         } else {
+               local_irq_save(flags);
+               sg_copy_to_buffer(sg, sg_len, test->scratch, BUFFER_SIZE);
+               local_irq_restore(flags);
                 for (i = 0;i < blocks * blksz;i++) {
-                       if (buffer[i] != (u8)i)
+                       if (test->scratch[i] != (u8)i)
                                 return RESULT_FAIL;
                 }
         }
@@ -279,26 +449,6 @@ static int mmc_test_verified_transfer(struct mmc_test_card *test, int write,
         return 0;
  }
  
-static int mmc_test_cleanup_verify(struct mmc_test_card *test)
-{
-       int ret, i;
-
-       ret = mmc_test_set_blksize(test, 512);
-       if (ret)
-               return ret;
-
-       memset(test->buffer, 0, BUFFER_SIZE);
-
-       for (i = 0;i < BUFFER_SIZE / 512;i++) {
-               ret = mmc_test_transfer(test, 1, test->buffer + i * 512,
-                       i * 512, 1, 512);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
  /*******************************************************************/
  /*  Tests                                                          */
  /*******************************************************************/
@@ -314,12 +464,15 @@ struct mmc_test_case {
  static int mmc_test_basic_write(struct mmc_test_card *test)
  {
         int ret;
+       struct scatterlist sg;
  
         ret = mmc_test_set_blksize(test, 512);
         if (ret)
                 return ret;
  
-       ret = mmc_test_transfer(test, 1, test->buffer, 0, 1, 512);
+       sg_init_one(&sg, test->buffer, 512);
+
+       ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 1);
         if (ret)
                 return ret;
  
@@ -329,12 +482,15 @@ static int mmc_test_basic_write(struct mmc_test_card *test)
  static int mmc_test_basic_read(struct mmc_test_card *test)
  {
         int ret;
+       struct scatterlist sg;
  
         ret = mmc_test_set_blksize(test, 512);
         if (ret)
                 return ret;
  
-       ret = mmc_test_transfer(test, 0, test->buffer, 0, 1, 512);
+       sg_init_one(&sg, test->buffer, 512);
+       /* Last argument is the write flag: 0 requests a read transfer */
+       ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 0);
         if (ret)
                 return ret;
  
@@ -344,8 +500,11 @@ static int mmc_test_basic_read(struct mmc_test_card *test)
  static int mmc_test_verify_write(struct mmc_test_card *test)
  {
         int ret;
+       struct scatterlist sg;
+
+       sg_init_one(&sg, test->buffer, 512);
  
-       ret = mmc_test_verified_transfer(test, 1, test->buffer, 0, 1, 512);
+       ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 1);
         if (ret)
                 return ret;
  
@@ -355,8 +514,11 @@ static int mmc_test_verify_write(struct mmc_test_card *test)
  static int mmc_test_verify_read(struct mmc_test_card *test)
  {
         int ret;
+       struct scatterlist sg;
+
+       sg_init_one(&sg, test->buffer, 512);
  
-       ret = mmc_test_verified_transfer(test, 0, test->buffer, 0, 1, 512);
+       ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 0);
         if (ret)
                 return ret;
  
@@ -367,6 +529,7 @@ static int mmc_test_multi_write(struct mmc_test_card *test)
  {
         int ret;
         unsigned int size;
+       struct scatterlist sg;
  
         if (test->card->host->max_blk_count == 1)
                 return RESULT_UNSUP_HOST;
@@ -379,8 +542,9 @@ static int mmc_test_multi_write(struct mmc_test_card *test)
         if (size < 1024)
                 return RESULT_UNSUP_HOST;
  
-       ret = mmc_test_verified_transfer(test, 1, test->buffer, 0,
-               size / 512, 512);
+       sg_init_one(&sg, test->buffer, size);
+
+       ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 1);
         if (ret)
                 return ret;
  
@@ -391,6 +555,7 @@ static int mmc_test_multi_read(struct mmc_test_card *test)
  {
         int ret;
         unsigned int size;
+       struct scatterlist sg;
  
         if (test->card->host->max_blk_count == 1)
                 return RESULT_UNSUP_HOST;
@@ -403,8 +568,9 @@ static int mmc_test_multi_read(struct mmc_test_card *test)
         if (size < 1024)
                 return RESULT_UNSUP_HOST;
  
-       ret = mmc_test_verified_transfer(test, 0, test->buffer, 0,
-               size / 512, 512);
+       sg_init_one(&sg, test->buffer, size);
+
+       ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 0);
         if (ret)
                 return ret;
  
@@ -414,13 +580,14 @@ static int mmc_test_multi_read(struct mmc_test_card *test)
  static int mmc_test_pow2_write(struct mmc_test_card *test)
  {
         int ret, i;
+       struct scatterlist sg;
  
         if (!test->card->csd.write_partial)
                 return RESULT_UNSUP_CARD;
  
         for (i = 1; i < 512;i <<= 1) {
-               ret = mmc_test_verified_transfer(test, 1,
-                       test->buffer, 0, 1, i);
+               sg_init_one(&sg, test->buffer, i);
+               ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 1);
                 if (ret)
                         return ret;
         }
@@ -431,13 +598,14 @@ static int mmc_test_pow2_write(struct mmc_test_card *test)
  static int mmc_test_pow2_read(struct mmc_test_card *test)
  {
         int ret, i;
+       struct scatterlist sg;
  
         if (!test->card->csd.read_partial)
                 return RESULT_UNSUP_CARD;
  
         for (i = 1; i < 512;i <<= 1) {
-               ret = mmc_test_verified_transfer(test, 0,
-                       test->buffer, 0, 1, i);
+               sg_init_one(&sg, test->buffer, i);
+               ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 0);
                 if (ret)
                         return ret;
         }
@@ -448,13 +616,14 @@ static int mmc_test_pow2_read(struct mmc_test_card *test)
  static int mmc_test_weird_write(struct mmc_test_card *test)
  {
         int ret, i;
+       struct scatterlist sg;
  
         if (!test->card->csd.write_partial)
                 return RESULT_UNSUP_CARD;
  
         for (i = 3; i < 512;i += 7) {
-               ret = mmc_test_verified_transfer(test, 1,
-                       test->buffer, 0, 1, i);
+               sg_init_one(&sg, test->buffer, i);
+               ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 1);
                 if (ret)
                         return ret;
         }
@@ -465,13 +634,14 @@ static int mmc_test_weird_write(struct mmc_test_card *test)
  static int mmc_test_weird_read(struct mmc_test_card *test)
  {
         int ret, i;
+       struct scatterlist sg;
  
         if (!test->card->csd.read_partial)
                 return RESULT_UNSUP_CARD;
  
         for (i = 3; i < 512;i += 7) {
-               ret = mmc_test_verified_transfer(test, 0,
-                       test->buffer, 0, 1, i);
+               sg_init_one(&sg, test->buffer, i);
+               ret = mmc_test_transfer(test, &sg, 1, 0, 1, i, 0);
                 if (ret)
                         return ret;
         }
@@ -482,10 +652,11 @@ static int mmc_test_weird_read(struct mmc_test_card *test)
  static int mmc_test_align_write(struct mmc_test_card *test)
  {
         int ret, i;
+       struct scatterlist sg;
  
         for (i = 1;i < 4;i++) {
-               ret = mmc_test_verified_transfer(test, 1, test->buffer + i,
-                       0, 1, 512);
+               sg_init_one(&sg, test->buffer + i, 512);
+               ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 1);
                 if (ret)
                         return ret;
         }
@@ -496,10 +667,11 @@ static int mmc_test_align_write(struct mmc_test_card *test)
  static int mmc_test_align_read(struct mmc_test_card *test)
  {
         int ret, i;
+       struct scatterlist sg;
  
         for (i = 1;i < 4;i++) {
-               ret = mmc_test_verified_transfer(test, 0, test->buffer + i,
-                       0, 1, 512);
+               sg_init_one(&sg, test->buffer + i, 512);
+               ret = mmc_test_transfer(test, &sg, 1, 0, 1, 512, 0);
                 if (ret)
                         return ret;
         }
@@ -511,6 +683,7 @@ static int mmc_test_align_multi_write(struct mmc_test_card *test)
  {
         int ret, i;
         unsigned int size;
+       struct scatterlist sg;
  
         if (test->card->host->max_blk_count == 1)
                 return RESULT_UNSUP_HOST;
@@ -524,8 +697,8 @@ static int mmc_test_align_multi_write(struct mmc_test_card *test)
                 return RESULT_UNSUP_HOST;
  
         for (i = 1;i < 4;i++) {
-               ret = mmc_test_verified_transfer(test, 1, test->buffer + i,
-                       0, size / 512, 512);
+               sg_init_one(&sg, test->buffer + i, size);
+               ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 1);
                 if (ret)
                         return ret;
         }
@@ -537,6 +710,7 @@ static int mmc_test_align_multi_read(struct mmc_test_card *test)
  {
         int ret, i;
         unsigned int size;
+       struct scatterlist sg;
  
         if (test->card->host->max_blk_count == 1)
                 return RESULT_UNSUP_HOST;
@@ -550,8 +724,8 @@ static int mmc_test_align_multi_read(struct mmc_test_card *test)
                 return RESULT_UNSUP_HOST;
  
         for (i = 1;i < 4;i++) {
-               ret = mmc_test_verified_transfer(test, 0, test->buffer + i,
-                       0, size / 512, 512);
+               sg_init_one(&sg, test->buffer + i, size);
+               ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 0);
                 if (ret)
                         return ret;
         }
@@ -567,7 +741,7 @@ static int mmc_test_xfersize_write(struct mmc_test_card *test)
         if (ret)
                 return ret;
  
-       ret = __mmc_test_transfer(test, 1, 1, test->buffer, 0, 1, 512);
+       ret = mmc_test_broken_transfer(test, 1, 512, 1);
         if (ret)
                 return ret;
  
@@ -582,7 +756,7 @@ static int mmc_test_xfersize_read(struct mmc_test_card *test)
         if (ret)
                 return ret;
  
-       ret = __mmc_test_transfer(test, 0, 1, test->buffer, 0, 1, 512);
+       ret = mmc_test_broken_transfer(test, 1, 512, 0);
         if (ret)
                 return ret;
  
@@ -600,7 +774,7 @@ static int mmc_test_multi_xfersize_write(struct mmc_test_card *test)
         if (ret)
                 return ret;
  
-       ret = __mmc_test_transfer(test, 1, 1, test->buffer, 0, 2, 512);
+       ret = mmc_test_broken_transfer(test, 2, 512, 1);
         if (ret)
                 return ret;
  
@@ -618,7 +792,7 @@ static int mmc_test_multi_xfersize_read(struct mmc_test_card *test)
         if (ret)
                 return ret;
  
-       ret = __mmc_test_transfer(test, 0, 1, test->buffer, 0, 2, 512);
+       ret = mmc_test_broken_transfer(test, 2, 512, 0);
         if (ret)
                 return ret;
  
@@ -638,86 +812,86 @@ static const struct mmc_test_case mmc_test_cases[] = {
  
         {
                 .name = "Basic write (with data verification)",
-               .prepare = mmc_test_prepare_verify_write,
+               .prepare = mmc_test_prepare_write,
                 .run = mmc_test_verify_write,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Basic read (with data verification)",
-               .prepare = mmc_test_prepare_verify_read,
+               .prepare = mmc_test_prepare_read,
                 .run = mmc_test_verify_read,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Multi-block write",
-               .prepare = mmc_test_prepare_verify_write,
+               .prepare = mmc_test_prepare_write,
                 .run = mmc_test_multi_write,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Multi-block read",
-               .prepare = mmc_test_prepare_verify_read,
+               .prepare = mmc_test_prepare_read,
                 .run = mmc_test_multi_read,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Power of two block writes",
-               .prepare = mmc_test_prepare_verify_write,
+               .prepare = mmc_test_prepare_write,
                 .run = mmc_test_pow2_write,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Power of two block reads",
-               .prepare = mmc_test_prepare_verify_read,
+               .prepare = mmc_test_prepare_read,
                 .run = mmc_test_pow2_read,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Weird sized block writes",
-               .prepare = mmc_test_prepare_verify_write,
+               .prepare = mmc_test_prepare_write,
                 .run = mmc_test_weird_write,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Weird sized block reads",
-               .prepare = mmc_test_prepare_verify_read,
+               .prepare = mmc_test_prepare_read,
                 .run = mmc_test_weird_read,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Badly aligned write",
-               .prepare = mmc_test_prepare_verify_write,
+               .prepare = mmc_test_prepare_write,
                 .run = mmc_test_align_write,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Badly aligned read",
-               .prepare = mmc_test_prepare_verify_read,
+               .prepare = mmc_test_prepare_read,
                 .run = mmc_test_align_read,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Badly aligned multi-block write",
-               .prepare = mmc_test_prepare_verify_write,
+               .prepare = mmc_test_prepare_write,
                 .run = mmc_test_align_multi_write,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
                 .name = "Badly aligned multi-block read",
-               .prepare = mmc_test_prepare_verify_read,
+               .prepare = mmc_test_prepare_read,
                 .run = mmc_test_align_multi_read,
-               .cleanup = mmc_test_cleanup_verify,
+               .cleanup = mmc_test_cleanup,
         },
  
         {
@@ -743,7 +917,7 @@ static const struct mmc_test_case mmc_test_cases[] = {
  
  static struct mutex mmc_test_lock;
  
-static void mmc_test_run(struct mmc_test_card *test)
+static void mmc_test_run(struct mmc_test_card *test, int testcase)
  {
         int i, ret;
  
@@ -753,6 +927,9 @@ static void mmc_test_run(struct mmc_test_card *test)
         mmc_claim_host(test->card->host);
  
         for (i = 0;i < ARRAY_SIZE(mmc_test_cases);i++) {
+               if (testcase && ((i + 1) != testcase))
+                       continue;
+
                 printk(KERN_INFO "%s: Test case %d. %s...\n",
                         mmc_hostname(test->card->host), i + 1,
                         mmc_test_cases[i].name);
@@ -824,9 +1001,12 @@ static ssize_t mmc_test_store(struct device *dev,
  {
         struct mmc_card *card;
         struct mmc_test_card *test;
+       int testcase;
  
         card = container_of(dev, struct mmc_card, dev);
  
+       testcase = simple_strtol(buf, NULL, 10);
+
         test = kzalloc(sizeof(struct mmc_test_card), GFP_KERNEL);
         if (!test)
                 return -ENOMEM;
@@ -836,7 +1016,7 @@ static ssize_t mmc_test_store(struct device *dev,
         test->buffer = kzalloc(BUFFER_SIZE, GFP_KERNEL);
         if (test->buffer) {
                 mutex_lock(&mmc_test_lock);
-               mmc_test_run(test);
+               mmc_test_run(test, testcase);
                 mutex_unlock(&mmc_test_lock);
         }
  
@@ -852,6 +1032,9 @@ static int mmc_test_probe(struct mmc_card *card)
  {
         int ret;
  
+       if ((card->type != MMC_TYPE_MMC) && (card->type != MMC_TYPE_SD))
+               return -ENODEV;
+
         mutex_init(&mmc_test_lock);
  
         ret = device_create_file(&card->dev, &dev_attr_test);
diff --git a/drivers/mmc/card/sdio_uart.c b/drivers/mmc/card/sdio_uart.c

index eeea84c309e691c8a7c838b527b95c7ce50c296c..78ad48718ab028e61b1d77d83e29de8efbe9930c 100644 (file)
--- a/drivers/mmc/card/sdio_uart.c
+++ b/drivers/mmc/card/sdio_uart.c
@@ -885,12 +885,14 @@ static void sdio_uart_set_termios(struct tty_struct *tty, struct ktermios *old_t
         sdio_uart_release_func(port);
  }
  
-static void sdio_uart_break_ctl(struct tty_struct *tty, int break_state)
+static int sdio_uart_break_ctl(struct tty_struct *tty, int break_state)
  {
         struct sdio_uart_port *port = tty->driver_data;
+       int result;
  
-       if (sdio_uart_claim_func(port) != 0)
-               return;
+       result = sdio_uart_claim_func(port);
+       if (result != 0)
+               return result;
  
         if (break_state == -1)
                 port->lcr |= UART_LCR_SBC;
@@ -899,6 +901,7 @@ static void sdio_uart_break_ctl(struct tty_struct *tty, int break_state)
         sdio_out(port, UART_LCR, port->lcr);
  
         sdio_uart_release_func(port);
+       return 0;
  }
  
  static int sdio_uart_tiocmget(struct tty_struct *tty, struct file *file)
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c

index 01ced4c5a61d45016234654c9e0cf3313ad0ff3d..3ee5b8c3b5ce42fa205d3bbc2ee2d5e4f6420794 100644 (file)
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -3,7 +3,7 @@
   *
   *  Copyright (C) 2003-2004 Russell King, All Rights Reserved.
   *  SD support Copyright (C) 2004 Ian Molton, All Rights Reserved.
- *  Copyright (C) 2005-2007 Pierre Ossman, All Rights Reserved.
+ *  Copyright (C) 2005-2008 Pierre Ossman, All Rights Reserved.
   *  MMCv4 support Copyright (C) 2006 Philip Langdale, All Rights Reserved.
   *
   * This program is free software; you can redistribute it and/or modify
@@ -294,6 +294,33 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card)
  }
  EXPORT_SYMBOL(mmc_set_data_timeout);
  
+/**
+ *     mmc_align_data_size - pads a transfer size to a more optimal value
+ *     @card: the MMC card associated with the data transfer
+ *             (not consulted by the current implementation)
+ *     @sz: original transfer size
+ *
+ *     Pads the original data size with a number of extra bytes in
+ *     order to avoid controller bugs and/or performance hits
+ *     (e.g. some controllers revert to PIO for certain sizes).
+ *
+ *     Returns the improved size, which might be unmodified.
+ *
+ *     Note that this function is only relevant when issuing a
+ *     single scatter gather entry.
+ */
+unsigned int mmc_align_data_size(struct mmc_card *card, unsigned int sz)
+{
+       /*
+        * FIXME: We don't have a system for the controller to tell
+        * the core about its problems yet, so for now we just 32-bit
+        * align the size.
+        *
+        * The expression below rounds sz up to the next multiple of
+        * four bytes; a size that is already a multiple of four
+        * (including zero) comes back unchanged.
+        */
+       sz = ((sz + 3) / 4) * 4;
+
+       return sz;
+}
+EXPORT_SYMBOL(mmc_align_data_size);
+
  /**
   *     __mmc_claim_host - exclusively claim a host
   *     @host: mmc host to claim
@@ -638,6 +665,9 @@ void mmc_rescan(struct work_struct *work)
                  */
                 mmc_bus_put(host);
  
+               if (host->ops->get_cd && host->ops->get_cd(host) == 0)
+                       goto out;
+
                 mmc_claim_host(host);
  
                 mmc_power_up(host);
@@ -652,7 +682,7 @@ void mmc_rescan(struct work_struct *work)
                 if (!err) {
                         if (mmc_attach_sdio(host, ocr))
                                 mmc_power_off(host);
-                       return;
+                       goto out;
                 }
  
                 /*
@@ -662,7 +692,7 @@ void mmc_rescan(struct work_struct *work)
                 if (!err) {
                         if (mmc_attach_sd(host, ocr))
                                 mmc_power_off(host);
-                       return;
+                       goto out;
                 }
  
                 /*
@@ -672,7 +702,7 @@ void mmc_rescan(struct work_struct *work)
                 if (!err) {
                         if (mmc_attach_mmc(host, ocr))
                                 mmc_power_off(host);
-                       return;
+                       goto out;
                 }
  
                 mmc_release_host(host);
@@ -683,6 +713,9 @@ void mmc_rescan(struct work_struct *work)
  
                 mmc_bus_put(host);
         }
+out:
+       if (host->caps & MMC_CAP_NEEDS_POLL)
+               mmc_schedule_delayed_work(&host->detect, HZ);
  }
  
  void mmc_start_host(struct mmc_host *host)
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c

index 3da29eef8f7dbc10640ec57237dc32d3c951183f..fdd7c760be8cff590d00310d620ab4b4fc7e02b3 100644 (file)
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -288,7 +288,7 @@ static struct device_type mmc_type = {
  /*
   * Handle the detection and initialisation of a card.
   *
- * In the case of a resume, "curcard" will contain the card
+ * In the case of a resume, "oldcard" will contain the card
   * we're trying to reinitialise.
   */
  static int mmc_init_card(struct mmc_host *host, u32 ocr,
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c

index 7ef3b15c5e3df9dbabb5bfa70923585bf1a6328f..26fc098d77cd5d9261852f34aa902b11e88eaa79 100644 (file)
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -326,7 +326,7 @@ static struct device_type sd_type = {
  /*
   * Handle the detection and initialisation of a card.
   *
- * In the case of a resume, "curcard" will contain the card
+ * In the case of a resume, "oldcard" will contain the card
   * we're trying to reinitialise.
   */
  static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
@@ -494,13 +494,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
          * Check if read-only switch is active.
          */
         if (!oldcard) {
-               if (!host->ops->get_ro) {
+               if (!host->ops->get_ro || host->ops->get_ro(host) < 0) {
                         printk(KERN_WARNING "%s: host does not "
                                 "support reading read-only "
                                 "switch. assuming write-enable.\n",
                                 mmc_hostname(host));
                 } else {
-                       if (host->ops->get_ro(host))
+                       if (host->ops->get_ro(host) > 0)
                                 mmc_card_set_readonly(card);
                 }
         }
diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c

index d5e51b1c7b3fb715ebc81dcad5468c572748d42e..956bd7677502c3c1ebe69d3c03f65641abce060e 100644 (file)
--- a/drivers/mmc/core/sdio_cis.c
+++ b/drivers/mmc/core/sdio_cis.c
@@ -129,6 +129,12 @@ static int cistpl_funce_func(struct sdio_func *func,
         /* TPLFE_MAX_BLK_SIZE */
         func->max_blksize = buf[12] | (buf[13] << 8);
  
+       /* TPLFE_ENABLE_TIMEOUT_VAL, present in ver 1.1 and above */
+       if (vsn > SDIO_SDIO_REV_1_00)
+               func->enable_timeout = (buf[28] | (buf[29] << 8)) * 10;
+       else
+               func->enable_timeout = jiffies_to_msecs(HZ);
+
         return 0;
  }
  
diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c

index 625b92ce9cef2abedbaf853f2e0d2ba6dc7a7314..f61fc2d4cd0a649a02e8e2acc10da53a7ffe7c2a 100644 (file)
--- a/drivers/mmc/core/sdio_io.c
+++ b/drivers/mmc/core/sdio_io.c
@@ -1,7 +1,7 @@
  /*
   *  linux/drivers/mmc/core/sdio_io.c
   *
- *  Copyright 2007 Pierre Ossman
+ *  Copyright 2007-2008 Pierre Ossman
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
@@ -76,11 +76,7 @@ int sdio_enable_func(struct sdio_func *func)
         if (ret)
                 goto err;
  
-       /*
-        * FIXME: This should timeout based on information in the CIS,
-        * but we don't have card to parse that yet.
-        */
-       timeout = jiffies + HZ;
+       timeout = jiffies + msecs_to_jiffies(func->enable_timeout);
  
         while (1) {
                 ret = mmc_io_rw_direct(func->card, 0, 0, SDIO_CCCR_IORx, 0, &reg);
@@ -167,10 +163,8 @@ int sdio_set_block_size(struct sdio_func *func, unsigned blksz)
                 return -EINVAL;
  
         if (blksz == 0) {
-               blksz = min(min(
-                       func->max_blksize,
-                       func->card->host->max_blk_size),
-                       512u);
+               blksz = min(func->max_blksize, func->card->host->max_blk_size);
+               blksz = min(blksz, 512u);
         }
  
         ret = mmc_io_rw_direct(func->card, 1, 0,
@@ -186,9 +180,116 @@ int sdio_set_block_size(struct sdio_func *func, unsigned blksz)
         func->cur_blksize = blksz;
         return 0;
  }
-
  EXPORT_SYMBOL_GPL(sdio_set_block_size);
  
+/*
+ * Calculate the maximum byte mode transfer size
+ *
+ * The result is the smallest of: the host's max_seg_size, the host's
+ * max_blk_size, the function's max_blksize, and the fixed limit of
+ * 512 bytes.
+ */
+static inline unsigned int sdio_max_byte_size(struct sdio_func *func)
+{
+       unsigned mval = min(func->card->host->max_seg_size,
+                           func->card->host->max_blk_size);
+       mval = min(mval, func->max_blksize);
+       return min(mval, 512u); /* maximum size for byte mode */
+}
+
+/**
+ *     sdio_align_size - pads a transfer size to a more optimal value
+ *     @func: SDIO function
+ *     @sz: original transfer size
+ *
+ *     Pads the original data size with a number of extra bytes in
+ *     order to avoid controller bugs and/or performance hits
+ *     (e.g. some controllers revert to PIO for certain sizes).
+ *
+ *     If possible, it will also adjust the size so that it can be
+ *     handled in just a single request.
+ *
+ *     Returns the improved size, which might be unmodified. When no
+ *     acceptable padding is found, the caller's original @sz is
+ *     returned as-is.
+ */
+unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz)
+{
+       unsigned int orig_sz;
+       unsigned int blk_sz, byte_sz;
+       unsigned chunk_sz;
+
+       orig_sz = sz;   /* kept so the fallback paths can return it untouched */
+
+       /*
+        * Do a first check with the controller, in case it
+        * wants to increase the size up to a point where it
+        * might need more than one block.
+        */
+       sz = mmc_align_data_size(func->card, sz);
+
+       /*
+        * If we can still do this with just a byte transfer, then
+        * we're done.
+        */
+       if (sz <= sdio_max_byte_size(func))
+               return sz;
+
+       if (func->card->cccr.multi_block) {
+               /*
+                * Check if the transfer is already block aligned
+                */
+               if ((sz % func->cur_blksize) == 0)
+                       return sz;
+
+               /*
+                * Realign it so that it can be done with one request,
+                * and recheck if the controller still likes it.
+                * (sz is first rounded up to a whole number of blocks
+                * of cur_blksize bytes, then passed through the
+                * controller alignment helper again.)
+                */
+               blk_sz = ((sz + func->cur_blksize - 1) /
+                       func->cur_blksize) * func->cur_blksize;
+               blk_sz = mmc_align_data_size(func->card, blk_sz);
+
+               /*
+                * This value is only good if it is still just
+                * one request.
+                */
+               if ((blk_sz % func->cur_blksize) == 0)
+                       return blk_sz;
+
+               /*
+                * We failed to do one request, but at least try to
+                * pad the remainder properly. If the padded remainder
+                * does not fit in a byte mode transfer, control falls
+                * through to the final return of the original size.
+                */
+               byte_sz = mmc_align_data_size(func->card,
+                               sz % func->cur_blksize);
+               if (byte_sz <= sdio_max_byte_size(func)) {
+                       blk_sz = sz / func->cur_blksize;
+                       return blk_sz * func->cur_blksize + byte_sz;
+               }
+       } else {
+               /*
+                * We need multiple requests, so first check that the
+                * controller can handle the chunk size. The chunk is
+                * only usable if aligning it leaves it unchanged.
+                */
+               chunk_sz = mmc_align_data_size(func->card,
+                               sdio_max_byte_size(func));
+               if (chunk_sz == sdio_max_byte_size(func)) {
+                       /*
+                        * Fix up the size of the remainder (if any).
+                        * Note that the remainder is derived from the
+                        * caller's original size, not the pre-aligned sz.
+                        */
+                       byte_sz = orig_sz % chunk_sz;
+                       if (byte_sz) {
+                               byte_sz = mmc_align_data_size(func->card,
+                                               byte_sz);
+                       }
+
+                       return (orig_sz / chunk_sz) * chunk_sz + byte_sz;
+               }
+       }
+
+       /*
+        * The controller is simply incapable of transferring the size
+        * we want in a decent manner, so just return the original size.
+        */
+       return orig_sz;
+}
+EXPORT_SYMBOL_GPL(sdio_align_size);
+
  /* Split an arbitrarily sized data transfer into several
   * IO_RW_EXTENDED commands. */
  static int sdio_io_rw_ext_helper(struct sdio_func *func, int write,
@@ -199,14 +300,13 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write,
         int ret;
  
         /* Do the bulk of the transfer using block mode (if supported). */
-       if (func->card->cccr.multi_block) {
+       if (func->card->cccr.multi_block && (size > sdio_max_byte_size(func))) {
                 /* Blocks per command is limited by host count, host transfer
                  * size (we only use a single sg entry) and the maximum for
                  * IO_RW_EXTENDED of 511 blocks. */
-               max_blocks = min(min(
-                       func->card->host->max_blk_count,
-                       func->card->host->max_seg_size / func->cur_blksize),
-                       511u);
+               max_blocks = min(func->card->host->max_blk_count,
+                       func->card->host->max_seg_size / func->cur_blksize);
+               max_blocks = min(max_blocks, 511u);
  
                 while (remainder > func->cur_blksize) {
                         unsigned blocks;
@@ -231,11 +331,7 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write,
  
         /* Write the remainder using byte mode. */
         while (remainder > 0) {
-               size = remainder;
-               if (size > func->cur_blksize)
-                       size = func->cur_blksize;
-               if (size > 512)
-                       size = 512; /* maximum size for byte mode */
+               size = min(remainder, sdio_max_byte_size(func));
  
                 ret = mmc_io_rw_extended(func->card, write, func->num, addr,
                          incr_addr, buf, 1, size);
@@ -260,11 +356,10 @@ static int sdio_io_rw_ext_helper(struct sdio_func *func, int write,
   *     function. If there is a problem reading the address, 0xff
   *     is returned and @err_ret will contain the error code.
   */
-unsigned char sdio_readb(struct sdio_func *func, unsigned int addr,
-       int *err_ret)
+u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret)
  {
         int ret;
-       unsigned char val;
+       u8 val;
  
         BUG_ON(!func);
  
@@ -293,8 +388,7 @@ EXPORT_SYMBOL_GPL(sdio_readb);
   *     function. @err_ret will contain the status of the actual
   *     transfer.
   */
-void sdio_writeb(struct sdio_func *func, unsigned char b, unsigned int addr,
-       int *err_ret)
+void sdio_writeb(struct sdio_func *func, u8 b, unsigned int addr, int *err_ret)
  {
         int ret;
  
@@ -355,7 +449,6 @@ int sdio_readsb(struct sdio_func *func, void *dst, unsigned int addr,
  {
         return sdio_io_rw_ext_helper(func, 0, addr, 0, dst, count);
  }
-
  EXPORT_SYMBOL_GPL(sdio_readsb);
  
  /**
@@ -385,8 +478,7 @@ EXPORT_SYMBOL_GPL(sdio_writesb);
   *     function. If there is a problem reading the address, 0xffff
   *     is returned and @err_ret will contain the error code.
   */
-unsigned short sdio_readw(struct sdio_func *func, unsigned int addr,
-       int *err_ret)
+u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret)
  {
         int ret;
  
@@ -400,7 +492,7 @@ unsigned short sdio_readw(struct sdio_func *func, unsigned int addr,
                 return 0xFFFF;
         }
  
-       return le16_to_cpu(*(u16*)func->tmpbuf);
+       return le16_to_cpup((__le16 *)func->tmpbuf);
  }
  EXPORT_SYMBOL_GPL(sdio_readw);
  
@@ -415,12 +507,11 @@ EXPORT_SYMBOL_GPL(sdio_readw);
   *     function. @err_ret will contain the status of the actual
   *     transfer.
   */
-void sdio_writew(struct sdio_func *func, unsigned short b, unsigned int addr,
-       int *err_ret)
+void sdio_writew(struct sdio_func *func, u16 b, unsigned int addr, int *err_ret)
  {
         int ret;
  
-       *(u16*)func->tmpbuf = cpu_to_le16(b);
+       *(__le16 *)func->tmpbuf = cpu_to_le16(b);
  
         ret = sdio_memcpy_toio(func, addr, func->tmpbuf, 2);
         if (err_ret)
@@ -439,8 +530,7 @@ EXPORT_SYMBOL_GPL(sdio_writew);
   *     0xffffffff is returned and @err_ret will contain the error
   *     code.
   */
-unsigned long sdio_readl(struct sdio_func *func, unsigned int addr,
-       int *err_ret)
+u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret)
  {
         int ret;
  
@@ -454,7 +544,7 @@ unsigned long sdio_readl(struct sdio_func *func, unsigned int addr,
                 return 0xFFFFFFFF;
         }
  
-       return le32_to_cpu(*(u32*)func->tmpbuf);
+       return le32_to_cpup((__le32 *)func->tmpbuf);
  }
  EXPORT_SYMBOL_GPL(sdio_readl);
  
@@ -469,12 +559,11 @@ EXPORT_SYMBOL_GPL(sdio_readl);
   *     function. @err_ret will contain the status of the actual
   *     transfer.
   */
-void sdio_writel(struct sdio_func *func, unsigned long b, unsigned int addr,
-       int *err_ret)
+void sdio_writel(struct sdio_func *func, u32 b, unsigned int addr, int *err_ret)
  {
         int ret;
  
-       *(u32*)func->tmpbuf = cpu_to_le32(b);
+       *(__le32 *)func->tmpbuf = cpu_to_le32(b);
  
         ret = sdio_memcpy_toio(func, addr, func->tmpbuf, 4);
         if (err_ret)
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig

index dead61754ad76ef23ed532fbc503fe1d334491a9..dc6f2579f85cd81f31e4b1316d2b7d7ba69a127a 100644 (file)
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -26,18 +26,31 @@ config MMC_PXA
  
  config MMC_SDHCI
         tristate "Secure Digital Host Controller Interface support"
-       depends on PCI
+       depends on HAS_DMA
         help
-         This select the generic Secure Digital Host Controller Interface.
+         This selects the generic Secure Digital Host Controller Interface.
           It is used by manufacturers such as Texas Instruments(R), Ricoh(R)
           and Toshiba(R). Most controllers found in laptops are of this type.
+
+         If you have a controller with this interface, say Y or M here. You
+         also need to enable an appropriate bus interface.
+
+         If unsure, say N.
+
+config MMC_SDHCI_PCI
+       tristate "SDHCI support on PCI bus"
+       depends on MMC_SDHCI && PCI
+       help
+         This selects the PCI Secure Digital Host Controller Interface.
+         Most controllers found today are PCI devices.
+
           If you have a controller with this interface, say Y or M here.
  
           If unsure, say N.
  
  config MMC_RICOH_MMC
         tristate "Ricoh MMC Controller Disabler  (EXPERIMENTAL)"
-       depends on PCI && EXPERIMENTAL && MMC_SDHCI
+       depends on MMC_SDHCI_PCI
         help
           This selects the disabler for the Ricoh MMC Controller. This
           proprietary controller is unnecessary because the SDHCI driver
@@ -91,6 +104,16 @@ config MMC_AT91
  
           If unsure, say N.
  
+config MMC_ATMELMCI
+       tristate "Atmel Multimedia Card Interface support"
+       depends on AVR32
+       help
+         This selects the Atmel Multimedia Card Interface driver. If
+         you have an AT32 (AVR32) platform with a Multimedia Card
+         slot, say Y or M here.
+
+         If unsure, say N.
+
  config MMC_IMX
         tristate "Motorola i.MX Multimedia Card Interface support"
         depends on ARCH_IMX
@@ -130,3 +153,24 @@ config MMC_SPI
  
           If unsure, or if your system has no SPI master driver, say N.
  
+config MMC_S3C
+       tristate "Samsung S3C SD/MMC Card Interface support"
+       depends on ARCH_S3C2410 && MMC
+       help
+         This selects a driver for the MCI interface found in
+         Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs.
+         If you have a board based on one of those and an MMC/SD
+         slot, say Y or M here.
+
+         If unsure, say N.
+
+config MMC_SDRICOH_CS
+       tristate "MMC/SD driver for Ricoh Bay1Controllers (EXPERIMENTAL)"
+       depends on EXPERIMENTAL && MMC && PCI && PCMCIA
+       help
+         Say Y here if your notebook reports a Ricoh Bay1Controller PCMCIA
+         card whenever you insert an MMC or SD card into the card slot.
+
+         To compile this driver as a module, choose M here: the
+         module will be called sdricoh_cs.
+
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile

index 3877c87e6da26e699a730959104dc61ce5992290..db52eebfb50ee77e04705e9bb1aca3ab0c29b7ea 100644 (file)
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -10,11 +10,15 @@ obj-$(CONFIG_MMC_ARMMMCI)   += mmci.o
  obj-$(CONFIG_MMC_PXA)          += pxamci.o
  obj-$(CONFIG_MMC_IMX)          += imxmmc.o
  obj-$(CONFIG_MMC_SDHCI)                += sdhci.o
+obj-$(CONFIG_MMC_SDHCI_PCI)    += sdhci-pci.o
  obj-$(CONFIG_MMC_RICOH_MMC)    += ricoh_mmc.o
  obj-$(CONFIG_MMC_WBSD)         += wbsd.o
  obj-$(CONFIG_MMC_AU1X)         += au1xmmc.o
  obj-$(CONFIG_MMC_OMAP)         += omap.o
  obj-$(CONFIG_MMC_AT91)         += at91_mci.o
+obj-$(CONFIG_MMC_ATMELMCI)     += atmel-mci.o
  obj-$(CONFIG_MMC_TIFM_SD)      += tifm_sd.o
  obj-$(CONFIG_MMC_SPI)          += mmc_spi.o
+obj-$(CONFIG_MMC_S3C)          += s3cmci.o
+obj-$(CONFIG_MMC_SDRICOH_CS)   += sdricoh_cs.o
  
diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c

index 8979ad330a4d5e6e154b4e8be4794a0b28765eba..f15e2064305cd227ab93f7fdaaa8006ceb684e36 100644 (file)
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -125,8 +125,71 @@ struct at91mci_host
  
         /* Latest in the scatterlist that has been enabled for transfer */
         int transfer_index;
+
+       /* Timer for timeouts */
+       struct timer_list timer;
  };
  
+/*
+ * Reset the controller and restore most of the state
+ *
+ * With local interrupts disabled: remember the interrupt mask (IMR) and mask
+ * every MCI interrupt, save MR (low 15 bits only), SDCR and DTOR, disable and
+ * software-reset the controller, then re-enable it and write the saved
+ * registers and the interrupt mask back.
+ */
+static void at91_reset_host(struct at91mci_host *host)
+{
+       unsigned long flags;
+       u32 mr;
+       u32 sdcr;
+       u32 dtor;
+       u32 imr;
+
+       local_irq_save(flags);
+       imr = at91_mci_read(host, AT91_MCI_IMR);
+
+       at91_mci_write(host, AT91_MCI_IDR, 0xffffffff);
+
+       /* save current state */
+       mr = at91_mci_read(host, AT91_MCI_MR) & 0x7fff;
+       sdcr = at91_mci_read(host, AT91_MCI_SDCR);
+       dtor = at91_mci_read(host, AT91_MCI_DTOR);
+
+       /* reset the controller */
+       at91_mci_write(host, AT91_MCI_CR, AT91_MCI_MCIDIS | AT91_MCI_SWRST);
+
+       /* restore state */
+       at91_mci_write(host, AT91_MCI_CR, AT91_MCI_MCIEN);
+       at91_mci_write(host, AT91_MCI_MR, mr);
+       at91_mci_write(host, AT91_MCI_SDCR, sdcr);
+       at91_mci_write(host, AT91_MCI_DTOR, dtor);
+       at91_mci_write(host, AT91_MCI_IER, imr);
+
+       /* make sure sdio interrupts will fire */
+       at91_mci_read(host, AT91_MCI_SR);
+
+       local_irq_restore(flags);
+}
+
+/*
+ * Timer callback: fail the request that is still pending on the host.
+ *
+ * "data" is the struct at91mci_host pointer cast to unsigned long. When a
+ * request is pending, -ETIMEDOUT is stored in the data error of the current
+ * command if it carries data, else in the current command's error, else
+ * (no current command) in the request's command. The controller is then
+ * reset and the request is completed.
+ */
+static void at91_timeout_timer(unsigned long data)
+{
+       struct at91mci_host *host = (struct at91mci_host *)data;
+
+       /* Nothing in flight, nothing to time out */
+       if (!host->request)
+               return;
+
+       dev_err(host->mmc->parent, "Timeout waiting end of packet\n");
+
+       if (host->cmd && host->cmd->data)
+               host->cmd->data->error = -ETIMEDOUT;
+       else if (host->cmd)
+               host->cmd->error = -ETIMEDOUT;
+       else
+               host->request->cmd->error = -ETIMEDOUT;
+
+       at91_reset_host(host);
+       mmc_request_done(host->mmc, host->request);
+}
+
  /*
   * Copy from sg to a dma block - used for transfers
   */
@@ -135,9 +198,14 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data
         unsigned int len, i, size;
         unsigned *dmabuf = host->buffer;
  
-       size = host->total_length;
+       size = data->blksz * data->blocks;
         len = data->sg_len;
  
+       /* AT91SAM926[0/3] Data Write Operation and number of bytes erratum */
+       if (cpu_is_at91sam9260() || cpu_is_at91sam9263())
+               if (host->total_length == 12)
+                       memset(dmabuf, 0, 12);
+
         /*
          * Just loop through all entries. Size might not
          * be the entire list though so make sure that
@@ -159,9 +227,10 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data
  
                         for (index = 0; index < (amount / 4); index++)
                                 *dmabuf++ = swab32(sgbuffer[index]);
-               }
-               else
+               } else {
                         memcpy(dmabuf, sgbuffer, amount);
+                       /*
+                        * amount counts bytes, dmabuf points to words:
+                        * step through a byte pointer so the next
+                        * segment is stored right behind this one.
+                        */
+                       dmabuf = (unsigned *)((char *)dmabuf + amount);
+               }
  
                 kunmap_atomic(sgbuffer, KM_BIO_SRC_IRQ);
  
@@ -233,11 +302,11 @@ static void at91_mci_pre_dma_read(struct at91mci_host *host)
  
                 if (i == 0) {
                         at91_mci_write(host, ATMEL_PDC_RPR, sg->dma_address);
-                       at91_mci_write(host, ATMEL_PDC_RCR, sg->length / 4);
+                       at91_mci_write(host, ATMEL_PDC_RCR, (data->blksz & 0x3) ? sg->length : sg->length / 4);
                 }
                 else {
                         at91_mci_write(host, ATMEL_PDC_RNPR, sg->dma_address);
-                       at91_mci_write(host, ATMEL_PDC_RNCR, sg->length / 4);
+                       at91_mci_write(host, ATMEL_PDC_RNCR, (data->blksz & 0x3) ? sg->length : sg->length / 4);
                 }
         }
  
@@ -277,8 +346,6 @@ static void at91_mci_post_dma_read(struct at91mci_host *host)
  
                 dma_unmap_page(NULL, sg->dma_address, sg->length, DMA_FROM_DEVICE);
  
-               data->bytes_xfered += sg->length;
-
                 if (cpu_is_at91rm9200()) {      /* AT91RM9200 errata */
                         unsigned int *buffer;
                         int index;
@@ -294,6 +361,8 @@ static void at91_mci_post_dma_read(struct at91mci_host *host)
                 }
  
                 flush_dcache_page(sg_page(sg));
+
+               data->bytes_xfered += sg->length;
         }
  
         /* Is there another transfer to trigger? */
@@ -334,10 +403,32 @@ static void at91_mci_handle_transmitted(struct at91mci_host *host)
                 at91_mci_write(host, AT91_MCI_IER, AT91_MCI_BLKE);
         } else
                 at91_mci_write(host, AT91_MCI_IER, AT91_MCI_NOTBUSY);
+}
+
+/*
+ * Update bytes transferred count during a write operation
+ *
+ * Does nothing when the request's command carries a non-zero error.
+ * Otherwise, for a write request, bytes_xfered is set to blksz * blocks.
+ */
+static void at91_mci_update_bytes_xfered(struct at91mci_host *host)
+{
+       struct mmc_data *data;
+
+       /* always deal with the effective request (and not the current cmd) */
+
+       if (host->request->cmd && host->request->cmd->error != 0)
+               return;
  
-       data->bytes_xfered = host->total_length;
+       if (host->request->data) {
+               data = host->request->data;
+               if (data->flags & MMC_DATA_WRITE) {
+                       /* card is in IDLE mode now */
+                       pr_debug("-> bytes_xfered %d, total_length = %d\n",
+                               data->bytes_xfered, host->total_length);
+                       data->bytes_xfered = data->blksz * data->blocks;
+               }
+       }
  }
  
+
  /*Handle after command sent ready*/
  static int at91_mci_handle_cmdrdy(struct at91mci_host *host)
  {
@@ -350,8 +441,7 @@ static int at91_mci_handle_cmdrdy(struct at91mci_host *host)
                 } else return 1;
         } else if (host->cmd->data->flags & MMC_DATA_WRITE) {
                 /*After sendding multi-block-write command, start DMA transfer*/
-               at91_mci_write(host, AT91_MCI_IER, AT91_MCI_TXBUFE);
-               at91_mci_write(host, AT91_MCI_IER, AT91_MCI_BLKE);
+               at91_mci_write(host, AT91_MCI_IER, AT91_MCI_TXBUFE | AT91_MCI_BLKE);
                 at91_mci_write(host, ATMEL_PDC_PTCR, ATMEL_PDC_TXTEN);
         }
  
@@ -430,11 +520,19 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command
  
         if (data) {
  
-               if ( data->blksz & 0x3 ) {
-                       pr_debug("Unsupported block size\n");
-                       cmd->error = -EINVAL;
-                       mmc_request_done(host->mmc, host->request);
-                       return;
+               if (cpu_is_at91rm9200() || cpu_is_at91sam9261()) {
+                       if (data->blksz & 0x3) {
+                               pr_debug("Unsupported block size\n");
+                               cmd->error = -EINVAL;
+                               mmc_request_done(host->mmc, host->request);
+                               return;
+                       }
+                       if (data->flags & MMC_DATA_STREAM) {
+                               pr_debug("Stream commands not supported\n");
+                               cmd->error = -EINVAL;
+                               mmc_request_done(host->mmc, host->request);
+                               return;
+                       }
                 }
  
                 block_length = data->blksz;
@@ -481,8 +579,16 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command
                 ier = AT91_MCI_CMDRDY;
         } else {
                 /* zero block length and PDC mode */
-               mr = at91_mci_read(host, AT91_MCI_MR) & 0x7fff;
-               at91_mci_write(host, AT91_MCI_MR, mr | (block_length << 16) | AT91_MCI_PDCMODE);
+               mr = at91_mci_read(host, AT91_MCI_MR) & 0x5fff;
+               mr |= (data->blksz & 0x3) ? AT91_MCI_PDCFBYTE : 0;
+               mr |= (block_length << 16);
+               mr |= AT91_MCI_PDCMODE;
+               at91_mci_write(host, AT91_MCI_MR, mr);
+
+               if (!(cpu_is_at91rm9200() || cpu_is_at91sam9261()))
+                       at91_mci_write(host, AT91_MCI_BLKR,
+                               AT91_MCI_BLKR_BCNT(blocks) |
+                               AT91_MCI_BLKR_BLKLEN(block_length));
  
                 /*
                  * Disable the PDC controller
@@ -508,6 +614,13 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command
                                  * Handle a write
                                  */
                                 host->total_length = block_length * blocks;
+                               /*
+                                * AT91SAM926[0/3] Data Write Operation and
+                                * number of bytes erratum
+                                */
+                               if (cpu_is_at91sam9260 () || cpu_is_at91sam9263())
+                                       if (host->total_length < 12)
+                                               host->total_length = 12;
                                 host->buffer = dma_alloc_coherent(NULL,
                                                 host->total_length,
                                                 &host->physical_address, GFP_KERNEL);
@@ -517,7 +630,9 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command
                                 pr_debug("Transmitting %d bytes\n", host->total_length);
  
                                 at91_mci_write(host, ATMEL_PDC_TPR, host->physical_address);
-                               at91_mci_write(host, ATMEL_PDC_TCR, host->total_length / 4);
+                               at91_mci_write(host, ATMEL_PDC_TCR, (data->blksz & 0x3) ?
+                                               host->total_length : host->total_length / 4);
+
                                 ier = AT91_MCI_CMDRDY;
                         }
                 }
@@ -552,20 +667,26 @@ static void at91_mci_process_next(struct at91mci_host *host)
         else if ((!(host->flags & FL_SENT_STOP)) && host->request->stop) {
                 host->flags |= FL_SENT_STOP;
                 at91_mci_send_command(host, host->request->stop);
-       }
-       else
+       } else {
+               del_timer(&host->timer);
+               /* the at91rm9200 mci controller hangs after some transfers,
+                * and the workaround is to reset it after each transfer.
+                */
+               if (cpu_is_at91rm9200())
+                       at91_reset_host(host);
                 mmc_request_done(host->mmc, host->request);
+       }
  }
  
  /*
   * Handle a command that has been completed
   */
-static void at91_mci_completed_command(struct at91mci_host *host)
+static void at91_mci_completed_command(struct at91mci_host *host, unsigned int status)
  {
         struct mmc_command *cmd = host->cmd;
-       unsigned int status;
+       struct mmc_data *data = cmd->data;
  
-       at91_mci_write(host, AT91_MCI_IDR, 0xffffffff);
+       at91_mci_write(host, AT91_MCI_IDR, 0xffffffff & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB));
  
         cmd->resp[0] = at91_mci_read(host, AT91_MCI_RSPR(0));
         cmd->resp[1] = at91_mci_read(host, AT91_MCI_RSPR(1));
@@ -577,25 +698,34 @@ static void at91_mci_completed_command(struct at91mci_host *host)
                 host->buffer = NULL;
         }
  
-       status = at91_mci_read(host, AT91_MCI_SR);
-
-       pr_debug("Status = %08X [%08X %08X %08X %08X]\n",
-                status, cmd->resp[0], cmd->resp[1], cmd->resp[2], cmd->resp[3]);
+       pr_debug("Status = %08X/%08x [%08X %08X %08X %08X]\n",
+                status, at91_mci_read(host, AT91_MCI_SR),
+                cmd->resp[0], cmd->resp[1], cmd->resp[2], cmd->resp[3]);
  
         if (status & AT91_MCI_ERRORS) {
                 if ((status & AT91_MCI_RCRCE) && !(mmc_resp_type(cmd) & MMC_RSP_CRC)) {
                         cmd->error = 0;
                 }
                 else {
-                       if (status & (AT91_MCI_RTOE | AT91_MCI_DTOE))
-                               cmd->error = -ETIMEDOUT;
-                       else if (status & (AT91_MCI_RCRCE | AT91_MCI_DCRCE))
-                               cmd->error = -EILSEQ;
-                       else
-                               cmd->error = -EIO;
+                       if (status & (AT91_MCI_DTOE | AT91_MCI_DCRCE)) {
+                               if (data) {
+                                       if (status & AT91_MCI_DTOE)
+                                               data->error = -ETIMEDOUT;
+                                       else if (status & AT91_MCI_DCRCE)
+                                               data->error = -EILSEQ;
+                               }
+                       } else {
+                               if (status & AT91_MCI_RTOE)
+                                       cmd->error = -ETIMEDOUT;
+                               else if (status & AT91_MCI_RCRCE)
+                                       cmd->error = -EILSEQ;
+                               else
+                                       cmd->error = -EIO;
+                       }
  
-                       pr_debug("Error detected and set to %d (cmd = %d, retries = %d)\n",
-                                cmd->error, cmd->opcode, cmd->retries);
+                       pr_debug("Error detected and set to %d/%d (cmd = %d, retries = %d)\n",
+                               cmd->error, data ? data->error : 0,
+                                cmd->opcode, cmd->retries);
                 }
         }
         else
@@ -613,6 +743,8 @@ static void at91_mci_request(struct mmc_host *mmc, struct mmc_request *mrq)
         host->request = mrq;
         host->flags = 0;
  
+       mod_timer(&host->timer, jiffies +  HZ);
+
         at91_mci_process_next(host);
  }
  
@@ -736,6 +868,7 @@ static irqreturn_t at91_mci_irq(int irq, void *devid)
  
                 if (int_status & AT91_MCI_NOTBUSY) {
                         pr_debug("Card is ready\n");
+                       at91_mci_update_bytes_xfered(host);
                         completed = 1;
                 }
  
@@ -744,9 +877,21 @@ static irqreturn_t at91_mci_irq(int irq, void *devid)
  
                 if (int_status & AT91_MCI_BLKE) {
                         pr_debug("Block transfer has ended\n");
-                       completed = 1;
+                       if (host->request->data && host->request->data->blocks > 1) {
+                               /* multi block write : complete multi write
+                                * command and send stop */
+                               completed = 1;
+                       } else {
+                               at91_mci_write(host, AT91_MCI_IER, AT91_MCI_NOTBUSY);
+                       }
                 }
  
+               if (int_status & AT91_MCI_SDIOIRQA)
+                       mmc_signal_sdio_irq(host->mmc);
+
+               if (int_status & AT91_MCI_SDIOIRQB)
+                       mmc_signal_sdio_irq(host->mmc);
+
                 if (int_status & AT91_MCI_TXRDY)
                         pr_debug("Ready to transmit\n");
  
@@ -761,10 +906,10 @@ static irqreturn_t at91_mci_irq(int irq, void *devid)
  
         if (completed) {
                 pr_debug("Completed command\n");
-               at91_mci_write(host, AT91_MCI_IDR, 0xffffffff);
-               at91_mci_completed_command(host);
+               at91_mci_write(host, AT91_MCI_IDR, 0xffffffff & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB));
+               at91_mci_completed_command(host, int_status);
         } else
-               at91_mci_write(host, AT91_MCI_IDR, int_status);
+               at91_mci_write(host, AT91_MCI_IDR, int_status & ~(AT91_MCI_SDIOIRQA | AT91_MCI_SDIOIRQB));
  
         return IRQ_HANDLED;
  }
@@ -793,25 +938,33 @@ static irqreturn_t at91_mmc_det_irq(int irq, void *_host)
  
  static int at91_mci_get_ro(struct mmc_host *mmc)
  {
-       int read_only = 0;
         struct at91mci_host *host = mmc_priv(mmc);
  
-       if (host->board->wp_pin) {
-               read_only = gpio_get_value(host->board->wp_pin);
-               printk(KERN_WARNING "%s: card is %s\n", mmc_hostname(mmc),
-                               (read_only ? "read-only" : "read-write") );
-       }
-       else {
-               printk(KERN_WARNING "%s: host does not support reading read-only "
-                               "switch.  Assuming write-enable.\n", mmc_hostname(mmc));
-       }
-       return read_only;
+       if (host->board->wp_pin)
+               return !!gpio_get_value(host->board->wp_pin);   /* pin level normalised to 0 or 1 */
+       /*
+        * Board doesn't support read only detection (no wp_pin is set);
+        * let the mmc core decide what to do.
+        */
+       return -ENOSYS;
+}
+
+/*
+ * Enable or disable the SDIO interrupt of the slot this host uses.
+ *
+ * The board's slot_b setting selects between the slot A and slot B
+ * interrupt bits; "enable" selects whether that bit is written to the
+ * interrupt enable or the interrupt disable register.
+ */
+static void at91_mci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+       struct at91mci_host *host = mmc_priv(mmc);
+       u32 irq_mask;
+       char slot;
+
+       if (host->board->slot_b) {
+               slot = 'B';
+               irq_mask = AT91_MCI_SDIOIRQB;
+       } else {
+               slot = 'A';
+               irq_mask = AT91_MCI_SDIOIRQA;
+       }
+
+       pr_debug("%s: sdio_irq %c : %s\n", mmc_hostname(host->mmc),
+               slot, enable ? "enable" : "disable");
+
+       if (enable)
+               at91_mci_write(host, AT91_MCI_IER, irq_mask);
+       else
+               at91_mci_write(host, AT91_MCI_IDR, irq_mask);
+}
  
  static const struct mmc_host_ops at91_mci_ops = {
         .request        = at91_mci_request,
         .set_ios        = at91_mci_set_ios,
         .get_ro         = at91_mci_get_ro,
+       .enable_sdio_irq = at91_mci_enable_sdio_irq,
  };
  
  /*
@@ -842,6 +995,7 @@ static int __init at91_mci_probe(struct platform_device *pdev)
         mmc->f_min = 375000;
         mmc->f_max = 25000000;
         mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+       mmc->caps = MMC_CAP_SDIO_IRQ;
  
         mmc->max_blk_size = 4095;
         mmc->max_blk_count = mmc->max_req_size;
@@ -935,6 +1089,8 @@ static int __init at91_mci_probe(struct platform_device *pdev)
  
         mmc_add_host(mmc);
  
+       setup_timer(&host->timer, at91_timeout_timer, (unsigned long)host);
+
         /*
          * monitor card insertion/removal if we can
          */
@@ -995,6 +1151,7 @@ static int __exit at91_mci_remove(struct platform_device *pdev)
         }
  
         at91_mci_disable(host);
+       del_timer_sync(&host->timer);
         mmc_remove_host(mmc);
         free_irq(host->irq, host);
  
diff --git a/drivers/mmc/host/atmel-mci-regs.h b/drivers/mmc/host/atmel-mci-regs.h

new file mode 100644 (file)

index 0000000..a9a5657
--- /dev/null
+++ b/drivers/mmc/host/atmel-mci-regs.h
@@ -0,0 +1,91 @@
+/*
+ * Atmel MultiMedia Card Interface driver
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __DRIVERS_MMC_ATMEL_MCI_H__
+#define __DRIVERS_MMC_ATMEL_MCI_H__
+
+/* MCI Register Definitions */
+#define MCI_CR                 0x0000  /* Control */
+# define MCI_CR_MCIEN          (  1 <<  0)     /* MCI Enable */
+# define MCI_CR_MCIDIS         (  1 <<  1)     /* MCI Disable */
+# define MCI_CR_SWRST          (  1 <<  7)     /* Software Reset */
+#define MCI_MR                 0x0004  /* Mode */
+# define MCI_MR_CLKDIV(x)      ((x) <<  0)     /* Clock Divider */
+# define MCI_MR_RDPROOF                (  1 << 11)     /* Read Proof */
+# define MCI_MR_WRPROOF                (  1 << 12)     /* Write Proof */
+#define MCI_DTOR               0x0008  /* Data Timeout */
+# define MCI_DTOCYC(x)         ((x) <<  0)     /* Data Timeout Cycles */
+# define MCI_DTOMUL(x)         ((x) <<  4)     /* Data Timeout Multiplier */
+#define MCI_SDCR               0x000c  /* SD Card / SDIO */
+# define MCI_SDCSEL_SLOT_A     (  0 <<  0)     /* Select SD slot A */
+# define MCI_SDCSEL_SLOT_B     (  1 <<  0)     /* Select SD slot B */
+# define MCI_SDCBUS_1BIT       (  0 <<  7)     /* 1-bit data bus */
+# define MCI_SDCBUS_4BIT       (  1 <<  7)     /* 4-bit data bus */
+#define MCI_ARGR               0x0010  /* Command Argument */
+#define MCI_CMDR               0x0014  /* Command */
+# define MCI_CMDR_CMDNB(x)     ((x) <<  0)     /* Command Opcode */
+# define MCI_CMDR_RSPTYP_NONE  (  0 <<  6)     /* No response */
+# define MCI_CMDR_RSPTYP_48BIT (  1 <<  6)     /* 48-bit response */
+# define MCI_CMDR_RSPTYP_136BIT        (  2 <<  6)     /* 136-bit response */
+# define MCI_CMDR_SPCMD_INIT   (  1 <<  8)     /* Initialization command */
+# define MCI_CMDR_SPCMD_SYNC   (  2 <<  8)     /* Synchronized command */
+# define MCI_CMDR_SPCMD_INT    (  4 <<  8)     /* Interrupt command */
+# define MCI_CMDR_SPCMD_INTRESP        (  5 <<  8)     /* Interrupt response */
+# define MCI_CMDR_OPDCMD       (  1 << 11)     /* Open Drain */
+# define MCI_CMDR_MAXLAT_5CYC  (  0 << 12)     /* Max latency 5 cycles */
+# define MCI_CMDR_MAXLAT_64CYC (  1 << 12)     /* Max latency 64 cycles */
+# define MCI_CMDR_START_XFER   (  1 << 16)     /* Start data transfer */
+# define MCI_CMDR_STOP_XFER    (  2 << 16)     /* Stop data transfer */
+# define MCI_CMDR_TRDIR_WRITE  (  0 << 18)     /* Write data */
+# define MCI_CMDR_TRDIR_READ   (  1 << 18)     /* Read data */
+# define MCI_CMDR_BLOCK                (  0 << 19)     /* Single-block transfer */
+# define MCI_CMDR_MULTI_BLOCK  (  1 << 19)     /* Multi-block transfer */
+# define MCI_CMDR_STREAM       (  2 << 19)     /* MMC Stream transfer */
+# define MCI_CMDR_SDIO_BYTE    (  4 << 19)     /* SDIO Byte transfer */
+# define MCI_CMDR_SDIO_BLOCK   (  5 << 19)     /* SDIO Block transfer */
+# define MCI_CMDR_SDIO_SUSPEND (  1 << 24)     /* SDIO Suspend Command */
+# define MCI_CMDR_SDIO_RESUME  (  2 << 24)     /* SDIO Resume Command */
+#define MCI_BLKR               0x0018  /* Block */
+# define MCI_BCNT(x)           ((x) <<  0)     /* Data Block Count */
+# define MCI_BLKLEN(x)         ((x) << 16)     /* Data Block Length */
+#define MCI_RSPR               0x0020  /* Response 0 */
+#define MCI_RSPR1              0x0024  /* Response 1 */
+#define MCI_RSPR2              0x0028  /* Response 2 */
+#define MCI_RSPR3              0x002c  /* Response 3 */
+#define MCI_RDR                        0x0030  /* Receive Data */
+#define MCI_TDR                        0x0034  /* Transmit Data */
+#define MCI_SR                 0x0040  /* Status */
+#define MCI_IER                        0x0044  /* Interrupt Enable */
+#define MCI_IDR                        0x0048  /* Interrupt Disable */
+#define MCI_IMR                        0x004c  /* Interrupt Mask */
+# define MCI_CMDRDY            (  1 <<   0)    /* Command Ready */
+# define MCI_RXRDY             (  1 <<   1)    /* Receiver Ready */
+# define MCI_TXRDY             (  1 <<   2)    /* Transmitter Ready */
+# define MCI_BLKE              (  1 <<   3)    /* Data Block Ended */
+# define MCI_DTIP              (  1 <<   4)    /* Data Transfer In Progress */
+# define MCI_NOTBUSY           (  1 <<   5)    /* Data Not Busy */
+# define MCI_SDIOIRQA          (  1 <<   8)    /* SDIO IRQ in slot A */
+# define MCI_SDIOIRQB          (  1 <<   9)    /* SDIO IRQ in slot B */
+# define MCI_RINDE             (  1 <<  16)    /* Response Index Error */
+# define MCI_RDIRE             (  1 <<  17)    /* Response Direction Error */
+# define MCI_RCRCE             (  1 <<  18)    /* Response CRC Error */
+# define MCI_RENDE             (  1 <<  19)    /* Response End Bit Error */
+# define MCI_RTOE              (  1 <<  20)    /* Response Time-Out Error */
+# define MCI_DCRCE             (  1 <<  21)    /* Data CRC Error */
+# define MCI_DTOE              (  1 <<  22)    /* Data Time-Out Error */
+# define MCI_OVRE              (  1 <<  30)    /* RX Overrun Error */
+# define MCI_UNRE              (  1 <<  31)    /* TX Underrun Error */
+
+/*
+ * Register access macros
+ *
+ * "reg" is the register name without its MCI_ prefix; it is token-pasted
+ * onto MCI_ to obtain the register offset. "port" must point to an object
+ * with a "regs" base address member. Both macros use the __raw_ accessors.
+ */
+#define mci_readl(port,reg)                            \
+       __raw_readl((port)->regs + MCI_##reg)
+#define mci_writel(port,reg,value)                     \
+       __raw_writel((value), (port)->regs + MCI_##reg)
+
+#endif /* __DRIVERS_MMC_ATMEL_MCI_H__ */
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c

new file mode 100644 (file)

index 0000000..cce873c
--- /dev/null
+++ b/drivers/mmc/host/atmel-mci.c
@@ -0,0 +1,981 @@
+/*
+ * Atmel MultiMedia Card Interface driver
+ *
+ * Copyright (C) 2004-2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/blkdev.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+
+#include <linux/mmc/host.h>
+
+#include <asm/atmel-mci.h>
+#include <asm/io.h>
+#include <asm/unaligned.h>
+
+#include <asm/arch/board.h>
+#include <asm/arch/gpio.h>
+
+#include "atmel-mci-regs.h"
+
+#define ATMCI_DATA_ERROR_FLAGS (MCI_DCRCE | MCI_DTOE | MCI_OVRE | MCI_UNRE)
+
+enum {
+       EVENT_CMD_COMPLETE = 0,
+       EVENT_DATA_ERROR,
+       EVENT_DATA_COMPLETE,
+       EVENT_STOP_SENT,
+       EVENT_STOP_COMPLETE,
+       EVENT_XFER_COMPLETE,
+};
+
+struct atmel_mci {
+       struct mmc_host         *mmc;
+       void __iomem            *regs;  /* base used by mci_readl()/mci_writel() */
+
+       struct scatterlist      *sg;    /* set from data->sg in atmci_submit_data() */
+       unsigned int            pio_offset;     /* zeroed when a transfer is submitted */
+
+       struct mmc_request      *mrq;   /* set in atmci_request(), cleared in atmci_request_end() */
+       struct mmc_command      *cmd;   /* set in atmci_start_command() */
+       struct mmc_data         *data;  /* set in atmci_submit_data() */
+
+       u32                     cmd_status;
+       u32                     data_status;
+       u32                     stop_status;
+       u32                     stop_cmdr;      /* CMDR value send_stop_cmd() issues for the stop command */
+
+       u32                     mode_reg;       /* written to MR by atmci_enable() */
+       u32                     sdc_reg;        /* written to SDCR by atmci_enable() */
+
+       struct tasklet_struct   tasklet;
+       unsigned long           pending_events;         /* bitmap, see atmci_*_pending() */
+       unsigned long           completed_events;       /* bitmap, see atmci_*_completed() */
+
+       int                     present;        /* zero makes atmci_request() fail with -ENOMEDIUM */
+       int                     detect_pin;
+       int                     wp_pin;
+
+       /* For detect pin debouncing */
+       struct timer_list       detect_timer;
+
+       unsigned long           bus_hz;         /* used by ns_to_clocks() */
+       unsigned long           mapbase;
+       struct clk              *mck;           /* enabled/disabled by atmci_enable()/atmci_disable() */
+       struct platform_device  *pdev;
+};
+
+#define atmci_is_completed(host, event)                                \
+       test_bit(event, &host->completed_events)
+#define atmci_test_and_clear_pending(host, event)              \
+       test_and_clear_bit(event, &host->pending_events)
+#define atmci_test_and_set_completed(host, event)              \
+       test_and_set_bit(event, &host->completed_events)
+#define atmci_set_completed(host, event)                       \
+       set_bit(event, &host->completed_events)
+#define atmci_set_pending(host, event)                         \
+       set_bit(event, &host->pending_events)
+#define atmci_clear_pending(host, event)                       \
+       clear_bit(event, &host->pending_events)
+
+
+static void atmci_enable(struct atmel_mci *host)
+{
+       clk_enable(host->mck);                  /* clock on before the register writes */
+       mci_writel(host, CR, MCI_CR_MCIEN);     /* set MCI Enable */
+       mci_writel(host, MR, host->mode_reg);   /* program MR from the value kept in the host */
+       mci_writel(host, SDCR, host->sdc_reg);  /* program SDCR from the value kept in the host */
+}
+
+static void atmci_disable(struct atmel_mci *host)
+{
+       mci_writel(host, CR, MCI_CR_SWRST);     /* request a software reset */
+
+       /*
+        * Stall until write is complete, then disable the bus clock.
+        * The value read back from SR is discarded.
+        */
+       mci_readl(host, SR);
+       clk_disable(host->mck);
+}
+
+/*
+ * Convert a timeout in nanoseconds to MCI bus clock cycles.
+ *
+ * The nanosecond value is rounded up to whole microseconds before it is
+ * multiplied by the bus frequency in MHz. Multiplying first, as in
+ * ns * (bus_hz / 1000000), wraps around where the arithmetic is 32 bits
+ * wide once the timeout reaches a few hundred milliseconds, which yields
+ * a far too short timeout. Rounding up early can only lengthen the
+ * result, which is harmless for an upper bound.
+ */
+static inline unsigned int ns_to_clocks(struct atmel_mci *host,
+                                       unsigned int ns)
+{
+       /* Round up without computing ns + 999, which could itself wrap */
+       unsigned int us = ns / 1000 + (ns % 1000 ? 1 : 0);
+
+       return us * (host->bus_hz / 1000000);
+}
+
+/*
+ * Program the data timeout register for a transfer.
+ *
+ * The timeout (timeout_ns converted to clocks, plus timeout_clks) is
+ * expressed as a cycle count scaled by one of eight multipliers. The first
+ * multiplier whose rounded-up cycle count is below 15 is used; if none
+ * qualifies, multiplier 7 with 15 cycles is programmed.
+ */
+static void atmci_set_timeout(struct atmel_mci *host,
+                             struct mmc_data *data)
+{
+       /* Shift applied for each DTOMUL value */
+       static unsigned dtomul_to_shift[] = {
+               0, 4, 7, 8, 10, 12, 16, 20
+       };
+       unsigned        clocks;
+       unsigned        cyc;
+       unsigned        mul = 0;
+
+       clocks = ns_to_clocks(host, data->timeout_ns) + data->timeout_clks;
+
+       while (mul < 8) {
+               unsigned shift = dtomul_to_shift[mul];
+
+               /* Divide by 2^shift, rounding up */
+               cyc = (clocks + (1 << shift) - 1) >> shift;
+               if (cyc < 15)
+                       break;
+               mul++;
+       }
+
+       /* No multiplier was large enough */
+       if (mul >= 8) {
+               mul = 7;
+               cyc = 15;
+       }
+
+       dev_vdbg(&host->mmc->class_dev, "setting timeout to %u cycles\n",
+                       cyc << dtomul_to_shift[mul]);
+       mci_writel(host, DTOR, (MCI_DTOMUL(mul) | MCI_DTOCYC(cyc)));
+}
+
+/*
+ * Build the CMDR value for a command: opcode, response type, maximum
+ * latency, open-drain mode and, when the command carries data, the
+ * transfer type and direction. The command's error field is set to
+ * -EINPROGRESS as a side effect.
+ */
+static u32 atmci_prepare_command(struct mmc_host *mmc,
+                                struct mmc_command *cmd)
+{
+       struct mmc_data *data = cmd->data;
+       u32             cmdr = MCI_CMDR_CMDNB(cmd->opcode);
+
+       cmd->error = -EINPROGRESS;
+
+       if (cmd->flags & MMC_RSP_PRESENT)
+               cmdr |= (cmd->flags & MMC_RSP_136) ?
+                       MCI_CMDR_RSPTYP_136BIT : MCI_CMDR_RSPTYP_48BIT;
+
+       /*
+        * CMD2 and ACMD41 should really use MAXLAT_5, but telling an ACMD
+        * apart from a plain command is too difficult here, so 64 cycles
+        * are used for everything.
+        */
+       cmdr |= MCI_CMDR_MAXLAT_64CYC;
+
+       if (mmc->ios.bus_mode == MMC_BUSMODE_OPENDRAIN)
+               cmdr |= MCI_CMDR_OPDCMD;
+
+       /* Without data there are no transfer bits to add */
+       if (!data)
+               return cmdr;
+
+       cmdr |= MCI_CMDR_START_XFER;
+
+       if (data->flags & MMC_DATA_STREAM)
+               cmdr |= MCI_CMDR_STREAM;
+       else if (data->blocks > 1)
+               cmdr |= MCI_CMDR_MULTI_BLOCK;
+       else
+               cmdr |= MCI_CMDR_BLOCK;
+
+       if (data->flags & MMC_DATA_READ)
+               cmdr |= MCI_CMDR_TRDIR_READ;
+
+       return cmdr;
+}
+
+static void atmci_start_command(struct atmel_mci *host,
+                               struct mmc_command *cmd,
+                               u32 cmd_flags)
+{
+       /*
+        * Must read host->cmd after testing event flags; the read barrier
+        * below keeps that order. A command still recorded in host->cmd at
+        * this point triggers a warning before it is overwritten.
+        */
+       smp_rmb();
+       WARN_ON(host->cmd);
+       host->cmd = cmd;
+
+       dev_vdbg(&host->mmc->class_dev,
+                       "start command: ARGR=0x%08x CMDR=0x%08x\n",
+                       cmd->arg, cmd_flags);
+
+       mci_writel(host, ARGR, cmd->arg);       /* argument register first */
+       mci_writel(host, CMDR, cmd_flags);      /* then the command register */
+}
+
+static void send_stop_cmd(struct mmc_host *mmc, struct mmc_data *data)
+{
+       struct atmel_mci *host = mmc_priv(mmc);
+
+       atmci_start_command(host, data->stop, host->stop_cmdr); /* CMDR value kept in host->stop_cmdr */
+       mci_writel(host, IER, MCI_CMDRDY);      /* enable the Command Ready interrupt */
+}
+
+static void atmci_request_end(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+       struct atmel_mci *host = mmc_priv(mmc);
+
+       WARN_ON(host->cmd || host->data);       /* warn if a command or data is still recorded */
+       host->mrq = NULL;
+
+       atmci_disable(host);    /* software reset, then bus clock off */
+
+       mmc_request_done(mmc, mrq);     /* hand the request back to the MMC core */
+}
+
+/*
+ * Program the block register for a data transfer and record the transfer
+ * in the host (data, scatterlist, PIO offset). The data's error field is
+ * set to -EINPROGRESS.
+ *
+ * Returns a mask of interrupt flags to be enabled after the whole
+ * request has been prepared.
+ */
+static u32 atmci_submit_data(struct mmc_host *mmc, struct mmc_data *data)
+{
+       struct atmel_mci        *host = mmc_priv(mmc);
+       u32                     blkr;
+
+       data->error = -EINPROGRESS;
+
+       WARN_ON(host->data);
+       host->sg = NULL;
+       host->data = data;
+
+       /* Block count and block length share the one register */
+       blkr = MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz);
+       mci_writel(host, BLKR, blkr);
+       dev_vdbg(&mmc->class_dev, "BLKR=0x%08x\n", blkr);
+
+       host->sg = data->sg;
+       host->pio_offset = 0;
+
+       /* Data error flags always; plus the ready flag for the direction */
+       if (data->flags & MMC_DATA_READ)
+               return ATMCI_DATA_ERROR_FLAGS | MCI_RXRDY;
+
+       return ATMCI_DATA_ERROR_FLAGS | MCI_TXRDY;
+}
+
+/*
+ * mmc_host_ops.request handler: start processing one MMC request.
+ *
+ * The request is failed immediately with -ENOMEDIUM when the card
+ * is known to be gone, and with -EINVAL when it cannot be handled
+ * (multi-block transfer whose block size is not a multiple of 4, or
+ * a stop command without a data stage). Otherwise the command is
+ * issued, the data stage and the stop command are prepared, and
+ * only then are the interrupts enabled.
+ */
+static void atmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+       struct atmel_mci        *host = mmc_priv(mmc);
+       struct mmc_data         *data;
+       struct mmc_command      *cmd;
+       u32                     iflags;
+       u32                     cmdflags = 0;
+
+       /*
+        * No interrupt is expected to be enabled at this point. Log
+        * the value that was actually tested rather than reading the
+        * register a second time.
+        */
+       iflags = mci_readl(host, IMR);
+       if (iflags)
+               dev_warn(&mmc->class_dev, "WARNING: IMR=0x%08x\n",
+                               iflags);
+
+       WARN_ON(host->mrq != NULL);
+
+       /*
+        * We may "know" the card is gone even though there's still an
+        * electrical connection. If so, we really need to communicate
+        * this to the MMC core since there won't be any more
+        * interrupts as the card is completely removed. Otherwise,
+        * the MMC core might believe the card is still there even
+        * though the card was just removed very slowly.
+        */
+       if (!host->present) {
+               mrq->cmd->error = -ENOMEDIUM;
+               mmc_request_done(mmc, mrq);
+               return;
+       }
+
+       host->mrq = mrq;
+       host->pending_events = 0;
+       host->completed_events = 0;
+
+       atmci_enable(host);
+
+       data = mrq->data;
+       if (data) {
+               /* We don't support multiple blocks of weird lengths. */
+               if (data->blocks > 1 && data->blksz & 3)
+                       goto fail;
+               atmci_set_timeout(host, data);
+       } else if (mrq->stop) {
+               /*
+                * The stop command setup below needs the data
+                * descriptor; reject the request instead of
+                * dereferencing a NULL pointer.
+                */
+               goto fail;
+       }
+
+       iflags = MCI_CMDRDY;
+       cmd = mrq->cmd;
+       cmdflags = atmci_prepare_command(mmc, cmd);
+       atmci_start_command(host, cmd, cmdflags);
+
+       if (data)
+               iflags |= atmci_submit_data(mmc, data);
+
+       /* Pre-compute the CMDR value for the stop command, if any */
+       if (mrq->stop) {
+               host->stop_cmdr = atmci_prepare_command(mmc, mrq->stop);
+               host->stop_cmdr |= MCI_CMDR_STOP_XFER;
+               if (!(data->flags & MMC_DATA_WRITE))
+                       host->stop_cmdr |= MCI_CMDR_TRDIR_READ;
+               if (data->flags & MMC_DATA_STREAM)
+                       host->stop_cmdr |= MCI_CMDR_STREAM;
+               else
+                       host->stop_cmdr |= MCI_CMDR_MULTI_BLOCK;
+       }
+
+       /*
+        * We could have enabled interrupts earlier, but I suspect
+        * that would open up a nice can of interesting race
+        * conditions (e.g. command and data complete, but stop not
+        * prepared yet.)
+        */
+       mci_writel(host, IER, iflags);
+
+       return;
+
+fail:
+       /* Undo atmci_enable() and hand the request back as invalid */
+       atmci_disable(host);
+       host->mrq = NULL;
+       mrq->cmd->error = -EINVAL;
+       mmc_request_done(mmc, mrq);
+}
+
+/*
+ * mmc_host_ops.set_ios handler: cache the clock divider and bus
+ * width requested by the MMC core in host->mode_reg / host->sdc_reg
+ * and, on power-on, send the card initialization sequence.
+ */
+static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+       struct atmel_mci        *host = mmc_priv(mmc);
+
+       if (ios->clock) {
+               /* Set clock rate */
+               u32 divider = DIV_ROUND_UP(host->bus_hz, 2 * ios->clock) - 1;
+
+               /* The divider is clamped to 255 */
+               if (divider > 255) {
+                       dev_warn(&mmc->class_dev,
+                               "clock %u too slow; using %lu\n",
+                               ios->clock, host->bus_hz / (2 * 256));
+                       divider = 255;
+               }
+
+               host->mode_reg = MCI_MR_CLKDIV(divider) | MCI_MR_WRPROOF
+                                       | MCI_MR_RDPROOF;
+       }
+
+       /* Any other bus width leaves sdc_reg untouched */
+       if (ios->bus_width == MMC_BUS_WIDTH_1)
+               host->sdc_reg = 0;
+       else if (ios->bus_width == MMC_BUS_WIDTH_4)
+               host->sdc_reg = MCI_SDCBUS_4BIT;
+
+       /*
+        * TODO: None of the currently available AVR32-based
+        * boards allow MMC power to be turned off. Implement
+        * power control when this can be tested properly.
+        */
+       if (ios->power_mode != MMC_POWER_ON)
+               return;
+
+       /* Send init sequence (74 clock cycles) */
+       atmci_enable(host);
+       mci_writel(host, CMDR, MCI_CMDR_SPCMD_INIT);
+       while (!(mci_readl(host, SR) & MCI_CMDRDY))
+               cpu_relax();
+       atmci_disable(host);
+}
+
+/*
+ * mmc_host_ops.get_ro handler: report the state of the write-protect
+ * switch. Returns the value read from the WP GPIO, or 0 (writable)
+ * when no WP pin is configured.
+ */
+static int atmci_get_ro(struct mmc_host *mmc)
+{
+       struct atmel_mci        *host = mmc_priv(mmc);
+       int                     read_only;
+
+       if (host->wp_pin < 0) {
+               dev_dbg(&mmc->class_dev,
+                       "no pin for checking read-only switch."
+                       " Assuming write-enable.\n");
+               return 0;
+       }
+
+       read_only = gpio_get_value(host->wp_pin);
+       dev_dbg(&mmc->class_dev, "card is %s\n",
+                       read_only ? "read-only" : "read-write");
+
+       return read_only;
+}
+
+/* Host operations handed to the MMC core (installed in atmci_probe()) */
+static struct mmc_host_ops atmci_ops = {
+       .request        = atmci_request,
+       .set_ios        = atmci_set_ios,
+       .get_ro         = atmci_get_ro,
+};
+
+/*
+ * Collect the outcome of a finished command: copy the response
+ * registers into cmd->resp and translate the status bits into
+ * cmd->error. If the command failed and has a data stage, that data
+ * stage is abandoned and its interrupts are disabled.
+ */
+static void atmci_command_complete(struct atmel_mci *host,
+                       struct mmc_command *cmd, u32 status)
+{
+       unsigned int    i;
+
+       /* Read the response from the card (up to 16 bytes) */
+       for (i = 0; i < 4; i++)
+               cmd->resp[i] = mci_readl(host, RSPR);
+
+       /* A CRC error only counts when the response carries a CRC */
+       if (status & MCI_RTOE)
+               cmd->error = -ETIMEDOUT;
+       else if ((cmd->flags & MMC_RSP_CRC) && (status & MCI_RCRCE))
+               cmd->error = -EILSEQ;
+       else if (status & (MCI_RINDE | MCI_RDIRE | MCI_RENDE))
+               cmd->error = -EIO;
+       else
+               cmd->error = 0;
+
+       if (!cmd->error)
+               return;
+
+       dev_dbg(&host->mmc->class_dev,
+               "command error: status=0x%08x\n", status);
+
+       if (!cmd->data)
+               return;
+
+       /* The data stage will not be serviced after a failed command */
+       host->data = NULL;
+       mci_writel(host, IDR, MCI_NOTBUSY
+                       | MCI_TXRDY | MCI_RXRDY
+                       | ATMCI_DATA_ERROR_FLAGS);
+}
+
+/*
+ * Callback of host->detect_timer, which atmci_detect_interrupt()
+ * arms 20 ms after a detect-pin edge. data is the struct atmel_mci
+ * pointer cast to unsigned long.
+ *
+ * Re-enables the detect interrupt, samples the detect pin and, if
+ * the card presence changed, aborts the request in flight (marking
+ * every stage that has not completed with -ENOMEDIUM) and notifies
+ * the MMC core.
+ */
+static void atmci_detect_change(unsigned long data)
+{
+       struct atmel_mci *host = (struct atmel_mci *)data;
+       struct mmc_request *mrq = host->mrq;
+       int present;
+
+       /*
+        * atmci_remove() sets detect_pin to -1 before freeing the
+        * interrupt. We must not re-enable the interrupt if it has
+        * been freed.
+        */
+       smp_rmb();
+       if (host->detect_pin < 0)
+               return;
+
+       /* Balances the disable_irq_nosync() in atmci_detect_interrupt() */
+       enable_irq(gpio_to_irq(host->detect_pin));
+       /* A pin value of 0 is treated as "card present" */
+       present = !gpio_get_value(host->detect_pin);
+
+       dev_vdbg(&host->pdev->dev, "detect change: %d (was %d)\n",
+                       present, host->present);
+
+       if (present != host->present) {
+               dev_dbg(&host->mmc->class_dev, "card %s\n",
+                       present ? "inserted" : "removed");
+               host->present = present;
+
+               /* Reset controller if card is gone */
+               if (!present) {
+                       mci_writel(host, CR, MCI_CR_SWRST);
+                       mci_writel(host, IDR, ~0UL);
+                       mci_writel(host, CR, MCI_CR_MCIEN);
+               }
+
+               /* Clean up queue if present */
+               if (mrq) {
+                       /*
+                        * Reset controller to terminate any ongoing
+                        * commands or data transfers.
+                        */
+                       mci_writel(host, CR, MCI_CR_SWRST);
+
+                       /* Only stages that have not completed get an error */
+                       if (!atmci_is_completed(host, EVENT_CMD_COMPLETE))
+                               mrq->cmd->error = -ENOMEDIUM;
+
+                       if (mrq->data && !atmci_is_completed(host,
+                                               EVENT_DATA_COMPLETE)) {
+                               host->data = NULL;
+                               mrq->data->error = -ENOMEDIUM;
+                       }
+                       if (mrq->stop && !atmci_is_completed(host,
+                                               EVENT_STOP_COMPLETE))
+                               mrq->stop->error = -ENOMEDIUM;
+
+                       /* atmci_request_end() warns if host->cmd is still set */
+                       host->cmd = NULL;
+                       atmci_request_end(host->mmc, mrq);
+               }
+
+               mmc_detect_change(host->mmc, 0);
+       }
+}
+
+/*
+ * Tasklet that processes the events flagged as pending by the
+ * interrupt path. priv is the struct mmc_host pointer cast to
+ * unsigned long.
+ *
+ * Each pending event is cleared, marked as completed and handled in
+ * turn: command done, stop command done, data error, data done. The
+ * request is ended once neither a command nor a data transfer is
+ * outstanding.
+ */
+static void atmci_tasklet_func(unsigned long priv)
+{
+       struct mmc_host         *mmc = (struct mmc_host *)priv;
+       struct atmel_mci        *host = mmc_priv(mmc);
+       struct mmc_request      *mrq = host->mrq;
+       /*
+        * Snapshot taken before any event is handled; the handlers
+        * below may set host->data to NULL.
+        */
+       struct mmc_data         *data = host->data;
+
+       dev_vdbg(&mmc->class_dev,
+               "tasklet: pending/completed/mask %lx/%lx/%x\n",
+               host->pending_events, host->completed_events,
+               mci_readl(host, IMR));
+
+       if (atmci_test_and_clear_pending(host, EVENT_CMD_COMPLETE)) {
+               /*
+                * host->cmd must be set to NULL before the interrupt
+                * handler sees EVENT_CMD_COMPLETE
+                */
+               host->cmd = NULL;
+               smp_wmb();
+               atmci_set_completed(host, EVENT_CMD_COMPLETE);
+               atmci_command_complete(host, mrq->cmd, host->cmd_status);
+
+               /*
+                * If the transfer already finished, the stop command
+                * was held back until now; EVENT_STOP_SENT ensures it
+                * is sent only once.
+                */
+               if (!mrq->cmd->error && mrq->stop
+                               && atmci_is_completed(host, EVENT_XFER_COMPLETE)
+                               && !atmci_test_and_set_completed(host,
+                                       EVENT_STOP_SENT))
+                       send_stop_cmd(host->mmc, mrq->data);
+       }
+       if (atmci_test_and_clear_pending(host, EVENT_STOP_COMPLETE)) {
+               /*
+                * host->cmd must be set to NULL before the interrupt
+                * handler sees EVENT_STOP_COMPLETE
+                */
+               host->cmd = NULL;
+               smp_wmb();
+               atmci_set_completed(host, EVENT_STOP_COMPLETE);
+               atmci_command_complete(host, mrq->stop, host->stop_status);
+       }
+       if (atmci_test_and_clear_pending(host, EVENT_DATA_ERROR)) {
+               u32 status = host->data_status;
+
+               dev_vdbg(&mmc->class_dev, "data error: status=%08x\n", status);
+
+               /* A data error also terminates the data stage */
+               atmci_set_completed(host, EVENT_DATA_ERROR);
+               atmci_set_completed(host, EVENT_DATA_COMPLETE);
+
+               /*
+                * NOTE(review): data is dereferenced without a NULL
+                * check; this assumes host->data was still set when
+                * the tasklet started -- confirm.
+                */
+               if (status & MCI_DTOE) {
+                       dev_dbg(&mmc->class_dev,
+                                       "data timeout error\n");
+                       data->error = -ETIMEDOUT;
+               } else if (status & MCI_DCRCE) {
+                       dev_dbg(&mmc->class_dev, "data CRC error\n");
+                       data->error = -EILSEQ;
+               } else {
+                       dev_dbg(&mmc->class_dev,
+                                       "data FIFO error (status=%08x)\n",
+                                       status);
+                       data->error = -EIO;
+               }
+
+               /* Send the stop command unless the card is gone */
+               if (host->present && data->stop
+                               && atmci_is_completed(host, EVENT_CMD_COMPLETE)
+                               && !atmci_test_and_set_completed(
+                                       host, EVENT_STOP_SENT))
+                       send_stop_cmd(host->mmc, data);
+
+               host->data = NULL;
+       }
+       if (atmci_test_and_clear_pending(host, EVENT_DATA_COMPLETE)) {
+               atmci_set_completed(host, EVENT_DATA_COMPLETE);
+
+               /* Report full success only if no data error was seen */
+               if (!atmci_is_completed(host, EVENT_DATA_ERROR)) {
+                       data->bytes_xfered = data->blocks * data->blksz;
+                       data->error = 0;
+               }
+
+               host->data = NULL;
+       }
+
+       /* Nothing outstanding any more: hand the request back */
+       if (host->mrq && !host->cmd && !host->data)
+               atmci_request_end(mmc, host->mrq);
+}
+
+/*
+ * PIO read path, called from the interrupt handler when RXRDY is
+ * pending. Drains 32-bit words from RDR into the request's
+ * scatterlist for as long as the controller reports RXRDY.
+ *
+ * The position inside the current scatterlist entry is carried
+ * between calls in host->sg and host->pio_offset. A word that
+ * straddles two entries is split between them. Every entry is
+ * flushed from the D-cache once it has been filled, since the data
+ * was written through the kernel mapping by the CPU.
+ *
+ * When the last entry has been filled, RXRDY is disabled, NOTBUSY is
+ * enabled and the stop command is sent if the command stage has
+ * already completed. On a data error the error is recorded and left
+ * to the tasklet.
+ */
+static void atmci_read_data_pio(struct atmel_mci *host)
+{
+       struct scatterlist      *sg = host->sg;
+       void                    *buf = sg_virt(sg);
+       unsigned int            offset = host->pio_offset;
+       struct mmc_data         *data = host->data;
+       u32                     value;
+       u32                     status;
+       unsigned int            nbytes = 0;
+
+       do {
+               value = mci_readl(host, RDR);
+               if (likely(offset + 4 <= sg->length)) {
+                       /* The whole word fits in the current entry */
+                       put_unaligned(value, (u32 *)(buf + offset));
+
+                       offset += 4;
+                       nbytes += 4;
+
+                       if (offset == sg->length) {
+                               /*
+                                * Entry is full: flush it before
+                                * moving on, exactly as the
+                                * straddling path below does.
+                                */
+                               flush_dcache_page(sg_page(sg));
+                               host->sg = sg = sg_next(sg);
+                               if (!sg)
+                                       goto done;
+
+                               offset = 0;
+                               buf = sg_virt(sg);
+                       }
+               } else {
+                       /* The word straddles two scatterlist entries */
+                       unsigned int remaining = sg->length - offset;
+                       memcpy(buf + offset, &value, remaining);
+                       nbytes += remaining;
+
+                       flush_dcache_page(sg_page(sg));
+                       host->sg = sg = sg_next(sg);
+                       if (!sg)
+                               goto done;
+
+                       /* Rest of the word goes to the next entry */
+                       offset = 4 - remaining;
+                       buf = sg_virt(sg);
+                       memcpy(buf, (u8 *)&value + remaining, offset);
+                       nbytes += offset;
+               }
+
+               status = mci_readl(host, SR);
+               if (status & ATMCI_DATA_ERROR_FLAGS) {
+                       /* Stop the transfer; the tasklet reports the error */
+                       mci_writel(host, IDR, (MCI_NOTBUSY | MCI_RXRDY
+                                               | ATMCI_DATA_ERROR_FLAGS));
+                       host->data_status = status;
+                       atmci_set_pending(host, EVENT_DATA_ERROR);
+                       tasklet_schedule(&host->tasklet);
+                       break;
+               }
+       } while (status & MCI_RXRDY);
+
+       /* More data to come: remember where to continue */
+       host->pio_offset = offset;
+       data->bytes_xfered += nbytes;
+
+       return;
+
+done:
+       /* Scatterlist exhausted: wait for NOTBUSY instead of RXRDY */
+       mci_writel(host, IDR, MCI_RXRDY);
+       mci_writel(host, IER, MCI_NOTBUSY);
+       data->bytes_xfered += nbytes;
+       atmci_set_completed(host, EVENT_XFER_COMPLETE);
+       if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE)
+                       && !atmci_test_and_set_completed(host, EVENT_STOP_SENT))
+               send_stop_cmd(host->mmc, data);
+}
+
+/*
+ * PIO write path, called from the interrupt handler when TXRDY is
+ * pending. Feeds 32-bit words from the request's scatterlist into
+ * TDR for as long as the controller reports TXRDY.
+ *
+ * The position inside the current scatterlist entry is carried
+ * between calls in host->sg and host->pio_offset. A word that
+ * straddles two entries is assembled from both before it is written.
+ *
+ * When the last entry has been consumed, TXRDY is disabled, NOTBUSY
+ * is enabled and the stop command is sent if the command stage has
+ * already completed. On a data error the error is recorded and left
+ * to the tasklet.
+ */
+static void atmci_write_data_pio(struct atmel_mci *host)
+{
+       struct scatterlist      *sg = host->sg;
+       void                    *buf = sg_virt(sg);
+       unsigned int            offset = host->pio_offset;
+       struct mmc_data         *data = host->data;
+       u32                     value;
+       u32                     status;
+       unsigned int            nbytes = 0;
+
+       do {
+               if (likely(offset + 4 <= sg->length)) {
+                       /* The whole word comes from the current entry */
+                       value = get_unaligned((u32 *)(buf + offset));
+                       mci_writel(host, TDR, value);
+
+                       offset += 4;
+                       nbytes += 4;
+                       if (offset == sg->length) {
+                               host->sg = sg = sg_next(sg);
+                               if (!sg)
+                                       goto done;
+
+                               offset = 0;
+                               buf = sg_virt(sg);
+                       }
+               } else {
+                       /* The word straddles two scatterlist entries */
+                       unsigned int remaining = sg->length - offset;
+
+                       value = 0;
+                       memcpy(&value, buf + offset, remaining);
+                       nbytes += remaining;
+
+                       host->sg = sg = sg_next(sg);
+                       if (!sg) {
+                               /* Last partial word: the unused bytes stay 0 */
+                               mci_writel(host, TDR, value);
+                               goto done;
+                       }
+
+                       /* Complete the word from the next entry */
+                       offset = 4 - remaining;
+                       buf = sg_virt(sg);
+                       memcpy((u8 *)&value + remaining, buf, offset);
+                       mci_writel(host, TDR, value);
+                       nbytes += offset;
+               }
+
+               status = mci_readl(host, SR);
+               if (status & ATMCI_DATA_ERROR_FLAGS) {
+                       /* Stop the transfer; the tasklet reports the error */
+                       mci_writel(host, IDR, (MCI_NOTBUSY | MCI_TXRDY
+                                               | ATMCI_DATA_ERROR_FLAGS));
+                       host->data_status = status;
+                       atmci_set_pending(host, EVENT_DATA_ERROR);
+                       tasklet_schedule(&host->tasklet);
+                       break;
+               }
+       } while (status & MCI_TXRDY);
+
+       /* More data to send: remember where to continue */
+       host->pio_offset = offset;
+       data->bytes_xfered += nbytes;
+
+       return;
+
+done:
+       /* Scatterlist exhausted: wait for NOTBUSY instead of TXRDY */
+       mci_writel(host, IDR, MCI_TXRDY);
+       mci_writel(host, IER, MCI_NOTBUSY);
+       data->bytes_xfered += nbytes;
+       atmci_set_completed(host, EVENT_XFER_COMPLETE);
+       if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE)
+                       && !atmci_test_and_set_completed(host, EVENT_STOP_SENT))
+               send_stop_cmd(host->mmc, data);
+}
+
+/*
+ * Handle a CMDRDY interrupt: disable it again, store the status for
+ * whichever command just finished (the stop command if one has been
+ * sent, the main command otherwise), flag the matching event as
+ * pending and kick the tasklet.
+ */
+static void atmci_cmd_interrupt(struct mmc_host *mmc, u32 status)
+{
+       struct atmel_mci        *host = mmc_priv(mmc);
+
+       mci_writel(host, IDR, MCI_CMDRDY);
+
+       if (!atmci_is_completed(host, EVENT_STOP_SENT)) {
+               host->cmd_status = status;
+               atmci_set_pending(host, EVENT_CMD_COMPLETE);
+       } else {
+               host->stop_status = status;
+               atmci_set_pending(host, EVENT_STOP_COMPLETE);
+       }
+
+       tasklet_schedule(&host->tasklet);
+}
+
+/*
+ * Controller interrupt handler; dev_id is the struct mmc_host.
+ *
+ * Repeatedly reads SR masked with IMR and dispatches every pending
+ * source: data errors and NOTBUSY are handed to the tasklet, RXRDY
+ * and TXRDY are serviced by the PIO routines, CMDRDY goes to
+ * atmci_cmd_interrupt(). The loop stops when nothing is pending or
+ * after a bounded number of passes.
+ *
+ * Returns IRQ_NONE if the very first pass found nothing pending,
+ * IRQ_HANDLED otherwise.
+ */
+static irqreturn_t atmci_interrupt(int irq, void *dev_id)
+{
+       struct mmc_host         *mmc = dev_id;
+       struct atmel_mci        *host = mmc_priv(mmc);
+       u32                     status, mask, pending;
+       unsigned int            pass_count = 0;
+
+       spin_lock(&mmc->lock);
+
+       do {
+               status = mci_readl(host, SR);
+               mask = mci_readl(host, IMR);
+               pending = status & mask;
+               if (!pending)
+                       break;
+
+               if (pending & ATMCI_DATA_ERROR_FLAGS) {
+                       mci_writel(host, IDR, ATMCI_DATA_ERROR_FLAGS
+                                       | MCI_RXRDY | MCI_TXRDY);
+                       /* Drop the sources that were just disabled */
+                       pending &= mci_readl(host, IMR);
+                       host->data_status = status;
+                       atmci_set_pending(host, EVENT_DATA_ERROR);
+                       tasklet_schedule(&host->tasklet);
+               }
+               if (pending & MCI_NOTBUSY) {
+                       mci_writel(host, IDR, (MCI_NOTBUSY
+                                              | ATMCI_DATA_ERROR_FLAGS));
+                       atmci_set_pending(host, EVENT_DATA_COMPLETE);
+                       tasklet_schedule(&host->tasklet);
+               }
+               if (pending & MCI_RXRDY)
+                       atmci_read_data_pio(host);
+               if (pending & MCI_TXRDY)
+                       atmci_write_data_pio(host);
+
+               if (pending & MCI_CMDRDY)
+                       atmci_cmd_interrupt(mmc, status);
+       } while (pass_count++ < 5);
+
+       spin_unlock(&mmc->lock);
+
+       /* pass_count is still 0 only if the first pass hit the break */
+       return pass_count ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * Card-detect pin interrupt; dev_id is the struct mmc_host. The pin
+ * state is not looked at here: the interrupt is masked and the
+ * debounce timer is (re)armed so the pin is sampled 20 ms later.
+ */
+static irqreturn_t atmci_detect_interrupt(int irq, void *dev_id)
+{
+       struct mmc_host         *mmc = dev_id;
+       struct atmel_mci        *host = mmc_priv(mmc);
+       unsigned long           expires;
+
+       /*
+        * Disable interrupts until the pin has stabilized and check
+        * the state then. Use mod_timer() since we may be in the
+        * middle of the timer routine when this interrupt triggers.
+        */
+       disable_irq_nosync(irq);
+
+       expires = jiffies + msecs_to_jiffies(20);
+       mod_timer(&host->detect_timer, expires);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Probe one MCI controller platform device.
+ *
+ * Requires a memory resource, platform data and an interrupt.
+ * Allocates the MMC host, maps the registers, resets the
+ * controller, fills in the host capabilities, claims the controller
+ * interrupt and the optional detect / write-protect GPIOs, and
+ * registers the host with the MMC core.
+ *
+ * Returns 0 on success or a negative errno. Failing to claim one of
+ * the optional GPIOs or the detect interrupt is not fatal; the
+ * corresponding pin is simply marked as unavailable (-1).
+ */
+static int __init atmci_probe(struct platform_device *pdev)
+{
+       struct mci_platform_data        *pdata;
+       struct atmel_mci *host;
+       struct mmc_host *mmc;
+       struct resource *regs;
+       int irq;
+       int ret;
+
+       regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!regs)
+               return -ENXIO;
+       pdata = pdev->dev.platform_data;
+       if (!pdata)
+               return -ENXIO;
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0)
+               return irq;
+
+       mmc = mmc_alloc_host(sizeof(struct atmel_mci), &pdev->dev);
+       if (!mmc)
+               return -ENOMEM;
+
+       host = mmc_priv(mmc);
+       host->pdev = pdev;
+       host->mmc = mmc;
+       host->detect_pin = pdata->detect_pin;
+       host->wp_pin = pdata->wp_pin;
+
+       host->mck = clk_get(&pdev->dev, "mci_clk");
+       if (IS_ERR(host->mck)) {
+               ret = PTR_ERR(host->mck);
+               goto err_clk_get;
+       }
+
+       ret = -ENOMEM;
+       host->regs = ioremap(regs->start, regs->end - regs->start + 1);
+       if (!host->regs)
+               goto err_ioremap;
+
+       /* The clock is only kept running for the reset and rate query */
+       clk_enable(host->mck);
+       mci_writel(host, CR, MCI_CR_SWRST);
+       host->bus_hz = clk_get_rate(host->mck);
+       clk_disable(host->mck);
+
+       host->mapbase = regs->start;
+
+       mmc->ops = &atmci_ops;
+       /* Matches the divider range used by atmci_set_ios(): 2 .. 512 */
+       mmc->f_min = (host->bus_hz + 511) / 512;
+       mmc->f_max = host->bus_hz / 2;
+       mmc->ocr_avail  = MMC_VDD_32_33 | MMC_VDD_33_34;
+       mmc->caps |= MMC_CAP_4_BIT_DATA;
+
+       mmc->max_hw_segs = 64;
+       mmc->max_phys_segs = 64;
+       mmc->max_req_size = 32768 * 512;
+       mmc->max_blk_size = 32768;
+       mmc->max_blk_count = 512;
+
+       tasklet_init(&host->tasklet, atmci_tasklet_func, (unsigned long)mmc);
+
+       ret = request_irq(irq, atmci_interrupt, 0, pdev->dev.bus_id, mmc);
+       if (ret)
+               goto err_request_irq;
+
+       /* Assume card is present if we don't have a detect pin */
+       host->present = 1;
+       if (host->detect_pin >= 0) {
+               if (gpio_request(host->detect_pin, "mmc_detect")) {
+                       dev_dbg(&mmc->class_dev, "no detect pin available\n");
+                       host->detect_pin = -1;
+               } else {
+                       /* A pin value of 0 is treated as "card present" */
+                       host->present = !gpio_get_value(host->detect_pin);
+               }
+       }
+       if (host->wp_pin >= 0) {
+               if (gpio_request(host->wp_pin, "mmc_wp")) {
+                       dev_dbg(&mmc->class_dev, "no WP pin available\n");
+                       host->wp_pin = -1;
+               }
+       }
+
+       platform_set_drvdata(pdev, host);
+
+       mmc_add_host(mmc);
+
+       if (host->detect_pin >= 0) {
+               /* Debounce timer, armed by atmci_detect_interrupt() */
+               setup_timer(&host->detect_timer, atmci_detect_change,
+                               (unsigned long)host);
+
+               ret = request_irq(gpio_to_irq(host->detect_pin),
+                               atmci_detect_interrupt,
+                               IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
+                               "mmc-detect", mmc);
+               if (ret) {
+                       /* Not fatal: carry on without card detection */
+                       dev_dbg(&mmc->class_dev,
+                               "could not request IRQ %d for detect pin\n",
+                               gpio_to_irq(host->detect_pin));
+                       gpio_free(host->detect_pin);
+                       host->detect_pin = -1;
+               }
+       }
+
+       dev_info(&mmc->class_dev,
+                       "Atmel MCI controller at 0x%08lx irq %d\n",
+                       host->mapbase, irq);
+
+       return 0;
+
+       /* Error unwinding, in reverse order of acquisition */
+err_request_irq:
+       iounmap(host->regs);
+err_ioremap:
+       clk_put(host->mck);
+err_clk_get:
+       mmc_free_host(mmc);
+       return ret;
+}
+
+/*
+ * Tear down a controller set up by atmci_probe(): release the
+ * card-detect interrupt, timer and GPIO, unregister the host from
+ * the MMC core, disable the controller and free the remaining
+ * resources. Always returns 0.
+ */
+static int __exit atmci_remove(struct platform_device *pdev)
+{
+       struct atmel_mci *host = platform_get_drvdata(pdev);
+
+       platform_set_drvdata(pdev, NULL);
+
+       if (host) {
+               if (host->detect_pin >= 0) {
+                       int pin = host->detect_pin;
+
+                       /* Make sure the timer doesn't enable the interrupt */
+                       host->detect_pin = -1;
+                       smp_wmb();
+
+                       free_irq(gpio_to_irq(pin), host->mmc);
+                       del_timer_sync(&host->detect_timer);
+                       gpio_free(pin);
+               }
+
+               mmc_remove_host(host->mmc);
+
+               /* Mask all interrupts and disable the controller */
+               clk_enable(host->mck);
+               mci_writel(host, IDR, ~0UL);
+               mci_writel(host, CR, MCI_CR_MCIDIS);
+               /* NOTE(review): presumably read back to flush the writes -- confirm */
+               mci_readl(host, SR);
+               clk_disable(host->mck);
+
+               if (host->wp_pin >= 0)
+                       gpio_free(host->wp_pin);
+
+               free_irq(platform_get_irq(pdev, 0), host->mmc);
+               iounmap(host->regs);
+
+               clk_put(host->mck);
+
+               mmc_free_host(host->mmc);
+       }
+       return 0;
+}
+
+/*
+ * Platform driver description. There is no .probe member: the probe
+ * routine is passed to platform_driver_probe() in atmci_init().
+ */
+static struct platform_driver atmci_driver = {
+       .remove         = __exit_p(atmci_remove),
+       .driver         = {
+               .name           = "atmel_mci",
+       },
+};
+
+/* Module entry point: register the driver and probe matching devices */
+static int __init atmci_init(void)
+{
+       int ret;
+
+       ret = platform_driver_probe(&atmci_driver, atmci_probe);
+
+       return ret;
+}
+
+/* Module exit point: unregister the platform driver */
+static void __exit atmci_exit(void)
+{
+       platform_driver_unregister(&atmci_driver);
+}
+
+module_init(atmci_init);
+module_exit(atmci_exit);
+
+MODULE_DESCRIPTION("Atmel Multimedia Card Interface driver");
+MODULE_AUTHOR("Haavard Skinnemoen <haavard.skinnemoen@atmel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c

index cc5f7bc546afc95d5379faca5634a1f439363e50..3f15eb204895f25cfaa75af21e8ee59dca16b8f2 100644 (file)
--- a/drivers/mmc/host/au1xmmc.c
+++ b/drivers/mmc/host/au1xmmc.c
@@ -21,7 +21,7 @@
   * published by the Free Software Foundation.
   */
  
-/* Why is a timer used to detect insert events?
+/* Why don't we use the SD controllers' carddetect feature?
   *
   * From the AU1100 MMC application guide:
   * If the Au1100-based design is intended to support both MultiMediaCards
@@ -30,8 +30,6 @@
   * In doing so, a MMC card never enters SPI-mode communications,
   * but now the SecureDigital card-detect feature of CD/DAT3 is ineffective
   * (the low to high transition will not occur).
- *
- * So we use the timer to check the status manually.
   */
  
  #include <linux/module.h>
@@ -41,51 +39,110 @@
  #include <linux/interrupt.h>
  #include <linux/dma-mapping.h>
  #include <linux/scatterlist.h>
-
+#include <linux/leds.h>
  #include <linux/mmc/host.h>
+
  #include <asm/io.h>
  #include <asm/mach-au1x00/au1000.h>
  #include <asm/mach-au1x00/au1xxx_dbdma.h>
  #include <asm/mach-au1x00/au1100_mmc.h>
  
-#include <au1xxx.h>
-#include "au1xmmc.h"
-
  #define DRIVER_NAME "au1xxx-mmc"
  
  /* Set this to enable special debugging macros */
+/* #define DEBUG */
  
  #ifdef DEBUG
-#define DBG(fmt, idx, args...) printk("au1xx(%d): DEBUG: " fmt, idx, ##args)
+#define DBG(fmt, idx, args...) \
+       printk(KERN_DEBUG "au1xmmc(%d): DEBUG: " fmt, idx, ##args)
  #else
-#define DBG(fmt, idx, args...)
+#define DBG(fmt, idx, args...) do {} while (0)
  #endif
  
-const struct {
+/* Hardware definitions */
+#define AU1XMMC_DESCRIPTOR_COUNT 1
+#define AU1XMMC_DESCRIPTOR_SIZE  2048
+
+#define AU1XMMC_OCR (MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30 | \
+                    MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33 | \
+                    MMC_VDD_33_34 | MMC_VDD_34_35 | MMC_VDD_35_36)
+
+/* This gives us a hard value for the stop command that we can write directly
+ * to the command register.
+ */
+#define STOP_CMD       \
+       (SD_CMD_RT_1B | SD_CMD_CT_7 | (0xC << SD_CMD_CI_SHIFT) | SD_CMD_GO)
+
+/* This is the set of interrupts that we configure by default. */
+#define AU1XMMC_INTERRUPTS                             \
+       (SD_CONFIG_SC | SD_CONFIG_DT | SD_CONFIG_RAT |  \
+        SD_CONFIG_CR | SD_CONFIG_I)
+
+/* The poll event (looking for insert/remove events) runs twice a second. */
+#define AU1XMMC_DETECT_TIMEOUT (HZ/2)
+
+struct au1xmmc_host {
+       struct mmc_host *mmc;
+       struct mmc_request *mrq;
+
+       u32 flags;
         u32 iobase;
-       u32 tx_devid, rx_devid;
-       u16 bcsrpwr;
-       u16 bcsrstatus;
-       u16 wpstatus;
-} au1xmmc_card_table[] = {
-       { SD0_BASE, DSCR_CMD0_SDMS_TX0, DSCR_CMD0_SDMS_RX0,
-         BCSR_BOARD_SD0PWR, BCSR_INT_SD0INSERT, BCSR_STATUS_SD0WP },
-#ifndef CONFIG_MIPS_DB1200
-       { SD1_BASE, DSCR_CMD0_SDMS_TX1, DSCR_CMD0_SDMS_RX1,
-         BCSR_BOARD_DS1PWR, BCSR_INT_SD1INSERT, BCSR_STATUS_SD1WP }
-#endif
-};
+       u32 clock;
+       u32 bus_width;
+       u32 power_mode;
  
-#define AU1XMMC_CONTROLLER_COUNT (ARRAY_SIZE(au1xmmc_card_table))
+       int status;
  
-/* This array stores pointers for the hosts (used by the IRQ handler) */
-struct au1xmmc_host *au1xmmc_hosts[AU1XMMC_CONTROLLER_COUNT];
-static int dma = 1;
+       struct {
+               int len;
+               int dir;
+       } dma;
  
-#ifdef MODULE
-module_param(dma, bool, 0);
-MODULE_PARM_DESC(dma, "Use DMA engine for data transfers (0 = disabled)");
-#endif
+       struct {
+               int index;
+               int offset;
+               int len;
+       } pio;
+
+       u32 tx_chan;
+       u32 rx_chan;
+
+       int irq;
+
+       struct tasklet_struct finish_task;
+       struct tasklet_struct data_task;
+       struct au1xmmc_platform_data *platdata;
+       struct platform_device *pdev;
+       struct resource *ioarea;
+};
+
+/* Status flags used by the host structure */
+#define HOST_F_XMIT    0x0001
+#define HOST_F_RECV    0x0002
+#define HOST_F_DMA     0x0010
+#define HOST_F_ACTIVE  0x0100
+#define HOST_F_STOP    0x1000
+
+#define HOST_S_IDLE    0x0001
+#define HOST_S_CMD     0x0002
+#define HOST_S_DATA    0x0003
+#define HOST_S_STOP    0x0004
+
+/* Easy access macros */
+#define HOST_STATUS(h) ((h)->iobase + SD_STATUS)
+#define HOST_CONFIG(h) ((h)->iobase + SD_CONFIG)
+#define HOST_ENABLE(h) ((h)->iobase + SD_ENABLE)
+#define HOST_TXPORT(h) ((h)->iobase + SD_TXPORT)
+#define HOST_RXPORT(h) ((h)->iobase + SD_RXPORT)
+#define HOST_CMDARG(h) ((h)->iobase + SD_CMDARG)
+#define HOST_BLKSIZE(h)        ((h)->iobase + SD_BLKSIZE)
+#define HOST_CMD(h)    ((h)->iobase + SD_CMD)
+#define HOST_CONFIG2(h)        ((h)->iobase + SD_CONFIG2)
+#define HOST_TIMEOUT(h)        ((h)->iobase + SD_TIMEOUT)
+#define HOST_DEBUG(h)  ((h)->iobase + SD_DEBUG)
+
+#define DMA_CHANNEL(h) \
+       (((h)->flags & HOST_F_XMIT) ? (h)->tx_chan : (h)->rx_chan)
  
  static inline void IRQ_ON(struct au1xmmc_host *host, u32 mask)
  {
@@ -119,14 +176,13 @@ static inline void IRQ_OFF(struct au1xmmc_host *host, u32 mask)
  
  static inline void SEND_STOP(struct au1xmmc_host *host)
  {
-
-       /* We know the value of CONFIG2, so avoid a read we don't need */
-       u32 mask = SD_CONFIG2_EN;
+       u32 config2;
  
         WARN_ON(host->status != HOST_S_DATA);
         host->status = HOST_S_STOP;
  
-       au_writel(mask | SD_CONFIG2_DF, HOST_CONFIG2(host));
+       config2 = au_readl(HOST_CONFIG2(host));
+       au_writel(config2 | SD_CONFIG2_DF, HOST_CONFIG2(host));
         au_sync();
  
         /* Send the stop commmand */
@@ -135,35 +191,36 @@ static inline void SEND_STOP(struct au1xmmc_host *host)
  
  static void au1xmmc_set_power(struct au1xmmc_host *host, int state)
  {
-
-       u32 val = au1xmmc_card_table[host->id].bcsrpwr;
-
-       bcsr->board &= ~val;
-       if (state) bcsr->board |= val;
-
-       au_sync_delay(1);
+       if (host->platdata && host->platdata->set_power)
+               host->platdata->set_power(host->mmc, state);
  }
  
-static inline int au1xmmc_card_inserted(struct au1xmmc_host *host)
+static int au1xmmc_card_inserted(struct mmc_host *mmc)
  {
-       return (bcsr->sig_status & au1xmmc_card_table[host->id].bcsrstatus)
-               ? 1 : 0;
+       struct au1xmmc_host *host = mmc_priv(mmc);
+
+       if (host->platdata && host->platdata->card_inserted)
+               return !!host->platdata->card_inserted(host->mmc);
+
+       return -ENOSYS;
  }
  
  static int au1xmmc_card_readonly(struct mmc_host *mmc)
  {
         struct au1xmmc_host *host = mmc_priv(mmc);
-       return (bcsr->status & au1xmmc_card_table[host->id].wpstatus)
-               ? 1 : 0;
+
+       if (host->platdata && host->platdata->card_readonly)
+               return !!host->platdata->card_readonly(mmc);
+
+       return -ENOSYS;
  }
  
  static void au1xmmc_finish_request(struct au1xmmc_host *host)
  {
-
         struct mmc_request *mrq = host->mrq;
  
         host->mrq = NULL;
-       host->flags &= HOST_F_ACTIVE;
+       host->flags &= HOST_F_ACTIVE | HOST_F_DMA;
  
         host->dma.len = 0;
         host->dma.dir = 0;
@@ -174,8 +231,6 @@ static void au1xmmc_finish_request(struct au1xmmc_host *host)
  
         host->status = HOST_S_IDLE;
  
-       bcsr->disk_leds |= (1 << 8);
-
         mmc_request_done(host->mmc, mrq);
  }
  
@@ -235,18 +290,14 @@ static int au1xmmc_send_command(struct au1xmmc_host *host, int wait,
         au_sync();
  
         /* Wait for the command to go on the line */
-
-       while(1) {
-               if (!(au_readl(HOST_CMD(host)) & SD_CMD_GO))
-                       break;
-       }
+       while (au_readl(HOST_CMD(host)) & SD_CMD_GO)
+               /* nop */;
  
         /* Wait for the command to come back */
-
         if (wait) {
                 u32 status = au_readl(HOST_STATUS(host));
  
-               while(!(status & SD_STATUS_CR))
+               while (!(status & SD_STATUS_CR))
                         status = au_readl(HOST_STATUS(host));
  
                 /* Clear the CR status */
@@ -260,12 +311,11 @@ static int au1xmmc_send_command(struct au1xmmc_host *host, int wait,
  
  static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status)
  {
-
         struct mmc_request *mrq = host->mrq;
         struct mmc_data *data;
         u32 crc;
  
-       WARN_ON(host->status != HOST_S_DATA && host->status != HOST_S_STOP);
+       WARN_ON((host->status != HOST_S_DATA) && (host->status != HOST_S_STOP));
  
         if (host->mrq == NULL)
                 return;
@@ -276,15 +326,13 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status)
                 status = au_readl(HOST_STATUS(host));
  
         /* The transaction is really over when the SD_STATUS_DB bit is clear */
-
-       while((host->flags & HOST_F_XMIT) && (status & SD_STATUS_DB))
+       while ((host->flags & HOST_F_XMIT) && (status & SD_STATUS_DB))
                 status = au_readl(HOST_STATUS(host));
  
         data->error = 0;
         dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, host->dma.dir);
  
          /* Process any errors */
-
         crc = (status & (SD_STATUS_WC | SD_STATUS_RC));
         if (host->flags & HOST_F_XMIT)
                 crc |= ((status & 0x07) == 0x02) ? 0 : 1;
@@ -299,16 +347,16 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status)
  
         if (!data->error) {
                 if (host->flags & HOST_F_DMA) {
+#ifdef CONFIG_SOC_AU1200       /* DBDMA */
                         u32 chan = DMA_CHANNEL(host);
  
-                       chan_tab_t *c = *((chan_tab_t **) chan);
+                       chan_tab_t *c = *((chan_tab_t **)chan);
                         au1x_dma_chan_t *cp = c->chan_ptr;
                         data->bytes_xfered = cp->ddma_bytecnt;
-               }
-               else
+#endif
+               } else
                         data->bytes_xfered =
-                               (data->blocks * data->blksz) -
-                               host->pio.len;
+                               (data->blocks * data->blksz) - host->pio.len;
         }
  
         au1xmmc_finish_request(host);
@@ -316,7 +364,7 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status)
  
  static void au1xmmc_tasklet_data(unsigned long param)
  {
-       struct au1xmmc_host *host = (struct au1xmmc_host *) param;
+       struct au1xmmc_host *host = (struct au1xmmc_host *)param;
  
         u32 status = au_readl(HOST_STATUS(host));
         au1xmmc_data_complete(host, status);
@@ -326,11 +374,10 @@ static void au1xmmc_tasklet_data(unsigned long param)
  
  static void au1xmmc_send_pio(struct au1xmmc_host *host)
  {
-
-       struct mmc_data *data = 0;
-       int sg_len, max, count = 0;
-       unsigned char *sg_ptr;
-       u32 status = 0;
+       struct mmc_data *data;
+       int sg_len, max, count;
+       unsigned char *sg_ptr, val;
+       u32 status;
         struct scatterlist *sg;
  
         data = host->mrq->data;
@@ -345,14 +392,12 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host)
         /* This is the space left inside the buffer */
         sg_len = data->sg[host->pio.index].length - host->pio.offset;
  
-       /* Check to if we need less then the size of the sg_buffer */
-
+       /* Check if we need less than the size of the sg_buffer */
         max = (sg_len > host->pio.len) ? host->pio.len : sg_len;
-       if (max > AU1XMMC_MAX_TRANSFER) max = AU1XMMC_MAX_TRANSFER;
-
-       for(count = 0; count < max; count++ ) {
-               unsigned char val;
+       if (max > AU1XMMC_MAX_TRANSFER)
+               max = AU1XMMC_MAX_TRANSFER;
  
+       for (count = 0; count < max; count++) {
                 status = au_readl(HOST_STATUS(host));
  
                 if (!(status & SD_STATUS_TH))
@@ -360,7 +405,7 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host)
  
                 val = *sg_ptr++;
  
-               au_writel((unsigned long) val, HOST_TXPORT(host));
+               au_writel((unsigned long)val, HOST_TXPORT(host));
                 au_sync();
         }
  
@@ -384,11 +429,10 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host)
  
  static void au1xmmc_receive_pio(struct au1xmmc_host *host)
  {
-
-       struct mmc_data *data = 0;
-       int sg_len = 0, max = 0, count = 0;
-       unsigned char *sg_ptr = 0;
-       u32 status = 0;
+       struct mmc_data *data;
+       int max, count, sg_len = 0;
+       unsigned char *sg_ptr = NULL;
+       u32 status, val;
         struct scatterlist *sg;
  
         data = host->mrq->data;
@@ -405,33 +449,33 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host)
                 /* This is the space left inside the buffer */
                 sg_len = sg_dma_len(&data->sg[host->pio.index]) - host->pio.offset;
  
-               /* Check to if we need less then the size of the sg_buffer */
-               if (sg_len < max) max = sg_len;
+               /* Check if we need less than the size of the sg_buffer */
+               if (sg_len < max)
+                       max = sg_len;
         }
  
         if (max > AU1XMMC_MAX_TRANSFER)
                 max = AU1XMMC_MAX_TRANSFER;
  
-       for(count = 0; count < max; count++ ) {
-               u32 val;
+       for (count = 0; count < max; count++) {
                 status = au_readl(HOST_STATUS(host));
  
                 if (!(status & SD_STATUS_NE))
                         break;
  
                 if (status & SD_STATUS_RC) {
-                       DBG("RX CRC Error [%d + %d].\n", host->id,
+                       DBG("RX CRC Error [%d + %d].\n", host->pdev->id,
                                         host->pio.len, count);
                         break;
                 }
  
                 if (status & SD_STATUS_RO) {
-                       DBG("RX Overrun [%d + %d]\n", host->id,
+                       DBG("RX Overrun [%d + %d]\n", host->pdev->id,
                                         host->pio.len, count);
                         break;
                 }
                 else if (status & SD_STATUS_RU) {
-                       DBG("RX Underrun [%d + %d]\n", host->id,
+                       DBG("RX Underrun [%d + %d]\n", host->pdev->id,
                                         host->pio.len,  count);
                         break;
                 }
@@ -439,7 +483,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host)
                 val = au_readl(HOST_RXPORT(host));
  
                 if (sg_ptr)
-                       *sg_ptr++ = (unsigned char) (val & 0xFF);
+                       *sg_ptr++ = (unsigned char)(val & 0xFF);
         }
  
         host->pio.len -= count;
@@ -451,7 +495,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host)
         }
  
         if (host->pio.len == 0) {
-               //IRQ_OFF(host, SD_CONFIG_RA | SD_CONFIG_RF);
+               /* IRQ_OFF(host, SD_CONFIG_RA | SD_CONFIG_RF); */
                 IRQ_OFF(host, SD_CONFIG_NE);
  
                 if (host->flags & HOST_F_STOP)
@@ -461,17 +505,15 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host)
         }
  }
  
-/* static void au1xmmc_cmd_complete
-   This is called when a command has been completed - grab the response
-   and check for errors.  Then start the data transfer if it is indicated.
-*/
-
+/* This is called when a command has been completed - grab the response
+ * and check for errors.  Then start the data transfer if it is indicated.
+ */
  static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status)
  {
-
         struct mmc_request *mrq = host->mrq;
         struct mmc_command *cmd;
-       int trans;
+       u32 r[4];
+       int i, trans;
  
         if (!host->mrq)
                 return;
@@ -481,9 +523,6 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status)
  
         if (cmd->flags & MMC_RSP_PRESENT) {
                 if (cmd->flags & MMC_RSP_136) {
-                       u32 r[4];
-                       int i;
-
                         r[0] = au_readl(host->iobase + SD_RESP3);
                         r[1] = au_readl(host->iobase + SD_RESP2);
                         r[2] = au_readl(host->iobase + SD_RESP1);
@@ -491,10 +530,9 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status)
  
                         /* The CRC is omitted from the response, so really
                          * we only got 120 bytes, but the engine expects
-                        * 128 bits, so we have to shift things up
+                        * 128 bits, so we have to shift things up.
                          */
-
-                       for(i = 0; i < 4; i++) {
+                       for (i = 0; i < 4; i++) {
                                 cmd->resp[i] = (r[i] & 0x00FFFFFF) << 8;
                                 if (i != 3)
                                         cmd->resp[i] |= (r[i + 1] & 0xFF000000) >> 24;
@@ -505,22 +543,20 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status)
                          * our response omits the CRC, our data ends up
                          * being shifted 8 bits to the right.  In this case,
                          * that means that the OSR data starts at bit 31,
-                        * so we can just read RESP0 and return that
+                        * so we can just read RESP0 and return that.
                          */
                         cmd->resp[0] = au_readl(host->iobase + SD_RESP0);
                 }
         }
  
          /* Figure out errors */
-
         if (status & (SD_STATUS_SC | SD_STATUS_WC | SD_STATUS_RC))
                 cmd->error = -EILSEQ;
  
         trans = host->flags & (HOST_F_XMIT | HOST_F_RECV);
  
         if (!trans || cmd->error) {
-
-               IRQ_OFF(host, SD_CONFIG_TH | SD_CONFIG_RA|SD_CONFIG_RF);
+               IRQ_OFF(host, SD_CONFIG_TH | SD_CONFIG_RA | SD_CONFIG_RF);
                 tasklet_schedule(&host->finish_task);
                 return;
         }
@@ -528,6 +564,7 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status)
         host->status = HOST_S_DATA;
  
         if (host->flags & HOST_F_DMA) {
+#ifdef CONFIG_SOC_AU1200       /* DBDMA */
                 u32 channel = DMA_CHANNEL(host);
  
                 /* Start the DMA as soon as the buffer gets something in it */
@@ -540,23 +577,21 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status)
                 }
  
                 au1xxx_dbdma_start(channel);
+#endif
         }
  }
  
  static void au1xmmc_set_clock(struct au1xmmc_host *host, int rate)
  {
-
         unsigned int pbus = get_au1x00_speed();
         unsigned int divisor;
         u32 config;
  
         /* From databook:
-          divisor = ((((cpuclock / sbus_divisor) / 2) / mmcclock) / 2) - 1
-       */
-
+        * divisor = ((((cpuclock / sbus_divisor) / 2) / mmcclock) / 2) - 1
+        */
         pbus /= ((au_readl(SYS_POWERCTRL) & 0x3) + 2);
         pbus /= 2;
-
         divisor = ((pbus / rate) / 2) - 1;
  
         config = au_readl(HOST_CONFIG(host));
@@ -568,15 +603,11 @@ static void au1xmmc_set_clock(struct au1xmmc_host *host, int rate)
         au_sync();
  }
  
-static int
-au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data)
+static int au1xmmc_prepare_data(struct au1xmmc_host *host,
+                               struct mmc_data *data)
  {
-
         int datalen = data->blocks * data->blksz;
  
-       if (dma != 0)
-               host->flags |= HOST_F_DMA;
-
         if (data->flags & MMC_DATA_READ)
                 host->flags |= HOST_F_RECV;
         else
@@ -596,12 +627,13 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data)
         au_writel(data->blksz - 1, HOST_BLKSIZE(host));
  
         if (host->flags & HOST_F_DMA) {
+#ifdef CONFIG_SOC_AU1200       /* DBDMA */
                 int i;
                 u32 channel = DMA_CHANNEL(host);
  
                 au1xxx_dbdma_stop(channel);
  
-               for(i = 0; i < host->dma.len; i++) {
+               for (i = 0; i < host->dma.len; i++) {
                         u32 ret = 0, flags = DDMA_FLAGS_NOIE;
                         struct scatterlist *sg = &data->sg[i];
                         int sg_len = sg->length;
@@ -611,23 +643,21 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data)
                         if (i == host->dma.len - 1)
                                 flags = DDMA_FLAGS_IE;
  
-                       if (host->flags & HOST_F_XMIT){
-                               ret = au1xxx_dbdma_put_source_flags(channel,
-                                       (void *) sg_virt(sg), len, flags);
-                       }
-                       else {
-                               ret = au1xxx_dbdma_put_dest_flags(channel,
-                                       (void *) sg_virt(sg),
-                                       len, flags);
+                       if (host->flags & HOST_F_XMIT) {
+                               ret = au1xxx_dbdma_put_source_flags(channel,
+                                       (void *)sg_virt(sg), len, flags);
+                       } else {
+                               ret = au1xxx_dbdma_put_dest_flags(channel,
+                                       (void *)sg_virt(sg), len, flags);
                         }
  
-                       if (!ret)
+                       if (!ret)
                                 goto dataerr;
  
                         datalen -= len;
                 }
-       }
-       else {
+#endif
+       } else {
                 host->pio.index = 0;
                 host->pio.offset = 0;
                 host->pio.len = datalen;
@@ -636,25 +666,21 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data)
                         IRQ_ON(host, SD_CONFIG_TH);
                 else
                         IRQ_ON(host, SD_CONFIG_NE);
-                       //IRQ_ON(host, SD_CONFIG_RA|SD_CONFIG_RF);
+                       /* IRQ_ON(host, SD_CONFIG_RA | SD_CONFIG_RF); */
         }
  
         return 0;
  
- dataerr:
-       dma_unmap_sg(mmc_dev(host->mmc),data->sg,data->sg_len,host->dma.dir);
+dataerr:
+       dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+                       host->dma.dir);
         return -ETIMEDOUT;
  }
  
-/* static void au1xmmc_request
-   This actually starts a command or data transaction
-*/
-
+/* This actually starts a command or data transaction */
  static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq)
  {
-
         struct au1xmmc_host *host = mmc_priv(mmc);
-       unsigned int flags = 0;
         int ret = 0;
  
         WARN_ON(irqs_disabled());
@@ -663,11 +689,15 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq)
         host->mrq = mrq;
         host->status = HOST_S_CMD;
  
-       bcsr->disk_leds &= ~(1 << 8);
+       /* fail request immediately if no card is present */
+       if (0 == au1xmmc_card_inserted(mmc)) {
+               mrq->cmd->error = -ENOMEDIUM;
+               au1xmmc_finish_request(host);
+               return;
+       }
  
         if (mrq->data) {
                 FLUSH_FIFO(host);
-               flags = mrq->data->flags;
                 ret = au1xmmc_prepare_data(host, mrq->data);
         }
  
@@ -682,7 +712,6 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq)
  
  static void au1xmmc_reset_controller(struct au1xmmc_host *host)
  {
-
         /* Apply the clock */
         au_writel(SD_ENABLE_CE, HOST_ENABLE(host));
          au_sync_delay(1);
@@ -712,9 +741,10 @@ static void au1xmmc_reset_controller(struct au1xmmc_host *host)
  }
  
  
-static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios)
+static void au1xmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
  {
         struct au1xmmc_host *host = mmc_priv(mmc);
+       u32 config2;
  
         if (ios->power_mode == MMC_POWER_OFF)
                 au1xmmc_set_power(host, 0);
@@ -726,21 +756,18 @@ static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios)
                 au1xmmc_set_clock(host, ios->clock);
                 host->clock = ios->clock;
         }
-}
-
-static void au1xmmc_dma_callback(int irq, void *dev_id)
-{
-       struct au1xmmc_host *host = (struct au1xmmc_host *) dev_id;
-
-       /* Avoid spurious interrupts */
  
-       if (!host->mrq)
-               return;
-
-       if (host->flags & HOST_F_STOP)
-               SEND_STOP(host);
-
-       tasklet_schedule(&host->data_task);
+       config2 = au_readl(HOST_CONFIG2(host));
+       switch (ios->bus_width) {
+       case MMC_BUS_WIDTH_4:
+               config2 |= SD_CONFIG2_WB;
+               break;
+       case MMC_BUS_WIDTH_1:
+               config2 &= ~SD_CONFIG2_WB;
+               break;
+       }
+       au_writel(config2, HOST_CONFIG2(host));
+       au_sync();
  }
  
  #define STATUS_TIMEOUT (SD_STATUS_RAT | SD_STATUS_DT)
@@ -749,245 +776,354 @@ static void au1xmmc_dma_callback(int irq, void *dev_id)
  
  static irqreturn_t au1xmmc_irq(int irq, void *dev_id)
  {
-
+       struct au1xmmc_host *host = dev_id;
         u32 status;
-       int i, ret = 0;
-
-       disable_irq(AU1100_SD_IRQ);
  
-       for(i = 0; i < AU1XMMC_CONTROLLER_COUNT; i++) {
-               struct au1xmmc_host * host = au1xmmc_hosts[i];
-               u32 handled = 1;
+       status = au_readl(HOST_STATUS(host));
  
-               status = au_readl(HOST_STATUS(host));
+       if (!(status & SD_STATUS_I))
+               return IRQ_NONE;        /* not ours */
  
-               if (host->mrq && (status & STATUS_TIMEOUT)) {
-                       if (status & SD_STATUS_RAT)
-                               host->mrq->cmd->error = -ETIMEDOUT;
+       if (status & SD_STATUS_SI)      /* SDIO */
+               mmc_signal_sdio_irq(host->mmc);
  
-                       else if (status & SD_STATUS_DT)
-                               host->mrq->data->error = -ETIMEDOUT;
+       if (host->mrq && (status & STATUS_TIMEOUT)) {
+               if (status & SD_STATUS_RAT)
+                       host->mrq->cmd->error = -ETIMEDOUT;
+               else if (status & SD_STATUS_DT)
+                       host->mrq->data->error = -ETIMEDOUT;
  
-                       /* In PIO mode, interrupts might still be enabled */
-                       IRQ_OFF(host, SD_CONFIG_NE | SD_CONFIG_TH);
+               /* In PIO mode, interrupts might still be enabled */
+               IRQ_OFF(host, SD_CONFIG_NE | SD_CONFIG_TH);
  
-                       //IRQ_OFF(host, SD_CONFIG_TH|SD_CONFIG_RA|SD_CONFIG_RF);
-                       tasklet_schedule(&host->finish_task);
-               }
+               /* IRQ_OFF(host, SD_CONFIG_TH | SD_CONFIG_RA | SD_CONFIG_RF); */
+               tasklet_schedule(&host->finish_task);
+       }
  #if 0
-               else if (status & SD_STATUS_DD) {
-
-                       /* Sometimes we get a DD before a NE in PIO mode */
-
-                       if (!(host->flags & HOST_F_DMA) &&
-                                       (status & SD_STATUS_NE))
-                               au1xmmc_receive_pio(host);
-                       else {
-                               au1xmmc_data_complete(host, status);
-                               //tasklet_schedule(&host->data_task);
-                       }
+       else if (status & SD_STATUS_DD) {
+               /* Sometimes we get a DD before a NE in PIO mode */
+               if (!(host->flags & HOST_F_DMA) && (status & SD_STATUS_NE))
+                       au1xmmc_receive_pio(host);
+               else {
+                       au1xmmc_data_complete(host, status);
+                       /* tasklet_schedule(&host->data_task); */
                 }
+       }
  #endif
-               else if (status & (SD_STATUS_CR)) {
-                       if (host->status == HOST_S_CMD)
-                               au1xmmc_cmd_complete(host,status);
-               }
-               else if (!(host->flags & HOST_F_DMA)) {
-                       if ((host->flags & HOST_F_XMIT) &&
-                           (status & STATUS_DATA_OUT))
-                               au1xmmc_send_pio(host);
-                       else if ((host->flags & HOST_F_RECV) &&
-                           (status & STATUS_DATA_IN))
-                               au1xmmc_receive_pio(host);
-               }
-               else if (status & 0x203FBC70) {
-                       DBG("Unhandled status %8.8x\n", host->id, status);
-                       handled = 0;
-               }
-
-               au_writel(status, HOST_STATUS(host));
-               au_sync();
-
-               ret |= handled;
+       else if (status & SD_STATUS_CR) {
+               if (host->status == HOST_S_CMD)
+                       au1xmmc_cmd_complete(host, status);
+
+       } else if (!(host->flags & HOST_F_DMA)) {
+               if ((host->flags & HOST_F_XMIT) && (status & STATUS_DATA_OUT))
+                       au1xmmc_send_pio(host);
+               else if ((host->flags & HOST_F_RECV) && (status & STATUS_DATA_IN))
+                       au1xmmc_receive_pio(host);
+
+       } else if (status & 0x203F3C70) {
+                       DBG("Unhandled status %8.8x\n", host->pdev->id,
+                               status);
         }
  
-       enable_irq(AU1100_SD_IRQ);
-       return ret;
+       au_writel(status, HOST_STATUS(host));
+       au_sync();
+
+       return IRQ_HANDLED;
  }
  
-static void au1xmmc_poll_event(unsigned long arg)
-{
-       struct au1xmmc_host *host = (struct au1xmmc_host *) arg;
+#ifdef CONFIG_SOC_AU1200
+/* 8bit memory DMA device */
+static dbdev_tab_t au1xmmc_mem_dbdev = {
+       .dev_id         = DSCR_CMD0_ALWAYS,
+       .dev_flags      = DEV_FLAGS_ANYUSE,
+       .dev_tsize      = 0,
+       .dev_devwidth   = 8,
+       .dev_physaddr   = 0x00000000,
+       .dev_intlevel   = 0,
+       .dev_intpolarity = 0,
+};
+static int memid;
  
-       int card = au1xmmc_card_inserted(host);
-        int controller = (host->flags & HOST_F_ACTIVE) ? 1 : 0;
+static void au1xmmc_dbdma_callback(int irq, void *dev_id)
+{
+       struct au1xmmc_host *host = (struct au1xmmc_host *)dev_id;
  
-       if (card != controller) {
-               host->flags &= ~HOST_F_ACTIVE;
-               if (card) host->flags |= HOST_F_ACTIVE;
-               mmc_detect_change(host->mmc, 0);
-       }
+       /* Avoid spurious interrupts */
+       if (!host->mrq)
+               return;
  
-       if (host->mrq != NULL) {
-               u32 status = au_readl(HOST_STATUS(host));
-               DBG("PENDING - %8.8x\n", host->id, status);
-       }
+       if (host->flags & HOST_F_STOP)
+               SEND_STOP(host);
  
-       mod_timer(&host->timer, jiffies + AU1XMMC_DETECT_TIMEOUT);
+       tasklet_schedule(&host->data_task);
  }
  
-static dbdev_tab_t au1xmmc_mem_dbdev =
-{
-       DSCR_CMD0_ALWAYS, DEV_FLAGS_ANYUSE, 0, 8, 0x00000000, 0, 0
-};
-
-static void au1xmmc_init_dma(struct au1xmmc_host *host)
+static int au1xmmc_dbdma_init(struct au1xmmc_host *host)
  {
+       struct resource *res;
+       int txid, rxid;
+
+       res = platform_get_resource(host->pdev, IORESOURCE_DMA, 0);
+       if (!res)
+               return -ENODEV;
+       txid = res->start;
+
+       res = platform_get_resource(host->pdev, IORESOURCE_DMA, 1);
+       if (!res)
+               return -ENODEV;
+       rxid = res->start;
+
+       if (!memid)
+               return -ENODEV;
+
+       host->tx_chan = au1xxx_dbdma_chan_alloc(memid, txid,
+                               au1xmmc_dbdma_callback, (void *)host);
+       if (!host->tx_chan) {
+               dev_err(&host->pdev->dev, "cannot allocate TX DMA\n");
+               return -ENODEV;
+       }
  
-       u32 rxchan, txchan;
-
-       int txid = au1xmmc_card_table[host->id].tx_devid;
-       int rxid = au1xmmc_card_table[host->id].rx_devid;
+       host->rx_chan = au1xxx_dbdma_chan_alloc(rxid, memid,
+                               au1xmmc_dbdma_callback, (void *)host);
+       if (!host->rx_chan) {
+               dev_err(&host->pdev->dev, "cannot allocate RX DMA\n");
+               au1xxx_dbdma_chan_free(host->tx_chan);
+               return -ENODEV;
+       }
  
-       /* DSCR_CMD0_ALWAYS has a stride of 32 bits, we need a stride
-          of 8 bits.  And since devices are shared, we need to create
-          our own to avoid freaking out other devices
-       */
+       au1xxx_dbdma_set_devwidth(host->tx_chan, 8);
+       au1xxx_dbdma_set_devwidth(host->rx_chan, 8);
  
-       int memid = au1xxx_ddma_add_device(&au1xmmc_mem_dbdev);
+       au1xxx_dbdma_ring_alloc(host->tx_chan, AU1XMMC_DESCRIPTOR_COUNT);
+       au1xxx_dbdma_ring_alloc(host->rx_chan, AU1XMMC_DESCRIPTOR_COUNT);
  
-       txchan = au1xxx_dbdma_chan_alloc(memid, txid,
-                                        au1xmmc_dma_callback, (void *) host);
+       /* DBDMA is good to go */
+       host->flags |= HOST_F_DMA;
  
-       rxchan = au1xxx_dbdma_chan_alloc(rxid, memid,
-                                        au1xmmc_dma_callback, (void *) host);
+       return 0;
+}
  
-       au1xxx_dbdma_set_devwidth(txchan, 8);
-       au1xxx_dbdma_set_devwidth(rxchan, 8);
+static void au1xmmc_dbdma_shutdown(struct au1xmmc_host *host)
+{
+       if (host->flags & HOST_F_DMA) {
+               host->flags &= ~HOST_F_DMA;
+               au1xxx_dbdma_chan_free(host->tx_chan);
+               au1xxx_dbdma_chan_free(host->rx_chan);
+       }
+}
+#endif
  
-       au1xxx_dbdma_ring_alloc(txchan, AU1XMMC_DESCRIPTOR_COUNT);
-       au1xxx_dbdma_ring_alloc(rxchan, AU1XMMC_DESCRIPTOR_COUNT);
+static void au1xmmc_enable_sdio_irq(struct mmc_host *mmc, int en)
+{
+       struct au1xmmc_host *host = mmc_priv(mmc);
  
-       host->tx_chan = txchan;
-       host->rx_chan = rxchan;
+       if (en)
+               IRQ_ON(host, SD_CONFIG_SI);
+       else
+               IRQ_OFF(host, SD_CONFIG_SI);
  }
  
  static const struct mmc_host_ops au1xmmc_ops = {
         .request        = au1xmmc_request,
         .set_ios        = au1xmmc_set_ios,
         .get_ro         = au1xmmc_card_readonly,
+       .get_cd         = au1xmmc_card_inserted,
+       .enable_sdio_irq = au1xmmc_enable_sdio_irq,
  };
  
  static int __devinit au1xmmc_probe(struct platform_device *pdev)
  {
+       struct mmc_host *mmc;
+       struct au1xmmc_host *host;
+       struct resource *r;
+       int ret;
+
+       mmc = mmc_alloc_host(sizeof(struct au1xmmc_host), &pdev->dev);
+       if (!mmc) {
+               dev_err(&pdev->dev, "no memory for mmc_host\n");
+               ret = -ENOMEM;
+               goto out0;
+       }
  
-       int i, ret = 0;
-
-       /* THe interrupt is shared among all controllers */
-       ret = request_irq(AU1100_SD_IRQ, au1xmmc_irq, IRQF_DISABLED, "MMC", 0);
+       host = mmc_priv(mmc);
+       host->mmc = mmc;
+       host->platdata = pdev->dev.platform_data;
+       host->pdev = pdev;
  
-       if (ret) {
-               printk(DRIVER_NAME "ERROR: Couldn't get int %d: %d\n",
-                               AU1100_SD_IRQ, ret);
-               return -ENXIO;
+       ret = -ENODEV;
+       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!r) {
+               dev_err(&pdev->dev, "no mmio defined\n");
+               goto out1;
         }
  
-       disable_irq(AU1100_SD_IRQ);
+       host->ioarea = request_mem_region(r->start, r->end - r->start + 1,
+                                          pdev->name);
+       if (!host->ioarea) {
+               dev_err(&pdev->dev, "mmio already in use\n");
+               goto out1;
+       }
  
-       for(i = 0; i < AU1XMMC_CONTROLLER_COUNT; i++) {
-               struct mmc_host *mmc = mmc_alloc_host(sizeof(struct au1xmmc_host), &pdev->dev);
-               struct au1xmmc_host *host = 0;
+       host->iobase = (unsigned long)ioremap(r->start, 0x3c);
+       if (!host->iobase) {
+               dev_err(&pdev->dev, "cannot remap mmio\n");
+               goto out2;
+       }
  
-               if (!mmc) {
-                       printk(DRIVER_NAME "ERROR: no mem for host %d\n", i);
-                       au1xmmc_hosts[i] = 0;
-                       continue;
-               }
+       r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+       if (!r) {
+               dev_err(&pdev->dev, "no IRQ defined\n");
+               goto out3;
+       }
  
-               mmc->ops = &au1xmmc_ops;
+       host->irq = r->start;
+       /* IRQ is shared among both SD controllers */
+       ret = request_irq(host->irq, au1xmmc_irq, IRQF_SHARED,
+                         DRIVER_NAME, host);
+       if (ret) {
+               dev_err(&pdev->dev, "cannot grab IRQ\n");
+               goto out3;
+       }
  
-               mmc->f_min =   450000;
-               mmc->f_max = 24000000;
+       mmc->ops = &au1xmmc_ops;
  
-               mmc->max_seg_size = AU1XMMC_DESCRIPTOR_SIZE;
-               mmc->max_phys_segs = AU1XMMC_DESCRIPTOR_COUNT;
+       mmc->f_min =   450000;
+       mmc->f_max = 24000000;
  
-               mmc->max_blk_size = 2048;
-               mmc->max_blk_count = 512;
+       mmc->max_seg_size = AU1XMMC_DESCRIPTOR_SIZE;
+       mmc->max_phys_segs = AU1XMMC_DESCRIPTOR_COUNT;
  
-               mmc->ocr_avail = AU1XMMC_OCR;
+       mmc->max_blk_size = 2048;
+       mmc->max_blk_count = 512;
  
-               host = mmc_priv(mmc);
-               host->mmc = mmc;
+       mmc->ocr_avail = AU1XMMC_OCR;
+       mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ;
  
-               host->id = i;
-               host->iobase = au1xmmc_card_table[host->id].iobase;
-               host->clock = 0;
-               host->power_mode = MMC_POWER_OFF;
+       host->status = HOST_S_IDLE;
  
-               host->flags = au1xmmc_card_inserted(host) ? HOST_F_ACTIVE : 0;
-               host->status = HOST_S_IDLE;
+       /* board-specific carddetect setup, if any */
+       if (host->platdata && host->platdata->cd_setup) {
+               ret = host->platdata->cd_setup(mmc, 1);
+               if (ret) {
+                       dev_warn(&pdev->dev, "board CD setup failed\n");
+                       mmc->caps |= MMC_CAP_NEEDS_POLL;
+               }
+       } else
+               mmc->caps |= MMC_CAP_NEEDS_POLL;
  
-               init_timer(&host->timer);
+       tasklet_init(&host->data_task, au1xmmc_tasklet_data,
+                       (unsigned long)host);
  
-               host->timer.function = au1xmmc_poll_event;
-               host->timer.data = (unsigned long) host;
-               host->timer.expires = jiffies + AU1XMMC_DETECT_TIMEOUT;
+       tasklet_init(&host->finish_task, au1xmmc_tasklet_finish,
+                       (unsigned long)host);
  
-               tasklet_init(&host->data_task, au1xmmc_tasklet_data,
-                               (unsigned long) host);
+#ifdef CONFIG_SOC_AU1200
+       ret = au1xmmc_dbdma_init(host);
+       if (ret)
+               printk(KERN_INFO DRIVER_NAME ": DBDMA init failed; using PIO\n");
+#endif
  
-               tasklet_init(&host->finish_task, au1xmmc_tasklet_finish,
-                               (unsigned long) host);
+#ifdef CONFIG_LEDS_CLASS
+       if (host->platdata && host->platdata->led) {
+               struct led_classdev *led = host->platdata->led;
+               led->name = mmc_hostname(mmc);
+               led->brightness = LED_OFF;
+               led->default_trigger = mmc_hostname(mmc);
+               ret = led_classdev_register(mmc_dev(mmc), led);
+               if (ret)
+                       goto out5;
+       }
+#endif
  
-               spin_lock_init(&host->lock);
+       au1xmmc_reset_controller(host);
  
-               if (dma != 0)
-                       au1xmmc_init_dma(host);
+       ret = mmc_add_host(mmc);
+       if (ret) {
+               dev_err(&pdev->dev, "cannot add mmc host\n");
+               goto out6;
+       }
  
-               au1xmmc_reset_controller(host);
+       platform_set_drvdata(pdev, mmc);
  
-               mmc_add_host(mmc);
-               au1xmmc_hosts[i] = host;
+       printk(KERN_INFO DRIVER_NAME ": MMC Controller %d set up at %8.8X"
+               " (mode=%s)\n", pdev->id, host->iobase,
+               host->flags & HOST_F_DMA ? "dma" : "pio");
  
-               add_timer(&host->timer);
+       return 0;       /* all ok */
  
-               printk(KERN_INFO DRIVER_NAME ": MMC Controller %d set up at %8.8X (mode=%s)\n",
-                      host->id, host->iobase, dma ? "dma" : "pio");
-       }
+out6:
+#ifdef CONFIG_LEDS_CLASS
+       if (host->platdata && host->platdata->led)
+               led_classdev_unregister(host->platdata->led);
+out5:
+#endif
+       au_writel(0, HOST_ENABLE(host));
+       au_writel(0, HOST_CONFIG(host));
+       au_writel(0, HOST_CONFIG2(host));
+       au_sync();
  
-       enable_irq(AU1100_SD_IRQ);
+#ifdef CONFIG_SOC_AU1200
+       au1xmmc_dbdma_shutdown(host);
+#endif
  
-       return 0;
+       tasklet_kill(&host->data_task);
+       tasklet_kill(&host->finish_task);
+
+       if (host->platdata && host->platdata->cd_setup &&
+           !(mmc->caps & MMC_CAP_NEEDS_POLL))
+               host->platdata->cd_setup(mmc, 0);
+
+       free_irq(host->irq, host);
+out3:
+       iounmap((void *)host->iobase);
+out2:
+       release_resource(host->ioarea);
+       kfree(host->ioarea);
+out1:
+       mmc_free_host(mmc);
+out0:
+       return ret;
  }
  
  static int __devexit au1xmmc_remove(struct platform_device *pdev)
  {
+       struct mmc_host *mmc = platform_get_drvdata(pdev);
+       struct au1xmmc_host *host;
+
+       if (mmc) {
+               host  = mmc_priv(mmc);
  
-       int i;
+               mmc_remove_host(mmc);
  
-       disable_irq(AU1100_SD_IRQ);
+#ifdef CONFIG_LEDS_CLASS
+               if (host->platdata && host->platdata->led)
+                       led_classdev_unregister(host->platdata->led);
+#endif
  
-       for(i = 0; i < AU1XMMC_CONTROLLER_COUNT; i++) {
-               struct au1xmmc_host *host = au1xmmc_hosts[i];
-               if (!host) continue;
+               if (host->platdata && host->platdata->cd_setup &&
+                   !(mmc->caps & MMC_CAP_NEEDS_POLL))
+                       host->platdata->cd_setup(mmc, 0);
+
+               au_writel(0, HOST_ENABLE(host));
+               au_writel(0, HOST_CONFIG(host));
+               au_writel(0, HOST_CONFIG2(host));
+               au_sync();
  
                 tasklet_kill(&host->data_task);
                 tasklet_kill(&host->finish_task);
  
-               del_timer_sync(&host->timer);
+#ifdef CONFIG_SOC_AU1200
+               au1xmmc_dbdma_shutdown(host);
+#endif
                 au1xmmc_set_power(host, 0);
  
-               mmc_remove_host(host->mmc);
-
-               au1xxx_dbdma_chan_free(host->tx_chan);
-               au1xxx_dbdma_chan_free(host->rx_chan);
+               free_irq(host->irq, host);
+               iounmap((void *)host->iobase);
+               release_resource(host->ioarea);
+               kfree(host->ioarea);
  
-               au_writel(0x0, HOST_ENABLE(host));
-               au_sync();
+               mmc_free_host(mmc);
         }
-
-       free_irq(AU1100_SD_IRQ, 0);
         return 0;
  }
  
@@ -1004,21 +1140,31 @@ static struct platform_driver au1xmmc_driver = {
  
  static int __init au1xmmc_init(void)
  {
+#ifdef CONFIG_SOC_AU1200
+       /* DSCR_CMD0_ALWAYS has a stride of 32 bits, we need a stride
+        * of 8 bits.  And since devices are shared, we need to create
+        * our own to avoid freaking out other devices.
+        */
+       memid = au1xxx_ddma_add_device(&au1xmmc_mem_dbdev);
+       if (!memid)
+               printk(KERN_ERR "au1xmmc: cannot add memory dbdma dev\n");
+#endif
         return platform_driver_register(&au1xmmc_driver);
  }
  
  static void __exit au1xmmc_exit(void)
  {
+#ifdef CONFIG_SOC_AU1200
+       if (memid)
+               au1xxx_ddma_del_device(memid);
+#endif
         platform_driver_unregister(&au1xmmc_driver);
  }
  
  module_init(au1xmmc_init);
  module_exit(au1xmmc_exit);
  
-#ifdef MODULE
  MODULE_AUTHOR("Advanced Micro Devices, Inc");
  MODULE_DESCRIPTION("MMC/SD driver for the Alchemy Au1XXX");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS("platform:au1xxx-mmc");
-#endif
-
diff --git a/drivers/mmc/host/au1xmmc.h b/drivers/mmc/host/au1xmmc.h

deleted file mode 100644 (file)

index 341cbdf..0000000
--- a/drivers/mmc/host/au1xmmc.h
+++ /dev/null
@@ -1,96 +0,0 @@
-#ifndef _AU1XMMC_H_
-#define _AU1XMMC_H_
-
-/* Hardware definitions */
-
-#define AU1XMMC_DESCRIPTOR_COUNT 1
-#define AU1XMMC_DESCRIPTOR_SIZE  2048
-
-#define AU1XMMC_OCR ( MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30  | \
-                     MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33  | \
-                     MMC_VDD_33_34 | MMC_VDD_34_35 | MMC_VDD_35_36)
-
-/* Easy access macros */
-
-#define HOST_STATUS(h) ((h)->iobase + SD_STATUS)
-#define HOST_CONFIG(h) ((h)->iobase + SD_CONFIG)
-#define HOST_ENABLE(h) ((h)->iobase + SD_ENABLE)
-#define HOST_TXPORT(h) ((h)->iobase + SD_TXPORT)
-#define HOST_RXPORT(h) ((h)->iobase + SD_RXPORT)
-#define HOST_CMDARG(h) ((h)->iobase + SD_CMDARG)
-#define HOST_BLKSIZE(h)        ((h)->iobase + SD_BLKSIZE)
-#define HOST_CMD(h)    ((h)->iobase + SD_CMD)
-#define HOST_CONFIG2(h)        ((h)->iobase + SD_CONFIG2)
-#define HOST_TIMEOUT(h)        ((h)->iobase + SD_TIMEOUT)
-#define HOST_DEBUG(h)  ((h)->iobase + SD_DEBUG)
-
-#define DMA_CHANNEL(h) \
-       ( ((h)->flags & HOST_F_XMIT) ? (h)->tx_chan : (h)->rx_chan)
-
-/* This gives us a hard value for the stop command that we can write directly
- * to the command register
- */
-
-#define STOP_CMD (SD_CMD_RT_1B|SD_CMD_CT_7|(0xC << SD_CMD_CI_SHIFT)|SD_CMD_GO)
-
-/* This is the set of interrupts that we configure by default */
-
-#if 0
-#define AU1XMMC_INTERRUPTS (SD_CONFIG_SC | SD_CONFIG_DT | SD_CONFIG_DD | \
-               SD_CONFIG_RAT | SD_CONFIG_CR | SD_CONFIG_I)
-#endif
-
-#define AU1XMMC_INTERRUPTS (SD_CONFIG_SC | SD_CONFIG_DT | \
-               SD_CONFIG_RAT | SD_CONFIG_CR | SD_CONFIG_I)
-/* The poll event (looking for insert/remove events runs twice a second */
-#define AU1XMMC_DETECT_TIMEOUT (HZ/2)
-
-struct au1xmmc_host {
-  struct mmc_host *mmc;
-  struct mmc_request *mrq;
-
-  u32 id;
-
-  u32 flags;
-  u32 iobase;
-  u32 clock;
-  u32 bus_width;
-  u32 power_mode;
-
-  int status;
-
-   struct {
-          int len;
-          int dir;
-  } dma;
-
-   struct {
-          int index;
-          int offset;
-          int len;
-  } pio;
-
-  u32 tx_chan;
-  u32 rx_chan;
-
-  struct timer_list timer;
-  struct tasklet_struct finish_task;
-  struct tasklet_struct data_task;
-
-  spinlock_t lock;
-};
-
-/* Status flags used by the host structure */
-
-#define HOST_F_XMIT   0x0001
-#define HOST_F_RECV   0x0002
-#define HOST_F_DMA    0x0010
-#define HOST_F_ACTIVE 0x0100
-#define HOST_F_STOP   0x1000
-
-#define HOST_S_IDLE   0x0001
-#define HOST_S_CMD    0x0002
-#define HOST_S_DATA   0x0003
-#define HOST_S_STOP   0x0004
-
-#endif
diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c

index eed211b2ac707dc6bf02af26cdf8c60d443d63d7..5e880c0f13495105105efee78d1eadcebdfa4a29 100644 (file)
--- a/drivers/mmc/host/imxmmc.c
+++ b/drivers/mmc/host/imxmmc.c
@@ -892,9 +892,12 @@ static int imxmci_get_ro(struct mmc_host *mmc)
         struct imxmci_host *host = mmc_priv(mmc);
  
         if (host->pdata && host->pdata->get_ro)
-               return host->pdata->get_ro(mmc_dev(mmc));
-       /* Host doesn't support read only detection so assume writeable */
-       return 0;
+               return !!host->pdata->get_ro(mmc_dev(mmc));
+       /*
+        * Board doesn't support read only detection; let the mmc core
+        * decide what to do.
+        */
+       return -ENOSYS;
  }
  
  
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c

index 35508584ac2ae55ffae78479877fe67b32a3a7d2..41cc63360e43d357db2d5d42a06cf55b0abd47b3 100644 (file)
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1126,16 +1126,28 @@ static int mmc_spi_get_ro(struct mmc_host *mmc)
         struct mmc_spi_host *host = mmc_priv(mmc);
  
         if (host->pdata && host->pdata->get_ro)
-               return host->pdata->get_ro(mmc->parent);
-       /* board doesn't support read only detection; assume writeable */
-       return 0;
+               return !!host->pdata->get_ro(mmc->parent);
+       /*
+        * Board doesn't support read only detection; let the mmc core
+        * decide what to do.
+        */
+       return -ENOSYS;
  }
  
+static int mmc_spi_get_cd(struct mmc_host *mmc)
+{
+       struct mmc_spi_host *host = mmc_priv(mmc);
+
+       if (host->pdata && host->pdata->get_cd)
+               return !!host->pdata->get_cd(mmc->parent);
+       return -ENOSYS;
+}
  
  static const struct mmc_host_ops mmc_spi_ops = {
         .request        = mmc_spi_request,
         .set_ios        = mmc_spi_set_ios,
         .get_ro         = mmc_spi_get_ro,
+       .get_cd         = mmc_spi_get_cd,
  };
  
  
@@ -1240,10 +1252,7 @@ static int mmc_spi_probe(struct spi_device *spi)
         mmc->ops = &mmc_spi_ops;
         mmc->max_blk_size = MMC_SPI_BLOCKSIZE;
  
-       /* As long as we keep track of the number of successfully
-        * transmitted blocks, we're good for multiwrite.
-        */
-       mmc->caps = MMC_CAP_SPI | MMC_CAP_MULTIWRITE;
+       mmc->caps = MMC_CAP_SPI;
  
         /* SPI doesn't need the lowspeed device identification thing for
          * MMC or SD cards, since it never comes up in open drain mode.
@@ -1319,17 +1328,23 @@ static int mmc_spi_probe(struct spi_device *spi)
                         goto fail_glue_init;
         }
  
+       /* pass platform capabilities, if any */
+       if (host->pdata)
+               mmc->caps |= host->pdata->caps;
+
         status = mmc_add_host(mmc);
         if (status != 0)
                 goto fail_add_host;
  
-       dev_info(&spi->dev, "SD/MMC host %s%s%s%s\n",
+       dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n",
                         mmc->class_dev.bus_id,
                         host->dma_dev ? "" : ", no DMA",
                         (host->pdata && host->pdata->get_ro)
                                 ? "" : ", no WP",
                         (host->pdata && host->pdata->setpower)
-                               ? "" : ", no poweroff");
+                               ? "" : ", no poweroff",
+                       (mmc->caps & MMC_CAP_NEEDS_POLL)
+                               ? ", cd polling" : "");
         return 0;
  
  fail_add_host:
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c

index da5fecad74d9a79152076730981a36d35cfe3bed..696cf3647ceb3e61c6a9f41bf5ffae889c11ffef 100644 (file)
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -535,7 +535,6 @@ static int mmci_probe(struct amba_device *dev, void *id)
         mmc->f_min = (host->mclk + 511) / 512;
         mmc->f_max = min(host->mclk, fmax);
         mmc->ocr_avail = plat->ocr_mask;
-       mmc->caps = MMC_CAP_MULTIWRITE;
  
         /*
          * We can do SGIO
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c

index 549517c35675971b3448a988f83cb2b2369b305d..dbc26eb6a89e06c280f263a88ed02a20736220e7 100644 (file)
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1317,7 +1317,7 @@ static int __init mmc_omap_new_slot(struct mmc_omap_host *host, int id)
  
         host->slots[id] = slot;
  
-       mmc->caps = MMC_CAP_MULTIWRITE;
+       mmc->caps = 0;
         if (host->pdata->conf.wire4)
                 mmc->caps |= MMC_CAP_4_BIT_DATA;
  
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c

index d89475d36988228c09e60b54034c288bd81368e6..d39f59738866c02e8712b9d3eb6cc686de808382 100644 (file)
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -374,9 +374,12 @@ static int pxamci_get_ro(struct mmc_host *mmc)
         struct pxamci_host *host = mmc_priv(mmc);
  
         if (host->pdata && host->pdata->get_ro)
-               return host->pdata->get_ro(mmc_dev(mmc));
-       /* Host doesn't support read only detection so assume writeable */
-       return 0;
+               return !!host->pdata->get_ro(mmc_dev(mmc));
+       /*
+        * Board doesn't support read only detection; let the mmc core
+        * decide what to do.
+        */
+       return -ENOSYS;
  }
  
  static void pxamci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c

new file mode 100644 (file)

index 0000000..6a1e499
--- /dev/null
+++ b/drivers/mmc/host/s3cmci.c
@@ -0,0 +1,1446 @@
+/*
+ *  linux/drivers/mmc/host/s3cmci.c - Samsung S3C MCI driver
+ *
+ *  Copyright (C) 2004-2006 maintech GmbH, Thomas Kleffel <tk@maintech.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <linux/clk.h>
+#include <linux/mmc/host.h>
+#include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+
+#include <asm/dma.h>
+
+#include <asm/arch/regs-sdi.h>
+#include <asm/arch/regs-gpio.h>
+
+#include <asm/plat-s3c24xx/mci.h>
+
+#include "s3cmci.h"
+
+#define DRIVER_NAME "s3c-mci"
+
+/*
+ * Debug channels.  Every message passed to dbg() is tagged with one of
+ * these bits; the dbgmap_* masks below decide which log level, if any,
+ * the message is emitted at.
+ */
+enum dbg_channels {
+       dbg_err   = (1 << 0),
+       dbg_debug = (1 << 1),
+       dbg_info  = (1 << 2),
+       dbg_irq   = (1 << 3),
+       dbg_sg    = (1 << 4),
+       dbg_dma   = (1 << 5),
+       dbg_pio   = (1 << 6),
+       dbg_fail  = (1 << 7),
+       dbg_conf  = (1 << 8),
+};
+
+/*
+ * Channel-to-log-level maps consulted by dbg(), in this order: error,
+ * info, debug.  Channels that appear in none of the three masks (dbg_irq,
+ * dbg_sg, dbg_dma and dbg_pio) produce no output.
+ */
+static const int dbgmap_err   = dbg_err | dbg_fail;
+static const int dbgmap_info  = dbg_info | dbg_conf;
+static const int dbgmap_debug = dbg_debug;
+
+#define dbg(host, channels, args...)             \
+       do {                                      \
+       if (dbgmap_err & channels)                \
+               dev_err(&host->pdev->dev, args);  \
+       else if (dbgmap_info & channels)          \
+               dev_info(&host->pdev->dev, args); \
+       else if (dbgmap_debug & channels)         \
+               dev_dbg(&host->pdev->dev, args);  \
+       } while (0)
+
+/* Length of an inclusive [start, end] range, i.e. end - start + 1 */
+#define RESSIZE(ressource) (((ressource)->end - (ressource)->start)+1)
+
+/* DMA client descriptor for this driver; only the name is filled in */
+static struct s3c2410_dma_client s3cmci_dma_client = {
+       .name           = "s3c-mci",
+};
+
+/* Forward declarations of static functions used before their definition */
+static void finalize_request(struct s3cmci_host *host);
+static void s3cmci_send_request(struct mmc_host *mmc);
+static void s3cmci_reset(struct s3cmci_host *host);
+
+#ifdef CONFIG_MMC_DEBUG
+
+/*
+ * dbg_dumpregs - read the SDI controller registers and log them
+ * @host:   controller whose registers are read (relative to host->base)
+ * @prefix: string placed at the start of every output line
+ *
+ * All registers are read up front and then printed as four lines on the
+ * dbg_debug channel.  Only built when CONFIG_MMC_DEBUG is defined.
+ */
+static void dbg_dumpregs(struct s3cmci_host *host, char *prefix)
+{
+       u32 con, pre, cmdarg, cmdcon, cmdsta, r0, r1, r2, r3, timer, bsize;
+       u32 datcon, datcnt, datsta, fsta, imask;
+
+       con     = readl(host->base + S3C2410_SDICON);
+       pre     = readl(host->base + S3C2410_SDIPRE);
+       cmdarg  = readl(host->base + S3C2410_SDICMDARG);
+       cmdcon  = readl(host->base + S3C2410_SDICMDCON);
+       cmdsta  = readl(host->base + S3C2410_SDICMDSTAT);
+       r0      = readl(host->base + S3C2410_SDIRSP0);
+       r1      = readl(host->base + S3C2410_SDIRSP1);
+       r2      = readl(host->base + S3C2410_SDIRSP2);
+       r3      = readl(host->base + S3C2410_SDIRSP3);
+       timer   = readl(host->base + S3C2410_SDITIMER);
+       /* NOTE(review): bsize (and imask below) are read but never printed */
+       bsize   = readl(host->base + S3C2410_SDIBSIZE);
+       datcon  = readl(host->base + S3C2410_SDIDCON);
+       datcnt  = readl(host->base + S3C2410_SDIDCNT);
+       datsta  = readl(host->base + S3C2410_SDIDSTA);
+       fsta    = readl(host->base + S3C2410_SDIFSTA);
+       /* the interrupt mask register offset is per-host (host->sdiimsk) */
+       imask   = readl(host->base + host->sdiimsk);
+
+       /* line 1: control, prescaler and timer registers */
+       dbg(host, dbg_debug, "%s  CON:[%08x]  PRE:[%08x]  TMR:[%08x]\n",
+                               prefix, con, pre, timer);
+
+       /* line 2: command control, argument and status */
+       dbg(host, dbg_debug, "%s CCON:[%08x] CARG:[%08x] CSTA:[%08x]\n",
+                               prefix, cmdcon, cmdarg, cmdsta);
+
+       /* line 3: data control, FIFO status, data status and data counter */
+       dbg(host, dbg_debug, "%s DCON:[%08x] FSTA:[%08x]"
+                              " DSTA:[%08x] DCNT:[%08x]\n",
+                               prefix, datcon, fsta, datsta, datcnt);
+
+       /* line 4: the four response registers */
+       dbg(host, dbg_debug, "%s   R0:[%08x]   R1:[%08x]"
+                              "   R2:[%08x]   R3:[%08x]\n",
+                               prefix, r0, r1, r2, r3);
+}
+
+/*
+ * prepare_dbgmsg - pre-format the debug description of a command
+ * @host: controller; receives the text in host->dbgmsg_cmd/dbgmsg_dat
+ * @cmd:  command about to be described
+ * @stop: non-zero to tag the command text with " (STOP)"
+ *
+ * Writes at most 300 bytes into each buffer.  host->dbgmsg_dat describes
+ * the data phase and is set to the empty string when @cmd carries no
+ * data.  The strings are printed later by dbg_dumpcmd().
+ */
+static void prepare_dbgmsg(struct s3cmci_host *host, struct mmc_command *cmd,
+                          int stop)
+{
+       /* flags are printed as a zero-padded 8-digit hex value, like arg */
+       snprintf(host->dbgmsg_cmd, 300,
+                "#%u%s op:%i arg:0x%08x flags:0x%08x retries:%u",
+                host->ccnt, (stop ? " (STOP)" : ""),
+                cmd->opcode, cmd->arg, cmd->flags, cmd->retries);
+
+       if (cmd->data) {
+               snprintf(host->dbgmsg_dat, 300,
+                        "#%u bsize:%u blocks:%u bytes:%u",
+                        host->dcnt, cmd->data->blksz,
+                        cmd->data->blocks,
+                        cmd->data->blocks * cmd->data->blksz);
+       } else {
+               host->dbgmsg_dat[0] = '\0';
+       }
+}
+
+/*
+ * dbg_dumpcmd - log the outcome of a command and of its data phase
+ * @host: controller; supplies the strings built by prepare_dbgmsg()
+ * @cmd:  finished command, may be NULL (nothing is logged then)
+ * @fail: non-zero logs on the dbg_fail channel, zero on dbg_debug
+ */
+static void dbg_dumpcmd(struct s3cmci_host *host, struct mmc_command *cmd,
+                       int fail)
+{
+       unsigned int level;
+
+       if (!cmd)
+               return;
+
+       level = fail ? dbg_fail : dbg_debug;
+
+       /* command phase */
+       if (cmd->error != 0)
+               dbg(host, level, "CMD[ERR %i] %s Status:%s\n",
+                       cmd->error, host->dbgmsg_cmd, host->status);
+       else
+               dbg(host, level, "CMD[OK] %s R0:0x%08x\n",
+                       host->dbgmsg_cmd, cmd->resp[0]);
+
+       /* data phase, only present for data-carrying commands */
+       if (!cmd->data)
+               return;
+
+       if (cmd->data->error != 0)
+               dbg(host, level, "DAT[ERR %i] %s DCNT:0x%08x\n",
+                       cmd->data->error, host->dbgmsg_dat,
+                       readl(host->base + S3C2410_SDIDCNT));
+       else
+               dbg(host, level, "DAT[OK] %s\n", host->dbgmsg_dat);
+}
+#else
+/*
+ * CONFIG_MMC_DEBUG is not set: the debug helpers become empty stubs so
+ * that callers need no #ifdef of their own.
+ */
+static void dbg_dumpcmd(struct s3cmci_host *host,
+                       struct mmc_command *cmd, int fail) { }
+
+static void prepare_dbgmsg(struct s3cmci_host *host, struct mmc_command *cmd,
+                          int stop) { }
+
+static void dbg_dumpregs(struct s3cmci_host *host, char *prefix) { }
+
+#endif /* CONFIG_MMC_DEBUG */
+
+/*
+ * enable_imask - set bits in the interrupt mask register
+ * @host:  controller to operate on
+ * @imask: bits to OR into the current mask
+ *
+ * Read-modify-write of the register at host->base + host->sdiimsk.
+ * Returns the value that was written back.
+ */
+static inline u32 enable_imask(struct s3cmci_host *host, u32 imask)
+{
+       u32 mask = readl(host->base + host->sdiimsk) | imask;
+
+       writel(mask, host->base + host->sdiimsk);
+
+       return mask;
+}
+
+/*
+ * disable_imask - clear bits in the interrupt mask register
+ * @host:  controller to operate on
+ * @imask: bits to remove from the current mask
+ *
+ * Read-modify-write of the register at host->base + host->sdiimsk.
+ * Returns the value that was written back.
+ */
+static inline u32 disable_imask(struct s3cmci_host *host, u32 imask)
+{
+       u32 mask = readl(host->base + host->sdiimsk) & ~imask;
+
+       writel(mask, host->base + host->sdiimsk);
+
+       return mask;
+}
+
+/*
+ * clear_imask - write zero to the interrupt mask register, dropping every
+ * bit previously set through enable_imask().
+ */
+static inline void clear_imask(struct s3cmci_host *host)
+{
+       writel(0, host->base + host->sdiimsk);
+}
+
+/*
+ * get_data_buffer - hand out the next scatterlist entry for PIO
+ * @host:    controller; host->pio_sgptr indexes the next entry to use
+ * @words:   receives the entry length divided by four
+ * @pointer: receives the kernel virtual address of the entry
+ *
+ * Returns 0 on success and advances host->pio_sgptr, -EINVAL when no PIO
+ * transfer is active or there is no request/data, and -EBUSY when every
+ * entry of the current request has already been handed out.
+ */
+static inline int get_data_buffer(struct s3cmci_host *host,
+                                 u32 *words, u32 **pointer)
+{
+       struct scatterlist *entry;
+
+       if (host->pio_active == XFER_NONE || !host->mrq || !host->mrq->data)
+               return -EINVAL;
+
+       if (host->pio_sgptr >= host->mrq->data->sg_len) {
+               dbg(host, dbg_debug, "no more buffers (%i/%i)\n",
+                     host->pio_sgptr, host->mrq->data->sg_len);
+               return -EBUSY;
+       }
+
+       entry = host->mrq->data->sg + host->pio_sgptr;
+
+       /* length is converted to a count of 4-byte units */
+       *words = entry->length >> 2;
+       *pointer = sg_virt(entry);
+
+       host->pio_sgptr += 1;
+
+       dbg(host, dbg_sg, "new buffer (%i/%i)\n",
+           host->pio_sgptr, host->mrq->data->sg_len);
+
+       return 0;
+}
+
+/*
+ * fifo_count - FIFO fill level, in units of four
+ *
+ * Reads the FIFO status register, keeps the count field and divides it
+ * by four (presumably bytes to 32-bit words - TODO confirm).
+ */
+static inline u32 fifo_count(struct s3cmci_host *host)
+{
+       u32 level = readl(host->base + S3C2410_SDIFSTA);
+
+       return (level & S3C2410_SDIFSTA_COUNTMASK) >> 2;
+}
+
+/*
+ * fifo_free - free space in the FIFO, in units of four
+ *
+ * Reads the FIFO status register, subtracts the count field from 63 and
+ * divides the remainder by four.
+ */
+static inline u32 fifo_free(struct s3cmci_host *host)
+{
+       u32 used = readl(host->base + S3C2410_SDIFSTA);
+
+       used &= S3C2410_SDIFSTA_COUNTMASK;
+
+       return (63 - used) >> 2;
+}
+
+/*
+ * do_pio_read - drain the receive FIFO into the request's buffers
+ * @host: controller with an active PIO read
+ *
+ * Copies words from the data register into the current buffer for as
+ * long as fifo_count() reports data, fetching a new buffer through
+ * get_data_buffer() whenever the current one is full.  When no further
+ * buffer is available the transfer is marked finished (pio_active =
+ * XFER_NONE, complete_what = COMPLETION_FINALIZE); otherwise the RX FIFO
+ * interrupts are re-enabled so the next batch triggers another pass.
+ */
+static void do_pio_read(struct s3cmci_host *host)
+{
+       int res;
+       u32 fifo;
+       void __iomem *from_ptr;
+
+       /* write real prescaler to host, it might be set slow to fix */
+       writel(host->prescaler, host->base + S3C2410_SDIPRE);
+
+       from_ptr = host->base + host->sdidata;
+
+       while ((fifo = fifo_count(host))) {
+               /* current buffer exhausted: move on to the next one */
+               if (!host->pio_words) {
+                       res = get_data_buffer(host, &host->pio_words,
+                                             &host->pio_ptr);
+                       if (res) {
+                               host->pio_active = XFER_NONE;
+                               host->complete_what = COMPLETION_FINALIZE;
+
+                               dbg(host, dbg_pio, "pio_read(): "
+                                   "complete (no more data).\n");
+                               return;
+                       }
+
+                       dbg(host, dbg_pio,
+                           "pio_read(): new target: [%i]@[%p]\n",
+                           host->pio_words, host->pio_ptr);
+               }
+
+               dbg(host, dbg_pio,
+                   "pio_read(): fifo:[%02i] buffer:[%03i] dcnt:[%08X]\n",
+                   fifo, host->pio_words,
+                   readl(host->base + S3C2410_SDIDCNT));
+
+               /* never copy more than the current buffer can take */
+               if (fifo > host->pio_words)
+                       fifo = host->pio_words;
+
+               host->pio_words -= fifo;
+               host->pio_count += fifo;
+
+               while (fifo--)
+                       *(host->pio_ptr++) = readl(from_ptr);
+       }
+
+       /*
+        * FIFO is empty.  If the buffer filled up exactly, fetch the next
+        * one now; failing to get one means the whole request was read.
+        */
+       if (!host->pio_words) {
+               res = get_data_buffer(host, &host->pio_words, &host->pio_ptr);
+               if (res) {
+                       dbg(host, dbg_pio,
+                           "pio_read(): complete (no more buffers).\n");
+                       host->pio_active = XFER_NONE;
+                       host->complete_what = COMPLETION_FINALIZE;
+
+                       return;
+               }
+       }
+
+       /* more data expected: re-arm the RX FIFO interrupts */
+       enable_imask(host,
+                    S3C2410_SDIIMSK_RXFIFOHALF | S3C2410_SDIIMSK_RXFIFOLAST);
+}
+
+/*
+ * do_pio_write - fill the transmit FIFO from the request's buffers
+ * @host: controller with an active PIO write
+ *
+ * Writes words to the data register for as long as fifo_free() reports
+ * room, fetching a new buffer through get_data_buffer() whenever the
+ * current one is used up.  When no further buffer is available
+ * pio_active is set to XFER_NONE and the function returns; otherwise the
+ * TX FIFO interrupt is re-enabled so the next pass is triggered by it.
+ */
+static void do_pio_write(struct s3cmci_host *host)
+{
+       void __iomem *to_ptr;
+       int res;
+       u32 fifo;
+
+       to_ptr = host->base + host->sdidata;
+
+       while ((fifo = fifo_free(host))) {
+               /* current buffer exhausted: move on to the next one */
+               if (!host->pio_words) {
+                       res = get_data_buffer(host, &host->pio_words,
+                                                       &host->pio_ptr);
+                       if (res) {
+                               dbg(host, dbg_pio,
+                                   "pio_write(): complete (no more data).\n");
+                               host->pio_active = XFER_NONE;
+
+                               return;
+                       }
+
+                       dbg(host, dbg_pio,
+                           "pio_write(): new source: [%i]@[%p]\n",
+                           host->pio_words, host->pio_ptr);
+
+               }
+
+               /* never send more than the current buffer holds */
+               if (fifo > host->pio_words)
+                       fifo = host->pio_words;
+
+               host->pio_words -= fifo;
+               host->pio_count += fifo;
+
+               while (fifo--)
+                       writel(*(host->pio_ptr++), to_ptr);
+       }
+
+       /* FIFO is full but data remains: re-arm the TX FIFO interrupt */
+       enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF);
+}
+
+/*
+ * pio_tasklet - bottom half performing the PIO data movement
+ * @data: the struct s3cmci_host pointer, cast to unsigned long
+ *
+ * Runs do_pio_write() or do_pio_read() with the controller IRQ disabled.
+ * If the transfer reached COMPLETION_FINALIZE the interrupt mask is
+ * cleared and the request is finalized; a transfer that is still marked
+ * active at that point is reported and its data error set to -EINVAL.
+ */
+static void pio_tasklet(unsigned long data)
+{
+       struct s3cmci_host *host = (struct s3cmci_host *) data;
+
+
+       disable_irq(host->irq);
+
+       if (host->pio_active == XFER_WRITE)
+               do_pio_write(host);
+
+       if (host->pio_active == XFER_READ)
+               do_pio_read(host);
+
+       if (host->complete_what == COMPLETION_FINALIZE) {
+               clear_imask(host);
+               /* finalizing while PIO is still active means data was lost */
+               if (host->pio_active != XFER_NONE) {
+                       dbg(host, dbg_err, "unfinished %s "
+                           "- pio_count:[%u] pio_words:[%u]\n",
+                           (host->pio_active == XFER_READ) ? "read" : "write",
+                           host->pio_count, host->pio_words);
+
+                       if (host->mrq->data)
+                               host->mrq->data->error = -EINVAL;
+               }
+
+               /*
+                * NOTE(review): the IRQ is not re-enabled on this path here;
+                * presumably finalize_request() takes care of it - confirm.
+                */
+               finalize_request(host);
+       } else
+               enable_irq(host->irq);
+}
+
+/*
+ * ISR for SDI Interface IRQ
+ * Communication between driver and ISR works as follows:
+ *   host->mrq                         points to current request
+ *   host->complete_what       Indicates when the request is considered done
+ *     COMPLETION_CMDSENT        when the command was sent
+ *     COMPLETION_RSPFIN          when a response was received
+ *     COMPLETION_XFERFINISH     when the data transfer is finished
+ *     COMPLETION_XFERFINISH_RSPFIN both of the above.
+ *   host->complete_request    is the completion-object the driver waits for
+ *
+ * 1) Driver sets up host->mrq and host->complete_what
+ * 2) Driver prepares the transfer
+ * 3) Driver enables interrupts
+ * 4) Driver starts transfer
+ * 5) Driver waits for host->complete_request
+ * 6) ISR checks for request status (errors and success)
+ * 7) ISR sets host->mrq->cmd->error and host->mrq->data->error
+ * 8) ISR completes host->complete_request
+ * 9) ISR disables interrupts
+ * 10) Driver wakes up and takes care of the request
+ *
+ * Note: "->error"-fields are expected to be set to 0 before the request
+ *       was issued by mmc.c - therefore they are only set when an error
+ *       condition comes up
+ */
+
+static irqreturn_t s3cmci_irq(int irq, void *dev_id)
+{
+       struct s3cmci_host *host = dev_id;
+       struct mmc_command *cmd;
+       u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt, mci_imsk;
+       u32 mci_cclear, mci_dclear;
+       unsigned long iflags;
+
+       spin_lock_irqsave(&host->complete_lock, iflags);
+
+       mci_csta = readl(host->base + S3C2410_SDICMDSTAT);
+       mci_dsta = readl(host->base + S3C2410_SDIDSTA);
+       mci_dcnt = readl(host->base + S3C2410_SDIDCNT);
+       mci_fsta = readl(host->base + S3C2410_SDIFSTA);
+       mci_imsk = readl(host->base + host->sdiimsk);
+       mci_cclear = 0;
+       mci_dclear = 0;
+
+       if ((host->complete_what == COMPLETION_NONE) ||
+           (host->complete_what == COMPLETION_FINALIZE)) {
+               host->status = "nothing to complete";
+               clear_imask(host);
+               goto irq_out;
+       }
+
+       if (!host->mrq) {
+               host->status = "no active mrq";
+               clear_imask(host);
+               goto irq_out;
+       }
+
+       cmd = host->cmd_is_stop ? host->mrq->stop : host->mrq->cmd;
+
+       if (!cmd) {
+               host->status = "no active cmd";
+               clear_imask(host);
+               goto irq_out;
+       }
+
+       if (!host->dodma) {
+               if ((host->pio_active == XFER_WRITE) &&
+                   (mci_fsta & S3C2410_SDIFSTA_TFDET)) {
+
+                       disable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF);
+                       tasklet_schedule(&host->pio_tasklet);
+                       host->status = "pio tx";
+               }
+
+               if ((host->pio_active == XFER_READ) &&
+                   (mci_fsta & S3C2410_SDIFSTA_RFDET)) {
+
+                       disable_imask(host,
+                                     S3C2410_SDIIMSK_RXFIFOHALF |
+                                     S3C2410_SDIIMSK_RXFIFOLAST);
+
+                       tasklet_schedule(&host->pio_tasklet);
+                       host->status = "pio rx";
+               }
+       }
+
+       if (mci_csta & S3C2410_SDICMDSTAT_CMDTIMEOUT) {
+               dbg(host, dbg_err, "CMDSTAT: error CMDTIMEOUT\n");
+               cmd->error = -ETIMEDOUT;
+               host->status = "error: command timeout";
+               goto fail_transfer;
+       }
+
+       if (mci_csta & S3C2410_SDICMDSTAT_CMDSENT) {
+               if (host->complete_what == COMPLETION_CMDSENT) {
+                       host->status = "ok: command sent";
+                       goto close_transfer;
+               }
+
+               mci_cclear |= S3C2410_SDICMDSTAT_CMDSENT;
+       }
+
+       if (mci_csta & S3C2410_SDICMDSTAT_CRCFAIL) {
+               if (cmd->flags & MMC_RSP_CRC) {
+                       if (host->mrq->cmd->flags & MMC_RSP_136) {
+                               dbg(host, dbg_irq,
+                                   "fixup: ignore CRC fail with long rsp\n");
+                       } else {
+                               /* note, we used to fail the transfer
+                                * here, but it seems that this is just
+                                * the hardware getting it wrong.
+                                *
+                                * cmd->error = -EILSEQ;
+                                * host->status = "error: bad command crc";
+                                * goto fail_transfer;
+                               */
+                       }
+               }
+
+               mci_cclear |= S3C2410_SDICMDSTAT_CRCFAIL;
+       }
+
+       if (mci_csta & S3C2410_SDICMDSTAT_RSPFIN) {
+               if (host->complete_what == COMPLETION_RSPFIN) {
+                       host->status = "ok: command response received";
+                       goto close_transfer;
+               }
+
+               if (host->complete_what == COMPLETION_XFERFINISH_RSPFIN)
+                       host->complete_what = COMPLETION_XFERFINISH;
+
+               mci_cclear |= S3C2410_SDICMDSTAT_RSPFIN;
+       }
+
+       /* errors handled after this point are only relevant
+          when a data transfer is in progress */
+
+       if (!cmd->data)
+               goto clear_status_bits;
+
+       /* Check for FIFO failure */
+       if (host->is2440) {
+               if (mci_fsta & S3C2440_SDIFSTA_FIFOFAIL) {
+                       dbg(host, dbg_err, "FIFO failure\n");
+                       host->mrq->data->error = -EILSEQ;
+                       host->status = "error: 2440 fifo failure";
+                       goto fail_transfer;
+               }
+       } else {
+               if (mci_dsta & S3C2410_SDIDSTA_FIFOFAIL) {
+                       dbg(host, dbg_err, "FIFO failure\n");
+                       cmd->data->error = -EILSEQ;
+                       host->status = "error:  fifo failure";
+                       goto fail_transfer;
+               }
+       }
+
+       if (mci_dsta & S3C2410_SDIDSTA_RXCRCFAIL) {
+               dbg(host, dbg_err, "bad data crc (outgoing)\n");
+               cmd->data->error = -EILSEQ;
+               host->status = "error: bad data crc (outgoing)";
+               goto fail_transfer;
+       }
+
+       if (mci_dsta & S3C2410_SDIDSTA_CRCFAIL) {
+               dbg(host, dbg_err, "bad data crc (incoming)\n");
+               cmd->data->error = -EILSEQ;
+               host->status = "error: bad data crc (incoming)";
+               goto fail_transfer;
+       }
+
+       if (mci_dsta & S3C2410_SDIDSTA_DATATIMEOUT) {
+               dbg(host, dbg_err, "data timeout\n");
+               cmd->data->error = -ETIMEDOUT;
+               host->status = "error: data timeout";
+               goto fail_transfer;
+       }
+
+       if (mci_dsta & S3C2410_SDIDSTA_XFERFINISH) {
+               if (host->complete_what == COMPLETION_XFERFINISH) {
+                       host->status = "ok: data transfer completed";
+                       goto close_transfer;
+               }
+
+               if (host->complete_what == COMPLETION_XFERFINISH_RSPFIN)
+                       host->complete_what = COMPLETION_RSPFIN;
+
+               mci_dclear |= S3C2410_SDIDSTA_XFERFINISH;
+       }
+
+clear_status_bits:
+       writel(mci_cclear, host->base + S3C2410_SDICMDSTAT);
+       writel(mci_dclear, host->base + S3C2410_SDIDSTA);
+
+       goto irq_out;
+
+fail_transfer:
+       host->pio_active = XFER_NONE;
+
+close_transfer:
+       host->complete_what = COMPLETION_FINALIZE;
+
+       clear_imask(host);
+       tasklet_schedule(&host->pio_tasklet);
+
+       goto irq_out;
+
+irq_out:
+       dbg(host, dbg_irq,
+           "csta:0x%08x dsta:0x%08x fsta:0x%08x dcnt:0x%08x status:%s.\n",
+           mci_csta, mci_dsta, mci_fsta, mci_dcnt, host->status);
+
+       spin_unlock_irqrestore(&host->complete_lock, iflags);
+       return IRQ_HANDLED;
+
+}
+
+/*
+ * ISR for the CardDetect Pin
+*/
+
+static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id)
+{
+       /* dev_id is the s3cmci_host cookie registered with this handler. */
+       struct s3cmci_host *mci = dev_id;
+
+       dbg(mci, dbg_irq, "card detect\n");
+
+       /* Tell the MMC core the slot may have changed, with a 500 ms delay. */
+       mmc_detect_change(mci->mmc, msecs_to_jiffies(500));
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * s3cmci_dma_done_callback - buffer-done callback for the SDI DMA channel.
+ * @dma_ch: DMA channel the buffer belonged to (not used here).
+ * @buf_id: cookie supplied when the buffer was enqueued; the s3cmci_host.
+ * @size:   size value passed in by the DMA layer (only logged).
+ * @result: completion status; anything but S3C2410_RES_OK fails the request.
+ *
+ * Counts down host->dmatogo for every finished buffer.  When the last
+ * buffer completes, or on a DMA error, the request is moved to
+ * COMPLETION_FINALIZE.  The PIO tasklet is scheduled on every path.
+ *
+ * NOTE(review): nothing in this function sets host->dma_complete, which
+ * finalize_request() requires when host->dodma is set - confirm where
+ * it is set.
+ */
+void s3cmci_dma_done_callback(struct s3c2410_dma_chan *dma_ch, void *buf_id,
+                             int size, enum s3c2410_dma_buffresult result)
+{
+       struct s3cmci_host *host = buf_id;
+       unsigned long iflags;
+       u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt;
+
+       /* Snapshot the status registers; they are only used for logging. */
+       mci_csta = readl(host->base + S3C2410_SDICMDSTAT);
+       mci_dsta = readl(host->base + S3C2410_SDIDSTA);
+       mci_fsta = readl(host->base + S3C2410_SDIFSTA);
+       mci_dcnt = readl(host->base + S3C2410_SDIDCNT);
+
+       /* A callback without a data request or pending buffers is a bug. */
+       BUG_ON(!host->mrq);
+       BUG_ON(!host->mrq->data);
+       BUG_ON(!host->dmatogo);
+
+       spin_lock_irqsave(&host->complete_lock, iflags);
+
+       if (result != S3C2410_RES_OK) {
+               dbg(host, dbg_fail, "DMA FAILED: csta=0x%08x dsta=0x%08x "
+                       "fsta=0x%08x dcnt:0x%08x result:0x%08x toGo:%u\n",
+                       mci_csta, mci_dsta, mci_fsta,
+                       mci_dcnt, result, host->dmatogo);
+
+               goto fail_request;
+       }
+
+       /* One more buffer done; keep waiting while others are outstanding. */
+       host->dmatogo--;
+       if (host->dmatogo) {
+               dbg(host, dbg_dma, "DMA DONE  Size:%i DSTA:[%08x] "
+                       "DCNT:[%08x] toGo:%u\n",
+                       size, mci_dsta, mci_dcnt, host->dmatogo);
+
+               goto out;
+       }
+
+       dbg(host, dbg_dma, "DMA FINISHED Size:%i DSTA:%08x DCNT:%08x\n",
+               size, mci_dsta, mci_dcnt);
+
+       host->complete_what = COMPLETION_FINALIZE;
+
+out:
+       /* Common exit: kick the tasklet, then drop the lock. */
+       tasklet_schedule(&host->pio_tasklet);
+       spin_unlock_irqrestore(&host->complete_lock, iflags);
+       return;
+
+fail_request:
+       /* Flag the data phase as failed and write 0 to the interrupt
+        * mask register before finalising. */
+       host->mrq->data->error = -EINVAL;
+       host->complete_what = COMPLETION_FINALIZE;
+       writel(0, host->base + host->sdiimsk);
+       goto out;
+
+}
+
+/*
+ * finalize_request - complete the command currently in flight.
+ * @host: controller state
+ *
+ * Does nothing unless host->complete_what is COMPLETION_FINALIZE and a
+ * request is attached.  Reads back the response registers, restores the
+ * prescaler, quiesces the controller and then either issues the stop
+ * command of a data transfer or hands the finished request back to the
+ * MMC core.
+ */
+static void finalize_request(struct s3cmci_host *host)
+{
+       struct mmc_request *mrq = host->mrq;
+       struct mmc_command *cmd;
+       int debug_as_failure = 0;
+
+       if (host->complete_what != COMPLETION_FINALIZE)
+               return;
+
+       if (!mrq)
+               return;
+
+       /* Select the command only after mrq is known to be non-NULL;
+        * doing it in the initialiser dereferenced mrq before the check. */
+       cmd = host->cmd_is_stop ? mrq->stop : mrq->cmd;
+
+       /* With DMA in use and no error so far, wait until the DMA side
+        * has reported completion before finalising. */
+       if (cmd->data && (cmd->error == 0) &&
+           (cmd->data->error == 0)) {
+               if (host->dodma && (!host->dma_complete)) {
+                       dbg(host, dbg_dma, "DMA Missing!\n");
+                       return;
+               }
+       }
+
+       /* Read response from controller. */
+       cmd->resp[0] = readl(host->base + S3C2410_SDIRSP0);
+       cmd->resp[1] = readl(host->base + S3C2410_SDIRSP1);
+       cmd->resp[2] = readl(host->base + S3C2410_SDIRSP2);
+       cmd->resp[3] = readl(host->base + S3C2410_SDIRSP3);
+
+       /* Restore the prescaler chosen by set_ios; s3cmci_setup_data()
+        * may have slowed the clock for reads on non-2440 parts. */
+       writel(host->prescaler, host->base + S3C2410_SDIPRE);
+
+       if (cmd->error)
+               debug_as_failure = 1;
+
+       if (cmd->data && cmd->data->error)
+               debug_as_failure = 1;
+
+       dbg_dumpcmd(host, cmd, debug_as_failure);
+
+       /* Cleanup controller */
+       writel(0, host->base + S3C2410_SDICMDARG);
+       writel(S3C2410_SDIDCON_STOP, host->base + S3C2410_SDIDCON);
+       writel(0, host->base + S3C2410_SDICMDCON);
+       writel(0, host->base + host->sdiimsk);
+
+       /* A failed command also fails its data phase. */
+       if (cmd->data && cmd->error)
+               cmd->data->error = cmd->error;
+
+       /* Data command with a stop command attached: send the stop
+        * command now and finalise again once it has completed. */
+       if (cmd->data && cmd->data->stop && (!host->cmd_is_stop)) {
+               host->cmd_is_stop = 1;
+               s3cmci_send_request(host->mmc);
+               return;
+       }
+
+       /* If we have no data transfer we are finished here */
+       if (!mrq->data)
+               goto request_done;
+
+       /* Calculate the amount of bytes transferred if there was no error */
+       if (mrq->data->error == 0) {
+               mrq->data->bytes_xfered =
+                       (mrq->data->blocks * mrq->data->blksz);
+       } else {
+               mrq->data->bytes_xfered = 0;
+       }
+
+       /* If we had an error while transferring data we flush the
+        * DMA channel and the fifo to clear out any garbage. */
+       if (mrq->data->error != 0) {
+               if (host->dodma)
+                       s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
+
+               if (host->is2440) {
+                       /* Clear failure register and reset fifo. */
+                       writel(S3C2440_SDIFSTA_FIFORESET |
+                              S3C2440_SDIFSTA_FIFOFAIL,
+                              host->base + S3C2410_SDIFSTA);
+               } else {
+                       u32 mci_con;
+
+                       /* reset fifo */
+                       mci_con = readl(host->base + S3C2410_SDICON);
+                       mci_con |= S3C2410_SDICON_FIFORESET;
+
+                       writel(mci_con, host->base + S3C2410_SDICON);
+               }
+       }
+
+request_done:
+       /* Detach the request before reporting it to the MMC core. */
+       host->complete_what = COMPLETION_NONE;
+       host->mrq = NULL;
+       mmc_request_done(host->mmc, mrq);
+}
+
+
+/*
+ * Configure the DMA channel for the given transfer direction.
+ *
+ * The device configuration is redone only when the source differs from
+ * the previous call; the channel-wide settings (hardware trigger,
+ * buffer-done callback, autostart flag) are applied once.  Both facts
+ * are remembered in function-local statics.
+ */
+void s3cmci_dma_setup(struct s3cmci_host *host, enum s3c2410_dmasrc source)
+{
+       static enum s3c2410_dmasrc prev_source = -1;
+       static int channel_ready;
+
+       /* Same direction as last time: nothing to reprogram. */
+       if (source == prev_source)
+               return;
+
+       prev_source = source;
+
+       s3c2410_dma_devconfig(host->dma, source, 3,
+                             host->mem->start + host->sdidata);
+
+       if (channel_ready)
+               return;
+
+       /* One-time channel configuration. */
+       s3c2410_dma_config(host->dma, 4,
+               (S3C2410_DCON_HWTRIG | S3C2410_DCON_CH0_SDI));
+       s3c2410_dma_set_buffdone_fn(host->dma,
+                                   s3cmci_dma_done_callback);
+       s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART);
+       channel_ready = 1;
+}
+
+/*
+ * Program the controller to issue @cmd: unmask the command-phase
+ * interrupts, record which event completes the command, then write the
+ * argument and the command control word.
+ */
+static void s3cmci_send_command(struct s3cmci_host *host,
+                                       struct mmc_command *cmd)
+{
+       const unsigned int has_rsp = cmd->flags & MMC_RSP_PRESENT;
+       u32 cmdcon;
+
+       enable_imask(host,
+                    S3C2410_SDIIMSK_CRCSTATUS | S3C2410_SDIIMSK_CMDTIMEOUT |
+                    S3C2410_SDIIMSK_RESPONSEND | S3C2410_SDIIMSK_CMDSENT |
+                    S3C2410_SDIIMSK_RESPONSECRC);
+
+       /* Decide which event finishes this command. */
+       if (cmd->data) {
+               host->complete_what = COMPLETION_XFERFINISH_RSPFIN;
+       } else {
+               host->complete_what = has_rsp ? COMPLETION_RSPFIN
+                                             : COMPLETION_CMDSENT;
+       }
+
+       writel(cmd->arg, host->base + S3C2410_SDICMDARG);
+
+       /* Assemble the command control word. */
+       cmdcon = (cmd->opcode & S3C2410_SDICMDCON_INDEX) |
+                S3C2410_SDICMDCON_SENDERHOST | S3C2410_SDICMDCON_CMDSTART;
+
+       if (has_rsp)
+               cmdcon |= S3C2410_SDICMDCON_WAITRSP;
+
+       if (cmd->flags & MMC_RSP_136)
+               cmdcon |= S3C2410_SDICMDCON_LONGRSP;
+
+       writel(cmdcon, host->base + S3C2410_SDICMDCON);
+}
+
+/*
+ * s3cmci_setup_data - program the data-phase registers for a transfer.
+ * @host: controller state
+ * @data: data descriptor of the command, or NULL for a command without data
+ *
+ * Writes SDIDCON, SDIBSIZE and SDITIMER and unmasks the data interrupts.
+ *
+ * Returns 0 on success, or -EINVAL if the block size is not a multiple
+ * of 4 or a previous transfer could not be stopped.
+ */
+static int s3cmci_setup_data(struct s3cmci_host *host, struct mmc_data *data)
+{
+       u32 dcon, imsk, stoptries = 3;
+
+       /* write DCON register */
+
+       if (!data) {
+               writel(0, host->base + S3C2410_SDIDCON);
+               return 0;
+       }
+
+       if ((data->blksz & 3) != 0) {
+               /* We cannot deal with unaligned blocks with more than
+                * one block being transferred. */
+
+               if (data->blocks > 1)
+                       return -EINVAL;
+
+               /* No support yet for non-word block transfers, so the
+                * single-block case is rejected as well. */
+               return -EINVAL;
+       }
+
+       /* If the controller still reports a transfer in progress, stop
+        * and reset it; give up once the retry budget is exhausted. */
+       while (readl(host->base + S3C2410_SDIDSTA) &
+              (S3C2410_SDIDSTA_TXDATAON | S3C2410_SDIDSTA_RXDATAON)) {
+
+               dbg(host, dbg_err,
+                   "mci_setup_data() transfer stillin progress.\n");
+
+               writel(S3C2410_SDIDCON_STOP, host->base + S3C2410_SDIDCON);
+               s3cmci_reset(host);
+
+               if ((stoptries--) == 0) {
+                       dbg_dumpregs(host, "DRF");
+                       return -EINVAL;
+               }
+       }
+
+       /* Build the data control word, starting with the block count. */
+       dcon  = data->blocks & S3C2410_SDIDCON_BLKNUM_MASK;
+
+       if (host->dodma)
+               dcon |= S3C2410_SDIDCON_DMAEN;
+
+       if (host->bus_width == MMC_BUS_WIDTH_4)
+               dcon |= S3C2410_SDIDCON_WIDEBUS;
+
+       if (!(data->flags & MMC_DATA_STREAM))
+               dcon |= S3C2410_SDIDCON_BLOCKMODE;
+
+       if (data->flags & MMC_DATA_WRITE) {
+               dcon |= S3C2410_SDIDCON_TXAFTERRESP;
+               dcon |= S3C2410_SDIDCON_XFER_TXSTART;
+       }
+
+       if (data->flags & MMC_DATA_READ) {
+               dcon |= S3C2410_SDIDCON_RXAFTERCMD;
+               dcon |= S3C2410_SDIDCON_XFER_RXSTART;
+       }
+
+       /* Extra control bits that are only set when host->is2440. */
+       if (host->is2440) {
+               dcon |= S3C2440_SDIDCON_DS_WORD;
+               dcon |= S3C2440_SDIDCON_DATSTART;
+       }
+
+       writel(dcon, host->base + S3C2410_SDIDCON);
+
+       /* write BSIZE register */
+
+       writel(data->blksz, host->base + S3C2410_SDIBSIZE);
+
+       /* add to IMASK register */
+       imsk = S3C2410_SDIIMSK_FIFOFAIL | S3C2410_SDIIMSK_DATACRC |
+              S3C2410_SDIIMSK_DATATIMEOUT | S3C2410_SDIIMSK_DATAFINISH;
+
+       enable_imask(host, imsk);
+
+       /* write TIMER register */
+
+       if (host->is2440) {
+               writel(0x007FFFFF, host->base + S3C2410_SDITIMER);
+       } else {
+               writel(0x0000FFFF, host->base + S3C2410_SDITIMER);
+
+               /* FIX: set slow clock to prevent timeouts on read.
+                * finalize_request() writes host->prescaler back. */
+               if (data->flags & MMC_DATA_READ)
+                       writel(0xFF, host->base + S3C2410_SDIPRE);
+       }
+
+       return 0;
+}
+
+#define BOTH_DIR (MMC_DATA_WRITE | MMC_DATA_READ)
+
+/*
+ * Reset the PIO bookkeeping and arm the FIFO interrupts for a PIO
+ * transfer.  For writes, do_pio_write() is called once before the
+ * TX-FIFO interrupt is unmasked.  Always returns 0.
+ */
+static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)
+{
+       const int is_write = (data->flags & MMC_DATA_WRITE) != 0;
+
+       /* A request must be either a read or a write, never both. */
+       BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);
+
+       host->pio_sgptr = 0;
+       host->pio_words = 0;
+       host->pio_count = 0;
+
+       if (!is_write) {
+               host->pio_active = XFER_READ;
+               enable_imask(host, S3C2410_SDIIMSK_RXFIFOHALF
+                            | S3C2410_SDIIMSK_RXFIFOLAST);
+               return 0;
+       }
+
+       host->pio_active = XFER_WRITE;
+       do_pio_write(host);
+       enable_imask(host, S3C2410_SDIIMSK_TXFIFOHALF);
+
+       return 0;
+}
+
+/*
+ * s3cmci_prepare_dma - map the scatterlist and queue it on the DMA channel.
+ * @host: controller state
+ * @data: data descriptor whose scatterlist is to be transferred
+ *
+ * Returns 0 on success, -ENOMEM if dma_map_sg() mapped nothing, or
+ * -EBUSY if a buffer could not be enqueued.
+ *
+ * NOTE(review): the -EBUSY path flushes the channel but does not call
+ * dma_unmap_sg() here - confirm the mapping is released elsewhere.
+ */
+static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data)
+{
+       int dma_len, i;
+       int rw = (data->flags & MMC_DATA_WRITE) ? 1 : 0;
+
+       /* A request must be either a read or a write, never both. */
+       BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);
+
+       /* Point the channel in the right direction and drop stale buffers. */
+       s3cmci_dma_setup(host, rw ? S3C2410_DMASRC_MEM : S3C2410_DMASRC_HW);
+       s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
+
+       dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+                            (rw) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+
+       if (dma_len == 0)
+               return -ENOMEM;
+
+       /* s3cmci_dma_done_callback() counts dmatogo down per buffer. */
+       host->dma_complete = 0;
+       host->dmatogo = dma_len;
+
+       for (i = 0; i < dma_len; i++) {
+               int res;
+
+               dbg(host, dbg_dma, "enqueue %i:%u@%u\n", i,
+                       sg_dma_address(&data->sg[i]),
+                       sg_dma_len(&data->sg[i]));
+
+               /* The host pointer is the cookie handed to the callback. */
+               res = s3c2410_dma_enqueue(host->dma, (void *) host,
+                                         sg_dma_address(&data->sg[i]),
+                                         sg_dma_len(&data->sg[i]));
+
+               if (res) {
+                       s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
+                       return -EBUSY;
+               }
+       }
+
+       s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_START);
+
+       return 0;
+}
+
+/*
+ * s3cmci_send_request - start the current command of host->mrq.
+ * @mmc: MMC host whose private data is the s3cmci_host
+ *
+ * Sends mrq->stop when host->cmd_is_stop is set, mrq->cmd otherwise.
+ * Clears the status registers, sets up the data phase (DMA or PIO) if
+ * the command carries data, issues the command and enables the
+ * controller interrupt line.
+ *
+ * NOTE(review): the two error paths complete the request through
+ * mmc_request_done() without clearing host->mrq - confirm this is
+ * intended.
+ */
+static void s3cmci_send_request(struct mmc_host *mmc)
+{
+       struct s3cmci_host *host = mmc_priv(mmc);
+       struct mmc_request *mrq = host->mrq;
+       struct mmc_command *cmd = host->cmd_is_stop ? mrq->stop : mrq->cmd;
+
+       /* ccnt counts every command sent; dcnt (below) those with data. */
+       host->ccnt++;
+       prepare_dbgmsg(host, cmd, host->cmd_is_stop);
+
+       /* Clear command, data and fifo status registers
+          Fifo clear only necessary on 2440, but doesn't hurt on 2410
+       */
+       writel(0xFFFFFFFF, host->base + S3C2410_SDICMDSTAT);
+       writel(0xFFFFFFFF, host->base + S3C2410_SDIDSTA);
+       writel(0xFFFFFFFF, host->base + S3C2410_SDIFSTA);
+
+       if (cmd->data) {
+               int res = s3cmci_setup_data(host, cmd->data);
+
+               host->dcnt++;
+
+               if (res) {
+                       dbg(host, dbg_err, "setup data error %d\n", res);
+                       cmd->error = res;
+                       cmd->data->error = res;
+
+                       mmc_request_done(mmc, mrq);
+                       return;
+               }
+
+               /* Arm the transfer engine selected by host->dodma. */
+               if (host->dodma)
+                       res = s3cmci_prepare_dma(host, cmd->data);
+               else
+                       res = s3cmci_prepare_pio(host, cmd->data);
+
+               if (res) {
+                       dbg(host, dbg_err, "data prepare error %d\n", res);
+                       cmd->error = res;
+                       cmd->data->error = res;
+
+                       mmc_request_done(mmc, mrq);
+                       return;
+               }
+       }
+
+       /* Send command */
+       s3cmci_send_command(host, cmd);
+
+       /* Enable Interrupt */
+       enable_irq(host->irq);
+}
+
+/*
+ * Sample the card-detect GPIO.
+ *
+ * Returns -ENOSYS when the platform data names no detect pin.
+ * Otherwise returns 1 when the pin reads zero and 0 when it reads
+ * non-zero, XORed with pdata->detect_invert.
+ */
+static int s3cmci_card_present(struct s3cmci_host *host)
+{
+       struct s3c24xx_mci_pdata *pd = host->pdata;
+       int present;
+
+       if (pd->gpio_detect == 0)
+               return -ENOSYS;
+
+       present = !s3c2410_gpio_getpin(pd->gpio_detect);
+
+       return present ^ pd->detect_invert;
+}
+
+/*
+ * mmc_host_ops.request: attach @mrq to the host and start it, or fail
+ * it immediately with -ENOMEDIUM when card detection reports no card.
+ */
+static void s3cmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+       struct s3cmci_host *host = mmc_priv(mmc);
+
+       host->status = "mmc request";
+       host->cmd_is_stop = 0;
+       host->mrq = mrq;
+
+       /* Any non-zero result (card present, or detection unavailable)
+        * lets the request go ahead. */
+       if (s3cmci_card_present(host) != 0) {
+               s3cmci_send_request(mmc);
+               return;
+       }
+
+       dbg(host, dbg_err, "%s: no medium present\n", __func__);
+       host->mrq->cmd->error = -ENOMEDIUM;
+       mmc_request_done(mmc, mrq);
+}
+
+/*
+ * s3cmci_set_ios - mmc_host_ops.set_ios: apply power, clock and bus width.
+ * @mmc: MMC host
+ * @ios: settings requested by the MMC core
+ *
+ * Configures the SD GPIO pins and platform power hook according to
+ * ios->power_mode, picks the smallest prescaler whose resulting rate
+ * does not exceed ios->clock, and records the bus width for later
+ * transfers.
+ */
+static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+       struct s3cmci_host *host = mmc_priv(mmc);
+       u32 mci_psc, mci_con;
+
+       /* Set the power state */
+
+       mci_con = readl(host->base + S3C2410_SDICON);
+
+       switch (ios->power_mode) {
+       case MMC_POWER_ON:
+       case MMC_POWER_UP:
+               /* Route GPE5..GPE10 to the SD clock, command and data lines. */
+               s3c2410_gpio_cfgpin(S3C2410_GPE5, S3C2410_GPE5_SDCLK);
+               s3c2410_gpio_cfgpin(S3C2410_GPE6, S3C2410_GPE6_SDCMD);
+               s3c2410_gpio_cfgpin(S3C2410_GPE7, S3C2410_GPE7_SDDAT0);
+               s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1);
+               s3c2410_gpio_cfgpin(S3C2410_GPE9, S3C2410_GPE9_SDDAT2);
+               s3c2410_gpio_cfgpin(S3C2410_GPE10, S3C2410_GPE10_SDDAT3);
+
+               if (host->pdata->set_power)
+                       host->pdata->set_power(ios->power_mode, ios->vdd);
+
+               if (!host->is2440)
+                       mci_con |= S3C2410_SDICON_FIFORESET;
+
+               break;
+
+       case MMC_POWER_OFF:
+       default:
+               /* Drive the clock pin low as a plain output. */
+               s3c2410_gpio_setpin(S3C2410_GPE5, 0);
+               s3c2410_gpio_cfgpin(S3C2410_GPE5, S3C2410_GPE5_OUTP);
+
+               if (host->is2440)
+                       mci_con |= S3C2440_SDICON_SDRESET;
+
+               if (host->pdata->set_power)
+                       host->pdata->set_power(ios->power_mode, ios->vdd);
+
+               break;
+       }
+
+       /* Set clock */
+       /* Walk the prescaler upwards until the rate fits the request. */
+       for (mci_psc = 0; mci_psc < 255; mci_psc++) {
+               host->real_rate = host->clk_rate / (host->clk_div*(mci_psc+1));
+
+               if (host->real_rate <= ios->clock)
+                       break;
+       }
+
+       /* The loop above leaves mci_psc at 255 at most, so this clamp
+        * never triggers; it is kept as a safety net. */
+       if (mci_psc > 255)
+               mci_psc = 255;
+
+       host->prescaler = mci_psc;
+       writel(host->prescaler, host->base + S3C2410_SDIPRE);
+
+       /* If requested clock is 0, real_rate will be 0, too */
+       if (ios->clock == 0)
+               host->real_rate = 0;
+
+       /* Set CLOCK_ENABLE */
+       if (ios->clock)
+               mci_con |= S3C2410_SDICON_CLOCKTYPE;
+       else
+               mci_con &= ~S3C2410_SDICON_CLOCKTYPE;
+
+       writel(mci_con, host->base + S3C2410_SDICON);
+
+       if ((ios->power_mode == MMC_POWER_ON) ||
+           (ios->power_mode == MMC_POWER_UP)) {
+               dbg(host, dbg_conf, "running at %lukHz (requested: %ukHz).\n",
+                       host->real_rate/1000, ios->clock/1000);
+       } else {
+               dbg(host, dbg_conf, "powered down.\n");
+       }
+
+       /* Remembered for s3cmci_setup_data(), which selects wide-bus mode. */
+       host->bus_width = ios->bus_width;
+}
+
+/*
+ * Set the SDRESET bit in the SDICON register (read-modify-write).
+ * NOTE(review): the S3C2440 bit definition is used regardless of
+ * host->is2440 - confirm this is harmless on other variants.
+ */
+static void s3cmci_reset(struct s3cmci_host *host)
+{
+       void __iomem *sdicon = host->base + S3C2410_SDICON;
+       u32 val = readl(sdicon);
+
+       writel(val | S3C2440_SDICON_SDRESET, sdicon);
+}
+
+/*
+ * mmc_host_ops.get_ro: report the write-protect pin state.
+ *
+ * Returns 0 when the platform data names no write-protect pin,
+ * otherwise the pin level, logically negated if
+ * pdata->wprotect_invert is set.
+ */
+static int s3cmci_get_ro(struct mmc_host *mmc)
+{
+       struct s3cmci_host *host = mmc_priv(mmc);
+       struct s3c24xx_mci_pdata *pd = host->pdata;
+       int level;
+
+       if (pd->gpio_wprotect == 0)
+               return 0;
+
+       level = s3c2410_gpio_getpin(pd->gpio_wprotect);
+
+       return pd->wprotect_invert ? !level : level;
+}
+
+/* Host operations handed to the MMC core through mmc->ops in probe. */
+static struct mmc_host_ops s3cmci_ops = {
+       .request        = s3cmci_request,
+       .set_ios        = s3cmci_set_ios,
+       .get_ro         = s3cmci_get_ro,
+};
+
+/* Default platform data, used by probe when the device supplies none. */
+static struct s3c24xx_mci_pdata s3cmci_def_pdata = {
+       /* This is currently here to avoid a number of if (host->pdata)
+        * checks. All fields are left zero so that reasonable defaults
+        * are picked. */
+};
+
+/*
+ * s3cmci_probe - common probe routine for all SDI controller variants.
+ * @pdev:   platform device being bound
+ * @is2440: non-zero selects the S3C2440 register layout and a clock
+ *          divider of 1; zero selects the S3C2410 layout and divider 2
+ *
+ * Allocates the MMC host, claims and maps the register window, requests
+ * the controller and card-detect interrupts, the DMA channel and the
+ * "sdi" clock, then registers the host with the MMC core.
+ *
+ * Returns 0 on success or a negative errno.  On failure every resource
+ * acquired so far is released in reverse order of acquisition.
+ */
+static int __devinit s3cmci_probe(struct platform_device *pdev, int is2440)
+{
+       struct s3cmci_host *host;
+       struct mmc_host *mmc;
+       int ret;
+
+       mmc = mmc_alloc_host(sizeof(struct s3cmci_host), &pdev->dev);
+       if (!mmc) {
+               ret = -ENOMEM;
+               goto probe_out;
+       }
+
+       host = mmc_priv(mmc);
+       host->mmc       = mmc;
+       host->pdev      = pdev;
+       host->is2440    = is2440;
+
+       /* Fall back to the all-zero default platform data so that later
+        * code never has to check host->pdata for NULL. */
+       host->pdata = pdev->dev.platform_data;
+       if (!host->pdata) {
+               pdev->dev.platform_data = &s3cmci_def_pdata;
+               host->pdata = &s3cmci_def_pdata;
+       }
+
+       spin_lock_init(&host->complete_lock);
+       tasklet_init(&host->pio_tasklet, pio_tasklet, (unsigned long) host);
+
+       /* Register offsets and clock divider differ between variants. */
+       if (is2440) {
+               host->sdiimsk   = S3C2440_SDIIMSK;
+               host->sdidata   = S3C2440_SDIDATA;
+               host->clk_div   = 1;
+       } else {
+               host->sdiimsk   = S3C2410_SDIIMSK;
+               host->sdidata   = S3C2410_SDIDATA;
+               host->clk_div   = 2;
+       }
+
+       host->dodma             = 0;
+       host->complete_what     = COMPLETION_NONE;
+       host->pio_active        = XFER_NONE;
+
+       host->dma               = S3CMCI_DMA;
+
+       host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!host->mem) {
+               dev_err(&pdev->dev,
+                       "failed to get io memory region resouce.\n");
+
+               ret = -ENOENT;
+               goto probe_free_host;
+       }
+
+       host->mem = request_mem_region(host->mem->start,
+                                      RESSIZE(host->mem), pdev->name);
+
+       if (!host->mem) {
+               dev_err(&pdev->dev, "failed to request io memory region.\n");
+               ret = -ENOENT;
+               goto probe_free_host;
+       }
+
+       host->base = ioremap(host->mem->start, RESSIZE(host->mem));
+       if (!host->base) {
+               dev_err(&pdev->dev, "failed to ioremap() io memory region.\n");
+               ret = -EINVAL;
+               goto probe_free_mem_region;
+       }
+
+       /* platform_get_irq() reports failure with a negative errno, so
+        * reject anything that is not a positive interrupt number. */
+       host->irq = platform_get_irq(pdev, 0);
+       if (host->irq <= 0) {
+               dev_err(&pdev->dev, "failed to get interrupt resouce.\n");
+               ret = -EINVAL;
+               goto probe_iounmap;
+       }
+
+       if (request_irq(host->irq, s3cmci_irq, 0, DRIVER_NAME, host)) {
+               dev_err(&pdev->dev, "failed to request mci interrupt.\n");
+               ret = -ENOENT;
+               goto probe_iounmap;
+       }
+
+       /* We get spurious interrupts even when we have set the IMSK
+        * register to ignore everything, so use disable_irq() to
+        * ensure we don't lock the system with un-serviceable requests. */
+
+       disable_irq(host->irq);
+
+       host->irq_cd = s3c2410_gpio_getirq(host->pdata->gpio_detect);
+
+       if (host->irq_cd >= 0) {
+               if (request_irq(host->irq_cd, s3cmci_irq_cd,
+                               IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+                               DRIVER_NAME, host)) {
+                       dev_err(&pdev->dev, "can't get card detect irq.\n");
+                       ret = -ENOENT;
+                       goto probe_free_irq;
+               }
+       } else {
+               dev_warn(&pdev->dev, "host detect has no irq available\n");
+               s3c2410_gpio_cfgpin(host->pdata->gpio_detect,
+                                   S3C2410_GPIO_INPUT);
+       }
+
+       if (host->pdata->gpio_wprotect)
+               s3c2410_gpio_cfgpin(host->pdata->gpio_wprotect,
+                                   S3C2410_GPIO_INPUT);
+
+       if (s3c2410_dma_request(S3CMCI_DMA, &s3cmci_dma_client, NULL) < 0) {
+               dev_err(&pdev->dev, "unable to get DMA channel.\n");
+               ret = -EBUSY;
+               goto probe_free_irq_cd;
+       }
+
+       host->clk = clk_get(&pdev->dev, "sdi");
+       if (IS_ERR(host->clk)) {
+               dev_err(&pdev->dev, "failed to find clock source.\n");
+               ret = PTR_ERR(host->clk);
+               host->clk = NULL;
+               /* Unwind everything acquired so far; jumping straight to
+                * probe_free_host leaked the DMA channel, both IRQs, the
+                * mapping and the memory region. */
+               goto probe_free_dma;
+       }
+
+       ret = clk_enable(host->clk);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to enable clock source.\n");
+               goto clk_free;
+       }
+
+       host->clk_rate = clk_get_rate(host->clk);
+
+       mmc->ops        = &s3cmci_ops;
+       mmc->ocr_avail  = MMC_VDD_32_33 | MMC_VDD_33_34;
+       mmc->caps       = MMC_CAP_4_BIT_DATA;
+       mmc->f_min      = host->clk_rate / (host->clk_div * 256);
+       mmc->f_max      = host->clk_rate / host->clk_div;
+
+       /* Platform data may override the default voltage mask. */
+       if (host->pdata->ocr_avail)
+               mmc->ocr_avail = host->pdata->ocr_avail;
+
+       mmc->max_blk_count      = 4095;
+       mmc->max_blk_size       = 4095;
+       mmc->max_req_size       = 4095 * 512;
+       mmc->max_seg_size       = mmc->max_req_size;
+
+       mmc->max_phys_segs      = 128;
+       mmc->max_hw_segs        = 128;
+
+       dbg(host, dbg_debug,
+           "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n",
+           (host->is2440?"2440":""),
+           host->base, host->irq, host->irq_cd, host->dma);
+
+       ret = mmc_add_host(mmc);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to add mmc host.\n");
+               goto free_dmabuf;
+       }
+
+       platform_set_drvdata(pdev, mmc);
+       dev_info(&pdev->dev, "initialisation done.\n");
+
+       return 0;
+
+       /* Error unwinding: each label releases one resource and falls
+        * through to the labels for everything acquired before it. */
+ free_dmabuf:
+       clk_disable(host->clk);
+
+ clk_free:
+       clk_put(host->clk);
+
+ probe_free_dma:
+       s3c2410_dma_free(S3CMCI_DMA, &s3cmci_dma_client);
+
+ probe_free_irq_cd:
+       if (host->irq_cd >= 0)
+               free_irq(host->irq_cd, host);
+
+ probe_free_irq:
+       free_irq(host->irq, host);
+
+ probe_iounmap:
+       iounmap(host->base);
+
+ probe_free_mem_region:
+       release_mem_region(host->mem->start, RESSIZE(host->mem));
+
+ probe_free_host:
+       mmc_free_host(mmc);
+ probe_out:
+       return ret;
+}
+
+/*
+ * s3cmci_remove - undo s3cmci_probe() when the device is unbound.
+ *
+ * Unregisters the host from the MMC core first, then releases the
+ * clock, tasklet, DMA channel, interrupts, register mapping and memory
+ * region before freeing the host.  Always returns 0.
+ */
+static int __devexit s3cmci_remove(struct platform_device *pdev)
+{
+       struct mmc_host         *mmc  = platform_get_drvdata(pdev);
+       struct s3cmci_host      *host = mmc_priv(mmc);
+
+       mmc_remove_host(mmc);
+
+       clk_disable(host->clk);
+       clk_put(host->clk);
+
+       tasklet_disable(&host->pio_tasklet);
+       s3c2410_dma_free(S3CMCI_DMA, &s3cmci_dma_client);
+
+       /* irq_cd is negative when no card-detect interrupt was available. */
+       if (host->irq_cd >= 0)
+               free_irq(host->irq_cd, host);
+       free_irq(host->irq, host);
+
+       iounmap(host->base);
+       release_mem_region(host->mem->start, RESSIZE(host->mem));
+
+       mmc_free_host(mmc);
+       return 0;
+}
+
+/* Probe entry for "s3c2410-sdi" devices: common probe with is2440 = 0. */
+static int __devinit s3cmci_probe_2410(struct platform_device *dev)
+{
+       return s3cmci_probe(dev, 0);
+}
+
+/* Probe entry for "s3c2412-sdi" devices: handled like the 2440
+ * (common probe with is2440 = 1). */
+static int __devinit s3cmci_probe_2412(struct platform_device *dev)
+{
+       return s3cmci_probe(dev, 1);
+}
+
+/* Probe entry for "s3c2440-sdi" devices: common probe with is2440 = 1. */
+static int __devinit s3cmci_probe_2440(struct platform_device *dev)
+{
+       return s3cmci_probe(dev, 1);
+}
+
+#ifdef CONFIG_PM
+
+/* Platform suspend hook: forward to the MMC core for this host. */
+static int s3cmci_suspend(struct platform_device *dev, pm_message_t state)
+{
+       return mmc_suspend_host(platform_get_drvdata(dev), state);
+}
+
+/* Platform resume hook: forward to the MMC core for this host. */
+static int s3cmci_resume(struct platform_device *dev)
+{
+       return mmc_resume_host(platform_get_drvdata(dev));
+}
+
+#else /* CONFIG_PM */
+#define s3cmci_suspend NULL
+#define s3cmci_resume NULL
+#endif /* CONFIG_PM */
+
+
+/* Platform driver bound to devices named "s3c2410-sdi". */
+static struct platform_driver s3cmci_driver_2410 = {
+       .driver.name    = "s3c2410-sdi",
+       .driver.owner   = THIS_MODULE,
+       .probe          = s3cmci_probe_2410,
+       .remove         = __devexit_p(s3cmci_remove),
+       .suspend        = s3cmci_suspend,
+       .resume         = s3cmci_resume,
+};
+
+/* Platform driver bound to devices named "s3c2412-sdi". */
+static struct platform_driver s3cmci_driver_2412 = {
+       .driver.name    = "s3c2412-sdi",
+       .driver.owner   = THIS_MODULE,
+       .probe          = s3cmci_probe_2412,
+       .remove         = __devexit_p(s3cmci_remove),
+       .suspend        = s3cmci_suspend,
+       .resume         = s3cmci_resume,
+};
+
+/* Platform driver bound to devices named "s3c2440-sdi". */
+static struct platform_driver s3cmci_driver_2440 = {
+       .driver.name    = "s3c2440-sdi",
+       .driver.owner   = THIS_MODULE,
+       .probe          = s3cmci_probe_2440,
+       .remove         = __devexit_p(s3cmci_remove),
+       .suspend        = s3cmci_suspend,
+       .resume         = s3cmci_resume,
+};
+
+
+/*
+ * Module init: register the three platform drivers.
+ *
+ * Returns 0 on success.  If any registration fails, the drivers that
+ * were already registered are unregistered again and the error code is
+ * returned, so the module never loads half-registered.
+ */
+static int __init s3cmci_init(void)
+{
+       int ret;
+
+       ret = platform_driver_register(&s3cmci_driver_2410);
+       if (ret)
+               return ret;
+
+       ret = platform_driver_register(&s3cmci_driver_2412);
+       if (ret)
+               goto unreg_2410;
+
+       ret = platform_driver_register(&s3cmci_driver_2440);
+       if (ret)
+               goto unreg_2412;
+
+       return 0;
+
+ unreg_2412:
+       platform_driver_unregister(&s3cmci_driver_2412);
+ unreg_2410:
+       platform_driver_unregister(&s3cmci_driver_2410);
+       return ret;
+}
+
+/* Module exit: unregister the three platform drivers. */
+static void __exit s3cmci_exit(void)
+{
+       platform_driver_unregister(&s3cmci_driver_2410);
+       platform_driver_unregister(&s3cmci_driver_2412);
+       platform_driver_unregister(&s3cmci_driver_2440);
+}
+
+module_init(s3cmci_init);
+module_exit(s3cmci_exit);
+
+MODULE_DESCRIPTION("Samsung S3C MMC/SD Card Interface driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Thomas Kleffel <tk@maintech.de>");
+MODULE_ALIAS("platform:s3c2410-sdi");
+MODULE_ALIAS("platform:s3c2412-sdi");
+MODULE_ALIAS("platform:s3c2440-sdi");
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h

new file mode 100644 (file)

index 0000000..37d9c60
--- /dev/null
+++ b/drivers/mmc/host/s3cmci.h
@@ -0,0 +1,70 @@
+/*
+ *  linux/drivers/mmc/s3cmci.h - Samsung S3C MCI driver
+ *
+ *  Copyright (C) 2004-2006 Thomas Kleffel, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* FIXME: DMA Resource management ?! */
+#define S3CMCI_DMA 0
+
+/*
+ * Event the driver is waiting for before the current command can be
+ * finalised (stored in s3cmci_host.complete_what).
+ */
+enum s3cmci_waitfor {
+       /* No request in flight. */
+       COMPLETION_NONE,
+       /* All awaited events seen (or an error hit); ready to finalise. */
+       COMPLETION_FINALIZE,
+       /* Command without response: wait for "command sent". */
+       COMPLETION_CMDSENT,
+       /* Wait for the command response to finish. */
+       COMPLETION_RSPFIN,
+       /* Wait for the data transfer to finish. */
+       COMPLETION_XFERFINISH,
+       /* Data command: wait for both response and transfer finish. */
+       COMPLETION_XFERFINISH_RSPFIN,
+};
+
+/* Per-controller driver state, stored as the private area of mmc_host. */
+struct s3cmci_host {
+       /* Device, platform data (or driver defaults) and MMC core host. */
+       struct platform_device  *pdev;
+       struct s3c24xx_mci_pdata *pdata;
+       struct mmc_host         *mmc;
+       /* Claimed register region, "sdi" clock and mapped register base. */
+       struct resource         *mem;
+       struct clk              *clk;
+       void __iomem            *base;
+       /* Controller IRQ, card-detect IRQ (negative if none), DMA channel. */
+       int                     irq;
+       int                     irq_cd;
+       int                     dma;
+
+       /* Input clock rate, variant-specific divider, rate actually
+        * selected by set_ios and the prescaler value written to SDIPRE. */
+       unsigned long           clk_rate;
+       unsigned long           clk_div;
+       unsigned long           real_rate;
+       u8                      prescaler;
+
+       /* Variant flag and the register offsets that depend on it. */
+       int                     is2440;
+       unsigned                sdiimsk;
+       unsigned                sdidata;
+       /* Non-zero to use DMA instead of PIO; DMA buffers still pending. */
+       int                     dodma;
+       int                     dmatogo;
+
+       /* Request in flight, and whether its stop command is being sent. */
+       struct mmc_request      *mrq;
+       int                     cmd_is_stop;
+
+       /* Lock taken by the IRQ and DMA completion paths, and the event
+        * currently awaited. */
+       spinlock_t              complete_lock;
+       enum s3cmci_waitfor     complete_what;
+
+       /* Tested by finalize_request() when dodma is set. */
+       int                     dma_complete;
+
+       /* PIO bookkeeping, reset at the start of every PIO transfer.
+        * NOTE(review): exact meaning of each counter is defined by the
+        * PIO routines, which are not visible here. */
+       u32                     pio_sgptr;
+       u32                     pio_words;
+       u32                     pio_count;
+       u32                     *pio_ptr;
+#define XFER_NONE 0
+#define XFER_READ 1
+#define XFER_WRITE 2
+       /* One of the XFER_* values above. */
+       u32                     pio_active;
+
+       /* Bus width last requested through set_ios. */
+       int                     bus_width;
+
+       /* Debug message buffers and a human-readable status string that
+        * is printed by the debug output. */
+       char                    dbgmsg_cmd[301];
+       char                    dbgmsg_dat[301];
+       char                    *status;
+
+       /* Counters: commands sent / commands with a data phase. */
+       unsigned int            ccnt, dcnt;
+       struct tasklet_struct   pio_tasklet;
+};
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c

new file mode 100644 (file)

index 0000000..deb607c
--- /dev/null
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -0,0 +1,732 @@
+/*  linux/drivers/mmc/host/sdhci-pci.c - SDHCI on PCI bus interface
+ *
+ *  Copyright (C) 2005-2008 Pierre Ossman, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * Thanks to the following companies for their support:
+ *
+ *     - JMicron (hardware and technical support)
+ */
+
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/mmc/host.h>
+
+#include <asm/scatterlist.h>
+#include <asm/io.h>
+
+#include "sdhci.h"
+
+/*
+ * PCI registers
+ */
+
+/*
+ * Programming interface values; compared against the low byte of
+ * pdev->class in this file.
+ */
+#define PCI_SDHCI_IFPIO                        0x00
+#define PCI_SDHCI_IFDMA                        0x01
+#define PCI_SDHCI_IFVENDOR             0x02
+
+/*
+ * Slot information byte in PCI config space: bits 4-6 hold the slot
+ * count field (the probe code adds one to it), bits 0-2 the index of
+ * the first BAR used by a slot.
+ */
+#define PCI_SLOT_INFO                  0x40    /* 8 bits */
+/* The argument is parenthesized so compound expressions expand safely. */
+#define  PCI_SLOT_INFO_SLOTS(x)                (((x) >> 4) & 7)
+#define  PCI_SLOT_INFO_FIRST_BAR_MASK  0x07
+
+/* Capacity of sdhci_pci_chip.slots[]. */
+#define MAX_SLOTS                      8
+
+struct sdhci_pci_chip;
+struct sdhci_pci_slot;
+
+/*
+ * Per-chip fixups, attached to a device through pci_device_id.driver_data.
+ * Every hook is optional; callers in this file test for NULL before
+ * calling it.
+ */
+struct sdhci_pci_fixes {
+       /* Initial quirk mask, copied into sdhci_pci_chip.quirks at probe. */
+       unsigned int            quirks;
+
+       /* Called once per chip, before any slot is probed. */
+       int                     (*probe)(struct sdhci_pci_chip*);
+
+       /*
+        * probe_slot runs after the slot registers are mapped and before
+        * sdhci_add_host(); remove_slot undoes it and receives the "dead"
+        * flag as its second argument.
+        */
+       int                     (*probe_slot)(struct sdhci_pci_slot*);
+       void                    (*remove_slot)(struct sdhci_pci_slot*, int);
+
+       /*
+        * suspend runs after all slots have been suspended; resume runs
+        * after the PCI device is re-enabled and before slots are resumed.
+        */
+       int                     (*suspend)(struct sdhci_pci_chip*,
+                                       pm_message_t);
+       int                     (*resume)(struct sdhci_pci_chip*);
+};
+
+/*
+ * One SDHCI slot of a PCI controller. Allocated as the private area of
+ * the sdhci_host (see sdhci_alloc_host()/sdhci_priv() in the slot probe).
+ */
+struct sdhci_pci_slot {
+       struct sdhci_pci_chip   *chip;          /* Owning controller */
+       struct sdhci_host       *host;          /* SDHCI core host for this slot */
+
+       int                     pci_bar;        /* BAR index requested for this slot */
+};
+
+/*
+ * Per PCI function state; stored as the PCI driver data of the device.
+ */
+struct sdhci_pci_chip {
+       struct pci_dev          *pdev;
+
+       /*
+        * Quirk mask handed to every slot's host; starts from fixes->quirks
+        * and may be extended by the chip-level probe hook.
+        */
+       unsigned int            quirks;
+       /* Chip fixups from the id table, or NULL when the entry has none. */
+       const struct sdhci_pci_fixes *fixes;
+
+       int                     num_slots;      /* Slots on controller */
+       struct sdhci_pci_slot   *slots[MAX_SLOTS]; /* Pointers to host slots */
+};
+
+
+/*****************************************************************************\
+ *                                                                           *
+ * Hardware specific quirk handling                                          *
+ *                                                                           *
+\*****************************************************************************/
+
+/*
+ * Chip-level probe hook for Ricoh controllers: adds a quirk that depends
+ * on the PCI subsystem vendor on top of the table defaults.
+ *
+ * Always returns 0.
+ */
+static int ricoh_probe(struct sdhci_pci_chip *chip)
+{
+       switch (chip->pdev->subsystem_vendor) {
+       case PCI_VENDOR_ID_IBM:
+               chip->quirks |= SDHCI_QUIRK_CLOCK_BEFORE_RESET;
+               break;
+       case PCI_VENDOR_ID_SAMSUNG:
+               chip->quirks |= SDHCI_QUIRK_NO_CARD_NO_RESET;
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * Ricoh fixups: a base quirk plus ricoh_probe(), which adds quirks
+ * depending on the subsystem vendor.
+ */
+static const struct sdhci_pci_fixes sdhci_ricoh = {
+       .probe          = ricoh_probe,
+       .quirks         = SDHCI_QUIRK_32BIT_DMA_ADDR,
+};
+
+/* Quirks for the ENE CB712 entries of the id table; no hooks. */
+static const struct sdhci_pci_fixes sdhci_ene_712 = {
+       .quirks         = SDHCI_QUIRK_SINGLE_POWER_WRITE |
+                         SDHCI_QUIRK_BROKEN_DMA,
+};
+
+/*
+ * Quirks for the ENE CB714 entries of the id table: the CB712 set plus
+ * SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS. No hooks.
+ */
+static const struct sdhci_pci_fixes sdhci_ene_714 = {
+       .quirks         = SDHCI_QUIRK_SINGLE_POWER_WRITE |
+                         SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS |
+                         SDHCI_QUIRK_BROKEN_DMA,
+};
+
+/* Quirks for the Marvell CAFE entry of the id table; no hooks. */
+static const struct sdhci_pci_fixes sdhci_cafe = {
+       .quirks         = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER |
+                         SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
+};
+
+/*
+ * Switch the JMicron power output bits in PCI config register 0xAE.
+ *
+ * A non-zero @on sets, and zero clears, the mask 0x47: PMOS on [bit 0],
+ * over current detection at 2.4 V [bit 1:2] and over current
+ * debouncing [bit 6]. All other bits are written back unchanged.
+ *
+ * Returns 0 on success or the error reported by the config space access.
+ */
+static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
+{
+       struct pci_dev *pdev = chip->pdev;
+       u8 reg;
+       int err;
+
+       err = pci_read_config_byte(pdev, 0xAE, &reg);
+       if (err)
+               return err;
+
+       reg = on ? (reg | 0x47) : (reg & ~0x47);
+
+       return pci_write_config_byte(pdev, 0xAE, reg);
+}
+
+/*
+ * Chip-level probe hook for JMicron controllers.
+ *
+ * Adds the revision 0 quirks, refuses to bind to the SD function when an
+ * MMC function exists in the same PCI slot on the same bus, and finally
+ * enables the power output pins.
+ *
+ * Returns 0 on success, -ENODEV for the refused interface, or the error
+ * from jmicron_pmos().
+ */
+static int jmicron_probe(struct sdhci_pci_chip *chip)
+{
+       struct pci_dev *pdev = chip->pdev;
+       struct pci_dev *twin = NULL;
+       int err;
+
+       if (pdev->revision == 0)
+               chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR |
+                         SDHCI_QUIRK_32BIT_DMA_SIZE |
+                         SDHCI_QUIRK_32BIT_ADMA_SIZE |
+                         SDHCI_QUIRK_RESET_AFTER_REQUEST;
+
+       /*
+        * JMicron chips can have two interfaces to the same hardware
+        * in order to work around limitations in Microsoft's driver.
+        * We need to make sure we only bind to one of them.
+        *
+        * This code assumes two things:
+        *
+        * 1. The PCI code adds subfunctions in order.
+        *
+        * 2. The MMC interface has a lower subfunction number
+        *    than the SD interface.
+        */
+       if (pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD) {
+               /* Walk all JMB38X MMC functions looking for our sibling. */
+               for (;;) {
+                       twin = pci_get_device(PCI_VENDOR_ID_JMICRON,
+                               PCI_DEVICE_ID_JMICRON_JMB38X_MMC, twin);
+                       if (twin == NULL)
+                               break;
+
+                       if ((PCI_SLOT(pdev->devfn) == PCI_SLOT(twin->devfn)) &&
+                               (pdev->bus == twin->bus))
+                               break;
+               }
+
+               if (twin) {
+                       /* Drop the reference pci_get_device() handed us. */
+                       pci_dev_put(twin);
+                       dev_info(&pdev->dev, "Refusing to bind to "
+                               "secondary interface.\n");
+                       return -ENODEV;
+               }
+       }
+
+       /*
+        * JMicron chips need a bit of a nudge to enable the power
+        * output pins.
+        */
+       err = jmicron_pmos(chip, 1);
+       if (err)
+               dev_err(&pdev->dev, "Failure enabling card power\n");
+
+       return err;
+}
+
+/*
+ * Set (@on non-zero) or clear (@on zero) bit 0 of the byte register at
+ * offset 0xC0 of the host's register window, leaving other bits as read.
+ */
+static void jmicron_enable_mmc(struct sdhci_host *host, int on)
+{
+       u8 val = readb(host->ioaddr + 0xC0);
+
+       val = on ? (val | 0x01) : (val & ~0x01);
+
+       writeb(val, host->ioaddr + 0xC0);
+}
+
+/*
+ * Slot-level probe hook for JMicron controllers.
+ *
+ * On revision 0 chips, marks ADMA as broken when the vendor version field
+ * is below 0xAC; on the MMC function, sets the enable bit through
+ * jmicron_enable_mmc(). Always returns 0.
+ */
+static int jmicron_probe_slot(struct sdhci_pci_slot *slot)
+{
+       struct pci_dev *pdev = slot->chip->pdev;
+       struct sdhci_host *host = slot->host;
+       u16 vendor_ver;
+
+       if (pdev->revision == 0) {
+               /*
+                * The 32-bit read is truncated to 16 bits by the assignment.
+                * NOTE(review): confirm that a 32-bit access at
+                * SDHCI_HOST_VERSION is intended here.
+                */
+               vendor_ver = readl(host->ioaddr + SDHCI_HOST_VERSION);
+               vendor_ver = (vendor_ver & SDHCI_VENDOR_VER_MASK) >>
+                       SDHCI_VENDOR_VER_SHIFT;
+
+               /*
+                * Older versions of the chip have lots of nasty glitches
+                * in the ADMA engine. It's best just to avoid it
+                * completely.
+                */
+               if (vendor_ver < 0xAC)
+                       host->quirks |= SDHCI_QUIRK_BROKEN_ADMA;
+       }
+
+       /*
+        * The secondary interface requires a bit set to get the
+        * interrupts.
+        */
+       if (pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC)
+               jmicron_enable_mmc(host, 1);
+
+       return 0;
+}
+
+/*
+ * Slot-level remove hook for JMicron controllers: clears the MMC enable
+ * bit again on the MMC function. Nothing is touched when @dead is set.
+ */
+static void jmicron_remove_slot(struct sdhci_pci_slot *slot, int dead)
+{
+       if (!dead &&
+               slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC)
+               jmicron_enable_mmc(slot->host, 0);
+}
+
+/*
+ * Chip-level suspend hook for JMicron controllers: on the MMC function,
+ * clears the MMC enable bit of every slot. @state is unused.
+ *
+ * Always returns 0.
+ */
+static int jmicron_suspend(struct sdhci_pci_chip *chip, pm_message_t state)
+{
+       int idx;
+
+       if (chip->pdev->device != PCI_DEVICE_ID_JMICRON_JMB38X_MMC)
+               return 0;
+
+       for (idx = 0; idx < chip->num_slots; idx++)
+               jmicron_enable_mmc(chip->slots[idx]->host, 0);
+
+       return 0;
+}
+
+/*
+ * Chip-level resume hook for JMicron controllers: on the MMC function,
+ * sets the MMC enable bit of every slot again, then re-enables the power
+ * output pins.
+ *
+ * Returns 0 on success or the error from jmicron_pmos().
+ */
+static int jmicron_resume(struct sdhci_pci_chip *chip)
+{
+       struct pci_dev *pdev = chip->pdev;
+       int idx, err;
+
+       if (pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) {
+               for (idx = 0; idx < chip->num_slots; idx++)
+                       jmicron_enable_mmc(chip->slots[idx]->host, 1);
+       }
+
+       err = jmicron_pmos(chip, 1);
+       if (err)
+               dev_err(&pdev->dev, "Failure enabling card power\n");
+
+       return err;
+}
+
+/*
+ * JMicron fixups. No static quirk mask: jmicron_probe() and
+ * jmicron_probe_slot() add quirks at run time depending on the chip
+ * revision.
+ */
+static const struct sdhci_pci_fixes sdhci_jmicron = {
+       .probe          = jmicron_probe,
+
+       .probe_slot     = jmicron_probe_slot,
+       .remove_slot    = jmicron_remove_slot,
+
+       .suspend        = jmicron_suspend,
+       .resume         = jmicron_resume,
+};
+
+/*
+ * Devices handled by this driver. driver_data carries a pointer to the
+ * chip's struct sdhci_pci_fixes; sdhci_pci_probe() casts it back and
+ * copes with it being absent.
+ */
+static const struct pci_device_id pci_ids[] __devinitdata = {
+       {
+               .vendor         = PCI_VENDOR_ID_RICOH,
+               .device         = PCI_DEVICE_ID_RICOH_R5C822,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .driver_data    = (kernel_ulong_t)&sdhci_ricoh,
+       },
+
+       {
+               .vendor         = PCI_VENDOR_ID_ENE,
+               .device         = PCI_DEVICE_ID_ENE_CB712_SD,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .driver_data    = (kernel_ulong_t)&sdhci_ene_712,
+       },
+
+       {
+               .vendor         = PCI_VENDOR_ID_ENE,
+               .device         = PCI_DEVICE_ID_ENE_CB712_SD_2,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .driver_data    = (kernel_ulong_t)&sdhci_ene_712,
+       },
+
+       {
+               .vendor         = PCI_VENDOR_ID_ENE,
+               .device         = PCI_DEVICE_ID_ENE_CB714_SD,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .driver_data    = (kernel_ulong_t)&sdhci_ene_714,
+       },
+
+       {
+               .vendor         = PCI_VENDOR_ID_ENE,
+               .device         = PCI_DEVICE_ID_ENE_CB714_SD_2,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .driver_data    = (kernel_ulong_t)&sdhci_ene_714,
+       },
+
+       {
+               .vendor         = PCI_VENDOR_ID_MARVELL,
+               .device         = PCI_DEVICE_ID_MARVELL_CAFE_SD,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .driver_data    = (kernel_ulong_t)&sdhci_cafe,
+       },
+
+       /* Both JMicron functions share one fixup set. */
+       {
+               .vendor         = PCI_VENDOR_ID_JMICRON,
+               .device         = PCI_DEVICE_ID_JMICRON_JMB38X_SD,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .driver_data    = (kernel_ulong_t)&sdhci_jmicron,
+       },
+
+       {
+               .vendor         = PCI_VENDOR_ID_JMICRON,
+               .device         = PCI_DEVICE_ID_JMICRON_JMB38X_MMC,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .driver_data    = (kernel_ulong_t)&sdhci_jmicron,
+       },
+
+       /* Class match with the programming interface byte masked out. */
+       {       /* Generic SD host controller */
+               PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00)
+       },
+
+       { /* end: all zeroes */ },
+};
+
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+/*****************************************************************************\
+ *                                                                           *
+ * SDHCI core callbacks                                                      *
+ *                                                                           *
+\*****************************************************************************/
+
+/*
+ * enable_dma callback: prepares the PCI device behind @host for DMA.
+ *
+ * Warns when DMA is in use on an SDHCI class device whose programming
+ * interface byte is not PCI_SDHCI_IFDMA, then sets a 32-bit DMA mask and
+ * enables bus mastering.
+ *
+ * Returns 0 on success or the error from pci_set_dma_mask().
+ */
+static int sdhci_pci_enable_dma(struct sdhci_host *host)
+{
+       struct sdhci_pci_slot *slot = sdhci_priv(host);
+       struct pci_dev *pdev = slot->chip->pdev;
+       int is_sdhci_class, claims_dma;
+       int err;
+
+       is_sdhci_class =
+               (pdev->class & 0xFFFF00) == (PCI_CLASS_SYSTEM_SDHCI << 8);
+       claims_dma = (pdev->class & 0x0000FF) == PCI_SDHCI_IFDMA;
+
+       if (is_sdhci_class && !claims_dma &&
+               (host->flags & SDHCI_USE_DMA)) {
+               dev_warn(&pdev->dev, "Will use DMA mode even though HW "
+                       "doesn't fully claim to support it.\n");
+       }
+
+       err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+       if (err)
+               return err;
+
+       pci_set_master(pdev);
+
+       return 0;
+}
+
+/* Callbacks handed to the SDHCI core; installed as host->ops per slot. */
+static struct sdhci_ops sdhci_pci_ops = {
+       .enable_dma     = sdhci_pci_enable_dma,
+};
+
+/*****************************************************************************\
+ *                                                                           *
+ * Suspend/resume                                                            *
+ *                                                                           *
+\*****************************************************************************/
+
+#ifdef CONFIG_PM
+
+/*
+ * Resume slots [0, count) of @chip in reverse order, skipping entries
+ * that are not populated. Used to roll back a partially done suspend.
+ */
+static void sdhci_pci_resume_slots(struct sdhci_pci_chip *chip, int count)
+{
+       int i;
+
+       for (i = count - 1;i >= 0;i--) {
+               if (chip->slots[i])
+                       sdhci_resume_host(chip->slots[i]->host);
+       }
+}
+
+/*
+ * PCI suspend callback.
+ *
+ * Suspends every populated slot, runs the chip's suspend fixup if there
+ * is one, then saves PCI state, disables the device and puts it into the
+ * power state chosen for @state. If a slot or the fixup fails, the slots
+ * suspended so far are resumed again and the error is returned.
+ *
+ * Returns 0 on success (also when no driver data is attached).
+ */
+static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state)
+{
+       struct sdhci_pci_chip *chip;
+       struct sdhci_pci_slot *slot;
+       int i, ret;
+
+       chip = pci_get_drvdata(pdev);
+       if (!chip)
+               return 0;
+
+       for (i = 0;i < chip->num_slots;i++) {
+               slot = chip->slots[i];
+               if (!slot)
+                       continue;
+
+               ret = sdhci_suspend_host(slot->host, state);
+
+               if (ret) {
+                       /*
+                        * Roll back slots 0..i-1. The helper applies the
+                        * same NULL check as the loop above instead of
+                        * dereferencing empty entries.
+                        */
+                       sdhci_pci_resume_slots(chip, i);
+                       return ret;
+               }
+       }
+
+       if (chip->fixes && chip->fixes->suspend) {
+               ret = chip->fixes->suspend(chip, state);
+               if (ret) {
+                       sdhci_pci_resume_slots(chip, chip->num_slots);
+                       return ret;
+               }
+       }
+
+       pci_save_state(pdev);
+       pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
+       pci_disable_device(pdev);
+       pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+       return 0;
+}
+
+/*
+ * PCI resume callback.
+ *
+ * Brings the device back to D0, restores its PCI state and re-enables
+ * it, runs the chip's resume fixup if there is one, then resumes every
+ * populated slot. Stops at, and returns, the first error.
+ *
+ * Returns 0 on success (also when no driver data is attached).
+ */
+static int sdhci_pci_resume (struct pci_dev *pdev)
+{
+       struct sdhci_pci_chip *chip = pci_get_drvdata(pdev);
+       int idx, err;
+
+       if (!chip)
+               return 0;
+
+       pci_set_power_state(pdev, PCI_D0);
+       pci_restore_state(pdev);
+
+       err = pci_enable_device(pdev);
+       if (err)
+               return err;
+
+       if (chip->fixes && chip->fixes->resume) {
+               err = chip->fixes->resume(chip);
+               if (err)
+                       return err;
+       }
+
+       for (idx = 0; idx < chip->num_slots; idx++) {
+               if (!chip->slots[idx])
+                       continue;
+
+               err = sdhci_resume_host(chip->slots[idx]->host);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+#else /* CONFIG_PM */
+
+#define sdhci_pci_suspend NULL
+#define sdhci_pci_resume NULL
+
+#endif /* CONFIG_PM */
+
+/*****************************************************************************\
+ *                                                                           *
+ * Device probing/removal                                                    *
+ *                                                                           *
+\*****************************************************************************/
+
+/*
+ * Set up the SDHCI slot that lives behind BAR @bar of @pdev.
+ *
+ * Validates the BAR and the programming interface, allocates an SDHCI
+ * host whose private area is the sdhci_pci_slot, requests and maps the
+ * BAR, runs the chip's probe_slot fixup and registers the host with the
+ * SDHCI core.
+ *
+ * Returns the new slot, or an ERR_PTR() value on failure. On failure
+ * everything acquired here has been released again; the error labels at
+ * the bottom unwind in reverse order of acquisition.
+ */
+static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
+       struct pci_dev *pdev, struct sdhci_pci_chip *chip, int bar)
+{
+       struct sdhci_pci_slot *slot;
+       struct sdhci_host *host;
+
+       resource_size_t addr;
+
+       int ret;
+
+       if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
+               dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar);
+               return ERR_PTR(-ENODEV);
+       }
+
+       /* An unexpected window size is reported but not treated as fatal. */
+       if (pci_resource_len(pdev, bar) != 0x100) {
+               dev_err(&pdev->dev, "Invalid iomem size. You may "
+                       "experience problems.\n");
+       }
+
+       if ((pdev->class & 0x0000FF) == PCI_SDHCI_IFVENDOR) {
+               dev_err(&pdev->dev, "Vendor specific interface. Aborting.\n");
+               return ERR_PTR(-ENODEV);
+       }
+
+       if ((pdev->class & 0x0000FF) > PCI_SDHCI_IFVENDOR) {
+               dev_err(&pdev->dev, "Unknown interface. Aborting.\n");
+               return ERR_PTR(-ENODEV);
+       }
+
+       host = sdhci_alloc_host(&pdev->dev, sizeof(struct sdhci_pci_slot));
+       if (IS_ERR(host)) {
+               /*
+                * Nothing has been acquired yet and host is an error
+                * cookie, not an object, so it must not be passed through
+                * the cleanup labels below.
+                */
+               return ERR_PTR(PTR_ERR(host));
+       }
+
+       slot = sdhci_priv(host);
+
+       slot->chip = chip;
+       slot->host = host;
+       slot->pci_bar = bar;
+
+       host->hw_name = "PCI";
+       host->ops = &sdhci_pci_ops;
+       host->quirks = chip->quirks;
+
+       host->irq = pdev->irq;
+
+       ret = pci_request_region(pdev, bar, mmc_hostname(host->mmc));
+       if (ret) {
+               dev_err(&pdev->dev, "cannot request region\n");
+               /* The host was allocated above and must be freed again. */
+               goto free;
+       }
+
+       addr = pci_resource_start(pdev, bar);
+       host->ioaddr = ioremap_nocache(addr, pci_resource_len(pdev, bar));
+       if (!host->ioaddr) {
+               dev_err(&pdev->dev, "failed to remap registers\n");
+               /*
+                * ret is still 0 from the successful region request;
+                * without an explicit error code the function would
+                * return ERR_PTR(0), i.e. NULL, which IS_ERR() does not
+                * recognize as a failure.
+                */
+               ret = -ENOMEM;
+               goto release;
+       }
+
+       if (chip->fixes && chip->fixes->probe_slot) {
+               ret = chip->fixes->probe_slot(slot);
+               if (ret)
+                       goto unmap;
+       }
+
+       ret = sdhci_add_host(host);
+       if (ret)
+               goto remove;
+
+       return slot;
+
+remove:
+       if (chip->fixes && chip->fixes->remove_slot)
+               chip->fixes->remove_slot(slot, 0);
+
+unmap:
+       iounmap(host->ioaddr);
+
+release:
+       pci_release_region(pdev, bar);
+
+free:
+       sdhci_free_host(host);
+
+       return ERR_PTR(ret);
+}
+
+/*
+ * Tear down a slot created by sdhci_pci_probe_slot().
+ *
+ * The controller is considered dead when its interrupt status register
+ * reads back as all ones; that flag is passed on to the SDHCI core and to
+ * the chip's remove_slot fixup. Afterwards the register mapping, the PCI
+ * region and the host itself (which contains @slot) are released.
+ */
+static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot)
+{
+       int dead;
+       u32 scratch;
+
+       dead = 0;
+       scratch = readl(slot->host->ioaddr + SDHCI_INT_STATUS);
+       if (scratch == (u32)-1)
+               dead = 1;
+
+       sdhci_remove_host(slot->host, dead);
+
+       if (slot->chip->fixes && slot->chip->fixes->remove_slot)
+               slot->chip->fixes->remove_slot(slot, dead);
+
+       /*
+        * Undo the ioremap_nocache() done in sdhci_pci_probe_slot(), in
+        * the same order as that function's error path. This has to come
+        * after the fixup hook, which may still access the registers.
+        * NOTE(review): assumes sdhci_remove_host() does not unmap
+        * host->ioaddr itself -- confirm against sdhci.c.
+        */
+       iounmap(slot->host->ioaddr);
+
+       pci_release_region(slot->chip->pdev, slot->pci_bar);
+
+       sdhci_free_host(slot->host);
+}
+
+/*
+ * PCI probe callback.
+ *
+ * Reads the slot information byte to learn how many slots the controller
+ * has and which BAR the first one uses, enables the device, allocates the
+ * chip state, runs the chip-level probe fixup and then probes one slot
+ * per consecutive BAR. If any slot fails, the slots already set up are
+ * removed again and the probe fails as a whole.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __devinit sdhci_pci_probe(struct pci_dev *pdev,
+                                    const struct pci_device_id *ent)
+{
+       struct sdhci_pci_chip *chip;
+       struct sdhci_pci_slot *slot;
+
+       u8 slot_info, slots, rev, first_bar;
+       int ret, i;
+
+       BUG_ON(pdev == NULL);
+       BUG_ON(ent == NULL);
+
+       /* Only used for the informational message below. */
+       pci_read_config_byte(pdev, PCI_CLASS_REVISION, &rev);
+
+       dev_info(&pdev->dev, "SDHCI controller found [%04x:%04x] (rev %x)\n",
+                (int)pdev->vendor, (int)pdev->device, (int)rev);
+
+       /* One read yields both the slot count and the first BAR index. */
+       ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slot_info);
+       if (ret)
+               return ret;
+
+       /* The field is 3 bits wide, so slots ends up in the range 1..8. */
+       slots = PCI_SLOT_INFO_SLOTS(slot_info) + 1;
+       dev_dbg(&pdev->dev, "found %d slot(s)\n", slots);
+
+       BUG_ON(slots > MAX_SLOTS);
+
+       first_bar = slot_info & PCI_SLOT_INFO_FIRST_BAR_MASK;
+
+       if (first_bar > 5) {
+               dev_err(&pdev->dev, "Invalid first BAR. Aborting.\n");
+               return -ENODEV;
+       }
+
+       /*
+        * Slot i is probed on BAR first_bar + i. Reject combinations
+        * that would run past the six standard BARs (0..5) instead of
+        * indexing unrelated resources of the device.
+        */
+       if (first_bar + slots > 6) {
+               dev_err(&pdev->dev, "Slots exceed available BARs. "
+                       "Aborting.\n");
+               return -ENODEV;
+       }
+
+       ret = pci_enable_device(pdev);
+       if (ret)
+               return ret;
+
+       chip = kzalloc(sizeof(struct sdhci_pci_chip), GFP_KERNEL);
+       if (!chip) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       chip->pdev = pdev;
+       /* driver_data is 0 for id table entries without fixups. */
+       chip->fixes = (const struct sdhci_pci_fixes*)ent->driver_data;
+       if (chip->fixes)
+               chip->quirks = chip->fixes->quirks;
+       chip->num_slots = slots;
+
+       pci_set_drvdata(pdev, chip);
+
+       if (chip->fixes && chip->fixes->probe) {
+               ret = chip->fixes->probe(chip);
+               if (ret)
+                       goto free;
+       }
+
+       for (i = 0;i < slots;i++) {
+               slot = sdhci_pci_probe_slot(pdev, chip, first_bar + i);
+               if (IS_ERR(slot)) {
+                       /* Unwind the slots that were already set up. */
+                       for (i--;i >= 0;i--)
+                               sdhci_pci_remove_slot(chip->slots[i]);
+                       ret = PTR_ERR(slot);
+                       goto free;
+               }
+
+               chip->slots[i] = slot;
+       }
+
+       return 0;
+
+free:
+       pci_set_drvdata(pdev, NULL);
+       kfree(chip);
+
+err:
+       pci_disable_device(pdev);
+       return ret;
+}
+
+/*
+ * PCI remove callback: removes every slot of the chip, detaches and
+ * frees the chip state (if any is attached) and disables the device.
+ */
+static void __devexit sdhci_pci_remove(struct pci_dev *pdev)
+{
+       struct sdhci_pci_chip *chip = pci_get_drvdata(pdev);
+       int idx;
+
+       if (chip != NULL) {
+               for (idx = 0; idx < chip->num_slots; idx++)
+                       sdhci_pci_remove_slot(chip->slots[idx]);
+
+               pci_set_drvdata(pdev, NULL);
+               kfree(chip);
+       }
+
+       /* The device is disabled whether or not chip state was attached. */
+       pci_disable_device(pdev);
+}
+
+/*
+ * PCI driver description. The suspend/resume entries are defined to NULL
+ * when CONFIG_PM is not set.
+ */
+static struct pci_driver sdhci_driver = {
+       .name =         "sdhci-pci",
+       .id_table =     pci_ids,
+       .probe =        sdhci_pci_probe,
+       .remove =       __devexit_p(sdhci_pci_remove),
+       .suspend =      sdhci_pci_suspend,
+       .resume =       sdhci_pci_resume,
+};
+
+/*****************************************************************************\
+ *                                                                           *
+ * Driver init/exit                                                          *
+ *                                                                           *
+\*****************************************************************************/
+
+/* Module init: registers the PCI driver and returns that call's result. */
+static int __init sdhci_drv_init(void)
+{
+       return pci_register_driver(&sdhci_driver);
+}
+
+/* Module exit: unregisters the PCI driver registered at init. */
+static void __exit sdhci_drv_exit(void)
+{
+       pci_unregister_driver(&sdhci_driver);
+}
+
+module_init(sdhci_drv_init);
+module_exit(sdhci_drv_exit);
+
+MODULE_AUTHOR("Pierre Ossman <drzeus@drzeus.cx>");
+MODULE_DESCRIPTION("Secure Digital Host Controller Interface PCI driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c

index b413aa6c246b938649f47e63a2af5482f76fdc11..17701c3da73326b9fa796086a617b253c506b1e5 100644 (file)
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -15,7 +15,7 @@
  
  #include <linux/delay.h>
  #include <linux/highmem.h>
-#include <linux/pci.h>
+#include <linux/io.h>
  #include <linux/dma-mapping.h>
  #include <linux/scatterlist.h>
  
@@ -32,135 +32,6 @@
  
  static unsigned int debug_quirks = 0;
  
-/*
- * Different quirks to handle when the hardware deviates from a strict
- * interpretation of the SDHCI specification.
- */
-
-/* Controller doesn't honor resets unless we touch the clock register */
-#define SDHCI_QUIRK_CLOCK_BEFORE_RESET                 (1<<0)
-/* Controller has bad caps bits, but really supports DMA */
-#define SDHCI_QUIRK_FORCE_DMA                          (1<<1)
-/* Controller doesn't like to be reset when there is no card inserted. */
-#define SDHCI_QUIRK_NO_CARD_NO_RESET                   (1<<2)
-/* Controller doesn't like clearing the power reg before a change */
-#define SDHCI_QUIRK_SINGLE_POWER_WRITE                 (1<<3)
-/* Controller has flaky internal state so reset it on each ios change */
-#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS              (1<<4)
-/* Controller has an unusable DMA engine */
-#define SDHCI_QUIRK_BROKEN_DMA                         (1<<5)
-/* Controller can only DMA from 32-bit aligned addresses */
-#define SDHCI_QUIRK_32BIT_DMA_ADDR                     (1<<6)
-/* Controller can only DMA chunk sizes that are a multiple of 32 bits */
-#define SDHCI_QUIRK_32BIT_DMA_SIZE                     (1<<7)
-/* Controller needs to be reset after each request to stay stable */
-#define SDHCI_QUIRK_RESET_AFTER_REQUEST                        (1<<8)
-/* Controller needs voltage and power writes to happen separately */
-#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER            (1<<9)
-/* Controller has an off-by-one issue with timeout value */
-#define SDHCI_QUIRK_INCR_TIMEOUT_CONTROL               (1<<10)
-
-static const struct pci_device_id pci_ids[] __devinitdata = {
-       {
-               .vendor         = PCI_VENDOR_ID_RICOH,
-               .device         = PCI_DEVICE_ID_RICOH_R5C822,
-               .subvendor      = PCI_VENDOR_ID_IBM,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_CLOCK_BEFORE_RESET |
-                                 SDHCI_QUIRK_FORCE_DMA,
-       },
-
-       {
-               .vendor         = PCI_VENDOR_ID_RICOH,
-               .device         = PCI_DEVICE_ID_RICOH_R5C822,
-               .subvendor      = PCI_VENDOR_ID_SAMSUNG,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_FORCE_DMA |
-                                 SDHCI_QUIRK_NO_CARD_NO_RESET,
-       },
-
-       {
-               .vendor         = PCI_VENDOR_ID_RICOH,
-               .device         = PCI_DEVICE_ID_RICOH_R5C822,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_FORCE_DMA,
-       },
-
-       {
-               .vendor         = PCI_VENDOR_ID_TI,
-               .device         = PCI_DEVICE_ID_TI_XX21_XX11_SD,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_FORCE_DMA,
-       },
-
-       {
-               .vendor         = PCI_VENDOR_ID_ENE,
-               .device         = PCI_DEVICE_ID_ENE_CB712_SD,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_SINGLE_POWER_WRITE |
-                                 SDHCI_QUIRK_BROKEN_DMA,
-       },
-
-       {
-               .vendor         = PCI_VENDOR_ID_ENE,
-               .device         = PCI_DEVICE_ID_ENE_CB712_SD_2,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_SINGLE_POWER_WRITE |
-                                 SDHCI_QUIRK_BROKEN_DMA,
-       },
-
-       {
-               .vendor         = PCI_VENDOR_ID_ENE,
-               .device         = PCI_DEVICE_ID_ENE_CB714_SD,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_SINGLE_POWER_WRITE |
-                                 SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS |
-                                 SDHCI_QUIRK_BROKEN_DMA,
-       },
-
-       {
-               .vendor         = PCI_VENDOR_ID_ENE,
-               .device         = PCI_DEVICE_ID_ENE_CB714_SD_2,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_SINGLE_POWER_WRITE |
-                                 SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS |
-                                 SDHCI_QUIRK_BROKEN_DMA,
-       },
-
-       {
-               .vendor         = PCI_VENDOR_ID_MARVELL,
-               .device         = PCI_DEVICE_ID_MARVELL_CAFE_SD,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER |
-                                 SDHCI_QUIRK_INCR_TIMEOUT_CONTROL,
-       },
-
-       {
-               .vendor         = PCI_VENDOR_ID_JMICRON,
-               .device         = PCI_DEVICE_ID_JMICRON_JMB38X_SD,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .driver_data    = SDHCI_QUIRK_32BIT_DMA_ADDR |
-                                 SDHCI_QUIRK_32BIT_DMA_SIZE |
-                                 SDHCI_QUIRK_RESET_AFTER_REQUEST,
-       },
-
-       {       /* Generic SD host controller */
-               PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00)
-       },
-
-       { /* end: all zeroes */ },
-};
-
-MODULE_DEVICE_TABLE(pci, pci_ids);
-
  static void sdhci_prepare_data(struct sdhci_host *, struct mmc_data *);
  static void sdhci_finish_data(struct sdhci_host *);
  
@@ -215,7 +86,7 @@ static void sdhci_reset(struct sdhci_host *host, u8 mask)
  {
         unsigned long timeout;
  
-       if (host->chip->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
+       if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
                 if (!(readl(host->ioaddr + SDHCI_PRESENT_STATE) &
                         SDHCI_CARD_PRESENT))
                         return;
@@ -253,7 +124,8 @@ static void sdhci_init(struct sdhci_host *host)
                 SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT |
                 SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT |
                 SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL |
-               SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE;
+               SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE |
+               SDHCI_INT_ADMA_ERROR;
  
         writel(intmask, host->ioaddr + SDHCI_INT_ENABLE);
         writel(intmask, host->ioaddr + SDHCI_SIGNAL_ENABLE);
@@ -443,23 +315,226 @@ static void sdhci_transfer_pio(struct sdhci_host *host)
         DBG("PIO transfer complete.\n");
  }
  
-static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
+static char *sdhci_kmap_atomic(struct scatterlist *sg, unsigned long *flags)
  {
-       u8 count;
-       unsigned target_timeout, current_timeout;
+       local_irq_save(*flags); /* caller passes *flags to sdhci_kunmap_atomic() */
+       return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
+}
  
-       WARN_ON(host->data);
+static void sdhci_kunmap_atomic(void *buffer, unsigned long *flags)
+{
+       kunmap_atomic(buffer, KM_BIO_SRC_IRQ); /* same slot as sdhci_kmap_atomic() */
+       local_irq_restore(*flags); /* state saved by sdhci_kmap_atomic() */
+}
  
-       if (data == NULL)
-               return;
+static int sdhci_adma_table_pre(struct sdhci_host *host,
+       struct mmc_data *data)
+{
+       int direction;
  
-       /* Sanity checks */
-       BUG_ON(data->blksz * data->blocks > 524288);
-       BUG_ON(data->blksz > host->mmc->max_blk_size);
-       BUG_ON(data->blocks > 65535);
+       u8 *desc;
+       u8 *align;
+       dma_addr_t addr;
+       dma_addr_t align_addr;
+       int len, offset;
  
-       host->data = data;
-       host->data_early = 0;
+       struct scatterlist *sg;
+       int i;
+       char *buffer;
+       unsigned long flags;
+       /* Maps @data and fills the ADMA table; returns 0 or -EINVAL. */
+       /*
+        * The spec does not specify endianness of descriptor table.
+        * We currently guess that it is LE.
+        */
+
+       if (data->flags & MMC_DATA_READ)
+               direction = DMA_FROM_DEVICE;
+       else
+               direction = DMA_TO_DEVICE;
+
+       /*
+        * The ADMA descriptor table is mapped further down as we
+        * need to fill it with data first.
+        */
+
+       host->align_addr = dma_map_single(mmc_dev(host->mmc),
+               host->align_buffer, 128 * 4, direction);
+       if (dma_mapping_error(host->align_addr))
+               goto fail;
+       BUG_ON(host->align_addr & 0x3);
+
+       host->sg_count = dma_map_sg(mmc_dev(host->mmc),
+               data->sg, data->sg_len, direction);
+       if (host->sg_count == 0)
+               goto unmap_align;
+
+       desc = host->adma_desc;
+       align = host->align_buffer;
+
+       align_addr = host->align_addr;
+
+       for_each_sg(data->sg, sg, host->sg_count, i) {
+               addr = sg_dma_address(sg);
+               len = sg_dma_len(sg);
+
+               /*
+                * The SDHCI specification states that ADMA
+                * addresses must be 32-bit aligned. If they
+                * aren't, then we use a bounce buffer for
+                * the (up to three) bytes that screw up the
+                * alignment.
+                */
+               offset = (4 - (addr & 0x3)) & 0x3;
+               if (offset) {
+                       if (data->flags & MMC_DATA_WRITE) {
+                               buffer = sdhci_kmap_atomic(sg, &flags);
+                               memcpy(align, buffer, offset);
+                               sdhci_kunmap_atomic(buffer, &flags);
+                       }
+
+                       desc[7] = (align_addr >> 24) & 0xff;
+                       desc[6] = (align_addr >> 16) & 0xff;
+                       desc[5] = (align_addr >> 8) & 0xff;
+                       desc[4] = (align_addr >> 0) & 0xff;
+
+                       BUG_ON(offset > 65536);
+
+                       desc[3] = (offset >> 8) & 0xff;
+                       desc[2] = (offset >> 0) & 0xff;
+
+                       desc[1] = 0x00;
+                       desc[0] = 0x21; /* tran, valid */
+
+                       align += 4; /* one 4-byte slot per bounced entry */
+                       align_addr += 4;
+
+                       desc += 8; /* each descriptor is 8 bytes */
+
+                       addr += offset;
+                       len -= offset;
+               }
+
+               desc[7] = (addr >> 24) & 0xff;
+               desc[6] = (addr >> 16) & 0xff;
+               desc[5] = (addr >> 8) & 0xff;
+               desc[4] = (addr >> 0) & 0xff;
+
+               BUG_ON(len > 65536);
+
+               desc[3] = (len >> 8) & 0xff;
+               desc[2] = (len >> 0) & 0xff;
+
+               desc[1] = 0x00;
+               desc[0] = 0x21; /* tran, valid */
+
+               desc += 8;
+
+               /*
+                * If this triggers then we have a calculation bug somewhere.
+                * NOTE(review): entries are 8 bytes but the bound uses * 4.
+                */
+               WARN_ON((desc - host->adma_desc) > (128 * 2 + 1) * 4);
+       }
+
+       /*
+        * Add a terminating entry.
+        */
+       desc[7] = 0;
+       desc[6] = 0;
+       desc[5] = 0;
+       desc[4] = 0;
+
+       desc[3] = 0;
+       desc[2] = 0;
+
+       desc[1] = 0x00;
+       desc[0] = 0x03; /* nop, end, valid */
+
+       /*
+        * Resync align buffer as we might have changed it.
+        */
+       if (data->flags & MMC_DATA_WRITE) {
+               dma_sync_single_for_device(mmc_dev(host->mmc),
+                       host->align_addr, 128 * 4, direction);
+       }
+       /* Map the finished table and check *its* address for errors. */
+       host->adma_addr = dma_map_single(mmc_dev(host->mmc),
+               host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE);
+       if (dma_mapping_error(host->adma_addr))
+               goto unmap_entries;
+       BUG_ON(host->adma_addr & 0x3);
+
+       return 0;
+
+unmap_entries:
+       dma_unmap_sg(mmc_dev(host->mmc), data->sg,
+               data->sg_len, direction);
+unmap_align:
+       dma_unmap_single(mmc_dev(host->mmc), host->align_addr,
+               128 * 4, direction);
+fail:
+       return -EINVAL;
+}
+
+static void sdhci_adma_table_post(struct sdhci_host *host,
+       struct mmc_data *data)
+{
+       int direction;
+
+       struct scatterlist *sg;
+       int i, size;
+       u8 *align;
+       char *buffer;
+       unsigned long flags;
+
+       if (data->flags & MMC_DATA_READ)
+               direction = DMA_FROM_DEVICE;
+       else
+               direction = DMA_TO_DEVICE;
+       /* Release the descriptor table and the alignment bounce buffer. */
+       dma_unmap_single(mmc_dev(host->mmc), host->adma_addr,
+               (128 * 2 + 1) * 4, DMA_TO_DEVICE);
+
+       dma_unmap_single(mmc_dev(host->mmc), host->align_addr,
+               128 * 4, direction);
+       /* Reads: copy bounced leading bytes back into the real buffers. */
+       if (data->flags & MMC_DATA_READ) {
+               dma_sync_sg_for_cpu(mmc_dev(host->mmc), data->sg,
+                       data->sg_len, direction);
+
+               align = host->align_buffer;
+
+               for_each_sg(data->sg, sg, host->sg_count, i) {
+                       if (sg_dma_address(sg) & 0x3) {
+                               size = 4 - (sg_dma_address(sg) & 0x3); /* 1-3 bytes */
+
+                               buffer = sdhci_kmap_atomic(sg, &flags);
+                               memcpy(buffer, align, size);
+                               sdhci_kunmap_atomic(buffer, &flags);
+
+                               align += 4; /* next 4-byte slot */
+                       }
+               }
+       }
+
+       dma_unmap_sg(mmc_dev(host->mmc), data->sg,
+               data->sg_len, direction);
+}
+
+static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_data *data)
+{
+       u8 count;
+       unsigned target_timeout, current_timeout;
+
+       /*
+        * If the host controller provides us with an incorrect timeout
+        * value, just skip the check and use 0xE.  The hardware may take
+        * longer to time out, but that's much better than having a too-short
+        * timeout value.
+        */
+       if ((host->quirks & SDHCI_QUIRK_BROKEN_TIMEOUT_VAL))
+               return 0xE;
  
         /* timeout in us */
         target_timeout = data->timeout_ns / 1000 +
@@ -484,52 +559,158 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
                         break;
         }
  
-       /*
-        * Compensate for an off-by-one error in the CaFe hardware; otherwise,
-        * a too-small count gives us interrupt timeouts.
-        */
-       if ((host->chip->quirks & SDHCI_QUIRK_INCR_TIMEOUT_CONTROL))
-               count++;
-
         if (count >= 0xF) {
                 printk(KERN_WARNING "%s: Too large timeout requested!\n",
                         mmc_hostname(host->mmc));
                 count = 0xE;
         }
  
+       return count;
+}
+
+static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
+{
+       u8 count;
+       u8 ctrl;
+       int ret;
+
+       WARN_ON(host->data);
+
+       if (data == NULL)
+               return;
+
+       /* Sanity checks */
+       BUG_ON(data->blksz * data->blocks > 524288);
+       BUG_ON(data->blksz > host->mmc->max_blk_size);
+       BUG_ON(data->blocks > 65535);
+
+       host->data = data;
+       host->data_early = 0;
+
+       count = sdhci_calc_timeout(host, data);
         writeb(count, host->ioaddr + SDHCI_TIMEOUT_CONTROL);
  
         if (host->flags & SDHCI_USE_DMA)
                 host->flags |= SDHCI_REQ_USE_DMA;
  
-       if (unlikely((host->flags & SDHCI_REQ_USE_DMA) &&
-               (host->chip->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) &&
-               ((data->blksz * data->blocks) & 0x3))) {
-               DBG("Reverting to PIO because of transfer size (%d)\n",
-                       data->blksz * data->blocks);
-               host->flags &= ~SDHCI_REQ_USE_DMA;
+       /*
+        * FIXME: This doesn't account for merging when mapping the
+        * scatterlist.
+        */
+       if (host->flags & SDHCI_REQ_USE_DMA) {
+               int broken, i;
+               struct scatterlist *sg;
+
+               broken = 0;
+               if (host->flags & SDHCI_USE_ADMA) {
+                       if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE)
+                               broken = 1;
+               } else {
+                       if (host->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE)
+                               broken = 1;
+               }
+
+               if (unlikely(broken)) {
+                       for_each_sg(data->sg, sg, data->sg_len, i) {
+                               if (sg->length & 0x3) {
+                                       DBG("Reverting to PIO because of "
+                                               "transfer size (%d)\n",
+                                               sg->length);
+                                       host->flags &= ~SDHCI_REQ_USE_DMA;
+                                       break;
+                               }
+                       }
+               }
         }
  
         /*
          * The assumption here being that alignment is the same after
          * translation to device address space.
          */
-       if (unlikely((host->flags & SDHCI_REQ_USE_DMA) &&
-               (host->chip->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) &&
-               (data->sg->offset & 0x3))) {
-               DBG("Reverting to PIO because of bad alignment\n");
-               host->flags &= ~SDHCI_REQ_USE_DMA;
+       if (host->flags & SDHCI_REQ_USE_DMA) {
+               int broken, i;
+               struct scatterlist *sg;
+
+               broken = 0;
+               if (host->flags & SDHCI_USE_ADMA) {
+                       /*
+                        * As we use 3 byte chunks to work around
+                        * alignment problems, we need to check this
+                        * quirk.
+                        */
+                       if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE)
+                               broken = 1;
+               } else {
+                       if (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR)
+                               broken = 1;
+               }
+
+               if (unlikely(broken)) {
+                       for_each_sg(data->sg, sg, data->sg_len, i) {
+                               if (sg->offset & 0x3) {
+                                       DBG("Reverting to PIO because of "
+                                               "bad alignment\n");
+                                       host->flags &= ~SDHCI_REQ_USE_DMA;
+                                       break;
+                               }
+                       }
+               }
         }
  
         if (host->flags & SDHCI_REQ_USE_DMA) {
-               int count;
+               if (host->flags & SDHCI_USE_ADMA) {
+                       ret = sdhci_adma_table_pre(host, data);
+                       if (ret) {
+                               /*
+                                * Invalid request: use PIO for this one
+                                * only; host DMA capability is kept.
+                                */
+                               WARN_ON(1);
+                               host->flags &= ~SDHCI_REQ_USE_DMA;
+                       } else {
+                               writel(host->adma_addr,
+                                       host->ioaddr + SDHCI_ADMA_ADDRESS);
+                       }
+               } else {
+                       int sg_cnt;
+
+                       sg_cnt = dma_map_sg(mmc_dev(host->mmc),
+                                       data->sg, data->sg_len,
+                                       (data->flags & MMC_DATA_READ) ?
+                                               DMA_FROM_DEVICE :
+                                               DMA_TO_DEVICE);
+                       if (sg_cnt == 0) {
+                               /*
+                                * Invalid request: use PIO for this one
+                                * only; host DMA capability is kept.
+                                */
+                               WARN_ON(1);
+                               host->flags &= ~SDHCI_REQ_USE_DMA;
+                       } else {
+                               WARN_ON(sg_cnt != 1); /* only sg[0] is programmed */
+                               writel(sg_dma_address(data->sg),
+                                       host->ioaddr + SDHCI_DMA_ADDRESS);
+                       }
+               }
+       }
  
-               count = pci_map_sg(host->chip->pdev, data->sg, data->sg_len,
-                       (data->flags & MMC_DATA_READ)?PCI_DMA_FROMDEVICE:PCI_DMA_TODEVICE);
-               BUG_ON(count != 1);
+       /*
+        * Always adjust the DMA selection as some controllers
+        * (e.g. JMicron) can't do PIO properly when the selection
+        * is ADMA.
+        */
+       if (host->version >= SDHCI_SPEC_200) {
+               ctrl = readb(host->ioaddr + SDHCI_HOST_CONTROL);
+               ctrl &= ~SDHCI_CTRL_DMA_MASK;
+               if ((host->flags & SDHCI_REQ_USE_DMA) &&
+                       (host->flags & SDHCI_USE_ADMA))
+                       ctrl |= SDHCI_CTRL_ADMA32;
+               else
+                       ctrl |= SDHCI_CTRL_SDMA;
+               writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL);
+       }
  
-               writel(sg_dma_address(data->sg), host->ioaddr + SDHCI_DMA_ADDRESS);
-       } else {
+       if (!(host->flags & SDHCI_REQ_USE_DMA)) {
                 host->cur_sg = data->sg;
                 host->num_sg = data->sg_len;
  
@@ -567,7 +748,6 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host,
  static void sdhci_finish_data(struct sdhci_host *host)
  {
         struct mmc_data *data;
-       u16 blocks;
  
         BUG_ON(!host->data);
  
@@ -575,25 +755,26 @@ static void sdhci_finish_data(struct sdhci_host *host)
         host->data = NULL;
  
         if (host->flags & SDHCI_REQ_USE_DMA) {
-               pci_unmap_sg(host->chip->pdev, data->sg, data->sg_len,
-                       (data->flags & MMC_DATA_READ)?PCI_DMA_FROMDEVICE:PCI_DMA_TODEVICE);
+               if (host->flags & SDHCI_USE_ADMA)
+                       sdhci_adma_table_post(host, data);
+               else {
+                       dma_unmap_sg(mmc_dev(host->mmc), data->sg,
+                               data->sg_len, (data->flags & MMC_DATA_READ) ?
+                                       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+               }
         }
  
         /*
-        * Controller doesn't count down when in single block mode.
+        * The specification states that the block count register must
+        * be updated, but it does not specify at what point in the
+        * data flow. That makes the register entirely useless to read
+        * back so we have to assume that nothing made it to the card
+        * in the event of an error.
          */
-       if (data->blocks == 1)
-               blocks = (data->error == 0) ? 0 : 1;
+       if (data->error)
+               data->bytes_xfered = 0;
         else
-               blocks = readw(host->ioaddr + SDHCI_BLOCK_COUNT);
-       data->bytes_xfered = data->blksz * (data->blocks - blocks);
-
-       if (!data->error && blocks) {
-               printk(KERN_ERR "%s: Controller signalled completion even "
-                       "though there were blocks left.\n",
-                       mmc_hostname(host->mmc));
-               data->error = -EIO;
-       }
+               data->bytes_xfered = data->blksz * data->blocks;
  
         if (data->stop) {
                 /*
@@ -775,7 +956,7 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power)
          * Spec says that we should clear the power reg before setting
          * a new value. Some controllers don't seem to like this though.
          */
-       if (!(host->chip->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE))
+       if (!(host->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE))
                 writeb(0, host->ioaddr + SDHCI_POWER_CONTROL);
  
         pwr = SDHCI_POWER_ON;
@@ -797,10 +978,10 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power)
         }
  
         /*
-        * At least the CaFe chip gets confused if we set the voltage
+        * At least the Marvell CaFe chip gets confused if we set the voltage
          * and set turn on power at the same time, so set the voltage first.
          */
-       if ((host->chip->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER))
+       if ((host->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER))
                 writeb(pwr & ~SDHCI_POWER_ON,
                                 host->ioaddr + SDHCI_POWER_CONTROL);
  
@@ -833,7 +1014,8 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
  
         host->mrq = mrq;
  
-       if (!(readl(host->ioaddr + SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT)) {
+       if (!(readl(host->ioaddr + SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT)
+               || (host->flags & SDHCI_DEVICE_DEAD)) {
                 host->mrq->cmd->error = -ENOMEDIUM;
                 tasklet_schedule(&host->finish_tasklet);
         } else
@@ -853,6 +1035,9 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
  
         spin_lock_irqsave(&host->lock, flags);
  
+       if (host->flags & SDHCI_DEVICE_DEAD)
+               goto out;
+
         /*
          * Reset the chip on each power off.
          * Should clear out any weird states.
@@ -888,9 +1073,10 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
          * signalling timeout and CRC errors even on CMD0. Resetting
          * it on each ios seems to solve the problem.
          */
-       if(host->chip->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS)
+       if(host->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS)
                 sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
  
+out:
         mmiowb();
         spin_unlock_irqrestore(&host->lock, flags);
  }
@@ -905,7 +1091,10 @@ static int sdhci_get_ro(struct mmc_host *mmc)
  
         spin_lock_irqsave(&host->lock, flags);
  
-       present = readl(host->ioaddr + SDHCI_PRESENT_STATE);
+       if (host->flags & SDHCI_DEVICE_DEAD)
+               present = 0;
+       else
+               present = readl(host->ioaddr + SDHCI_PRESENT_STATE);
  
         spin_unlock_irqrestore(&host->lock, flags);
  
@@ -922,6 +1111,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
  
         spin_lock_irqsave(&host->lock, flags);
  
+       if (host->flags & SDHCI_DEVICE_DEAD)
+               goto out;
+
         ier = readl(host->ioaddr + SDHCI_INT_ENABLE);
  
         ier &= ~SDHCI_INT_CARD_INT;
@@ -931,6 +1123,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
         writel(ier, host->ioaddr + SDHCI_INT_ENABLE);
         writel(ier, host->ioaddr + SDHCI_SIGNAL_ENABLE);
  
+out:
         mmiowb();
  
         spin_unlock_irqrestore(&host->lock, flags);
@@ -996,13 +1189,14 @@ static void sdhci_tasklet_finish(unsigned long param)
          * The controller needs a reset of internal state machines
          * upon error conditions.
          */
-       if (mrq->cmd->error ||
-               (mrq->data && (mrq->data->error ||
-               (mrq->data->stop && mrq->data->stop->error))) ||
-               (host->chip->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST)) {
+       if (!(host->flags & SDHCI_DEVICE_DEAD) &&
+               (mrq->cmd->error ||
+                (mrq->data && (mrq->data->error ||
+                 (mrq->data->stop && mrq->data->stop->error))) ||
+                  (host->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST))) {
  
                 /* Some controllers need this kick or reset won't work here */
-               if (host->chip->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) {
+               if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) {
                         unsigned int clock;
  
                         /* This is to force an update */
@@ -1116,6 +1310,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
                 host->data->error = -ETIMEDOUT;
         else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT))
                 host->data->error = -EILSEQ;
+       else if (intmask & SDHCI_INT_ADMA_ERROR)
+               host->data->error = -EIO;
  
         if (host->data->error)
                 sdhci_finish_data(host);
@@ -1234,218 +1430,167 @@ out:
  
  #ifdef CONFIG_PM
  
-static int sdhci_suspend (struct pci_dev *pdev, pm_message_t state)
+int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state)
  {
-       struct sdhci_chip *chip;
-       int i, ret;
-
-       chip = pci_get_drvdata(pdev);
-       if (!chip)
-               return 0;
-
-       DBG("Suspending...\n");
-
-       for (i = 0;i < chip->num_slots;i++) {
-               if (!chip->hosts[i])
-                       continue;
-               ret = mmc_suspend_host(chip->hosts[i]->mmc, state);
-               if (ret) {
-                       for (i--;i >= 0;i--)
-                               mmc_resume_host(chip->hosts[i]->mmc);
-                       return ret;
-               }
-       }
-
-       pci_save_state(pdev);
-       pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
+       int ret;
  
-       for (i = 0;i < chip->num_slots;i++) {
-               if (!chip->hosts[i])
-                       continue;
-               free_irq(chip->hosts[i]->irq, chip->hosts[i]);
-       }
+       ret = mmc_suspend_host(host->mmc, state);
+       if (ret)
+               return ret;
  
-       pci_disable_device(pdev);
-       pci_set_power_state(pdev, pci_choose_state(pdev, state));
+       free_irq(host->irq, host);
  
         return 0;
  }
  
-static int sdhci_resume (struct pci_dev *pdev)
-{
-       struct sdhci_chip *chip;
-       int i, ret;
+EXPORT_SYMBOL_GPL(sdhci_suspend_host);
  
-       chip = pci_get_drvdata(pdev);
-       if (!chip)
-               return 0;
+int sdhci_resume_host(struct sdhci_host *host)
+{
+       int ret;
  
-       DBG("Resuming...\n");
+       if (host->flags & SDHCI_USE_DMA) {
+               if (host->ops->enable_dma)
+                       host->ops->enable_dma(host);
+       }
  
-       pci_set_power_state(pdev, PCI_D0);
-       pci_restore_state(pdev);
-       ret = pci_enable_device(pdev);
+       ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
+                         mmc_hostname(host->mmc), host);
         if (ret)
                 return ret;
  
-       for (i = 0;i < chip->num_slots;i++) {
-               if (!chip->hosts[i])
-                       continue;
-               if (chip->hosts[i]->flags & SDHCI_USE_DMA)
-                       pci_set_master(pdev);
-               ret = request_irq(chip->hosts[i]->irq, sdhci_irq,
-                       IRQF_SHARED, mmc_hostname(chip->hosts[i]->mmc),
-                       chip->hosts[i]);
-               if (ret)
-                       return ret;
-               sdhci_init(chip->hosts[i]);
-               mmiowb();
-               ret = mmc_resume_host(chip->hosts[i]->mmc);
-               if (ret)
-                       return ret;
-       }
+       sdhci_init(host);
+       mmiowb();
+
+       ret = mmc_resume_host(host->mmc);
+       if (ret)
+               return ret;
  
         return 0;
  }
  
-#else /* CONFIG_PM */
-
-#define sdhci_suspend NULL
-#define sdhci_resume NULL
+EXPORT_SYMBOL_GPL(sdhci_resume_host);
  
  #endif /* CONFIG_PM */
  
  /*****************************************************************************\
   *                                                                           *
- * Device probing/removal                                                    *
+ * Device allocation/registration                                            *
   *                                                                           *
  \*****************************************************************************/
  
-static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
+struct sdhci_host *sdhci_alloc_host(struct device *dev,
+       size_t priv_size)
  {
-       int ret;
-       unsigned int version;
-       struct sdhci_chip *chip;
         struct mmc_host *mmc;
         struct sdhci_host *host;
  
-       u8 first_bar;
-       unsigned int caps;
-
-       chip = pci_get_drvdata(pdev);
-       BUG_ON(!chip);
-
-       ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &first_bar);
-       if (ret)
-               return ret;
-
-       first_bar &= PCI_SLOT_INFO_FIRST_BAR_MASK;
-
-       if (first_bar > 5) {
-               printk(KERN_ERR DRIVER_NAME ": Invalid first BAR. Aborting.\n");
-               return -ENODEV;
-       }
-
-       if (!(pci_resource_flags(pdev, first_bar + slot) & IORESOURCE_MEM)) {
-               printk(KERN_ERR DRIVER_NAME ": BAR is not iomem. Aborting.\n");
-               return -ENODEV;
-       }
-
-       if (pci_resource_len(pdev, first_bar + slot) != 0x100) {
-               printk(KERN_ERR DRIVER_NAME ": Invalid iomem size. "
-                       "You may experience problems.\n");
-       }
-
-       if ((pdev->class & 0x0000FF) == PCI_SDHCI_IFVENDOR) {
-               printk(KERN_ERR DRIVER_NAME ": Vendor specific interface. Aborting.\n");
-               return -ENODEV;
-       }
-
-       if ((pdev->class & 0x0000FF) > PCI_SDHCI_IFVENDOR) {
-               printk(KERN_ERR DRIVER_NAME ": Unknown interface. Aborting.\n");
-               return -ENODEV;
-       }
+       WARN_ON(dev == NULL);
  
-       mmc = mmc_alloc_host(sizeof(struct sdhci_host), &pdev->dev);
+       mmc = mmc_alloc_host(sizeof(struct sdhci_host) + priv_size, dev);
         if (!mmc)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
  
         host = mmc_priv(mmc);
         host->mmc = mmc;
  
-       host->chip = chip;
-       chip->hosts[slot] = host;
+       return host;
+}
  
-       host->bar = first_bar + slot;
+EXPORT_SYMBOL_GPL(sdhci_alloc_host);
  
-       host->addr = pci_resource_start(pdev, host->bar);
-       host->irq = pdev->irq;
+int sdhci_add_host(struct sdhci_host *host)
+{
+       struct mmc_host *mmc;
+       unsigned int caps;
+       int ret;
  
-       DBG("slot %d at 0x%08lx, irq %d\n", slot, host->addr, host->irq);
+       WARN_ON(host == NULL);
+       if (host == NULL)
+               return -EINVAL;
  
-       ret = pci_request_region(pdev, host->bar, mmc_hostname(mmc));
-       if (ret)
-               goto free;
+       mmc = host->mmc;
  
-       host->ioaddr = ioremap_nocache(host->addr,
-               pci_resource_len(pdev, host->bar));
-       if (!host->ioaddr) {
-               ret = -ENOMEM;
-               goto release;
-       }
+       if (debug_quirks)
+               host->quirks = debug_quirks;
  
         sdhci_reset(host, SDHCI_RESET_ALL);
  
-       version = readw(host->ioaddr + SDHCI_HOST_VERSION);
-       version = (version & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT;
-       if (version > 1) {
+       host->version = readw(host->ioaddr + SDHCI_HOST_VERSION);
+       host->version = (host->version & SDHCI_SPEC_VER_MASK)
+                               >> SDHCI_SPEC_VER_SHIFT;
+       if (host->version > SDHCI_SPEC_200) {
                 printk(KERN_ERR "%s: Unknown controller version (%d). "
                         "You may experience problems.\n", mmc_hostname(mmc),
-                       version);
+                       host->version);
         }
  
         caps = readl(host->ioaddr + SDHCI_CAPABILITIES);
  
-       if (chip->quirks & SDHCI_QUIRK_FORCE_DMA)
+       if (host->quirks & SDHCI_QUIRK_FORCE_DMA)
                 host->flags |= SDHCI_USE_DMA;
         else if (!(caps & SDHCI_CAN_DO_DMA))
                 DBG("Controller doesn't have DMA capability\n");
         else
                 host->flags |= SDHCI_USE_DMA;
  
-       if ((chip->quirks & SDHCI_QUIRK_BROKEN_DMA) &&
+       if ((host->quirks & SDHCI_QUIRK_BROKEN_DMA) &&
                 (host->flags & SDHCI_USE_DMA)) {
                 DBG("Disabling DMA as it is marked broken\n");
                 host->flags &= ~SDHCI_USE_DMA;
         }
  
-       if (((pdev->class & 0x0000FF) != PCI_SDHCI_IFDMA) &&
-               (host->flags & SDHCI_USE_DMA)) {
-               printk(KERN_WARNING "%s: Will use DMA "
-                       "mode even though HW doesn't fully "
-                       "claim to support it.\n", mmc_hostname(mmc));
+       if (host->flags & SDHCI_USE_DMA) {
+               if ((host->version >= SDHCI_SPEC_200) &&
+                               (caps & SDHCI_CAN_DO_ADMA2))
+                       host->flags |= SDHCI_USE_ADMA;
+       }
+
+       if ((host->quirks & SDHCI_QUIRK_BROKEN_ADMA) &&
+               (host->flags & SDHCI_USE_ADMA)) {
+               DBG("Disabling ADMA as it is marked broken\n");
+               host->flags &= ~SDHCI_USE_ADMA;
         }
  
         if (host->flags & SDHCI_USE_DMA) {
-               if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
-                       printk(KERN_WARNING "%s: No suitable DMA available. "
-                               "Falling back to PIO.\n", mmc_hostname(mmc));
-                       host->flags &= ~SDHCI_USE_DMA;
+               if (host->ops->enable_dma) {
+                       if (host->ops->enable_dma(host)) {
+                               printk(KERN_WARNING "%s: No suitable DMA "
+                                       "available. Falling back to PIO.\n",
+                                       mmc_hostname(mmc));
+                               host->flags &= ~(SDHCI_USE_DMA | SDHCI_USE_ADMA);
+                       }
                 }
         }
  
-       if (host->flags & SDHCI_USE_DMA)
-               pci_set_master(pdev);
-       else /* XXX: Hack to get MMC layer to avoid highmem */
-               pdev->dma_mask = 0;
+       if (host->flags & SDHCI_USE_ADMA) {
+               /*
+                * We need to allocate descriptors for all sg entries
+                * (128) and potentially one alignment transfer for
+                * each of those entries.
+                */
+               host->adma_desc = kmalloc((128 * 2 + 1) * 4, GFP_KERNEL);
+               host->align_buffer = kmalloc(128 * 4, GFP_KERNEL);
+               if (!host->adma_desc || !host->align_buffer) {
+                       kfree(host->adma_desc);
+                       kfree(host->align_buffer);
+                       printk(KERN_WARNING "%s: Unable to allocate ADMA "
+                               "buffers. Falling back to standard DMA.\n",
+                               mmc_hostname(mmc));
+                       host->flags &= ~SDHCI_USE_ADMA;
+               }
+       }
+
+       /* XXX: Hack to get MMC layer to avoid highmem */
+       if (!(host->flags & SDHCI_USE_DMA))
+               mmc_dev(host->mmc)->dma_mask = NULL;
  
         host->max_clk =
                 (caps & SDHCI_CLOCK_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT;
         if (host->max_clk == 0) {
                 printk(KERN_ERR "%s: Hardware doesn't specify base clock "
                         "frequency.\n", mmc_hostname(mmc));
-               ret = -ENODEV;
-               goto unmap;
+               return -ENODEV;
         }
         host->max_clk *= 1000000;
  
@@ -1454,8 +1599,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
         if (host->timeout_clk == 0) {
                 printk(KERN_ERR "%s: Hardware doesn't specify timeout clock "
                         "frequency.\n", mmc_hostname(mmc));
-               ret = -ENODEV;
-               goto unmap;
+               return -ENODEV;
         }
         if (caps & SDHCI_TIMEOUT_CLK_UNIT)
                 host->timeout_clk *= 1000;
@@ -1466,7 +1610,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
         mmc->ops = &sdhci_ops;
         mmc->f_min = host->max_clk / 256;
         mmc->f_max = host->max_clk;
-       mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE | MMC_CAP_SDIO_IRQ;
+       mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ;
  
         if (caps & SDHCI_CAN_DO_HISPD)
                 mmc->caps |= MMC_CAP_SD_HIGHSPEED;
@@ -1482,20 +1626,22 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
         if (mmc->ocr_avail == 0) {
                 printk(KERN_ERR "%s: Hardware doesn't report any "
                         "support voltages.\n", mmc_hostname(mmc));
-               ret = -ENODEV;
-               goto unmap;
+               return -ENODEV;
         }
  
         spin_lock_init(&host->lock);
  
         /*
-        * Maximum number of segments. Hardware cannot do scatter lists.
+        * Maximum number of segments. Depends on if the hardware
+        * can do scatter/gather or not.
          */
-       if (host->flags & SDHCI_USE_DMA)
+       if (host->flags & SDHCI_USE_ADMA)
+               mmc->max_hw_segs = 128;
+       else if (host->flags & SDHCI_USE_DMA)
                 mmc->max_hw_segs = 1;
-       else
-               mmc->max_hw_segs = 16;
-       mmc->max_phys_segs = 16;
+       else /* PIO */
+               mmc->max_hw_segs = 128;
+       mmc->max_phys_segs = 128;
  
         /*
          * Maximum number of sectors in one transfer. Limited by DMA boundary
@@ -1505,9 +1651,13 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
  
         /*
          * Maximum segment size. Could be one segment with the maximum number
-        * of bytes.
+        * of bytes. When doing hardware scatter/gather, each entry cannot
+        * be larger than 64 KiB though.
          */
-       mmc->max_seg_size = mmc->max_req_size;
+       if (host->flags & SDHCI_USE_ADMA)
+               mmc->max_seg_size = 65536;
+       else
+               mmc->max_seg_size = mmc->max_req_size;
  
         /*
          * Maximum block size. This varies from controller to controller and
@@ -1553,7 +1703,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
         host->led.default_trigger = mmc_hostname(mmc);
         host->led.brightness_set = sdhci_led_control;
  
-       ret = led_classdev_register(&pdev->dev, &host->led);
+       ret = led_classdev_register(mmc_dev(mmc), &host->led);
         if (ret)
                 goto reset;
  #endif
@@ -1562,8 +1712,9 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
  
         mmc_add_host(mmc);
  
-       printk(KERN_INFO "%s: SDHCI at 0x%08lx irq %d %s\n",
-               mmc_hostname(mmc), host->addr, host->irq,
+       printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s%s\n",
+               mmc_hostname(mmc), host->hw_name, mmc_dev(mmc)->bus_id,
+               (host->flags & SDHCI_USE_ADMA)?"A":"",
                 (host->flags & SDHCI_USE_DMA)?"DMA":"PIO");
  
         return 0;
@@ -1576,35 +1727,40 @@ reset:
  untasklet:
         tasklet_kill(&host->card_tasklet);
         tasklet_kill(&host->finish_tasklet);
-unmap:
-       iounmap(host->ioaddr);
-release:
-       pci_release_region(pdev, host->bar);
-free:
-       mmc_free_host(mmc);
  
         return ret;
  }
  
-static void sdhci_remove_slot(struct pci_dev *pdev, int slot)
+EXPORT_SYMBOL_GPL(sdhci_add_host);
+
+void sdhci_remove_host(struct sdhci_host *host, int dead)
  {
-       struct sdhci_chip *chip;
-       struct mmc_host *mmc;
-       struct sdhci_host *host;
+       unsigned long flags;
  
-       chip = pci_get_drvdata(pdev);
-       host = chip->hosts[slot];
-       mmc = host->mmc;
+       if (dead) {
+               spin_lock_irqsave(&host->lock, flags);
+
+               host->flags |= SDHCI_DEVICE_DEAD;
+
+               if (host->mrq) {
+                       printk(KERN_ERR "%s: Controller removed during "
+                               " transfer!\n", mmc_hostname(host->mmc));
  
-       chip->hosts[slot] = NULL;
+                       host->mrq->cmd->error = -ENOMEDIUM;
+                       tasklet_schedule(&host->finish_tasklet);
+               }
+
+               spin_unlock_irqrestore(&host->lock, flags);
+       }
  
-       mmc_remove_host(mmc);
+       mmc_remove_host(host->mmc);
  
  #ifdef CONFIG_LEDS_CLASS
         led_classdev_unregister(&host->led);
  #endif
  
-       sdhci_reset(host, SDHCI_RESET_ALL);
+       if (!dead)
+               sdhci_reset(host, SDHCI_RESET_ALL);
  
         free_irq(host->irq, host);
  
@@ -1613,106 +1769,21 @@ static void sdhci_remove_slot(struct pci_dev *pdev, int slot)
         tasklet_kill(&host->card_tasklet);
         tasklet_kill(&host->finish_tasklet);
  
-       iounmap(host->ioaddr);
-
-       pci_release_region(pdev, host->bar);
+       kfree(host->adma_desc);
+       kfree(host->align_buffer);
  
-       mmc_free_host(mmc);
+       host->adma_desc = NULL;
+       host->align_buffer = NULL;
  }
  
-static int __devinit sdhci_probe(struct pci_dev *pdev,
-       const struct pci_device_id *ent)
-{
-       int ret, i;
-       u8 slots, rev;
-       struct sdhci_chip *chip;
-
-       BUG_ON(pdev == NULL);
-       BUG_ON(ent == NULL);
+EXPORT_SYMBOL_GPL(sdhci_remove_host);
  
-       pci_read_config_byte(pdev, PCI_CLASS_REVISION, &rev);
-
-       printk(KERN_INFO DRIVER_NAME
-               ": SDHCI controller found at %s [%04x:%04x] (rev %x)\n",
-               pci_name(pdev), (int)pdev->vendor, (int)pdev->device,
-               (int)rev);
-
-       ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slots);
-       if (ret)
-               return ret;
-
-       slots = PCI_SLOT_INFO_SLOTS(slots) + 1;
-       DBG("found %d slot(s)\n", slots);
-       if (slots == 0)
-               return -ENODEV;
-
-       ret = pci_enable_device(pdev);
-       if (ret)
-               return ret;
-
-       chip = kzalloc(sizeof(struct sdhci_chip) +
-               sizeof(struct sdhci_host*) * slots, GFP_KERNEL);
-       if (!chip) {
-               ret = -ENOMEM;
-               goto err;
-       }
-
-       chip->pdev = pdev;
-       chip->quirks = ent->driver_data;
-
-       if (debug_quirks)
-               chip->quirks = debug_quirks;
-
-       chip->num_slots = slots;
-       pci_set_drvdata(pdev, chip);
-
-       for (i = 0;i < slots;i++) {
-               ret = sdhci_probe_slot(pdev, i);
-               if (ret) {
-                       for (i--;i >= 0;i--)
-                               sdhci_remove_slot(pdev, i);
-                       goto free;
-               }
-       }
-
-       return 0;
-
-free:
-       pci_set_drvdata(pdev, NULL);
-       kfree(chip);
-
-err:
-       pci_disable_device(pdev);
-       return ret;
-}
-
-static void __devexit sdhci_remove(struct pci_dev *pdev)
+void sdhci_free_host(struct sdhci_host *host)
  {
-       int i;
-       struct sdhci_chip *chip;
-
-       chip = pci_get_drvdata(pdev);
-
-       if (chip) {
-               for (i = 0;i < chip->num_slots;i++)
-                       sdhci_remove_slot(pdev, i);
-
-               pci_set_drvdata(pdev, NULL);
-
-               kfree(chip);
-       }
-
-       pci_disable_device(pdev);
+       mmc_free_host(host->mmc);
  }
  
-static struct pci_driver sdhci_driver = {
-       .name =         DRIVER_NAME,
-       .id_table =     pci_ids,
-       .probe =        sdhci_probe,
-       .remove =       __devexit_p(sdhci_remove),
-       .suspend =      sdhci_suspend,
-       .resume =       sdhci_resume,
-};
+EXPORT_SYMBOL_GPL(sdhci_free_host);
  
  /*****************************************************************************\
   *                                                                           *
@@ -1726,14 +1797,11 @@ static int __init sdhci_drv_init(void)
                 ": Secure Digital Host Controller Interface driver\n");
         printk(KERN_INFO DRIVER_NAME ": Copyright(c) Pierre Ossman\n");
  
-       return pci_register_driver(&sdhci_driver);
+       return 0;
  }
  
  static void __exit sdhci_drv_exit(void)
  {
-       DBG("Exiting\n");
-
-       pci_unregister_driver(&sdhci_driver);
  }
  
  module_init(sdhci_drv_init);
@@ -1742,7 +1810,7 @@ module_exit(sdhci_drv_exit);
  module_param(debug_quirks, uint, 0444);
  
  MODULE_AUTHOR("Pierre Ossman <drzeus@drzeus.cx>");
-MODULE_DESCRIPTION("Secure Digital Host Controller Interface driver");
+MODULE_DESCRIPTION("Secure Digital Host Controller Interface core driver");
  MODULE_LICENSE("GPL");
  
  MODULE_PARM_DESC(debug_quirks, "Force certain quirks.");
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h

index 299118de8933e892c1953ac66856af7f12dd3a8e..5bb355281765daf7fa3ff2e8205cde8b23b9ad3c 100644 (file)
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -9,18 +9,6 @@
   * your option) any later version.
   */
  
-/*
- * PCI registers
- */
-
-#define PCI_SDHCI_IFPIO                        0x00
-#define PCI_SDHCI_IFDMA                        0x01
-#define PCI_SDHCI_IFVENDOR             0x02
-
-#define PCI_SLOT_INFO                  0x40    /* 8 bits */
-#define  PCI_SLOT_INFO_SLOTS(x)                ((x >> 4) & 7)
-#define  PCI_SLOT_INFO_FIRST_BAR_MASK  0x07
-
  /*
   * Controller registers
   */
@@ -72,6 +60,11 @@
  #define  SDHCI_CTRL_LED                0x01
  #define  SDHCI_CTRL_4BITBUS    0x02
  #define  SDHCI_CTRL_HISPD      0x04
+#define  SDHCI_CTRL_DMA_MASK   0x18
+#define   SDHCI_CTRL_SDMA      0x00
+#define   SDHCI_CTRL_ADMA1     0x08
+#define   SDHCI_CTRL_ADMA32    0x10
+#define   SDHCI_CTRL_ADMA64    0x18
  
  #define SDHCI_POWER_CONTROL    0x29
  #define  SDHCI_POWER_ON                0x01
@@ -117,6 +110,7 @@
  #define  SDHCI_INT_DATA_END_BIT        0x00400000
  #define  SDHCI_INT_BUS_POWER   0x00800000
  #define  SDHCI_INT_ACMD12ERR   0x01000000
+#define  SDHCI_INT_ADMA_ERROR  0x02000000
  
  #define  SDHCI_INT_NORMAL_MASK 0x00007FFF
  #define  SDHCI_INT_ERROR_MASK  0xFFFF8000
@@ -140,11 +134,14 @@
  #define  SDHCI_CLOCK_BASE_SHIFT        8
  #define  SDHCI_MAX_BLOCK_MASK  0x00030000
  #define  SDHCI_MAX_BLOCK_SHIFT  16
+#define  SDHCI_CAN_DO_ADMA2    0x00080000
+#define  SDHCI_CAN_DO_ADMA1    0x00100000
  #define  SDHCI_CAN_DO_HISPD    0x00200000
  #define  SDHCI_CAN_DO_DMA      0x00400000
  #define  SDHCI_CAN_VDD_330     0x01000000
  #define  SDHCI_CAN_VDD_300     0x02000000
  #define  SDHCI_CAN_VDD_180     0x04000000
+#define  SDHCI_CAN_64BIT       0x10000000
  
  /* 44-47 reserved for more caps */
  
@@ -152,7 +149,16 @@
  
  /* 4C-4F reserved for more max current */
  
-/* 50-FB reserved */
+#define SDHCI_SET_ACMD12_ERROR 0x50
+#define SDHCI_SET_INT_ERROR    0x52
+
+#define SDHCI_ADMA_ERROR       0x54
+
+/* 55-57 reserved */
+
+#define SDHCI_ADMA_ADDRESS     0x58
+
+/* 60-FB reserved */
  
  #define SDHCI_SLOT_INT_STATUS  0xFC
  
@@ -161,11 +167,50 @@
  #define  SDHCI_VENDOR_VER_SHIFT        8
  #define  SDHCI_SPEC_VER_MASK   0x00FF
  #define  SDHCI_SPEC_VER_SHIFT  0
+#define   SDHCI_SPEC_100       0
+#define   SDHCI_SPEC_200       1
  
-struct sdhci_chip;
+struct sdhci_ops;
  
  struct sdhci_host {
-       struct sdhci_chip       *chip;
+       /* Data set by hardware interface driver */
+       const char              *hw_name;       /* Hardware bus name */
+
+       unsigned int            quirks;         /* Deviations from spec. */
+
+/* Controller doesn't honor resets unless we touch the clock register */
+#define SDHCI_QUIRK_CLOCK_BEFORE_RESET                 (1<<0)
+/* Controller has bad caps bits, but really supports DMA */
+#define SDHCI_QUIRK_FORCE_DMA                          (1<<1)
+/* Controller doesn't like to be reset when there is no card inserted. */
+#define SDHCI_QUIRK_NO_CARD_NO_RESET                   (1<<2)
+/* Controller doesn't like clearing the power reg before a change */
+#define SDHCI_QUIRK_SINGLE_POWER_WRITE                 (1<<3)
+/* Controller has flaky internal state so reset it on each ios change */
+#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS              (1<<4)
+/* Controller has an unusable DMA engine */
+#define SDHCI_QUIRK_BROKEN_DMA                         (1<<5)
+/* Controller has an unusable ADMA engine */
+#define SDHCI_QUIRK_BROKEN_ADMA                                (1<<6)
+/* Controller can only DMA from 32-bit aligned addresses */
+#define SDHCI_QUIRK_32BIT_DMA_ADDR                     (1<<7)
+/* Controller can only DMA chunk sizes that are a multiple of 32 bits */
+#define SDHCI_QUIRK_32BIT_DMA_SIZE                     (1<<8)
+/* Controller can only ADMA chunks that are a multiple of 32 bits */
+#define SDHCI_QUIRK_32BIT_ADMA_SIZE                    (1<<9)
+/* Controller needs to be reset after each request to stay stable */
+#define SDHCI_QUIRK_RESET_AFTER_REQUEST                        (1<<10)
+/* Controller needs voltage and power writes to happen separately */
+#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER            (1<<11)
+/* Controller provides an incorrect timeout value for transfers */
+#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL                 (1<<12)
+
+       int                     irq;            /* Device IRQ */
+       void __iomem *          ioaddr;         /* Mapped address */
+
+       const struct sdhci_ops  *ops;           /* Low level hw interface */
+
+       /* Internal data */
         struct mmc_host         *mmc;           /* MMC structure */
  
  #ifdef CONFIG_LEDS_CLASS
@@ -176,7 +221,11 @@ struct sdhci_host {
  
         int                     flags;          /* Host attributes */
  #define SDHCI_USE_DMA          (1<<0)          /* Host is DMA capable */
-#define SDHCI_REQ_USE_DMA      (1<<1)          /* Use DMA for this req. */
+#define SDHCI_USE_ADMA         (1<<1)          /* Host is ADMA capable */
+#define SDHCI_REQ_USE_DMA      (1<<2)          /* Use DMA for this req. */
+#define SDHCI_DEVICE_DEAD      (1<<3)          /* Device unresponsive */
+
+       unsigned int            version;        /* SDHCI spec. version */
  
         unsigned int            max_clk;        /* Max possible freq (MHz) */
         unsigned int            timeout_clk;    /* Timeout freq (KHz) */
@@ -194,22 +243,41 @@ struct sdhci_host {
         int                     offset;         /* Offset into current sg */
         int                     remain;         /* Bytes left in current */
  
-       int                     irq;            /* Device IRQ */
-       int                     bar;            /* PCI BAR index */
-       unsigned long           addr;           /* Bus address */
-       void __iomem *          ioaddr;         /* Mapped address */
+       int                     sg_count;       /* Mapped sg entries */
+
+       u8                      *adma_desc;     /* ADMA descriptor table */
+       u8                      *align_buffer;  /* Bounce buffer */
+
+       dma_addr_t              adma_addr;      /* Mapped ADMA descr. table */
+       dma_addr_t              align_addr;     /* Mapped bounce buffer */
  
         struct tasklet_struct   card_tasklet;   /* Tasklet structures */
         struct tasklet_struct   finish_tasklet;
  
         struct timer_list       timer;          /* Timer for timeouts */
-};
  
-struct sdhci_chip {
-       struct pci_dev          *pdev;
+       unsigned long           private[0] ____cacheline_aligned;
+};
  
-       unsigned long           quirks;
  
-       int                     num_slots;      /* Slots on controller */
-       struct sdhci_host       *hosts[0];      /* Pointers to hosts */
+struct sdhci_ops {
+       int             (*enable_dma)(struct sdhci_host *host);
  };
+
+
+extern struct sdhci_host *sdhci_alloc_host(struct device *dev,
+       size_t priv_size);
+extern void sdhci_free_host(struct sdhci_host *host);
+
+/*
+ * Return a pointer to the trailing private[] area of the host structure,
+ * i.e. the storage reserved for the hardware interface driver.
+ */
+static inline void *sdhci_priv(struct sdhci_host *host)
+{
+       void *priv = (void *)host->private;
+
+       return priv;
+}
+
+extern int sdhci_add_host(struct sdhci_host *host);
+extern void sdhci_remove_host(struct sdhci_host *host, int dead);
+
+#ifdef CONFIG_PM
+extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state);
+extern int sdhci_resume_host(struct sdhci_host *host);
+#endif
diff --git a/drivers/mmc/host/sdricoh_cs.c b/drivers/mmc/host/sdricoh_cs.c

new file mode 100644 (file)

index 0000000..f99e9f7
--- /dev/null
+++ b/drivers/mmc/host/sdricoh_cs.c
@@ -0,0 +1,575 @@
+/*
+ *  sdricoh_cs.c - driver for Ricoh Secure Digital Card Readers that can be
+ *     found on some Ricoh RL5c476 II cardbus bridge
+ *
+ *  Copyright (C) 2006 - 2008 Sascha Sommer <saschasommer@freenet.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/*
+#define DEBUG
+#define VERBOSE_DEBUG
+*/
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/pci.h>
+#include <linux/ioport.h>
+#include <linux/scatterlist.h>
+#include <linux/version.h>
+
+#include <pcmcia/cs_types.h>
+#include <pcmcia/cs.h>
+#include <pcmcia/cistpl.h>
+#include <pcmcia/ds.h>
+#include <linux/io.h>
+
+#include <linux/mmc/host.h>
+
+#define DRIVER_NAME "sdricoh_cs"
+
+static unsigned int switchlocked;
+
+/* i/o region */
+#define SDRICOH_PCI_REGION 0
+#define SDRICOH_PCI_REGION_SIZE 0x1000
+
+/* registers */
+#define R104_VERSION     0x104
+#define R200_CMD         0x200
+#define R204_CMD_ARG     0x204
+#define R208_DATAIO      0x208
+#define R20C_RESP        0x20c
+#define R21C_STATUS      0x21c
+#define R2E0_INIT        0x2e0
+#define R2E4_STATUS_RESP 0x2e4
+#define R2F0_RESET       0x2f0
+#define R224_MODE        0x224
+#define R226_BLOCKSIZE   0x226
+#define R228_POWER       0x228
+#define R230_DATA        0x230
+
+/* flags for the R21C_STATUS register */
+#define STATUS_CMD_FINISHED      0x00000001
+#define STATUS_TRANSFER_FINISHED 0x00000004
+#define STATUS_CARD_INSERTED     0x00000020
+#define STATUS_CARD_LOCKED       0x00000080
+#define STATUS_CMD_TIMEOUT       0x00400000
+#define STATUS_READY_TO_READ     0x01000000
+#define STATUS_READY_TO_WRITE    0x02000000
+#define STATUS_BUSY              0x40000000
+
+/*
+ * timeouts
+ *
+ * These are polling-loop iteration limits, not time units: CMD_TIMEOUT
+ * bounds the status poll in sdricoh_mmc_cmd() and TRANSFER_TIMEOUT is the
+ * iteration limit handed to sdricoh_query_status().
+ * NOTE(review): no user of INIT_TIMEOUT or BUSY_TIMEOUT is visible in the
+ * part of the file reviewed here - confirm they are still needed.
+ */
+#define INIT_TIMEOUT      100
+#define CMD_TIMEOUT       100000
+#define TRANSFER_TIMEOUT  100000
+#define BUSY_TIMEOUT      32767
+
+/*
+ * list of supported pcmcia devices
+ *
+ * The table is terminated by PCMCIA_DEVICE_NULL and exported through
+ * MODULE_DEVICE_TABLE() below.
+ */
+static struct pcmcia_device_id pcmcia_ids[] = {
+       /* vendor and device strings followed by their crc32 hashes */
+       PCMCIA_DEVICE_PROD_ID12("RICOH", "Bay1Controller", 0xd9f522ed,
+                               0xc3901202),
+       PCMCIA_DEVICE_NULL,
+};
+
+MODULE_DEVICE_TABLE(pcmcia, pcmcia_ids);
+
+/* mmc privdata: per-controller state stored in the mmc_host private area */
+struct sdricoh_host {
+       /* device used for the dev_*() log messages (the pcmcia device) */
+       struct device *dev;
+       /*
+        * NOTE(review): not assigned by sdricoh_init_mmc() in the code
+        * visible here - confirm whether it is set or used elsewhere.
+        */
+       struct mmc_host *mmc;   /* MMC structure */
+       /* base of the register window mapped with pci_iomap() */
+       unsigned char __iomem *iobase;
+       /* PCI device whose region SDRICOH_PCI_REGION was mapped */
+       struct pci_dev *pci_dev;
+       /*
+        * set after opcode 55 has been issued so that the following
+        * command gets bit 64 OR'ed into its opcode (see sdricoh_request)
+        */
+       int app_cmd;
+};
+
+/***************** register i/o helper functions *****************************/
+
+/* Read the 32-bit register at offset reg and trace the access (vdbg). */
+static inline unsigned int sdricoh_readl(struct sdricoh_host *host,
+                                        unsigned int reg)
+{
+       void __iomem *addr = host->iobase + reg;
+       unsigned int val;
+
+       val = readl(addr);
+       dev_vdbg(host->dev, "rl %x 0x%x\n", reg, val);
+
+       return val;
+}
+
+/* Write a 32-bit value to the register at offset reg and trace it (vdbg). */
+static inline void sdricoh_writel(struct sdricoh_host *host, unsigned int reg,
+                                 unsigned int value)
+{
+       void __iomem *addr = host->iobase + reg;
+
+       writel(value, addr);
+       dev_vdbg(host->dev, "wl %x 0x%x\n", reg, value);
+}
+
+/*
+ * Read the 16-bit register at offset reg and trace the access (vdbg).
+ * The trace tag is "rw" so that word reads can be told apart from the
+ * byte reads logged by sdricoh_readb() ("rb").
+ */
+static inline unsigned int sdricoh_readw(struct sdricoh_host *host,
+                                        unsigned int reg)
+{
+       unsigned int value = readw(host->iobase + reg);
+       dev_vdbg(host->dev, "rw %x 0x%x\n", reg, value);
+       return value;
+}
+
+/* Write a 16-bit value to the register at offset reg and trace it (vdbg). */
+static inline void sdricoh_writew(struct sdricoh_host *host, unsigned int reg,
+                                        unsigned short value)
+{
+       void __iomem *addr = host->iobase + reg;
+
+       writew(value, addr);
+       dev_vdbg(host->dev, "ww %x 0x%x\n", reg, value);
+}
+
+/* Read the 8-bit register at offset reg and trace the access (vdbg). */
+static inline unsigned int sdricoh_readb(struct sdricoh_host *host,
+                                        unsigned int reg)
+{
+       void __iomem *addr = host->iobase + reg;
+       unsigned int val;
+
+       val = readb(addr);
+       dev_vdbg(host->dev, "rb %x 0x%x\n", reg, val);
+
+       return val;
+}
+
+/*
+ * Poll R21C_STATUS until one of the bits in wanted is set, echoing every
+ * value read back to R2E4_STATUS_RESP.
+ *
+ * Returns 0 on success, -ETIMEDOUT if no wanted bit showed up within
+ * timeout polls, or -EINVAL if the final status has any bit of 0x7F0000 set.
+ */
+static int sdricoh_query_status(struct sdricoh_host *host, unsigned int wanted,
+                               unsigned int timeout)
+{
+       struct device *dev = host->dev;
+       unsigned int status = 0;
+       unsigned int tries = 0;
+
+       while (tries < timeout) {
+               status = sdricoh_readl(host, R21C_STATUS);
+               sdricoh_writel(host, R2E4_STATUS_RESP, status);
+               if (status & wanted)
+                       break;
+               tries++;
+       }
+
+       /* every poll was used up without seeing a wanted bit */
+       if (tries == timeout) {
+               dev_err(dev, "query_status: timeout waiting for %x\n", wanted);
+               return -ETIMEDOUT;
+       }
+
+       /* do not do this check in the loop as some commands fail otherwise */
+       if (status & 0x7F0000) {
+               dev_err(dev, "waiting for status bit %x failed\n", wanted);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Issue a command to the controller: write 0x18 to R21C_STATUS, program
+ * the argument and command registers and, for a non-zero opcode, poll
+ * R21C_STATUS (echoing each value to R2E4_STATUS_RESP) for
+ * STATUS_CMD_FINISHED, at most CMD_TIMEOUT times.
+ *
+ * Returns 0 on success or -ETIMEDOUT if the poll limit was reached or the
+ * last status read has STATUS_CMD_TIMEOUT set. Opcode 0 is not waited for
+ * and always yields 0.
+ */
+static int sdricoh_mmc_cmd(struct sdricoh_host *host, unsigned char opcode,
+                          unsigned int arg)
+{
+       unsigned int status;
+       int result = 0;
+       unsigned int loop = 0;
+       /* reset status reg? */
+       sdricoh_writel(host, R21C_STATUS, 0x18);
+       /* fill parameters */
+       sdricoh_writel(host, R204_CMD_ARG, arg);
+       /* the opcode occupies the low bits, bit 24 (0x10000 << 8) is set too */
+       sdricoh_writel(host, R200_CMD, (0x10000 << 8) | opcode);
+       /* wait for command completion */
+       if (opcode) {
+               for (loop = 0; loop < CMD_TIMEOUT; loop++) {
+                       status = sdricoh_readl(host, R21C_STATUS);
+                       sdricoh_writel(host, R2E4_STATUS_RESP, status);
+                       if (status  & STATUS_CMD_FINISHED)
+                               break;
+               }
+               /*
+                * Don't check the timeout flag inside the loop: it is not
+                * always reset correctly, so only the final status is
+                * evaluated here.
+                */
+               if (loop == CMD_TIMEOUT || status & STATUS_CMD_TIMEOUT)
+                       result = -ETIMEDOUT;
+
+       }
+
+       return result;
+
+}
+
+/*
+ * Run the controller reset/initialisation register sequence.
+ *
+ * Returns 0 on success or -EIO if R2E0_INIT does not read back the
+ * 0x10000 value that was just written to it.
+ */
+static int sdricoh_reset(struct sdricoh_host *host)
+{
+       dev_dbg(host->dev, "reset\n");
+       sdricoh_writel(host, R2F0_RESET, 0x10001);
+       sdricoh_writel(host, R2E0_INIT, 0x10000);
+       /* the read-back is the only check that the controller responded */
+       if (sdricoh_readl(host, R2E0_INIT) != 0x10000)
+               return -EIO;
+       sdricoh_writel(host, R2E0_INIT, 0x10007);
+
+       sdricoh_writel(host, R224_MODE, 0x2000000);
+       sdricoh_writel(host, R228_POWER, 0xe0);
+
+
+       /* status register ? */
+       sdricoh_writel(host, R21C_STATUS, 0x18);
+
+       return 0;
+}
+
+/*
+ * Transfer one block of len bytes between buf and the R230_DATA register.
+ *
+ * host: controller state
+ * read: non-zero to read from the card into buf, zero to write buf out
+ * buf:  data buffer of at least len bytes
+ * len:  number of bytes to transfer
+ *
+ * The function first waits for STATUS_READY_TO_READ or
+ * STATUS_READY_TO_WRITE, then moves the data through R230_DATA 32 bits at
+ * a time. On the read side the lowest byte of each word is stored first.
+ *
+ * Returns 0 on success or -ETIMEDOUT if the ready status never arrived
+ * (any failure of sdricoh_query_status() is reported as -ETIMEDOUT).
+ *
+ * NOTE(review): for a trailing chunk shorter than 4 bytes the write path
+ * leaves the new bytes in the upper part of the word (and stale bytes of
+ * the previous word below them), which does not mirror the read path.
+ * Left as is because the hardware behaviour cannot be confirmed from here.
+ */
+static int sdricoh_blockio(struct sdricoh_host *host, int read,
+                               u8 *buf, int len)
+{
+       int size;
+       u32 data = 0;
+       /* wait until the data is available */
+       if (read) {
+               if (sdricoh_query_status(host, STATUS_READY_TO_READ,
+                                               TRANSFER_TIMEOUT))
+                       return -ETIMEDOUT;
+               sdricoh_writel(host, R21C_STATUS, 0x18);
+               /* read data */
+               while (len) {
+                       data = sdricoh_readl(host, R230_DATA);
+                       size = min(len, 4);
+                       len -= size;
+                       /* unpack the word, least significant byte first */
+                       while (size) {
+                               *buf = data & 0xFF;
+                               buf++;
+                               data >>= 8;
+                               size--;
+                       }
+               }
+       } else {
+               if (sdricoh_query_status(host, STATUS_READY_TO_WRITE,
+                                               TRANSFER_TIMEOUT))
+                       return -ETIMEDOUT;
+               sdricoh_writel(host, R21C_STATUS, 0x18);
+               /* write data */
+               while (len) {
+                       size = min(len, 4);
+                       len -= size;
+                       /*
+                        * shift each byte in from the top; after four
+                        * bytes the first one sits in the lowest byte
+                        */
+                       while (size) {
+                               data >>= 8;
+                               data |= (u32)*buf << 24;
+                               buf++;
+                               size--;
+                       }
+                       sdricoh_writel(host, R230_DATA, data);
+               }
+       }
+
+       /*
+        * Both transfer loops only terminate once len has reached zero, so
+        * there is no short-transfer case left to report here.
+        */
+       return 0;
+}
+
+/*
+ * mmc_host_ops.request handler: issue mrq->cmd, read back its response,
+ * perform the attached data transfer (if any) block by block with
+ * sdricoh_blockio() and complete the request with mmc_request_done().
+ * The whole request is processed in the context of the caller; errors are
+ * reported through cmd->error.
+ */
+static void sdricoh_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+       struct sdricoh_host *host = mmc_priv(mmc);
+       struct mmc_command *cmd = mrq->cmd;
+       struct mmc_data *data = cmd->data;
+       struct device *dev = host->dev;
+       unsigned char opcode = cmd->opcode;
+       int i;
+
+       dev_dbg(dev, "=============================\n");
+       dev_dbg(dev, "sdricoh_request opcode=%i\n", opcode);
+
+       sdricoh_writel(host, R21C_STATUS, 0x18);
+
+       /*
+        * MMC_APP_CMDs need some special handling: the command following
+        * opcode 55 is sent with bit 64 OR'ed into its opcode.
+        */
+       if (host->app_cmd) {
+               opcode |= 64;
+               host->app_cmd = 0;
+       } else if (opcode == 55)
+               host->app_cmd = 1;
+
+       /* read/write commands seem to require this */
+       if (data) {
+               sdricoh_writew(host, R226_BLOCKSIZE, data->blksz);
+               sdricoh_writel(host, R208_DATAIO, 0);
+       }
+
+       cmd->error = sdricoh_mmc_cmd(host, opcode, cmd->arg);
+
+       /* read response buffer (done even if the command reported an error) */
+       if (cmd->flags & MMC_RSP_PRESENT) {
+               if (cmd->flags & MMC_RSP_136) {
+                       /*
+                        * CRC is stripped so we need to do some shifting:
+                        * each response word is the register at
+                        * R20C_RESP + (3 - i) * 4 shifted left by 8 bits,
+                        * with the byte just below that register filling
+                        * the low 8 bits (except for the last word).
+                        */
+                       for (i = 0; i < 4; i++) {
+                               cmd->resp[i] =
+                                   sdricoh_readl(host,
+                                                 R20C_RESP + (3 - i) * 4) << 8;
+                               if (i != 3)
+                                       cmd->resp[i] |=
+                                           sdricoh_readb(host, R20C_RESP +
+                                                         (3 - i) * 4 - 1);
+                       }
+               } else
+                       cmd->resp[0] = sdricoh_readl(host, R20C_RESP);
+       }
+
+       /* transfer data */
+       if (data && cmd->error == 0) {
+               dev_dbg(dev, "transfer: blksz %i blocks %i sg_len %i "
+                       "sg length %i\n", data->blksz, data->blocks,
+                       data->sg_len, data->sg->length);
+
+               /* enter data reading mode */
+               sdricoh_writel(host, R21C_STATUS, 0x837f031e);
+               /*
+                * NOTE(review): only the first scatterlist entry (data->sg)
+                * is used; block i is addressed at offset len * i inside
+                * that entry's page mapping. Confirm that requests never
+                * arrive with more than one entry or span beyond the page.
+                */
+               for (i = 0; i < data->blocks; i++) {
+                       size_t len = data->blksz;
+                       u8 *buf;
+                       struct page *page;
+                       int result;
+                       page = sg_page(data->sg);
+
+                       buf = kmap(page) + data->sg->offset + (len * i);
+                       result =
+                               sdricoh_blockio(host,
+                                       data->flags & MMC_DATA_READ, buf, len);
+                       kunmap(page);
+                       flush_dcache_page(page);
+                       if (result) {
+                               dev_err(dev, "sdricoh_request: cmd %i "
+                                       "block transfer failed\n", cmd->opcode);
+                               cmd->error = result;
+                               break;
+                       } else
+                               data->bytes_xfered += len;
+               }
+
+               sdricoh_writel(host, R208_DATAIO, 1);
+
+               /*
+                * The end-of-transfer poll runs even after a failed block
+                * and reports any failure as -EINVAL, replacing an error
+                * stored by the loop above.
+                */
+               if (sdricoh_query_status(host, STATUS_TRANSFER_FINISHED,
+                                       TRANSFER_TIMEOUT)) {
+                       dev_err(dev, "sdricoh_request: transfer end error\n");
+                       cmd->error = -EINVAL;
+               }
+       }
+       /* FIXME check busy flag */
+
+       mmc_request_done(mmc, mrq);
+       dev_dbg(dev, "=============================\n");
+}
+
+/*
+ * mmc_host_ops.set_ios handler: program the mode and power registers for
+ * the requested power mode and bus width. Only MMC_POWER_ON and
+ * MMC_POWER_UP are acted upon; every other power mode, and every other
+ * field of ios, is ignored here.
+ */
+static void sdricoh_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+       struct sdricoh_host *host = mmc_priv(mmc);
+       dev_dbg(host->dev, "set_ios\n");
+
+       if (ios->power_mode == MMC_POWER_ON) {
+               sdricoh_writel(host, R228_POWER, 0xc0e0);
+
+               /* 4-bit bus: different mode value plus a second power write */
+               if (ios->bus_width == MMC_BUS_WIDTH_4) {
+                       sdricoh_writel(host, R224_MODE, 0x2000300);
+                       sdricoh_writel(host, R228_POWER, 0x40e0);
+               } else {
+                       sdricoh_writel(host, R224_MODE, 0x2000340);
+               }
+
+       } else if (ios->power_mode == MMC_POWER_UP) {
+               sdricoh_writel(host, R224_MODE, 0x2000320);
+               sdricoh_writel(host, R228_POWER, 0xe0);
+       }
+}
+
+/*
+ * mmc_host_ops.get_ro handler: report the state of the card lock flag.
+ * The status register is read once and echoed to R2E4_STATUS_RESP; a
+ * non-zero return means the card is reported as read-only. When the
+ * switchlocked variable is set the meaning of the flag is inverted.
+ */
+static int sdricoh_get_ro(struct mmc_host *mmc)
+{
+       struct sdricoh_host *host = mmc_priv(mmc);
+       unsigned int locked;
+
+       locked = sdricoh_readl(host, R21C_STATUS);
+       sdricoh_writel(host, R2E4_STATUS_RESP, locked);
+
+       /* keep only the lock flag of the status word */
+       locked &= STATUS_CARD_LOCKED;
+
+       /* some notebooks seem to have the locked flag switched */
+       if (!switchlocked)
+               return locked;
+
+       return !locked;
+}
+
+/* host operations handed to the MMC core via mmc->ops in sdricoh_init_mmc() */
+static struct mmc_host_ops sdricoh_ops = {
+       .request = sdricoh_request,
+       .set_ios = sdricoh_set_ios,
+       .get_ro = sdricoh_get_ro,
+};
+
+/*
+ * Initialize the controller and register it with the mmc framework.
+ *
+ * pci_dev:    candidate PCI device that may contain the controller
+ * pcmcia_dev: pcmcia device being probed; its ->priv is set to the new
+ *             mmc_host on success
+ *
+ * Maps region SDRICOH_PCI_REGION of pci_dev, checks the version register,
+ * allocates and sets up an mmc_host, resets the controller and adds the
+ * host.
+ *
+ * Returns 0 on success. On failure a negative errno is returned, the
+ * register mapping and the mmc_host are released again and
+ * pcmcia_dev->priv is reset to NULL so that no stale pointer is left behind.
+ */
+static int sdricoh_init_mmc(struct pci_dev *pci_dev,
+                           struct pcmcia_device *pcmcia_dev)
+{
+       int result = 0;
+       void __iomem *iobase = NULL;
+       struct mmc_host *mmc = NULL;
+       struct sdricoh_host *host = NULL;
+       struct device *dev = &pcmcia_dev->dev;
+       /* map iomem */
+       if (pci_resource_len(pci_dev, SDRICOH_PCI_REGION) !=
+           SDRICOH_PCI_REGION_SIZE) {
+               dev_dbg(dev, "unexpected pci resource len\n");
+               return -ENODEV;
+       }
+       iobase =
+           pci_iomap(pci_dev, SDRICOH_PCI_REGION, SDRICOH_PCI_REGION_SIZE);
+       if (!iobase) {
+               dev_err(dev, "unable to map iobase\n");
+               return -ENODEV;
+       }
+       /* check version? */
+       if (readl(iobase + R104_VERSION) != 0x4000) {
+               dev_dbg(dev, "no supported mmc controller found\n");
+               result = -ENODEV;
+               goto err;
+       }
+       /* allocate privdata */
+       mmc = pcmcia_dev->priv =
+           mmc_alloc_host(sizeof(struct sdricoh_host), &pcmcia_dev->dev);
+       if (!mmc) {
+               dev_err(dev, "mmc_alloc_host failed\n");
+               result = -ENOMEM;
+               goto err;
+       }
+       host = mmc_priv(mmc);
+
+       host->iobase = iobase;
+       host->dev = dev;
+       host->pci_dev = pci_dev;
+
+       mmc->ops = &sdricoh_ops;
+
+       /* FIXME: frequency and voltage handling is done by the controller
+        */
+       mmc->f_min = 450000;
+       mmc->f_max = 24000000;
+       mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+       mmc->caps |= MMC_CAP_4_BIT_DATA;
+
+       mmc->max_seg_size = 1024 * 512;
+       mmc->max_blk_size = 512;
+
+       /* reset the controller */
+       if (sdricoh_reset(host)) {
+               dev_dbg(dev, "could not reset\n");
+               result = -EIO;
+               goto err;
+
+       }
+
+       result = mmc_add_host(mmc);
+
+       if (!result) {
+               dev_dbg(dev, "mmc host registered\n");
+               return 0;
+       }
+
+err:
+       /* a mapping obtained from pci_iomap() is released with pci_iounmap() */
+       if (iobase)
+               pci_iounmap(pci_dev, iobase);
+       if (mmc) {
+               /* priv must not keep pointing at the host freed below */
+               pcmcia_dev->priv = NULL;
+               mmc_free_host(mmc);
+       }
+
+       return result;
+}
+
+/*
+ * Probe callback: search for a supported mmc controller.
+ *
+ * Each pci device matching the Ricoh RL5C476 vendor/device id is handed to
+ * sdricoh_init_mmc() in turn. Returns 0 as soon as one of them is accepted
+ * and -ENODEV when the list is exhausted.
+ */
+static int sdricoh_pcmcia_probe(struct pcmcia_device *pcmcia_dev)
+{
+       struct device *dev = &pcmcia_dev->dev;
+       struct pci_dev *bridge = NULL;
+
+       dev_info(dev, "Searching MMC controller for pcmcia device"
+               " %s %s ...\n", pcmcia_dev->prod_id[0], pcmcia_dev->prod_id[1]);
+
+       /*
+        * search the pci cardbus bridge that contains the mmc controller;
+        * the io region is already claimed by yenta_socket...
+        */
+       for (;;) {
+               bridge = pci_get_device(PCI_VENDOR_ID_RICOH,
+                                       PCI_DEVICE_ID_RICOH_RL5C476, bridge);
+               if (!bridge)
+                       break;
+
+               /* try to init the device */
+               if (sdricoh_init_mmc(bridge, pcmcia_dev) == 0) {
+                       dev_info(dev, "MMC controller found\n");
+                       return 0;
+               }
+       }
+
+       dev_err(dev, "No MMC controller was found.\n");
+       return -ENODEV;
+}
+
+/*
+ * Remove callback: unregister and free the mmc host stored in link->priv
+ * (if there is one), then disable the pcmcia device.
+ */
+static void sdricoh_pcmcia_detach(struct pcmcia_device *link)
+{
+       struct mmc_host *mmc_host = link->priv;
+
+       dev_dbg(&link->dev, "detach\n");
+
+       if (mmc_host != NULL) {
+               struct sdricoh_host *priv = mmc_priv(mmc_host);
+
+               /* remove mmc host */
+               mmc_remove_host(mmc_host);
+               /* release the register mapping and the pci device reference */
+               pci_iounmap(priv->pci_dev, priv->iobase);
+               pci_dev_put(priv->pci_dev);
+               /* priv lives inside mmc_host, so free the host last */
+               mmc_free_host(mmc_host);
+       }
+
+       pcmcia_disable_device(link);
+}
+
+#ifdef CONFIG_PM
+/*
+ * Suspend callback: pass the request on to the mmc core. The return value
+ * of mmc_suspend_host() is not propagated; 0 is always returned.
+ */
+static int sdricoh_pcmcia_suspend(struct pcmcia_device *link)
+{
+       struct mmc_host *mmc_host = link->priv;
+
+       dev_dbg(&link->dev, "suspend\n");
+       mmc_suspend_host(mmc_host, PMSG_SUSPEND);
+
+       return 0;
+}
+
+/*
+ * Resume callback: reset the controller first, then let the mmc core
+ * resume the host. Always returns 0.
+ */
+static int sdricoh_pcmcia_resume(struct pcmcia_device *link)
+{
+       struct mmc_host *mmc_host = link->priv;
+       struct sdricoh_host *priv = mmc_priv(mmc_host);
+
+       dev_dbg(&link->dev, "resume\n");
+       sdricoh_reset(priv);
+       mmc_resume_host(mmc_host);
+
+       return 0;
+}
+#else
+/* without CONFIG_PM the driver registers no suspend/resume callbacks */
+#define sdricoh_pcmcia_suspend NULL
+#define sdricoh_pcmcia_resume NULL
+#endif
+
+/* pcmcia driver description registered by sdricoh_drv_init() */
+static struct pcmcia_driver sdricoh_driver = {
+       .drv.name       = DRIVER_NAME,
+       .id_table       = pcmcia_ids,
+       .probe          = sdricoh_pcmcia_probe,
+       .remove         = sdricoh_pcmcia_detach,
+       .suspend        = sdricoh_pcmcia_suspend,
+       .resume         = sdricoh_pcmcia_resume,
+};
+
+/*****************************************************************************\
+ *                                                                           *
+ * Driver init/exit                                                          *
+ *                                                                           *
+\*****************************************************************************/
+
+/* Module entry point: register the pcmcia driver and return its result. */
+static int __init sdricoh_drv_init(void)
+{
+       return pcmcia_register_driver(&sdricoh_driver);
+}
+
+/* Module exit point: unregister the pcmcia driver again. */
+static void __exit sdricoh_drv_exit(void)
+{
+       pcmcia_unregister_driver(&sdricoh_driver);
+}
+
+module_init(sdricoh_drv_init);
+module_exit(sdricoh_drv_exit);
+
+/* mode 0444: the parameter is exposed read-only */
+module_param(switchlocked, uint, 0444);
+
+MODULE_AUTHOR("Sascha Sommer <saschasommer@freenet.de>");
+MODULE_DESCRIPTION("Ricoh PCMCIA Secure Digital Interface driver");
+MODULE_LICENSE("GPL");
+
+/*
+ * The two literals are concatenated by the compiler, so the first one has
+ * to end in a space to keep the sentences apart.
+ */
+MODULE_PARM_DESC(switchlocked, "Switch the cards locked status. "
+               "Use this when unlocked cards are shown readonly (default 0)");
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c

index 1c14a186f000acc10b23b59b5bc554533bf6a4ec..13844843e8de147a091f19166f1885df9e7262cb 100644 (file)
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -973,7 +973,7 @@ static int tifm_sd_probe(struct tifm_dev *sock)
  
         mmc->ops = &tifm_sd_ops;
         mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-       mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE;
+       mmc->caps = MMC_CAP_4_BIT_DATA;
         mmc->f_min = 20000000 / 60;
         mmc->f_max = 24000000;
  
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c

index c303e7f57ab45cf1473fd6c393443aa2e19d994e..adda37952032af52335dcfbb69fb925bf16e5046 100644 (file)
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -68,16 +68,16 @@ static const int unlock_codes[] = { 0x83, 0x87 };
  
  static const int valid_ids[] = {
         0x7112,
-       };
+};
  
  #ifdef CONFIG_PNP
-static unsigned int nopnp = 0;
+static unsigned int param_nopnp = 0;
  #else
-static const unsigned int nopnp = 1;
+static const unsigned int param_nopnp = 1;
  #endif
-static unsigned int io = 0x248;
-static unsigned int irq = 6;
-static int dma = 2;
+static unsigned int param_io = 0x248;
+static unsigned int param_irq = 6;
+static int param_dma = 2;
  
  /*
   * Basic functions
@@ -939,7 +939,7 @@ static int wbsd_get_ro(struct mmc_host *mmc)
  
         spin_unlock_bh(&host->lock);
  
-       return csr & WBSD_WRPT;
+       return !!(csr & WBSD_WRPT);
  }
  
  static const struct mmc_host_ops wbsd_ops = {
@@ -1219,7 +1219,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev)
         mmc->f_min = 375000;
         mmc->f_max = 24000000;
         mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-       mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE;
+       mmc->caps = MMC_CAP_4_BIT_DATA;
  
         spin_lock_init(&host->lock);
  
@@ -1420,7 +1420,7 @@ kfree:
  
         dma_unmap_single(mmc_dev(host->mmc), host->dma_addr,
                 WBSD_DMA_SIZE, DMA_BIDIRECTIONAL);
-       host->dma_addr = (dma_addr_t)NULL;
+       host->dma_addr = 0;
  
         kfree(host->dma_buffer);
         host->dma_buffer = NULL;
@@ -1445,7 +1445,7 @@ static void wbsd_release_dma(struct wbsd_host *host)
  
         host->dma = -1;
         host->dma_buffer = NULL;
-       host->dma_addr = (dma_addr_t)NULL;
+       host->dma_addr = 0;
  }
  
  /*
@@ -1765,7 +1765,7 @@ static void __devexit wbsd_shutdown(struct device *dev, int pnp)
  static int __devinit wbsd_probe(struct platform_device *dev)
  {
         /* Use the module parameters for resources */
-       return wbsd_init(&dev->dev, io, irq, dma, 0);
+       return wbsd_init(&dev->dev, param_io, param_irq, param_dma, 0);
  }
  
  static int __devexit wbsd_remove(struct platform_device *dev)
@@ -1979,14 +1979,14 @@ static int __init wbsd_drv_init(void)
  
  #ifdef CONFIG_PNP
  
-       if (!nopnp) {
+       if (!param_nopnp) {
                 result = pnp_register_driver(&wbsd_pnp_driver);
                 if (result < 0)
                         return result;
         }
  #endif /* CONFIG_PNP */
  
-       if (nopnp) {
+       if (param_nopnp) {
                 result = platform_driver_register(&wbsd_driver);
                 if (result < 0)
                         return result;
@@ -2012,12 +2012,12 @@ static void __exit wbsd_drv_exit(void)
  {
  #ifdef CONFIG_PNP
  
-       if (!nopnp)
+       if (!param_nopnp)
                 pnp_unregister_driver(&wbsd_pnp_driver);
  
  #endif /* CONFIG_PNP */
  
-       if (nopnp) {
+       if (param_nopnp) {
                 platform_device_unregister(wbsd_device);
  
                 platform_driver_unregister(&wbsd_driver);
@@ -2029,11 +2029,11 @@ static void __exit wbsd_drv_exit(void)
  module_init(wbsd_drv_init);
  module_exit(wbsd_drv_exit);
  #ifdef CONFIG_PNP
-module_param(nopnp, uint, 0444);
+module_param_named(nopnp, param_nopnp, uint, 0444);
  #endif
-module_param(io, uint, 0444);
-module_param(irq, uint, 0444);
-module_param(dma, int, 0444);
+module_param_named(io, param_io, uint, 0444);
+module_param_named(irq, param_irq, uint, 0444);
+module_param_named(dma, param_dma, int, 0444);
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Pierre Ossman <drzeus@drzeus.cx>");
diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c

index 3dd537be87d803bdb74ec7653bcd22c3a396d505..b54e2ea8346bf26ae5071ed39bbfe450830a980d 100644 (file)
--- a/drivers/net/wireless/libertas/if_sdio.c
+++ b/drivers/net/wireless/libertas/if_sdio.c
@@ -1,7 +1,7 @@
  /*
   *  linux/drivers/net/wireless/libertas/if_sdio.c
   *
- *  Copyright 2007 Pierre Ossman
+ *  Copyright 2007-2008 Pierre Ossman
   *
   * Inspired by if_cs.c, Copyright 2007 Holger Schurig
   *
@@ -266,13 +266,10 @@ static int if_sdio_card_to_host(struct if_sdio_card *card)
  
         /*
          * The transfer must be in one transaction or the firmware
-        * goes suicidal.
+        * goes suicidal. There's no way to guarantee that for all
+        * controllers, but we can at least try.
          */
-       chunk = size;
-       if ((chunk > card->func->cur_blksize) || (chunk > 512)) {
-               chunk = (chunk + card->func->cur_blksize - 1) /
-                       card->func->cur_blksize * card->func->cur_blksize;
-       }
+       chunk = sdio_align_size(card->func, size);
  
         ret = sdio_readsb(card->func, card->buffer, card->ioport, chunk);
         if (ret)
@@ -696,13 +693,10 @@ static int if_sdio_host_to_card(struct lbs_private *priv,
  
         /*
          * The transfer must be in one transaction or the firmware
-        * goes suicidal.
+        * goes suicidal. There's no way to guarantee that for all
+        * controllers, but we can at least try.
          */
-       size = nb + 4;
-       if ((size > card->func->cur_blksize) || (size > 512)) {
-               size = (size + card->func->cur_blksize - 1) /
-                       card->func->cur_blksize * card->func->cur_blksize;
-       }
+       size = sdio_align_size(card->func, nb + 4);
  
         packet = kzalloc(sizeof(struct if_sdio_packet) + size,
                         GFP_ATOMIC);
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile

index 4d1ce2e7361e41cfee2b35b5da3025f7641e2e4b..7d63f8ced24b12568fc49ebfa06fd588e49b6fc9 100644 (file)
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -2,7 +2,7 @@
  # Makefile for the PCI bus specific drivers.
  #
  
-obj-y          += access.o bus.o probe.o remove.o pci.o quirks.o \
+obj-y          += access.o bus.o probe.o remove.o pci.o quirks.o slot.o \
                         pci-driver.o search.o pci-sysfs.o rom.o setup-res.o
  obj-$(CONFIG_PROC_FS) += proc.o
  
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c

index f8c187a763bdcac12323191836079ea9943dde3e..93e37f0666ab5e2786484a775f107bbffd9b3ecd 100644 (file)
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -30,6 +30,7 @@
  #include <linux/types.h>
  #include <linux/pci.h>
  #include <linux/pci_hotplug.h>
+#include <linux/pci-acpi.h>
  #include <acpi/acpi.h>
  #include <acpi/acpi_bus.h>
  #include <acpi/actypes.h>
@@ -299,7 +300,7 @@ free_and_return:
   *
   * @handle - the handle of the hotplug controller.
   */
-acpi_status acpi_run_oshp(acpi_handle handle)
+static acpi_status acpi_run_oshp(acpi_handle handle)
  {
         acpi_status             status;
         struct acpi_buffer      string = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -322,9 +323,6 @@ acpi_status acpi_run_oshp(acpi_handle handle)
         kfree(string.pointer);
         return status;
  }
-EXPORT_SYMBOL_GPL(acpi_run_oshp);
-
-
  
  /* acpi_get_hp_params_from_firmware
   *
@@ -374,6 +372,85 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
  }
  EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware);
  
+/**
+ * acpi_get_hp_hw_control_from_firmware
+ * @dev: the pci_dev of the bridge that has a hotplug controller
+ * @flags: requested control bits for _OSC
+ *
+ * Attempt to take hotplug control from firmware.
+ *
+ * Only the native PCIe hotplug, native SHPC hotplug and PCIe capability
+ * structure control bits of @flags are used; all other bits are dropped.
+ *
+ * Returns 0 once control was gained, -EINVAL if none of the supported bits
+ * is set in @flags, and -ENODEV if control could not be obtained.
+ */
+int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
+{
+       acpi_status status;
+       acpi_handle chandle, handle = DEVICE_ACPI_HANDLE(&(dev->dev));
+       struct pci_dev *pdev = dev;
+       struct pci_bus *parent;
+       struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
+
+       flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |
+                 OSC_SHPC_NATIVE_HP_CONTROL |
+                 OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
+       if (!flags) {
+               /* flags was masked above, so the value printed here is 0 */
+               err("Invalid flags %u specified!\n", flags);
+               return -EINVAL;
+       }
+
+       /*
+        * Per PCI firmware specification, we should run the ACPI _OSC
+        * method to get control of hotplug hardware before using it. If
+        * an _OSC is missing, we look for an OSHP to do the same thing.
+        * To handle different BIOS behavior, we look for _OSC and OSHP
+        * within the scope of the hotplug controller and its parents,
+        * up to the host bridge under which this controller exists.
+        */
+       while (!handle) {
+               /*
+                * This hotplug controller was not listed in the ACPI name
+                * space at all. Try to get acpi handle of parent pci bus.
+                */
+               if (!pdev || !pdev->bus->parent)
+                       break;
+               parent = pdev->bus->parent;
+               dbg("Could not find %s in acpi namespace, trying parent\n",
+                   pci_name(pdev));
+               if (!parent->self)
+                       /* Parent must be a host bridge */
+                       handle = acpi_get_pci_rootbridge_handle(
+                                       pci_domain_nr(parent),
+                                       parent->number);
+               else
+                       handle = DEVICE_ACPI_HANDLE(&(parent->self->dev));
+               /* NULL for a host bridge, which ends the loop on the next pass */
+               pdev = parent->self;
+       }
+
+       /*
+        * Starting at the handle found above, try each ACPI object and then
+        * its parent until control is gained, the root bridge has been
+        * tried, or no parent can be obtained.
+        */
+       while (handle) {
+               /*
+                * NOTE(review): string is reused on every pass; presumably
+                * acpi_get_name() only allocates while the length is still
+                * ACPI_ALLOCATE_BUFFER - confirm. The result is not checked.
+                */
+               acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
+               dbg("Trying to get hotplug control for %s \n",
+                   (char *)string.pointer);
+               status = pci_osc_control_set(handle, flags);
+               /* fall back to OSHP only when the _OSC attempt was not found */
+               if (status == AE_NOT_FOUND)
+                       status = acpi_run_oshp(handle);
+               if (ACPI_SUCCESS(status)) {
+                       dbg("Gained control for hotplug HW for pci %s (%s)\n",
+                           pci_name(dev), (char *)string.pointer);
+                       kfree(string.pointer);
+                       return 0;
+               }
+               if (acpi_root_bridge(handle))
+                       break;
+               chandle = handle;
+               status = acpi_get_parent(chandle, &handle);
+               if (ACPI_FAILURE(status))
+                       break;
+       }
+
+       dbg("Cannot get control of hotplug hardware for pci %s\n",
+           pci_name(dev));
+
+       /* release the path name buffer; string.pointer may still be NULL */
+       kfree(string.pointer);
+       return -ENODEV;
+}
+EXPORT_SYMBOL(acpi_get_hp_hw_control_from_firmware);
  
  /* acpi_root_bridge - check to see if this acpi object is a root bridge
   *
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h

index 7a29164d4b325aa2ee4219398b679b066f0e27a6..eecf7cbf4139aca028fd6c57f49c075717bcf8da 100644 (file)
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -215,7 +215,6 @@ extern u8 acpiphp_get_power_status (struct acpiphp_slot *slot);
  extern u8 acpiphp_get_attention_status (struct acpiphp_slot *slot);
  extern u8 acpiphp_get_latch_status (struct acpiphp_slot *slot);
  extern u8 acpiphp_get_adapter_status (struct acpiphp_slot *slot);
-extern u32 acpiphp_get_address (struct acpiphp_slot *slot);
  
  /* variables */
  extern int acpiphp_debug;
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c

index 7af68ba279036277a9f689683b9b00eaa98481be..0e496e866a84dc51480d4cb3194e1ea6b62023c8 100644 (file)
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -70,7 +70,6 @@ static int disable_slot               (struct hotplug_slot *slot);
  static int set_attention_status (struct hotplug_slot *slot, u8 value);
  static int get_power_status    (struct hotplug_slot *slot, u8 *value);
  static int get_attention_status (struct hotplug_slot *slot, u8 *value);
-static int get_address         (struct hotplug_slot *slot, u32 *value);
  static int get_latch_status    (struct hotplug_slot *slot, u8 *value);
  static int get_adapter_status  (struct hotplug_slot *slot, u8 *value);
  
@@ -83,7 +82,6 @@ static struct hotplug_slot_ops acpi_hotplug_slot_ops = {
         .get_attention_status   = get_attention_status,
         .get_latch_status       = get_latch_status,
         .get_adapter_status     = get_adapter_status,
-       .get_address            = get_address,
  };
  
  
@@ -274,23 +272,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
         return 0;
  }
  
-
-/**
- * get_address - get pci address of a slot
- * @hotplug_slot: slot to get status
- * @value: pointer to struct pci_busdev (seg, bus, dev)
- */
-static int get_address(struct hotplug_slot *hotplug_slot, u32 *value)
-{
-       struct slot *slot = hotplug_slot->private;
-
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
-
-       *value = acpiphp_get_address(slot->acpi_slot);
-
-       return 0;
-}
-
  static int __init init_acpi(void)
  {
         int retval;
@@ -357,7 +338,11 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
         acpiphp_slot->slot = slot;
         snprintf(slot->name, sizeof(slot->name), "%u", slot->acpi_slot->sun);
  
-       retval = pci_hp_register(slot->hotplug_slot);
+       retval = pci_hp_register(slot->hotplug_slot,
+                                       acpiphp_slot->bridge->pci_bus,
+                                       acpiphp_slot->device);
+       if (retval == -EBUSY)
+               goto error_hpslot;
         if (retval) {
                 err("pci_hp_register failed with error %d\n", retval);
                 goto error_hpslot;
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c

index 91156f85a9267371c1c9354646bdcc6975e206af..a3e4705dd8f0f11ef477ddfeaec284990780c87f 100644 (file)
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -258,7 +258,12 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
                                 bridge->pci_bus->number, slot->device);
                 retval = acpiphp_register_hotplug_slot(slot);
                 if (retval) {
-                       warn("acpiphp_register_hotplug_slot failed(err code = 0x%x)\n", retval);
+                       if (retval == -EBUSY)
+                               warn("Slot %d already registered by another "
+                                       "hotplug driver\n", slot->sun);
+                       else
+                               warn("acpiphp_register_hotplug_slot failed "
+                                       "(err code = 0x%x)\n", retval);
                         goto err_exit;
                 }
         }
@@ -1878,19 +1883,3 @@ u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot)
  
         return (sta == 0) ? 0 : 1;
  }
-
-
-/*
- * pci address (seg/bus/dev)
- */
-u32 acpiphp_get_address(struct acpiphp_slot *slot)
-{
-       u32 address;
-       struct pci_bus *pci_bus = slot->bridge->pci_bus;
-
-       address = (pci_domain_nr(pci_bus) << 16) |
-                 (pci_bus->number << 8) |
-                 slot->device;
-
-       return address;
-}
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c

index ede9051fdb5d848b3ee2dfe64d7cb7bccc18f2c2..2b7c45e39370f1e42fa0546f30cf6e6c7e9356f8 100644 (file)
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -33,8 +33,10 @@
  #include <linux/kobject.h>
  #include <asm/uaccess.h>
  #include <linux/moduleparam.h>
+#include <linux/pci.h>
  
  #include "acpiphp.h"
+#include "../pci.h"
  
  #define DRIVER_VERSION "1.0.1"
  #define DRIVER_AUTHOR  "Irene Zubarev <zubarev@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>"
@@ -430,7 +432,7 @@ static int __init ibm_acpiphp_init(void)
         int retval = 0;
         acpi_status status;
         struct acpi_device *device;
-       struct kobject *sysdir = &pci_hotplug_slots_kset->kobj;
+       struct kobject *sysdir = &pci_slots_kset->kobj;
  
         dbg("%s\n", __func__);
  
@@ -477,7 +479,7 @@ init_return:
  static void __exit ibm_acpiphp_exit(void)
  {
         acpi_status status;
-       struct kobject *sysdir = &pci_hotplug_slots_kset->kobj;
+       struct kobject *sysdir = &pci_slots_kset->kobj;
  
         dbg("%s\n", __func__);
  
diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c

index d8a6b80ab42aa92461ba513404a7a8fdabe5c95a..935947991dc98649dfaa8794fb55604ee12cb895 100644 (file)
--- a/drivers/pci/hotplug/cpci_hotplug_core.c
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c
@@ -285,7 +285,7 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
                 info->attention_status = cpci_get_attention_status(slot);
  
                 dbg("registering slot %s", slot->hotplug_slot->name);
-               status = pci_hp_register(slot->hotplug_slot);
+               status = pci_hp_register(slot->hotplug_slot, bus, i);
                 if (status) {
                         err("pci_hp_register failed with error %d", status);
                         goto error_name;
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c

index 36b115b27b0b812bebe943933c2aa58e9966789f..54defec51d0895f7dd2136230fa56e87e8feac3b 100644 (file)
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -434,7 +434,9 @@ static int ctrl_slot_setup(struct controller *ctrl,
                                 slot->bus, slot->device,
                                 slot->number, ctrl->slot_device_offset,
                                 slot_number);
-               result = pci_hp_register(hotplug_slot);
+               result = pci_hp_register(hotplug_slot,
+                                        ctrl->pci_dev->subordinate,
+                                        slot->device);
                 if (result) {
                         err("pci_hp_register failed with error %d\n", result);
                         goto error_name;
diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c

index 7e9a827c268799ed4c51b84b829fca03a3dfe5a7..40337a06c18ab7defa6160af363df122143545a9 100644 (file)
--- a/drivers/pci/hotplug/fakephp.c
+++ b/drivers/pci/hotplug/fakephp.c
@@ -66,6 +66,7 @@ struct dummy_slot {
         struct pci_dev *dev;
         struct work_struct remove_work;
         unsigned long removed;
+       char name[8];
  };
  
  static int debug;
@@ -100,6 +101,7 @@ static int add_slot(struct pci_dev *dev)
         struct dummy_slot *dslot;
         struct hotplug_slot *slot;
         int retval = -ENOMEM;
+       static int count = 1;
  
         slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
         if (!slot)
@@ -113,18 +115,18 @@ static int add_slot(struct pci_dev *dev)
         slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
         slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
  
-       slot->name = &dev->dev.bus_id[0];
-       dbg("slot->name = %s\n", slot->name);
-
         dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL);
         if (!dslot)
                 goto error_info;
  
+       slot->name = dslot->name;
+       snprintf(slot->name, sizeof(dslot->name), "fake%d", count++);
+       dbg("slot->name = %s\n", slot->name);
         slot->ops = &dummy_hotplug_slot_ops;
         slot->release = &dummy_release;
         slot->private = dslot;
  
-       retval = pci_hp_register(slot);
+       retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn));
         if (retval) {
                 err("pci_hp_register failed with error %d\n", retval);
                 goto error_dslot;
@@ -148,17 +150,17 @@ error:
  static int __init pci_scan_buses(void)
  {
         struct pci_dev *dev = NULL;
-       int retval = 0;
+       int lastslot = 0;
  
         while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-               retval = add_slot(dev);
-               if (retval) {
-                       pci_dev_put(dev);
-                       break;
-               }
+               if (PCI_FUNC(dev->devfn) > 0 &&
+                               lastslot == PCI_SLOT(dev->devfn))
+                       continue;
+               lastslot = PCI_SLOT(dev->devfn);
+               add_slot(dev);
         }
  
-       return retval;
+       return 0;
  }
  
  static void remove_slot(struct dummy_slot *dslot)
@@ -296,23 +298,9 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
         return 0;
  }
  
-/* find the hotplug_slot for the pci_dev */
-static struct hotplug_slot *get_slot_from_dev(struct pci_dev *dev)
-{
-       struct dummy_slot *dslot;
-
-       list_for_each_entry(dslot, &slot_list, node) {
-               if (dslot->dev == dev)
-                       return dslot->slot;
-       }
-       return NULL;
-}
-
-
  static int disable_slot(struct hotplug_slot *slot)
  {
         struct dummy_slot *dslot;
-       struct hotplug_slot *hslot;
         struct pci_dev *dev;
         int func;
  
@@ -322,41 +310,27 @@ static int disable_slot(struct hotplug_slot *slot)
  
         dbg("%s - physical_slot = %s\n", __func__, slot->name);
  
-       /* don't disable bridged devices just yet, we can't handle them easily... */
-       if (dslot->dev->subordinate) {
-               err("Can't remove PCI devices with other PCI devices behind it yet.\n");
-               return -ENODEV;
-       }
-       if (test_and_set_bit(0, &dslot->removed)) {
-               dbg("Slot already scheduled for removal\n");
-               return -ENODEV;
-       }
-       /* search for subfunctions and disable them first */
-       if (!(dslot->dev->devfn & 7)) {
-               for (func = 1; func < 8; func++) {
-                       dev = pci_get_slot(dslot->dev->bus,
-                                       dslot->dev->devfn + func);
-                       if (dev) {
-                               hslot = get_slot_from_dev(dev);
-                               if (hslot)
-                                       disable_slot(hslot);
-                               else {
-                                       err("Hotplug slot not found for subfunction of PCI device\n");
-                                       return -ENODEV;
-                               }
-                               pci_dev_put(dev);
-                       } else
-                               dbg("No device in slot found\n");
+       for (func = 7; func >= 0; func--) {
+               dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func);
+               if (!dev)
+                       continue;
+
+               if (test_and_set_bit(0, &dslot->removed)) {
+                       dbg("Slot already scheduled for removal\n");
+                       return -ENODEV;
                 }
-       }
  
-       /* remove the device from the pci core */
-       pci_remove_bus_device(dslot->dev);
+               /* queue work item to blow away this sysfs entry and other
+                * parts.
+                */
+               INIT_WORK(&dslot->remove_work, remove_slot_worker);
+               queue_work(dummyphp_wq, &dslot->remove_work);
  
-       /* queue work item to blow away this sysfs entry and other parts. */
-       INIT_WORK(&dslot->remove_work, remove_slot_worker);
-       queue_work(dummyphp_wq, &dslot->remove_work);
+               /* blow away this sysfs entry and other parts. */
+               remove_slot(dslot);
  
+               pci_dev_put(dev);
+       }
         return 0;
  }
  
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c

index dca7efc14be27d12b6ea29ee387a32843e27af03..8467d028732514277a098815f3597ebfed1591ae 100644 (file)
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -1001,7 +1001,8 @@ static int __init ebda_rsrc_controller (void)
                 tmp_slot = list_entry (list, struct slot, ibm_slot_list);
  
                 snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot));
-               pci_hp_register (tmp_slot->hotplug_slot);
+               pci_hp_register(tmp_slot->hotplug_slot,
+                       pci_find_bus(0, tmp_slot->bus), tmp_slot->device);
         }
  
         print_ebda_hpc ();
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c

index a11021e8ce37bb0cc3e76f182dfbd84027ff8d9f..5f85b1b120e3ddc12c350e2b821c18946ec90e27 100644 (file)
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -40,6 +40,7 @@
  #include <linux/pci.h>
  #include <linux/pci_hotplug.h>
  #include <asm/uaccess.h>
+#include "../pci.h"
  
  #define MY_NAME        "pci_hotplug"
  
@@ -60,41 +61,7 @@ static int debug;
  //////////////////////////////////////////////////////////////////
  
  static LIST_HEAD(pci_hotplug_slot_list);
-
-struct kset *pci_hotplug_slots_kset;
-
-static ssize_t hotplug_slot_attr_show(struct kobject *kobj,
-               struct attribute *attr, char *buf)
-{
-       struct hotplug_slot *slot = to_hotplug_slot(kobj);
-       struct hotplug_slot_attribute *attribute = to_hotplug_attr(attr);
-       return attribute->show ? attribute->show(slot, buf) : -EIO;
-}
-
-static ssize_t hotplug_slot_attr_store(struct kobject *kobj,
-               struct attribute *attr, const char *buf, size_t len)
-{
-       struct hotplug_slot *slot = to_hotplug_slot(kobj);
-       struct hotplug_slot_attribute *attribute = to_hotplug_attr(attr);
-       return attribute->store ? attribute->store(slot, buf, len) : -EIO;
-}
-
-static struct sysfs_ops hotplug_slot_sysfs_ops = {
-       .show = hotplug_slot_attr_show,
-       .store = hotplug_slot_attr_store,
-};
-
-static void hotplug_slot_release(struct kobject *kobj)
-{
-       struct hotplug_slot *slot = to_hotplug_slot(kobj);
-       if (slot->release)
-               slot->release(slot);
-}
-
-static struct kobj_type hotplug_slot_ktype = {
-       .sysfs_ops = &hotplug_slot_sysfs_ops,
-       .release = &hotplug_slot_release,
-};
+static DEFINE_SPINLOCK(pci_hotplug_slot_list_lock);
  
  /* these strings match up with the values in pci_bus_speed */
  static char *pci_bus_speed_strings[] = {
@@ -149,16 +116,15 @@ GET_STATUS(power_status, u8)
  GET_STATUS(attention_status, u8)
  GET_STATUS(latch_status, u8)
  GET_STATUS(adapter_status, u8)
-GET_STATUS(address, u32)
  GET_STATUS(max_bus_speed, enum pci_bus_speed)
  GET_STATUS(cur_bus_speed, enum pci_bus_speed)
  
-static ssize_t power_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t power_read_file(struct pci_slot *slot, char *buf)
  {
         int retval;
         u8 value;
  
-       retval = get_power_status (slot, &value);
+       retval = get_power_status(slot->hotplug, &value);
         if (retval)
                 goto exit;
         retval = sprintf (buf, "%d\n", value);
@@ -166,9 +132,10 @@ exit:
         return retval;
  }
  
-static ssize_t power_write_file (struct hotplug_slot *slot, const char *buf,
+static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf,
                 size_t count)
  {
+       struct hotplug_slot *slot = pci_slot->hotplug;
         unsigned long lpower;
         u8 power;
         int retval = 0;
@@ -204,29 +171,30 @@ exit:
         return count;
  }
  
-static struct hotplug_slot_attribute hotplug_slot_attr_power = {
+static struct pci_slot_attribute hotplug_slot_attr_power = {
         .attr = {.name = "power", .mode = S_IFREG | S_IRUGO | S_IWUSR},
         .show = power_read_file,
         .store = power_write_file
  };
  
-static ssize_t attention_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t attention_read_file(struct pci_slot *slot, char *buf)
  {
         int retval;
         u8 value;
  
-       retval = get_attention_status (slot, &value);
+       retval = get_attention_status(slot->hotplug, &value);
         if (retval)
                 goto exit;
-       retval = sprintf (buf, "%d\n", value);
+       retval = sprintf(buf, "%d\n", value);
  
  exit:
         return retval;
  }
  
-static ssize_t attention_write_file (struct hotplug_slot *slot, const char *buf,
+static ssize_t attention_write_file(struct pci_slot *slot, const char *buf,
                 size_t count)
  {
+       struct hotplug_slot_ops *ops = slot->hotplug->ops;
         unsigned long lattention;
         u8 attention;
         int retval = 0;
@@ -235,13 +203,13 @@ static ssize_t attention_write_file (struct hotplug_slot *slot, const char *buf,
         attention = (u8)(lattention & 0xff);
         dbg (" - attention = %d\n", attention);
  
-       if (!try_module_get(slot->ops->owner)) {
+       if (!try_module_get(ops->owner)) {
                 retval = -ENODEV;
                 goto exit;
         }
-       if (slot->ops->set_attention_status)
-               retval = slot->ops->set_attention_status(slot, attention);
-       module_put(slot->ops->owner);
+       if (ops->set_attention_status)
+               retval = ops->set_attention_status(slot->hotplug, attention);
+       module_put(ops->owner);
  
  exit:  
         if (retval)
@@ -249,18 +217,18 @@ exit:
         return count;
  }
  
-static struct hotplug_slot_attribute hotplug_slot_attr_attention = {
+static struct pci_slot_attribute hotplug_slot_attr_attention = {
         .attr = {.name = "attention", .mode = S_IFREG | S_IRUGO | S_IWUSR},
         .show = attention_read_file,
         .store = attention_write_file
  };
  
-static ssize_t latch_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t latch_read_file(struct pci_slot *slot, char *buf)
  {
         int retval;
         u8 value;
  
-       retval = get_latch_status (slot, &value);
+       retval = get_latch_status(slot->hotplug, &value);
         if (retval)
                 goto exit;
         retval = sprintf (buf, "%d\n", value);
@@ -269,17 +237,17 @@ exit:
         return retval;
  }
  
-static struct hotplug_slot_attribute hotplug_slot_attr_latch = {
+static struct pci_slot_attribute hotplug_slot_attr_latch = {
         .attr = {.name = "latch", .mode = S_IFREG | S_IRUGO},
         .show = latch_read_file,
  };
  
-static ssize_t presence_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t presence_read_file(struct pci_slot *slot, char *buf)
  {
         int retval;
         u8 value;
  
-       retval = get_adapter_status (slot, &value);
+       retval = get_adapter_status(slot->hotplug, &value);
         if (retval)
                 goto exit;
         retval = sprintf (buf, "%d\n", value);
@@ -288,42 +256,20 @@ exit:
         return retval;
  }
  
-static struct hotplug_slot_attribute hotplug_slot_attr_presence = {
+static struct pci_slot_attribute hotplug_slot_attr_presence = {
         .attr = {.name = "adapter", .mode = S_IFREG | S_IRUGO},
         .show = presence_read_file,
  };
  
-static ssize_t address_read_file (struct hotplug_slot *slot, char *buf)
-{
-       int retval;
-       u32 address;
-
-       retval = get_address (slot, &address);
-       if (retval)
-               goto exit;
-       retval = sprintf (buf, "%04x:%02x:%02x\n",
-                         (address >> 16) & 0xffff,
-                         (address >> 8) & 0xff,
-                         address & 0xff);
-
-exit:
-       return retval;
-}
-
-static struct hotplug_slot_attribute hotplug_slot_attr_address = {
-       .attr = {.name = "address", .mode = S_IFREG | S_IRUGO},
-       .show = address_read_file,
-};
-
  static char *unknown_speed = "Unknown bus speed";
  
-static ssize_t max_bus_speed_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t max_bus_speed_read_file(struct pci_slot *slot, char *buf)
  {
         char *speed_string;
         int retval;
         enum pci_bus_speed value;
         
-       retval = get_max_bus_speed (slot, &value);
+       retval = get_max_bus_speed(slot->hotplug, &value);
         if (retval)
                 goto exit;
  
@@ -338,18 +284,18 @@ exit:
         return retval;
  }
  
-static struct hotplug_slot_attribute hotplug_slot_attr_max_bus_speed = {
+static struct pci_slot_attribute hotplug_slot_attr_max_bus_speed = {
         .attr = {.name = "max_bus_speed", .mode = S_IFREG | S_IRUGO},
         .show = max_bus_speed_read_file,
  };
  
-static ssize_t cur_bus_speed_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t cur_bus_speed_read_file(struct pci_slot *slot, char *buf)
  {
         char *speed_string;
         int retval;
         enum pci_bus_speed value;
  
-       retval = get_cur_bus_speed (slot, &value);
+       retval = get_cur_bus_speed(slot->hotplug, &value);
         if (retval)
                 goto exit;
  
@@ -364,14 +310,15 @@ exit:
         return retval;
  }
  
-static struct hotplug_slot_attribute hotplug_slot_attr_cur_bus_speed = {
+static struct pci_slot_attribute hotplug_slot_attr_cur_bus_speed = {
         .attr = {.name = "cur_bus_speed", .mode = S_IFREG | S_IRUGO},
         .show = cur_bus_speed_read_file,
  };
  
-static ssize_t test_write_file (struct hotplug_slot *slot, const char *buf,
+static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf,
                 size_t count)
  {
+       struct hotplug_slot *slot = pci_slot->hotplug;
         unsigned long ltest;
         u32 test;
         int retval = 0;
@@ -394,13 +341,14 @@ exit:
         return count;
  }
  
-static struct hotplug_slot_attribute hotplug_slot_attr_test = {
+static struct pci_slot_attribute hotplug_slot_attr_test = {
         .attr = {.name = "test", .mode = S_IFREG | S_IRUGO | S_IWUSR},
         .store = test_write_file
  };
  
-static int has_power_file (struct hotplug_slot *slot)
+static int has_power_file(struct pci_slot *pci_slot)
  {
+       struct hotplug_slot *slot = pci_slot->hotplug;
         if ((!slot) || (!slot->ops))
                 return -ENODEV;
         if ((slot->ops->enable_slot) ||
@@ -410,8 +358,9 @@ static int has_power_file (struct hotplug_slot *slot)
         return -ENOENT;
  }
  
-static int has_attention_file (struct hotplug_slot *slot)
+static int has_attention_file(struct pci_slot *pci_slot)
  {
+       struct hotplug_slot *slot = pci_slot->hotplug;
         if ((!slot) || (!slot->ops))
                 return -ENODEV;
         if ((slot->ops->set_attention_status) ||
@@ -420,8 +369,9 @@ static int has_attention_file (struct hotplug_slot *slot)
         return -ENOENT;
  }
  
-static int has_latch_file (struct hotplug_slot *slot)
+static int has_latch_file(struct pci_slot *pci_slot)
  {
+       struct hotplug_slot *slot = pci_slot->hotplug;
         if ((!slot) || (!slot->ops))
                 return -ENODEV;
         if (slot->ops->get_latch_status)
@@ -429,8 +379,9 @@ static int has_latch_file (struct hotplug_slot *slot)
         return -ENOENT;
  }
  
-static int has_adapter_file (struct hotplug_slot *slot)
+static int has_adapter_file(struct pci_slot *pci_slot)
  {
+       struct hotplug_slot *slot = pci_slot->hotplug;
         if ((!slot) || (!slot->ops))
                 return -ENODEV;
         if (slot->ops->get_adapter_status)
@@ -438,17 +389,9 @@ static int has_adapter_file (struct hotplug_slot *slot)
         return -ENOENT;
  }
  
-static int has_address_file (struct hotplug_slot *slot)
-{
-       if ((!slot) || (!slot->ops))
-               return -ENODEV;
-       if (slot->ops->get_address)
-               return 0;
-       return -ENOENT;
-}
-
-static int has_max_bus_speed_file (struct hotplug_slot *slot)
+static int has_max_bus_speed_file(struct pci_slot *pci_slot)
  {
+       struct hotplug_slot *slot = pci_slot->hotplug;
         if ((!slot) || (!slot->ops))
                 return -ENODEV;
         if (slot->ops->get_max_bus_speed)
@@ -456,8 +399,9 @@ static int has_max_bus_speed_file (struct hotplug_slot *slot)
         return -ENOENT;
  }
  
-static int has_cur_bus_speed_file (struct hotplug_slot *slot)
+static int has_cur_bus_speed_file(struct pci_slot *pci_slot)
  {
+       struct hotplug_slot *slot = pci_slot->hotplug;
         if ((!slot) || (!slot->ops))
                 return -ENODEV;
         if (slot->ops->get_cur_bus_speed)
@@ -465,8 +409,9 @@ static int has_cur_bus_speed_file (struct hotplug_slot *slot)
         return -ENOENT;
  }
  
-static int has_test_file (struct hotplug_slot *slot)
+static int has_test_file(struct pci_slot *pci_slot)
  {
+       struct hotplug_slot *slot = pci_slot->hotplug;
         if ((!slot) || (!slot->ops))
                 return -ENODEV;
         if (slot->ops->hardware_test)
@@ -474,7 +419,7 @@ static int has_test_file (struct hotplug_slot *slot)
         return -ENOENT;
  }
  
-static int fs_add_slot (struct hotplug_slot *slot)
+static int fs_add_slot(struct pci_slot *slot)
  {
         int retval = 0;
  
@@ -505,13 +450,6 @@ static int fs_add_slot (struct hotplug_slot *slot)
                         goto exit_adapter;
         }
  
-       if (has_address_file(slot) == 0) {
-               retval = sysfs_create_file(&slot->kobj,
-                                          &hotplug_slot_attr_address.attr);
-               if (retval)
-                       goto exit_address;
-       }
-
         if (has_max_bus_speed_file(slot) == 0) {
                 retval = sysfs_create_file(&slot->kobj,
                                            &hotplug_slot_attr_max_bus_speed.attr);
@@ -544,10 +482,6 @@ exit_cur_speed:
                 sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr);
  
  exit_max_speed:
-       if (has_address_file(slot) == 0)
-               sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_address.attr);
-
-exit_address:
         if (has_adapter_file(slot) == 0)
                 sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_presence.attr);
  
@@ -567,7 +501,7 @@ exit:
         return retval;
  }
  
-static void fs_remove_slot (struct hotplug_slot *slot)
+static void fs_remove_slot(struct pci_slot *slot)
  {
         if (has_power_file(slot) == 0)
                 sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_power.attr);
@@ -581,9 +515,6 @@ static void fs_remove_slot (struct hotplug_slot *slot)
         if (has_adapter_file(slot) == 0)
                 sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_presence.attr);
  
-       if (has_address_file(slot) == 0)
-               sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_address.attr);
-
         if (has_max_bus_speed_file(slot) == 0)
                 sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr);
  
@@ -599,27 +530,33 @@ static struct hotplug_slot *get_slot_from_name (const char *name)
         struct hotplug_slot *slot;
         struct list_head *tmp;
  
+       spin_lock(&pci_hotplug_slot_list_lock);
         list_for_each (tmp, &pci_hotplug_slot_list) {
                 slot = list_entry (tmp, struct hotplug_slot, slot_list);
                 if (strcmp(slot->name, name) == 0)
-                       return slot;
+                       goto out;
         }
-       return NULL;
+       slot = NULL;
+out:
+       spin_unlock(&pci_hotplug_slot_list_lock);
+       return slot;
  }
  
  /**
   * pci_hp_register - register a hotplug_slot with the PCI hotplug subsystem
+ * @bus: bus this slot is on
   * @slot: pointer to the &struct hotplug_slot to register
+ * @slot_nr: slot number
   *
   * Registers a hotplug slot with the pci hotplug subsystem, which will allow
   * userspace interaction to the slot.
   *
   * Returns 0 if successful, anything else for an error.
   */
-int pci_hp_register (struct hotplug_slot *slot)
+int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr)
  {
         int result;
-       struct hotplug_slot *tmp;
+       struct pci_slot *pci_slot;
  
         if (slot == NULL)
                 return -ENODEV;
@@ -632,57 +569,89 @@ int pci_hp_register (struct hotplug_slot *slot)
         }
  
         /* Check if we have already registered a slot with the same name. */
-       tmp = get_slot_from_name(slot->name);
-       if (tmp)
+       if (get_slot_from_name(slot->name))
                 return -EEXIST;
  
-       slot->kobj.kset = pci_hotplug_slots_kset;
-       result = kobject_init_and_add(&slot->kobj, &hotplug_slot_ktype, NULL,
-                                     "%s", slot->name);
-       if (result) {
-               err("Unable to register kobject '%s'", slot->name);
-               return -EINVAL;
+       /*
+        * No problems if we call this interface from both ACPI_PCI_SLOT
+        * driver and call it here again. If we've already created the
+        * pci_slot, the interface will simply bump the refcount.
+        */
+       pci_slot = pci_create_slot(bus, slot_nr, slot->name);
+       if (IS_ERR(pci_slot))
+               return PTR_ERR(pci_slot);
+
+       if (pci_slot->hotplug) {
+               dbg("%s: already claimed\n", __func__);
+               pci_destroy_slot(pci_slot);
+               return -EBUSY;
         }
  
-       list_add (&slot->slot_list, &pci_hotplug_slot_list);
+       slot->pci_slot = pci_slot;
+       pci_slot->hotplug = slot;
+
+       /*
+        * Allow pcihp drivers to override the ACPI_PCI_SLOT name.
+        */
+       if (strcmp(kobject_name(&pci_slot->kobj), slot->name)) {
+               result = kobject_rename(&pci_slot->kobj, slot->name);
+               if (result) {
+                       pci_destroy_slot(pci_slot);
+                       return result;
+               }
+       }
+
+       spin_lock(&pci_hotplug_slot_list_lock);
+       list_add(&slot->slot_list, &pci_hotplug_slot_list);
+       spin_unlock(&pci_hotplug_slot_list_lock);
+
+       result = fs_add_slot(pci_slot);
+       kobject_uevent(&pci_slot->kobj, KOBJ_ADD);
+       dbg("Added slot %s to the list\n", slot->name);
+
  
-       result = fs_add_slot (slot);
-       kobject_uevent(&slot->kobj, KOBJ_ADD);
-       dbg ("Added slot %s to the list\n", slot->name);
         return result;
  }
  
  /**
   * pci_hp_deregister - deregister a hotplug_slot with the PCI hotplug subsystem
- * @slot: pointer to the &struct hotplug_slot to deregister
+ * @hotplug: pointer to the &struct hotplug_slot to deregister
   *
   * The @slot must have been registered with the pci hotplug subsystem
   * previously with a call to pci_hp_register().
   *
   * Returns 0 if successful, anything else for an error.
   */
-int pci_hp_deregister (struct hotplug_slot *slot)
+int pci_hp_deregister(struct hotplug_slot *hotplug)
  {
         struct hotplug_slot *temp;
+       struct pci_slot *slot;
  
-       if (slot == NULL)
+       if (!hotplug)
                 return -ENODEV;
  
-       temp = get_slot_from_name (slot->name);
-       if (temp != slot) {
+       temp = get_slot_from_name(hotplug->name);
+       if (temp != hotplug)
                 return -ENODEV;
-       }
-       list_del (&slot->slot_list);
  
-       fs_remove_slot (slot);
-       dbg ("Removed slot %s from the list\n", slot->name);
-       kobject_put(&slot->kobj);
+       spin_lock(&pci_hotplug_slot_list_lock);
+       list_del(&hotplug->slot_list);
+       spin_unlock(&pci_hotplug_slot_list_lock);
+
+       slot = hotplug->pci_slot;
+       fs_remove_slot(slot);
+       dbg("Removed slot %s from the list\n", hotplug->name);
+
+       hotplug->release(hotplug);
+       slot->hotplug = NULL;
+       pci_destroy_slot(slot);
+
         return 0;
  }
  
  /**
   * pci_hp_change_slot_info - changes the slot's information structure in the core
- * @slot: pointer to the slot whose info has changed
+ * @hotplug: pointer to the slot whose info has changed
   * @info: pointer to the info copy into the slot's info structure
   *
   * @slot must have been registered with the pci 
@@ -690,13 +659,15 @@ int pci_hp_deregister (struct hotplug_slot *slot)
   *
   * Returns 0 if successful, anything else for an error.
   */
-int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot,
+int __must_check pci_hp_change_slot_info(struct hotplug_slot *hotplug,
                                          struct hotplug_slot_info *info)
  {
-       if ((slot == NULL) || (info == NULL))
+       struct pci_slot *slot;
+       if (!hotplug || !info)
                 return -ENODEV;
+       slot = hotplug->pci_slot;
  
-       memcpy (slot->info, info, sizeof (struct hotplug_slot_info));
+       memcpy(hotplug->info, info, sizeof(struct hotplug_slot_info));
  
         return 0;
  }
@@ -704,36 +675,22 @@ int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot,
  static int __init pci_hotplug_init (void)
  {
         int result;
-       struct kset *pci_bus_kset;
  
-       pci_bus_kset = bus_get_kset(&pci_bus_type);
-
-       pci_hotplug_slots_kset = kset_create_and_add("slots", NULL,
-                                                    &pci_bus_kset->kobj);
-       if (!pci_hotplug_slots_kset) {
-               result = -ENOMEM;
-               err("Register subsys error\n");
-               goto exit;
-       }
         result = cpci_hotplug_init(debug);
         if (result) {
                 err ("cpci_hotplug_init with error %d\n", result);
-               goto err_subsys;
+               goto err_cpci;
         }
  
         info (DRIVER_DESC " version: " DRIVER_VERSION "\n");
-       goto exit;
  
-err_subsys:
-       kset_unregister(pci_hotplug_slots_kset);
-exit:
+err_cpci:
         return result;
  }
  
  static void __exit pci_hotplug_exit (void)
  {
         cpci_hotplug_exit();
-       kset_unregister(pci_hotplug_slots_kset);
  }
  
  module_init(pci_hotplug_init);
@@ -745,7 +702,6 @@ MODULE_LICENSE("GPL");
  module_param(debug, bool, 0644);
  MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
  
-EXPORT_SYMBOL_GPL(pci_hotplug_slots_kset);
  EXPORT_SYMBOL_GPL(pci_hp_register);
  EXPORT_SYMBOL_GPL(pci_hp_deregister);
  EXPORT_SYMBOL_GPL(pci_hp_change_slot_info);
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h

index 79c9ddaad3fb4e9b7754f951b9845476e0d1c5f3..e3a1e7e7dba246b6bc35a16dbd8a0a3f00ee885e 100644 (file)
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -43,6 +43,7 @@ extern int pciehp_poll_mode;
  extern int pciehp_poll_time;
  extern int pciehp_debug;
  extern int pciehp_force;
+extern int pciehp_slot_with_bus;
  extern struct workqueue_struct *pciehp_wq;
  
  #define dbg(format, arg...)                                            \
@@ -96,7 +97,7 @@ struct controller {
         u32 slot_cap;
         u8 cap_base;
         struct timer_list poll_timer;
-       volatile int cmd_busy;
+       int cmd_busy;
         unsigned int no_cmd_complete:1;
  };
  
@@ -156,10 +157,10 @@ extern u8 pciehp_handle_power_fault(struct slot *p_slot);
  extern int pciehp_configure_device(struct slot *p_slot);
  extern int pciehp_unconfigure_device(struct slot *p_slot);
  extern void pciehp_queue_pushbutton_work(struct work_struct *work);
-int pcie_init(struct controller *ctrl, struct pcie_device *dev);
+struct controller *pcie_init(struct pcie_device *dev);
  int pciehp_enable_slot(struct slot *p_slot);
  int pciehp_disable_slot(struct slot *p_slot);
-int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev);
+int pcie_enable_notification(struct controller *ctrl);
  
  static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
  {
@@ -202,8 +203,13 @@ struct hpc_ops {
  #include <acpi/actypes.h>
  #include <linux/pci-acpi.h>
  
-#define pciehp_get_hp_hw_control_from_firmware(dev)                    \
-       pciehp_acpi_get_hp_hw_control_from_firmware(dev)
+static inline int pciehp_get_hp_hw_control_from_firmware(struct pci_dev *dev)
+{
+       u32 flags = (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |
+                    OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
+       return acpi_get_hp_hw_control_from_firmware(dev, flags);
+}
+
  static inline int pciehp_get_hp_params_from_firmware(struct pci_dev *dev,
                         struct hotplug_params *hpp)
  {
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c

index 48a2ed378914f383056d7e803ad4c58b044b3ec4..3677495c4f91e2bf827c7d863d431be4272c3c5a 100644 (file)
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -72,7 +72,6 @@ static int get_power_status   (struct hotplug_slot *slot, u8 *value);
  static int get_attention_status        (struct hotplug_slot *slot, u8 *value);
  static int get_latch_status    (struct hotplug_slot *slot, u8 *value);
  static int get_adapter_status  (struct hotplug_slot *slot, u8 *value);
-static int get_address         (struct hotplug_slot *slot, u32 *value);
  static int get_max_bus_speed   (struct hotplug_slot *slot, enum pci_bus_speed *value);
  static int get_cur_bus_speed   (struct hotplug_slot *slot, enum pci_bus_speed *value);
  
@@ -85,7 +84,6 @@ static struct hotplug_slot_ops pciehp_hotplug_slot_ops = {
         .get_attention_status = get_attention_status,
         .get_latch_status =     get_latch_status,
         .get_adapter_status =   get_adapter_status,
-       .get_address =          get_address,
         .get_max_bus_speed =    get_max_bus_speed,
         .get_cur_bus_speed =    get_cur_bus_speed,
  };
@@ -185,23 +183,10 @@ static struct hotplug_slot_attribute hotplug_slot_attr_lock = {
   */
  static void release_slot(struct hotplug_slot *hotplug_slot)
  {
-       struct slot *slot = hotplug_slot->private;
-
         dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
  
-       kfree(slot->hotplug_slot->info);
-       kfree(slot->hotplug_slot);
-       kfree(slot);
-}
-
-static void make_slot_name(struct slot *slot)
-{
-       if (pciehp_slot_with_bus)
-               snprintf(slot->hotplug_slot->name, SLOT_NAME_SIZE, "%04d_%04d",
-                        slot->bus, slot->number);
-       else
-               snprintf(slot->hotplug_slot->name, SLOT_NAME_SIZE, "%d",
-                        slot->number);
+       kfree(hotplug_slot->info);
+       kfree(hotplug_slot);
  }
  
  static int init_slots(struct controller *ctrl)
@@ -210,49 +195,34 @@ static int init_slots(struct controller *ctrl)
         struct hotplug_slot *hotplug_slot;
         struct hotplug_slot_info *info;
         int retval = -ENOMEM;
-       int i;
-
-       for (i = 0; i < ctrl->num_slots; i++) {
-               slot = kzalloc(sizeof(*slot), GFP_KERNEL);
-               if (!slot)
-                       goto error;
  
+       list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
                 hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL);
                 if (!hotplug_slot)
-                       goto error_slot;
-               slot->hotplug_slot = hotplug_slot;
+                       goto error;
  
                 info = kzalloc(sizeof(*info), GFP_KERNEL);
                 if (!info)
                         goto error_hpslot;
-               hotplug_slot->info = info;
-
-               hotplug_slot->name = slot->name;
-
-               slot->hp_slot = i;
-               slot->ctrl = ctrl;
-               slot->bus = ctrl->pci_dev->subordinate->number;
-               slot->device = ctrl->slot_device_offset + i;
-               slot->hpc_ops = ctrl->hpc_ops;
-               slot->number = ctrl->first_slot;
-               mutex_init(&slot->lock);
-               INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
  
                 /* register this slot with the hotplug pci core */
+               hotplug_slot->info = info;
+               hotplug_slot->name = slot->name;
                 hotplug_slot->private = slot;
                 hotplug_slot->release = &release_slot;
-               make_slot_name(slot);
                 hotplug_slot->ops = &pciehp_hotplug_slot_ops;
-
                 get_power_status(hotplug_slot, &info->power_status);
                 get_attention_status(hotplug_slot, &info->attention_status);
                 get_latch_status(hotplug_slot, &info->latch_status);
                 get_adapter_status(hotplug_slot, &info->adapter_status);
+               slot->hotplug_slot = hotplug_slot;
  
                 dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x "
                     "slot_device_offset=%x\n", slot->bus, slot->device,
                     slot->hp_slot, slot->number, ctrl->slot_device_offset);
-               retval = pci_hp_register(hotplug_slot);
+               retval = pci_hp_register(hotplug_slot,
+                                        ctrl->pci_dev->subordinate,
+                                        slot->device);
                 if (retval) {
                         err("pci_hp_register failed with error %d\n", retval);
                         if (retval == -EEXIST)
@@ -263,7 +233,7 @@ static int init_slots(struct controller *ctrl)
                 }
                 /* create additional sysfs entries */
                 if (EMI(ctrl)) {
-                       retval = sysfs_create_file(&hotplug_slot->kobj,
+                       retval = sysfs_create_file(&hotplug_slot->pci_slot->kobj,
                                 &hotplug_slot_attr_lock.attr);
                         if (retval) {
                                 pci_hp_deregister(hotplug_slot);
@@ -271,8 +241,6 @@ static int init_slots(struct controller *ctrl)
                                 goto error_info;
                         }
                 }
-
-               list_add(&slot->slot_list, &ctrl->slot_list);
         }
  
         return 0;
@@ -280,27 +248,18 @@ error_info:
         kfree(info);
  error_hpslot:
         kfree(hotplug_slot);
-error_slot:
-       kfree(slot);
  error:
         return retval;
  }
  
  static void cleanup_slots(struct controller *ctrl)
  {
-       struct list_head *tmp;
-       struct list_head *next;
         struct slot *slot;
  
-       list_for_each_safe(tmp, next, &ctrl->slot_list) {
-               slot = list_entry(tmp, struct slot, slot_list);
-               list_del(&slot->slot_list);
+       list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
                 if (EMI(ctrl))
-                       sysfs_remove_file(&slot->hotplug_slot->kobj,
+                       sysfs_remove_file(&slot->hotplug_slot->pci_slot->kobj,
                                 &hotplug_slot_attr_lock.attr);
-               cancel_delayed_work(&slot->work);
-               flush_scheduled_work();
-               flush_workqueue(pciehp_wq);
                 pci_hp_deregister(slot->hotplug_slot);
         }
  }
@@ -398,19 +357,8 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
         return 0;
  }
  
-static int get_address(struct hotplug_slot *hotplug_slot, u32 *value)
-{
-       struct slot *slot = hotplug_slot->private;
-       struct pci_bus *bus = slot->ctrl->pci_dev->subordinate;
-
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
-
-       *value = (pci_domain_nr(bus) << 16) | (slot->bus << 8) | slot->device;
-
-       return 0;
-}
-
-static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
+static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
+                               enum pci_bus_speed *value)
  {
         struct slot *slot = hotplug_slot->private;
         int retval;
@@ -444,34 +392,30 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
         struct controller *ctrl;
         struct slot *t_slot;
         u8 value;
-       struct pci_dev *pdev;
+       struct pci_dev *pdev = dev->port;
  
-       ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
-       if (!ctrl) {
-               err("%s : out of memory\n", __func__);
+       if (pciehp_force)
+               dbg("Bypassing BIOS check for pciehp use on %s\n",
+                   pci_name(pdev));
+       else if (pciehp_get_hp_hw_control_from_firmware(pdev))
                 goto err_out_none;
-       }
-       INIT_LIST_HEAD(&ctrl->slot_list);
-
-       pdev = dev->port;
-       ctrl->pci_dev = pdev;
  
-       rc = pcie_init(ctrl, dev);
-       if (rc) {
+       ctrl = pcie_init(dev);
+       if (!ctrl) {
                 dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME);
-               goto err_out_free_ctrl;
+               goto err_out_none;
         }
-
-       pci_set_drvdata(pdev, ctrl);
-
-       dbg("%s: ctrl bus=0x%x, device=%x, function=%x, irq=%x\n",
-           __func__, pdev->bus->number, PCI_SLOT(pdev->devfn),
-           PCI_FUNC(pdev->devfn), pdev->irq);
+       set_service_data(dev, ctrl);
  
         /* Setup the slot information structures */
         rc = init_slots(ctrl);
         if (rc) {
-               err("%s: slot initialization failed\n", PCIE_MODULE_NAME);
+               if (rc == -EBUSY)
+                       warn("%s: slot already registered by another "
+                               "hotplug driver\n", PCIE_MODULE_NAME);
+               else
+                       err("%s: slot initialization failed\n",
+                               PCIE_MODULE_NAME);
                 goto err_out_release_ctlr;
         }
  
@@ -495,20 +439,16 @@ err_out_free_ctrl_slot:
         cleanup_slots(ctrl);
  err_out_release_ctlr:
         ctrl->hpc_ops->release_ctlr(ctrl);
-err_out_free_ctrl:
-       kfree(ctrl);
  err_out_none:
         return -ENODEV;
  }
  
  static void pciehp_remove (struct pcie_device *dev)
  {
-       struct pci_dev *pdev = dev->port;
-       struct controller *ctrl = pci_get_drvdata(pdev);
+       struct controller *ctrl = get_service_data(dev);
  
         cleanup_slots(ctrl);
         ctrl->hpc_ops->release_ctlr(ctrl);
-       kfree(ctrl);
  }
  
  #ifdef CONFIG_PM
@@ -522,13 +462,12 @@ static int pciehp_resume (struct pcie_device *dev)
  {
         printk("%s ENTRY\n", __func__);
         if (pciehp_force) {
-               struct pci_dev *pdev = dev->port;
-               struct controller *ctrl = pci_get_drvdata(pdev);
+               struct controller *ctrl = get_service_data(dev);
                 struct slot *t_slot;
                 u8 status;
  
                 /* reinitialize the chipset's event detection logic */
-               pcie_init_hardware_part2(ctrl, dev);
+               pcie_enable_notification(ctrl);
  
                 t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
  
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c

index 79f104963166bdce55420df4005966df49e0dcc6..1323a43285d71efb7635e1114d939fef34618219 100644 (file)
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -247,30 +247,32 @@ static inline void pciehp_free_irq(struct controller *ctrl)
                 free_irq(ctrl->pci_dev->irq, ctrl);
  }
  
-static inline int pcie_poll_cmd(struct controller *ctrl)
+static int pcie_poll_cmd(struct controller *ctrl)
  {
         u16 slot_status;
         int timeout = 1000;
  
-       if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status))
-               if (slot_status & CMD_COMPLETED)
-                       goto completed;
-       for (timeout = 1000; timeout > 0; timeout -= 100) {
-               msleep(100);
-               if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status))
-                       if (slot_status & CMD_COMPLETED)
-                               goto completed;
+       if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) {
+               if (slot_status & CMD_COMPLETED) {
+                       pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED);
+                       return 1;       /* completed before any sleep */
+               }
+       }
+       while (timeout > 0) {   /* poll every 10 ms; budget is 1000 ms */
+               msleep(10);
+               timeout -= 10;
+               if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) {
+                       if (slot_status & CMD_COMPLETED) {
+                               pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED);
+                               return 1;       /* completed in time */
+                       }
+               }
         }
         return 0;       /* timeout */
-
-completed:
-       pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED);
-       return timeout;
  }
  
-static inline int pcie_wait_cmd(struct controller *ctrl, int poll)
+static void pcie_wait_cmd(struct controller *ctrl, int poll)
  {
-       int retval = 0;
         unsigned int msecs = pciehp_poll_mode ? 2500 : 1000;
         unsigned long timeout = msecs_to_jiffies(msecs);
         int rc;
@@ -278,16 +280,9 @@ static inline int pcie_wait_cmd(struct controller *ctrl, int poll)
         if (poll)
                 rc = pcie_poll_cmd(ctrl);
         else
-               rc = wait_event_interruptible_timeout(ctrl->queue,
-                                             !ctrl->cmd_busy, timeout);
+               rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
         if (!rc)
                 dbg("Command not completed in 1000 msec\n");
-       else if (rc < 0) {
-               retval = -EINTR;
-               info("Command was interrupted by a signal\n");
-       }
-
-       return retval;
  }
  
  /**
@@ -342,10 +337,6 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
  
         slot_ctrl &= ~mask;
         slot_ctrl |= (cmd & mask);
-       /* Don't enable command completed if caller is changing it. */
-       if (!(mask & CMD_CMPL_INTR_ENABLE))
-               slot_ctrl |= CMD_CMPL_INTR_ENABLE;
-
         ctrl->cmd_busy = 1;
         smp_mb();
         retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl);
@@ -365,7 +356,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
                 if (!(slot_ctrl & HP_INTR_ENABLE) ||
                     !(slot_ctrl & CMD_CMPL_INTR_ENABLE))
                         poll = 1;
-                retval = pcie_wait_cmd(ctrl, poll);
+                pcie_wait_cmd(ctrl, poll);
         }
   out:
         mutex_unlock(&ctrl->ctrl_lock);
@@ -614,23 +605,6 @@ static void hpc_set_green_led_blink(struct slot *slot)
             __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
  }
  
-static void hpc_release_ctlr(struct controller *ctrl)
-{
-       /* Mask Hot-plug Interrupt Enable */
-       if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE))
-               err("%s: Cannot mask hotplut interrupt enable\n", __func__);
-
-       /* Free interrupt handler or interrupt polling timer */
-       pciehp_free_irq(ctrl);
-
-       /*
-        * If this is the last controller to be released, destroy the
-        * pciehp work queue
-        */
-       if (atomic_dec_and_test(&pciehp_num_controllers))
-               destroy_workqueue(pciehp_wq);
-}
-
  static int hpc_power_on_slot(struct slot * slot)
  {
         struct controller *ctrl = slot->ctrl;
@@ -785,7 +759,7 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
                 intr_loc |= detected;
                 if (!intr_loc)
                         return IRQ_NONE;
-               if (pciehp_writew(ctrl, SLOTSTATUS, detected)) {
+               if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) {
                         err("%s: Cannot write to SLOTSTATUS\n", __func__);
                         return IRQ_NONE;
                 }
@@ -797,25 +771,13 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
         if (intr_loc & CMD_COMPLETED) {
                 ctrl->cmd_busy = 0;
                 smp_mb();
-               wake_up_interruptible(&ctrl->queue);
+               wake_up(&ctrl->queue);
         }
  
         if (!(intr_loc & ~CMD_COMPLETED))
                 return IRQ_HANDLED;
  
-       /*
-        * Return without handling events if this handler routine is
-        * called before controller initialization is done. This may
-        * happen if hotplug event or another interrupt that shares
-        * the IRQ with pciehp arrives before slot initialization is
-        * done after interrupt handler is registered.
-        *
-        * FIXME - Need more structural fixes. We need to be ready to
-        * handle the event before installing interrupt handler.
-        */
         p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
-       if (!p_slot || !p_slot->hpc_ops)
-               return IRQ_HANDLED;
  
         /* Check MRL Sensor Changed */
         if (intr_loc & MRL_SENS_CHANGED)
@@ -992,6 +954,7 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
         return retval;
  }
  
+static void pcie_release_ctrl(struct controller *ctrl);
  static struct hpc_ops pciehp_hpc_ops = {
         .power_on_slot                  = hpc_power_on_slot,
         .power_off_slot                 = hpc_power_off_slot,
@@ -1013,97 +976,11 @@ static struct hpc_ops pciehp_hpc_ops = {
         .green_led_off                  = hpc_set_green_led_off,
         .green_led_blink                = hpc_set_green_led_blink,
  
-       .release_ctlr                   = hpc_release_ctlr,
+       .release_ctlr                   = pcie_release_ctrl,
         .check_lnk_status               = hpc_check_lnk_status,
  };
  
-#ifdef CONFIG_ACPI
-static int pciehp_acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev)
-{
-       acpi_status status;
-       acpi_handle chandle, handle = DEVICE_ACPI_HANDLE(&(dev->dev));
-       struct pci_dev *pdev = dev;
-       struct pci_bus *parent;
-       struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
-
-       /*
-        * Per PCI firmware specification, we should run the ACPI _OSC
-        * method to get control of hotplug hardware before using it.
-        * If an _OSC is missing, we look for an OSHP to do the same thing.
-        * To handle different BIOS behavior, we look for _OSC and OSHP
-        * within the scope of the hotplug controller and its parents, upto
-        * the host bridge under which this controller exists.
-        */
-       while (!handle) {
-               /*
-                * This hotplug controller was not listed in the ACPI name
-                * space at all. Try to get acpi handle of parent pci bus.
-                */
-               if (!pdev || !pdev->bus->parent)
-                       break;
-               parent = pdev->bus->parent;
-               dbg("Could not find %s in acpi namespace, trying parent\n",
-                               pci_name(pdev));
-               if (!parent->self)
-                       /* Parent must be a host bridge */
-                       handle = acpi_get_pci_rootbridge_handle(
-                                       pci_domain_nr(parent),
-                                       parent->number);
-               else
-                       handle = DEVICE_ACPI_HANDLE(
-                                       &(parent->self->dev));
-               pdev = parent->self;
-       }
-
-       while (handle) {
-               acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
-               dbg("Trying to get hotplug control for %s \n",
-                       (char *)string.pointer);
-               status = pci_osc_control_set(handle,
-                               OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL |
-                               OSC_PCI_EXPRESS_NATIVE_HP_CONTROL);
-               if (status == AE_NOT_FOUND)
-                       status = acpi_run_oshp(handle);
-               if (ACPI_SUCCESS(status)) {
-                       dbg("Gained control for hotplug HW for pci %s (%s)\n",
-                               pci_name(dev), (char *)string.pointer);
-                       kfree(string.pointer);
-                       return 0;
-               }
-               if (acpi_root_bridge(handle))
-                       break;
-               chandle = handle;
-               status = acpi_get_parent(chandle, &handle);
-               if (ACPI_FAILURE(status))
-                       break;
-       }
-
-       dbg("Cannot get control of hotplug hardware for pci %s\n",
-                       pci_name(dev));
-
-       kfree(string.pointer);
-       return -1;
-}
-#endif
-
-static int pcie_init_hardware_part1(struct controller *ctrl,
-                                   struct pcie_device *dev)
-{
-       /* Clear all remaining event bits in Slot Status register */
-       if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f)) {
-               err("%s: Cannot write to SLOTSTATUS register\n", __func__);
-               return -1;
-       }
-
-       /* Mask Hot-plug Interrupt Enable */
-       if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE)) {
-               err("%s: Cannot mask hotplug interrupt enable\n", __func__);
-               return -1;
-       }
-       return 0;
-}
-
-int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev)
+int pcie_enable_notification(struct controller *ctrl)
  {
         u16 cmd, mask;
  
@@ -1115,30 +992,83 @@ int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev)
         if (MRL_SENS(ctrl))
                 cmd |= MRL_DETECT_ENABLE;
         if (!pciehp_poll_mode)
-               cmd |= HP_INTR_ENABLE;
+               cmd |= HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
  
-       mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE |
-               PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE | HP_INTR_ENABLE;
+       mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE |
+              PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
  
         if (pcie_write_cmd(ctrl, cmd, mask)) {
                 err("%s: Cannot enable software notification\n", __func__);
-               goto abort;
+               return -1;
         }
+       return 0;
+}
  
-       if (pciehp_force)
-               dbg("Bypassing BIOS check for pciehp use on %s\n",
-                               pci_name(ctrl->pci_dev));
-       else if (pciehp_get_hp_hw_control_from_firmware(ctrl->pci_dev))
-               goto abort_disable_intr;
+static void pcie_disable_notification(struct controller *ctrl)
+{
+       u16 mask;
+       mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE |
+              PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
+       if (pcie_write_cmd(ctrl, 0, mask))
+               warn("%s: Cannot disable software notification\n", __func__);
+}
  
+static int pcie_init_notification(struct controller *ctrl)
+{
+       if (pciehp_request_irq(ctrl))
+               return -1;
+       if (pcie_enable_notification(ctrl)) {
+               pciehp_free_irq(ctrl);
+               return -1;
+       }
         return 0;
+}
  
-       /* We end up here for the many possible ways to fail this API. */
-abort_disable_intr:
-       if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE))
-               err("%s : disabling interrupts failed\n", __func__);
-abort:
-       return -1;
+static void pcie_shutdown_notification(struct controller *ctrl)
+{
+       pcie_disable_notification(ctrl);
+       pciehp_free_irq(ctrl);
+}
+
+static void make_slot_name(struct slot *slot)
+{
+       if (pciehp_slot_with_bus)
+               snprintf(slot->name, SLOT_NAME_SIZE, "%04d_%04d",
+                        slot->bus, slot->number);
+       else
+               snprintf(slot->name, SLOT_NAME_SIZE, "%d", slot->number);
+}
+
+static int pcie_init_slot(struct controller *ctrl)
+{
+       struct slot *slot;
+
+       slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+       if (!slot)
+               return -ENOMEM;
+
+       slot->hp_slot = 0;
+       slot->ctrl = ctrl;
+       slot->bus = ctrl->pci_dev->subordinate->number;
+       slot->device = ctrl->slot_device_offset + slot->hp_slot;
+       slot->hpc_ops = ctrl->hpc_ops;
+       slot->number = ctrl->first_slot;
+       make_slot_name(slot);
+       mutex_init(&slot->lock);
+       INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
+       list_add(&slot->slot_list, &ctrl->slot_list);
+       return 0;
+}
+
+static void pcie_cleanup_slot(struct controller *ctrl)
+{
+       struct slot *slot;
+       slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list);
+       list_del(&slot->slot_list);
+       cancel_delayed_work(&slot->work);
+       flush_scheduled_work();
+       flush_workqueue(pciehp_wq);
+       kfree(slot);
  }
  
  static inline void dbg_ctrl(struct controller *ctrl)
@@ -1176,15 +1106,23 @@ static inline void dbg_ctrl(struct controller *ctrl)
         dbg("  Comamnd Completed    : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes");
         pciehp_readw(ctrl, SLOTSTATUS, &reg16);
         dbg("Slot Status            : 0x%04x\n", reg16);
-       pciehp_readw(ctrl, SLOTSTATUS, &reg16);
+       pciehp_readw(ctrl, SLOTCTRL, &reg16);
         dbg("Slot Control           : 0x%04x\n", reg16);
  }
  
-int pcie_init(struct controller *ctrl, struct pcie_device *dev)
+struct controller *pcie_init(struct pcie_device *dev)
  {
+       struct controller *ctrl;
         u32 slot_cap;
         struct pci_dev *pdev = dev->port;
  
+       ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+       if (!ctrl) {
+               err("%s : out of memory\n", __func__);
+               goto abort;
+       }
+       INIT_LIST_HEAD(&ctrl->slot_list);
+
         ctrl->pci_dev = pdev;
         ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
         if (!ctrl->cap_base) {
@@ -1215,15 +1153,12 @@ int pcie_init(struct controller *ctrl, struct pcie_device *dev)
             !(POWER_CTRL(ctrl) | ATTN_LED(ctrl) | PWR_LED(ctrl) | EMI(ctrl)))
             ctrl->no_cmd_complete = 1;
  
-       info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
-            pdev->vendor, pdev->device,
-            pdev->subsystem_vendor, pdev->subsystem_device);
+       /* Clear all remaining event bits in Slot Status register */
+       if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f))
+               goto abort_ctrl;
  
-       if (pcie_init_hardware_part1(ctrl, dev))
-               goto abort;
-
-       if (pciehp_request_irq(ctrl))
-               goto abort;
+       /* Disable software notification */
+       pcie_disable_notification(ctrl);
  
         /*
          * If this is the first controller to be initialized,
@@ -1231,18 +1166,39 @@ int pcie_init(struct controller *ctrl, struct pcie_device *dev)
          */
         if (atomic_add_return(1, &pciehp_num_controllers) == 1) {
                 pciehp_wq = create_singlethread_workqueue("pciehpd");
-               if (!pciehp_wq) {
-                       goto abort_free_irq;
-               }
+               if (!pciehp_wq)
+                       goto abort_ctrl;
         }
  
-       if (pcie_init_hardware_part2(ctrl, dev))
-               goto abort_free_irq;
+       info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
+            pdev->vendor, pdev->device,
+            pdev->subsystem_vendor, pdev->subsystem_device);
+
+       if (pcie_init_slot(ctrl))
+               goto abort_ctrl;
  
-       return 0;
+       if (pcie_init_notification(ctrl))
+               goto abort_slot;
  
-abort_free_irq:
-       pciehp_free_irq(ctrl);
+       return ctrl;
+
+abort_slot:
+       pcie_cleanup_slot(ctrl);
+abort_ctrl:
+       kfree(ctrl);
  abort:
-       return -1;
+       return NULL;
+}
+
+void pcie_release_ctrl(struct controller *ctrl)
+{
+       pcie_shutdown_notification(ctrl);
+       pcie_cleanup_slot(ctrl);
+       /*
+        * If this is the last controller to be released, destroy the
+        * pciehp work queue
+        */
+       if (atomic_dec_and_test(&pciehp_num_controllers))
+               destroy_workqueue(pciehp_wq);
+       kfree(ctrl);
  }
diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c

index 779c5db71be46150bc5c34896bea57dc23697b9e..a796301ea03fbd3709f15be52f78053bb12380f9 100644 (file)
--- a/drivers/pci/hotplug/rpadlpar_sysfs.c
+++ b/drivers/pci/hotplug/rpadlpar_sysfs.c
@@ -14,8 +14,10 @@
   */
  #include <linux/kobject.h>
  #include <linux/string.h>
+#include <linux/pci.h>
  #include <linux/pci_hotplug.h>
  #include "rpadlpar.h"
+#include "../pci.h"
  
  #define DLPAR_KOBJ_NAME       "control"
  
@@ -27,7 +29,6 @@
  
  #define MAX_DRC_NAME_LEN 64
  
-
  static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr,
                               const char *buf, size_t nbytes)
  {
@@ -112,7 +113,7 @@ int dlpar_sysfs_init(void)
         int error;
  
         dlpar_kobj = kobject_create_and_add(DLPAR_KOBJ_NAME,
-                                           &pci_hotplug_slots_kset->kobj);
+                                           &pci_slots_kset->kobj);
         if (!dlpar_kobj)
                 return -EINVAL;
  
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c

index 56197b600d36d5fe079cfc746b387c1c44768c99..9b714ea93d207036fe9b51d6f3c2bbe0233c1c25 100644 (file)
--- a/drivers/pci/hotplug/rpaphp_slot.c
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -33,33 +33,6 @@
  #include <asm/rtas.h>
  #include "rpaphp.h"
  
-static ssize_t address_read_file (struct hotplug_slot *php_slot, char *buf)
-{
-       int retval;
-       struct slot *slot = (struct slot *)php_slot->private;
-       struct pci_bus *bus;
-
-       if (!slot)
-               return -ENOENT;
-
-       bus = slot->bus;
-       if (!bus)
-               return -ENOENT;
-
-       if (bus->self)
-               retval = sprintf(buf, pci_name(bus->self));
-       else
-               retval = sprintf(buf, "%04x:%02x:00.0",
-                       pci_domain_nr(bus), bus->number);
-
-       return retval;
-}
-
-static struct hotplug_slot_attribute php_attr_address = {
-       .attr = {.name = "address", .mode = S_IFREG | S_IRUGO},
-       .show = address_read_file,
-};
-
  /* free up the memory used by a slot */
  static void rpaphp_release_slot(struct hotplug_slot *hotplug_slot)
  {
@@ -135,9 +108,6 @@ int rpaphp_deregister_slot(struct slot *slot)
  
         list_del(&slot->rpaphp_slot_list);
         
-       /* remove "address" file */
-       sysfs_remove_file(&php_slot->kobj, &php_attr_address.attr);
-
         retval = pci_hp_deregister(php_slot);
         if (retval)
                 err("Problem unregistering a slot %s\n", slot->name);
@@ -151,6 +121,7 @@ int rpaphp_register_slot(struct slot *slot)
  {
         struct hotplug_slot *php_slot = slot->hotplug_slot;
         int retval;
+       int slotno;
  
         dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", 
                 __func__, slot->dn->full_name, slot->index, slot->name,
@@ -162,19 +133,16 @@ int rpaphp_register_slot(struct slot *slot)
                 return -EAGAIN;
         }       
  
-       retval = pci_hp_register(php_slot);
+       if (slot->dn->child)
+               slotno = PCI_SLOT(PCI_DN(slot->dn->child)->devfn);
+       else
+               slotno = -1;
+       retval = pci_hp_register(php_slot, slot->bus, slotno);
         if (retval) {
                 err("pci_hp_register failed with error %d\n", retval);
                 return retval;
         }
  
-       /* create "address" file */
-       retval = sysfs_create_file(&php_slot->kobj, &php_attr_address.attr);
-       if (retval) {
-               err("sysfs_create_file failed with error %d\n", retval);
-               goto sysfs_fail;
-       }
-
         /* add slot to our internal list */
         list_add(&slot->rpaphp_slot_list, &rpaphp_slot_head);
         info("Slot [%s] registered\n", slot->name);
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c

index 2fe37cd85b69a77441625762c6007f8f473cf2e5..410fe0394a8ea63fb8de8542654d514d2ec36992 100644 (file)
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -197,13 +197,15 @@ static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot,
  static struct hotplug_slot * sn_hp_destroy(void)
  {
         struct slot *slot;
+       struct pci_slot *pci_slot;
         struct hotplug_slot *bss_hotplug_slot = NULL;
  
         list_for_each_entry(slot, &sn_hp_list, hp_list) {
                 bss_hotplug_slot = slot->hotplug_slot;
+               pci_slot = bss_hotplug_slot->pci_slot;
                 list_del(&((struct slot *)bss_hotplug_slot->private)->
                          hp_list);
-               sysfs_remove_file(&bss_hotplug_slot->kobj,
+               sysfs_remove_file(&pci_slot->kobj,
                                   &sn_slot_path_attr.attr);
                 break;
         }
@@ -614,6 +616,7 @@ static void sn_release_slot(struct hotplug_slot *bss_hotplug_slot)
  static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
  {
         int device;
+       struct pci_slot *pci_slot;
         struct hotplug_slot *bss_hotplug_slot;
         int rc = 0;
  
@@ -650,11 +653,12 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
                 bss_hotplug_slot->ops = &sn_hotplug_slot_ops;
                 bss_hotplug_slot->release = &sn_release_slot;
  
-               rc = pci_hp_register(bss_hotplug_slot);
+               rc = pci_hp_register(bss_hotplug_slot, pci_bus, device);
                 if (rc)
                         goto register_err;
  
-               rc = sysfs_create_file(&bss_hotplug_slot->kobj,
+               pci_slot = bss_hotplug_slot->pci_slot;
+               rc = sysfs_create_file(&pci_slot->kobj,
                                        &sn_slot_path_attr.attr);
                 if (rc)
                         goto register_err;
@@ -664,7 +668,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
  
  register_err:
         dev_dbg(&pci_bus->self->dev, "bus failed to register with err = %d\n",
-               rc);
+               rc);
  
  alloc_err:
         if (rc == -ENOMEM)
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h

index f66e8d6315abecab94d94aaffe2cf7d133a34d34..8a026f750deb65f64831f50a603c32d761e6c5ff 100644 (file)
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -170,6 +170,7 @@ extern void shpchp_queue_pushbutton_work(struct work_struct *work);
  extern int shpc_init( struct controller *ctrl, struct pci_dev *pdev);
  
  #ifdef CONFIG_ACPI
+#include <linux/pci-acpi.h>
  static inline int get_hp_params_from_firmware(struct pci_dev *dev,
                                               struct hotplug_params *hpp)
  {
@@ -177,14 +178,15 @@ static inline int get_hp_params_from_firmware(struct pci_dev *dev,
                         return -ENODEV;
         return 0;
  }
-#define get_hp_hw_control_from_firmware(pdev)                          \
-       do {                                                            \
-               if (DEVICE_ACPI_HANDLE(&(pdev->dev)))                   \
-                       acpi_run_oshp(DEVICE_ACPI_HANDLE(&(pdev->dev)));\
-       } while (0)
+
+static inline int get_hp_hw_control_from_firmware(struct pci_dev *dev)
+{
+       u32 flags = OSC_SHPC_NATIVE_HP_CONTROL;
+       return acpi_get_hp_hw_control_from_firmware(dev, flags);
+}
  #else
  #define get_hp_params_from_firmware(dev, hpp) (-ENODEV)
-#define get_hp_hw_control_from_firmware(dev) do { } while (0)
+#define get_hp_hw_control_from_firmware(dev) (0)
  #endif
  
  struct ctrl_reg {
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c

index 97848654652a1d2e03e6f648c268012f9b0711d9..a8cbd039b85bfde22a4f6e019d8de62cb5714f9e 100644 (file)
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -39,7 +39,7 @@
  int shpchp_debug;
  int shpchp_poll_mode;
  int shpchp_poll_time;
-int shpchp_slot_with_bus;
+static int shpchp_slot_with_bus;
  struct workqueue_struct *shpchp_wq;
  
  #define DRIVER_VERSION "0.4"
@@ -68,7 +68,6 @@ static int get_power_status   (struct hotplug_slot *slot, u8 *value);
  static int get_attention_status        (struct hotplug_slot *slot, u8 *value);
  static int get_latch_status    (struct hotplug_slot *slot, u8 *value);
  static int get_adapter_status  (struct hotplug_slot *slot, u8 *value);
-static int get_address         (struct hotplug_slot *slot, u32 *value);
  static int get_max_bus_speed   (struct hotplug_slot *slot, enum pci_bus_speed *value);
  static int get_cur_bus_speed   (struct hotplug_slot *slot, enum pci_bus_speed *value);
  
@@ -81,7 +80,6 @@ static struct hotplug_slot_ops shpchp_hotplug_slot_ops = {
         .get_attention_status = get_attention_status,
         .get_latch_status =     get_latch_status,
         .get_adapter_status =   get_adapter_status,
-       .get_address =          get_address,
         .get_max_bus_speed =    get_max_bus_speed,
         .get_cur_bus_speed =    get_cur_bus_speed,
  };
@@ -159,7 +157,8 @@ static int init_slots(struct controller *ctrl)
                 dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x "
                     "slot_device_offset=%x\n", slot->bus, slot->device,
                     slot->hp_slot, slot->number, ctrl->slot_device_offset);
-               retval = pci_hp_register(slot->hotplug_slot);
+               retval = pci_hp_register(slot->hotplug_slot,
+                               ctrl->pci_dev->subordinate, slot->device);
                 if (retval) {
                         err("pci_hp_register failed with error %d\n", retval);
                         if (retval == -EEXIST)
@@ -288,19 +287,8 @@ static int get_adapter_status (struct hotplug_slot *hotplug_slot, u8 *value)
         return 0;
  }
  
-static int get_address (struct hotplug_slot *hotplug_slot, u32 *value)
-{
-       struct slot *slot = get_slot(hotplug_slot);
-       struct pci_bus *bus = slot->ctrl->pci_dev->subordinate;
-
-       dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name);
-
-       *value = (pci_domain_nr(bus) << 16) | (slot->bus << 8) | slot->device;
-
-       return 0;
-}
-
-static int get_max_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
+static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
+                               enum pci_bus_speed *value)
  {
         struct slot *slot = get_slot(hotplug_slot);
         int retval;
@@ -330,13 +318,14 @@ static int get_cur_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_sp
  
  static int is_shpc_capable(struct pci_dev *dev)
  {
-       if ((dev->vendor == PCI_VENDOR_ID_AMD) || (dev->device ==
-                               PCI_DEVICE_ID_AMD_GOLAM_7450))
-               return 1;
-       if (pci_find_capability(dev, PCI_CAP_ID_SHPC))
-               return 1;
-
-       return 0;
+       if ((dev->vendor == PCI_VENDOR_ID_AMD) || (dev->device ==
+                                               PCI_DEVICE_ID_AMD_GOLAM_7450))
+               return 1;
+       if (!pci_find_capability(dev, PCI_CAP_ID_SHPC))
+               return 0;
+       if (get_hp_hw_control_from_firmware(dev))
+               return 0;
+       return 1;
  }
  
  static int shpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c

index 7d770b2cd889a961f2b9c46226b2e7fe59894fb9..7a0bff364cd4f816cf6475b0c80237f53b73f805 100644 (file)
--- a/drivers/pci/hotplug/shpchp_hpc.c
+++ b/drivers/pci/hotplug/shpchp_hpc.c
@@ -1084,7 +1084,6 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev)
         dbg("%s: HPC at b:d:f:irq=0x%x:%x:%x:%x\n", __func__,
                         pdev->bus->number, PCI_SLOT(pdev->devfn),
                         PCI_FUNC(pdev->devfn), pdev->irq);
-       get_hp_hw_control_from_firmware(pdev);
  
         /*
          * If this is the first controller to be initialized,
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c

index bb0642318a959453e9ef0c68190070ac7d9290a4..3f7b81c065d25188e17d82665b63575a91c92e19 100644 (file)
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1748,7 +1748,6 @@ int __init init_dmars(void)
         deferred_flush = kzalloc(g_num_of_iommus *
                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
         if (!deferred_flush) {
-               kfree(g_iommus);
                 ret = -ENOMEM;
                 goto error;
         }
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c

index 8c61304cbb37e8b9f32343c0fb93cd73bd276d27..15af618d36e20a0f64c704b67cc14e2c1dc2daf2 100644 (file)
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -70,12 +70,10 @@ arch_teardown_msi_irqs(struct pci_dev *dev)
         }
  }
  
-static void msi_set_enable(struct pci_dev *dev, int enable)
+static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
  {
-       int pos;
         u16 control;
  
-       pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
         if (pos) {
                 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
                 control &= ~PCI_MSI_FLAGS_ENABLE;
@@ -85,6 +83,11 @@ static void msi_set_enable(struct pci_dev *dev, int enable)
         }
  }
  
+static void msi_set_enable(struct pci_dev *dev, int enable)
+{
+       __msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable);
+}
+
  static void msix_set_enable(struct pci_dev *dev, int enable)
  {
         int pos;
@@ -141,7 +144,8 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
                         mask_bits |= flag & mask;
                         pci_write_config_dword(entry->dev, pos, mask_bits);
                 } else {
-                       msi_set_enable(entry->dev, !flag);
+                       __msi_set_enable(entry->dev, entry->msi_attrib.pos,
+                                        !flag);
                 }
                 break;
         case PCI_CAP_ID_MSIX:
@@ -561,9 +565,8 @@ int pci_enable_msi(struct pci_dev* dev)
  
         /* Check whether driver already requested for MSI-X irqs */
         if (dev->msix_enabled) {
-               printk(KERN_INFO "PCI: %s: Can't enable MSI.  "
-                       "Device already has MSI-X enabled\n",
-                       pci_name(dev));
+               dev_info(&dev->dev, "can't enable MSI "
+                        "(MSI-X already enabled)\n");
                 return -EINVAL;
         }
         status = msi_capability_init(dev);
@@ -686,9 +689,8 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
  
         /* Check whether driver already requested for MSI irq */
         if (dev->msi_enabled) {
-               printk(KERN_INFO "PCI: %s: Can't enable MSI-X.  "
-                      "Device already has an MSI irq assigned\n",
-                      pci_name(dev));
+               dev_info(&dev->dev, "can't enable MSI-X "
+                      "(MSI IRQ already assigned)\n");
                 return -EINVAL;
         }
         status = msix_capability_init(dev, entries, nvec);
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c

index 9d6fc8e6285d0c557a5e1778e468f49066ff4da5..7764768b6a0e7dfc225c76693b3be997df4b926f 100644 (file)
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -21,12 +21,19 @@
  
  struct acpi_osc_data {
         acpi_handle handle;
-       u32 ctrlset_buf[3];
-       u32 global_ctrlsets;
+       u32 support_set;
+       u32 control_set;
+       int is_queried;
+       u32 query_result;
         struct list_head sibiling;
  };
  static LIST_HEAD(acpi_osc_data_list);
  
+struct acpi_osc_args {
+       u32 capbuf[3];
+       u32 query_result;
+};
+
  static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle)
  {
         struct acpi_osc_data *data;
@@ -44,42 +51,18 @@ static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle)
         return data;
  }
  
-static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
+static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
+                         0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
  
-static acpi_status  
-acpi_query_osc (
-       acpi_handle     handle,
-       u32             level,
-       void            *context,
-       void            **retval )
+static acpi_status acpi_run_osc(acpi_handle handle,
+                               struct acpi_osc_args *osc_args)
  {
-       acpi_status             status;
-       struct acpi_object_list input;
-       union acpi_object       in_params[4];
-       struct acpi_buffer      output = {ACPI_ALLOCATE_BUFFER, NULL};
-       union acpi_object       *out_obj;
-       u32                     osc_dw0;
-       acpi_status *ret_status = (acpi_status *)retval;
-       struct acpi_osc_data *osc_data;
-       u32 flags = (unsigned long)context, temp;
-       acpi_handle tmp;
-
-       status = acpi_get_handle(handle, "_OSC", &tmp);
-       if (ACPI_FAILURE(status))
-               return status;
-
-       osc_data = acpi_get_osc_data(handle);
-       if (!osc_data) {
-               printk(KERN_ERR "acpi osc data array is full\n");
-               return AE_ERROR;
-       }
-
-       osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] |= (flags & OSC_SUPPORT_MASKS);
-
-       /* do _OSC query for all possible controls */
-       temp = osc_data->ctrlset_buf[OSC_CONTROL_TYPE];
-       osc_data->ctrlset_buf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
-       osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
+       acpi_status status;
+       struct acpi_object_list input;
+       union acpi_object in_params[4];
+       struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
+       union acpi_object *out_obj;
+       u32 osc_dw0, flags = osc_args->capbuf[OSC_QUERY_TYPE];
  
         /* Setting up input parameters */
         input.count = 4;
@@ -93,20 +76,19 @@ acpi_query_osc (
         in_params[2].integer.value      = 3;
         in_params[3].type               = ACPI_TYPE_BUFFER;
         in_params[3].buffer.length      = 12;
-       in_params[3].buffer.pointer     = (u8 *)osc_data->ctrlset_buf;
+       in_params[3].buffer.pointer     = (u8 *)osc_args->capbuf;
  
         status = acpi_evaluate_object(handle, "_OSC", &input, &output);
         if (ACPI_FAILURE(status))
-               goto out_nofree;
-       out_obj = output.pointer;
+               return status;
  
+       out_obj = output.pointer;
         if (out_obj->type != ACPI_TYPE_BUFFER) {
-               printk(KERN_DEBUG  
-                       "Evaluate _OSC returns wrong type\n");
+               printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n");
                 status = AE_TYPE;
-               goto query_osc_out;
+               goto out_kfree;
         }
-       osc_dw0 = *((u32 *) out_obj->buffer.pointer);
+       osc_dw0 = *((u32 *)out_obj->buffer.pointer);
         if (osc_dw0) {
                 if (osc_dw0 & OSC_REQUEST_ERROR)
                         printk(KERN_DEBUG "_OSC request fails\n"); 
@@ -115,93 +97,58 @@ acpi_query_osc (
                 if (osc_dw0 & OSC_INVALID_REVISION_ERROR)
                         printk(KERN_DEBUG "_OSC invalid revision\n"); 
                 if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) {
-                       /* Update Global Control Set */
-                       osc_data->global_ctrlsets =
-                               *((u32 *)(out_obj->buffer.pointer + 8));
-                       status = AE_OK;
-                       goto query_osc_out;
+                       if (flags & OSC_QUERY_ENABLE)
+                               goto out_success;
+                       printk(KERN_DEBUG "_OSC FW not grant req. control\n");
+                       status = AE_SUPPORT;
+                       goto out_kfree;
                 }
                 status = AE_ERROR;
-               goto query_osc_out;
+               goto out_kfree;
         }
-
-       /* Update Global Control Set */
-       osc_data->global_ctrlsets = *((u32 *)(out_obj->buffer.pointer + 8));
+out_success:
+       if (flags & OSC_QUERY_ENABLE)
+               osc_args->query_result =
+                       *((u32 *)(out_obj->buffer.pointer + 8));
         status = AE_OK;
  
-query_osc_out:
+out_kfree:
         kfree(output.pointer);
-out_nofree:
-       *ret_status = status;
-
-       osc_data->ctrlset_buf[OSC_QUERY_TYPE] = !OSC_QUERY_ENABLE;
-       osc_data->ctrlset_buf[OSC_CONTROL_TYPE] = temp;
-       if (ACPI_FAILURE(status)) {
-               /* no osc support at all */
-               osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] = 0;
-       }
-
         return status;
  }
  
-
-static acpi_status  
-acpi_run_osc (
-       acpi_handle     handle,
-       void            *context)
+static acpi_status acpi_query_osc(acpi_handle handle,
+                                 u32 level, void *context, void **retval)
  {
-       acpi_status             status;
-       struct acpi_object_list input;
-       union acpi_object       in_params[4];
-       struct acpi_buffer      output = {ACPI_ALLOCATE_BUFFER, NULL};
-       union acpi_object       *out_obj;
-       u32                     osc_dw0;
-
-       /* Setting up input parameters */
-       input.count = 4;
-       input.pointer = in_params;
-       in_params[0].type               = ACPI_TYPE_BUFFER;
-       in_params[0].buffer.length      = 16;
-       in_params[0].buffer.pointer     = OSC_UUID;
-       in_params[1].type               = ACPI_TYPE_INTEGER;
-       in_params[1].integer.value      = 1;
-       in_params[2].type               = ACPI_TYPE_INTEGER;
-       in_params[2].integer.value      = 3;
-       in_params[3].type               = ACPI_TYPE_BUFFER;
-       in_params[3].buffer.length      = 12;
-       in_params[3].buffer.pointer     = (u8 *)context;
+       acpi_status status;
+       struct acpi_osc_data *osc_data;
+       u32 flags = (unsigned long)context, support_set;
+       acpi_handle tmp;
+       struct acpi_osc_args osc_args;
  
-       status = acpi_evaluate_object(handle, "_OSC", &input, &output);
-       if (ACPI_FAILURE (status))
+       status = acpi_get_handle(handle, "_OSC", &tmp);
+       if (ACPI_FAILURE(status))
                 return status;
  
-       out_obj = output.pointer;
-       if (out_obj->type != ACPI_TYPE_BUFFER) {
-               printk(KERN_DEBUG  
-                       "Evaluate _OSC returns wrong type\n");
-               status = AE_TYPE;
-               goto run_osc_out;
+       osc_data = acpi_get_osc_data(handle);
+       if (!osc_data) {
+               printk(KERN_ERR "acpi osc data array is full\n");
+               return AE_ERROR;
         }
-       osc_dw0 = *((u32 *) out_obj->buffer.pointer);
-       if (osc_dw0) {
-               if (osc_dw0 & OSC_REQUEST_ERROR)
-                       printk(KERN_DEBUG "_OSC request fails\n"); 
-               if (osc_dw0 & OSC_INVALID_UUID_ERROR)
-                       printk(KERN_DEBUG "_OSC invalid UUID\n"); 
-               if (osc_dw0 & OSC_INVALID_REVISION_ERROR)
-                       printk(KERN_DEBUG "_OSC invalid revision\n"); 
-               if (osc_dw0 & OSC_CAPABILITIES_MASK_ERROR) {
-                       printk(KERN_DEBUG "_OSC FW not grant req. control\n");
-                       status = AE_SUPPORT;
-                       goto run_osc_out;
-               }
-               status = AE_ERROR;
-               goto run_osc_out;
+
+       /* do _OSC query for all possible controls */
+       support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS);
+       osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
+       osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set;
+       osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
+
+       status = acpi_run_osc(handle, &osc_args);
+       if (ACPI_SUCCESS(status)) {
+               osc_data->support_set = support_set;
+               osc_data->query_result = osc_args.query_result;
+               osc_data->is_queried = 1;
         }
-       status = AE_OK;
  
-run_osc_out:
-       kfree(output.pointer);
         return status;
  }
  
@@ -215,15 +162,11 @@ run_osc_out:
   **/
  acpi_status __pci_osc_support_set(u32 flags, const char *hid)
  {
-       acpi_status retval = AE_NOT_FOUND;
-
-       if (!(flags & OSC_SUPPORT_MASKS)) {
+       if (!(flags & OSC_SUPPORT_MASKS))
                 return AE_TYPE;
-       }
-       acpi_get_devices(hid,
-                       acpi_query_osc,
-                       (void *)(unsigned long)flags,
-                       (void **) &retval );
+
+       acpi_get_devices(hid, acpi_query_osc,
+                        (void *)(unsigned long)flags, NULL);
         return AE_OK;
  }
  
@@ -236,10 +179,11 @@ acpi_status __pci_osc_support_set(u32 flags, const char *hid)
   **/
  acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
  {
-       acpi_status     status;
-       u32             ctrlset;
+       acpi_status status;
+       u32 ctrlset, control_set;
         acpi_handle tmp;
         struct acpi_osc_data *osc_data;
+       struct acpi_osc_args osc_args;
  
         status = acpi_get_handle(handle, "_OSC", &tmp);
         if (ACPI_FAILURE(status))
@@ -252,24 +196,25 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
         }
  
         ctrlset = (flags & OSC_CONTROL_MASKS);
-       if (!ctrlset) {
+       if (!ctrlset)
                 return AE_TYPE;
-       }
-       if (osc_data->ctrlset_buf[OSC_SUPPORT_TYPE] &&
-               ((osc_data->global_ctrlsets & ctrlset) != ctrlset)) {
+
+       if (osc_data->is_queried &&
+           ((osc_data->query_result & ctrlset) != ctrlset))
                 return AE_SUPPORT;
-       }
-       osc_data->ctrlset_buf[OSC_CONTROL_TYPE] |= ctrlset;
-       status = acpi_run_osc(handle, osc_data->ctrlset_buf);
-       if (ACPI_FAILURE (status)) {
-               osc_data->ctrlset_buf[OSC_CONTROL_TYPE] &= ~ctrlset;
-       }
-       
+
+       control_set = osc_data->control_set | ctrlset;
+       osc_args.capbuf[OSC_QUERY_TYPE] = 0;
+       osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set;
+       osc_args.capbuf[OSC_CONTROL_TYPE] = control_set;
+       status = acpi_run_osc(handle, &osc_args);
+       if (ACPI_SUCCESS(status))
+               osc_data->control_set = control_set;
+
         return status;
  }
  EXPORT_SYMBOL(pci_osc_control_set);
  
-#ifdef CONFIG_ACPI_SLEEP
  /*
   * _SxD returns the D-state with the highest power
   * (lowest D-state number) supported in the S-state "x".
@@ -293,13 +238,11 @@ EXPORT_SYMBOL(pci_osc_control_set);
   *     choose highest power _SxD or any lower power
   */
  
-static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev,
-       pm_message_t state)
+static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
  {
         int acpi_state;
  
-       acpi_state = acpi_pm_device_sleep_state(&pdev->dev,
-               device_may_wakeup(&pdev->dev), NULL);
+       acpi_state = acpi_pm_device_sleep_state(&pdev->dev, NULL);
         if (acpi_state < 0)
                 return PCI_POWER_ERROR;
  
@@ -315,7 +258,13 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev,
         }
         return PCI_POWER_ERROR;
  }
-#endif
+
+static bool acpi_pci_power_manageable(struct pci_dev *dev)
+{
+       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+
+       return handle ? acpi_bus_power_manageable(handle) : false;
+}
  
  static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
  {
@@ -328,12 +277,11 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
                 [PCI_D3hot] = ACPI_STATE_D3,
                 [PCI_D3cold] = ACPI_STATE_D3
         };
+       int error = -EINVAL;
  
-       if (!handle)
-               return -ENODEV;
         /* If the ACPI device has _EJ0, ignore the device */
-       if (ACPI_SUCCESS(acpi_get_handle(handle, "_EJ0", &tmp)))
-               return 0;
+       if (!handle || ACPI_SUCCESS(acpi_get_handle(handle, "_EJ0", &tmp)))
+               return -ENODEV;
  
         switch (state) {
         case PCI_D0:
@@ -341,11 +289,41 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
         case PCI_D2:
         case PCI_D3hot:
         case PCI_D3cold:
-               return acpi_bus_set_power(handle, state_conv[state]);
+               error = acpi_bus_set_power(handle, state_conv[state]);
         }
-       return -EINVAL;
+
+       if (!error)
+               dev_printk(KERN_INFO, &dev->dev,
+                               "power state changed by ACPI to D%d\n", state);
+
+       return error;
+}
+
+static bool acpi_pci_can_wakeup(struct pci_dev *dev)
+{
+       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+
+       return handle ? acpi_bus_can_wakeup(handle) : false;
+}
+
+static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable)
+{
+       int error = acpi_pm_device_sleep_wake(&dev->dev, enable);
+
+       if (!error)
+               dev_printk(KERN_INFO, &dev->dev,
+                               "wake-up capability %s by ACPI\n",
+                               enable ? "enabled" : "disabled");
+       return error;
  }
  
+static struct pci_platform_pm_ops acpi_pci_platform_pm = {
+       .is_manageable = acpi_pci_power_manageable,
+       .set_state = acpi_pci_set_power_state,
+       .choose_state = acpi_pci_choose_state,
+       .can_wakeup = acpi_pci_can_wakeup,
+       .sleep_wake = acpi_pci_sleep_wake,
+};
  
  /* ACPI bus type */
  static int acpi_pci_find_device(struct device *dev, acpi_handle *handle)
@@ -397,10 +375,7 @@ static int __init acpi_pci_init(void)
         ret = register_acpi_bus_type(&acpi_pci_bus);
         if (ret)
                 return 0;
-#ifdef CONFIG_ACPI_SLEEP
-       platform_pci_choose_state = acpi_pci_choose_state;
-#endif
-       platform_pci_set_power_state = acpi_pci_set_power_state;
+       pci_set_platform_pm(&acpi_pci_platform_pm);
         return 0;
  }
  arch_initcall(acpi_pci_init);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c

index e1637bd82b8e4cc28710ed4c2f02d581155764d6..a13f53486114fe27e665bfc7451933d823465ca1 100644 (file)
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -274,7 +274,57 @@ static int pci_device_remove(struct device * dev)
         return 0;
  }
  
-static int pci_device_suspend(struct device * dev, pm_message_t state)
+static void pci_device_shutdown(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct pci_driver *drv = pci_dev->driver;
+
+       if (drv && drv->shutdown)
+               drv->shutdown(pci_dev);
+       pci_msi_shutdown(pci_dev);
+       pci_msix_shutdown(pci_dev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+/*
+ * Default "suspend" method for devices that have no driver provided suspend,
+ * or not even a driver at all.
+ */
+static void pci_default_pm_suspend(struct pci_dev *pci_dev)
+{
+       pci_save_state(pci_dev);
+       /*
+        * mark its power state as "unknown", since we don't know if
+        * e.g. the BIOS will change its device state when we suspend.
+        */
+       if (pci_dev->current_state == PCI_D0)
+               pci_dev->current_state = PCI_UNKNOWN;
+}
+
+/*
+ * Default "resume" method for devices that have no driver provided resume,
+ * or not even a driver at all.
+ */
+static int pci_default_pm_resume(struct pci_dev *pci_dev)
+{
+       int retval = 0;
+
+       /* restore the PCI config space */
+       pci_restore_state(pci_dev);
+       /* if the device was enabled before suspend, reenable */
+       retval = pci_reenable_device(pci_dev);
+       /*
+        * if the device was busmaster before the suspend, make it busmaster
+        * again
+        */
+       if (pci_dev->is_busmaster)
+               pci_set_master(pci_dev);
+
+       return retval;
+}
+
+static int pci_legacy_suspend(struct device *dev, pm_message_t state)
  {
         struct pci_dev * pci_dev = to_pci_dev(dev);
         struct pci_driver * drv = pci_dev->driver;
@@ -284,18 +334,12 @@ static int pci_device_suspend(struct device * dev, pm_message_t state)
                 i = drv->suspend(pci_dev, state);
                 suspend_report_result(drv->suspend, i);
         } else {
-               pci_save_state(pci_dev);
-               /*
-                * mark its power state as "unknown", since we don't know if
-                * e.g. the BIOS will change its device state when we suspend.
-                */
-               if (pci_dev->current_state == PCI_D0)
-                       pci_dev->current_state = PCI_UNKNOWN;
+               pci_default_pm_suspend(pci_dev);
         }
         return i;
  }
  
-static int pci_device_suspend_late(struct device * dev, pm_message_t state)
+static int pci_legacy_suspend_late(struct device *dev, pm_message_t state)
  {
         struct pci_dev * pci_dev = to_pci_dev(dev);
         struct pci_driver * drv = pci_dev->driver;
@@ -308,26 +352,7 @@ static int pci_device_suspend_late(struct device * dev, pm_message_t state)
         return i;
  }
  
-/*
- * Default resume method for devices that have no driver provided resume,
- * or not even a driver at all.
- */
-static int pci_default_resume(struct pci_dev *pci_dev)
-{
-       int retval = 0;
-
-       /* restore the PCI config space */
-       pci_restore_state(pci_dev);
-       /* if the device was enabled before suspend, reenable */
-       retval = pci_reenable_device(pci_dev);
-       /* if the device was busmaster before the suspend, make it busmaster again */
-       if (pci_dev->is_busmaster)
-               pci_set_master(pci_dev);
-
-       return retval;
-}
-
-static int pci_device_resume(struct device * dev)
+static int pci_legacy_resume(struct device *dev)
  {
         int error;
         struct pci_dev * pci_dev = to_pci_dev(dev);
@@ -336,34 +361,313 @@ static int pci_device_resume(struct device * dev)
         if (drv && drv->resume)
                 error = drv->resume(pci_dev);
         else
-               error = pci_default_resume(pci_dev);
+               error = pci_default_pm_resume(pci_dev);
         return error;
  }
  
-static int pci_device_resume_early(struct device * dev)
+static int pci_legacy_resume_early(struct device *dev)
  {
         int error = 0;
         struct pci_dev * pci_dev = to_pci_dev(dev);
         struct pci_driver * drv = pci_dev->driver;
  
-       pci_fixup_device(pci_fixup_resume, pci_dev);
-
         if (drv && drv->resume_early)
                 error = drv->resume_early(pci_dev);
         return error;
  }
  
-static void pci_device_shutdown(struct device *dev)
+static int pci_pm_prepare(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int error = 0;
+
+       if (drv && drv->pm && drv->pm->prepare)
+               error = drv->pm->prepare(dev);
+
+       return error;
+}
+
+static void pci_pm_complete(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+
+       if (drv && drv->pm && drv->pm->complete)
+               drv->pm->complete(dev);
+}
+
+#ifdef CONFIG_SUSPEND
+
+static int pci_pm_suspend(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct device_driver *drv = dev->driver;
+       int error = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->suspend) {
+                       error = drv->pm->suspend(dev);
+                       suspend_report_result(drv->pm->suspend, error);
+               } else {
+                       pci_default_pm_suspend(pci_dev);
+               }
+       } else {
+               error = pci_legacy_suspend(dev, PMSG_SUSPEND);
+       }
+       pci_fixup_device(pci_fixup_suspend, pci_dev);
+
+       return error;
+}
+
+static int pci_pm_suspend_noirq(struct device *dev)
  {
         struct pci_dev *pci_dev = to_pci_dev(dev);
         struct pci_driver *drv = pci_dev->driver;
+       int error = 0;
  
-       if (drv && drv->shutdown)
-               drv->shutdown(pci_dev);
-       pci_msi_shutdown(pci_dev);
-       pci_msix_shutdown(pci_dev);
+       if (drv && drv->pm) {
+               if (drv->pm->suspend_noirq) {
+                       error = drv->pm->suspend_noirq(dev);
+                       suspend_report_result(drv->pm->suspend_noirq, error);
+               }
+       } else {
+               error = pci_legacy_suspend_late(dev, PMSG_SUSPEND);
+       }
+
+       return error;
  }
  
+static int pci_pm_resume(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct device_driver *drv = dev->driver;
+       int error;
+
+       pci_fixup_device(pci_fixup_resume, pci_dev);
+
+       if (drv && drv->pm) {
+               error = drv->pm->resume ? drv->pm->resume(dev) :
+                       pci_default_pm_resume(pci_dev);
+       } else {
+               error = pci_legacy_resume(dev);
+       }
+
+       return error;
+}
+
+static int pci_pm_resume_noirq(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct pci_driver *drv = pci_dev->driver;
+       int error = 0;
+
+       pci_fixup_device(pci_fixup_resume_early, pci_dev);
+
+       if (drv && drv->pm) {
+               if (drv->pm->resume_noirq)
+                       error = drv->pm->resume_noirq(dev);
+       } else {
+               error = pci_legacy_resume_early(dev);
+       }
+
+       return error;
+}
+
+#else /* !CONFIG_SUSPEND */
+
+#define pci_pm_suspend         NULL
+#define pci_pm_suspend_noirq   NULL
+#define pci_pm_resume          NULL
+#define pci_pm_resume_noirq    NULL
+
+#endif /* !CONFIG_SUSPEND */
+
+#ifdef CONFIG_HIBERNATION
+
+static int pci_pm_freeze(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct device_driver *drv = dev->driver;
+       int error = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->freeze) {
+                       error = drv->pm->freeze(dev);
+                       suspend_report_result(drv->pm->freeze, error);
+               } else {
+                       pci_default_pm_suspend(pci_dev);
+               }
+       } else {
+               error = pci_legacy_suspend(dev, PMSG_FREEZE);
+               pci_fixup_device(pci_fixup_suspend, pci_dev);
+       }
+
+       return error;
+}
+
+static int pci_pm_freeze_noirq(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct pci_driver *drv = pci_dev->driver;
+       int error = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->freeze_noirq) {
+                       error = drv->pm->freeze_noirq(dev);
+                       suspend_report_result(drv->pm->freeze_noirq, error);
+               }
+       } else {
+               error = pci_legacy_suspend_late(dev, PMSG_FREEZE);
+       }
+
+       return error;
+}
+
+static int pci_pm_thaw(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int error = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->thaw)
+                       error =  drv->pm->thaw(dev);
+       } else {
+               pci_fixup_device(pci_fixup_resume, to_pci_dev(dev));
+               error = pci_legacy_resume(dev);
+       }
+
+       return error;
+}
+
+static int pci_pm_thaw_noirq(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct pci_driver *drv = pci_dev->driver;
+       int error = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->thaw_noirq)
+                       error = drv->pm->thaw_noirq(dev);
+       } else {
+               pci_fixup_device(pci_fixup_resume_early, pci_dev);
+               error = pci_legacy_resume_early(dev);
+       }
+
+       return error;
+}
+
+static int pci_pm_poweroff(struct device *dev)
+{
+       struct device_driver *drv = dev->driver;
+       int error = 0;
+
+       pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev));
+
+       if (drv && drv->pm) {
+               if (drv->pm->poweroff) {
+                       error = drv->pm->poweroff(dev);
+                       suspend_report_result(drv->pm->poweroff, error);
+               }
+       } else {
+               error = pci_legacy_suspend(dev, PMSG_HIBERNATE);
+       }
+
+       return error;
+}
+
+static int pci_pm_poweroff_noirq(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct pci_driver *drv = pci_dev->driver;
+       int error = 0;
+
+       if (drv && drv->pm) {
+               if (drv->pm->poweroff_noirq) {
+                       error = drv->pm->poweroff_noirq(dev);
+                       suspend_report_result(drv->pm->poweroff_noirq, error);
+               }
+       } else {
+               error = pci_legacy_suspend_late(dev, PMSG_HIBERNATE);
+       }
+
+       return error;
+}
+
+static int pci_pm_restore(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct device_driver *drv = dev->driver;
+       int error;
+
+       if (drv && drv->pm) {
+               error = drv->pm->restore ? drv->pm->restore(dev) :
+                       pci_default_pm_resume(pci_dev);
+       } else {
+               error = pci_legacy_resume(dev);
+       }
+       pci_fixup_device(pci_fixup_resume, pci_dev);
+
+       return error;
+}
+
+static int pci_pm_restore_noirq(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct pci_driver *drv = pci_dev->driver;
+       int error = 0;
+
+       pci_fixup_device(pci_fixup_resume, pci_dev);
+
+       if (drv && drv->pm) {
+               if (drv->pm->restore_noirq)
+                       error = drv->pm->restore_noirq(dev);
+       } else {
+               error = pci_legacy_resume_early(dev);
+       }
+       pci_fixup_device(pci_fixup_resume_early, pci_dev);
+
+       return error;
+}
+
+#else /* !CONFIG_HIBERNATION */
+
+#define pci_pm_freeze          NULL
+#define pci_pm_freeze_noirq    NULL
+#define pci_pm_thaw            NULL
+#define pci_pm_thaw_noirq      NULL
+#define pci_pm_poweroff                NULL
+#define pci_pm_poweroff_noirq  NULL
+#define pci_pm_restore         NULL
+#define pci_pm_restore_noirq   NULL
+
+#endif /* !CONFIG_HIBERNATION */
+
+struct pm_ext_ops pci_pm_ops = {
+       .base = {
+               .prepare = pci_pm_prepare,
+               .complete = pci_pm_complete,
+               .suspend = pci_pm_suspend,
+               .resume = pci_pm_resume,
+               .freeze = pci_pm_freeze,
+               .thaw = pci_pm_thaw,
+               .poweroff = pci_pm_poweroff,
+               .restore = pci_pm_restore,
+       },
+       .suspend_noirq = pci_pm_suspend_noirq,
+       .resume_noirq = pci_pm_resume_noirq,
+       .freeze_noirq = pci_pm_freeze_noirq,
+       .thaw_noirq = pci_pm_thaw_noirq,
+       .poweroff_noirq = pci_pm_poweroff_noirq,
+       .restore_noirq = pci_pm_restore_noirq,
+};
+
+#define PCI_PM_OPS_PTR &pci_pm_ops
+
+#else /* !CONFIG_PM_SLEEP */
+
+#define PCI_PM_OPS_PTR NULL
+
+#endif /* !CONFIG_PM_SLEEP */
+
  /**
   * __pci_register_driver - register a new pci driver
   * @drv: the driver structure to register
@@ -386,6 +690,9 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
         drv->driver.owner = owner;
         drv->driver.mod_name = mod_name;
  
+       if (drv->pm)
+               drv->driver.pm = &drv->pm->base;
+
         spin_lock_init(&drv->dynids.lock);
         INIT_LIST_HEAD(&drv->dynids.list);
  
@@ -511,12 +818,9 @@ struct bus_type pci_bus_type = {
         .uevent         = pci_uevent,
         .probe          = pci_device_probe,
         .remove         = pci_device_remove,
-       .suspend        = pci_device_suspend,
-       .suspend_late   = pci_device_suspend_late,
-       .resume_early   = pci_device_resume_early,
-       .resume         = pci_device_resume,
         .shutdown       = pci_device_shutdown,
         .dev_attrs      = pci_dev_attrs,
+       .pm             = PCI_PM_OPS_PTR,
  };
  
  static int __init pci_driver_init(void)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c

index e4548ab2a93c1d148f8100262671a6ad4a04f52a..44a46c92b721a2c6d97fe1b795b6e4c0b1656ad6 100644 (file)
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1,6 +1,4 @@
  /*
- *     $Id: pci.c,v 1.91 1999/01/21 13:34:01 davem Exp $
- *
   *     PCI Bus Services, see include/linux/pci.h for further explanation.
   *
   *     Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter,
@@ -19,6 +17,7 @@
  #include <linux/string.h>
  #include <linux/log2.h>
  #include <linux/pci-aspm.h>
+#include <linux/pm_wakeup.h>
  #include <asm/dma.h>   /* isa_dma_bridge_buggy */
  #include "pci.h"
  
@@ -378,74 +377,90 @@ pci_restore_bars(struct pci_dev *dev)
                 pci_update_resource(dev, &dev->resource[i], i);
  }
  
-int (*platform_pci_set_power_state)(struct pci_dev *dev, pci_power_t t);
+static struct pci_platform_pm_ops *pci_platform_pm;
  
-/**
- * pci_set_power_state - Set the power state of a PCI device
- * @dev: PCI device to be suspended
- * @state: PCI power state (D0, D1, D2, D3hot, D3cold) we're entering
- *
- * Transition a device to a new power state, using the Power Management 
- * Capabilities in the device's config space.
- *
- * RETURN VALUE: 
- * -EINVAL if trying to enter a lower state than we're already in.
- * 0 if we're already in the requested state.
- * -EIO if device does not support PCI PM.
- * 0 if we can successfully change the power state.
- */
-int
-pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+int pci_set_platform_pm(struct pci_platform_pm_ops *ops)
  {
-       int pm, need_restore = 0;
-       u16 pmcsr, pmc;
+       if (!ops->is_manageable || !ops->set_state || !ops->choose_state
+           || !ops->sleep_wake || !ops->can_wakeup)
+               return -EINVAL;
+       pci_platform_pm = ops;
+       return 0;
+}
  
-       /* bound the state we're entering */
-       if (state > PCI_D3hot)
-               state = PCI_D3hot;
+static inline bool platform_pci_power_manageable(struct pci_dev *dev)
+{
+       return pci_platform_pm ? pci_platform_pm->is_manageable(dev) : false;
+}
  
-       /*
-        * If the device or the parent bridge can't support PCI PM, ignore
-        * the request if we're doing anything besides putting it into D0
-        * (which would only happen on boot).
-        */
-       if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev))
-               return 0;
+static inline int platform_pci_set_power_state(struct pci_dev *dev,
+                                                pci_power_t t)
+{
+       return pci_platform_pm ? pci_platform_pm->set_state(dev, t) : -ENOSYS;
+}
  
-       /* find PCI PM capability in list */
-       pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev)
+{
+       return pci_platform_pm ?
+                       pci_platform_pm->choose_state(dev) : PCI_POWER_ERROR;
+}
  
-       /* abort if the device doesn't support PM capabilities */
-       if (!pm)
+static inline bool platform_pci_can_wakeup(struct pci_dev *dev)
+{
+       return pci_platform_pm ? pci_platform_pm->can_wakeup(dev) : false;
+}
+
+static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable)
+{
+       return pci_platform_pm ?
+                       pci_platform_pm->sleep_wake(dev, enable) : -ENODEV;
+}
+
+/**
+ * pci_raw_set_power_state - Use PCI PM registers to set the power state of
+ *                           given PCI device
+ * @dev: PCI device to handle.
+ * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
+ *
+ * RETURN VALUE:
+ * -EINVAL if the requested state is invalid.
+ * -EIO if device does not support PCI PM or its PM capabilities register has a
+ * wrong version, or device doesn't support the requested state.
+ * 0 if device already is in the requested state.
+ * 0 if device's power state has been successfully changed.
+ */
+static int
+pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
+{
+       u16 pmcsr;
+       bool need_restore = false;
+
+       if (!dev->pm_cap)
                 return -EIO;
  
+       if (state < PCI_D0 || state > PCI_D3hot)
+               return -EINVAL;
+
         /* Validate current state:
          * Can enter D0 from any state, but if we can only go deeper 
          * to sleep if we're already in a low power state
          */
-       if (state != PCI_D0 && dev->current_state > state) {
-               printk(KERN_ERR "%s(): %s: state=%d, current state=%d\n",
-                       __func__, pci_name(dev), state, dev->current_state);
+       if (dev->current_state == state) {
+               /* we're already there */
+               return 0;
+       } else if (state != PCI_D0 && dev->current_state <= PCI_D3cold
+           && dev->current_state > state) {
+               dev_err(&dev->dev, "invalid power transition "
+                       "(from state %d to %d)\n", dev->current_state, state);
                 return -EINVAL;
-       } else if (dev->current_state == state)
-               return 0;        /* we're already there */
-
-
-       pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc);
-       if ((pmc & PCI_PM_CAP_VER_MASK) > 3) {
-               printk(KERN_DEBUG
-                      "PCI: %s has unsupported PM cap regs version (%u)\n",
-                      pci_name(dev), pmc & PCI_PM_CAP_VER_MASK);
-               return -EIO;
         }
  
         /* check if this device supports the desired state */
-       if (state == PCI_D1 && !(pmc & PCI_PM_CAP_D1))
-               return -EIO;
-       else if (state == PCI_D2 && !(pmc & PCI_PM_CAP_D2))
+       if ((state == PCI_D1 && !dev->d1_support)
+          || (state == PCI_D2 && !dev->d2_support))
                 return -EIO;
  
-       pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr);
+       pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
  
         /* If we're (effectively) in D3, force entire word to 0.
          * This doesn't affect PME_Status, disables PME_En, and
@@ -461,7 +476,7 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state)
         case PCI_UNKNOWN: /* Boot-up */
                 if ((pmcsr & PCI_PM_CTRL_STATE_MASK) == PCI_D3hot
                  && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET))
-                       need_restore = 1;
+                       need_restore = true;
                 /* Fall-through: force to D0 */
         default:
                 pmcsr = 0;
@@ -469,7 +484,7 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state)
         }
  
         /* enter specified state */
-       pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr);
+       pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
  
         /* Mandatory power management transition delays */
         /* see PCI PM 1.1 5.6.1 table 18 */
@@ -478,13 +493,6 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state)
         else if (state == PCI_D2 || dev->current_state == PCI_D2)
                 udelay(200);
  
-       /*
-        * Give firmware a chance to be called, such as ACPI _PRx, _PSx
-        * Firmware method after native method ?
-        */
-       if (platform_pci_set_power_state)
-               platform_pci_set_power_state(dev, state);
-
         dev->current_state = state;
  
         /* According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT
@@ -508,8 +516,77 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state)
         return 0;
  }
  
-pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev, pm_message_t state);
- 
+/**
+ * pci_update_current_state - Read PCI power state of given device from its
+ *                            PCI PM registers and cache it
+ * @dev: PCI device to handle.
+ */
+static void pci_update_current_state(struct pci_dev *dev)
+{
+       if (dev->pm_cap) {
+               u16 pmcsr;
+
+               pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+               dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
+       }
+}
+
+/**
+ * pci_set_power_state - Set the power state of a PCI device
+ * @dev: PCI device to handle.
+ * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
+ *
+ * Transition a device to a new power state, using the platform firmware and/or
+ * the device's PCI PM registers.
+ *
+ * RETURN VALUE:
+ * -EINVAL if the requested state is invalid.
+ * -EIO if device does not support PCI PM or its PM capabilities register has a
+ * wrong version, or device doesn't support the requested state.
+ * 0 if device already is in the requested state.
+ * 0 if device's power state has been successfully changed.
+ */
+int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+{
+       int error;
+
+       /* bound the state we're entering */
+       if (state > PCI_D3hot)
+               state = PCI_D3hot;
+       else if (state < PCI_D0)
+               state = PCI_D0;
+       else if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev))
+               /*
+                * If the device or the parent bridge do not support PCI PM,
+                * ignore the request if we're doing anything other than putting
+                * it into D0 (which would only happen on boot).
+                */
+               return 0;
+
+       if (state == PCI_D0 && platform_pci_power_manageable(dev)) {
+               /*
+                * Allow the platform to change the state, for example via ACPI
+                * _PR0, _PS0 and some such, but do not trust it.
+                */
+               int ret = platform_pci_set_power_state(dev, PCI_D0);
+               if (!ret)
+                       pci_update_current_state(dev);
+       }
+
+       error = pci_raw_set_power_state(dev, state);
+
+       if (state > PCI_D0 && platform_pci_power_manageable(dev)) {
+               /* Allow the platform to finalize the transition */
+               int ret = platform_pci_set_power_state(dev, state);
+               if (!ret) {
+                       pci_update_current_state(dev);
+                       error = 0;
+               }
+       }
+
+       return error;
+}
+
  /**
   * pci_choose_state - Choose the power state of a PCI device
   * @dev: PCI device to be suspended
@@ -527,11 +604,9 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state)
         if (!pci_find_capability(dev, PCI_CAP_ID_PM))
                 return PCI_D0;
  
-       if (platform_pci_choose_state) {
-               ret = platform_pci_choose_state(dev, state);
-               if (ret != PCI_POWER_ERROR)
-                       return ret;
-       }
+       ret = platform_pci_choose_state(dev);
+       if (ret != PCI_POWER_ERROR)
+               return ret;
  
         switch (state.event) {
         case PM_EVENT_ON:
@@ -543,7 +618,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state)
         case PM_EVENT_HIBERNATE:
                 return PCI_D3hot;
         default:
-               printk("Unrecognized suspend event %d\n", state.event);
+               dev_info(&dev->dev, "unrecognized suspend event %d\n",
+                        state.event);
                 BUG();
         }
         return PCI_D0;
@@ -568,7 +644,7 @@ static int pci_save_pcie_state(struct pci_dev *dev)
         else
                 found = 1;
         if (!save_state) {
-               dev_err(&dev->dev, "Out of memory in pci_save_pcie_state\n");
+               dev_err(&dev->dev, "out of memory in pci_save_pcie_state\n");
                 return -ENOMEM;
         }
         cap = (u16 *)&save_state->data[0];
@@ -619,7 +695,7 @@ static int pci_save_pcix_state(struct pci_dev *dev)
         else
                 found = 1;
         if (!save_state) {
-               dev_err(&dev->dev, "Out of memory in pci_save_pcie_state\n");
+               dev_err(&dev->dev, "out of memory in pci_save_pcie_state\n");
                 return -ENOMEM;
         }
         cap = (u16 *)&save_state->data[0];
@@ -685,10 +761,9 @@ pci_restore_state(struct pci_dev *dev)
         for (i = 15; i >= 0; i--) {
                 pci_read_config_dword(dev, i * 4, &val);
                 if (val != dev->saved_config_space[i]) {
-                       printk(KERN_DEBUG "PM: Writing back config space on "
-                               "device %s at offset %x (was %x, writing %x)\n",
-                               pci_name(dev), i,
-                               val, (int)dev->saved_config_space[i]);
+                       dev_printk(KERN_DEBUG, &dev->dev, "restoring config "
+                               "space at offset %#x (was %#x, writing %#x)\n",
+                               i, val, (int)dev->saved_config_space[i]);
                         pci_write_config_dword(dev,i * 4,
                                 dev->saved_config_space[i]);
                 }
@@ -960,6 +1035,46 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
         return pcibios_set_pcie_reset_state(dev, state);
  }
  
+/**
+ * pci_pme_capable - check the capability of PCI device to generate PME#
+ * @dev: PCI device to handle.
+ * @state: PCI state from which device will issue PME#.
+ */
+static bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
+{
+       if (!dev->pm_cap)
+               return false;
+
+       return !!(dev->pme_support & (1 << state));
+}
+
+/**
+ * pci_pme_active - enable or disable PCI device's PME# function
+ * @dev: PCI device to handle.
+ * @enable: 'true' to enable PME# generation; 'false' to disable it.
+ *
+ * The caller must verify that the device is capable of generating PME# before
+ * calling this function with @enable equal to 'true'.
+ */
+static void pci_pme_active(struct pci_dev *dev, bool enable)
+{
+       u16 pmcsr;
+
+       if (!dev->pm_cap)
+               return;
+
+       pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+       /* Clear PME_Status by writing 1 to it and enable PME# */
+       pmcsr |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE;
+       if (!enable)
+               pmcsr &= ~PCI_PM_CTRL_PME_ENABLE;
+
+       pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
+
+       dev_printk(KERN_INFO, &dev->dev, "PME# %s\n",
+                       enable ? "enabled" : "disabled");
+}
+
  /**
   * pci_enable_wake - enable PCI device as wakeup event source
   * @dev: PCI device affected
@@ -971,66 +1086,173 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
   * called automatically by this routine.
   *
   * Devices with legacy power management (no standard PCI PM capabilities)
- * always require such platform hooks.  Depending on the platform, devices
- * supporting the standard PCI PME# signal may require such platform hooks;
- * they always update bits in config space to allow PME# generation.
+ * always require such platform hooks.
   *
- * -EIO is returned if the device can't ever be a wakeup event source.
- * -EINVAL is returned if the device can't generate wakeup events from
- * the specified PCI state.  Returns zero if the operation is successful.
+ * RETURN VALUE:
+ * 0 is returned on success
+ * -EINVAL is returned if device is not supposed to wake up the system
+ * Error code depending on the platform is returned if both the platform and
+ * the native mechanism fail to enable the generation of wake-up events
   */
  int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
  {
-       int pm;
-       int status;
-       u16 value;
-
-       /* Note that drivers should verify device_may_wakeup(&dev->dev)
-        * before calling this function.  Platform code should report
-        * errors when drivers try to enable wakeup on devices that
-        * can't issue wakeups, or on which wakeups were disabled by
-        * userspace updating the /sys/devices.../power/wakeup file.
+       int error = 0;
+       bool pme_done = false;
+
+       if (!device_may_wakeup(&dev->dev))
+               return -EINVAL;
+
+       /*
+        * According to "PCI System Architecture" 4th ed. by Tom Shanley & Don
+        * Anderson we should be doing PME# wake enable followed by ACPI wake
+        * enable.  To disable wake-up we call the platform first, for symmetry.
          */
  
-       status = call_platform_enable_wakeup(&dev->dev, enable);
+       if (!enable && platform_pci_can_wakeup(dev))
+               error = platform_pci_sleep_wake(dev, false);
  
-       /* find PCI PM capability in list */
-       pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+       if (!enable || pci_pme_capable(dev, state)) {
+               pci_pme_active(dev, enable);
+               pme_done = true;
+       }
  
-       /* If device doesn't support PM Capabilities, but caller wants to
-        * disable wake events, it's a NOP.  Otherwise fail unless the
-        * platform hooks handled this legacy device already.
-        */
-       if (!pm)
-               return enable ? status : 0;
+       if (enable && platform_pci_can_wakeup(dev))
+               error = platform_pci_sleep_wake(dev, true);
  
-       /* Check device's ability to generate PME# */
-       pci_read_config_word(dev,pm+PCI_PM_PMC,&value);
+       return pme_done ? 0 : error;
+}
  
-       value &= PCI_PM_CAP_PME_MASK;
-       value >>= ffs(PCI_PM_CAP_PME_MASK) - 1;   /* First bit of mask */
+/**
+ * pci_prepare_to_sleep - prepare PCI device for system-wide transition into
+ *                        a sleep state
+ * @dev: Device to handle.
+ *
+ * Choose the power state appropriate for the device depending on whether
+ * it can wake up the system and/or is power manageable by the platform
+ * (PCI_D3hot is the default) and put the device into that state.
+ */
+int pci_prepare_to_sleep(struct pci_dev *dev)
+{
+       pci_power_t target_state = PCI_D3hot;
+       int error;
  
-       /* Check if it can generate PME# from requested state. */
-       if (!value || !(value & (1 << state))) {
-               /* if it can't, revert what the platform hook changed,
-                * always reporting the base "EINVAL, can't PME#" error
+       if (platform_pci_power_manageable(dev)) {
+               /*
+                * Call the platform to choose the target state of the device
+                * and enable wake-up from this state if supported.
                  */
-               if (enable)
-                       call_platform_enable_wakeup(&dev->dev, 0);
-               return enable ? -EINVAL : 0;
+               pci_power_t state = platform_pci_choose_state(dev);
+
+               switch (state) {
+               case PCI_POWER_ERROR:
+               case PCI_UNKNOWN:
+                       break;
+               case PCI_D1:
+               case PCI_D2:
+                       if (pci_no_d1d2(dev))
+                               break;
+               default:
+                       target_state = state;
+               }
+       } else if (device_may_wakeup(&dev->dev)) {
+               /*
+                * Find the deepest state from which the device can generate
+                * wake-up events, make it the target state and enable device
+                * to generate PME#.
+                */
+               if (!dev->pm_cap)
+                       return -EIO;
+
+               if (dev->pme_support) {
+                       while (target_state
+                             && !(dev->pme_support & (1 << target_state)))
+                               target_state--;
+               }
         }
  
-       pci_read_config_word(dev, pm + PCI_PM_CTRL, &value);
+       pci_enable_wake(dev, target_state, true);
  
-       /* Clear PME_Status by writing 1 to it and enable PME# */
-       value |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE;
+       error = pci_set_power_state(dev, target_state);
  
-       if (!enable)
-               value &= ~PCI_PM_CTRL_PME_ENABLE;
+       if (error)
+               pci_enable_wake(dev, target_state, false);
  
-       pci_write_config_word(dev, pm + PCI_PM_CTRL, value);
+       return error;
+}
  
-       return 0;
+/**
+ * pci_back_from_sleep - turn PCI device on during system-wide transition into
+ *                       the working state from a sleep state
+ * @dev: Device to handle.
+ *
+ * Disable device's system wake-up capability and put it into D0.
+ */
+int pci_back_from_sleep(struct pci_dev *dev)
+{
+       pci_enable_wake(dev, PCI_D0, false);
+       return pci_set_power_state(dev, PCI_D0);
+}
+
+/**
+ * pci_pm_init - Initialize PM functions of given PCI device
+ * @dev: PCI device to handle.
+ */
+void pci_pm_init(struct pci_dev *dev)
+{
+       int pm;
+       u16 pmc;
+
+       dev->pm_cap = 0;
+
+       /* find PCI PM capability in list */
+       pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+       if (!pm)
+               return;
+       /* Check device's ability to generate PME# */
+       pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc);
+
+       if ((pmc & PCI_PM_CAP_VER_MASK) > 3) {
+               dev_err(&dev->dev, "unsupported PM cap regs version (%u)\n",
+                       pmc & PCI_PM_CAP_VER_MASK);
+               return;
+       }
+
+       dev->pm_cap = pm;
+
+       dev->d1_support = false;
+       dev->d2_support = false;
+       if (!pci_no_d1d2(dev)) {
+               if (pmc & PCI_PM_CAP_D1) {
+                       dev_printk(KERN_DEBUG, &dev->dev, "supports D1\n");
+                       dev->d1_support = true;
+               }
+               if (pmc & PCI_PM_CAP_D2) {
+                       dev_printk(KERN_DEBUG, &dev->dev, "supports D2\n");
+                       dev->d2_support = true;
+               }
+       }
+
+       pmc &= PCI_PM_CAP_PME_MASK;
+       if (pmc) {
+               dev_printk(KERN_INFO, &dev->dev,
+                       "PME# supported from%s%s%s%s%s\n",
+                       (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
+                       (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
+                       (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
+                       (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
+                       (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
+               dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT;
+               /*
+                * Make device's PM flags reflect the wake-up capability, but
+                * let the user space enable it to wake up the system as needed.
+                */
+               device_set_wakeup_capable(&dev->dev, true);
+               device_set_wakeup_enable(&dev->dev, false);
+               /* Disable the PME# generation functionality */
+               pci_pme_active(dev, false);
+       } else {
+               dev->pme_support = 0;
+       }
  }
  
  int
@@ -1116,13 +1338,11 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
         return 0;
  
  err_out:
-       printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%llx@%llx "
-               "for device %s\n",
-               pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
-               bar + 1, /* PCI BAR # */
-               (unsigned long long)pci_resource_len(pdev, bar),
-               (unsigned long long)pci_resource_start(pdev, bar),
-               pci_name(pdev));
+       dev_warn(&pdev->dev, "BAR %d: can't reserve %s region [%#llx-%#llx]\n",
+                bar,
+                pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
+                (unsigned long long)pci_resource_start(pdev, bar),
+                (unsigned long long)pci_resource_end(pdev, bar));
         return -EBUSY;
  }
  
@@ -1214,7 +1434,7 @@ pci_set_master(struct pci_dev *dev)
  
         pci_read_config_word(dev, PCI_COMMAND, &cmd);
         if (! (cmd & PCI_COMMAND_MASTER)) {
-               pr_debug("PCI: Enabling bus mastering for device %s\n", pci_name(dev));
+               dev_dbg(&dev->dev, "enabling bus mastering\n");
                 cmd |= PCI_COMMAND_MASTER;
                 pci_write_config_word(dev, PCI_COMMAND, cmd);
         }
@@ -1279,8 +1499,8 @@ pci_set_cacheline_size(struct pci_dev *dev)
         if (cacheline_size == pci_cache_line_size)
                 return 0;
  
-       printk(KERN_DEBUG "PCI: cache line size of %d is not supported "
-              "by device %s\n", pci_cache_line_size << 2, pci_name(dev));
+       dev_printk(KERN_DEBUG, &dev->dev, "cache line size of %d is not "
+                  "supported\n", pci_cache_line_size << 2);
  
         return -EINVAL;
  }
@@ -1305,8 +1525,7 @@ pci_set_mwi(struct pci_dev *dev)
  
         pci_read_config_word(dev, PCI_COMMAND, &cmd);
         if (! (cmd & PCI_COMMAND_INVALIDATE)) {
-               pr_debug("PCI: Enabling Mem-Wr-Inval for device %s\n",
-                       pci_name(dev));
+               dev_dbg(&dev->dev, "enabling Mem-Wr-Inval\n");
                 cmd |= PCI_COMMAND_INVALIDATE;
                 pci_write_config_word(dev, PCI_COMMAND, cmd);
         }
@@ -1702,5 +1921,7 @@ EXPORT_SYMBOL(pci_set_power_state);
  EXPORT_SYMBOL(pci_save_state);
  EXPORT_SYMBOL(pci_restore_state);
  EXPORT_SYMBOL(pci_enable_wake);
+EXPORT_SYMBOL(pci_prepare_to_sleep);
+EXPORT_SYMBOL(pci_back_from_sleep);
  EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
  
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h

index 00408c97e5fc8cc2e38f52b408c70fdab9d133a8..d807cd786f20a18419105df88d8cf816019cab48 100644 (file)
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -5,11 +5,36 @@ extern int pci_create_sysfs_dev_files(struct pci_dev *pdev);
  extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev);
  extern void pci_cleanup_rom(struct pci_dev *dev);
  
-/* Firmware callbacks */
-extern pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev,
-                                               pm_message_t state);
-extern int (*platform_pci_set_power_state)(struct pci_dev *dev,
-                                               pci_power_t state);
+/**
+ * Firmware PM callbacks
+ *
+ * @is_manageable - returns 'true' if given device is power manageable by the
+ *                  platform firmware
+ *
+ * @set_state - invokes the platform firmware to set the device's power state
+ *
+ * @choose_state - returns PCI power state of given device preferred by the
+ *                 platform; to be used during system-wide transitions from a
+ *                 sleeping state to the working state and vice versa
+ *
+ * @can_wakeup - returns 'true' if given device is capable of waking up the
+ *               system from a sleeping state
+ *
+ * @sleep_wake - enables/disables the system wake up capability of given device
+ *
+ * If given platform is generally capable of power managing PCI devices, all of
+ * these callbacks are mandatory.
+ */
+struct pci_platform_pm_ops {
+       bool (*is_manageable)(struct pci_dev *dev);
+       int (*set_state)(struct pci_dev *dev, pci_power_t state);
+       pci_power_t (*choose_state)(struct pci_dev *dev);
+       bool (*can_wakeup)(struct pci_dev *dev);
+       int (*sleep_wake)(struct pci_dev *dev, bool enable);
+};
+
+extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops);
+extern void pci_pm_init(struct pci_dev *dev);
  
  extern int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val);
  extern int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val);
@@ -106,3 +131,16 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
  }
  
  struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
+
+/* PCI slot sysfs helper code */
+#define to_pci_slot(s) container_of(s, struct pci_slot, kobj)
+
+extern struct kset *pci_slots_kset;
+
+struct pci_slot_attribute {
+       struct attribute attr;
+       ssize_t (*show)(struct pci_slot *, char *);
+       ssize_t (*store)(struct pci_slot *, const char *, size_t);
+};
+#define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
+
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c

index 07c3bdb6edc2ad5b261f3877eed725466f4d895a..77036f46acfe22e1cfdebeb3613cb6f3527b6df2 100644 (file)
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -26,6 +26,7 @@
  #include <linux/pcieport_if.h>
  
  #include "aerdrv.h"
+#include "../../pci.h"
  
  /*
   * Version Information
@@ -219,8 +220,7 @@ static int __devinit aer_probe (struct pcie_device *dev,
  
         /* Alloc rpc data structure */
         if (!(rpc = aer_alloc_rpc(dev))) {
-               printk(KERN_DEBUG "%s: Alloc rpc fails on PCIE device[%s]\n",
-                       __func__, device->bus_id);
+               dev_printk(KERN_DEBUG, device, "alloc rpc failed\n");
                 aer_remove(dev);
                 return -ENOMEM;
         }
@@ -228,8 +228,7 @@ static int __devinit aer_probe (struct pcie_device *dev,
         /* Request IRQ ISR */
         if ((status = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv",
                                 dev))) {
-               printk(KERN_DEBUG "%s: Request ISR fails on PCIE device[%s]\n",
-                       __func__, device->bus_id);
+               dev_printk(KERN_DEBUG, device, "request IRQ failed\n");
                 aer_remove(dev);
                 return status;
         }
@@ -273,7 +272,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
          * to issue Configuration Requests to those devices.
          */
         msleep(200);
-       printk(KERN_DEBUG "Complete link reset at Root[%s]\n", dev->dev.bus_id);
+       dev_printk(KERN_DEBUG, &dev->dev, "Root Port link has been reset\n");
  
         /* Enable Root Port's interrupt in response to error messages */
         pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c

index d39a78dbd026b03b4d58d450d6efc11ccd3ca652..30f581b8791f5f84c6ab72a1f76bf1de8a5a2c87 100644 (file)
--- a/drivers/pci/pcie/aer/aerdrv_acpi.c
+++ b/drivers/pci/pcie/aer/aerdrv_acpi.c
@@ -50,10 +50,10 @@ int aer_osc_setup(struct pcie_device *pciedev)
         }
  
         if (ACPI_FAILURE(status)) {
-               printk(KERN_DEBUG "AER service couldn't init device %s - %s\n",
-                   pciedev->device.bus_id,
-                   (status == AE_SUPPORT || status == AE_NOT_FOUND) ?
-                   "no _OSC support" : "Run ACPI _OSC fails");
+               dev_printk(KERN_DEBUG, &pciedev->device, "AER service couldn't "
+                          "init device: %s\n",
+                          (status == AE_SUPPORT || status == AE_NOT_FOUND) ?
+                          "no _OSC support" : "_OSC failed");
                 return -1;
         }
  
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c

index aaa82392d1dcc4c79e79b9432e68f66ab6b93be6..ee5e7b5176d0f5d0986fbcb04050e37ff8b324ee 100644 (file)
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -221,9 +221,9 @@ static void report_error_detected(struct pci_dev *dev, void *data)
                          * of a driver for this device is unaware of
                          * its hw state.
                          */
-                       printk(KERN_DEBUG "Device ID[%s] has %s\n",
-                                       dev->dev.bus_id, (dev->driver) ?
-                                       "no AER-aware driver" : "no driver");
+                       dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n",
+                                  dev->driver ?
+                                  "no AER-aware driver" : "no driver");
                 }
                 return;
         }
@@ -304,7 +304,7 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
  {
         struct aer_broadcast_data result_data;
  
-       printk(KERN_DEBUG "Broadcast %s message\n", error_mesg);
+       dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg);
         result_data.state = state;
         if (cb == report_error_detected)
                 result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
@@ -404,18 +404,16 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev,
                         data.aer_driver =
                                 to_service_driver(aerdev->device.driver);
                 } else {
-                       printk(KERN_DEBUG "No link-reset support to Device ID"
-                               "[%s]\n",
-                               dev->dev.bus_id);
+                       dev_printk(KERN_DEBUG, &dev->dev, "no link-reset "
+                                  "support\n");
                         return PCI_ERS_RESULT_DISCONNECT;
                 }
         }
  
         status = data.aer_driver->reset_link(udev);
         if (status != PCI_ERS_RESULT_RECOVERED) {
-               printk(KERN_DEBUG "Link reset at upstream Device ID"
-                       "[%s] failed\n",
-                       udev->dev.bus_id);
+               dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream "
+                          "device %s failed\n", pci_name(udev));
                 return PCI_ERS_RESULT_DISCONNECT;
         }
  
@@ -511,10 +509,12 @@ static void handle_error_source(struct pcie_device * aerdev,
         } else {
                 status = do_recovery(aerdev, dev, info.severity);
                 if (status == PCI_ERS_RESULT_RECOVERED) {
-                       printk(KERN_DEBUG "AER driver successfully recovered\n");
+                       dev_printk(KERN_DEBUG, &dev->dev, "AER driver "
+                                  "successfully recovered\n");
                 } else {
                         /* TODO: Should kernel panic here? */
-                       printk(KERN_DEBUG "AER driver didn't recover\n");
+                       dev_printk(KERN_DEBUG, &dev->dev, "AER driver didn't "
+                                  "recover\n");
                 }
         }
  }
diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c

index 3f0976868eda656c9a2441caae804c96f9cc3fac..359fe5568df10263cd9c0620c42e7f9c8ffef1bd 100644 (file)
--- a/drivers/pci/pcie/portdrv_bus.c
+++ b/drivers/pci/pcie/portdrv_bus.c
@@ -13,6 +13,7 @@
  #include <linux/pm.h>
  
  #include <linux/pcieport_if.h>
+#include "portdrv.h"
  
  static int pcie_port_bus_match(struct device *dev, struct device_driver *drv);
  static int pcie_port_bus_suspend(struct device *dev, pm_message_t state);
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c

index fb0abfa508dce74cce8efe89982e0ae9ccafc879..890f0d2b370af7003625326075073f7641e549ce 100644 (file)
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -23,20 +23,20 @@ static int pcie_port_probe_service(struct device *dev)
  {
         struct pcie_device *pciedev;
         struct pcie_port_service_driver *driver;
-       int status = -ENODEV;
+       int status;
  
         if (!dev || !dev->driver)
-               return status;
+               return -ENODEV;
  
         driver = to_service_driver(dev->driver);
         if (!driver || !driver->probe)
-               return status;
+               return -ENODEV;
  
         pciedev = to_pcie_device(dev);
         status = driver->probe(pciedev, driver->id_table);
         if (!status) {
-               printk(KERN_DEBUG "Load service driver %s on pcie device %s\n",
-                       driver->name, dev->bus_id);
+               dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n",
+                       driver->name);
                 get_device(dev);
         }
         return status;
@@ -53,8 +53,8 @@ static int pcie_port_remove_service(struct device *dev)
         pciedev = to_pcie_device(dev);
         driver = to_service_driver(dev->driver);
         if (driver && driver->remove) { 
-               printk(KERN_DEBUG "Unload service driver %s on pcie device %s\n",
-                       driver->name, dev->bus_id);
+               dev_printk(KERN_DEBUG, dev, "unloading service driver %s\n",
+                       driver->name);
                 driver->remove(pciedev);
                 put_device(dev);
         }
@@ -103,7 +103,7 @@ static int pcie_port_resume_service(struct device *dev)
   */
  static void release_pcie_device(struct device *dev)
  {
-       printk(KERN_DEBUG "Free Port Service[%s]\n", dev->bus_id);
+       dev_printk(KERN_DEBUG, dev, "free port service\n");
         kfree(to_pcie_device(dev));                     
  }
  
@@ -150,7 +150,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask)
         if (pos) {
                 struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] = 
                         {{0, 0}, {0, 1}, {0, 2}, {0, 3}};
-               printk("%s Found MSIX capability\n", __func__);
+               dev_info(&dev->dev, "found MSI-X capability\n");
                 status = pci_enable_msix(dev, msix_entries, nvec);
                 if (!status) {
                         int j = 0;
@@ -165,7 +165,7 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask)
         if (status) {
                 pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
                 if (pos) {
-                       printk("%s Found MSI capability\n", __func__);
+                       dev_info(&dev->dev, "found MSI capability\n");
                         status = pci_enable_msi(dev);
                         if (!status) {
                                 interrupt_mode = PCIE_PORT_MSI_MODE;
@@ -252,7 +252,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent,
                 return NULL;
  
         pcie_device_init(parent, device, port_type, service_type, irq,irq_mode);
-       printk(KERN_DEBUG "Allocate Port Service[%s]\n", device->device.bus_id);
+       dev_printk(KERN_DEBUG, &device->device, "allocate port service\n");
         return device;
  }
  
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c

index 51d163238d93e63429bd6d04446c1563aaa8be64..367c9c20000dd59dfdd8e1130be182ed2b3fde51 100644 (file)
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -91,9 +91,8 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
         
         pci_set_master(dev);
          if (!dev->irq && dev->pin) {
-               printk(KERN_WARNING 
-               "%s->Dev[%04x:%04x] has invalid IRQ. Check vendor BIOS\n", 
-               __func__, dev->vendor, dev->device);
+               dev_warn(&dev->dev, "device [%04x/%04x] has invalid IRQ; "
+                        "check vendor BIOS\n", dev->vendor, dev->device);
         }
         if (pcie_port_device_register(dev)) {
                 pci_disable_device(dev);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c

index 3706ce7972dddc0c389f828a3ec0c4635d1d8efe..b1724cf31b669ef459d3250aa8fae48b38e4c7fa 100644 (file)
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -277,8 +277,8 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
                         res->end = res->start + sz64;
  #else
                         if (sz64 > 0x100000000ULL) {
-                               printk(KERN_ERR "PCI: Unable to handle 64-bit "
-                                       "BAR for device %s\n", pci_name(dev));
+                               dev_err(&dev->dev, "BAR %d: can't handle 64-bit"
+                                       " BAR\n", pos);
                                 res->start = 0;
                                 res->flags = 0;
                         } else if (lhi) {
@@ -329,7 +329,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
                 return;
  
         if (dev->transparent) {
-               printk(KERN_INFO "PCI: Transparent bridge - %s\n", pci_name(dev));
+               dev_info(&dev->dev, "transparent bridge\n");
                 for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++)
                         child->resource[i] = child->parent->resource[i - 3];
         }
@@ -392,7 +392,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
                         limit |= ((long) mem_limit_hi) << 32;
  #else
                         if (mem_base_hi || mem_limit_hi) {
-                               printk(KERN_ERR "PCI: Unable to handle 64-bit address space for bridge %s\n", pci_name(dev));
+                               dev_err(&dev->dev, "can't handle 64-bit "
+                                       "address space for bridge\n");
                                 return;
                         }
  #endif
@@ -414,6 +415,7 @@ static struct pci_bus * pci_alloc_bus(void)
                 INIT_LIST_HEAD(&b->node);
                 INIT_LIST_HEAD(&b->children);
                 INIT_LIST_HEAD(&b->devices);
+               INIT_LIST_HEAD(&b->slots);
         }
         return b;
  }
@@ -511,8 +513,8 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
  
         pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
  
-       pr_debug("PCI: Scanning behind PCI bridge %s, config %06x, pass %d\n",
-                pci_name(dev), buses & 0xffffff, pass);
+       dev_dbg(&dev->dev, "scanning behind bridge, config %06x, pass %d\n",
+               buses & 0xffffff, pass);
  
         /* Disable MasterAbortMode during probing to avoid reporting
            of bus errors (in some architectures) */ 
@@ -535,8 +537,8 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
                  * ignore it.  This can happen with the i450NX chipset.
                  */
                 if (pci_find_bus(pci_domain_nr(bus), busnr)) {
-                       printk(KERN_INFO "PCI: Bus %04x:%02x already known\n",
-                                       pci_domain_nr(bus), busnr);
+                       dev_info(&dev->dev, "bus %04x:%02x already known\n",
+                                pci_domain_nr(bus), busnr);
                         goto out;
                 }
  
@@ -711,8 +713,9 @@ static int pci_setup_device(struct pci_dev * dev)
  {
         u32 class;
  
-       sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
-               dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+       dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
+                    dev->bus->number, PCI_SLOT(dev->devfn),
+                    PCI_FUNC(dev->devfn));
  
         pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
         dev->revision = class & 0xff;
@@ -720,7 +723,7 @@ static int pci_setup_device(struct pci_dev * dev)
         dev->class = class;
         class >>= 8;
  
-       pr_debug("PCI: Found %s [%04x/%04x] %06x %02x\n", pci_name(dev),
+       dev_dbg(&dev->dev, "found [%04x/%04x] class %06x header type %02x\n",
                  dev->vendor, dev->device, class, dev->hdr_type);
  
         /* "Unknown power state" */
@@ -788,13 +791,13 @@ static int pci_setup_device(struct pci_dev * dev)
                 break;
  
         default:                                    /* unknown header */
-               printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n",
-                       pci_name(dev), dev->hdr_type);
+               dev_err(&dev->dev, "unknown header type %02x, "
+                       "ignoring device\n", dev->hdr_type);
                 return -1;
  
         bad:
-               printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n",
-                      pci_name(dev), class, dev->hdr_type);
+               dev_err(&dev->dev, "ignoring class %02x (doesn't match header "
+                       "type %02x)\n", class, dev->hdr_type);
                 dev->class = PCI_CLASS_NOT_DEFINED;
         }
  
@@ -927,7 +930,7 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
                         return NULL;
                 /* Card hasn't responded in 60 seconds?  Must be stuck. */
                 if (delay > 60 * 1000) {
-                       printk(KERN_WARNING "Device %04x:%02x:%02x.%d not "
+                       printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not "
                                         "responding\n", pci_domain_nr(bus),
                                         bus->number, PCI_SLOT(devfn),
                                         PCI_FUNC(devfn));
@@ -984,6 +987,9 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
         /* Fix up broken headers */
         pci_fixup_device(pci_fixup_header, dev);
  
+       /* Initialize power management of the device */
+       pci_pm_init(dev);
+
         /*
          * Add the device to our list of discovered devices
          * and the bus list for fixup functions, etc.
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c

index 963a97642ae9101a7454c7b0b3c0dbcc95f8240e..4400dffbd93a853310855cd441cf7f5293e8c091 100644 (file)
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -1,6 +1,4 @@
  /*
- *     $Id: proc.c,v 1.13 1998/05/12 07:36:07 mj Exp $
- *
   *     Procfs interface for the PCI bus.
   *
   *     Copyright (c) 1997--1999 Martin Mares <mj@ucw.cz>
@@ -482,5 +480,5 @@ static int __init pci_proc_init(void)
         return 0;
  }
  
-__initcall(pci_proc_init);
+device_initcall(pci_proc_init);
  
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c

index 338a3f94b4d43bdf3d44dd0548e018199a49774a..12d489395fad0496d7953a78a30d6237ae9018ac 100644 (file)
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -556,7 +556,7 @@ static void quirk_via_ioapic(struct pci_dev *dev)
         pci_write_config_byte (dev, 0x58, tmp);
  }
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,     PCI_DEVICE_ID_VIA_82C686,       quirk_via_ioapic);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_82C686,       quirk_via_ioapic);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C686,       quirk_via_ioapic);
  
  /*
   * VIA 8237: Some BIOSs don't set the 'Bypass APIC De-Assert Message' Bit.
@@ -576,7 +576,7 @@ static void quirk_via_vt8237_bypass_apic_deassert(struct pci_dev *dev)
         }
  }
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA,     PCI_DEVICE_ID_VIA_8237,         quirk_via_vt8237_bypass_apic_deassert);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_8237,         quirk_via_vt8237_bypass_apic_deassert);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_8237,         quirk_via_vt8237_bypass_apic_deassert);
  
  /*
   * The AMD io apic can hang the box when an apic irq is masked.
@@ -622,7 +622,7 @@ static void quirk_amd_8131_ioapic(struct pci_dev *dev)
          }
  } 
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic);
  #endif /* CONFIG_X86_IO_APIC */
  
  /*
@@ -774,7 +774,7 @@ static void quirk_cardbus_legacy(struct pci_dev *dev)
         pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0);
  }
  DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy);
-DECLARE_PCI_FIXUP_RESUME(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy);
  
  /*
   * Following the PCI ordering rules is optional on the AMD762. I'm not
@@ -797,7 +797,7 @@ static void quirk_amd_ordering(struct pci_dev *dev)
         }
  }
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,     PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD,    PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD,      PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering);
  
  /*
   *     DreamWorks provided workaround for Dunord I-3000 problem
@@ -865,7 +865,7 @@ static void quirk_disable_pxb(struct pci_dev *pdev)
         }
  }
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82454NX,    quirk_disable_pxb);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82454NX,    quirk_disable_pxb);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82454NX,    quirk_disable_pxb);
  
  static void __devinit quirk_amd_ide_mode(struct pci_dev *pdev)
  {
@@ -885,9 +885,9 @@ static void __devinit quirk_amd_ide_mode(struct pci_dev *pdev)
         }
  }
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP600_SATA, quirk_amd_ide_mode);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP700_SATA, quirk_amd_ide_mode);
  
  /*
   *     Serverworks CSB5 IDE does not fully support native mode
@@ -1054,6 +1054,20 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev)
                                  * its on-board VGA controller */
                                 asus_hides_smbus = 1;
                         }
+               else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG)
+                       switch(dev->subsystem_device) {
+                       case 0x00b8: /* Compaq Evo D510 CMT */
+                       case 0x00b9: /* Compaq Evo D510 SFF */
+                               asus_hides_smbus = 1;
+                       }
+               else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC)
+                       switch (dev->subsystem_device) {
+                       case 0x001A: /* Compaq Deskpro EN SSF P667 815E */
+                               /* Motherboard doesn't have host bridge
+                                * subvendor/subdevice IDs, therefore checking
+                                * its on-board VGA controller */
+                               asus_hides_smbus = 1;
+                       }
         }
  }
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82845_HB,   asus_hides_smbus_hostbridge);
@@ -1068,6 +1082,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,     PCI_DEVICE_ID_INTEL_82855GM_HB, as
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge);
  
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82810_IG3,  asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82845G_IG,  asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82815_CGC,  asus_hides_smbus_hostbridge);
  
  static void asus_hides_smbus_lpc(struct pci_dev *dev)
  {
@@ -1093,31 +1109,61 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82801CA_0,  asu
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801EB_0,  asus_hides_smbus_lpc);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801AA_0,  asus_hides_smbus_lpc);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801DB_0,  asus_hides_smbus_lpc);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801BA_0,  asus_hides_smbus_lpc);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801CA_0,  asus_hides_smbus_lpc);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801EB_0,  asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801AA_0,  asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801DB_0,  asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801BA_0,  asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801CA_0,  asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82801EB_0,  asus_hides_smbus_lpc);
  
-static void asus_hides_smbus_lpc_ich6(struct pci_dev *dev)
+/* It appears we just have one such device. If not, we have a warning */
+static void __iomem *asus_rcba_base;
+static void asus_hides_smbus_lpc_ich6_suspend(struct pci_dev *dev)
  {
-       u32 val, rcba;
-       void __iomem *base;
+       u32 rcba;
  
         if (likely(!asus_hides_smbus))
                 return;
+       WARN_ON(asus_rcba_base);
+
         pci_read_config_dword(dev, 0xF0, &rcba);
-       base = ioremap_nocache(rcba & 0xFFFFC000, 0x4000); /* use bits 31:14, 16 kB aligned */
-       if (base == NULL) return;
-       val=readl(base + 0x3418); /* read the Function Disable register, dword mode only */
-       writel(val & 0xFFFFFFF7, base + 0x3418); /* enable the SMBus device */
-       iounmap(base);
+       /* use bits 31:14, 16 kB aligned */
+       asus_rcba_base = ioremap_nocache(rcba & 0xFFFFC000, 0x4000);
+       if (asus_rcba_base == NULL)
+               return;
+}
+
+static void asus_hides_smbus_lpc_ich6_resume_early(struct pci_dev *dev)
+{
+       u32 val;
+
+       if (likely(!asus_hides_smbus || !asus_rcba_base))
+               return;
+       /* read the Function Disable register, dword mode only */
+       val = readl(asus_rcba_base + 0x3418);
+       writel(val & 0xFFFFFFF7, asus_rcba_base + 0x3418); /* enable the SMBus device */
+}
+
+static void asus_hides_smbus_lpc_ich6_resume(struct pci_dev *dev)
+{
+       if (likely(!asus_hides_smbus || !asus_rcba_base))
+               return;
+       iounmap(asus_rcba_base);
+       asus_rcba_base = NULL;
         dev_info(&dev->dev, "Enabled ICH6/i801 SMBus device\n");
  }
+
+static void asus_hides_smbus_lpc_ich6(struct pci_dev *dev)
+{
+       asus_hides_smbus_lpc_ich6_suspend(dev);
+       asus_hides_smbus_lpc_ich6_resume_early(dev);
+       asus_hides_smbus_lpc_ich6_resume(dev);
+}
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_ICH6_1,     asus_hides_smbus_lpc_ich6);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_ICH6_1,     asus_hides_smbus_lpc_ich6);
+DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,     asus_hides_smbus_lpc_ich6_suspend);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_ICH6_1,     asus_hides_smbus_lpc_ich6_resume);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_ICH6_1,     asus_hides_smbus_lpc_ich6_resume_early);
  
  /*
   * SiS 96x south bridge: BIOS typically hides SMBus device...
@@ -1135,10 +1181,10 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,      PCI_DEVICE_ID_SI_961,           quirk_sis_96x_
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,     PCI_DEVICE_ID_SI_962,           quirk_sis_96x_smbus);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,     PCI_DEVICE_ID_SI_963,           quirk_sis_96x_smbus);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,     PCI_DEVICE_ID_SI_LPC,           quirk_sis_96x_smbus);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI,     PCI_DEVICE_ID_SI_961,           quirk_sis_96x_smbus);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI,     PCI_DEVICE_ID_SI_962,           quirk_sis_96x_smbus);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI,     PCI_DEVICE_ID_SI_963,           quirk_sis_96x_smbus);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI,     PCI_DEVICE_ID_SI_LPC,           quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_961,           quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_962,           quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_963,           quirk_sis_96x_smbus);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_LPC,           quirk_sis_96x_smbus);
  
  /*
   * ... This is further complicated by the fact that some SiS96x south
@@ -1172,7 +1218,7 @@ static void quirk_sis_503(struct pci_dev *dev)
         quirk_sis_96x_smbus(dev);
  }
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI,     PCI_DEVICE_ID_SI_503,           quirk_sis_503);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SI,     PCI_DEVICE_ID_SI_503,           quirk_sis_503);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_503,           quirk_sis_503);
  
  
  /*
@@ -1205,7 +1251,7 @@ static void asus_hides_ac97_lpc(struct pci_dev *dev)
         }
  }
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_8237, asus_hides_ac97_lpc);
  
  #if defined(CONFIG_ATA) || defined(CONFIG_ATA_MODULE)
  
@@ -1270,12 +1316,12 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, qui
  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata);
  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata);
  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, quirk_jmicron_ata);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB360, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366, quirk_jmicron_ata);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368, quirk_jmicron_ata);
  
  #endif
  
@@ -1521,6 +1567,10 @@ extern struct pci_fixup __start_pci_fixups_enable[];
  extern struct pci_fixup __end_pci_fixups_enable[];
  extern struct pci_fixup __start_pci_fixups_resume[];
  extern struct pci_fixup __end_pci_fixups_resume[];
+extern struct pci_fixup __start_pci_fixups_resume_early[];
+extern struct pci_fixup __end_pci_fixups_resume_early[];
+extern struct pci_fixup __start_pci_fixups_suspend[];
+extern struct pci_fixup __end_pci_fixups_suspend[];
  
  
  void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
@@ -1553,6 +1603,16 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
                 end = __end_pci_fixups_resume;
                 break;
  
+       case pci_fixup_resume_early:
+               start = __start_pci_fixups_resume_early;
+               end = __end_pci_fixups_resume_early;
+               break;
+
+       case pci_fixup_suspend:
+               start = __start_pci_fixups_suspend;
+               end = __end_pci_fixups_suspend;
+               break;
+
         default:
                 /* stupid compiler warning, you would think with an enum... */
                 return;
@@ -1629,7 +1689,7 @@ static void quirk_nvidia_ck804_pcie_aer_ext_cap(struct pci_dev *dev)
  }
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA,  PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
                         quirk_nvidia_ck804_pcie_aer_ext_cap);
-DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_NVIDIA,  PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA,  PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
                         quirk_nvidia_ck804_pcie_aer_ext_cap);
  
  static void __devinit quirk_via_cx700_pci_parking_caching(struct pci_dev *dev)
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c

index 8ddb918f5f57dfc0bdda3e4a287abe4c08bae365..827c0a520e2b3aec87bcc2a160720fbd48e836b2 100644 (file)
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -27,13 +27,6 @@
  #include <linux/slab.h>
  
  
-#define DEBUG_CONFIG 1
-#if DEBUG_CONFIG
-#define DBG(x...)     printk(x)
-#else
-#define DBG(x...)
-#endif
-
  static void pbus_assign_resources_sorted(struct pci_bus *bus)
  {
         struct pci_dev *dev;
@@ -81,8 +74,8 @@ void pci_setup_cardbus(struct pci_bus *bus)
         struct pci_dev *bridge = bus->self;
         struct pci_bus_region region;
  
-       printk("PCI: Bus %d, cardbus bridge: %s\n",
-               bus->number, pci_name(bridge));
+       dev_info(&bridge->dev, "CardBus bridge, secondary bus %04x:%02x\n",
+                pci_domain_nr(bus), bus->number);
  
         pcibios_resource_to_bus(bridge, &region, bus->resource[0]);
         if (bus->resource[0]->flags & IORESOURCE_IO) {
@@ -90,7 +83,7 @@ void pci_setup_cardbus(struct pci_bus *bus)
                  * The IO resource is allocated a range twice as large as it
                  * would normally need.  This allows us to set both IO regs.
                  */
-               printk(KERN_INFO "  IO window: 0x%08lx-0x%08lx\n",
+               dev_info(&bridge->dev, "  IO window: %#08lx-%#08lx\n",
                        (unsigned long)region.start,
                        (unsigned long)region.end);
                 pci_write_config_dword(bridge, PCI_CB_IO_BASE_0,
@@ -101,7 +94,7 @@ void pci_setup_cardbus(struct pci_bus *bus)
  
         pcibios_resource_to_bus(bridge, &region, bus->resource[1]);
         if (bus->resource[1]->flags & IORESOURCE_IO) {
-               printk(KERN_INFO "  IO window: 0x%08lx-0x%08lx\n",
+               dev_info(&bridge->dev, "  IO window: %#08lx-%#08lx\n",
                        (unsigned long)region.start,
                        (unsigned long)region.end);
                 pci_write_config_dword(bridge, PCI_CB_IO_BASE_1,
@@ -112,7 +105,7 @@ void pci_setup_cardbus(struct pci_bus *bus)
  
         pcibios_resource_to_bus(bridge, &region, bus->resource[2]);
         if (bus->resource[2]->flags & IORESOURCE_MEM) {
-               printk(KERN_INFO "  PREFETCH window: 0x%08lx-0x%08lx\n",
+               dev_info(&bridge->dev, "  PREFETCH window: %#08lx-%#08lx\n",
                        (unsigned long)region.start,
                        (unsigned long)region.end);
                 pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_0,
@@ -123,7 +116,7 @@ void pci_setup_cardbus(struct pci_bus *bus)
  
         pcibios_resource_to_bus(bridge, &region, bus->resource[3]);
         if (bus->resource[3]->flags & IORESOURCE_MEM) {
-               printk(KERN_INFO "  MEM window: 0x%08lx-0x%08lx\n",
+               dev_info(&bridge->dev, "  MEM window: %#08lx-%#08lx\n",
                        (unsigned long)region.start,
                        (unsigned long)region.end);
                 pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_1,
@@ -151,7 +144,8 @@ static void pci_setup_bridge(struct pci_bus *bus)
         struct pci_bus_region region;
         u32 l, bu, lu, io_upper16;
  
-       DBG(KERN_INFO "PCI: Bridge: %s\n", pci_name(bridge));
+       dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
+                pci_domain_nr(bus), bus->number);
  
         /* Set up the top and bottom of the PCI I/O segment for this bus. */
         pcibios_resource_to_bus(bridge, &region, bus->resource[0]);
@@ -162,7 +156,7 @@ static void pci_setup_bridge(struct pci_bus *bus)
                 l |= region.end & 0xf000;
                 /* Set up upper 16 bits of I/O base/limit. */
                 io_upper16 = (region.end & 0xffff0000) | (region.start >> 16);
-               DBG(KERN_INFO "  IO window: %04lx-%04lx\n",
+               dev_info(&bridge->dev, "  IO window: %#04lx-%#04lx\n",
                     (unsigned long)region.start,
                     (unsigned long)region.end);
         }
@@ -170,7 +164,7 @@ static void pci_setup_bridge(struct pci_bus *bus)
                 /* Clear upper 16 bits of I/O base/limit. */
                 io_upper16 = 0;
                 l = 0x00f0;
-               DBG(KERN_INFO "  IO window: disabled.\n");
+               dev_info(&bridge->dev, "  IO window: disabled\n");
         }
         /* Temporarily disable the I/O range before updating PCI_IO_BASE. */
         pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff);
@@ -185,13 +179,13 @@ static void pci_setup_bridge(struct pci_bus *bus)
         if (bus->resource[1]->flags & IORESOURCE_MEM) {
                 l = (region.start >> 16) & 0xfff0;
                 l |= region.end & 0xfff00000;
-               DBG(KERN_INFO "  MEM window: 0x%08lx-0x%08lx\n",
+               dev_info(&bridge->dev, "  MEM window: %#08lx-%#08lx\n",
                     (unsigned long)region.start,
                     (unsigned long)region.end);
         }
         else {
                 l = 0x0000fff0;
-               DBG(KERN_INFO "  MEM window: disabled.\n");
+               dev_info(&bridge->dev, "  MEM window: disabled\n");
         }
         pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
  
@@ -208,13 +202,13 @@ static void pci_setup_bridge(struct pci_bus *bus)
                 l |= region.end & 0xfff00000;
                 bu = upper_32_bits(region.start);
                 lu = upper_32_bits(region.end);
-               DBG(KERN_INFO "  PREFETCH window: 0x%016llx-0x%016llx\n",
+               dev_info(&bridge->dev, "  PREFETCH window: %#016llx-%#016llx\n",
                     (unsigned long long)region.start,
                     (unsigned long long)region.end);
         }
         else {
                 l = 0x0000fff0;
-               DBG(KERN_INFO "  PREFETCH window: disabled.\n");
+               dev_info(&bridge->dev, "  PREFETCH window: disabled\n");
         }
         pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);
  
@@ -361,9 +355,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
                         align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start;
                         order = __ffs(align) - 20;
                         if (order > 11) {
-                               printk(KERN_WARNING "PCI: region %s/%d "
-                                      "too large: 0x%016llx-0x%016llx\n",
-                                       pci_name(dev), i,
+                               dev_warn(&dev->dev, "BAR %d too large: "
+                                      "%#016llx-%#016llx\n", i,
                                        (unsigned long long)r->start,
                                        (unsigned long long)r->end);
                                 r->flags = 0;
@@ -529,8 +522,8 @@ void __ref pci_bus_assign_resources(struct pci_bus *bus)
                         break;
  
                 default:
-                       printk(KERN_INFO "PCI: not setting up bridge %s "
-                              "for bus %d\n", pci_name(dev), b->number);
+                       dev_info(&dev->dev, "not setting up bridge for bus "
+                                "%04x:%02x\n", pci_domain_nr(b), b->number);
                         break;
                 }
         }
diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c

index 05ca2ed9eb51fdc24340a3244a087e4680ae86ce..aa795fd428ded07fd01d25755fe6680e6091041c 100644 (file)
--- a/drivers/pci/setup-irq.c
+++ b/drivers/pci/setup-irq.c
@@ -47,8 +47,7 @@ pdev_fixup_irq(struct pci_dev *dev,
         }
         dev->irq = irq;
  
-       pr_debug("PCI: fixup irq: (%s) got %d\n",
-               kobject_name(&dev->dev.kobj), dev->irq);
+       dev_dbg(&dev->dev, "fixup irq: got %d\n", dev->irq);
  
         /* Always tell the device, so the driver knows what is
            the real IRQ to use; the device does not use it. */
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c

index 7d35cdf4579f2f92ca8c5293053dad43511042ea..1a5fc83c71b36fec69fe45a30006a27db9bc1300 100644 (file)
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -26,8 +26,7 @@
  #include "pci.h"
  
  
-void
-pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
+void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
  {
         struct pci_bus_region region;
         u32 new, check, mask;
@@ -43,20 +42,20 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
         /*
          * Ignore non-moveable resources.  This might be legacy resources for
          * which no functional BAR register exists or another important
-        * system resource we should better not move around in system address
-        * space.
+        * system resource we shouldn't move around.
          */
         if (res->flags & IORESOURCE_PCI_FIXED)
                 return;
  
         pcibios_resource_to_bus(dev, &region, res);
  
-       pr_debug("  got res [%llx:%llx] bus [%llx:%llx] flags %lx for "
-                "BAR %d of %s\n", (unsigned long long)res->start,
+       dev_dbg(&dev->dev, "BAR %d: got res [%#llx-%#llx] bus [%#llx-%#llx] "
+               "flags %#lx\n", resno,
+                (unsigned long long)res->start,
                  (unsigned long long)res->end,
                  (unsigned long long)region.start,
                  (unsigned long long)region.end,
-                (unsigned long)res->flags, resno, pci_name(dev));
+                (unsigned long)res->flags);
  
         new = region.start | (res->flags & PCI_REGION_FLAG_MASK);
         if (res->flags & IORESOURCE_IO)
@@ -81,9 +80,8 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
         pci_read_config_dword(dev, reg, &check);
  
         if ((new ^ check) & mask) {
-               printk(KERN_ERR "PCI: Error while updating region "
-                      "%s/%d (%08x != %08x)\n", pci_name(dev), resno,
-                      new, check);
+               dev_err(&dev->dev, "BAR %d: error updating (%#010x != %#010x)\n",
+                       resno, new, check);
         }
  
         if ((new & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
@@ -92,15 +90,14 @@ pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
                 pci_write_config_dword(dev, reg + 4, new);
                 pci_read_config_dword(dev, reg + 4, &check);
                 if (check != new) {
-                       printk(KERN_ERR "PCI: Error updating region "
-                              "%s/%d (high %08x != %08x)\n",
-                              pci_name(dev), resno, new, check);
+                       dev_err(&dev->dev, "BAR %d: error updating "
+                              "(high %#010x != %#010x)\n", resno, new, check);
                 }
         }
         res->flags &= ~IORESOURCE_UNSET;
-       pr_debug("PCI: moved device %s resource %d (%lx) to %x\n",
-               pci_name(dev), resno, res->flags,
-               new & ~PCI_REGION_FLAG_MASK);
+       dev_dbg(&dev->dev, "BAR %d: moved to bus [%#llx-%#llx] flags %#lx\n",
+               resno, (unsigned long long)region.start,
+               (unsigned long long)region.end, res->flags);
  }
  
  int pci_claim_resource(struct pci_dev *dev, int resource)
@@ -117,10 +114,11 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
                 err = insert_resource(root, res);
  
         if (err) {
-               printk(KERN_ERR "PCI: %s region %d of %s %s [%llx:%llx]\n",
-                       root ? "Address space collision on" :
-                               "No parent found for",
-                       resource, dtype, pci_name(dev),
+               dev_err(&dev->dev, "BAR %d: %s %s [%#llx-%#llx]\n",
+                       resource,
+                       root ? "address space collision on" :
+                               "no parent found for",
+                       dtype,
                         (unsigned long long)res->start,
                         (unsigned long long)res->end);
         }
@@ -140,11 +138,10 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
  
         align = resource_alignment(res);
         if (!align) {
-               printk(KERN_ERR "PCI: Cannot allocate resource (bogus "
-                       "alignment) %d [%llx:%llx] (flags %lx) of %s\n",
+               dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus "
+                       "alignment) [%#llx-%#llx] flags %#lx\n",
                         resno, (unsigned long long)res->start,
-                       (unsigned long long)res->end, res->flags,
-                       pci_name(dev));
+                       (unsigned long long)res->end, res->flags);
                 return -EINVAL;
         }
  
@@ -165,11 +162,11 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
         }
  
         if (ret) {
-               printk(KERN_ERR "PCI: Failed to allocate %s resource "
-                       "#%d:%llx@%llx for %s\n",
+               dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
+                       "[%#llx-%#llx]\n", resno,
                         res->flags & IORESOURCE_IO ? "I/O" : "mem",
-                       resno, (unsigned long long)size,
-                       (unsigned long long)res->start, pci_name(dev));
+                       (unsigned long long)res->start,
+                       (unsigned long long)res->end);
         } else {
                 res->flags &= ~IORESOURCE_STARTALIGN;
                 if (resno < PCI_BRIDGE_RESOURCES)
@@ -205,11 +202,11 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno)
         }
  
         if (ret) {
-               printk(KERN_ERR "PCI: Failed to allocate %s resource "
-                               "#%d:%llx@%llx for %s\n",
+               dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
+                       "[%#llx-%#llx]\n", resno,
                         res->flags & IORESOURCE_IO ? "I/O" : "mem",
-                       resno, (unsigned long long)(res->end - res->start + 1),
-                       (unsigned long long)res->start, pci_name(dev));
+                       (unsigned long long)res->start,
+                       (unsigned long long)res->end);
         } else if (resno < PCI_BRIDGE_RESOURCES) {
                 pci_update_resource(dev, res, resno);
         }
@@ -239,11 +236,10 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
  
                 r_align = resource_alignment(r);
                 if (!r_align) {
-                       printk(KERN_WARNING "PCI: bogus alignment of resource "
-                               "%d [%llx:%llx] (flags %lx) of %s\n",
+                       dev_warn(&dev->dev, "BAR %d: bogus alignment "
+                               "[%#llx-%#llx] flags %#lx\n",
                                 i, (unsigned long long)r->start,
-                               (unsigned long long)r->end, r->flags,
-                               pci_name(dev));
+                               (unsigned long long)r->end, r->flags);
                         continue;
                 }
                 for (list = head; ; list = list->next) {
@@ -291,7 +287,7 @@ int pci_enable_resources(struct pci_dev *dev, int mask)
  
                 if (!r->parent) {
                         dev_err(&dev->dev, "device not available because of "
-                               "BAR %d [%llx:%llx] collisions\n", i,
+                               "BAR %d [%#llx-%#llx] collisions\n", i,
                                 (unsigned long long) r->start,
                                 (unsigned long long) r->end);
                         return -EINVAL;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c

new file mode 100644 (file)

index 0000000..7e5b85c
--- /dev/null
+++ b/drivers/pci/slot.c
@@ -0,0 +1,233 @@
+/*
+ * drivers/pci/slot.c
+ * Copyright (C) 2006 Matthew Wilcox <matthew@wil.cx>
+ * Copyright (C) 2006-2008 Hewlett-Packard Development Company, L.P.
+ *     Alex Chiang <achiang@hp.com>
+ */
+
+#include <linux/kobject.h>
+#include <linux/pci.h>
+#include <linux/err.h>
+#include "pci.h"
+
+struct kset *pci_slots_kset;
+EXPORT_SYMBOL_GPL(pci_slots_kset);
+
+/*
+ * sysfs ->show entry point shared by every pci_slot attribute: map the
+ * generic kobject/attribute pair back to their pci_slot wrappers and hand
+ * the read to the attribute's own show() method.  An attribute that has no
+ * show() method yields -EIO.
+ */
+static ssize_t pci_slot_attr_show(struct kobject *kobj,
+                                       struct attribute *attr, char *buf)
+{
+       struct pci_slot *pslot = to_pci_slot(kobj);
+       struct pci_slot_attribute *pattr = to_pci_slot_attr(attr);
+
+       if (!pattr->show)
+               return -EIO;
+
+       return pattr->show(pslot, buf);
+}
+
+/*
+ * sysfs ->store entry point shared by every pci_slot attribute: map the
+ * generic kobject/attribute pair back to their pci_slot wrappers and hand
+ * the write of @len bytes from @buf to the attribute's own store() method.
+ * An attribute that has no store() method yields -EIO.
+ */
+static ssize_t pci_slot_attr_store(struct kobject *kobj,
+                       struct attribute *attr, const char *buf, size_t len)
+{
+       struct pci_slot *pslot = to_pci_slot(kobj);
+       struct pci_slot_attribute *pattr = to_pci_slot_attr(attr);
+
+       if (!pattr->store)
+               return -EIO;
+
+       return pattr->store(pslot, buf, len);
+}
+
+/*
+ * sysfs operations referenced by pci_slot_ktype: reads go through
+ * pci_slot_attr_show() and writes through pci_slot_attr_store().
+ */
+static struct sysfs_ops pci_slot_sysfs_ops = {
+       .show = pci_slot_attr_show,
+       .store = pci_slot_attr_store,
+};
+
+/*
+ * Format the slot's 'address' attribute into @buf and return the sprintf()
+ * character count.  A slot whose number is 0xff is printed as "dddd:bb"
+ * (domain and bus only); every other slot is printed as "dddd:bb:ss" with
+ * the slot number appended.
+ */
+static ssize_t address_read_file(struct pci_slot *slot, char *buf)
+{
+       if (slot->number != 0xff)
+               return sprintf(buf, "%04x:%02x:%02x\n",
+                               pci_domain_nr(slot->bus),
+                               slot->bus->number,
+                               slot->number);
+
+       return sprintf(buf, "%04x:%02x\n",
+                       pci_domain_nr(slot->bus),
+                       slot->bus->number);
+}
+
+/*
+ * kobject release method for a pci_slot (installed via pci_slot_ktype):
+ * unlink the slot from the list it sits on and free its memory.
+ */
+static void pci_slot_release(struct kobject *kobj)
+{
+       struct pci_slot *slot = to_pci_slot(kobj);
+
+       pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
+                slot->bus->number, slot->number);
+
+       /* Unlink before freeing; the list node lives inside *slot. */
+       list_del(&slot->list);
+
+       kfree(slot);
+}
+
+/*
+ * The 'address' attribute: read-only (S_IRUGO, no store method), served by
+ * address_read_file().
+ */
+static struct pci_slot_attribute pci_slot_attr_address =
+       __ATTR(address, (S_IFREG | S_IRUGO), address_read_file, NULL);
+
+/* NULL-terminated list of attributes attached to pci_slot_ktype. */
+static struct attribute *pci_slot_default_attrs[] = {
+       &pci_slot_attr_address.attr,
+       NULL,
+};
+
+/*
+ * kobj_type passed to kobject_init_and_add() in pci_create_slot(); ties
+ * together the sysfs dispatchers, the release method and the default
+ * attribute list for pci_slot kobjects.
+ */
+static struct kobj_type pci_slot_ktype = {
+       .sysfs_ops = &pci_slot_sysfs_ops,
+       .release = &pci_slot_release,
+       .default_attrs = pci_slot_default_attrs,
+};
+
+/**
+ * pci_create_slot - create or increment refcount for physical PCI slot
+ * @parent: struct pci_bus of parent bridge
+ * @slot_nr: PCI_SLOT(pci_dev->devfn) or -1 for placeholder
+ * @name: user visible string presented in /sys/bus/pci/slots/<name>
+ *
+ * PCI slots have first class attributes such as address, speed, width,
+ * and a &struct pci_slot is used to manage them. This interface will
+ * either return a new &struct pci_slot to the caller, or if the pci_slot
+ * already exists, its refcount will be incremented.
+ *
+ * Slots are uniquely identified by a @parent, @slot_nr, @name tuple.
+ *
+ * Placeholder slots:
+ * In most cases, @parent, @slot_nr will be sufficient to uniquely identify
+ * a slot. There is one notable exception - pSeries (rpaphp), where the
+ * @slot_nr cannot be determined until a device is actually inserted into
+ * the slot. In this scenario, the caller may pass -1 for @slot_nr.
+ *
+ * The following semantics are imposed when the caller passes @slot_nr ==
+ * -1. First, the check for existing %struct pci_slot is skipped, as the
+ * caller may know about several unpopulated slots on a given %struct
+ * pci_bus, and each slot would have a @slot_nr of -1.  Uniqueness for
+ * these slots is then determined by the @name parameter. We expect
+ * kobject_init_and_add() to warn us if the caller attempts to create
+ * multiple slots with the same name. The other change in semantics is
+ * user-visible, which is the 'address' parameter presented in sysfs will
+ * consist solely of a dddd:bb tuple, where dddd is the PCI domain of the
+ * %struct pci_bus and bb is the bus number. In other words, the devfn of
+ * the 'placeholder' slot will not be displayed.
+ *
+ * Returns the existing or newly created &struct pci_slot on success,
+ * ERR_PTR(-ENOMEM) if the allocation fails, or ERR_PTR() of the
+ * kobject_init_and_add() error code if registration fails.
+ */
+
+struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
+                                const char *name)
+{
+       struct pci_slot *slot;
+       int err;
+
+       /* Held for the whole lookup-or-create sequence; dropped at 'out'. */
+       down_write(&pci_bus_sem);
+
+       /* Placeholders never match an existing slot; skip the lookup. */
+       if (slot_nr == -1)
+               goto placeholder;
+
+       /* If we've already created this slot, bump refcount and return. */
+       list_for_each_entry(slot, &parent->slots, list) {
+               if (slot->number == slot_nr) {
+                       kobject_get(&slot->kobj);
+                       pr_debug("%s: inc refcount to %d on %04x:%02x:%02x\n",
+                                __func__,
+                                atomic_read(&slot->kobj.kref.refcount),
+                                pci_domain_nr(parent), parent->number,
+                                slot_nr);
+                       goto out;
+               }
+       }
+
+placeholder:
+       slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+       if (!slot) {
+               slot = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       slot->bus = parent;
+       slot->number = slot_nr;
+
+       /* Register the slot's kobject in the global "slots" kset. */
+       slot->kobj.kset = pci_slots_kset;
+       err = kobject_init_and_add(&slot->kobj, &pci_slot_ktype, NULL,
+                                  "%s", name);
+       if (err) {
+               printk(KERN_ERR "Unable to register kobject %s\n", name);
+               goto err;
+       }
+
+       /* Only a successfully registered slot is linked onto the bus. */
+       INIT_LIST_HEAD(&slot->list);
+       list_add(&slot->list, &parent->slots);
+
+       /* Don't care if debug printk has a -1 for slot_nr */
+       pr_debug("%s: created pci_slot on %04x:%02x:%02x\n",
+                __func__, pci_domain_nr(parent), parent->number, slot_nr);
+
+ out:
+       up_write(&pci_bus_sem);
+       return slot;
+ err:
+       /*
+        * NOTE(review): the kobject API documentation asks for kobject_put()
+        * after a failed kobject_init_and_add(); a bare kfree() may leak the
+        * kobject's name.  Confirm before changing: the release method also
+        * does list_del() on a list head that is not yet initialised here.
+        */
+       kfree(slot);
+       slot = ERR_PTR(err);
+       goto out;
+}
+EXPORT_SYMBOL_GPL(pci_create_slot);
+
+/**
+ * pci_update_slot_number - update %struct pci_slot -> number
+ * @slot: %struct pci_slot to update
+ * @slot_nr: new number for slot
+ *
+ * The primary purpose of this interface is to allow callers who earlier
+ * created a placeholder slot in pci_create_slot() by passing a -1 as
+ * slot_nr, to update their %struct pci_slot with the correct @slot_nr.
+ *
+ * Before the update, every slot on the same bus is checked: a slot that
+ * already carries @slot_nr triggers WARN_ON(), and a warning is printed if
+ * more than one slot shares @slot's kobject name.  The number is updated
+ * regardless of either warning.
+ */
+
+void pci_update_slot_number(struct pci_slot *slot, int slot_nr)
+{
+       int name_count = 0;
+       struct pci_slot *tmp;
+
+       down_write(&pci_bus_sem);
+
+       list_for_each_entry(tmp, &slot->bus->slots, list) {
+               WARN_ON(tmp->number == slot_nr);
+               /*
+                * Presumably @slot itself is on this list (pci_create_slot()
+                * links it there), so one name match is expected.
+                */
+               if (!strcmp(kobject_name(&tmp->kobj), kobject_name(&slot->kobj)))
+                       name_count++;
+       }
+
+       if (name_count > 1)
+               printk(KERN_WARNING "pci_update_slot_number found %d slots with the same name: %s\n", name_count, kobject_name(&slot->kobj));
+
+       slot->number = slot_nr;
+       up_write(&pci_bus_sem);
+}
+EXPORT_SYMBOL_GPL(pci_update_slot_number);
+
+/**
+ * pci_destroy_slot - decrement refcount for physical PCI slot
+ * @slot: struct pci_slot to decrement
+ *
+ * %struct pci_slot is refcounted, so destroying them is really easy; we
+ * just call kobject_put on its kobj and let our release methods do the
+ * rest.
+ */
+
+void pci_destroy_slot(struct pci_slot *slot)
+{
+       /*
+        * The refcount is sampled here, before pci_bus_sem is taken, so the
+        * value printed is the count at that moment minus one, not the
+        * result of the kobject_put() below.
+        */
+       pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__,
+                atomic_read(&slot->kobj.kref.refcount) - 1,
+                pci_domain_nr(slot->bus), slot->bus->number, slot->number);
+
+       /* The put runs under the same semaphore pci_create_slot() holds. */
+       down_write(&pci_bus_sem);
+       kobject_put(&slot->kobj);
+       up_write(&pci_bus_sem);
+}
+EXPORT_SYMBOL_GPL(pci_destroy_slot);
+
+/*
+ * Create the "slots" kset underneath the PCI bus kset and publish it in
+ * pci_slots_kset.  Returns 0 on success or -ENOMEM if the kset could not
+ * be created.  Registered as a subsys_initcall.
+ */
+static int pci_slot_init(void)
+{
+       struct kset *bus_kset = bus_get_kset(&pci_bus_type);
+
+       pci_slots_kset = kset_create_and_add("slots", NULL, &bus_kset->kobj);
+       if (pci_slots_kset)
+               return 0;
+
+       printk(KERN_ERR "PCI: Slot initialization failure\n");
+       return -ENOMEM;
+}
+
+subsys_initcall(pci_slot_init);
diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h

index 886dac823ed6d6e1f66ff609c433fbd5644646fb..e3fa9a2d9a3d3a2a9124bd334c54632aeb760f94 100644 (file)
--- a/drivers/pnp/base.h
+++ b/drivers/pnp/base.h
@@ -1,3 +1,8 @@
+/*
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *     Bjorn Helgaas <bjorn.helgaas@hp.com>
+ */
+
  extern spinlock_t pnp_lock;
  void *pnp_alloc(long size);
  
@@ -19,22 +24,118 @@ void pnp_remove_card(struct pnp_card *card);
  int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev);
  void pnp_remove_card_device(struct pnp_dev *dev);
  
-struct pnp_option *pnp_build_option(int priority);
-struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev);
-struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev,
-                                                int priority);
-int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option,
-                             struct pnp_irq *data);
-int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option,
-                             struct pnp_dma *data);
-int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option,
-                              struct pnp_port *data);
-int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option,
-                             struct pnp_mem *data);
+struct pnp_port {
+       resource_size_t min;    /* min base number */
+       resource_size_t max;    /* max base number */
+       resource_size_t align;  /* align boundary */
+       resource_size_t size;   /* size of range */
+       unsigned char flags;    /* port flags */
+};
+
+#define PNP_IRQ_NR 256
+typedef struct { DECLARE_BITMAP(bits, PNP_IRQ_NR); } pnp_irq_mask_t;
+
+struct pnp_irq {
+       pnp_irq_mask_t map;     /* bitmap for IRQ lines */
+       unsigned char flags;    /* IRQ flags */
+};
+
+struct pnp_dma {
+       unsigned char map;      /* bitmask for DMA channels */
+       unsigned char flags;    /* DMA flags */
+};
+
+struct pnp_mem {
+       resource_size_t min;    /* min base number */
+       resource_size_t max;    /* max base number */
+       resource_size_t align;  /* align boundary */
+       resource_size_t size;   /* size of range */
+       unsigned char flags;    /* memory flags */
+};
+
+/*
+ * Layout of pnp_option.flags:
+ *   bit 31       PNP_OPTION_DEPENDENT
+ *   bits 12-27   dependent set number (PNP_OPTION_SET_MASK after shifting
+ *                right by PNP_OPTION_SET_SHIFT)
+ *   bits 0-11    priority (PNP_OPTION_PRIORITY_MASK, shift 0)
+ */
+#define PNP_OPTION_DEPENDENT           0x80000000
+#define PNP_OPTION_SET_MASK            0xffff
+#define PNP_OPTION_SET_SHIFT           12
+#define PNP_OPTION_PRIORITY_MASK       0xfff
+#define PNP_OPTION_PRIORITY_SHIFT      0
+
+/*
+ * Values for the priority field.  PNP_RES_PRIORITY_INVALID is the all-ones
+ * value of that field.
+ */
+#define PNP_RES_PRIORITY_PREFERRED     0
+#define PNP_RES_PRIORITY_ACCEPTABLE    1
+#define PNP_RES_PRIORITY_FUNCTIONAL    2
+#define PNP_RES_PRIORITY_INVALID       PNP_OPTION_PRIORITY_MASK
+
+/*
+ * One possible resource setting for a device, linked on a list through
+ * @list.  @type records which member of the union @u holds the data.
+ */
+struct pnp_option {
+       struct list_head list;
+       unsigned int flags;     /* independent/dependent, set, priority */
+
+       unsigned long type;     /* IORESOURCE_{IO,MEM,IRQ,DMA} */
+       union {
+               struct pnp_port port;   /* type == IORESOURCE_IO */
+               struct pnp_irq irq;     /* type == IORESOURCE_IRQ */
+               struct pnp_dma dma;     /* type == IORESOURCE_DMA */
+               struct pnp_mem mem;     /* type == IORESOURCE_MEM */
+       } u;
+};
+
+int pnp_register_irq_resource(struct pnp_dev *dev, unsigned int option_flags,
+                             pnp_irq_mask_t *map, unsigned char flags);
+int pnp_register_dma_resource(struct pnp_dev *dev, unsigned int option_flags,
+                             unsigned char map, unsigned char flags);
+int pnp_register_port_resource(struct pnp_dev *dev, unsigned int option_flags,
+                              resource_size_t min, resource_size_t max,
+                              resource_size_t align, resource_size_t size,
+                              unsigned char flags);
+int pnp_register_mem_resource(struct pnp_dev *dev, unsigned int option_flags,
+                             resource_size_t min, resource_size_t max,
+                             resource_size_t align, resource_size_t size,
+                             unsigned char flags);
+
+/* Return 1 if @option has PNP_OPTION_DEPENDENT set in its flags, else 0. */
+static inline int pnp_option_is_dependent(struct pnp_option *option)
+{
+       return !!(option->flags & PNP_OPTION_DEPENDENT);
+}
+
+/* Extract the dependent-set number encoded in @option's flags. */
+static inline unsigned int pnp_option_set(struct pnp_option *option)
+{
+       unsigned int shifted = option->flags >> PNP_OPTION_SET_SHIFT;
+
+       return shifted & PNP_OPTION_SET_MASK;
+}
+
+/* Extract the priority field encoded in @option's flags. */
+static inline unsigned int pnp_option_priority(struct pnp_option *option)
+{
+       unsigned int shifted = option->flags >> PNP_OPTION_PRIORITY_SHIFT;
+
+       return shifted & PNP_OPTION_PRIORITY_MASK;
+}
+
+/*
+ * Start a new dependent option set on @dev and return the option flags
+ * that identify it: PNP_OPTION_DEPENDENT, the current value of
+ * dev->num_dependent_sets as the set number, and @priority.  A priority
+ * above PNP_RES_PRIORITY_FUNCTIONAL is reported with a warning and replaced
+ * by PNP_RES_PRIORITY_INVALID.  dev->num_dependent_sets is incremented as a
+ * side effect, so each call yields the next set number.
+ */
+static inline unsigned int pnp_new_dependent_set(struct pnp_dev *dev,
+                                                int priority)
+{
+       unsigned int flags;
+
+       if (priority > PNP_RES_PRIORITY_FUNCTIONAL) {
+               /* Terminate the message so it is not merged with the next log line. */
+               dev_warn(&dev->dev, "invalid dependent option priority %d "
+                        "clipped to %d\n", priority,
+                        PNP_RES_PRIORITY_INVALID);
+               priority = PNP_RES_PRIORITY_INVALID;
+       }
+
+       flags = PNP_OPTION_DEPENDENT |
+           ((dev->num_dependent_sets & PNP_OPTION_SET_MASK) <<
+               PNP_OPTION_SET_SHIFT) |
+           ((priority & PNP_OPTION_PRIORITY_MASK) <<
+               PNP_OPTION_PRIORITY_SHIFT);
+
+       dev->num_dependent_sets++;
+
+       return flags;
+}
+
+char *pnp_option_priority_name(struct pnp_option *option);
+void dbg_pnp_show_option(struct pnp_dev *dev, struct pnp_option *option);
+
  void pnp_init_resources(struct pnp_dev *dev);
  
  void pnp_fixup_device(struct pnp_dev *dev);
-void pnp_free_option(struct pnp_option *option);
+void pnp_free_options(struct pnp_dev *dev);
  int __pnp_add_device(struct pnp_dev *dev);
  void __pnp_remove_device(struct pnp_dev *dev);
  
@@ -43,29 +144,18 @@ int pnp_check_mem(struct pnp_dev *dev, struct resource *res);
  int pnp_check_irq(struct pnp_dev *dev, struct resource *res);
  int pnp_check_dma(struct pnp_dev *dev, struct resource *res);
  
+char *pnp_resource_type_name(struct resource *res);
  void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc);
  
-void pnp_init_resource(struct resource *res);
-
-struct pnp_resource *pnp_get_pnp_resource(struct pnp_dev *dev,
-                                         unsigned int type, unsigned int num);
-
-#define PNP_MAX_PORT           40
-#define PNP_MAX_MEM            24
-#define PNP_MAX_IRQ             2
-#define PNP_MAX_DMA             2
+void pnp_free_resources(struct pnp_dev *dev);
+int pnp_resource_type(struct resource *res);
  
  struct pnp_resource {
+       struct list_head list;
         struct resource res;
-       unsigned int index;             /* ISAPNP config register index */
  };
  
-struct pnp_resource_table {
-       struct pnp_resource port[PNP_MAX_PORT];
-       struct pnp_resource mem[PNP_MAX_MEM];
-       struct pnp_resource dma[PNP_MAX_DMA];
-       struct pnp_resource irq[PNP_MAX_IRQ];
-};
+void pnp_free_resource(struct pnp_resource *pnp_res);
  
  struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq,
                                           int flags);
diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c

index 20771b7d4482562d18536aca252f9e870b08f907..a411582bcd72d3caf7638fc1ede2a0747498af6e 100644 (file)
--- a/drivers/pnp/core.c
+++ b/drivers/pnp/core.c
@@ -99,14 +99,28 @@ static void pnp_free_ids(struct pnp_dev *dev)
         }
  }
  
+/* Unlink @pnp_res from the list it is on and free it. */
+void pnp_free_resource(struct pnp_resource *pnp_res)
+{
+       list_del(&pnp_res->list);
+       kfree(pnp_res);
+}
+
+/*
+ * Free every pnp_resource on @dev's resource list.  The _safe iterator is
+ * required because each entry is unlinked and freed while walking.
+ */
+void pnp_free_resources(struct pnp_dev *dev)
+{
+       struct pnp_resource *cur, *next;
+
+       list_for_each_entry_safe(cur, next, &dev->resources, list)
+               pnp_free_resource(cur);
+}
+
  static void pnp_release_device(struct device *dmdev)
  {
         struct pnp_dev *dev = to_pnp_dev(dmdev);
  
-       pnp_free_option(dev->independent);
-       pnp_free_option(dev->dependent);
         pnp_free_ids(dev);
-       kfree(dev->res);
+       pnp_free_resources(dev);
+       pnp_free_options(dev);
         kfree(dev);
  }
  
@@ -119,12 +133,8 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, char *pnpid
         if (!dev)
                 return NULL;
  
-       dev->res = kzalloc(sizeof(struct pnp_resource_table), GFP_KERNEL);
-       if (!dev->res) {
-               kfree(dev);
-               return NULL;
-       }
-
+       INIT_LIST_HEAD(&dev->resources);
+       INIT_LIST_HEAD(&dev->options);
         dev->protocol = protocol;
         dev->number = id;
         dev->dma_mask = DMA_24BIT_MASK;
@@ -140,7 +150,6 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, char *pnpid
  
         dev_id = pnp_add_id(dev, pnpid);
         if (!dev_id) {
-               kfree(dev->res);
                 kfree(dev);
                 return NULL;
         }
diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c

index 5695a79f3a5262fe3701752a41cce19b43fff937..a876ecf7028c06c003d5fc1aed974bb9f5f97f98 100644 (file)
--- a/drivers/pnp/interface.c
+++ b/drivers/pnp/interface.c
@@ -3,6 +3,8 @@
   *
   * Some code, especially possible resource dumping is based on isapnp_proc.c (c) Jaroslav Kysela <perex@perex.cz>
   * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *     Bjorn Helgaas <bjorn.helgaas@hp.com>
   */
  
  #include <linux/pnp.h>
@@ -53,11 +55,13 @@ static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt, ...)
  static void pnp_print_port(pnp_info_buffer_t * buffer, char *space,
                            struct pnp_port *port)
  {
-       pnp_printf(buffer,
-                  "%sport 0x%x-0x%x, align 0x%x, size 0x%x, %i-bit address decoding\n",
-                  space, port->min, port->max,
-                  port->align ? (port->align - 1) : 0, port->size,
-                  port->flags & PNP_PORT_FLAG_16BITADDR ? 16 : 10);
+       pnp_printf(buffer, "%sport %#llx-%#llx, align %#llx, size %#llx, "
+                  "%i-bit address decoding\n", space,
+                  (unsigned long long) port->min,
+                  (unsigned long long) port->max,
+                  port->align ? ((unsigned long long) port->align - 1) : 0,
+                  (unsigned long long) port->size,
+                  port->flags & IORESOURCE_IO_16BIT_ADDR ? 16 : 10);
  }
  
  static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space,
@@ -67,7 +71,7 @@ static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space,
  
         pnp_printf(buffer, "%sirq ", space);
         for (i = 0; i < PNP_IRQ_NR; i++)
-               if (test_bit(i, irq->map)) {
+               if (test_bit(i, irq->map.bits)) {
                         if (!first) {
                                 pnp_printf(buffer, ",");
                         } else {
@@ -78,7 +82,7 @@ static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space,
                         else
                                 pnp_printf(buffer, "%i", i);
                 }
-       if (bitmap_empty(irq->map, PNP_IRQ_NR))
+       if (bitmap_empty(irq->map.bits, PNP_IRQ_NR))
                 pnp_printf(buffer, "<none>");
         if (irq->flags & IORESOURCE_IRQ_HIGHEDGE)
                 pnp_printf(buffer, " High-Edge");
@@ -88,6 +92,8 @@ static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space,
                 pnp_printf(buffer, " High-Level");
         if (irq->flags & IORESOURCE_IRQ_LOWLEVEL)
                 pnp_printf(buffer, " Low-Level");
+       if (irq->flags & IORESOURCE_IRQ_OPTIONAL)
+               pnp_printf(buffer, " (optional)");
         pnp_printf(buffer, "\n");
  }
  
@@ -148,8 +154,11 @@ static void pnp_print_mem(pnp_info_buffer_t * buffer, char *space,
  {
         char *s;
  
-       pnp_printf(buffer, "%sMemory 0x%x-0x%x, align 0x%x, size 0x%x",
-                  space, mem->min, mem->max, mem->align, mem->size);
+       pnp_printf(buffer, "%sMemory %#llx-%#llx, align %#llx, size %#llx",
+                  space, (unsigned long long) mem->min,
+                  (unsigned long long) mem->max,
+                  (unsigned long long) mem->align,
+                  (unsigned long long) mem->size);
         if (mem->flags & IORESOURCE_MEM_WRITEABLE)
                 pnp_printf(buffer, ", writeable");
         if (mem->flags & IORESOURCE_MEM_CACHEABLE)
@@ -177,65 +186,58 @@ static void pnp_print_mem(pnp_info_buffer_t * buffer, char *space,
  }
  
  static void pnp_print_option(pnp_info_buffer_t * buffer, char *space,
-                            struct pnp_option *option, int dep)
+                            struct pnp_option *option)
  {
-       char *s;
-       struct pnp_port *port;
-       struct pnp_irq *irq;
-       struct pnp_dma *dma;
-       struct pnp_mem *mem;
-
-       if (dep) {
-               switch (option->priority) {
-               case PNP_RES_PRIORITY_PREFERRED:
-                       s = "preferred";
-                       break;
-               case PNP_RES_PRIORITY_ACCEPTABLE:
-                       s = "acceptable";
-                       break;
-               case PNP_RES_PRIORITY_FUNCTIONAL:
-                       s = "functional";
-                       break;
-               default:
-                       s = "invalid";
-               }
-               pnp_printf(buffer, "Dependent: %02i - Priority %s\n", dep, s);
+       switch (option->type) {
+       case IORESOURCE_IO:
+               pnp_print_port(buffer, space, &option->u.port);
+               break;
+       case IORESOURCE_MEM:
+               pnp_print_mem(buffer, space, &option->u.mem);
+               break;
+       case IORESOURCE_IRQ:
+               pnp_print_irq(buffer, space, &option->u.irq);
+               break;
+       case IORESOURCE_DMA:
+               pnp_print_dma(buffer, space, &option->u.dma);
+               break;
         }
-
-       for (port = option->port; port; port = port->next)
-               pnp_print_port(buffer, space, port);
-       for (irq = option->irq; irq; irq = irq->next)
-               pnp_print_irq(buffer, space, irq);
-       for (dma = option->dma; dma; dma = dma->next)
-               pnp_print_dma(buffer, space, dma);
-       for (mem = option->mem; mem; mem = mem->next)
-               pnp_print_mem(buffer, space, mem);
  }
  
  static ssize_t pnp_show_options(struct device *dmdev,
                                 struct device_attribute *attr, char *buf)
  {
         struct pnp_dev *dev = to_pnp_dev(dmdev);
-       struct pnp_option *independent = dev->independent;
-       struct pnp_option *dependent = dev->dependent;
-       int ret, dep = 1;
+       pnp_info_buffer_t *buffer;
+       struct pnp_option *option;
+       int ret, dep = 0, set = 0;
+       char *indent;
  
-       pnp_info_buffer_t *buffer = (pnp_info_buffer_t *)
-           pnp_alloc(sizeof(pnp_info_buffer_t));
+       buffer = pnp_alloc(sizeof(pnp_info_buffer_t));
         if (!buffer)
                 return -ENOMEM;
  
         buffer->len = PAGE_SIZE;
         buffer->buffer = buf;
         buffer->curr = buffer->buffer;
-       if (independent)
-               pnp_print_option(buffer, "", independent, 0);
  
-       while (dependent) {
-               pnp_print_option(buffer, "   ", dependent, dep);
-               dependent = dependent->next;
-               dep++;
+       list_for_each_entry(option, &dev->options, list) {
+               if (pnp_option_is_dependent(option)) {
+                       indent = "  ";
+                       if (!dep || pnp_option_set(option) != set) {
+                               set = pnp_option_set(option);
+                               dep = 1;
+                               pnp_printf(buffer, "Dependent: %02i - "
+                                          "Priority %s\n", set,
+                                          pnp_option_priority_name(option));
+                       }
+               } else {
+                       dep = 0;
+                       indent = "";
+               }
+               pnp_print_option(buffer, indent, option);
         }
+
         ret = (buffer->curr - buf);
         kfree(buffer);
         return ret;
@@ -248,79 +250,59 @@ static ssize_t pnp_show_current_resources(struct device *dmdev,
                                           char *buf)
  {
         struct pnp_dev *dev = to_pnp_dev(dmdev);
-       struct resource *res;
-       int i, ret;
         pnp_info_buffer_t *buffer;
+       struct pnp_resource *pnp_res;
+       struct resource *res;
+       int ret;
  
         if (!dev)
                 return -EINVAL;
  
-       buffer = (pnp_info_buffer_t *) pnp_alloc(sizeof(pnp_info_buffer_t));
+       buffer = pnp_alloc(sizeof(pnp_info_buffer_t));
         if (!buffer)
                 return -ENOMEM;
+
         buffer->len = PAGE_SIZE;
         buffer->buffer = buf;
         buffer->curr = buffer->buffer;
  
-       pnp_printf(buffer, "state = ");
-       if (dev->active)
-               pnp_printf(buffer, "active\n");
-       else
-               pnp_printf(buffer, "disabled\n");
-
-       for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IO, i)); i++) {
-               if (pnp_resource_valid(res)) {
-                       pnp_printf(buffer, "io");
-                       if (res->flags & IORESOURCE_DISABLED)
-                               pnp_printf(buffer, " disabled\n");
-                       else
-                               pnp_printf(buffer, " 0x%llx-0x%llx\n",
-                                          (unsigned long long) res->start,
-                                          (unsigned long long) res->end);
-               }
-       }
-       for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, i)); i++) {
-               if (pnp_resource_valid(res)) {
-                       pnp_printf(buffer, "mem");
-                       if (res->flags & IORESOURCE_DISABLED)
-                               pnp_printf(buffer, " disabled\n");
-                       else
-                               pnp_printf(buffer, " 0x%llx-0x%llx\n",
-                                          (unsigned long long) res->start,
-                                          (unsigned long long) res->end);
-               }
-       }
-       for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IRQ, i)); i++) {
-               if (pnp_resource_valid(res)) {
-                       pnp_printf(buffer, "irq");
-                       if (res->flags & IORESOURCE_DISABLED)
-                               pnp_printf(buffer, " disabled\n");
-                       else
-                               pnp_printf(buffer, " %lld\n",
-                                          (unsigned long long) res->start);
+       pnp_printf(buffer, "state = %s\n", dev->active ? "active" : "disabled");
+
+       list_for_each_entry(pnp_res, &dev->resources, list) {
+               res = &pnp_res->res;
+
+               pnp_printf(buffer, pnp_resource_type_name(res));
+
+               if (res->flags & IORESOURCE_DISABLED) {
+                       pnp_printf(buffer, " disabled\n");
+                       continue;
                 }
-       }
-       for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_DMA, i)); i++) {
-               if (pnp_resource_valid(res)) {
-                       pnp_printf(buffer, "dma");
-                       if (res->flags & IORESOURCE_DISABLED)
-                               pnp_printf(buffer, " disabled\n");
-                       else
-                               pnp_printf(buffer, " %lld\n",
-                                          (unsigned long long) res->start);
+
+               switch (pnp_resource_type(res)) {
+               case IORESOURCE_IO:
+               case IORESOURCE_MEM:
+                       pnp_printf(buffer, " %#llx-%#llx\n",
+                                  (unsigned long long) res->start,
+                                  (unsigned long long) res->end);
+                       break;
+               case IORESOURCE_IRQ:
+               case IORESOURCE_DMA:
+                       pnp_printf(buffer, " %lld\n",
+                                  (unsigned long long) res->start);
+                       break;
                 }
         }
+
         ret = (buffer->curr - buf);
         kfree(buffer);
         return ret;
  }
  
-static ssize_t
-pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
-                         const char *ubuf, size_t count)
+static ssize_t pnp_set_current_resources(struct device *dmdev,
+                                        struct device_attribute *attr,
+                                        const char *ubuf, size_t count)
  {
         struct pnp_dev *dev = to_pnp_dev(dmdev);
-       struct pnp_resource *pnp_res;
         char *buf = (void *)ubuf;
         int retval = 0;
         resource_size_t start, end;
@@ -368,7 +350,6 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
                 goto done;
         }
         if (!strnicmp(buf, "set", 3)) {
-               int nport = 0, nmem = 0, nirq = 0, ndma = 0;
                 if (dev->active)
                         goto done;
                 buf += 3;
@@ -391,10 +372,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
                                         end = simple_strtoul(buf, &buf, 0);
                                 } else
                                         end = start;
-                               pnp_res = pnp_add_io_resource(dev, start, end,
-                                                             0);
-                               if (pnp_res)
-                                       pnp_res->index = nport++;
+                               pnp_add_io_resource(dev, start, end, 0);
                                 continue;
                         }
                         if (!strnicmp(buf, "mem", 3)) {
@@ -411,10 +389,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
                                         end = simple_strtoul(buf, &buf, 0);
                                 } else
                                         end = start;
-                               pnp_res = pnp_add_mem_resource(dev, start, end,
-                                                              0);
-                               if (pnp_res)
-                                       pnp_res->index = nmem++;
+                               pnp_add_mem_resource(dev, start, end, 0);
                                 continue;
                         }
                         if (!strnicmp(buf, "irq", 3)) {
@@ -422,9 +397,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
                                 while (isspace(*buf))
                                         ++buf;
                                 start = simple_strtoul(buf, &buf, 0);
-                               pnp_res = pnp_add_irq_resource(dev, start, 0);
-                               if (pnp_res)
-                                       pnp_res->index = nirq++;
+                               pnp_add_irq_resource(dev, start, 0);
                                 continue;
                         }
                         if (!strnicmp(buf, "dma", 3)) {
@@ -432,9 +405,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
                                 while (isspace(*buf))
                                         ++buf;
                                 start = simple_strtoul(buf, &buf, 0);
-                               pnp_res = pnp_add_dma_resource(dev, start, 0);
-                               if (pnp_res)
-                                       pnp_res->index = ndma++;
+                               pnp_add_dma_resource(dev, start, 0);
                                 continue;
                         }
                         break;
diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c

index f1bccdbdeb0841f282bd1c0ea70d1ee06278c303..101a835e8759fb3f757d72426cfc05c2d678865c 100644 (file)
--- a/drivers/pnp/isapnp/core.c
+++ b/drivers/pnp/isapnp/core.c
@@ -429,154 +429,135 @@ static struct pnp_dev *__init isapnp_parse_device(struct pnp_card *card,
   *  Add IRQ resource to resources list.
   */
  static void __init isapnp_parse_irq_resource(struct pnp_dev *dev,
-                                            struct pnp_option *option,
+                                            unsigned int option_flags,
                                              int size)
  {
         unsigned char tmp[3];
-       struct pnp_irq *irq;
         unsigned long bits;
+       pnp_irq_mask_t map;
+       unsigned char flags = IORESOURCE_IRQ_HIGHEDGE;
  
         isapnp_peek(tmp, size);
-       irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
-       if (!irq)
-               return;
         bits = (tmp[1] << 8) | tmp[0];
-       bitmap_copy(irq->map, &bits, 16);
+
+       bitmap_zero(map.bits, PNP_IRQ_NR);
+       bitmap_copy(map.bits, &bits, 16);
+
         if (size > 2)
-               irq->flags = tmp[2];
-       else
-               irq->flags = IORESOURCE_IRQ_HIGHEDGE;
-       pnp_register_irq_resource(dev, option, irq);
+               flags = tmp[2];
+
+       pnp_register_irq_resource(dev, option_flags, &map, flags);
  }
  
  /*
   *  Add DMA resource to resources list.
   */
  static void __init isapnp_parse_dma_resource(struct pnp_dev *dev,
-                                            struct pnp_option *option,
+                                            unsigned int option_flags,
                                              int size)
  {
         unsigned char tmp[2];
-       struct pnp_dma *dma;
  
         isapnp_peek(tmp, size);
-       dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL);
-       if (!dma)
-               return;
-       dma->map = tmp[0];
-       dma->flags = tmp[1];
-       pnp_register_dma_resource(dev, option, dma);
+       pnp_register_dma_resource(dev, option_flags, tmp[0], tmp[1]);
  }
  
  /*
   *  Add port resource to resources list.
   */
  static void __init isapnp_parse_port_resource(struct pnp_dev *dev,
-                                             struct pnp_option *option,
+                                             unsigned int option_flags,
                                               int size)
  {
         unsigned char tmp[7];
-       struct pnp_port *port;
+       resource_size_t min, max, align, len;
+       unsigned char flags;
  
         isapnp_peek(tmp, size);
-       port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
-       if (!port)
-               return;
-       port->min = (tmp[2] << 8) | tmp[1];
-       port->max = (tmp[4] << 8) | tmp[3];
-       port->align = tmp[5];
-       port->size = tmp[6];
-       port->flags = tmp[0] ? PNP_PORT_FLAG_16BITADDR : 0;
-       pnp_register_port_resource(dev, option, port);
+       min = (tmp[2] << 8) | tmp[1];
+       max = (tmp[4] << 8) | tmp[3];
+       align = tmp[5];
+       len = tmp[6];
+       flags = tmp[0] ? IORESOURCE_IO_16BIT_ADDR : 0;
+       pnp_register_port_resource(dev, option_flags,
+                                  min, max, align, len, flags);
  }
  
  /*
   *  Add fixed port resource to resources list.
   */
  static void __init isapnp_parse_fixed_port_resource(struct pnp_dev *dev,
-                                                   struct pnp_option *option,
+                                                   unsigned int option_flags,
                                                     int size)
  {
         unsigned char tmp[3];
-       struct pnp_port *port;
+       resource_size_t base, len;
  
         isapnp_peek(tmp, size);
-       port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
-       if (!port)
-               return;
-       port->min = port->max = (tmp[1] << 8) | tmp[0];
-       port->size = tmp[2];
-       port->align = 0;
-       port->flags = PNP_PORT_FLAG_FIXED;
-       pnp_register_port_resource(dev, option, port);
+       base = (tmp[1] << 8) | tmp[0];
+       len = tmp[2];
+       pnp_register_port_resource(dev, option_flags, base, base, 0, len,
+                                  IORESOURCE_IO_FIXED);
  }
  
  /*
   *  Add memory resource to resources list.
   */
  static void __init isapnp_parse_mem_resource(struct pnp_dev *dev,
-                                            struct pnp_option *option,
+                                            unsigned int option_flags,
                                              int size)
  {
         unsigned char tmp[9];
-       struct pnp_mem *mem;
+       resource_size_t min, max, align, len;
+       unsigned char flags;
  
         isapnp_peek(tmp, size);
-       mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-       if (!mem)
-               return;
-       mem->min = ((tmp[2] << 8) | tmp[1]) << 8;
-       mem->max = ((tmp[4] << 8) | tmp[3]) << 8;
-       mem->align = (tmp[6] << 8) | tmp[5];
-       mem->size = ((tmp[8] << 8) | tmp[7]) << 8;
-       mem->flags = tmp[0];
-       pnp_register_mem_resource(dev, option, mem);
+       min = ((tmp[2] << 8) | tmp[1]) << 8;
+       max = ((tmp[4] << 8) | tmp[3]) << 8;
+       align = (tmp[6] << 8) | tmp[5];
+       len = ((tmp[8] << 8) | tmp[7]) << 8;
+       flags = tmp[0];
+       pnp_register_mem_resource(dev, option_flags,
+                                 min, max, align, len, flags);
  }
  
  /*
   *  Add 32-bit memory resource to resources list.
   */
  static void __init isapnp_parse_mem32_resource(struct pnp_dev *dev,
-                                              struct pnp_option *option,
+                                              unsigned int option_flags,
                                                int size)
  {
         unsigned char tmp[17];
-       struct pnp_mem *mem;
+       resource_size_t min, max, align, len;
+       unsigned char flags;
  
         isapnp_peek(tmp, size);
-       mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-       if (!mem)
-               return;
-       mem->min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
-       mem->max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5];
-       mem->align =
-           (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9];
-       mem->size =
-           (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13];
-       mem->flags = tmp[0];
-       pnp_register_mem_resource(dev, option, mem);
+       min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
+       max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5];
+       align = (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9];
+       len = (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13];
+       flags = tmp[0];
+       pnp_register_mem_resource(dev, option_flags,
+                                 min, max, align, len, flags);
  }
  
  /*
   *  Add 32-bit fixed memory resource to resources list.
   */
  static void __init isapnp_parse_fixed_mem32_resource(struct pnp_dev *dev,
-                                                    struct pnp_option *option,
+                                                    unsigned int option_flags,
                                                      int size)
  {
         unsigned char tmp[9];
-       struct pnp_mem *mem;
+       resource_size_t base, len;
+       unsigned char flags;
  
         isapnp_peek(tmp, size);
-       mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-       if (!mem)
-               return;
-       mem->min = mem->max =
-           (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
-       mem->size = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5];
-       mem->align = 0;
-       mem->flags = tmp[0];
-       pnp_register_mem_resource(dev, option, mem);
+       base = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
+       len = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5];
+       flags = tmp[0];
+       pnp_register_mem_resource(dev, option_flags, base, base, 0, len, flags);
  }
  
  /*
@@ -604,20 +585,16 @@ isapnp_parse_name(char *name, unsigned int name_max, unsigned short *size)
  static int __init isapnp_create_device(struct pnp_card *card,
                                        unsigned short size)
  {
-       int number = 0, skip = 0, priority = 0, compat = 0;
+       int number = 0, skip = 0, priority, compat = 0;
         unsigned char type, tmp[17];
-       struct pnp_option *option;
+       unsigned int option_flags;
         struct pnp_dev *dev;
         u32 eisa_id;
         char id[8];
  
         if ((dev = isapnp_parse_device(card, size, number++)) == NULL)
                 return 1;
-       option = pnp_register_independent_option(dev);
-       if (!option) {
-               kfree(dev);
-               return 1;
-       }
+       option_flags = 0;
         pnp_add_card_device(card, dev);
  
         while (1) {
@@ -634,16 +611,11 @@ static int __init isapnp_create_device(struct pnp_card *card,
                                         return 1;
                                 size = 0;
                                 skip = 0;
-                               option = pnp_register_independent_option(dev);
-                               if (!option) {
-                                       kfree(dev);
-                                       return 1;
-                               }
+                               option_flags = 0;
                                 pnp_add_card_device(card, dev);
                         } else {
                                 skip = 1;
                         }
-                       priority = 0;
                         compat = 0;
                         break;
                 case _STAG_COMPATDEVID:
@@ -660,44 +632,42 @@ static int __init isapnp_create_device(struct pnp_card *card,
                 case _STAG_IRQ:
                         if (size < 2 || size > 3)
                                 goto __skip;
-                       isapnp_parse_irq_resource(dev, option, size);
+                       isapnp_parse_irq_resource(dev, option_flags, size);
                         size = 0;
                         break;
                 case _STAG_DMA:
                         if (size != 2)
                                 goto __skip;
-                       isapnp_parse_dma_resource(dev, option, size);
+                       isapnp_parse_dma_resource(dev, option_flags, size);
                         size = 0;
                         break;
                 case _STAG_STARTDEP:
                         if (size > 1)
                                 goto __skip;
-                       priority = 0x100 | PNP_RES_PRIORITY_ACCEPTABLE;
+                       priority = PNP_RES_PRIORITY_ACCEPTABLE;
                         if (size > 0) {
                                 isapnp_peek(tmp, size);
-                               priority = 0x100 | tmp[0];
+                               priority = tmp[0];
                                 size = 0;
                         }
-                       option = pnp_register_dependent_option(dev, priority);
-                       if (!option)
-                               return 1;
+                       option_flags = pnp_new_dependent_set(dev, priority);
                         break;
                 case _STAG_ENDDEP:
                         if (size != 0)
                                 goto __skip;
-                       priority = 0;
-                       dev_dbg(&dev->dev, "end dependent options\n");
+                       option_flags = 0;
                         break;
                 case _STAG_IOPORT:
                         if (size != 7)
                                 goto __skip;
-                       isapnp_parse_port_resource(dev, option, size);
+                       isapnp_parse_port_resource(dev, option_flags, size);
                         size = 0;
                         break;
                 case _STAG_FIXEDIO:
                         if (size != 3)
                                 goto __skip;
-                       isapnp_parse_fixed_port_resource(dev, option, size);
+                       isapnp_parse_fixed_port_resource(dev, option_flags,
+                                                        size);
                         size = 0;
                         break;
                 case _STAG_VENDOR:
@@ -705,7 +675,7 @@ static int __init isapnp_create_device(struct pnp_card *card,
                 case _LTAG_MEMRANGE:
                         if (size != 9)
                                 goto __skip;
-                       isapnp_parse_mem_resource(dev, option, size);
+                       isapnp_parse_mem_resource(dev, option_flags, size);
                         size = 0;
                         break;
                 case _LTAG_ANSISTR:
@@ -720,13 +690,14 @@ static int __init isapnp_create_device(struct pnp_card *card,
                 case _LTAG_MEM32RANGE:
                         if (size != 17)
                                 goto __skip;
-                       isapnp_parse_mem32_resource(dev, option, size);
+                       isapnp_parse_mem32_resource(dev, option_flags, size);
                         size = 0;
                         break;
                 case _LTAG_FIXEDMEM32RANGE:
                         if (size != 9)
                                 goto __skip;
-                       isapnp_parse_fixed_mem32_resource(dev, option, size);
+                       isapnp_parse_fixed_mem32_resource(dev, option_flags,
+                                                         size);
                         size = 0;
                         break;
                 case _STAG_END:
@@ -928,7 +899,6 @@ EXPORT_SYMBOL(isapnp_write_byte);
  
  static int isapnp_get_resources(struct pnp_dev *dev)
  {
-       struct pnp_resource *pnp_res;
         int i, ret;
  
         dev_dbg(&dev->dev, "get resources\n");
@@ -940,35 +910,23 @@ static int isapnp_get_resources(struct pnp_dev *dev)
  
         for (i = 0; i < ISAPNP_MAX_PORT; i++) {
                 ret = isapnp_read_word(ISAPNP_CFG_PORT + (i << 1));
-               if (ret) {
-                       pnp_res = pnp_add_io_resource(dev, ret, ret, 0);
-                       if (pnp_res)
-                               pnp_res->index = i;
-               }
+               pnp_add_io_resource(dev, ret, ret,
+                                   ret == 0 ? IORESOURCE_DISABLED : 0);
         }
         for (i = 0; i < ISAPNP_MAX_MEM; i++) {
                 ret = isapnp_read_word(ISAPNP_CFG_MEM + (i << 3)) << 8;
-               if (ret) {
-                       pnp_res = pnp_add_mem_resource(dev, ret, ret, 0);
-                       if (pnp_res)
-                               pnp_res->index = i;
-               }
+               pnp_add_mem_resource(dev, ret, ret,
+                                    ret == 0 ? IORESOURCE_DISABLED : 0);
         }
         for (i = 0; i < ISAPNP_MAX_IRQ; i++) {
                 ret = isapnp_read_word(ISAPNP_CFG_IRQ + (i << 1)) >> 8;
-               if (ret) {
-                       pnp_res = pnp_add_irq_resource(dev, ret, 0);
-                       if (pnp_res)
-                               pnp_res->index = i;
-               }
+               pnp_add_irq_resource(dev, ret,
+                                    ret == 0 ? IORESOURCE_DISABLED : 0);
         }
         for (i = 0; i < ISAPNP_MAX_DMA; i++) {
                 ret = isapnp_read_byte(ISAPNP_CFG_DMA + i);
-               if (ret != 4) {
-                       pnp_res = pnp_add_dma_resource(dev, ret, 0);
-                       if  (pnp_res)
-                               pnp_res->index = i;
-               }
+               pnp_add_dma_resource(dev, ret,
+                                    ret == 4 ? IORESOURCE_DISABLED : 0);
         }
  
  __end:
@@ -978,62 +936,45 @@ __end:
  
  static int isapnp_set_resources(struct pnp_dev *dev)
  {
-       struct pnp_resource *pnp_res;
         struct resource *res;
-       int tmp, index;
+       int tmp;
  
         dev_dbg(&dev->dev, "set resources\n");
         isapnp_cfg_begin(dev->card->number, dev->number);
         dev->active = 1;
         for (tmp = 0; tmp < ISAPNP_MAX_PORT; tmp++) {
-               pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IO, tmp);
-               if (!pnp_res)
-                       continue;
-               res = &pnp_res->res;
-               if (pnp_resource_valid(res)) {
-                       index = pnp_res->index;
+               res = pnp_get_resource(dev, IORESOURCE_IO, tmp);
+               if (pnp_resource_enabled(res)) {
                         dev_dbg(&dev->dev, "  set io  %d to %#llx\n",
-                               index, (unsigned long long) res->start);
-                       isapnp_write_word(ISAPNP_CFG_PORT + (index << 1),
+                               tmp, (unsigned long long) res->start);
+                       isapnp_write_word(ISAPNP_CFG_PORT + (tmp << 1),
                                           res->start);
                 }
         }
         for (tmp = 0; tmp < ISAPNP_MAX_IRQ; tmp++) {
-               pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IRQ, tmp);
-               if (!pnp_res)
-                       continue;
-               res = &pnp_res->res;
-               if (pnp_resource_valid(res)) {
+               res = pnp_get_resource(dev, IORESOURCE_IRQ, tmp);
+               if (pnp_resource_enabled(res)) {
                         int irq = res->start;
                         if (irq == 2)
                                 irq = 9;
-                       index = pnp_res->index;
-                       dev_dbg(&dev->dev, "  set irq %d to %d\n", index, irq);
-                       isapnp_write_byte(ISAPNP_CFG_IRQ + (index << 1), irq);
+                       dev_dbg(&dev->dev, "  set irq %d to %d\n", tmp, irq);
+                       isapnp_write_byte(ISAPNP_CFG_IRQ + (tmp << 1), irq);
                 }
         }
         for (tmp = 0; tmp < ISAPNP_MAX_DMA; tmp++) {
-               pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_DMA, tmp);
-               if (!pnp_res)
-                       continue;
-               res = &pnp_res->res;
-               if (pnp_resource_valid(res)) {
-                       index = pnp_res->index;
+               res = pnp_get_resource(dev, IORESOURCE_DMA, tmp);
+               if (pnp_resource_enabled(res)) {
                         dev_dbg(&dev->dev, "  set dma %d to %lld\n",
-                               index, (unsigned long long) res->start);
-                       isapnp_write_byte(ISAPNP_CFG_DMA + index, res->start);
+                               tmp, (unsigned long long) res->start);
+                       isapnp_write_byte(ISAPNP_CFG_DMA + tmp, res->start);
                 }
         }
         for (tmp = 0; tmp < ISAPNP_MAX_MEM; tmp++) {
-               pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_MEM, tmp);
-               if (!pnp_res)
-                       continue;
-               res = &pnp_res->res;
-               if (pnp_resource_valid(res)) {
-                       index = pnp_res->index;
+               res = pnp_get_resource(dev, IORESOURCE_MEM, tmp);
+               if (pnp_resource_enabled(res)) {
                         dev_dbg(&dev->dev, "  set mem %d to %#llx\n",
-                               index, (unsigned long long) res->start);
-                       isapnp_write_word(ISAPNP_CFG_MEM + (index << 3),
+                               tmp, (unsigned long long) res->start);
+                       isapnp_write_word(ISAPNP_CFG_MEM + (tmp << 3),
                                           (res->start >> 8) & 0xffff);
                 }
         }
diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c

index bea0914ff947a337dcf45d2a58bf7b6fc0f38644..b526eaad3f6c4e069360b9167b83ae786ccf511b 100644 (file)
--- a/drivers/pnp/manager.c
+++ b/drivers/pnp/manager.c
@@ -3,6 +3,8 @@
   *
   * based on isapnp.c resource management (c) Jaroslav Kysela <perex@perex.cz>
   * Copyright 2003 Adam Belay <ambx1@neo.rr.com>
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *     Bjorn Helgaas <bjorn.helgaas@hp.com>
   */
  
  #include <linux/errno.h>
@@ -19,82 +21,64 @@ DEFINE_MUTEX(pnp_res_mutex);
  
  static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx)
  {
-       struct pnp_resource *pnp_res;
-       struct resource *res;
-
-       pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IO, idx);
-       if (!pnp_res) {
-               dev_err(&dev->dev, "too many I/O port resources\n");
-               /* pretend we were successful so at least the manager won't try again */
-               return 1;
-       }
-
-       res = &pnp_res->res;
+       struct resource *res, local_res;
  
-       /* check if this resource has been manually set, if so skip */
-       if (!(res->flags & IORESOURCE_AUTO)) {
+       res = pnp_get_resource(dev, IORESOURCE_IO, idx);
+       if (res) {
                 dev_dbg(&dev->dev, "  io %d already set to %#llx-%#llx "
                         "flags %#lx\n", idx, (unsigned long long) res->start,
                         (unsigned long long) res->end, res->flags);
-               return 1;
+               return 0;
         }
  
-       /* set the initial values */
-       pnp_res->index = idx;
-       res->flags |= rule->flags | IORESOURCE_IO;
-       res->flags &= ~IORESOURCE_UNSET;
+       res = &local_res;
+       res->flags = rule->flags | IORESOURCE_AUTO;
+       res->start = 0;
+       res->end = 0;
  
         if (!rule->size) {
                 res->flags |= IORESOURCE_DISABLED;
                 dev_dbg(&dev->dev, "  io %d disabled\n", idx);
-               return 1;       /* skip disabled resource requests */
+               goto __add;
         }
  
         res->start = rule->min;
         res->end = res->start + rule->size - 1;
  
-       /* run through until pnp_check_port is happy */
         while (!pnp_check_port(dev, res)) {
                 res->start += rule->align;
                 res->end = res->start + rule->size - 1;
                 if (res->start > rule->max || !rule->align) {
-                       dev_dbg(&dev->dev, "  couldn't assign io %d\n", idx);
-                       return 0;
+                       dev_dbg(&dev->dev, "  couldn't assign io %d "
+                               "(min %#llx max %#llx)\n", idx,
+                               (unsigned long long) rule->min,
+                               (unsigned long long) rule->max);
+                       return -EBUSY;
                 }
         }
-       dev_dbg(&dev->dev, "  assign io  %d %#llx-%#llx\n", idx,
-               (unsigned long long) res->start, (unsigned long long) res->end);
-       return 1;
+
+__add:
+       pnp_add_io_resource(dev, res->start, res->end, res->flags);
+       return 0;
  }
  
  static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
  {
-       struct pnp_resource *pnp_res;
-       struct resource *res;
-
-       pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_MEM, idx);
-       if (!pnp_res) {
-               dev_err(&dev->dev, "too many memory resources\n");
-               /* pretend we were successful so at least the manager won't try again */
-               return 1;
-       }
+       struct resource *res, local_res;
  
-       res = &pnp_res->res;
-
-       /* check if this resource has been manually set, if so skip */
-       if (!(res->flags & IORESOURCE_AUTO)) {
+       res = pnp_get_resource(dev, IORESOURCE_MEM, idx);
+       if (res) {
                 dev_dbg(&dev->dev, "  mem %d already set to %#llx-%#llx "
                         "flags %#lx\n", idx, (unsigned long long) res->start,
                         (unsigned long long) res->end, res->flags);
-               return 1;
+               return 0;
         }
  
-       /* set the initial values */
-       pnp_res->index = idx;
-       res->flags |= rule->flags | IORESOURCE_MEM;
-       res->flags &= ~IORESOURCE_UNSET;
+       res = &local_res;
+       res->flags = rule->flags | IORESOURCE_AUTO;
+       res->start = 0;
+       res->end = 0;
  
-       /* convert pnp flags to standard Linux flags */
         if (!(rule->flags & IORESOURCE_MEM_WRITEABLE))
                 res->flags |= IORESOURCE_READONLY;
         if (rule->flags & IORESOURCE_MEM_CACHEABLE)
@@ -107,30 +91,32 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
         if (!rule->size) {
                 res->flags |= IORESOURCE_DISABLED;
                 dev_dbg(&dev->dev, "  mem %d disabled\n", idx);
-               return 1;       /* skip disabled resource requests */
+               goto __add;
         }
  
         res->start = rule->min;
         res->end = res->start + rule->size - 1;
  
-       /* run through until pnp_check_mem is happy */
         while (!pnp_check_mem(dev, res)) {
                 res->start += rule->align;
                 res->end = res->start + rule->size - 1;
                 if (res->start > rule->max || !rule->align) {
-                       dev_dbg(&dev->dev, "  couldn't assign mem %d\n", idx);
-                       return 0;
+                       dev_dbg(&dev->dev, "  couldn't assign mem %d "
+                               "(min %#llx max %#llx)\n", idx,
+                               (unsigned long long) rule->min,
+                               (unsigned long long) rule->max);
+                       return -EBUSY;
                 }
         }
-       dev_dbg(&dev->dev, "  assign mem %d %#llx-%#llx\n", idx,
-               (unsigned long long) res->start, (unsigned long long) res->end);
-       return 1;
+
+__add:
+       pnp_add_mem_resource(dev, res->start, res->end, res->flags);
+       return 0;
  }
  
  static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx)
  {
-       struct pnp_resource *pnp_res;
-       struct resource *res;
+       struct resource *res, local_res;
         int i;
  
         /* IRQ priority: this table is good for i386 */
@@ -138,59 +124,57 @@ static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx)
                 5, 10, 11, 12, 9, 14, 15, 7, 3, 4, 13, 0, 1, 6, 8, 2
         };
  
-       pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IRQ, idx);
-       if (!pnp_res) {
-               dev_err(&dev->dev, "too many IRQ resources\n");
-               /* pretend we were successful so at least the manager won't try again */
-               return 1;
-       }
-
-       res = &pnp_res->res;
-
-       /* check if this resource has been manually set, if so skip */
-       if (!(res->flags & IORESOURCE_AUTO)) {
+       res = pnp_get_resource(dev, IORESOURCE_IRQ, idx);
+       if (res) {
                 dev_dbg(&dev->dev, "  irq %d already set to %d flags %#lx\n",
                         idx, (int) res->start, res->flags);
-               return 1;
+               return 0;
         }
  
-       /* set the initial values */
-       pnp_res->index = idx;
-       res->flags |= rule->flags | IORESOURCE_IRQ;
-       res->flags &= ~IORESOURCE_UNSET;
+       res = &local_res;
+       res->flags = rule->flags | IORESOURCE_AUTO;
+       res->start = -1;
+       res->end = -1;
  
-       if (bitmap_empty(rule->map, PNP_IRQ_NR)) {
+       if (bitmap_empty(rule->map.bits, PNP_IRQ_NR)) {
                 res->flags |= IORESOURCE_DISABLED;
                 dev_dbg(&dev->dev, "  irq %d disabled\n", idx);
-               return 1;       /* skip disabled resource requests */
+               goto __add;
         }
  
         /* TBD: need check for >16 IRQ */
-       res->start = find_next_bit(rule->map, PNP_IRQ_NR, 16);
+       res->start = find_next_bit(rule->map.bits, PNP_IRQ_NR, 16);
         if (res->start < PNP_IRQ_NR) {
                 res->end = res->start;
-               dev_dbg(&dev->dev, "  assign irq %d %d\n", idx,
-                       (int) res->start);
-               return 1;
+               goto __add;
         }
         for (i = 0; i < 16; i++) {
-               if (test_bit(xtab[i], rule->map)) {
+               if (test_bit(xtab[i], rule->map.bits)) {
                         res->start = res->end = xtab[i];
-                       if (pnp_check_irq(dev, res)) {
-                               dev_dbg(&dev->dev, "  assign irq %d %d\n", idx,
-                                       (int) res->start);
-                               return 1;
-                       }
+                       if (pnp_check_irq(dev, res))
+                               goto __add;
                 }
         }
+
+       if (rule->flags & IORESOURCE_IRQ_OPTIONAL) {
+               res->start = -1;
+               res->end = -1;
+               res->flags |= IORESOURCE_DISABLED;
+               dev_dbg(&dev->dev, "  irq %d disabled (optional)\n", idx);
+               goto __add;
+       }
+
         dev_dbg(&dev->dev, "  couldn't assign irq %d\n", idx);
+       return -EBUSY;
+
+__add:
+       pnp_add_irq_resource(dev, res->start, res->flags);
         return 0;
  }
  
-static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx)
+static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx)
  {
-       struct pnp_resource *pnp_res;
-       struct resource *res;
+       struct resource *res, local_res;
         int i;
  
         /* DMA priority: this table is good for i386 */
@@ -198,231 +182,99 @@ static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx)
                 1, 3, 5, 6, 7, 0, 2, 4
         };
  
-       pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_DMA, idx);
-       if (!pnp_res) {
-               dev_err(&dev->dev, "too many DMA resources\n");
-               return;
-       }
-
-       res = &pnp_res->res;
-
-       /* check if this resource has been manually set, if so skip */
-       if (!(res->flags & IORESOURCE_AUTO)) {
+       res = pnp_get_resource(dev, IORESOURCE_DMA, idx);
+       if (res) {
                 dev_dbg(&dev->dev, "  dma %d already set to %d flags %#lx\n",
                         idx, (int) res->start, res->flags);
-               return;
+               return 0;
         }
  
-       /* set the initial values */
-       pnp_res->index = idx;
-       res->flags |= rule->flags | IORESOURCE_DMA;
-       res->flags &= ~IORESOURCE_UNSET;
+       res = &local_res;
+       res->flags = rule->flags | IORESOURCE_AUTO;
+       res->start = -1;
+       res->end = -1;
  
         for (i = 0; i < 8; i++) {
                 if (rule->map & (1 << xtab[i])) {
                         res->start = res->end = xtab[i];
-                       if (pnp_check_dma(dev, res)) {
-                               dev_dbg(&dev->dev, "  assign dma %d %d\n", idx,
-                                       (int) res->start);
-                               return;
-                       }
+                       if (pnp_check_dma(dev, res))
+                               goto __add;
                 }
         }
  #ifdef MAX_DMA_CHANNELS
         res->start = res->end = MAX_DMA_CHANNELS;
  #endif
-       res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
+       res->flags |= IORESOURCE_DISABLED;
         dev_dbg(&dev->dev, "  disable dma %d\n", idx);
-}
-
-void pnp_init_resource(struct resource *res)
-{
-       unsigned long type;
-
-       type = res->flags & (IORESOURCE_IO  | IORESOURCE_MEM |
-                            IORESOURCE_IRQ | IORESOURCE_DMA);
  
-       res->name = NULL;
-       res->flags = type | IORESOURCE_AUTO | IORESOURCE_UNSET;
-       if (type == IORESOURCE_IRQ || type == IORESOURCE_DMA) {
-               res->start = -1;
-               res->end = -1;
-       } else {
-               res->start = 0;
-               res->end = 0;
-       }
+__add:
+       pnp_add_dma_resource(dev, res->start, res->flags);
+       return 0;
  }
  
-/**
- * pnp_init_resources - Resets a resource table to default values.
- * @table: pointer to the desired resource table
- */
  void pnp_init_resources(struct pnp_dev *dev)
  {
-       struct resource *res;
-       int idx;
-
-       for (idx = 0; idx < PNP_MAX_IRQ; idx++) {
-               res = &dev->res->irq[idx].res;
-               res->flags = IORESOURCE_IRQ;
-               pnp_init_resource(res);
-       }
-       for (idx = 0; idx < PNP_MAX_DMA; idx++) {
-               res = &dev->res->dma[idx].res;
-               res->flags = IORESOURCE_DMA;
-               pnp_init_resource(res);
-       }
-       for (idx = 0; idx < PNP_MAX_PORT; idx++) {
-               res = &dev->res->port[idx].res;
-               res->flags = IORESOURCE_IO;
-               pnp_init_resource(res);
-       }
-       for (idx = 0; idx < PNP_MAX_MEM; idx++) {
-               res = &dev->res->mem[idx].res;
-               res->flags = IORESOURCE_MEM;
-               pnp_init_resource(res);
-       }
+       pnp_free_resources(dev);
  }
  
-/**
- * pnp_clean_resources - clears resources that were not manually set
- * @res: the resources to clean
- */
  static void pnp_clean_resource_table(struct pnp_dev *dev)
  {
-       struct resource *res;
-       int idx;
-
-       for (idx = 0; idx < PNP_MAX_IRQ; idx++) {
-               res = &dev->res->irq[idx].res;
-               if (res->flags & IORESOURCE_AUTO) {
-                       res->flags = IORESOURCE_IRQ;
-                       pnp_init_resource(res);
-               }
-       }
-       for (idx = 0; idx < PNP_MAX_DMA; idx++) {
-               res = &dev->res->dma[idx].res;
-               if (res->flags & IORESOURCE_AUTO) {
-                       res->flags = IORESOURCE_DMA;
-                       pnp_init_resource(res);
-               }
-       }
-       for (idx = 0; idx < PNP_MAX_PORT; idx++) {
-               res = &dev->res->port[idx].res;
-               if (res->flags & IORESOURCE_AUTO) {
-                       res->flags = IORESOURCE_IO;
-                       pnp_init_resource(res);
-               }
-       }
-       for (idx = 0; idx < PNP_MAX_MEM; idx++) {
-               res = &dev->res->mem[idx].res;
-               if (res->flags & IORESOURCE_AUTO) {
-                       res->flags = IORESOURCE_MEM;
-                       pnp_init_resource(res);
-               }
+       struct pnp_resource *pnp_res, *tmp;
+
+       list_for_each_entry_safe(pnp_res, tmp, &dev->resources, list) {
+               if (pnp_res->res.flags & IORESOURCE_AUTO)
+                       pnp_free_resource(pnp_res);
         }
  }
  
  /**
   * pnp_assign_resources - assigns resources to the device based on the specified dependent number
   * @dev: pointer to the desired device
- * @depnum: the dependent function number
- *
- * Only set depnum to 0 if the device does not have dependent options.
+ * @set: the dependent function number
   */
-static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
+static int pnp_assign_resources(struct pnp_dev *dev, int set)
  {
-       struct pnp_port *port;
-       struct pnp_mem *mem;
-       struct pnp_irq *irq;
-       struct pnp_dma *dma;
+       struct pnp_option *option;
         int nport = 0, nmem = 0, nirq = 0, ndma = 0;
+       int ret = 0;
  
-       if (!pnp_can_configure(dev))
-               return -ENODEV;
-
-       dbg_pnp_show_resources(dev, "before pnp_assign_resources");
+       dev_dbg(&dev->dev, "pnp_assign_resources, try dependent set %d\n", set);
         mutex_lock(&pnp_res_mutex);
         pnp_clean_resource_table(dev);
-       if (dev->independent) {
-               dev_dbg(&dev->dev, "assigning independent options\n");
-               port = dev->independent->port;
-               mem = dev->independent->mem;
-               irq = dev->independent->irq;
-               dma = dev->independent->dma;
-               while (port) {
-                       if (!pnp_assign_port(dev, port, nport))
-                               goto fail;
-                       nport++;
-                       port = port->next;
-               }
-               while (mem) {
-                       if (!pnp_assign_mem(dev, mem, nmem))
-                               goto fail;
-                       nmem++;
-                       mem = mem->next;
-               }
-               while (irq) {
-                       if (!pnp_assign_irq(dev, irq, nirq))
-                               goto fail;
-                       nirq++;
-                       irq = irq->next;
-               }
-               while (dma) {
-                       pnp_assign_dma(dev, dma, ndma);
-                       ndma++;
-                       dma = dma->next;
-               }
-       }
  
-       if (depnum) {
-               struct pnp_option *dep;
-               int i;
-
-               dev_dbg(&dev->dev, "assigning dependent option %d\n", depnum);
-               for (i = 1, dep = dev->dependent; i < depnum;
-                    i++, dep = dep->next)
-                       if (!dep)
-                               goto fail;
-               port = dep->port;
-               mem = dep->mem;
-               irq = dep->irq;
-               dma = dep->dma;
-               while (port) {
-                       if (!pnp_assign_port(dev, port, nport))
-                               goto fail;
-                       nport++;
-                       port = port->next;
-               }
-               while (mem) {
-                       if (!pnp_assign_mem(dev, mem, nmem))
-                               goto fail;
-                       nmem++;
-                       mem = mem->next;
-               }
-               while (irq) {
-                       if (!pnp_assign_irq(dev, irq, nirq))
-                               goto fail;
-                       nirq++;
-                       irq = irq->next;
+       list_for_each_entry(option, &dev->options, list) {
+               if (pnp_option_is_dependent(option) &&
+                   pnp_option_set(option) != set)
+                               continue;
+
+               switch (option->type) {
+               case IORESOURCE_IO:
+                       ret = pnp_assign_port(dev, &option->u.port, nport++);
+                       break;
+               case IORESOURCE_MEM:
+                       ret = pnp_assign_mem(dev, &option->u.mem, nmem++);
+                       break;
+               case IORESOURCE_IRQ:
+                       ret = pnp_assign_irq(dev, &option->u.irq, nirq++);
+                       break;
+               case IORESOURCE_DMA:
+                       ret = pnp_assign_dma(dev, &option->u.dma, ndma++);
+                       break;
+               default:
+                       ret = -EINVAL;
+                       break;
                 }
-               while (dma) {
-                       pnp_assign_dma(dev, dma, ndma);
-                       ndma++;
-                       dma = dma->next;
-               }
-       } else if (dev->dependent)
-               goto fail;
-
-       mutex_unlock(&pnp_res_mutex);
-       dbg_pnp_show_resources(dev, "after pnp_assign_resources");
-       return 1;
+               if (ret < 0)
+                       break;
+       }
  
-fail:
-       pnp_clean_resource_table(dev);
         mutex_unlock(&pnp_res_mutex);
-       dbg_pnp_show_resources(dev, "after pnp_assign_resources (failed)");
-       return 0;
+       if (ret < 0) {
+               dev_dbg(&dev->dev, "pnp_assign_resources failed (%d)\n", ret);
+               pnp_clean_resource_table(dev);
+       } else
+               dbg_pnp_show_resources(dev, "pnp_assign_resources succeeded");
+       return ret;
  }
  
  /**
@@ -431,29 +283,25 @@ fail:
   */
  int pnp_auto_config_dev(struct pnp_dev *dev)
  {
-       struct pnp_option *dep;
-       int i = 1;
+       int i, ret;
  
         if (!pnp_can_configure(dev)) {
                 dev_dbg(&dev->dev, "configuration not supported\n");
                 return -ENODEV;
         }
  
-       if (!dev->dependent) {
-               if (pnp_assign_resources(dev, 0))
+       ret = pnp_assign_resources(dev, 0);
+       if (ret == 0)
+               return 0;
+
+       for (i = 1; i < dev->num_dependent_sets; i++) {
+               ret = pnp_assign_resources(dev, i);
+               if (ret == 0)
                         return 0;
-       } else {
-               dep = dev->dependent;
-               do {
-                       if (pnp_assign_resources(dev, i))
-                               return 0;
-                       dep = dep->next;
-                       i++;
-               } while (dep);
         }
  
         dev_err(&dev->dev, "unable to assign resources\n");
-       return -EBUSY;
+       return ret;
  }
  
  /**
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c

index 50902773beaf9092fd0a241d535835a8839c843b..c1b9ea34977b395c400fcd522ecd41f06fadbd21 100644 (file)
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -117,9 +117,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
  {
         int power_state;
  
-       power_state = acpi_pm_device_sleep_state(&dev->dev,
-                                               device_may_wakeup(&dev->dev),
-                                               NULL);
+       power_state = acpi_pm_device_sleep_state(&dev->dev, NULL);
         if (power_state < 0)
                 power_state = (state.event == PM_EVENT_ON) ?
                                 ACPI_STATE_D0 : ACPI_STATE_D3;
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c

index 46c791adb8947dbf4e12005199f9a7179e35e8d6..d7e9f2152df0913305495ee7c7c25f36cdbb9f18 100644 (file)
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -3,6 +3,8 @@
   *
   * Copyright (c) 2004 Matthieu Castet <castet.matthieu@free.fr>
   * Copyright (c) 2004 Li Shaohua <shaohua.li@intel.com>
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *     Bjorn Helgaas <bjorn.helgaas@hp.com>
   *
   * This program is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License as published by the
@@ -98,8 +100,10 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev,
         int irq, flags;
         int p, t;
  
-       if (!valid_IRQ(gsi))
+       if (!valid_IRQ(gsi)) {
+               pnp_add_irq_resource(dev, gsi, IORESOURCE_DISABLED);
                 return;
+       }
  
         /*
          * in IO-APIC mode, use overrided attribute. Two reasons:
@@ -178,13 +182,68 @@ static void pnpacpi_parse_allocated_ioresource(struct pnp_dev *dev, u64 start,
         u64 end = start + len - 1;
  
         if (io_decode == ACPI_DECODE_16)
-               flags |= PNP_PORT_FLAG_16BITADDR;
+               flags |= IORESOURCE_IO_16BIT_ADDR;
         if (len == 0 || end >= 0x10003)
                 flags |= IORESOURCE_DISABLED;
  
         pnp_add_io_resource(dev, start, end, flags);
  }
  
+/*
+ * Device CSRs that do not appear in PCI config space should be described
+ * via ACPI.  This would normally be done with Address Space Descriptors
+ * marked as "consumer-only," but old versions of Windows and Linux ignore
+ * the producer/consumer flag, so HP invented a vendor-defined resource to
+ * describe the location and size of CSR space.
+ */
+static struct acpi_vendor_uuid hp_ccsr_uuid = {
+       .subtype = 2,
+       .data = { 0xf9, 0xad, 0xe9, 0x69, 0x4f, 0x92, 0x5f, 0xab, 0xf6, 0x4a,
+           0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad },
+};
+
+static int vendor_resource_matches(struct pnp_dev *dev,
+                                  struct acpi_resource_vendor_typed *vendor,
+                                  struct acpi_vendor_uuid *match,
+                                  int expected_len)
+{
+       int uuid_len = sizeof(vendor->uuid);
+       u8 uuid_subtype = vendor->uuid_subtype;
+       u8 *uuid = vendor->uuid;
+       int actual_len;
+
+       /* byte_length includes uuid_subtype and uuid */
+       actual_len = vendor->byte_length - uuid_len - 1;
+
+       if (uuid_subtype == match->subtype &&
+           uuid_len == sizeof(match->data) &&
+           memcmp(uuid, match->data, uuid_len) == 0) {
+               if (expected_len && expected_len != actual_len) {
+                       dev_err(&dev->dev, "wrong vendor descriptor size; "
+                               "expected %d, found %d bytes\n",
+                               expected_len, actual_len);
+                       return 0;
+               }
+
+               return 1;
+       }
+
+       return 0;
+}
+
+static void pnpacpi_parse_allocated_vendor(struct pnp_dev *dev,
+                                   struct acpi_resource_vendor_typed *vendor)
+{
+       if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, 16)) {
+               u64 start, length;
+
+               memcpy(&start, vendor->byte_data, sizeof(start));
+               memcpy(&length, vendor->byte_data + 8, sizeof(length));
+
+               pnp_add_mem_resource(dev, start, start + length - 1, 0);
+       }
+}
+
  static void pnpacpi_parse_allocated_memresource(struct pnp_dev *dev,
                                                 u64 start, u64 len,
                                                 int write_protect)
@@ -235,6 +294,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
         struct acpi_resource_dma *dma;
         struct acpi_resource_io *io;
         struct acpi_resource_fixed_io *fixed_io;
+       struct acpi_resource_vendor_typed *vendor_typed;
         struct acpi_resource_memory24 *memory24;
         struct acpi_resource_memory32 *memory32;
         struct acpi_resource_fixed_memory32 *fixed_memory32;
@@ -248,24 +308,39 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
                  * _CRS, but some firmware violates this, so parse them all.
                  */
                 irq = &res->data.irq;
-               for (i = 0; i < irq->interrupt_count; i++) {
-                       pnpacpi_parse_allocated_irqresource(dev,
-                               irq->interrupts[i],
-                               irq->triggering,
-                               irq->polarity,
-                               irq->sharable);
+               if (irq->interrupt_count == 0)
+                       pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED);
+               else {
+                       for (i = 0; i < irq->interrupt_count; i++) {
+                               pnpacpi_parse_allocated_irqresource(dev,
+                                       irq->interrupts[i],
+                                       irq->triggering,
+                                       irq->polarity,
+                                   irq->sharable);
+                       }
+
+                       /*
+                        * The IRQ encoder puts a single interrupt in each
+                        * descriptor, so if a _CRS descriptor has more than
+                        * one interrupt, we won't be able to re-encode it.
+                        */
+                       if (pnp_can_write(dev) && irq->interrupt_count > 1) {
+                               dev_warn(&dev->dev, "multiple interrupts in "
+                                        "_CRS descriptor; configuration can't "
+                                        "be changed\n");
+                               dev->capabilities &= ~PNP_WRITE;
+                       }
                 }
                 break;
  
         case ACPI_RESOURCE_TYPE_DMA:
                 dma = &res->data.dma;
-               if (dma->channel_count > 0) {
+               if (dma->channel_count > 0 && dma->channels[0] != (u8) -1)
                         flags = dma_flags(dma->type, dma->bus_master,
                                           dma->transfer);
-                       if (dma->channels[0] == (u8) -1)
-                               flags |= IORESOURCE_DISABLED;
-                       pnp_add_dma_resource(dev, dma->channels[0], flags);
-               }
+               else
+                       flags = IORESOURCE_DISABLED;
+               pnp_add_dma_resource(dev, dma->channels[0], flags);
                 break;
  
         case ACPI_RESOURCE_TYPE_IO:
@@ -289,6 +364,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
                 break;
  
         case ACPI_RESOURCE_TYPE_VENDOR:
+               vendor_typed = &res->data.vendor_typed;
+               pnpacpi_parse_allocated_vendor(dev, vendor_typed);
                 break;
  
         case ACPI_RESOURCE_TYPE_END_TAG:
@@ -331,12 +408,29 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
                 if (extended_irq->producer_consumer == ACPI_PRODUCER)
                         return AE_OK;
  
-               for (i = 0; i < extended_irq->interrupt_count; i++) {
-                       pnpacpi_parse_allocated_irqresource(dev,
-                               extended_irq->interrupts[i],
-                               extended_irq->triggering,
-                               extended_irq->polarity,
-                               extended_irq->sharable);
+               if (extended_irq->interrupt_count == 0)
+                       pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED);
+               else {
+                       for (i = 0; i < extended_irq->interrupt_count; i++) {
+                               pnpacpi_parse_allocated_irqresource(dev,
+                                       extended_irq->interrupts[i],
+                                       extended_irq->triggering,
+                                       extended_irq->polarity,
+                                       extended_irq->sharable);
+                       }
+
+                       /*
+                        * The IRQ encoder puts a single interrupt in each
+                        * descriptor, so if a _CRS descriptor has more than
+                        * one interrupt, we won't be able to re-encode it.
+                        */
+                       if (pnp_can_write(dev) &&
+                           extended_irq->interrupt_count > 1) {
+                               dev_warn(&dev->dev, "multiple interrupts in "
+                                        "_CRS descriptor; configuration can't "
+                                        "be changed\n");
+                               dev->capabilities &= ~PNP_WRITE;
+                       }
                 }
                 break;
  
@@ -373,179 +467,147 @@ int pnpacpi_parse_allocated_resource(struct pnp_dev *dev)
  }
  
  static __init void pnpacpi_parse_dma_option(struct pnp_dev *dev,
-                                           struct pnp_option *option,
+                                           unsigned int option_flags,
                                             struct acpi_resource_dma *p)
  {
         int i;
-       struct pnp_dma *dma;
+       unsigned char map = 0, flags;
  
         if (p->channel_count == 0)
                 return;
-       dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL);
-       if (!dma)
-               return;
  
         for (i = 0; i < p->channel_count; i++)
-               dma->map |= 1 << p->channels[i];
-
-       dma->flags = dma_flags(p->type, p->bus_master, p->transfer);
+               map |= 1 << p->channels[i];
  
-       pnp_register_dma_resource(dev, option, dma);
+       flags = dma_flags(p->type, p->bus_master, p->transfer);
+       pnp_register_dma_resource(dev, option_flags, map, flags);
  }
  
  static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev,
-                                           struct pnp_option *option,
+                                           unsigned int option_flags,
                                             struct acpi_resource_irq *p)
  {
         int i;
-       struct pnp_irq *irq;
+       pnp_irq_mask_t map;
+       unsigned char flags;
  
         if (p->interrupt_count == 0)
                 return;
-       irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
-       if (!irq)
-               return;
  
+       bitmap_zero(map.bits, PNP_IRQ_NR);
         for (i = 0; i < p->interrupt_count; i++)
                 if (p->interrupts[i])
-                       __set_bit(p->interrupts[i], irq->map);
-       irq->flags = irq_flags(p->triggering, p->polarity, p->sharable);
+                       __set_bit(p->interrupts[i], map.bits);
  
-       pnp_register_irq_resource(dev, option, irq);
+       flags = irq_flags(p->triggering, p->polarity, p->sharable);
+       pnp_register_irq_resource(dev, option_flags, &map, flags);
  }
  
  static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev,
-                                               struct pnp_option *option,
+                                       unsigned int option_flags,
                                         struct acpi_resource_extended_irq *p)
  {
         int i;
-       struct pnp_irq *irq;
+       pnp_irq_mask_t map;
+       unsigned char flags;
  
         if (p->interrupt_count == 0)
                 return;
-       irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
-       if (!irq)
-               return;
  
-       for (i = 0; i < p->interrupt_count; i++)
-               if (p->interrupts[i])
-                       __set_bit(p->interrupts[i], irq->map);
-       irq->flags = irq_flags(p->triggering, p->polarity, p->sharable);
+       bitmap_zero(map.bits, PNP_IRQ_NR);
+       for (i = 0; i < p->interrupt_count; i++) {
+               if (p->interrupts[i]) {
+                       if (p->interrupts[i] < PNP_IRQ_NR)
+                               __set_bit(p->interrupts[i], map.bits);
+                       else
+                               dev_err(&dev->dev, "ignoring IRQ %d option "
+                                       "(too large for %d entry bitmap)\n",
+                                       p->interrupts[i], PNP_IRQ_NR);
+               }
+       }
  
-       pnp_register_irq_resource(dev, option, irq);
+       flags = irq_flags(p->triggering, p->polarity, p->sharable);
+       pnp_register_irq_resource(dev, option_flags, &map, flags);
  }
  
  static __init void pnpacpi_parse_port_option(struct pnp_dev *dev,
-                                            struct pnp_option *option,
+                                            unsigned int option_flags,
                                              struct acpi_resource_io *io)
  {
-       struct pnp_port *port;
+       unsigned char flags = 0;
  
         if (io->address_length == 0)
                 return;
-       port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
-       if (!port)
-               return;
-       port->min = io->minimum;
-       port->max = io->maximum;
-       port->align = io->alignment;
-       port->size = io->address_length;
-       port->flags = ACPI_DECODE_16 == io->io_decode ?
-           PNP_PORT_FLAG_16BITADDR : 0;
-       pnp_register_port_resource(dev, option, port);
+
+       if (io->io_decode == ACPI_DECODE_16)
+               flags = IORESOURCE_IO_16BIT_ADDR;
+       pnp_register_port_resource(dev, option_flags, io->minimum, io->maximum,
+                                  io->alignment, io->address_length, flags);
  }
  
  static __init void pnpacpi_parse_fixed_port_option(struct pnp_dev *dev,
-                                                  struct pnp_option *option,
+                                       unsigned int option_flags,
                                         struct acpi_resource_fixed_io *io)
  {
-       struct pnp_port *port;
-
         if (io->address_length == 0)
                 return;
-       port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
-       if (!port)
-               return;
-       port->min = port->max = io->address;
-       port->size = io->address_length;
-       port->align = 0;
-       port->flags = PNP_PORT_FLAG_FIXED;
-       pnp_register_port_resource(dev, option, port);
+
+       pnp_register_port_resource(dev, option_flags, io->address, io->address,
+                                  0, io->address_length, IORESOURCE_IO_FIXED);
  }
  
  static __init void pnpacpi_parse_mem24_option(struct pnp_dev *dev,
-                                             struct pnp_option *option,
+                                             unsigned int option_flags,
                                               struct acpi_resource_memory24 *p)
  {
-       struct pnp_mem *mem;
+       unsigned char flags = 0;
  
         if (p->address_length == 0)
                 return;
-       mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-       if (!mem)
-               return;
-       mem->min = p->minimum;
-       mem->max = p->maximum;
-       mem->align = p->alignment;
-       mem->size = p->address_length;
-
-       mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ?
-           IORESOURCE_MEM_WRITEABLE : 0;
  
-       pnp_register_mem_resource(dev, option, mem);
+       if (p->write_protect == ACPI_READ_WRITE_MEMORY)
+               flags = IORESOURCE_MEM_WRITEABLE;
+       pnp_register_mem_resource(dev, option_flags, p->minimum, p->maximum,
+                                 p->alignment, p->address_length, flags);
  }
  
  static __init void pnpacpi_parse_mem32_option(struct pnp_dev *dev,
-                                             struct pnp_option *option,
+                                             unsigned int option_flags,
                                               struct acpi_resource_memory32 *p)
  {
-       struct pnp_mem *mem;
+       unsigned char flags = 0;
  
         if (p->address_length == 0)
                 return;
-       mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-       if (!mem)
-               return;
-       mem->min = p->minimum;
-       mem->max = p->maximum;
-       mem->align = p->alignment;
-       mem->size = p->address_length;
-
-       mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ?
-           IORESOURCE_MEM_WRITEABLE : 0;
  
-       pnp_register_mem_resource(dev, option, mem);
+       if (p->write_protect == ACPI_READ_WRITE_MEMORY)
+               flags = IORESOURCE_MEM_WRITEABLE;
+       pnp_register_mem_resource(dev, option_flags, p->minimum, p->maximum,
+                                 p->alignment, p->address_length, flags);
  }
  
  static __init void pnpacpi_parse_fixed_mem32_option(struct pnp_dev *dev,
-                                                   struct pnp_option *option,
+                                       unsigned int option_flags,
                                         struct acpi_resource_fixed_memory32 *p)
  {
-       struct pnp_mem *mem;
+       unsigned char flags = 0;
  
         if (p->address_length == 0)
                 return;
-       mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-       if (!mem)
-               return;
-       mem->min = mem->max = p->address;
-       mem->size = p->address_length;
-       mem->align = 0;
-
-       mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ?
-           IORESOURCE_MEM_WRITEABLE : 0;
  
-       pnp_register_mem_resource(dev, option, mem);
+       if (p->write_protect == ACPI_READ_WRITE_MEMORY)
+               flags = IORESOURCE_MEM_WRITEABLE;
+       pnp_register_mem_resource(dev, option_flags, p->address, p->address,
+                                 0, p->address_length, flags);
  }
  
  static __init void pnpacpi_parse_address_option(struct pnp_dev *dev,
-                                               struct pnp_option *option,
+                                               unsigned int option_flags,
                                                 struct acpi_resource *r)
  {
         struct acpi_resource_address64 addr, *p = &addr;
         acpi_status status;
-       struct pnp_mem *mem;
-       struct pnp_port *port;
+       unsigned char flags = 0;
  
         status = acpi_resource_to_address64(r, p);
         if (!ACPI_SUCCESS(status)) {
@@ -558,49 +620,37 @@ static __init void pnpacpi_parse_address_option(struct pnp_dev *dev,
                 return;
  
         if (p->resource_type == ACPI_MEMORY_RANGE) {
-               mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-               if (!mem)
-                       return;
-               mem->min = mem->max = p->minimum;
-               mem->size = p->address_length;
-               mem->align = 0;
-               mem->flags = (p->info.mem.write_protect ==
-                             ACPI_READ_WRITE_MEMORY) ? IORESOURCE_MEM_WRITEABLE
-                   : 0;
-               pnp_register_mem_resource(dev, option, mem);
-       } else if (p->resource_type == ACPI_IO_RANGE) {
-               port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
-               if (!port)
-                       return;
-               port->min = port->max = p->minimum;
-               port->size = p->address_length;
-               port->align = 0;
-               port->flags = PNP_PORT_FLAG_FIXED;
-               pnp_register_port_resource(dev, option, port);
-       }
+               if (p->info.mem.write_protect == ACPI_READ_WRITE_MEMORY)
+                       flags = IORESOURCE_MEM_WRITEABLE;
+               pnp_register_mem_resource(dev, option_flags, p->minimum,
+                                         p->minimum, 0, p->address_length,
+                                         flags);
+       } else if (p->resource_type == ACPI_IO_RANGE)
+               pnp_register_port_resource(dev, option_flags, p->minimum,
+                                          p->minimum, 0, p->address_length,
+                                          IORESOURCE_IO_FIXED);
  }
  
  struct acpipnp_parse_option_s {
-       struct pnp_option *option;
-       struct pnp_option *option_independent;
         struct pnp_dev *dev;
+       unsigned int option_flags;
  };
  
  static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res,
                                                   void *data)
  {
-       int priority = 0;
+       int priority;
         struct acpipnp_parse_option_s *parse_data = data;
         struct pnp_dev *dev = parse_data->dev;
-       struct pnp_option *option = parse_data->option;
+       unsigned int option_flags = parse_data->option_flags;
  
         switch (res->type) {
         case ACPI_RESOURCE_TYPE_IRQ:
-               pnpacpi_parse_irq_option(dev, option, &res->data.irq);
+               pnpacpi_parse_irq_option(dev, option_flags, &res->data.irq);
                 break;
  
         case ACPI_RESOURCE_TYPE_DMA:
-               pnpacpi_parse_dma_option(dev, option, &res->data.dma);
+               pnpacpi_parse_dma_option(dev, option_flags, &res->data.dma);
                 break;
  
         case ACPI_RESOURCE_TYPE_START_DEPENDENT:
@@ -620,31 +670,19 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res,
                         priority = PNP_RES_PRIORITY_INVALID;
                         break;
                 }
-               /* TBD: Consider performance/robustness bits */
-               option = pnp_register_dependent_option(dev, priority);
-               if (!option)
-                       return AE_ERROR;
-               parse_data->option = option;
+               parse_data->option_flags = pnp_new_dependent_set(dev, priority);
                 break;
  
         case ACPI_RESOURCE_TYPE_END_DEPENDENT:
-               /*only one EndDependentFn is allowed */
-               if (!parse_data->option_independent) {
-                       dev_warn(&dev->dev, "more than one EndDependentFn "
-                                "in _PRS\n");
-                       return AE_ERROR;
-               }
-               parse_data->option = parse_data->option_independent;
-               parse_data->option_independent = NULL;
-               dev_dbg(&dev->dev, "end dependent options\n");
+               parse_data->option_flags = 0;
                 break;
  
         case ACPI_RESOURCE_TYPE_IO:
-               pnpacpi_parse_port_option(dev, option, &res->data.io);
+               pnpacpi_parse_port_option(dev, option_flags, &res->data.io);
                 break;
  
         case ACPI_RESOURCE_TYPE_FIXED_IO:
-               pnpacpi_parse_fixed_port_option(dev, option,
+               pnpacpi_parse_fixed_port_option(dev, option_flags,
                                                 &res->data.fixed_io);
                 break;
  
@@ -653,29 +691,31 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res,
                 break;
  
         case ACPI_RESOURCE_TYPE_MEMORY24:
-               pnpacpi_parse_mem24_option(dev, option, &res->data.memory24);
+               pnpacpi_parse_mem24_option(dev, option_flags,
+                                          &res->data.memory24);
                 break;
  
         case ACPI_RESOURCE_TYPE_MEMORY32:
-               pnpacpi_parse_mem32_option(dev, option, &res->data.memory32);
+               pnpacpi_parse_mem32_option(dev, option_flags,
+                                          &res->data.memory32);
                 break;
  
         case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
-               pnpacpi_parse_fixed_mem32_option(dev, option,
+               pnpacpi_parse_fixed_mem32_option(dev, option_flags,
                                                  &res->data.fixed_memory32);
                 break;
  
         case ACPI_RESOURCE_TYPE_ADDRESS16:
         case ACPI_RESOURCE_TYPE_ADDRESS32:
         case ACPI_RESOURCE_TYPE_ADDRESS64:
-               pnpacpi_parse_address_option(dev, option, res);
+               pnpacpi_parse_address_option(dev, option_flags, res);
                 break;
  
         case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
                 break;
  
         case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
-               pnpacpi_parse_ext_irq_option(dev, option,
+               pnpacpi_parse_ext_irq_option(dev, option_flags,
                                              &res->data.extended_irq);
                 break;
  
@@ -699,12 +739,9 @@ int __init pnpacpi_parse_resource_option_data(struct pnp_dev *dev)
  
         dev_dbg(&dev->dev, "parse resource options\n");
  
-       parse_data.option = pnp_register_independent_option(dev);
-       if (!parse_data.option)
-               return -ENOMEM;
-
-       parse_data.option_independent = parse_data.option;
         parse_data.dev = dev;
+       parse_data.option_flags = 0;
+
         status = acpi_walk_resources(handle, METHOD_NAME__PRS,
                                      pnpacpi_option_resource, &parse_data);
  
@@ -806,6 +843,13 @@ static void pnpacpi_encode_irq(struct pnp_dev *dev,
         struct acpi_resource_irq *irq = &resource->data.irq;
         int triggering, polarity, shareable;
  
+       if (!pnp_resource_enabled(p)) {
+               irq->interrupt_count = 0;
+               dev_dbg(&dev->dev, "  encode irq (%s)\n",
+                       p ? "disabled" : "missing");
+               return;
+       }
+
         decode_irq_flags(dev, p->flags, &triggering, &polarity, &shareable);
         irq->triggering = triggering;
         irq->polarity = polarity;
@@ -828,6 +872,13 @@ static void pnpacpi_encode_ext_irq(struct pnp_dev *dev,
         struct acpi_resource_extended_irq *extended_irq = &resource->data.extended_irq;
         int triggering, polarity, shareable;
  
+       if (!pnp_resource_enabled(p)) {
+               extended_irq->interrupt_count = 0;
+               dev_dbg(&dev->dev, "  encode extended irq (%s)\n",
+                       p ? "disabled" : "missing");
+               return;
+       }
+
         decode_irq_flags(dev, p->flags, &triggering, &polarity, &shareable);
         extended_irq->producer_consumer = ACPI_CONSUMER;
         extended_irq->triggering = triggering;
@@ -848,6 +899,13 @@ static void pnpacpi_encode_dma(struct pnp_dev *dev,
  {
         struct acpi_resource_dma *dma = &resource->data.dma;
  
+       if (!pnp_resource_enabled(p)) {
+               dma->channel_count = 0;
+               dev_dbg(&dev->dev, "  encode dma (%s)\n",
+                       p ? "disabled" : "missing");
+               return;
+       }
+
         /* Note: pnp_assign_dma will copy pnp_dma->flags into p->flags */
         switch (p->flags & IORESOURCE_DMA_SPEED_MASK) {
         case IORESOURCE_DMA_TYPEA:
@@ -889,17 +947,21 @@ static void pnpacpi_encode_io(struct pnp_dev *dev,
  {
         struct acpi_resource_io *io = &resource->data.io;
  
-       /* Note: pnp_assign_port will copy pnp_port->flags into p->flags */
-       io->io_decode = (p->flags & PNP_PORT_FLAG_16BITADDR) ?
-           ACPI_DECODE_16 : ACPI_DECODE_10;
-       io->minimum = p->start;
-       io->maximum = p->end;
-       io->alignment = 0;      /* Correct? */
-       io->address_length = p->end - p->start + 1;
-
-       dev_dbg(&dev->dev, "  encode io %#llx-%#llx decode %#x\n",
-               (unsigned long long) p->start, (unsigned long long) p->end,
-               io->io_decode);
+       if (pnp_resource_enabled(p)) {
+               /* Note: pnp_assign_port copies pnp_port->flags into p->flags */
+               io->io_decode = (p->flags & IORESOURCE_IO_16BIT_ADDR) ?
+                   ACPI_DECODE_16 : ACPI_DECODE_10;
+               io->minimum = p->start;
+               io->maximum = p->end;
+               io->alignment = 0;      /* Correct? */
+               io->address_length = p->end - p->start + 1;
+       } else {
+               io->minimum = 0;
+               io->address_length = 0;
+       }
+
+       dev_dbg(&dev->dev, "  encode io %#x-%#x decode %#x\n", io->minimum,
+               io->minimum + io->address_length - 1, io->io_decode);
  }
  
  static void pnpacpi_encode_fixed_io(struct pnp_dev *dev,
@@ -908,11 +970,16 @@ static void pnpacpi_encode_fixed_io(struct pnp_dev *dev,
  {
         struct acpi_resource_fixed_io *fixed_io = &resource->data.fixed_io;
  
-       fixed_io->address = p->start;
-       fixed_io->address_length = p->end - p->start + 1;
+       if (pnp_resource_enabled(p)) {
+               fixed_io->address = p->start;
+               fixed_io->address_length = p->end - p->start + 1;
+       } else {
+               fixed_io->address = 0;
+               fixed_io->address_length = 0;
+       }
  
-       dev_dbg(&dev->dev, "  encode fixed_io %#llx-%#llx\n",
-               (unsigned long long) p->start, (unsigned long long) p->end);
+       dev_dbg(&dev->dev, "  encode fixed_io %#x-%#x\n", fixed_io->address,
+               fixed_io->address + fixed_io->address_length - 1);
  }
  
  static void pnpacpi_encode_mem24(struct pnp_dev *dev,
@@ -921,17 +988,22 @@ static void pnpacpi_encode_mem24(struct pnp_dev *dev,
  {
         struct acpi_resource_memory24 *memory24 = &resource->data.memory24;
  
-       /* Note: pnp_assign_mem will copy pnp_mem->flags into p->flags */
-       memory24->write_protect =
-           (p->flags & IORESOURCE_MEM_WRITEABLE) ?
-           ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
-       memory24->minimum = p->start;
-       memory24->maximum = p->end;
-       memory24->alignment = 0;
-       memory24->address_length = p->end - p->start + 1;
-
-       dev_dbg(&dev->dev, "  encode mem24 %#llx-%#llx write_protect %#x\n",
-               (unsigned long long) p->start, (unsigned long long) p->end,
+       if (pnp_resource_enabled(p)) {
+               /* Note: pnp_assign_mem copies pnp_mem->flags into p->flags */
+               memory24->write_protect = p->flags & IORESOURCE_MEM_WRITEABLE ?
+                   ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
+               memory24->minimum = p->start;
+               memory24->maximum = p->end;
+               memory24->alignment = 0;
+               memory24->address_length = p->end - p->start + 1;
+       } else {
+               memory24->minimum = 0;
+               memory24->address_length = 0;
+       }
+
+       dev_dbg(&dev->dev, "  encode mem24 %#x-%#x write_protect %#x\n",
+               memory24->minimum,
+               memory24->minimum + memory24->address_length - 1,
                 memory24->write_protect);
  }
  
@@ -941,16 +1013,21 @@ static void pnpacpi_encode_mem32(struct pnp_dev *dev,
  {
         struct acpi_resource_memory32 *memory32 = &resource->data.memory32;
  
-       memory32->write_protect =
-           (p->flags & IORESOURCE_MEM_WRITEABLE) ?
-           ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
-       memory32->minimum = p->start;
-       memory32->maximum = p->end;
-       memory32->alignment = 0;
-       memory32->address_length = p->end - p->start + 1;
+       if (pnp_resource_enabled(p)) {
+               memory32->write_protect = p->flags & IORESOURCE_MEM_WRITEABLE ?
+                   ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
+               memory32->minimum = p->start;
+               memory32->maximum = p->end;
+               memory32->alignment = 0;
+               memory32->address_length = p->end - p->start + 1;
+       } else {        /* not enabled: encode an empty range */
+               memory32->minimum = 0;
+               memory32->address_length = 0;   /* zero length, as mem24 does */
+       }
  
-       dev_dbg(&dev->dev, "  encode mem32 %#llx-%#llx write_protect %#x\n",
-               (unsigned long long) p->start, (unsigned long long) p->end,
+       dev_dbg(&dev->dev, "  encode mem32 %#x-%#x write_protect %#x\n",
+               memory32->minimum,
+               memory32->minimum + memory32->address_length - 1,
                 memory32->write_protect);
  }
  
@@ -960,15 +1037,20 @@ static void pnpacpi_encode_fixed_mem32(struct pnp_dev *dev,
  {
         struct acpi_resource_fixed_memory32 *fixed_memory32 = &resource->data.fixed_memory32;
  
-       fixed_memory32->write_protect =
-           (p->flags & IORESOURCE_MEM_WRITEABLE) ?
-           ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
-       fixed_memory32->address = p->start;
-       fixed_memory32->address_length = p->end - p->start + 1;
+       if (pnp_resource_enabled(p)) {
+               fixed_memory32->write_protect =
+                   p->flags & IORESOURCE_MEM_WRITEABLE ?
+                   ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
+               fixed_memory32->address = p->start;
+               fixed_memory32->address_length = p->end - p->start + 1;
+       } else {
+               fixed_memory32->address = 0;
+               fixed_memory32->address_length = 0;
+       }
  
-       dev_dbg(&dev->dev, "  encode fixed_mem32 %#llx-%#llx "
-               "write_protect %#x\n",
-               (unsigned long long) p->start, (unsigned long long) p->end,
+       dev_dbg(&dev->dev, "  encode fixed_mem32 %#x-%#x write_protect %#x\n",
+               fixed_memory32->address,
+               fixed_memory32->address + fixed_memory32->address_length - 1,
                 fixed_memory32->write_protect);
  }
  
diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c

index 5ff9a4c0447ec8052e0de9a433255051826e1298..ca567671379e73985863538650f37609b360fc32 100644 (file)
--- a/drivers/pnp/pnpbios/rsparser.c
+++ b/drivers/pnp/pnpbios/rsparser.c
@@ -216,137 +216,116 @@ len_err:
  
  static __init void pnpbios_parse_mem_option(struct pnp_dev *dev,
                                             unsigned char *p, int size,
-                                           struct pnp_option *option)
+                                           unsigned int option_flags)
  {
-       struct pnp_mem *mem;
-
-       mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-       if (!mem)
-               return;
-       mem->min = ((p[5] << 8) | p[4]) << 8;
-       mem->max = ((p[7] << 8) | p[6]) << 8;
-       mem->align = (p[9] << 8) | p[8];
-       mem->size = ((p[11] << 8) | p[10]) << 8;
-       mem->flags = p[3];
-       pnp_register_mem_resource(dev, option, mem);
+       resource_size_t min, max, align, len;
+       unsigned char flags;
+
+       min = ((p[5] << 8) | p[4]) << 8;
+       max = ((p[7] << 8) | p[6]) << 8;
+       align = (p[9] << 8) | p[8];
+       len = ((p[11] << 8) | p[10]) << 8;
+       flags = p[3];
+       pnp_register_mem_resource(dev, option_flags, min, max, align, len,
+                                 flags);
  }
  
  static __init void pnpbios_parse_mem32_option(struct pnp_dev *dev,
                                               unsigned char *p, int size,
-                                             struct pnp_option *option)
+                                             unsigned int option_flags)
  {
-       struct pnp_mem *mem;
-
-       mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-       if (!mem)
-               return;
-       mem->min = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4];
-       mem->max = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8];
-       mem->align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12];
-       mem->size = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16];
-       mem->flags = p[3];
-       pnp_register_mem_resource(dev, option, mem);
+       resource_size_t min, max, align, len;   /* may be 64-bit: build from u32 */
+       unsigned char flags;
+
+       min = ((u32)p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4];
+       max = ((u32)p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8];
+       align = ((u32)p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12];
+       len = ((u32)p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16];
+       flags = p[3];
+       pnp_register_mem_resource(dev, option_flags, min, max, align, len,
+                                 flags);
  }
  
  static __init void pnpbios_parse_fixed_mem32_option(struct pnp_dev *dev,
                                                     unsigned char *p, int size,
-                                                   struct pnp_option *option)
+                                                   unsigned int option_flags)
  {
-       struct pnp_mem *mem;
-
-       mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
-       if (!mem)
-               return;
-       mem->min = mem->max = (p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4];
-       mem->size = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8];
-       mem->align = 0;
-       mem->flags = p[3];
-       pnp_register_mem_resource(dev, option, mem);
+       resource_size_t base, len;      /* may be 64-bit: build from u32 */
+       unsigned char flags;
+
+       base = ((u32)p[7] << 24) | (p[6] << 16) | (p[5] << 8) | p[4];
+       len = ((u32)p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8];
+       flags = p[3];
+       pnp_register_mem_resource(dev, option_flags, base, base, 0, len, flags);
  }
  
  static __init void pnpbios_parse_irq_option(struct pnp_dev *dev,
                                             unsigned char *p, int size,
-                                           struct pnp_option *option)
+                                           unsigned int option_flags)
  {
-       struct pnp_irq *irq;
         unsigned long bits;
+       pnp_irq_mask_t map;
+       unsigned char flags = IORESOURCE_IRQ_HIGHEDGE;
  
-       irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
-       if (!irq)
-               return;
         bits = (p[2] << 8) | p[1];
-       bitmap_copy(irq->map, &bits, 16);
+
+       bitmap_zero(map.bits, PNP_IRQ_NR);
+       bitmap_copy(map.bits, &bits, 16);
+
         if (size > 2)
-               irq->flags = p[3];
-       else
-               irq->flags = IORESOURCE_IRQ_HIGHEDGE;
-       pnp_register_irq_resource(dev, option, irq);
+               flags = p[3];
+
+       pnp_register_irq_resource(dev, option_flags, &map, flags);
  }
  
  static __init void pnpbios_parse_dma_option(struct pnp_dev *dev,
                                             unsigned char *p, int size,
-                                           struct pnp_option *option)
+                                           unsigned int option_flags)
  {
-       struct pnp_dma *dma;
-
-       dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL);
-       if (!dma)
-               return;
-       dma->map = p[1];
-       dma->flags = p[2];
-       pnp_register_dma_resource(dev, option, dma);
+       pnp_register_dma_resource(dev, option_flags, p[1], p[2]);
  }
  
  static __init void pnpbios_parse_port_option(struct pnp_dev *dev,
                                              unsigned char *p, int size,
-                                            struct pnp_option *option)
+                                            unsigned int option_flags)
  {
-       struct pnp_port *port;
-
-       port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
-       if (!port)
-               return;
-       port->min = (p[3] << 8) | p[2];
-       port->max = (p[5] << 8) | p[4];
-       port->align = p[6];
-       port->size = p[7];
-       port->flags = p[1] ? PNP_PORT_FLAG_16BITADDR : 0;
-       pnp_register_port_resource(dev, option, port);
+       resource_size_t min, max, align, len;
+       unsigned char flags;
+
+       min = (p[3] << 8) | p[2];
+       max = (p[5] << 8) | p[4];
+       align = p[6];
+       len = p[7];
+       flags = p[1] ? IORESOURCE_IO_16BIT_ADDR : 0;
+       pnp_register_port_resource(dev, option_flags, min, max, align, len,
+                                  flags);
  }
  
  static __init void pnpbios_parse_fixed_port_option(struct pnp_dev *dev,
                                                    unsigned char *p, int size,
-                                                  struct pnp_option *option)
+                                                  unsigned int option_flags)
  {
-       struct pnp_port *port;
-
-       port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
-       if (!port)
-               return;
-       port->min = port->max = (p[2] << 8) | p[1];
-       port->size = p[3];
-       port->align = 0;
-       port->flags = PNP_PORT_FLAG_FIXED;
-       pnp_register_port_resource(dev, option, port);
+       resource_size_t base, len;
+
+       base = (p[2] << 8) | p[1];
+       len = p[3];
+       pnp_register_port_resource(dev, option_flags, base, base, 0, len,
+                                  IORESOURCE_IO_FIXED);
  }
  
  static __init unsigned char *
  pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end,
-                                       struct pnp_dev *dev)
+                                  struct pnp_dev *dev)
  {
         unsigned int len, tag;
-       int priority = 0;
-       struct pnp_option *option, *option_independent;
+       int priority;
+       unsigned int option_flags;
  
         if (!p)
                 return NULL;
  
         dev_dbg(&dev->dev, "parse resource options\n");
-
-       option_independent = option = pnp_register_independent_option(dev);
-       if (!option)
-               return NULL;
-
+       option_flags = 0;
         while ((char *)p < (char *)end) {
  
                 /* determine the type of tag */
@@ -363,37 +342,38 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end,
                 case LARGE_TAG_MEM:
                         if (len != 9)
                                 goto len_err;
-                       pnpbios_parse_mem_option(dev, p, len, option);
+                       pnpbios_parse_mem_option(dev, p, len, option_flags);
                         break;
  
                 case LARGE_TAG_MEM32:
                         if (len != 17)
                                 goto len_err;
-                       pnpbios_parse_mem32_option(dev, p, len, option);
+                       pnpbios_parse_mem32_option(dev, p, len, option_flags);
                         break;
  
                 case LARGE_TAG_FIXEDMEM32:
                         if (len != 9)
                                 goto len_err;
-                       pnpbios_parse_fixed_mem32_option(dev, p, len, option);
+                       pnpbios_parse_fixed_mem32_option(dev, p, len,
+                                                        option_flags);
                         break;
  
                 case SMALL_TAG_IRQ:
                         if (len < 2 || len > 3)
                                 goto len_err;
-                       pnpbios_parse_irq_option(dev, p, len, option);
+                       pnpbios_parse_irq_option(dev, p, len, option_flags);
                         break;
  
                 case SMALL_TAG_DMA:
                         if (len != 2)
                                 goto len_err;
-                       pnpbios_parse_dma_option(dev, p, len, option);
+                       pnpbios_parse_dma_option(dev, p, len, option_flags);
                         break;
  
                 case SMALL_TAG_PORT:
                         if (len != 7)
                                 goto len_err;
-                       pnpbios_parse_port_option(dev, p, len, option);
+                       pnpbios_parse_port_option(dev, p, len, option_flags);
                         break;
  
                 case SMALL_TAG_VENDOR:
@@ -403,28 +383,23 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end,
                 case SMALL_TAG_FIXEDPORT:
                         if (len != 3)
                                 goto len_err;
-                       pnpbios_parse_fixed_port_option(dev, p, len, option);
+                       pnpbios_parse_fixed_port_option(dev, p, len,
+                                                       option_flags);
                         break;
  
                 case SMALL_TAG_STARTDEP:
                         if (len > 1)
                                 goto len_err;
-                       priority = 0x100 | PNP_RES_PRIORITY_ACCEPTABLE;
+                       priority = PNP_RES_PRIORITY_ACCEPTABLE;
                         if (len > 0)
-                               priority = 0x100 | p[1];
-                       option = pnp_register_dependent_option(dev, priority);
-                       if (!option)
-                               return NULL;
+                               priority = p[1];
+                       option_flags = pnp_new_dependent_set(dev, priority);
                         break;
  
                 case SMALL_TAG_ENDDEP:
                         if (len != 0)
                                 goto len_err;
-                       if (option_independent == option)
-                               dev_warn(&dev->dev, "missing "
-                                        "SMALL_TAG_STARTDEP tag\n");
-                       option = option_independent;
-                       dev_dbg(&dev->dev, "end dependent options\n");
+                       option_flags = 0;
                         break;
  
                 case SMALL_TAG_END:
@@ -526,8 +501,16 @@ len_err:
  static void pnpbios_encode_mem(struct pnp_dev *dev, unsigned char *p,
                                struct resource *res)
  {
-       unsigned long base = res->start;
-       unsigned long len = res->end - res->start + 1;
+       unsigned long base;
+       unsigned long len;
+
+       if (pnp_resource_enabled(res)) {
+               base = res->start;
+               len = res->end - res->start + 1;
+       } else {
+               base = 0;
+               len = 0;
+       }
  
         p[4] = (base >> 8) & 0xff;
         p[5] = ((base >> 8) >> 8) & 0xff;
@@ -536,15 +519,22 @@ static void pnpbios_encode_mem(struct pnp_dev *dev, unsigned char *p,
         p[10] = (len >> 8) & 0xff;
         p[11] = ((len >> 8) >> 8) & 0xff;
  
-       dev_dbg(&dev->dev, "  encode mem %#llx-%#llx\n",
-               (unsigned long long) res->start, (unsigned long long) res->end);
+       dev_dbg(&dev->dev, "  encode mem %#lx-%#lx\n", base, base + len - 1);
  }
  
  static void pnpbios_encode_mem32(struct pnp_dev *dev, unsigned char *p,
                                  struct resource *res)
  {
-       unsigned long base = res->start;
-       unsigned long len = res->end - res->start + 1;
+       unsigned long base;
+       unsigned long len;
+
+       if (pnp_resource_enabled(res)) {
+               base = res->start;
+               len = res->end - res->start + 1;
+       } else {
+               base = 0;
+               len = 0;
+       }
  
         p[4] = base & 0xff;
         p[5] = (base >> 8) & 0xff;
@@ -559,15 +549,22 @@ static void pnpbios_encode_mem32(struct pnp_dev *dev, unsigned char *p,
         p[18] = (len >> 16) & 0xff;
         p[19] = (len >> 24) & 0xff;
  
-       dev_dbg(&dev->dev, "  encode mem32 %#llx-%#llx\n",
-               (unsigned long long) res->start, (unsigned long long) res->end);
+       dev_dbg(&dev->dev, "  encode mem32 %#lx-%#lx\n", base, base + len - 1);
  }
  
  static void pnpbios_encode_fixed_mem32(struct pnp_dev *dev, unsigned char *p,
                                        struct resource *res)
  {
-       unsigned long base = res->start;
-       unsigned long len = res->end - res->start + 1;
+       unsigned long base;
+       unsigned long len;
+
+       if (pnp_resource_enabled(res)) {
+               base = res->start;
+               len = res->end - res->start + 1;
+       } else {
+               base = 0;
+               len = 0;
+       }
  
         p[4] = base & 0xff;
         p[5] = (base >> 8) & 0xff;
@@ -578,40 +575,54 @@ static void pnpbios_encode_fixed_mem32(struct pnp_dev *dev, unsigned char *p,
         p[10] = (len >> 16) & 0xff;
         p[11] = (len >> 24) & 0xff;
  
-       dev_dbg(&dev->dev, "  encode fixed_mem32 %#llx-%#llx\n",
-               (unsigned long long) res->start, (unsigned long long) res->end);
+       dev_dbg(&dev->dev, "  encode fixed_mem32 %#lx-%#lx\n", base,
+               base + len - 1);
  }
  
  static void pnpbios_encode_irq(struct pnp_dev *dev, unsigned char *p,
                                struct resource *res)
  {
-       unsigned long map = 0;
+       unsigned long map;
+
+       if (pnp_resource_enabled(res))
+               map = 1 << res->start;
+       else
+               map = 0;
  
-       map = 1 << res->start;
         p[1] = map & 0xff;
         p[2] = (map >> 8) & 0xff;
  
-       dev_dbg(&dev->dev, "  encode irq %llu\n",
-               (unsigned long long)res->start);
+       dev_dbg(&dev->dev, "  encode irq mask %#lx\n", map);
  }
  
  static void pnpbios_encode_dma(struct pnp_dev *dev, unsigned char *p,
                                struct resource *res)
  {
-       unsigned long map = 0;
+       unsigned long map;
+
+       if (pnp_resource_enabled(res))
+               map = 1 << res->start;
+       else
+               map = 0;
  
-       map = 1 << res->start;
         p[1] = map & 0xff;
  
-       dev_dbg(&dev->dev, "  encode dma %llu\n",
-               (unsigned long long)res->start);
+       dev_dbg(&dev->dev, "  encode dma mask %#lx\n", map);
  }
  
  static void pnpbios_encode_port(struct pnp_dev *dev, unsigned char *p,
                                 struct resource *res)
  {
-       unsigned long base = res->start;
-       unsigned long len = res->end - res->start + 1;
+       unsigned long base;
+       unsigned long len;
+
+       if (pnp_resource_enabled(res)) {
+               base = res->start;
+               len = res->end - res->start + 1;
+       } else {
+               base = 0;
+               len = 0;
+       }
  
         p[2] = base & 0xff;
         p[3] = (base >> 8) & 0xff;
@@ -619,8 +630,7 @@ static void pnpbios_encode_port(struct pnp_dev *dev, unsigned char *p,
         p[5] = (base >> 8) & 0xff;
         p[7] = len & 0xff;
  
-       dev_dbg(&dev->dev, "  encode io %#llx-%#llx\n",
-               (unsigned long long) res->start, (unsigned long long) res->end);
+       dev_dbg(&dev->dev, "  encode io %#lx-%#lx\n", base, base + len - 1);
  }
  
  static void pnpbios_encode_fixed_port(struct pnp_dev *dev, unsigned char *p,
@@ -629,12 +639,20 @@ static void pnpbios_encode_fixed_port(struct pnp_dev *dev, unsigned char *p,
         unsigned long base = res->start;
         unsigned long len = res->end - res->start + 1;
  
+       if (pnp_resource_enabled(res)) {
+               base = res->start;
+               len = res->end - res->start + 1;
+       } else {
+               base = 0;
+               len = 0;
+       }
+
         p[1] = base & 0xff;
         p[2] = (base >> 8) & 0xff;
         p[3] = len & 0xff;
  
-       dev_dbg(&dev->dev, "  encode fixed_io %#llx-%#llx\n",
-               (unsigned long long) res->start, (unsigned long long) res->end);
+       dev_dbg(&dev->dev, "  encode fixed_io %#lx-%#lx\n", base,
+               base + len - 1);
  }
  
  static unsigned char *pnpbios_encode_allocated_resource_data(struct pnp_dev
diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c

index 1ff3bb585ab2b0b5f0bb88f8477fe334c26c726a..55f55ed72dc7a69019d9c948c35c827d918955bd 100644 (file)
--- a/drivers/pnp/quirks.c
+++ b/drivers/pnp/quirks.c
@@ -5,6 +5,8 @@
   *  when building up the resource structure for the first time.
   *
   *  Copyright (c) 2000 Peter Denison <peterd@pnd-pc.demon.co.uk>
+ *  Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *     Bjorn Helgaas <bjorn.helgaas@hp.com>
   *
   *  Heavily based on PCI quirks handling which is
   *
@@ -20,203 +22,207 @@
  #include <linux/kallsyms.h>
  #include "base.h"
  
+static void quirk_awe32_add_ports(struct pnp_dev *dev,
+                                 struct pnp_option *option,
+                                 unsigned int offset)
+{
+       struct pnp_option *new_option;
+
+       new_option = kmalloc(sizeof(struct pnp_option), GFP_KERNEL);
+       if (!new_option) {
+               dev_err(&dev->dev, "couldn't add ioport region to option set "
+                       "%d\n", pnp_option_set(option));
+               return;
+       }
+
+       *new_option = *option;
+       new_option->u.port.min += offset;
+       new_option->u.port.max += offset;
+       list_add(&new_option->list, &option->list);
+
+       dev_info(&dev->dev, "added ioport region %#llx-%#llx to set %d\n",
+               (unsigned long long) new_option->u.port.min,
+               (unsigned long long) new_option->u.port.max,
+               pnp_option_set(option));
+}
+
  static void quirk_awe32_resources(struct pnp_dev *dev)
  {
-       struct pnp_port *port, *port2, *port3;
-       struct pnp_option *res = dev->dependent;
+       struct pnp_option *option;
+       unsigned int set = ~0;
  
         /*
-        * Unfortunately the isapnp_add_port_resource is too tightly bound
-        * into the PnP discovery sequence, and cannot be used. Link in the
-        * two extra ports (at offset 0x400 and 0x800 from the one given) by
-        * hand.
+        * Add two extra ioport regions (at offset 0x400 and 0x800 from the
+        * one given) to every dependent option set.
          */
-       for (; res; res = res->next) {
-               port2 = pnp_alloc(sizeof(struct pnp_port));
-               if (!port2)
-                       return;
-               port3 = pnp_alloc(sizeof(struct pnp_port));
-               if (!port3) {
-                       kfree(port2);
-                       return;
+       list_for_each_entry(option, &dev->options, list) {
+               if (pnp_option_is_dependent(option) &&
+                   pnp_option_set(option) != set) {
+                       set = pnp_option_set(option);
+                       quirk_awe32_add_ports(dev, option, 0x800);
+                       quirk_awe32_add_ports(dev, option, 0x400);
                 }
-               port = res->port;
-               memcpy(port2, port, sizeof(struct pnp_port));
-               memcpy(port3, port, sizeof(struct pnp_port));
-               port->next = port2;
-               port2->next = port3;
-               port2->min += 0x400;
-               port2->max += 0x400;
-               port3->min += 0x800;
-               port3->max += 0x800;
-               dev_info(&dev->dev,
-                       "AWE32 quirk - added ioports 0x%lx and 0x%lx\n",
-                       (unsigned long)port2->min,
-                       (unsigned long)port3->min);
         }
  }
  
  static void quirk_cmi8330_resources(struct pnp_dev *dev)
  {
-       struct pnp_option *res = dev->dependent;
-       unsigned long tmp;
-
-       for (; res; res = res->next) {
-
-               struct pnp_irq *irq;
-               struct pnp_dma *dma;
+       struct pnp_option *option;
+       struct pnp_irq *irq;
+       struct pnp_dma *dma;
  
-               for (irq = res->irq; irq; irq = irq->next) {    // Valid irqs are 5, 7, 10
-                       tmp = 0x04A0;
-                       bitmap_copy(irq->map, &tmp, 16);        // 0000 0100 1010 0000
-               }
+       list_for_each_entry(option, &dev->options, list) {
+               if (!pnp_option_is_dependent(option))
+                       continue;
  
-               for (dma = res->dma; dma; dma = dma->next)      // Valid 8bit dma channels are 1,3
+               if (option->type == IORESOURCE_IRQ) {
+                       irq = &option->u.irq;
+                       bitmap_zero(irq->map.bits, PNP_IRQ_NR);
+                       __set_bit(5, irq->map.bits);
+                       __set_bit(7, irq->map.bits);
+                       __set_bit(10, irq->map.bits);
+                       dev_info(&dev->dev, "set possible IRQs in "
+                                "option set %d to 5, 7, 10\n",
+                                pnp_option_set(option));
+               } else if (option->type == IORESOURCE_DMA) {
+                       dma = &option->u.dma;
                         if ((dma->flags & IORESOURCE_DMA_TYPE_MASK) ==
-                           IORESOURCE_DMA_8BIT)
-                               dma->map = 0x000A;
+                                               IORESOURCE_DMA_8BIT &&
+                           dma->map != 0x0A) {
+                               dev_info(&dev->dev, "changing possible "
+                                        "DMA channel mask in option set %d "
+                                        "from %#02x to 0x0A (1, 3)\n",
+                                        pnp_option_set(option), dma->map);
+                               dma->map = 0x0A;
+                       }
+               }
         }
-       dev_info(&dev->dev, "CMI8330 quirk - forced possible IRQs to 5, 7, 10 "
-               "and DMA channels to 1, 3\n");
  }
  
  static void quirk_sb16audio_resources(struct pnp_dev *dev)
  {
+       struct pnp_option *option;
+       unsigned int prev_option_flags = ~0, n = 0;
         struct pnp_port *port;
-       struct pnp_option *res = dev->dependent;
-       int changed = 0;
  
         /*
-        * The default range on the mpu port for these devices is 0x388-0x388.
+        * The default range on the OPL port for these devices is 0x388-0x388.
          * Here we increase that range so that two such cards can be
          * auto-configured.
          */
+       list_for_each_entry(option, &dev->options, list) {
+               if (prev_option_flags != option->flags) {
+                       prev_option_flags = option->flags;
+                       n = 0;
+               }
  
-       for (; res; res = res->next) {
-               port = res->port;
-               if (!port)
-                       continue;
-               port = port->next;
-               if (!port)
-                       continue;
-               port = port->next;
-               if (!port)
-                       continue;
-               if (port->min != port->max)
-                       continue;
-               port->max += 0x70;
-               changed = 1;
+               if (pnp_option_is_dependent(option) &&
+                   option->type == IORESOURCE_IO) {
+                       n++;
+                       port = &option->u.port;
+                       if (n == 3 && port->min == port->max) {
+                               port->max += 0x70;
+                               dev_info(&dev->dev, "increased option port "
+                                        "range from %#llx-%#llx to "
+                                        "%#llx-%#llx\n",
+                                        (unsigned long long) port->min,
+                                        (unsigned long long) port->min,
+                                        (unsigned long long) port->min,
+                                        (unsigned long long) port->max);
+                       }
+               }
         }
-       if (changed)
-               dev_info(&dev->dev, "SB audio device quirk - increased port range\n");
  }
  
-static struct pnp_option *quirk_isapnp_mpu_options(struct pnp_dev *dev)
+static struct pnp_option *pnp_clone_dependent_set(struct pnp_dev *dev,
+                                                 unsigned int set)
  {
-       struct pnp_option *head = NULL;
-       struct pnp_option *prev = NULL;
-       struct pnp_option *res;
-
-       /*
-        * Build a functional IRQ-less variant of each MPU option.
-        */
-
-       for (res = dev->dependent; res; res = res->next) {
-               struct pnp_option *curr;
-               struct pnp_port *port;
-               struct pnp_port *copy;
+       struct pnp_option *tail = NULL, *first_new_option = NULL;
+       struct pnp_option *option, *new_option;
+       unsigned int flags;
  
-               port = res->port;
-               if (!port || !res->irq)
-                       continue;
+       list_for_each_entry(option, &dev->options, list) {
+               if (pnp_option_is_dependent(option))
+                       tail = option;
+       }
+       if (!tail) {
+               dev_err(&dev->dev, "no dependent option sets\n");
+               return NULL;
+       }
  
-               copy = pnp_alloc(sizeof *copy);
-               if (!copy)
-                       break;
+       flags = pnp_new_dependent_set(dev, PNP_RES_PRIORITY_FUNCTIONAL);
+       list_for_each_entry(option, &dev->options, list) {
+               if (pnp_option_is_dependent(option) &&
+                   pnp_option_set(option) == set) {
+                       new_option = kmalloc(sizeof(struct pnp_option),
+                                            GFP_KERNEL);
+                       if (!new_option) {
+                               dev_err(&dev->dev, "couldn't clone dependent "
+                                       "set %d\n", set);
+                               return NULL;
+                       }
  
-               copy->min = port->min;
-               copy->max = port->max;
-               copy->align = port->align;
-               copy->size = port->size;
-               copy->flags = port->flags;
+                       *new_option = *option;
+                       new_option->flags = flags;
+                       if (!first_new_option)
+                               first_new_option = new_option;
  
-               curr = pnp_build_option(PNP_RES_PRIORITY_FUNCTIONAL);
-               if (!curr) {
-                       kfree(copy);
-                       break;
+                       list_add(&new_option->list, &tail->list);
+                       tail = new_option;
                 }
-               curr->port = copy;
-
-               if (prev)
-                       prev->next = curr;
-               else
-                       head = curr;
-               prev = curr;
         }
-       if (head)
-               dev_info(&dev->dev, "adding IRQ-less MPU options\n");
  
-       return head;
+       return first_new_option;
  }
  
-static void quirk_ad1815_mpu_resources(struct pnp_dev *dev)
+
+static void quirk_add_irq_optional_dependent_sets(struct pnp_dev *dev)
  {
-       struct pnp_option *res;
+       struct pnp_option *new_option;
+       unsigned int num_sets, i, set;
         struct pnp_irq *irq;
  
-       /*
-        * Distribute the independent IRQ over the dependent options
-        */
-
-       res = dev->independent;
-       if (!res)
-               return;
-
-       irq = res->irq;
-       if (!irq || irq->next)
-               return;
-
-       res = dev->dependent;
-       if (!res)
-               return;
-
-       while (1) {
-               struct pnp_irq *copy;
-
-               copy = pnp_alloc(sizeof *copy);
-               if (!copy)
-                       break;
-
-               memcpy(copy->map, irq->map, sizeof copy->map);
-               copy->flags = irq->flags;
+       num_sets = dev->num_dependent_sets;
+       for (i = 0; i < num_sets; i++) {
+               new_option = pnp_clone_dependent_set(dev, i);
+               if (!new_option)
+                       return;
  
-               copy->next = res->irq; /* Yes, this is NULL */
-               res->irq = copy;
+               set = pnp_option_set(new_option);
+               while (new_option && pnp_option_set(new_option) == set) {
+                       if (new_option->type == IORESOURCE_IRQ) {
+                               irq = &new_option->u.irq;
+                               irq->flags |= IORESOURCE_IRQ_OPTIONAL;
+                       }
+                       dbg_pnp_show_option(dev, new_option);
+                       new_option = list_entry(new_option->list.next,
+                                               struct pnp_option, list);
+               }
  
-               if (!res->next)
-                       break;
-               res = res->next;
+               dev_info(&dev->dev, "added dependent option set %d (same as "
+                        "set %d except IRQ optional)\n", set, i);
         }
-       kfree(irq);
-
-       res->next = quirk_isapnp_mpu_options(dev);
-
-       res = dev->independent;
-       res->irq = NULL;
  }
  
-static void quirk_isapnp_mpu_resources(struct pnp_dev *dev)
+static void quirk_ad1815_mpu_resources(struct pnp_dev *dev)
  {
-       struct pnp_option *res;
+       struct pnp_option *option;
+       struct pnp_irq *irq = NULL;
+       unsigned int independent_irqs = 0;
+
+       list_for_each_entry(option, &dev->options, list) {
+               if (option->type == IORESOURCE_IRQ &&
+                   !pnp_option_is_dependent(option)) {
+                       independent_irqs++;
+                       irq = &option->u.irq;
+               }
+       }
  
-       res = dev->dependent;
-       if (!res)
+       if (independent_irqs != 1)
                 return;
  
-       while (res->next)
-               res = res->next;
-
-       res->next = quirk_isapnp_mpu_options(dev);
+       irq->flags |= IORESOURCE_IRQ_OPTIONAL;
+       dev_info(&dev->dev, "made independent IRQ optional\n");
  }
  
  #include <linux/pci.h>
@@ -248,8 +254,7 @@ static void quirk_system_pci_resources(struct pnp_dev *dev)
                         for (j = 0;
                              (res = pnp_get_resource(dev, IORESOURCE_MEM, j));
                              j++) {
-                               if (res->flags & IORESOURCE_UNSET ||
-                                   (res->start == 0 && res->end == 0))
+                               if (res->start == 0 && res->end == 0)
                                         continue;
  
                                 pnp_start = res->start;
@@ -312,10 +317,10 @@ static struct pnp_fixup pnp_fixups[] = {
         {"CTL0043", quirk_sb16audio_resources},
         {"CTL0044", quirk_sb16audio_resources},
         {"CTL0045", quirk_sb16audio_resources},
-       /* Add IRQ-less MPU options */
+       /* Add IRQ-optional MPU options */
         {"ADS7151", quirk_ad1815_mpu_resources},
-       {"ADS7181", quirk_isapnp_mpu_resources},
-       {"AZT0002", quirk_isapnp_mpu_resources},
+       {"ADS7181", quirk_add_irq_optional_dependent_sets},
+       {"AZT0002", quirk_add_irq_optional_dependent_sets},
         /* PnP resources that might overlap PCI BARs */
         {"PNP0c01", quirk_system_pci_resources},
         {"PNP0c02", quirk_system_pci_resources},
diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c

index 390b50096e30890f57d6cf6f55dc730d18013b47..4cfe3a1efdfbc24a213abc838dab1631abf9f5d4 100644 (file)
--- a/drivers/pnp/resource.c
+++ b/drivers/pnp/resource.c
@@ -3,6 +3,8 @@
   *
   * based on isapnp.c resource management (c) Jaroslav Kysela <perex@perex.cz>
   * Copyright 2003 Adam Belay <ambx1@neo.rr.com>
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *     Bjorn Helgaas <bjorn.helgaas@hp.com>
   */
  
  #include <linux/module.h>
@@ -28,201 +30,121 @@ static int pnp_reserve_mem[16] = {[0 ... 15] = -1 };      /* reserve (don't use) some
   * option registration
   */
  
-struct pnp_option *pnp_build_option(int priority)
+struct pnp_option *pnp_build_option(struct pnp_dev *dev, unsigned long type,
+                                   unsigned int option_flags)
  {
-       struct pnp_option *option = pnp_alloc(sizeof(struct pnp_option));
+       struct pnp_option *option;
  
+       option = kzalloc(sizeof(struct pnp_option), GFP_KERNEL);
         if (!option)
                 return NULL;
  
-       option->priority = priority & 0xff;
-       /* make sure the priority is valid */
-       if (option->priority > PNP_RES_PRIORITY_FUNCTIONAL)
-               option->priority = PNP_RES_PRIORITY_INVALID;
-
-       return option;
-}
-
-struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev)
-{
-       struct pnp_option *option;
-
-       option = pnp_build_option(PNP_RES_PRIORITY_PREFERRED);
-
-       /* this should never happen but if it does we'll try to continue */
-       if (dev->independent)
-               dev_err(&dev->dev, "independent resource already registered\n");
-       dev->independent = option;
+       option->flags = option_flags;
+       option->type = type;
  
-       dev_dbg(&dev->dev, "new independent option\n");
+       list_add_tail(&option->list, &dev->options);
         return option;
  }
  
-struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev,
-                                                int priority)
+int pnp_register_irq_resource(struct pnp_dev *dev, unsigned int option_flags,
+                             pnp_irq_mask_t *map, unsigned char flags)
  {
         struct pnp_option *option;
+       struct pnp_irq *irq;
  
-       option = pnp_build_option(priority);
-
-       if (dev->dependent) {
-               struct pnp_option *parent = dev->dependent;
-               while (parent->next)
-                       parent = parent->next;
-               parent->next = option;
-       } else
-               dev->dependent = option;
-
-       dev_dbg(&dev->dev, "new dependent option (priority %#x)\n", priority);
-       return option;
-}
-
-int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option,
-                             struct pnp_irq *data)
-{
-       struct pnp_irq *ptr;
-#ifdef DEBUG
-       char buf[PNP_IRQ_NR];   /* hex-encoded, so this is overkill but safe */
-#endif
+       option = pnp_build_option(dev, IORESOURCE_IRQ, option_flags);
+       if (!option)
+               return -ENOMEM;
  
-       ptr = option->irq;
-       while (ptr && ptr->next)
-               ptr = ptr->next;
-       if (ptr)
-               ptr->next = data;
-       else
-               option->irq = data;
+       irq = &option->u.irq;
+       irq->map = *map;
+       irq->flags = flags;
  
  #ifdef CONFIG_PCI
         {
                 int i;
  
                 for (i = 0; i < 16; i++)
-                       if (test_bit(i, data->map))
+                       if (test_bit(i, irq->map.bits))
                                 pcibios_penalize_isa_irq(i, 0);
         }
  #endif
  
-#ifdef DEBUG
-       bitmap_scnprintf(buf, sizeof(buf), data->map, PNP_IRQ_NR);
-       dev_dbg(&dev->dev, "  irq bitmask %s flags %#x\n", buf,
-               data->flags);
-#endif
+       dbg_pnp_show_option(dev, option);
         return 0;
  }
  
-int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option,
-                             struct pnp_dma *data)
+int pnp_register_dma_resource(struct pnp_dev *dev, unsigned int option_flags,
+                             unsigned char map, unsigned char flags)
  {
-       struct pnp_dma *ptr;
-
-       ptr = option->dma;
-       while (ptr && ptr->next)
-               ptr = ptr->next;
-       if (ptr)
-               ptr->next = data;
-       else
-               option->dma = data;
-
-       dev_dbg(&dev->dev, "  dma bitmask %#x flags %#x\n", data->map,
-               data->flags);
-       return 0;
-}
+       struct pnp_option *option;
+       struct pnp_dma *dma;
  
-int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option,
-                              struct pnp_port *data)
-{
-       struct pnp_port *ptr;
-
-       ptr = option->port;
-       while (ptr && ptr->next)
-               ptr = ptr->next;
-       if (ptr)
-               ptr->next = data;
-       else
-               option->port = data;
-
-       dev_dbg(&dev->dev, "  io  "
-               "min %#x max %#x align %d size %d flags %#x\n",
-               data->min, data->max, data->align, data->size, data->flags);
-       return 0;
-}
+       option = pnp_build_option(dev, IORESOURCE_DMA, option_flags);
+       if (!option)
+               return -ENOMEM;
  
-int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option,
-                             struct pnp_mem *data)
-{
-       struct pnp_mem *ptr;
-
-       ptr = option->mem;
-       while (ptr && ptr->next)
-               ptr = ptr->next;
-       if (ptr)
-               ptr->next = data;
-       else
-               option->mem = data;
-
-       dev_dbg(&dev->dev, "  mem "
-               "min %#x max %#x align %d size %d flags %#x\n",
-               data->min, data->max, data->align, data->size, data->flags);
+       dma = &option->u.dma;
+       dma->map = map;
+       dma->flags = flags;
+
+       dbg_pnp_show_option(dev, option);
         return 0;
  }
  
-static void pnp_free_port(struct pnp_port *port)
+int pnp_register_port_resource(struct pnp_dev *dev, unsigned int option_flags,
+                              resource_size_t min, resource_size_t max,
+                              resource_size_t align, resource_size_t size,
+                              unsigned char flags)
  {
-       struct pnp_port *next;
+       struct pnp_option *option;
+       struct pnp_port *port;
  
-       while (port) {
-               next = port->next;
-               kfree(port);
-               port = next;
-       }
-}
+       option = pnp_build_option(dev, IORESOURCE_IO, option_flags);
+       if (!option)
+               return -ENOMEM;
  
-static void pnp_free_irq(struct pnp_irq *irq)
-{
-       struct pnp_irq *next;
+       port = &option->u.port;
+       port->min = min;
+       port->max = max;
+       port->align = align;
+       port->size = size;
+       port->flags = flags;
  
-       while (irq) {
-               next = irq->next;
-               kfree(irq);
-               irq = next;
-       }
+       dbg_pnp_show_option(dev, option);
+       return 0;
  }
  
-static void pnp_free_dma(struct pnp_dma *dma)
+int pnp_register_mem_resource(struct pnp_dev *dev, unsigned int option_flags,
+                             resource_size_t min, resource_size_t max,
+                             resource_size_t align, resource_size_t size,
+                             unsigned char flags)
  {
-       struct pnp_dma *next;
+       struct pnp_option *option;
+       struct pnp_mem *mem;
  
-       while (dma) {
-               next = dma->next;
-               kfree(dma);
-               dma = next;
-       }
-}
+       option = pnp_build_option(dev, IORESOURCE_MEM, option_flags);
+       if (!option)
+               return -ENOMEM;
  
-static void pnp_free_mem(struct pnp_mem *mem)
-{
-       struct pnp_mem *next;
+       mem = &option->u.mem;
+       mem->min = min;
+       mem->max = max;
+       mem->align = align;
+       mem->size = size;
+       mem->flags = flags;
  
-       while (mem) {
-               next = mem->next;
-               kfree(mem);
-               mem = next;
-       }
+       dbg_pnp_show_option(dev, option);
+       return 0;
  }
  
-void pnp_free_option(struct pnp_option *option)
+void pnp_free_options(struct pnp_dev *dev)
  {
-       struct pnp_option *next;
-
-       while (option) {
-               next = option->next;
-               pnp_free_port(option->port);
-               pnp_free_irq(option->irq);
-               pnp_free_dma(option->dma);
-               pnp_free_mem(option->mem);
+       struct pnp_option *option, *tmp;
+
+       list_for_each_entry_safe(option, tmp, &dev->options, list) {
+               list_del(&option->list);
                 kfree(option);
-               option = next;
         }
  }
  
@@ -237,7 +159,7 @@ void pnp_free_option(struct pnp_option *option)
         !((*(enda) < *(startb)) || (*(endb) < *(starta)))
  
  #define cannot_compare(flags) \
-((flags) & (IORESOURCE_UNSET | IORESOURCE_DISABLED))
+((flags) & IORESOURCE_DISABLED)
  
  int pnp_check_port(struct pnp_dev *dev, struct resource *res)
  {
@@ -364,6 +286,61 @@ static irqreturn_t pnp_test_handler(int irq, void *dev_id)
         return IRQ_HANDLED;
  }
  
+#ifdef CONFIG_PCI
+static int pci_dev_uses_irq(struct pnp_dev *pnp, struct pci_dev *pci,
+                           unsigned int irq)
+{
+       u32 class;
+       u8 progif;
+
+       if (pci->irq == irq) {
+               dev_dbg(&pnp->dev, "device %s using irq %d\n",
+                       pci_name(pci), irq);
+               return 1;
+       }
+
+       /*
+        * See pci_setup_device() and ata_pci_sff_activate_host() for
+        * similar IDE legacy detection.
+        */
+       pci_read_config_dword(pci, PCI_CLASS_REVISION, &class);
+       class >>= 8;            /* discard revision ID */
+       progif = class & 0xff;
+       class >>= 8;
+
+       if (class == PCI_CLASS_STORAGE_IDE) {
+               /*
+                * Unless both channels are native-PCI mode only,
+                * treat the compatibility IRQs as busy.
+                */
+               if ((progif & 0x5) != 0x5)
+                       if (pci_get_legacy_ide_irq(pci, 0) == irq ||
+                           pci_get_legacy_ide_irq(pci, 1) == irq) {
+                               dev_dbg(&pnp->dev, "legacy IDE device %s "
+                                       "using irq %d\n", pci_name(pci), irq);
+                               return 1;
+                       }
+       }
+
+       return 0;
+}
+#endif
+
+static int pci_uses_irq(struct pnp_dev *pnp, unsigned int irq)
+{
+#ifdef CONFIG_PCI
+       struct pci_dev *pci = NULL;
+
+       for_each_pci_dev(pci) {
+               if (pci_dev_uses_irq(pnp, pci, irq)) {
+                       pci_dev_put(pci);
+                       return 1;
+               }
+       }
+#endif
+       return 0;
+}
+
  int pnp_check_irq(struct pnp_dev *dev, struct resource *res)
  {
         int i;
@@ -395,18 +372,9 @@ int pnp_check_irq(struct pnp_dev *dev, struct resource *res)
                 }
         }
  
-#ifdef CONFIG_PCI
         /* check if the resource is being used by a pci device */
-       {
-               struct pci_dev *pci = NULL;
-               for_each_pci_dev(pci) {
-                       if (pci->irq == *irq) {
-                               pci_dev_put(pci);
-                               return 0;
-                       }
-               }
-       }
-#endif
+       if (pci_uses_irq(dev, *irq))
+               return 0;
  
         /* check if the resource is already in use, skip if the
          * device is active because it itself may be in use */
@@ -499,81 +467,37 @@ int pnp_check_dma(struct pnp_dev *dev, struct resource *res)
  #endif
  }
  
-struct pnp_resource *pnp_get_pnp_resource(struct pnp_dev *dev,
-                                         unsigned int type, unsigned int num)
+int pnp_resource_type(struct resource *res)
  {
-       struct pnp_resource_table *res = dev->res;
-
-       switch (type) {
-       case IORESOURCE_IO:
-               if (num >= PNP_MAX_PORT)
-                       return NULL;
-               return &res->port[num];
-       case IORESOURCE_MEM:
-               if (num >= PNP_MAX_MEM)
-                       return NULL;
-               return &res->mem[num];
-       case IORESOURCE_IRQ:
-               if (num >= PNP_MAX_IRQ)
-                       return NULL;
-               return &res->irq[num];
-       case IORESOURCE_DMA:
-               if (num >= PNP_MAX_DMA)
-                       return NULL;
-               return &res->dma[num];
-       }
-       return NULL;
+       return res->flags & (IORESOURCE_IO  | IORESOURCE_MEM |
+                            IORESOURCE_IRQ | IORESOURCE_DMA);
  }
  
  struct resource *pnp_get_resource(struct pnp_dev *dev,
                                   unsigned int type, unsigned int num)
  {
         struct pnp_resource *pnp_res;
+       struct resource *res;
  
-       pnp_res = pnp_get_pnp_resource(dev, type, num);
-       if (pnp_res)
-               return &pnp_res->res;
-
+       list_for_each_entry(pnp_res, &dev->resources, list) {
+               res = &pnp_res->res;
+               if (pnp_resource_type(res) == type && num-- == 0)
+                       return res;
+       }
         return NULL;
  }
  EXPORT_SYMBOL(pnp_get_resource);
  
-static struct pnp_resource *pnp_new_resource(struct pnp_dev *dev, int type)
+static struct pnp_resource *pnp_new_resource(struct pnp_dev *dev)
  {
         struct pnp_resource *pnp_res;
-       int i;
  
-       switch (type) {
-       case IORESOURCE_IO:
-               for (i = 0; i < PNP_MAX_PORT; i++) {
-                       pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IO, i);
-                       if (pnp_res && !pnp_resource_valid(&pnp_res->res))
-                               return pnp_res;
-               }
-               break;
-       case IORESOURCE_MEM:
-               for (i = 0; i < PNP_MAX_MEM; i++) {
-                       pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_MEM, i);
-                       if (pnp_res && !pnp_resource_valid(&pnp_res->res))
-                               return pnp_res;
-               }
-               break;
-       case IORESOURCE_IRQ:
-               for (i = 0; i < PNP_MAX_IRQ; i++) {
-                       pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_IRQ, i);
-                       if (pnp_res && !pnp_resource_valid(&pnp_res->res))
-                               return pnp_res;
-               }
-               break;
-       case IORESOURCE_DMA:
-               for (i = 0; i < PNP_MAX_DMA; i++) {
-                       pnp_res = pnp_get_pnp_resource(dev, IORESOURCE_DMA, i);
-                       if (pnp_res && !pnp_resource_valid(&pnp_res->res))
-                               return pnp_res;
-               }
-               break;
-       }
-       return NULL;
+       pnp_res = kzalloc(sizeof(struct pnp_resource), GFP_KERNEL);
+       if (!pnp_res)
+               return NULL;
+
+       list_add_tail(&pnp_res->list, &dev->resources);
+       return pnp_res;
  }
  
  struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq,
@@ -581,15 +505,10 @@ struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq,
  {
         struct pnp_resource *pnp_res;
         struct resource *res;
-       static unsigned char warned;
  
-       pnp_res = pnp_new_resource(dev, IORESOURCE_IRQ);
+       pnp_res = pnp_new_resource(dev);
         if (!pnp_res) {
-               if (!warned) {
-                       dev_err(&dev->dev, "can't add resource for IRQ %d\n",
-                               irq);
-                       warned = 1;
-               }
+               dev_err(&dev->dev, "can't add resource for IRQ %d\n", irq);
                 return NULL;
         }
  
@@ -607,15 +526,10 @@ struct pnp_resource *pnp_add_dma_resource(struct pnp_dev *dev, int dma,
  {
         struct pnp_resource *pnp_res;
         struct resource *res;
-       static unsigned char warned;
  
-       pnp_res = pnp_new_resource(dev, IORESOURCE_DMA);
+       pnp_res = pnp_new_resource(dev);
         if (!pnp_res) {
-               if (!warned) {
-                       dev_err(&dev->dev, "can't add resource for DMA %d\n",
-                               dma);
-                       warned = 1;
-               }
+               dev_err(&dev->dev, "can't add resource for DMA %d\n", dma);
                 return NULL;
         }
  
@@ -634,16 +548,12 @@ struct pnp_resource *pnp_add_io_resource(struct pnp_dev *dev,
  {
         struct pnp_resource *pnp_res;
         struct resource *res;
-       static unsigned char warned;
  
-       pnp_res = pnp_new_resource(dev, IORESOURCE_IO);
+       pnp_res = pnp_new_resource(dev);
         if (!pnp_res) {
-               if (!warned) {
-                       dev_err(&dev->dev, "can't add resource for IO "
-                               "%#llx-%#llx\n",(unsigned long long) start,
-                               (unsigned long long) end);
-                       warned = 1;
-               }
+               dev_err(&dev->dev, "can't add resource for IO %#llx-%#llx\n",
+                       (unsigned long long) start,
+                       (unsigned long long) end);
                 return NULL;
         }
  
@@ -663,16 +573,12 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev,
  {
         struct pnp_resource *pnp_res;
         struct resource *res;
-       static unsigned char warned;
  
-       pnp_res = pnp_new_resource(dev, IORESOURCE_MEM);
+       pnp_res = pnp_new_resource(dev);
         if (!pnp_res) {
-               if (!warned) {
-                       dev_err(&dev->dev, "can't add resource for MEM "
-                               "%#llx-%#llx\n",(unsigned long long) start,
-                               (unsigned long long) end);
-                       warned = 1;
-               }
+               dev_err(&dev->dev, "can't add resource for MEM %#llx-%#llx\n",
+                       (unsigned long long) start,
+                       (unsigned long long) end);
                 return NULL;
         }
  
@@ -686,6 +592,52 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev,
         return pnp_res;
  }
  
+/*
+ * Determine whether the specified resource is a possible configuration
+ * for this device.
+ *
+ * Walks dev->options and considers only options whose type equals @type:
+ *   - IO and MEM options match when their min equals @start and their
+ *     size equals @size.
+ *   - IRQ and DMA options match when bit number @start is set in the
+ *     option's map; @size is not examined for these types.
+ *
+ * Returns 1 as soon as one option matches, 0 if none does.
+ */
+int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t start,
+                       resource_size_t size)
+{
+       struct pnp_option *option;
+       struct pnp_port *port;
+       struct pnp_mem *mem;
+       struct pnp_irq *irq;
+       struct pnp_dma *dma;
+
+       list_for_each_entry(option, &dev->options, list) {
+               if (option->type != type)
+                       continue;
+
+               switch (option->type) {
+               case IORESOURCE_IO:
+                       port = &option->u.port;
+                       if (port->min == start && port->size == size)
+                               return 1;
+                       break;
+               case IORESOURCE_MEM:
+                       mem = &option->u.mem;
+                       if (mem->min == start && mem->size == size)
+                               return 1;
+                       break;
+               case IORESOURCE_IRQ:
+                       irq = &option->u.irq;
+                       /* range-check before indexing the IRQ bitmap */
+                       if (start < PNP_IRQ_NR &&
+                           test_bit(start, irq->map.bits))
+                               return 1;
+                       break;
+               case IORESOURCE_DMA:
+                       dma = &option->u.dma;
+                       /*
+                        * Range-check before shifting, as the IRQ case does:
+                        * a shift count at or beyond the width of int is
+                        * undefined, and a bit number beyond the width of
+                        * the map can never be set in it anyway.
+                        */
+                       if (start < 8 * sizeof(dma->map) &&
+                           (dma->map & (1 << start)))
+                               return 1;
+                       break;
+               }
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(pnp_possible_config);
+
  /* format is: pnp_reserve_irq=irq1[,irq2] .... */
  static int __init pnp_setup_reserve_irq(char *str)
  {
diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c

index 95b076c18c075677cd49449b8bec9bf9492ab0d8..bbf78ef4ba024932af837e18f16503daf8c9ce4a 100644 (file)
--- a/drivers/pnp/support.c
+++ b/drivers/pnp/support.c
@@ -2,6 +2,8 @@
   * support.c - standard functions for the use of pnp protocol drivers
   *
   * Copyright 2003 Adam Belay <ambx1@neo.rr.com>
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *     Bjorn Helgaas <bjorn.helgaas@hp.com>
   */
  
  #include <linux/module.h>
@@ -16,6 +18,10 @@
   */
  int pnp_is_active(struct pnp_dev *dev)
  {
+       /*
+        * I don't think this is very reliable because pnp_disable_dev()
+        * only clears out auto-assigned resources.
+        */
         if (!pnp_port_start(dev, 0) && pnp_port_len(dev, 0) <= 1 &&
             !pnp_mem_start(dev, 0) && pnp_mem_len(dev, 0) <= 1 &&
             pnp_irq(dev, 0) == -1 && pnp_dma(dev, 0) == -1)
@@ -52,39 +58,154 @@ void pnp_eisa_id_to_string(u32 id, char *str)
         str[7] = '\0';
  }
  
+/*
+ * Translate the type of @res, as reported by pnp_resource_type(), into a
+ * short printable name.  Returns NULL for any type other than IO, MEM,
+ * IRQ or DMA.
+ */
+char *pnp_resource_type_name(struct resource *res)
+{
+       char *name = NULL;
+
+       switch (pnp_resource_type(res)) {
+       case IORESOURCE_IO:
+               name = "io";
+               break;
+       case IORESOURCE_MEM:
+               name = "mem";
+               break;
+       case IORESOURCE_IRQ:
+               name = "irq";
+               break;
+       case IORESOURCE_DMA:
+               name = "dma";
+               break;
+       }
+
+       return name;
+}
+
  void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc)
  {
  #ifdef DEBUG
+       char buf[128];
+       int len = 0;
+       struct pnp_resource *pnp_res;
         struct resource *res;
-       int i;
  
-       dev_dbg(&dev->dev, "current resources: %s\n", desc);
-
-       for (i = 0; i < PNP_MAX_IRQ; i++) {
-               res = pnp_get_resource(dev, IORESOURCE_IRQ, i);
-               if (res && !(res->flags & IORESOURCE_UNSET))
-                       dev_dbg(&dev->dev, "  irq %lld flags %#lx\n",
-                               (unsigned long long) res->start, res->flags);
+       if (list_empty(&dev->resources)) {
+               dev_dbg(&dev->dev, "%s: no current resources\n", desc);
+               return;
         }
-       for (i = 0; i < PNP_MAX_DMA; i++) {
-               res = pnp_get_resource(dev, IORESOURCE_DMA, i);
-               if (res && !(res->flags & IORESOURCE_UNSET))
-                       dev_dbg(&dev->dev, "  dma %lld flags %#lx\n",
-                               (unsigned long long) res->start, res->flags);
+
+       dev_dbg(&dev->dev, "%s: current resources:\n", desc);
+       list_for_each_entry(pnp_res, &dev->resources, list) {
+               res = &pnp_res->res;
+
+               len += snprintf(buf + len, sizeof(buf) - len, "  %-3s ",
+                               pnp_resource_type_name(res));
+
+               if (res->flags & IORESOURCE_DISABLED) {
+                       dev_dbg(&dev->dev, "%sdisabled\n", buf);
+                       continue;
+               }
+
+               switch (pnp_resource_type(res)) {
+               case IORESOURCE_IO:
+               case IORESOURCE_MEM:
+                       len += snprintf(buf + len, sizeof(buf) - len,
+                                       "%#llx-%#llx flags %#lx",
+                                       (unsigned long long) res->start,
+                                       (unsigned long long) res->end,
+                                       res->flags);
+                       break;
+               case IORESOURCE_IRQ:
+               case IORESOURCE_DMA:
+                       len += snprintf(buf + len, sizeof(buf) - len,
+                                       "%lld flags %#lx",
+                                       (unsigned long long) res->start,
+                                       res->flags);
+                       break;
+               }
+               dev_dbg(&dev->dev, "%s\n", buf);
         }
-       for (i = 0; i < PNP_MAX_PORT; i++) {
-               res = pnp_get_resource(dev, IORESOURCE_IO, i);
-               if (res && !(res->flags & IORESOURCE_UNSET))
-                       dev_dbg(&dev->dev, "  io  %#llx-%#llx flags %#lx\n",
-                               (unsigned long long) res->start,
-                               (unsigned long long) res->end, res->flags);
+#endif
+}
+
+/*
+ * Return a printable name for the priority of @option as reported by
+ * pnp_option_priority(); any value other than the three known
+ * priorities yields "invalid".
+ */
+char *pnp_option_priority_name(struct pnp_option *option)
+{
+       char *name = "invalid";
+
+       switch (pnp_option_priority(option)) {
+       case PNP_RES_PRIORITY_PREFERRED:
+               name = "preferred";
+               break;
+       case PNP_RES_PRIORITY_ACCEPTABLE:
+               name = "acceptable";
+               break;
+       case PNP_RES_PRIORITY_FUNCTIONAL:
+               name = "functional";
+               break;
         }
-       for (i = 0; i < PNP_MAX_MEM; i++) {
-               res = pnp_get_resource(dev, IORESOURCE_MEM, i);
-               if (res && !(res->flags & IORESOURCE_UNSET))
-                       dev_dbg(&dev->dev, "  mem %#llx-%#llx flags %#lx\n",
-                               (unsigned long long) res->start,
-                               (unsigned long long) res->end, res->flags);
+       return name;
+}
+
+/*
+ * Debug-only helper: format @option into one line and emit it with
+ * dev_dbg() on @dev.  The line starts with "dependent set N (priority)"
+ * or "independent", followed by the type-specific fields of the option.
+ * Compiles to an empty function unless DEBUG is defined.
+ *
+ * All formatting goes through scnprintf(), which returns the number of
+ * characters actually stored.  With snprintf() (which returns the length
+ * the output would have had) a long IRQ list could push len past
+ * sizeof(buf), making "sizeof(buf) - len" wrap around.
+ */
+void dbg_pnp_show_option(struct pnp_dev *dev, struct pnp_option *option)
+{
+#ifdef DEBUG
+       char buf[128];
+       int len = 0, i;
+       struct pnp_port *port;
+       struct pnp_mem *mem;
+       struct pnp_irq *irq;
+       struct pnp_dma *dma;
+
+       if (pnp_option_is_dependent(option))
+               len += scnprintf(buf + len, sizeof(buf) - len,
+                                "  dependent set %d (%s) ",
+                                pnp_option_set(option),
+                                pnp_option_priority_name(option));
+       else
+               len += scnprintf(buf + len, sizeof(buf) - len,
+                                "  independent ");
+
+       switch (option->type) {
+       case IORESOURCE_IO:
+               port = &option->u.port;
+               len += scnprintf(buf + len, sizeof(buf) - len, "io  min %#llx "
+                                "max %#llx align %lld size %lld flags %#x",
+                                (unsigned long long) port->min,
+                                (unsigned long long) port->max,
+                                (unsigned long long) port->align,
+                                (unsigned long long) port->size, port->flags);
+               break;
+       case IORESOURCE_MEM:
+               mem = &option->u.mem;
+               len += scnprintf(buf + len, sizeof(buf) - len, "mem min %#llx "
+                                "max %#llx align %lld size %lld flags %#x",
+                                (unsigned long long) mem->min,
+                                (unsigned long long) mem->max,
+                                (unsigned long long) mem->align,
+                                (unsigned long long) mem->size, mem->flags);
+               break;
+       case IORESOURCE_IRQ:
+               irq = &option->u.irq;
+               len += scnprintf(buf + len, sizeof(buf) - len, "irq");
+               if (bitmap_empty(irq->map.bits, PNP_IRQ_NR))
+                       len += scnprintf(buf + len, sizeof(buf) - len,
+                                        " <none>");
+               else {
+                       /* one " N" per set bit; output is cut at buf's end */
+                       for (i = 0; i < PNP_IRQ_NR; i++)
+                               if (test_bit(i, irq->map.bits))
+                                       len += scnprintf(buf + len,
+                                                        sizeof(buf) - len,
+                                                        " %d", i);
+               }
+               len += scnprintf(buf + len, sizeof(buf) - len, " flags %#x",
+                                irq->flags);
+               if (irq->flags & IORESOURCE_IRQ_OPTIONAL)
+                       len += scnprintf(buf + len, sizeof(buf) - len,
+                                        " (optional)");
+               break;
+       case IORESOURCE_DMA:
+               dma = &option->u.dma;
+               len += scnprintf(buf + len, sizeof(buf) - len, "dma");
+               if (!dma->map)
+                       len += scnprintf(buf + len, sizeof(buf) - len,
+                                        " <none>");
+               else {
+                       /* only channels 0-7 are listed individually */
+                       for (i = 0; i < 8; i++)
+                               if (dma->map & (1 << i))
+                                       len += scnprintf(buf + len,
+                                                        sizeof(buf) - len,
+                                                        " %d", i);
+               }
+               len += scnprintf(buf + len, sizeof(buf) - len, " (bitmask %#x) "
+                                "flags %#x", dma->map, dma->flags);
+               break;
         }
+       dev_dbg(&dev->dev, "%s\n", buf);
  #endif
  }
diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c

index cf4e07b01d484e7038e7bf2424bb04b68e84e014..764f3a3106854056d3acec26ce77068908b0eb58 100644 (file)
--- a/drivers/pnp/system.c
+++ b/drivers/pnp/system.c
@@ -60,7 +60,7 @@ static void reserve_resources_of_dev(struct pnp_dev *dev)
         int i;
  
         for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_IO, i)); i++) {
-               if (res->flags & IORESOURCE_UNSET)
+               if (res->flags & IORESOURCE_DISABLED)
                         continue;
                 if (res->start == 0)
                         continue;       /* disabled */
@@ -81,7 +81,7 @@ static void reserve_resources_of_dev(struct pnp_dev *dev)
         }
  
         for (i = 0; (res = pnp_get_resource(dev, IORESOURCE_MEM, i)); i++) {
-               if (res->flags & (IORESOURCE_UNSET | IORESOURCE_DISABLED))
+               if (res->flags & IORESOURCE_DISABLED)
                         continue;
  
                 reserve_range(dev, res->start, res->end, 0);
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c

index d91df38ee4f7cb4ed418a043e689fc95126e8c7e..85fcb43710541e4d1057bd06deeac90f29346992 100644 (file)
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -333,7 +333,8 @@ dasd_diag_check_device(struct dasd_device *device)
         if (IS_ERR(block)) {
                 DEV_MESSAGE(KERN_WARNING, device, "%s",
                             "could not allocate dasd block structure");
-               kfree(device->private);
+               device->private = NULL;
+               kfree(private);
                 return PTR_ERR(block);
         }
         device->block = block;
@@ -348,7 +349,8 @@ dasd_diag_check_device(struct dasd_device *device)
         if (rc) {
                 DEV_MESSAGE(KERN_WARNING, device, "failed to retrieve device "
                             "information (rc=%d)", rc);
-               return -ENOTSUPP;
+               rc = -EOPNOTSUPP;
+               goto out;
         }
  
         /* Figure out position of label block */
@@ -362,7 +364,8 @@ dasd_diag_check_device(struct dasd_device *device)
         default:
                 DEV_MESSAGE(KERN_WARNING, device, "unsupported device class "
                             "(class=%d)", private->rdc_data.vdev_class);
-               return -ENOTSUPP;
+               rc = -EOPNOTSUPP;
+               goto out;
         }
  
         DBF_DEV_EVENT(DBF_INFO, device,
@@ -379,7 +382,8 @@ dasd_diag_check_device(struct dasd_device *device)
         if (label == NULL)  {
                 DEV_MESSAGE(KERN_WARNING, device, "%s",
                             "No memory to allocate initialization request");
-               return -ENOMEM;
+               rc = -ENOMEM;
+               goto out;
         }
         rc = 0;
         end_block = 0;
@@ -403,7 +407,7 @@ dasd_diag_check_device(struct dasd_device *device)
                         DEV_MESSAGE(KERN_WARNING, device, "%s",
                                 "DIAG call failed");
                         rc = -EOPNOTSUPP;
-                       goto out;
+                       goto out_label;
                 }
                 mdsk_term_io(device);
                 if (rc == 0)
@@ -413,7 +417,7 @@ dasd_diag_check_device(struct dasd_device *device)
                 DEV_MESSAGE(KERN_WARNING, device, "device access failed "
                             "(rc=%d)", rc);
                 rc = -EIO;
-               goto out;
+               goto out_label;
         }
         /* check for label block */
         if (memcmp(label->label_id, DASD_DIAG_CMS1,
@@ -439,8 +443,15 @@ dasd_diag_check_device(struct dasd_device *device)
                             (unsigned long) (block->blocks <<
                                 block->s2b_shift) >> 1);
         }
-out:
+out_label:
         free_page((long) label);
+out:
+       if (rc) {
+               device->block = NULL;
+               dasd_free_block(block);
+               device->private = NULL;
+               kfree(private);
+       }
         return rc;
  }
  
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c

index e0b77210d37a5b95f15b707c39baf42397396ba2..3590fdb5b2fda56ce9e5df1a22f1b59966be7af8 100644 (file)
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1418,8 +1418,10 @@ static void dasd_eckd_handle_unsolicited_interrupt(struct dasd_device *device,
  
  
         /* service information message SIM */
-       if ((irb->ecw[6] & DASD_SIM_SENSE) == DASD_SIM_SENSE) {
+       if (irb->esw.esw0.erw.cons && (irb->ecw[27] & DASD_SENSE_BIT_0) &&
+           ((irb->ecw[6] & DASD_SIM_SENSE) == DASD_SIM_SENSE)) {
                 dasd_3990_erp_handle_sim(device, irb->ecw);
+               dasd_schedule_device_bh(device);
                 return;
         }
  
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c

index aee4656127f76e9ddeb8dc2439b2bbf38ea30efb..aa0c533423a5f040d96e9d5c6f275787a7a9f21c 100644 (file)
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -139,7 +139,8 @@ dasd_fba_check_characteristics(struct dasd_device *device)
         if (IS_ERR(block)) {
                 DEV_MESSAGE(KERN_WARNING, device, "%s",
                             "could not allocate dasd block structure");
-               kfree(device->private);
+               device->private = NULL;
+               kfree(private);
                 return PTR_ERR(block);
         }
         device->block = block;
@@ -152,6 +153,10 @@ dasd_fba_check_characteristics(struct dasd_device *device)
                 DEV_MESSAGE(KERN_WARNING, device,
                             "Read device characteristics returned error %d",
                             rc);
+               device->block = NULL;
+               dasd_free_block(block);
+               device->private = NULL;
+               kfree(private);
                 return rc;
         }
  
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c

index 0a9f1cccbe58b852f6716d416ec03a804b1531b5..b0ac44b271270e31db9e5b689c7802175f6dc0ec 100644 (file)
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -345,7 +345,7 @@ static int get_urd_class(struct urdev *urd)
         cc = diag210(&ur_diag210);
         switch (cc) {
         case 0:
-               return -ENOTSUPP;
+               return -EOPNOTSUPP;
         case 2:
                 return ur_diag210.vrdcvcla; /* virtual device class */
         case 3:
@@ -621,7 +621,7 @@ static int verify_device(struct urdev *urd)
         case DEV_CLASS_UR_I:
                 return verify_uri_device(urd);
         default:
-               return -ENOTSUPP;
+               return -EOPNOTSUPP;
         }
  }
  
@@ -654,7 +654,7 @@ static int get_file_reclen(struct urdev *urd)
         case DEV_CLASS_UR_I:
                 return get_uri_file_reclen(urd);
         default:
-               return -ENOTSUPP;
+               return -EOPNOTSUPP;
         }
  }
  
@@ -827,7 +827,7 @@ static int ur_probe(struct ccw_device *cdev)
                 goto fail_remove_attr;
         }
         if ((urd->class != DEV_CLASS_UR_I) && (urd->class != DEV_CLASS_UR_O)) {
-               rc = -ENOTSUPP;
+               rc = -EOPNOTSUPP;
                 goto fail_remove_attr;
         }
         spin_lock_irq(get_ccwdev_lock(cdev));
@@ -892,7 +892,7 @@ static int ur_set_online(struct ccw_device *cdev)
         } else if (urd->cdev->id.cu_type == PRINTER_DEVTYPE) {
                 sprintf(node_id, "vmprt-%s", cdev->dev.bus_id);
         } else {
-               rc = -ENOTSUPP;
+               rc = -EOPNOTSUPP;
                 goto fail_free_cdev;
         }
  
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c

index 047dd92ae804505ed9e363e526060b1822a6f102..7fd84be1193132f53163a272eba02c3791775116 100644 (file)
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -29,6 +29,7 @@
  
  #define TO_USER                0
  #define TO_KERNEL      1
+#define CHUNK_INFO_SIZE        34 /* 2 16-byte char, each followed by blank */
  
  enum arch_id {
         ARCH_S390       = 0,
@@ -51,6 +52,7 @@ static struct debug_info *zcore_dbf;
  static int hsa_available;
  static struct dentry *zcore_dir;
  static struct dentry *zcore_file;
+static struct dentry *zcore_memmap_file;
  
  /*
   * Copy memory from HSA to kernel or user memory (not reentrant):
@@ -476,6 +478,54 @@ static const struct file_operations zcore_fops = {
         .release        = zcore_release,
  };
  
+/*
+ * read() handler: copy from the buffer stored in filp->private_data to
+ * user space, treating it as MEMORY_CHUNKS * CHUNK_INFO_SIZE bytes long.
+ */
+static ssize_t zcore_memmap_read(struct file *filp, char __user *buf,
+                                size_t count, loff_t *ppos)
+{
+       const size_t map_len = MEMORY_CHUNKS * CHUNK_INFO_SIZE;
+
+       return simple_read_from_buffer(buf, count, ppos, filp->private_data,
+                                      map_len);
+}
+
+/*
+ * open() handler: build a text snapshot of the memory layout and store it
+ * in filp->private_data.
+ *
+ * Each chunk reported by detect_memory_layout() is written as two
+ * 16-digit hex numbers (address and size), each followed by a blank,
+ * i.e. CHUNK_INFO_SIZE characters per chunk.  Writing stops after the
+ * first chunk whose size is 0 (that chunk is still written).
+ *
+ * Returns 0 on success or -ENOMEM if either allocation fails.
+ */
+static int zcore_memmap_open(struct inode *inode, struct file *filp)
+{
+       int i;
+       char *buf;
+       struct mem_chunk *chunk_array;
+
+       chunk_array = kzalloc(MEMORY_CHUNKS * sizeof(struct mem_chunk),
+                             GFP_KERNEL);
+       if (!chunk_array)
+               return -ENOMEM;
+       detect_memory_layout(chunk_array);
+       /*
+        * sprintf() stores CHUNK_INFO_SIZE characters plus a terminating
+        * NUL.  For every chunk but the last that NUL lands on the first
+        * byte of the next slot; for the last slot it would land one byte
+        * past the buffer, so allocate one spare byte for it.
+        */
+       buf = kzalloc(MEMORY_CHUNKS * CHUNK_INFO_SIZE + 1, GFP_KERNEL);
+       if (!buf) {
+               kfree(chunk_array);
+               return -ENOMEM;
+       }
+       for (i = 0; i < MEMORY_CHUNKS; i++) {
+               sprintf(buf + (i * CHUNK_INFO_SIZE), "%016llx %016llx ",
+                       (unsigned long long) chunk_array[i].addr,
+                       (unsigned long long) chunk_array[i].size);
+               if (chunk_array[i].size == 0)
+                       break;
+       }
+       kfree(chunk_array);
+       filp->private_data = buf;
+       return 0;
+}
+
+/* release() handler: free the buffer held in filp->private_data. */
+static int zcore_memmap_release(struct inode *inode, struct file *filp)
+{
+       void *memmap_buf = filp->private_data;
+
+       kfree(memmap_buf);
+       return 0;
+}
+
+/* File operations built from the zcore_memmap_* handlers. */
+static const struct file_operations zcore_memmap_fops = {
+       .owner          = THIS_MODULE,
+       .open           = zcore_memmap_open,
+       .read           = zcore_memmap_read,
+       .release        = zcore_memmap_release,
+};
+
  
  static void __init set_s390_lc_mask(union save_area *map)
  {
@@ -554,18 +604,44 @@ static int __init check_sdias(void)
         return 0;
  }
  
-static void __init zcore_header_init(int arch, struct zcore_header *hdr)
+/*
+ * Add the sizes of all chunks reported by detect_memory_layout(), up to
+ * the first chunk of size 0, to *mem.  *mem is only added to, never
+ * reset here.
+ *
+ * Returns 0 on success or -ENOMEM if the chunk array cannot be allocated.
+ */
+static int __init get_mem_size(unsigned long *mem)
+{
+       struct mem_chunk *chunks;
+       int idx;
+
+       chunks = kzalloc(MEMORY_CHUNKS * sizeof(struct mem_chunk), GFP_KERNEL);
+       if (!chunks)
+               return -ENOMEM;
+       detect_memory_layout(chunks);
+       for (idx = 0; idx < MEMORY_CHUNKS && chunks[idx].size != 0; idx++)
+               *mem += chunks[idx].size;
+       kfree(chunks);
+       return 0;
+}
+
+static int __init zcore_header_init(int arch, struct zcore_header *hdr)
  {
+       /* total_mem is the sum of chunk sizes returned by get_mem_size() */
+       unsigned long total_mem = 0;
+       int err;
+
         if (arch == ARCH_S390X)
                 hdr->arch_id = DUMP_ARCH_S390X;
         else
                 hdr->arch_id = DUMP_ARCH_S390;
-       hdr->mem_size = sys_info.mem_size;
-       hdr->rmem_size = sys_info.mem_size;
+       err = get_mem_size(&total_mem);
+       if (err != 0)
+               return err;
+       hdr->rmem_size = total_mem;
+       hdr->mem_size = total_mem;
         hdr->mem_end = sys_info.mem_size;
-       hdr->num_pages = sys_info.mem_size / PAGE_SIZE;
+       hdr->num_pages = total_mem / PAGE_SIZE;
         hdr->tod = get_clock();
         get_cpu_id(&hdr->cpu_id);
+       return 0;
  }
  
  static int __init zcore_init(void)
@@ -608,7 +684,9 @@ static int __init zcore_init(void)
         if (rc)
                 goto fail;
  
-       zcore_header_init(arch, &zcore_header);
+       rc = zcore_header_init(arch, &zcore_header);
+       if (rc)
+               goto fail;
  
         zcore_dir = debugfs_create_dir("zcore" , NULL);
         if (!zcore_dir) {
@@ -618,13 +696,22 @@ static int __init zcore_init(void)
         zcore_file = debugfs_create_file("mem", S_IRUSR, zcore_dir, NULL,
                                          &zcore_fops);
         if (!zcore_file) {
-               debugfs_remove(zcore_dir);
                 rc = -ENOMEM;
-               goto fail;
+               goto fail_dir;
+       }
+       zcore_memmap_file = debugfs_create_file("memmap", S_IRUSR, zcore_dir,
+                                               NULL, &zcore_memmap_fops);
+       if (!zcore_memmap_file) {
+               rc = -ENOMEM;
+               goto fail_file;
         }
         hsa_available = 1;
         return 0;
  
+fail_file:
+       debugfs_remove(zcore_file);
+fail_dir:
+       debugfs_remove(zcore_dir);
  fail:
         diag308(DIAG308_REL_HSA, NULL);
         return rc;
diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile

index 91e9e3f3073af51b7ca4447a0143815dcc6fe9d8..bd79bd1653963c785de07f8d890395f021d61470 100644 (file)
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -9,4 +9,6 @@ ccw_device-objs += device_id.o device_pgid.o device_status.o
  obj-y += ccw_device.o cmf.o
  obj-$(CONFIG_CHSC_SCH) += chsc_sch.o
  obj-$(CONFIG_CCWGROUP) += ccwgroup.o
+
+qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_perf.o qdio_setup.o
  obj-$(CONFIG_QDIO) += qdio.o
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c

index 65264a38057d5cba26ba13b80787800b030ee84e..29826fdd47b84e658916241c3897386582d04ac9 100644 (file)
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -27,7 +27,13 @@
  
  static void *sei_page;
  
-static int chsc_error_from_response(int response)
+/**
+ * chsc_error_from_response() - convert a chsc response to an error
+ * @response: chsc response code
+ *
+ * Returns an appropriate Linux error code for @response.
+ */
+int chsc_error_from_response(int response)
  {
         switch (response) {
         case 0x0001:
@@ -45,6 +51,7 @@ static int chsc_error_from_response(int response)
                 return -EIO;
         }
  }
+EXPORT_SYMBOL_GPL(chsc_error_from_response);
  
  struct chsc_ssd_area {
         struct chsc_header request;
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h

index fb6c4d6c45b44cc68f29b7bd7a8cd7372ef70d6e..ba59bceace987968823330fb74abf048286b96e1 100644 (file)
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -101,4 +101,6 @@ void chsc_chp_online(struct chp_id chpid);
  void chsc_chp_offline(struct chp_id chpid);
  int chsc_get_channel_measurement_chars(struct channel_path *chp);
  
+int chsc_error_from_response(int response);
+
  #endif
diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c

deleted file mode 100644 (file)

index 2bf36e1..0000000
--- a/drivers/s390/cio/qdio.c
+++ /dev/null
@@ -1,3929 +0,0 @@
-/*
- *
- * linux/drivers/s390/cio/qdio.c
- *
- * Linux for S/390 QDIO base support, Hipersocket base support
- * version 2
- *
- * Copyright 2000,2002 IBM Corporation
- * Author(s):             Utz Bacher <utz.bacher@de.ibm.com>
- * 2.6 cio integration by Cornelia Huck <cornelia.huck@de.ibm.com>
- *
- * Restriction: only 63 iqdio subchannels would have its own indicator,
- * after that, subsequent subchannels share one indicator
- *
- *
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/kernel.h>
-#include <linux/proc_fs.h>
-#include <linux/timer.h>
-#include <linux/mempool.h>
-#include <linux/semaphore.h>
-
-#include <asm/ccwdev.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
-#include <asm/timex.h>
-
-#include <asm/debug.h>
-#include <asm/s390_rdev.h>
-#include <asm/qdio.h>
-#include <asm/airq.h>
-
-#include "cio.h"
-#include "css.h"
-#include "device.h"
-#include "qdio.h"
-#include "ioasm.h"
-#include "chsc.h"
-
-/****************** MODULE PARAMETER VARIABLES ********************/
-MODULE_AUTHOR("Utz Bacher <utz.bacher@de.ibm.com>");
-MODULE_DESCRIPTION("QDIO base support version 2, " \
-                  "Copyright 2000 IBM Corporation");
-MODULE_LICENSE("GPL");
-
-/******************** HERE WE GO ***********************************/
-
-static const char version[] = "QDIO base support version 2";
-
-static int qdio_performance_stats = 0;
-static int proc_perf_file_registration;
-static struct qdio_perf_stats perf_stats;
-
-static int hydra_thinints;
-static int is_passthrough = 0;
-static int omit_svs;
-
-static int indicator_used[INDICATORS_PER_CACHELINE];
-static __u32 * volatile indicators;
-static __u32 volatile spare_indicator;
-static atomic_t spare_indicator_usecount;
-#define QDIO_MEMPOOL_SCSSC_ELEMENTS 2
-static mempool_t *qdio_mempool_scssc;
-static struct kmem_cache *qdio_q_cache;
-
-static debug_info_t *qdio_dbf_setup;
-static debug_info_t *qdio_dbf_sbal;
-static debug_info_t *qdio_dbf_trace;
-static debug_info_t *qdio_dbf_sense;
-#ifdef CONFIG_QDIO_DEBUG
-static debug_info_t *qdio_dbf_slsb_out;
-static debug_info_t *qdio_dbf_slsb_in;
-#endif /* CONFIG_QDIO_DEBUG */
-
-/* iQDIO stuff: */
-static volatile struct qdio_q *tiq_list=NULL; /* volatile as it could change
-                                                during a while loop */
-static DEFINE_SPINLOCK(ttiq_list_lock);
-static void *tiqdio_ind;
-static void tiqdio_tl(unsigned long);
-static DECLARE_TASKLET(tiqdio_tasklet,tiqdio_tl,0);
-
-/* not a macro, as one of the arguments is atomic_read */
-static inline int 
-qdio_min(int a,int b)
-{
-       if (a<b)
-               return a;
-       else
-               return b;
-}
-
-/***************** SCRUBBER HELPER ROUTINES **********************/
-#ifdef CONFIG_64BIT
-static inline void qdio_perf_stat_inc(atomic64_t *count)
-{
-       if (qdio_performance_stats)
-               atomic64_inc(count);
-}
-
-static inline void qdio_perf_stat_dec(atomic64_t *count)
-{
-       if (qdio_performance_stats)
-               atomic64_dec(count);
-}
-#else /* CONFIG_64BIT */
-static inline void qdio_perf_stat_inc(atomic_t *count)
-{
-       if (qdio_performance_stats)
-               atomic_inc(count);
-}
-
-static inline void qdio_perf_stat_dec(atomic_t *count)
-{
-       if (qdio_performance_stats)
-               atomic_dec(count);
-}
-#endif /* CONFIG_64BIT */
-
-static inline __u64 
-qdio_get_micros(void)
-{
-       return (get_clock() >> 12); /* time>>12 is microseconds */
-}
-
-/* 
- * unfortunately, we can't just xchg the values; in do_QDIO we want to reserve
- * the q in any case, so that we'll not be interrupted when we are in
- * qdio_mark_tiq... shouldn't have a really bad impact, as reserving almost
- * ever works (last famous words) 
- */
-static inline int 
-qdio_reserve_q(struct qdio_q *q)
-{
-       return atomic_add_return(1,&q->use_count) - 1;
-}
-
-static inline void 
-qdio_release_q(struct qdio_q *q)
-{
-       atomic_dec(&q->use_count);
-}
-
-/*check ccq  */
-static int
-qdio_check_ccq(struct qdio_q *q, unsigned int ccq)
-{
-       char dbf_text[15];
-
-       if (ccq == 0 || ccq == 32)
-               return 0;
-       if (ccq == 96 || ccq == 97)
-               return 1;
-       /*notify devices immediately*/
-       sprintf(dbf_text,"%d", ccq);
-       QDIO_DBF_TEXT2(1,trace,dbf_text);
-       return -EIO;
-}
-/* EQBS: extract buffer states */
-static int
-qdio_do_eqbs(struct qdio_q *q, unsigned char *state,
-            unsigned int *start, unsigned int *cnt)
-{
-       struct qdio_irq *irq;
-       unsigned int tmp_cnt, q_no, ccq;
-       int rc ;
-       char dbf_text[15];
-
-       ccq = 0;
-       tmp_cnt = *cnt;
-       irq = (struct qdio_irq*)q->irq_ptr;
-       q_no = q->q_no;
-       if(!q->is_input_q)
-               q_no += irq->no_input_qs;
-again:
-       ccq = do_eqbs(irq->sch_token, state, q_no, start, cnt);
-       rc = qdio_check_ccq(q, ccq);
-       if ((ccq == 96) && (tmp_cnt != *cnt))
-               rc = 0;
-       if (rc == 1) {
-               QDIO_DBF_TEXT5(1,trace,"eqAGAIN");
-               goto again;
-       }
-       if (rc < 0) {
-                QDIO_DBF_TEXT2(1,trace,"eqberr");
-                sprintf(dbf_text,"%2x,%2x,%d,%d",tmp_cnt, *cnt, ccq, q_no);
-                QDIO_DBF_TEXT2(1,trace,dbf_text);
-               q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION|
-                               QDIO_STATUS_LOOK_FOR_ERROR,
-                               0, 0, 0, -1, -1, q->int_parm);
-               return 0;
-       }
-       return (tmp_cnt - *cnt);
-}
-
-/* SQBS: set buffer states */
-static int
-qdio_do_sqbs(struct qdio_q *q, unsigned char state,
-            unsigned int *start, unsigned int *cnt)
-{
-       struct qdio_irq *irq;
-       unsigned int tmp_cnt, q_no, ccq;
-       int rc;
-       char dbf_text[15];
-
-       ccq = 0;
-       tmp_cnt = *cnt;
-       irq = (struct qdio_irq*)q->irq_ptr;
-       q_no = q->q_no;
-       if(!q->is_input_q)
-               q_no += irq->no_input_qs;
-again:
-       ccq = do_sqbs(irq->sch_token, state, q_no, start, cnt);
-       rc = qdio_check_ccq(q, ccq);
-       if (rc == 1) {
-               QDIO_DBF_TEXT5(1,trace,"sqAGAIN");
-               goto again;
-       }
-       if (rc < 0) {
-               QDIO_DBF_TEXT3(1,trace,"sqberr");
-               sprintf(dbf_text,"%2x,%2x",tmp_cnt,*cnt);
-               QDIO_DBF_TEXT3(1,trace,dbf_text);
-               sprintf(dbf_text,"%d,%d",ccq,q_no);
-               QDIO_DBF_TEXT3(1,trace,dbf_text);
-               q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION|
-                               QDIO_STATUS_LOOK_FOR_ERROR,
-                               0, 0, 0, -1, -1, q->int_parm);
-               return 0;
-       }
-       return (tmp_cnt - *cnt);
-}
-
-static inline int
-qdio_set_slsb(struct qdio_q *q, unsigned int *bufno,
-             unsigned char state, unsigned int *count)
-{
-       volatile char *slsb;
-       struct qdio_irq *irq;
-
-       irq = (struct qdio_irq*)q->irq_ptr;
-       if (!irq->is_qebsm) {
-               slsb = (char *)&q->slsb.acc.val[(*bufno)];
-               xchg(slsb, state);
-               return 1;
-       }
-       return qdio_do_sqbs(q, state, bufno, count);
-}
-
-#ifdef CONFIG_QDIO_DEBUG
-static inline void
-qdio_trace_slsb(struct qdio_q *q)
-{
-       if (q->queue_type==QDIO_TRACE_QTYPE) {
-               if (q->is_input_q)
-                       QDIO_DBF_HEX2(0,slsb_in,&q->slsb,
-                                     QDIO_MAX_BUFFERS_PER_Q);
-               else
-                       QDIO_DBF_HEX2(0,slsb_out,&q->slsb,
-                                     QDIO_MAX_BUFFERS_PER_Q);
-       }
-}
-#endif
-
-static inline int
-set_slsb(struct qdio_q *q, unsigned int *bufno,
-        unsigned char state, unsigned int *count)
-{
-       int rc;
-#ifdef CONFIG_QDIO_DEBUG
-       qdio_trace_slsb(q);
-#endif
-       rc = qdio_set_slsb(q, bufno, state, count);
-#ifdef CONFIG_QDIO_DEBUG
-       qdio_trace_slsb(q);
-#endif
-       return rc;
-}
-static inline int 
-qdio_siga_sync(struct qdio_q *q, unsigned int gpr2,
-              unsigned int gpr3)
-{
-       int cc;
-
-       QDIO_DBF_TEXT4(0,trace,"sigasync");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       qdio_perf_stat_inc(&perf_stats.siga_syncs);
-
-       cc = do_siga_sync(q->schid, gpr2, gpr3);
-       if (cc)
-               QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*));
-
-       return cc;
-}
-
-static inline int
-qdio_siga_sync_q(struct qdio_q *q)
-{
-       if (q->is_input_q)
-               return qdio_siga_sync(q, 0, q->mask);
-       return qdio_siga_sync(q, q->mask, 0);
-}
-
-static int
-__do_siga_output(struct qdio_q *q, unsigned int *busy_bit)
-{
-       struct qdio_irq *irq;
-       unsigned int fc = 0;
-       unsigned long schid;
-
-       irq = (struct qdio_irq *) q->irq_ptr;
-       if (!irq->is_qebsm)
-              schid = *((u32 *)&q->schid);
-       else {
-              schid = irq->sch_token;
-              fc |= 0x80;
-       }
-       return do_siga_output(schid, q->mask, busy_bit, fc);
-}
-
-/* 
- * returns QDIO_SIGA_ERROR_ACCESS_EXCEPTION as cc, when SIGA returns
- * an access exception 
- */
-static int
-qdio_siga_output(struct qdio_q *q)
-{
-       int cc;
-       __u32 busy_bit;
-       __u64 start_time=0;
-
-       qdio_perf_stat_inc(&perf_stats.siga_outs);
-
-       QDIO_DBF_TEXT4(0,trace,"sigaout");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       for (;;) {
-               cc = __do_siga_output(q, &busy_bit);
-//QDIO_PRINT_ERR("cc=%x, busy=%x\n",cc,busy_bit);
-               if ((cc==2) && (busy_bit) && (q->is_iqdio_q)) {
-                       if (!start_time) 
-                               start_time=NOW;
-                       if ((NOW-start_time)>QDIO_BUSY_BIT_PATIENCE)
-                               break;
-               } else
-                       break;
-       }
-       
-       if ((cc==2) && (busy_bit)) 
-               cc |= QDIO_SIGA_ERROR_B_BIT_SET;
-
-       if (cc)
-               QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*));
-
-       return cc;
-}
-
-static int
-qdio_siga_input(struct qdio_q *q)
-{
-       int cc;
-
-       QDIO_DBF_TEXT4(0,trace,"sigain");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       qdio_perf_stat_inc(&perf_stats.siga_ins);
-
-       cc = do_siga_input(q->schid, q->mask);
-       
-       if (cc)
-               QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*));
-
-       return cc;
-}
-
-/* locked by the locks in qdio_activate and qdio_cleanup */
-static __u32 *
-qdio_get_indicator(void)
-{
-       int i;
-
-       for (i = 0; i < INDICATORS_PER_CACHELINE; i++)
-               if (!indicator_used[i]) {
-                       indicator_used[i]=1;
-                       return indicators+i;
-               }
-       atomic_inc(&spare_indicator_usecount);
-       return (__u32 * volatile) &spare_indicator;
-}
-
-/* locked by the locks in qdio_activate and qdio_cleanup */
-static void 
-qdio_put_indicator(__u32 *addr)
-{
-       int i;
-
-       if ( (addr) && (addr!=&spare_indicator) ) {
-               i=addr-indicators;
-               indicator_used[i]=0;
-       }
-       if (addr == &spare_indicator)
-               atomic_dec(&spare_indicator_usecount);
-}
-
-static inline void
-tiqdio_clear_summary_bit(__u32 *location)
-{
-       QDIO_DBF_TEXT5(0,trace,"clrsummb");
-       QDIO_DBF_HEX5(0,trace,&location,sizeof(void*));
-
-       xchg(location,0);
-}
-
-static inline  void
-tiqdio_set_summary_bit(__u32 *location)
-{
-       QDIO_DBF_TEXT5(0,trace,"setsummb");
-       QDIO_DBF_HEX5(0,trace,&location,sizeof(void*));
-
-       xchg(location,-1);
-}
-
-static inline void 
-tiqdio_sched_tl(void)
-{
-       tasklet_hi_schedule(&tiqdio_tasklet);
-}
-
-static void
-qdio_mark_tiq(struct qdio_q *q)
-{
-       unsigned long flags;
-
-       QDIO_DBF_TEXT4(0,trace,"mark iq");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       spin_lock_irqsave(&ttiq_list_lock,flags);
-       if (unlikely(atomic_read(&q->is_in_shutdown)))
-               goto out_unlock;
-
-       if (!q->is_input_q)
-               goto out_unlock;
-
-       if ((q->list_prev) || (q->list_next)) 
-               goto out_unlock;
-
-       if (!tiq_list) {
-               tiq_list=q;
-               q->list_prev=q;
-               q->list_next=q;
-       } else {
-               q->list_next=tiq_list;
-               q->list_prev=tiq_list->list_prev;
-               tiq_list->list_prev->list_next=q;
-               tiq_list->list_prev=q;
-       }
-       spin_unlock_irqrestore(&ttiq_list_lock,flags);
-
-       tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind);
-       tiqdio_sched_tl();
-       return;
-out_unlock:
-       spin_unlock_irqrestore(&ttiq_list_lock,flags);
-       return;
-}
-
-static inline void
-qdio_mark_q(struct qdio_q *q)
-{
-       QDIO_DBF_TEXT4(0,trace,"mark q");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       if (unlikely(atomic_read(&q->is_in_shutdown)))
-               return;
-
-       tasklet_schedule(&q->tasklet);
-}
-
-static int
-qdio_stop_polling(struct qdio_q *q)
-{
-#ifdef QDIO_USE_PROCESSING_STATE
-       unsigned int tmp, gsf, count = 1;
-       unsigned char state = 0;
-       struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
-
-       if (!atomic_xchg(&q->polling,0))
-               return 1;
-
-       QDIO_DBF_TEXT4(0,trace,"stoppoll");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       /* show the card that we are not polling anymore */
-       if (!q->is_input_q)
-               return 1;
-
-       tmp = gsf = GET_SAVED_FRONTIER(q);
-       tmp = ((tmp + QDIO_MAX_BUFFERS_PER_Q-1) & (QDIO_MAX_BUFFERS_PER_Q-1) );
-       set_slsb(q, &tmp, SLSB_P_INPUT_NOT_INIT, &count);
-
-       /* 
-        * we don't issue this SYNC_MEMORY, as we trust Rick T and
-        * moreover will not use the PROCESSING state under VM, so
-        * q->polling was 0 anyway
-        */
-       /*SYNC_MEMORY;*/
-       if (irq->is_qebsm) {
-               count = 1;
-               qdio_do_eqbs(q, &state, &gsf, &count);
-       } else
-               state = q->slsb.acc.val[gsf];
-       if (state != SLSB_P_INPUT_PRIMED)
-               return 1;
-       /* 
-        * set our summary bit again, as otherwise there is a
-        * small window we can miss between resetting it and
-        * checking for PRIMED state 
-        */
-       if (q->is_thinint_q)
-               tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind);
-       return 0;
-
-#else /* QDIO_USE_PROCESSING_STATE */
-       return 1;
-#endif /* QDIO_USE_PROCESSING_STATE */
-}
-
-/* 
- * see the comment in do_QDIO and before qdio_reserve_q about the
- * sophisticated locking outside of unmark_q, so that we don't need to
- * disable the interrupts :-) 
-*/
-static void
-qdio_unmark_q(struct qdio_q *q)
-{
-       unsigned long flags;
-
-       QDIO_DBF_TEXT4(0,trace,"unmark q");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       if ((!q->list_prev)||(!q->list_next))
-               return;
-
-       if ((q->is_thinint_q)&&(q->is_input_q)) {
-               /* iQDIO */
-               spin_lock_irqsave(&ttiq_list_lock,flags);
-               /* in case cleanup has done this already and simultanously
-                * qdio_unmark_q is called from the interrupt handler, we've
-                * got to check this in this specific case again */
-               if ((!q->list_prev)||(!q->list_next))
-                       goto out;
-               if (q->list_next==q) {
-                       /* q was the only interesting q */
-                       tiq_list=NULL;
-                       q->list_next=NULL;
-                       q->list_prev=NULL;
-               } else {
-                       q->list_next->list_prev=q->list_prev;
-                       q->list_prev->list_next=q->list_next;
-                       tiq_list=q->list_next;
-                       q->list_next=NULL;
-                       q->list_prev=NULL;
-               }
-out:
-               spin_unlock_irqrestore(&ttiq_list_lock,flags);
-       }
-}
-
-static inline unsigned long 
-tiqdio_clear_global_summary(void)
-{
-       unsigned long time;
-
-       QDIO_DBF_TEXT5(0,trace,"clrglobl");
-       
-       time = do_clear_global_summary();
-
-       QDIO_DBF_HEX5(0,trace,&time,sizeof(unsigned long));
-
-       return time;
-}
-
-
-/************************* OUTBOUND ROUTINES *******************************/
-static int
-qdio_qebsm_get_outbound_buffer_frontier(struct qdio_q *q)
-{
-        struct qdio_irq *irq;
-        unsigned char state;
-        unsigned int cnt, count, ftc;
-
-        irq = (struct qdio_irq *) q->irq_ptr;
-        if ((!q->is_iqdio_q) && (!q->hydra_gives_outbound_pcis))
-                SYNC_MEMORY;
-
-        ftc = q->first_to_check;
-        count = qdio_min(atomic_read(&q->number_of_buffers_used),
-                        (QDIO_MAX_BUFFERS_PER_Q-1));
-        if (count == 0)
-                return q->first_to_check;
-        cnt = qdio_do_eqbs(q, &state, &ftc, &count);
-        if (cnt == 0)
-                return q->first_to_check;
-        switch (state) {
-        case SLSB_P_OUTPUT_ERROR:
-                QDIO_DBF_TEXT3(0,trace,"outperr");
-                atomic_sub(cnt , &q->number_of_buffers_used);
-                if (q->qdio_error)
-                        q->error_status_flags |=
-                                QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR;
-                q->qdio_error = SLSB_P_OUTPUT_ERROR;
-                q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR;
-                q->first_to_check = ftc;
-                break;
-        case SLSB_P_OUTPUT_EMPTY:
-                QDIO_DBF_TEXT5(0,trace,"outpempt");
-                atomic_sub(cnt, &q->number_of_buffers_used);
-                q->first_to_check = ftc;
-                break;
-        case SLSB_CU_OUTPUT_PRIMED:
-                /* all buffers primed */
-                QDIO_DBF_TEXT5(0,trace,"outpprim");
-                break;
-        default:
-                break;
-        }
-        QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int));
-        return q->first_to_check;
-}
-
-static int
-qdio_qebsm_get_inbound_buffer_frontier(struct qdio_q *q)
-{
-        struct qdio_irq *irq;
-        unsigned char state;
-        int tmp, ftc, count, cnt;
-        char dbf_text[15];
-
-
-        irq = (struct qdio_irq *) q->irq_ptr;
-        ftc = q->first_to_check;
-        count = qdio_min(atomic_read(&q->number_of_buffers_used),
-                        (QDIO_MAX_BUFFERS_PER_Q-1));
-        if (count == 0)
-                 return q->first_to_check;
-        cnt = qdio_do_eqbs(q, &state, &ftc, &count);
-        if (cnt == 0)
-                 return q->first_to_check;
-        switch (state) {
-        case SLSB_P_INPUT_ERROR :
-#ifdef CONFIG_QDIO_DEBUG
-                QDIO_DBF_TEXT3(1,trace,"inperr");
-                sprintf(dbf_text,"%2x,%2x",ftc,count);
-                QDIO_DBF_TEXT3(1,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-                if (q->qdio_error)
-                        q->error_status_flags |=
-                                QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR;
-                q->qdio_error = SLSB_P_INPUT_ERROR;
-                q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR;
-                atomic_sub(cnt, &q->number_of_buffers_used);
-                q->first_to_check = ftc;
-                break;
-        case SLSB_P_INPUT_PRIMED :
-                QDIO_DBF_TEXT3(0,trace,"inptprim");
-                sprintf(dbf_text,"%2x,%2x",ftc,count);
-                QDIO_DBF_TEXT3(1,trace,dbf_text);
-                tmp = 0;
-                ftc = q->first_to_check;
-#ifdef QDIO_USE_PROCESSING_STATE
-               if (cnt > 1) {
-                       cnt -= 1;
-                       tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt);
-                       if (!tmp)
-                               break;
-               }
-               cnt = 1;
-               tmp += set_slsb(q, &ftc,
-                              SLSB_P_INPUT_PROCESSING, &cnt);
-               atomic_set(&q->polling, 1);
-#else
-                tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt);
-#endif
-                atomic_sub(tmp, &q->number_of_buffers_used);
-                q->first_to_check = ftc;
-                break;
-        case SLSB_CU_INPUT_EMPTY:
-        case SLSB_P_INPUT_NOT_INIT:
-        case SLSB_P_INPUT_PROCESSING:
-                QDIO_DBF_TEXT5(0,trace,"inpnipro");
-                break;
-        default:
-                break;
-        }
-        QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int));
-        return q->first_to_check;
-}
-
-static int
-qdio_get_outbound_buffer_frontier(struct qdio_q *q)
-{
-       struct qdio_irq *irq;
-        volatile char *slsb;
-        unsigned int count = 1;
-        int first_not_to_check, f, f_mod_no;
-       char dbf_text[15];
-
-       QDIO_DBF_TEXT4(0,trace,"getobfro");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       irq = (struct qdio_irq *) q->irq_ptr;
-       if (irq->is_qebsm)
-               return qdio_qebsm_get_outbound_buffer_frontier(q);
-
-       slsb=&q->slsb.acc.val[0];
-       f_mod_no=f=q->first_to_check;
-       /* 
-        * f points to already processed elements, so f+no_used is correct...
-        * ... but: we don't check 128 buffers, as otherwise
-        * qdio_has_outbound_q_moved would return 0 
-        */
-       first_not_to_check=f+qdio_min(atomic_read(&q->number_of_buffers_used),
-                                     (QDIO_MAX_BUFFERS_PER_Q-1));
-
-       if (((!q->is_iqdio_q) && (!q->hydra_gives_outbound_pcis)) ||
-                (q->queue_type == QDIO_IQDIO_QFMT_ASYNCH))
-               SYNC_MEMORY;
-
-check_next:
-       if (f==first_not_to_check) 
-               goto out;
-
-       switch(slsb[f_mod_no]) {
-
-        /* the adapter has not fetched the output yet */
-       case SLSB_CU_OUTPUT_PRIMED:
-               QDIO_DBF_TEXT5(0,trace,"outpprim");
-               break;
-
-       /* the adapter got it */
-       case SLSB_P_OUTPUT_EMPTY:
-               atomic_dec(&q->number_of_buffers_used);
-               f++;
-               f_mod_no=f&(QDIO_MAX_BUFFERS_PER_Q-1);
-               QDIO_DBF_TEXT5(0,trace,"outpempt");
-               goto check_next;
-
-       case SLSB_P_OUTPUT_ERROR:
-               QDIO_DBF_TEXT3(0,trace,"outperr");
-               sprintf(dbf_text,"%x-%x-%x",f_mod_no,
-                       q->sbal[f_mod_no]->element[14].sbalf.value,
-                       q->sbal[f_mod_no]->element[15].sbalf.value);
-               QDIO_DBF_TEXT3(1,trace,dbf_text);
-               QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256);
-
-               /* kind of process the buffer */
-               set_slsb(q, &f_mod_no, SLSB_P_OUTPUT_NOT_INIT, &count);
-
-               /* 
-                * we increment the frontier, as this buffer
-                * was processed obviously 
-                */
-               atomic_dec(&q->number_of_buffers_used);
-               f_mod_no=(f_mod_no+1)&(QDIO_MAX_BUFFERS_PER_Q-1);
-
-               if (q->qdio_error)
-                       q->error_status_flags|=
-                               QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR;
-               q->qdio_error=SLSB_P_OUTPUT_ERROR;
-               q->error_status_flags|=QDIO_STATUS_LOOK_FOR_ERROR;
-
-               break;
-
-       /* no new buffers */
-       default:
-               QDIO_DBF_TEXT5(0,trace,"outpni");
-       }
-out:
-       return (q->first_to_check=f_mod_no);
-}
-
-/* all buffers are processed */
-static int
-qdio_is_outbound_q_done(struct qdio_q *q)
-{
-       int no_used;
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[15];
-#endif
-
-       no_used=atomic_read(&q->number_of_buffers_used);
-
-#ifdef CONFIG_QDIO_DEBUG
-       if (no_used) {
-               sprintf(dbf_text,"oqisnt%02x",no_used);
-               QDIO_DBF_TEXT4(0,trace,dbf_text);
-       } else {
-               QDIO_DBF_TEXT4(0,trace,"oqisdone");
-       }
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-#endif /* CONFIG_QDIO_DEBUG */
-       return (no_used==0);
-}
-
-static int
-qdio_has_outbound_q_moved(struct qdio_q *q)
-{
-       int i;
-
-       i=qdio_get_outbound_buffer_frontier(q);
-
-       if ( (i!=GET_SAVED_FRONTIER(q)) ||
-            (q->error_status_flags&QDIO_STATUS_LOOK_FOR_ERROR) ) {
-               SAVE_FRONTIER(q,i);
-               QDIO_DBF_TEXT4(0,trace,"oqhasmvd");
-               QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-               return 1;
-       } else {
-               QDIO_DBF_TEXT4(0,trace,"oqhsntmv");
-               QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-               return 0;
-       }
-}
-
-static void
-qdio_kick_outbound_q(struct qdio_q *q)
-{
-       int result;
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[15];
-
-       QDIO_DBF_TEXT4(0,trace,"kickoutq");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-#endif /* CONFIG_QDIO_DEBUG */
-
-       if (!q->siga_out)
-               return;
-
-       /* here's the story with cc=2 and busy bit set (thanks, Rick):
-        * VM's CP could present us cc=2 and busy bit set on SIGA-write
-        * during reconfiguration of their Guest LAN (only in HIPERS mode,
-        * QDIO mode is asynchronous -- cc=2 and busy bit there will take
-        * the queues down immediately; and not being under VM we have a
-        * problem on cc=2 and busy bit set right away).
-        *
-        * Therefore qdio_siga_output will try for a short time constantly,
-        * if such a condition occurs. If it doesn't change, it will
-        * increase the busy_siga_counter and save the timestamp, and
-        * schedule the queue for later processing (via mark_q, using the
-        * queue tasklet). __qdio_outbound_processing will check out the
-        * counter. If non-zero, it will call qdio_kick_outbound_q as often
-        * as the value of the counter. This will attempt further SIGA
-        * instructions. For each successful SIGA, the counter is
-        * decreased, for failing SIGAs the counter remains the same, after
-        * all.
-        * After some time of no movement, qdio_kick_outbound_q will
-        * finally fail and reflect corresponding error codes to call
-        * the upper layer module and have it take the queues down.
-        *
-        * Note that this is a change from the original HiperSockets design
-        * (saying cc=2 and busy bit means take the queues down), but in
-        * these days Guest LAN didn't exist... excessive cc=2 with busy bit
-        * conditions will still take the queues down, but the threshold is
-        * higher due to the Guest LAN environment.
-        */
-
-
-       result=qdio_siga_output(q);
-
-       switch (result) {
-       case 0:
-               /* went smooth this time, reset timestamp */
-#ifdef CONFIG_QDIO_DEBUG
-               QDIO_DBF_TEXT3(0,trace,"cc2reslv");
-               sprintf(dbf_text,"%4x%2x%2x",q->schid.sch_no,q->q_no,
-                       atomic_read(&q->busy_siga_counter));
-               QDIO_DBF_TEXT3(0,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-               q->timing.busy_start=0;
-               break;
-       case (2|QDIO_SIGA_ERROR_B_BIT_SET):
-               /* cc=2 and busy bit: */
-               atomic_inc(&q->busy_siga_counter);
-
-               /* if the last siga was successful, save
-                * timestamp here */
-               if (!q->timing.busy_start)
-                       q->timing.busy_start=NOW;
-
-               /* if we're in time, don't touch error_status_flags
-                * and siga_error */
-               if (NOW-q->timing.busy_start<QDIO_BUSY_BIT_GIVE_UP) {
-                       qdio_mark_q(q);
-                       break;
-               }
-               QDIO_DBF_TEXT2(0,trace,"cc2REPRT");
-#ifdef CONFIG_QDIO_DEBUG
-               sprintf(dbf_text,"%4x%2x%2x",q->schid.sch_no,q->q_no,
-                       atomic_read(&q->busy_siga_counter));
-               QDIO_DBF_TEXT3(0,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-               /* else fallthrough and report error */
-       default:
-               /* for plain cc=1, 2 or 3: */
-               if (q->siga_error)
-                       q->error_status_flags|=
-                               QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR;
-               q->error_status_flags|=
-                       QDIO_STATUS_LOOK_FOR_ERROR;
-               q->siga_error=result;
-       }
-}
-
-static void
-qdio_kick_outbound_handler(struct qdio_q *q)
-{
-       int start, end, real_end, count;
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[15];
-#endif
-
-       start = q->first_element_to_kick;
-       /* last_move_ftc was just updated */
-       real_end = GET_SAVED_FRONTIER(q);
-       end = (real_end+QDIO_MAX_BUFFERS_PER_Q-1)&
-               (QDIO_MAX_BUFFERS_PER_Q-1);
-       count = (end+QDIO_MAX_BUFFERS_PER_Q+1-start)&
-               (QDIO_MAX_BUFFERS_PER_Q-1);
-
-#ifdef CONFIG_QDIO_DEBUG
-       QDIO_DBF_TEXT4(0,trace,"kickouth");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       sprintf(dbf_text,"s=%2xc=%2x",start,count);
-       QDIO_DBF_TEXT4(0,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-
-       if (q->state==QDIO_IRQ_STATE_ACTIVE)
-               q->handler(q->cdev,QDIO_STATUS_OUTBOUND_INT|
-                          q->error_status_flags,
-                          q->qdio_error,q->siga_error,q->q_no,start,count,
-                          q->int_parm);
-
-       /* for the next time: */
-       q->first_element_to_kick=real_end;
-       q->qdio_error=0;
-       q->siga_error=0;
-       q->error_status_flags=0;
-}
-
-static void
-__qdio_outbound_processing(struct qdio_q *q)
-{
-       int siga_attempts;
-
-       QDIO_DBF_TEXT4(0,trace,"qoutproc");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       if (unlikely(qdio_reserve_q(q))) {
-               qdio_release_q(q);
-               qdio_perf_stat_inc(&perf_stats.outbound_tl_runs_resched);
-               /* as we're sissies, we'll check next time */
-               if (likely(!atomic_read(&q->is_in_shutdown))) {
-                       qdio_mark_q(q);
-                       QDIO_DBF_TEXT4(0,trace,"busy,agn");
-               }
-               return;
-       }
-       qdio_perf_stat_inc(&perf_stats.outbound_tl_runs);
-       qdio_perf_stat_inc(&perf_stats.tl_runs);
-
-       /* see comment in qdio_kick_outbound_q */
-       siga_attempts=atomic_read(&q->busy_siga_counter);
-       while (siga_attempts) {
-               atomic_dec(&q->busy_siga_counter);
-               qdio_kick_outbound_q(q);
-               siga_attempts--;
-       }
-
-       if (qdio_has_outbound_q_moved(q))
-               qdio_kick_outbound_handler(q);
-
-       if (q->queue_type == QDIO_ZFCP_QFMT) {
-               if ((!q->hydra_gives_outbound_pcis) &&
-                   (!qdio_is_outbound_q_done(q)))
-                       qdio_mark_q(q);
-       }
-       else if (((!q->is_iqdio_q) && (!q->is_pci_out)) ||
-                (q->queue_type == QDIO_IQDIO_QFMT_ASYNCH)) {
-               /* 
-                * make sure buffer switch from PRIMED to EMPTY is noticed
-                * and outbound_handler is called
-                */
-               if (qdio_is_outbound_q_done(q)) {
-                       del_timer(&q->timer);
-               } else {
-                       if (!timer_pending(&q->timer))
-                               mod_timer(&q->timer, jiffies +
-                                         QDIO_FORCE_CHECK_TIMEOUT);
-               }
-       }
-
-       qdio_release_q(q);
-}
-
-static void
-qdio_outbound_processing(unsigned long q)
-{
-       __qdio_outbound_processing((struct qdio_q *) q);
-}
-
-/************************* INBOUND ROUTINES *******************************/
-
-
-static int
-qdio_get_inbound_buffer_frontier(struct qdio_q *q)
-{
-       struct qdio_irq *irq;
-       int f,f_mod_no;
-       volatile char *slsb;
-       unsigned int count = 1;
-       int first_not_to_check;
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[15];
-#endif /* CONFIG_QDIO_DEBUG */
-#ifdef QDIO_USE_PROCESSING_STATE
-       int last_position=-1;
-#endif /* QDIO_USE_PROCESSING_STATE */
-
-       QDIO_DBF_TEXT4(0,trace,"getibfro");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       irq = (struct qdio_irq *) q->irq_ptr;
-       if (irq->is_qebsm)
-               return qdio_qebsm_get_inbound_buffer_frontier(q);
-
-       slsb=&q->slsb.acc.val[0];
-       f_mod_no=f=q->first_to_check;
-       /* 
-        * we don't check 128 buffers, as otherwise qdio_has_inbound_q_moved
-        * would return 0 
-        */
-       first_not_to_check=f+qdio_min(atomic_read(&q->number_of_buffers_used),
-                                     (QDIO_MAX_BUFFERS_PER_Q-1));
-
-       /* 
-        * we don't use this one, as a PCI or we after a thin interrupt
-        * will sync the queues
-        */
-       /* SYNC_MEMORY;*/
-
-check_next:
-       f_mod_no=f&(QDIO_MAX_BUFFERS_PER_Q-1);
-       if (f==first_not_to_check) 
-               goto out;
-       switch (slsb[f_mod_no]) {
-
-       /* CU_EMPTY means frontier is reached */
-       case SLSB_CU_INPUT_EMPTY:
-               QDIO_DBF_TEXT5(0,trace,"inptempt");
-               break;
-
-       /* P_PRIMED means set slsb to P_PROCESSING and move on */
-       case SLSB_P_INPUT_PRIMED:
-               QDIO_DBF_TEXT5(0,trace,"inptprim");
-
-#ifdef QDIO_USE_PROCESSING_STATE
-               /* 
-                * as soon as running under VM, polling the input queues will
-                * kill VM in terms of CP overhead 
-                */
-               if (q->siga_sync) {
-                       set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count);
-               } else {
-                       /* set the previous buffer to NOT_INIT. The current
-                        * buffer will be set to PROCESSING at the end of
-                        * this function to avoid further interrupts. */
-                       if (last_position>=0)
-                               set_slsb(q, &last_position,
-                                        SLSB_P_INPUT_NOT_INIT, &count);
-                       atomic_set(&q->polling,1);
-                       last_position=f_mod_no;
-               }
-#else /* QDIO_USE_PROCESSING_STATE */
-               set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count);
-#endif /* QDIO_USE_PROCESSING_STATE */
-               /* 
-                * not needed, as the inbound queue will be synced on the next
-                * siga-r, resp. tiqdio_is_inbound_q_done will do the siga-s
-                */
-               /*SYNC_MEMORY;*/
-               f++;
-               atomic_dec(&q->number_of_buffers_used);
-               goto check_next;
-
-       case SLSB_P_INPUT_NOT_INIT:
-       case SLSB_P_INPUT_PROCESSING:
-               QDIO_DBF_TEXT5(0,trace,"inpnipro");
-               break;
-
-       /* P_ERROR means frontier is reached, break and report error */
-       case SLSB_P_INPUT_ERROR:
-#ifdef CONFIG_QDIO_DEBUG
-               sprintf(dbf_text,"inperr%2x",f_mod_no);
-               QDIO_DBF_TEXT3(1,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-               QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256);
-
-               /* kind of process the buffer */
-               set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count);
-
-               if (q->qdio_error)
-                       q->error_status_flags|=
-                               QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR;
-               q->qdio_error=SLSB_P_INPUT_ERROR;
-               q->error_status_flags|=QDIO_STATUS_LOOK_FOR_ERROR;
-
-               /* we increment the frontier, as this buffer
-                * was processed obviously */
-               f_mod_no=(f_mod_no+1)&(QDIO_MAX_BUFFERS_PER_Q-1);
-               atomic_dec(&q->number_of_buffers_used);
-
-#ifdef QDIO_USE_PROCESSING_STATE
-               last_position=-1;
-#endif /* QDIO_USE_PROCESSING_STATE */
-
-               break;
-
-       /* everything else means frontier not changed (HALTED or so) */
-       default: 
-               break;
-       }
-out:
-       q->first_to_check=f_mod_no;
-
-#ifdef QDIO_USE_PROCESSING_STATE
-       if (last_position>=0)
-               set_slsb(q, &last_position, SLSB_P_INPUT_PROCESSING, &count);
-#endif /* QDIO_USE_PROCESSING_STATE */
-
-       QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int));
-
-       return q->first_to_check;
-}
-
-static int
-qdio_has_inbound_q_moved(struct qdio_q *q)
-{
-       int i;
-
-       i=qdio_get_inbound_buffer_frontier(q);
-       if ( (i!=GET_SAVED_FRONTIER(q)) ||
-            (q->error_status_flags&QDIO_STATUS_LOOK_FOR_ERROR) ) {
-               SAVE_FRONTIER(q,i);
-               if ((!q->siga_sync)&&(!q->hydra_gives_outbound_pcis))
-                       SAVE_TIMESTAMP(q);
-
-               QDIO_DBF_TEXT4(0,trace,"inhasmvd");
-               QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-               return 1;
-       } else {
-               QDIO_DBF_TEXT4(0,trace,"inhsntmv");
-               QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-               return 0;
-       }
-}
-
-/* means, no more buffers to be filled */
-static int
-tiqdio_is_inbound_q_done(struct qdio_q *q)
-{
-       int no_used;
-       unsigned int start_buf, count;
-       unsigned char state = 0;
-       struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
-
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[15];
-#endif
-
-       no_used=atomic_read(&q->number_of_buffers_used);
-
-       /* propagate the change from 82 to 80 through VM */
-       SYNC_MEMORY;
-
-#ifdef CONFIG_QDIO_DEBUG
-       if (no_used) {
-               sprintf(dbf_text,"iqisnt%02x",no_used);
-               QDIO_DBF_TEXT4(0,trace,dbf_text);
-       } else {
-               QDIO_DBF_TEXT4(0,trace,"iniqisdo");
-       }
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-#endif /* CONFIG_QDIO_DEBUG */
-
-       if (!no_used)
-               return 1;
-       if (irq->is_qebsm) {
-               count = 1;
-               start_buf = q->first_to_check;
-               qdio_do_eqbs(q, &state, &start_buf, &count);
-       } else
-               state = q->slsb.acc.val[q->first_to_check];
-       if (state != SLSB_P_INPUT_PRIMED)
-               /* 
-                * nothing more to do, if next buffer is not PRIMED.
-                * note that we did a SYNC_MEMORY before, that there
-                * has been a sychnronization.
-                * we will return 0 below, as there is nothing to do
-                * (stop_polling not necessary, as we have not been
-                * using the PROCESSING state 
-                */
-               return 0;
-
-       /* 
-        * ok, the next input buffer is primed. that means, that device state 
-        * change indicator and adapter local summary are set, so we will find
-        * it next time.
-        * we will return 0 below, as there is nothing to do, except scheduling
-        * ourselves for the next time. 
-        */
-       tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind);
-       tiqdio_sched_tl();
-       return 0;
-}
-
-static int
-qdio_is_inbound_q_done(struct qdio_q *q)
-{
-       int no_used;
-       unsigned int start_buf, count;
-       unsigned char state = 0;
-       struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
-
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[15];
-#endif
-
-       no_used=atomic_read(&q->number_of_buffers_used);
-
-       /* 
-        * we need that one for synchronization with the adapter, as it
-        * does a kind of PCI avoidance 
-        */
-       SYNC_MEMORY;
-
-       if (!no_used) {
-               QDIO_DBF_TEXT4(0,trace,"inqisdnA");
-               QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-               return 1;
-       }
-       if (irq->is_qebsm) {
-               count = 1;
-               start_buf = q->first_to_check;
-               qdio_do_eqbs(q, &state, &start_buf, &count);
-       } else
-               state = q->slsb.acc.val[q->first_to_check];
-       if (state == SLSB_P_INPUT_PRIMED) {
-               /* we got something to do */
-               QDIO_DBF_TEXT4(0,trace,"inqisntA");
-               QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-               return 0;
-       }
-
-       /* on VM, we don't poll, so the q is always done here */
-       if (q->siga_sync)
-               return 1;
-       if (q->hydra_gives_outbound_pcis)
-               return 1;
-
-       /* 
-        * at this point we know, that inbound first_to_check
-        * has (probably) not moved (see qdio_inbound_processing) 
-        */
-       if (NOW>GET_SAVED_TIMESTAMP(q)+q->timing.threshold) {
-#ifdef CONFIG_QDIO_DEBUG
-               QDIO_DBF_TEXT4(0,trace,"inqisdon");
-               QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-               sprintf(dbf_text,"pf%02xcn%02x",q->first_to_check,no_used);
-               QDIO_DBF_TEXT4(0,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-               return 1;
-       } else {
-#ifdef CONFIG_QDIO_DEBUG
-               QDIO_DBF_TEXT4(0,trace,"inqisntd");
-               QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-               sprintf(dbf_text,"pf%02xcn%02x",q->first_to_check,no_used);
-               QDIO_DBF_TEXT4(0,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-               return 0;
-       }
-}
-
-static void
-qdio_kick_inbound_handler(struct qdio_q *q)
-{
-       int count, start, end, real_end, i;
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[15];
-#endif
-
-       QDIO_DBF_TEXT4(0,trace,"kickinh");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       start=q->first_element_to_kick;
-       real_end=q->first_to_check;
-       end=(real_end+QDIO_MAX_BUFFERS_PER_Q-1)&(QDIO_MAX_BUFFERS_PER_Q-1);
- 
-       i=start;
-       count=0;
-       while (1) {
-               count++;
-               if (i==end)
-                       break;
-               i=(i+1)&(QDIO_MAX_BUFFERS_PER_Q-1);
-       }
-
-#ifdef CONFIG_QDIO_DEBUG
-       sprintf(dbf_text,"s=%2xc=%2x",start,count);
-       QDIO_DBF_TEXT4(0,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-
-       if (likely(q->state==QDIO_IRQ_STATE_ACTIVE))
-               q->handler(q->cdev,
-                          QDIO_STATUS_INBOUND_INT|q->error_status_flags,
-                          q->qdio_error,q->siga_error,q->q_no,start,count,
-                          q->int_parm);
-
-       /* for the next time: */
-       q->first_element_to_kick=real_end;
-       q->qdio_error=0;
-       q->siga_error=0;
-       q->error_status_flags=0;
-
-       qdio_perf_stat_inc(&perf_stats.inbound_cnt);
-}
-
-static void
-__tiqdio_inbound_processing(struct qdio_q *q, int spare_ind_was_set)
-{
-       struct qdio_irq *irq_ptr;
-       struct qdio_q *oq;
-       int i;
-
-       QDIO_DBF_TEXT4(0,trace,"iqinproc");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       /* 
-        * we first want to reserve the q, so that we know, that we don't
-        * interrupt ourselves and call qdio_unmark_q, as is_in_shutdown might
-        * be set 
-        */
-       if (unlikely(qdio_reserve_q(q))) {
-               qdio_release_q(q);
-               qdio_perf_stat_inc(&perf_stats.inbound_thin_tl_runs_resched);
-               /* 
-                * as we might just be about to stop polling, we make
-                * sure that we check again at least once more 
-                */
-               tiqdio_sched_tl();
-               return;
-       }
-       qdio_perf_stat_inc(&perf_stats.inbound_thin_tl_runs);
-       if (unlikely(atomic_read(&q->is_in_shutdown))) {
-               qdio_unmark_q(q);
-               goto out;
-       }
-
-       /* 
-        * we reset spare_ind_was_set, when the queue does not use the
-        * spare indicator
-        */
-       if (spare_ind_was_set)
-               spare_ind_was_set = (q->dev_st_chg_ind == &spare_indicator);
-
-       if (!(*(q->dev_st_chg_ind)) && !spare_ind_was_set)
-               goto out;
-       /*
-        * q->dev_st_chg_ind is the indicator, be it shared or not.
-        * only clear it, if indicator is non-shared
-        */
-       if (q->dev_st_chg_ind != &spare_indicator)
-               tiqdio_clear_summary_bit((__u32*)q->dev_st_chg_ind);
-
-       if (q->hydra_gives_outbound_pcis) {
-               if (!q->siga_sync_done_on_thinints) {
-                       SYNC_MEMORY_ALL;
-               } else if (!q->siga_sync_done_on_outb_tis) {
-                       SYNC_MEMORY_ALL_OUTB;
-               }
-       } else {
-               SYNC_MEMORY;
-       }
-       /*
-        * maybe we have to do work on our outbound queues... at least
-        * we have to check the outbound-int-capable thinint-capable
-        * queues
-        */
-       if (q->hydra_gives_outbound_pcis) {
-               irq_ptr = (struct qdio_irq*)q->irq_ptr;
-               for (i=0;i<irq_ptr->no_output_qs;i++) {
-                       oq = irq_ptr->output_qs[i];
-                       if (!qdio_is_outbound_q_done(oq)) {
-                               qdio_perf_stat_dec(&perf_stats.tl_runs);
-                               __qdio_outbound_processing(oq);
-                       }
-               }
-       }
-
-       if (!qdio_has_inbound_q_moved(q))
-               goto out;
-
-       qdio_kick_inbound_handler(q);
-       if (tiqdio_is_inbound_q_done(q))
-               if (!qdio_stop_polling(q)) {
-                       /* 
-                        * we set the flags to get into the stuff next time,
-                        * see also comment in qdio_stop_polling 
-                        */
-                       tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind);
-                       tiqdio_sched_tl();
-               }
-out:
-       qdio_release_q(q);
-}
-
-static void
-tiqdio_inbound_processing(unsigned long q)
-{
-       __tiqdio_inbound_processing((struct qdio_q *) q,
-                                   atomic_read(&spare_indicator_usecount));
-}
-
-static void
-__qdio_inbound_processing(struct qdio_q *q)
-{
-       int q_laps=0;
-
-       QDIO_DBF_TEXT4(0,trace,"qinproc");
-       QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
-
-       if (unlikely(qdio_reserve_q(q))) {
-               qdio_release_q(q);
-               qdio_perf_stat_inc(&perf_stats.inbound_tl_runs_resched);
-               /* as we're sissies, we'll check next time */
-               if (likely(!atomic_read(&q->is_in_shutdown))) {
-                       qdio_mark_q(q);
-                       QDIO_DBF_TEXT4(0,trace,"busy,agn");
-               }
-               return;
-       }
-       qdio_perf_stat_inc(&perf_stats.inbound_tl_runs);
-       qdio_perf_stat_inc(&perf_stats.tl_runs);
-
-again:
-       if (qdio_has_inbound_q_moved(q)) {
-               qdio_kick_inbound_handler(q);
-               if (!qdio_stop_polling(q)) {
-                       q_laps++;
-                       if (q_laps<QDIO_Q_LAPS) 
-                               goto again;
-               }
-               qdio_mark_q(q);
-       } else {
-               if (!qdio_is_inbound_q_done(q)) 
-                        /* means poll time is not yet over */
-                       qdio_mark_q(q);
-       }
-
-       qdio_release_q(q);
-}
-
-static void
-qdio_inbound_processing(unsigned long q)
-{
-       __qdio_inbound_processing((struct qdio_q *) q);
-}
-
-/************************* MAIN ROUTINES *******************************/
-
-#ifdef QDIO_USE_PROCESSING_STATE
-static int
-tiqdio_reset_processing_state(struct qdio_q *q, int q_laps)
-{
-       if (!q) {
-               tiqdio_sched_tl();
-               return 0;
-       }
-
-       /* 
-        * under VM, we have not used the PROCESSING state, so no
-        * need to stop polling 
-        */
-       if (q->siga_sync)
-               return 2;
-
-       if (unlikely(qdio_reserve_q(q))) {
-               qdio_release_q(q);
-               qdio_perf_stat_inc(&perf_stats.inbound_thin_tl_runs_resched);
-               /* 
-                * as we might just be about to stop polling, we make
-                * sure that we check again at least once more 
-                */
-               
-               /* 
-                * sanity -- we'd get here without setting the
-                * dev st chg ind 
-                */
-               tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind);
-               tiqdio_sched_tl();
-               return 0;
-       }
-       if (qdio_stop_polling(q)) {
-               qdio_release_q(q);
-               return 2;
-       }               
-       if (q_laps<QDIO_Q_LAPS-1) {
-               qdio_release_q(q);
-               return 3;
-       }
-       /* 
-        * we set the flags to get into the stuff
-        * next time, see also comment in qdio_stop_polling 
-        */
-       tiqdio_set_summary_bit((__u32*)q->dev_st_chg_ind);
-       tiqdio_sched_tl();
-       qdio_release_q(q);
-       return 1;
-       
-}
-#endif /* QDIO_USE_PROCESSING_STATE */
-
-static void
-tiqdio_inbound_checks(void)
-{
-       struct qdio_q *q;
-       int spare_ind_was_set=0;
-#ifdef QDIO_USE_PROCESSING_STATE
-       int q_laps=0;
-#endif /* QDIO_USE_PROCESSING_STATE */
-
-       QDIO_DBF_TEXT4(0,trace,"iqdinbck");
-       QDIO_DBF_TEXT5(0,trace,"iqlocsum");
-
-#ifdef QDIO_USE_PROCESSING_STATE
-again:
-#endif /* QDIO_USE_PROCESSING_STATE */
-
-       /* when the spare indicator is used and set, save that and clear it */
-       if ((atomic_read(&spare_indicator_usecount)) && spare_indicator) {
-               spare_ind_was_set = 1;
-               tiqdio_clear_summary_bit((__u32*)&spare_indicator);
-       }
-
-       q=(struct qdio_q*)tiq_list;
-       do {
-               if (!q)
-                       break;
-               __tiqdio_inbound_processing(q, spare_ind_was_set);
-               q=(struct qdio_q*)q->list_next;
-       } while (q!=(struct qdio_q*)tiq_list);
-
-#ifdef QDIO_USE_PROCESSING_STATE
-       q=(struct qdio_q*)tiq_list;
-       do {
-               int ret;
-
-               ret = tiqdio_reset_processing_state(q, q_laps);
-               switch (ret) {
-               case 0:
-                       return;
-               case 1:
-                       q_laps++;
-               case 2:
-                       q = (struct qdio_q*)q->list_next;
-                       break;
-               default:
-                       q_laps++;
-                       goto again;
-               }
-       } while (q!=(struct qdio_q*)tiq_list);
-#endif /* QDIO_USE_PROCESSING_STATE */
-}
-
-static void
-tiqdio_tl(unsigned long data)
-{
-       QDIO_DBF_TEXT4(0,trace,"iqdio_tl");
-
-       qdio_perf_stat_inc(&perf_stats.tl_runs);
-
-       tiqdio_inbound_checks();
-}
-
-/********************* GENERAL HELPER_ROUTINES ***********************/
-
-static void
-qdio_release_irq_memory(struct qdio_irq *irq_ptr)
-{
-       int i;
-       struct qdio_q *q;
-
-       for (i = 0; i < QDIO_MAX_QUEUES_PER_IRQ; i++) {
-               q = irq_ptr->input_qs[i];
-               if (q) {
-                       free_page((unsigned long) q->slib);
-                       kmem_cache_free(qdio_q_cache, q);
-               }
-               q = irq_ptr->output_qs[i];
-               if (q) {
-                       free_page((unsigned long) q->slib);
-                       kmem_cache_free(qdio_q_cache, q);
-               }
-       }
-       free_page((unsigned long) irq_ptr->qdr);
-       free_page((unsigned long) irq_ptr);
-}
-
-static void
-qdio_set_impl_params(struct qdio_irq *irq_ptr,
-                    unsigned int qib_param_field_format,
-                    /* pointer to 128 bytes or NULL, if no param field */
-                    unsigned char *qib_param_field,
-                    /* pointer to no_queues*128 words of data or NULL */
-                    unsigned int no_input_qs,
-                    unsigned int no_output_qs,
-                    unsigned long *input_slib_elements,
-                    unsigned long *output_slib_elements)
-{
-       int i,j;
-
-       if (!irq_ptr)
-               return;
-
-       irq_ptr->qib.pfmt=qib_param_field_format;
-       if (qib_param_field)
-               memcpy(irq_ptr->qib.parm,qib_param_field,
-                      QDIO_MAX_BUFFERS_PER_Q);
-
-       if (input_slib_elements)
-               for (i=0;i<no_input_qs;i++) {
-                       for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++)
-                               irq_ptr->input_qs[i]->slib->slibe[j].parms=
-                                       input_slib_elements[
-                                               i*QDIO_MAX_BUFFERS_PER_Q+j];
-               }
-       if (output_slib_elements)
-               for (i=0;i<no_output_qs;i++) {
-                       for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++)
-                               irq_ptr->output_qs[i]->slib->slibe[j].parms=
-                                       output_slib_elements[
-                                               i*QDIO_MAX_BUFFERS_PER_Q+j];
-               }
-}
-
-static int
-qdio_alloc_qs(struct qdio_irq *irq_ptr,
-             int no_input_qs, int no_output_qs)
-{
-       int i;
-       struct qdio_q *q;
-
-       for (i = 0; i < no_input_qs; i++) {
-               q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL);
-               if (!q)
-                       return -ENOMEM;
-               memset(q, 0, sizeof(*q));
-
-               q->slib = (struct slib *) __get_free_page(GFP_KERNEL);
-               if (!q->slib) {
-                       kmem_cache_free(qdio_q_cache, q);
-                       return -ENOMEM;
-               }
-               irq_ptr->input_qs[i]=q;
-       }
-
-       for (i = 0; i < no_output_qs; i++) {
-               q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL);
-               if (!q)
-                       return -ENOMEM;
-               memset(q, 0, sizeof(*q));
-
-               q->slib = (struct slib *) __get_free_page(GFP_KERNEL);
-               if (!q->slib) {
-                       kmem_cache_free(qdio_q_cache, q);
-                       return -ENOMEM;
-               }
-               irq_ptr->output_qs[i]=q;
-       }
-       return 0;
-}
-
-static void
-qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev,
-                    int no_input_qs, int no_output_qs,
-            qdio_handler_t *input_handler,
-            qdio_handler_t *output_handler,
-            unsigned long int_parm,int q_format,
-            unsigned long flags,
-            void **inbound_sbals_array,
-            void **outbound_sbals_array)
-{
-       struct qdio_q *q;
-       int i,j;
-       char dbf_text[20]; /* see qdio_initialize */
-       void *ptr;
-       int available;
-
-       sprintf(dbf_text,"qfqs%4x",cdev->private->schid.sch_no);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       for (i=0;i<no_input_qs;i++) {
-               q=irq_ptr->input_qs[i];
-
-               memset(q,0,((char*)&q->slib)-((char*)q));
-               sprintf(dbf_text,"in-q%4x",i);
-               QDIO_DBF_TEXT0(0,setup,dbf_text);
-               QDIO_DBF_HEX0(0,setup,&q,sizeof(void*));
-
-               memset(q->slib,0,PAGE_SIZE);
-               q->sl=(struct sl*)(((char*)q->slib)+PAGE_SIZE/2);
-
-               available=0;
-
-               for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++)
-                       q->sbal[j]=*(inbound_sbals_array++);
-
-                q->queue_type=q_format;
-               q->int_parm=int_parm;
-               q->schid = irq_ptr->schid;
-               q->irq_ptr = irq_ptr;
-               q->cdev = cdev;
-               q->mask=1<<(31-i);
-               q->q_no=i;
-               q->is_input_q=1;
-               q->first_to_check=0;
-               q->last_move_ftc=0;
-               q->handler=input_handler;
-               q->dev_st_chg_ind=irq_ptr->dev_st_chg_ind;
-
-               /* q->is_thinint_q isn't valid at this time, but
-                * irq_ptr->is_thinint_irq is
-                */
-               if (irq_ptr->is_thinint_irq)
-                       tasklet_init(&q->tasklet, tiqdio_inbound_processing,
-                                    (unsigned long) q);
-               else
-                       tasklet_init(&q->tasklet, qdio_inbound_processing,
-                                    (unsigned long) q);
-
-               /* actually this is not used for inbound queues. yet. */
-               atomic_set(&q->busy_siga_counter,0);
-               q->timing.busy_start=0;
-
-/*             for (j=0;j<QDIO_STATS_NUMBER;j++)
-                       q->timing.last_transfer_times[j]=(qdio_get_micros()/
-                                                         QDIO_STATS_NUMBER)*j;
-               q->timing.last_transfer_index=QDIO_STATS_NUMBER-1;
-*/
-
-               /* fill in slib */
-               if (i>0) irq_ptr->input_qs[i-1]->slib->nsliba=
-                                (unsigned long)(q->slib);
-               q->slib->sla=(unsigned long)(q->sl);
-               q->slib->slsba=(unsigned long)(&q->slsb.acc.val[0]);
-
-               /* fill in sl */
-               for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++)
-                       q->sl->element[j].sbal=(unsigned long)(q->sbal[j]);
-
-               QDIO_DBF_TEXT2(0,setup,"sl-sb-b0");
-               ptr=(void*)q->sl;
-               QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*));
-               ptr=(void*)&q->slsb;
-               QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*));
-               ptr=(void*)q->sbal[0];
-               QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*));
-
-               /* fill in slsb */
-               if (!irq_ptr->is_qebsm) {
-                        unsigned int count = 1;
-                        for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++)
-                                set_slsb(q, &j, SLSB_P_INPUT_NOT_INIT, &count);
-                }
-       }
-
-       for (i=0;i<no_output_qs;i++) {
-               q=irq_ptr->output_qs[i];
-               memset(q,0,((char*)&q->slib)-((char*)q));
-
-               sprintf(dbf_text,"outq%4x",i);
-               QDIO_DBF_TEXT0(0,setup,dbf_text);
-               QDIO_DBF_HEX0(0,setup,&q,sizeof(void*));
-
-               memset(q->slib,0,PAGE_SIZE);
-               q->sl=(struct sl*)(((char*)q->slib)+PAGE_SIZE/2);
-
-               available=0;
-               
-               for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++)
-                       q->sbal[j]=*(outbound_sbals_array++);
-
-                q->queue_type=q_format;
-               if ((q->queue_type == QDIO_IQDIO_QFMT) &&
-                   (no_output_qs > 1) &&
-                   (i == no_output_qs-1))
-                       q->queue_type = QDIO_IQDIO_QFMT_ASYNCH;
-               q->int_parm=int_parm;
-               q->is_input_q=0;
-               q->is_pci_out = 0;
-               q->schid = irq_ptr->schid;
-               q->cdev = cdev;
-               q->irq_ptr = irq_ptr;
-               q->mask=1<<(31-i);
-               q->q_no=i;
-               q->first_to_check=0;
-               q->last_move_ftc=0;
-               q->handler=output_handler;
-
-               tasklet_init(&q->tasklet, qdio_outbound_processing,
-                            (unsigned long) q);
-               setup_timer(&q->timer, qdio_outbound_processing,
-                           (unsigned long) q);
-
-               atomic_set(&q->busy_siga_counter,0);
-               q->timing.busy_start=0;
-
-               /* fill in slib */
-               if (i>0) irq_ptr->output_qs[i-1]->slib->nsliba=
-                                (unsigned long)(q->slib);
-               q->slib->sla=(unsigned long)(q->sl);
-               q->slib->slsba=(unsigned long)(&q->slsb.acc.val[0]);
-
-               /* fill in sl */
-               for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++)
-                       q->sl->element[j].sbal=(unsigned long)(q->sbal[j]);
-
-               QDIO_DBF_TEXT2(0,setup,"sl-sb-b0");
-               ptr=(void*)q->sl;
-               QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*));
-               ptr=(void*)&q->slsb;
-               QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*));
-               ptr=(void*)q->sbal[0];
-               QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*));
-
-               /* fill in slsb */
-                if (!irq_ptr->is_qebsm) {
-                        unsigned int count = 1;
-                        for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++)
-                                set_slsb(q, &j, SLSB_P_OUTPUT_NOT_INIT, &count);
-                }
-       }
-}
-
-static void
-qdio_fill_thresholds(struct qdio_irq *irq_ptr,
-                    unsigned int no_input_qs,
-                    unsigned int no_output_qs,
-                    unsigned int min_input_threshold,
-                    unsigned int max_input_threshold,
-                    unsigned int min_output_threshold,
-                    unsigned int max_output_threshold)
-{
-       int i;
-       struct qdio_q *q;
-
-       for (i=0;i<no_input_qs;i++) {
-               q=irq_ptr->input_qs[i];
-               q->timing.threshold=max_input_threshold;
-/*             for (j=0;j<QDIO_STATS_CLASSES;j++) {
-                       q->threshold_classes[j].threshold=
-                               min_input_threshold+
-                               (max_input_threshold-min_input_threshold)/
-                               QDIO_STATS_CLASSES;
-               }
-               qdio_use_thresholds(q,QDIO_STATS_CLASSES/2);*/
-       }
-       for (i=0;i<no_output_qs;i++) {
-               q=irq_ptr->output_qs[i];
-               q->timing.threshold=max_output_threshold;
-/*             for (j=0;j<QDIO_STATS_CLASSES;j++) {
-                       q->threshold_classes[j].threshold=
-                               min_output_threshold+
-                               (max_output_threshold-min_output_threshold)/
-                               QDIO_STATS_CLASSES;
-               }
-               qdio_use_thresholds(q,QDIO_STATS_CLASSES/2);*/
-       }
-}
-
-static void tiqdio_thinint_handler(void *ind, void *drv_data)
-{
-       QDIO_DBF_TEXT4(0,trace,"thin_int");
-
-       qdio_perf_stat_inc(&perf_stats.thinints);
-
-       /* SVS only when needed:
-        * issue SVS to benefit from iqdio interrupt avoidance
-        * (SVS clears AISOI)*/
-       if (!omit_svs)
-               tiqdio_clear_global_summary();
-
-       tiqdio_inbound_checks();
-}
-
-static void
-qdio_set_state(struct qdio_irq *irq_ptr, enum qdio_irq_states state)
-{
-       int i;
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[15];
-
-       QDIO_DBF_TEXT5(0,trace,"newstate");
-       sprintf(dbf_text,"%4x%4x",irq_ptr->schid.sch_no,state);
-       QDIO_DBF_TEXT5(0,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-
-       irq_ptr->state=state;
-       for (i=0;i<irq_ptr->no_input_qs;i++)
-               irq_ptr->input_qs[i]->state=state;
-       for (i=0;i<irq_ptr->no_output_qs;i++)
-               irq_ptr->output_qs[i]->state=state;
-       mb();
-}
-
-static void
-qdio_irq_check_sense(struct subchannel_id schid, struct irb *irb)
-{
-       char dbf_text[15];
-
-       if (irb->esw.esw0.erw.cons) {
-               sprintf(dbf_text,"sens%4x",schid.sch_no);
-               QDIO_DBF_TEXT2(1,trace,dbf_text);
-               QDIO_DBF_HEX0(0,sense,irb,QDIO_DBF_SENSE_LEN);
-
-               QDIO_PRINT_WARN("sense data available on qdio channel.\n");
-               QDIO_HEXDUMP16(WARN,"irb: ",irb);
-               QDIO_HEXDUMP16(WARN,"sense data: ",irb->ecw);
-       }
-               
-}
-
-static void
-qdio_handle_pci(struct qdio_irq *irq_ptr)
-{
-       int i;
-       struct qdio_q *q;
-
-       qdio_perf_stat_inc(&perf_stats.pcis);
-       for (i=0;i<irq_ptr->no_input_qs;i++) {
-               q=irq_ptr->input_qs[i];
-               if (q->is_input_q&QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT)
-                       qdio_mark_q(q);
-               else {
-                       qdio_perf_stat_dec(&perf_stats.tl_runs);
-                       __qdio_inbound_processing(q);
-               }
-       }
-       if (!irq_ptr->hydra_gives_outbound_pcis)
-               return;
-       for (i=0;i<irq_ptr->no_output_qs;i++) {
-               q=irq_ptr->output_qs[i];
-               if (qdio_is_outbound_q_done(q))
-                       continue;
-               qdio_perf_stat_dec(&perf_stats.tl_runs);
-               if (!irq_ptr->sync_done_on_outb_pcis)
-                       SYNC_MEMORY;
-               __qdio_outbound_processing(q);
-       }
-}
-
-static void qdio_establish_handle_irq(struct ccw_device*, int, int);
-
-static void
-qdio_handle_activate_check(struct ccw_device *cdev, unsigned long intparm,
-                          int cstat, int dstat)
-{
-       struct qdio_irq *irq_ptr;
-       struct qdio_q *q;
-       char dbf_text[15];
-
-       irq_ptr = cdev->private->qdio_data;
-
-       QDIO_DBF_TEXT2(1, trace, "ick2");
-       sprintf(dbf_text,"%s", cdev->dev.bus_id);
-       QDIO_DBF_TEXT2(1,trace,dbf_text);
-       QDIO_DBF_HEX2(0,trace,&intparm,sizeof(int));
-       QDIO_DBF_HEX2(0,trace,&dstat,sizeof(int));
-       QDIO_DBF_HEX2(0,trace,&cstat,sizeof(int));
-       QDIO_PRINT_ERR("received check condition on activate " \
-                      "queues on device %s (cs=x%x, ds=x%x).\n",
-                      cdev->dev.bus_id, cstat, dstat);
-       if (irq_ptr->no_input_qs) {
-               q=irq_ptr->input_qs[0];
-       } else if (irq_ptr->no_output_qs) {
-               q=irq_ptr->output_qs[0];
-       } else {
-               QDIO_PRINT_ERR("oops... no queue registered for device %s!?\n",
-                              cdev->dev.bus_id);
-               goto omit_handler_call;
-       }
-       q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION|
-                  QDIO_STATUS_LOOK_FOR_ERROR,
-                  0,0,0,-1,-1,q->int_parm);
-omit_handler_call:
-       qdio_set_state(irq_ptr,QDIO_IRQ_STATE_STOPPED);
-
-}
-
-static void
-qdio_call_shutdown(struct work_struct *work)
-{
-       struct ccw_device_private *priv;
-       struct ccw_device *cdev;
-
-       priv = container_of(work, struct ccw_device_private, kick_work);
-       cdev = priv->cdev;
-       qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
-       put_device(&cdev->dev);
-}
-
-static void
-qdio_timeout_handler(struct ccw_device *cdev)
-{
-       struct qdio_irq *irq_ptr;
-       char dbf_text[15];
-
-       QDIO_DBF_TEXT2(0, trace, "qtoh");
-       sprintf(dbf_text, "%s", cdev->dev.bus_id);
-       QDIO_DBF_TEXT2(0, trace, dbf_text);
-
-       irq_ptr = cdev->private->qdio_data;
-       sprintf(dbf_text, "state:%d", irq_ptr->state);
-       QDIO_DBF_TEXT2(0, trace, dbf_text);
-
-       switch (irq_ptr->state) {
-       case QDIO_IRQ_STATE_INACTIVE:
-               QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: timed out\n",
-                              irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
-               QDIO_DBF_TEXT2(1,setup,"eq:timeo");
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
-               break;
-       case QDIO_IRQ_STATE_CLEANUP:
-               QDIO_PRINT_INFO("Did not get interrupt on cleanup, "
-                               "irq=0.%x.%x.\n",
-                               irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
-               break;
-       case QDIO_IRQ_STATE_ESTABLISHED:
-       case QDIO_IRQ_STATE_ACTIVE:
-               /* I/O has been terminated by common I/O layer. */
-               QDIO_PRINT_INFO("Queues on irq 0.%x.%04x killed by cio.\n",
-                               irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
-               QDIO_DBF_TEXT2(1, trace, "cio:term");
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED);
-               if (get_device(&cdev->dev)) {
-                       /* Can't call shutdown from interrupt context. */
-                       PREPARE_WORK(&cdev->private->kick_work,
-                                    qdio_call_shutdown);
-                       queue_work(ccw_device_work, &cdev->private->kick_work);
-               }
-               break;
-       default:
-               BUG();
-       }
-       wake_up(&cdev->private->wait_q);
-}
-
-static void
-qdio_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
-{
-       struct qdio_irq *irq_ptr;
-       int cstat,dstat;
-       char dbf_text[15];
-
-#ifdef CONFIG_QDIO_DEBUG
-       QDIO_DBF_TEXT4(0, trace, "qint");
-       sprintf(dbf_text, "%s", cdev->dev.bus_id);
-       QDIO_DBF_TEXT4(0, trace, dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-       
-       if (!intparm) {
-               QDIO_PRINT_ERR("got unsolicited interrupt in qdio " \
-                                 "handler, device %s\n", cdev->dev.bus_id);
-               return;
-       }
-
-       irq_ptr = cdev->private->qdio_data;
-       if (!irq_ptr) {
-               QDIO_DBF_TEXT2(1, trace, "uint");
-               sprintf(dbf_text,"%s", cdev->dev.bus_id);
-               QDIO_DBF_TEXT2(1,trace,dbf_text);
-               QDIO_PRINT_ERR("received interrupt on unused device %s!\n",
-                              cdev->dev.bus_id);
-               return;
-       }
-
-       if (IS_ERR(irb)) {
-               /* Currently running i/o is in error. */
-               switch (PTR_ERR(irb)) {
-               case -EIO:
-                       QDIO_PRINT_ERR("i/o error on device %s\n",
-                                      cdev->dev.bus_id);
-                       qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
-                       wake_up(&cdev->private->wait_q);
-                       return;
-               case -ETIMEDOUT:
-                       qdio_timeout_handler(cdev);
-                       return;
-               default:
-                       QDIO_PRINT_ERR("unknown error state %ld on device %s\n",
-                                      PTR_ERR(irb), cdev->dev.bus_id);
-                       return;
-               }
-       }
-
-       qdio_irq_check_sense(irq_ptr->schid, irb);
-
-#ifdef CONFIG_QDIO_DEBUG
-       sprintf(dbf_text, "state:%d", irq_ptr->state);
-       QDIO_DBF_TEXT4(0, trace, dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-
-       cstat = irb->scsw.cmd.cstat;
-       dstat = irb->scsw.cmd.dstat;
-
-       switch (irq_ptr->state) {
-       case QDIO_IRQ_STATE_INACTIVE:
-               qdio_establish_handle_irq(cdev, cstat, dstat);
-               break;
-
-       case QDIO_IRQ_STATE_CLEANUP:
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
-               break;
-
-       case QDIO_IRQ_STATE_ESTABLISHED:
-       case QDIO_IRQ_STATE_ACTIVE:
-               if (cstat & SCHN_STAT_PCI) {
-                       qdio_handle_pci(irq_ptr);
-                       break;
-               }
-
-               if ((cstat&~SCHN_STAT_PCI)||dstat) {
-                       qdio_handle_activate_check(cdev, intparm, cstat, dstat);
-                       break;
-               }
-       default:
-               QDIO_PRINT_ERR("got interrupt for queues in state %d on " \
-                              "device %s?!\n",
-                              irq_ptr->state, cdev->dev.bus_id);
-       }
-       wake_up(&cdev->private->wait_q);
-
-}
-
-int
-qdio_synchronize(struct ccw_device *cdev, unsigned int flags,
-                unsigned int queue_number)
-{
-       int cc = 0;
-       struct qdio_q *q;
-       struct qdio_irq *irq_ptr;
-       void *ptr;
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[15]="SyncXXXX";
-#endif
-
-       irq_ptr = cdev->private->qdio_data;
-       if (!irq_ptr)
-               return -ENODEV;
-
-#ifdef CONFIG_QDIO_DEBUG
-       *((int*)(&dbf_text[4])) = irq_ptr->schid.sch_no;
-       QDIO_DBF_HEX4(0,trace,dbf_text,QDIO_DBF_TRACE_LEN);
-       *((int*)(&dbf_text[0]))=flags;
-       *((int*)(&dbf_text[4]))=queue_number;
-       QDIO_DBF_HEX4(0,trace,dbf_text,QDIO_DBF_TRACE_LEN);
-#endif /* CONFIG_QDIO_DEBUG */
-
-       if (flags&QDIO_FLAG_SYNC_INPUT) {
-               q=irq_ptr->input_qs[queue_number];
-               if (!q)
-                       return -EINVAL;
-               if (!(irq_ptr->is_qebsm))
-                       cc = do_siga_sync(q->schid, 0, q->mask);
-       } else if (flags&QDIO_FLAG_SYNC_OUTPUT) {
-               q=irq_ptr->output_qs[queue_number];
-               if (!q)
-                       return -EINVAL;
-               if (!(irq_ptr->is_qebsm))
-                       cc = do_siga_sync(q->schid, q->mask, 0);
-       } else 
-               return -EINVAL;
-
-       ptr=&cc;
-       if (cc)
-               QDIO_DBF_HEX3(0,trace,&ptr,sizeof(int));
-
-       return cc;
-}
-
-static int
-qdio_get_ssqd_information(struct subchannel_id *schid,
-                         struct qdio_chsc_ssqd **ssqd_area)
-{
-       int result;
-
-       QDIO_DBF_TEXT0(0, setup, "getssqd");
-       *ssqd_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC);
-       if (!ssqd_area) {
-               QDIO_PRINT_WARN("Could not get memory for chsc on sch x%x.\n",
-                               schid->sch_no);
-               return -ENOMEM;
-       }
-
-       (*ssqd_area)->request = (struct chsc_header) {
-               .length = 0x0010,
-               .code   = 0x0024,
-       };
-       (*ssqd_area)->first_sch = schid->sch_no;
-       (*ssqd_area)->last_sch = schid->sch_no;
-       (*ssqd_area)->ssid = schid->ssid;
-       result = chsc(*ssqd_area);
-
-       if (result) {
-               QDIO_PRINT_WARN("CHSC returned cc %i on sch 0.%x.%x.\n",
-                               result, schid->ssid, schid->sch_no);
-               goto out;
-       }
-
-       if ((*ssqd_area)->response.code != QDIO_CHSC_RESPONSE_CODE_OK) {
-               QDIO_PRINT_WARN("CHSC response is 0x%x on sch 0.%x.%x.\n",
-                               (*ssqd_area)->response.code,
-                               schid->ssid, schid->sch_no);
-               goto out;
-       }
-       if (!((*ssqd_area)->flags & CHSC_FLAG_QDIO_CAPABILITY) ||
-           !((*ssqd_area)->flags & CHSC_FLAG_VALIDITY) ||
-           ((*ssqd_area)->sch != schid->sch_no)) {
-               QDIO_PRINT_WARN("huh? problems checking out sch 0.%x.%x... " \
-                               "using all SIGAs.\n",
-                               schid->ssid, schid->sch_no);
-               goto out;
-       }
-       return 0;
-out:
-       return -EINVAL;
-}
-
-int
-qdio_get_ssqd_pct(struct ccw_device *cdev)
-{
-       struct qdio_chsc_ssqd *ssqd_area;
-       struct subchannel_id schid;
-       char dbf_text[15];
-       int rc;
-       int pct = 0;
-
-       QDIO_DBF_TEXT0(0, setup, "getpct");
-       schid = ccw_device_get_subchannel_id(cdev);
-       rc = qdio_get_ssqd_information(&schid, &ssqd_area);
-       if (!rc)
-               pct = (int)ssqd_area->pct;
-       if (rc != -ENOMEM)
-               mempool_free(ssqd_area, qdio_mempool_scssc);
-       sprintf(dbf_text, "pct: %d", pct);
-       QDIO_DBF_TEXT2(0, setup, dbf_text);
-       return pct;
-}
-EXPORT_SYMBOL(qdio_get_ssqd_pct);
-
-static void
-qdio_check_subchannel_qebsm(struct qdio_irq *irq_ptr, unsigned long token)
-{
-       struct qdio_q *q;
-       int i;
-       unsigned int count, start_buf;
-       char dbf_text[15];
-
-       /*check if QEBSM is disabled */
-       if (!(irq_ptr->is_qebsm) || !(irq_ptr->qdioac & 0x01)) {
-               irq_ptr->is_qebsm  = 0;
-               irq_ptr->sch_token = 0;
-               irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM;
-               QDIO_DBF_TEXT0(0,setup,"noV=V");
-               return;
-       }
-       irq_ptr->sch_token = token;
-       /*input queue*/
-       for (i = 0; i < irq_ptr->no_input_qs;i++) {
-               q = irq_ptr->input_qs[i];
-               count = QDIO_MAX_BUFFERS_PER_Q;
-               start_buf = 0;
-               set_slsb(q, &start_buf, SLSB_P_INPUT_NOT_INIT, &count);
-       }
-       sprintf(dbf_text,"V=V:%2x",irq_ptr->is_qebsm);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       sprintf(dbf_text,"%8lx",irq_ptr->sch_token);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       /*output queue*/
-       for (i = 0; i < irq_ptr->no_output_qs; i++) {
-               q = irq_ptr->output_qs[i];
-               count = QDIO_MAX_BUFFERS_PER_Q;
-               start_buf = 0;
-               set_slsb(q, &start_buf, SLSB_P_OUTPUT_NOT_INIT, &count);
-       }
-}
-
-static void
-qdio_get_ssqd_siga(struct qdio_irq *irq_ptr)
-{
-       int rc;
-       struct qdio_chsc_ssqd *ssqd_area;
-
-       QDIO_DBF_TEXT0(0,setup,"getssqd");
-       irq_ptr->qdioac = 0;
-       rc = qdio_get_ssqd_information(&irq_ptr->schid, &ssqd_area);
-       if (rc) {
-               QDIO_PRINT_WARN("using all SIGAs for sch x%x.n",
-                       irq_ptr->schid.sch_no);
-               irq_ptr->qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY |
-                                 CHSC_FLAG_SIGA_OUTPUT_NECESSARY |
-                                 CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */
-               irq_ptr->is_qebsm = 0;
-       } else
-               irq_ptr->qdioac = ssqd_area->qdioac1;
-
-       qdio_check_subchannel_qebsm(irq_ptr, ssqd_area->sch_token);
-       if (rc != -ENOMEM)
-               mempool_free(ssqd_area, qdio_mempool_scssc);
-}
-
-static unsigned int
-tiqdio_check_chsc_availability(void)
-{
-       char dbf_text[15];
-
-       /* Check for bit 41. */
-       if (!css_general_characteristics.aif) {
-               QDIO_PRINT_WARN("Adapter interruption facility not " \
-                               "installed.\n");
-               return -ENOENT;
-       }
-
-       /* Check for bits 107 and 108. */
-       if (!css_chsc_characteristics.scssc ||
-           !css_chsc_characteristics.scsscf) {
-               QDIO_PRINT_WARN("Set Chan Subsys. Char. & Fast-CHSCs " \
-                               "not available.\n");
-               return -ENOENT;
-       }
-
-       /* Check for OSA/FCP thin interrupts (bit 67). */
-       hydra_thinints = css_general_characteristics.aif_osa;
-       sprintf(dbf_text,"hydrati%1x", hydra_thinints);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-
-#ifdef CONFIG_64BIT
-       /* Check for QEBSM support in general (bit 58). */
-       is_passthrough = css_general_characteristics.qebsm;
-#endif
-       sprintf(dbf_text,"cssQBS:%1x", is_passthrough);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-
-       /* Check for aif time delay disablement fac (bit 56). If installed,
-        * omit svs even under lpar (good point by rick again) */
-       omit_svs = css_general_characteristics.aif_tdd;
-       sprintf(dbf_text,"omitsvs%1x", omit_svs);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       return 0;
-}
-
-
-static unsigned int
-tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero)
-{
-       unsigned long real_addr_local_summary_bit;
-       unsigned long real_addr_dev_st_chg_ind;
-       void *ptr;
-       char dbf_text[15];
-
-       unsigned int resp_code;
-       int result;
-
-       struct {
-               struct chsc_header request;
-               u16 operation_code;
-               u16 reserved1;
-               u32 reserved2;
-               u32 reserved3;
-               u64 summary_indicator_addr;
-               u64 subchannel_indicator_addr;
-               u32 ks:4;
-               u32 kc:4;
-               u32 reserved4:21;
-               u32 isc:3;
-               u32 word_with_d_bit;
-               /* set to 0x10000000 to enable
-                * time delay disablement facility */
-               u32 reserved5;
-               struct subchannel_id schid;
-               u32 reserved6[1004];
-               struct chsc_header response;
-               u32 reserved7;
-       } *scssc_area;
-
-       if (!irq_ptr->is_thinint_irq)
-               return -ENODEV;
-
-       if (reset_to_zero) {
-               real_addr_local_summary_bit=0;
-               real_addr_dev_st_chg_ind=0;
-       } else {
-               real_addr_local_summary_bit=
-                       virt_to_phys((volatile void *)tiqdio_ind);
-               real_addr_dev_st_chg_ind=
-                       virt_to_phys((volatile void *)irq_ptr->dev_st_chg_ind);
-       }
-
-       scssc_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC);
-       if (!scssc_area) {
-               QDIO_PRINT_WARN("No memory for setting indicators on " \
-                               "subchannel 0.%x.%x.\n",
-                               irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
-               return -ENOMEM;
-       }
-       scssc_area->request = (struct chsc_header) {
-               .length = 0x0fe0,
-               .code   = 0x0021,
-       };
-       scssc_area->operation_code = 0;
-
-        scssc_area->summary_indicator_addr = real_addr_local_summary_bit;
-       scssc_area->subchannel_indicator_addr = real_addr_dev_st_chg_ind;
-       scssc_area->ks = QDIO_STORAGE_KEY;
-       scssc_area->kc = QDIO_STORAGE_KEY;
-       scssc_area->isc = TIQDIO_THININT_ISC;
-       scssc_area->schid = irq_ptr->schid;
-       /* enables the time delay disablement facility. Don't care
-        * whether it is really there (i.e. we haven't checked for
-        * it) */
-       if (css_general_characteristics.aif_tdd)
-               scssc_area->word_with_d_bit = 0x10000000;
-       else
-               QDIO_PRINT_WARN("Time delay disablement facility " \
-                               "not available\n");
-
-       result = chsc(scssc_area);
-       if (result) {
-               QDIO_PRINT_WARN("could not set indicators on irq 0.%x.%x, " \
-                               "cc=%i.\n",
-                               irq_ptr->schid.ssid, irq_ptr->schid.sch_no,result);
-               result = -EIO;
-               goto out;
-       }
-
-       resp_code = scssc_area->response.code;
-       if (resp_code!=QDIO_CHSC_RESPONSE_CODE_OK) {
-               QDIO_PRINT_WARN("response upon setting indicators " \
-                               "is 0x%x.\n",resp_code);
-               sprintf(dbf_text,"sidR%4x",resp_code);
-               QDIO_DBF_TEXT1(0,trace,dbf_text);
-               QDIO_DBF_TEXT1(0,setup,dbf_text);
-               ptr=&scssc_area->response;
-               QDIO_DBF_HEX2(1,setup,&ptr,QDIO_DBF_SETUP_LEN);
-               result = -EIO;
-               goto out;
-       }
-
-       QDIO_DBF_TEXT2(0,setup,"setscind");
-       QDIO_DBF_HEX2(0,setup,&real_addr_local_summary_bit,
-                     sizeof(unsigned long));
-       QDIO_DBF_HEX2(0,setup,&real_addr_dev_st_chg_ind,sizeof(unsigned long));
-       result = 0;
-out:
-       mempool_free(scssc_area, qdio_mempool_scssc);
-       return result;
-
-}
-
-static unsigned int
-tiqdio_set_delay_target(struct qdio_irq *irq_ptr, unsigned long delay_target)
-{
-       unsigned int resp_code;
-       int result;
-       void *ptr;
-       char dbf_text[15];
-
-       struct {
-               struct chsc_header request;
-               u16 operation_code;
-               u16 reserved1;
-               u32 reserved2;
-               u32 reserved3;
-               u32 reserved4[2];
-               u32 delay_target;
-               u32 reserved5[1009];
-               struct chsc_header response;
-               u32 reserved6;
-       } *scsscf_area;
-
-       if (!irq_ptr->is_thinint_irq)
-               return -ENODEV;
-
-       scsscf_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC);
-       if (!scsscf_area) {
-               QDIO_PRINT_WARN("No memory for setting delay target on " \
-                               "subchannel 0.%x.%x.\n",
-                               irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
-               return -ENOMEM;
-       }
-       scsscf_area->request = (struct chsc_header) {
-               .length = 0x0fe0,
-               .code   = 0x1027,
-       };
-
-       scsscf_area->delay_target = delay_target<<16;
-
-       result=chsc(scsscf_area);
-       if (result) {
-               QDIO_PRINT_WARN("could not set delay target on irq 0.%x.%x, " \
-                               "cc=%i. Continuing.\n",
-                               irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
-                               result);
-               result = -EIO;
-               goto out;
-       }
-
-       resp_code = scsscf_area->response.code;
-       if (resp_code!=QDIO_CHSC_RESPONSE_CODE_OK) {
-               QDIO_PRINT_WARN("response upon setting delay target " \
-                               "is 0x%x. Continuing.\n",resp_code);
-               sprintf(dbf_text,"sdtR%4x",resp_code);
-               QDIO_DBF_TEXT1(0,trace,dbf_text);
-               QDIO_DBF_TEXT1(0,setup,dbf_text);
-               ptr=&scsscf_area->response;
-               QDIO_DBF_HEX2(1,trace,&ptr,QDIO_DBF_TRACE_LEN);
-       }
-       QDIO_DBF_TEXT2(0,trace,"delytrgt");
-       QDIO_DBF_HEX2(0,trace,&delay_target,sizeof(unsigned long));
-       result = 0; /* not critical */
-out:
-       mempool_free(scsscf_area, qdio_mempool_scssc);
-       return result;
-}
-
-int
-qdio_cleanup(struct ccw_device *cdev, int how)
-{
-       struct qdio_irq *irq_ptr;
-       char dbf_text[15];
-       int rc;
-
-       irq_ptr = cdev->private->qdio_data;
-       if (!irq_ptr)
-               return -ENODEV;
-
-       sprintf(dbf_text,"qcln%4x",irq_ptr->schid.sch_no);
-       QDIO_DBF_TEXT1(0,trace,dbf_text);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-
-       rc = qdio_shutdown(cdev, how);
-       if ((rc == 0) || (rc == -EINPROGRESS))
-               rc = qdio_free(cdev);
-       return rc;
-}
-
-int
-qdio_shutdown(struct ccw_device *cdev, int how)
-{
-       struct qdio_irq *irq_ptr;
-       int i;
-       int result = 0;
-       int rc;
-       unsigned long flags;
-       int timeout;
-       char dbf_text[15];
-
-       irq_ptr = cdev->private->qdio_data;
-       if (!irq_ptr)
-               return -ENODEV;
-
-       down(&irq_ptr->setting_up_sema);
-
-       sprintf(dbf_text,"qsqs%4x",irq_ptr->schid.sch_no);
-       QDIO_DBF_TEXT1(0,trace,dbf_text);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-
-       /* mark all qs as uninteresting */
-       for (i=0;i<irq_ptr->no_input_qs;i++)
-               atomic_set(&irq_ptr->input_qs[i]->is_in_shutdown,1);
-
-       for (i=0;i<irq_ptr->no_output_qs;i++)
-               atomic_set(&irq_ptr->output_qs[i]->is_in_shutdown,1);
-
-       tasklet_kill(&tiqdio_tasklet);
-
-       for (i=0;i<irq_ptr->no_input_qs;i++) {
-               qdio_unmark_q(irq_ptr->input_qs[i]);
-               tasklet_kill(&irq_ptr->input_qs[i]->tasklet);
-               wait_event_interruptible_timeout(cdev->private->wait_q,
-                                                !atomic_read(&irq_ptr->
-                                                             input_qs[i]->
-                                                             use_count),
-                                                QDIO_NO_USE_COUNT_TIMEOUT);
-               if (atomic_read(&irq_ptr->input_qs[i]->use_count))
-                       result=-EINPROGRESS;
-       }
-
-       for (i=0;i<irq_ptr->no_output_qs;i++) {
-               tasklet_kill(&irq_ptr->output_qs[i]->tasklet);
-               del_timer(&irq_ptr->output_qs[i]->timer);
-               wait_event_interruptible_timeout(cdev->private->wait_q,
-                                                !atomic_read(&irq_ptr->
-                                                             output_qs[i]->
-                                                             use_count),
-                                                QDIO_NO_USE_COUNT_TIMEOUT);
-               if (atomic_read(&irq_ptr->output_qs[i]->use_count))
-                       result=-EINPROGRESS;
-       }
-
-       /* cleanup subchannel */
-       spin_lock_irqsave(get_ccwdev_lock(cdev),flags);
-       if (how&QDIO_FLAG_CLEANUP_USING_CLEAR) {
-               rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP);
-               timeout=QDIO_CLEANUP_CLEAR_TIMEOUT;
-       } else if (how&QDIO_FLAG_CLEANUP_USING_HALT) {
-               rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP);
-               timeout=QDIO_CLEANUP_HALT_TIMEOUT;
-       } else { /* default behaviour */
-               rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP);
-               timeout=QDIO_CLEANUP_HALT_TIMEOUT;
-       }
-       if (rc == -ENODEV) {
-               /* No need to wait for device no longer present. */
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
-               spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
-       } else if (((void *)cdev->handler != (void *)qdio_handler) && rc == 0) {
-               /*
-                * Whoever put another handler there, has to cope with the
-                * interrupt theirself. Might happen if qdio_shutdown was
-                * called on already shutdown queues, but this shouldn't have
-                * bad side effects.
-                */
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
-               spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
-       } else if (rc == 0) {
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP);
-               spin_unlock_irqrestore(get_ccwdev_lock(cdev),flags);
-
-               wait_event_interruptible_timeout(cdev->private->wait_q,
-                       irq_ptr->state == QDIO_IRQ_STATE_INACTIVE ||
-                       irq_ptr->state == QDIO_IRQ_STATE_ERR,
-                       timeout);
-       } else {
-               QDIO_PRINT_INFO("ccw_device_{halt,clear} returned %d for "
-                               "device %s\n", result, cdev->dev.bus_id);
-               spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
-               result = rc;
-               goto out;
-       }
-       if (irq_ptr->is_thinint_irq) {
-               qdio_put_indicator((__u32*)irq_ptr->dev_st_chg_ind);
-               tiqdio_set_subchannel_ind(irq_ptr,1); 
-                /* reset adapter interrupt indicators */
-       }
-
-       /* exchange int handlers, if necessary */
-       if ((void*)cdev->handler == (void*)qdio_handler)
-               cdev->handler=irq_ptr->original_int_handler;
-
-       /* Ignore errors. */
-       qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
-out:
-       up(&irq_ptr->setting_up_sema);
-       return result;
-}
-
-int
-qdio_free(struct ccw_device *cdev)
-{
-       struct qdio_irq *irq_ptr;
-       char dbf_text[15];
-
-       irq_ptr = cdev->private->qdio_data;
-       if (!irq_ptr)
-               return -ENODEV;
-
-       down(&irq_ptr->setting_up_sema);
-
-       sprintf(dbf_text,"qfqs%4x",irq_ptr->schid.sch_no);
-       QDIO_DBF_TEXT1(0,trace,dbf_text);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-
-       cdev->private->qdio_data = NULL;
-
-       up(&irq_ptr->setting_up_sema);
-
-       qdio_release_irq_memory(irq_ptr);
-       module_put(THIS_MODULE);
-       return 0;
-}
-
-static void
-qdio_allocate_do_dbf(struct qdio_initialize *init_data)
-{
-       char dbf_text[20]; /* if a printf printed out more than 8 chars */
-
-       sprintf(dbf_text,"qfmt:%x",init_data->q_format);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       QDIO_DBF_HEX0(0,setup,init_data->adapter_name,8);
-       sprintf(dbf_text,"qpff%4x",init_data->qib_param_field_format);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       QDIO_DBF_HEX0(0,setup,&init_data->qib_param_field,sizeof(char*));
-       QDIO_DBF_HEX0(0,setup,&init_data->input_slib_elements,sizeof(long*));
-       QDIO_DBF_HEX0(0,setup,&init_data->output_slib_elements,sizeof(long*));
-       sprintf(dbf_text,"miit%4x",init_data->min_input_threshold);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       sprintf(dbf_text,"mait%4x",init_data->max_input_threshold);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       sprintf(dbf_text,"miot%4x",init_data->min_output_threshold);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       sprintf(dbf_text,"maot%4x",init_data->max_output_threshold);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       sprintf(dbf_text,"niq:%4x",init_data->no_input_qs);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       sprintf(dbf_text,"noq:%4x",init_data->no_output_qs);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       QDIO_DBF_HEX0(0,setup,&init_data->input_handler,sizeof(void*));
-       QDIO_DBF_HEX0(0,setup,&init_data->output_handler,sizeof(void*));
-       QDIO_DBF_HEX0(0,setup,&init_data->int_parm,sizeof(long));
-       QDIO_DBF_HEX0(0,setup,&init_data->flags,sizeof(long));
-       QDIO_DBF_HEX0(0,setup,&init_data->input_sbal_addr_array,sizeof(void*));
-       QDIO_DBF_HEX0(0,setup,&init_data->output_sbal_addr_array,sizeof(void*));
-}
-
-static void
-qdio_allocate_fill_input_desc(struct qdio_irq *irq_ptr, int i, int iqfmt)
-{
-       irq_ptr->input_qs[i]->is_iqdio_q = iqfmt;
-       irq_ptr->input_qs[i]->is_thinint_q = irq_ptr->is_thinint_irq;
-
-       irq_ptr->qdr->qdf0[i].sliba=(unsigned long)(irq_ptr->input_qs[i]->slib);
-
-       irq_ptr->qdr->qdf0[i].sla=(unsigned long)(irq_ptr->input_qs[i]->sl);
-
-       irq_ptr->qdr->qdf0[i].slsba=
-               (unsigned long)(&irq_ptr->input_qs[i]->slsb.acc.val[0]);
-
-       irq_ptr->qdr->qdf0[i].akey=QDIO_STORAGE_KEY;
-       irq_ptr->qdr->qdf0[i].bkey=QDIO_STORAGE_KEY;
-       irq_ptr->qdr->qdf0[i].ckey=QDIO_STORAGE_KEY;
-       irq_ptr->qdr->qdf0[i].dkey=QDIO_STORAGE_KEY;
-}
-
-static void
-qdio_allocate_fill_output_desc(struct qdio_irq *irq_ptr, int i,
-                              int j, int iqfmt)
-{
-       irq_ptr->output_qs[i]->is_iqdio_q = iqfmt;
-       irq_ptr->output_qs[i]->is_thinint_q = irq_ptr->is_thinint_irq;
-
-       irq_ptr->qdr->qdf0[i+j].sliba=(unsigned long)(irq_ptr->output_qs[i]->slib);
-
-       irq_ptr->qdr->qdf0[i+j].sla=(unsigned long)(irq_ptr->output_qs[i]->sl);
-
-       irq_ptr->qdr->qdf0[i+j].slsba=
-               (unsigned long)(&irq_ptr->output_qs[i]->slsb.acc.val[0]);
-
-       irq_ptr->qdr->qdf0[i+j].akey=QDIO_STORAGE_KEY;
-       irq_ptr->qdr->qdf0[i+j].bkey=QDIO_STORAGE_KEY;
-       irq_ptr->qdr->qdf0[i+j].ckey=QDIO_STORAGE_KEY;
-       irq_ptr->qdr->qdf0[i+j].dkey=QDIO_STORAGE_KEY;
-}
-
-
-static void
-qdio_initialize_set_siga_flags_input(struct qdio_irq *irq_ptr)
-{
-       int i;
-
-       for (i=0;i<irq_ptr->no_input_qs;i++) {
-               irq_ptr->input_qs[i]->siga_sync=
-                       irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY;
-               irq_ptr->input_qs[i]->siga_in=
-                       irq_ptr->qdioac&CHSC_FLAG_SIGA_INPUT_NECESSARY;
-               irq_ptr->input_qs[i]->siga_out=
-                       irq_ptr->qdioac&CHSC_FLAG_SIGA_OUTPUT_NECESSARY;
-               irq_ptr->input_qs[i]->siga_sync_done_on_thinints=
-                       irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS;
-               irq_ptr->input_qs[i]->hydra_gives_outbound_pcis=
-                       irq_ptr->hydra_gives_outbound_pcis;
-               irq_ptr->input_qs[i]->siga_sync_done_on_outb_tis=
-                       ((irq_ptr->qdioac&
-                         (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS|
-                          CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS))==
-                        (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS|
-                         CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS));
-
-       }
-}
-
-static void
-qdio_initialize_set_siga_flags_output(struct qdio_irq *irq_ptr)
-{
-       int i;
-
-       for (i=0;i<irq_ptr->no_output_qs;i++) {
-               irq_ptr->output_qs[i]->siga_sync=
-                       irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY;
-               irq_ptr->output_qs[i]->siga_in=
-                       irq_ptr->qdioac&CHSC_FLAG_SIGA_INPUT_NECESSARY;
-               irq_ptr->output_qs[i]->siga_out=
-                       irq_ptr->qdioac&CHSC_FLAG_SIGA_OUTPUT_NECESSARY;
-               irq_ptr->output_qs[i]->siga_sync_done_on_thinints=
-                       irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS;
-               irq_ptr->output_qs[i]->hydra_gives_outbound_pcis=
-                       irq_ptr->hydra_gives_outbound_pcis;
-               irq_ptr->output_qs[i]->siga_sync_done_on_outb_tis=
-                       ((irq_ptr->qdioac&
-                         (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS|
-                          CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS))==
-                        (CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS|
-                         CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS));
-
-       }
-}
-
-static int
-qdio_establish_irq_check_for_errors(struct ccw_device *cdev, int cstat,
-                                   int dstat)
-{
-       char dbf_text[15];
-       struct qdio_irq *irq_ptr;
-
-       irq_ptr = cdev->private->qdio_data;
-
-       if (cstat || (dstat & ~(DEV_STAT_CHN_END|DEV_STAT_DEV_END))) {
-               sprintf(dbf_text,"ick1%4x",irq_ptr->schid.sch_no);
-               QDIO_DBF_TEXT2(1,trace,dbf_text);
-               QDIO_DBF_HEX2(0,trace,&dstat,sizeof(int));
-               QDIO_DBF_HEX2(0,trace,&cstat,sizeof(int));
-               QDIO_PRINT_ERR("received check condition on establish " \
-                              "queues on irq 0.%x.%x (cs=x%x, ds=x%x).\n",
-                              irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
-                              cstat,dstat);
-               qdio_set_state(irq_ptr,QDIO_IRQ_STATE_ERR);
-       }
-       
-       if (!(dstat & DEV_STAT_DEV_END)) {
-               QDIO_DBF_TEXT2(1,setup,"eq:no de");
-               QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat));
-               QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat));
-               QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: didn't get "
-                              "device end: dstat=%02x, cstat=%02x\n",
-                              irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
-                              dstat, cstat);
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
-               return 1;
-       }
-
-       if (dstat & ~(DEV_STAT_CHN_END|DEV_STAT_DEV_END)) {
-               QDIO_DBF_TEXT2(1,setup,"eq:badio");
-               QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat));
-               QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat));
-               QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: got "
-                              "the following devstat: dstat=%02x, "
-                              "cstat=%02x\n", irq_ptr->schid.ssid,
-                              irq_ptr->schid.sch_no, dstat, cstat);
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
-               return 1;
-       }
-       return 0;
-}
-
-static void
-qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, int dstat)
-{
-       struct qdio_irq *irq_ptr;
-       char dbf_text[15];
-
-       irq_ptr = cdev->private->qdio_data;
-
-       sprintf(dbf_text,"qehi%4x",cdev->private->schid.sch_no);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       QDIO_DBF_TEXT0(0,trace,dbf_text);
-
-       if (qdio_establish_irq_check_for_errors(cdev, cstat, dstat))
-               return;
-
-       qdio_set_state(irq_ptr,QDIO_IRQ_STATE_ESTABLISHED);
-}
-
-int
-qdio_initialize(struct qdio_initialize *init_data)
-{
-       int rc;
-       char dbf_text[15];
-
-       sprintf(dbf_text,"qini%4x",init_data->cdev->private->schid.sch_no);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       QDIO_DBF_TEXT0(0,trace,dbf_text);
-
-       rc = qdio_allocate(init_data);
-       if (rc == 0) {
-               rc = qdio_establish(init_data);
-               if (rc != 0)
-                       qdio_free(init_data->cdev);
-       }
-
-       return rc;
-}
-
-
-int
-qdio_allocate(struct qdio_initialize *init_data)
-{
-       struct qdio_irq *irq_ptr;
-       char dbf_text[15];
-
-       sprintf(dbf_text,"qalc%4x",init_data->cdev->private->schid.sch_no);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       QDIO_DBF_TEXT0(0,trace,dbf_text);
-       if ( (init_data->no_input_qs>QDIO_MAX_QUEUES_PER_IRQ) ||
-            (init_data->no_output_qs>QDIO_MAX_QUEUES_PER_IRQ) ||
-            ((init_data->no_input_qs) && (!init_data->input_handler)) ||
-            ((init_data->no_output_qs) && (!init_data->output_handler)) )
-               return -EINVAL;
-
-       if (!init_data->input_sbal_addr_array)
-               return -EINVAL;
-
-       if (!init_data->output_sbal_addr_array)
-               return -EINVAL;
-
-       qdio_allocate_do_dbf(init_data);
-
-       /* create irq */
-       irq_ptr = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
-
-       QDIO_DBF_TEXT0(0,setup,"irq_ptr:");
-       QDIO_DBF_HEX0(0,setup,&irq_ptr,sizeof(void*));
-
-       if (!irq_ptr) {
-               QDIO_PRINT_ERR("allocation of irq_ptr failed!\n");
-               return -ENOMEM;
-       }
-
-       init_MUTEX(&irq_ptr->setting_up_sema);
-
-       /* QDR must be in DMA area since CCW data address is only 32 bit */
-       irq_ptr->qdr = (struct qdr *) __get_free_page(GFP_KERNEL | GFP_DMA);
-       if (!(irq_ptr->qdr)) {
-               free_page((unsigned long) irq_ptr);
-               QDIO_PRINT_ERR("allocation of irq_ptr->qdr failed!\n");
-               return -ENOMEM;
-               }
-       QDIO_DBF_TEXT0(0,setup,"qdr:");
-       QDIO_DBF_HEX0(0,setup,&irq_ptr->qdr,sizeof(void*));
-
-       if (qdio_alloc_qs(irq_ptr,
-                                 init_data->no_input_qs,
-                         init_data->no_output_qs)) {
-               QDIO_PRINT_ERR("queue allocation failed!\n");
-               qdio_release_irq_memory(irq_ptr);
-               return -ENOMEM;
-       }
-
-       init_data->cdev->private->qdio_data = irq_ptr;
-
-       qdio_set_state(irq_ptr,QDIO_IRQ_STATE_INACTIVE);
-
-       return 0;
-}
-
-static int qdio_fill_irq(struct qdio_initialize *init_data)
-{
-       int i;
-       char dbf_text[15];
-       struct ciw *ciw;
-       int is_iqdio;
-       struct qdio_irq *irq_ptr;
-
-       irq_ptr = init_data->cdev->private->qdio_data;
-
-       memset(irq_ptr,0,((char*)&irq_ptr->qdr)-((char*)irq_ptr));
-
-        /* wipes qib.ac, required by ar7063 */
-       memset(irq_ptr->qdr,0,sizeof(struct qdr));
-
-       irq_ptr->int_parm=init_data->int_parm;
-
-       irq_ptr->schid = ccw_device_get_subchannel_id(init_data->cdev);
-       irq_ptr->no_input_qs=init_data->no_input_qs;
-       irq_ptr->no_output_qs=init_data->no_output_qs;
-
-       if (init_data->q_format==QDIO_IQDIO_QFMT) {
-               irq_ptr->is_iqdio_irq=1;
-               irq_ptr->is_thinint_irq=1;
-       } else {
-               irq_ptr->is_iqdio_irq=0;
-               irq_ptr->is_thinint_irq=hydra_thinints;
-       }
-       sprintf(dbf_text,"is_i_t%1x%1x",
-               irq_ptr->is_iqdio_irq,irq_ptr->is_thinint_irq);
-       QDIO_DBF_TEXT2(0,setup,dbf_text);
-
-       if (irq_ptr->is_thinint_irq) {
-               irq_ptr->dev_st_chg_ind = qdio_get_indicator();
-               QDIO_DBF_HEX1(0,setup,&irq_ptr->dev_st_chg_ind,sizeof(void*));
-               if (!irq_ptr->dev_st_chg_ind) {
-                       QDIO_PRINT_WARN("no indicator location available " \
-                                       "for irq 0.%x.%x\n",
-                                       irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
-                       qdio_release_irq_memory(irq_ptr);
-                       return -ENOBUFS;
-               }
-       }
-
-       /* defaults */
-       irq_ptr->equeue.cmd=DEFAULT_ESTABLISH_QS_CMD;
-       irq_ptr->equeue.count=DEFAULT_ESTABLISH_QS_COUNT;
-       irq_ptr->aqueue.cmd=DEFAULT_ACTIVATE_QS_CMD;
-       irq_ptr->aqueue.count=DEFAULT_ACTIVATE_QS_COUNT;
-
-       qdio_fill_qs(irq_ptr, init_data->cdev,
-                    init_data->no_input_qs,
-                    init_data->no_output_qs,
-                    init_data->input_handler,
-                    init_data->output_handler,init_data->int_parm,
-                    init_data->q_format,init_data->flags,
-                    init_data->input_sbal_addr_array,
-                    init_data->output_sbal_addr_array);
-
-       if (!try_module_get(THIS_MODULE)) {
-               QDIO_PRINT_CRIT("try_module_get() failed!\n");
-               qdio_release_irq_memory(irq_ptr);
-               return -EINVAL;
-       }
-
-       qdio_fill_thresholds(irq_ptr,init_data->no_input_qs,
-                            init_data->no_output_qs,
-                            init_data->min_input_threshold,
-                            init_data->max_input_threshold,
-                            init_data->min_output_threshold,
-                            init_data->max_output_threshold);
-
-       /* fill in qdr */
-       irq_ptr->qdr->qfmt=init_data->q_format;
-       irq_ptr->qdr->iqdcnt=init_data->no_input_qs;
-       irq_ptr->qdr->oqdcnt=init_data->no_output_qs;
-       irq_ptr->qdr->iqdsz=sizeof(struct qdesfmt0)/4; /* size in words */
-       irq_ptr->qdr->oqdsz=sizeof(struct qdesfmt0)/4;
-
-       irq_ptr->qdr->qiba=(unsigned long)&irq_ptr->qib;
-       irq_ptr->qdr->qkey=QDIO_STORAGE_KEY;
-
-       /* fill in qib */
-       irq_ptr->is_qebsm = is_passthrough;
-       if (irq_ptr->is_qebsm)
-               irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM;
-
-       irq_ptr->qib.qfmt=init_data->q_format;
-       if (init_data->no_input_qs)
-               irq_ptr->qib.isliba=(unsigned long)(irq_ptr->input_qs[0]->slib);
-       if (init_data->no_output_qs)
-               irq_ptr->qib.osliba=(unsigned long)(irq_ptr->output_qs[0]->slib);
-       memcpy(irq_ptr->qib.ebcnam,init_data->adapter_name,8);
-
-       qdio_set_impl_params(irq_ptr,init_data->qib_param_field_format,
-                            init_data->qib_param_field,
-                            init_data->no_input_qs,
-                            init_data->no_output_qs,
-                            init_data->input_slib_elements,
-                            init_data->output_slib_elements);
-
-       /* first input descriptors, then output descriptors */
-       is_iqdio = (init_data->q_format == QDIO_IQDIO_QFMT) ? 1 : 0;
-       for (i=0;i<init_data->no_input_qs;i++)
-               qdio_allocate_fill_input_desc(irq_ptr, i, is_iqdio);
-
-       for (i=0;i<init_data->no_output_qs;i++)
-               qdio_allocate_fill_output_desc(irq_ptr, i,
-                                              init_data->no_input_qs,
-                                              is_iqdio);
-
-       /* qdr, qib, sls, slsbs, slibs, sbales filled. */
-
-       /* get qdio commands */
-       ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE);
-       if (!ciw) {
-               QDIO_DBF_TEXT2(1,setup,"no eq");
-               QDIO_PRINT_INFO("No equeue CIW found for QDIO commands. "
-                               "Trying to use default.\n");
-       } else
-               irq_ptr->equeue = *ciw;
-       ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE);
-       if (!ciw) {
-               QDIO_DBF_TEXT2(1,setup,"no aq");
-               QDIO_PRINT_INFO("No aqueue CIW found for QDIO commands. "
-                               "Trying to use default.\n");
-       } else
-               irq_ptr->aqueue = *ciw;
-
-       /* Set new interrupt handler. */
-       irq_ptr->original_int_handler = init_data->cdev->handler;
-       init_data->cdev->handler = qdio_handler;
-
-       return 0;
-}
-
-int
-qdio_establish(struct qdio_initialize *init_data)
-{
-       struct qdio_irq *irq_ptr;
-       unsigned long saveflags;
-       int result, result2;
-       struct ccw_device *cdev;
-       char dbf_text[20];
-
-       cdev=init_data->cdev;
-       irq_ptr = cdev->private->qdio_data;
-       if (!irq_ptr)
-               return -EINVAL;
-
-       if (cdev->private->state != DEV_STATE_ONLINE)
-               return -EINVAL;
-       
-       down(&irq_ptr->setting_up_sema);
-
-       qdio_fill_irq(init_data);
-
-       /* the thinint CHSC stuff */
-       if (irq_ptr->is_thinint_irq) {
-
-               result = tiqdio_set_subchannel_ind(irq_ptr,0);
-               if (result) {
-                       up(&irq_ptr->setting_up_sema);
-                       qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
-                       return result;
-               }
-               tiqdio_set_delay_target(irq_ptr,TIQDIO_DELAY_TARGET);
-       }
-
-       sprintf(dbf_text,"qest%4x",cdev->private->schid.sch_no);
-       QDIO_DBF_TEXT0(0,setup,dbf_text);
-       QDIO_DBF_TEXT0(0,trace,dbf_text);
-
-       /* establish q */
-       irq_ptr->ccw.cmd_code=irq_ptr->equeue.cmd;
-       irq_ptr->ccw.flags=CCW_FLAG_SLI;
-       irq_ptr->ccw.count=irq_ptr->equeue.count;
-       irq_ptr->ccw.cda=QDIO_GET_ADDR(irq_ptr->qdr);
-
-       spin_lock_irqsave(get_ccwdev_lock(cdev),saveflags);
-
-       ccw_device_set_options_mask(cdev, 0);
-       result = ccw_device_start(cdev, &irq_ptr->ccw,
-                               QDIO_DOING_ESTABLISH, 0, 0);
-       if (result) {
-               result2 = ccw_device_start(cdev, &irq_ptr->ccw,
-                                       QDIO_DOING_ESTABLISH, 0, 0);
-               sprintf(dbf_text,"eq:io%4x",result);
-               QDIO_DBF_TEXT2(1,setup,dbf_text);
-               if (result2) {
-                       sprintf(dbf_text,"eq:io%4x",result);
-                       QDIO_DBF_TEXT2(1,setup,dbf_text);
-               }
-               QDIO_PRINT_WARN("establish queues on irq 0.%x.%04x: do_IO " \
-                               "returned %i, next try returned %i\n",
-                               irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
-                               result, result2);
-               result=result2;
-       }
-
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev),saveflags);
-
-       if (result) {
-               up(&irq_ptr->setting_up_sema);
-               qdio_shutdown(cdev,QDIO_FLAG_CLEANUP_USING_CLEAR);
-               return result;
-       }
-       
-       wait_event_interruptible_timeout(cdev->private->wait_q,
-               irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED ||
-               irq_ptr->state == QDIO_IRQ_STATE_ERR,
-               QDIO_ESTABLISH_TIMEOUT);
-
-       if (irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED)
-               result = 0;
-       else {
-               up(&irq_ptr->setting_up_sema);
-               qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
-               return -EIO;
-       }
-
-       qdio_get_ssqd_siga(irq_ptr);
-       /* if this gets set once, we're running under VM and can omit SVSes */
-       if (irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY)
-               omit_svs=1;
-
-       sprintf(dbf_text,"qdioac%2x",irq_ptr->qdioac);
-       QDIO_DBF_TEXT2(0,setup,dbf_text);
-
-       sprintf(dbf_text,"qib ac%2x",irq_ptr->qib.ac);
-       QDIO_DBF_TEXT2(0,setup,dbf_text);
-
-       irq_ptr->hydra_gives_outbound_pcis=
-               irq_ptr->qib.ac&QIB_AC_OUTBOUND_PCI_SUPPORTED;
-       irq_ptr->sync_done_on_outb_pcis=
-               irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS;
-
-       qdio_initialize_set_siga_flags_input(irq_ptr);
-       qdio_initialize_set_siga_flags_output(irq_ptr);
-
-       up(&irq_ptr->setting_up_sema);
-
-       return result;
-       
-}
-
-int
-qdio_activate(struct ccw_device *cdev, int flags)
-{
-       struct qdio_irq *irq_ptr;
-       int i,result=0,result2;
-       unsigned long saveflags;
-       char dbf_text[20]; /* see qdio_initialize */
-
-       irq_ptr = cdev->private->qdio_data;
-       if (!irq_ptr)
-               return -ENODEV;
-
-       if (cdev->private->state != DEV_STATE_ONLINE)
-               return -EINVAL;
-
-       down(&irq_ptr->setting_up_sema);
-       if (irq_ptr->state==QDIO_IRQ_STATE_INACTIVE) {
-               result=-EBUSY;
-               goto out;
-       }
-
-       sprintf(dbf_text,"qact%4x", irq_ptr->schid.sch_no);
-       QDIO_DBF_TEXT2(0,setup,dbf_text);
-       QDIO_DBF_TEXT2(0,trace,dbf_text);
-
-       /* activate q */
-       irq_ptr->ccw.cmd_code=irq_ptr->aqueue.cmd;
-       irq_ptr->ccw.flags=CCW_FLAG_SLI;
-       irq_ptr->ccw.count=irq_ptr->aqueue.count;
-       irq_ptr->ccw.cda=QDIO_GET_ADDR(0);
-
-       spin_lock_irqsave(get_ccwdev_lock(cdev),saveflags);
-
-       ccw_device_set_options(cdev, CCWDEV_REPORT_ALL);
-       result=ccw_device_start(cdev,&irq_ptr->ccw,QDIO_DOING_ACTIVATE,
-                               0, DOIO_DENY_PREFETCH);
-       if (result) {
-               result2=ccw_device_start(cdev,&irq_ptr->ccw,
-                                        QDIO_DOING_ACTIVATE,0,0);
-               sprintf(dbf_text,"aq:io%4x",result);
-               QDIO_DBF_TEXT2(1,setup,dbf_text);
-               if (result2) {
-                       sprintf(dbf_text,"aq:io%4x",result);
-                       QDIO_DBF_TEXT2(1,setup,dbf_text);
-               }
-               QDIO_PRINT_WARN("activate queues on irq 0.%x.%04x: do_IO " \
-                               "returned %i, next try returned %i\n",
-                               irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
-                               result, result2);
-               result=result2;
-       }
-
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev),saveflags);
-       if (result)
-               goto out;
-
-       for (i=0;i<irq_ptr->no_input_qs;i++) {
-               if (irq_ptr->is_thinint_irq) {
-                       /* 
-                        * that way we know, that, if we will get interrupted
-                        * by tiqdio_inbound_processing, qdio_unmark_q will
-                        * not be called 
-                        */
-                       qdio_reserve_q(irq_ptr->input_qs[i]);
-                       qdio_mark_tiq(irq_ptr->input_qs[i]);
-                       qdio_release_q(irq_ptr->input_qs[i]);
-               }
-       }
-
-       if (flags&QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT) {
-               for (i=0;i<irq_ptr->no_input_qs;i++) {
-                       irq_ptr->input_qs[i]->is_input_q|=
-                               QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT;
-               }
-       }
-
-       msleep(QDIO_ACTIVATE_TIMEOUT);
-       switch (irq_ptr->state) {
-       case QDIO_IRQ_STATE_STOPPED:
-       case QDIO_IRQ_STATE_ERR:
-               up(&irq_ptr->setting_up_sema);
-               qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
-               down(&irq_ptr->setting_up_sema);
-               result = -EIO;
-               break;
-       default:
-               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ACTIVE);
-               result = 0;
-       }
- out:
-       up(&irq_ptr->setting_up_sema);
-
-       return result;
-}
-
-/* buffers filled forwards again to make Rick happy */
-static void
-qdio_do_qdio_fill_input(struct qdio_q *q, unsigned int qidx,
-                       unsigned int count, struct qdio_buffer *buffers)
-{
-       struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
-       int tmp = 0;
-
-       qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1);
-       if (irq->is_qebsm) {
-               while (count) {
-                       tmp = set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count);
-                       if (!tmp)
-                               return;
-               }
-               return;
-       }
-       for (;;) {
-               set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count);
-               count--;
-               if (!count) break;
-               qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1);
-       }
-}
-
-static void
-qdio_do_qdio_fill_output(struct qdio_q *q, unsigned int qidx,
-                        unsigned int count, struct qdio_buffer *buffers)
-{
-       struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
-       int tmp = 0;
-
-       qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1);
-       if (irq->is_qebsm) {
-               while (count) {
-                       tmp = set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count);
-                       if (!tmp)
-                               return;
-               }
-               return;
-       }
-
-       for (;;) {
-               set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count);
-               count--;
-               if (!count) break;
-               qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1);
-       }
-}
-
-static void
-do_qdio_handle_inbound(struct qdio_q *q, unsigned int callflags,
-                      unsigned int qidx, unsigned int count,
-                      struct qdio_buffer *buffers)
-{
-       int used_elements;
-
-        /* This is the inbound handling of queues */
-       used_elements=atomic_add_return(count, &q->number_of_buffers_used) - count;
-       
-       qdio_do_qdio_fill_input(q,qidx,count,buffers);
-       
-       if ((used_elements+count==QDIO_MAX_BUFFERS_PER_Q)&&
-           (callflags&QDIO_FLAG_UNDER_INTERRUPT))
-               atomic_xchg(&q->polling,0);
-       
-       if (used_elements) 
-               return;
-       if (callflags&QDIO_FLAG_DONT_SIGA)
-               return;
-       if (q->siga_in) {
-               int result;
-               
-               result=qdio_siga_input(q);
-               if (result) {
-                       if (q->siga_error)
-                               q->error_status_flags|=
-                                       QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR;
-                       q->error_status_flags|=QDIO_STATUS_LOOK_FOR_ERROR;
-                       q->siga_error=result;
-               }
-       }
-               
-       qdio_mark_q(q);
-}
-
-static void
-do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags,
-                       unsigned int qidx, unsigned int count,
-                       struct qdio_buffer *buffers)
-{
-       int used_elements;
-       unsigned int cnt, start_buf;
-       unsigned char state = 0;
-       struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
-
-       /* This is the outbound handling of queues */
-       qdio_do_qdio_fill_output(q,qidx,count,buffers);
-
-       used_elements=atomic_add_return(count, &q->number_of_buffers_used) - count;
-
-       if (callflags&QDIO_FLAG_DONT_SIGA) {
-               qdio_perf_stat_inc(&perf_stats.outbound_cnt);
-               return;
-       }
-       if (callflags & QDIO_FLAG_PCI_OUT)
-               q->is_pci_out = 1;
-       else
-               q->is_pci_out = 0;
-       if (q->is_iqdio_q) {
-               /* one siga for every sbal */
-               while (count--)
-                       qdio_kick_outbound_q(q);
-                       
-               __qdio_outbound_processing(q);
-       } else {
-               /* under VM, we do a SIGA sync unconditionally */
-               SYNC_MEMORY;
-               else {
-                       /* 
-                        * w/o shadow queues (else branch of
-                        * SYNC_MEMORY :-/ ), we try to
-                        * fast-requeue buffers 
-                        */
-                       if (irq->is_qebsm) {
-                               cnt = 1;
-                               start_buf = ((qidx+QDIO_MAX_BUFFERS_PER_Q-1) &
-                                            (QDIO_MAX_BUFFERS_PER_Q-1));
-                               qdio_do_eqbs(q, &state, &start_buf, &cnt);
-                       } else
-                               state = q->slsb.acc.val[(qidx+QDIO_MAX_BUFFERS_PER_Q-1)
-                                       &(QDIO_MAX_BUFFERS_PER_Q-1) ];
-                        if (state != SLSB_CU_OUTPUT_PRIMED) {
-                               qdio_kick_outbound_q(q);
-                       } else {
-                               QDIO_DBF_TEXT3(0,trace, "fast-req");
-                               qdio_perf_stat_inc(&perf_stats.fast_reqs);
-                       }
-               }
-               /* 
-                * only marking the q could take too long,
-                * the upper layer module could do a lot of
-                * traffic in that time 
-                */
-               __qdio_outbound_processing(q);
-       }
-
-       qdio_perf_stat_inc(&perf_stats.outbound_cnt);
-}
-
-/* count must be 1 in iqdio */
-int
-do_QDIO(struct ccw_device *cdev,unsigned int callflags,
-       unsigned int queue_number, unsigned int qidx,
-       unsigned int count,struct qdio_buffer *buffers)
-{
-       struct qdio_irq *irq_ptr;
-#ifdef CONFIG_QDIO_DEBUG
-       char dbf_text[20];
-
-       sprintf(dbf_text,"doQD%04x",cdev->private->schid.sch_no);
-       QDIO_DBF_TEXT3(0,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-
-       if ( (qidx>QDIO_MAX_BUFFERS_PER_Q) ||
-            (count>QDIO_MAX_BUFFERS_PER_Q) ||
-            (queue_number>QDIO_MAX_QUEUES_PER_IRQ) )
-               return -EINVAL;
-
-       if (count==0)
-               return 0;
-
-       irq_ptr = cdev->private->qdio_data;
-       if (!irq_ptr)
-               return -ENODEV;
-
-#ifdef CONFIG_QDIO_DEBUG
-       if (callflags&QDIO_FLAG_SYNC_INPUT)
-               QDIO_DBF_HEX3(0,trace,&irq_ptr->input_qs[queue_number],
-                             sizeof(void*));
-       else
-               QDIO_DBF_HEX3(0,trace,&irq_ptr->output_qs[queue_number],
-                             sizeof(void*));
-       sprintf(dbf_text,"flag%04x",callflags);
-       QDIO_DBF_TEXT3(0,trace,dbf_text);
-       sprintf(dbf_text,"qi%02xct%02x",qidx,count);
-       QDIO_DBF_TEXT3(0,trace,dbf_text);
-#endif /* CONFIG_QDIO_DEBUG */
-
-       if (irq_ptr->state!=QDIO_IRQ_STATE_ACTIVE)
-               return -EBUSY;
-
-       if (callflags&QDIO_FLAG_SYNC_INPUT)
-               do_qdio_handle_inbound(irq_ptr->input_qs[queue_number],
-                                      callflags, qidx, count, buffers);
-       else if (callflags&QDIO_FLAG_SYNC_OUTPUT)
-               do_qdio_handle_outbound(irq_ptr->output_qs[queue_number],
-                                       callflags, qidx, count, buffers);
-       else {
-               QDIO_DBF_TEXT3(1,trace,"doQD:inv");
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static int
-qdio_perf_procfile_read(char *buffer, char **buffer_location, off_t offset,
-                       int buffer_length, int *eof, void *data)
-{
-        int c=0;
-
-        /* we are always called with buffer_length=4k, so we all
-           deliver on the first read */
-        if (offset>0)
-               return 0;
-
-#define _OUTP_IT(x...) c+=sprintf(buffer+c,x)
-#ifdef CONFIG_64BIT
-       _OUTP_IT("Number of tasklet runs (total)                  : %li\n",
-                (long)atomic64_read(&perf_stats.tl_runs));
-       _OUTP_IT("Inbound tasklet runs      tried/retried         : %li/%li\n",
-                (long)atomic64_read(&perf_stats.inbound_tl_runs),
-                (long)atomic64_read(&perf_stats.inbound_tl_runs_resched));
-       _OUTP_IT("Inbound-thin tasklet runs tried/retried         : %li/%li\n",
-                (long)atomic64_read(&perf_stats.inbound_thin_tl_runs),
-                (long)atomic64_read(&perf_stats.inbound_thin_tl_runs_resched));
-       _OUTP_IT("Outbound tasklet runs     tried/retried         : %li/%li\n",
-                (long)atomic64_read(&perf_stats.outbound_tl_runs),
-                (long)atomic64_read(&perf_stats.outbound_tl_runs_resched));
-       _OUTP_IT("\n");
-       _OUTP_IT("Number of SIGA sync's issued                    : %li\n",
-                (long)atomic64_read(&perf_stats.siga_syncs));
-       _OUTP_IT("Number of SIGA in's issued                      : %li\n",
-                (long)atomic64_read(&perf_stats.siga_ins));
-       _OUTP_IT("Number of SIGA out's issued                     : %li\n",
-                (long)atomic64_read(&perf_stats.siga_outs));
-       _OUTP_IT("Number of PCIs caught                           : %li\n",
-                (long)atomic64_read(&perf_stats.pcis));
-       _OUTP_IT("Number of adapter interrupts caught             : %li\n",
-                (long)atomic64_read(&perf_stats.thinints));
-       _OUTP_IT("Number of fast requeues (outg. SBALs w/o SIGA)  : %li\n",
-                (long)atomic64_read(&perf_stats.fast_reqs));
-       _OUTP_IT("\n");
-       _OUTP_IT("Number of inbound transfers                     : %li\n",
-                (long)atomic64_read(&perf_stats.inbound_cnt));
-       _OUTP_IT("Number of do_QDIOs outbound                     : %li\n",
-                (long)atomic64_read(&perf_stats.outbound_cnt));
-#else /* CONFIG_64BIT */
-       _OUTP_IT("Number of tasklet runs (total)                  : %i\n",
-                atomic_read(&perf_stats.tl_runs));
-       _OUTP_IT("Inbound tasklet runs      tried/retried         : %i/%i\n",
-                atomic_read(&perf_stats.inbound_tl_runs),
-                atomic_read(&perf_stats.inbound_tl_runs_resched));
-       _OUTP_IT("Inbound-thin tasklet runs tried/retried         : %i/%i\n",
-                atomic_read(&perf_stats.inbound_thin_tl_runs),
-                atomic_read(&perf_stats.inbound_thin_tl_runs_resched));
-       _OUTP_IT("Outbound tasklet runs     tried/retried         : %i/%i\n",
-                atomic_read(&perf_stats.outbound_tl_runs),
-                atomic_read(&perf_stats.outbound_tl_runs_resched));
-       _OUTP_IT("\n");
-       _OUTP_IT("Number of SIGA sync's issued                    : %i\n",
-                atomic_read(&perf_stats.siga_syncs));
-       _OUTP_IT("Number of SIGA in's issued                      : %i\n",
-                atomic_read(&perf_stats.siga_ins));
-       _OUTP_IT("Number of SIGA out's issued                     : %i\n",
-                atomic_read(&perf_stats.siga_outs));
-       _OUTP_IT("Number of PCIs caught                           : %i\n",
-                atomic_read(&perf_stats.pcis));
-       _OUTP_IT("Number of adapter interrupts caught             : %i\n",
-                atomic_read(&perf_stats.thinints));
-       _OUTP_IT("Number of fast requeues (outg. SBALs w/o SIGA)  : %i\n",
-                atomic_read(&perf_stats.fast_reqs));
-       _OUTP_IT("\n");
-       _OUTP_IT("Number of inbound transfers                     : %i\n",
-                atomic_read(&perf_stats.inbound_cnt));
-       _OUTP_IT("Number of do_QDIOs outbound                     : %i\n",
-                atomic_read(&perf_stats.outbound_cnt));
-#endif /* CONFIG_64BIT */
-       _OUTP_IT("\n");
-
-        return c;
-}
-
-static struct proc_dir_entry *qdio_perf_proc_file;
-
-static void
-qdio_add_procfs_entry(void)
-{
-        proc_perf_file_registration=0;
-       qdio_perf_proc_file=create_proc_entry(QDIO_PERF,
-                                             S_IFREG|0444,NULL);
-       if (qdio_perf_proc_file) {
-               qdio_perf_proc_file->read_proc=&qdio_perf_procfile_read;
-       } else proc_perf_file_registration=-1;
-
-        if (proc_perf_file_registration)
-                QDIO_PRINT_WARN("was not able to register perf. " \
-                               "proc-file (%i).\n",
-                               proc_perf_file_registration);
-}
-
-static void
-qdio_remove_procfs_entry(void)
-{
-        if (!proc_perf_file_registration) /* means if it went ok earlier */
-               remove_proc_entry(QDIO_PERF,NULL);
-}
-
-/**
- * attributes in sysfs
- *****************************************************************************/
-
-static ssize_t
-qdio_performance_stats_show(struct bus_type *bus, char *buf)
-{
-       return sprintf(buf, "%i\n", qdio_performance_stats ? 1 : 0);
-}
-
-static ssize_t
-qdio_performance_stats_store(struct bus_type *bus, const char *buf, size_t count)
-{
-       unsigned long i;
-       int ret;
-
-       ret = strict_strtoul(buf, 16, &i);
-       if (!ret && ((i == 0) || (i == 1))) {
-               if (i == qdio_performance_stats)
-                       return count;
-               qdio_performance_stats = i;
-               if (i==0) {
-                       /* reset perf. stat. info */
-#ifdef CONFIG_64BIT
-                       atomic64_set(&perf_stats.tl_runs, 0);
-                       atomic64_set(&perf_stats.outbound_tl_runs, 0);
-                       atomic64_set(&perf_stats.inbound_tl_runs, 0);
-                       atomic64_set(&perf_stats.inbound_tl_runs_resched, 0);
-                       atomic64_set(&perf_stats.inbound_thin_tl_runs, 0);
-                       atomic64_set(&perf_stats.inbound_thin_tl_runs_resched,
-                                    0);
-                       atomic64_set(&perf_stats.siga_outs, 0);
-                       atomic64_set(&perf_stats.siga_ins, 0);
-                       atomic64_set(&perf_stats.siga_syncs, 0);
-                       atomic64_set(&perf_stats.pcis, 0);
-                       atomic64_set(&perf_stats.thinints, 0);
-                       atomic64_set(&perf_stats.fast_reqs, 0);
-                       atomic64_set(&perf_stats.outbound_cnt, 0);
-                       atomic64_set(&perf_stats.inbound_cnt, 0);
-#else /* CONFIG_64BIT */
-                       atomic_set(&perf_stats.tl_runs, 0);
-                       atomic_set(&perf_stats.outbound_tl_runs, 0);
-                       atomic_set(&perf_stats.inbound_tl_runs, 0);
-                       atomic_set(&perf_stats.inbound_tl_runs_resched, 0);
-                       atomic_set(&perf_stats.inbound_thin_tl_runs, 0);
-                       atomic_set(&perf_stats.inbound_thin_tl_runs_resched, 0);
-                       atomic_set(&perf_stats.siga_outs, 0);
-                       atomic_set(&perf_stats.siga_ins, 0);
-                       atomic_set(&perf_stats.siga_syncs, 0);
-                       atomic_set(&perf_stats.pcis, 0);
-                       atomic_set(&perf_stats.thinints, 0);
-                       atomic_set(&perf_stats.fast_reqs, 0);
-                       atomic_set(&perf_stats.outbound_cnt, 0);
-                       atomic_set(&perf_stats.inbound_cnt, 0);
-#endif /* CONFIG_64BIT */
-               }
-       } else {
-               QDIO_PRINT_ERR("QDIO performance_stats: write 0 or 1 to this file!\n");
-               return -EINVAL;
-       }
-       return count;
-}
-
-static BUS_ATTR(qdio_performance_stats, 0644, qdio_performance_stats_show,
-                       qdio_performance_stats_store);
-
-static void
-tiqdio_register_thinints(void)
-{
-       char dbf_text[20];
-
-       tiqdio_ind =
-               s390_register_adapter_interrupt(&tiqdio_thinint_handler, NULL,
-                                               TIQDIO_THININT_ISC);
-       if (IS_ERR(tiqdio_ind)) {
-               sprintf(dbf_text, "regthn%lx", PTR_ERR(tiqdio_ind));
-               QDIO_DBF_TEXT0(0,setup,dbf_text);
-               QDIO_PRINT_ERR("failed to register adapter handler " \
-                              "(rc=%li).\nAdapter interrupts might " \
-                              "not work. Continuing.\n",
-                              PTR_ERR(tiqdio_ind));
-               tiqdio_ind = NULL;
-       }
-}
-
-static void
-tiqdio_unregister_thinints(void)
-{
-       if (tiqdio_ind)
-               s390_unregister_adapter_interrupt(tiqdio_ind,
-                                                 TIQDIO_THININT_ISC);
-}
-
-static int
-qdio_get_qdio_memory(void)
-{
-       int i;
-       indicator_used[0]=1;
-
-       for (i=1;i<INDICATORS_PER_CACHELINE;i++)
-               indicator_used[i]=0;
-       indicators = kzalloc(sizeof(__u32)*(INDICATORS_PER_CACHELINE),
-                            GFP_KERNEL);
-       if (!indicators)
-               return -ENOMEM;
-       return 0;
-}
-
-static void
-qdio_release_qdio_memory(void)
-{
-       kfree(indicators);
-}
-
-static void
-qdio_unregister_dbf_views(void)
-{
-       if (qdio_dbf_setup)
-               debug_unregister(qdio_dbf_setup);
-       if (qdio_dbf_sbal)
-               debug_unregister(qdio_dbf_sbal);
-       if (qdio_dbf_sense)
-               debug_unregister(qdio_dbf_sense);
-       if (qdio_dbf_trace)
-               debug_unregister(qdio_dbf_trace);
-#ifdef CONFIG_QDIO_DEBUG
-        if (qdio_dbf_slsb_out)
-                debug_unregister(qdio_dbf_slsb_out);
-        if (qdio_dbf_slsb_in)
-                debug_unregister(qdio_dbf_slsb_in);
-#endif /* CONFIG_QDIO_DEBUG */
-}
-
-static int
-qdio_register_dbf_views(void)
-{
-       qdio_dbf_setup=debug_register(QDIO_DBF_SETUP_NAME,
-                                     QDIO_DBF_SETUP_PAGES,
-                                     QDIO_DBF_SETUP_NR_AREAS,
-                                     QDIO_DBF_SETUP_LEN);
-       if (!qdio_dbf_setup)
-               goto oom;
-       debug_register_view(qdio_dbf_setup,&debug_hex_ascii_view);
-       debug_set_level(qdio_dbf_setup,QDIO_DBF_SETUP_LEVEL);
-
-       qdio_dbf_sbal=debug_register(QDIO_DBF_SBAL_NAME,
-                                    QDIO_DBF_SBAL_PAGES,
-                                    QDIO_DBF_SBAL_NR_AREAS,
-                                    QDIO_DBF_SBAL_LEN);
-       if (!qdio_dbf_sbal)
-               goto oom;
-
-       debug_register_view(qdio_dbf_sbal,&debug_hex_ascii_view);
-       debug_set_level(qdio_dbf_sbal,QDIO_DBF_SBAL_LEVEL);
-
-       qdio_dbf_sense=debug_register(QDIO_DBF_SENSE_NAME,
-                                     QDIO_DBF_SENSE_PAGES,
-                                     QDIO_DBF_SENSE_NR_AREAS,
-                                     QDIO_DBF_SENSE_LEN);
-       if (!qdio_dbf_sense)
-               goto oom;
-
-       debug_register_view(qdio_dbf_sense,&debug_hex_ascii_view);
-       debug_set_level(qdio_dbf_sense,QDIO_DBF_SENSE_LEVEL);
-
-       qdio_dbf_trace=debug_register(QDIO_DBF_TRACE_NAME,
-                                     QDIO_DBF_TRACE_PAGES,
-                                     QDIO_DBF_TRACE_NR_AREAS,
-                                     QDIO_DBF_TRACE_LEN);
-       if (!qdio_dbf_trace)
-               goto oom;
-
-       debug_register_view(qdio_dbf_trace,&debug_hex_ascii_view);
-       debug_set_level(qdio_dbf_trace,QDIO_DBF_TRACE_LEVEL);
-
-#ifdef CONFIG_QDIO_DEBUG
-        qdio_dbf_slsb_out=debug_register(QDIO_DBF_SLSB_OUT_NAME,
-                                         QDIO_DBF_SLSB_OUT_PAGES,
-                                         QDIO_DBF_SLSB_OUT_NR_AREAS,
-                                         QDIO_DBF_SLSB_OUT_LEN);
-        if (!qdio_dbf_slsb_out)
-               goto oom;
-        debug_register_view(qdio_dbf_slsb_out,&debug_hex_ascii_view);
-        debug_set_level(qdio_dbf_slsb_out,QDIO_DBF_SLSB_OUT_LEVEL);
-
-        qdio_dbf_slsb_in=debug_register(QDIO_DBF_SLSB_IN_NAME,
-                                        QDIO_DBF_SLSB_IN_PAGES,
-                                        QDIO_DBF_SLSB_IN_NR_AREAS,
-                                        QDIO_DBF_SLSB_IN_LEN);
-        if (!qdio_dbf_slsb_in)
-               goto oom;
-        debug_register_view(qdio_dbf_slsb_in,&debug_hex_ascii_view);
-        debug_set_level(qdio_dbf_slsb_in,QDIO_DBF_SLSB_IN_LEVEL);
-#endif /* CONFIG_QDIO_DEBUG */
-       return 0;
-oom:
-       QDIO_PRINT_ERR("not enough memory for dbf.\n");
-       qdio_unregister_dbf_views();
-       return -ENOMEM;
-}
-
-static void *qdio_mempool_alloc(gfp_t gfp_mask, void *size)
-{
-       return (void *) get_zeroed_page(gfp_mask|GFP_DMA);
-}
-
-static void qdio_mempool_free(void *element, void *size)
-{
-       free_page((unsigned long) element);
-}
-
-static int __init
-init_QDIO(void)
-{
-       int res;
-       void *ptr;
-
-       printk("qdio: loading %s\n",version);
-
-       res=qdio_get_qdio_memory();
-       if (res)
-               return res;
-
-       qdio_q_cache = kmem_cache_create("qdio_q", sizeof(struct qdio_q),
-                                        256, 0, NULL);
-       if (!qdio_q_cache) {
-               qdio_release_qdio_memory();
-               return -ENOMEM;
-       }
-
-       res = qdio_register_dbf_views();
-       if (res) {
-               kmem_cache_destroy(qdio_q_cache);
-               qdio_release_qdio_memory();
-               return res;
-       }
-
-       QDIO_DBF_TEXT0(0,setup,"initQDIO");
-       res = bus_create_file(&ccw_bus_type, &bus_attr_qdio_performance_stats);
-
-       memset((void*)&perf_stats,0,sizeof(perf_stats));
-       QDIO_DBF_TEXT0(0,setup,"perfstat");
-       ptr=&perf_stats;
-       QDIO_DBF_HEX0(0,setup,&ptr,sizeof(void*));
-
-       qdio_add_procfs_entry();
-
-       qdio_mempool_scssc = mempool_create(QDIO_MEMPOOL_SCSSC_ELEMENTS,
-                                           qdio_mempool_alloc,
-                                           qdio_mempool_free, NULL);
-
-       isc_register(QDIO_AIRQ_ISC);
-       if (tiqdio_check_chsc_availability())
-               QDIO_PRINT_ERR("Not all CHSCs supported. Continuing.\n");
-
-       tiqdio_register_thinints();
-
-       return 0;
- }
-
-static void __exit
-cleanup_QDIO(void)
-{
-       tiqdio_unregister_thinints();
-       isc_unregister(QDIO_AIRQ_ISC);
-       qdio_remove_procfs_entry();
-       qdio_release_qdio_memory();
-       qdio_unregister_dbf_views();
-       mempool_destroy(qdio_mempool_scssc);
-       kmem_cache_destroy(qdio_q_cache);
-       bus_remove_file(&ccw_bus_type, &bus_attr_qdio_performance_stats);
-       printk("qdio: %s: module removed\n",version);
-}
-
-module_init(init_QDIO);
-module_exit(cleanup_QDIO);
-
-EXPORT_SYMBOL(qdio_allocate);
-EXPORT_SYMBOL(qdio_establish);
-EXPORT_SYMBOL(qdio_initialize);
-EXPORT_SYMBOL(qdio_activate);
-EXPORT_SYMBOL(do_QDIO);
-EXPORT_SYMBOL(qdio_shutdown);
-EXPORT_SYMBOL(qdio_free);
-EXPORT_SYMBOL(qdio_cleanup);
-EXPORT_SYMBOL(qdio_synchronize);
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h

index 7656081a24d284b33bc893571412e466e21b9585..c1a70985abfaf7d877fa60d4d6f72a78640cf5e9 100644 (file)
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -1,66 +1,20 @@
+/*
+ * linux/drivers/s390/cio/qdio.h
+ *
+ * Copyright 2000,2008 IBM Corp.
+ * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
+ *           Jan Glauber <jang@linux.vnet.ibm.com>
+ */
  #ifndef _CIO_QDIO_H
  #define _CIO_QDIO_H
  
  #include <asm/page.h>
-#include <asm/isc.h>
  #include <asm/schid.h>
+#include "chsc.h"
  
-#ifdef CONFIG_QDIO_DEBUG
-#define QDIO_VERBOSE_LEVEL 9
-#else /* CONFIG_QDIO_DEBUG */
-#define QDIO_VERBOSE_LEVEL 5
-#endif /* CONFIG_QDIO_DEBUG */
-#define QDIO_USE_PROCESSING_STATE
-
-#define QDIO_MINIMAL_BH_RELIEF_TIME 16
-#define QDIO_TIMER_POLL_VALUE 1
-#define IQDIO_TIMER_POLL_VALUE 1
-
-/*
- * unfortunately this can't be (QDIO_MAX_BUFFERS_PER_Q*4/3) or so -- as
- * we never know, whether we'll get initiative again, e.g. to give the
- * transmit skb's back to the stack, however the stack may be waiting for
- * them... therefore we define 4 as threshold to start polling (which
- * will stop as soon as the asynchronous queue catches up)
- * btw, this only applies to the asynchronous HiperSockets queue
- */
-#define IQDIO_FILL_LEVEL_TO_POLL 4
-
-#define TIQDIO_THININT_ISC QDIO_AIRQ_ISC
-#define TIQDIO_DELAY_TARGET 0
-#define QDIO_BUSY_BIT_PATIENCE 100 /* in microsecs */
-#define QDIO_BUSY_BIT_GIVE_UP 10000000 /* 10 seconds */
-#define IQDIO_GLOBAL_LAPS 2 /* GLOBAL_LAPS are not used as we */
-#define IQDIO_GLOBAL_LAPS_INT 1 /* don't global summary */
-#define IQDIO_LOCAL_LAPS 4
-#define IQDIO_LOCAL_LAPS_INT 1
-#define IQDIO_GLOBAL_SUMMARY_CC_MASK 2
-/*#define IQDIO_IQDC_INT_PARM 0x1234*/
-
-#define QDIO_Q_LAPS 5
-
-#define QDIO_STORAGE_KEY PAGE_DEFAULT_KEY
-
-#define L2_CACHELINE_SIZE 256
-#define INDICATORS_PER_CACHELINE (L2_CACHELINE_SIZE/sizeof(__u32))
-
-#define QDIO_PERF "qdio_perf"
-
-/* must be a power of 2 */
-/*#define QDIO_STATS_NUMBER 4
-
-#define QDIO_STATS_CLASSES 2
-#define QDIO_STATS_COUNT_NEEDED 2*/
-
-#define QDIO_NO_USE_COUNT_TIMEOUT (1*HZ) /* wait for 1 sec on each q before
-                                           exiting without having use_count
-                                           of the queue to 0 */
-
-#define QDIO_ESTABLISH_TIMEOUT (1*HZ)
-#define QDIO_CLEANUP_CLEAR_TIMEOUT (20*HZ)
-#define QDIO_CLEANUP_HALT_TIMEOUT (10*HZ)
-#define QDIO_FORCE_CHECK_TIMEOUT (10*HZ)
-#define QDIO_ACTIVATE_TIMEOUT (5) /* 5 ms */
+#define QDIO_BUSY_BIT_PATIENCE         100     /* 100 microseconds */
+#define QDIO_BUSY_BIT_GIVE_UP          2000000 /* 2 seconds = eternity */
+#define QDIO_INPUT_THRESHOLD           500     /* 500 microseconds */
  
  enum qdio_irq_states {
         QDIO_IRQ_STATE_INACTIVE,
@@ -72,565 +26,352 @@ enum qdio_irq_states {
         NR_QDIO_IRQ_STATES,
  };
  
-/* used as intparm in do_IO: */
-#define QDIO_DOING_SENSEID 0
-#define QDIO_DOING_ESTABLISH 1
-#define QDIO_DOING_ACTIVATE 2
-#define QDIO_DOING_CLEANUP 3
-
-/************************* DEBUG FACILITY STUFF *********************/
-
-#define QDIO_DBF_HEX(ex,name,level,addr,len) \
-       do { \
-       if (ex) \
-               debug_exception(qdio_dbf_##name,level,(void*)(addr),len); \
-       else \
-               debug_event(qdio_dbf_##name,level,(void*)(addr),len); \
-       } while (0)
-#define QDIO_DBF_TEXT(ex,name,level,text) \
-       do { \
-       if (ex) \
-               debug_text_exception(qdio_dbf_##name,level,text); \
-       else \
-               debug_text_event(qdio_dbf_##name,level,text); \
-       } while (0)
-
-
-#define QDIO_DBF_HEX0(ex,name,addr,len) QDIO_DBF_HEX(ex,name,0,addr,len)
-#define QDIO_DBF_HEX1(ex,name,addr,len) QDIO_DBF_HEX(ex,name,1,addr,len)
-#define QDIO_DBF_HEX2(ex,name,addr,len) QDIO_DBF_HEX(ex,name,2,addr,len)
-#ifdef CONFIG_QDIO_DEBUG
-#define QDIO_DBF_HEX3(ex,name,addr,len) QDIO_DBF_HEX(ex,name,3,addr,len)
-#define QDIO_DBF_HEX4(ex,name,addr,len) QDIO_DBF_HEX(ex,name,4,addr,len)
-#define QDIO_DBF_HEX5(ex,name,addr,len) QDIO_DBF_HEX(ex,name,5,addr,len)
-#define QDIO_DBF_HEX6(ex,name,addr,len) QDIO_DBF_HEX(ex,name,6,addr,len)
-#else /* CONFIG_QDIO_DEBUG */
-#define QDIO_DBF_HEX3(ex,name,addr,len) do {} while (0)
-#define QDIO_DBF_HEX4(ex,name,addr,len) do {} while (0)
-#define QDIO_DBF_HEX5(ex,name,addr,len) do {} while (0)
-#define QDIO_DBF_HEX6(ex,name,addr,len) do {} while (0)
-#endif /* CONFIG_QDIO_DEBUG */
-
-#define QDIO_DBF_TEXT0(ex,name,text) QDIO_DBF_TEXT(ex,name,0,text)
-#define QDIO_DBF_TEXT1(ex,name,text) QDIO_DBF_TEXT(ex,name,1,text)
-#define QDIO_DBF_TEXT2(ex,name,text) QDIO_DBF_TEXT(ex,name,2,text)
-#ifdef CONFIG_QDIO_DEBUG
-#define QDIO_DBF_TEXT3(ex,name,text) QDIO_DBF_TEXT(ex,name,3,text)
-#define QDIO_DBF_TEXT4(ex,name,text) QDIO_DBF_TEXT(ex,name,4,text)
-#define QDIO_DBF_TEXT5(ex,name,text) QDIO_DBF_TEXT(ex,name,5,text)
-#define QDIO_DBF_TEXT6(ex,name,text) QDIO_DBF_TEXT(ex,name,6,text)
-#else /* CONFIG_QDIO_DEBUG */
-#define QDIO_DBF_TEXT3(ex,name,text) do {} while (0)
-#define QDIO_DBF_TEXT4(ex,name,text) do {} while (0)
-#define QDIO_DBF_TEXT5(ex,name,text) do {} while (0)
-#define QDIO_DBF_TEXT6(ex,name,text) do {} while (0)
-#endif /* CONFIG_QDIO_DEBUG */
-
-#define QDIO_DBF_SETUP_NAME "qdio_setup"
-#define QDIO_DBF_SETUP_LEN 8
-#define QDIO_DBF_SETUP_PAGES 4
-#define QDIO_DBF_SETUP_NR_AREAS 1
-#ifdef CONFIG_QDIO_DEBUG
-#define QDIO_DBF_SETUP_LEVEL 6
-#else /* CONFIG_QDIO_DEBUG */
-#define QDIO_DBF_SETUP_LEVEL 2
-#endif /* CONFIG_QDIO_DEBUG */
-
-#define QDIO_DBF_SBAL_NAME "qdio_labs" /* sbal */
-#define QDIO_DBF_SBAL_LEN 256
-#define QDIO_DBF_SBAL_PAGES 4
-#define QDIO_DBF_SBAL_NR_AREAS 2
-#ifdef CONFIG_QDIO_DEBUG
-#define QDIO_DBF_SBAL_LEVEL 6
-#else /* CONFIG_QDIO_DEBUG */
-#define QDIO_DBF_SBAL_LEVEL 2
-#endif /* CONFIG_QDIO_DEBUG */
-
-#define QDIO_DBF_TRACE_NAME "qdio_trace"
-#define QDIO_DBF_TRACE_LEN 8
-#define QDIO_DBF_TRACE_NR_AREAS 2
-#ifdef CONFIG_QDIO_DEBUG
-#define QDIO_DBF_TRACE_PAGES 16
-#define QDIO_DBF_TRACE_LEVEL 4 /* -------- could be even more verbose here */
-#else /* CONFIG_QDIO_DEBUG */
-#define QDIO_DBF_TRACE_PAGES 4
-#define QDIO_DBF_TRACE_LEVEL 2
-#endif /* CONFIG_QDIO_DEBUG */
-
-#define QDIO_DBF_SENSE_NAME "qdio_sense"
-#define QDIO_DBF_SENSE_LEN 64
-#define QDIO_DBF_SENSE_PAGES 2
-#define QDIO_DBF_SENSE_NR_AREAS 1
-#ifdef CONFIG_QDIO_DEBUG
-#define QDIO_DBF_SENSE_LEVEL 6
-#else /* CONFIG_QDIO_DEBUG */
-#define QDIO_DBF_SENSE_LEVEL 2
-#endif /* CONFIG_QDIO_DEBUG */
-
-#ifdef CONFIG_QDIO_DEBUG
-#define QDIO_TRACE_QTYPE QDIO_ZFCP_QFMT
-
-#define QDIO_DBF_SLSB_OUT_NAME "qdio_slsb_out"
-#define QDIO_DBF_SLSB_OUT_LEN QDIO_MAX_BUFFERS_PER_Q
-#define QDIO_DBF_SLSB_OUT_PAGES 256
-#define QDIO_DBF_SLSB_OUT_NR_AREAS 1
-#define QDIO_DBF_SLSB_OUT_LEVEL 6
-
-#define QDIO_DBF_SLSB_IN_NAME "qdio_slsb_in"
-#define QDIO_DBF_SLSB_IN_LEN QDIO_MAX_BUFFERS_PER_Q
-#define QDIO_DBF_SLSB_IN_PAGES 256
-#define QDIO_DBF_SLSB_IN_NR_AREAS 1
-#define QDIO_DBF_SLSB_IN_LEVEL 6
-#endif /* CONFIG_QDIO_DEBUG */
-
-#define QDIO_PRINTK_HEADER QDIO_NAME ": "
-
-#if QDIO_VERBOSE_LEVEL>8
-#define QDIO_PRINT_STUPID(x...) printk( KERN_DEBUG QDIO_PRINTK_HEADER x)
-#else
-#define QDIO_PRINT_STUPID(x...) do { } while (0)
-#endif
+/* used as intparm in do_IO */
+#define QDIO_DOING_ESTABLISH   1
+#define QDIO_DOING_ACTIVATE    2
+#define QDIO_DOING_CLEANUP     3
+
+#define SLSB_STATE_NOT_INIT    0x0
+#define SLSB_STATE_EMPTY       0x1
+#define SLSB_STATE_PRIMED      0x2
+#define SLSB_STATE_HALTED      0xe
+#define SLSB_STATE_ERROR       0xf
+#define SLSB_TYPE_INPUT                0x0
+#define SLSB_TYPE_OUTPUT       0x20
+#define SLSB_OWNER_PROG                0x80
+#define SLSB_OWNER_CU          0x40
+
+#define SLSB_P_INPUT_NOT_INIT  \
+       (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_NOT_INIT)  /* 0x80 */
+#define SLSB_P_INPUT_ACK       \
+       (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_EMPTY)     /* 0x81 */
+#define SLSB_CU_INPUT_EMPTY    \
+       (SLSB_OWNER_CU | SLSB_TYPE_INPUT | SLSB_STATE_EMPTY)       /* 0x41 */
+#define SLSB_P_INPUT_PRIMED    \
+       (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_PRIMED)    /* 0x82 */
+#define SLSB_P_INPUT_HALTED    \
+       (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_HALTED)    /* 0x8e */
+#define SLSB_P_INPUT_ERROR     \
+       (SLSB_OWNER_PROG | SLSB_TYPE_INPUT | SLSB_STATE_ERROR)     /* 0x8f */
+#define SLSB_P_OUTPUT_NOT_INIT \
+       (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_NOT_INIT) /* 0xa0 */
+#define SLSB_P_OUTPUT_EMPTY    \
+       (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_EMPTY)    /* 0xa1 */
+#define SLSB_CU_OUTPUT_PRIMED  \
+       (SLSB_OWNER_CU | SLSB_TYPE_OUTPUT | SLSB_STATE_PRIMED)     /* 0x62 */
+#define SLSB_P_OUTPUT_HALTED   \
+       (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_HALTED)   /* 0xae */
+#define SLSB_P_OUTPUT_ERROR    \
+       (SLSB_OWNER_PROG | SLSB_TYPE_OUTPUT | SLSB_STATE_ERROR)    /* 0xaf */
+
+#define SLSB_ERROR_DURING_LOOKUP  0xff
+
+/* additional CIWs returned by extended Sense-ID */
+#define CIW_TYPE_EQUEUE                        0x3 /* establish QDIO queues */
+#define CIW_TYPE_AQUEUE                        0x4 /* activate QDIO queues */
  
-#if QDIO_VERBOSE_LEVEL>7
-#define QDIO_PRINT_ALL(x...) printk( QDIO_PRINTK_HEADER x)
-#else
-#define QDIO_PRINT_ALL(x...) do { } while (0)
-#endif
-
-#if QDIO_VERBOSE_LEVEL>6
-#define QDIO_PRINT_INFO(x...) printk( QDIO_PRINTK_HEADER x)
-#else
-#define QDIO_PRINT_INFO(x...) do { } while (0)
-#endif
-
-#if QDIO_VERBOSE_LEVEL>5
-#define QDIO_PRINT_WARN(x...) printk( QDIO_PRINTK_HEADER x)
-#else
-#define QDIO_PRINT_WARN(x...) do { } while (0)
-#endif
-
-#if QDIO_VERBOSE_LEVEL>4
-#define QDIO_PRINT_ERR(x...) printk( QDIO_PRINTK_HEADER x)
-#else
-#define QDIO_PRINT_ERR(x...) do { } while (0)
-#endif
-
-#if QDIO_VERBOSE_LEVEL>3
-#define QDIO_PRINT_CRIT(x...) printk( QDIO_PRINTK_HEADER x)
-#else
-#define QDIO_PRINT_CRIT(x...) do { } while (0)
-#endif
-
-#if QDIO_VERBOSE_LEVEL>2
-#define QDIO_PRINT_ALERT(x...) printk( QDIO_PRINTK_HEADER x)
-#else
-#define QDIO_PRINT_ALERT(x...) do { } while (0)
-#endif
+/* flags for st qdio sch data */
+#define CHSC_FLAG_QDIO_CAPABILITY      0x80
+#define CHSC_FLAG_VALIDITY             0x40
+
+/* qdio adapter-characteristics-1 flag */
+#define AC1_SIGA_INPUT_NEEDED          0x40    /* process input queues */
+#define AC1_SIGA_OUTPUT_NEEDED         0x20    /* process output queues */
+#define AC1_SIGA_SYNC_NEEDED           0x10    /* ask hypervisor to sync */
+#define AC1_AUTOMATIC_SYNC_ON_THININT  0x08    /* set by hypervisor */
+#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI  0x04    /* set by hypervisor */
+#define AC1_SC_QEBSM_AVAILABLE         0x02    /* available for subchannel */
+#define AC1_SC_QEBSM_ENABLED           0x01    /* enabled for subchannel */
  
-#if QDIO_VERBOSE_LEVEL>1
-#define QDIO_PRINT_EMERG(x...) printk( QDIO_PRINTK_HEADER x)
-#else
-#define QDIO_PRINT_EMERG(x...) do { } while (0)
-#endif
-
-#define QDIO_HEXDUMP16(importance,header,ptr) \
-QDIO_PRINT_##importance(header "%02x %02x %02x %02x  " \
-                       "%02x %02x %02x %02x  %02x %02x %02x %02x  " \
-                       "%02x %02x %02x %02x\n",*(((char*)ptr)), \
-                       *(((char*)ptr)+1),*(((char*)ptr)+2), \
-                       *(((char*)ptr)+3),*(((char*)ptr)+4), \
-                       *(((char*)ptr)+5),*(((char*)ptr)+6), \
-                       *(((char*)ptr)+7),*(((char*)ptr)+8), \
-                       *(((char*)ptr)+9),*(((char*)ptr)+10), \
-                       *(((char*)ptr)+11),*(((char*)ptr)+12), \
-                       *(((char*)ptr)+13),*(((char*)ptr)+14), \
-                       *(((char*)ptr)+15)); \
-QDIO_PRINT_##importance(header "%02x %02x %02x %02x  %02x %02x %02x %02x  " \
-                       "%02x %02x %02x %02x  %02x %02x %02x %02x\n", \
-                       *(((char*)ptr)+16),*(((char*)ptr)+17), \
-                       *(((char*)ptr)+18),*(((char*)ptr)+19), \
-                       *(((char*)ptr)+20),*(((char*)ptr)+21), \
-                       *(((char*)ptr)+22),*(((char*)ptr)+23), \
-                       *(((char*)ptr)+24),*(((char*)ptr)+25), \
-                       *(((char*)ptr)+26),*(((char*)ptr)+27), \
-                       *(((char*)ptr)+28),*(((char*)ptr)+29), \
-                       *(((char*)ptr)+30),*(((char*)ptr)+31));
-
-/****************** END OF DEBUG FACILITY STUFF *********************/
+#ifdef CONFIG_64BIT
+static inline int do_sqbs(u64 token, unsigned char state, int queue,
+                         int *start, int *count)
+{
+       register unsigned long _ccq asm ("0") = *count;
+       register unsigned long _token asm ("1") = token;
+       unsigned long _queuestart = ((unsigned long)queue << 32) | *start;
  
-/*
- * Some instructions as assembly
- */
+       asm volatile(
+               "       .insn   rsy,0xeb000000008A,%1,0,0(%2)"
+               : "+d" (_ccq), "+d" (_queuestart)
+               : "d" ((unsigned long)state), "d" (_token)
+               : "memory", "cc");
+       *count = _ccq & 0xff;
+       *start = _queuestart & 0xff;
  
-static inline int
-do_sqbs(unsigned long sch, unsigned char state, int queue,
-       unsigned int *start, unsigned int *count)
-{
-#ifdef CONFIG_64BIT
-       register unsigned long _ccq asm ("0") = *count;
-       register unsigned long _sch asm ("1") = sch;
-       unsigned long _queuestart = ((unsigned long)queue << 32) | *start;
-
-       asm volatile(
-              "        .insn   rsy,0xeb000000008A,%1,0,0(%2)"
-              : "+d" (_ccq), "+d" (_queuestart)
-              : "d" ((unsigned long)state), "d" (_sch)
-              : "memory", "cc");
-       *count = _ccq & 0xff;
-       *start = _queuestart & 0xff;
-
-       return (_ccq >> 32) & 0xff;
-#else
-       return 0;
-#endif
+       return (_ccq >> 32) & 0xff;
  }
  
-static inline int
-do_eqbs(unsigned long sch, unsigned char *state, int queue,
-       unsigned int *start, unsigned int *count)
+static inline int do_eqbs(u64 token, unsigned char *state, int queue,
+                         int *start, int *count)
  {
-#ifdef CONFIG_64BIT
         register unsigned long _ccq asm ("0") = *count;
-       register unsigned long _sch asm ("1") = sch;
+       register unsigned long _token asm ("1") = token;
         unsigned long _queuestart = ((unsigned long)queue << 32) | *start;
         unsigned long _state = 0;
  
         asm volatile(
                 "       .insn   rrf,0xB99c0000,%1,%2,0,0"
                 : "+d" (_ccq), "+d" (_queuestart), "+d" (_state)
-               : "d" (_sch)
-               : "memory", "cc" );
+               : "d" (_token)
+               : "memory", "cc");
         *count = _ccq & 0xff;
         *start = _queuestart & 0xff;
         *state = _state & 0xff;
  
         return (_ccq >> 32) & 0xff;
-#else
-       return 0;
-#endif
-}
-
-
-static inline int
-do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2)
-{
-       register unsigned long reg0 asm ("0") = 2;
-       register struct subchannel_id reg1 asm ("1") = schid;
-       register unsigned long reg2 asm ("2") = mask1;
-       register unsigned long reg3 asm ("3") = mask2;
-       int cc;
-
-       asm volatile(
-               "       siga    0\n"
-               "       ipm     %0\n"
-               "       srl     %0,28\n"
-               : "=d" (cc)
-               : "d" (reg0), "d" (reg1), "d" (reg2), "d" (reg3) : "cc");
-       return cc;
-}
-
-static inline int
-do_siga_input(struct subchannel_id schid, unsigned int mask)
-{
-       register unsigned long reg0 asm ("0") = 1;
-       register struct subchannel_id reg1 asm ("1") = schid;
-       register unsigned long reg2 asm ("2") = mask;
-       int cc;
-
-       asm volatile(
-               "       siga    0\n"
-               "       ipm     %0\n"
-               "       srl     %0,28\n"
-               : "=d" (cc)
-               : "d" (reg0), "d" (reg1), "d" (reg2) : "cc", "memory");
-       return cc;
-}
-
-static inline int
-do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb,
-              unsigned int fc)
-{
-       register unsigned long __fc asm("0") = fc;
-       register unsigned long __schid asm("1") = schid;
-       register unsigned long __mask asm("2") = mask;
-       int cc;
-
-       asm volatile(
-               "       siga    0\n"
-               "0:     ipm     %0\n"
-               "       srl     %0,28\n"
-               "1:\n"
-               EX_TABLE(0b,1b)
-               : "=d" (cc), "+d" (__fc), "+d" (__schid), "+d" (__mask)
-               : "0" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION)
-               : "cc", "memory");
-       (*bb) = ((unsigned int) __fc) >> 31;
-       return cc;
-}
-
-static inline unsigned long
-do_clear_global_summary(void)
-{
-       register unsigned long __fn asm("1") = 3;
-       register unsigned long __tmp asm("2");
-       register unsigned long __time asm("3");
-
-       asm volatile(
-               "       .insn   rre,0xb2650000,2,0"
-               : "+d" (__fn), "=d" (__tmp), "=d" (__time));
-       return __time;
  }
-       
-/*
- * QDIO device commands returned by extended Sense-ID
- */
-#define DEFAULT_ESTABLISH_QS_CMD 0x1b
-#define DEFAULT_ESTABLISH_QS_COUNT 0x1000
-#define DEFAULT_ACTIVATE_QS_CMD 0x1f
-#define DEFAULT_ACTIVATE_QS_COUNT 0
-
-/*
- * additional CIWs returned by extended Sense-ID
- */
-#define CIW_TYPE_EQUEUE 0x3       /* establish QDIO queues */
-#define CIW_TYPE_AQUEUE 0x4       /* activate QDIO queues */
+#else
+static inline int do_sqbs(u64 token, unsigned char state, int queue,
+                         int *start, int *count) { return 0; }
+static inline int do_eqbs(u64 token, unsigned char *state, int queue,
+                         int *start, int *count) { return 0; }
+#endif /* CONFIG_64BIT */
  
-#define QDIO_CHSC_RESPONSE_CODE_OK 1
-/* flags for st qdio sch data */
-#define CHSC_FLAG_QDIO_CAPABILITY 0x80
-#define CHSC_FLAG_VALIDITY 0x40
+struct qdio_irq;
  
-#define CHSC_FLAG_SIGA_INPUT_NECESSARY 0x40
-#define CHSC_FLAG_SIGA_OUTPUT_NECESSARY 0x20
-#define CHSC_FLAG_SIGA_SYNC_NECESSARY 0x10
-#define CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS 0x08
-#define CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS 0x04
+struct siga_flag {
+       u8 input:1;
+       u8 output:1;
+       u8 sync:1;
+       u8 no_sync_ti:1;
+       u8 no_sync_out_ti:1;
+       u8 no_sync_out_pci:1;
+       u8:2;
+} __attribute__ ((packed));
  
-struct qdio_chsc_ssqd {
+struct chsc_ssqd_area {
         struct chsc_header request;
-       u16 reserved1:10;
-       u16 ssid:2;
-       u16 fmt:4;
+       u16:10;
+       u8 ssid:2;
+       u8 fmt:4;
         u16 first_sch;
-       u16 reserved2;
+       u16:16;
         u16 last_sch;
-       u32 reserved3;
+       u32:32;
         struct chsc_header response;
-       u32 reserved4;
-       u8  flags;
-       u8  reserved5;
-       u16 sch;
-       u8  qfmt;
-       u8  parm;
-       u8  qdioac1;
-       u8  sch_class;
-       u8  pct;
-       u8  icnt;
-       u8  reserved7;
-       u8  ocnt;
-       u8  reserved8;
-       u8  mbccnt;
-       u16 qdioac2;
-       u64 sch_token;
-};
+       u32:32;
+       struct qdio_ssqd_desc qdio_ssqd;
+} __attribute__ ((packed));
  
-struct qdio_perf_stats {
-#ifdef CONFIG_64BIT
-       atomic64_t tl_runs;
-       atomic64_t outbound_tl_runs;
-       atomic64_t outbound_tl_runs_resched;
-       atomic64_t inbound_tl_runs;
-       atomic64_t inbound_tl_runs_resched;
-       atomic64_t inbound_thin_tl_runs;
-       atomic64_t inbound_thin_tl_runs_resched;
-
-       atomic64_t siga_outs;
-       atomic64_t siga_ins;
-       atomic64_t siga_syncs;
-       atomic64_t pcis;
-       atomic64_t thinints;
-       atomic64_t fast_reqs;
-
-       atomic64_t outbound_cnt;
-       atomic64_t inbound_cnt;
-#else /* CONFIG_64BIT */
-       atomic_t tl_runs;
-       atomic_t outbound_tl_runs;
-       atomic_t outbound_tl_runs_resched;
-       atomic_t inbound_tl_runs;
-       atomic_t inbound_tl_runs_resched;
-       atomic_t inbound_thin_tl_runs;
-       atomic_t inbound_thin_tl_runs_resched;
-
-       atomic_t siga_outs;
-       atomic_t siga_ins;
-       atomic_t siga_syncs;
-       atomic_t pcis;
-       atomic_t thinints;
-       atomic_t fast_reqs;
-
-       atomic_t outbound_cnt;
-       atomic_t inbound_cnt;
-#endif /* CONFIG_64BIT */
+struct scssc_area {
+       struct chsc_header request;
+       u16 operation_code;
+       u16:16;
+       u32:32;
+       u32:32;
+       u64 summary_indicator_addr;
+       u64 subchannel_indicator_addr;
+       u32 ks:4;
+       u32 kc:4;
+       u32:21;
+       u32 isc:3;
+       u32 word_with_d_bit;
+       u32:32;
+       struct subchannel_id schid;
+       u32 reserved[1004];
+       struct chsc_header response;
+       u32:32;
+} __attribute__ ((packed));
+
+struct qdio_input_q {
+       /* input buffer acknowledgement flag */
+       int polling;
+
+       /* last time of noticing incoming data */
+       u64 timestamp;
+
+       /* lock for clearing the acknowledgement */
+       spinlock_t lock;
  };
  
-/* unlikely as the later the better */
-#define SYNC_MEMORY if (unlikely(q->siga_sync)) qdio_siga_sync_q(q)
-#define SYNC_MEMORY_ALL if (unlikely(q->siga_sync)) \
-       qdio_siga_sync(q,~0U,~0U)
-#define SYNC_MEMORY_ALL_OUTB if (unlikely(q->siga_sync)) \
-       qdio_siga_sync(q,~0U,0)
+struct qdio_output_q {
+       /* failed siga-w attempts*/
+       atomic_t busy_siga_counter;
  
-#define NOW qdio_get_micros()
-#define SAVE_TIMESTAMP(q) q->timing.last_transfer_time=NOW
-#define GET_SAVED_TIMESTAMP(q) (q->timing.last_transfer_time)
-#define SAVE_FRONTIER(q,val) q->last_move_ftc=val
-#define GET_SAVED_FRONTIER(q) (q->last_move_ftc)
+       /* start time of busy condition */
+       u64 timestamp;
  
-#define MY_MODULE_STRING(x) #x
+       /* PCIs are enabled for the queue */
+       int pci_out_enabled;
  
-#ifdef CONFIG_64BIT
-#define QDIO_GET_ADDR(x) ((__u32)(unsigned long)x)
-#else /* CONFIG_64BIT */
-#define QDIO_GET_ADDR(x) ((__u32)(long)x)
-#endif /* CONFIG_64BIT */
+       /* timer to check for more outbound work */
+       struct timer_list timer;
+};
  
  struct qdio_q {
-       volatile struct slsb slsb;
+       struct slsb slsb;
+       union {
+               struct qdio_input_q in;
+               struct qdio_output_q out;
+       } u;
  
-       char unused[QDIO_MAX_BUFFERS_PER_Q];
+       /* queue number */
+       int nr;
  
-       __u32 * dev_st_chg_ind;
+       /* bitmask of queue number */
+       int mask;
  
+       /* input or output queue */
         int is_input_q;
-       struct subchannel_id schid;
-       struct ccw_device *cdev;
-
-       unsigned int is_iqdio_q;
-       unsigned int is_thinint_q;
  
-       /* bit 0 means queue 0, bit 1 means queue 1, ... */
-       unsigned int mask;
-       unsigned int q_no;
+       /* list of thinint input queues */
+       struct list_head entry;
  
+       /* upper-layer program handler */
         qdio_handler_t (*handler);
  
-       /* points to the next buffer to be checked for having
-        * been processed by the card (outbound)
-        * or to the next buffer the program should check for (inbound) */
-       volatile int first_to_check;
-       /* and the last time it was: */
-       volatile int last_move_ftc;
+       /*
+        * inbound: next buffer the program should check for
+        * outbound: next buffer to check for having been processed
+        * by the card
+        */
+       int first_to_check;
  
-       atomic_t number_of_buffers_used;
-       atomic_t polling;
+       /* first_to_check of the last time */
+       int last_move_ftc;
  
-       unsigned int siga_in;
-       unsigned int siga_out;
-       unsigned int siga_sync;
-       unsigned int siga_sync_done_on_thinints;
-       unsigned int siga_sync_done_on_outb_tis;
-       unsigned int hydra_gives_outbound_pcis;
+       /* beginning position for calling the program */
+       int first_to_kick;
  
-       /* used to save beginning position when calling dd_handlers */
-       int first_element_to_kick;
+       /* number of buffers in use by the adapter */
+       atomic_t nr_buf_used;
  
-       atomic_t use_count;
-       atomic_t is_in_shutdown;
-
-       void *irq_ptr;
-
-       struct timer_list timer;
-#ifdef QDIO_USE_TIMERS_FOR_POLLING
-       atomic_t timer_already_set;
-       spinlock_t timer_lock;
-#else /* QDIO_USE_TIMERS_FOR_POLLING */
+       struct qdio_irq *irq_ptr;
         struct tasklet_struct tasklet;
-#endif /* QDIO_USE_TIMERS_FOR_POLLING */
  
-
-       enum qdio_irq_states state;
-
-       /* used to store the error condition during a data transfer */
+       /* error condition during a data transfer */
         unsigned int qdio_error;
-       unsigned int siga_error;
-       unsigned int error_status_flags;
-
-       /* list of interesting queues */
-       volatile struct qdio_q *list_next;
-       volatile struct qdio_q *list_prev;
  
         struct sl *sl;
-       volatile struct sbal *sbal[QDIO_MAX_BUFFERS_PER_Q];
-
-       struct qdio_buffer *qdio_buffers[QDIO_MAX_BUFFERS_PER_Q];
-
-       unsigned long int_parm;
-
-       /*struct {
-               int in_bh_check_limit;
-               int threshold;
-       } threshold_classes[QDIO_STATS_CLASSES];*/
-
-       struct {
-               /* inbound: the time to stop polling
-                  outbound: the time to kick peer */
-               int threshold; /* the real value */
-
-               /* outbound: last time of do_QDIO
-                  inbound: last time of noticing incoming data */
-               /*__u64 last_transfer_times[QDIO_STATS_NUMBER];
-               int last_transfer_index; */
-
-               __u64 last_transfer_time;
-               __u64 busy_start;
-       } timing;
-       atomic_t busy_siga_counter;
-        unsigned int queue_type;
-       unsigned int is_pci_out;
-
-       /* leave this member at the end. won't be cleared in qdio_fill_qs */
-       struct slib *slib; /* a page is allocated under this pointer,
-                             sl points into this page, offset PAGE_SIZE/2
-                             (after slib) */
+       struct qdio_buffer *sbal[QDIO_MAX_BUFFERS_PER_Q];
+
+       /*
+        * Warning: Leave this member at the end so it won't be cleared in
+        * qdio_fill_qs. A page is allocated under this pointer and used for
+        * slib and sl. slib is 2048 bytes big and sl points to offset
+        * PAGE_SIZE / 2.
+        */
+       struct slib *slib;
  } __attribute__ ((aligned(256)));
  
  struct qdio_irq {
-       __u32 * volatile dev_st_chg_ind;
+       struct qib qib;
+       u32 *dsci;              /* address of device state change indicator */
+       struct ccw_device *cdev;
  
         unsigned long int_parm;
         struct subchannel_id schid;
-
-       unsigned int is_iqdio_irq;
-       unsigned int is_thinint_irq;
-       unsigned int hydra_gives_outbound_pcis;
-       unsigned int sync_done_on_outb_pcis;
-
-       /* QEBSM facility */
-       unsigned int is_qebsm;
-       unsigned long sch_token;
+       unsigned long sch_token;        /* QEBSM facility */
  
         enum qdio_irq_states state;
  
-       unsigned int no_input_qs;
-       unsigned int no_output_qs;
+       struct siga_flag siga_flag;     /* siga sync information from qdioac */
  
-       unsigned char qdioac;
+       int nr_input_qs;
+       int nr_output_qs;
  
         struct ccw1 ccw;
-
         struct ciw equeue;
         struct ciw aqueue;
  
-       struct qib qib;
-       
-       void (*original_int_handler) (struct ccw_device *,
-                                     unsigned long, struct irb *);
+       struct qdio_ssqd_desc ssqd_desc;
+
+       void (*orig_handler) (struct ccw_device *, unsigned long, struct irb *);
  
-       /* leave these four members together at the end. won't be cleared in qdio_fill_irq */
+       /*
+        * Warning: Leave these members together at the end so they won't be
+        * cleared in qdio_setup_irq.
+        */
         struct qdr *qdr;
+       unsigned long chsc_page;
+
         struct qdio_q *input_qs[QDIO_MAX_QUEUES_PER_IRQ];
         struct qdio_q *output_qs[QDIO_MAX_QUEUES_PER_IRQ];
-       struct semaphore setting_up_sema;
+
+       struct mutex setup_mutex;
  };
-#endif
+
+/* helper functions */
+
+/* queue format of the irq a queue belongs to */
+#define queue_type(q)  ((q)->irq_ptr->qib.qfmt)
+
+/*
+ * True if the irq uses the IQDIO queue format or if the channel subsystem
+ * reports the aif_osa characteristic.
+ */
+#define is_thinint_irq(irq) \
+       ((irq)->qib.qfmt == QDIO_IQDIO_QFMT || \
+        css_general_characteristics.aif_osa)
+
+/*
+ * The highest iqdio queue is used for multicast: returns non-zero if q is the
+ * last output queue of an irq that has more than one output queue.
+ */
+static inline int multicast_outbound(struct qdio_q *q)
+{
+       int nr_out = q->irq_ptr->nr_output_qs;
+
+       if (nr_out <= 1)
+               return 0;
+       return q->nr == nr_out - 1;
+}
+
+/*
+ * Returns monotonic_clock() shifted right by 12 bits.
+ * NOTE(review): presumably this converts the clock value to microseconds,
+ * as the function name suggests -- confirm against monotonic_clock().
+ */
+static inline unsigned long long get_usecs(void)
+{
+       return monotonic_clock() >> 12;
+}
+
+/*
+ * Per-queue predicates, all evaluated on the irq the queue belongs to
+ * (q->irq_ptr). The macro argument is parenthesized so that any pointer
+ * expression can be passed.
+ */
+#define pci_out_supported(q) \
+       ((q)->irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED)
+#define is_qebsm(q)                    ((q)->irq_ptr->sch_token != 0)
+
+#define need_siga_sync_thinint(q)      (!(q)->irq_ptr->siga_flag.no_sync_ti)
+#define need_siga_sync_out_thinint(q)  (!(q)->irq_ptr->siga_flag.no_sync_out_ti)
+#define need_siga_in(q)                        ((q)->irq_ptr->siga_flag.input)
+#define need_siga_out(q)               ((q)->irq_ptr->siga_flag.output)
+#define need_siga_sync(q)              ((q)->irq_ptr->siga_flag.sync)
+/* NOTE(review): reads the no_sync_out_pci flag -- confirm the polarity */
+#define siga_syncs_out_pci(q)          ((q)->irq_ptr->siga_flag.no_sync_out_pci)
+
+/*
+ * Iterate over all input/output queues of an irq.
+ *
+ * q is loaded from the queue array only after the index has been checked
+ * against the number of queues, so the arrays are never read at index
+ * nr_input_qs/nr_output_qs (which is one past the end of the array when all
+ * QDIO_MAX_QUEUES_PER_IRQ queues are in use). q is only valid inside the
+ * loop body.
+ */
+#define for_each_input_queue(irq_ptr, q, i)                    \
+       for ((i) = 0;                                           \
+            (i) < (irq_ptr)->nr_input_qs &&                    \
+            ((q) = (irq_ptr)->input_qs[(i)], 1);               \
+            (i)++)
+#define for_each_output_queue(irq_ptr, q, i)                   \
+       for ((i) = 0;                                           \
+            (i) < (irq_ptr)->nr_output_qs &&                   \
+            ((q) = (irq_ptr)->output_qs[(i)], 1);              \
+            (i)++)
+
+/*
+ * Buffer index arithmetic with wrap-around: the result is always masked with
+ * QDIO_MAX_BUFFERS_MASK. Arguments are parenthesized so that arbitrary
+ * expressions (e.g. a conditional expression as inc) expand correctly.
+ */
+#define prev_buf(bufnr)        \
+       (((bufnr) + QDIO_MAX_BUFFERS_MASK) & QDIO_MAX_BUFFERS_MASK)
+#define next_buf(bufnr)        \
+       (((bufnr) + 1) & QDIO_MAX_BUFFERS_MASK)
+#define add_buf(bufnr, inc) \
+       (((bufnr) + (inc)) & QDIO_MAX_BUFFERS_MASK)
+
+/* prototypes for thin interrupt */
+void qdio_sync_after_thinint(struct qdio_q *q);
+int get_buf_state(struct qdio_q *q, unsigned int bufnr, unsigned char *state);
+void qdio_check_outbound_after_thinint(struct qdio_q *q);
+int qdio_inbound_q_moved(struct qdio_q *q);
+void qdio_kick_inbound_handler(struct qdio_q *q);
+void qdio_stop_polling(struct qdio_q *q);
+int qdio_siga_sync_q(struct qdio_q *q);
+
+void qdio_setup_thinint(struct qdio_irq *irq_ptr);
+int qdio_establish_thinint(struct qdio_irq *irq_ptr);
+void qdio_shutdown_thinint(struct qdio_irq *irq_ptr);
+void tiqdio_add_input_queues(struct qdio_irq *irq_ptr);
+void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr);
+void tiqdio_inbound_processing(unsigned long q);
+int tiqdio_allocate_memory(void);
+void tiqdio_free_memory(void);
+int tiqdio_register_thinints(void);
+void tiqdio_unregister_thinints(void);
+
+/* prototypes for setup */
+void qdio_inbound_processing(unsigned long data);
+void qdio_outbound_processing(unsigned long data);
+void qdio_outbound_timer(unsigned long data);
+void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
+                     struct irb *irb);
+int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs,
+                    int nr_output_qs);
+void qdio_setup_ssqd_info(struct qdio_irq *irq_ptr);
+int qdio_setup_irq(struct qdio_initialize *init_data);
+void qdio_print_subchannel_info(struct qdio_irq *irq_ptr,
+                               struct ccw_device *cdev);
+void qdio_release_memory(struct qdio_irq *irq_ptr);
+int qdio_setup_init(void);
+void qdio_setup_exit(void);
+
+#endif /* _CIO_QDIO_H */
diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c

new file mode 100644 (file)

index 0000000..337aa30
--- /dev/null
+++ b/drivers/s390/cio/qdio_debug.c
@@ -0,0 +1,240 @@
+/*
+ *  drivers/s390/cio/qdio_debug.c
+ *
+ *  Copyright IBM Corp. 2008
+ *
+ *  Author: Jan Glauber (jang@linux.vnet.ibm.com)
+ */
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <asm/qdio.h>
+#include <asm/debug.h>
+#include "qdio_debug.h"
+#include "qdio.h"
+
+debug_info_t *qdio_dbf_setup;
+debug_info_t *qdio_dbf_trace;
+
+static struct dentry *debugfs_root;
+#define MAX_DEBUGFS_QUEUES     32
+static struct dentry *debugfs_queues[MAX_DEBUGFS_QUEUES] = { NULL };
+static DEFINE_MUTEX(debugfs_mutex);
+
+/*
+ * Log the contents of the qdio_initialize data passed by the caller to the
+ * qdio setup debug area at level 0: numeric fields as formatted text, the
+ * adapter name and the remaining fields as hex dumps.
+ */
+void qdio_allocate_do_dbf(struct qdio_initialize *init_data)
+{
+       char dbf_text[20];
+
+       sprintf(dbf_text, "qfmt:%x", init_data->q_format);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       QDIO_DBF_HEX0(0, setup, init_data->adapter_name, 8);
+       sprintf(dbf_text, "qpff%4x", init_data->qib_param_field_format);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       /* the following dumps log the stored field values, not what they point to */
+       QDIO_DBF_HEX0(0, setup, &init_data->qib_param_field, sizeof(void *));
+       QDIO_DBF_HEX0(0, setup, &init_data->input_slib_elements, sizeof(void *));
+       QDIO_DBF_HEX0(0, setup, &init_data->output_slib_elements, sizeof(void *));
+       sprintf(dbf_text, "niq:%4x", init_data->no_input_qs);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       sprintf(dbf_text, "noq:%4x", init_data->no_output_qs);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       QDIO_DBF_HEX0(0, setup, &init_data->input_handler, sizeof(void *));
+       QDIO_DBF_HEX0(0, setup, &init_data->output_handler, sizeof(void *));
+       /* NOTE(review): assumes int_parm and flags are long-sized -- confirm */
+       QDIO_DBF_HEX0(0, setup, &init_data->int_parm, sizeof(long));
+       QDIO_DBF_HEX0(0, setup, &init_data->flags, sizeof(long));
+       QDIO_DBF_HEX0(0, setup, &init_data->input_sbal_addr_array, sizeof(void *));
+       QDIO_DBF_HEX0(0, setup, &init_data->output_sbal_addr_array, sizeof(void *));
+}
+
+/*
+ * Unregister the qdio setup and trace debug areas, if registered.
+ *
+ * The global pointers are reset afterwards so that they never refer to an
+ * unregistered debug area (this function also runs on the error path of
+ * qdio_register_dbf_views) and so that calling it twice is harmless.
+ */
+static void qdio_unregister_dbf_views(void)
+{
+       if (qdio_dbf_setup) {
+               debug_unregister(qdio_dbf_setup);
+               qdio_dbf_setup = NULL;
+       }
+       if (qdio_dbf_trace) {
+               debug_unregister(qdio_dbf_trace);
+               qdio_dbf_trace = NULL;
+       }
+}
+
+/*
+ * Register the "qdio_setup" and "qdio_trace" debug areas, attach the
+ * hex/ascii view to each and set their debug levels.
+ *
+ * Returns 0 on success. If either registration fails, whatever was
+ * registered so far is unregistered again and -ENOMEM is returned.
+ */
+static int qdio_register_dbf_views(void)
+{
+       qdio_dbf_setup = debug_register("qdio_setup", QDIO_DBF_SETUP_PAGES,
+                                       QDIO_DBF_SETUP_NR_AREAS,
+                                       QDIO_DBF_SETUP_LEN);
+       if (qdio_dbf_setup != NULL) {
+               debug_register_view(qdio_dbf_setup, &debug_hex_ascii_view);
+               debug_set_level(qdio_dbf_setup, QDIO_DBF_SETUP_LEVEL);
+
+               qdio_dbf_trace = debug_register("qdio_trace",
+                                               QDIO_DBF_TRACE_PAGES,
+                                               QDIO_DBF_TRACE_NR_AREAS,
+                                               QDIO_DBF_TRACE_LEN);
+               if (qdio_dbf_trace != NULL) {
+                       debug_register_view(qdio_dbf_trace,
+                                           &debug_hex_ascii_view);
+                       debug_set_level(qdio_dbf_trace, QDIO_DBF_TRACE_LEVEL);
+                       return 0;
+               }
+       }
+
+       /* one of the registrations failed: undo what has been set up */
+       qdio_unregister_dbf_views();
+       return -ENOMEM;
+}
+
+/* print the one-character symbol for a single slsb buffer state */
+static void qstat_print_state(struct seq_file *m, unsigned char state)
+{
+       switch (state) {
+       case SLSB_P_INPUT_NOT_INIT:
+       case SLSB_P_OUTPUT_NOT_INIT:
+               seq_printf(m, "N");
+               break;
+       case SLSB_P_INPUT_PRIMED:
+       case SLSB_CU_OUTPUT_PRIMED:
+               seq_printf(m, "+");
+               break;
+       case SLSB_P_INPUT_ACK:
+               seq_printf(m, "A");
+               break;
+       case SLSB_P_INPUT_ERROR:
+       case SLSB_P_OUTPUT_ERROR:
+               seq_printf(m, "x");
+               break;
+       case SLSB_CU_INPUT_EMPTY:
+       case SLSB_P_OUTPUT_EMPTY:
+               seq_printf(m, "-");
+               break;
+       case SLSB_P_INPUT_HALTED:
+       case SLSB_P_OUTPUT_HALTED:
+               seq_printf(m, ".");
+               break;
+       default:
+               seq_printf(m, "?");
+       }
+}
+
+/*
+ * seq_file show callback of a queue's debugfs file: prints the device state
+ * indicator, a few queue counters and, after a siga sync of the queue, one
+ * character per buffer for the current slsb state. A line break is inserted
+ * after buffer 63.
+ */
+static int qstat_show(struct seq_file *m, void *v)
+{
+       struct qdio_q *q = m->private;
+       unsigned char state;
+       int bufnr;
+
+       if (q == NULL)
+               return 0;
+
+       seq_printf(m, "device state indicator: %d\n", *q->irq_ptr->dsci);
+       seq_printf(m, "nr_used: %d\n", atomic_read(&q->nr_buf_used));
+       seq_printf(m, "ftc: %d\n", q->first_to_check);
+       seq_printf(m, "last_move_ftc: %d\n", q->last_move_ftc);
+       seq_printf(m, "polling: %d\n", q->u.in.polling);
+       seq_printf(m, "slsb buffer states:\n");
+
+       qdio_siga_sync_q(q);
+       for (bufnr = 0; bufnr < QDIO_MAX_BUFFERS_PER_Q; bufnr++) {
+               get_buf_state(q, bufnr, &state);
+               qstat_print_state(m, state);
+               if (bufnr == 63)
+                       seq_printf(m, "\n");
+       }
+       seq_printf(m, "\n");
+       return 0;
+}
+
+/*
+ * Write handler of a queue's debugfs file. The written data is ignored; any
+ * write sets the device state change indicator (input queues only) and
+ * schedules the queue's tasklet. Returns count, or 0 if no queue is attached
+ * to the file.
+ */
+static ssize_t qstat_seq_write(struct file *file, const char __user *buf,
+                              size_t count, loff_t *off)
+{
+       struct qdio_q *q = ((struct seq_file *)file->private_data)->private;
+
+       if (q == NULL)
+               return 0;
+
+       if (q->is_input_q)
+               xchg(q->irq_ptr->dsci, 1);
+
+       /* schedule the tasklet with bottom halves disabled */
+       local_bh_disable();
+       tasklet_schedule(&q->tasklet);
+       local_bh_enable();
+
+       return count;
+}
+
+/* open handler: bind qstat_show to the private data stored in the inode */
+static int qstat_seq_open(struct inode *inode, struct file *filp)
+{
+       void *priv = filp->f_path.dentry->d_inode->i_private;
+
+       return single_open(filp, qstat_show, priv);
+}
+
+/*
+ * Build the debugfs file name of a queue in name:
+ * "<bus id>_input_<queue nr>" or "<bus id>_output_<queue nr>".
+ *
+ * @q: queue the name is built for
+ * @cdev: ccw device whose bus id prefixes the name
+ * @name: caller-provided buffer, must be large enough for the whole string
+ *
+ * The previous memset(name, 0, sizeof(name)) was dropped: name is a pointer,
+ * so sizeof(name) is the size of the pointer and not of the buffer, and
+ * sprintf NUL-terminates the result anyway.
+ */
+static void get_queue_name(struct qdio_q *q, struct ccw_device *cdev, char *name)
+{
+       sprintf(name, "%s_%s_%d", cdev->dev.bus_id,
+               q->is_input_q ? "input" : "output", q->nr);
+}
+
+/* remove every debugfs file whose private data refers to queue q */
+static void remove_debugfs_entry(struct qdio_q *q)
+{
+       int slot;
+
+       for (slot = 0; slot < MAX_DEBUGFS_QUEUES; slot++) {
+               struct dentry *entry = debugfs_queues[slot];
+
+               /* skip unused slots and files that belong to other queues */
+               if (entry && entry->d_inode->i_private == q) {
+                       debugfs_remove(entry);
+                       debugfs_queues[slot] = NULL;
+               }
+       }
+}
+
+static struct file_operations debugfs_fops = {
+       .owner   = THIS_MODULE,
+       .open    = qstat_seq_open,
+       .read    = seq_read,
+       .write   = qstat_seq_write,
+       .llseek  = seq_lseek,
+       .release = single_release,
+};
+
+/*
+ * Create the debugfs file for queue q in the first free slot of
+ * debugfs_queues. Nothing is created if all slots are in use.
+ */
+static void setup_debugfs_entry(struct qdio_q *q, struct ccw_device *cdev)
+{
+       char name[40];
+       int slot;
+
+       /* look for the first unused slot */
+       for (slot = 0; slot < MAX_DEBUGFS_QUEUES; slot++)
+               if (debugfs_queues[slot] == NULL)
+                       break;
+       if (slot == MAX_DEBUGFS_QUEUES)
+               return;
+
+       get_queue_name(q, cdev, name);
+       debugfs_queues[slot] = debugfs_create_file(name,
+                                                  S_IFREG | S_IRUGO | S_IWUSR,
+                                                  debugfs_root, q,
+                                                  &debugfs_fops);
+}
+
+/*
+ * Create the debugfs files for all input and output queues of irq_ptr.
+ * The debugfs slot table is protected by debugfs_mutex.
+ */
+void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev)
+{
+       struct qdio_q *queue;
+       int idx;
+
+       mutex_lock(&debugfs_mutex);
+       for_each_input_queue(irq_ptr, queue, idx) {
+               setup_debugfs_entry(queue, cdev);
+       }
+       for_each_output_queue(irq_ptr, queue, idx) {
+               setup_debugfs_entry(queue, cdev);
+       }
+       mutex_unlock(&debugfs_mutex);
+}
+
+/*
+ * Remove the debugfs files of all input and output queues of irq_ptr.
+ * The debugfs slot table is protected by debugfs_mutex; cdev is not used.
+ */
+void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev)
+{
+       struct qdio_q *queue;
+       int idx;
+
+       mutex_lock(&debugfs_mutex);
+       for_each_input_queue(irq_ptr, queue, idx) {
+               remove_debugfs_entry(queue);
+       }
+       for_each_output_queue(irq_ptr, queue, idx) {
+               remove_debugfs_entry(queue);
+       }
+       mutex_unlock(&debugfs_mutex);
+}
+
+/*
+ * Create the "qdio_queues" debugfs directory and register the debug areas.
+ * Returns the result of qdio_register_dbf_views(); the result of the
+ * directory creation is not checked.
+ */
+int __init qdio_debug_init(void)
+{
+       debugfs_root = debugfs_create_dir("qdio_queues", NULL);
+       return qdio_register_dbf_views();
+}
+
+/* remove the debugfs root directory and unregister the debug areas */
+void qdio_debug_exit(void)
+{
+       debugfs_remove(debugfs_root);
+       qdio_unregister_dbf_views();
+}
diff --git a/drivers/s390/cio/qdio_debug.h b/drivers/s390/cio/qdio_debug.h

new file mode 100644 (file)

index 0000000..8484b83
--- /dev/null
+++ b/drivers/s390/cio/qdio_debug.h
@@ -0,0 +1,91 @@
+/*
+ *  drivers/s390/cio/qdio_debug.h
+ *
+ *  Copyright IBM Corp. 2008
+ *
+ *  Author: Jan Glauber (jang@linux.vnet.ibm.com)
+ */
+#ifndef QDIO_DEBUG_H
+#define QDIO_DEBUG_H
+
+#include <asm/debug.h>
+#include <asm/qdio.h>
+#include "qdio.h"
+
+/*
+ * Log len bytes starting at addr to the debug area qdio_dbf_<name> at the
+ * given level, as an exception entry if ex is non-zero and as an event entry
+ * otherwise.
+ */
+#define QDIO_DBF_HEX(ex, name, level, addr, len) \
+       do { \
+       if (ex) \
+               debug_exception(qdio_dbf_##name, level, (void *)(addr), len); \
+       else \
+               debug_event(qdio_dbf_##name, level, (void *)(addr), len); \
+       } while (0)
+/*
+ * Log a text string to the debug area qdio_dbf_<name> at the given level, as
+ * an exception entry if ex is non-zero and as an event entry otherwise.
+ */
+#define QDIO_DBF_TEXT(ex, name, level, text) \
+       do { \
+       if (ex) \
+               debug_text_exception(qdio_dbf_##name, level, text); \
+       else \
+               debug_text_event(qdio_dbf_##name, level, text); \
+       } while (0)
+
+#define QDIO_DBF_HEX0(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 0, addr, len)
+#define QDIO_DBF_HEX1(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 1, addr, len)
+#define QDIO_DBF_HEX2(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 2, addr, len)
+
+#ifdef CONFIG_QDIO_DEBUG
+#define QDIO_DBF_HEX3(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 3, addr, len)
+#define QDIO_DBF_HEX4(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 4, addr, len)
+#define QDIO_DBF_HEX5(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 5, addr, len)
+#define QDIO_DBF_HEX6(ex, name, addr, len) QDIO_DBF_HEX(ex, name, 6, addr, len)
+#else
+#define QDIO_DBF_HEX3(ex, name, addr, len) do {} while (0)
+#define QDIO_DBF_HEX4(ex, name, addr, len) do {} while (0)
+#define QDIO_DBF_HEX5(ex, name, addr, len) do {} while (0)
+#define QDIO_DBF_HEX6(ex, name, addr, len) do {} while (0)
+#endif /* CONFIG_QDIO_DEBUG */
+
+#define QDIO_DBF_TEXT0(ex, name, text) QDIO_DBF_TEXT(ex, name, 0, text)
+#define QDIO_DBF_TEXT1(ex, name, text) QDIO_DBF_TEXT(ex, name, 1, text)
+#define QDIO_DBF_TEXT2(ex, name, text) QDIO_DBF_TEXT(ex, name, 2, text)
+
+#ifdef CONFIG_QDIO_DEBUG
+#define QDIO_DBF_TEXT3(ex, name, text) QDIO_DBF_TEXT(ex, name, 3, text)
+#define QDIO_DBF_TEXT4(ex, name, text) QDIO_DBF_TEXT(ex, name, 4, text)
+#define QDIO_DBF_TEXT5(ex, name, text) QDIO_DBF_TEXT(ex, name, 5, text)
+#define QDIO_DBF_TEXT6(ex, name, text) QDIO_DBF_TEXT(ex, name, 6, text)
+#else
+#define QDIO_DBF_TEXT3(ex, name, text) do {} while (0)
+#define QDIO_DBF_TEXT4(ex, name, text) do {} while (0)
+#define QDIO_DBF_TEXT5(ex, name, text) do {} while (0)
+#define QDIO_DBF_TEXT6(ex, name, text) do {} while (0)
+#endif /* CONFIG_QDIO_DEBUG */
+
+/* s390dbf views */
+#define QDIO_DBF_SETUP_LEN             8
+#define QDIO_DBF_SETUP_PAGES           4
+#define QDIO_DBF_SETUP_NR_AREAS                1
+
+#define QDIO_DBF_TRACE_LEN             8
+#define QDIO_DBF_TRACE_NR_AREAS                2
+
+#ifdef CONFIG_QDIO_DEBUG
+#define QDIO_DBF_TRACE_PAGES           16
+#define QDIO_DBF_SETUP_LEVEL           6
+#define QDIO_DBF_TRACE_LEVEL           4
+#else /* !CONFIG_QDIO_DEBUG */
+#define QDIO_DBF_TRACE_PAGES           4
+#define QDIO_DBF_SETUP_LEVEL           2
+#define QDIO_DBF_TRACE_LEVEL           2
+#endif /* CONFIG_QDIO_DEBUG */
+
+extern debug_info_t *qdio_dbf_setup;
+extern debug_info_t *qdio_dbf_trace;
+
+void qdio_allocate_do_dbf(struct qdio_initialize *init_data);
+void debug_print_bstat(struct qdio_q *q);
+void qdio_setup_debug_entries(struct qdio_irq *irq_ptr,
+                             struct ccw_device *cdev);
+void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr,
+                                struct ccw_device *cdev);
+int qdio_debug_init(void);
+void qdio_debug_exit(void);
+#endif
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c

new file mode 100644 (file)

index 0000000..d10c73c
--- /dev/null
+++ b/drivers/s390/cio/qdio_main.c
@@ -0,0 +1,1755 @@
+/*
+ * linux/drivers/s390/cio/qdio_main.c
+ *
+ * Linux for s390 qdio support, buffer handling, qdio API and module support.
+ *
+ * Copyright 2000,2008 IBM Corp.
+ * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
+ *           Jan Glauber <jang@linux.vnet.ibm.com>
+ * 2.6 cio integration by Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <asm/atomic.h>
+#include <asm/debug.h>
+#include <asm/qdio.h>
+
+#include "cio.h"
+#include "css.h"
+#include "device.h"
+#include "qdio.h"
+#include "qdio_debug.h"
+#include "qdio_perf.h"
+
+MODULE_AUTHOR("Utz Bacher <utz.bacher@de.ibm.com>,"\
+       "Jan Glauber <jang@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("QDIO base support");
+MODULE_LICENSE("GPL");
+
+/*
+ * Issue the siga instruction with function code 2.
+ * Register 0 holds the function code, register 1 the subchannel id and
+ * registers 2 and 3 the output and input queue masks.
+ * Returns the condition code, extracted with ipm/srl.
+ */
+static inline int do_siga_sync(struct subchannel_id schid,
+                              unsigned int out_mask, unsigned int in_mask)
+{
+       register unsigned long __fc asm ("0") = 2;
+       register struct subchannel_id __schid asm ("1") = schid;
+       register unsigned long out asm ("2") = out_mask;
+       register unsigned long in asm ("3") = in_mask;
+       int cc;
+
+       asm volatile(
+               "       siga    0\n"
+               "       ipm     %0\n"
+               "       srl     %0,28\n"
+               : "=d" (cc)
+               : "d" (__fc), "d" (__schid), "d" (out), "d" (in) : "cc");
+       return cc;
+}
+
+/*
+ * Issue the siga instruction with function code 1.
+ * Register 0 holds the function code, register 1 the subchannel id and
+ * register 2 the queue mask.
+ * Returns the condition code, extracted with ipm/srl.
+ */
+static inline int do_siga_input(struct subchannel_id schid, unsigned int mask)
+{
+       register unsigned long __fc asm ("0") = 1;
+       register struct subchannel_id __schid asm ("1") = schid;
+       register unsigned long __mask asm ("2") = mask;
+       int cc;
+
+       asm volatile(
+               "       siga    0\n"
+               "       ipm     %0\n"
+               "       srl     %0,28\n"
+               : "=d" (cc)
+               : "d" (__fc), "d" (__schid), "d" (__mask) : "cc", "memory");
+       return cc;
+}
+
+/**
+ * do_siga_output - perform SIGA-w/wt function
+ * @schid: subchannel id or in case of QEBSM the subchannel token
+ * @mask: which output queues to process
+ * @bb: busy bit indicator, set only if SIGA-w/wt could not access a buffer
+ * @fc: function code to perform
+ *
+ * Returns cc or QDIO_ERROR_SIGA_ACCESS_EXCEPTION.
+ * Note: For IQDC unicast queues only the highest priority queue is processed.
+ */
+static inline int do_siga_output(unsigned long schid, unsigned long mask,
+                                u32 *bb, unsigned int fc)
+{
+       register unsigned long __fc asm("0") = fc;
+       register unsigned long __schid asm("1") = schid;
+       register unsigned long __mask asm("2") = mask;
+       /*
+        * Preset the result to the access exception error.
+        * NOTE(review): presumably the exception table entry (0b -> 1b) skips
+        * the ipm/srl pair when siga faults, so this value is then returned
+        * unchanged -- confirm.
+        */
+       int cc = QDIO_ERROR_SIGA_ACCESS_EXCEPTION;
+
+       asm volatile(
+               "       siga    0\n"
+               "0:     ipm     %0\n"
+               "       srl     %0,28\n"
+               "1:\n"
+               EX_TABLE(0b, 1b)
+               : "+d" (cc), "+d" (__fc), "+d" (__schid), "+d" (__mask)
+               : : "cc", "memory");
+       /* the busy bit is bit 31 of the low word of register 0 after siga */
+       *bb = ((unsigned int) __fc) >> 31;
+       return cc;
+}
+
+/*
+ * Classify the ccq value returned by the eqbs/sqbs helpers.
+ *
+ * Returns 0 if processing is complete (ccq 0 or 32), 1 if not all buffers
+ * were processed (ccq 96 or 97) and -EIO for every other value, which is
+ * also written to the trace debug area. q is not used.
+ */
+static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq)
+{
+       char dbf_text[15];
+
+       switch (ccq) {
+       case 0:
+       case 32:
+               /* all done or next buffer state different */
+               return 0;
+       case 96:
+       case 97:
+               /* not all buffers processed */
+               return 1;
+       default:
+               break;
+       }
+
+       /* notify devices immediately */
+       sprintf(dbf_text, "%d", ccq);
+       QDIO_DBF_TEXT2(1, trace, dbf_text);
+       return -EIO;
+}
+
+/**
+ * qdio_do_eqbs - extract buffer states for QEBSM
+ * @q: queue to manipulate
+ * @state: state of the extracted buffers
+ * @start: buffer number to start at
+ * @count: count of buffers to examine
+ *
+ * Returns the number of successfully extracted equal buffer states.
+ * Stops processing if a state is different from the last buffers state.
+ * On an unexpected ccq the queue handler is called with
+ * QDIO_ERROR_ACTIVATE_CHECK_CONDITION and 0 is returned.
+ */
+static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state,
+                       int start, int count)
+{
+       unsigned int ccq = 0;
+       int tmp_count = count, tmp_start = start;
+       int nr = q->nr;
+       int rc;
+       char dbf_text[15];
+
+       BUG_ON(!q->irq_ptr->sch_token);
+
+       /* output queues are numbered after the input queues */
+       if (!q->is_input_q)
+               nr += q->irq_ptr->nr_input_qs;
+again:
+       ccq = do_eqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count);
+       rc = qdio_check_ccq(q, ccq);
+
+       /* At least one buffer was processed, return and extract the remaining
+        * buffers later.
+        */
+       if ((ccq == 96) && (count != tmp_count))
+               return (count - tmp_count);
+       if (rc == 1) {
+               QDIO_DBF_TEXT5(1, trace, "eqAGAIN");
+               goto again;
+       }
+
+       if (rc < 0) {
+               QDIO_DBF_TEXT2(1, trace, "eqberr");
+               /*
+                * Four integers do not necessarily fit into dbf_text, so
+                * bound the output instead of risking a stack overflow.
+                */
+               snprintf(dbf_text, sizeof(dbf_text), "%2x,%2x,%d,%d",
+                        count, tmp_count, ccq, nr);
+               QDIO_DBF_TEXT2(1, trace, dbf_text);
+               q->handler(q->irq_ptr->cdev,
+                          QDIO_ERROR_ACTIVATE_CHECK_CONDITION,
+                          0, -1, -1, q->irq_ptr->int_parm);
+               return 0;
+       }
+       return count - tmp_count;
+}
+
+/**
+ * qdio_do_sqbs - set buffer states for QEBSM
+ * @q: queue to manipulate
+ * @state: new state of the buffers
+ * @start: first buffer number to change
+ * @count: how many buffers to change
+ *
+ * Returns the number of successfully changed buffers.
+ * Does retrying until the specified count of buffer states is set or an
+ * error occurs.
+ * On an unexpected ccq the queue handler is called with
+ * QDIO_ERROR_ACTIVATE_CHECK_CONDITION and 0 is returned.
+ */
+static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start,
+                       int count)
+{
+       unsigned int ccq = 0;
+       int tmp_count = count, tmp_start = start;
+       int nr = q->nr;
+       int rc;
+       char dbf_text[15];
+
+       BUG_ON(!q->irq_ptr->sch_token);
+
+       /* output queues are numbered after the input queues */
+       if (!q->is_input_q)
+               nr += q->irq_ptr->nr_input_qs;
+again:
+       ccq = do_sqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count);
+       rc = qdio_check_ccq(q, ccq);
+       if (rc == 1) {
+               QDIO_DBF_TEXT5(1, trace, "sqAGAIN");
+               goto again;
+       }
+       if (rc < 0) {
+               QDIO_DBF_TEXT3(1, trace, "sqberr");
+               /*
+                * Bound the formatted output: two integers do not
+                * necessarily fit into dbf_text.
+                */
+               snprintf(dbf_text, sizeof(dbf_text), "%2x,%2x",
+                        count, tmp_count);
+               QDIO_DBF_TEXT3(1, trace, dbf_text);
+               snprintf(dbf_text, sizeof(dbf_text), "%d,%d", ccq, nr);
+               QDIO_DBF_TEXT3(1, trace, dbf_text);
+
+               q->handler(q->irq_ptr->cdev,
+                          QDIO_ERROR_ACTIVATE_CHECK_CONDITION,
+                          0, -1, -1, q->irq_ptr->int_parm);
+               return 0;
+       }
+       /* after a successful call no buffer should be left unprocessed */
+       WARN_ON(tmp_count);
+       return count - tmp_count;
+}
+
+/*
+ * Returns the number of examined buffers and their common state in *state.
+ *
+ * For QEBSM queues the work is delegated to qdio_do_eqbs. Otherwise the slsb
+ * is scanned from bufnr (wrapping around) for at most count buffers; the
+ * scan stops at the first buffer whose state differs from the first
+ * non-zero state seen.
+ */
+static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr,
+                                unsigned char *state, unsigned int count)
+{
+       unsigned char common = 0;
+       int examined;
+
+       BUG_ON(bufnr > QDIO_MAX_BUFFERS_MASK);
+       BUG_ON(count > QDIO_MAX_BUFFERS_PER_Q);
+
+       if (is_qebsm(q))
+               return qdio_do_eqbs(q, state, bufnr, count);
+
+       for (examined = 0; examined < count; examined++) {
+               unsigned char cur = q->slsb.val[bufnr];
+
+               if (common == 0)
+                       common = cur;
+               else if (cur != common)
+                       break;
+               bufnr = next_buf(bufnr);
+       }
+       *state = common;
+       return examined;
+}
+
+/* get the state of the single buffer bufnr, see get_buf_states */
+inline int get_buf_state(struct qdio_q *q, unsigned int bufnr,
+                 unsigned char *state)
+{
+       return get_buf_states(q, bufnr, state, 1);
+}
+
+/*
+ * Wrap-around safe setting of slsb states, returns number of changed buffers.
+ *
+ * For QEBSM queues the work is delegated to qdio_do_sqbs; otherwise each of
+ * the count slsb entries starting at bufnr is replaced with xchg and count
+ * is returned.
+ */
+static inline int set_buf_states(struct qdio_q *q, int bufnr,
+                                unsigned char state, int count)
+{
+       int left;
+
+       BUG_ON(bufnr > QDIO_MAX_BUFFERS_MASK);
+       BUG_ON(count > QDIO_MAX_BUFFERS_PER_Q);
+
+       if (is_qebsm(q))
+               return qdio_do_sqbs(q, state, bufnr, count);
+
+       for (left = count; left > 0; left--) {
+               xchg(&q->slsb.val[bufnr], state);
+               bufnr = next_buf(bufnr);
+       }
+       return count;
+}
+
+/* set the state of the single buffer bufnr, see set_buf_states */
+static inline int set_buf_state(struct qdio_q *q, int bufnr,
+                               unsigned char state)
+{
+       return set_buf_states(q, bufnr, state, 1);
+}
+
+/*
+ * Set slsb states to initial state: all QDIO_MAX_BUFFERS_PER_Q buffers of
+ * every input queue become SLSB_P_INPUT_NOT_INIT and those of every output
+ * queue become SLSB_P_OUTPUT_NOT_INIT.
+ */
+void qdio_init_buf_states(struct qdio_irq *irq_ptr)
+{
+       struct qdio_q *q;
+       int i;
+
+       for_each_input_queue(irq_ptr, q, i)
+               set_buf_states(q, 0, SLSB_P_INPUT_NOT_INIT,
+                              QDIO_MAX_BUFFERS_PER_Q);
+       for_each_output_queue(irq_ptr, q, i)
+               set_buf_states(q, 0, SLSB_P_OUTPUT_NOT_INIT,
+                              QDIO_MAX_BUFFERS_PER_Q);
+}
+
+/*
+ * Issue a siga sync for the given output and input queue masks.
+ *
+ * Nothing is done and 0 is returned if the irq of the queue does not need
+ * siga sync. Otherwise the sync counter of the performance statistics is
+ * incremented and the condition code of the instruction is returned; a
+ * non-zero condition code is additionally written to the trace debug area.
+ */
+static int qdio_siga_sync(struct qdio_q *q, unsigned int output,
+                         unsigned int input)
+{
+       int cc;
+
+       if (!need_siga_sync(q))
+               return 0;
+
+       qdio_perf_stat_inc(&perf_stats.siga_sync);
+
+       cc = do_siga_sync(q->irq_ptr->schid, output, input);
+       if (cc) {
+               QDIO_DBF_TEXT4(0, trace, "sigasync");
+               QDIO_DBF_HEX4(0, trace, &q, sizeof(void *));
+               /* cc is an int: log exactly its size, not that of a pointer */
+               QDIO_DBF_HEX3(0, trace, &cc, sizeof(cc));
+       }
+       return cc;
+}
+
+/*
+ * Siga sync for a single queue: the queue's mask is passed as input mask for
+ * an input queue and as output mask for an output queue, the other mask is 0.
+ */
+inline int qdio_siga_sync_q(struct qdio_q *q)
+{
+       unsigned int out_mask = 0;
+       unsigned int in_mask = 0;
+
+       if (q->is_input_q)
+               in_mask = q->mask;
+       else
+               out_mask = q->mask;
+
+       return qdio_siga_sync(q, out_mask, in_mask);
+}
+
+/* siga sync with all output mask bits set and an empty input mask */
+static inline int qdio_siga_sync_out(struct qdio_q *q)
+{
+       return qdio_siga_sync(q, ~0U, 0);
+}
+
+/* siga sync with all output and all input mask bits set */
+static inline int qdio_siga_sync_all(struct qdio_q *q)
+{
+       return qdio_siga_sync(q, ~0U, ~0U);
+}
+
+/*
+ * Run do_siga_output for the queue. With QEBSM the subchannel token is
+ * passed and bit 0x80 is set in the function code, otherwise the subchannel
+ * id is passed and the function code is 0.
+ * The busy bit reported by do_siga_output is stored in *busy_bit.
+ */
+static inline int qdio_do_siga_output(struct qdio_q *q, unsigned int *busy_bit)
+{
+       unsigned long schid;
+       unsigned int fc;
+
+       if (is_qebsm(q)) {
+               schid = q->irq_ptr->sch_token;
+               fc = 0x80;
+       } else {
+               schid = *((u32 *)&q->irq_ptr->schid);
+               fc = 0;
+       }
+       return do_siga_output(schid, q->mask, busy_bit, fc);
+}
+
+/*
+ * Issue a SIGA-write for an output queue.
+ *
+ * For QDIO_IQDIO_QFMT queues a cc=2 with busy bit set is retried until
+ * QDIO_BUSY_BIT_PATIENCE (measured with get_usecs()) has elapsed since the
+ * first busy indication.
+ *
+ * Returns 0 on success, the condition code otherwise; cc=2 with the busy
+ * bit still set is reported with QDIO_ERROR_SIGA_BUSY or'ed in.
+ */
+static int qdio_siga_output(struct qdio_q *q)
+{
+       int cc;
+       u32 busy_bit;
+       u64 start_time = 0;
+
+       QDIO_DBF_TEXT5(0, trace, "sigaout");
+       QDIO_DBF_HEX5(0, trace, &q, sizeof(void *));
+
+       qdio_perf_stat_inc(&perf_stats.siga_out);
+again:
+       cc = qdio_do_siga_output(q, &busy_bit);
+       if (queue_type(q) == QDIO_IQDIO_QFMT && cc == 2 && busy_bit) {
+               /*
+                * The first busy indication starts the clock and must retry
+                * as well, otherwise no retry would ever take place.
+                */
+               if (!start_time) {
+                       start_time = get_usecs();
+                       goto again;
+               }
+               if ((get_usecs() - start_time) < QDIO_BUSY_BIT_PATIENCE)
+                       goto again;
+       }
+
+       if (cc == 2 && busy_bit)
+               cc |= QDIO_ERROR_SIGA_BUSY;
+       if (cc)
+               /* cc is an int: log exactly its size, not a pointer's */
+               QDIO_DBF_HEX3(0, trace, &cc, sizeof(cc));
+       return cc;
+}
+
+/*
+ * Issue a SIGA-read for an input queue.
+ *
+ * Returns the condition code of do_siga_input(); a non-zero value is also
+ * written to the trace debug area.
+ */
+static inline int qdio_siga_input(struct qdio_q *q)
+{
+       int cc;
+
+       QDIO_DBF_TEXT4(0, trace, "sigain");
+       QDIO_DBF_HEX4(0, trace, &q, sizeof(void *));
+
+       qdio_perf_stat_inc(&perf_stats.siga_in);
+
+       cc = do_siga_input(q->irq_ptr->schid, q->mask);
+       if (cc)
+               /* cc is an int: log exactly its size, not a pointer's */
+               QDIO_DBF_HEX3(0, trace, &cc, sizeof(cc));
+       return cc;
+}
+
+/* called from thinint inbound handler */
+void qdio_sync_after_thinint(struct qdio_q *q)
+{
+       /* without outbound PCI support only the queue itself is synced */
+       if (!pci_out_supported(q)) {
+               qdio_siga_sync_q(q);
+               return;
+       }
+
+       if (need_siga_sync_thinint(q)) {
+               qdio_siga_sync_all(q);
+               return;
+       }
+       if (need_siga_sync_out_thinint(q))
+               qdio_siga_sync_out(q);
+}
+
+/* leave polling mode (if active) and remove the ACK from the slsb */
+inline void qdio_stop_polling(struct qdio_q *q)
+{
+       spin_lock_bh(&q->u.in.lock);
+       if (q->u.in.polling) {
+               q->u.in.polling = 0;
+               qdio_perf_stat_inc(&perf_stats.debug_stop_polling);
+
+               /* show the card that we are not polling anymore */
+               set_buf_state(q, q->last_move_ftc, SLSB_P_INPUT_NOT_INIT);
+       }
+       spin_unlock_bh(&q->u.in.lock);
+}
+
+/*
+ * Log a buffer in error state (buffer number, the flags of sbal elements
+ * 14 and 15 and the first 256 bytes of the sbal) and record
+ * QDIO_ERROR_SLSB_STATE in q->qdio_error.
+ */
+static void announce_buffer_error(struct qdio_q *q)
+{
+       char dbf_text[15];
+
+       if (q->is_input_q)
+               QDIO_DBF_TEXT3(1, trace, "inperr");
+       else
+               QDIO_DBF_TEXT3(0, trace, "outperr");
+
+       /*
+        * Bound the formatting: three hex values plus separators are not
+        * guaranteed to fit into dbf_text (NOTE(review): element flags look
+        * wider than a byte - confirm against the qdio_buffer_element
+        * definition).
+        */
+       snprintf(dbf_text, sizeof(dbf_text), "%x-%x-%x", q->first_to_check,
+                q->sbal[q->first_to_check]->element[14].flags,
+                q->sbal[q->first_to_check]->element[15].flags);
+       QDIO_DBF_TEXT3(1, trace, dbf_text);
+       QDIO_DBF_HEX2(1, trace, q->sbal[q->first_to_check], 256);
+
+       q->qdio_error = QDIO_ERROR_SLSB_STATE;
+}
+
+/*
+ * Advance q->first_to_check over all inbound buffers that are ready for
+ * processing (PRIMED or in ERROR state) and return the new first_to_check.
+ * nr_buf_used is decremented by the number of buffers skipped over.
+ */
+static int get_inbound_buffer_frontier(struct qdio_q *q)
+{
+       int count, stop;
+       unsigned char state;
+
+       /*
+        * While we are still polling don't update last_move_ftc, so that it
+        * keeps pointing at the previously ACKed buffer.
+        */
+       if (!q->u.in.polling)
+               q->last_move_ftc = q->first_to_check;
+
+       /*
+        * Don't check 128 buffers, as otherwise qdio_inbound_q_moved
+        * would return 0.
+        */
+       count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK);
+       stop = add_buf(q->first_to_check, count);
+
+       /*
+        * No siga sync here: the queues are synced either after a PCI or
+        * by us after a thin interrupt.
+        */
+
+       /* need to set count to 1 for non-qebsm */
+       if (!is_qebsm(q))
+               count = 1;
+
+check_next:
+       /* stop once every used buffer has been looked at */
+       if (q->first_to_check == stop)
+               goto out;
+
+       count = get_buf_states(q, q->first_to_check, &state, count);
+       if (!count)
+               goto out;
+
+       switch (state) {
+       case SLSB_P_INPUT_PRIMED:
+               QDIO_DBF_TEXT5(0, trace, "inptprim");
+
+               /*
+                * Only ACK the first buffer. The ACK will be removed in
+                * qdio_stop_polling.
+                */
+               if (q->u.in.polling)
+                       state = SLSB_P_INPUT_NOT_INIT;
+               else {
+                       q->u.in.polling = 1;
+                       state = SLSB_P_INPUT_ACK;
+               }
+               set_buf_state(q, q->first_to_check, state);
+
+               /*
+                * Need to change all PRIMED buffers to NOT_INIT, otherwise
+                * we would lose initiative in the thinint code.
+                */
+               if (count > 1)
+                       set_buf_states(q, next_buf(q->first_to_check),
+                                      SLSB_P_INPUT_NOT_INIT, count - 1);
+
+               /*
+                * No siga-sync needed for non-qebsm here, as the inbound queue
+                * will be synced on the next siga-r, resp.
+                * tiqdio_is_inbound_q_done will do the siga-sync.
+                */
+               q->first_to_check = add_buf(q->first_to_check, count);
+               atomic_sub(count, &q->nr_buf_used);
+               goto check_next;
+       case SLSB_P_INPUT_ERROR:
+               announce_buffer_error(q);
+               /* process the buffer, the upper layer will take care of it */
+               q->first_to_check = add_buf(q->first_to_check, count);
+               atomic_sub(count, &q->nr_buf_used);
+               break;
+       case SLSB_CU_INPUT_EMPTY:
+       case SLSB_P_INPUT_NOT_INIT:
+       case SLSB_P_INPUT_ACK:
+               /* nothing to process at first_to_check */
+               QDIO_DBF_TEXT5(0, trace, "inpnipro");
+               break;
+       default:
+               /* any other slsb state is treated as a fatal inconsistency */
+               BUG();
+       }
+out:
+       QDIO_DBF_HEX4(0, trace, &q->first_to_check, sizeof(int));
+       return q->first_to_check;
+}
+
+/*
+ * Returns 1 if the inbound frontier moved away from last_move_ftc or an
+ * error is pending on the queue, 0 otherwise.
+ */
+int qdio_inbound_q_moved(struct qdio_q *q)
+{
+       int frontier = get_inbound_buffer_frontier(q);
+
+       if (frontier == q->last_move_ftc && !q->qdio_error)
+               return 0;
+
+       /* remember when the queue last moved for the polling threshold */
+       if (!need_siga_sync(q) && !pci_out_supported(q))
+               q->u.in.timestamp = get_usecs();
+
+       QDIO_DBF_TEXT4(0, trace, "inhasmvd");
+       QDIO_DBF_HEX4(0, trace, &q, sizeof(void *));
+       return 1;
+}
+
+/*
+ * Decide whether inbound processing of q is finished.
+ *
+ * Returns 1 if no buffers are in use, if siga-sync is needed or outbound
+ * PCI is supported (no polling), or if QDIO_INPUT_THRESHOLD has passed
+ * since q->u.in.timestamp. Returns 0 if the buffer at first_to_check is
+ * PRIMED or the polling threshold has not been reached yet.
+ */
+static int qdio_inbound_q_done(struct qdio_q *q)
+{
+       unsigned char state;
+#ifdef CONFIG_QDIO_DEBUG
+       char dbf_text[15];
+#endif
+
+       if (!atomic_read(&q->nr_buf_used))
+               return 1;
+
+       /*
+        * We need that one for synchronization with the adapter, as it
+        * does a kind of PCI avoidance.
+        */
+       qdio_siga_sync_q(q);
+
+       get_buf_state(q, q->first_to_check, &state);
+       if (state == SLSB_P_INPUT_PRIMED)
+               /* we got something to do */
+               return 0;
+
+       /* on VM, we don't poll, so the q is always done here */
+       if (need_siga_sync(q) || pci_out_supported(q))
+               return 1;
+
+       /*
+        * At this point we know, that inbound first_to_check
+        * has (probably) not moved (see qdio_inbound_processing).
+        */
+       if (get_usecs() > q->u.in.timestamp + QDIO_INPUT_THRESHOLD) {
+               /* polling time is over: the queue is done */
+#ifdef CONFIG_QDIO_DEBUG
+               QDIO_DBF_TEXT4(0, trace, "inqisdon");
+               QDIO_DBF_HEX4(0, trace, &q, sizeof(void *));
+               sprintf(dbf_text, "pf%02x", q->first_to_check);
+               QDIO_DBF_TEXT4(0, trace, dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+               return 1;
+       } else {
+               /* still within the polling threshold: not done yet */
+#ifdef CONFIG_QDIO_DEBUG
+               QDIO_DBF_TEXT4(0, trace, "inqisntd");
+               QDIO_DBF_HEX4(0, trace, &q, sizeof(void *));
+               sprintf(dbf_text, "pf%02x", q->first_to_check);
+               QDIO_DBF_TEXT4(0, trace, dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+               return 0;
+       }
+}
+
+/*
+ * Hand the buffers between first_to_kick and first_to_check to the upper
+ * layer handler, then advance first_to_kick and clear the pending error.
+ * Nothing is delivered unless the irq is in QDIO_IRQ_STATE_ACTIVE.
+ */
+void qdio_kick_inbound_handler(struct qdio_q *q)
+{
+       int first, nr_bufs;
+#ifdef CONFIG_QDIO_DEBUG
+       char dbf_text[15];
+#endif
+
+       qdio_perf_stat_inc(&perf_stats.inbound_handler);
+
+       first = q->first_to_kick;
+       /* distance from first_to_kick to first_to_check, with wrap-around */
+       nr_bufs = q->first_to_check - first;
+       if (nr_bufs < 0)
+               nr_bufs += QDIO_MAX_BUFFERS_PER_Q;
+
+#ifdef CONFIG_QDIO_DEBUG
+       sprintf(dbf_text, "s=%2xc=%2x", first, nr_bufs);
+       QDIO_DBF_TEXT4(0, trace, dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+
+       if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE))
+               return;
+
+       q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr,
+                  first, nr_bufs, q->irq_ptr->int_parm);
+
+       /* for the next time */
+       q->first_to_kick = q->first_to_check;
+       q->qdio_error = 0;
+}
+
+/*
+ * Process an inbound queue: deliver buffers to the handler for as long as
+ * the queue keeps moving and is not done.
+ */
+static void __qdio_inbound_processing(struct qdio_q *q)
+{
+       qdio_perf_stat_inc(&perf_stats.tasklet_inbound);
+
+       for (;;) {
+               if (!qdio_inbound_q_moved(q))
+                       return;
+
+               qdio_kick_inbound_handler(q);
+
+               /* not done means poll time is not yet over */
+               if (!qdio_inbound_q_done(q))
+                       continue;
+
+               qdio_stop_polling(q);
+               /*
+                * We need to check again to not lose initiative after
+                * resetting the ACK state.
+                */
+               if (qdio_inbound_q_done(q))
+                       return;
+       }
+}
+
+/* inbound tasklet; data carries the struct qdio_q pointer */
+void qdio_inbound_processing(unsigned long data)
+{
+       __qdio_inbound_processing((struct qdio_q *)data);
+}
+
+/*
+ * Advance q->first_to_check over all outbound buffers that are EMPTY or in
+ * ERROR state and return the new first_to_check. nr_buf_used is
+ * decremented by the number of buffers skipped over.
+ */
+static int get_outbound_buffer_frontier(struct qdio_q *q)
+{
+       int count, stop;
+       unsigned char state;
+
+       /*
+        * Sync for non-HiperSockets queues without outbound PCI support
+        * and for HiperSockets multicast queues.
+        */
+       if (((queue_type(q) != QDIO_IQDIO_QFMT) && !pci_out_supported(q)) ||
+           (queue_type(q) == QDIO_IQDIO_QFMT && multicast_outbound(q)))
+               qdio_siga_sync_q(q);
+
+       /*
+        * Don't check 128 buffers, as otherwise qdio_outbound_q_moved
+        * would return 0.
+        */
+       count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK);
+       stop = add_buf(q->first_to_check, count);
+
+       /* need to set count to 1 for non-qebsm */
+       if (!is_qebsm(q))
+               count = 1;
+
+check_next:
+       /* stop once every used buffer has been looked at */
+       if (q->first_to_check == stop)
+               return q->first_to_check;
+
+       count = get_buf_states(q, q->first_to_check, &state, count);
+       if (!count)
+               return q->first_to_check;
+
+       switch (state) {
+       case SLSB_P_OUTPUT_EMPTY:
+               /* the adapter got it */
+               QDIO_DBF_TEXT5(0, trace, "outpempt");
+
+               atomic_sub(count, &q->nr_buf_used);
+               q->first_to_check = add_buf(q->first_to_check, count);
+               /*
+                * We fetch all buffer states at once. get_buf_states may
+                * return count < stop. For QEBSM we do not loop.
+                */
+               if (is_qebsm(q))
+                       break;
+               goto check_next;
+       case SLSB_P_OUTPUT_ERROR:
+               announce_buffer_error(q);
+               /* process the buffer, the upper layer will take care of it */
+               q->first_to_check = add_buf(q->first_to_check, count);
+               atomic_sub(count, &q->nr_buf_used);
+               break;
+       case SLSB_CU_OUTPUT_PRIMED:
+               /* the adapter has not fetched the output yet */
+               QDIO_DBF_TEXT5(0, trace, "outpprim");
+               break;
+       case SLSB_P_OUTPUT_NOT_INIT:
+       case SLSB_P_OUTPUT_HALTED:
+               /* nothing to process at first_to_check */
+               break;
+       default:
+               /* any other slsb state is treated as a fatal inconsistency */
+               BUG();
+       }
+       return q->first_to_check;
+}
+
+/* returns 1 once no outbound buffer is in use anymore, 0 otherwise */
+static inline int qdio_outbound_q_done(struct qdio_q *q)
+{
+       return !atomic_read(&q->nr_buf_used);
+}
+
+/*
+ * Returns 1 if the outbound frontier moved away from last_move_ftc or an
+ * error is pending; last_move_ftc is updated to the new frontier then.
+ */
+static inline int qdio_outbound_q_moved(struct qdio_q *q)
+{
+       int frontier = get_outbound_buffer_frontier(q);
+
+       if (frontier == q->last_move_ftc && !q->qdio_error)
+               return 0;
+
+       q->last_move_ftc = frontier;
+       QDIO_DBF_TEXT4(0, trace, "oqhasmvd");
+       QDIO_DBF_HEX4(0, trace, &q, sizeof(void *));
+       return 1;
+}
+
+/*
+ * VM could present us cc=2 and busy bit set on SIGA-write
+ * during reconfiguration of their Guest LAN (only in iqdio mode,
+ * otherwise qdio is asynchronous and cc=2 and busy bit there will take
+ * the queues down immediately).
+ *
+ * Therefore qdio_siga_output will try for a short time constantly,
+ * if such a condition occurs. If it doesn't change, it will
+ * increase the busy_siga_counter and save the timestamp, and
+ * schedule the queue for later processing. qdio_outbound_processing
+ * will check out the counter. If non-zero, it will call qdio_kick_outbound_q
+ * as often as the value of the counter. This will attempt further SIGA
+ * instructions. For each successful SIGA, the counter is
+ * decreased, for failing SIGAs the counter remains the same, after
+ * all. After some time of no movement, qdio_kick_outbound_q will
+ * finally fail and reflect corresponding error codes to call
+ * the upper layer module and have it take the queues down.
+ *
+ * Note that this is a change from the original HiperSockets design
+ * (saying cc=2 and busy bit means take the queues down), but in
+ * those days Guest LAN didn't exist... excessive cc=2 with busy bit
+ * conditions will still take the queues down, but the threshold is
+ * higher due to the Guest LAN environment.
+ *
+ * Called from outbound tasklet and do_QDIO handler.
+ */
+static void qdio_kick_outbound_q(struct qdio_q *q)
+{
+       int rc;
+#ifdef CONFIG_QDIO_DEBUG
+       char dbf_text[15];
+
+       QDIO_DBF_TEXT5(0, trace, "kickoutq");
+       QDIO_DBF_HEX5(0, trace, &q, sizeof(void *));
+#endif /* CONFIG_QDIO_DEBUG */
+
+       /* nothing to do for queues that need no SIGA-write */
+       if (!need_siga_out(q))
+               return;
+
+       rc = qdio_siga_output(q);
+       switch (rc) {
+       case 0:
+               /* went smooth this time, reset timestamp */
+               q->u.out.timestamp = 0;
+
+               /* TODO: improve error handling for CC=0 case */
+#ifdef CONFIG_QDIO_DEBUG
+               QDIO_DBF_TEXT3(0, trace, "cc2reslv");
+               sprintf(dbf_text, "%4x%2x%2x", q->irq_ptr->schid.sch_no, q->nr,
+                       atomic_read(&q->u.out.busy_siga_counter));
+               QDIO_DBF_TEXT3(0, trace, dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+               break;
+       /* cc=2 and busy bit */
+       case (2 | QDIO_ERROR_SIGA_BUSY):
+               atomic_inc(&q->u.out.busy_siga_counter);
+
+               /* if the last siga was successful, save timestamp here */
+               if (!q->u.out.timestamp)
+                       q->u.out.timestamp = get_usecs();
+
+               /* if we're in time, don't touch qdio_error */
+               if (get_usecs() - q->u.out.timestamp < QDIO_BUSY_BIT_GIVE_UP) {
+                       tasklet_schedule(&q->tasklet);
+                       break;
+               }
+               QDIO_DBF_TEXT2(0, trace, "cc2REPRT");
+#ifdef CONFIG_QDIO_DEBUG
+               sprintf(dbf_text, "%4x%2x%2x", q->irq_ptr->schid.sch_no, q->nr,
+                       atomic_read(&q->u.out.busy_siga_counter));
+               QDIO_DBF_TEXT3(0, trace, dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+               /* gave up waiting: fall through and report the error */
+       default:
+               /* for plain cc=1, 2 or 3 */
+               q->qdio_error = rc;
+       }
+}
+
+/*
+ * Hand the buffers between first_to_kick and last_move_ftc to the upper
+ * layer handler, then advance first_to_kick and clear the pending error.
+ * Nothing is delivered unless the irq is in QDIO_IRQ_STATE_ACTIVE.
+ */
+static void qdio_kick_outbound_handler(struct qdio_q *q)
+{
+       int first, nr_bufs;
+#ifdef CONFIG_QDIO_DEBUG
+       char dbf_text[15];
+#endif
+
+       first = q->first_to_kick;
+       /* distance from first_to_kick to last_move_ftc, with wrap-around */
+       nr_bufs = q->last_move_ftc - first;
+       if (nr_bufs < 0)
+               nr_bufs += QDIO_MAX_BUFFERS_PER_Q;
+
+#ifdef CONFIG_QDIO_DEBUG
+       QDIO_DBF_TEXT4(0, trace, "kickouth");
+       QDIO_DBF_HEX4(0, trace, &q, sizeof(void *));
+
+       sprintf(dbf_text, "s=%2xc=%2x", first, nr_bufs);
+       QDIO_DBF_TEXT4(0, trace, dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+
+       if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE))
+               return;
+
+       q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr, first, nr_bufs,
+                  q->irq_ptr->int_parm);
+
+       /* for the next time: */
+       q->first_to_kick = q->last_move_ftc;
+       q->qdio_error = 0;
+}
+
+/*
+ * Process an outbound queue: retry pending busy SIGAs, deliver finished
+ * buffers to the handler and, where needed, arrange for a later re-check.
+ */
+static void __qdio_outbound_processing(struct qdio_q *q)
+{
+       int retries;
+
+       qdio_perf_stat_inc(&perf_stats.tasklet_outbound);
+
+       /* see comment in qdio_kick_outbound_q */
+       for (retries = atomic_read(&q->u.out.busy_siga_counter); retries;
+            retries--) {
+               atomic_dec(&q->u.out.busy_siga_counter);
+               qdio_kick_outbound_q(q);
+       }
+
+       BUG_ON(atomic_read(&q->nr_buf_used) < 0);
+
+       if (qdio_outbound_q_moved(q))
+               qdio_kick_outbound_handler(q);
+
+       if (queue_type(q) == QDIO_ZFCP_QFMT) {
+               if (!pci_out_supported(q) && !qdio_outbound_q_done(q))
+                       tasklet_schedule(&q->tasklet);
+               return;
+       }
+
+       /* bail out for HiperSockets unicast queues */
+       if (queue_type(q) == QDIO_IQDIO_QFMT && !multicast_outbound(q))
+               return;
+
+       if (q->u.out.pci_out_enabled)
+               return;
+
+       /*
+        * Now we know that queue type is either qeth without pci enabled
+        * or HiperSockets multicast. Make sure buffer switch from PRIMED to
+        * EMPTY is noticed and outbound_handler is called after some time.
+        */
+       if (qdio_outbound_q_done(q)) {
+               del_timer(&q->u.out.timer);
+               return;
+       }
+       if (timer_pending(&q->u.out.timer))
+               return;
+
+       mod_timer(&q->u.out.timer, jiffies + 10 * HZ);
+       qdio_perf_stat_inc(&perf_stats.debug_tl_out_timer);
+}
+
+/* outbound tasklet; data carries the struct qdio_q pointer */
+void qdio_outbound_processing(unsigned long data)
+{
+       __qdio_outbound_processing((struct qdio_q *)data);
+}
+
+/* timer callback: reschedule the tasklet of the queue passed in data */
+void qdio_outbound_timer(unsigned long data)
+{
+       struct qdio_q *queue = (struct qdio_q *)data;
+
+       tasklet_schedule(&queue->tasklet);
+}
+
+/* called from thinint inbound tasklet */
+void qdio_check_outbound_after_thinint(struct qdio_q *q)
+{
+       struct qdio_q *out;
+       int i;
+
+       if (!pci_out_supported(q))
+               return;
+
+       /* kick every output queue that still has buffers in use */
+       for_each_output_queue(q->irq_ptr, out, i) {
+               if (qdio_outbound_q_done(out))
+                       continue;
+               tasklet_schedule(&out->tasklet);
+       }
+}
+
+/*
+ * Set the state of the qdio irq to @state. With CONFIG_QDIO_DEBUG the
+ * subchannel number and new state are written to the trace debug area.
+ */
+static inline void qdio_set_state(struct qdio_irq *irq_ptr,
+                                 enum qdio_irq_states state)
+{
+#ifdef CONFIG_QDIO_DEBUG
+       char dbf_text[15];
+
+       QDIO_DBF_TEXT5(0, trace, "newstate");
+       sprintf(dbf_text, "%4x%4x", irq_ptr->schid.sch_no, state);
+       QDIO_DBF_TEXT5(0, trace, dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+
+       irq_ptr->state = state;
+       /* full memory barrier after the state store */
+       mb();
+}
+
+/* log the irb and its ecw area if the cons bit is set in the erw */
+static void qdio_irq_check_sense(struct subchannel_id schid, struct irb *irb)
+{
+       char dbf_text[15];
+
+       if (!irb->esw.esw0.erw.cons)
+               return;
+
+       sprintf(dbf_text, "sens%4x", schid.sch_no);
+       QDIO_DBF_TEXT2(1, trace, dbf_text);
+       QDIO_DBF_HEX0(0, trace, irb, 64);
+       QDIO_DBF_HEX0(0, trace, irb->ecw, 64);
+}
+
+/*
+ * PCI interrupt handler: schedule all input queues and, if outbound PCI
+ * is supported, every output queue that still has buffers in use.
+ */
+static void qdio_int_handler_pci(struct qdio_irq *irq_ptr)
+{
+       struct qdio_q *queue;
+       int nr;
+
+       qdio_perf_stat_inc(&perf_stats.pci_int);
+
+       for_each_input_queue(irq_ptr, queue, nr)
+               tasklet_schedule(&queue->tasklet);
+
+       if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED))
+               return;
+
+       for_each_output_queue(irq_ptr, queue, nr) {
+               if (!qdio_outbound_q_done(queue)) {
+                       if (!siga_syncs_out_pci(queue))
+                               qdio_siga_sync_q(queue);
+
+                       tasklet_schedule(&queue->tasklet);
+               }
+       }
+}
+
+/*
+ * Handle an activate check condition: log it, report
+ * QDIO_ERROR_ACTIVATE_CHECK_CONDITION through the handler of the first
+ * input queue (or the first output queue if there is no input queue) and
+ * move the irq to QDIO_IRQ_STATE_STOPPED.
+ */
+static void qdio_handle_activate_check(struct ccw_device *cdev,
+                               unsigned long intparm, int cstat, int dstat)
+{
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       struct qdio_q *q;
+       char dbf_text[15];
+
+       QDIO_DBF_TEXT2(1, trace, "ick2");
+       /* bound the copy: the bus id length is not tied to dbf_text's size */
+       snprintf(dbf_text, sizeof(dbf_text), "%s", cdev->dev.bus_id);
+       QDIO_DBF_TEXT2(1, trace, dbf_text);
+       QDIO_DBF_HEX2(0, trace, &intparm, sizeof(int));
+       QDIO_DBF_HEX2(0, trace, &dstat, sizeof(int));
+       QDIO_DBF_HEX2(0, trace, &cstat, sizeof(int));
+
+       if (irq_ptr->nr_input_qs) {
+               q = irq_ptr->input_qs[0];
+       } else if (irq_ptr->nr_output_qs) {
+               q = irq_ptr->output_qs[0];
+       } else {
+               /* no queue at all, so there is no handler to notify */
+               dump_stack();
+               goto no_handler;
+       }
+       q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE_CHECK_CONDITION,
+                  0, -1, -1, irq_ptr->int_parm);
+no_handler:
+       qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED);
+}
+
+/*
+ * Work function: shut down the device owning @work using clear and drop
+ * the device reference afterwards.
+ */
+static void qdio_call_shutdown(struct work_struct *work)
+{
+       struct ccw_device *cdev;
+
+       cdev = container_of(work, struct ccw_device_private, kick_work)->cdev;
+       qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
+       put_device(&cdev->dev);
+}
+
+/*
+ * Handle an interrupt error for @cdev: move the irq to the ERR or STOPPED
+ * state depending on its current state and wake up waiters on wait_q.
+ */
+static void qdio_int_error(struct ccw_device *cdev)
+{
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+
+       switch (irq_ptr->state) {
+       case QDIO_IRQ_STATE_INACTIVE:
+       case QDIO_IRQ_STATE_CLEANUP:
+               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+               break;
+       case QDIO_IRQ_STATE_ESTABLISHED:
+       case QDIO_IRQ_STATE_ACTIVE:
+               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED);
+               /* the reference is dropped again in qdio_call_shutdown */
+               if (get_device(&cdev->dev)) {
+                       /* Can't call shutdown from interrupt context. */
+                       PREPARE_WORK(&cdev->private->kick_work,
+                                    qdio_call_shutdown);
+                       queue_work(ccw_device_work, &cdev->private->kick_work);
+               }
+               break;
+       default:
+               /* any other state is unexpected here */
+               WARN_ON(1);
+       }
+       wake_up(&cdev->private->wait_q);
+}
+
+/*
+ * Check the channel and device status of the establish interrupt.
+ *
+ * Returns 0 if the status is clean (no channel status, device end set and
+ * no device status bits other than channel end/device end). Otherwise the
+ * status is logged, the irq is set to QDIO_IRQ_STATE_ERR and 1 is returned.
+ */
+static int qdio_establish_check_errors(struct ccw_device *cdev, int cstat,
+                                          int dstat)
+{
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+
+       /*
+        * Any channel status or unexpected device status bit is an error.
+        * This also covers every case of unexpected dstat bits, so no
+        * separate dstat check is needed further down.
+        */
+       if (cstat || (dstat & ~(DEV_STAT_CHN_END | DEV_STAT_DEV_END))) {
+               QDIO_DBF_TEXT2(1, setup, "eq:ckcon");
+               goto error;
+       }
+
+       if (!(dstat & DEV_STAT_DEV_END)) {
+               QDIO_DBF_TEXT2(1, setup, "eq:no de");
+               goto error;
+       }
+       return 0;
+error:
+       QDIO_DBF_HEX2(0, trace, &cstat, sizeof(int));
+       QDIO_DBF_HEX2(0, trace, &dstat, sizeof(int));
+       qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+       return 1;
+}
+
+/*
+ * Handle the interrupt for an establish request: log it and, if the
+ * status shows no error, move the irq to QDIO_IRQ_STATE_ESTABLISHED.
+ */
+static void qdio_establish_handle_irq(struct ccw_device *cdev, int cstat,
+                                     int dstat)
+{
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       char dbf_text[15];
+
+       sprintf(dbf_text, "qehi%4x", cdev->private->schid.sch_no);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       QDIO_DBF_TEXT0(0, trace, dbf_text);
+
+       /* on error qdio_establish_check_errors already set the ERR state */
+       if (!qdio_establish_check_errors(cdev, cstat, dstat))
+               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED);
+}
+
+/*
+ * qdio interrupt handler
+ *
+ * Dispatches on the current irq state: establish completion, cleanup
+ * completion, PCI interrupts and activate checks. Error irbs (-EIO,
+ * -ETIMEDOUT) are passed to qdio_int_error(). Waiters on wait_q are woken
+ * up whenever the irq state may have changed.
+ */
+void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
+                     struct irb *irb)
+{
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       int cstat, dstat;
+       char dbf_text[15];
+
+       qdio_perf_stat_inc(&perf_stats.qdio_int);
+
+       /* ignore interrupts without intparm or without qdio data */
+       if (!intparm || !irq_ptr) {
+               sprintf(dbf_text, "qihd%4x", cdev->private->schid.sch_no);
+               QDIO_DBF_TEXT2(1, setup, dbf_text);
+               return;
+       }
+
+       /* irb may be an error pointer instead of a valid irb */
+       if (IS_ERR(irb)) {
+               switch (PTR_ERR(irb)) {
+               case -EIO:
+                       sprintf(dbf_text, "ierr%4x",
+                               cdev->private->schid.sch_no);
+                       QDIO_DBF_TEXT2(1, setup, dbf_text);
+                       qdio_int_error(cdev);
+                       return;
+               case -ETIMEDOUT:
+                       sprintf(dbf_text, "qtoh%4x",
+                               cdev->private->schid.sch_no);
+                       QDIO_DBF_TEXT2(1, setup, dbf_text);
+                       qdio_int_error(cdev);
+                       return;
+               default:
+                       WARN_ON(1);
+                       return;
+               }
+       }
+       qdio_irq_check_sense(irq_ptr->schid, irb);
+
+       cstat = irb->scsw.cmd.cstat;
+       dstat = irb->scsw.cmd.dstat;
+
+       switch (irq_ptr->state) {
+       case QDIO_IRQ_STATE_INACTIVE:
+               qdio_establish_handle_irq(cdev, cstat, dstat);
+               break;
+
+       case QDIO_IRQ_STATE_CLEANUP:
+               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
+               break;
+
+       case QDIO_IRQ_STATE_ESTABLISHED:
+       case QDIO_IRQ_STATE_ACTIVE:
+               if (cstat & SCHN_STAT_PCI) {
+                       qdio_int_handler_pci(irq_ptr);
+                       /* no state change so no need to wake up wait_q */
+                       return;
+               }
+               if ((cstat & ~SCHN_STAT_PCI) || dstat) {
+                       qdio_handle_activate_check(cdev, intparm, cstat,
+                                                  dstat);
+                       break;
+               }
+               /*
+                * NOTE(review): an interrupt with neither cstat nor dstat
+                * set falls through to the WARN_ON below - presumably
+                * intended, confirm.
+                */
+       default:
+               WARN_ON(1);
+       }
+       wake_up(&cdev->private->wait_q);
+}
+
+/**
+ * qdio_get_ssqd_desc - get qdio subchannel description
+ * @cdev: ccw device to get description for
+ *
+ * Returns a pointer to the qdio subchannel description saved in the qdio
+ * data of @cdev, or NULL if @cdev has no qdio data attached.
+ */
+struct qdio_ssqd_desc *qdio_get_ssqd_desc(struct ccw_device *cdev)
+{
+       struct qdio_irq *irq_ptr;
+
+       QDIO_DBF_TEXT0(0, setup, "getssqd");
+
+       irq_ptr = cdev->private->qdio_data;
+       return irq_ptr ? &irq_ptr->ssqd_desc : NULL;
+}
+EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc);
+
+/**
+ * qdio_cleanup - shutdown queues and free data structures
+ * @cdev: associated ccw device
+ * @how: use halt or clear to shutdown
+ *
+ * Calls qdio_shutdown() for @cdev with method @how and, only if that
+ * succeeded, qdio_free() for @cdev. Returns -ENODEV if @cdev has no qdio
+ * data attached, otherwise the result of the last function called.
+ */
+int qdio_cleanup(struct ccw_device *cdev, int how)
+{
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       char dbf_text[15];
+       int rc;
+
+       if (!irq_ptr)
+               return -ENODEV;
+
+       sprintf(dbf_text, "qcln%4x", irq_ptr->schid.sch_no);
+       QDIO_DBF_TEXT1(0, trace, dbf_text);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+
+       rc = qdio_shutdown(cdev, how);
+       if (rc)
+               return rc;
+       return qdio_free(cdev);
+}
+EXPORT_SYMBOL_GPL(qdio_cleanup);
+
+/*
+ * Disable the tasklets of all input and output queues of @cdev and delete
+ * the timers of the output queues.
+ */
+static void qdio_shutdown_queues(struct ccw_device *cdev)
+{
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       struct qdio_q *q;
+       int i;
+
+       for_each_input_queue(irq_ptr, q, i)
+               tasklet_disable(&q->tasklet);
+
+       for_each_output_queue(irq_ptr, q, i) {
+               tasklet_disable(&q->tasklet);
+               del_timer(&q->u.out.timer);
+       }
+}
+
+/**
+ * qdio_shutdown - shut down a qdio subchannel
+ * @cdev: associated ccw device
+ * @how: use halt or clear to shutdown
+ *
+ * Returns -ENODEV if @cdev has no qdio data attached, 0 if the subchannel
+ * is already inactive, otherwise the result of the halt/clear request.
+ */
+int qdio_shutdown(struct ccw_device *cdev, int how)
+{
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       unsigned long flags;
+       char dbf_text[15];
+       int rc;
+
+       if (!irq_ptr)
+               return -ENODEV;
+
+       mutex_lock(&irq_ptr->setup_mutex);
+       /*
+        * Subchannel was already shot down. We cannot prevent being called
+        * twice since cio may trigger a shutdown asynchronously.
+        */
+       if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) {
+               mutex_unlock(&irq_ptr->setup_mutex);
+               return 0;
+       }
+
+       sprintf(dbf_text, "qsqs%4x", irq_ptr->schid.sch_no);
+       QDIO_DBF_TEXT1(0, trace, dbf_text);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+
+       tiqdio_remove_input_queues(irq_ptr);
+       qdio_shutdown_queues(cdev);
+       qdio_shutdown_debug_entries(irq_ptr, cdev);
+
+       /* cleanup subchannel */
+       spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+
+       /* default behaviour is halt */
+       if (how & QDIO_FLAG_CLEANUP_USING_CLEAR)
+               rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP);
+       else
+               rc = ccw_device_halt(cdev, QDIO_DOING_CLEANUP);
+
+       if (rc) {
+               /* request failed: log it and skip waiting for the cleanup */
+               sprintf(dbf_text, "sher%4x", irq_ptr->schid.sch_no);
+               QDIO_DBF_TEXT0(0, setup, dbf_text);
+               sprintf(dbf_text, "rc=%d", rc);
+               QDIO_DBF_TEXT0(0, setup, dbf_text);
+       } else {
+               /* wait, without the ccw device lock, for the interrupt */
+               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP);
+               spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+               wait_event_interruptible_timeout(cdev->private->wait_q,
+                       irq_ptr->state == QDIO_IRQ_STATE_INACTIVE ||
+                       irq_ptr->state == QDIO_IRQ_STATE_ERR,
+                       10 * HZ);
+               spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+       }
+
+       qdio_shutdown_thinint(irq_ptr);
+
+       /* restore interrupt handler */
+       if ((void *)cdev->handler == (void *)qdio_int_handler)
+               cdev->handler = irq_ptr->orig_handler;
+       spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+
+       qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
+       mutex_unlock(&irq_ptr->setup_mutex);
+       module_put(THIS_MODULE);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(qdio_shutdown);
+
+/**
+ * qdio_free - free data structures for a qdio subchannel
+ * @cdev: associated ccw device
+ *
+ * Detaches the qdio data from @cdev and releases its memory.
+ * Returns 0 on success or -ENODEV if @cdev has no qdio data attached.
+ */
+int qdio_free(struct ccw_device *cdev)
+{
+       struct qdio_irq *irq_ptr;
+       char dbf_text[15];
+
+       irq_ptr = cdev->private->qdio_data;
+       if (!irq_ptr)
+               return -ENODEV;
+
+       mutex_lock(&irq_ptr->setup_mutex);
+
+       sprintf(dbf_text, "qfqs%4x", irq_ptr->schid.sch_no);
+       QDIO_DBF_TEXT1(0, trace, dbf_text);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+
+       /* detach from the device before the memory is released */
+       cdev->private->qdio_data = NULL;
+       mutex_unlock(&irq_ptr->setup_mutex);
+
+       qdio_release_memory(irq_ptr);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(qdio_free);
+
+/**
+ * qdio_initialize - allocate and establish queues for a qdio subchannel
+ * @init_data: initialization data
+ *
+ * Allocates the queues via qdio_allocate() and, on success, establishes
+ * them via qdio_establish(). If establishing fails the allocated data is
+ * freed again. Returns 0 on success or the error of the failing step.
+ */
+int qdio_initialize(struct qdio_initialize *init_data)
+{
+       char dbf_text[15];
+       int rc;
+
+       sprintf(dbf_text, "qini%4x", init_data->cdev->private->schid.sch_no);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       QDIO_DBF_TEXT0(0, trace, dbf_text);
+
+       rc = qdio_allocate(init_data);
+       if (!rc) {
+               rc = qdio_establish(init_data);
+               if (rc)
+                       qdio_free(init_data->cdev);
+       }
+       return rc;
+}
+EXPORT_SYMBOL_GPL(qdio_initialize);
+
+/**
+ * qdio_allocate - allocate qdio queues and associated data
+ * @init_data: initialization data
+ *
+ * Returns 0 on success, -EINVAL if @init_data is inconsistent (missing
+ * handler, too many queues, missing sbal address array) or -ENOMEM if an
+ * allocation failed.
+ */
+int qdio_allocate(struct qdio_initialize *init_data)
+{
+       struct qdio_irq *irq_ptr;
+       char dbf_text[15];
+
+       sprintf(dbf_text, "qalc%4x", init_data->cdev->private->schid.sch_no);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       QDIO_DBF_TEXT0(0, trace, dbf_text);
+
+       /* every requested queue direction needs a handler */
+       if ((init_data->no_input_qs && !init_data->input_handler) ||
+           (init_data->no_output_qs && !init_data->output_handler))
+               return -EINVAL;
+
+       if ((init_data->no_input_qs > QDIO_MAX_QUEUES_PER_IRQ) ||
+           (init_data->no_output_qs > QDIO_MAX_QUEUES_PER_IRQ))
+               return -EINVAL;
+
+       /* both sbal address arrays are required */
+       if ((!init_data->input_sbal_addr_array) ||
+           (!init_data->output_sbal_addr_array))
+               return -EINVAL;
+
+       qdio_allocate_do_dbf(init_data);
+
+       /* irq_ptr must be in GFP_DMA since it contains ccw1.cda */
+       irq_ptr = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+       if (!irq_ptr)
+               goto out_err;
+       QDIO_DBF_TEXT0(0, setup, "irq_ptr:");
+       QDIO_DBF_HEX0(0, setup, &irq_ptr, sizeof(void *));
+
+       mutex_init(&irq_ptr->setup_mutex);
+
+       /*
+        * Allocate a page for the chsc calls in qdio_establish.
+        * Must be pre-allocated since a zfcp recovery will call
+        * qdio_establish. In case of low memory and swap on a zfcp disk
+        * we may not be able to allocate memory otherwise.
+        */
+       irq_ptr->chsc_page = get_zeroed_page(GFP_KERNEL);
+       if (!irq_ptr->chsc_page)
+               goto out_rel;
+
+       /* qdr is used in ccw1.cda which is u32 */
+       irq_ptr->qdr = kzalloc(sizeof(struct qdr), GFP_KERNEL | GFP_DMA);
+       if (!irq_ptr->qdr)
+               goto out_rel;
+       /* warn if any of the low 12 address bits of qdr is set */
+       WARN_ON((unsigned long)irq_ptr->qdr & 0xfff);
+
+       QDIO_DBF_TEXT0(0, setup, "qdr:");
+       QDIO_DBF_HEX0(0, setup, &irq_ptr->qdr, sizeof(void *));
+
+       if (qdio_allocate_qs(irq_ptr, init_data->no_input_qs,
+                            init_data->no_output_qs))
+               goto out_rel;
+
+       /* attach the qdio data to the device only once it is complete */
+       init_data->cdev->private->qdio_data = irq_ptr;
+       qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
+       return 0;
+out_rel:
+       qdio_release_memory(irq_ptr);
+out_err:
+       return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(qdio_allocate);
+
+/**
+ * qdio_establish - establish queues on a qdio subchannel
+ * @init_data: initialization data
+ *
+ * Sets up the irq data from @init_data, starts the establish ccw and waits
+ * (at most HZ jiffies) for the subchannel to reach the established state.
+ * Every failure after the setup mutex has been taken ends in
+ * qdio_shutdown().
+ *
+ * Returns 0 on success, -ENODEV if no qdio data is attached to the device,
+ * -EINVAL if the device is not online or no module reference could be
+ * taken, -EIO if the established state was not reached, or the error code
+ * of qdio_establish_thinint() / ccw_device_start().
+ */
+int qdio_establish(struct qdio_initialize *init_data)
+{
+       struct ccw_device *cdev = init_data->cdev;
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       unsigned long lock_flags;
+       char text[20];
+       int ret;
+
+       if (irq_ptr == NULL)
+               return -ENODEV;
+       if (cdev->private->state != DEV_STATE_ONLINE)
+               return -EINVAL;
+       if (!try_module_get(THIS_MODULE))
+               return -EINVAL;
+
+       sprintf(text, "qest%4x", cdev->private->schid.sch_no);
+       QDIO_DBF_TEXT0(0, setup, text);
+       QDIO_DBF_TEXT0(0, trace, text);
+
+       mutex_lock(&irq_ptr->setup_mutex);
+       qdio_setup_irq(init_data);
+
+       ret = qdio_establish_thinint(irq_ptr);
+       if (ret)
+               goto out_shutdown;
+
+       /* establish q */
+       irq_ptr->ccw.cmd_code = irq_ptr->equeue.cmd;
+       irq_ptr->ccw.flags = CCW_FLAG_SLI;
+       irq_ptr->ccw.count = irq_ptr->equeue.count;
+       irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr);
+
+       spin_lock_irqsave(get_ccwdev_lock(cdev), lock_flags);
+       ccw_device_set_options_mask(cdev, 0);
+
+       ret = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0);
+       if (ret) {
+               sprintf(text, "eq:io%4x", irq_ptr->schid.sch_no);
+               QDIO_DBF_TEXT2(1, setup, text);
+               sprintf(text, "eq:rc%4x", ret);
+               QDIO_DBF_TEXT2(1, setup, text);
+       }
+       spin_unlock_irqrestore(get_ccwdev_lock(cdev), lock_flags);
+
+       if (ret)
+               goto out_shutdown;
+
+       /* the result of the wait is judged by the state check below */
+       wait_event_interruptible_timeout(cdev->private->wait_q,
+               irq_ptr->state == QDIO_IRQ_STATE_ESTABLISHED ||
+               irq_ptr->state == QDIO_IRQ_STATE_ERR, HZ);
+
+       if (irq_ptr->state != QDIO_IRQ_STATE_ESTABLISHED) {
+               ret = -EIO;
+               goto out_shutdown;
+       }
+
+       qdio_setup_ssqd_info(irq_ptr);
+       sprintf(text, "qib ac%2x", irq_ptr->qib.ac);
+       QDIO_DBF_TEXT2(0, setup, text);
+
+       /* qebsm is now setup if available, initialize buffer states */
+       qdio_init_buf_states(irq_ptr);
+
+       mutex_unlock(&irq_ptr->setup_mutex);
+       qdio_print_subchannel_info(irq_ptr, cdev);
+       qdio_setup_debug_entries(irq_ptr, cdev);
+       return 0;
+
+out_shutdown:
+       /* common failure exit: drop the mutex first, then shut down */
+       mutex_unlock(&irq_ptr->setup_mutex);
+       qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(qdio_establish);
+
+/**
+ * qdio_activate - activate queues on a qdio subchannel
+ * @cdev: associated cdev
+ *
+ * Starts the activate ccw and, after a short sleep, marks the subchannel
+ * active unless it went into the stopped or error state in the meantime; in
+ * that case the subchannel is shut down.
+ *
+ * Returns 0 on success, -ENODEV if no qdio data is attached to @cdev,
+ * -EINVAL if the device is not online, -EBUSY if the subchannel is still in
+ * the inactive state, -EIO if activation failed, or the error code of
+ * ccw_device_start().
+ */
+int qdio_activate(struct ccw_device *cdev)
+{
+       struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       unsigned long lock_flags;
+       char text[20];
+       int ret;
+
+       if (irq_ptr == NULL)
+               return -ENODEV;
+       if (cdev->private->state != DEV_STATE_ONLINE)
+               return -EINVAL;
+
+       mutex_lock(&irq_ptr->setup_mutex);
+       if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) {
+               ret = -EBUSY;
+               goto out_unlock;
+       }
+
+       sprintf(text, "qact%4x", irq_ptr->schid.sch_no);
+       QDIO_DBF_TEXT2(0, setup, text);
+       QDIO_DBF_TEXT2(0, trace, text);
+
+       irq_ptr->ccw.cmd_code = irq_ptr->aqueue.cmd;
+       irq_ptr->ccw.flags = CCW_FLAG_SLI;
+       irq_ptr->ccw.count = irq_ptr->aqueue.count;
+       irq_ptr->ccw.cda = 0;
+
+       spin_lock_irqsave(get_ccwdev_lock(cdev), lock_flags);
+       ccw_device_set_options(cdev, CCWDEV_REPORT_ALL);
+
+       ret = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE,
+                              0, DOIO_DENY_PREFETCH);
+       if (ret) {
+               sprintf(text, "aq:io%4x", irq_ptr->schid.sch_no);
+               QDIO_DBF_TEXT2(1, setup, text);
+               sprintf(text, "aq:rc%4x", ret);
+               QDIO_DBF_TEXT2(1, setup, text);
+       }
+       spin_unlock_irqrestore(get_ccwdev_lock(cdev), lock_flags);
+
+       if (ret)
+               goto out_unlock;
+
+       if (is_thinint_irq(irq_ptr))
+               tiqdio_add_input_queues(irq_ptr);
+
+       /* wait for subchannel to become active */
+       msleep(5);
+
+       switch (irq_ptr->state) {
+       case QDIO_IRQ_STATE_STOPPED:
+       case QDIO_IRQ_STATE_ERR:
+               goto out_shutdown;
+       default:
+               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ACTIVE);
+               ret = 0;
+       }
+
+out_unlock:
+       mutex_unlock(&irq_ptr->setup_mutex);
+       return ret;
+
+out_shutdown:
+       /* activation failed: drop the mutex first, then shut down */
+       mutex_unlock(&irq_ptr->setup_mutex);
+       qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
+       return -EIO;
+}
+EXPORT_SYMBOL_GPL(qdio_activate);
+
+/*
+ * Tell whether buffer @bufnr belongs to the run of @count buffers that
+ * begins at @start. The end of the run is computed with add_buf(); when it
+ * is not above @start the run is treated as wrapping around.
+ * Returns 1 if @bufnr is inside the run and 0 otherwise.
+ */
+static inline int buf_in_between(int bufnr, int start, int count)
+{
+       int end = add_buf(start, count);
+
+       /* no wrap-around: plain half-open interval [start, end) */
+       if (end > start)
+               return (bufnr >= start && bufnr < end) ? 1 : 0;
+
+       /* wrap-around case: either below end or from start upwards */
+       if (bufnr < end)
+               return 1;
+       return (bufnr >= start && bufnr <= QDIO_MAX_BUFFERS_PER_Q) ? 1 : 0;
+}
+
+/**
+ * handle_inbound - reset processed input buffers
+ * @q: queue containing the buffers
+ * @callflags: flags (not evaluated here)
+ * @bufnr: first buffer to process
+ * @count: how many buffers are emptied
+ *
+ * Sets the buffers to SLSB_CU_INPUT_EMPTY and accounts them in nr_buf_used.
+ * If no buffers were in use before and the queue needs it, a SIGA input is
+ * issued; a failure of that is stored in q->qdio_error.
+ */
+static void handle_inbound(struct qdio_q *q, unsigned int callflags,
+                          int bufnr, int count)
+{
+       unsigned long irq_flags;
+       int prev_used, err;
+
+       /*
+        * do_QDIO could run in parallel with the queue tasklet so the
+        * upper-layer program could empty the ACK'ed buffer here.
+        * If that happens we must clear the polling flag, otherwise
+        * qdio_stop_polling() could set the buffer to NOT_INIT after
+        * it was set to EMPTY which would kill us.
+        */
+       spin_lock_irqsave(&q->u.in.lock, irq_flags);
+       if (q->u.in.polling &&
+           buf_in_between(q->last_move_ftc, bufnr, count))
+               q->u.in.polling = 0;
+
+       count = set_buf_states(q, bufnr, SLSB_CU_INPUT_EMPTY, count);
+       spin_unlock_irqrestore(&q->u.in.lock, irq_flags);
+
+       /* prev_used is the counter value before this call added to it */
+       prev_used = atomic_add_return(count, &q->nr_buf_used) - count;
+       BUG_ON(prev_used + count > QDIO_MAX_BUFFERS_PER_Q);
+
+       /* no need to signal as long as the adapter had free buffers */
+       if (prev_used || !need_siga_in(q))
+               return;
+
+       err = qdio_siga_input(q);
+       if (err)
+               q->qdio_error = err;
+}
+
+/**
+ * handle_outbound - process filled outbound buffers
+ * @q: queue containing the buffers
+ * @callflags: flags; QDIO_FLAG_PCI_OUT selects pci_out_enabled
+ * @bufnr: first buffer to process
+ * @count: how many buffers are filled
+ *
+ * Sets the buffers to SLSB_CU_OUTPUT_PRIMED, accounts them in nr_buf_used,
+ * signals the adapter as required by the queue type and finally schedules
+ * the queue tasklet.
+ */
+static void handle_outbound(struct qdio_q *q, unsigned int callflags,
+                           int bufnr, int count)
+{
+       unsigned char prev_state;
+       int in_use;
+
+       qdio_perf_stat_inc(&perf_stats.outbound_handler);
+
+       count = set_buf_states(q, bufnr, SLSB_CU_OUTPUT_PRIMED, count);
+       in_use = atomic_add_return(count, &q->nr_buf_used);
+       BUG_ON(in_use > QDIO_MAX_BUFFERS_PER_Q);
+
+       q->u.out.pci_out_enabled = (callflags & QDIO_FLAG_PCI_OUT) ? 1 : 0;
+
+       if (queue_type(q) == QDIO_IQDIO_QFMT) {
+               if (multicast_outbound(q)) {
+                       qdio_kick_outbound_q(q);
+               } else {
+                       /*
+                        * One siga-w per buffer required for unicast
+                        * HiperSockets.
+                        */
+                       while (count--)
+                               qdio_kick_outbound_q(q);
+               }
+       } else if (need_siga_sync(q)) {
+               qdio_siga_sync_q(q);
+       } else {
+               /* try to fast requeue buffers */
+               get_buf_state(q, prev_buf(bufnr), &prev_state);
+               if (prev_state == SLSB_CU_OUTPUT_PRIMED) {
+                       QDIO_DBF_TEXT5(0, trace, "fast-req");
+                       qdio_perf_stat_inc(&perf_stats.fast_requeue);
+               } else {
+                       qdio_kick_outbound_q(q);
+               }
+       }
+
+       /* Fixme: could wait forever if called from process context */
+       tasklet_schedule(&q->tasklet);
+}
+
+/**
+ * do_QDIO - process input or output buffers
+ * @cdev: associated ccw_device for the qdio subchannel
+ * @callflags: input or output and special flags from the program
+ * @q_nr: queue number, 0 <= @q_nr < QDIO_MAX_QUEUES_PER_IRQ
+ * @bufnr: buffer number, 0 <= @bufnr < QDIO_MAX_BUFFERS_PER_Q
+ * @count: how many buffers to process, at most QDIO_MAX_BUFFERS_PER_Q
+ *
+ * Returns 0 on success (also when @count is 0), -EINVAL for out-of-range
+ * arguments, a queue that was never allocated or missing direction flags,
+ * -ENODEV if no qdio data is attached to @cdev and -EBUSY if the
+ * subchannel is not in the active state.
+ */
+int do_QDIO(struct ccw_device *cdev, unsigned int callflags,
+           int q_nr, int bufnr, int count)
+{
+       struct qdio_irq *irq_ptr;
+       struct qdio_q *q;
+#ifdef CONFIG_QDIO_DEBUG
+       char dbf_text[20];
+
+       sprintf(dbf_text, "doQD%04x", cdev->private->schid.sch_no);
+       QDIO_DBF_TEXT3(0, trace, dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+
+       /*
+        * q_nr and bufnr are used as array indices, so the valid range is
+        * 0 .. MAX - 1; count may be as large as a complete queue. Negative
+        * values are rejected as well since all three are signed.
+        */
+       if ((bufnr < 0) || (bufnr >= QDIO_MAX_BUFFERS_PER_Q) ||
+           (count < 0) || (count > QDIO_MAX_BUFFERS_PER_Q) ||
+           (q_nr < 0) || (q_nr >= QDIO_MAX_QUEUES_PER_IRQ))
+               return -EINVAL;
+
+       if (!count)
+               return 0;
+
+       irq_ptr = cdev->private->qdio_data;
+       if (!irq_ptr)
+               return -ENODEV;
+
+#ifdef CONFIG_QDIO_DEBUG
+       if (callflags & QDIO_FLAG_SYNC_INPUT)
+               QDIO_DBF_HEX3(0, trace, &irq_ptr->input_qs[q_nr],
+                             sizeof(void *));
+       else
+               QDIO_DBF_HEX3(0, trace, &irq_ptr->output_qs[q_nr],
+                             sizeof(void *));
+
+       sprintf(dbf_text, "flag%04x", callflags);
+       QDIO_DBF_TEXT3(0, trace, dbf_text);
+       sprintf(dbf_text, "qi%02xct%02x", bufnr, count);
+       QDIO_DBF_TEXT3(0, trace, dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+
+       if (irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)
+               return -EBUSY;
+
+       if (callflags & QDIO_FLAG_SYNC_INPUT)
+               q = irq_ptr->input_qs[q_nr];
+       else if (callflags & QDIO_FLAG_SYNC_OUTPUT)
+               q = irq_ptr->output_qs[q_nr];
+       else {
+               QDIO_DBF_TEXT3(1, trace, "doQD:inv");
+               return -EINVAL;
+       }
+
+       /* slots beyond the allocated number of queues are NULL */
+       if (!q)
+               return -EINVAL;
+
+       if (callflags & QDIO_FLAG_SYNC_INPUT)
+               handle_inbound(q, callflags, bufnr, count);
+       else
+               handle_outbound(q, callflags, bufnr, count);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(do_QDIO);
+
+/*
+ * Module initialization: bring up the qdio sub-components one after the
+ * other. If a step fails, everything set up before it is undone in reverse
+ * order and the error code of the failing step is returned.
+ */
+static int __init init_QDIO(void)
+{
+       int err;
+
+       err = qdio_setup_init();
+       if (err)
+               return err;
+
+       err = tiqdio_allocate_memory();
+       if (err)
+               goto undo_setup;
+
+       err = qdio_debug_init();
+       if (err)
+               goto undo_tiqdio_mem;
+
+       err = qdio_setup_perf_stats();
+       if (err)
+               goto undo_debug;
+
+       err = tiqdio_register_thinints();
+       if (err)
+               goto undo_perf_stats;
+
+       return 0;
+
+undo_perf_stats:
+       qdio_remove_perf_stats();
+undo_debug:
+       qdio_debug_exit();
+undo_tiqdio_mem:
+       tiqdio_free_memory();
+undo_setup:
+       qdio_setup_exit();
+       return err;
+}
+
+/*
+ * Module exit: undo what init_QDIO() set up.
+ * NOTE(review): tiqdio_free_memory() is called earlier here than in the
+ * error unwinding of init_QDIO() - presumably harmless, confirm.
+ */
+static void __exit exit_QDIO(void)
+{
+       tiqdio_unregister_thinints();
+       tiqdio_free_memory();
+       qdio_remove_perf_stats();
+       qdio_debug_exit();
+       qdio_setup_exit();
+}
+
+/* register init_QDIO() / exit_QDIO() as the module entry and exit points */
+module_init(init_QDIO);
+module_exit(exit_QDIO);
diff --git a/drivers/s390/cio/qdio_perf.c b/drivers/s390/cio/qdio_perf.c

new file mode 100644 (file)

index 0000000..ea01b85
--- /dev/null
+++ b/drivers/s390/cio/qdio_perf.c
@@ -0,0 +1,151 @@
+/*
+ *  drivers/s390/cio/qdio_perf.c
+ *
+ *  Copyright IBM Corp. 2008
+ *
+ *  Author: Jan Glauber (jang@linux.vnet.ibm.com)
+ */
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <asm/ccwdev.h>
+
+#include "cio.h"
+#include "css.h"
+#include "device.h"
+#include "ioasm.h"
+#include "chsc.h"
+#include "qdio_debug.h"
+#include "qdio_perf.h"
+
+/* non-zero while counters are updated; set through the sysfs bus attribute */
+int qdio_performance_stats;
+/* the global set of counters, reported through /proc/qdio_perf */
+struct qdio_perf_stats perf_stats;
+
+#ifdef CONFIG_PROC_FS
+/* proc entry created by qdio_setup_perf_stats() */
+static struct proc_dir_entry *qdio_perf_pde;
+#endif
+
+/* Increment @count, but only while qdio_performance_stats is non-zero. */
+inline void qdio_perf_stat_inc(atomic_long_t *count)
+{
+       if (!qdio_performance_stats)
+               return;
+       atomic_long_inc(count);
+}
+
+/* Decrement @count, but only while qdio_performance_stats is non-zero. */
+inline void qdio_perf_stat_dec(atomic_long_t *count)
+{
+       if (!qdio_performance_stats)
+               return;
+       atomic_long_dec(count);
+}
+
+/*
+ * procfs functions
+ */
+/*
+ * seq_file show callback: print every counter of perf_stats as one labelled
+ * line, grouped by blank lines. Always returns 0.
+ */
+static int qdio_perf_proc_show(struct seq_file *m, void *v)
+{
+       struct qdio_perf_stats *st = &perf_stats;
+
+       /* interrupts */
+       seq_printf(m, "Number of qdio interrupts\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->qdio_int));
+       seq_printf(m, "Number of PCI interrupts\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->pci_int));
+       seq_printf(m, "Number of adapter interrupts\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->thin_int));
+       seq_printf(m, "\n");
+
+       /* tasklets */
+       seq_printf(m, "Inbound tasklet runs\t\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->tasklet_inbound));
+       seq_printf(m, "Outbound tasklet runs\t\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->tasklet_outbound));
+       seq_printf(m, "Adapter interrupt tasklet runs/loops\t\t: %li/%li\n",
+                  (long)atomic_long_read(&st->tasklet_thinint),
+                  (long)atomic_long_read(&st->tasklet_thinint_loop));
+       seq_printf(m, "Adapter interrupt inbound tasklet runs/loops\t: %li/%li\n",
+                  (long)atomic_long_read(&st->thinint_inbound),
+                  (long)atomic_long_read(&st->thinint_inbound_loop));
+       seq_printf(m, "\n");
+
+       /* signal adapter instructions */
+       seq_printf(m, "Number of SIGA In issued\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->siga_in));
+       seq_printf(m, "Number of SIGA Out issued\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->siga_out));
+       seq_printf(m, "Number of SIGA Sync issued\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->siga_sync));
+       seq_printf(m, "\n");
+
+       /* handler invocations */
+       seq_printf(m, "Number of inbound transfers\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->inbound_handler));
+       seq_printf(m, "Number of outbound transfers\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->outbound_handler));
+       seq_printf(m, "\n");
+
+       /* miscellaneous and debugging counters */
+       seq_printf(m, "Number of fast requeues (outg. SBAL w/o SIGA)\t: %li\n",
+                  (long)atomic_long_read(&st->fast_requeue));
+       seq_printf(m, "Number of outbound tasklet mod_timer calls\t: %li\n",
+                  (long)atomic_long_read(&st->debug_tl_out_timer));
+       seq_printf(m, "Number of stop polling calls\t\t\t: %li\n",
+                  (long)atomic_long_read(&st->debug_stop_polling));
+       seq_printf(m, "AI inbound tasklet loops after stop polling\t: %li\n",
+                  (long)atomic_long_read(&st->thinint_inbound_loop2));
+       seq_printf(m, "\n");
+       return 0;
+}
+/* open callback of the proc file: bind qdio_perf_proc_show() via single_open() */
+static int qdio_perf_seq_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, qdio_perf_proc_show, NULL);
+}
+
+/*
+ * File operations of the "qdio_perf" proc entry. The table is never
+ * modified after initialization, so it is const.
+ */
+static const struct file_operations qdio_perf_proc_fops = {
+       .owner   = THIS_MODULE,
+       .open    = qdio_perf_seq_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = single_release,
+};
+
+/*
+ * sysfs functions
+ */
+/* sysfs show: print "1" if statistics gathering is switched on, else "0" */
+static ssize_t qdio_perf_stats_show(struct bus_type *bus, char *buf)
+{
+       int enabled = !!qdio_performance_stats;
+
+       return sprintf(buf, "%i\n", enabled);
+}
+
+/*
+ * sysfs store: accept "0" or "1" to switch statistics gathering off or on.
+ * Switching it off also clears all counters. Returns @count on success and
+ * -EINVAL for any other input.
+ */
+static ssize_t qdio_perf_stats_store(struct bus_type *bus,
+                             const char *buf, size_t count)
+{
+       unsigned long val;
+
+       if (strict_strtoul(buf, 16, &val))
+               return -EINVAL;
+       if (val > 1)
+               return -EINVAL;
+
+       if (val != qdio_performance_stats) {
+               qdio_performance_stats = val;
+               /* reset performance statistics */
+               if (!val)
+                       memset(&perf_stats, 0, sizeof(struct qdio_perf_stats));
+       }
+       return count;
+}
+
+/* bus attribute "qdio_performance_stats", mode 0644, using the callbacks above */
+static BUS_ATTR(qdio_performance_stats, 0644, qdio_perf_stats_show,
+               qdio_perf_stats_store);
+
+/*
+ * Create the sysfs switch on the ccw bus and, with CONFIG_PROC_FS, the
+ * "qdio_perf" proc file. Returns 0 or the error of bus_create_file().
+ * NOTE(review): the result of proc_create() is stored but not checked.
+ */
+int __init qdio_setup_perf_stats(void)
+{
+       int ret;
+
+       ret = bus_create_file(&ccw_bus_type, &bus_attr_qdio_performance_stats);
+       if (ret != 0)
+               return ret;
+
+#ifdef CONFIG_PROC_FS
+       /* start with cleared counters */
+       memset(&perf_stats, 0, sizeof(perf_stats));
+       qdio_perf_pde = proc_create("qdio_perf", S_IFREG | S_IRUGO,
+                                   NULL, &qdio_perf_proc_fops);
+#endif
+       return 0;
+}
+
+/*
+ * Remove the "qdio_perf" proc file (with CONFIG_PROC_FS) and the sysfs
+ * switch again.
+ *
+ * Deliberately not marked __exit: besides module exit this is also called
+ * from the error unwinding of the __init function init_QDIO(), and exit
+ * text may be discarded when the code is built into the kernel.
+ */
+void qdio_remove_perf_stats(void)
+{
+#ifdef CONFIG_PROC_FS
+       remove_proc_entry("qdio_perf", NULL);
+#endif
+       bus_remove_file(&ccw_bus_type, &bus_attr_qdio_performance_stats);
+}
diff --git a/drivers/s390/cio/qdio_perf.h b/drivers/s390/cio/qdio_perf.h

new file mode 100644 (file)

index 0000000..5c406a8
--- /dev/null
+++ b/drivers/s390/cio/qdio_perf.h
@@ -0,0 +1,54 @@
+/*
+ *  drivers/s390/cio/qdio_perf.h
+ *
+ *  Copyright IBM Corp. 2008
+ *
+ *  Author: Jan Glauber (jang@linux.vnet.ibm.com)
+ */
+#ifndef QDIO_PERF_H
+#define QDIO_PERF_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <asm/atomic.h>
+
+struct qdio_perf_stats {
+       /* interrupt handler calls: qdio, PCI and adapter (thin) interrupts */
+       atomic_long_t qdio_int;
+       atomic_long_t pci_int;
+       atomic_long_t thin_int;
+
+       /* tasklet runs and tasklet loop counts, as reported in procfs */
+       atomic_long_t tasklet_inbound;
+       atomic_long_t tasklet_outbound;
+       atomic_long_t tasklet_thinint;
+       atomic_long_t tasklet_thinint_loop;
+       atomic_long_t thinint_inbound;
+       atomic_long_t thinint_inbound_loop;
+       atomic_long_t thinint_inbound_loop2;
+
+       /* signal adapter (SIGA) calls: out, in and sync */
+       atomic_long_t siga_out;
+       atomic_long_t siga_in;
+       atomic_long_t siga_sync;
+
+       /* misc: handler calls (reported as transfers) and fast requeues */
+       atomic_long_t inbound_handler;
+       atomic_long_t outbound_handler;
+       atomic_long_t fast_requeue;
+
+       /* for debugging: outbound mod_timer calls and stop polling calls */
+       atomic_long_t debug_tl_out_timer;
+       atomic_long_t debug_stop_polling;
+};
+
+extern struct qdio_perf_stats perf_stats;
+extern int qdio_performance_stats;
+
+int qdio_setup_perf_stats(void);
+void qdio_remove_perf_stats(void);
+
+extern void qdio_perf_stat_inc(atomic_long_t *count);
+extern void qdio_perf_stat_dec(atomic_long_t *count);
+
+#endif
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c

new file mode 100644 (file)

index 0000000..f0923a8
--- /dev/null
+++ b/drivers/s390/cio/qdio_setup.c
@@ -0,0 +1,521 @@
+/*
+ * driver/s390/cio/qdio_setup.c
+ *
+ * qdio queue initialization
+ *
+ * Copyright (C) IBM Corp. 2008
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/qdio.h>
+
+#include "cio.h"
+#include "css.h"
+#include "device.h"
+#include "ioasm.h"
+#include "chsc.h"
+#include "qdio.h"
+#include "qdio_debug.h"
+
+static struct kmem_cache *qdio_q_cache;
+
+/*
+ * The adapter sets the qebsm feature flag regardless of the addressing
+ * mode although qebsm only works under 64bit, so the flag is honoured
+ * on 64bit only and overridden with 0 everywhere else.
+ */
+static inline int qebsm_possible(void)
+{
+#ifdef CONFIG_64BIT
+       return css_general_characteristics.qebsm;
+#else
+       return 0;
+#endif
+}
+
+/*
+ * Copy the implementation dependent parameters into the qib and the slibs.
+ *
+ * qib_param_field: pointer to 128 bytes or NULL, if no param field
+ * input_slib_elements, output_slib_elements: pointer to
+ *     nr_queues * QDIO_MAX_BUFFERS_PER_Q words of data or NULL
+ *
+ * Does nothing if irq_ptr is NULL.
+ */
+static void set_impl_params(struct qdio_irq *irq_ptr,
+                           unsigned int qib_param_field_format,
+                           unsigned char *qib_param_field,
+                           unsigned long *input_slib_elements,
+                           unsigned long *output_slib_elements)
+{
+       unsigned long *src;
+       struct qdio_q *q;
+       int i, j;
+
+       if (!irq_ptr)
+               return;
+
+       WARN_ON((unsigned long)&irq_ptr->qib & 0xff);
+       irq_ptr->qib.pfmt = qib_param_field_format;
+       if (qib_param_field)
+               memcpy(irq_ptr->qib.parm, qib_param_field,
+                      QDIO_MAX_BUFFERS_PER_Q);
+
+       if (input_slib_elements) {
+               for_each_input_queue(irq_ptr, q, i) {
+                       /* each queue owns one run of per-buffer words */
+                       src = input_slib_elements + i * QDIO_MAX_BUFFERS_PER_Q;
+                       for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++)
+                               q->slib->slibe[j].parms = src[j];
+               }
+       }
+
+       if (output_slib_elements) {
+               for_each_output_queue(irq_ptr, q, i) {
+                       src = output_slib_elements + i * QDIO_MAX_BUFFERS_PER_Q;
+                       for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++)
+                               q->slib->slibe[j].parms = src[j];
+               }
+       }
+}
+
+/*
+ * Allocate nr_queues queue structures, each with one page for its slib,
+ * and store them in irq_ptr_qs. Returns 0 or -ENOMEM. Queues stored before
+ * a failing allocation are left in the array for the caller to release.
+ */
+static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues)
+{
+       struct qdio_q *queue;
+       int nr;
+
+       for (nr = 0; nr < nr_queues; nr++) {
+               queue = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL);
+               if (queue == NULL)
+                       return -ENOMEM;
+               WARN_ON((unsigned long)queue & 0xff);
+
+               queue->slib = (struct slib *) __get_free_page(GFP_KERNEL);
+               if (queue->slib == NULL) {
+                       /* not yet stored in the array, so free it here */
+                       kmem_cache_free(qdio_q_cache, queue);
+                       return -ENOMEM;
+               }
+               WARN_ON((unsigned long)queue->slib & 0x7ff);
+               irq_ptr_qs[nr] = queue;
+       }
+       return 0;
+}
+
+/*
+ * Allocate the input queues first and then the output queues of irq_ptr.
+ * Returns 0 or the error of the allocation that failed; nothing is freed
+ * here on failure.
+ */
+int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs, int nr_output_qs)
+{
+       int ret = __qdio_allocate_qs(irq_ptr->input_qs, nr_input_qs);
+
+       if (ret != 0)
+               return ret;
+       return __qdio_allocate_qs(irq_ptr->output_qs, nr_output_qs);
+}
+
+/*
+ * Reset queue q and its slib page and fill in the fields that are common
+ * to input and output queues. i is the number of the queue.
+ */
+static void setup_queues_misc(struct qdio_q *q, struct qdio_irq *irq_ptr,
+                             qdio_handler_t *handler, int i)
+{
+       /* everything in front of the slib pointer gets wiped */
+       size_t head_len = (char *)&q->slib - (char *)q;
+
+       /* must be cleared by every qdio_establish */
+       memset(q, 0, head_len);
+       memset(q->slib, 0, PAGE_SIZE);
+
+       q->handler = handler;
+       q->irq_ptr = irq_ptr;
+       q->nr = i;
+       q->mask = 1 << (31 - i);
+}
+
+/*
+ * Wire up the storage lists of queue number i: take over the sbal
+ * addresses from sbals_array, place the sl in the second half of the slib
+ * page, link the slib of the preceding queue of the same direction to this
+ * one and fill in the slib and sl. dbf_text is written to the setup debug
+ * area.
+ */
+static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr,
+                               void **sbals_array, char *dbf_text, int i)
+{
+       struct qdio_q *pred;
+       int nr;
+
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       QDIO_DBF_HEX0(0, setup, &q, sizeof(void *));
+
+       q->sl = (struct sl *)((char *)q->slib + PAGE_SIZE / 2);
+
+       /* fill in sbal */
+       for (nr = 0; nr < QDIO_MAX_BUFFERS_PER_Q; nr++) {
+               q->sbal[nr] = sbals_array[nr];
+               WARN_ON((unsigned long)q->sbal[nr] & 0xff);
+       }
+
+       /* fill in slib */
+       if (i > 0) {
+               if (q->is_input_q)
+                       pred = irq_ptr->input_qs[i - 1];
+               else
+                       pred = irq_ptr->output_qs[i - 1];
+               pred->slib->nsliba = (unsigned long)q->slib;
+       }
+
+       q->slib->sla = (unsigned long)q->sl;
+       q->slib->slsba = (unsigned long)&q->slsb.val[0];
+
+       /* fill in sl */
+       for (nr = 0; nr < QDIO_MAX_BUFFERS_PER_Q; nr++)
+               q->sl->element[nr].sbal = (unsigned long)q->sbal[nr];
+
+       QDIO_DBF_TEXT2(0, setup, "sl-sb-b0");
+       QDIO_DBF_HEX2(0, setup, q->sl, sizeof(void *));
+       QDIO_DBF_HEX2(0, setup, &q->slsb, sizeof(void *));
+       QDIO_DBF_HEX2(0, setup, q->sbal, sizeof(void *));
+}
+
+/*
+ * Initialize all input and output queues of irq_ptr from qdio_init: reset
+ * them, attach the handlers and the sbal addresses and set up the tasklets
+ * (plus the timer for output queues).
+ */
+static void setup_queues(struct qdio_irq *irq_ptr,
+                        struct qdio_initialize *qdio_init)
+{
+       void **in_sbals = qdio_init->input_sbal_addr_array;
+       void **out_sbals = qdio_init->output_sbal_addr_array;
+       void (*inbound_fn)(unsigned long);
+       struct qdio_q *q;
+       char text[20];
+       int i;
+
+       sprintf(text, "qfqs%4x", qdio_init->cdev->private->schid.sch_no);
+       QDIO_DBF_TEXT0(0, setup, text);
+
+       for_each_input_queue(irq_ptr, q, i) {
+               sprintf(text, "in-q%4x", i);
+               setup_queues_misc(q, irq_ptr, qdio_init->input_handler, i);
+
+               q->is_input_q = 1;
+               spin_lock_init(&q->u.in.lock);
+               setup_storage_lists(q, irq_ptr, in_sbals, text, i);
+               /* advance to the sbal addresses of the next queue */
+               in_sbals += QDIO_MAX_BUFFERS_PER_Q;
+
+               /* thin interrupt subchannels use their own inbound tasklet */
+               if (is_thinint_irq(irq_ptr))
+                       inbound_fn = tiqdio_inbound_processing;
+               else
+                       inbound_fn = qdio_inbound_processing;
+               tasklet_init(&q->tasklet, inbound_fn, (unsigned long) q);
+       }
+
+       for_each_output_queue(irq_ptr, q, i) {
+               sprintf(text, "outq%4x", i);
+               setup_queues_misc(q, irq_ptr, qdio_init->output_handler, i);
+
+               q->is_input_q = 0;
+               setup_storage_lists(q, irq_ptr, out_sbals, text, i);
+               out_sbals += QDIO_MAX_BUFFERS_PER_Q;
+
+               tasklet_init(&q->tasklet, qdio_outbound_processing,
+                            (unsigned long) q);
+               setup_timer(&q->u.out.timer, (void(*)(unsigned long))
+                           &qdio_outbound_timer, (unsigned long)q);
+       }
+}
+
+/*
+ * Translate the AC1_* bits of qdioac into the siga_flag bits of irq_ptr.
+ * Flags are only ever set here, never cleared.
+ */
+static void process_ac_flags(struct qdio_irq *irq_ptr, unsigned char qdioac)
+{
+       if (qdioac & AC1_SIGA_INPUT_NEEDED)
+               irq_ptr->siga_flag.input = 1;
+       if (qdioac & AC1_SIGA_OUTPUT_NEEDED)
+               irq_ptr->siga_flag.output = 1;
+       if (qdioac & AC1_SIGA_SYNC_NEEDED)
+               irq_ptr->siga_flag.sync = 1;
+       if (qdioac & AC1_AUTOMATIC_SYNC_ON_THININT)
+               irq_ptr->siga_flag.no_sync_ti = 1;
+       if (qdioac & AC1_AUTOMATIC_SYNC_ON_OUT_PCI)
+               irq_ptr->siga_flag.no_sync_out_pci = 1;
+
+       /* no_sync_out_ti is derived: it needs both flags above to be set */
+       if (!irq_ptr->siga_flag.no_sync_out_pci)
+               return;
+       if (!irq_ptr->siga_flag.no_sync_ti)
+               return;
+       irq_ptr->siga_flag.no_sync_out_ti = 1;
+}
+
+/*
+ * Decide whether qebsm is used for this subchannel. qebsm stays enabled
+ * only if it was requested in the qib rflags and qdioac reports it as both
+ * available and enabled; in that case token becomes the subchannel token.
+ * Otherwise the token is cleared and the request bit is removed from the
+ * qib rflags.
+ */
+static void check_and_setup_qebsm(struct qdio_irq *irq_ptr,
+                                 unsigned char qdioac, unsigned long token)
+{
+       /*
+        * "%8lx" prints up to 16 hex digits for a 64 bit token, so the
+        * buffer must have room for 16 characters plus the trailing NUL.
+        */
+       char dbf_text[20];
+
+       if (!(irq_ptr->qib.rflags & QIB_RFLAGS_ENABLE_QEBSM))
+               goto no_qebsm;
+       if (!(qdioac & AC1_SC_QEBSM_AVAILABLE) ||
+           (!(qdioac & AC1_SC_QEBSM_ENABLED)))
+               goto no_qebsm;
+
+       irq_ptr->sch_token = token;
+
+       QDIO_DBF_TEXT0(0, setup, "V=V:1");
+       /* bounded write: never run past the end of dbf_text */
+       snprintf(dbf_text, sizeof(dbf_text), "%8lx", irq_ptr->sch_token);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       return;
+
+no_qebsm:
+       irq_ptr->sch_token = 0;
+       irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM;
+       QDIO_DBF_TEXT0(0, setup, "noV=V");
+}
+
+/*
+ * Issue the chsc request (code 0x0024) for the subchannel of irq_ptr,
+ * using the pre-allocated chsc page, and copy the returned qdio ssqd
+ * descriptor into irq_ptr->ssqd_desc.
+ *
+ * Returns 0 on success, -EIO if chsc() failed, the translated response
+ * code error, or -EINVAL if the answer is not a valid qdio descriptor for
+ * this subchannel.
+ */
+static int __get_ssqd_info(struct qdio_irq *irq_ptr)
+{
+       struct chsc_ssqd_area *area;
+       int ret;
+
+       QDIO_DBF_TEXT0(0, setup, "getssqd");
+       area = (struct chsc_ssqd_area *)irq_ptr->chsc_page;
+       memset(area, 0, PAGE_SIZE);
+
+       area->request = (struct chsc_header) {
+               .length = 0x0010,
+               .code   = 0x0024,
+       };
+       /* ask for exactly one subchannel: first == last */
+       area->first_sch = irq_ptr->schid.sch_no;
+       area->last_sch = irq_ptr->schid.sch_no;
+       area->ssid = irq_ptr->schid.ssid;
+
+       if (chsc(area) != 0)
+               return -EIO;
+
+       ret = chsc_error_from_response(area->response.code);
+       if (ret != 0)
+               return ret;
+
+       /* the descriptor must be qdio capable, valid and ours */
+       if (!(area->qdio_ssqd.flags & CHSC_FLAG_QDIO_CAPABILITY))
+               return -EINVAL;
+       if (!(area->qdio_ssqd.flags & CHSC_FLAG_VALIDITY))
+               return -EINVAL;
+       if (area->qdio_ssqd.sch != irq_ptr->schid.sch_no)
+               return -EINVAL;
+
+       memcpy(&irq_ptr->ssqd_desc, &area->qdio_ssqd,
+              sizeof(struct qdio_ssqd_desc));
+       return 0;
+}
+
+/*
+ * Read the ssqd descriptor of the subchannel and derive the qebsm setting
+ * and the siga flags from its qdioac1 byte. If the descriptor cannot be
+ * read, the failure is logged and all three SIGA types are assumed to be
+ * needed.
+ */
+void qdio_setup_ssqd_info(struct qdio_irq *irq_ptr)
+{
+       unsigned char qdioac;
+       char text[15];
+       int ret;
+
+       ret = __get_ssqd_info(irq_ptr);
+       if (ret == 0) {
+               qdioac = irq_ptr->ssqd_desc.qdioac1;
+       } else {
+               QDIO_DBF_TEXT2(0, setup, "ssqdasig");
+               sprintf(text, "schno%x", irq_ptr->schid.sch_no);
+               QDIO_DBF_TEXT2(0, setup, text);
+               sprintf(text, "rc:%d", ret);
+               QDIO_DBF_TEXT2(0, setup, text);
+               /* all flags set, worst case */
+               qdioac = AC1_SIGA_INPUT_NEEDED | AC1_SIGA_OUTPUT_NEEDED |
+                        AC1_SIGA_SYNC_NEEDED;
+       }
+
+       check_and_setup_qebsm(irq_ptr, qdioac, irq_ptr->ssqd_desc.sch_token);
+       process_ac_flags(irq_ptr, qdioac);
+
+       sprintf(text, "qdioac%2x", qdioac);
+       QDIO_DBF_TEXT2(0, setup, text);
+}
+
+/*
+ * Free everything that hangs off irq_ptr: all allocated input and output
+ * queues with their slib pages, the qdr, the chsc page and finally the
+ * irq_ptr page itself.
+ */
+void qdio_release_memory(struct qdio_irq *irq_ptr)
+{
+       struct qdio_q *queue;
+       int nr;
+
+       /*
+        * Must check queue array manually since irq_ptr->nr_input_queues /
+        * irq_ptr->nr_output_queues may not yet be set.
+        */
+       for (nr = 0; nr < QDIO_MAX_QUEUES_PER_IRQ; nr++) {
+               queue = irq_ptr->input_qs[nr];
+               if (!queue)
+                       continue;
+               free_page((unsigned long) queue->slib);
+               kmem_cache_free(qdio_q_cache, queue);
+       }
+       for (nr = 0; nr < QDIO_MAX_QUEUES_PER_IRQ; nr++) {
+               queue = irq_ptr->output_qs[nr];
+               if (!queue)
+                       continue;
+               free_page((unsigned long) queue->slib);
+               kmem_cache_free(qdio_q_cache, queue);
+       }
+
+       kfree(irq_ptr->qdr);
+       free_page(irq_ptr->chsc_page);
+       free_page((unsigned long) irq_ptr);
+}
+
+/*
+ * Fill queue descriptor number i + nr of the qdr with the slib, sl and
+ * slsb addresses of queue irq_ptr_qs[i] and set all four storage keys to
+ * PAGE_DEFAULT_KEY.
+ */
+static void __qdio_allocate_fill_qdr(struct qdio_irq *irq_ptr,
+                                    struct qdio_q **irq_ptr_qs,
+                                    int i, int nr)
+{
+       struct qdio_q *q = irq_ptr_qs[i];
+       int slot = i + nr;
+
+       irq_ptr->qdr->qdf0[slot].sliba = (unsigned long)q->slib;
+       irq_ptr->qdr->qdf0[slot].sla = (unsigned long)q->sl;
+       irq_ptr->qdr->qdf0[slot].slsba = (unsigned long)&q->slsb.val[0];
+
+       irq_ptr->qdr->qdf0[slot].akey = PAGE_DEFAULT_KEY;
+       irq_ptr->qdr->qdf0[slot].bkey = PAGE_DEFAULT_KEY;
+       irq_ptr->qdr->qdf0[slot].ckey = PAGE_DEFAULT_KEY;
+       irq_ptr->qdr->qdf0[slot].dkey = PAGE_DEFAULT_KEY;
+}
+
+/*
+ * Fill the qdr header from @qdio_init and write one descriptor per queue:
+ * input queues first, output queues directly behind them.
+ */
+static void setup_qdr(struct qdio_irq *irq_ptr,
+                     struct qdio_initialize *qdio_init)
+{
+       struct qdr *qdr = irq_ptr->qdr;
+       int i;
+
+       qdr->qfmt = qdio_init->q_format;
+       qdr->iqdcnt = qdio_init->no_input_qs;
+       qdr->oqdcnt = qdio_init->no_output_qs;
+       /* descriptor sizes are given in words */
+       qdr->iqdsz = sizeof(struct qdesfmt0) / 4;
+       qdr->oqdsz = sizeof(struct qdesfmt0) / 4;
+       qdr->qiba = (unsigned long)&irq_ptr->qib;
+       qdr->qkey = PAGE_DEFAULT_KEY;
+
+       for (i = 0; i < qdio_init->no_input_qs; i++) {
+               __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->input_qs, i, 0);
+       }
+
+       /* output descriptors start at offset no_input_qs */
+       for (i = 0; i < qdio_init->no_output_qs; i++) {
+               __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->output_qs, i,
+                                        qdio_init->no_input_qs);
+       }
+}
+
+/*
+ * Initialize the qib of @irq_ptr from @init_data: QEBSM request flag,
+ * queue format, the slib addresses of the first input and output queue
+ * (when such queues exist) and the 8 byte adapter name.
+ */
+static void setup_qib(struct qdio_irq *irq_ptr,
+                     struct qdio_initialize *init_data)
+{
+       if (qebsm_possible()) {
+               irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM;
+       }
+
+       irq_ptr->qib.qfmt = init_data->q_format;
+
+       /* only the first queue of each direction is referenced here */
+       if (init_data->no_input_qs) {
+               irq_ptr->qib.isliba =
+                       (unsigned long)irq_ptr->input_qs[0]->slib;
+       }
+       if (init_data->no_output_qs) {
+               irq_ptr->qib.osliba =
+                       (unsigned long)irq_ptr->output_qs[0]->slib;
+       }
+
+       memcpy(irq_ptr->qib.ebcnam, init_data->adapter_name, 8);
+}
+
+/*
+ * qdio_setup_irq - initialize the qdio_irq of a device for establish
+ * @init_data: initialization data supplied by the upper layer driver
+ *
+ * Clears the qdio_irq up to its qdr member and the qdr itself, copies the
+ * basic parameters from @init_data, sets up queues, qib, thin interrupt
+ * indicator and qdr, looks up the equeue/aqueue ciws and finally replaces
+ * the interrupt handler of the ccw device with qdio_int_handler.
+ *
+ * Returns 0 on success or -EINVAL when one of the ciws is missing; in the
+ * error case all memory of the qdio_irq is released.
+ */
+int qdio_setup_irq(struct qdio_initialize *init_data)
+{
+       struct ciw *ciw;
+       struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data;
+       int rc;
+
+       /* zero everything located in front of the qdr pointer */
+       memset(irq_ptr, 0, ((char *)&irq_ptr->qdr) - ((char *)irq_ptr));
+       /* wipes qib.ac, required by ar7063 */
+       memset(irq_ptr->qdr, 0, sizeof(struct qdr));
+
+       irq_ptr->int_parm = init_data->int_parm;
+       irq_ptr->nr_input_qs = init_data->no_input_qs;
+       irq_ptr->nr_output_qs = init_data->no_output_qs;
+
+       irq_ptr->schid = ccw_device_get_subchannel_id(init_data->cdev);
+       irq_ptr->cdev = init_data->cdev;
+       setup_queues(irq_ptr, init_data);
+
+       setup_qib(irq_ptr, init_data);
+       qdio_setup_thinint(irq_ptr);
+       set_impl_params(irq_ptr, init_data->qib_param_field_format,
+                       init_data->qib_param_field,
+                       init_data->input_slib_elements,
+                       init_data->output_slib_elements);
+
+       /* fill input and output descriptors */
+       setup_qdr(irq_ptr, init_data);
+
+       /* qdr, qib, sls, slsbs, slibs, sbales are filled now */
+
+       /* get qdio commands */
+       ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE);
+       if (!ciw) {
+               QDIO_DBF_TEXT2(1, setup, "no eq");
+               rc = -EINVAL;
+               goto out_err;
+       }
+       irq_ptr->equeue = *ciw;
+
+       ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE);
+       if (!ciw) {
+               QDIO_DBF_TEXT2(1, setup, "no aq");
+               rc = -EINVAL;
+               goto out_err;
+       }
+       irq_ptr->aqueue = *ciw;
+
+       /* set new interrupt handler */
+       irq_ptr->orig_handler = init_data->cdev->handler;
+       init_data->cdev->handler = qdio_int_handler;
+       return 0;
+out_err:
+       /*
+        * NOTE(review): this frees irq_ptr itself, but neither resets
+        * cdev->private->qdio_data nor returns the indicator obtained by
+        * qdio_setup_thinint() above - confirm that the caller copes.
+        */
+       qdio_release_memory(irq_ptr);
+       return rc;
+}
+
+/*
+ * qdio_print_subchannel_info - log the QDIO setup of a subchannel
+ * @irq_ptr: qdio interrupt structure describing the subchannel
+ * @cdev: ccw device whose bus id prefixes both messages
+ *
+ * Emits two KERN_INFO lines. The first names the queue format and the
+ * facilities in use, where a "no" prefix marks one that is not used. The
+ * second lists the SIGA flags of the subchannel.
+ *
+ * Both lines are assembled with scnprintf(), so an unexpectedly long bus
+ * id truncates the message instead of overrunning the 80 byte stack
+ * buffer.
+ */
+void qdio_print_subchannel_info(struct qdio_irq *irq_ptr,
+                               struct ccw_device *cdev)
+{
+       char s[80];
+       int len;
+
+       /* len never exceeds sizeof(s) - 1, so the remaining size stays > 0 */
+       len = scnprintf(s, sizeof(s), "%s ", cdev->dev.bus_id);
+
+       switch (irq_ptr->qib.qfmt) {
+       case QDIO_QETH_QFMT:
+               len += scnprintf(s + len, sizeof(s) - len, "OSADE ");
+               break;
+       case QDIO_ZFCP_QFMT:
+               len += scnprintf(s + len, sizeof(s) - len, "ZFCP ");
+               break;
+       case QDIO_IQDIO_QFMT:
+               len += scnprintf(s + len, sizeof(s) - len, "HiperSockets ");
+               break;
+       }
+       len += scnprintf(s + len, sizeof(s) - len, "using: ");
+
+       if (!is_thinint_irq(irq_ptr))
+               len += scnprintf(s + len, sizeof(s) - len, "no");
+       len += scnprintf(s + len, sizeof(s) - len, "AdapterInterrupts ");
+       if (!irq_ptr->sch_token)
+               len += scnprintf(s + len, sizeof(s) - len, "no");
+       len += scnprintf(s + len, sizeof(s) - len, "QEBSM ");
+       if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED))
+               len += scnprintf(s + len, sizeof(s) - len, "no");
+       len += scnprintf(s + len, sizeof(s) - len, "OutboundPCI ");
+       if (!css_general_characteristics.aif_tdd)
+               len += scnprintf(s + len, sizeof(s) - len, "no");
+       len += scnprintf(s + len, sizeof(s) - len, "TDD\n");
+       printk(KERN_INFO "qdio: %s", s);
+
+       /* second line, the buffer is reused from the start */
+       len = scnprintf(s, sizeof(s), "%s SIGA required: ", cdev->dev.bus_id);
+       if (irq_ptr->siga_flag.input)
+               len += scnprintf(s + len, sizeof(s) - len, "Read ");
+       if (irq_ptr->siga_flag.output)
+               len += scnprintf(s + len, sizeof(s) - len, "Write ");
+       if (irq_ptr->siga_flag.sync)
+               len += scnprintf(s + len, sizeof(s) - len, "Sync ");
+       /* the no_sync_* flags are inverted: print when they are clear */
+       if (!irq_ptr->siga_flag.no_sync_ti)
+               len += scnprintf(s + len, sizeof(s) - len, "SyncAI ");
+       if (!irq_ptr->siga_flag.no_sync_out_ti)
+               len += scnprintf(s + len, sizeof(s) - len, "SyncOutAI ");
+       if (!irq_ptr->siga_flag.no_sync_out_pci)
+               len += scnprintf(s + len, sizeof(s) - len, "SyncOutPCI");
+       len += scnprintf(s + len, sizeof(s) - len, "\n");
+       printk(KERN_INFO "qdio: %s", s);
+}
+
+/*
+ * qdio_setup_init - create the queue cache and log css capabilities
+ *
+ * Returns 0 on success or -ENOMEM when the "qdio_q" cache cannot be
+ * created.
+ */
+int __init qdio_setup_init(void)
+{
+       char dbf_text[15];
+       int thinint, qebsm;
+
+       qdio_q_cache = kmem_cache_create("qdio_q", sizeof(struct qdio_q),
+                                        256, 0, NULL);
+       if (qdio_q_cache == NULL)
+               return -ENOMEM;
+
+       /* Check for OSA/FCP thin interrupts (bit 67). */
+       thinint = css_general_characteristics.aif_osa ? 1 : 0;
+       sprintf(dbf_text, "thini%1x", thinint);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+
+       /* Check for QEBSM support in general (bit 58). */
+       qebsm = qebsm_possible() ? 1 : 0;
+       sprintf(dbf_text, "cssQBS:%1x", qebsm);
+       QDIO_DBF_TEXT0(0, setup, dbf_text);
+       return 0;
+}
+
+/* Destroy the queue cache created by qdio_setup_init(). */
+void __exit qdio_setup_exit(void)
+{
+       kmem_cache_destroy(qdio_q_cache);
+}
diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c

new file mode 100644 (file)

index 0000000..9291a77
--- /dev/null
+++ b/drivers/s390/cio/qdio_thinint.c
@@ -0,0 +1,380 @@
+/*
+ * linux/drivers/s390/cio/qdio_thinint.c
+ *
+ * thin interrupt support for qdio
+ *
+ * Copyright 2000-2008 IBM Corp.
+ * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
+ *           Cornelia Huck <cornelia.huck@de.ibm.com>
+ *           Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/io.h>
+#include <asm/atomic.h>
+#include <asm/debug.h>
+#include <asm/qdio.h>
+#include <asm/airq.h>
+#include <asm/isc.h>
+
+#include "cio.h"
+#include "ioasm.h"
+#include "qdio.h"
+#include "qdio_debug.h"
+#include "qdio_perf.h"
+
+/*
+ * Restriction: only the first 63 iqdio subchannels get their own indicator;
+ * all subsequent subchannels share one indicator.
+ */
+#define TIQDIO_NR_NONSHARED_IND                63
+#define TIQDIO_NR_INDICATORS           (TIQDIO_NR_NONSHARED_IND + 1)
+#define TIQDIO_SHARED_IND              63
+
+/* list of thin interrupt input queues */
+static LIST_HEAD(tiq_list);
+
+/* adapter local summary indicator */
+static unsigned char *tiqdio_alsi;
+
+/* device state change indicators */
+/* one entry per indicator; handed out by get_indicator() via &entry->ind */
+struct indicator_t {
+       u32 ind;        /* u32 because of compare-and-swap performance */
+       atomic_t count; /* use count, 0 or 1 for non-shared indicators */
+};
+static struct indicator_t *q_indicators;
+
+static void tiqdio_tasklet_fn(unsigned long data);
+static DECLARE_TASKLET(tiqdio_tasklet, tiqdio_tasklet_fn, 0);
+
+static int css_qdio_omit_svs;
+
+/*
+ * Execute the instruction emitted via .insn (opcode 0xb265) with the value
+ * 3 (__fn) preloaded in register 1 and return what it leaves in register 3.
+ * NOTE(review): the caller refers to this as SVS; the meaning of the values
+ * produced in registers 2 and 3 is not visible in this file - confirm
+ * against the architecture documentation.
+ */
+static inline unsigned long do_clear_global_summary(void)
+{
+       register unsigned long __fn asm("1") = 3;
+       register unsigned long __tmp asm("2");
+       register unsigned long __time asm("3");
+
+       asm volatile(
+               "       .insn   rre,0xb2650000,2,0"
+               : "+d" (__fn), "=d" (__tmp), "=d" (__time));
+       return __time;
+}
+
+/* returns addr for the device state change indicator */
+/*
+ * get_indicator - reserve a device state change indicator
+ *
+ * Claims the first free non-shared indicator. The claim is made with a
+ * single atomic_cmpxchg() so that two concurrent callers can never both
+ * see a use count of 0 and end up with the same indicator. If all
+ * non-shared indicators are taken, the use count of the shared indicator
+ * is raised instead.
+ *
+ * Returns the address of the indicator word; release it with
+ * put_indicator().
+ */
+static u32 *get_indicator(void)
+{
+       int i;
+
+       for (i = 0; i < TIQDIO_NR_NONSHARED_IND; i++)
+               /* 0 -> 1 only succeeds for the caller that sees it free */
+               if (!atomic_cmpxchg(&q_indicators[i].count, 0, 1))
+                       return &q_indicators[i].ind;
+
+       /* use the shared indicator */
+       atomic_inc(&q_indicators[TIQDIO_SHARED_IND].count);
+       return &q_indicators[TIQDIO_SHARED_IND].ind;
+}
+
+/*
+ * put_indicator - drop one reference on the indicator located at @addr
+ * @addr: indicator address as returned by get_indicator(), may be NULL
+ */
+static void put_indicator(u32 *addr)
+{
+       unsigned long offset;
+       struct indicator_t *entry;
+
+       if (addr == NULL)
+               return;
+
+       /* translate the byte offset into the array into an entry */
+       offset = (unsigned long)addr - (unsigned long)q_indicators;
+       entry = &q_indicators[offset / sizeof(struct indicator_t)];
+       atomic_dec(&entry->count);
+}
+
+/*
+ * tiqdio_add_input_queues - make the input queues of a device visible to
+ * the thinint tasklet
+ * @irq_ptr: qdio interrupt structure whose input queues are added
+ *
+ * Adds every input queue to tiq_list, then sets the device's dsci to 1 and
+ * schedules the tasklet so that the new queues get scanned once.
+ */
+void tiqdio_add_input_queues(struct qdio_irq *irq_ptr)
+{
+       struct qdio_q *q;
+       int i;
+
+       /* No TDD facility? If we must use SIGA-s we can also omit SVS. */
+       if (!css_qdio_omit_svs && irq_ptr->siga_flag.sync)
+               css_qdio_omit_svs = 1;
+
+       for_each_input_queue(irq_ptr, q, i) {
+               /*
+                * NOTE(review): no lock is taken around the list update
+                * here - confirm that callers serialize add and remove.
+                */
+               list_add_rcu(&q->entry, &tiq_list);
+               synchronize_rcu();
+       }
+       /* mark the indicator so the tasklet looks at the new queues */
+       xchg(irq_ptr->dsci, 1);
+       tasklet_schedule(&tiqdio_tasklet);
+}
+
+/*
+ * we cannot stop the tiqdio tasklet here since it is for all
+ * thinint qdio devices and it must run as long as there is a
+ * thinint device left
+ */
+void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
+{
+       struct qdio_q *q;
+       int i;
+
+       for_each_input_queue(irq_ptr, q, i) {
+               /* unlink, then wait until no RCU reader can still see q */
+               list_del_rcu(&q->entry);
+               synchronize_rcu();
+       }
+}
+
+/*
+ * Returns 1 when there is nothing left to process on inbound queue @q and
+ * 0 when the buffer at first_to_check is primed again.
+ */
+static inline int tiqdio_inbound_q_done(struct qdio_q *q)
+{
+       unsigned char state;
+
+       /* nr_buf_used == 0: report done without looking at the buffers */
+       if (atomic_read(&q->nr_buf_used) == 0)
+               return 1;
+
+       qdio_siga_sync_q(q);
+       get_buf_state(q, q->first_to_check, &state);
+
+       /* a primed input buffer means more work is coming */
+       return state != SLSB_P_INPUT_PRIMED;
+}
+
+/* returns non-zero if @irq_ptr uses the shared indicator as its dsci */
+static inline int shared_ind(struct qdio_irq *irq_ptr)
+{
+       return irq_ptr->dsci == &q_indicators[TIQDIO_SHARED_IND].ind;
+}
+
+/*
+ * Process inbound queue @q after a thin interrupt: sync, check the outbound
+ * queues, then call the inbound handler for as long as the queue keeps
+ * moving and tiqdio_inbound_q_done() reports more work.
+ */
+static void __tiqdio_inbound_processing(struct qdio_q *q)
+{
+       qdio_perf_stat_inc(&perf_stats.thinint_inbound);
+       qdio_sync_after_thinint(q);
+
+       /*
+        * Maybe we have work on our outbound queues... at least
+        * we have to check the PCI capable queues.
+        */
+       qdio_check_outbound_after_thinint(q);
+
+again:
+       /* nothing moved since the last look: no handler call needed */
+       if (!qdio_inbound_q_moved(q))
+               return;
+
+       qdio_kick_inbound_handler(q);
+
+       if (!tiqdio_inbound_q_done(q)) {
+               qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop);
+               goto again;
+       }
+
+       qdio_stop_polling(q);
+       /*
+        * We need to check again to not lose initiative after
+        * resetting the ACK state.
+        */
+       if (!tiqdio_inbound_q_done(q)) {
+               qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop2);
+               goto again;
+       }
+}
+
+/*
+ * Entry point taking an unsigned long argument: @data carries the address
+ * of the struct qdio_q to process.
+ */
+void tiqdio_inbound_processing(unsigned long data)
+{
+       __tiqdio_inbound_processing((struct qdio_q *)data);
+}
+
+/* check for work on all inbound thinint queues */
+static void tiqdio_tasklet_fn(unsigned long data)
+{
+       struct qdio_q *q;
+
+       qdio_perf_stat_inc(&perf_stats.tasklet_thinint);
+again:
+
+       /* protect tiq_list entries, only changed in activate or shutdown */
+       rcu_read_lock();
+
+       list_for_each_entry_rcu(q, &tiq_list, entry)
+               /* only process queues from changed sets */
+               if (*q->irq_ptr->dsci) {
+
+                       /* only clear it if the indicator is non-shared */
+                       if (!shared_ind(q->irq_ptr))
+                               xchg(q->irq_ptr->dsci, 0);
+                       /*
+                        * don't call inbound processing directly since
+                        * that could starve other thinint queues
+                        */
+                       tasklet_schedule(&q->tasklet);
+               }
+
+       rcu_read_unlock();
+
+       /*
+        * if we used the shared indicator clear it now after all queues
+        * were processed
+        */
+       if (atomic_read(&q_indicators[TIQDIO_SHARED_IND].count)) {
+               xchg(&q_indicators[TIQDIO_SHARED_IND].ind, 0);
+
+               /*
+                * prevent racing: if the summary indicator was set again
+                * in the meantime, re-arm the shared indicator so that its
+                * queues are scanned on the next pass
+                */
+               if (*tiqdio_alsi)
+                       xchg(&q_indicators[TIQDIO_SHARED_IND].ind, 1);
+       }
+
+       /* check for more work */
+       if (*tiqdio_alsi) {
+               /* clear the summary indicator before rescanning the list */
+               xchg(tiqdio_alsi, 0);
+               qdio_perf_stat_inc(&perf_stats.tasklet_thinint_loop);
+               goto again;
+       }
+}
+
+/**
+ * tiqdio_thinint_handler - thin interrupt handler for qdio
+ * @ind: pointer to adapter local summary indicator
+ * @drv_data: NULL
+ */
+static void tiqdio_thinint_handler(void *ind, void *drv_data)
+{
+       qdio_perf_stat_inc(&perf_stats.thin_int);
+
+       /*
+        * SVS only when needed: issue SVS to benefit from iqdio interrupt
+        * avoidance (SVS clears adapter interrupt suppression overwrite)
+        */
+       if (!css_qdio_omit_svs)
+               do_clear_global_summary();
+
+       /*
+        * reset local summary indicator (tiqdio_alsi) to stop adapter
+        * interrupts for now, the tasklet will clean all dsci's
+        */
+       xchg((u8 *)ind, 0);
+       /* the per-queue work is deferred to the tasklet */
+       tasklet_hi_schedule(&tiqdio_tasklet);
+}
+
+/*
+ * set_subchannel_ind - set or reset the indicator addresses of a subchannel
+ * @irq_ptr: qdio interrupt structure; its chsc_page is used as request area
+ * @reset: non-zero to pass zero for both indicator addresses, zero to pass
+ *         the physical addresses of tiqdio_alsi and irq_ptr->dsci
+ *
+ * Returns 0 on success, -EIO if chsc() reports a non-zero result, or the
+ * value of chsc_error_from_response() for the response code.
+ */
+static int set_subchannel_ind(struct qdio_irq *irq_ptr, int reset)
+{
+       struct scssc_area *scssc_area;
+       char dbf_text[15];
+       void *ptr;
+       int rc;
+
+       /* the request is built in the zeroed chsc page of this irq */
+       scssc_area = (struct scssc_area *)irq_ptr->chsc_page;
+       memset(scssc_area, 0, PAGE_SIZE);
+
+       if (reset) {
+               scssc_area->summary_indicator_addr = 0;
+               scssc_area->subchannel_indicator_addr = 0;
+       } else {
+               scssc_area->summary_indicator_addr = virt_to_phys(tiqdio_alsi);
+               scssc_area->subchannel_indicator_addr =
+                       virt_to_phys(irq_ptr->dsci);
+       }
+
+       scssc_area->request = (struct chsc_header) {
+               .length = 0x0fe0,
+               .code   = 0x0021,
+       };
+       scssc_area->operation_code = 0;
+       scssc_area->ks = PAGE_DEFAULT_KEY;
+       scssc_area->kc = PAGE_DEFAULT_KEY;
+       scssc_area->isc = QDIO_AIRQ_ISC;
+       scssc_area->schid = irq_ptr->schid;
+
+       /* enable the time delay disablement facility */
+       if (css_general_characteristics.aif_tdd)
+               scssc_area->word_with_d_bit = 0x10000000;
+
+       rc = chsc(scssc_area);
+       if (rc)
+               return -EIO;
+
+       rc = chsc_error_from_response(scssc_area->response.code);
+       if (rc) {
+               sprintf(dbf_text, "sidR%4x", scssc_area->response.code);
+               QDIO_DBF_TEXT1(0, trace, dbf_text);
+               QDIO_DBF_TEXT1(0, setup, dbf_text);
+               /* the address of the response block is what gets logged */
+               ptr = &scssc_area->response;
+               QDIO_DBF_HEX2(1, setup, &ptr, QDIO_DBF_SETUP_LEN);
+               return rc;
+       }
+
+       QDIO_DBF_TEXT2(0, setup, "setscind");
+       QDIO_DBF_HEX2(0, setup, &scssc_area->summary_indicator_addr,
+                     sizeof(unsigned long));
+       QDIO_DBF_HEX2(0, setup, &scssc_area->subchannel_indicator_addr,
+                     sizeof(unsigned long));
+       return 0;
+}
+
+/* allocate non-shared indicators and shared indicator */
+/*
+ * Allocates the zeroed array of TIQDIO_NR_INDICATORS indicator entries.
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int __init tiqdio_allocate_memory(void)
+{
+       size_t size = sizeof(struct indicator_t) * TIQDIO_NR_INDICATORS;
+
+       q_indicators = kzalloc(size, GFP_KERNEL);
+       return q_indicators ? 0 : -ENOMEM;
+}
+
+/* free the indicator array allocated by tiqdio_allocate_memory() */
+void tiqdio_free_memory(void)
+{
+       kfree(q_indicators);
+}
+
+/*
+ * tiqdio_register_thinints - register the qdio adapter interrupt handler
+ *
+ * Registers QDIO_AIRQ_ISC and tiqdio_thinint_handler and stores the
+ * returned summary indicator address in tiqdio_alsi.
+ *
+ * Returns 0 on success. On failure the error is logged to the setup debug
+ * area, tiqdio_alsi is reset to NULL, the isc is unregistered again and
+ * -ENOMEM is returned.
+ */
+int __init tiqdio_register_thinints(void)
+{
+       char dbf_text[20];
+
+       isc_register(QDIO_AIRQ_ISC);
+       tiqdio_alsi = s390_register_adapter_interrupt(&tiqdio_thinint_handler,
+                                                     NULL, QDIO_AIRQ_ISC);
+       if (IS_ERR(tiqdio_alsi)) {
+               /*
+                * PTR_ERR() is a negative long; printed with %lx it takes
+                * 16 digits on 64 bit and would overflow dbf_text. Print
+                * the errno as decimal and bound the write.
+                */
+               snprintf(dbf_text, sizeof(dbf_text), "regthn%ld",
+                        PTR_ERR(tiqdio_alsi));
+               QDIO_DBF_TEXT0(0, setup, dbf_text);
+               tiqdio_alsi = NULL;
+               isc_unregister(QDIO_AIRQ_ISC);
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+/*
+ * Sets the indicator addresses for a thinint subchannel.
+ * Returns 0 for non-thinint devices, otherwise the result of
+ * set_subchannel_ind().
+ */
+int qdio_establish_thinint(struct qdio_irq *irq_ptr)
+{
+       if (!is_thinint_irq(irq_ptr))
+               return 0;
+
+       /* Check for aif time delay disablement. If installed,
+        * omit SVS even under LPAR
+        */
+       if (css_general_characteristics.aif_tdd)
+               css_qdio_omit_svs = 1;
+       return set_subchannel_ind(irq_ptr, 0);
+}
+
+/* reserve an indicator as dsci for a thinint device and log its address */
+void qdio_setup_thinint(struct qdio_irq *irq_ptr)
+{
+       if (!is_thinint_irq(irq_ptr))
+               return;
+       irq_ptr->dsci = get_indicator();
+       QDIO_DBF_HEX1(0, setup, &irq_ptr->dsci, sizeof(void *));
+}
+
+/*
+ * qdio_shutdown_thinint - undo the thin interrupt setup of a device
+ * @irq_ptr: qdio interrupt structure of the device being shut down
+ *
+ * Does nothing for non-thinint devices.
+ */
+void qdio_shutdown_thinint(struct qdio_irq *irq_ptr)
+{
+       if (!is_thinint_irq(irq_ptr))
+               return;
+
+       /*
+        * Reset the adapter interrupt indicators of the subchannel first
+        * and only then give the dsci back, so that get_indicator() cannot
+        * hand it to another device while this subchannel is still set up
+        * to use it.
+        */
+       set_subchannel_ind(irq_ptr, 1);
+       put_indicator(irq_ptr->dsci);
+}
+
+/*
+ * Undo tiqdio_register_thinints(): unregister the adapter interrupt and
+ * the isc if a summary indicator was registered.
+ */
+void __exit tiqdio_unregister_thinints(void)
+{
+       /*
+        * NOTE(review): tasklet_disable() does not remove an already
+        * scheduled tasklet - confirm whether tasklet_kill() is what is
+        * wanted on this exit path.
+        */
+       tasklet_disable(&tiqdio_tasklet);
+
+       if (tiqdio_alsi) {
+               s390_unregister_adapter_interrupt(tiqdio_alsi, QDIO_AIRQ_ISC);
+               isc_unregister(QDIO_AIRQ_ISC);
+       }
+}
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h

index 699ac11debd84c49c0bbdbd21a2f70f9914deedc..1895dbb553cd1e338f7d681724be40f9894f10f5 100644 (file)
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -239,11 +239,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa,
  /*not used unless the microcode gets patched*/
  #define QETH_PCI_TIMER_VALUE(card) 3
  
-#define QETH_MIN_INPUT_THRESHOLD 1
-#define QETH_MAX_INPUT_THRESHOLD 500
-#define QETH_MIN_OUTPUT_THRESHOLD 1
-#define QETH_MAX_OUTPUT_THRESHOLD 300
-
  /* priority queing */
  #define QETH_PRIOQ_DEFAULT QETH_NO_PRIO_QUEUEING
  #define QETH_DEFAULT_QUEUE    2
@@ -811,17 +806,14 @@ int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
  struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *,
                         enum qeth_ipa_cmds, enum qeth_prot_versions);
  int qeth_query_setadapterparms(struct qeth_card *);
-int qeth_check_qdio_errors(struct qdio_buffer *, unsigned int,
-                      unsigned int, const char *);
+int qeth_check_qdio_errors(struct qdio_buffer *, unsigned int, const char *);
  void qeth_queue_input_buffer(struct qeth_card *, int);
  struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
                 struct qdio_buffer *, struct qdio_buffer_element **, int *,
                 struct qeth_hdr **);
  void qeth_schedule_recovery(struct qeth_card *);
  void qeth_qdio_output_handler(struct ccw_device *, unsigned int,
-                       unsigned int, unsigned int,
-                       unsigned int, int, int,
-                       unsigned long);
+                       int, int, int, unsigned long);
  void qeth_clear_ipacmd_list(struct qeth_card *);
  int qeth_qdio_clear_card(struct qeth_card *, int);
  void qeth_clear_working_pool_list(struct qeth_card *);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c

index 0ac54dc638c21280f09dacf6c6a6ce2405111452..c3ad89e302bd1ae6695e0e377faf21b4a3fa4008 100644 (file)
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2073,7 +2073,7 @@ static void qeth_create_qib_param_field_blkt(struct qeth_card *card,
  static int qeth_qdio_activate(struct qeth_card *card)
  {
         QETH_DBF_TEXT(SETUP, 3, "qdioact");
-       return qdio_activate(CARD_DDEV(card), 0);
+       return qdio_activate(CARD_DDEV(card));
  }
  
  static int qeth_dm_act(struct qeth_card *card)
@@ -2349,16 +2349,11 @@ int qeth_init_qdio_queues(struct qeth_card *card)
         card->qdio.in_q->next_buf_to_init =
                 card->qdio.in_buf_pool.buf_count - 1;
         rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0,
-                    card->qdio.in_buf_pool.buf_count - 1, NULL);
+                    card->qdio.in_buf_pool.buf_count - 1);
         if (rc) {
                 QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc);
                 return rc;
         }
-       rc = qdio_synchronize(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0);
-       if (rc) {
-               QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc);
-               return rc;
-       }
         /* outbound queue */
         for (i = 0; i < card->qdio.no_out_queues; ++i) {
                 memset(card->qdio.out_qs[i]->qdio_bufs, 0,
@@ -2559,9 +2554,9 @@ int qeth_query_setadapterparms(struct qeth_card *card)
  EXPORT_SYMBOL_GPL(qeth_query_setadapterparms);
  
  int qeth_check_qdio_errors(struct qdio_buffer *buf, unsigned int qdio_error,
-               unsigned int siga_error, const char *dbftext)
+               const char *dbftext)
  {
-       if (qdio_error || siga_error) {
+       if (qdio_error) {
                 QETH_DBF_TEXT(TRACE, 2, dbftext);
                 QETH_DBF_TEXT(QERR, 2, dbftext);
                 QETH_DBF_TEXT_(QERR, 2, " F15=%02X",
@@ -2569,7 +2564,6 @@ int qeth_check_qdio_errors(struct qdio_buffer *buf, unsigned int qdio_error,
                 QETH_DBF_TEXT_(QERR, 2, " F14=%02X",
                                buf->element[14].flags & 0xff);
                 QETH_DBF_TEXT_(QERR, 2, " qerr=%X", qdio_error);
-               QETH_DBF_TEXT_(QERR, 2, " serr=%X", siga_error);
                 return 1;
         }
         return 0;
@@ -2622,9 +2616,8 @@ void qeth_queue_input_buffer(struct qeth_card *card, int index)
                         card->perf_stats.inbound_do_qdio_start_time =
                                 qeth_get_micros();
                 }
-               rc = do_QDIO(CARD_DDEV(card),
-                            QDIO_FLAG_SYNC_INPUT | QDIO_FLAG_UNDER_INTERRUPT,
-                            0, queue->next_buf_to_init, count, NULL);
+               rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0,
+                            queue->next_buf_to_init, count);
                 if (card->options.performance_stats)
                         card->perf_stats.inbound_do_qdio_time +=
                                 qeth_get_micros() -
@@ -2643,14 +2636,13 @@ void qeth_queue_input_buffer(struct qeth_card *card, int index)
  EXPORT_SYMBOL_GPL(qeth_queue_input_buffer);
  
  static int qeth_handle_send_error(struct qeth_card *card,
-               struct qeth_qdio_out_buffer *buffer, unsigned int qdio_err,
-               unsigned int siga_err)
+               struct qeth_qdio_out_buffer *buffer, unsigned int qdio_err)
  {
         int sbalf15 = buffer->buffer->element[15].flags & 0xff;
-       int cc = siga_err & 3;
+       int cc = qdio_err & 3;
  
         QETH_DBF_TEXT(TRACE, 6, "hdsnderr");
-       qeth_check_qdio_errors(buffer->buffer, qdio_err, siga_err, "qouterr");
+       qeth_check_qdio_errors(buffer->buffer, qdio_err, "qouterr");
         switch (cc) {
         case 0:
                 if (qdio_err) {
@@ -2662,7 +2654,7 @@ static int qeth_handle_send_error(struct qeth_card *card,
                 }
                 return QETH_SEND_ERROR_NONE;
         case 2:
-               if (siga_err & QDIO_SIGA_ERROR_B_BIT_SET) {
+               if (qdio_err & QDIO_ERROR_SIGA_BUSY) {
                         QETH_DBF_TEXT(TRACE, 1, "SIGAcc2B");
                         QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card));
                         return QETH_SEND_ERROR_KICK_IT;
@@ -2758,8 +2750,8 @@ static int qeth_flush_buffers_on_no_pci(struct qeth_qdio_out_q *queue)
         return 0;
  }
  
-static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int under_int,
-               int index, int count)
+static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
+                              int count)
  {
         struct qeth_qdio_out_buffer *buf;
         int rc;
@@ -2807,12 +2799,10 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int under_int,
                         qeth_get_micros();
         }
         qdio_flags = QDIO_FLAG_SYNC_OUTPUT;
-       if (under_int)
-               qdio_flags |= QDIO_FLAG_UNDER_INTERRUPT;
         if (atomic_read(&queue->set_pci_flags_count))
                 qdio_flags |= QDIO_FLAG_PCI_OUT;
         rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags,
-                    queue->queue_no, index, count, NULL);
+                    queue->queue_no, index, count);
         if (queue->card->options.performance_stats)
                 queue->card->perf_stats.outbound_do_qdio_time +=
                         qeth_get_micros() -
@@ -2866,16 +2856,15 @@ static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
                                 queue->card->perf_stats.bufs_sent_pack +=
                                         flush_cnt;
                         if (flush_cnt)
-                               qeth_flush_buffers(queue, 1, index, flush_cnt);
+                               qeth_flush_buffers(queue, index, flush_cnt);
                         atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
                 }
         }
  }
  
-void qeth_qdio_output_handler(struct ccw_device *ccwdev, unsigned int status,
-               unsigned int qdio_error, unsigned int siga_error,
-               unsigned int __queue, int first_element, int count,
-               unsigned long card_ptr)
+void qeth_qdio_output_handler(struct ccw_device *ccwdev,
+               unsigned int qdio_error, int __queue, int first_element,
+               int count, unsigned long card_ptr)
  {
         struct qeth_card *card        = (struct qeth_card *) card_ptr;
         struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue];
@@ -2883,15 +2872,12 @@ void qeth_qdio_output_handler(struct ccw_device *ccwdev, unsigned int status,
         int i;
  
         QETH_DBF_TEXT(TRACE, 6, "qdouhdl");
-       if (status & QDIO_STATUS_LOOK_FOR_ERROR) {
-               if (status & QDIO_STATUS_ACTIVATE_CHECK_CONDITION) {
-                       QETH_DBF_TEXT(TRACE, 2, "achkcond");
-                       QETH_DBF_TEXT_(TRACE, 2, "%s", CARD_BUS_ID(card));
-                       QETH_DBF_TEXT_(TRACE, 2, "%08x", status);
-                       netif_stop_queue(card->dev);
-                       qeth_schedule_recovery(card);
-                       return;
-               }
+       if (qdio_error & QDIO_ERROR_ACTIVATE_CHECK_CONDITION) {
+               QETH_DBF_TEXT(TRACE, 2, "achkcond");
+               QETH_DBF_TEXT_(TRACE, 2, "%s", CARD_BUS_ID(card));
+               netif_stop_queue(card->dev);
+               qeth_schedule_recovery(card);
+               return;
         }
         if (card->options.performance_stats) {
                 card->perf_stats.outbound_handler_cnt++;
@@ -2901,8 +2887,7 @@ void qeth_qdio_output_handler(struct ccw_device *ccwdev, unsigned int status,
         for (i = first_element; i < (first_element + count); ++i) {
                 buffer = &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q];
                 /*we only handle the KICK_IT error by doing a recovery */
-               if (qeth_handle_send_error(card, buffer,
-                                          qdio_error, siga_error)
+               if (qeth_handle_send_error(card, buffer, qdio_error)
                                 == QETH_SEND_ERROR_KICK_IT){
                         netif_stop_queue(card->dev);
                         qeth_schedule_recovery(card);
@@ -3164,11 +3149,11 @@ int qeth_do_send_packet_fast(struct qeth_card *card,
         atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
         if (ctx == NULL) {
                 qeth_fill_buffer(queue, buffer, skb);
-               qeth_flush_buffers(queue, 0, index, 1);
+               qeth_flush_buffers(queue, index, 1);
         } else {
                 flush_cnt = qeth_eddp_fill_buffer(queue, ctx, index);
                 WARN_ON(buffers_needed != flush_cnt);
-               qeth_flush_buffers(queue, 0, index, flush_cnt);
+               qeth_flush_buffers(queue, index, flush_cnt);
         }
         return 0;
  out:
@@ -3221,8 +3206,8 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
                                  * again */
                                 if (atomic_read(&buffer->state) !=
                                                 QETH_QDIO_BUF_EMPTY){
-                                       qeth_flush_buffers(queue, 0,
-                                               start_index, flush_count);
+                                       qeth_flush_buffers(queue, start_index,
+                                                          flush_count);
                                         atomic_set(&queue->state,
                                                 QETH_OUT_Q_UNLOCKED);
                                         return -EBUSY;
@@ -3253,7 +3238,7 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
         flush_count += tmp;
  out:
         if (flush_count)
-               qeth_flush_buffers(queue, 0, start_index, flush_count);
+               qeth_flush_buffers(queue, start_index, flush_count);
         else if (!atomic_read(&queue->set_pci_flags_count))
                 atomic_xchg(&queue->state, QETH_OUT_Q_LOCKED_FLUSH);
         /*
@@ -3274,7 +3259,7 @@ out:
                 if (!flush_count && !atomic_read(&queue->set_pci_flags_count))
                         flush_count += qeth_flush_buffers_on_no_pci(queue);
                 if (flush_count)
-                       qeth_flush_buffers(queue, 0, start_index, flush_count);
+                       qeth_flush_buffers(queue, start_index, flush_count);
         }
         /* at this point the queue is UNLOCKED again */
         if (queue->card->options.performance_stats && do_pack)
@@ -3686,10 +3671,6 @@ static int qeth_qdio_establish(struct qeth_card *card)
         init_data.q_format               = qeth_get_qdio_q_format(card);
         init_data.qib_param_field_format = 0;
         init_data.qib_param_field        = qib_param_field;
-       init_data.min_input_threshold    = QETH_MIN_INPUT_THRESHOLD;
-       init_data.max_input_threshold    = QETH_MAX_INPUT_THRESHOLD;
-       init_data.min_output_threshold   = QETH_MIN_OUTPUT_THRESHOLD;
-       init_data.max_output_threshold   = QETH_MAX_OUTPUT_THRESHOLD;
         init_data.no_input_qs            = 1;
         init_data.no_output_qs           = card->qdio.no_out_queues;
         init_data.input_handler          = card->discipline.input_handler;
@@ -3751,8 +3732,9 @@ static int qeth_core_driver_group(const char *buf, struct device *root_dev,
  
  int qeth_core_hardsetup_card(struct qeth_card *card)
  {
+       struct qdio_ssqd_desc *qdio_ssqd;
         int retries = 3;
-       int mpno;
+       int mpno = 0;
         int rc;
  
         QETH_DBF_TEXT(SETUP, 2, "hrdsetup");
@@ -3784,7 +3766,10 @@ retry:
                 QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc);
                 return rc;
         }
-       mpno = qdio_get_ssqd_pct(CARD_DDEV(card));
+
+       qdio_ssqd = qdio_get_ssqd_desc(CARD_DDEV(card));
+       if (qdio_ssqd)
+               mpno = qdio_ssqd->pcnt;
         if (mpno)
                 mpno = min(mpno - 1, QETH_MAX_PORTNO);
         if (card->info.portno > mpno) {
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c

index f682f7b144806b0478e6bdfcb9c9825b5b94756d..3fbc3bdec0c5cfada6edccd973072f81ee2b97f7 100644 (file)
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -726,8 +726,7 @@ tx_drop:
  }
  
  static void qeth_l2_qdio_input_handler(struct ccw_device *ccwdev,
-                       unsigned int status, unsigned int qdio_err,
-                       unsigned int siga_err, unsigned int queue,
+                       unsigned int qdio_err, unsigned int queue,
                         int first_element, int count, unsigned long card_ptr)
  {
         struct net_device *net_dev;
@@ -742,23 +741,20 @@ static void qeth_l2_qdio_input_handler(struct ccw_device *ccwdev,
                 card->perf_stats.inbound_cnt++;
                 card->perf_stats.inbound_start_time = qeth_get_micros();
         }
-       if (status & QDIO_STATUS_LOOK_FOR_ERROR) {
-               if (status & QDIO_STATUS_ACTIVATE_CHECK_CONDITION) {
-                       QETH_DBF_TEXT(TRACE, 1, "qdinchk");
-                       QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card));
-                       QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", first_element,
-                                       count);
-                       QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", queue, status);
-                       qeth_schedule_recovery(card);
-                       return;
-               }
+       if (qdio_err & QDIO_ERROR_ACTIVATE_CHECK_CONDITION) {
+               QETH_DBF_TEXT(TRACE, 1, "qdinchk");
+               QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card));
+               QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", first_element,
+                               count);
+               QETH_DBF_TEXT_(TRACE, 1, "%04X", queue);
+               qeth_schedule_recovery(card);
+               return;
         }
         for (i = first_element; i < (first_element + count); ++i) {
                 index = i % QDIO_MAX_BUFFERS_PER_Q;
                 buffer = &card->qdio.in_q->bufs[index];
-               if (!((status & QDIO_STATUS_LOOK_FOR_ERROR) &&
-                     qeth_check_qdio_errors(buffer->buffer,
-                                            qdio_err, siga_err, "qinerr")))
+               if (!(qdio_err &&
+                     qeth_check_qdio_errors(buffer->buffer, qdio_err, "qinerr")))
                         qeth_l2_process_inbound_buffer(card, buffer, index);
                 /* clear buffer and give back to hardware */
                 qeth_put_buffer_pool_entry(card, buffer->pool_entry);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c

index 06deaee50f6d1ce00e72bc837d23d6f483be4e7e..22f64aa6dd1fc9e7699c457c0c4c5b2d06c25118 100644 (file)
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2939,8 +2939,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
  }
  
  static void qeth_l3_qdio_input_handler(struct ccw_device *ccwdev,
-               unsigned int status, unsigned int qdio_err,
-               unsigned int siga_err, unsigned int queue, int first_element,
+               unsigned int qdio_err, unsigned int queue, int first_element,
                 int count, unsigned long card_ptr)
  {
         struct net_device *net_dev;
@@ -2955,23 +2954,21 @@ static void qeth_l3_qdio_input_handler(struct ccw_device *ccwdev,
                 card->perf_stats.inbound_cnt++;
                 card->perf_stats.inbound_start_time = qeth_get_micros();
         }
-       if (status & QDIO_STATUS_LOOK_FOR_ERROR) {
-               if (status & QDIO_STATUS_ACTIVATE_CHECK_CONDITION) {
-                       QETH_DBF_TEXT(TRACE, 1, "qdinchk");
-                       QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card));
-                       QETH_DBF_TEXT_(TRACE, 1, "%04X%04X",
-                                       first_element, count);
-                       QETH_DBF_TEXT_(TRACE, 1, "%04X%04X", queue, status);
-                       qeth_schedule_recovery(card);
-                       return;
-               }
+       if (qdio_err & QDIO_ERROR_ACTIVATE_CHECK_CONDITION) {
+               QETH_DBF_TEXT(TRACE, 1, "qdinchk");
+               QETH_DBF_TEXT_(TRACE, 1, "%s", CARD_BUS_ID(card));
+               QETH_DBF_TEXT_(TRACE, 1, "%04X%04X",
+                               first_element, count);
+               QETH_DBF_TEXT_(TRACE, 1, "%04X", queue);
+               qeth_schedule_recovery(card);
+               return;
         }
         for (i = first_element; i < (first_element + count); ++i) {
                 index = i % QDIO_MAX_BUFFERS_PER_Q;
                 buffer = &card->qdio.in_q->bufs[index];
-               if (!((status & QDIO_STATUS_LOOK_FOR_ERROR) &&
+               if (!(qdio_err &&
                       qeth_check_qdio_errors(buffer->buffer,
-                                            qdio_err, siga_err, "qinerr")))
+                                            qdio_err, "qinerr")))
                         qeth_l3_process_inbound_buffer(card, buffer, index);
                 /* clear buffer and give back to hardware */
                 qeth_put_buffer_pool_entry(card, buffer->pool_entry);
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c

index 36169c6944fd1d78b06161e66488b355d0d98ba3..fca48b88fc53b662be4e85dce9aaec8a3038e293 100644 (file)
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -297,15 +297,13 @@ void zfcp_hba_dbf_event_fsf_unsol(const char *tag, struct zfcp_adapter *adapter,
  /**
   * zfcp_hba_dbf_event_qdio - trace event for QDIO related failure
   * @adapter: adapter affected by this QDIO related event
- * @status: as passed by qdio module
   * @qdio_error: as passed by qdio module
- * @siga_error: as passed by qdio module
   * @sbal_index: first buffer with error condition, as passed by qdio module
   * @sbal_count: number of buffers affected, as passed by qdio module
   */
-void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, unsigned int status,
-                            unsigned int qdio_error, unsigned int siga_error,
-                            int sbal_index, int sbal_count)
+void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter,
+                            unsigned int qdio_error, int sbal_index,
+                            int sbal_count)
  {
         struct zfcp_hba_dbf_record *r = &adapter->hba_dbf_buf;
         unsigned long flags;
@@ -313,9 +311,7 @@ void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, unsigned int status,
         spin_lock_irqsave(&adapter->hba_dbf_lock, flags);
         memset(r, 0, sizeof(*r));
         strncpy(r->tag, "qdio", ZFCP_DBF_TAG_SIZE);
-       r->u.qdio.status = status;
         r->u.qdio.qdio_error = qdio_error;
-       r->u.qdio.siga_error = siga_error;
         r->u.qdio.sbal_index = sbal_index;
         r->u.qdio.sbal_count = sbal_count;
         debug_event(adapter->hba_dbf, 0, r, sizeof(*r));
@@ -398,9 +394,7 @@ static void zfcp_hba_dbf_view_status(char **p,
  
  static void zfcp_hba_dbf_view_qdio(char **p, struct zfcp_hba_dbf_record_qdio *r)
  {
-       zfcp_dbf_out(p, "status", "0x%08x", r->status);
         zfcp_dbf_out(p, "qdio_error", "0x%08x", r->qdio_error);
-       zfcp_dbf_out(p, "siga_error", "0x%08x", r->siga_error);
         zfcp_dbf_out(p, "sbal_index", "0x%02x", r->sbal_index);
         zfcp_dbf_out(p, "sbal_count", "0x%02x", r->sbal_count);
  }
diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h

index d04aea604974edb534239b167d068e672836a7c5..0ddb18449d11b190a602d628461d2f5f01dc564d 100644 (file)
--- a/drivers/s390/scsi/zfcp_dbf.h
+++ b/drivers/s390/scsi/zfcp_dbf.h
@@ -139,9 +139,7 @@ struct zfcp_hba_dbf_record_status {
  } __attribute__ ((packed));
  
  struct zfcp_hba_dbf_record_qdio {
-       u32 status;
         u32 qdio_error;
-       u32 siga_error;
         u8 sbal_index;
         u8 sbal_count;
  } __attribute__ ((packed));
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h

index 8065b2b224b7562be74eb85006eeae426094ae22..edfdb21591f32427ef670e4c1274ea3c28035060 100644 (file)
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -48,9 +48,8 @@ extern void zfcp_rec_dbf_event_action(u8, struct zfcp_erp_action *);
  extern void zfcp_hba_dbf_event_fsf_response(struct zfcp_fsf_req *);
  extern void zfcp_hba_dbf_event_fsf_unsol(const char *, struct zfcp_adapter *,
                                          struct fsf_status_read_buffer *);
-extern void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *,
-                                   unsigned int, unsigned int, unsigned int,
-                                   int, int);
+extern void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *, unsigned int, int,
+                                   int);
  extern void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *);
  extern void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *);
  extern void zfcp_san_dbf_event_els_request(struct zfcp_fsf_req *);
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c

index 72e3094796d48235b451b0a64f949fb53a07c92d..d6dbd653fde9a46496de95b971d4338f9ec48035 100644 (file)
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -74,17 +74,15 @@ static void zfcp_qdio_zero_sbals(struct qdio_buffer *sbal[], int first, int cnt)
         }
  }
  
-static void zfcp_qdio_int_req(struct ccw_device *cdev, unsigned int status,
-                             unsigned int qdio_err, unsigned int siga_err,
-                             unsigned int queue_no, int first, int count,
+static void zfcp_qdio_int_req(struct ccw_device *cdev, unsigned int qdio_err,
+                             int queue_no, int first, int count,
                               unsigned long parm)
  {
         struct zfcp_adapter *adapter = (struct zfcp_adapter *) parm;
         struct zfcp_qdio_queue *queue = &adapter->req_q;
  
-       if (unlikely(status & QDIO_STATUS_LOOK_FOR_ERROR)) {
-               zfcp_hba_dbf_event_qdio(adapter, status, qdio_err, siga_err,
-                                       first, count);
+       if (unlikely(qdio_err)) {
+               zfcp_hba_dbf_event_qdio(adapter, qdio_err, first, count);
                 zfcp_qdio_handler_error(adapter, 140);
                 return;
         }
@@ -129,8 +127,7 @@ static void zfcp_qdio_resp_put_back(struct zfcp_adapter *adapter, int processed)
  
         count = atomic_read(&queue->count) + processed;
  
-       retval = do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT | QDIO_FLAG_UNDER_INTERRUPT,
-                        0, start, count, NULL);
+       retval = do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT, 0, start, count);
  
         if (unlikely(retval)) {
                 atomic_set(&queue->count, count);
@@ -142,9 +139,8 @@ static void zfcp_qdio_resp_put_back(struct zfcp_adapter *adapter, int processed)
         }
  }
  
-static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int status,
-                              unsigned int qdio_err, unsigned int siga_err,
-                              unsigned int queue_no, int first, int count,
+static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int qdio_err,
+                              int queue_no, int first, int count,
                                unsigned long parm)
  {
         struct zfcp_adapter *adapter = (struct zfcp_adapter *) parm;
@@ -152,9 +148,8 @@ static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int status,
         volatile struct qdio_buffer_element *sbale;
         int sbal_idx, sbale_idx, sbal_no;
  
-       if (unlikely(status & QDIO_STATUS_LOOK_FOR_ERROR)) {
-               zfcp_hba_dbf_event_qdio(adapter, status, qdio_err, siga_err,
-                                       first, count);
+       if (unlikely(qdio_err)) {
+               zfcp_hba_dbf_event_qdio(adapter, qdio_err, first, count);
                 zfcp_qdio_handler_error(adapter, 147);
                 return;
         }
@@ -362,7 +357,7 @@ int zfcp_qdio_send(struct zfcp_fsf_req *fsf_req)
         }
  
         retval = do_QDIO(adapter->ccw_device, QDIO_FLAG_SYNC_OUTPUT, 0, first,
-                        count, NULL);
+                        count);
         if (unlikely(retval)) {
                 zfcp_qdio_zero_sbals(req_q->sbal, first, count);
                 return retval;
@@ -400,10 +395,6 @@ int zfcp_qdio_allocate(struct zfcp_adapter *adapter)
         init_data->qib_param_field = NULL;
         init_data->input_slib_elements = NULL;
         init_data->output_slib_elements = NULL;
-       init_data->min_input_threshold = 1;
-       init_data->max_input_threshold = 5000;
-       init_data->min_output_threshold = 1;
-       init_data->max_output_threshold = 1000;
         init_data->no_input_qs = 1;
         init_data->no_output_qs = 1;
         init_data->input_handler = zfcp_qdio_int_resp;
@@ -436,9 +427,7 @@ void zfcp_qdio_close(struct zfcp_adapter *adapter)
         atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
         spin_unlock(&req_q->lock);
  
-       while (qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR)
-                       == -EINPROGRESS)
-               ssleep(1);
+       qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR);
  
         /* cleanup used outbound sbals */
         count = atomic_read(&req_q->count);
@@ -473,7 +462,7 @@ int zfcp_qdio_open(struct zfcp_adapter *adapter)
                 return -EIO;
         }
  
-       if (qdio_activate(adapter->ccw_device, 0)) {
+       if (qdio_activate(adapter->ccw_device)) {
                 dev_err(&adapter->ccw_device->dev,
                          "Activate of QDIO queues failed.\n");
                 goto failed_qdio;
@@ -487,7 +476,7 @@ int zfcp_qdio_open(struct zfcp_adapter *adapter)
         }
  
         if (do_QDIO(adapter->ccw_device, QDIO_FLAG_SYNC_INPUT, 0, 0,
-                    QDIO_MAX_BUFFERS_PER_Q, NULL)) {
+                    QDIO_MAX_BUFFERS_PER_Q)) {
                 dev_err(&adapter->ccw_device->dev,
                          "Init of QDIO response queue failed.\n");
                 goto failed_qdio;
@@ -501,9 +490,6 @@ int zfcp_qdio_open(struct zfcp_adapter *adapter)
         return 0;
  
  failed_qdio:
-       while (qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR)
-                       == -EINPROGRESS)
-               ssleep(1);
-
+       qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR);
         return -EIO;
  }
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c

index ed53f14007a2107621441357eb2396b473ec62d1..f2467e936e554ea703c60621d42e3dceeaf6760f 100644 (file)
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -416,12 +416,17 @@ static int clariion_bus_notify(struct notifier_block *nb,
                                 unsigned long action, void *data)
  {
         struct device *dev = data;
-       struct scsi_device *sdev = to_scsi_device(dev);
+       struct scsi_device *sdev;
         struct scsi_dh_data *scsi_dh_data;
         struct clariion_dh_data *h;
         int i, found = 0;
         unsigned long flags;
  
+       if (!scsi_is_sdev_device(dev))
+               return 0;
+
+       sdev = to_scsi_device(dev);
+
         if (action == BUS_NOTIFY_ADD_DEVICE) {
                 for (i = 0; clariion_dev_list[i].vendor; i++) {
                         if (!strncmp(sdev->vendor, clariion_dev_list[i].vendor,
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c

index 12ceab7b366297fee5bad7ada6d3d7143d00fe2f..ae6be87d6a8327d8b523236c0d3aadf1622f63c1 100644 (file)
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -131,11 +131,16 @@ static int hp_sw_bus_notify(struct notifier_block *nb,
                             unsigned long action, void *data)
  {
         struct device *dev = data;
-       struct scsi_device *sdev = to_scsi_device(dev);
+       struct scsi_device *sdev;
         struct scsi_dh_data *scsi_dh_data;
         int i, found = 0;
         unsigned long flags;
  
+       if (!scsi_is_sdev_device(dev))
+               return 0;
+
+       sdev = to_scsi_device(dev);
+
         if (action == BUS_NOTIFY_ADD_DEVICE) {
                 for (i = 0; hp_sw_dh_data_list[i].vendor; i++) {
                         if (!strncmp(sdev->vendor, hp_sw_dh_data_list[i].vendor,
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c

index 6fff077a888d8d69dc128d80f2519dd125a73621..fdf34b0ec6e1770951181d82a314380a27934d50 100644 (file)
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -608,12 +608,17 @@ static int rdac_bus_notify(struct notifier_block *nb,
                             unsigned long action, void *data)
  {
         struct device *dev = data;
-       struct scsi_device *sdev = to_scsi_device(dev);
+       struct scsi_device *sdev;
         struct scsi_dh_data *scsi_dh_data;
         struct rdac_dh_data *h;
         int i, found = 0;
         unsigned long flags;
  
+       if (!scsi_is_sdev_device(dev))
+               return 0;
+
+       sdev = to_scsi_device(dev);
+
         if (action == BUS_NOTIFY_ADD_DEVICE) {
                 for (i = 0; rdac_dev_list[i].vendor; i++) {
                         if (!strncmp(sdev->vendor, rdac_dev_list[i].vendor,
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c

index 683bce375c7476fde30b15311adb66972d19d6fe..f843c1383a4b599480fdd53c9887a1263132e294 100644 (file)
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -258,19 +258,6 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
         return ide_stopped;
  }
  
-static ide_startstop_t
-idescsi_atapi_abort(ide_drive_t *drive, struct request *rq)
-{
-       debug_log("%s called for %lu\n", __func__,
-               ((struct ide_atapi_pc *) rq->special)->scsi_cmd->serial_number);
-
-       rq->errors |= ERROR_MAX;
-
-       idescsi_end_request(drive, 0, 0);
-
-       return ide_stopped;
-}
-
  static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
  {
         idescsi_scsi_t *scsi = drive_to_idescsi(drive);
@@ -524,7 +511,6 @@ static ide_driver_t idescsi_driver = {
         .do_request             = idescsi_do_request,
         .end_request            = idescsi_end_request,
         .error                  = idescsi_atapi_error,
-       .abort                  = idescsi_atapi_abort,
  #ifdef CONFIG_IDE_PROC_FS
         .proc                   = idescsi_proc,
  #endif
diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c

index 97c68d021d28bcb3ecd1cae57846cdac3fdaed7e..638b68649e791a4e4d94ebef601e6714e1dd18bb 100644 (file)
--- a/drivers/serial/8250_pnp.c
+++ b/drivers/serial/8250_pnp.c
@@ -383,21 +383,14 @@ static int __devinit check_name(char *name)
         return 0;
  }
  
-static int __devinit check_resources(struct pnp_option *option)
+static int __devinit check_resources(struct pnp_dev *dev)
  {
-       struct pnp_option *tmp;
-       if (!option)
-               return 0;
+       resource_size_t base[] = {0x2f8, 0x3f8, 0x2e8, 0x3e8};
+       int i;
  
-       for (tmp = option; tmp; tmp = tmp->next) {
-               struct pnp_port *port;
-               for (port = tmp->port; port; port = port->next)
-                       if ((port->size == 8) &&
-                           ((port->min == 0x2f8) ||
-                            (port->min == 0x3f8) ||
-                            (port->min == 0x2e8) ||
-                            (port->min == 0x3e8)))
-                               return 1;
+       for (i = 0; i < ARRAY_SIZE(base); i++) {
+               if (pnp_possible_config(dev, IORESOURCE_IO, base[i], 8))
+                       return 1;
         }
  
         return 0;
@@ -420,10 +413,7 @@ static int __devinit serial_pnp_guess_board(struct pnp_dev *dev, int *flags)
                 (dev->card && check_name(dev->card->name))))
                         return -ENODEV;
  
-       if (check_resources(dev->independent))
-               return 0;
-
-       if (check_resources(dev->dependent))
+       if (check_resources(dev))
                 return 0;
  
         return -ENODEV;
diff --git a/fs/Kconfig b/fs/Kconfig

index 313b2e06ded5dd525dcb9c9c422800564aacbe5b..17216ba99c8528f11e2c3e4456d32eebfd9f2c5c 100644 (file)
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1375,6 +1375,9 @@ config JFFS2_CMODE_FAVOURLZO
  
  endchoice
  
+# UBIFS File system configuration
+source "fs/ubifs/Kconfig"
+
  config CRAMFS
         tristate "Compressed ROM file system support (cramfs)"
         depends on BLOCK
@@ -1544,10 +1547,6 @@ config UFS_FS
            The recently released UFS2 variant (used in FreeBSD 5.x) is
            READ-ONLY supported.
  
-         If you only intend to mount files from some other Unix over the
-         network using NFS, you don't need the UFS file system support (but
-         you need NFS file system support obviously).
-
           Note that this option is generally not needed for floppies, since a
           good portable way to transport files and directories between unixes
           (and even other operating systems) is given by the tar program ("man
@@ -1587,6 +1586,7 @@ menuconfig NETWORK_FILESYSTEMS
           Say Y here to get to see options for network filesystems and
           filesystem-related networking code, such as NFS daemon and
           RPCSEC security modules.
+
           This option alone does not add any kernel code.
  
           If you say N, all options in this submenu will be skipped and
@@ -1595,76 +1595,92 @@ menuconfig NETWORK_FILESYSTEMS
  if NETWORK_FILESYSTEMS
  
  config NFS_FS
-       tristate "NFS file system support"
+       tristate "NFS client support"
         depends on INET
         select LOCKD
         select SUNRPC
         select NFS_ACL_SUPPORT if NFS_V3_ACL
         help
-         If you are connected to some other (usually local) Unix computer
-         (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing
-         on that computer (the NFS server) using the Network File Sharing
-         protocol, say Y. "Mounting files" means that the client can access
-         the files with usual UNIX commands as if they were sitting on the
-         client's hard disk. For this to work, the server must run the
-         programs nfsd and mountd (but does not need to have NFS file system
-         support enabled in its kernel). NFS is explained in the Network
-         Administrator's Guide, available from
-         <http://www.tldp.org/docs.html#guide>, on its man page: "man
-         nfs", and in the NFS-HOWTO.
+         Choose Y here if you want to access files residing on other
+         computers using Sun's Network File System protocol.  To compile
+         this file system support as a module, choose M here: the module
+         will be called nfs.
  
-         A superior but less widely used alternative to NFS is provided by
-         the Coda file system; see "Coda file system support" below.
+         To mount file systems exported by NFS servers, you also need to
+         install the user space mount.nfs command which can be found in
+         the Linux nfs-utils package, available from http://linux-nfs.org/.
+         Information about using the mount command is available in the
+         mount(8) man page.  More detail about the Linux NFS client
+         implementation is available via the nfs(5) man page.
  
-         If you say Y here, you should have said Y to TCP/IP networking also.
-         This option would enlarge your kernel by about 27 KB.
-
-         To compile this file system support as a module, choose M here: the
-         module will be called nfs.
+         Below you can choose which versions of the NFS protocol are
+         available in the kernel to mount NFS servers.  Support for NFS
+         version 2 (RFC 1094) is always available when NFS_FS is selected.
  
-         If you are configuring a diskless machine which will mount its root
-         file system over NFS at boot time, say Y here and to "Kernel
-         level IP autoconfiguration" above and to "Root file system on NFS"
-         below. You cannot compile this driver as a module in this case.
-         There are two packages designed for booting diskless machines over
-         the net: netboot, available from
-         <http://ftp1.sourceforge.net/netboot/>, and Etherboot,
-         available from <http://ftp1.sourceforge.net/etherboot/>.
+         To configure a system which mounts its root file system via NFS
+         at boot time, say Y here, select "Kernel level IP
+         autoconfiguration" in the NETWORK menu, and select "Root file
+         system on NFS" below.  You cannot compile this file system as a
+         module in this case.
  
-         If you don't know what all this is about, say N.
+         If unsure, say N.
  
  config NFS_V3
-       bool "Provide NFSv3 client support"
+       bool "NFS client support for NFS version 3"
         depends on NFS_FS
         help
-         Say Y here if you want your NFS client to be able to speak version
-         3 of the NFS protocol.
+         This option enables support for version 3 of the NFS protocol
+         (RFC 1813) in the kernel's NFS client.
  
           If unsure, say Y.
  
  config NFS_V3_ACL
-       bool "Provide client support for the NFSv3 ACL protocol extension"
+       bool "NFS client support for the NFSv3 ACL protocol extension"
         depends on NFS_V3
         help
-         Implement the NFSv3 ACL protocol extension for manipulating POSIX
-         Access Control Lists.  The server should also be compiled with
-         the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option.
+         Some NFS servers support an auxiliary NFSv3 ACL protocol that
+         Sun added to Solaris but that never became an official part of the
+         NFS version 3 protocol.  This protocol extension allows
+         applications on NFS clients to manipulate POSIX Access Control
+         Lists on files residing on NFS servers.  NFS servers enforce
+         ACLs on local files whether this protocol is available or not.
+
+         Choose Y here if your NFS server supports the Solaris NFSv3 ACL
+         protocol extension and you want your NFS client to allow
+         applications to access and modify ACLs on files on the server.
+
+         Most NFS servers don't support the Solaris NFSv3 ACL protocol
+         extension.  You can choose N here or specify the "noacl" mount
+         option to prevent your NFS client from trying to use the NFSv3
+         ACL protocol.
  
           If unsure, say N.
  
  config NFS_V4
-       bool "Provide NFSv4 client support (EXPERIMENTAL)"
+       bool "NFS client support for NFS version 4 (EXPERIMENTAL)"
         depends on NFS_FS && EXPERIMENTAL
         select RPCSEC_GSS_KRB5
         help
-         Say Y here if you want your NFS client to be able to speak the newer
-         version 4 of the NFS protocol.
+         This option enables support for version 4 of the NFS protocol
+         (RFC 3530) in the kernel's NFS client.
  
-         Note: Requires auxiliary userspace daemons which may be found on
-               http://www.citi.umich.edu/projects/nfsv4/
+         To mount NFS servers using NFSv4, you also need to install user
+         space programs which can be found in the Linux nfs-utils package,
+         available from http://linux-nfs.org/.
  
           If unsure, say N.
  
+config ROOT_NFS
+       bool "Root file system on NFS"
+       depends on NFS_FS=y && IP_PNP
+       help
+         If you want your system to mount its root file system via NFS,
+         choose Y here.  This is common practice for managing systems
+         without local permanent storage.  For details, read
+         <file:Documentation/filesystems/nfsroot.txt>.
+
+         Most people say N here.
+
  config NFSD
         tristate "NFS server support"
         depends on INET
@@ -1746,20 +1762,6 @@ config NFSD_V4
  
           If unsure, say N.
  
-config ROOT_NFS
-       bool "Root file system on NFS"
-       depends on NFS_FS=y && IP_PNP
-       help
-         If you want your Linux box to mount its whole root file system (the
-         one containing the directory /) from some other computer over the
-         net via NFS (presumably because your box doesn't have a hard disk),
-         say Y. Read <file:Documentation/filesystems/nfsroot.txt> for
-         details. It is likely that in this case, you also want to say Y to
-         "Kernel level IP autoconfiguration" so that your box can discover
-         its network address at boot time.
-
-         Most people say N here.
-
  config LOCKD
         tristate
  
@@ -1800,27 +1802,6 @@ config SUNRPC_XPRT_RDMA
  
           If unsure, say N.
  
-config SUNRPC_BIND34
-       bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
-       depends on SUNRPC && EXPERIMENTAL
-       default n
-       help
-         RPC requests over IPv6 networks require support for larger
-         addresses when performing an RPC bind.  Sun added support for
-         IPv6 addressing by creating two new versions of the rpcbind
-         protocol (RFC 1833).
-
-         This option enables support in the kernel RPC client for
-         querying rpcbind servers via versions 3 and 4 of the rpcbind
-         protocol.  The kernel automatically falls back to version 2
-         if a remote rpcbind service does not support versions 3 or 4.
-         By themselves, these new versions do not provide support for
-         RPC over IPv6, but the new protocol versions are necessary to
-         support it.
-
-         If unsure, say N to get traditional behavior (version 2 rpcbind
-         requests only).
-
  config RPCSEC_GSS_KRB5
         tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
         depends on SUNRPC && EXPERIMENTAL
diff --git a/fs/Makefile b/fs/Makefile

index 277b079dec9e4751959db2688e69f5d17ed671de..3b2178b4bb66a6a776aa34b287123c8ef6552d3f 100644 (file)
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -101,6 +101,7 @@ obj-$(CONFIG_NTFS_FS)               += ntfs/
  obj-$(CONFIG_UFS_FS)           += ufs/
  obj-$(CONFIG_EFS_FS)           += efs/
  obj-$(CONFIG_JFFS2_FS)         += jffs2/
+obj-$(CONFIG_UBIFS_FS)         += ubifs/
  obj-$(CONFIG_AFFS_FS)          += affs/
  obj-$(CONFIG_ROMFS_FS)         += romfs/
  obj-$(CONFIG_QNX4FS_FS)                += qnx4/
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c

index ae45f77765c0ee65a647fc81c899315de2f0862f..25adfc3c693ab4673777801ecc0e014db9624ec8 100644 (file)
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -424,8 +424,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
   * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
   * that it can be located for waiting on in __writeback_single_inode().
   *
- * Called under inode_lock.
- *
   * If `bdi' is non-zero then we're being asked to writeback a specific queue.
   * This function assumes that the blockdev superblock's inodes are backed by
   * a variety of queues, so all inodes are searched.  For other superblocks,
@@ -441,11 +439,12 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
   * on the writer throttling path, and we get decent balancing between many
   * throttled threads: we don't want them all piling up on inode_sync_wait.
   */
-static void
-sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
+void generic_sync_sb_inodes(struct super_block *sb,
+                               struct writeback_control *wbc)
  {
         const unsigned long start = jiffies;    /* livelock avoidance */
  
+       spin_lock(&inode_lock);
         if (!wbc->for_kupdate || list_empty(&sb->s_io))
                 queue_io(sb, wbc->older_than_this);
  
@@ -524,8 +523,16 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
                 if (!list_empty(&sb->s_more_io))
                         wbc->more_io = 1;
         }
+       spin_unlock(&inode_lock);
         return;         /* Leave any unwritten inodes on s_io */
  }
+EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
+
+static void sync_sb_inodes(struct super_block *sb,
+                               struct writeback_control *wbc)
+{
+       generic_sync_sb_inodes(sb, wbc);
+}
  
  /*
   * Start writeback of dirty pagecache data against all unlocked inodes.
@@ -565,11 +572,8 @@ restart:
                          * be unmounted by the time it is released.
                          */
                         if (down_read_trylock(&sb->s_umount)) {
-                               if (sb->s_root) {
-                                       spin_lock(&inode_lock);
+                               if (sb->s_root)
                                         sync_sb_inodes(sb, wbc);
-                                       spin_unlock(&inode_lock);
-                               }
                                 up_read(&sb->s_umount);
                         }
                         spin_lock(&sb_lock);
@@ -607,9 +611,7 @@ void sync_inodes_sb(struct super_block *sb, int wait)
                         (inodes_stat.nr_inodes - inodes_stat.nr_unused) +
                         nr_dirty + nr_unstable;
         wbc.nr_to_write += wbc.nr_to_write / 2;         /* Bit more for luck */
-       spin_lock(&inode_lock);
         sync_sb_inodes(sb, &wbc);
-       spin_unlock(&inode_lock);
  }
  
  /*
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c

index 5df517b81f3f2a7911e1518c33021e9eb557c85e..1f6dc518505c90b77e3bed2217f8d8de93f0803a 100644 (file)
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -224,7 +224,9 @@ void nlm_release_call(struct nlm_rqst *call)
  
  static void nlmclnt_rpc_release(void *data)
  {
+       lock_kernel();
         nlm_release_call(data);
+       unlock_kernel();
  }
  
  static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -430,7 +432,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
                          * Report the conflicting lock back to the application.
                          */
                         fl->fl_start = req->a_res.lock.fl.fl_start;
-                       fl->fl_end = req->a_res.lock.fl.fl_start;
+                       fl->fl_end = req->a_res.lock.fl.fl_end;
                         fl->fl_type = req->a_res.lock.fl.fl_type;
                         fl->fl_pid = 0;
                         break;
@@ -710,7 +712,9 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
  die:
         return;
   retry_rebind:
+       lock_kernel();
         nlm_rebind_host(req->a_host);
+       unlock_kernel();
   retry_unlock:
         rpc_restart_call(task);
  }
@@ -788,7 +792,9 @@ retry_cancel:
         /* Don't ever retry more than 3 times */
         if (req->a_retries++ >= NLMCLNT_MAX_RETRIES)
                 goto die;
+       lock_kernel();
         nlm_rebind_host(req->a_host);
+       unlock_kernel();
         rpc_restart_call(task);
         rpc_delay(task, 30 * HZ);
  }
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c

index 385437e3387de895fde1540f58c7d3269a842373..2e27176ff42fa391e052543540d0600c42e07011 100644 (file)
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -248,7 +248,9 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
  
  static void nlm4svc_callback_release(void *data)
  {
+       lock_kernel();
         nlm_release_call(data);
+       unlock_kernel();
  }
  
  static const struct rpc_call_ops nlm4svc_callback_ops = {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c

index 81aca859bfde909f2574c2c7f9e9e02608a1a165..56a08ab9a4cb6b9745e8abc059a85609083ba13d 100644 (file)
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -795,6 +795,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
  
         dprintk("lockd: GRANT_MSG RPC callback\n");
  
+       lock_kernel();
         /* if the block is not on a list at this point then it has
          * been invalidated. Don't try to requeue it.
          *
@@ -804,7 +805,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
          * for nlm_blocked?
          */
         if (list_empty(&block->b_list))
-               return;
+               goto out;
  
         /* Technically, we should down the file semaphore here. Since we
          * move the block towards the head of the queue only, no harm
@@ -818,13 +819,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
         }
         nlmsvc_insert_block(block, timeout);
         svc_wake_up(block->b_daemon);
+out:
+       unlock_kernel();
  }
  
  static void nlmsvc_grant_release(void *data)
  {
         struct nlm_rqst         *call = data;
  
+       lock_kernel();
         nlmsvc_release_block(call->a_block);
+       unlock_kernel();
  }
  
  static const struct rpc_call_ops nlmsvc_grant_ops = {
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c

index 88379cc6e0b1c0ee196c8686c3f0e7c46cabef9a..ce6952b50a757a3401ce1b0d20272855a839c44f 100644 (file)
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -278,7 +278,9 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
  
  static void nlmsvc_callback_release(void *data)
  {
+       lock_kernel();
         nlm_release_call(data);
+       unlock_kernel();
  }
  
  static const struct rpc_call_ops nlmsvc_callback_ops = {
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c

index c1e7c83006292c5085d2b2cb5b54f6d46fb0e2fa..f447f4b4476cc37b26144ffdfcef83cd80eab53e 100644 (file)
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -27,7 +27,7 @@
  
  struct nfs_callback_data {
         unsigned int users;
-       struct svc_serv *serv;
+       struct svc_rqst *rqst;
         struct task_struct *task;
  };
  
@@ -91,21 +91,17 @@ nfs_callback_svc(void *vrqstp)
                 svc_process(rqstp);
         }
         unlock_kernel();
-       nfs_callback_info.task = NULL;
-       svc_exit_thread(rqstp);
         return 0;
  }
  
  /*
- * Bring up the server process if it is not already up.
+ * Bring up the callback thread if it is not already up.
   */
  int nfs_callback_up(void)
  {
         struct svc_serv *serv = NULL;
-       struct svc_rqst *rqstp;
         int ret = 0;
  
-       lock_kernel();
         mutex_lock(&nfs_callback_mutex);
         if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
                 goto out;
@@ -121,22 +117,23 @@ int nfs_callback_up(void)
         nfs_callback_tcpport = ret;
         dprintk("Callback port = 0x%x\n", nfs_callback_tcpport);
  
-       rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
-       if (IS_ERR(rqstp)) {
-               ret = PTR_ERR(rqstp);
+       nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
+       if (IS_ERR(nfs_callback_info.rqst)) {
+               ret = PTR_ERR(nfs_callback_info.rqst);
+               nfs_callback_info.rqst = NULL;
                 goto out_err;
         }
  
         svc_sock_update_bufs(serv);
-       nfs_callback_info.serv = serv;
  
-       nfs_callback_info.task = kthread_run(nfs_callback_svc, rqstp,
+       nfs_callback_info.task = kthread_run(nfs_callback_svc,
+                                            nfs_callback_info.rqst,
                                              "nfsv4-svc");
         if (IS_ERR(nfs_callback_info.task)) {
                 ret = PTR_ERR(nfs_callback_info.task);
-               nfs_callback_info.serv = NULL;
+               svc_exit_thread(nfs_callback_info.rqst);
+               nfs_callback_info.rqst = NULL;
                 nfs_callback_info.task = NULL;
-               svc_exit_thread(rqstp);
                 goto out_err;
         }
  out:
@@ -149,7 +146,6 @@ out:
         if (serv)
                 svc_destroy(serv);
         mutex_unlock(&nfs_callback_mutex);
-       unlock_kernel();
         return ret;
  out_err:
         dprintk("Couldn't create callback socket or server thread; err = %d\n",
@@ -159,17 +155,19 @@ out_err:
  }
  
  /*
- * Kill the server process if it is not already down.
+ * Kill the callback thread if it's no longer being used.
   */
  void nfs_callback_down(void)
  {
-       lock_kernel();
         mutex_lock(&nfs_callback_mutex);
         nfs_callback_info.users--;
-       if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL)
+       if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) {
                 kthread_stop(nfs_callback_info.task);
+               svc_exit_thread(nfs_callback_info.rqst);
+               nfs_callback_info.rqst = NULL;
+               nfs_callback_info.task = NULL;
+       }
         mutex_unlock(&nfs_callback_mutex);
-       unlock_kernel();
  }
  
  static int nfs_callback_authenticate(struct svc_rqst *rqstp)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c

index f2a092ca69b5649479698172795ffedc0bbf8219..5ee23e7058b305c061a4e1449fdccc722a68eceb 100644 (file)
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -431,14 +431,14 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
  {
         to->to_initval = timeo * HZ / 10;
         to->to_retries = retrans;
-       if (!to->to_retries)
-               to->to_retries = 2;
  
         switch (proto) {
         case XPRT_TRANSPORT_TCP:
         case XPRT_TRANSPORT_RDMA:
+               if (to->to_retries == 0)
+                       to->to_retries = NFS_DEF_TCP_RETRANS;
                 if (to->to_initval == 0)
-                       to->to_initval = 60 * HZ;
+                       to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10;
                 if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
                         to->to_initval = NFS_MAX_TCP_TIMEOUT;
                 to->to_increment = to->to_initval;
@@ -450,14 +450,17 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
                 to->to_exponential = 0;
                 break;
         case XPRT_TRANSPORT_UDP:
-       default:
+               if (to->to_retries == 0)
+                       to->to_retries = NFS_DEF_UDP_RETRANS;
                 if (!to->to_initval)
-                       to->to_initval = 11 * HZ / 10;
+                       to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10;
                 if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
                         to->to_initval = NFS_MAX_UDP_TIMEOUT;
                 to->to_maxval = NFS_MAX_UDP_TIMEOUT;
                 to->to_exponential = 1;
                 break;
+       default:
+               BUG();
         }
  }
  
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index 982a2064fe4cdd3755946ced633b6909e6d58d4e..28a238dab23a2a16919ef58706a431447bcc8f5b 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -133,13 +133,14 @@ nfs_opendir(struct inode *inode, struct file *filp)
  {
         int res;
  
-       dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
-                       inode->i_sb->s_id, inode->i_ino);
+       dfprintk(FILE, "NFS: open dir(%s/%s)\n",
+                       filp->f_path.dentry->d_parent->d_name.name,
+                       filp->f_path.dentry->d_name.name);
+
+       nfs_inc_stats(inode, NFSIOS_VFSOPEN);
  
-       lock_kernel();
         /* Call generic open code in order to cache credentials */
         res = nfs_open(inode, filp);
-       unlock_kernel();
         return res;
  }
  
@@ -528,13 +529,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
         struct nfs_fattr fattr;
         long            res;
  
-       dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
+       dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
                         dentry->d_parent->d_name.name, dentry->d_name.name,
                         (long long)filp->f_pos);
         nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
  
-       lock_kernel();
-
         /*
          * filp->f_pos points to the dirent entry number.
          * *desc->dir_cookie has the cookie for the next entry. We have
@@ -592,10 +591,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
         }
  out:
         nfs_unblock_sillyrename(dentry);
-       unlock_kernel();
         if (res > 0)
                 res = 0;
-       dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
+       dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n",
                         dentry->d_parent->d_name.name, dentry->d_name.name,
                         res);
         return res;
@@ -603,7 +601,15 @@ out:
  
  static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
  {
-       mutex_lock(&filp->f_path.dentry->d_inode->i_mutex);
+       struct dentry *dentry = filp->f_path.dentry;
+       struct inode *inode = dentry->d_inode;
+
+       dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
+                       dentry->d_parent->d_name.name,
+                       dentry->d_name.name,
+                       offset, origin);
+
+       mutex_lock(&inode->i_mutex);
         switch (origin) {
                 case 1:
                         offset += filp->f_pos;
@@ -619,7 +625,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
                 nfs_file_open_context(filp)->dir_cookie = 0;
         }
  out:
-       mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex);
+       mutex_unlock(&inode->i_mutex);
         return offset;
  }
  
@@ -629,10 +635,11 @@ out:
   */
  static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
  {
-       dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
+       dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
                         dentry->d_parent->d_name.name, dentry->d_name.name,
                         datasync);
  
+       nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);
         return 0;
  }
  
@@ -767,7 +774,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
         struct nfs_fattr fattr;
  
         parent = dget_parent(dentry);
-       lock_kernel();
         dir = parent->d_inode;
         nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
         inode = dentry->d_inode;
@@ -805,7 +811,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
  
         nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
   out_valid:
-       unlock_kernel();
         dput(parent);
         dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
                         __func__, dentry->d_parent->d_name.name,
@@ -824,7 +829,6 @@ out_zap_parent:
                 shrink_dcache_parent(dentry);
         }
         d_drop(dentry);
-       unlock_kernel();
         dput(parent);
         dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
                         __func__, dentry->d_parent->d_name.name,
@@ -858,6 +862,14 @@ static int nfs_dentry_delete(struct dentry *dentry)
  
  }
  
+static void nfs_drop_nlink(struct inode *inode)
+{
+       spin_lock(&inode->i_lock);
+       if (inode->i_nlink > 0)
+               drop_nlink(inode);
+       spin_unlock(&inode->i_lock);
+}
+
  /*
   * Called when the dentry loses inode.
   * We use it to clean up silly-renamed files.
@@ -869,10 +881,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
                 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
  
         if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
-               lock_kernel();
                 drop_nlink(inode);
                 nfs_complete_unlink(dentry, inode);
-               unlock_kernel();
         }
         iput(inode);
  }
@@ -903,8 +913,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
         res = ERR_PTR(-ENOMEM);
         dentry->d_op = NFS_PROTO(dir)->dentry_ops;
  
-       lock_kernel();
-
         /*
          * If we're doing an exclusive create, optimize away the lookup
          * but don't hash the dentry.
@@ -912,7 +920,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
         if (nfs_is_exclusive_create(dir, nd)) {
                 d_instantiate(dentry, NULL);
                 res = NULL;
-               goto out_unlock;
+               goto out;
         }
  
         parent = dentry->d_parent;
@@ -940,8 +948,6 @@ no_entry:
         nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
  out_unblock_sillyrename:
         nfs_unblock_sillyrename(parent);
-out_unlock:
-       unlock_kernel();
  out:
         return res;
  }
@@ -999,9 +1005,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
         }
  
         /* Open the file on the server */
-       lock_kernel();
         res = nfs4_atomic_open(dir, dentry, nd);
-       unlock_kernel();
         if (IS_ERR(res)) {
                 error = PTR_ERR(res);
                 switch (error) {
@@ -1063,9 +1067,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
          * operations that change the directory. We therefore save the
          * change attribute *before* we do the RPC call.
          */
-       lock_kernel();
         ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
-       unlock_kernel();
  out:
         dput(parent);
         if (!ret)
@@ -1218,14 +1220,11 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
         if ((nd->flags & LOOKUP_CREATE) != 0)
                 open_flags = nd->intent.open.flags;
  
-       lock_kernel();
         error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
         if (error != 0)
                 goto out_err;
-       unlock_kernel();
         return 0;
  out_err:
-       unlock_kernel();
         d_drop(dentry);
         return error;
  }
@@ -1248,14 +1247,11 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
         attr.ia_mode = mode;
         attr.ia_valid = ATTR_MODE;
  
-       lock_kernel();
         status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
         if (status != 0)
                 goto out_err;
-       unlock_kernel();
         return 0;
  out_err:
-       unlock_kernel();
         d_drop(dentry);
         return status;
  }
@@ -1274,15 +1270,12 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         attr.ia_valid = ATTR_MODE;
         attr.ia_mode = mode | S_IFDIR;
  
-       lock_kernel();
         error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
         if (error != 0)
                 goto out_err;
-       unlock_kernel();
         return 0;
  out_err:
         d_drop(dentry);
-       unlock_kernel();
         return error;
  }
  
@@ -1299,14 +1292,12 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
         dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
                         dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
  
-       lock_kernel();
         error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
         /* Ensure the VFS deletes this inode */
         if (error == 0 && dentry->d_inode != NULL)
                 clear_nlink(dentry->d_inode);
         else if (error == -ENOENT)
                 nfs_dentry_handle_enoent(dentry);
-       unlock_kernel();
  
         return error;
  }
@@ -1408,7 +1399,7 @@ static int nfs_safe_remove(struct dentry *dentry)
                 error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
                 /* The VFS may want to delete this inode */
                 if (error == 0)
-                       drop_nlink(inode);
+                       nfs_drop_nlink(inode);
                 nfs_mark_for_revalidate(inode);
         } else
                 error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
@@ -1431,7 +1422,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
         dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
                 dir->i_ino, dentry->d_name.name);
  
-       lock_kernel();
         spin_lock(&dcache_lock);
         spin_lock(&dentry->d_lock);
         if (atomic_read(&dentry->d_count) > 1) {
@@ -1440,7 +1430,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
                 /* Start asynchronous writeout of the inode */
                 write_inode_now(dentry->d_inode, 0);
                 error = nfs_sillyrename(dir, dentry);
-               unlock_kernel();
                 return error;
         }
         if (!d_unhashed(dentry)) {
@@ -1454,7 +1443,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
                 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
         } else if (need_rehash)
                 d_rehash(dentry);
-       unlock_kernel();
         return error;
  }
  
@@ -1491,13 +1479,9 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
         attr.ia_mode = S_IFLNK | S_IRWXUGO;
         attr.ia_valid = ATTR_MODE;
  
-       lock_kernel();
-
         page = alloc_page(GFP_HIGHUSER);
-       if (!page) {
-               unlock_kernel();
+       if (!page)
                 return -ENOMEM;
-       }
  
         kaddr = kmap_atomic(page, KM_USER0);
         memcpy(kaddr, symname, pathlen);
@@ -1512,7 +1496,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
                         dentry->d_name.name, symname, error);
                 d_drop(dentry);
                 __free_page(page);
-               unlock_kernel();
                 return error;
         }
  
@@ -1530,7 +1513,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
         } else
                 __free_page(page);
  
-       unlock_kernel();
         return 0;
  }
  
@@ -1544,14 +1526,12 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
                 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
                 dentry->d_parent->d_name.name, dentry->d_name.name);
  
-       lock_kernel();
         d_drop(dentry);
         error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
         if (error == 0) {
                 atomic_inc(&inode->i_count);
                 d_add(dentry, inode);
         }
-       unlock_kernel();
         return error;
  }
  
@@ -1591,7 +1571,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
          * To prevent any new references to the target during the rename,
          * we unhash the dentry and free the inode in advance.
          */
-       lock_kernel();
         if (!d_unhashed(new_dentry)) {
                 d_drop(new_dentry);
                 rehash = new_dentry;
@@ -1635,7 +1614,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                         /* dentry still busy? */
                         goto out;
         } else
-               drop_nlink(new_inode);
+               nfs_drop_nlink(new_inode);
  
  go_ahead:
         /*
@@ -1669,7 +1648,6 @@ out:
         /* new dentry created? */
         if (dentry)
                 dput(dentry);
-       unlock_kernel();
         return error;
  }
  
@@ -1962,8 +1940,6 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
         }
  
  force_lookup:
-       lock_kernel();
-
         if (!NFS_PROTO(inode)->access)
                 goto out_notsup;
  
@@ -1973,7 +1949,6 @@ force_lookup:
                 put_rpccred(cred);
         } else
                 res = PTR_ERR(cred);
-       unlock_kernel();
  out:
         dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
                 inode->i_sb->s_id, inode->i_ino, mask, res);
@@ -1982,7 +1957,6 @@ out_notsup:
         res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
         if (res == 0)
                 res = generic_permission(inode, mask, NULL);
-       unlock_kernel();
         goto out;
  }
  
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c

index 4757a2b326a1132d1e6e2d0d4401767eb06aa1d3..08f6b040d289d9ecb26979342bdf478db3c5a0b8 100644 (file)
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -890,7 +890,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
         count = iov_length(iov, nr_segs);
         nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
  
-       dprintk("nfs: direct read(%s/%s, %zd@%Ld)\n",
+       dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n",
                 file->f_path.dentry->d_parent->d_name.name,
                 file->f_path.dentry->d_name.name,
                 count, (long long) pos);
@@ -947,7 +947,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
         count = iov_length(iov, nr_segs);
         nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
  
-       dfprintk(VFS, "nfs: direct write(%s/%s, %zd@%Ld)\n",
+       dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n",
                 file->f_path.dentry->d_parent->d_name.name,
                 file->f_path.dentry->d_name.name,
                 count, (long long) pos);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c

index 4e98a56a17776cc1ea484db0c5d24f9e2a7490a0..78460657f5cbff1a155a4e5d05ab1ba3ea0e564d 100644 (file)
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -50,7 +50,7 @@ static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
  static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
                                 unsigned long nr_segs, loff_t pos);
  static int  nfs_file_flush(struct file *, fl_owner_t id);
-static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync);
+static int  nfs_file_fsync(struct file *, struct dentry *dentry, int datasync);
  static int nfs_check_flags(int flags);
  static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
  static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
@@ -72,7 +72,7 @@ const struct file_operations nfs_file_operations = {
         .open           = nfs_file_open,
         .flush          = nfs_file_flush,
         .release        = nfs_file_release,
-       .fsync          = nfs_fsync,
+       .fsync          = nfs_file_fsync,
         .lock           = nfs_lock,
         .flock          = nfs_flock,
         .splice_read    = nfs_file_splice_read,
@@ -119,25 +119,33 @@ nfs_file_open(struct inode *inode, struct file *filp)
  {
         int res;
  
+       dprintk("NFS: open file(%s/%s)\n",
+                       filp->f_path.dentry->d_parent->d_name.name,
+                       filp->f_path.dentry->d_name.name);
+
         res = nfs_check_flags(filp->f_flags);
         if (res)
                 return res;
  
         nfs_inc_stats(inode, NFSIOS_VFSOPEN);
-       lock_kernel();
-       res = NFS_PROTO(inode)->file_open(inode, filp);
-       unlock_kernel();
+       res = nfs_open(inode, filp);
         return res;
  }
  
  static int
  nfs_file_release(struct inode *inode, struct file *filp)
  {
+       struct dentry *dentry = filp->f_path.dentry;
+
+       dprintk("NFS: release(%s/%s)\n",
+                       dentry->d_parent->d_name.name,
+                       dentry->d_name.name);
+
         /* Ensure that dirty pages are flushed out with the right creds */
         if (filp->f_mode & FMODE_WRITE)
-               nfs_wb_all(filp->f_path.dentry->d_inode);
+               nfs_wb_all(dentry->d_inode);
         nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
-       return NFS_PROTO(inode)->file_release(inode, filp);
+       return nfs_release(inode, filp);
  }
  
  /**
@@ -171,6 +179,12 @@ force_reval:
  static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
  {
         loff_t loff;
+
+       dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
+                       filp->f_path.dentry->d_parent->d_name.name,
+                       filp->f_path.dentry->d_name.name,
+                       offset, origin);
+
         /* origin == SEEK_END => we must revalidate the cached file length */
         if (origin == SEEK_END) {
                 struct inode *inode = filp->f_mapping->host;
@@ -185,7 +199,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
  }
  
  /*
- * Helper for nfs_file_flush() and nfs_fsync()
+ * Helper for nfs_file_flush() and nfs_file_fsync()
   *
   * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
   * disk, but it retrieves and clears ctx->error after synching, despite
@@ -211,16 +225,18 @@ static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
  
  /*
   * Flush all dirty pages, and check for write errors.
- *
   */
  static int
  nfs_file_flush(struct file *file, fl_owner_t id)
  {
         struct nfs_open_context *ctx = nfs_file_open_context(file);
-       struct inode    *inode = file->f_path.dentry->d_inode;
+       struct dentry   *dentry = file->f_path.dentry;
+       struct inode    *inode = dentry->d_inode;
         int             status;
  
-       dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+       dprintk("NFS: flush(%s/%s)\n",
+                       dentry->d_parent->d_name.name,
+                       dentry->d_name.name);
  
         if ((file->f_mode & FMODE_WRITE) == 0)
                 return 0;
@@ -245,7 +261,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
         if (iocb->ki_filp->f_flags & O_DIRECT)
                 return nfs_file_direct_read(iocb, iov, nr_segs, pos);
  
-       dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
+       dprintk("NFS: read(%s/%s, %lu@%lu)\n",
                 dentry->d_parent->d_name.name, dentry->d_name.name,
                 (unsigned long) count, (unsigned long) pos);
  
@@ -265,7 +281,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
         struct inode *inode = dentry->d_inode;
         ssize_t res;
  
-       dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
+       dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n",
                 dentry->d_parent->d_name.name, dentry->d_name.name,
                 (unsigned long) count, (unsigned long long) *ppos);
  
@@ -282,7 +298,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
         struct inode *inode = dentry->d_inode;
         int     status;
  
-       dfprintk(VFS, "nfs: mmap(%s/%s)\n",
+       dprintk("NFS: mmap(%s/%s)\n",
                 dentry->d_parent->d_name.name, dentry->d_name.name);
  
         status = nfs_revalidate_mapping(inode, file->f_mapping);
@@ -300,12 +316,14 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
   * whether any write errors occurred for this process.
   */
  static int
-nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
  {
         struct nfs_open_context *ctx = nfs_file_open_context(file);
         struct inode *inode = dentry->d_inode;
  
-       dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+       dprintk("NFS: fsync file(%s/%s) datasync %d\n",
+                       dentry->d_parent->d_name.name, dentry->d_name.name,
+                       datasync);
  
         nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
         return nfs_do_fsync(ctx, inode);
@@ -328,6 +346,11 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
         struct page *page;
         index = pos >> PAGE_CACHE_SHIFT;
  
+       dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
+               file->f_path.dentry->d_parent->d_name.name,
+               file->f_path.dentry->d_name.name,
+               mapping->host->i_ino, len, (long long) pos);
+
         page = __grab_cache_page(mapping, index);
         if (!page)
                 return -ENOMEM;
@@ -348,9 +371,32 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
         unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
         int status;
  
-       lock_kernel();
+       dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
+               file->f_path.dentry->d_parent->d_name.name,
+               file->f_path.dentry->d_name.name,
+               mapping->host->i_ino, len, (long long) pos);
+
+       /*
+        * Zero any uninitialised parts of the page, and then mark the page
+        * as up to date if it turns out that we're extending the file.
+        */
+       if (!PageUptodate(page)) {
+               unsigned pglen = nfs_page_length(page);
+               unsigned end = offset + len;
+
+               if (pglen == 0) {
+                       zero_user_segments(page, 0, offset,
+                                       end, PAGE_CACHE_SIZE);
+                       SetPageUptodate(page);
+               } else if (end >= pglen) {
+                       zero_user_segment(page, end, PAGE_CACHE_SIZE);
+                       if (offset == 0)
+                               SetPageUptodate(page);
+               } else
+                       zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
+       }
+
         status = nfs_updatepage(file, page, offset, copied);
-       unlock_kernel();
  
         unlock_page(page);
         page_cache_release(page);
@@ -362,6 +408,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
  
  static void nfs_invalidate_page(struct page *page, unsigned long offset)
  {
+       dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset);
+
         if (offset != 0)
                 return;
         /* Cancel any unstarted writes on this page */
@@ -370,13 +418,20 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
  
  static int nfs_release_page(struct page *page, gfp_t gfp)
  {
+       dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
+
         /* If PagePrivate() is set, then the page is not freeable */
         return 0;
  }
  
  static int nfs_launder_page(struct page *page)
  {
-       return nfs_wb_page(page->mapping->host, page);
+       struct inode *inode = page->mapping->host;
+
+       dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
+               inode->i_ino, (long long)page_offset(page));
+
+       return nfs_wb_page(inode, page);
  }
  
  const struct address_space_operations nfs_file_aops = {
@@ -396,13 +451,19 @@ const struct address_space_operations nfs_file_aops = {
  static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
  {
         struct file *filp = vma->vm_file;
+       struct dentry *dentry = filp->f_path.dentry;
         unsigned pagelen;
         int ret = -EINVAL;
         struct address_space *mapping;
  
+       dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
+               dentry->d_parent->d_name.name, dentry->d_name.name,
+               filp->f_mapping->host->i_ino,
+               (long long)page_offset(page));
+
         lock_page(page);
         mapping = page->mapping;
-       if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
+       if (mapping != dentry->d_inode->i_mapping)
                 goto out_unlock;
  
         ret = 0;
@@ -450,9 +511,9 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
         if (iocb->ki_filp->f_flags & O_DIRECT)
                 return nfs_file_direct_write(iocb, iov, nr_segs, pos);
  
-       dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
+       dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
                 dentry->d_parent->d_name.name, dentry->d_name.name,
-               inode->i_ino, (unsigned long) count, (long long) pos);
+               (unsigned long) count, (long long) pos);
  
         result = -EBUSY;
         if (IS_SWAPFILE(inode))
@@ -586,7 +647,8 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
          * This makes locking act as a cache coherency point.
          */
         nfs_sync_mapping(filp->f_mapping);
-       nfs_zap_caches(inode);
+       if (!nfs_have_delegation(inode, FMODE_READ))
+               nfs_zap_caches(inode);
  out:
         return status;
  }
@@ -596,23 +658,35 @@ out:
   */
  static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
  {
-       struct inode * inode = filp->f_mapping->host;
+       struct inode *inode = filp->f_mapping->host;
+       int ret = -ENOLCK;
  
-       dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
-                       inode->i_sb->s_id, inode->i_ino,
+       dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
+                       filp->f_path.dentry->d_parent->d_name.name,
+                       filp->f_path.dentry->d_name.name,
                         fl->fl_type, fl->fl_flags,
                         (long long)fl->fl_start, (long long)fl->fl_end);
+
         nfs_inc_stats(inode, NFSIOS_VFSLOCK);
  
         /* No mandatory locks over NFS */
         if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
-               return -ENOLCK;
+               goto out_err;
+
+       if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
+               ret = NFS_PROTO(inode)->lock_check_bounds(fl);
+               if (ret < 0)
+                       goto out_err;
+       }
  
         if (IS_GETLK(cmd))
-               return do_getlk(filp, cmd, fl);
-       if (fl->fl_type == F_UNLCK)
-               return do_unlk(filp, cmd, fl);
-       return do_setlk(filp, cmd, fl);
+               ret = do_getlk(filp, cmd, fl);
+       else if (fl->fl_type == F_UNLCK)
+               ret = do_unlk(filp, cmd, fl);
+       else
+               ret = do_setlk(filp, cmd, fl);
+out_err:
+       return ret;
  }
  
  /*
@@ -620,9 +694,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
   */
  static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
  {
-       dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
-                       filp->f_path.dentry->d_inode->i_sb->s_id,
-                       filp->f_path.dentry->d_inode->i_ino,
+       dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
+                       filp->f_path.dentry->d_parent->d_name.name,
+                       filp->f_path.dentry->d_name.name,
                         fl->fl_type, fl->fl_flags);
  
         /*
@@ -645,12 +719,15 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
         return do_setlk(filp, cmd, fl);
  }
  
+/*
+ * There is no protocol support for leases, so we have no way to implement
+ * them correctly in the face of opens by other clients.
+ */
  static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
  {
-       /*
-        * There is no protocol support for leases, so we have no way
-        * to implement them correctly in the face of opens by other
-        * clients.
-        */
+       dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
+                       file->f_path.dentry->d_parent->d_name.name,
+                       file->f_path.dentry->d_name.name, arg);
+
         return -EINVAL;
  }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c

index 596c5d8e86f4783f78699ae685911958699162d5..df23f987da6beeb006496ece538f20765599acb7 100644 (file)
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -57,8 +57,6 @@ static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
  static void nfs_invalidate_inode(struct inode *);
  static int nfs_update_inode(struct inode *, struct nfs_fattr *);
  
-static void nfs_zap_acl_cache(struct inode *);
-
  static struct kmem_cache * nfs_inode_cachep;
  
  static inline unsigned long
@@ -167,7 +165,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
         }
  }
  
-static void nfs_zap_acl_cache(struct inode *inode)
+void nfs_zap_acl_cache(struct inode *inode)
  {
         void (*clear_acl_cache)(struct inode *);
  
@@ -347,7 +345,7 @@ out_no_inode:
         goto out;
  }
  
-#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET)
+#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE)
  
  int
  nfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -369,10 +367,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
  
         /* Optimization: if the end result is no change, don't RPC */
         attr->ia_valid &= NFS_VALID_ATTRS;
-       if (attr->ia_valid == 0)
+       if ((attr->ia_valid & ~ATTR_FILE) == 0)
                 return 0;
  
-       lock_kernel();
         /* Write all dirty data */
         if (S_ISREG(inode->i_mode)) {
                 filemap_write_and_wait(inode->i_mapping);
@@ -386,10 +383,65 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
         error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
         if (error == 0)
                 nfs_refresh_inode(inode, &fattr);
-       unlock_kernel();
         return error;
  }
  
+/**
+ * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: new file size; written to inode->i_size under inode->i_lock
+ *
+ * Copy of the common vmtruncate, but i_size is written under inode->i_lock,
+ * as NFS requires.  Returns 0, -EFBIG when extending past RLIMIT_FSIZE
+ * (SIGXFSZ is sent too) or past s_maxbytes, or -ETXTBSY for a swapfile.
+ */
+static int nfs_vmtruncate(struct inode * inode, loff_t offset)
+{
+       if (i_size_read(inode) < offset) { /* extending the file */
+               unsigned long limit;
+
+               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+               if (limit != RLIM_INFINITY && offset > limit)
+                       goto out_sig;
+               if (offset > inode->i_sb->s_maxbytes)
+                       goto out_big;
+               spin_lock(&inode->i_lock);
+               i_size_write(inode, offset);
+               spin_unlock(&inode->i_lock);
+       } else { /* shrinking, or size unchanged */
+               struct address_space *mapping = inode->i_mapping;
+
+               /*
+                * truncation of in-use swapfiles is disallowed - it would
+                * cause subsequent swapout to scribble on the now-freed
+                * blocks.
+                */
+               if (IS_SWAPFILE(inode))
+                       return -ETXTBSY;
+               spin_lock(&inode->i_lock);
+               i_size_write(inode, offset);
+               spin_unlock(&inode->i_lock);
+
+               /*
+                * unmap_mapping_range is called twice, first simply for
+                * efficiency so that truncate_inode_pages does fewer
+                * single-page unmaps.  However after this first call, and
+                * before truncate_inode_pages finishes, it is possible for
+                * private pages to be COWed, which remain after
+                * truncate_inode_pages finishes, hence the second
+                * unmap_mapping_range call must be made for correctness.
+                */
+               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+               truncate_inode_pages(mapping, offset);
+               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+       }
+       return 0;
+out_sig: /* over RLIMIT_FSIZE: signal, then fall through to -EFBIG */
+       send_sig(SIGXFSZ, current, 0);
+out_big: /* reached directly when offset exceeds s_maxbytes (no signal) */
+       return -EFBIG;
+}
+
  /**
   * nfs_setattr_update_inode - Update inode metadata after a setattr call.
   * @inode: pointer to struct inode
@@ -416,8 +468,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
         }
         if ((attr->ia_valid & ATTR_SIZE) != 0) {
                 nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
-               inode->i_size = attr->ia_size;
-               vmtruncate(inode, attr->ia_size);
+               nfs_vmtruncate(inode, attr->ia_size);
         }
  }
  
@@ -647,7 +698,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
                 inode->i_sb->s_id, (long long)NFS_FILEID(inode));
  
         nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
-       lock_kernel();
         if (is_bad_inode(inode))
                 goto out_nowait;
         if (NFS_STALE(inode))
@@ -696,7 +746,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
         nfs_wake_up_inode(inode);
  
   out_nowait:
-       unlock_kernel();
         return status;
  }
  
@@ -831,9 +880,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         if (S_ISDIR(inode->i_mode))
                                 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
                 }
-               if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) &&
+               if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) &&
                     nfsi->npages == 0)
-                       inode->i_size = nfs_size_to_loff_t(fattr->size);
+                       i_size_write(inode, nfs_size_to_loff_t(fattr->size));
         }
  }
  
@@ -974,7 +1023,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
                         (fattr->valid & NFS_ATTR_WCC) == 0) {
                 memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
                 memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
-               fattr->pre_size = inode->i_size;
+               fattr->pre_size = i_size_read(inode);
                 fattr->valid |= NFS_ATTR_WCC;
         }
         return nfs_post_op_update_inode(inode, fattr);
@@ -1059,7 +1108,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                 /* Do we perhaps have any outstanding writes, or has
                  * the file grown beyond our last write? */
                 if (nfsi->npages == 0 || new_isize > cur_isize) {
-                       inode->i_size = new_isize;
+                       i_size_write(inode, new_isize);
                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
                 }
                 dprintk("NFS: isize change on server for file %s/%ld\n",
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h

index 04ae867dddba43a8267cb09771f07f057fa7ab59..24241fcbb98d7647fb6b07fcda9a9e0aee10139a 100644 (file)
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -150,6 +150,7 @@ extern void nfs_clear_inode(struct inode *);
  #ifdef CONFIG_NFS_V4
  extern void nfs4_clear_inode(struct inode *);
  #endif
+void nfs_zap_acl_cache(struct inode *inode);
  
  /* super.c */
  extern struct file_system_type nfs_xdev_fs_type;
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h

index 6350ecbde589a8a976867965815d12c5960af307..a36952810032f8c492acb4e1f4f96c7494250b3f 100644 (file)
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -5,135 +5,41 @@
   *
   *  Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
   *
- *  NFS client per-mount statistics provide information about the health of
- *  the NFS client and the health of each NFS mount point.  Generally these
- *  are not for detailed problem diagnosis, but simply to indicate that there
- *  is a problem.
- *
- *  These counters are not meant to be human-readable, but are meant to be
- *  integrated into system monitoring tools such as "sar" and "iostat".  As
- *  such, the counters are sampled by the tools over time, and are never
- *  zeroed after a file system is mounted.  Moving averages can be computed
- *  by the tools by taking the difference between two instantaneous samples
- *  and dividing that by the time between the samples.
   */
  
  #ifndef _NFS_IOSTAT
  #define _NFS_IOSTAT
  
-#define NFS_IOSTAT_VERS                "1.0"
-
-/*
- * NFS byte counters
- *
- * 1.  SERVER - the number of payload bytes read from or written to the
- *     server by the NFS client via an NFS READ or WRITE request.
- *
- * 2.  NORMAL - the number of bytes read or written by applications via
- *     the read(2) and write(2) system call interfaces.
- *
- * 3.  DIRECT - the number of bytes read or written from files opened
- *     with the O_DIRECT flag.
- *
- * These counters give a view of the data throughput into and out of the NFS
- * client.  Comparing the number of bytes requested by an application with the
- * number of bytes the client requests from the server can provide an
- * indication of client efficiency (per-op, cache hits, etc).
- *
- * These counters can also help characterize which access methods are in
- * use.  DIRECT by itself shows whether there is any O_DIRECT traffic.
- * NORMAL + DIRECT shows how much data is going through the system call
- * interface.  A large amount of SERVER traffic without much NORMAL or
- * DIRECT traffic shows that applications are using mapped files.
- *
- * NFS page counters
- *
- * These count the number of pages read or written via nfs_readpage(),
- * nfs_readpages(), or their write equivalents.
- */
-enum nfs_stat_bytecounters {
-       NFSIOS_NORMALREADBYTES = 0,
-       NFSIOS_NORMALWRITTENBYTES,
-       NFSIOS_DIRECTREADBYTES,
-       NFSIOS_DIRECTWRITTENBYTES,
-       NFSIOS_SERVERREADBYTES,
-       NFSIOS_SERVERWRITTENBYTES,
-       NFSIOS_READPAGES,
-       NFSIOS_WRITEPAGES,
-       __NFSIOS_BYTESMAX,
-};
-
-/*
- * NFS event counters
- *
- * These counters provide a low-overhead way of monitoring client activity
- * without enabling NFS trace debugging.  The counters show the rate at
- * which VFS requests are made, and how often the client invalidates its
- * data and attribute caches.  This allows system administrators to monitor
- * such things as how close-to-open is working, and answer questions such
- * as "why are there so many GETATTR requests on the wire?"
- *
- * They also count anamolous events such as short reads and writes, silly
- * renames due to close-after-delete, and operations that change the size
- * of a file (such operations can often be the source of data corruption
- * if applications aren't using file locking properly).
- */
-enum nfs_stat_eventcounters {
-       NFSIOS_INODEREVALIDATE = 0,
-       NFSIOS_DENTRYREVALIDATE,
-       NFSIOS_DATAINVALIDATE,
-       NFSIOS_ATTRINVALIDATE,
-       NFSIOS_VFSOPEN,
-       NFSIOS_VFSLOOKUP,
-       NFSIOS_VFSACCESS,
-       NFSIOS_VFSUPDATEPAGE,
-       NFSIOS_VFSREADPAGE,
-       NFSIOS_VFSREADPAGES,
-       NFSIOS_VFSWRITEPAGE,
-       NFSIOS_VFSWRITEPAGES,
-       NFSIOS_VFSGETDENTS,
-       NFSIOS_VFSSETATTR,
-       NFSIOS_VFSFLUSH,
-       NFSIOS_VFSFSYNC,
-       NFSIOS_VFSLOCK,
-       NFSIOS_VFSRELEASE,
-       NFSIOS_CONGESTIONWAIT,
-       NFSIOS_SETATTRTRUNC,
-       NFSIOS_EXTENDWRITE,
-       NFSIOS_SILLYRENAME,
-       NFSIOS_SHORTREAD,
-       NFSIOS_SHORTWRITE,
-       NFSIOS_DELAY,
-       __NFSIOS_COUNTSMAX,
-};
-
-#ifdef __KERNEL__
-
  #include <linux/percpu.h>
  #include <linux/cache.h>
+#include <linux/nfs_iostat.h>
  
  struct nfs_iostats {
         unsigned long long      bytes[__NFSIOS_BYTESMAX];
         unsigned long           events[__NFSIOS_COUNTSMAX];
  } ____cacheline_aligned;
  
-static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
+static inline void nfs_inc_server_stats(const struct nfs_server *server,
+                                       enum nfs_stat_eventcounters stat)
  {
         struct nfs_iostats *iostats;
         int cpu;
  
         cpu = get_cpu();
         iostats = per_cpu_ptr(server->io_stats, cpu);
-       iostats->events[stat] ++;
+       iostats->events[stat]++;
         put_cpu_no_resched();
  }
  
-static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+static inline void nfs_inc_stats(const struct inode *inode,
+                                enum nfs_stat_eventcounters stat)
  {
         nfs_inc_server_stats(NFS_SERVER(inode), stat);
  }
  
-static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
+static inline void nfs_add_server_stats(const struct nfs_server *server,
+                                       enum nfs_stat_bytecounters stat,
+                                       unsigned long addend)
  {
         struct nfs_iostats *iostats;
         int cpu;
@@ -144,7 +50,9 @@ static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat
         put_cpu_no_resched();
  }
  
-static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+static inline void nfs_add_stats(const struct inode *inode,
+                                enum nfs_stat_bytecounters stat,
+                                unsigned long addend)
  {
         nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
  }
@@ -160,5 +68,4 @@ static inline void nfs_free_iostats(struct nfs_iostats *stats)
                 free_percpu(stats);
  }
  
-#endif
-#endif
+#endif /* _NFS_IOSTAT */
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c

index 9b7362565c0c3b9b8c5dc1610a2c354305ae4999..423842f51ac91b6c447ce708dd31a2ab6cc513f1 100644 (file)
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -5,6 +5,8 @@
  #include <linux/posix_acl_xattr.h>
  #include <linux/nfsacl.h>
  
+#include "internal.h"
+
  #define NFSDBG_FACILITY        NFSDBG_PROC
  
  ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -205,6 +207,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
         status = nfs_revalidate_inode(server, inode);
         if (status < 0)
                 return ERR_PTR(status);
+       if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
+               nfs_zap_acl_cache(inode);
         acl = nfs3_get_cached_acl(inode, type);
         if (acl != ERR_PTR(-EAGAIN))
                 return acl;
@@ -319,9 +323,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
         dprintk("NFS call setacl\n");
         msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
         status = rpc_call_sync(server->client_acl, &msg, 0);
-       spin_lock(&inode->i_lock);
-       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
-       spin_unlock(&inode->i_lock);
+       nfs_access_zap_cache(inode);
+       nfs_zap_acl_cache(inode);
         dprintk("NFS reply setacl: %d\n", status);
  
         /* pages may have been allocated at the xdr layer. */
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c

index c3523ad03ed1b6d860b4782878eeb01ccfd071c3..1e750e4574a911e982606265edb63033be870de4 100644 (file)
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -129,6 +129,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
         int     status;
  
         dprintk("NFS call  setattr\n");
+       if (sattr->ia_valid & ATTR_FILE)
+               msg.rpc_cred = nfs_file_cred(sattr->ia_file);
         nfs_fattr_init(fattr);
         status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
         if (status == 0)
@@ -248,6 +250,53 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
         return status;
  }
  
+struct nfs3_createdata { /* heap-allocated state for one create-type RPC */
+       struct rpc_message msg; /* rpc_argp/rpc_resp point at arg/res below */
+       union { /* per-procedure arguments; each caller fills one member */
+               struct nfs3_createargs create;
+               struct nfs3_mkdirargs mkdir;
+               struct nfs3_symlinkargs symlink;
+               struct nfs3_mknodargs mknod;
+       } arg;
+       struct nfs3_diropres res; /* reply; its pointers target the fields below */
+       struct nfs_fh fh; /* storage behind res.fh */
+       struct nfs_fattr fattr; /* storage behind res.fattr */
+       struct nfs_fattr dir_attr; /* storage behind res.dir_attr */
+};
+
+static struct nfs3_createdata *nfs3_alloc_createdata(void)
+{
+       struct nfs3_createdata *data;
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL); /* zero-filled allocation */
+       if (data != NULL) { /* wire the embedded buffers to each other */
+               data->msg.rpc_argp = &data->arg;
+               data->msg.rpc_resp = &data->res;
+               data->res.fh = &data->fh;
+               data->res.fattr = &data->fattr;
+               data->res.dir_attr = &data->dir_attr;
+               nfs_fattr_init(data->res.fattr);
+               nfs_fattr_init(data->res.dir_attr);
+       }
+       return data; /* NULL on allocation failure; msg.rpc_proc and arg left for the caller */
+}
+
+static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data)
+{
+       int status; /* result of the RPC, then of nfs_instantiate() on success */
+
+       status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
+       nfs_post_op_update_inode(dir, data->res.dir_attr); /* runs whether or not the call succeeded */
+       if (status == 0) /* nfs_instantiate() is skipped when the RPC failed */
+               status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
+       return status;
+}
+
+static void nfs3_free_createdata(struct nfs3_createdata *data)
+{
+       kfree(data); /* data may be NULL: nfs3_proc_create's "out" path passes it after a failed alloc */
+}
+
  /*
   * Create a regular file.
   * For now, we don't implement O_EXCL.
@@ -256,70 +305,60 @@ static int
  nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                  int flags, struct nameidata *nd)
  {
-       struct nfs_fh           fhandle;
-       struct nfs_fattr        fattr;
-       struct nfs_fattr        dir_attr;
-       struct nfs3_createargs  arg = {
-               .fh             = NFS_FH(dir),
-               .name           = dentry->d_name.name,
-               .len            = dentry->d_name.len,
-               .sattr          = sattr,
-       };
-       struct nfs3_diropres    res = {
-               .dir_attr       = &dir_attr,
-               .fh             = &fhandle,
-               .fattr          = &fattr
-       };
-       struct rpc_message msg = {
-               .rpc_proc       = &nfs3_procedures[NFS3PROC_CREATE],
-               .rpc_argp       = &arg,
-               .rpc_resp       = &res,
-       };
+       struct nfs3_createdata *data;
         mode_t mode = sattr->ia_mode;
-       int status;
+       int status = -ENOMEM;
  
         dprintk("NFS call  create %s\n", dentry->d_name.name);
-       arg.createmode = NFS3_CREATE_UNCHECKED;
+
+       data = nfs3_alloc_createdata();
+       if (data == NULL)
+               goto out;
+
+       data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_CREATE];
+       data->arg.create.fh = NFS_FH(dir);
+       data->arg.create.name = dentry->d_name.name;
+       data->arg.create.len = dentry->d_name.len;
+       data->arg.create.sattr = sattr;
+
+       data->arg.create.createmode = NFS3_CREATE_UNCHECKED;
         if (flags & O_EXCL) {
-               arg.createmode  = NFS3_CREATE_EXCLUSIVE;
-               arg.verifier[0] = jiffies;
-               arg.verifier[1] = current->pid;
+               data->arg.create.createmode  = NFS3_CREATE_EXCLUSIVE;
+               data->arg.create.verifier[0] = jiffies;
+               data->arg.create.verifier[1] = current->pid;
         }
  
         sattr->ia_mode &= ~current->fs->umask;
  
-again:
-       nfs_fattr_init(&dir_attr);
-       nfs_fattr_init(&fattr);
-       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-       nfs_refresh_inode(dir, &dir_attr);
+       for (;;) {
+               status = nfs3_do_create(dir, dentry, data);
  
-       /* If the server doesn't support the exclusive creation semantics,
-        * try again with simple 'guarded' mode. */
-       if (status == -ENOTSUPP) {
-               switch (arg.createmode) {
+               if (status != -ENOTSUPP)
+                       break;
+               /* If the server doesn't support the exclusive creation
+                * semantics, try again with simple 'guarded' mode. */
+               switch (data->arg.create.createmode) {
                         case NFS3_CREATE_EXCLUSIVE:
-                               arg.createmode = NFS3_CREATE_GUARDED;
+                               data->arg.create.createmode = NFS3_CREATE_GUARDED;
                                 break;
  
                         case NFS3_CREATE_GUARDED:
-                               arg.createmode = NFS3_CREATE_UNCHECKED;
+                               data->arg.create.createmode = NFS3_CREATE_UNCHECKED;
                                 break;
  
                         case NFS3_CREATE_UNCHECKED:
                                 goto out;
                 }
-               goto again;
+               nfs_fattr_init(data->res.dir_attr);
+               nfs_fattr_init(data->res.fattr);
         }
  
-       if (status == 0)
-               status = nfs_instantiate(dentry, &fhandle, &fattr);
         if (status != 0)
                 goto out;
  
         /* When we created the file with exclusive semantics, make
          * sure we set the attributes afterwards. */
-       if (arg.createmode == NFS3_CREATE_EXCLUSIVE) {
+       if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) {
                 dprintk("NFS call  setattr (post-create)\n");
  
                 if (!(sattr->ia_valid & ATTR_ATIME_SET))
@@ -330,14 +369,15 @@ again:
                 /* Note: we could use a guarded setattr here, but I'm
                  * not sure this buys us anything (and I'd have
                  * to revamp the NFSv3 XDR code) */
-               status = nfs3_proc_setattr(dentry, &fattr, sattr);
-               nfs_post_op_update_inode(dentry->d_inode, &fattr);
+               status = nfs3_proc_setattr(dentry, data->res.fattr, sattr);
+               nfs_post_op_update_inode(dentry->d_inode, data->res.fattr);
                 dprintk("NFS reply setattr (post-create): %d\n", status);
+               if (status != 0)
+                       goto out;
         }
-       if (status != 0)
-               goto out;
         status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
  out:
+       nfs3_free_createdata(data);
         dprintk("NFS reply create: %d\n", status);
         return status;
  }
@@ -452,40 +492,28 @@ static int
  nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
                   unsigned int len, struct iattr *sattr)
  {
-       struct nfs_fh fhandle;
-       struct nfs_fattr fattr, dir_attr;
-       struct nfs3_symlinkargs arg = {
-               .fromfh         = NFS_FH(dir),
-               .fromname       = dentry->d_name.name,
-               .fromlen        = dentry->d_name.len,
-               .pages          = &page,
-               .pathlen        = len,
-               .sattr          = sattr
-       };
-       struct nfs3_diropres    res = {
-               .dir_attr       = &dir_attr,
-               .fh             = &fhandle,
-               .fattr          = &fattr
-       };
-       struct rpc_message msg = {
-               .rpc_proc       = &nfs3_procedures[NFS3PROC_SYMLINK],
-               .rpc_argp       = &arg,
-               .rpc_resp       = &res,
-       };
-       int                     status;
+       struct nfs3_createdata *data;
+       int status = -ENOMEM;
  
         if (len > NFS3_MAXPATHLEN)
                 return -ENAMETOOLONG;
  
         dprintk("NFS call  symlink %s\n", dentry->d_name.name);
  
-       nfs_fattr_init(&dir_attr);
-       nfs_fattr_init(&fattr);
-       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-       nfs_post_op_update_inode(dir, &dir_attr);
-       if (status != 0)
+       data = nfs3_alloc_createdata();
+       if (data == NULL)
                 goto out;
-       status = nfs_instantiate(dentry, &fhandle, &fattr);
+       data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK];
+       data->arg.symlink.fromfh = NFS_FH(dir);
+       data->arg.symlink.fromname = dentry->d_name.name;
+       data->arg.symlink.fromlen = dentry->d_name.len;
+       data->arg.symlink.pages = &page;
+       data->arg.symlink.pathlen = len;
+       data->arg.symlink.sattr = sattr;
+
+       status = nfs3_do_create(dir, dentry, data);
+
+       nfs3_free_createdata(data);
  out:
         dprintk("NFS reply symlink: %d\n", status);
         return status;
@@ -494,42 +522,31 @@ out:
  static int
  nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
  {
-       struct nfs_fh fhandle;
-       struct nfs_fattr fattr, dir_attr;
-       struct nfs3_mkdirargs   arg = {
-               .fh             = NFS_FH(dir),
-               .name           = dentry->d_name.name,
-               .len            = dentry->d_name.len,
-               .sattr          = sattr
-       };
-       struct nfs3_diropres    res = {
-               .dir_attr       = &dir_attr,
-               .fh             = &fhandle,
-               .fattr          = &fattr
-       };
-       struct rpc_message msg = {
-               .rpc_proc       = &nfs3_procedures[NFS3PROC_MKDIR],
-               .rpc_argp       = &arg,
-               .rpc_resp       = &res,
-       };
+       struct nfs3_createdata *data;
         int mode = sattr->ia_mode;
-       int status;
+       int status = -ENOMEM;
  
         dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
  
         sattr->ia_mode &= ~current->fs->umask;
  
-       nfs_fattr_init(&dir_attr);
-       nfs_fattr_init(&fattr);
-       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-       nfs_post_op_update_inode(dir, &dir_attr);
-       if (status != 0)
+       data = nfs3_alloc_createdata();
+       if (data == NULL)
                 goto out;
-       status = nfs_instantiate(dentry, &fhandle, &fattr);
+
+       data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR];
+       data->arg.mkdir.fh = NFS_FH(dir);
+       data->arg.mkdir.name = dentry->d_name.name;
+       data->arg.mkdir.len = dentry->d_name.len;
+       data->arg.mkdir.sattr = sattr;
+
+       status = nfs3_do_create(dir, dentry, data);
         if (status != 0)
                 goto out;
+
         status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
  out:
+       nfs3_free_createdata(data);
         dprintk("NFS reply mkdir: %d\n", status);
         return status;
  }
@@ -615,52 +632,50 @@ static int
  nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                 dev_t rdev)
  {
-       struct nfs_fh fh;
-       struct nfs_fattr fattr, dir_attr;
-       struct nfs3_mknodargs   arg = {
-               .fh             = NFS_FH(dir),
-               .name           = dentry->d_name.name,
-               .len            = dentry->d_name.len,
-               .sattr          = sattr,
-               .rdev           = rdev
-       };
-       struct nfs3_diropres    res = {
-               .dir_attr       = &dir_attr,
-               .fh             = &fh,
-               .fattr          = &fattr
-       };
-       struct rpc_message msg = {
-               .rpc_proc       = &nfs3_procedures[NFS3PROC_MKNOD],
-               .rpc_argp       = &arg,
-               .rpc_resp       = &res,
-       };
+       struct nfs3_createdata *data;
         mode_t mode = sattr->ia_mode;
-       int status;
-
-       switch (sattr->ia_mode & S_IFMT) {
-       case S_IFBLK:   arg.type = NF3BLK;  break;
-       case S_IFCHR:   arg.type = NF3CHR;  break;
-       case S_IFIFO:   arg.type = NF3FIFO; break;
-       case S_IFSOCK:  arg.type = NF3SOCK; break;
-       default:        return -EINVAL;
-       }
+       int status = -ENOMEM;
  
         dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,
                         MAJOR(rdev), MINOR(rdev));
  
         sattr->ia_mode &= ~current->fs->umask;
  
-       nfs_fattr_init(&dir_attr);
-       nfs_fattr_init(&fattr);
-       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-       nfs_post_op_update_inode(dir, &dir_attr);
-       if (status != 0)
+       data = nfs3_alloc_createdata();
+       if (data == NULL)
                 goto out;
-       status = nfs_instantiate(dentry, &fh, &fattr);
+
+       data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD];
+       data->arg.mknod.fh = NFS_FH(dir);
+       data->arg.mknod.name = dentry->d_name.name;
+       data->arg.mknod.len = dentry->d_name.len;
+       data->arg.mknod.sattr = sattr;
+       data->arg.mknod.rdev = rdev;
+
+       switch (sattr->ia_mode & S_IFMT) {
+       case S_IFBLK:
+               data->arg.mknod.type = NF3BLK;
+               break;
+       case S_IFCHR:
+               data->arg.mknod.type = NF3CHR;
+               break;
+       case S_IFIFO:
+               data->arg.mknod.type = NF3FIFO;
+               break;
+       case S_IFSOCK:
+               data->arg.mknod.type = NF3SOCK;
+               break;
+       default:
+               status = -EINVAL;
+               goto out;
+       }
+
+       status = nfs3_do_create(dir, dentry, data);
         if (status != 0)
                 goto out;
         status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
  out:
+       nfs3_free_createdata(data);
         dprintk("NFS reply mknod: %d\n", status);
         return status;
  }
@@ -801,8 +816,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
         .write_done     = nfs3_write_done,
         .commit_setup   = nfs3_proc_commit_setup,
         .commit_done    = nfs3_commit_done,
-       .file_open      = nfs_open,
-       .file_release   = nfs_release,
         .lock           = nfs3_proc_lock,
         .clear_acl_cache = nfs3_forget_cached_acls,
  };
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index 1293e0acd82b7ab80604a9077277f0f4f97c333d..c910413eaecafd80ff2c6012dda6baad388cc6dd 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -451,9 +451,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
                 /* Save the delegation */
                 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
                 rcu_read_unlock();
-               lock_kernel();
                 ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
-               unlock_kernel();
                 if (ret != 0)
                         goto out;
                 ret = -EAGAIN;
@@ -1139,8 +1137,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int
         return res;
  }
  
-static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
-                struct iattr *sattr, struct nfs4_state *state)
+static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+                           struct nfs_fattr *fattr, struct iattr *sattr,
+                           struct nfs4_state *state)
  {
         struct nfs_server *server = NFS_SERVER(inode);
          struct nfs_setattrargs  arg = {
@@ -1154,9 +1153,10 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
                 .server         = server,
          };
          struct rpc_message msg = {
-                .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
-                .rpc_argp       = &arg,
-                .rpc_resp       = &res,
+               .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+               .rpc_cred       = cred,
          };
         unsigned long timestamp = jiffies;
         int status;
@@ -1166,7 +1166,6 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
         if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
                 /* Use that stateid */
         } else if (state != NULL) {
-               msg.rpc_cred = state->owner->so_cred;
                 nfs4_copy_stateid(&arg.stateid, state, current->files);
         } else
                 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
@@ -1177,15 +1176,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
         return status;
  }
  
-static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
-                struct iattr *sattr, struct nfs4_state *state)
+static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+                          struct nfs_fattr *fattr, struct iattr *sattr,
+                          struct nfs4_state *state)
  {
         struct nfs_server *server = NFS_SERVER(inode);
         struct nfs4_exception exception = { };
         int err;
         do {
                 err = nfs4_handle_exception(server,
-                               _nfs4_do_setattr(inode, fattr, sattr, state),
+                               _nfs4_do_setattr(inode, cred, fattr, sattr, state),
                                 &exception);
         } while (exception.retry);
         return err;
@@ -1647,29 +1647,25 @@ static int
  nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                   struct iattr *sattr)
  {
-       struct rpc_cred *cred;
         struct inode *inode = dentry->d_inode;
-       struct nfs_open_context *ctx;
+       struct rpc_cred *cred = NULL;
         struct nfs4_state *state = NULL;
         int status;
  
         nfs_fattr_init(fattr);
         
-       cred = rpc_lookup_cred();
-       if (IS_ERR(cred))
-               return PTR_ERR(cred);
-
         /* Search for an existing open(O_WRITE) file */
-       ctx = nfs_find_open_context(inode, cred, FMODE_WRITE);
-       if (ctx != NULL)
+       if (sattr->ia_valid & ATTR_FILE) {
+               struct nfs_open_context *ctx;
+
+               ctx = nfs_file_open_context(sattr->ia_file);
+               cred = ctx->cred;
                 state = ctx->state;
+       }
  
-       status = nfs4_do_setattr(inode, fattr, sattr, state);
+       status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
         if (status == 0)
                 nfs_setattr_update_inode(inode, sattr);
-       if (ctx != NULL)
-               put_nfs_open_context(ctx);
-       put_rpccred(cred);
         return status;
  }
  
@@ -1897,17 +1893,16 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                 goto out;
         }
         state = nfs4_do_open(dir, &path, flags, sattr, cred);
-       put_rpccred(cred);
         d_drop(dentry);
         if (IS_ERR(state)) {
                 status = PTR_ERR(state);
-               goto out;
+               goto out_putcred;
         }
         d_add(dentry, igrab(state->inode));
         nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
         if (flags & O_EXCL) {
                 struct nfs_fattr fattr;
-               status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
+               status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state);
                 if (status == 0)
                         nfs_setattr_update_inode(state->inode, sattr);
                 nfs_post_op_update_inode(state->inode, &fattr);
@@ -1916,6 +1911,8 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                 status = nfs4_intent_set_file(nd, &path, state);
         else
                 nfs4_close_sync(&path, state, flags);
+out_putcred:
+       put_rpccred(cred);
  out:
         return status;
  }
@@ -2079,47 +2076,81 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n
         return err;
  }
  
+struct nfs4_createdata {
+       struct rpc_message msg;
+       struct nfs4_create_arg arg;
+       struct nfs4_create_res res;
+       struct nfs_fh fh;
+       struct nfs_fattr fattr;
+       struct nfs_fattr dir_fattr;
+};
+
+static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
+               struct qstr *name, struct iattr *sattr, u32 ftype)
+{
+       struct nfs4_createdata *data;
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (data != NULL) {
+               struct nfs_server *server = NFS_SERVER(dir);
+
+               data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE];
+               data->msg.rpc_argp = &data->arg;
+               data->msg.rpc_resp = &data->res;
+               data->arg.dir_fh = NFS_FH(dir);
+               data->arg.server = server;
+               data->arg.name = name;
+               data->arg.attrs = sattr;
+               data->arg.ftype = ftype;
+               data->arg.bitmask = server->attr_bitmask;
+               data->res.server = server;
+               data->res.fh = &data->fh;
+               data->res.fattr = &data->fattr;
+               data->res.dir_fattr = &data->dir_fattr;
+               nfs_fattr_init(data->res.fattr);
+               nfs_fattr_init(data->res.dir_fattr);
+       }
+       return data;
+}
+
+static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
+{
+       int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
+       if (status == 0) {
+               update_changeattr(dir, &data->res.dir_cinfo);
+               nfs_post_op_update_inode(dir, data->res.dir_fattr);
+               status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
+       }
+       return status;
+}
+
+static void nfs4_free_createdata(struct nfs4_createdata *data)
+{
+       kfree(data);
+}
+
  static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
                 struct page *page, unsigned int len, struct iattr *sattr)
  {
-       struct nfs_server *server = NFS_SERVER(dir);
-       struct nfs_fh fhandle;
-       struct nfs_fattr fattr, dir_fattr;
-       struct nfs4_create_arg arg = {
-               .dir_fh = NFS_FH(dir),
-               .server = server,
-               .name = &dentry->d_name,
-               .attrs = sattr,
-               .ftype = NF4LNK,
-               .bitmask = server->attr_bitmask,
-       };
-       struct nfs4_create_res res = {
-               .server = server,
-               .fh = &fhandle,
-               .fattr = &fattr,
-               .dir_fattr = &dir_fattr,
-       };
-       struct rpc_message msg = {
-               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK],
-               .rpc_argp = &arg,
-               .rpc_resp = &res,
-       };
-       int                     status;
+       struct nfs4_createdata *data;
+       int status = -ENAMETOOLONG;
  
         if (len > NFS4_MAXPATHLEN)
-               return -ENAMETOOLONG;
+               goto out;
  
-       arg.u.symlink.pages = &page;
-       arg.u.symlink.len = len;
-       nfs_fattr_init(&fattr);
-       nfs_fattr_init(&dir_fattr);
+       status = -ENOMEM;
+       data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4LNK);
+       if (data == NULL)
+               goto out;
+
+       data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK];
+       data->arg.u.symlink.pages = &page;
+       data->arg.u.symlink.len = len;
         
-       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-       if (!status) {
-               update_changeattr(dir, &res.dir_cinfo);
-               nfs_post_op_update_inode(dir, res.dir_fattr);
-               status = nfs_instantiate(dentry, &fhandle, &fattr);
-       }
+       status = nfs4_do_create(dir, dentry, data);
+
+       nfs4_free_createdata(data);
+out:
         return status;
  }
  
@@ -2140,39 +2171,17 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
  static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
                 struct iattr *sattr)
  {
-       struct nfs_server *server = NFS_SERVER(dir);
-       struct nfs_fh fhandle;
-       struct nfs_fattr fattr, dir_fattr;
-       struct nfs4_create_arg arg = {
-               .dir_fh = NFS_FH(dir),
-               .server = server,
-               .name = &dentry->d_name,
-               .attrs = sattr,
-               .ftype = NF4DIR,
-               .bitmask = server->attr_bitmask,
-       };
-       struct nfs4_create_res res = {
-               .server = server,
-               .fh = &fhandle,
-               .fattr = &fattr,
-               .dir_fattr = &dir_fattr,
-       };
-       struct rpc_message msg = {
-               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
-               .rpc_argp = &arg,
-               .rpc_resp = &res,
-       };
-       int                     status;
+       struct nfs4_createdata *data;
+       int status = -ENOMEM;
  
-       nfs_fattr_init(&fattr);
-       nfs_fattr_init(&dir_fattr);
-       
-       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-       if (!status) {
-               update_changeattr(dir, &res.dir_cinfo);
-               nfs_post_op_update_inode(dir, res.dir_fattr);
-               status = nfs_instantiate(dentry, &fhandle, &fattr);
-       }
+       data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR);
+       if (data == NULL)
+               goto out;
+
+       status = nfs4_do_create(dir, dentry, data);
+
+       nfs4_free_createdata(data);
+out:
         return status;
  }
  
@@ -2242,56 +2251,34 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
  static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
                 struct iattr *sattr, dev_t rdev)
  {
-       struct nfs_server *server = NFS_SERVER(dir);
-       struct nfs_fh fh;
-       struct nfs_fattr fattr, dir_fattr;
-       struct nfs4_create_arg arg = {
-               .dir_fh = NFS_FH(dir),
-               .server = server,
-               .name = &dentry->d_name,
-               .attrs = sattr,
-               .bitmask = server->attr_bitmask,
-       };
-       struct nfs4_create_res res = {
-               .server = server,
-               .fh = &fh,
-               .fattr = &fattr,
-               .dir_fattr = &dir_fattr,
-       };
-       struct rpc_message msg = {
-               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
-               .rpc_argp = &arg,
-               .rpc_resp = &res,
-       };
-       int                     status;
-       int                     mode = sattr->ia_mode;
-
-       nfs_fattr_init(&fattr);
-       nfs_fattr_init(&dir_fattr);
+       struct nfs4_createdata *data;
+       int mode = sattr->ia_mode;
+       int status = -ENOMEM;
  
         BUG_ON(!(sattr->ia_valid & ATTR_MODE));
         BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
+
+       data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4SOCK);
+       if (data == NULL)
+               goto out;
+
         if (S_ISFIFO(mode))
-               arg.ftype = NF4FIFO;
+               data->arg.ftype = NF4FIFO;
         else if (S_ISBLK(mode)) {
-               arg.ftype = NF4BLK;
-               arg.u.device.specdata1 = MAJOR(rdev);
-               arg.u.device.specdata2 = MINOR(rdev);
+               data->arg.ftype = NF4BLK;
+               data->arg.u.device.specdata1 = MAJOR(rdev);
+               data->arg.u.device.specdata2 = MINOR(rdev);
         }
         else if (S_ISCHR(mode)) {
-               arg.ftype = NF4CHR;
-               arg.u.device.specdata1 = MAJOR(rdev);
-               arg.u.device.specdata2 = MINOR(rdev);
+               data->arg.ftype = NF4CHR;
+               data->arg.u.device.specdata1 = MAJOR(rdev);
+               data->arg.u.device.specdata2 = MINOR(rdev);
         }
-       else
-               arg.ftype = NF4SOCK;
         
-       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-       if (status == 0) {
-               update_changeattr(dir, &res.dir_cinfo);
-               nfs_post_op_update_inode(dir, res.dir_fattr);
-               status = nfs_instantiate(dentry, &fh, &fattr);
-       }
+       status = nfs4_do_create(dir, dentry, data);
+
+       nfs4_free_createdata(data);
+out:
         return status;
  }
  
@@ -2706,6 +2693,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
         ret = nfs_revalidate_inode(server, inode);
         if (ret < 0)
                 return ret;
+       if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
+               nfs_zap_acl_cache(inode);
         ret = nfs4_read_cached_acl(inode, buf, buflen);
         if (ret != -ENOENT)
                 return ret;
@@ -2733,7 +2722,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
         nfs_inode_return_delegation(inode);
         buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
         ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
-       nfs_zap_caches(inode);
+       nfs_access_zap_cache(inode);
+       nfs_zap_acl_cache(inode);
         return ret;
  }
  
@@ -2767,8 +2757,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
                         task->tk_status = 0;
                         return -EAGAIN;
                 case -NFS4ERR_DELAY:
-                       nfs_inc_server_stats((struct nfs_server *) server,
-                                               NFSIOS_DELAY);
+                       nfs_inc_server_stats(server, NFSIOS_DELAY);
                 case -NFS4ERR_GRACE:
                         rpc_delay(task, NFS4_POLL_RETRY_MAX);
                         task->tk_status = 0;
@@ -2933,7 +2922,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre
  
  int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
  {
-       long timeout;
+       long timeout = 0;
         int err;
         do {
                 err = _nfs4_proc_setclientid_confirm(clp, cred);
@@ -3725,8 +3714,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
         .write_done     = nfs4_write_done,
         .commit_setup   = nfs4_proc_commit_setup,
         .commit_done    = nfs4_commit_done,
-       .file_open      = nfs_open,
-       .file_release   = nfs_release,
         .lock           = nfs4_proc_lock,
         .clear_acl_cache = nfs4_zap_acl_attr,
  };
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c

index 856a8934f610ef4addd8ea4b51deb5d918dd5e53..401ef8b28f979231cc291ad016586abd353eff42 100644 (file)
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -940,7 +940,6 @@ static int reclaimer(void *ptr)
         allow_signal(SIGKILL);
  
         /* Ensure exclusive access to NFSv4 state */
-       lock_kernel();
         down_write(&clp->cl_sem);
         /* Are there any NFS mounts out there? */
         if (list_empty(&clp->cl_superblocks))
@@ -1000,7 +999,6 @@ restart_loop:
         nfs_delegation_reap_unclaimed(clp);
  out:
         up_write(&clp->cl_sem);
-       unlock_kernel();
         if (status == -NFS4ERR_CB_PATH_DOWN)
                 nfs_handle_cb_pathdown(clp);
         nfs4_clear_recover_bit(clp);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c

index 531379d36823032c59740eecd738411cd4575593..46763d1cd3976bca27f4d5dca5d3dd9e36d3453b 100644 (file)
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -1,6 +1,4 @@
  /*
- *  $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $
- *
   *  Copyright (C) 1995, 1996  Gero Kuhlmann <gero@gkminix.han.de>
   *
   *  Allow an NFS filesystem to be mounted as root. The way this works is:
@@ -297,10 +295,10 @@ static int __init root_nfs_name(char *name)
         nfs_data.flags    = NFS_MOUNT_NONLM;    /* No lockd in nfs root yet */
         nfs_data.rsize    = NFS_DEF_FILE_IO_SIZE;
         nfs_data.wsize    = NFS_DEF_FILE_IO_SIZE;
-       nfs_data.acregmin = 3;
-       nfs_data.acregmax = 60;
-       nfs_data.acdirmin = 30;
-       nfs_data.acdirmax = 60;
+       nfs_data.acregmin = NFS_DEF_ACREGMIN;
+       nfs_data.acregmax = NFS_DEF_ACREGMAX;
+       nfs_data.acdirmin = NFS_DEF_ACDIRMIN;
+       nfs_data.acdirmax = NFS_DEF_ACDIRMAX;
         strcpy(buf, NFS_ROOT);
  
         /* Process options received from the remote server */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c

index 03599bfe81cf878fb0a65f233b80aeb184146bb4..4dbb84df1b686995346cf352bc8107b5dd424028 100644 (file)
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -129,6 +129,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
         sattr->ia_mode &= S_IALLUGO;
  
         dprintk("NFS call  setattr\n");
+       if (sattr->ia_valid & ATTR_FILE)
+               msg.rpc_cred = nfs_file_cred(sattr->ia_file);
         nfs_fattr_init(fattr);
         status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
         if (status == 0)
@@ -598,6 +600,29 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
         return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
  }
  
+/* Helper functions for NFS lock bounds checking */
+#define NFS_LOCK32_OFFSET_MAX ((__s32)0x7fffffffUL)
+static int nfs_lock_check_bounds(const struct file_lock *fl)
+{
+       __s32 start, end;
+
+       start = (__s32)fl->fl_start;
+       if ((loff_t)start != fl->fl_start)
+               goto out_einval;
+
+       if (fl->fl_end != OFFSET_MAX) {
+               end = (__s32)fl->fl_end;
+               if ((loff_t)end != fl->fl_end)
+                       goto out_einval;
+       } else
+               end = NFS_LOCK32_OFFSET_MAX;
+
+       if (start < 0 || start > end)
+               goto out_einval;
+       return 0;
+out_einval:
+       return -EINVAL;
+}
  
  const struct nfs_rpc_ops nfs_v2_clientops = {
         .version        = 2,                   /* protocol version */
@@ -630,7 +655,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
         .write_setup    = nfs_proc_write_setup,
         .write_done     = nfs_write_done,
         .commit_setup   = nfs_proc_commit_setup,
-       .file_open      = nfs_open,
-       .file_release   = nfs_release,
         .lock           = nfs_proc_lock,
+       .lock_check_bounds = nfs_lock_check_bounds,
  };
diff --git a/fs/nfs/super.c b/fs/nfs/super.c

index 614efeed5437f95da60a43563130593aa0d02d32..1b94e3650f5cb343d241d372d0d03d85ed42528d 100644 (file)
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -47,6 +47,7 @@
  #include <linux/inet.h>
  #include <linux/in6.h>
  #include <net/ipv6.h>
+#include <linux/netdevice.h>
  #include <linux/nfs_xdr.h>
  #include <linux/magic.h>
  #include <linux/parser.h>
@@ -65,7 +66,6 @@
  enum {
         /* Mount options that take no arguments */
         Opt_soft, Opt_hard,
-       Opt_intr, Opt_nointr,
         Opt_posix, Opt_noposix,
         Opt_cto, Opt_nocto,
         Opt_ac, Opt_noac,
@@ -92,8 +92,8 @@ enum {
         Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
         Opt_addr, Opt_mountaddr, Opt_clientaddr,
  
-       /* Mount options that are ignored */
-       Opt_userspace, Opt_deprecated,
+       /* Special mount options */
+       Opt_userspace, Opt_deprecated, Opt_sloppy,
  
         Opt_err
  };
@@ -101,10 +101,14 @@ enum {
  static match_table_t nfs_mount_option_tokens = {
         { Opt_userspace, "bg" },
         { Opt_userspace, "fg" },
+       { Opt_userspace, "retry=%s" },
+
+       { Opt_sloppy, "sloppy" },
+
         { Opt_soft, "soft" },
         { Opt_hard, "hard" },
-       { Opt_intr, "intr" },
-       { Opt_nointr, "nointr" },
+       { Opt_deprecated, "intr" },
+       { Opt_deprecated, "nointr" },
         { Opt_posix, "posix" },
         { Opt_noposix, "noposix" },
         { Opt_cto, "cto" },
@@ -136,7 +140,6 @@ static match_table_t nfs_mount_option_tokens = {
         { Opt_acdirmin, "acdirmin=%u" },
         { Opt_acdirmax, "acdirmax=%u" },
         { Opt_actimeo, "actimeo=%u" },
-       { Opt_userspace, "retry=%u" },
         { Opt_namelen, "namlen=%u" },
         { Opt_mountport, "mountport=%u" },
         { Opt_mountvers, "mountvers=%u" },
@@ -207,6 +210,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type,
                 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
  static void nfs_kill_super(struct super_block *);
  static void nfs_put_super(struct super_block *);
+static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
  
  static struct file_system_type nfs_fs_type = {
         .owner          = THIS_MODULE,
@@ -234,6 +238,7 @@ static const struct super_operations nfs_sops = {
         .umount_begin   = nfs_umount_begin,
         .show_options   = nfs_show_options,
         .show_stats     = nfs_show_stats,
+       .remount_fs     = nfs_remount,
  };
  
  #ifdef CONFIG_NFS_V4
@@ -278,6 +283,7 @@ static const struct super_operations nfs4_sops = {
         .umount_begin   = nfs_umount_begin,
         .show_options   = nfs_show_options,
         .show_stats     = nfs_show_stats,
+       .remount_fs     = nfs_remount,
  };
  #endif
  
@@ -368,8 +374,6 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
         };
         int error;
  
-       lock_kernel();
-
         error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
         if (error < 0)
                 goto out_err;
@@ -401,12 +405,10 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  
         buf->f_namelen = server->namelen;
  
-       unlock_kernel();
         return 0;
  
   out_err:
         dprintk("%s: statfs error = %d\n", __func__, -error);
-       unlock_kernel();
         return error;
  }
  
@@ -514,13 +516,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
         if (nfss->bsize != 0)
                 seq_printf(m, ",bsize=%u", nfss->bsize);
         seq_printf(m, ",namlen=%u", nfss->namelen);
-       if (nfss->acregmin != 3*HZ || showdefaults)
+       if (nfss->acregmin != NFS_DEF_ACREGMIN*HZ || showdefaults)
                 seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ);
-       if (nfss->acregmax != 60*HZ || showdefaults)
+       if (nfss->acregmax != NFS_DEF_ACREGMAX*HZ || showdefaults)
                 seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ);
-       if (nfss->acdirmin != 30*HZ || showdefaults)
+       if (nfss->acdirmin != NFS_DEF_ACDIRMIN*HZ || showdefaults)
                 seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ);
-       if (nfss->acdirmax != 60*HZ || showdefaults)
+       if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults)
                 seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);
         for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
                 if (nfss->flags & nfs_infop->flag)
@@ -702,49 +704,233 @@ static int nfs_verify_server_address(struct sockaddr *addr)
         return 0;
  }
  
+/*
+ * Convert an IPv4 address in presentation format into a sockaddr_in.
+ *
+ * @string:   text of the address
+ * @str_len:  number of bytes of @string handed to the parser
+ * @sap:      sockaddr to fill in
+ * @addr_len: set to sizeof(struct sockaddr_in) on success
+ *
+ * If the text is longer than INET_ADDRSTRLEN or does not parse, the
+ * address family is set to AF_UNSPEC and *addr_len is set to zero.
+ */
+static void nfs_parse_ipv4_address(char *string, size_t str_len,
+                                  struct sockaddr *sap, size_t *addr_len)
+{
+       struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+       u8 *addr = (u8 *)&sin->sin_addr.s_addr;
+
+       if (str_len <= INET_ADDRSTRLEN) {
+               /* "%.*s" caps the output at str_len bytes; "%*s" only pads */
+               dfprintk(MOUNT, "NFS: parsing IPv4 address %.*s\n",
+                               (int)str_len, string);
+
+               sin->sin_family = AF_INET;
+               *addr_len = sizeof(*sin);
+               if (in4_pton(string, str_len, addr, '\0', NULL))
+                       return;
+       }
+
+       /* too long or unparsable: report "no address" to the caller */
+       sap->sa_family = AF_UNSPEC;
+       *addr_len = 0;
+}
+
+#define IPV6_SCOPE_DELIMITER   '%'
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/*
+ * Set the scope ID of a link-local IPv6 address from the text that
+ * follows the scope delimiter.
+ *
+ * Nothing is done unless the address already stored in @sin6 is
+ * link-local and @delim points at IPV6_SCOPE_DELIMITER.  The text after
+ * the delimiter is first looked up as a network device name in init_net;
+ * if no such device exists it is parsed as a decimal number.  If the
+ * copy of the scope text cannot be allocated, sin6_scope_id is left
+ * unchanged.
+ */
+static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
+                                   const char *delim,
+                                   struct sockaddr_in6 *sin6)
+{
+       unsigned long scope_id = 0;
+       struct net_device *netdev;
+       char *scope;
+
+       if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) ||
+           *delim != IPV6_SCOPE_DELIMITER)
+               return;
+
+       /* duplicate everything between the delimiter and the end of string */
+       scope = kstrndup(delim + 1, (string + str_len) - delim - 1,
+                        GFP_KERNEL);
+       if (scope == NULL)
+               return;
+
+       netdev = dev_get_by_name(&init_net, scope);
+       if (netdev == NULL) {
+               /* scope_id is set to zero on error */
+               strict_strtoul(scope, 10, &scope_id);
+       } else {
+               scope_id = netdev->ifindex;
+               dev_put(netdev);
+       }
+       kfree(scope);
+
+       sin6->sin6_scope_id = scope_id;
+       dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id);
+}
+
+/*
+ * Convert an IPv6 address in presentation format, optionally followed
+ * by an IPV6_SCOPE_DELIMITER and a scope ID, into a sockaddr_in6.
+ *
+ * @string:   text of the address
+ * @str_len:  number of bytes of @string handed to the parser
+ * @sap:      sockaddr to fill in
+ * @addr_len: set to sizeof(struct sockaddr_in6) on success
+ *
+ * If the text is longer than INET6_ADDRSTRLEN or does not parse, the
+ * address family is set to AF_UNSPEC and *addr_len is set to zero.
+ */
+static void nfs_parse_ipv6_address(char *string, size_t str_len,
+                                  struct sockaddr *sap, size_t *addr_len)
+{
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+       u8 *addr = (u8 *)&sin6->sin6_addr.in6_u;
+       const char *delim;
+
+       if (str_len <= INET6_ADDRSTRLEN) {
+               /* "%.*s" caps the output at str_len bytes; "%*s" only pads */
+               dfprintk(MOUNT, "NFS: parsing IPv6 address %.*s\n",
+                               (int)str_len, string);
+
+               sin6->sin6_family = AF_INET6;
+               *addr_len = sizeof(*sin6);
+               /* on success, delim points at where address parsing stopped */
+               if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) {
+                       nfs_parse_ipv6_scope_id(string, str_len, delim, sin6);
+                       return;
+               }
+       }
+
+       /* too long or unparsable: report "no address" to the caller */
+       sap->sa_family = AF_UNSPEC;
+       *addr_len = 0;
+}
+#else
+/*
+ * Variant built when neither CONFIG_IPV6 nor CONFIG_IPV6_MODULE is
+ * defined: every IPv6 address is rejected by reporting AF_UNSPEC and a
+ * zero address length.  @string and @str_len are not examined.
+ */
+static void nfs_parse_ipv6_address(char *string, size_t str_len,
+                                  struct sockaddr *sap, size_t *addr_len)
+{
+       *addr_len = 0;
+       sap->sa_family = AF_UNSPEC;
+}
+#endif
+
  /*
- * Parse string addresses passed in via a mount option,
- * and construct a sockaddr based on the result.
+ * Construct a sockaddr based on the contents of a string that contains
+ * an IP address in presentation format.
   *
- * If address parsing fails, set the sockaddr's address
- * family to AF_UNSPEC to force nfs_verify_server_address()
- * to punt the mount.
+ * If there is a problem constructing the new sockaddr, set the address
+ * family to AF_UNSPEC.
   */
-static void nfs_parse_server_address(char *value,
-                                    struct sockaddr *sap,
-                                    size_t *len)
+static void nfs_parse_ip_address(char *string, size_t str_len,
+                                struct sockaddr *sap, size_t *addr_len)
  {
-       if (strchr(value, ':')) {
-               struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
-               u8 *addr = (u8 *)&ap->sin6_addr.in6_u;
+       unsigned int i, colons;
  
-               ap->sin6_family = AF_INET6;
-               *len = sizeof(*ap);
-               if (in6_pton(value, -1, addr, '\0', NULL))
-                       return;
-       } else {
-               struct sockaddr_in *ap = (struct sockaddr_in *)sap;
-               u8 *addr = (u8 *)&ap->sin_addr.s_addr;
+       colons = 0;
+       for (i = 0; i < str_len; i++)
+               if (string[i] == ':')
+                       colons++;
+
+       if (colons >= 2)
+               nfs_parse_ipv6_address(string, str_len, sap, addr_len);
+       else
+               nfs_parse_ipv4_address(string, str_len, sap, addr_len);
+}
+
+/*
+ * Sanity check the NFS transport protocol: UDP, TCP and RDMA are left
+ * as they are, and any other setting is replaced with TCP.
+ */
+static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt)
+{
+       if (mnt->nfs_server.protocol == XPRT_TRANSPORT_UDP ||
+           mnt->nfs_server.protocol == XPRT_TRANSPORT_TCP ||
+           mnt->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
+               return;
+
+       mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+}
+
+/*
+ * For text based NFSv2/v3 mounts, the mount protocol transport default
+ * settings should depend upon the specified NFS transport.
+ */
+static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
+{
+       nfs_validate_transport_protocol(mnt);
  
-               ap->sin_family = AF_INET;
-               *len = sizeof(*ap);
-               if (in4_pton(value, -1, addr, '\0', NULL))
+       if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP ||
+           mnt->mount_server.protocol == XPRT_TRANSPORT_TCP)
                         return;
+       switch (mnt->nfs_server.protocol) {
+       case XPRT_TRANSPORT_UDP:
+               mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
+               break;
+       case XPRT_TRANSPORT_TCP:
+       case XPRT_TRANSPORT_RDMA:
+               mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
         }
+}
  
-       sap->sa_family = AF_UNSPEC;
-       *len = 0;
+/*
+ * Parse the value of the 'sec=' option.
+ *
+ * When the value names a known flavor, that flavor is stored in
+ * mnt->auth_flavors[0] and 1 is returned.  mnt->auth_flavor_len, which
+ * is for v4 mounts, is set to 0 for "none" and "sys" and to 1 for every
+ * other flavor.  When the value is not recognized, 0 is returned and
+ * @mnt is not modified.
+ */
+static int nfs_parse_security_flavors(char *value,
+                                     struct nfs_parsed_mount_data *mnt)
+{
+       substring_t args[MAX_OPT_ARGS];
+       int token;
+
+       dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value);
+
+       token = match_token(value, nfs_secflavor_tokens, args);
+       switch (token) {
+       case Opt_sec_none:
+               mnt->auth_flavors[0] = RPC_AUTH_NULL;
+               break;
+       case Opt_sec_sys:
+               mnt->auth_flavors[0] = RPC_AUTH_UNIX;
+               break;
+       case Opt_sec_krb5:
+               mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
+               break;
+       case Opt_sec_krb5i:
+               mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
+               break;
+       case Opt_sec_krb5p:
+               mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
+               break;
+       case Opt_sec_lkey:
+               mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
+               break;
+       case Opt_sec_lkeyi:
+               mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
+               break;
+       case Opt_sec_lkeyp:
+               mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
+               break;
+       case Opt_sec_spkm:
+               mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
+               break;
+       case Opt_sec_spkmi:
+               mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
+               break;
+       case Opt_sec_spkmp:
+               mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
+               break;
+       default:
+               /* unknown flavor name: nothing has been stored in mnt */
+               return 0;
+       }
+
+       /* only the "none" and "sys" flavors leave the flavor length at zero */
+       if (token == Opt_sec_none || token == Opt_sec_sys)
+               mnt->auth_flavor_len = 0;
+       else
+               mnt->auth_flavor_len = 1;
+
+       return 1;
+}
+
+/*
+ * Emit a MOUNT debugging message reporting that the mount option named
+ * by @option was given a bad value.
+ */
+static void nfs_parse_invalid_value(const char *option)
+{
+       dfprintk(MOUNT, "NFS:   bad value specified for %s option\n", option);
  }
  
  /*
   * Error-check and convert a string of mount options from user space into
- * a data structure
+ * a data structure.  The whole mount string is processed; bad options are
+ * skipped as they are encountered.  If there were no errors, return 1;
+ * otherwise return 0 (zero).
   */
  static int nfs_parse_mount_options(char *raw,
                                    struct nfs_parsed_mount_data *mnt)
  {
         char *p, *string, *secdata;
-       int rc;
+       int rc, sloppy = 0, errors = 0;
  
         if (!raw) {
                 dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
@@ -777,15 +963,16 @@ static int nfs_parse_mount_options(char *raw,
  
                 token = match_token(p, nfs_mount_option_tokens, args);
                 switch (token) {
+
+               /*
+                * boolean options:  foo/nofoo
+                */
                 case Opt_soft:
                         mnt->flags |= NFS_MOUNT_SOFT;
                         break;
                 case Opt_hard:
                         mnt->flags &= ~NFS_MOUNT_SOFT;
                         break;
-               case Opt_intr:
-               case Opt_nointr:
-                       break;
                 case Opt_posix:
                         mnt->flags |= NFS_MOUNT_POSIX;
                         break;
@@ -819,20 +1006,14 @@ static int nfs_parse_mount_options(char *raw,
                 case Opt_udp:
                         mnt->flags &= ~NFS_MOUNT_TCP;
                         mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
-                       mnt->timeo = 7;
-                       mnt->retrans = 5;
                         break;
                 case Opt_tcp:
                         mnt->flags |= NFS_MOUNT_TCP;
                         mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-                       mnt->timeo = 600;
-                       mnt->retrans = 2;
                         break;
                 case Opt_rdma:
                         mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
                         mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
-                       mnt->timeo = 600;
-                       mnt->retrans = 2;
                         break;
                 case Opt_acl:
                         mnt->flags &= ~NFS_MOUNT_NOACL;
@@ -853,165 +1034,144 @@ static int nfs_parse_mount_options(char *raw,
                         mnt->flags |= NFS_MOUNT_UNSHARED;
                         break;
  
+               /*
+                * options that take numeric values
+                */
                 case Opt_port:
-                       if (match_int(args, &option))
-                               return 0;
-                       if (option < 0 || option > 65535)
-                               return 0;
-                       mnt->nfs_server.port = option;
+                       if (match_int(args, &option) ||
+                           option < 0 || option > USHORT_MAX) {
+                               errors++;
+                               nfs_parse_invalid_value("port");
+                       } else
+                               mnt->nfs_server.port = option;
                         break;
                 case Opt_rsize:
-                       if (match_int(args, &mnt->rsize))
-                               return 0;
+                       if (match_int(args, &option) || option < 0) {
+                               errors++;
+                               nfs_parse_invalid_value("rsize");
+                       } else
+                               mnt->rsize = option;
                         break;
                 case Opt_wsize:
-                       if (match_int(args, &mnt->wsize))
-                               return 0;
+                       if (match_int(args, &option) || option < 0) {
+                               errors++;
+                               nfs_parse_invalid_value("wsize");
+                       } else
+                               mnt->wsize = option;
                         break;
                 case Opt_bsize:
-                       if (match_int(args, &option))
-                               return 0;
-                       if (option < 0)
-                               return 0;
-                       mnt->bsize = option;
+                       if (match_int(args, &option) || option < 0) {
+                               errors++;
+                               nfs_parse_invalid_value("bsize");
+                       } else
+                               mnt->bsize = option;
                         break;
                 case Opt_timeo:
-                       if (match_int(args, &mnt->timeo))
-                               return 0;
+                       if (match_int(args, &option) || option <= 0) {
+                               errors++;
+                               nfs_parse_invalid_value("timeo");
+                       } else
+                               mnt->timeo = option;
                         break;
                 case Opt_retrans:
-                       if (match_int(args, &mnt->retrans))
-                               return 0;
+                       if (match_int(args, &option) || option <= 0) {
+                               errors++;
+                               nfs_parse_invalid_value("retrans");
+                       } else
+                               mnt->retrans = option;
                         break;
                 case Opt_acregmin:
-                       if (match_int(args, &mnt->acregmin))
-                               return 0;
+                       if (match_int(args, &option) || option < 0) {
+                               errors++;
+                               nfs_parse_invalid_value("acregmin");
+                       } else
+                               mnt->acregmin = option;
                         break;
                 case Opt_acregmax:
-                       if (match_int(args, &mnt->acregmax))
-                               return 0;
+                       if (match_int(args, &option) || option < 0) {
+                               errors++;
+                               nfs_parse_invalid_value("acregmax");
+                       } else
+                               mnt->acregmax = option;
                         break;
                 case Opt_acdirmin:
-                       if (match_int(args, &mnt->acdirmin))
-                               return 0;
+                       if (match_int(args, &option) || option < 0) {
+                               errors++;
+                               nfs_parse_invalid_value("acdirmin");
+                       } else
+                               mnt->acdirmin = option;
                         break;
                 case Opt_acdirmax:
-                       if (match_int(args, &mnt->acdirmax))
-                               return 0;
+                       if (match_int(args, &option) || option < 0) {
+                               errors++;
+                               nfs_parse_invalid_value("acdirmax");
+                       } else
+                               mnt->acdirmax = option;
                         break;
                 case Opt_actimeo:
-                       if (match_int(args, &option))
-                               return 0;
-                       if (option < 0)
-                               return 0;
-                       mnt->acregmin =
-                       mnt->acregmax =
-                       mnt->acdirmin =
-                       mnt->acdirmax = option;
+                       if (match_int(args, &option) || option < 0) {
+                               errors++;
+                               nfs_parse_invalid_value("actimeo");
+                       } else
+                               mnt->acregmin = mnt->acregmax =
+                               mnt->acdirmin = mnt->acdirmax = option;
                         break;
                 case Opt_namelen:
-                       if (match_int(args, &mnt->namlen))
-                               return 0;
+                       if (match_int(args, &option) || option < 0) {
+                               errors++;
+                               nfs_parse_invalid_value("namlen");
+                       } else
+                               mnt->namlen = option;
                         break;
                 case Opt_mountport:
-                       if (match_int(args, &option))
-                               return 0;
-                       if (option < 0 || option > 65535)
-                               return 0;
-                       mnt->mount_server.port = option;
+                       if (match_int(args, &option) ||
+                           option < 0 || option > USHORT_MAX) {
+                               errors++;
+                               nfs_parse_invalid_value("mountport");
+                       } else
+                               mnt->mount_server.port = option;
                         break;
                 case Opt_mountvers:
-                       if (match_int(args, &option))
-                               return 0;
-                       if (option < 0)
-                               return 0;
-                       mnt->mount_server.version = option;
+                       if (match_int(args, &option) ||
+                           option < NFS_MNT_VERSION ||
+                           option > NFS_MNT3_VERSION) {
+                               errors++;
+                               nfs_parse_invalid_value("mountvers");
+                       } else
+                               mnt->mount_server.version = option;
                         break;
                 case Opt_nfsvers:
-                       if (match_int(args, &option))
-                               return 0;
+                       if (match_int(args, &option)) {
+                               errors++;
+                               nfs_parse_invalid_value("nfsvers");
+                               break;
+                       }
                         switch (option) {
-                       case 2:
+                       case NFS2_VERSION:
                                 mnt->flags &= ~NFS_MOUNT_VER3;
                                 break;
-                       case 3:
+                       case NFS3_VERSION:
                                 mnt->flags |= NFS_MOUNT_VER3;
                                 break;
                         default:
-                               goto out_unrec_vers;
+                               errors++;
+                               nfs_parse_invalid_value("nfsvers");
                         }
                         break;
  
+               /*
+                * options that take text values
+                */
                 case Opt_sec:
                         string = match_strdup(args);
                         if (string == NULL)
                                 goto out_nomem;
-                       token = match_token(string, nfs_secflavor_tokens, args);
+                       rc = nfs_parse_security_flavors(string, mnt);
                         kfree(string);
-
-                       /*
-                        * The flags setting is for v2/v3.  The flavor_len
-                        * setting is for v4.  v2/v3 also need to know the
-                        * difference between NULL and UNIX.
-                        */
-                       switch (token) {
-                       case Opt_sec_none:
-                               mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 0;
-                               mnt->auth_flavors[0] = RPC_AUTH_NULL;
-                               break;
-                       case Opt_sec_sys:
-                               mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 0;
-                               mnt->auth_flavors[0] = RPC_AUTH_UNIX;
-                               break;
-                       case Opt_sec_krb5:
-                               mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 1;
-                               mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
-                               break;
-                       case Opt_sec_krb5i:
-                               mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 1;
-                               mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
-                               break;
-                       case Opt_sec_krb5p:
-                               mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 1;
-                               mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
-                               break;
-                       case Opt_sec_lkey:
-                               mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 1;
-                               mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
-                               break;
-                       case Opt_sec_lkeyi:
-                               mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 1;
-                               mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
-                               break;
-                       case Opt_sec_lkeyp:
-                               mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 1;
-                               mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
-                               break;
-                       case Opt_sec_spkm:
-                               mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 1;
-                               mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
-                               break;
-                       case Opt_sec_spkmi:
-                               mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 1;
-                               mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
-                               break;
-                       case Opt_sec_spkmp:
-                               mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-                               mnt->auth_flavor_len = 1;
-                               mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
-                               break;
-                       default:
-                               goto out_unrec_sec;
+                       if (!rc) {
+                               errors++;
+                               dfprintk(MOUNT, "NFS:   unrecognized "
+                                               "security flavor\n");
                         }
                         break;
                 case Opt_proto:
@@ -1026,24 +1186,20 @@ static int nfs_parse_mount_options(char *raw,
                         case Opt_xprt_udp:
                                 mnt->flags &= ~NFS_MOUNT_TCP;
                                 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
-                               mnt->timeo = 7;
-                               mnt->retrans = 5;
                                 break;
                         case Opt_xprt_tcp:
                                 mnt->flags |= NFS_MOUNT_TCP;
                                 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-                               mnt->timeo = 600;
-                               mnt->retrans = 2;
                                 break;
                         case Opt_xprt_rdma:
                                 /* vector side protocols to TCP */
                                 mnt->flags |= NFS_MOUNT_TCP;
                                 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
-                               mnt->timeo = 600;
-                               mnt->retrans = 2;
                                 break;
                         default:
-                               goto out_unrec_xprt;
+                               errors++;
+                               dfprintk(MOUNT, "NFS:   unrecognized "
+                                               "transport protocol\n");
                         }
                         break;
                 case Opt_mountproto:
@@ -1063,16 +1219,19 @@ static int nfs_parse_mount_options(char *raw,
                                 break;
                         case Opt_xprt_rdma: /* not used for side protocols */
                         default:
-                               goto out_unrec_xprt;
+                               errors++;
+                               dfprintk(MOUNT, "NFS:   unrecognized "
+                                               "transport protocol\n");
                         }
                         break;
                 case Opt_addr:
                         string = match_strdup(args);
                         if (string == NULL)
                                 goto out_nomem;
-                       nfs_parse_server_address(string, (struct sockaddr *)
-                                                &mnt->nfs_server.address,
-                                                &mnt->nfs_server.addrlen);
+                       nfs_parse_ip_address(string, strlen(string),
+                                            (struct sockaddr *)
+                                               &mnt->nfs_server.address,
+                                            &mnt->nfs_server.addrlen);
                         kfree(string);
                         break;
                 case Opt_clientaddr:
@@ -1093,24 +1252,33 @@ static int nfs_parse_mount_options(char *raw,
                         string = match_strdup(args);
                         if (string == NULL)
                                 goto out_nomem;
-                       nfs_parse_server_address(string, (struct sockaddr *)
-                                                &mnt->mount_server.address,
-                                                &mnt->mount_server.addrlen);
+                       nfs_parse_ip_address(string, strlen(string),
+                                            (struct sockaddr *)
+                                               &mnt->mount_server.address,
+                                            &mnt->mount_server.addrlen);
                         kfree(string);
                         break;
  
+               /*
+                * Special options
+                */
+               case Opt_sloppy:
+                       sloppy = 1;
+                       dfprintk(MOUNT, "NFS:   relaxing parsing rules\n");
+                       break;
                 case Opt_userspace:
                 case Opt_deprecated:
+                       dfprintk(MOUNT, "NFS:   ignoring mount option "
+                                       "'%s'\n", p);
                         break;
  
                 default:
-                       goto out_unknown;
+                       errors++;
+                       dfprintk(MOUNT, "NFS:   unrecognized mount option "
+                                       "'%s'\n", p);
                 }
         }
  
-       nfs_set_port((struct sockaddr *)&mnt->nfs_server.address,
-                               mnt->nfs_server.port);
-
         return 1;
  
  out_nomem:
@@ -1120,21 +1288,6 @@ out_security_failure:
         free_secdata(secdata);
         printk(KERN_INFO "NFS: security options invalid: %d\n", rc);
         return 0;
-out_unrec_vers:
-       printk(KERN_INFO "NFS: unrecognized NFS version number\n");
-       return 0;
-
-out_unrec_xprt:
-       printk(KERN_INFO "NFS: unrecognized transport protocol\n");
-       return 0;
-
-out_unrec_sec:
-       printk(KERN_INFO "NFS: unrecognized security flavor\n");
-       return 0;
-
-out_unknown:
-       printk(KERN_INFO "NFS: unknown mount option: %s\n", p);
-       return 0;
  }
  
  /*
@@ -1188,11 +1341,146 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
         if (status == 0)
                 return 0;
  
-       dfprintk(MOUNT, "NFS: unable to mount server %s, error %d",
+       dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
                         hostname, status);
         return status;
  }
  
+/*
+ * Split a device name of the form "hostname:export_path" at its first
+ * colon, storing newly allocated copies of the two halves in *hostname
+ * and *export_path.  If the hostname part contains a comma, only the
+ * text before the first comma is kept.
+ *
+ * Returns zero on success, -EINVAL if there is no colon or the hostname
+ * starts with a comma, -ENAMETOOLONG if either part exceeds its limit,
+ * or -ENOMEM if a copy cannot be allocated.
+ */
+static int nfs_parse_simple_hostname(const char *dev_name,
+                                    char **hostname, size_t maxnamlen,
+                                    char **export_path, size_t maxpathlen)
+{
+       const char *path;
+       char *sep, *list;
+       size_t n;
+
+       sep = strchr(dev_name, ':');
+       if (sep == NULL) {
+               dfprintk(MOUNT, "NFS: device name not in host:path format\n");
+               return -EINVAL;
+       }
+
+       n = sep - dev_name;
+       if (n > maxnamlen) {
+               dfprintk(MOUNT, "NFS: server hostname too long\n");
+               return -ENAMETOOLONG;
+       }
+
+       /* N.B. caller will free nfs_server.hostname in all cases */
+       *hostname = kstrndup(dev_name, n, GFP_KERNEL);
+       if (*hostname == NULL) {
+               dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
+               return -ENOMEM;
+       }
+
+       /* kill possible hostname list: not supported */
+       list = strchr(*hostname, ',');
+       if (list == *hostname) {
+               dfprintk(MOUNT, "NFS: device name not in host:path format\n");
+               return -EINVAL;
+       }
+       if (list != NULL)
+               *list = '\0';
+
+       /* everything after the colon is the export path */
+       path = sep + 1;
+       n = strlen(path);
+       if (n > maxpathlen) {
+               dfprintk(MOUNT, "NFS: export pathname too long\n");
+               return -ENAMETOOLONG;
+       }
+
+       *export_path = kstrndup(path, n, GFP_KERNEL);
+       if (*export_path == NULL) {
+               dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
+               return -ENOMEM;
+       }
+
+       dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
+       return 0;
+}
+
+/*
+ * Parse a device name of the form "[hostname]:export_path".  The square
+ * brackets allow the hostname to contain one or more colons.  The
+ * hostname ends at the first closing square bracket, and a colon must
+ * follow that bracket.
+ *
+ * Newly allocated copies of the two parts are stored in *hostname and
+ * *export_path.  Returns zero on success, -EINVAL if the "]:" separator
+ * is missing, -ENAMETOOLONG if either part exceeds its limit, or -ENOMEM
+ * if a copy cannot be allocated.
+ */
+static int nfs_parse_protected_hostname(const char *dev_name,
+                                       char **hostname, size_t maxnamlen,
+                                       char **export_path, size_t maxpathlen)
+{
+       const char *host = dev_name + 1;        /* skip the opening bracket */
+       const char *path;
+       char *close;
+       size_t n;
+
+       close = strchr(host, ']');
+       if (close == NULL || close[1] != ':') {
+               dfprintk(MOUNT, "NFS: device name not in host:path format\n");
+               return -EINVAL;
+       }
+
+       n = close - host;
+       if (n > maxnamlen) {
+               dfprintk(MOUNT, "NFS: server hostname too long\n");
+               return -ENAMETOOLONG;
+       }
+
+       /* N.B. caller will free nfs_server.hostname in all cases */
+       *hostname = kstrndup(host, n, GFP_KERNEL);
+       if (!*hostname) {
+               dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
+               return -ENOMEM;
+       }
+
+       /* step over "]:" to reach the export path */
+       path = close + 2;
+       n = strlen(path);
+       if (n > maxpathlen) {
+               dfprintk(MOUNT, "NFS: export pathname too long\n");
+               return -ENAMETOOLONG;
+       }
+
+       *export_path = kstrndup(path, n, GFP_KERNEL);
+       if (*export_path == NULL) {
+               dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * Split "dev_name" into "hostname:export_path".
+ *
+ * A device name that starts with a left square bracket is passed to
+ * nfs_parse_protected_hostname(), which allows the hostname to contain
+ * colons.  Any other device name is passed to
+ * nfs_parse_simple_hostname(), which splits at the leftmost colon.
+ *
+ * Note: caller frees hostname and export path, even on error.
+ */
+static int nfs_parse_devname(const char *dev_name,
+                            char **hostname, size_t maxnamlen,
+                            char **export_path, size_t maxpathlen)
+{
+       if (*dev_name != '[')
+               return nfs_parse_simple_hostname(dev_name,
+                                                hostname, maxnamlen,
+                                                export_path, maxpathlen);
+
+       return nfs_parse_protected_hostname(dev_name,
+                                           hostname, maxnamlen,
+                                           export_path, maxpathlen);
+}
+
  /*
   * Validate the NFS2/NFS3 mount data
   * - fills in the mount root filehandle
@@ -1222,16 +1510,14 @@ static int nfs_validate_mount_data(void *options,
         args->flags             = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
         args->rsize             = NFS_MAX_FILE_IO_SIZE;
         args->wsize             = NFS_MAX_FILE_IO_SIZE;
-       args->timeo             = 600;
-       args->retrans           = 2;
-       args->acregmin          = 3;
-       args->acregmax          = 60;
-       args->acdirmin          = 30;
-       args->acdirmax          = 60;
+       args->acregmin          = NFS_DEF_ACREGMIN;
+       args->acregmax          = NFS_DEF_ACREGMAX;
+       args->acdirmin          = NFS_DEF_ACDIRMIN;
+       args->acdirmax          = NFS_DEF_ACDIRMAX;
         args->mount_server.port = 0;    /* autobind unless user sets port */
-       args->mount_server.protocol = XPRT_TRANSPORT_UDP;
         args->nfs_server.port   = 0;    /* autobind unless user sets port */
         args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+       args->auth_flavors[0]   = RPC_AUTH_UNIX;
  
         switch (data->version) {
         case 1:
@@ -1289,7 +1575,9 @@ static int nfs_validate_mount_data(void *options,
                 args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
                 args->namlen            = data->namlen;
                 args->bsize             = data->bsize;
-               args->auth_flavors[0]   = data->pseudoflavor;
+
+               if (data->flags & NFS_MOUNT_SECFLAVOUR)
+                       args->auth_flavors[0] = data->pseudoflavor;
                 if (!args->nfs_server.hostname)
                         goto out_nomem;
  
@@ -1321,8 +1609,6 @@ static int nfs_validate_mount_data(void *options,
  
                 break;
         default: {
-               unsigned int len;
-               char *c;
                 int status;
  
                 if (nfs_parse_mount_options((char *)options, args) == 0)
@@ -1332,21 +1618,22 @@ static int nfs_validate_mount_data(void *options,
                                                 &args->nfs_server.address))
                         goto out_no_address;
  
-               c = strchr(dev_name, ':');
-               if (c == NULL)
-                       return -EINVAL;
-               len = c - dev_name;
-               /* N.B. caller will free nfs_server.hostname in all cases */
-               args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
-               if (!args->nfs_server.hostname)
-                       goto out_nomem;
+               nfs_set_port((struct sockaddr *)&args->nfs_server.address,
+                               args->nfs_server.port);
  
-               c++;
-               if (strlen(c) > NFS_MAXPATHLEN)
-                       return -ENAMETOOLONG;
-               args->nfs_server.export_path = c;
+               nfs_set_mount_transport_protocol(args);
+
+               status = nfs_parse_devname(dev_name,
+                                          &args->nfs_server.hostname,
+                                          PAGE_SIZE,
+                                          &args->nfs_server.export_path,
+                                          NFS_MAXPATHLEN);
+               if (!status)
+                       status = nfs_try_mount(args, mntfh);
+
+               kfree(args->nfs_server.export_path);
+               args->nfs_server.export_path = NULL;
  
-               status = nfs_try_mount(args, mntfh);
                 if (status)
                         return status;
  
@@ -1354,9 +1641,6 @@ static int nfs_validate_mount_data(void *options,
                 }
         }
  
-       if (!(args->flags & NFS_MOUNT_SECFLAVOUR))
-               args->auth_flavors[0] = RPC_AUTH_UNIX;
-
  #ifndef CONFIG_NFS_V3
         if (args->flags & NFS_MOUNT_VER3)
                 goto out_v3_not_compiled;
@@ -1396,6 +1680,80 @@ out_invalid_fh:
         return -EINVAL;
  }
  
+static int
+nfs_compare_remount_data(struct nfs_server *nfss,
+                        struct nfs_parsed_mount_data *data)
+{
+       if (data->flags != nfss->flags ||
+           data->rsize != nfss->rsize ||
+           data->wsize != nfss->wsize ||
+           data->retrans != nfss->client->cl_timeout->to_retries ||
+           data->auth_flavors[0] != nfss->client->cl_auth->au_flavor ||
+           data->acregmin != nfss->acregmin / HZ ||
+           data->acregmax != nfss->acregmax / HZ ||
+           data->acdirmin != nfss->acdirmin / HZ ||
+           data->acdirmax != nfss->acdirmax / HZ ||
+           data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
+           data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
+           memcmp(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
+                  data->nfs_server.addrlen) != 0)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int
+nfs_remount(struct super_block *sb, int *flags, char *raw_data)
+{
+       int error;
+       struct nfs_server *nfss = sb->s_fs_info;
+       struct nfs_parsed_mount_data *data;
+       struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data;
+       struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data;
+       u32 nfsvers = nfss->nfs_client->rpc_ops->version;
+
+       /*
+        * Userspace mount programs that send binary options generally send
+        * them populated with default values. We have no way to know which
+        * ones were explicitly specified. Fall back to legacy behavior and
+        * just return success.
+        */
+       if ((nfsvers == 4 && options4->version == 1) ||
+           (nfsvers <= 3 && options->version >= 1 &&
+            options->version <= 6))
+               return 0;
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (data == NULL)
+               return -ENOMEM;
+
+       /* fill out struct with values from existing mount */
+       data->flags = nfss->flags;
+       data->rsize = nfss->rsize;
+       data->wsize = nfss->wsize;
+       data->retrans = nfss->client->cl_timeout->to_retries;
+       data->auth_flavors[0] = nfss->client->cl_auth->au_flavor;
+       data->acregmin = nfss->acregmin / HZ;
+       data->acregmax = nfss->acregmax / HZ;
+       data->acdirmin = nfss->acdirmin / HZ;
+       data->acdirmax = nfss->acdirmax / HZ;
+       data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ;
+       data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
+       memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
+               data->nfs_server.addrlen);
+
+       /* overwrite those values with any that were specified */
+       error = nfs_parse_mount_options((char *)options, data);
+       if (error < 0)
+               goto out;
+
+       /* compare new mount options with old ones */
+       error = nfs_compare_remount_data(nfss, data);
+out:
+       kfree(data);
+       return error;
+}
+
  /*
   * Initialise the common bits of the superblock
   */
@@ -1811,14 +2169,13 @@ static int nfs4_validate_mount_data(void *options,
  
         args->rsize             = NFS_MAX_FILE_IO_SIZE;
         args->wsize             = NFS_MAX_FILE_IO_SIZE;
-       args->timeo             = 600;
-       args->retrans           = 2;
-       args->acregmin          = 3;
-       args->acregmax          = 60;
-       args->acdirmin          = 30;
-       args->acdirmax          = 60;
+       args->acregmin          = NFS_DEF_ACREGMIN;
+       args->acregmax          = NFS_DEF_ACREGMAX;
+       args->acdirmin          = NFS_DEF_ACDIRMIN;
+       args->acdirmax          = NFS_DEF_ACDIRMAX;
         args->nfs_server.port   = NFS_PORT; /* 2049 unless user set port= */
-       args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+       args->auth_flavors[0]   = RPC_AUTH_UNIX;
+       args->auth_flavor_len   = 0;
  
         switch (data->version) {
         case 1:
@@ -1834,18 +2191,13 @@ static int nfs4_validate_mount_data(void *options,
                                                 &args->nfs_server.address))
                         goto out_no_address;
  
-               switch (data->auth_flavourlen) {
-               case 0:
-                       args->auth_flavors[0] = RPC_AUTH_UNIX;
-                       break;
-               case 1:
+               if (data->auth_flavourlen) {
+                       if (data->auth_flavourlen > 1)
+                               goto out_inval_auth;
                         if (copy_from_user(&args->auth_flavors[0],
                                            data->auth_flavours,
                                            sizeof(args->auth_flavors[0])))
                                 return -EFAULT;
-                       break;
-               default:
-                       goto out_inval_auth;
                 }
  
                 c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
@@ -1879,10 +2231,11 @@ static int nfs4_validate_mount_data(void *options,
                 args->acdirmin  = data->acdirmin;
                 args->acdirmax  = data->acdirmax;
                 args->nfs_server.protocol = data->proto;
+               nfs_validate_transport_protocol(args);
  
                 break;
         default: {
-               unsigned int len;
+               int status;
  
                 if (nfs_parse_mount_options((char *)options, args) == 0)
                         return -EINVAL;
@@ -1891,44 +2244,25 @@ static int nfs4_validate_mount_data(void *options,
                                                 &args->nfs_server.address))
                         return -EINVAL;
  
-               switch (args->auth_flavor_len) {
-               case 0:
-                       args->auth_flavors[0] = RPC_AUTH_UNIX;
-                       break;
-               case 1:
-                       break;
-               default:
-                       goto out_inval_auth;
-               }
+               nfs_set_port((struct sockaddr *)&args->nfs_server.address,
+                               args->nfs_server.port);
  
-               /*
-                * Split "dev_name" into "hostname:mntpath".
-                */
-               c = strchr(dev_name, ':');
-               if (c == NULL)
-                       return -EINVAL;
-               /* while calculating len, pretend ':' is '\0' */
-               len = c - dev_name;
-               if (len > NFS4_MAXNAMLEN)
-                       return -ENAMETOOLONG;
-               /* N.B. caller will free nfs_server.hostname in all cases */
-               args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
-               if (!args->nfs_server.hostname)
-                       goto out_nomem;
-
-               c++;                    /* step over the ':' */
-               len = strlen(c);
-               if (len > NFS4_MAXPATHLEN)
-                       return -ENAMETOOLONG;
-               args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL);
-               if (!args->nfs_server.export_path)
-                       goto out_nomem;
+               nfs_validate_transport_protocol(args);
  
-               dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path);
+               if (args->auth_flavor_len > 1)
+                       goto out_inval_auth;
  
                 if (args->client_address == NULL)
                         goto out_no_client_address;
  
+               status = nfs_parse_devname(dev_name,
+                                          &args->nfs_server.hostname,
+                                          NFS4_MAXNAMLEN,
+                                          &args->nfs_server.export_path,
+                                          NFS4_MAXPATHLEN);
+               if (status < 0)
+                       return status;
+
                 break;
                 }
         }
@@ -1944,10 +2278,6 @@ out_inval_auth:
                  data->auth_flavourlen);
         return -EINVAL;
  
-out_nomem:
-       dfprintk(MOUNT, "NFS4: not enough memory to handle mount options\n");
-       return -ENOMEM;
-
  out_no_address:
         dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
         return -EINVAL;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c

index f333848fd3be29a010dfee8116e4978cd8655b57..3229e217c773a357cd7735f9cd72351c075dbee1 100644 (file)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -34,9 +34,6 @@
  /*
   * Local function declarations
   */
-static struct nfs_page * nfs_update_request(struct nfs_open_context*,
-                                           struct page *,
-                                           unsigned int, unsigned int);
  static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
                                   struct inode *inode, int ioflags);
  static void nfs_redirty_request(struct nfs_page *req);
@@ -136,16 +133,21 @@ static struct nfs_page *nfs_page_find_request(struct page *page)
  static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
  {
         struct inode *inode = page->mapping->host;
-       loff_t end, i_size = i_size_read(inode);
-       pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+       loff_t end, i_size;
+       pgoff_t end_index;
  
+       spin_lock(&inode->i_lock);
+       i_size = i_size_read(inode);
+       end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
         if (i_size > 0 && page->index < end_index)
-               return;
+               goto out;
         end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
         if (i_size >= end)
-               return;
-       nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
+               goto out;
         i_size_write(inode, end);
+       nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
+out:
+       spin_unlock(&inode->i_lock);
  }
  
  /* A writeback failed: mark the page as bad, and invalidate the page cache */
@@ -169,29 +171,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int
         SetPageUptodate(page);
  }
  
-static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
-               unsigned int offset, unsigned int count)
-{
-       struct nfs_page *req;
-       int ret;
-
-       for (;;) {
-               req = nfs_update_request(ctx, page, offset, count);
-               if (!IS_ERR(req))
-                       break;
-               ret = PTR_ERR(req);
-               if (ret != -EBUSY)
-                       return ret;
-               ret = nfs_wb_page(page->mapping->host, page);
-               if (ret != 0)
-                       return ret;
-       }
-       /* Update file length */
-       nfs_grow_file(page, offset, count);
-       nfs_clear_page_tag_locked(req);
-       return 0;
-}
-
  static int wb_priority(struct writeback_control *wbc)
  {
         if (wbc->for_reclaim)
@@ -268,12 +247,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
                         return ret;
                 spin_lock(&inode->i_lock);
         }
-       if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-               /* This request is marked for commit */
+       if (test_bit(PG_CLEAN, &req->wb_flags)) {
                 spin_unlock(&inode->i_lock);
-               nfs_clear_page_tag_locked(req);
-               nfs_pageio_complete(pgio);
-               return 0;
+               BUG();
         }
         if (nfs_set_page_writeback(page) != 0) {
                 spin_unlock(&inode->i_lock);
@@ -355,11 +331,19 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
  /*
   * Insert a write request into an inode
   */
-static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
  {
         struct nfs_inode *nfsi = NFS_I(inode);
         int error;
  
+       error = radix_tree_preload(GFP_NOFS);
+       if (error != 0)
+               goto out;
+
+       /* Lock the request! */
+       nfs_lock_request_dontget(req);
+
+       spin_lock(&inode->i_lock);
         error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
         BUG_ON(error);
         if (!nfsi->npages) {
@@ -373,6 +357,10 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
         kref_get(&req->wb_kref);
         radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
                                 NFS_PAGE_TAG_LOCKED);
+       spin_unlock(&inode->i_lock);
+       radix_tree_preload_end();
+out:
+       return error;
  }
  
  /*
@@ -405,19 +393,6 @@ nfs_mark_request_dirty(struct nfs_page *req)
         __set_page_dirty_nobuffers(req->wb_page);
  }
  
-/*
- * Check if a request is dirty
- */
-static inline int
-nfs_dirty_request(struct nfs_page *req)
-{
-       struct page *page = req->wb_page;
-
-       if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags))
-               return 0;
-       return !PageWriteback(page);
-}
-
  #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
  /*
   * Add a request to the inode's commit list.
@@ -430,7 +405,7 @@ nfs_mark_request_commit(struct nfs_page *req)
  
         spin_lock(&inode->i_lock);
         nfsi->ncommit++;
-       set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
+       set_bit(PG_CLEAN, &(req)->wb_flags);
         radix_tree_tag_set(&nfsi->nfs_page_tree,
                         req->wb_index,
                         NFS_PAGE_TAG_COMMIT);
@@ -440,6 +415,19 @@ nfs_mark_request_commit(struct nfs_page *req)
         __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
  }
  
+static int
+nfs_clear_request_commit(struct nfs_page *req)
+{
+       struct page *page = req->wb_page;
+
+       if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
+               dec_zone_page_state(page, NR_UNSTABLE_NFS);
+               dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+               return 1;
+       }
+       return 0;
+}
+
  static inline
  int nfs_write_need_commit(struct nfs_write_data *data)
  {
@@ -449,7 +437,7 @@ int nfs_write_need_commit(struct nfs_write_data *data)
  static inline
  int nfs_reschedule_unstable_write(struct nfs_page *req)
  {
-       if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+       if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
                 nfs_mark_request_commit(req);
                 return 1;
         }
@@ -465,6 +453,12 @@ nfs_mark_request_commit(struct nfs_page *req)
  {
  }
  
+static inline int
+nfs_clear_request_commit(struct nfs_page *req)
+{
+       return 0;
+}
+
  static inline
  int nfs_write_need_commit(struct nfs_write_data *data)
  {
@@ -522,11 +516,8 @@ static void nfs_cancel_commit_list(struct list_head *head)
  
         while(!list_empty(head)) {
                 req = nfs_list_entry(head->next);
-               dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-               dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
-                               BDI_RECLAIMABLE);
                 nfs_list_remove_request(req);
-               clear_bit(PG_NEED_COMMIT, &(req)->wb_flags);
+               nfs_clear_request_commit(req);
                 nfs_inode_remove_request(req);
                 nfs_unlock_request(req);
         }
@@ -564,110 +555,124 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg
  #endif
  
  /*
- * Try to update any existing write request, or create one if there is none.
- * In order to match, the request's credentials must match those of
- * the calling process.
+ * Search for an existing write request, and attempt to update
+ * it to reflect a new dirty region on a given page.
   *
- * Note: Should always be called with the Page Lock held!
+ * If the attempt fails, then the existing request is flushed out
+ * to disk.
   */
-static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
-               struct page *page, unsigned int offset, unsigned int bytes)
+static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
+               struct page *page,
+               unsigned int offset,
+               unsigned int bytes)
  {
-       struct address_space *mapping = page->mapping;
-       struct inode *inode = mapping->host;
-       struct nfs_page         *req, *new = NULL;
-       pgoff_t         rqend, end;
+       struct nfs_page *req;
+       unsigned int rqend;
+       unsigned int end;
+       int error;
+
+       if (!PagePrivate(page))
+               return NULL;
  
         end = offset + bytes;
+       spin_lock(&inode->i_lock);
  
         for (;;) {
-               /* Loop over all inode entries and see if we find
-                * A request for the page we wish to update
+               req = nfs_page_find_request_locked(page);
+               if (req == NULL)
+                       goto out_unlock;
+
+               rqend = req->wb_offset + req->wb_bytes;
+               /*
+                * Tell the caller to flush out the request if
+                * the offsets are non-contiguous.
+                * Note: nfs_flush_incompatible() will already
+                * have flushed out requests having wrong owners.
                  */
-               if (new) {
-                       if (radix_tree_preload(GFP_NOFS)) {
-                               nfs_release_request(new);
-                               return ERR_PTR(-ENOMEM);
-                       }
-               }
+               if (offset > rqend
+                   || end < req->wb_offset)
+                       goto out_flushme;
  
-               spin_lock(&inode->i_lock);
-               req = nfs_page_find_request_locked(page);
-               if (req) {
-                       if (!nfs_set_page_tag_locked(req)) {
-                               int error;
-
-                               spin_unlock(&inode->i_lock);
-                               error = nfs_wait_on_request(req);
-                               nfs_release_request(req);
-                               if (error < 0) {
-                                       if (new) {
-                                               radix_tree_preload_end();
-                                               nfs_release_request(new);
-                                       }
-                                       return ERR_PTR(error);
-                               }
-                               continue;
-                       }
-                       spin_unlock(&inode->i_lock);
-                       if (new) {
-                               radix_tree_preload_end();
-                               nfs_release_request(new);
-                       }
+               if (nfs_set_page_tag_locked(req))
                         break;
-               }
  
-               if (new) {
-                       nfs_lock_request_dontget(new);
-                       nfs_inode_add_request(inode, new);
-                       spin_unlock(&inode->i_lock);
-                       radix_tree_preload_end();
-                       req = new;
-                       goto zero_page;
-               }
+               /* The request is locked, so wait and then retry */
                 spin_unlock(&inode->i_lock);
-
-               new = nfs_create_request(ctx, inode, page, offset, bytes);
-               if (IS_ERR(new))
-                       return new;
+               error = nfs_wait_on_request(req);
+               nfs_release_request(req);
+               if (error != 0)
+                       goto out_err;
+               spin_lock(&inode->i_lock);
         }
  
-       /* We have a request for our page.
-        * If the creds don't match, or the
-        * page addresses don't match,
-        * tell the caller to wait on the conflicting
-        * request.
-        */
-       rqend = req->wb_offset + req->wb_bytes;
-       if (req->wb_context != ctx
-           || req->wb_page != page
-           || !nfs_dirty_request(req)
-           || offset > rqend || end < req->wb_offset) {
-               nfs_clear_page_tag_locked(req);
-               return ERR_PTR(-EBUSY);
-       }
+       if (nfs_clear_request_commit(req))
+               radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
+                               req->wb_index, NFS_PAGE_TAG_COMMIT);
  
         /* Okay, the request matches. Update the region */
         if (offset < req->wb_offset) {
                 req->wb_offset = offset;
                 req->wb_pgbase = offset;
-               req->wb_bytes = max(end, rqend) - req->wb_offset;
-               goto zero_page;
         }
-
         if (end > rqend)
                 req->wb_bytes = end - req->wb_offset;
-
+       else
+               req->wb_bytes = rqend - req->wb_offset;
+out_unlock:
+       spin_unlock(&inode->i_lock);
         return req;
-zero_page:
-       /* If this page might potentially be marked as up to date,
-        * then we need to zero any uninitalised data. */
-       if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE
-                       && !PageUptodate(req->wb_page))
-               zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE);
+out_flushme:
+       spin_unlock(&inode->i_lock);
+       nfs_release_request(req);
+       error = nfs_wb_page(inode, page);
+out_err:
+       return ERR_PTR(error);
+}
+
+/*
+ * Try to update an existing write request, or create one if there is none.
+ *
+ * Note: Should always be called with the Page Lock held to prevent races
+ * if we have to add a new request. Also assumes that the caller has
+ * already called nfs_flush_incompatible() if necessary.
+ */
+static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
+               struct page *page, unsigned int offset, unsigned int bytes)
+{
+       struct inode *inode = page->mapping->host;
+       struct nfs_page *req;
+       int error;
+
+       req = nfs_try_to_update_request(inode, page, offset, bytes);
+       if (req != NULL)
+               goto out;
+       req = nfs_create_request(ctx, inode, page, offset, bytes);
+       if (IS_ERR(req))
+               goto out;
+       error = nfs_inode_add_request(inode, req);
+       if (error != 0) {
+               nfs_release_request(req);
+               req = ERR_PTR(error);
+       }
+out:
         return req;
  }
  
+static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+               unsigned int offset, unsigned int count)
+{
+       struct nfs_page *req;
+
+       req = nfs_setup_write_request(ctx, page, offset, count);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+       /* Update file length */
+       nfs_grow_file(page, offset, count);
+       nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+       nfs_clear_page_tag_locked(req);
+       return 0;
+}
+
  int nfs_flush_incompatible(struct file *file, struct page *page)
  {
         struct nfs_open_context *ctx = nfs_file_open_context(file);
@@ -685,8 +690,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
                 req = nfs_page_find_request(page);
                 if (req == NULL)
                         return 0;
-               do_flush = req->wb_page != page || req->wb_context != ctx
-                       || !nfs_dirty_request(req);
+               do_flush = req->wb_page != page || req->wb_context != ctx;
                 nfs_release_request(req);
                 if (!do_flush)
                         return 0;
@@ -721,10 +725,10 @@ int nfs_updatepage(struct file *file, struct page *page,
  
         nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
  
-       dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
+       dprintk("NFS:       nfs_updatepage(%s/%s %d@%lld)\n",
                 file->f_path.dentry->d_parent->d_name.name,
                 file->f_path.dentry->d_name.name, count,
-               (long long)(page_offset(page) +offset));
+               (long long)(page_offset(page) + offset));
  
         /* If we're not using byte range locks, and we know the page
          * is up to date, it may be more efficient to extend the write
@@ -744,7 +748,7 @@ int nfs_updatepage(struct file *file, struct page *page,
         else
                 __set_page_dirty_nobuffers(page);
  
-        dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
+       dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
                         status, (long long)i_size_read(inode));
         return status;
  }
@@ -752,12 +756,7 @@ int nfs_updatepage(struct file *file, struct page *page,
  static void nfs_writepage_release(struct nfs_page *req)
  {
  
-       if (PageError(req->wb_page)) {
-               nfs_end_page_writeback(req->wb_page);
-               nfs_inode_remove_request(req);
-       } else if (!nfs_reschedule_unstable_write(req)) {
-               /* Set the PG_uptodate flag */
-               nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
+       if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
                 nfs_end_page_writeback(req->wb_page);
                 nfs_inode_remove_request(req);
         } else
@@ -834,7 +833,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
         NFS_PROTO(inode)->write_setup(data, &msg);
  
         dprintk("NFS: %5u initiated write call "
-               "(req %s/%Ld, %u bytes @ offset %Lu)\n",
+               "(req %s/%lld, %u bytes @ offset %llu)\n",
                 data->task.tk_pid,
                 inode->i_sb->s_id,
                 (long long)NFS_FILEID(inode),
@@ -978,13 +977,13 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
  static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
  {
         struct nfs_write_data   *data = calldata;
-       struct nfs_page         *req = data->req;
  
-       dprintk("NFS: write (%s/%Ld %d@%Ld)",
-               req->wb_context->path.dentry->d_inode->i_sb->s_id,
-               (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
-               req->wb_bytes,
-               (long long)req_offset(req));
+       dprintk("NFS: %5u write(%s/%lld %d@%lld)",
+               task->tk_pid,
+               data->req->wb_context->path.dentry->d_inode->i_sb->s_id,
+               (long long)
+                 NFS_FILEID(data->req->wb_context->path.dentry->d_inode),
+               data->req->wb_bytes, (long long)req_offset(data->req));
  
         nfs_writeback_done(task, data);
  }
@@ -1058,7 +1057,8 @@ static void nfs_writeback_release_full(void *calldata)
  
                 nfs_list_remove_request(req);
  
-               dprintk("NFS: write (%s/%Ld %d@%Ld)",
+               dprintk("NFS: %5u write (%s/%lld %d@%lld)",
+                       data->task.tk_pid,
                         req->wb_context->path.dentry->d_inode->i_sb->s_id,
                         (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
                         req->wb_bytes,
@@ -1078,8 +1078,6 @@ static void nfs_writeback_release_full(void *calldata)
                         dprintk(" marked for commit\n");
                         goto next;
                 }
-               /* Set the PG_uptodate flag? */
-               nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
                 dprintk(" OK\n");
  remove_request:
                 nfs_end_page_writeback(page);
@@ -1133,7 +1131,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
                 static unsigned long    complain;
  
                 if (time_before(complain, jiffies)) {
-                       dprintk("NFS: faulty NFS server %s:"
+                       dprintk("NFS:       faulty NFS server %s:"
                                 " (committed = %d) != (stable = %d)\n",
                                 NFS_SERVER(data->inode)->nfs_client->cl_hostname,
                                 resp->verf->committed, argp->stable);
@@ -1297,12 +1295,9 @@ static void nfs_commit_release(void *calldata)
         while (!list_empty(&data->pages)) {
                 req = nfs_list_entry(data->pages.next);
                 nfs_list_remove_request(req);
-               clear_bit(PG_NEED_COMMIT, &(req)->wb_flags);
-               dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-               dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
-                               BDI_RECLAIMABLE);
+               nfs_clear_request_commit(req);
  
-               dprintk("NFS: commit (%s/%Ld %d@%Ld)",
+               dprintk("NFS:       commit (%s/%lld %d@%lld)",
                         req->wb_context->path.dentry->d_inode->i_sb->s_id,
                         (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
                         req->wb_bytes,
@@ -1318,9 +1313,6 @@ static void nfs_commit_release(void *calldata)
                  * returned by the server against all stored verfs. */
                 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
                         /* We have a match */
-                       /* Set the PG_uptodate flag */
-                       nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
-                                       req->wb_bytes);
                         nfs_inode_remove_request(req);
                         dprintk(" OK\n");
                         goto next;
@@ -1479,7 +1471,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
                 req = nfs_page_find_request(page);
                 if (req == NULL)
                         goto out;
-               if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+               if (test_bit(PG_CLEAN, &req->wb_flags)) {
                         nfs_release_request(req);
                         break;
                 }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c

index 4d4760e687c351a3dead64752d67ec7ce622a1c7..702fa577aa6e075a6fef3a9b31be6ffdb1d3a9c5 100644 (file)
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -381,7 +381,7 @@ static int do_probe_callback(void *data)
                 .program        = &cb_program,
                 .version        = nfs_cb_version[1]->number,
                 .authflavor     = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
-               .flags          = (RPC_CLNT_CREATE_NOPING),
+               .flags          = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
         };
         struct rpc_message msg = {
                 .rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig

new file mode 100644 (file)

index 0000000..91ceeda
--- /dev/null
+++ b/fs/ubifs/Kconfig
@@ -0,0 +1,72 @@
+config UBIFS_FS
+       tristate "UBIFS file system support"
+       select CRC16
+       select CRC32
+       select CRYPTO if UBIFS_FS_ADVANCED_COMPR
+       select CRYPTO if UBIFS_FS_LZO
+       select CRYPTO if UBIFS_FS_ZLIB
+       select CRYPTO_LZO if UBIFS_FS_LZO
+       select CRYPTO_DEFLATE if UBIFS_FS_ZLIB
+       depends on MTD_UBI
+       help
+         UBIFS is a file system for flash devices which works on top of UBI.
+
+config UBIFS_FS_XATTR
+       bool "Extended attributes support"
+       depends on UBIFS_FS
+       help
+         This option enables support of extended attributes.
+
+config UBIFS_FS_ADVANCED_COMPR
+       bool "Advanced compression options"
+       depends on UBIFS_FS
+       help
+         This option allows to explicitly choose which compressions, if any,
+         are enabled in UBIFS. Removing compressors means inability to read
+         existing file systems.
+
+         If unsure, say 'N'.
+
+config UBIFS_FS_LZO
+       bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR
+       depends on UBIFS_FS
+       default y
+       help
+          LZO compressor is generally faster than zlib but compresses worse.
+          Say 'Y' if unsure.
+
+config UBIFS_FS_ZLIB
+       bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR
+       depends on UBIFS_FS
+       default y
+       help
+         Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.
+
+# Debugging-related stuff
+config UBIFS_FS_DEBUG
+       bool "Enable debugging"
+       depends on UBIFS_FS
+       select DEBUG_FS
+       select KALLSYMS_ALL
+       help
+         This option enables UBIFS debugging.
+
+config UBIFS_FS_DEBUG_MSG_LVL
+       int "Default message level (0 = no extra messages, 3 = lots)"
+       depends on UBIFS_FS_DEBUG
+       default "0"
+       help
+         This controls the amount of debugging messages produced by UBIFS.
+         If reporting bugs, please try to have available a full dump of the
+         messages at level 1 while the misbehaviour was occurring. Level 2
+         may become necessary if level 1 messages were not enough to find the
+         bug. Generally Level 3 should be avoided.
+
+config UBIFS_FS_DEBUG_CHKS
+       bool "Enable extra checks"
+       depends on UBIFS_FS_DEBUG
+       help
+         If extra checks are enabled UBIFS will check the consistency of its
+         internal data structures during operation. However, UBIFS performance
+         is dramatically slower when this option is selected especially if the
+         file system is large.
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile

new file mode 100644 (file)

index 0000000..80e93c3
--- /dev/null
+++ b/fs/ubifs/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_UBIFS_FS) += ubifs.o
+
+ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o
+ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
+ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
+ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o
+
+ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o
+ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c

new file mode 100644 (file)

index 0000000..d81fb9e
--- /dev/null
+++ b/fs/ubifs/budget.c
@@ -0,0 +1,731 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements the budgeting sub-system which is responsible for UBIFS
+ * space management.
+ *
+ * Factors such as compression, wasted space at the ends of LEBs, space in other
+ * journal heads, the effect of updates on the index, and so on, make it
+ * impossible to accurately predict the amount of space needed. Consequently
+ * approximations are used.
+ */
+
+#include "ubifs.h"
+#include <linux/writeback.h>
+#include <asm/div64.h>
+
+/*
+ * When pessimistic budget calculations say that there is not enough space,
+ * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
+ * or committing. The below constants define maximum number of times UBIFS
+ * repeats the operations.
+ */
+#define MAX_SHRINK_RETRIES 8
+#define MAX_GC_RETRIES     4
+#define MAX_CMT_RETRIES    2
+#define MAX_NOSPC_RETRIES  1
+
+/*
+ * The below constant defines amount of dirty pages which should be written
+ * back when trying to shrink the liability.
+ */
+#define NR_TO_WRITE 16
+
+/**
+ * struct retries_info - information about re-tries while making free space.
+ * @prev_liability: previous liability
+ * @shrink_cnt: how many times the liability was shrunk
+ * @shrink_retries: count of liability shrink re-tries (increased when
+ *                  liability does not shrink)
+ * @try_gc: GC should be tried first
+ * @gc_retries: how many times GC was run
+ * @cmt_retries: how many times commit has been done
+ * @nospc_retries: how many times GC returned %-ENOSPC
+ *
+ * Since we consider budgeting to be the fast-path, and this structure has to
+ * be allocated on stack and zeroed out, we make it smaller using bit-fields.
+ */
+struct retries_info {
+       long long prev_liability;
+       unsigned int shrink_cnt;
+       unsigned int shrink_retries:5;
+       unsigned int try_gc:1;
+       unsigned int gc_retries:4;
+       unsigned int cmt_retries:3;
+       unsigned int nospc_retries:1;
+};
+
+/**
+ * shrink_liability - write-back some dirty pages/inodes.
+ * @c: UBIFS file-system description object
+ * @nr_to_write: how many dirty pages to write-back
+ *
+ * This function shrinks UBIFS liability by means of writing back some amount
+ * of dirty inodes and their pages. Returns the amount of pages which were
+ * written back. The returned value does not include dirty inodes which were
+ * synchronized.
+ *
+ * Note, this function synchronizes even VFS inodes which are locked
+ * (@i_mutex) by the caller of the budgeting function, because write-back does
+ * not touch @i_mutex.
+ */
+static int shrink_liability(struct ubifs_info *c, int nr_to_write)
+{
+       int nr_written;
+       struct writeback_control wbc = {
+               .sync_mode   = WB_SYNC_NONE,
+               .range_end   = LLONG_MAX,
+               .nr_to_write = nr_to_write,
+       };
+
+       generic_sync_sb_inodes(c->vfs_sb, &wbc);
+       nr_written = nr_to_write - wbc.nr_to_write;
+
+       if (!nr_written) {
+               /*
+                * Re-try again but wait on pages/inodes which are being
+                * written-back concurrently (e.g., by pdflush).
+                */
+               memset(&wbc, 0, sizeof(struct writeback_control));
+               wbc.sync_mode   = WB_SYNC_ALL;
+               wbc.range_end   = LLONG_MAX;
+               wbc.nr_to_write = nr_to_write;
+               generic_sync_sb_inodes(c->vfs_sb, &wbc);
+               nr_written = nr_to_write - wbc.nr_to_write;
+       }
+
+       dbg_budg("%d pages were written back", nr_written);
+       return nr_written;
+}
+
+
+/**
+ * run_gc - run garbage collector.
+ * @c: UBIFS file-system description object
+ *
+ * This function runs garbage collector to make some more free space. Returns
+ * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a
+ * negative error code in case of failure.
+ */
+static int run_gc(struct ubifs_info *c)
+{
+       int err, lnum;
+
+       /* Make some free space by garbage-collecting dirty space */
+       down_read(&c->commit_sem);
+       lnum = ubifs_garbage_collect(c, 1);
+       up_read(&c->commit_sem);
+       if (lnum < 0)
+               return lnum;
+
+       /* GC freed one LEB, return it to lprops */
+       dbg_budg("GC freed LEB %d", lnum);
+       err = ubifs_return_leb(c, lnum);
+       if (err)
+               return err;
+       return 0;
+}
+
+/**
+ * make_free_space - make more free space on the file-system.
+ * @c: UBIFS file-system description object
+ * @ri: information about previous invocations of this function
+ *
+ * This function is called when an operation cannot be budgeted because there
+ * is supposedly no free space. But in most cases there is some free space:
+ *   o budgeting is pessimistic, so it always budgets more than is actually
+ *     needed, so shrinking the liability is one way to make free space - the
+ *     cached data will take less space than it was budgeted for;
+ *   o GC may turn some dark space into free space (budgeting treats dark space
+ *     as not available);
+ *   o commit may free some LEB, i.e., turn freeable LEBs into free LEBs.
+ *
+ * So this function tries to do the above. Returns %-EAGAIN if some free space
+ * was presumably made and the caller has to re-try budgeting the operation.
+ * Returns %-ENOSPC if it couldn't make more free space, and other negative
+ * error codes on failures.
+ */
+static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
+{
+       int err;
+
+       /*
+        * If we have some dirty pages and inodes (liability), try to write
+        * them back unless this was tried too many times without effect
+        * already.
+        */
+       if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) {
+               long long liability;
+
+               spin_lock(&c->space_lock);
+               liability = c->budg_idx_growth + c->budg_data_growth +
+                           c->budg_dd_growth;
+               spin_unlock(&c->space_lock);
+
+               if (ri->prev_liability >= liability) {
+                       /* Liability does not shrink, next time try GC then */
+                       ri->shrink_retries += 1;
+                       if (ri->gc_retries < MAX_GC_RETRIES)
+                               ri->try_gc = 1;
+                       dbg_budg("liability did not shrink: retries %d of %d",
+                                ri->shrink_retries, MAX_SHRINK_RETRIES);
+               }
+
+               dbg_budg("force write-back (count %d)", ri->shrink_cnt);
+               shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt);
+
+               ri->prev_liability = liability;
+               ri->shrink_cnt += 1;
+               return -EAGAIN;
+       }
+
+       /*
+        * Try to run garbage collector unless it was already tried too many
+        * times.
+        */
+       if (ri->gc_retries < MAX_GC_RETRIES) {
+               ri->gc_retries += 1;
+               dbg_budg("run GC, retries %d of %d",
+                        ri->gc_retries, MAX_GC_RETRIES);
+
+               ri->try_gc = 0;
+               err = run_gc(c);
+               if (!err)
+                       return -EAGAIN;
+
+               if (err == -EAGAIN) {
+                       dbg_budg("GC asked to commit");
+                       err = ubifs_run_commit(c);
+                       if (err)
+                               return err;
+                       return -EAGAIN;
+               }
+
+               if (err != -ENOSPC)
+                       return err;
+
+               /*
+                * GC could not make any progress. If this is the first time,
+                * then it makes sense to try to commit, because it might make
+                * some dirty space.
+                */
+               dbg_budg("GC returned -ENOSPC, retries %d",
+                        ri->nospc_retries);
+               if (ri->nospc_retries >= MAX_NOSPC_RETRIES)
+                       return err;
+               ri->nospc_retries += 1;
+       }
+
+       /* Neither GC nor write-back helped, try to commit */
+       if (ri->cmt_retries < MAX_CMT_RETRIES) {
+               ri->cmt_retries += 1;
+               dbg_budg("run commit, retries %d of %d",
+                        ri->cmt_retries, MAX_CMT_RETRIES);
+               err = ubifs_run_commit(c);
+               if (err)
+                       return err;
+               return -EAGAIN;
+       }
+       return -ENOSPC;
+}
+
+/**
+ * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index.
+ * @c: UBIFS file-system description object
+ *
+ * This function calculates and returns the number of eraseblocks which should
+ * be kept for index usage.
+ */
+int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
+{
+       int ret;
+       uint64_t idx_size;
+
+       idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
+
+       /* And make sure we have twice the index size of space reserved */
+       idx_size <<= 1;
+
+       /*
+        * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
+        * pair, nor similarly the two variables for the new index size, so we
+        * have to do this costly 64-bit division on fast-path.
+        */
+       if (do_div(idx_size, c->leb_size - c->max_idx_node_sz))
+               ret = idx_size + 1;
+       else
+               ret = idx_size;
+       /*
+        * The index head is not available for the in-the-gaps method, so add an
+        * extra LEB to compensate.
+        */
+       ret += 1;
+       /*
+        * At present the index needs at least 2 LEBs: one for the index head
+        * and one for in-the-gaps method (which currently does not cater for
+        * the index head and so excludes it from consideration).
+        */
+       if (ret < 2)
+               ret = 2;
+       return ret;
+}
+
+/**
+ * ubifs_calc_available - calculate available FS space.
+ * @c: UBIFS file-system description object
+ * @min_idx_lebs: minimum number of LEBs reserved for the index
+ *
+ * This function calculates and returns amount of FS space available for use.
+ */
+long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
+{
+       int subtract_lebs;
+       long long available;
+
+       /*
+        * Force the amount available to the total size reported if the used
+        * space is zero.
+        */
+       if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
+           c->budg_data_growth + c->budg_dd_growth == 0) {
+               /* Do the same calculation as for c->block_cnt */
+               available = c->main_lebs - 2;
+               available *= c->leb_size - c->dark_wm;
+               return available;
+       }
+
+       available = c->main_bytes - c->lst.total_used;
+
+       /*
+        * Now 'available' contains theoretically available flash space
+        * assuming there is no index, so we have to subtract the space which
+        * is reserved for the index.
+        */
+       subtract_lebs = min_idx_lebs;
+
+       /* Take into account that GC reserves one LEB for its own needs */
+       subtract_lebs += 1;
+
+       /*
+        * The GC journal head LEB is not really accessible. And since
+        * different write types go to different heads, we may count only on
+        * one head's space.
+        */
+       subtract_lebs += c->jhead_cnt - 1;
+
+       /* We also reserve one LEB for deletions, which bypass budgeting */
+       subtract_lebs += 1;
+
+       available -= (long long)subtract_lebs * c->leb_size;
+
+       /* Subtract the dead space which is not available for use */
+       available -= c->lst.total_dead;
+
+       /*
+        * Subtract dark space, which might or might not be usable - it depends
+        * on the data which we have on the media and which will be written. If
+        * this is a lot of uncompressed or not-compressible data, the dark
+        * space cannot be used.
+        */
+       available -= c->lst.total_dark;
+
+       /*
+        * However, there is more dark space. The index may be bigger than
+        * @min_idx_lebs. Those extra LEBs are assumed to be available, but
+        * their dark space is not included in total_dark, so it is subtracted
+        * here.
+        */
+       if (c->lst.idx_lebs > min_idx_lebs) {
+               subtract_lebs = c->lst.idx_lebs - min_idx_lebs;
+               available -= subtract_lebs * c->dark_wm;
+       }
+
+       /* The calculations are rough and may end up with a negative number */
+       return available > 0 ? available : 0;
+}
+
+/**
+ * can_use_rp - check whether the user is allowed to use reserved pool.
+ * @c: UBIFS file-system description object
+ *
+ * UBIFS has so-called "reserved pool" which is flash space reserved for the
+ * superuser and for users whose UID/GID is recorded in UBIFS superblock. This
+ * function checks whether the current user is allowed to use reserved pool.
+ * Returns %1 if the current user is allowed to use it and %0 otherwise.
+ */
+static int can_use_rp(struct ubifs_info *c)
+{
+       if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
+           (c->rp_gid != 0 && in_group_p(c->rp_gid)))
+               return 1;
+       return 0;
+}
+
+/**
+ * do_budget_space - reserve flash space for index and data growth.
+ * @c: UBIFS file-system description object
+ *
+ * This function makes sure UBIFS has enough free eraseblocks for index growth
+ * and data.
+ *
+ * When budgeting index space, UBIFS reserves twice as many LEBs as the index
+ * would take if it was consolidated and written to the flash. This guarantees
+ * that the "in-the-gaps" commit method always succeeds and UBIFS will always
+ * be able to commit dirty index. So this function basically adds amount of
+ * budgeted index space to the size of the current index, multiplies this by 2,
+ * and makes sure this does not exceed the amount of free eraseblocks.
+ *
+ * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
+ * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
+ *    be large, because UBIFS does not do any index consolidation as long as
+ *    there is free space. IOW, the index may take a lot of LEBs, but the LEBs
+ *    will contain a lot of dirt.
+ * o @c->min_idx_lebs is the number of LEBs the index presumably takes. IOW,
+ *   the index may be consolidated to take up to @c->min_idx_lebs LEBs.
+ *
+ * This function returns zero in case of success, and %-ENOSPC in case of
+ * failure.
+ */
+static int do_budget_space(struct ubifs_info *c)
+{
+       long long outstanding, available;
+       int lebs, rsvd_idx_lebs, min_idx_lebs;
+
+       /* First budget index space */
+       min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+
+       /* Now 'min_idx_lebs' contains number of LEBs to reserve */
+       if (min_idx_lebs > c->lst.idx_lebs)
+               rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
+       else
+               rsvd_idx_lebs = 0;
+
+       /*
+        * The number of LEBs that are available to be used by the index is:
+        *
+        *    @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
+        *    @c->lst.taken_empty_lebs
+        *
+        * @empty_lebs are available because they are empty. @freeable_cnt are
+        * available because they contain only free and dirty space and the
+        * index allocation always occurs after wbufs are synch'ed.
+        * @idx_gc_cnt are available because they are index LEBs that have been
+        * garbage collected (including trivial GC) and are awaiting the commit
+        * before they can be unmapped - note that the in-the-gaps method will
+        * grab these if it needs them. @taken_empty_lebs are empty_lebs that
+        * have already been allocated for some purpose (also includes those
+        * LEBs on the @idx_gc list).
+        *
+        * Note, @taken_empty_lebs may temporarily be higher by one because of
+        * the way we serialize LEB allocations and budgeting. See a comment in
+        * 'ubifs_find_free_space()'.
+        */
+       lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
+              c->lst.taken_empty_lebs;
+       if (unlikely(rsvd_idx_lebs > lebs)) {
+               dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
+                        "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
+                        rsvd_idx_lebs);
+               return -ENOSPC;
+       }
+
+       available = ubifs_calc_available(c, min_idx_lebs);
+       outstanding = c->budg_data_growth + c->budg_dd_growth;
+
+       if (unlikely(available < outstanding)) {
+               dbg_budg("out of data space: available %lld, outstanding %lld",
+                        available, outstanding);
+               return -ENOSPC;
+       }
+
+       if (available - outstanding <= c->rp_size && !can_use_rp(c))
+               return -ENOSPC;
+
+       c->min_idx_lebs = min_idx_lebs;
+       return 0;
+}
+
+/**
+ * calc_idx_growth - calculate approximate index growth from budgeting request.
+ * @c: UBIFS file-system description object
+ * @req: budgeting request
+ *
+ * For now we assume each new node adds one znode. This is a rather poor
+ * approximation, though.
+ */
+static int calc_idx_growth(const struct ubifs_info *c,
+                          const struct ubifs_budget_req *req)
+{
+       int znodes;
+
+       znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) +
+                req->new_dent;
+       return znodes * c->max_idx_node_sz;
+}
+
+/**
+ * calc_data_growth - calculate approximate amount of new data from budgeting
+ * request.
+ * @c: UBIFS file-system description object
+ * @req: budgeting request
+ */
+static int calc_data_growth(const struct ubifs_info *c,
+                           const struct ubifs_budget_req *req)
+{
+       int data_growth;
+
+       data_growth = req->new_ino  ? c->inode_budget : 0;
+       if (req->new_page)
+               data_growth += c->page_budget;
+       if (req->new_dent)
+               data_growth += c->dent_budget;
+       data_growth += req->new_ino_d;
+       return data_growth;
+}
+
+/**
+ * calc_dd_growth - calculate approximate amount of data which makes other data
+ * dirty from budgeting request.
+ * @c: UBIFS file-system description object
+ * @req: budgeting request
+ */
+static int calc_dd_growth(const struct ubifs_info *c,
+                         const struct ubifs_budget_req *req)
+{
+       int dd_growth;
+
+       dd_growth = req->dirtied_page ? c->page_budget : 0;
+
+       if (req->dirtied_ino)
+               dd_growth += c->inode_budget << (req->dirtied_ino - 1);
+       if (req->mod_dent)
+               dd_growth += c->dent_budget;
+       dd_growth += req->dirtied_ino_d;
+       return dd_growth;
+}
+
+/**
+ * ubifs_budget_space - ensure there is enough space to complete an operation.
+ * @c: UBIFS file-system description object
+ * @req: budget request
+ *
+ * This function allocates budget for an operation. It uses pessimistic
+ * approximation of how much flash space the operation needs. The goal of this
+ * function is to make sure UBIFS always has flash space to flush all dirty
+ * pages, dirty inodes, and dirty znodes (liability). This function may force
+ * commit, garbage-collection or write-back. Returns zero in case of success,
+ * %-ENOSPC if there is no free space and other negative error codes in case of
+ * failures.
+ */
+int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
+{
+       int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
+       int err, idx_growth, data_growth, dd_growth;
+       struct retries_info ri;
+
+       ubifs_assert(req->dirtied_ino <= 4);
+       ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+
+       data_growth = calc_data_growth(c, req);
+       dd_growth = calc_dd_growth(c, req);
+       if (!data_growth && !dd_growth)
+               return 0;
+       idx_growth = calc_idx_growth(c, req);
+       memset(&ri, 0, sizeof(struct retries_info));
+
+again:
+       spin_lock(&c->space_lock);
+       ubifs_assert(c->budg_idx_growth >= 0);
+       ubifs_assert(c->budg_data_growth >= 0);
+       ubifs_assert(c->budg_dd_growth >= 0);
+
+       if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
+               dbg_budg("no space");
+               spin_unlock(&c->space_lock);
+               return -ENOSPC;
+       }
+
+       c->budg_idx_growth += idx_growth;
+       c->budg_data_growth += data_growth;
+       c->budg_dd_growth += dd_growth;
+
+       err = do_budget_space(c);
+       if (likely(!err)) {
+               req->idx_growth = idx_growth;
+               req->data_growth = data_growth;
+               req->dd_growth = dd_growth;
+               spin_unlock(&c->space_lock);
+               return 0;
+       }
+
+       /* Restore the old values */
+       c->budg_idx_growth -= idx_growth;
+       c->budg_data_growth -= data_growth;
+       c->budg_dd_growth -= dd_growth;
+       spin_unlock(&c->space_lock);
+
+       if (req->fast) {
+               dbg_budg("no space for fast budgeting");
+               return err;
+       }
+
+       err = make_free_space(c, &ri);
+       if (err == -EAGAIN) {
+               dbg_budg("try again");
+               cond_resched();
+               goto again;
+       } else if (err == -ENOSPC) {
+               dbg_budg("FS is full, -ENOSPC");
+               c->nospace = 1;
+               if (can_use_rp(c) || c->rp_size == 0)
+                       c->nospace_rp = 1;
+               smp_wmb();
+       } else
+               ubifs_err("cannot budget space, error %d", err);
+       return err;
+}
+
+/**
+ * ubifs_release_budget - release budgeted free space.
+ * @c: UBIFS file-system description object
+ * @req: budget request
+ *
+ * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
+ * since the index changes (which were budgeted for in @req->idx_growth) will
+ * only be written to the media on commit, this function moves the index budget
+ * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
+ * zeroed by the commit operation.
+ */
+void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
+{
+       ubifs_assert(req->dirtied_ino <= 4);
+       ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+       if (!req->recalculate) {
+               ubifs_assert(req->idx_growth >= 0);
+               ubifs_assert(req->data_growth >= 0);
+               ubifs_assert(req->dd_growth >= 0);
+       }
+
+       if (req->recalculate) {
+               req->data_growth = calc_data_growth(c, req);
+               req->dd_growth = calc_dd_growth(c, req);
+               req->idx_growth = calc_idx_growth(c, req);
+       }
+
+       if (!req->data_growth && !req->dd_growth)
+               return;
+
+       c->nospace = c->nospace_rp = 0;
+       smp_wmb();
+
+       spin_lock(&c->space_lock);
+       c->budg_idx_growth -= req->idx_growth;
+       c->budg_uncommitted_idx += req->idx_growth;
+       c->budg_data_growth -= req->data_growth;
+       c->budg_dd_growth -= req->dd_growth;
+       c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+
+       ubifs_assert(c->budg_idx_growth >= 0);
+       ubifs_assert(c->budg_data_growth >= 0);
+       ubifs_assert(c->min_idx_lebs < c->main_lebs);
+       spin_unlock(&c->space_lock);
+}
+
+/**
+ * ubifs_convert_page_budget - convert budget of a new page.
+ * @c: UBIFS file-system description object
+ *
+ * This function converts budget which was allocated for a new page of data to
+ * the budget of changing an existing page of data. The latter is smaller than
+ * the former, so this function only does simple re-calculation and does not
+ * involve any write-back.
+ */
+void ubifs_convert_page_budget(struct ubifs_info *c)
+{
+       spin_lock(&c->space_lock);
+       /* Release the index growth reservation */
+       c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+       /* Release the data growth reservation */
+       c->budg_data_growth -= c->page_budget;
+       /* Increase the dirty data growth reservation instead */
+       c->budg_dd_growth += c->page_budget;
+       /* And re-calculate the indexing space reservation */
+       c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+       spin_unlock(&c->space_lock);
+}
+
+/**
+ * ubifs_release_dirty_inode_budget - release dirty inode budget.
+ * @c: UBIFS file-system description object
+ * @ui: UBIFS inode to release the budget for
+ *
+ * This function releases budget corresponding to a dirty inode: the inode
+ * itself plus its data (@ui->data_len). It is usually called after the inode
+ * has been written to the media and marked as clean.
+ */
+void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
+                                     struct ubifs_inode *ui)
+{
+       struct ubifs_budget_req req = {.dd_growth = c->inode_budget +
+                                                   ui->data_len};
+
+       ubifs_release_budget(c, &req);
+}
+
+/**
+ * ubifs_budg_get_free_space - return amount of free space.
+ * @c: UBIFS file-system description object
+ *
+ * This function returns amount of free space on the file-system.
+ */
+long long ubifs_budg_get_free_space(struct ubifs_info *c)
+{
+       int min_idx_lebs, rsvd_idx_lebs;
+       long long available, outstanding, free;
+
+       /* Do exactly the same calculations as in 'do_budget_space()' */
+       spin_lock(&c->space_lock);
+       min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+
+       if (min_idx_lebs > c->lst.idx_lebs)
+               rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
+       else
+               rsvd_idx_lebs = 0;
+
+       if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
+                               - c->lst.taken_empty_lebs) {
+               spin_unlock(&c->space_lock);
+               return 0;
+       }
+
+       available = ubifs_calc_available(c, min_idx_lebs);
+       outstanding = c->budg_data_growth + c->budg_dd_growth;
+       c->min_idx_lebs = min_idx_lebs;
+       spin_unlock(&c->space_lock);
+
+       if (available > outstanding)
+               free = ubifs_reported_space(c, available - outstanding);
+       else
+               free = 0;
+       return free;
+}
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c

new file mode 100644 (file)

index 0000000..3b51631
--- /dev/null
+++ b/fs/ubifs/commit.c
@@ -0,0 +1,677 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements functions that manage the running of the commit process.
+ * Each affected module has its own functions to accomplish their part in the
+ * commit and those functions are called here.
+ *
+ * The commit is the process whereby all updates to the index and LEB properties
+ * are written out together and the journal becomes empty. This keeps the
+ * file system consistent - at all times the state can be recreated by reading
+ * the index and LEB properties and then replaying the journal.
+ *
+ * The commit is split into two parts named "commit start" and "commit end".
+ * During commit start, the commit process has exclusive access to the journal
+ * by holding the commit semaphore down for writing. As few I/O operations as
+ * possible are performed during commit start, instead the nodes that are to be
+ * written are merely identified. During commit end, the commit semaphore is no
+ * longer held and the journal is again in operation, allowing users to continue
+ * to use the file system while the bulk of the commit I/O is performed. The
+ * purpose of this two-step approach is to prevent the commit from causing any
+ * latency blips. Note that in any case, the commit does not prevent lookups
+ * (as permitted by the TNC mutex), or access to VFS data structures e.g. page
+ * cache.
+ */
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include "ubifs.h"
+
+/**
+ * do_commit - commit the journal.
+ * @c: UBIFS file-system description object
+ *
+ * This function implements UBIFS commit. It has to be called with the commit
+ * semaphore (@c->commit_sem) held for writing. The semaphore is released by
+ * this function on every path: after the "commit start" part on success, and
+ * via the 'out_up' label if "commit start" fails. On any failure the commit
+ * state is set to %COMMIT_BROKEN, waiters are woken up and 'ubifs_ro_mode()'
+ * is called. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int do_commit(struct ubifs_info *c)
+{
+       int err, new_ltail_lnum, old_ltail_lnum, i;
+       struct ubifs_zbranch zroot;
+       struct ubifs_lp_stats lst;
+
+       dbg_cmt("start");
+       if (c->ro_media) {
+               err = -EROFS;
+               goto out_up;
+       }
+
+       /* Sync all write buffers (necessary for recovery) */
+       for (i = 0; i < c->jhead_cnt; i++) {
+               err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+               if (err)
+                       goto out_up;
+       }
+
+       /*
+        * "Commit start": every affected sub-system does its part while the
+        * commit semaphore is still held for writing.
+        */
+       err = ubifs_gc_start_commit(c);
+       if (err)
+               goto out_up;
+       err = dbg_check_lprops(c);
+       if (err)
+               goto out_up;
+       err = ubifs_log_start_commit(c, &new_ltail_lnum);
+       if (err)
+               goto out_up;
+       err = ubifs_tnc_start_commit(c, &zroot);
+       if (err)
+               goto out_up;
+       err = ubifs_lpt_start_commit(c);
+       if (err)
+               goto out_up;
+       err = ubifs_orphan_start_commit(c);
+       if (err)
+               goto out_up;
+
+       /* Take a copy of the LEB properties statistics for the master node */
+       ubifs_get_lp_stats(c, &lst);
+
+       /* End of "commit start": from now on the semaphore is not held */
+       up_write(&c->commit_sem);
+
+       /* "Commit end": errors below must use 'out', not 'out_up' */
+       err = ubifs_tnc_end_commit(c);
+       if (err)
+               goto out;
+       err = ubifs_lpt_end_commit(c);
+       if (err)
+               goto out;
+       err = ubifs_orphan_end_commit(c);
+       if (err)
+               goto out;
+       /* Remember the log tail before 'ubifs_log_end_commit()' is called */
+       old_ltail_lnum = c->ltail_lnum;
+       err = ubifs_log_end_commit(c, new_ltail_lnum);
+       if (err)
+               goto out;
+       err = dbg_check_old_index(c, &zroot);
+       if (err)
+               goto out;
+
+       /* Fill in and write the master node under @c->mst_mutex */
+       mutex_lock(&c->mst_mutex);
+       c->mst_node->cmt_no      = cpu_to_le64(++c->cmt_no);
+       c->mst_node->log_lnum    = cpu_to_le32(new_ltail_lnum);
+       c->mst_node->root_lnum   = cpu_to_le32(zroot.lnum);
+       c->mst_node->root_offs   = cpu_to_le32(zroot.offs);
+       c->mst_node->root_len    = cpu_to_le32(zroot.len);
+       c->mst_node->ihead_lnum  = cpu_to_le32(c->ihead_lnum);
+       c->mst_node->ihead_offs  = cpu_to_le32(c->ihead_offs);
+       c->mst_node->index_size  = cpu_to_le64(c->old_idx_sz);
+       c->mst_node->lpt_lnum    = cpu_to_le32(c->lpt_lnum);
+       c->mst_node->lpt_offs    = cpu_to_le32(c->lpt_offs);
+       c->mst_node->nhead_lnum  = cpu_to_le32(c->nhead_lnum);
+       c->mst_node->nhead_offs  = cpu_to_le32(c->nhead_offs);
+       c->mst_node->ltab_lnum   = cpu_to_le32(c->ltab_lnum);
+       c->mst_node->ltab_offs   = cpu_to_le32(c->ltab_offs);
+       c->mst_node->lsave_lnum  = cpu_to_le32(c->lsave_lnum);
+       c->mst_node->lsave_offs  = cpu_to_le32(c->lsave_offs);
+       c->mst_node->lscan_lnum  = cpu_to_le32(c->lscan_lnum);
+       c->mst_node->empty_lebs  = cpu_to_le32(lst.empty_lebs);
+       c->mst_node->idx_lebs    = cpu_to_le32(lst.idx_lebs);
+       c->mst_node->total_free  = cpu_to_le64(lst.total_free);
+       c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty);
+       c->mst_node->total_used  = cpu_to_le64(lst.total_used);
+       c->mst_node->total_dead  = cpu_to_le64(lst.total_dead);
+       c->mst_node->total_dark  = cpu_to_le64(lst.total_dark);
+       if (c->no_orphs)
+               c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
+       else
+               c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
+       err = ubifs_write_master(c);
+       mutex_unlock(&c->mst_mutex);
+       if (err)
+               goto out;
+
+       /* Post-commit steps, run after the master node has been written */
+       err = ubifs_log_post_commit(c, old_ltail_lnum);
+       if (err)
+               goto out;
+       err = ubifs_gc_end_commit(c);
+       if (err)
+               goto out;
+       err = ubifs_lpt_post_commit(c);
+       if (err)
+               goto out;
+
+       /* The commit is finished: go back to rest and wake up the waiters */
+       spin_lock(&c->cs_lock);
+       c->cmt_state = COMMIT_RESTING;
+       wake_up(&c->cmt_wq);
+       dbg_cmt("commit end");
+       spin_unlock(&c->cs_lock);
+
+       return 0;
+
+out_up:
+       /* Failure during "commit start": the semaphore is still held */
+       up_write(&c->commit_sem);
+out:
+       ubifs_err("commit failed, error %d", err);
+       /* Mark the commit as broken so that waiters do not sleep forever */
+       spin_lock(&c->cs_lock);
+       c->cmt_state = COMMIT_BROKEN;
+       wake_up(&c->cmt_wq);
+       spin_unlock(&c->cs_lock);
+       ubifs_ro_mode(c, err);
+       return err;
+}
+
+/**
+ * run_bg_commit - run background commit if it is needed.
+ * @c: UBIFS file-system description object
+ *
+ * This function starts the commit if the commit state is either "background"
+ * or "required", and does nothing otherwise. Returns zero if there was nothing
+ * to do, otherwise returns what 'do_commit()' returned, i.e. zero in case of
+ * success and a negative error code in case of failure.
+ */
+static int run_bg_commit(struct ubifs_info *c)
+{
+       spin_lock(&c->cs_lock);
+       /*
+        * Run background commit only if background commit was requested or if
+        * commit is required.
+        */
+       if (c->cmt_state != COMMIT_BACKGROUND &&
+           c->cmt_state != COMMIT_REQUIRED) {
+               spin_unlock(&c->cs_lock);
+               return 0;
+       }
+       spin_unlock(&c->cs_lock);
+
+       down_write(&c->commit_sem);
+       spin_lock(&c->cs_lock);
+       /* The state may have changed while 'c->cs_lock' was not held */
+       switch (c->cmt_state) {
+       case COMMIT_REQUIRED:
+               c->cmt_state = COMMIT_RUNNING_REQUIRED;
+               break;
+       case COMMIT_BACKGROUND:
+               c->cmt_state = COMMIT_RUNNING_BACKGROUND;
+               break;
+       default:
+               /* Nothing to commit any more */
+               up_write(&c->commit_sem);
+               spin_unlock(&c->cs_lock);
+               return 0;
+       }
+       spin_unlock(&c->cs_lock);
+
+       /* 'do_commit()' releases 'c->commit_sem' */
+       return do_commit(c);
+}
+
+/**
+ * ubifs_bg_thread - UBIFS background thread function.
+ * @info: points to the file-system description object
+ *
+ * This function implements various file-system background activities:
+ * o when a write-buffer timer expires it synchronizes the appropriate
+ *   write-buffer;
+ * o when the journal is about to be full, it starts in-advance commit.
+ *
+ * Note, other stuff like background garbage collection may be added here in
+ * future.
+ */
+int ubifs_bg_thread(void *info)
+{
+       int err;
+       struct ubifs_info *c = info;
+
+       ubifs_msg("background thread \"%s\" started, PID %d",
+                 c->bgt_name, current->pid);
+       set_freezable();
+
+       while (1) {
+               if (kthread_should_stop())
+                       break;
+
+               /* If we have just been frozen and thawed, re-check everything */
+               if (try_to_freeze())
+                       continue;
+
+               /*
+                * The task state is changed before @c->need_bgt is tested.
+                * NOTE(review): presumably this is what keeps a wake-up which
+                * arrives between the test and 'schedule()' from being lost -
+                * confirm against 'ubifs_wake_up_bgt()'.
+                */
+               set_current_state(TASK_INTERRUPTIBLE);
+               /* Check if there is something to do */
+               if (!c->need_bgt) {
+                       /*
+                        * Nothing prevents us from going sleep now and
+                        * be never woken up and block the task which
+                        * could wait in 'kthread_stop()' forever.
+                        */
+                       if (kthread_should_stop())
+                               break;
+                       schedule();
+                       continue;
+               } else
+                       __set_current_state(TASK_RUNNING);
+
+               /* Clear the request flag before doing the work */
+               c->need_bgt = 0;
+               err = ubifs_bg_wbufs_sync(c);
+               if (err)
+                       ubifs_ro_mode(c, err);
+
+               /*
+                * The return code is ignored on purpose: 'do_commit()' reports
+                * the failure and switches to read-only mode by itself.
+                */
+               run_bg_commit(c);
+               cond_resched();
+       }
+
+       dbg_msg("background thread \"%s\" stops", c->bgt_name);
+       return 0;
+}
+
+/**
+ * ubifs_commit_required - set commit state to "required".
+ * @c: UBIFS file-system description object
+ *
+ * This function is called if a commit is required but cannot be done from the
+ * calling function, so it is just flagged instead. A resting or background
+ * state becomes "required", a running background commit becomes "running
+ * required", and all the other states are left untouched.
+ */
+void ubifs_commit_required(struct ubifs_info *c)
+{
+       spin_lock(&c->cs_lock);
+       if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) {
+               /* The commit is already in progress, just make it urgent */
+               dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
+                       dbg_cstate(COMMIT_RUNNING_REQUIRED));
+               c->cmt_state = COMMIT_RUNNING_REQUIRED;
+       } else if (c->cmt_state == COMMIT_RESTING ||
+                  c->cmt_state == COMMIT_BACKGROUND) {
+               dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
+                       dbg_cstate(COMMIT_REQUIRED));
+               c->cmt_state = COMMIT_REQUIRED;
+       }
+       /*
+        * Nothing to do for COMMIT_REQUIRED, COMMIT_RUNNING_REQUIRED and
+        * COMMIT_BROKEN.
+        */
+       spin_unlock(&c->cs_lock);
+}
+
+/**
+ * ubifs_request_bg_commit - notify the background thread to do a commit.
+ * @c: UBIFS file-system description object
+ *
+ * This function is called if the journal is full enough to make a commit
+ * worthwhile. If the commit is resting, the state is changed to "background"
+ * and the background thread is kicked to start it; in any other state this
+ * function does nothing.
+ */
+void ubifs_request_bg_commit(struct ubifs_info *c)
+{
+       int kick_bgt = 0;
+
+       spin_lock(&c->cs_lock);
+       if (c->cmt_state == COMMIT_RESTING) {
+               dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
+                       dbg_cstate(COMMIT_BACKGROUND));
+               c->cmt_state = COMMIT_BACKGROUND;
+               kick_bgt = 1;
+       }
+       spin_unlock(&c->cs_lock);
+
+       /* The background thread is woken up with 'c->cs_lock' released */
+       if (kick_bgt)
+               ubifs_wake_up_bgt(c);
+}
+
+/**
+ * wait_for_commit - wait for commit.
+ * @c: UBIFS file-system description object
+ *
+ * This function sleeps until the commit operation is no longer running, i.e.
+ * until the commit state is neither "running background" nor "running
+ * required". Always returns zero.
+ */
+static int wait_for_commit(struct ubifs_info *c)
+{
+       dbg_cmt("pid %d goes sleep", current->pid);
+
+       /*
+        * We sleep for as long as a commit is running and are woken up when
+        * it ends. It is possible, although very unlikely, that by the time we
+        * wake up the subsequent commit is already running, in which case we
+        * go back to sleep. That commit wakes us up as well when it ends, so
+        * there is no danger of sleeping forever.
+        */
+       wait_event(c->cmt_wq, !(c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
+                               c->cmt_state == COMMIT_RUNNING_REQUIRED));
+
+       dbg_cmt("commit finished, pid %d woke up", current->pid);
+       return 0;
+}
+
+/**
+ * ubifs_run_commit - run or wait for commit.
+ * @c: UBIFS file-system description object
+ *
+ * This function runs commit and returns zero in case of success and a negative
+ * error code in case of failure. If a commit is already running, this function
+ * does not start another one but marks the running commit as "required" and
+ * waits for it to finish, in which case the return code is that of
+ * 'wait_for_commit()', i.e. zero. If the commit state is %COMMIT_BROKEN,
+ * %-EINVAL is returned.
+ */
+int ubifs_run_commit(struct ubifs_info *c)
+{
+       int err = 0;
+
+       spin_lock(&c->cs_lock);
+       if (c->cmt_state == COMMIT_BROKEN) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
+               /*
+                * We set the commit state to 'running required' to indicate
+                * that we want it to complete as quickly as possible.
+                */
+               c->cmt_state = COMMIT_RUNNING_REQUIRED;
+
+       if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
+               spin_unlock(&c->cs_lock);
+               return wait_for_commit(c);
+       }
+       spin_unlock(&c->cs_lock);
+
+       /* Ok, the commit is indeed needed */
+
+       /* The spinlock is dropped above because 'down_write()' may sleep */
+       down_write(&c->commit_sem);
+       spin_lock(&c->cs_lock);
+       /*
+        * Since we unlocked 'c->cs_lock', the state may have changed, so
+        * re-check it.
+        */
+       if (c->cmt_state == COMMIT_BROKEN) {
+               err = -EINVAL;
+               goto out_cmt_unlock;
+       }
+
+       if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
+               c->cmt_state = COMMIT_RUNNING_REQUIRED;
+
+       if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
+               /* Somebody else runs the commit - just wait for it */
+               up_write(&c->commit_sem);
+               spin_unlock(&c->cs_lock);
+               return wait_for_commit(c);
+       }
+       c->cmt_state = COMMIT_RUNNING_REQUIRED;
+       spin_unlock(&c->cs_lock);
+
+       /* 'do_commit()' releases 'c->commit_sem' on all paths */
+       err = do_commit(c);
+       return err;
+
+out_cmt_unlock:
+       up_write(&c->commit_sem);
+out:
+       spin_unlock(&c->cs_lock);
+       return err;
+}
+
+/**
+ * ubifs_gc_should_commit - determine if it is time for GC to run commit.
+ * @c: UBIFS file-system description object
+ *
+ * This function is called by garbage collection to determine if commit should
+ * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal
+ * is full enough to start commit, the state is changed to @COMMIT_REQUIRED.
+ * It is not absolutely necessary to commit yet, but it feels like this should
+ * be better than to keep doing GC. This function returns %1 if the resulting
+ * commit state is @COMMIT_REQUIRED, i.e. GC has to initiate commit, and %0 if
+ * not.
+ */
+int ubifs_gc_should_commit(struct ubifs_info *c)
+{
+       int ret;
+
+       spin_lock(&c->cs_lock);
+       if (c->cmt_state != COMMIT_BACKGROUND)
+               dbg_cmt("commit not requested");
+       else {
+               dbg_cmt("commit required now");
+               c->cmt_state = COMMIT_REQUIRED;
+       }
+       /* This also covers the case when commit was already required */
+       ret = (c->cmt_state == COMMIT_REQUIRED) ? 1 : 0;
+       spin_unlock(&c->cs_lock);
+
+       return ret;
+}
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+/**
+ * struct idx_node - hold index nodes during index tree traversal.
+ * @list: links this object into the list of index nodes which are on the
+ *        current traversal path (see 'dbg_check_old_index()')
+ * @iip: index in parent (slot number of this indexing node in the parent
+ *       indexing node)
+ * @upper_key: all keys in this indexing node have to be less or equivalent to
+ *             this key
+ * @idx: index node (8-byte aligned because all node structures must be 8-byte
+ *       aligned)
+ *
+ * Note, @idx has to be the last member: 'dbg_check_old_index()' allocates
+ * these objects with extra room after the structure, so that an index node
+ * with up to @c->fanout branches fits.
+ */
+struct idx_node {
+       struct list_head list;
+       int iip;
+       union ubifs_key upper_key;
+       struct ubifs_idx_node idx __attribute__((aligned(8)));
+};
+
+/**
+ * dbg_old_index_check_init - get information for the next old index check.
+ * @c: UBIFS file-system description object
+ * @zroot: root of the index
+ *
+ * This function saves @zroot in @c->old_zroot, then reads the root index node
+ * from the media and remembers its level and sequence number. This
+ * information is needed for the next old index check i.e.
+ * 'dbg_check_old_index()'.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
+{
+       struct ubifs_idx_node *root;
+       int ret;
+
+       c->old_zroot = *zroot;
+
+       /* Temporary buffer which is large enough for any index node */
+       root = kmalloc(c->max_idx_node_sz, GFP_NOFS);
+       if (!root)
+               return -ENOMEM;
+
+       ret = ubifs_read_node(c, root, UBIFS_IDX_NODE, c->old_zroot.len,
+                             c->old_zroot.lnum, c->old_zroot.offs);
+       if (!ret) {
+               c->old_zroot_level = le16_to_cpu(root->level);
+               c->old_zroot_sqnum = le64_to_cpu(root->ch.sqnum);
+       }
+
+       kfree(root);
+       return ret;
+}
+
+/**
+ * dbg_check_old_index - check the old copy of the index.
+ * @c: UBIFS file-system description object
+ * @zroot: root of the new index
+ *
+ * In order to be able to recover from an unclean unmount, a complete copy of
+ * the index must exist on flash. This is the "old" index. The commit process
+ * must write the "new" index to flash without overwriting or destroying any
+ * part of the old index. This function is run at commit end in order to check
+ * that the old index does indeed exist completely intact.
+ *
+ * The check itself is done only if the %UBIFS_CHK_OLD_IDX flag is set in
+ * 'ubifs_chk_flags'. In any case, when no problem was found, information
+ * about the new index (@zroot) is recorded for the next check by means of
+ * 'dbg_old_index_check_init()'.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ * Failed consistency checks are numbered with small positive codes which are
+ * printed and then turned into %-EINVAL.
+ */
+int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
+{
+       int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
+       int first = 1, iip;
+       union ubifs_key lower_key, upper_key, l_key, u_key;
+       unsigned long long uninitialized_var(last_sqnum);
+       struct ubifs_idx_node *idx;
+       struct list_head list;
+       struct idx_node *i;
+       size_t sz;
+
+       /*
+        * The list has to be initialized before the first jump: if the check
+        * is disabled but 'dbg_old_index_check_init()' fails, the 'out_free'
+        * error path still walks the list.
+        */
+       INIT_LIST_HEAD(&list);
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
+               goto out;
+
+       /* Room for 'struct idx_node' with a maximum-size index node in it */
+       sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) -
+            UBIFS_IDX_NODE_SZ;
+
+       /* Start at the old zroot */
+       lnum = c->old_zroot.lnum;
+       offs = c->old_zroot.offs;
+       len = c->old_zroot.len;
+       iip = 0;
+
+       /*
+        * Traverse the index tree preorder depth-first i.e. do a node and then
+        * its subtrees from left to right.
+        */
+       while (1) {
+               struct ubifs_branch *br;
+
+               /* Get the next index node */
+               i = kmalloc(sz, GFP_NOFS);
+               if (!i) {
+                       err = -ENOMEM;
+                       goto out_free;
+               }
+               i->iip = iip;
+               /* Keep the index nodes on our path in a linked list */
+               list_add_tail(&i->list, &list);
+               /* Read the index node */
+               idx = &i->idx;
+               err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
+               if (err)
+                       goto out_free;
+               /* Validate index node */
+               child_cnt = le16_to_cpu(idx->child_cnt);
+               if (child_cnt < 1 || child_cnt > c->fanout) {
+                       err = 1;
+                       goto out_dump;
+               }
+               if (first) {
+                       first = 0;
+                       /* Check root level and sqnum */
+                       if (le16_to_cpu(idx->level) != c->old_zroot_level) {
+                               err = 2;
+                               goto out_dump;
+                       }
+                       if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) {
+                               err = 3;
+                               goto out_dump;
+                       }
+                       /* Set last values as though root had a parent */
+                       last_level = le16_to_cpu(idx->level) + 1;
+                       last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1;
+                       key_read(c, ubifs_idx_key(c, idx), &lower_key);
+                       highest_ino_key(c, &upper_key, INUM_WATERMARK);
+               }
+               key_copy(c, &upper_key, &i->upper_key);
+               /*
+                * A child has to be exactly one level below its parent.
+                * NOTE(review): code 3 is also used by the root sqnum check
+                * above, so these two failures look the same in the log.
+                */
+               if (le16_to_cpu(idx->level) != last_level - 1) {
+                       err = 3;
+                       goto out_dump;
+               }
+               /*
+                * The index is always written bottom up hence a child's sqnum
+                * is always less than the parents.
+                */
+               if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) {
+                       err = 4;
+                       goto out_dump;
+               }
+               /* Check key range */
+               key_read(c, ubifs_idx_key(c, idx), &l_key);
+               br = ubifs_idx_branch(c, idx, child_cnt - 1);
+               key_read(c, &br->key, &u_key);
+               if (keys_cmp(c, &lower_key, &l_key) > 0) {
+                       err = 5;
+                       goto out_dump;
+               }
+               if (keys_cmp(c, &upper_key, &u_key) < 0) {
+                       err = 6;
+                       goto out_dump;
+               }
+               /* Only hash keys may be equal to the upper bound */
+               if (keys_cmp(c, &upper_key, &u_key) == 0)
+                       if (!is_hash_key(c, &u_key)) {
+                               err = 7;
+                               goto out_dump;
+                       }
+               /* Go to next index node */
+               if (le16_to_cpu(idx->level) == 0) {
+                       /* At the bottom, so go up until can go right */
+                       while (1) {
+                               /* Drop the bottom of the list */
+                               list_del(&i->list);
+                               kfree(i);
+                               /* No more list means we are done */
+                               if (list_empty(&list))
+                                       goto out;
+                               /* Look at the new bottom */
+                               i = list_entry(list.prev, struct idx_node,
+                                              list);
+                               idx = &i->idx;
+                               /* Can we go right */
+                               if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
+                                       iip = iip + 1;
+                                       break;
+                               } else
+                                       /* Nope, so go up again */
+                                       iip = i->iip;
+                       }
+               } else
+                       /* Go down left */
+                       iip = 0;
+               /*
+                * We have the parent in 'idx' and now we set up for reading the
+                * child pointed to by slot 'iip'.
+                */
+               last_level = le16_to_cpu(idx->level);
+               last_sqnum = le64_to_cpu(idx->ch.sqnum);
+               br = ubifs_idx_branch(c, idx, iip);
+               lnum = le32_to_cpu(br->lnum);
+               offs = le32_to_cpu(br->offs);
+               len = le32_to_cpu(br->len);
+               key_read(c, &br->key, &lower_key);
+               /*
+                * The upper bound for the child is the key of the next branch,
+                * or the upper bound of the parent for the last branch.
+                */
+               if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
+                       br = ubifs_idx_branch(c, idx, iip + 1);
+                       key_read(c, &br->key, &upper_key);
+               } else
+                       key_copy(c, &i->upper_key, &upper_key);
+       }
+out:
+       /* Record the new index root for the next check */
+       err = dbg_old_index_check_init(c, zroot);
+       if (err)
+               goto out_free;
+
+       return 0;
+
+out_dump:
+       dbg_err("dumping index node (iip=%d)", i->iip);
+       dbg_dump_node(c, idx);
+       list_del(&i->list);
+       kfree(i);
+       if (!list_empty(&list)) {
+               i = list_entry(list.prev, struct idx_node, list);
+               dbg_err("dumping parent index node");
+               dbg_dump_node(c, &i->idx);
+       }
+out_free:
+       /* Free whatever is left of the traversal path */
+       while (!list_empty(&list)) {
+               i = list_entry(list.next, struct idx_node, list);
+               list_del(&i->list);
+               kfree(i);
+       }
+       ubifs_err("failed, error %d", err);
+       /* Positive codes only identify the failed check */
+       if (err > 0)
+               err = -EINVAL;
+       return err;
+}
+
+#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c

new file mode 100644 (file)

index 0000000..5bb51da
--- /dev/null
+++ b/fs/ubifs/compress.c
@@ -0,0 +1,253 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ * Copyright (C) 2006, 2007 University of Szeged, Hungary
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ *          Zoltan Sogor
+ */
+
+/*
+ * This file provides a single place to access compression and
+ * decompression.
+ */
+
+#include <linux/crypto.h>
+#include "ubifs.h"
+
+/*
+ * Fake description object for the "none" compressor. It is never passed to
+ * 'compr_init()' - 'ubifs_compressors_init()' installs it directly. Note, its
+ * @capi_name is an empty but non-NULL string, so it passes the "compiled in"
+ * check of 'ubifs_decompress()'.
+ */
+static struct ubifs_compressor none_compr = {
+       .compr_type = UBIFS_COMPR_NONE,
+       .name = "no compression",
+       .capi_name = "",
+};
+
+#ifdef CONFIG_UBIFS_FS_LZO
+/* Taken by 'ubifs_compress()' around LZO compression */
+static DEFINE_MUTEX(lzo_mutex);
+
+static struct ubifs_compressor lzo_compr = {
+       .compr_type = UBIFS_COMPR_LZO,
+       .comp_mutex = &lzo_mutex,
+       .name = "LZO",
+       .capi_name = "lzo",
+};
+#else
+/*
+ * LZO is not configured: @capi_name is left %NULL, so 'compr_init()' does not
+ * allocate anything for it and 'ubifs_decompress()' refuses data of this type.
+ */
+static struct ubifs_compressor lzo_compr = {
+       .compr_type = UBIFS_COMPR_LZO,
+       .name = "LZO",
+};
+#endif
+
+#ifdef CONFIG_UBIFS_FS_ZLIB
+/* Taken around zlib compression and decompression correspondingly */
+static DEFINE_MUTEX(deflate_mutex);
+static DEFINE_MUTEX(inflate_mutex);
+
+static struct ubifs_compressor zlib_compr = {
+       .compr_type = UBIFS_COMPR_ZLIB,
+       .comp_mutex = &deflate_mutex,
+       .decomp_mutex = &inflate_mutex,
+       .name = "zlib",
+       .capi_name = "deflate",
+};
+#else
+/* zlib is not configured: @capi_name is left %NULL, same as for LZO above */
+static struct ubifs_compressor zlib_compr = {
+       .compr_type = UBIFS_COMPR_ZLIB,
+       .name = "zlib",
+};
+#endif
+
+/*
+ * All UBIFS compressors, indexed by compression type. The array is filled in
+ * by 'compr_init()' and 'ubifs_compressors_init()'.
+ */
+struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
+
+/**
+ * ubifs_compress - compress data.
+ * @in_buf: data to compress
+ * @in_len: length of the data to compress
+ * @out_buf: output buffer where compressed data should be stored
+ * @out_len: output buffer length is returned here
+ * @compr_type: type of compression to use on enter, actually used compression
+ *              type on exit
+ *
+ * This function compresses input buffer @in_buf of length @in_len and stores
+ * the result in the output buffer @out_buf and the resulting length in
+ * @out_len. If the input buffer does not compress, it is just copied to the
+ * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE, if the
+ * input is shorter than %UBIFS_MIN_COMPR_LEN, if the requested compressor is
+ * not compiled in, or if compression error occurred.
+ *
+ * Note, if the input buffer was not compressed, it is copied to the output
+ * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
+ *
+ * This function does not return anything and cannot fail: in the worst case
+ * the data are stored uncompressed.
+ */
+void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
+                   int *compr_type)
+{
+       int err;
+       struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
+
+       if (*compr_type == UBIFS_COMPR_NONE)
+               goto no_compr;
+
+       /* If the input data is small, do not even try to compress it */
+       if (in_len < UBIFS_MIN_COMPR_LEN)
+               goto no_compr;
+
+       /*
+        * A compressor which is not compiled in has no @capi_name and no
+        * crypto transform (@cc is %NULL), so it must not be handed to the
+        * crypto API. Leave the data uncompressed instead.
+        */
+       if (unlikely(!compr->capi_name))
+               goto no_compr;
+
+       if (compr->comp_mutex)
+               mutex_lock(compr->comp_mutex);
+       err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
+                                  out_len);
+       if (compr->comp_mutex)
+               mutex_unlock(compr->comp_mutex);
+       if (unlikely(err)) {
+               ubifs_warn("cannot compress %d bytes, compressor %s, "
+                          "error %d, leave data uncompressed",
+                          in_len, compr->name, err);
+               goto no_compr;
+       }
+
+       /*
+        * Presently, we just require that compression results in less data,
+        * rather than any defined minimum compression ratio or amount.
+        */
+       if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8))
+               goto no_compr;
+
+       return;
+
+no_compr:
+       memcpy(out_buf, in_buf, in_len);
+       *out_len = in_len;
+       *compr_type = UBIFS_COMPR_NONE;
+}
+
+/**
+ * ubifs_decompress - decompress data.
+ * @in_buf: data to decompress
+ * @in_len: length of the data to decompress
+ * @out_buf: output buffer where decompressed data should be stored
+ * @out_len: output length is returned here
+ * @compr_type: type of compression
+ *
+ * This function decompresses data from buffer @in_buf into buffer @out_buf.
+ * The length of the uncompressed data is returned in @out_len. Data of type
+ * %UBIFS_COMPR_NONE are just copied. This function returns %0 on success or
+ * a negative error code on failure; in particular, %-EINVAL is returned if
+ * @compr_type is out of range or the compressor is not compiled in.
+ */
+int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
+                    int *out_len, int compr_type)
+{
+       struct ubifs_compressor *compr;
+       int ret;
+
+       if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
+               ubifs_err("invalid compression type %d", compr_type);
+               return -EINVAL;
+       }
+
+       compr = ubifs_compressors[compr_type];
+       if (unlikely(!compr->capi_name)) {
+               ubifs_err("%s compression is not compiled in", compr->name);
+               return -EINVAL;
+       }
+
+       /* Uncompressed data are stored as is */
+       if (compr_type == UBIFS_COMPR_NONE) {
+               memcpy(out_buf, in_buf, in_len);
+               *out_len = in_len;
+               return 0;
+       }
+
+       /* Not every compressor has a decompression mutex */
+       if (compr->decomp_mutex)
+               mutex_lock(compr->decomp_mutex);
+       ret = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf,
+                                    out_len);
+       if (compr->decomp_mutex)
+               mutex_unlock(compr->decomp_mutex);
+
+       if (!ret)
+               return 0;
+
+       ubifs_err("cannot decompress %d bytes, compressor %s, "
+                 "error %d", in_len, compr->name, ret);
+       return ret;
+}
+
+/**
+ * compr_init - initialize a compressor.
+ * @compr: compressor description object
+ *
+ * This function allocates the crypto API compressor for @compr, unless @compr
+ * has no crypto API name, and then registers @compr in the
+ * 'ubifs_compressors[]' array. Returns zero in case of success or a negative
+ * error code in case of failure.
+ */
+static int __init compr_init(struct ubifs_compressor *compr)
+{
+       /* Compressors without a crypto API name are only registered */
+       if (compr->capi_name) {
+               compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0);
+               if (IS_ERR(compr->cc)) {
+                       long err = PTR_ERR(compr->cc);
+
+                       ubifs_err("cannot initialize compressor %s, error %ld",
+                                 compr->name, err);
+                       return err;
+               }
+       }
+
+       ubifs_compressors[compr->compr_type] = compr;
+
+       return 0;
+}
+
+/**
+ * compr_exit - de-initialize a compressor.
+ * @compr: compressor description object
+ *
+ * This function frees the crypto API compressor of @compr. Nothing is done for
+ * compressors which have no crypto API name.
+ */
+static void compr_exit(struct ubifs_compressor *compr)
+{
+       if (!compr->capi_name)
+               return;
+
+       crypto_free_comp(compr->cc);
+}
+
+/**
+ * ubifs_compressors_init - initialize UBIFS compressors.
+ *
+ * This function initializes the LZO and zlib compressors and then registers
+ * the fake "none" compressor. If zlib fails to initialize, LZO is
+ * de-initialized again. Returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+int __init ubifs_compressors_init(void)
+{
+       int ret;
+
+       ret = compr_init(&lzo_compr);
+       if (ret)
+               return ret;
+
+       ret = compr_init(&zlib_compr);
+       if (ret) {
+               /* Undo what has been done for LZO */
+               compr_exit(&lzo_compr);
+               return ret;
+       }
+
+       ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr;
+
+       return 0;
+}
+
+/**
+ * ubifs_compressors_exit - de-initialize UBIFS compressors.
+ *
+ * This function de-initializes the LZO and zlib compressors by means of
+ * 'compr_exit()'. The "none" compressor needs no de-initialization because
+ * 'ubifs_compressors_init()' does not allocate anything for it.
+ */
+void __exit ubifs_compressors_exit(void)
+{
+       compr_exit(&lzo_compr);
+       compr_exit(&zlib_compr);
+}
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c

new file mode 100644 (file)

index 0000000..4e3aaeb
--- /dev/null
+++ b/fs/ubifs/debug.c
@@ -0,0 +1,2289 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file implements most of the debugging stuff which is compiled in only
+ * when it is enabled. But some debugging check functions are implemented in
+ * corresponding subsystem, just because they are closely related and utilize
+ * various local functions of those subsystems.
+ */
+
+#define UBIFS_DBG_PRESERVE_UBI
+
+#include "ubifs.h"
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+/*
+ * Taken by the dump functions in this file; 'dbg_key_str0()' and
+ * 'dbg_key_str1()' require it to be held because they use the shared buffers
+ * below.
+ */
+DEFINE_SPINLOCK(dbg_lock);
+
+/* Output buffers of 'dbg_key_str0()' and 'dbg_key_str1()' respectively */
+static char dbg_key_buf0[128];
+static char dbg_key_buf1[128];
+
+/* Debugging flag words, exported as module parameters just below */
+unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT;
+unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT;
+unsigned int ubifs_tst_flags;
+
+/* Parameters are readable by everyone and writable by the owner */
+module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
+
+MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
+MODULE_PARM_DESC(debug_chks, "Debug check flags");
+MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
+
+/* Return a printable name for key format @fmt */
+static const char *get_key_fmt(int fmt)
+{
+       if (fmt == UBIFS_SIMPLE_KEY_FMT)
+               return "simple";
+
+       return "unknown/invalid format";
+}
+
+/* Return a printable name for key hash function type @hash */
+static const char *get_key_hash(int hash)
+{
+       if (hash == UBIFS_KEY_HASH_R5)
+               return "R5";
+       if (hash == UBIFS_KEY_HASH_TEST)
+               return "test";
+
+       return "unknown/invalid name hash";
+}
+
+/* Return a printable name for key type @type */
+static const char *get_key_type(int type)
+{
+       const char *name;
+
+       switch (type) {
+       case UBIFS_INO_KEY:
+               name = "inode";
+               break;
+       case UBIFS_DENT_KEY:
+               name = "direntry";
+               break;
+       case UBIFS_XENT_KEY:
+               name = "xentry";
+               break;
+       case UBIFS_DATA_KEY:
+               name = "data";
+               break;
+       case UBIFS_TRUN_KEY:
+               name = "truncate";
+               break;
+       default:
+               name = "unknown/invalid key";
+               break;
+       }
+
+       return name;
+}
+
+/*
+ * Print a human-readable representation of @key into @buffer. Only the simple
+ * key format is understood; for any other format an error string is produced
+ * instead. The caller provides a buffer which is large enough.
+ */
+static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
+                       char *buffer)
+{
+       int type = key_type(c, key);
+
+       if (c->key_fmt != UBIFS_SIMPLE_KEY_FMT) {
+               sprintf(buffer, "bad key format %d", c->key_fmt);
+               return;
+       }
+
+       switch (type) {
+       case UBIFS_INO_KEY:
+       case UBIFS_TRUN_KEY:
+               /* These keys consist of the inode number and the type only */
+               sprintf(buffer, "(%lu, %s)", key_inum(c, key),
+                       get_key_type(type));
+               break;
+       case UBIFS_DENT_KEY:
+       case UBIFS_XENT_KEY:
+               sprintf(buffer, "(%lu, %s, %#08x)", key_inum(c, key),
+                       get_key_type(type), key_hash(c, key));
+               break;
+       case UBIFS_DATA_KEY:
+               sprintf(buffer, "(%lu, %s, %u)", key_inum(c, key),
+                       get_key_type(type), key_block(c, key));
+               break;
+       default:
+               sprintf(buffer, "(bad key type: %#08x, %#08x)",
+                       key->u32[0], key->u32[1]);
+               break;
+       }
+}
+
+/*
+ * Format @key into the static buffer 'dbg_key_buf0' and return that buffer.
+ * The buffer is shared by all callers, hence the locking requirement below.
+ */
+const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key)
+{
+       /* dbg_lock must be held */
+       sprintf_key(c, key, dbg_key_buf0);
+       return dbg_key_buf0;
+}
+
+/*
+ * Same as 'dbg_key_str0()', but uses the second static buffer 'dbg_key_buf1',
+ * so that two keys can be formatted at the same time.
+ */
+const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key)
+{
+       /* dbg_lock must be held */
+       sprintf_key(c, key, dbg_key_buf1);
+       return dbg_key_buf1;
+}
+
+/* Return a printable name for node type @type */
+const char *dbg_ntype(int type)
+{
+       const char *name;
+
+       switch (type) {
+       case UBIFS_PAD_NODE:
+               name = "padding node";
+               break;
+       case UBIFS_SB_NODE:
+               name = "superblock node";
+               break;
+       case UBIFS_MST_NODE:
+               name = "master node";
+               break;
+       case UBIFS_REF_NODE:
+               name = "reference node";
+               break;
+       case UBIFS_INO_NODE:
+               name = "inode node";
+               break;
+       case UBIFS_DENT_NODE:
+               name = "direntry node";
+               break;
+       case UBIFS_XENT_NODE:
+               name = "xentry node";
+               break;
+       case UBIFS_DATA_NODE:
+               name = "data node";
+               break;
+       case UBIFS_TRUN_NODE:
+               name = "truncate node";
+               break;
+       case UBIFS_IDX_NODE:
+               name = "indexing node";
+               break;
+       case UBIFS_CS_NODE:
+               name = "commit start node";
+               break;
+       case UBIFS_ORPH_NODE:
+               name = "orphan node";
+               break;
+       default:
+               name = "unknown node";
+               break;
+       }
+
+       return name;
+}
+
+/* Return a printable name for node group type @type */
+static const char *dbg_gtype(int type)
+{
+       if (type == UBIFS_NO_NODE_GROUP)
+               return "no node group";
+       if (type == UBIFS_IN_NODE_GROUP)
+               return "in node group";
+       if (type == UBIFS_LAST_OF_NODE_GROUP)
+               return "last of node group";
+
+       return "unknown";
+}
+
+/* Return a printable description of commit state @cmt_state */
+const char *dbg_cstate(int cmt_state)
+{
+       const char *descr;
+
+       switch (cmt_state) {
+       case COMMIT_RESTING:
+               descr = "commit resting";
+               break;
+       case COMMIT_BACKGROUND:
+               descr = "background commit requested";
+               break;
+       case COMMIT_REQUIRED:
+               descr = "commit required";
+               break;
+       case COMMIT_RUNNING_BACKGROUND:
+               descr = "BACKGROUND commit running";
+               break;
+       case COMMIT_RUNNING_REQUIRED:
+               descr = "commit running and required";
+               break;
+       case COMMIT_BROKEN:
+               descr = "broken commit";
+               break;
+       default:
+               descr = "unknown commit state";
+               break;
+       }
+
+       return descr;
+}
+
+/*
+ * Print the fields of the common node header @ch. The multi-byte fields are
+ * converted from little-endian before printing.
+ */
+static void dump_ch(const struct ubifs_ch *ch)
+{
+       printk(KERN_DEBUG "\tmagic          %#x\n", le32_to_cpu(ch->magic));
+       printk(KERN_DEBUG "\tcrc            %#x\n", le32_to_cpu(ch->crc));
+       printk(KERN_DEBUG "\tnode_type      %d (%s)\n", ch->node_type,
+              dbg_ntype(ch->node_type));
+       printk(KERN_DEBUG "\tgroup_type     %d (%s)\n", ch->group_type,
+              dbg_gtype(ch->group_type));
+       printk(KERN_DEBUG "\tsqnum          %llu\n",
+              (unsigned long long)le64_to_cpu(ch->sqnum));
+       printk(KERN_DEBUG "\tlen            %u\n", le32_to_cpu(ch->len));
+}
+
+/**
+ * dbg_dump_inode - dump an in-memory inode.
+ * @c: UBIFS file-system description object (not used by this function)
+ * @inode: inode to dump
+ *
+ * Prints the generic VFS inode fields first and the UBIFS-specific fields of
+ * the corresponding 'struct ubifs_inode' after them.
+ */
+void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
+{
+       const struct ubifs_inode *ui = ubifs_inode(inode);
+
+       printk(KERN_DEBUG "inode      %lu\n", inode->i_ino);
+       printk(KERN_DEBUG "size       %llu\n",
+              (unsigned long long)i_size_read(inode));
+       printk(KERN_DEBUG "nlink      %u\n", inode->i_nlink);
+       printk(KERN_DEBUG "uid        %u\n", (unsigned int)inode->i_uid);
+       printk(KERN_DEBUG "gid        %u\n", (unsigned int)inode->i_gid);
+       /* Time stamps are cast to unsigned int for printing */
+       printk(KERN_DEBUG "atime      %u.%u\n",
+              (unsigned int)inode->i_atime.tv_sec,
+              (unsigned int)inode->i_atime.tv_nsec);
+       printk(KERN_DEBUG "mtime      %u.%u\n",
+              (unsigned int)inode->i_mtime.tv_sec,
+              (unsigned int)inode->i_mtime.tv_nsec);
+       printk(KERN_DEBUG "ctime       %u.%u\n",
+              (unsigned int)inode->i_ctime.tv_sec,
+              (unsigned int)inode->i_ctime.tv_nsec);
+       /* UBIFS-specific part of the inode */
+       printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum);
+       printk(KERN_DEBUG "xattr_size  %u\n", ui->xattr_size);
+       printk(KERN_DEBUG "xattr_cnt   %u\n", ui->xattr_cnt);
+       printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names);
+       printk(KERN_DEBUG "dirty       %u\n", ui->dirty);
+       printk(KERN_DEBUG "xattr       %u\n", ui->xattr);
+       printk(KERN_DEBUG "flags       %d\n", ui->flags);
+       printk(KERN_DEBUG "compr_type  %d\n", ui->compr_type);
+       printk(KERN_DEBUG "data_len    %d\n", ui->data_len);
+}
+
+/**
+ * dbg_dump_node - dump the contents of a UBIFS node.
+ * @c: UBIFS file-system description object
+ * @node: the node to dump
+ *
+ * Prints the common header and then the type-specific fields of @node at
+ * %KERN_DEBUG level. If the magic number of the node is wrong, only the first
+ * %UBIFS_CH_SZ bytes are hex-dumped. Nothing is printed when
+ * 'dbg_failure_mode' is set. The dump itself is done under 'dbg_lock'.
+ */
+void dbg_dump_node(const struct ubifs_info *c, const void *node)
+{
+       int i, n;
+       union ubifs_key key;
+       const struct ubifs_ch *ch = node;
+
+       if (dbg_failure_mode)
+               return;
+
+       /* If the magic is incorrect, just hexdump the first bytes */
+       if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
+               printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ);
+               print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
+                              (void *)node, UBIFS_CH_SZ, 1);
+               return;
+       }
+
+       spin_lock(&dbg_lock);
+       dump_ch(node);
+
+       switch (ch->node_type) {
+       case UBIFS_PAD_NODE:
+       {
+               const struct ubifs_pad_node *pad = node;
+
+               printk(KERN_DEBUG "\tpad_len        %u\n",
+                      le32_to_cpu(pad->pad_len));
+               break;
+       }
+       case UBIFS_SB_NODE:
+       {
+               const struct ubifs_sb_node *sup = node;
+               unsigned int sup_flags = le32_to_cpu(sup->flags);
+
+               printk(KERN_DEBUG "\tkey_hash       %d (%s)\n",
+                      (int)sup->key_hash, get_key_hash(sup->key_hash));
+               printk(KERN_DEBUG "\tkey_fmt        %d (%s)\n",
+                      (int)sup->key_fmt, get_key_fmt(sup->key_fmt));
+               printk(KERN_DEBUG "\tflags          %#x\n", sup_flags);
+               printk(KERN_DEBUG "\t  big_lpt      %u\n",
+                      !!(sup_flags & UBIFS_FLG_BIGLPT));
+               printk(KERN_DEBUG "\tmin_io_size    %u\n",
+                      le32_to_cpu(sup->min_io_size));
+               printk(KERN_DEBUG "\tleb_size       %u\n",
+                      le32_to_cpu(sup->leb_size));
+               printk(KERN_DEBUG "\tleb_cnt        %u\n",
+                      le32_to_cpu(sup->leb_cnt));
+               printk(KERN_DEBUG "\tmax_leb_cnt    %u\n",
+                      le32_to_cpu(sup->max_leb_cnt));
+               printk(KERN_DEBUG "\tmax_bud_bytes  %llu\n",
+                      (unsigned long long)le64_to_cpu(sup->max_bud_bytes));
+               printk(KERN_DEBUG "\tlog_lebs       %u\n",
+                      le32_to_cpu(sup->log_lebs));
+               printk(KERN_DEBUG "\tlpt_lebs       %u\n",
+                      le32_to_cpu(sup->lpt_lebs));
+               printk(KERN_DEBUG "\torph_lebs      %u\n",
+                      le32_to_cpu(sup->orph_lebs));
+               printk(KERN_DEBUG "\tjhead_cnt      %u\n",
+                      le32_to_cpu(sup->jhead_cnt));
+               printk(KERN_DEBUG "\tfanout         %u\n",
+                      le32_to_cpu(sup->fanout));
+               printk(KERN_DEBUG "\tlsave_cnt      %u\n",
+                      le32_to_cpu(sup->lsave_cnt));
+               printk(KERN_DEBUG "\tdefault_compr  %u\n",
+                      (int)le16_to_cpu(sup->default_compr));
+               printk(KERN_DEBUG "\trp_size        %llu\n",
+                      (unsigned long long)le64_to_cpu(sup->rp_size));
+               printk(KERN_DEBUG "\trp_uid         %u\n",
+                      le32_to_cpu(sup->rp_uid));
+               printk(KERN_DEBUG "\trp_gid         %u\n",
+                      le32_to_cpu(sup->rp_gid));
+               printk(KERN_DEBUG "\tfmt_version    %u\n",
+                      le32_to_cpu(sup->fmt_version));
+               printk(KERN_DEBUG "\ttime_gran      %u\n",
+                      le32_to_cpu(sup->time_gran));
+               printk(KERN_DEBUG "\tUUID           %02X%02X%02X%02X-%02X%02X"
+                      "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
+                      sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3],
+                      sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7],
+                      sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11],
+                      sup->uuid[12], sup->uuid[13], sup->uuid[14],
+                      sup->uuid[15]);
+               break;
+       }
+       case UBIFS_MST_NODE:
+       {
+               const struct ubifs_mst_node *mst = node;
+
+               printk(KERN_DEBUG "\thighest_inum   %llu\n",
+                      (unsigned long long)le64_to_cpu(mst->highest_inum));
+               printk(KERN_DEBUG "\tcommit number  %llu\n",
+                      (unsigned long long)le64_to_cpu(mst->cmt_no));
+               printk(KERN_DEBUG "\tflags          %#x\n",
+                      le32_to_cpu(mst->flags));
+               printk(KERN_DEBUG "\tlog_lnum       %u\n",
+                      le32_to_cpu(mst->log_lnum));
+               printk(KERN_DEBUG "\troot_lnum      %u\n",
+                      le32_to_cpu(mst->root_lnum));
+               printk(KERN_DEBUG "\troot_offs      %u\n",
+                      le32_to_cpu(mst->root_offs));
+               printk(KERN_DEBUG "\troot_len       %u\n",
+                      le32_to_cpu(mst->root_len));
+               printk(KERN_DEBUG "\tgc_lnum        %u\n",
+                      le32_to_cpu(mst->gc_lnum));
+               printk(KERN_DEBUG "\tihead_lnum     %u\n",
+                      le32_to_cpu(mst->ihead_lnum));
+               printk(KERN_DEBUG "\tihead_offs     %u\n",
+                      le32_to_cpu(mst->ihead_offs));
+               printk(KERN_DEBUG "\tindex_size     %u\n",
+                      le32_to_cpu(mst->index_size));
+               printk(KERN_DEBUG "\tlpt_lnum       %u\n",
+                      le32_to_cpu(mst->lpt_lnum));
+               printk(KERN_DEBUG "\tlpt_offs       %u\n",
+                      le32_to_cpu(mst->lpt_offs));
+               printk(KERN_DEBUG "\tnhead_lnum     %u\n",
+                      le32_to_cpu(mst->nhead_lnum));
+               printk(KERN_DEBUG "\tnhead_offs     %u\n",
+                      le32_to_cpu(mst->nhead_offs));
+               printk(KERN_DEBUG "\tltab_lnum      %u\n",
+                      le32_to_cpu(mst->ltab_lnum));
+               printk(KERN_DEBUG "\tltab_offs      %u\n",
+                      le32_to_cpu(mst->ltab_offs));
+               printk(KERN_DEBUG "\tlsave_lnum     %u\n",
+                      le32_to_cpu(mst->lsave_lnum));
+               printk(KERN_DEBUG "\tlsave_offs     %u\n",
+                      le32_to_cpu(mst->lsave_offs));
+               printk(KERN_DEBUG "\tlscan_lnum     %u\n",
+                      le32_to_cpu(mst->lscan_lnum));
+               printk(KERN_DEBUG "\tleb_cnt        %u\n",
+                      le32_to_cpu(mst->leb_cnt));
+               printk(KERN_DEBUG "\tempty_lebs     %u\n",
+                      le32_to_cpu(mst->empty_lebs));
+               printk(KERN_DEBUG "\tidx_lebs       %u\n",
+                      le32_to_cpu(mst->idx_lebs));
+               printk(KERN_DEBUG "\ttotal_free     %llu\n",
+                      (unsigned long long)le64_to_cpu(mst->total_free));
+               printk(KERN_DEBUG "\ttotal_dirty    %llu\n",
+                      (unsigned long long)le64_to_cpu(mst->total_dirty));
+               printk(KERN_DEBUG "\ttotal_used     %llu\n",
+                      (unsigned long long)le64_to_cpu(mst->total_used));
+               printk(KERN_DEBUG "\ttotal_dead     %llu\n",
+                      (unsigned long long)le64_to_cpu(mst->total_dead));
+               printk(KERN_DEBUG "\ttotal_dark     %llu\n",
+                      (unsigned long long)le64_to_cpu(mst->total_dark));
+               break;
+       }
+       case UBIFS_REF_NODE:
+       {
+               const struct ubifs_ref_node *ref = node;
+
+               printk(KERN_DEBUG "\tlnum           %u\n",
+                      le32_to_cpu(ref->lnum));
+               printk(KERN_DEBUG "\toffs           %u\n",
+                      le32_to_cpu(ref->offs));
+               printk(KERN_DEBUG "\tjhead          %u\n",
+                      le32_to_cpu(ref->jhead));
+               break;
+       }
+       case UBIFS_INO_NODE:
+       {
+               const struct ubifs_ino_node *ino = node;
+
+               key_read(c, &ino->key, &key);
+               printk(KERN_DEBUG "\tkey            %s\n", DBGKEY(&key));
+               printk(KERN_DEBUG "\tcreat_sqnum    %llu\n",
+                      (unsigned long long)le64_to_cpu(ino->creat_sqnum));
+               printk(KERN_DEBUG "\tsize           %llu\n",
+                      (unsigned long long)le64_to_cpu(ino->size));
+               printk(KERN_DEBUG "\tnlink          %u\n",
+                      le32_to_cpu(ino->nlink));
+               printk(KERN_DEBUG "\tatime          %lld.%u\n",
+                      (long long)le64_to_cpu(ino->atime_sec),
+                      le32_to_cpu(ino->atime_nsec));
+               printk(KERN_DEBUG "\tmtime          %lld.%u\n",
+                      (long long)le64_to_cpu(ino->mtime_sec),
+                      le32_to_cpu(ino->mtime_nsec));
+               printk(KERN_DEBUG "\tctime          %lld.%u\n",
+                      (long long)le64_to_cpu(ino->ctime_sec),
+                      le32_to_cpu(ino->ctime_nsec));
+               printk(KERN_DEBUG "\tuid            %u\n",
+                      le32_to_cpu(ino->uid));
+               printk(KERN_DEBUG "\tgid            %u\n",
+                      le32_to_cpu(ino->gid));
+               printk(KERN_DEBUG "\tmode           %u\n",
+                      le32_to_cpu(ino->mode));
+               printk(KERN_DEBUG "\tflags          %#x\n",
+                      le32_to_cpu(ino->flags));
+               printk(KERN_DEBUG "\txattr_cnt      %u\n",
+                      le32_to_cpu(ino->xattr_cnt));
+               printk(KERN_DEBUG "\txattr_size     %u\n",
+                      le32_to_cpu(ino->xattr_size));
+               printk(KERN_DEBUG "\txattr_names    %u\n",
+                      le32_to_cpu(ino->xattr_names));
+               printk(KERN_DEBUG "\tcompr_type     %#x\n",
+                      (int)le16_to_cpu(ino->compr_type));
+               printk(KERN_DEBUG "\tdata len       %u\n",
+                      le32_to_cpu(ino->data_len));
+               break;
+       }
+       case UBIFS_DENT_NODE:
+       case UBIFS_XENT_NODE:
+       {
+               const struct ubifs_dent_node *dent = node;
+               int nlen = le16_to_cpu(dent->nlen);
+
+               key_read(c, &dent->key, &key);
+               printk(KERN_DEBUG "\tkey            %s\n", DBGKEY(&key));
+               printk(KERN_DEBUG "\tinum           %llu\n",
+                      (unsigned long long)le64_to_cpu(dent->inum));
+               printk(KERN_DEBUG "\ttype           %d\n", (int)dent->type);
+               printk(KERN_DEBUG "\tnlen           %d\n", nlen);
+               printk(KERN_DEBUG "\tname           ");
+
+               /* Do not trust the name length of a possibly corrupted node */
+               if (nlen > UBIFS_MAX_NLEN)
+                       printk(KERN_DEBUG "(bad name length, not printing, "
+                                         "bad or corrupted node)");
+               else {
+                       for (i = 0; i < nlen && dent->name[i]; i++)
+                               printk("%c", dent->name[i]);
+               }
+               printk("\n");
+
+               break;
+       }
+       case UBIFS_DATA_NODE:
+       {
+               const struct ubifs_data_node *dn = node;
+               int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
+
+               key_read(c, &dn->key, &key);
+               printk(KERN_DEBUG "\tkey            %s\n", DBGKEY(&key));
+               printk(KERN_DEBUG "\tsize           %u\n",
+                      le32_to_cpu(dn->size));
+               printk(KERN_DEBUG "\tcompr_typ      %d\n",
+                      (int)le16_to_cpu(dn->compr_type));
+               printk(KERN_DEBUG "\tdata size      %d\n",
+                      dlen);
+               printk(KERN_DEBUG "\tdata:\n");
+               print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1,
+                              (void *)&dn->data, dlen, 0);
+               break;
+       }
+       case UBIFS_TRUN_NODE:
+       {
+               const struct ubifs_trun_node *trun = node;
+
+               printk(KERN_DEBUG "\tinum           %u\n",
+                      le32_to_cpu(trun->inum));
+               printk(KERN_DEBUG "\told_size       %llu\n",
+                      (unsigned long long)le64_to_cpu(trun->old_size));
+               printk(KERN_DEBUG "\tnew_size       %llu\n",
+                      (unsigned long long)le64_to_cpu(trun->new_size));
+               break;
+       }
+       case UBIFS_IDX_NODE:
+       {
+               const struct ubifs_idx_node *idx = node;
+
+               n = le16_to_cpu(idx->child_cnt);
+               printk(KERN_DEBUG "\tchild_cnt      %d\n", n);
+               printk(KERN_DEBUG "\tlevel          %d\n",
+                      (int)le16_to_cpu(idx->level));
+               printk(KERN_DEBUG "\tBranches:\n");
+
+               /* The on-flash child count is additionally bounded by fanout */
+               for (i = 0; i < n && i < c->fanout - 1; i++) {
+                       const struct ubifs_branch *br;
+
+                       br = ubifs_idx_branch(c, idx, i);
+                       key_read(c, &br->key, &key);
+                       printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n",
+                              i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
+                              le32_to_cpu(br->len), DBGKEY(&key));
+               }
+               break;
+       }
+       case UBIFS_CS_NODE:
+               /* Nothing to print besides the common header */
+               break;
+       case UBIFS_ORPH_NODE:
+       {
+               const struct ubifs_orph_node *orph = node;
+
+               /* Bit 63 of 'cmt_no' is the "last node" flag */
+               printk(KERN_DEBUG "\tcommit number  %llu\n",
+                      (unsigned long long)
+                               le64_to_cpu(orph->cmt_no) & LLONG_MAX);
+               printk(KERN_DEBUG "\tlast node flag %llu\n",
+                      (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
+               /* Each orphan inode number takes 8 bytes after the header */
+               n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
+               printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n);
+               for (i = 0; i < n; i++)
+                       printk(KERN_DEBUG "\t  ino %llu\n",
+                              (unsigned long long)le64_to_cpu(orph->inos[i]));
+               break;
+       }
+       default:
+               printk(KERN_DEBUG "node type %d was not recognized\n",
+                      (int)ch->node_type);
+       }
+       spin_unlock(&dbg_lock);
+}
+
+/**
+ * dbg_dump_budget_req - dump a budgeting request.
+ * @req: the budgeting request to dump
+ *
+ * Prints all fields of @req under 'dbg_lock'.
+ */
+void dbg_dump_budget_req(const struct ubifs_budget_req *req)
+{
+       spin_lock(&dbg_lock);
+       printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n",
+              req->new_ino, req->dirtied_ino);
+       printk(KERN_DEBUG "\tnew_ino_d   %d, dirtied_ino_d %d\n",
+              req->new_ino_d, req->dirtied_ino_d);
+       printk(KERN_DEBUG "\tnew_page    %d, dirtied_page %d\n",
+              req->new_page, req->dirtied_page);
+       printk(KERN_DEBUG "\tnew_dent    %d, mod_dent     %d\n",
+              req->new_dent, req->mod_dent);
+       printk(KERN_DEBUG "\tidx_growth  %d\n", req->idx_growth);
+       printk(KERN_DEBUG "\tdata_growth %d dd_growth     %d\n",
+              req->data_growth, req->dd_growth);
+       spin_unlock(&dbg_lock);
+}
+
+/**
+ * dbg_dump_lstats - dump LEB properties statistics.
+ * @lst: the statistics to dump
+ *
+ * Prints all fields of @lst under 'dbg_lock'.
+ */
+void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
+{
+       spin_lock(&dbg_lock);
+       printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs  %d\n",
+              lst->empty_lebs, lst->idx_lebs);
+       printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
+              "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
+              lst->total_dirty);
+       printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, "
+              "total_dead %lld\n", lst->total_used, lst->total_dark,
+              lst->total_dead);
+       spin_unlock(&dbg_lock);
+}
+
+/**
+ * dbg_dump_budg - dump budgeting information.
+ * @c: UBIFS file-system description object
+ *
+ * Prints the budgeting counters of @c, followed by the journal heads, the
+ * buds, the old buds, the garbage-collected index LEBs and the commit state.
+ * Everything is printed under 'dbg_lock'.
+ */
+void dbg_dump_budg(struct ubifs_info *c)
+{
+       int i;
+       struct rb_node *rb;
+       struct ubifs_bud *bud;
+       struct ubifs_gced_idx_leb *idx_gc;
+
+       spin_lock(&dbg_lock);
+       printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, "
+              "budg_dd_growth %lld, budg_idx_growth %lld\n",
+              c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
+       printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
+              "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
+              c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
+              c->freeable_cnt);
+       printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
+              "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
+              c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
+       printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
+              "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
+              atomic_long_read(&c->dirty_zn_cnt),
+              atomic_long_read(&c->clean_zn_cnt));
+       printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
+              c->dark_wm, c->dead_wm, c->max_idx_node_sz);
+       printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
+              c->gc_lnum, c->ihead_lnum);
+       /* One line per journal head */
+       for (i = 0; i < c->jhead_cnt; i++)
+               printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
+                      c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
+       /* Buds are kept in an RB-tree, old buds and GC'ed LEBs in lists */
+       for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
+               bud = rb_entry(rb, struct ubifs_bud, rb);
+               printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
+       }
+       list_for_each_entry(bud, &c->old_buds, list)
+               printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum);
+       list_for_each_entry(idx_gc, &c->idx_gc, list)
+               printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
+                      idx_gc->lnum, idx_gc->unmap);
+       printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
+       spin_unlock(&dbg_lock);
+}
+
+/**
+ * dbg_dump_lprop - dump the properties of one LEB.
+ * @c: UBIFS file-system description object
+ * @lp: LEB properties to dump
+ *
+ * The "used" value is not stored in @lp, it is calculated as the LEB size
+ * minus the free and dirty space.
+ */
+void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
+{
+       printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), "
+              "flags %#x\n", lp->lnum, lp->free, lp->dirty,
+              c->leb_size - lp->free - lp->dirty, lp->flags);
+}
+
+/**
+ * dbg_dump_lprops - dump the properties of all LEBs of the main area.
+ * @c: UBIFS file-system description object
+ *
+ * Prints the lprops statistics and then the properties of every LEB from
+ * @c->main_first to @c->leb_cnt - 1. LEBs whose properties cannot be read are
+ * reported and skipped.
+ */
+void dbg_dump_lprops(struct ubifs_info *c)
+{
+       int lnum, err;
+       struct ubifs_lprops lp;
+       struct ubifs_lp_stats lst;
+
+       printk(KERN_DEBUG "Dumping LEB properties\n");
+       ubifs_get_lp_stats(c, &lst);
+       dbg_dump_lstats(&lst);
+
+       for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
+               err = ubifs_read_one_lp(c, lnum, &lp);
+               if (err) {
+                       ubifs_err("cannot read lprops for LEB %d", lnum);
+                       /*
+                        * 'lp' may not have been filled in (it then holds
+                        * stack garbage or the previous LEB), do not dump it.
+                        */
+                       continue;
+               }
+
+               dbg_dump_lprop(c, &lp);
+       }
+}
+
+/**
+ * dbg_dump_leb - dump all nodes of a LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: number of the LEB to dump
+ *
+ * Scans LEB @lnum using @c->dbg_buf as the scan buffer and dumps every node
+ * which was found. Nothing is done when 'dbg_failure_mode' is set. A scan
+ * failure is reported and the dump is abandoned.
+ */
+void dbg_dump_leb(const struct ubifs_info *c, int lnum)
+{
+       struct ubifs_scan_node *snod;
+       struct ubifs_scan_leb *sleb;
+
+       if (dbg_failure_mode)
+               return;
+
+       printk(KERN_DEBUG "Dumping LEB %d\n", lnum);
+
+       sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
+       if (IS_ERR(sleb)) {
+               ubifs_err("scan error %d", (int)PTR_ERR(sleb));
+               return;
+       }
+
+       printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
+              sleb->nodes_cnt, sleb->endpt);
+
+       list_for_each_entry(snod, &sleb->nodes, list) {
+               /* A LEB may contain many nodes, give others a chance to run */
+               cond_resched();
+               printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum,
+                      snod->offs, snod->len);
+               dbg_dump_node(c, snod->node);
+       }
+
+       ubifs_scan_destroy(sleb);
+}
+
+/**
+ * dbg_dump_znode - dump a znode and its zbranches.
+ * @c: UBIFS file-system description object
+ * @znode: znode to dump
+ *
+ * Prints a summary line for @znode (its location is taken from the parent's
+ * zbranch, or from @c->zroot if the znode has no parent) followed by one line
+ * per zbranch. The zbranches are not printed if the child count is not within
+ * 1..@c->fanout. The whole dump is done under 'dbg_lock'.
+ */
+void dbg_dump_znode(const struct ubifs_info *c,
+                   const struct ubifs_znode *znode)
+{
+       int n;
+       const struct ubifs_zbranch *zbr;
+
+       spin_lock(&dbg_lock);
+
+       zbr = znode->parent ? &znode->parent->zbranch[znode->iip] : &c->zroot;
+
+       printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
+              " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs,
+              zbr->len, znode->parent, znode->iip, znode->level,
+              znode->child_cnt, znode->flags);
+
+       /* Do not walk the zbranch array if the child count looks wrong */
+       if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout)
+               goto out_unlock;
+
+       printk(KERN_DEBUG "zbranches:\n");
+       for (n = 0; n < znode->child_cnt; n++) {
+               zbr = &znode->zbranch[n];
+               if (znode->level > 0)
+                       printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key "
+                                         "%s\n", n, zbr->znode, zbr->lnum,
+                                         zbr->offs, zbr->len,
+                                         DBGKEY(&zbr->key));
+               else
+                       printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key "
+                                         "%s\n", n, zbr->znode, zbr->lnum,
+                                         zbr->offs, zbr->len,
+                                         DBGKEY(&zbr->key));
+       }
+
+out_unlock:
+       spin_unlock(&dbg_lock);
+}
+
+/**
+ * dbg_dump_heap - dump an LEB properties heap.
+ * @c: UBIFS file-system description object (not used by this function)
+ * @heap: the heap to dump
+ * @cat: heap category, printed in the header line only
+ *
+ * Prints one line for each of the @heap->cnt elements of the heap array.
+ */
+void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
+{
+       int i;
+
+       printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n",
+              cat, heap->cnt);
+       for (i = 0; i < heap->cnt; i++) {
+               struct ubifs_lprops *lprops = heap->arr[i];
+
+               printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d "
+                      "flags %d\n", i, lprops->lnum, lprops->hpos,
+                      lprops->free, lprops->dirty, lprops->flags);
+       }
+}
+
+/**
+ * dbg_dump_pnode - dump an LPT pnode.
+ * @c: UBIFS file-system description object (not used by this function)
+ * @pnode: the pnode to dump
+ * @parent: parent nnode, only its address is printed
+ * @iip: index in parent, printed as is
+ *
+ * Prints the pnode header information and the %UBIFS_LPT_FANOUT LEB
+ * properties entries which it contains.
+ */
+void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
+                   struct ubifs_nnode *parent, int iip)
+{
+       int i;
+
+       printk(KERN_DEBUG "Dumping pnode:\n");
+       /* The pointers are printed as plain hexadecimal numbers */
+       printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
+              (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
+       printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
+              pnode->flags, iip, pnode->level, pnode->num);
+       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+               struct ubifs_lprops *lp = &pnode->lprops[i];
+
+               printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n",
+                      i, lp->free, lp->dirty, lp->flags, lp->lnum);
+       }
+}
+
+/**
+ * dbg_dump_tnc - dump the TNC tree.
+ * @c: UBIFS file-system description object
+ *
+ * Walks the znodes with 'ubifs_tnc_levelorder_next()' starting from
+ * @c->zroot.znode and dumps each of them, printing a header line every time
+ * the level changes.
+ */
+void dbg_dump_tnc(struct ubifs_info *c)
+{
+       int level;
+       struct ubifs_znode *znode;
+
+       printk(KERN_DEBUG "\n");
+       printk(KERN_DEBUG "Dumping the TNC tree\n");
+
+       /*
+        * NOTE(review): the first znode is dereferenced before any NULL check,
+        * presumably the TNC is never empty here - confirm against callers.
+        */
+       znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
+       level = znode->level;
+       printk(KERN_DEBUG "== Level %d ==\n", level);
+
+       for (; znode;
+            znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode)) {
+               if (znode->level != level) {
+                       level = znode->level;
+                       printk(KERN_DEBUG "== Level %d ==\n", level);
+               }
+               dbg_dump_znode(c, znode);
+       }
+
+       printk(KERN_DEBUG "\n");
+}
+
+/*
+ * Znode callback passed to 'dbg_walk_index()' by 'dbg_dump_index()': dumps
+ * @znode and always returns zero. @priv is not used.
+ */
+static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
+                     void *priv)
+{
+       dbg_dump_znode(c, znode);
+       return 0;
+}
+
+/**
+ * dbg_dump_index - dump the on-flash index.
+ * @c: UBIFS file-system description object
+ *
+ * This function dumps the whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()'
+ * which dumps only in-memory znodes and does not read znodes from flash.
+ */
+void dbg_dump_index(struct ubifs_info *c)
+{
+       /* No leaf callback and no private data, only znodes are dumped */
+       dbg_walk_index(c, NULL, dump_znode, NULL);
+}
+
+/**
+ * dbg_check_synced_i_size - check synchronized inode size.
+ * @inode: inode to check
+ *
+ * A clean inode must have its synchronized size equal to its current UBIFS
+ * inode size. This function has to be called only for locked inodes (@i_mutex
+ * has to be locked). Nothing is checked unless %UBIFS_CHK_GEN is set in
+ * 'ubifs_chk_flags' and @inode is a regular file. Returns %0 if the
+ * synchronized inode size is correct (or was not checked), and %-EINVAL if
+ * not.
+ */
+int dbg_check_synced_i_size(struct inode *inode)
+{
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       int ret = 0;
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_GEN) || !S_ISREG(inode->i_mode))
+               return 0;
+
+       mutex_lock(&ui->ui_mutex);
+       spin_lock(&ui->ui_lock);
+
+       /* Dirty inodes and inodes with matching sizes are fine */
+       if (ui->dirty || ui->ui_size == ui->synced_i_size)
+               goto out_unlock;
+
+       ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode "
+                 "is clean", ui->ui_size, ui->synced_i_size);
+       ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
+                 inode->i_mode, i_size_read(inode));
+       dbg_dump_stack();
+       ret = -EINVAL;
+
+out_unlock:
+       spin_unlock(&ui->ui_lock);
+       mutex_unlock(&ui->ui_mutex);
+       return ret;
+}
+
+/**
+ * dbg_check_dir_size - check directory inode size and link count.
+ * @c: UBIFS file-system description object
+ * @dir: the directory inode to check
+ *
+ * This function walks all directory entries of @dir, calculates the expected
+ * directory size and link count, and compares them with the values stored in
+ * the inode. Nothing is checked unless %UBIFS_CHK_GEN is set in
+ * 'ubifs_chk_flags' and @dir is a directory. Returns zero in case of success,
+ * %-EINVAL if the size or the link count does not match, and a negative error
+ * code if a directory entry could not be looked up.
+ *
+ * Note, it is good idea to make sure the @dir->i_mutex is locked before
+ * calling this function.
+ */
+int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
+{
+       unsigned int nlink = 2;
+       union ubifs_key key;
+       struct ubifs_dent_node *dent, *pdent = NULL;
+       struct qstr nm = { .name = NULL };
+       loff_t size = UBIFS_INO_NODE_SZ;
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+               return 0;
+
+       if (!S_ISDIR(dir->i_mode))
+               return 0;
+
+       lowest_dent_key(c, &key, dir->i_ino);
+       while (1) {
+               int err;
+
+               dent = ubifs_tnc_next_ent(c, &key, &nm);
+               if (IS_ERR(dent)) {
+                       err = PTR_ERR(dent);
+                       if (err == -ENOENT)
+                               break;
+                       /*
+                        * @nm.name still points into the previous entry, so
+                        * it could not be freed earlier - free it now to
+                        * avoid leaking it on the error path.
+                        */
+                       kfree(pdent);
+                       return err;
+               }
+
+               nm.name = dent->name;
+               nm.len = le16_to_cpu(dent->nlen);
+               size += CALC_DENT_SIZE(nm.len);
+               if (dent->type == UBIFS_ITYPE_DIR)
+                       nlink += 1;
+               /* The previous entry is not referred by @nm any longer */
+               kfree(pdent);
+               pdent = dent;
+               key_read(c, &dent->key, &key);
+       }
+       kfree(pdent);
+
+       if (i_size_read(dir) != size) {
+               ubifs_err("directory inode %lu has size %llu, "
+                         "but calculated size is %llu", dir->i_ino,
+                         (unsigned long long)i_size_read(dir),
+                         (unsigned long long)size);
+               dump_stack();
+               return -EINVAL;
+       }
+       if (dir->i_nlink != nlink) {
+               ubifs_err("directory inode %lu has nlink %u, but calculated "
+                         "nlink is %u", dir->i_ino, dir->i_nlink, nlink);
+               dump_stack();
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/**
+ * dbg_check_key_order - make sure that colliding keys are properly ordered.
+ * @c: UBIFS file-system description object
+ * @zbr1: first zbranch
+ * @zbr2: following zbranch
+ *
+ * In UBIFS indexing B-tree colliding keys have to be sorted in binary order of
+ * names of the direntries/xentries which are referred by the keys. This
+ * function reads direntries/xentries referred by @zbr1 and @zbr2 and makes
+ * sure the name of direntry/xentry referred by @zbr1 is less than
+ * direntry/xentry referred by @zbr2. Returns zero if this is true, %1 if not
+ * (including the case when a node key differs from the key in its zbranch),
+ * and a negative error code in case of failure.
+ */
+static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
+                              struct ubifs_zbranch *zbr2)
+{
+       int err, nlen1, nlen2, cmp;
+       struct ubifs_dent_node *dent1, *dent2;
+       union ubifs_key key;
+
+       ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key));
+       dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
+       if (!dent1)
+               return -ENOMEM;
+       dent2 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
+       if (!dent2) {
+               err = -ENOMEM;
+               goto out_free;
+       }
+
+       err = ubifs_tnc_read_node(c, zbr1, dent1);
+       if (err)
+               goto out_free;
+       err = ubifs_validate_entry(c, dent1);
+       if (err)
+               goto out_free;
+
+       err = ubifs_tnc_read_node(c, zbr2, dent2);
+       if (err)
+               goto out_free;
+       err = ubifs_validate_entry(c, dent2);
+       if (err)
+               goto out_free;
+
+       /* Make sure node keys are the same as in zbranch */
+       err = 1;
+       key_read(c, &dent1->key, &key);
+       if (keys_cmp(c, &zbr1->key, &key)) {
+               dbg_err("1st entry at %d:%d has key %s", zbr1->lnum,
+                       zbr1->offs, DBGKEY(&key));
+               dbg_err("but it should have key %s according to tnc",
+                       DBGKEY(&zbr1->key));
+               dbg_dump_node(c, dent1);
+               goto out_free;
+       }
+
+       key_read(c, &dent2->key, &key);
+       if (keys_cmp(c, &zbr2->key, &key)) {
+               /* Report the position of the 2nd entry, not of the 1st one */
+               dbg_err("2nd entry at %d:%d has key %s", zbr2->lnum,
+                       zbr2->offs, DBGKEY(&key));
+               dbg_err("but it should have key %s according to tnc",
+                       DBGKEY(&zbr2->key));
+               dbg_dump_node(c, dent2);
+               goto out_free;
+       }
+
+       nlen1 = le16_to_cpu(dent1->nlen);
+       nlen2 = le16_to_cpu(dent2->nlen);
+
+       /*
+        * Compare the common prefix first; if it is the same, the shorter
+        * name is the smaller one.
+        */
+       cmp = memcmp(dent1->name, dent2->name, min_t(int, nlen1, nlen2));
+       if (cmp < 0 || (cmp == 0 && nlen1 < nlen2)) {
+               err = 0;
+               goto out_free;
+       }
+       if (cmp == 0 && nlen1 == nlen2)
+               dbg_err("2 xent/dent nodes with the same name");
+       else
+               dbg_err("bad order of colliding key %s",
+                       DBGKEY(&key));
+
+       dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
+       dbg_dump_node(c, dent1);
+       dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
+       dbg_dump_node(c, dent2);
+
+out_free:
+       /* 'kfree()' accepts %NULL, so this is fine if @dent2 was not allocated */
+       kfree(dent2);
+       kfree(dent1);
+       return err;
+}
+
+/**
+ * dbg_check_znode - check if znode is all right.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch which points to this znode
+ *
+ * This function makes sure that znode referred to by @zbr is all right.
+ * Returns zero if it is, and %-EINVAL if it is not. A negative error code
+ * returned by 'dbg_check_key_order()' is passed through unchanged.
+ *
+ * Each failed check stores a distinct small positive number in 'err' before
+ * jumping to 'out'; that number is only printed in the error message to tell
+ * which check failed, the function itself then returns %-EINVAL.
+ */
+static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
+{
+       struct ubifs_znode *znode = zbr->znode;
+       struct ubifs_znode *zp = znode->parent;
+       int n, err, cmp;
+
+       /* Basic sanity of the fields of the znode itself */
+       if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
+               err = 1;
+               goto out;
+       }
+       if (znode->level < 0) {
+               err = 2;
+               goto out;
+       }
+       if (znode->iip < 0 || znode->iip >= c->fanout) {
+               err = 3;
+               goto out;
+       }
+
+       if (zbr->len == 0)
+               /* Only dirty zbranch may have no on-flash nodes */
+               if (!ubifs_zn_dirty(znode)) {
+                       err = 4;
+                       goto out;
+               }
+
+       if (ubifs_zn_dirty(znode)) {
+               /*
+                * If znode is dirty, its parent has to be dirty as well. The
+                * order of the operation is important, so we have to have
+                * memory barriers.
+                */
+               smp_mb();
+               if (zp && !ubifs_zn_dirty(zp)) {
+                       /*
+                        * The dirty flag is atomic and is cleared outside the
+                        * TNC mutex, so znode's dirty flag may now have
+                        * been cleared. The child is always cleared before the
+                        * parent, so we just need to check again.
+                        */
+                       smp_mb();
+                       if (ubifs_zn_dirty(znode)) {
+                               err = 5;
+                               goto out;
+                       }
+               }
+       }
+
+       if (zp) {
+               const union ubifs_key *min, *max;
+
+               if (znode->level != zp->level - 1) {
+                       err = 6;
+                       goto out;
+               }
+
+               /* Make sure the 'parent' pointer in our znode is correct */
+               err = ubifs_search_zbranch(c, zp, &zbr->key, &n);
+               if (!err) {
+                       /* This zbranch does not exist in the parent */
+                       err = 7;
+                       goto out;
+               }
+
+               if (znode->iip >= zp->child_cnt) {
+                       err = 8;
+                       goto out;
+               }
+
+               if (znode->iip != n) {
+                       /* This may happen only in case of collisions */
+                       if (keys_cmp(c, &zp->zbranch[n].key,
+                                    &zp->zbranch[znode->iip].key)) {
+                               err = 9;
+                               goto out;
+                       }
+                       n = znode->iip;
+               }
+
+               /*
+                * Make sure that the first key in our znode is greater than or
+                * equal to the key in the pointing zbranch.
+                */
+               min = &zbr->key;
+               cmp = keys_cmp(c, min, &znode->zbranch[0].key);
+               if (cmp == 1) {
+                       err = 10;
+                       goto out;
+               }
+
+               if (n + 1 < zp->child_cnt) {
+                       max = &zp->zbranch[n + 1].key;
+
+                       /*
+                        * Make sure the last key in our znode is less than or
+                        * equal to the key in the zbranch which goes after
+                        * our pointing zbranch.
+                        */
+                       cmp = keys_cmp(c, max,
+                               &znode->zbranch[znode->child_cnt - 1].key);
+                       if (cmp == -1) {
+                               err = 11;
+                               goto out;
+                       }
+               }
+       } else {
+               /* This may only be root znode */
+               if (zbr != &c->zroot) {
+                       err = 12;
+                       goto out;
+               }
+       }
+
+       /*
+        * Make sure that the next key is greater than or equal to the
+        * previous one.
+        */
+       for (n = 1; n < znode->child_cnt; n++) {
+               cmp = keys_cmp(c, &znode->zbranch[n - 1].key,
+                              &znode->zbranch[n].key);
+               if (cmp > 0) {
+                       err = 13;
+                       goto out;
+               }
+               if (cmp == 0) {
+                       /* This can only be keys with colliding hash */
+                       if (!is_hash_key(c, &znode->zbranch[n].key)) {
+                               err = 14;
+                               goto out;
+                       }
+
+                       /*
+                        * The name order is verified only for leaf-level
+                        * znodes and only when not replaying.
+                        */
+                       if (znode->level != 0 || c->replaying)
+                               continue;
+
+                       /*
+                        * Colliding keys should follow binary order of
+                        * corresponding xentry/dentry names.
+                        */
+                       err = dbg_check_key_order(c, &znode->zbranch[n - 1],
+                                                 &znode->zbranch[n]);
+                       if (err < 0)
+                               return err;
+                       if (err) {
+                               err = 15;
+                               goto out;
+                       }
+               }
+       }
+
+       /* Check consistency of 'lnum', 'offs' and 'len' in every zbranch */
+       for (n = 0; n < znode->child_cnt; n++) {
+               if (!znode->zbranch[n].znode &&
+                   (znode->zbranch[n].lnum == 0 ||
+                    znode->zbranch[n].len == 0)) {
+                       err = 16;
+                       goto out;
+               }
+
+               if (znode->zbranch[n].lnum != 0 &&
+                   znode->zbranch[n].len == 0) {
+                       err = 17;
+                       goto out;
+               }
+
+               if (znode->zbranch[n].lnum == 0 &&
+                   znode->zbranch[n].len != 0) {
+                       err = 18;
+                       goto out;
+               }
+
+               if (znode->zbranch[n].lnum == 0 &&
+                   znode->zbranch[n].offs != 0) {
+                       err = 19;
+                       goto out;
+               }
+
+               /* In-memory children must point back to this znode */
+               if (znode->level != 0 && znode->zbranch[n].znode)
+                       if (znode->zbranch[n].znode->parent != znode) {
+                               err = 20;
+                               goto out;
+                       }
+       }
+
+       return 0;
+
+out:
+       ubifs_err("failed, error %d", err);
+       ubifs_msg("dump of the znode");
+       dbg_dump_znode(c, znode);
+       if (zp) {
+               ubifs_msg("dump of the parent znode");
+               dbg_dump_znode(c, zp);
+       }
+       dump_stack();
+       return -EINVAL;
+}
+
+/**
+ * dbg_check_tnc - check TNC tree.
+ * @c: UBIFS file-system description object
+ * @extra: do extra checks that are possible at start commit
+ *
+ * This function traverses whole TNC tree and checks every znode. Returns zero
+ * if everything is all right and %-EINVAL if something is wrong with TNC.
+ * Negative error codes returned by 'dbg_check_znode()' and
+ * 'dbg_check_key_order()' are passed through unchanged.
+ *
+ * Nothing is checked unless %UBIFS_CHK_TNC is set in 'ubifs_chk_flags'. The
+ * caller has to hold @c->tnc_mutex (this is asserted). When @extra is
+ * non-zero, the numbers of clean and dirty znodes seen during the traversal
+ * are additionally compared with @c->clean_zn_cnt and @c->dirty_zn_cnt.
+ */
+int dbg_check_tnc(struct ubifs_info *c, int extra)
+{
+       struct ubifs_znode *znode;
+       long clean_cnt = 0, dirty_cnt = 0;
+       int err, last;
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_TNC))
+               return 0;
+
+       ubifs_assert(mutex_is_locked(&c->tnc_mutex));
+       if (!c->zroot.znode)
+               return 0;
+
+       znode = ubifs_tnc_postorder_first(c->zroot.znode);
+       while (1) {
+               struct ubifs_znode *prev;
+               struct ubifs_zbranch *zbr;
+
+               /* Find the zbranch which points to this znode */
+               if (!znode->parent)
+                       zbr = &c->zroot;
+               else
+                       zbr = &znode->parent->zbranch[znode->iip];
+
+               err = dbg_check_znode(c, zbr);
+               if (err)
+                       return err;
+
+               if (extra) {
+                       if (ubifs_zn_dirty(znode))
+                               dirty_cnt += 1;
+                       else
+                               clean_cnt += 1;
+               }
+
+               prev = znode;
+               znode = ubifs_tnc_postorder_next(znode);
+               if (!znode)
+                       break;
+
+               /*
+                * If the last key of this znode is equivalent to the first key
+                * of the next znode (collision), then check order of the keys.
+                */
+               last = prev->child_cnt - 1;
+               if (prev->level == 0 && znode->level == 0 && !c->replaying &&
+                   !keys_cmp(c, &prev->zbranch[last].key,
+                             &znode->zbranch[0].key)) {
+                       err = dbg_check_key_order(c, &prev->zbranch[last],
+                                                 &znode->zbranch[0]);
+                       if (err < 0)
+                               return err;
+                       if (err) {
+                               ubifs_msg("first znode");
+                               dbg_dump_znode(c, prev);
+                               ubifs_msg("second znode");
+                               dbg_dump_znode(c, znode);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       /* Compare the calculated znode counters with the accounted ones */
+       if (extra) {
+               if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) {
+                       ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld",
+                                 atomic_long_read(&c->clean_zn_cnt),
+                                 clean_cnt);
+                       return -EINVAL;
+               }
+               if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) {
+                       ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld",
+                                 atomic_long_read(&c->dirty_zn_cnt),
+                                 dirty_cnt);
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * dbg_walk_get_child - get a child znode, loading it if it is not in the TNC.
+ * @c: UBIFS file-system description object
+ * @znode: parent znode
+ * @n: index of the zbranch in @znode which refers the child
+ *
+ * This is a helper function for 'dbg_walk_index()'. Returns the child znode
+ * in case of success and an error pointer if the child could not be loaded.
+ */
+static struct ubifs_znode *dbg_walk_get_child(struct ubifs_info *c,
+                                             struct ubifs_znode *znode, int n)
+{
+       struct ubifs_zbranch *zbr = &znode->zbranch[n];
+       struct ubifs_znode *child = zbr->znode;
+
+       if (!child) {
+               child = ubifs_load_znode(c, zbr, znode, n);
+               if (IS_ERR(child))
+                       return child;
+               zbr->znode = child;
+       }
+       return child;
+}
+
+/**
+ * dbg_walk_index - walk the on-flash index.
+ * @c: UBIFS file-system description object
+ * @leaf_cb: called for each leaf node (may be %NULL)
+ * @znode_cb: called for each indexing node (may be %NULL)
+ * @priv: private data which is passed to callbacks
+ *
+ * This function walks the UBIFS index and calls the @leaf_cb for each leaf
+ * node and @znode_cb for each indexing node. Returns zero in case of success
+ * and a negative error code in case of failure. A non-zero value returned by
+ * a callback stops the walk and is returned to the caller. @c->tnc_mutex is
+ * taken for the whole duration of the walk.
+ *
+ * It would be better if this function removed every znode it pulled into
+ * the TNC, so that the behavior more closely matched the non-debugging
+ * behavior.
+ */
+int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
+                  dbg_znode_callback znode_cb, void *priv)
+{
+       int err;
+       struct ubifs_zbranch *zbr;
+       struct ubifs_znode *znode, *child;
+
+       mutex_lock(&c->tnc_mutex);
+       /* If the root indexing node is not in TNC - pull it */
+       if (!c->zroot.znode) {
+               c->zroot.znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
+               if (IS_ERR(c->zroot.znode)) {
+                       err = PTR_ERR(c->zroot.znode);
+                       c->zroot.znode = NULL;
+                       goto out_unlock;
+               }
+       }
+
+       /*
+        * We are going to traverse the indexing tree in the postorder manner.
+        * Go down and find the leftmost indexing node where we are going to
+        * start from.
+        */
+       znode = c->zroot.znode;
+       while (znode->level > 0) {
+               child = dbg_walk_get_child(c, znode, 0);
+               if (IS_ERR(child)) {
+                       err = PTR_ERR(child);
+                       goto out_unlock;
+               }
+               znode = child;
+       }
+
+       /* Iterate over all indexing nodes */
+       while (1) {
+               int idx;
+
+               cond_resched();
+
+               if (znode_cb) {
+                       err = znode_cb(c, znode, priv);
+                       if (err) {
+                               ubifs_err("znode checking function returned "
+                                         "error %d", err);
+                               /* The znode is dumped at 'out_dump' */
+                               goto out_dump;
+                       }
+               }
+               if (leaf_cb && znode->level == 0) {
+                       for (idx = 0; idx < znode->child_cnt; idx++) {
+                               zbr = &znode->zbranch[idx];
+                               err = leaf_cb(c, zbr, priv);
+                               if (err) {
+                                       ubifs_err("leaf checking function "
+                                                 "returned error %d, for leaf "
+                                                 "at LEB %d:%d",
+                                                 err, zbr->lnum, zbr->offs);
+                                       goto out_dump;
+                               }
+                       }
+               }
+
+               /* The root znode is visited last */
+               if (!znode->parent)
+                       break;
+
+               idx = znode->iip + 1;
+               znode = znode->parent;
+               if (idx >= znode->child_cnt)
+                       /*
+                        * This was the last child, switch to the parent and
+                        * continue.
+                        */
+                       continue;
+
+               /* Switch to the next index in the parent */
+               child = dbg_walk_get_child(c, znode, idx);
+               if (IS_ERR(child)) {
+                       err = PTR_ERR(child);
+                       goto out_unlock;
+               }
+               znode = child;
+
+               /* Go to the lowest leftmost znode in the new sub-tree */
+               while (znode->level > 0) {
+                       child = dbg_walk_get_child(c, znode, 0);
+                       if (IS_ERR(child)) {
+                               err = PTR_ERR(child);
+                               goto out_unlock;
+                       }
+                       znode = child;
+               }
+       }
+
+       mutex_unlock(&c->tnc_mutex);
+       return 0;
+
+out_dump:
+       /* Find the zbranch which points to the znode we failed on */
+       if (znode->parent)
+               zbr = &znode->parent->zbranch[znode->iip];
+       else
+               zbr = &c->zroot;
+       ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs);
+       dbg_dump_znode(c, znode);
+out_unlock:
+       mutex_unlock(&c->tnc_mutex);
+       return err;
+}
+
+/**
+ * add_size - account one indexing node in the running index size.
+ * @c: UBIFS file-system description object
+ * @znode: znode whose size has to be added
+ * @priv: pointer to the 'long long' accumulator of the index size
+ *
+ * This is the znode callback which 'dbg_check_idx_size()' passes to
+ * 'dbg_walk_index()'. It adds the size of the index node corresponding to
+ * @znode, aligned to 8 bytes, to the accumulator. Always returns zero.
+ */
+static int add_size(struct ubifs_info *c, struct ubifs_znode *znode, void *priv)
+{
+       long long *total = priv;
+       int node_sz = ubifs_idx_node_sz(c, znode->child_cnt);
+
+       *total += ALIGN(node_sz, 8);
+       return 0;
+}
+
+/**
+ * dbg_check_idx_size - check index size.
+ * @c: UBIFS file-system description object
+ * @idx_size: size to check
+ *
+ * This function walks the UBIFS index, sums up the sizes of all indexing
+ * nodes and makes sure the result is equal to @idx_size. The check is skipped
+ * unless %UBIFS_CHK_IDX_SZ is set in 'ubifs_chk_flags'. Returns zero in case
+ * of success and a negative error code in case of failure.
+ */
+int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
+{
+       long long walked_sz = 0;
+       int ret;
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ))
+               return 0;
+
+       ret = dbg_walk_index(c, NULL, add_size, &walked_sz);
+       if (ret) {
+               ubifs_err("error %d while walking the index", ret);
+               return ret;
+       }
+
+       if (walked_sz == idx_size)
+               return 0;
+
+       ubifs_err("index size check failed: calculated size is %lld, "
+                 "should be %lld", walked_sz, idx_size);
+       dump_stack();
+       return -EINVAL;
+}
+
+/**
+ * struct fsck_inode - information about an inode used when checking the file-system.
+ * @rb: link in the RB-tree of inodes
+ * @inum: inode number
+ * @mode: inode type, permissions, etc
+ * @nlink: inode link count (read from on-flash inode)
+ * @xattr_cnt: count of extended attributes (read from on-flash inode)
+ * @references: how many directory/xattr entries refer this inode (calculated
+ *              while walking the index)
+ * @calc_cnt: for directory inode count of child directories (starts from 2
+ *            for directories, see 'add_inode()')
+ * @size: inode size (read from on-flash inode)
+ * @xattr_sz: summary size of all extended attributes (read from on-flash
+ *            inode)
+ * @calc_sz: for directories calculated directory size (starts from
+ *           %UBIFS_INO_NODE_SZ, see 'add_inode()')
+ * @calc_xcnt: count of extended attributes (presumably the calculated
+ *             counterpart of @xattr_cnt - TODO confirm against the users)
+ * @calc_xsz: calculated summary size of all extended attributes
+ * @xattr_nms: sum of lengths of all extended attribute names belonging to this
+ *             inode (read from on-flash inode)
+ * @calc_xnms: calculated sum of lengths of all extended attribute names
+ */
+struct fsck_inode {
+       struct rb_node rb;
+       ino_t inum;
+       umode_t mode;
+       unsigned int nlink;
+       unsigned int xattr_cnt;
+       int references;
+       int calc_cnt;
+       long long size;
+       unsigned int xattr_sz;
+       long long calc_sz;
+       long long calc_xcnt;
+       long long calc_xsz;
+       unsigned int xattr_nms;
+       long long calc_xnms;
+};
+
+/**
+ * struct fsck_data - private FS checking information.
+ * @inodes: RB-tree of all inodes (contains &struct fsck_inode objects, which
+ *          are linked via their @rb field and sorted by inode number)
+ */
+struct fsck_data {
+       struct rb_root inodes;
+};
+
+/**
+ * add_inode - add inode information to RB-tree of inodes.
+ * @c: UBIFS file-system description object
+ * @fsckd: FS checking information
+ * @ino: raw UBIFS inode to add
+ *
+ * This is a helper function for 'check_leaf()' which records the information
+ * about inode @ino in the RB-tree of inodes. If the inode is already in the
+ * tree, the existing object is returned untouched. Returns the inode
+ * information pointer in case of success and an error pointer in case of
+ * failure (%-EINVAL if the inode number is above @c->highest_inum, %-ENOMEM
+ * if memory could not be allocated).
+ */
+static struct fsck_inode *add_inode(struct ubifs_info *c,
+                                   struct fsck_data *fsckd,
+                                   struct ubifs_ino_node *ino)
+{
+       ino_t inum = key_inum_flash(c, &ino->key);
+       struct rb_node **link = &fsckd->inodes.rb_node;
+       struct rb_node *parent = NULL;
+       struct fsck_inode *fscki;
+
+       /* Look for the inode and remember where to link a new one */
+       while (*link) {
+               parent = *link;
+               fscki = rb_entry(parent, struct fsck_inode, rb);
+               if (inum == fscki->inum)
+                       return fscki;
+               if (inum < fscki->inum)
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+
+       if (inum > c->highest_inum) {
+               ubifs_err("too high inode number, max. is %lu",
+                         c->highest_inum);
+               return ERR_PTR(-EINVAL);
+       }
+
+       fscki = kzalloc(sizeof(*fscki), GFP_NOFS);
+       if (!fscki)
+               return ERR_PTR(-ENOMEM);
+
+       /* Copy what is stored in the on-flash inode */
+       fscki->inum = inum;
+       fscki->mode = le32_to_cpu(ino->mode);
+       fscki->nlink = le32_to_cpu(ino->nlink);
+       fscki->size = le64_to_cpu(ino->size);
+       fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
+       fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
+       fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
+
+       /* Initial values of the calculated directory size and link count */
+       if (S_ISDIR(fscki->mode)) {
+               fscki->calc_cnt = 2;
+               fscki->calc_sz = UBIFS_INO_NODE_SZ;
+       }
+
+       rb_link_node(&fscki->rb, parent, link);
+       rb_insert_color(&fscki->rb, &fsckd->inodes);
+       return fscki;
+}
+
+/**
+ * search_inode - search inode in the RB-tree of inodes.
+ * @fsckd: FS checking information
+ * @inum: inode number to search
+ *
+ * This is a helper function for 'check_leaf()' which looks inode @inum up in
+ * the RB-tree of inodes. Returns the inode information pointer, or %NULL if
+ * the inode is not in the tree.
+ */
+static struct fsck_inode *search_inode(struct fsck_data *fsckd, ino_t inum)
+{
+       struct rb_node *node = fsckd->inodes.rb_node;
+
+       while (node) {
+               struct fsck_inode *fscki;
+
+               fscki = rb_entry(node, struct fsck_inode, rb);
+               if (inum == fscki->inum)
+                       return fscki;
+               if (inum < fscki->inum)
+                       node = node->rb_left;
+               else
+                       node = node->rb_right;
+       }
+
+       return NULL;
+}
+
+/**
+ * read_add_inode - read inode node and add it to RB-tree of inodes.
+ * @c: UBIFS file-system description object
+ * @fsckd: FS checking information
+ * @inum: inode number to read
+ *
+ * This is a helper function for 'check_leaf()'. If inode @inum is already in
+ * the RB-tree of inodes, it is returned right away. Otherwise the inode node
+ * is looked up in the index, read, and added to the RB-tree. Returns the inode
+ * information pointer in case of success and an error pointer in case of
+ * failure.
+ */
+static struct fsck_inode *read_add_inode(struct ubifs_info *c,
+                                        struct fsck_data *fsckd, ino_t inum)
+{
+       struct fsck_inode *fscki;
+       struct ubifs_ino_node *ino;
+       struct ubifs_zbranch *zbr;
+       struct ubifs_znode *znode;
+       union ubifs_key key;
+       int n, err;
+
+       fscki = search_inode(fsckd, inum);
+       if (fscki)
+               return fscki;
+
+       ino_key_init(c, &key, inum);
+       err = ubifs_lookup_level0(c, &key, &znode, &n);
+       if (err < 0) {
+               ubifs_err("error %d while looking up inode %lu", err, inum);
+               return ERR_PTR(err);
+       }
+       if (err == 0) {
+               ubifs_err("inode %lu not found in index", inum);
+               return ERR_PTR(-ENOENT);
+       }
+
+       zbr = &znode->zbranch[n];
+       if (zbr->len < UBIFS_INO_NODE_SZ) {
+               ubifs_err("bad node %lu node length %d", inum, zbr->len);
+               return ERR_PTR(-EINVAL);
+       }
+
+       ino = kmalloc(zbr->len, GFP_NOFS);
+       if (!ino)
+               return ERR_PTR(-ENOMEM);
+
+       err = ubifs_tnc_read_node(c, zbr, ino);
+       if (err) {
+               ubifs_err("cannot read inode node at LEB %d:%d, error %d",
+                         zbr->lnum, zbr->offs, err);
+               kfree(ino);
+               return ERR_PTR(err);
+       }
+
+       /* The raw inode is not needed once its data has been recorded */
+       fscki = add_inode(c, fsckd, ino);
+       kfree(ino);
+       if (IS_ERR(fscki))
+               ubifs_err("error %ld while adding inode %lu node",
+                         PTR_ERR(fscki), inum);
+
+       return fscki;
+}
+
+/**
+ * check_leaf - check leaf node.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch of the leaf node to check
+ * @priv: FS checking information (handed on to 'add_inode()' and
+ *        'read_add_inode()')
+ *
+ * This is a helper function for 'dbg_check_filesystem()' which is called for
+ * every single leaf node while walking the indexing tree. It checks that the
+ * leaf node referred from the indexing tree exists, has correct CRC, and does
+ * some other basic validation. This function is also responsible for building
+ * an RB-tree of inodes - it adds all inodes into the RB-tree. It also
+ * calculates reference count, size, etc for each inode in order to later
+ * compare them to the information stored inside the inodes and detect possible
+ * inconsistencies. Returns zero in case of success and a negative error code
+ * in case of failure.
+ */
+static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+                     void *priv)
+{
+       ino_t inum;
+       void *node;
+       struct ubifs_ch *ch;
+       int err, type = key_type(c, &zbr->key);
+       struct fsck_inode *fscki;
+
+       /* The node must be at least large enough to hold a common header */
+       if (zbr->len < UBIFS_CH_SZ) {
+               ubifs_err("bad leaf length %d (LEB %d:%d)",
+                         zbr->len, zbr->lnum, zbr->offs);
+               return -EINVAL;
+       }
+
+       node = kmalloc(zbr->len, GFP_NOFS);
+       if (!node)
+               return -ENOMEM;
+
+       err = ubifs_tnc_read_node(c, zbr, node);
+       if (err) {
+               ubifs_err("cannot read leaf node at LEB %d:%d, error %d",
+                         zbr->lnum, zbr->offs, err);
+               goto out_free;
+       }
+
+       /* If this is an inode node, add it to RB-tree of inodes */
+       if (type == UBIFS_INO_KEY) {
+               fscki = add_inode(c, priv, node);
+               if (IS_ERR(fscki)) {
+                       err = PTR_ERR(fscki);
+                       ubifs_err("error %d while adding inode node", err);
+                       goto out_dump;
+               }
+               goto out;
+       }
+
+       if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY &&
+           type != UBIFS_DATA_KEY) {
+               ubifs_err("unexpected node type %d at LEB %d:%d",
+                         type, zbr->lnum, zbr->offs);
+               err = -EINVAL;
+               goto out_free;
+       }
+
+       ch = node;
+       if (le64_to_cpu(ch->sqnum) > c->max_sqnum) {
+               ubifs_err("too high sequence number, max. is %llu",
+                         c->max_sqnum);
+               err = -EINVAL;
+               goto out_dump;
+       }
+
+       if (type == UBIFS_DATA_KEY) {
+               long long blk_offs;
+               struct ubifs_data_node *dn = node;
+
+               /*
+                * Search the inode node this data node belongs to and insert
+                * it to the RB-tree of inodes.
+                */
+               inum = key_inum_flash(c, &dn->key);
+               fscki = read_add_inode(c, priv, inum);
+               if (IS_ERR(fscki)) {
+                       err = PTR_ERR(fscki);
+                       ubifs_err("error %d while processing data node and "
+                                 "trying to find inode node %lu",
+                                 err, (unsigned long)inum);
+                       goto out_dump;
+               }
+
+               /* Make sure the data node is within inode size */
+               blk_offs = key_block_flash(c, &dn->key);
+               blk_offs <<= UBIFS_BLOCK_SHIFT;
+               blk_offs += le32_to_cpu(dn->size);
+               if (blk_offs > fscki->size) {
+                       ubifs_err("data node at LEB %d:%d is not within inode "
+                                 "size %lld", zbr->lnum, zbr->offs,
+                                 fscki->size);
+                       err = -EINVAL;
+                       goto out_dump;
+               }
+       } else {
+               int nlen;
+               struct ubifs_dent_node *dent = node;
+               struct fsck_inode *fscki1;
+
+               err = ubifs_validate_entry(c, dent);
+               if (err)
+                       goto out_dump;
+
+               /*
+                * Search the inode node this entry refers to and the parent
+                * inode node and insert them to the RB-tree of inodes.
+                */
+               inum = le64_to_cpu(dent->inum);
+               fscki = read_add_inode(c, priv, inum);
+               if (IS_ERR(fscki)) {
+                       err = PTR_ERR(fscki);
+                       ubifs_err("error %d while processing entry node and "
+                                 "trying to find inode node %lu",
+                                 err, (unsigned long)inum);
+                       goto out_dump;
+               }
+
+               /* Count how many direntries or xentries refer to this inode */
+               fscki->references += 1;
+
+               inum = key_inum_flash(c, &dent->key);
+               fscki1 = read_add_inode(c, priv, inum);
+               if (IS_ERR(fscki1)) {
+                       /*
+                        * The error code lives in 'fscki1'; 'fscki' is a valid
+                        * pointer at this point and must not be decoded.
+                        */
+                       err = PTR_ERR(fscki1);
+                       ubifs_err("error %d while processing entry node and "
+                                 "trying to find parent inode node %lu",
+                                 err, (unsigned long)inum);
+                       goto out_dump;
+               }
+
+               /* Account this entry in the parent inode's calculated totals */
+               nlen = le16_to_cpu(dent->nlen);
+               if (type == UBIFS_XENT_KEY) {
+                       fscki1->calc_xcnt += 1;
+                       fscki1->calc_xsz += CALC_DENT_SIZE(nlen);
+                       fscki1->calc_xsz += CALC_XATTR_BYTES(fscki->size);
+                       fscki1->calc_xnms += nlen;
+               } else {
+                       fscki1->calc_sz += CALC_DENT_SIZE(nlen);
+                       if (dent->type == UBIFS_ITYPE_DIR)
+                               fscki1->calc_cnt += 1;
+               }
+       }
+
+out:
+       kfree(node);
+       return 0;
+
+out_dump:
+       ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
+       dbg_dump_node(c, node);
+out_free:
+       kfree(node);
+       return err;
+}
+
+/**
+ * free_inodes - free RB-tree of inodes.
+ * @fsckd: FS checking information
+ *
+ * The tree is torn down bottom-up: walk down to a node without children, free
+ * it, detach it from its parent and continue from the parent.
+ */
+static void free_inodes(struct fsck_data *fsckd)
+{
+       struct rb_node *node = fsckd->inodes.rb_node;
+
+       while (node) {
+               struct rb_node *parent;
+               struct fsck_inode *leaf;
+
+               /* Keep descending while the current node has a child */
+               if (node->rb_left) {
+                       node = node->rb_left;
+                       continue;
+               }
+               if (node->rb_right) {
+                       node = node->rb_right;
+                       continue;
+               }
+
+               /* Childless node: unlink it from the parent and free it */
+               leaf = rb_entry(node, struct fsck_inode, rb);
+               parent = rb_parent(node);
+               if (parent) {
+                       if (parent->rb_left == node)
+                               parent->rb_left = NULL;
+                       else
+                               parent->rb_right = NULL;
+               }
+               kfree(leaf);
+               node = parent;
+       }
+}
+
+/**
+ * check_inodes - checks all inodes.
+ * @c: UBIFS file-system description object
+ * @fsckd: FS checking information
+ *
+ * This is a helper function for 'dbg_check_filesystem()' which walks the
+ * RB-tree of inodes after the index scan has been finished, and checks that
+ * inode nlink, size, etc are correct. Returns zero if inodes are fine,
+ * %-EINVAL if not, and a negative error code in case of failure.
+ *
+ * The walk stops at the first inconsistent inode; that inode is then looked
+ * up in the index again, read and dumped.
+ */
+static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
+{
+       int n, err;
+       union ubifs_key key;
+       struct ubifs_znode *znode;
+       struct ubifs_zbranch *zbr;
+       struct ubifs_ino_node *ino;
+       struct fsck_inode *fscki;
+       struct rb_node *this = rb_first(&fsckd->inodes);
+
+       while (this) {
+               fscki = rb_entry(this, struct fsck_inode, rb);
+               /* Step to the next node now; 'fscki' is what gets checked */
+               this = rb_next(this);
+
+               if (S_ISDIR(fscki->mode)) {
+                       /*
+                        * Directories have to have exactly one reference (they
+                        * cannot have hardlinks), although root inode is an
+                        * exception.
+                        */
+                       if (fscki->inum != UBIFS_ROOT_INO &&
+                           fscki->references != 1) {
+                               ubifs_err("directory inode %lu has %d "
+                                         "direntries which refer it, but "
+                                         "should be 1", fscki->inum,
+                                         fscki->references);
+                               goto out_dump;
+                       }
+                       /* The root inode must not be referred by any entry */
+                       if (fscki->inum == UBIFS_ROOT_INO &&
+                           fscki->references != 0) {
+                               ubifs_err("root inode %lu has non-zero (%d) "
+                                         "direntries which refer it",
+                                         fscki->inum, fscki->references);
+                               goto out_dump;
+                       }
+                       /* Stored size/nlink must match the calculated values */
+                       if (fscki->calc_sz != fscki->size) {
+                               ubifs_err("directory inode %lu size is %lld, "
+                                         "but calculated size is %lld",
+                                         fscki->inum, fscki->size,
+                                         fscki->calc_sz);
+                               goto out_dump;
+                       }
+                       if (fscki->calc_cnt != fscki->nlink) {
+                               ubifs_err("directory inode %lu nlink is %d, "
+                                         "but calculated nlink is %d",
+                                         fscki->inum, fscki->nlink,
+                                         fscki->calc_cnt);
+                               goto out_dump;
+                       }
+               } else {
+                       /* Non-directories: nlink must equal the entry count */
+                       if (fscki->references != fscki->nlink) {
+                               ubifs_err("inode %lu nlink is %d, but "
+                                         "calculated nlink is %d", fscki->inum,
+                                         fscki->nlink, fscki->references);
+                               goto out_dump;
+                       }
+               }
+               /* Extended attribute accounting is checked for every inode */
+               if (fscki->xattr_sz != fscki->calc_xsz) {
+                       ubifs_err("inode %lu has xattr size %u, but "
+                                 "calculated size is %lld",
+                                 fscki->inum, fscki->xattr_sz,
+                                 fscki->calc_xsz);
+                       goto out_dump;
+               }
+               if (fscki->xattr_cnt != fscki->calc_xcnt) {
+                       ubifs_err("inode %lu has %u xattrs, but "
+                                 "calculated count is %lld", fscki->inum,
+                                 fscki->xattr_cnt, fscki->calc_xcnt);
+                       goto out_dump;
+               }
+               if (fscki->xattr_nms != fscki->calc_xnms) {
+                       ubifs_err("inode %lu has xattr names' size %u, but "
+                                 "calculated names' size is %lld",
+                                 fscki->inum, fscki->xattr_nms,
+                                 fscki->calc_xnms);
+                       goto out_dump;
+               }
+       }
+
+       return 0;
+
+out_dump:
+       /* Read the bad inode and dump it */
+       ino_key_init(c, &key, fscki->inum);
+       err = ubifs_lookup_level0(c, &key, &znode, &n);
+       /* A zero return is treated as "key not found", negative as an error */
+       if (!err) {
+               ubifs_err("inode %lu not found in index", fscki->inum);
+               return -ENOENT;
+       } else if (err < 0) {
+               ubifs_err("error %d while looking up inode %lu",
+                         err, fscki->inum);
+               return err;
+       }
+
+       zbr = &znode->zbranch[n];
+       ino = kmalloc(zbr->len, GFP_NOFS);
+       if (!ino)
+               return -ENOMEM;
+
+       err = ubifs_tnc_read_node(c, zbr, ino);
+       if (err) {
+               ubifs_err("cannot read inode node at LEB %d:%d, error %d",
+                         zbr->lnum, zbr->offs, err);
+               kfree(ino);
+               return err;
+       }
+
+       ubifs_msg("dump of the inode %lu sitting in LEB %d:%d",
+                 fscki->inum, zbr->lnum, zbr->offs);
+       dbg_dump_node(c, ino);
+       kfree(ino);
+       /* The dump succeeded, but the inode itself was inconsistent */
+       return -EINVAL;
+}
+
+/**
+ * dbg_check_filesystem - check the file-system.
+ * @c: UBIFS file-system description object
+ *
+ * This function checks the file system, namely:
+ * o makes sure that all leaf nodes exist and their CRCs are correct;
+ * o makes sure inode nlink, size, xattr size/count are correct (for all
+ *   inodes).
+ *
+ * The whole indexing tree and all nodes are read, so the check is pretty
+ * heavy-weight. It is skipped (and zero is returned) unless %UBIFS_CHK_FS is
+ * set in 'ubifs_chk_flags'. Returns zero if the file-system is consistent,
+ * %-EINVAL if not, and a negative error code in case of failure.
+ */
+int dbg_check_filesystem(struct ubifs_info *c)
+{
+       struct fsck_data fsckd;
+       int err;
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_FS))
+               return 0;
+
+       fsckd.inodes = RB_ROOT;
+
+       /* First collect per-inode data from the index, then verify it */
+       err = dbg_walk_index(c, check_leaf, NULL, &fsckd);
+       if (!err)
+               err = check_inodes(c, &fsckd);
+
+       if (err) {
+               ubifs_err("file-system check failed with error %d", err);
+               dump_stack();
+       }
+
+       /* The inode tree is released on both the success and error paths */
+       free_inodes(&fsckd);
+       return err;
+}
+
+static int invocation_cnt;
+
+/*
+ * Returns non-zero when the in-the-gaps method should be forced. While the
+ * testing flag is enabled this happens on every 8th invocation, starting with
+ * the first one.
+ */
+int dbg_force_in_the_gaps(void)
+{
+       int seen;
+
+       if (!dbg_force_in_the_gaps_enabled)
+               return 0;
+
+       /* The counter only advances while the testing flag is enabled */
+       seen = invocation_cnt++;
+       return (seen & 0x7) == 0;
+}
+
+/* Failure mode for recovery testing */
+
+#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d))
+
+struct failure_mode_info {
+       struct list_head list;
+       struct ubifs_info *c;
+};
+
+static LIST_HEAD(fmi_list);
+static DEFINE_SPINLOCK(fmi_lock);
+
+static unsigned int next;
+
+/*
+ * Small linear congruential pseudo-random generator. The state is kept in
+ * 'next' and is seeded from the current PID whenever it is zero. Returns a
+ * value in the range 0..32767.
+ */
+static int simple_rand(void)
+{
+       if (!next)
+               next = current->pid;
+
+       next *= 1103515245;
+       next += 12345;
+
+       /* Hand out 15 bits taken from the upper half of the state */
+       return (next >> 16) & 0x7fff;
+}
+
+/*
+ * Add file-system @c to the list of file-systems known to the failure mode
+ * code. An allocation failure is only reported, the file-system is simply not
+ * registered then.
+ */
+void dbg_failure_mode_registration(struct ubifs_info *c)
+{
+       struct failure_mode_info *entry;
+
+       entry = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
+       if (entry == NULL) {
+               dbg_err("Failed to register failure mode - no memory");
+               return;
+       }
+
+       entry->c = c;
+
+       spin_lock(&fmi_lock);
+       list_add_tail(&entry->list, &fmi_list);
+       spin_unlock(&fmi_lock);
+}
+
+/*
+ * Remove and free every failure mode list entry which refers to file-system
+ * @c.
+ */
+void dbg_failure_mode_deregistration(struct ubifs_info *c)
+{
+       struct failure_mode_info *cur, *nxt;
+
+       spin_lock(&fmi_lock);
+       list_for_each_entry_safe(cur, nxt, &fmi_list, list) {
+               if (cur->c != c)
+                       continue;
+
+               list_del(&cur->list);
+               kfree(cur);
+       }
+       spin_unlock(&fmi_lock);
+}
+
+/*
+ * Find the registered file-system whose UBI volume descriptor is @desc.
+ * Returns the file-system description object or %NULL if none matches.
+ */
+static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc)
+{
+       struct failure_mode_info *fmi;
+       struct ubifs_info *found = NULL;
+
+       spin_lock(&fmi_lock);
+       list_for_each_entry(fmi, &fmi_list, list) {
+               if (fmi->c->ubi == desc) {
+                       /* The first match wins */
+                       found = fmi->c;
+                       break;
+               }
+       }
+       spin_unlock(&fmi_lock);
+
+       return found;
+}
+
+/*
+ * Returns the 'failure_mode' flag of the file-system which owns @desc, or
+ * zero if the file-system is not registered or failure mode testing is off.
+ */
+static int in_failure_mode(struct ubi_volume_desc *desc)
+{
+       struct ubifs_info *c;
+
+       c = dbg_find_info(desc);
+       if (!c || !dbg_failure_mode)
+               return 0;
+
+       return c->failure_mode;
+}
+
+/*
+ * do_fail - decide whether an operation on LEB @lnum should fail.
+ * @desc: UBI volume descriptor used to find the file-system
+ * @lnum: LEB number the operation is about
+ * @write: non-zero for write-type operations (selects different odds for some
+ *         areas)
+ *
+ * Returns 1 if the file-system is already in failure mode or has just been
+ * switched into it by this call, and 0 otherwise. Note, 'chance(n, d)' is
+ * true with probability of roughly n/d, and a true result means "do not fail
+ * now", so larger n/d makes a failure less likely.
+ */
+static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
+{
+       struct ubifs_info *c = dbg_find_info(desc);
+
+       if (!c || !dbg_failure_mode)
+               return 0;
+       if (c->failure_mode)
+               return 1;
+       if (!c->fail_cnt) {
+               /* First call - decide delay to failure */
+               if (chance(1, 2)) {
+                       /* simple_rand() >> 11 is 0..15, so delay is 1..32768 */
+                       unsigned int delay = 1 << (simple_rand() >> 11);
+
+                       if (chance(1, 2)) {
+                               /* fail_delay 1: time-based delay */
+                               c->fail_delay = 1;
+                               c->fail_timeout = jiffies +
+                                                 msecs_to_jiffies(delay);
+                               dbg_rcvry("failing after %ums", delay);
+                       } else {
+                               /* fail_delay 2: delay counted in calls */
+                               c->fail_delay = 2;
+                               c->fail_cnt_max = delay;
+                               dbg_rcvry("failing after %u calls", delay);
+                       }
+               }
+               /* Non-zero 'fail_cnt' keeps this decision from being redone */
+               c->fail_cnt += 1;
+       }
+       /* Determine if failure delay has expired */
+       if (c->fail_delay == 1) {
+               if (time_before(jiffies, c->fail_timeout))
+                       return 0;
+       } else if (c->fail_delay == 2)
+               if (c->fail_cnt++ < c->fail_cnt_max)
+                       return 0;
+       /* Pick the odds depending on which area of the media @lnum is in */
+       if (lnum == UBIFS_SB_LNUM) {
+               if (write) {
+                       if (chance(1, 2))
+                               return 0;
+               } else if (chance(19, 20))
+                       return 0;
+               dbg_rcvry("failing in super block LEB %d", lnum);
+       } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
+               if (chance(19, 20))
+                       return 0;
+               dbg_rcvry("failing in master LEB %d", lnum);
+       } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
+               if (write) {
+                       if (chance(99, 100))
+                               return 0;
+               } else if (chance(399, 400))
+                       return 0;
+               dbg_rcvry("failing in log LEB %d", lnum);
+       } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
+               if (write) {
+                       if (chance(7, 8))
+                               return 0;
+               } else if (chance(19, 20))
+                       return 0;
+               dbg_rcvry("failing in LPT LEB %d", lnum);
+       } else if (lnum >= c->orph_first && lnum <= c->orph_last) {
+               if (write) {
+                       if (chance(1, 2))
+                               return 0;
+               } else if (chance(9, 10))
+                       return 0;
+               dbg_rcvry("failing in orphan LEB %d", lnum);
+       } else if (lnum == c->ihead_lnum) {
+               if (chance(99, 100))
+                       return 0;
+               dbg_rcvry("failing in index head LEB %d", lnum);
+       } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
+               if (chance(9, 10))
+                       return 0;
+               dbg_rcvry("failing in GC head LEB %d", lnum);
+       } else if (write && !RB_EMPTY_ROOT(&c->buds) &&
+                  !ubifs_search_bud(c, lnum)) {
+               if (chance(19, 20))
+                       return 0;
+               dbg_rcvry("failing in non-bud LEB %d", lnum);
+       } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
+                  c->cmt_state == COMMIT_RUNNING_REQUIRED) {
+               if (chance(999, 1000))
+                       return 0;
+               dbg_rcvry("failing in bud LEB %d commit running", lnum);
+       } else {
+               if (chance(9999, 10000))
+                       return 0;
+               dbg_rcvry("failing in bud LEB %d commit not running", lnum);
+       }
+       /* From now on 'c->failure_mode' makes every call return 1 early */
+       ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum);
+       c->failure_mode = 1;
+       dump_stack();
+       return 1;
+}
+
+/*
+ * Overwrite the tail of @buf with 0xff bytes, starting at a pseudo-random
+ * offset within @len. Note, the buffer is modified even though it is passed
+ * as a const pointer.
+ */
+static void cut_data(const void *buf, int len)
+{
+       unsigned char *bytes = (void *)buf;
+       int pos;
+
+       /* simple_rand() is below 32768, so the offset never exceeds @len */
+       pos = (len * (long long)simple_rand()) >> 15;
+       while (pos < len) {
+               bytes[pos] = 0xff;
+               pos += 1;
+       }
+}
+
+/* LEB read wrapper which returns %-EIO once failure mode is on */
+int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
+                int len, int check)
+{
+       if (!in_failure_mode(desc))
+               return ubi_leb_read(desc, lnum, buf, offset, len, check);
+
+       return -EIO;
+}
+
+/*
+ * LEB write wrapper. If this write is picked as the failing one, the data is
+ * damaged by 'cut_data()' before it is written, and %-EIO is returned after
+ * the write. Returns %-EIO straight away if failure mode is already on.
+ */
+int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
+                 int offset, int len, int dtype)
+{
+       int err;
+
+       if (in_failure_mode(desc))
+               return -EIO;
+
+       if (do_fail(desc, lnum, 1))
+               cut_data(buf, len);
+
+       err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
+       if (!err && in_failure_mode(desc))
+               err = -EIO;
+
+       return err;
+}
+
+/*
+ * LEB change wrapper: 'do_fail()' is consulted both before and after the real
+ * operation, and %-EIO is returned if either call asks for a failure.
+ */
+int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
+                  int len, int dtype)
+{
+       int err;
+
+       if (do_fail(desc, lnum, 1))
+               return -EIO;
+
+       err = ubi_leb_change(desc, lnum, buf, len, dtype);
+       if (err)
+               return err;
+
+       return do_fail(desc, lnum, 1) ? -EIO : 0;
+}
+
+/*
+ * LEB erase wrapper: 'do_fail()' is consulted both before and after the real
+ * operation, and %-EIO is returned if either call asks for a failure.
+ */
+int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
+{
+       int err;
+
+       if (do_fail(desc, lnum, 0))
+               return -EIO;
+
+       err = ubi_leb_erase(desc, lnum);
+       if (err)
+               return err;
+
+       return do_fail(desc, lnum, 0) ? -EIO : 0;
+}
+
+/*
+ * LEB unmap wrapper: 'do_fail()' is consulted both before and after the real
+ * operation, and %-EIO is returned if either call asks for a failure.
+ */
+int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
+{
+       int err;
+
+       if (do_fail(desc, lnum, 0))
+               return -EIO;
+
+       err = ubi_leb_unmap(desc, lnum);
+       if (err)
+               return err;
+
+       return do_fail(desc, lnum, 0) ? -EIO : 0;
+}
+
+/* "Is mapped" query wrapper which returns %-EIO once failure mode is on */
+int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
+{
+       if (!in_failure_mode(desc))
+               return ubi_is_mapped(desc, lnum);
+
+       return -EIO;
+}
+
+/*
+ * LEB map wrapper: 'do_fail()' is consulted both before and after the real
+ * operation, and %-EIO is returned if either call asks for a failure.
+ */
+int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
+{
+       int err;
+
+       if (do_fail(desc, lnum, 0))
+               return -EIO;
+
+       err = ubi_leb_map(desc, lnum, dtype);
+       if (err)
+               return err;
+
+       return do_fail(desc, lnum, 0) ? -EIO : 0;
+}
+
+#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h

new file mode 100644 (file)

index 0000000..3c4f1e9
--- /dev/null
+++ b/fs/ubifs/debug.h
@@ -0,0 +1,403 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+#ifndef __UBIFS_DEBUG_H__
+#define __UBIFS_DEBUG_H__
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+#define UBIFS_DBG(op) op
+
+#define ubifs_assert(expr)  do {                                               \
+       if (unlikely(!(expr))) {                                               \
+               printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
+                      __func__, __LINE__, current->pid);                      \
+               dbg_dump_stack();                                              \
+       }                                                                      \
+} while (0)
+
+/*
+ * Complain if the commit semaphore of @c is not held. The check works by
+ * trying to take the semaphore for writing: if that succeeds, nobody held it,
+ * so it is released again and the assertion is reported.
+ */
+#define ubifs_assert_cmt_locked(c) do {                                        \
+       if (unlikely(down_write_trylock(&(c)->commit_sem))) {                  \
+               up_write(&(c)->commit_sem);                                    \
+               printk(KERN_CRIT "commit lock is not locked!\n");              \
+               ubifs_assert(0);                                               \
+       }                                                                      \
+} while (0)
+
+#define dbg_dump_stack() do {                                                  \
+       if (!dbg_failure_mode)                                                 \
+               dump_stack();                                                  \
+} while (0)
+
+/* Generic debugging messages */
+#define dbg_msg(fmt, ...) do {                                                 \
+       spin_lock(&dbg_lock);                                                  \
+       printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid,   \
+              __func__, ##__VA_ARGS__);                                       \
+       spin_unlock(&dbg_lock);                                                \
+} while (0)
+
+#define dbg_do_msg(typ, fmt, ...) do {                                         \
+       if (ubifs_msg_flags & typ)                                             \
+               dbg_msg(fmt, ##__VA_ARGS__);                                   \
+} while (0)
+
+#define dbg_err(fmt, ...) do {                                                 \
+       spin_lock(&dbg_lock);                                                  \
+       ubifs_err(fmt, ##__VA_ARGS__);                                         \
+       spin_unlock(&dbg_lock);                                                \
+} while (0)
+
+const char *dbg_key_str0(const struct ubifs_info *c,
+                        const union ubifs_key *key);
+const char *dbg_key_str1(const struct ubifs_info *c,
+                        const union ubifs_key *key);
+
+/*
+ * DBGKEY macros require dbg_lock to be held, which it is in the dbg message
+ * macros.
+ */
+#define DBGKEY(key) dbg_key_str0(c, (key))
+#define DBGKEY1(key) dbg_key_str1(c, (key))
+
+/* General messages */
+#define dbg_gen(fmt, ...)        dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
+
+/* Additional journal messages */
+#define dbg_jnl(fmt, ...)        dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
+
+/* Additional TNC messages */
+#define dbg_tnc(fmt, ...)        dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
+
+/* Additional lprops messages */
+#define dbg_lp(fmt, ...)         dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
+
+/* Additional LEB find messages */
+#define dbg_find(fmt, ...)       dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
+
+/* Additional mount messages */
+#define dbg_mnt(fmt, ...)        dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
+
+/* Additional I/O messages */
+#define dbg_io(fmt, ...)         dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
+
+/* Additional commit messages */
+#define dbg_cmt(fmt, ...)        dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
+
+/* Additional budgeting messages */
+#define dbg_budg(fmt, ...)       dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
+
+/* Additional log messages */
+#define dbg_log(fmt, ...)        dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
+
+/* Additional gc messages */
+#define dbg_gc(fmt, ...)         dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
+
+/* Additional scan messages */
+#define dbg_scan(fmt, ...)       dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
+
+/* Additional recovery messages */
+#define dbg_rcvry(fmt, ...)      dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
+
+/*
+ * Debugging message type flags (must match msg_type_names in debug.c).
+ *
+ * UBIFS_MSG_GEN: general messages
+ * UBIFS_MSG_JNL: journal messages
+ * UBIFS_MSG_MNT: mount messages
+ * UBIFS_MSG_CMT: commit messages
+ * UBIFS_MSG_FIND: LEB find messages
+ * UBIFS_MSG_BUDG: budgeting messages
+ * UBIFS_MSG_GC: garbage collection messages
+ * UBIFS_MSG_TNC: TNC messages
+ * UBIFS_MSG_LP: lprops messages
+ * UBIFS_MSG_IO: I/O messages
+ * UBIFS_MSG_LOG: log messages
+ * UBIFS_MSG_SCAN: scan messages
+ * UBIFS_MSG_RCVRY: recovery messages
+ */
+enum {
+       UBIFS_MSG_GEN   = 0x1,
+       UBIFS_MSG_JNL   = 0x2,
+       UBIFS_MSG_MNT   = 0x4,
+       UBIFS_MSG_CMT   = 0x8,
+       UBIFS_MSG_FIND  = 0x10,
+       UBIFS_MSG_BUDG  = 0x20,
+       UBIFS_MSG_GC    = 0x40,
+       UBIFS_MSG_TNC   = 0x80,
+       UBIFS_MSG_LP    = 0x100,
+       UBIFS_MSG_IO    = 0x200,
+       UBIFS_MSG_LOG   = 0x400,
+       UBIFS_MSG_SCAN  = 0x800,
+       UBIFS_MSG_RCVRY = 0x1000,
+};
+
+/* Debugging message type flags for each default debug message level */
+#define UBIFS_MSG_LVL_0 0
+#define UBIFS_MSG_LVL_1 0x1
+#define UBIFS_MSG_LVL_2 0x7f
+#define UBIFS_MSG_LVL_3 0xffff
+
+/*
+ * Debugging check flags (must match chk_names in debug.c).
+ *
+ * UBIFS_CHK_GEN: general checks
+ * UBIFS_CHK_TNC: check TNC
+ * UBIFS_CHK_IDX_SZ: check index size
+ * UBIFS_CHK_ORPH: check orphans
+ * UBIFS_CHK_OLD_IDX: check the old index
+ * UBIFS_CHK_LPROPS: check lprops
+ * UBIFS_CHK_FS: check the file-system
+ */
+enum {
+       UBIFS_CHK_GEN     = 0x1,
+       UBIFS_CHK_TNC     = 0x2,
+       UBIFS_CHK_IDX_SZ  = 0x4,
+       UBIFS_CHK_ORPH    = 0x8,
+       UBIFS_CHK_OLD_IDX = 0x10,
+       UBIFS_CHK_LPROPS  = 0x20,
+       UBIFS_CHK_FS      = 0x40,
+};
+
+/*
+ * Special testing flags (must match tst_names in debug.c).
+ *
+ * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
+ * UBIFS_TST_RCVRY: failure mode for recovery testing
+ */
+enum {
+       UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
+       UBIFS_TST_RCVRY             = 0x4,
+};
+
+#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
+#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
+#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
+#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
+#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
+#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
+#else
+#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
+#endif
+
+#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
+#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
+#else
+#define UBIFS_CHK_FLAGS_DEFAULT 0
+#endif
+
+extern spinlock_t dbg_lock;
+
+extern unsigned int ubifs_msg_flags;
+extern unsigned int ubifs_chk_flags;
+extern unsigned int ubifs_tst_flags;
+
+/* Dump functions */
+
+const char *dbg_ntype(int type);
+const char *dbg_cstate(int cmt_state);
+const char *dbg_get_key_dump(const struct ubifs_info *c,
+                            const union ubifs_key *key);
+void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
+void dbg_dump_node(const struct ubifs_info *c, const void *node);
+void dbg_dump_budget_req(const struct ubifs_budget_req *req);
+void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
+void dbg_dump_budg(struct ubifs_info *c);
+void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
+void dbg_dump_lprops(struct ubifs_info *c);
+void dbg_dump_leb(const struct ubifs_info *c, int lnum);
+void dbg_dump_znode(const struct ubifs_info *c,
+                   const struct ubifs_znode *znode);
+void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat);
+void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
+                   struct ubifs_nnode *parent, int iip);
+void dbg_dump_tnc(struct ubifs_info *c);
+void dbg_dump_index(struct ubifs_info *c);
+
+/* Checking helper functions */
+
+typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
+                                struct ubifs_zbranch *zbr, void *priv);
+typedef int (*dbg_znode_callback)(struct ubifs_info *c,
+                                 struct ubifs_znode *znode, void *priv);
+
+int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
+                  dbg_znode_callback znode_cb, void *priv);
+
+/* Checking functions */
+
+int dbg_check_lprops(struct ubifs_info *c);
+
+int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
+int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
+
+int dbg_check_cats(struct ubifs_info *c);
+
+int dbg_check_ltab(struct ubifs_info *c);
+
+int dbg_check_synced_i_size(struct inode *inode);
+
+int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
+
+int dbg_check_tnc(struct ubifs_info *c, int extra);
+
+int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
+
+int dbg_check_filesystem(struct ubifs_info *c);
+
+void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
+                   int add_pos);
+
+/* Note, 'dbg_check_lprops()' is already declared earlier in this header */
+int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
+                       int row, int col);
+
+/* Force the use of in-the-gaps method for testing */
+
+/*
+ * Evaluates to non-zero when the %UBIFS_TST_FORCE_IN_THE_GAPS bit is set in
+ * 'ubifs_tst_flags'.
+ */
+#define dbg_force_in_the_gaps_enabled \
+       (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS)
+
+int dbg_force_in_the_gaps(void);
+
+/* Failure mode for recovery testing */
+
+/* Evaluates to non-zero when %UBIFS_TST_RCVRY is set in 'ubifs_tst_flags' */
+#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
+
+void dbg_failure_mode_registration(struct ubifs_info *c);
+void dbg_failure_mode_deregistration(struct ubifs_info *c);
+
+/*
+ * Unless %UBIFS_DBG_PRESERVE_UBI is defined by the including file, every use
+ * of the UBI LEB I/O functions below is textually redirected to the matching
+ * 'dbg_*()' wrapper declared further down.
+ * NOTE(review): presumably the file implementing the wrappers defines
+ * %UBIFS_DBG_PRESERVE_UBI so that it can still reach the real UBI calls -
+ * confirm against debug.c.
+ */
+#ifndef UBIFS_DBG_PRESERVE_UBI
+
+#define ubi_leb_read   dbg_leb_read
+#define ubi_leb_write  dbg_leb_write
+#define ubi_leb_change dbg_leb_change
+#define ubi_leb_erase  dbg_leb_erase
+#define ubi_leb_unmap  dbg_leb_unmap
+#define ubi_is_mapped  dbg_is_mapped
+#define ubi_leb_map    dbg_leb_map
+
+#endif
+
+/* Debugging wrappers taking the place of the UBI LEB I/O functions above */
+int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
+                int len, int check);
+int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
+                 int offset, int len, int dtype);
+int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
+                  int len, int dtype);
+int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum);
+int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum);
+int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum);
+int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
+
+/* Short form of 'dbg_leb_read()' which always passes zero as @check */
+static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf,
+                          int offset, int len)
+{
+       const int no_check = 0;
+
+       return dbg_leb_read(desc, lnum, buf, offset, len, no_check);
+}
+
+/* Short form of 'dbg_leb_write()' which always uses the %UBI_UNKNOWN dtype */
+static inline int dbg_write(struct ubi_volume_desc *desc, int lnum,
+                           const void *buf, int offset, int len)
+{
+       const int dtype = UBI_UNKNOWN;
+
+       return dbg_leb_write(desc, lnum, buf, offset, len, dtype);
+}
+
+/* Short form of 'dbg_leb_change()' which always uses the %UBI_UNKNOWN dtype */
+static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
+                            const void *buf, int len)
+{
+       const int dtype = UBI_UNKNOWN;
+
+       return dbg_leb_change(desc, lnum, buf, len, dtype);
+}
+
+#else /* !CONFIG_UBIFS_FS_DEBUG */
+
+/*
+ * Debugging is compiled out: everything below is a stub. Statement-like
+ * helpers expand to nothing or to an empty statement expression, string
+ * helpers expand to "", and checking helpers expand to the constant 0.
+ */
+#define UBIFS_DBG(op)
+#define ubifs_assert(expr)                         ({})
+#define ubifs_assert_cmt_locked(c)
+#define dbg_dump_stack()
+#define dbg_err(fmt, ...)                          ({})
+#define dbg_msg(fmt, ...)                          ({})
+#define dbg_key(c, key, fmt, ...)                  ({})
+
+/* Per-subsystem debugging messages */
+#define dbg_gen(fmt, ...)                          ({})
+#define dbg_jnl(fmt, ...)                          ({})
+#define dbg_tnc(fmt, ...)                          ({})
+#define dbg_lp(fmt, ...)                           ({})
+#define dbg_find(fmt, ...)                         ({})
+#define dbg_mnt(fmt, ...)                          ({})
+#define dbg_io(fmt, ...)                           ({})
+#define dbg_cmt(fmt, ...)                          ({})
+#define dbg_budg(fmt, ...)                         ({})
+#define dbg_log(fmt, ...)                          ({})
+#define dbg_gc(fmt, ...)                           ({})
+#define dbg_scan(fmt, ...)                         ({})
+#define dbg_rcvry(fmt, ...)                        ({})
+
+/* Name helpers and dumping functions */
+#define dbg_ntype(type)                            ""
+#define dbg_cstate(cmt_state)                      ""
+#define dbg_get_key_dump(c, key)                   ({})
+#define dbg_dump_inode(c, inode)                   ({})
+#define dbg_dump_node(c, node)                     ({})
+#define dbg_dump_budget_req(req)                   ({})
+#define dbg_dump_lstats(lst)                       ({})
+#define dbg_dump_budg(c)                           ({})
+#define dbg_dump_lprop(c, lp)                      ({})
+#define dbg_dump_lprops(c)                         ({})
+#define dbg_dump_leb(c, lnum)                      ({})
+#define dbg_dump_znode(c, znode)                   ({})
+#define dbg_dump_heap(c, heap, cat)                ({})
+#define dbg_dump_pnode(c, pnode, parent, iip)      ({})
+#define dbg_dump_tnc(c)                            ({})
+#define dbg_dump_index(c)                          ({})
+
+/* Checking helpers evaluate to 0, so callers see them as always passing */
+#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
+
+#define dbg_old_index_check_init(c, zroot)         0
+#define dbg_check_old_index(c, zroot)              0
+
+#define dbg_check_cats(c)                          0
+
+#define dbg_check_ltab(c)                          0
+
+#define dbg_check_synced_i_size(inode)             0
+
+#define dbg_check_dir_size(c, dir)                 0
+
+#define dbg_check_tnc(c, x)                        0
+
+#define dbg_check_idx_size(c, idx_size)            0
+
+#define dbg_check_filesystem(c)                    0
+
+#define dbg_check_heap(c, heap, cat, add_pos)      ({})
+
+#define dbg_check_lprops(c)                        0
+#define dbg_check_lpt_nodes(c, cnode, row, col)    0
+
+/* Testing modes are permanently off */
+#define dbg_force_in_the_gaps_enabled              0
+#define dbg_force_in_the_gaps()                    0
+
+#define dbg_failure_mode                           0
+#define dbg_failure_mode_registration(c)           ({})
+#define dbg_failure_mode_deregistration(c)         ({})
+
+#endif /* !CONFIG_UBIFS_FS_DEBUG */
+
+#endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c

new file mode 100644 (file)

index 0000000..e90374b
--- /dev/null
+++ b/fs/ubifs/dir.c
@@ -0,0 +1,1240 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ * Copyright (C) 2006, 2007 University of Szeged, Hungary
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ *          Zoltan Sogor
+ */
+
+/*
+ * This file implements directory operations.
+ *
+ * All FS operations in this file allocate budget before writing anything to the
+ * media. If they fail to allocate it, the error is returned. The only
+ * exceptions are 'ubifs_unlink()' and 'ubifs_rmdir()' which keep working even
+ * if they are unable to allocate the budget, because an %-ENOSPC failure on
+ * deletion is not what users are usually ready to get. The UBIFS budgeting
+ * subsystem has some space reserved for these purposes.
+ *
+ * All operations in this file write all inodes which they change straight
+ * away, instead of marking them dirty. For example, 'ubifs_link()' changes
+ * @i_size of the parent inode and writes the parent inode together with the
+ * target inode. This was done to simplify file-system recovery which would
+ * otherwise be very difficult to do. The only exception is rename, which only
+ * marks the re-named inode dirty (because its @i_ctime is updated) and does
+ * not write it straight away.
+ */
+
+#include "ubifs.h"
+
+/**
+ * inherit_flags - work out which flags a new inode takes from its parent.
+ * @dir: parent inode
+ * @mode: new inode mode flags
+ *
+ * This is a helper function for 'ubifs_new_inode()'. When @dir is a
+ * directory, the new inode inherits the following flags from it:
+ * o %UBIFS_COMPR_FL, which lets compression be switched on/off on a
+ *   per-sub-directory basis;
+ * o %UBIFS_SYNC_FL - useful for the same reasons;
+ * o %UBIFS_DIRSYNC_FL - similar, but only if the new inode is a directory.
+ *
+ * When @dir is not a directory, an extended attribute inode is being created
+ * and nothing is inherited.
+ *
+ * This function returns the inherited flags.
+ */
+static int inherit_flags(const struct inode *dir, int mode)
+{
+       int mask = UBIFS_COMPR_FL | UBIFS_SYNC_FL;
+
+       /* Extended attribute inodes (non-directory parent) get no flags */
+       if (!S_ISDIR(dir->i_mode))
+               return 0;
+
+       /* The "DIRSYNC" flag only applies to directories */
+       if (S_ISDIR(mode))
+               mask |= UBIFS_DIRSYNC_FL;
+
+       return ubifs_inode(dir)->flags & mask;
+}
+
+/**
+ * ubifs_new_inode - allocate new UBIFS inode object.
+ * @c: UBIFS file-system description object
+ * @dir: parent directory inode
+ * @mode: inode mode flags
+ *
+ * This function allocates a new VFS inode, initializes it (ownership, mode,
+ * time-stamps, operations, inherited flags and compression type) and assigns
+ * it the next unused inode number. Returns the new inode in case of success
+ * and an error pointer in case of failure: %-ENOMEM if the inode could not be
+ * allocated and %-EINVAL if the file-system ran out of inode numbers.
+ */
+struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
+                             int mode)
+{
+       struct inode *inode;
+       struct ubifs_inode *ui;
+
+       inode = new_inode(c->vfs_sb);
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+       /* Derive the UBIFS inode only once @inode is known to be valid */
+       ui = ubifs_inode(inode);
+
+       /*
+        * Set 'S_NOCMTIME' to prevent VFS from updating [mc]time of inodes and
+        * marking them dirty in file write path (see 'file_update_time()').
+        * UBIFS has to fully control "clean <-> dirty" transitions of inodes
+        * to make budgeting work.
+        */
+       inode->i_flags |= (S_NOCMTIME);
+
+       inode->i_uid = current->fsuid;
+       if (dir->i_mode & S_ISGID) {
+               /* Set-GID parent: inherit its group, sub-directories stay set-GID */
+               inode->i_gid = dir->i_gid;
+               if (S_ISDIR(mode))
+                       mode |= S_ISGID;
+       } else
+               inode->i_gid = current->fsgid;
+       inode->i_mode = mode;
+       inode->i_mtime = inode->i_atime = inode->i_ctime =
+                        ubifs_current_time(inode);
+       inode->i_mapping->nrpages = 0;
+       /* Disable readahead */
+       inode->i_mapping->backing_dev_info = &c->bdi;
+
+       /* Hook up the operations matching the inode type */
+       switch (mode & S_IFMT) {
+       case S_IFREG:
+               inode->i_mapping->a_ops = &ubifs_file_address_operations;
+               inode->i_op = &ubifs_file_inode_operations;
+               inode->i_fop = &ubifs_file_operations;
+               break;
+       case S_IFDIR:
+               inode->i_op  = &ubifs_dir_inode_operations;
+               inode->i_fop = &ubifs_dir_operations;
+               inode->i_size = ui->ui_size = UBIFS_INO_NODE_SZ;
+               break;
+       case S_IFLNK:
+               inode->i_op = &ubifs_symlink_inode_operations;
+               break;
+       case S_IFSOCK:
+       case S_IFIFO:
+       case S_IFBLK:
+       case S_IFCHR:
+               inode->i_op  = &ubifs_file_inode_operations;
+               break;
+       default:
+               BUG();
+       }
+
+       ui->flags = inherit_flags(dir, mode);
+       ubifs_set_inode_flags(inode);
+       /* Only regular files use the default compressor */
+       if (S_ISREG(mode))
+               ui->compr_type = c->default_compr;
+       else
+               ui->compr_type = UBIFS_COMPR_NONE;
+       ui->synced_i_size = 0;
+
+       /* @c->cnt_lock protects the inode number and sequence counters */
+       spin_lock(&c->cnt_lock);
+       /* Inode number overflow is currently not supported */
+       if (c->highest_inum >= INUM_WARN_WATERMARK) {
+               if (c->highest_inum >= INUM_WATERMARK) {
+                       spin_unlock(&c->cnt_lock);
+                       ubifs_err("out of inode numbers");
+                       make_bad_inode(inode);
+                       iput(inode);
+                       return ERR_PTR(-EINVAL);
+               }
+               ubifs_warn("running out of inode numbers (current %lu, max %d)",
+                          c->highest_inum, INUM_WATERMARK);
+       }
+
+       inode->i_ino = ++c->highest_inum;
+       inode->i_generation = ++c->vfs_gen;
+       /*
+        * The creation sequence number remains with this inode for its
+        * lifetime. All nodes for this inode have a greater sequence number,
+        * and so it is possible to distinguish obsolete nodes belonging to a
+        * previous incarnation of the same inode number - for example, for the
+        * purpose of rebuilding the index.
+        */
+       ui->creat_sqnum = ++c->max_sqnum;
+       spin_unlock(&c->cnt_lock);
+       return inode;
+}
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+/*
+ * dbg_check_name - verify that a directory entry node carries the given name.
+ *
+ * The check is performed only when %UBIFS_CHK_GEN is set in
+ * 'ubifs_chk_flags'. Returns %-EINVAL if the name length or the name bytes
+ * stored in @dent differ from @nm, and zero otherwise.
+ */
+static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
+{
+       if (ubifs_chk_flags & UBIFS_CHK_GEN) {
+               if (le16_to_cpu(dent->nlen) != nm->len ||
+                   memcmp(dent->name, nm->name, nm->len))
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
+#else
+
+/* Without debugging support the name check always succeeds */
+#define dbg_check_name(dent, nm) 0
+
+#endif
+
+/*
+ * ubifs_lookup - look up a name in a directory.
+ *
+ * Searches the TNC for the directory entry @dentry->d_name in directory @dir
+ * and attaches the corresponding inode (or no inode, if the entry does not
+ * exist) to @dentry. Returns %NULL in case of success and an error pointer in
+ * case of failure.
+ */
+static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
+                                  struct nameidata *nd)
+{
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       struct qstr *name = &dentry->d_name;
+       struct ubifs_dent_node *dent;
+       struct inode *inode = NULL;
+       union ubifs_key key;
+       int err;
+
+       dbg_gen("'%.*s' in dir ino %lu",
+               name->len, name->name, dir->i_ino);
+
+       if (name->len > UBIFS_MAX_NLEN)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
+       if (!dent)
+               return ERR_PTR(-ENOMEM);
+
+       dent_key_init(c, &key, dir->i_ino, name);
+
+       err = ubifs_tnc_lookup_nm(c, &key, dent, name);
+       if (err == -ENOENT && dir->i_nlink != 0) {
+               /* No such entry: fall through and add a negative dentry */
+               dbg_gen("not found");
+       } else if (err) {
+               /*
+                * Do not hash the direntry if parent 'i_nlink' is zero, because
+                * this has side-effects - '->delete_inode()' call will not be
+                * called for the parent orphan inode, because 'd_count' of its
+                * direntry will stay 1 (it'll be negative direntry I guess)
+                * and prevent 'iput_final()' until the dentry is destroyed due
+                * to unmount or memory pressure.
+                */
+               goto out_err;
+       } else {
+               if (dbg_check_name(dent, name)) {
+                       err = -EINVAL;
+                       goto out_err;
+               }
+
+               inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum));
+               if (IS_ERR(inode)) {
+                       /*
+                        * This should not happen. Probably the file-system
+                        * needs checking.
+                        */
+                       err = PTR_ERR(inode);
+                       ubifs_err("dead directory entry '%.*s', error %d",
+                                 name->len, name->name, err);
+                       ubifs_ro_mode(c, err);
+                       goto out_err;
+               }
+       }
+
+       kfree(dent);
+       /*
+        * Note, d_splice_alias() would be required instead if we supported
+        * NFS.
+        */
+       d_add(dentry, inode);
+       return NULL;
+
+out_err:
+       kfree(dent);
+       return ERR_PTR(err);
+}
+
+/*
+ * ubifs_create - create a regular file.
+ *
+ * Budgets space, allocates a new inode and writes it to the journal together
+ * with the new directory entry and the updated parent directory inode.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
+                       struct nameidata *nd)
+{
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       struct ubifs_inode *dir_ui = ubifs_inode(dir);
+       int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
+       /*
+        * Budget request settings: new inode, new direntry, changing the
+        * parent directory inode.
+        */
+       struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
+                                       .dirtied_ino = 1 };
+       struct inode *inode;
+       int err;
+
+       dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
+               dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       inode = ubifs_new_inode(c, dir, mode);
+       if (IS_ERR(inode)) {
+               err = PTR_ERR(inode);
+               goto out_release;
+       }
+
+       mutex_lock(&dir_ui->ui_mutex);
+       dir->i_size += sz_change;
+       dir_ui->ui_size = dir->i_size;
+       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
+       if (!err) {
+               mutex_unlock(&dir_ui->ui_mutex);
+               ubifs_release_budget(c, &req);
+               insert_inode_hash(inode);
+               d_instantiate(dentry, inode);
+               return 0;
+       }
+
+       /* The journal update failed: undo the size change, drop the inode */
+       dir->i_size -= sz_change;
+       dir_ui->ui_size = dir->i_size;
+       mutex_unlock(&dir_ui->ui_mutex);
+       make_bad_inode(inode);
+       iput(inode);
+out_release:
+       ubifs_release_budget(c, &req);
+       ubifs_err("cannot create regular file, error %d", err);
+       return err;
+}
+
+/**
+ * vfs_dent_type - get VFS directory entry type.
+ * @type: UBIFS directory entry type
+ *
+ * This function maps a UBIFS directory entry type (%UBIFS_ITYPE_*) to the
+ * corresponding VFS directory entry type (%DT_*). An unknown @type is a bug.
+ */
+static unsigned int vfs_dent_type(uint8_t type)
+{
+       unsigned int dt = 0;
+
+       switch (type) {
+       case UBIFS_ITYPE_REG:
+               dt = DT_REG;
+               break;
+       case UBIFS_ITYPE_DIR:
+               dt = DT_DIR;
+               break;
+       case UBIFS_ITYPE_LNK:
+               dt = DT_LNK;
+               break;
+       case UBIFS_ITYPE_BLK:
+               dt = DT_BLK;
+               break;
+       case UBIFS_ITYPE_CHR:
+               dt = DT_CHR;
+               break;
+       case UBIFS_ITYPE_FIFO:
+               dt = DT_FIFO;
+               break;
+       case UBIFS_ITYPE_SOCK:
+               dt = DT_SOCK;
+               break;
+       default:
+               BUG();
+       }
+
+       return dt;
+}
+
+/*
+ * The classical Unix view for directory is that it is a linear array of
+ * (name, inode number) entries. Linux/VFS assumes this model as well.
+ * Particularly, 'readdir()' call wants us to return a directory entry offset
+ * which later may be used to continue 'readdir()'ing the directory or to
+ * 'seek()' to that specific direntry. Obviously UBIFS does not really fit this
+ * model because directory entries are identified by keys, which may collide.
+ *
+ * UBIFS uses directory entry hash value for directory offsets, so
+ * 'seekdir()'/'telldir()' may not always work because of possible key
+ * collisions. But UBIFS guarantees that consecutive 'readdir()' calls work
+ * properly by means of saving full directory entry name in the private field
+ * of the file description object.
+ *
+ * This means that UBIFS cannot support NFS which requires full
+ * 'seekdir()'/'telldir()' support.
+ *
+ * File positions 0 and 1 stand for "." and "..", position 2 means "no more
+ * entries", and any other position is the key hash of the directory entry
+ * cached in @file->private_data. Returns zero when the caller's buffer is
+ * full or the directory is exhausted, and a negative error code if the next
+ * directory entry could not be read.
+ */
+static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+       int err, over = 0;
+       struct qstr nm;
+       union ubifs_key key;
+       struct ubifs_dent_node *dent;
+       struct inode *dir = file->f_path.dentry->d_inode;
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+
+       dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);
+
+       if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2)
+               /*
+                * The directory was seek'ed to a senseless position or there
+                * are no more entries.
+                */
+               return 0;
+
+       /* File positions 0 and 1 correspond to "." and ".." */
+       if (file->f_pos == 0) {
+               ubifs_assert(!file->private_data);
+               over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR);
+               if (over)
+                       return 0;
+               file->f_pos = 1;
+       }
+
+       if (file->f_pos == 1) {
+               ubifs_assert(!file->private_data);
+               over = filldir(dirent, "..", 2, 1,
+                              parent_ino(file->f_path.dentry), DT_DIR);
+               if (over)
+                       return 0;
+
+               /* Find the first entry in TNC and save it */
+               lowest_dent_key(c, &key, dir->i_ino);
+               nm.name = NULL;
+               dent = ubifs_tnc_next_ent(c, &key, &nm);
+               if (IS_ERR(dent)) {
+                       err = PTR_ERR(dent);
+                       goto out;
+               }
+
+               file->f_pos = key_hash_flash(c, &dent->key);
+               file->private_data = dent;
+       }
+
+       dent = file->private_data;
+       if (!dent) {
+               /*
+                * The directory was seek'ed to and is now readdir'ed.
+                * Find the entry corresponding to @file->f_pos or the
+                * closest one.
+                */
+               dent_key_init_hash(c, &key, dir->i_ino, file->f_pos);
+               nm.name = NULL;
+               dent = ubifs_tnc_next_ent(c, &key, &nm);
+               if (IS_ERR(dent)) {
+                       err = PTR_ERR(dent);
+                       goto out;
+               }
+               file->f_pos = key_hash_flash(c, &dent->key);
+               file->private_data = dent;
+       }
+
+       while (1) {
+               dbg_gen("feed '%s', ino %llu, new f_pos %#x",
+                       dent->name, le64_to_cpu(dent->inum),
+                       key_hash_flash(c, &dent->key));
+               /*
+                * The sequence number is stored on the media in little-endian
+                * form, so convert it before comparing with the CPU-order
+                * creation sequence number of the directory.
+                */
+               ubifs_assert(le64_to_cpu(dent->ch.sqnum) >
+                            ubifs_inode(dir)->creat_sqnum);
+
+               nm.len = le16_to_cpu(dent->nlen);
+               over = filldir(dirent, dent->name, nm.len, file->f_pos,
+                              le64_to_cpu(dent->inum),
+                              vfs_dent_type(dent->type));
+               if (over)
+                       /* Buffer full - the entry stays cached for next time */
+                       return 0;
+
+               /* Switch to the next entry */
+               key_read(c, &dent->key, &key);
+               nm.name = dent->name;
+               dent = ubifs_tnc_next_ent(c, &key, &nm);
+               if (IS_ERR(dent)) {
+                       err = PTR_ERR(dent);
+                       goto out;
+               }
+
+               /* The previous entry is not needed any more, cache the new one */
+               kfree(file->private_data);
+               file->f_pos = key_hash_flash(c, &dent->key);
+               file->private_data = dent;
+               cond_resched();
+       }
+
+out:
+       if (err != -ENOENT) {
+               ubifs_err("cannot find next direntry, error %d", err);
+               return err;
+       }
+
+       /* %-ENOENT means there are no more entries: drop the saved state */
+       kfree(file->private_data);
+       file->private_data = NULL;
+       file->f_pos = 2;
+       return 0;
+}
+
+/*
+ * Seeking a directory invalidates the directory entry saved by 'readdir()',
+ * so free it before performing the generic seek.
+ */
+static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+       void **saved = &file->private_data;
+
+       kfree(*saved);
+       *saved = NULL;
+
+       return generic_file_llseek(file, offset, origin);
+}
+
+/*
+ * Closing a directory: free the directory entry saved by 'readdir()', if
+ * any. Always returns zero.
+ */
+static int ubifs_dir_release(struct inode *dir, struct file *file)
+{
+       void **saved = &file->private_data;
+
+       kfree(*saved);
+       *saved = NULL;
+
+       return 0;
+}
+
+/**
+ * lock_2_inodes - lock two UBIFS inodes.
+ * @inode1: first inode
+ * @inode2: second inode
+ *
+ * The inode with the smaller inode number is locked first (lock class
+ * %WB_MUTEX_2) and the other one second (lock class %WB_MUTEX_3), whichever
+ * order the arguments are given in.
+ */
+static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
+{
+       struct inode *first = inode2, *second = inode1;
+
+       if (inode1->i_ino < inode2->i_ino) {
+               first = inode1;
+               second = inode2;
+       }
+
+       mutex_lock_nested(&ubifs_inode(first)->ui_mutex, WB_MUTEX_2);
+       mutex_lock_nested(&ubifs_inode(second)->ui_mutex, WB_MUTEX_3);
+}
+
+/**
+ * unlock_2_inodes - unlock two UBIFS inodes.
+ * @inode1: first inode
+ * @inode2: second inode
+ *
+ * Releases the @ui_mutex of @inode1 and then of @inode2.
+ */
+static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
+{
+       struct ubifs_inode *ui1 = ubifs_inode(inode1);
+       struct ubifs_inode *ui2 = ubifs_inode(inode2);
+
+       mutex_unlock(&ui1->ui_mutex);
+       mutex_unlock(&ui2->ui_mutex);
+}
+
+/*
+ * ubifs_link - create a hard link.
+ *
+ * Adds directory entry @dentry in directory @dir pointing to the inode of
+ * @old_dentry, and writes the entry, the target inode and the parent inode to
+ * the journal. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
+                     struct dentry *dentry)
+{
+       struct inode *inode = old_dentry->d_inode;
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       struct ubifs_inode *dir_ui = ubifs_inode(dir);
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
+       /*
+        * Budget request settings: new direntry, changing the target inode,
+        * changing the parent inode.
+        */
+       struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
+                                       .dirtied_ino_d = ui->data_len };
+       int err;
+
+       dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
+               dentry->d_name.len, dentry->d_name.name, inode->i_ino,
+               inode->i_nlink, dir->i_ino);
+       err = dbg_check_synced_i_size(inode);
+       if (err)
+               return err;
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       lock_2_inodes(dir, inode);
+       inc_nlink(inode);
+       /* The new dentry will hold its own reference to the inode */
+       atomic_inc(&inode->i_count);
+       inode->i_ctime = ubifs_current_time(inode);
+       dir->i_size += sz_change;
+       dir_ui->ui_size = dir->i_size;
+       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
+       if (!err) {
+               unlock_2_inodes(dir, inode);
+               ubifs_release_budget(c, &req);
+               d_instantiate(dentry, inode);
+               return 0;
+       }
+
+       /* The journal update failed: roll the in-memory changes back */
+       dir->i_size -= sz_change;
+       dir_ui->ui_size = dir->i_size;
+       drop_nlink(inode);
+       unlock_2_inodes(dir, inode);
+       ubifs_release_budget(c, &req);
+       iput(inode);
+       return err;
+}
+
+/*
+ * ubifs_unlink - remove a directory entry of a non-directory inode.
+ *
+ * Drops one link of the inode behind @dentry and writes the change to the
+ * journal. The operation goes ahead even if budgeting fails with %-ENOSPC.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
+{
+       struct inode *inode = dentry->d_inode;
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       struct ubifs_inode *dir_ui = ubifs_inode(dir);
+       int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
+       /*
+        * Budget request settings: deletion direntry, deletion inode (+1 for
+        * @dirtied_ino), changing the parent directory inode. If budgeting
+        * fails, go ahead anyway because we have extra space reserved for
+        * deletions.
+        */
+       struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
+       int budgeted = 1;
+       int err;
+
+       dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
+               dentry->d_name.len, dentry->d_name.name, inode->i_ino,
+               inode->i_nlink, dir->i_ino);
+       err = dbg_check_synced_i_size(inode);
+       if (err)
+               return err;
+
+       err = ubifs_budget_space(c, &req);
+       if (err == -ENOSPC)
+               /* Carry on without a budget */
+               budgeted = 0;
+       else if (err)
+               return err;
+
+       lock_2_inodes(dir, inode);
+       inode->i_ctime = ubifs_current_time(dir);
+       drop_nlink(inode);
+       dir->i_size -= sz_change;
+       dir_ui->ui_size = dir->i_size;
+       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
+       if (!err) {
+               unlock_2_inodes(dir, inode);
+               if (!budgeted) {
+                       /* We've deleted something - clean the "no space" flags */
+                       c->nospace = c->nospace_rp = 0;
+                       smp_wmb();
+               } else
+                       ubifs_release_budget(c, &req);
+               return 0;
+       }
+
+       /* The journal update failed: roll the in-memory changes back */
+       dir->i_size += sz_change;
+       dir_ui->ui_size = dir->i_size;
+       inc_nlink(inode);
+       unlock_2_inodes(dir, inode);
+       if (budgeted)
+               ubifs_release_budget(c, &req);
+       return err;
+}
+
+/**
+ * check_dir_empty - check if a directory is empty or not.
+ * @c: UBIFS file-system description object
+ * @dir: VFS inode object of the directory to check
+ *
+ * This function asks the TNC for the first directory entry of @dir. Returns
+ * zero if there is none (the directory is empty), %-ENOTEMPTY if there is
+ * one, and other negative error codes in case of errors.
+ */
+static int check_dir_empty(struct ubifs_info *c, struct inode *dir)
+{
+       struct qstr nm = { .name = NULL };
+       struct ubifs_dent_node *dent;
+       union ubifs_key key;
+       int err;
+
+       lowest_dent_key(c, &key, dir->i_ino);
+       dent = ubifs_tnc_next_ent(c, &key, &nm);
+       if (!IS_ERR(dent)) {
+               /* Found an entry, so the directory is not empty */
+               kfree(dent);
+               return -ENOTEMPTY;
+       }
+
+       err = PTR_ERR(dent);
+       return err == -ENOENT ? 0 : err;
+}
+
+/*
+ * ubifs_rmdir - remove an empty directory.
+ *
+ * Fails with %-ENOTEMPTY if the directory behind @dentry still has entries.
+ * Otherwise clears its link count, drops one link of the parent @dir and
+ * writes the change to the journal. The operation goes ahead even if
+ * budgeting fails with %-ENOSPC. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+       struct inode *inode = dentry->d_inode;
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       struct ubifs_inode *dir_ui = ubifs_inode(dir);
+       int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
+       /*
+        * Budget request settings: deletion direntry, deletion inode and
+        * changing the parent inode. If budgeting fails, go ahead anyway
+        * because we have extra space reserved for deletions.
+        */
+       struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
+       int budgeted = 1;
+       int err;
+
+       dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
+               dentry->d_name.name, inode->i_ino, dir->i_ino);
+
+       err = check_dir_empty(c, dentry->d_inode);
+       if (err)
+               return err;
+
+       err = ubifs_budget_space(c, &req);
+       if (err == -ENOSPC)
+               /* Carry on without a budget */
+               budgeted = 0;
+       else if (err)
+               return err;
+
+       lock_2_inodes(dir, inode);
+       inode->i_ctime = ubifs_current_time(dir);
+       clear_nlink(inode);
+       drop_nlink(dir);
+       dir->i_size -= sz_change;
+       dir_ui->ui_size = dir->i_size;
+       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
+       if (!err) {
+               unlock_2_inodes(dir, inode);
+               if (!budgeted) {
+                       /* We've deleted something - clean the "no space" flags */
+                       c->nospace = c->nospace_rp = 0;
+                       smp_wmb();
+               } else
+                       ubifs_release_budget(c, &req);
+               return 0;
+       }
+
+       /*
+        * The journal update failed: restore the parent's size and link
+        * count and give the directory its two links back.
+        */
+       dir->i_size += sz_change;
+       dir_ui->ui_size = dir->i_size;
+       inc_nlink(dir);
+       inc_nlink(inode);
+       inc_nlink(inode);
+       unlock_2_inodes(dir, inode);
+       if (budgeted)
+               ubifs_release_budget(c, &req);
+       return err;
+}
+
+/*
+ * ubifs_mkdir - create a directory.
+ *
+ * Budgets space, allocates a new directory inode and writes it to the journal
+ * together with the new directory entry and the updated parent directory
+ * inode. Returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+       struct inode *inode;
+       struct ubifs_inode *dir_ui = ubifs_inode(dir);
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
+       /*
+        * Budget request settings: new inode, new direntry and changing parent
+        * directory inode. The parent is accounted for as one dirtied inode,
+        * the same way 'ubifs_create()' does it.
+        */
+       struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
+                                       .dirtied_ino = 1 };
+
+       dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
+               dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       inode = ubifs_new_inode(c, dir, S_IFDIR | mode);
+       if (IS_ERR(inode)) {
+               err = PTR_ERR(inode);
+               goto out_budg;
+       }
+
+       mutex_lock(&dir_ui->ui_mutex);
+       insert_inode_hash(inode);
+       /* One extra link for the new directory and one for its parent */
+       inc_nlink(inode);
+       inc_nlink(dir);
+       dir->i_size += sz_change;
+       dir_ui->ui_size = dir->i_size;
+       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
+       if (err) {
+               ubifs_err("cannot create directory, error %d", err);
+               goto out_cancel;
+       }
+       mutex_unlock(&dir_ui->ui_mutex);
+
+       ubifs_release_budget(c, &req);
+       d_instantiate(dentry, inode);
+       return 0;
+
+out_cancel:
+       /* Roll back the parent's size and link count, discard the new inode */
+       dir->i_size -= sz_change;
+       dir_ui->ui_size = dir->i_size;
+       drop_nlink(dir);
+       mutex_unlock(&dir_ui->ui_mutex);
+       make_bad_inode(inode);
+       iput(inode);
+out_budg:
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/**
+ * ubifs_mknod - create a special file in a directory.
+ * @dir: parent directory inode
+ * @dentry: directory entry of the node to create
+ * @mode: mode of the new inode
+ * @rdev: device number of the new node
+ *
+ * For block and character devices @rdev is encoded with 'ubifs_encode_dev()'
+ * and attached to the new inode as its data. Returns zero in case of success
+ * and a negative error code in case of failure.
+ */
+static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
+                      int mode, dev_t rdev)
+{
+       struct inode *inode;
+       struct ubifs_inode *ui;
+       struct ubifs_inode *dir_ui = ubifs_inode(dir);
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       union ubifs_dev_desc *dev = NULL;
+       int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
+       int err, devlen = 0;
+       struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
+                                       .dirtied_ino = 1 };
+
+       /*
+        * Budget request settings: new inode, new direntry and changing parent
+        * directory inode. The length of the inode data is added further
+        * down, because it is not known until the device has been encoded.
+        */
+
+       dbg_gen("dent '%.*s' in dir ino %lu",
+               dentry->d_name.len, dentry->d_name.name, dir->i_ino);
+
+       if (!new_valid_dev(rdev))
+               return -EINVAL;
+
+       if (S_ISBLK(mode) || S_ISCHR(mode)) {
+               dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
+               if (!dev)
+                       return -ENOMEM;
+               devlen = ubifs_encode_dev(dev, rdev);
+       }
+
+       /*
+        * @devlen must be put to the request only now - initializing
+        * @req.new_ino_d at declaration time would always budget for zero
+        * bytes of inode data.
+        */
+       req.new_ino_d = devlen;
+       err = ubifs_budget_space(c, &req);
+       if (err) {
+               kfree(dev);
+               return err;
+       }
+
+       inode = ubifs_new_inode(c, dir, mode);
+       if (IS_ERR(inode)) {
+               kfree(dev);
+               err = PTR_ERR(inode);
+               goto out_budg;
+       }
+
+       init_special_inode(inode, inode->i_mode, rdev);
+       ui = ubifs_inode(inode);
+       inode->i_size = ui->ui_size = devlen;
+       /* From now on @dev belongs to the inode and must not be freed here */
+       ui->data = dev;
+       ui->data_len = devlen;
+
+       mutex_lock(&dir_ui->ui_mutex);
+       dir->i_size += sz_change;
+       dir_ui->ui_size = dir->i_size;
+       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
+       if (err)
+               goto out_cancel;
+       mutex_unlock(&dir_ui->ui_mutex);
+
+       ubifs_release_budget(c, &req);
+       insert_inode_hash(inode);
+       d_instantiate(dentry, inode);
+       return 0;
+
+out_cancel:
+       /* Undo the directory size change done under @dir_ui->ui_mutex */
+       dir->i_size -= sz_change;
+       dir_ui->ui_size = dir->i_size;
+       mutex_unlock(&dir_ui->ui_mutex);
+       make_bad_inode(inode);
+       iput(inode);
+out_budg:
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/*
+ * Create symbolic link @dentry in directory @dir which points to @symname.
+ * The link target is stored as the data of the new inode. Returns zero in case
+ * of success and a negative error code in case of failure.
+ */
+static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
+                        const char *symname)
+{
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       struct ubifs_inode *parent_ui = ubifs_inode(dir);
+       int dent_sz = CALC_DENT_SIZE(dentry->d_name.len);
+       int target_len = strlen(symname);
+       struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
+                                       .new_ino_d = target_len,
+                                       .dirtied_ino = 1 };
+       struct ubifs_inode *link_ui;
+       struct inode *link;
+       char *target;
+       int err;
+
+       /*
+        * The budget covers a new inode carrying @target_len bytes of data, a
+        * new direntry, and the change of the parent directory inode.
+        */
+
+       dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len,
+               dentry->d_name.name, symname, dir->i_ino);
+
+       if (target_len > UBIFS_MAX_INO_DATA)
+               return -ENAMETOOLONG;
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       link = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO);
+       if (IS_ERR(link)) {
+               err = PTR_ERR(link);
+               goto fail_budget;
+       }
+
+       link_ui = ubifs_inode(link);
+       target = kmalloc(target_len + 1, GFP_NOFS);
+       link_ui->data = target;
+       if (!target) {
+               err = -ENOMEM;
+               goto fail_inode;
+       }
+
+       /*
+        * Keep an in-memory copy of the target with a terminating zero byte.
+        * The zero byte only simplifies string handling and is not part of
+        * the inode data, hence the data length is @target_len and not
+        * @target_len + %1.
+        */
+       memcpy(target, symname, target_len);
+       target[target_len] = '\0';
+       link_ui->data_len = target_len;
+       link_ui->ui_size = target_len;
+       link->i_size = target_len;
+
+       mutex_lock(&parent_ui->ui_mutex);
+       dir->i_size += dent_sz;
+       parent_ui->ui_size = dir->i_size;
+       dir->i_ctime = link->i_ctime;
+       dir->i_mtime = dir->i_ctime;
+       err = ubifs_jnl_update(c, dir, &dentry->d_name, link, 0, 0);
+       if (err)
+               goto fail_update;
+       mutex_unlock(&parent_ui->ui_mutex);
+
+       ubifs_release_budget(c, &req);
+       insert_inode_hash(link);
+       d_instantiate(dentry, link);
+       return 0;
+
+fail_update:
+       /* Take the direntry size back out of the parent directory */
+       dir->i_size -= dent_sz;
+       parent_ui->ui_size = dir->i_size;
+       mutex_unlock(&parent_ui->ui_mutex);
+fail_inode:
+       make_bad_inode(link);
+       iput(link);
+fail_budget:
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/**
+ * lock_3_inodes - lock three UBIFS inodes for rename.
+ * @inode1: first inode
+ * @inode2: second inode
+ * @inode3: third inode
+ *
+ * This helper serves 'ubifs_rename()', where @inode1 and @inode2 may be one
+ * and the same inode and @inode3 may be null. When all three inodes are
+ * present and different, the one with the lowest address is locked first and
+ * the other two are passed on to 'lock_2_inodes()'.
+ */
+static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
+                         struct inode *inode3)
+{
+       struct inode *lower, *higher, *first, *second;
+
+       if (!inode3) {
+               /* Only one or two inodes to lock */
+               if (inode1 == inode2)
+                       mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex,
+                                         WB_MUTEX_1);
+               else
+                       lock_2_inodes(inode1, inode2);
+               return;
+       }
+
+       if (inode1 == inode2) {
+               lock_2_inodes(inode1, inode3);
+               return;
+       }
+
+       /* 3 different inodes - order @inode1 and @inode2 by address first */
+       if (inode1 < inode2) {
+               lower = inode1;
+               higher = inode2;
+       } else {
+               lower = inode2;
+               higher = inode1;
+       }
+
+       /* Then find out whether @inode3 goes before the lower of the two */
+       if (lower < inode3) {
+               first = lower;
+               second = inode3;
+       } else {
+               first = inode3;
+               second = lower;
+       }
+
+       mutex_lock_nested(&ubifs_inode(first)->ui_mutex, WB_MUTEX_1);
+       lock_2_inodes(second, higher);
+}
+
+/**
+ * unlock_3_inodes - unlock three UBIFS inodes for rename.
+ * @inode1: first inode
+ * @inode2: second inode
+ * @inode3: third inode
+ *
+ * Counterpart of 'lock_3_inodes()': @inode2 is skipped if it is the same as
+ * @inode1, and @inode3 is skipped if it is null.
+ */
+static void unlock_3_inodes(struct inode *inode1, struct inode *inode2,
+                           struct inode *inode3)
+{
+       struct ubifs_inode *ui1 = ubifs_inode(inode1);
+
+       mutex_unlock(&ui1->ui_mutex);
+
+       if (inode2 != inode1) {
+               struct ubifs_inode *ui2 = ubifs_inode(inode2);
+
+               mutex_unlock(&ui2->ui_mutex);
+       }
+
+       if (inode3) {
+               struct ubifs_inode *ui3 = ubifs_inode(inode3);
+
+               mutex_unlock(&ui3->ui_mutex);
+       }
+}
+
+/*
+ * Rename @old_dentry in @old_dir to @new_dentry in @new_dir. If @new_dentry
+ * already has an inode, it is unlinked as part of the operation. The
+ * in-memory link counts, directory sizes and time stamps are updated first,
+ * then the change is written with 'ubifs_jnl_rename()'; if that fails, the
+ * link counts and sizes are rolled back at 'out_cancel'. Returns zero in case
+ * of success and a negative error code in case of failure.
+ */
+static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
+                       struct inode *new_dir, struct dentry *new_dentry)
+{
+       struct ubifs_info *c = old_dir->i_sb->s_fs_info;
+       struct inode *old_inode = old_dentry->d_inode;
+       struct inode *new_inode = new_dentry->d_inode;
+       struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode);
+       int err, release, sync = 0, move = (new_dir != old_dir);
+       int is_dir = S_ISDIR(old_inode->i_mode);
+       int unlink = !!new_inode;
+       int new_sz = CALC_DENT_SIZE(new_dentry->d_name.len);
+       int old_sz = CALC_DENT_SIZE(old_dentry->d_name.len);
+       struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
+                                       .dirtied_ino = 3 };
+       struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
+                               .dirtied_ino_d = old_inode_ui->data_len };
+       struct timespec time;
+
+       /*
+        * Budget request settings: deletion direntry, new direntry, removing
+        * the old inode, and changing old and new parent directory inodes.
+        *
+        * However, this operation also marks the target inode as dirty and
+        * does not write it, so we allocate budget for the target inode
+        * separately.
+        */
+
+       dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in "
+               "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name,
+               old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
+               new_dentry->d_name.name, new_dir->i_ino);
+
+       /*
+        * When a directory is renamed over an existing entry, the entry being
+        * replaced has to pass 'check_dir_empty()' first.
+        */
+       if (unlink && is_dir) {
+               err = check_dir_empty(c, new_inode);
+               if (err)
+                       return err;
+       }
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+       err = ubifs_budget_space(c, &ino_req);
+       if (err) {
+               ubifs_release_budget(c, &req);
+               return err;
+       }
+
+       lock_3_inodes(old_dir, new_dir, new_inode);
+
+       /*
+        * Like most other Unix systems, set the @i_ctime for inodes on a
+        * rename.
+        */
+       time = ubifs_current_time(old_dir);
+       old_inode->i_ctime = time;
+
+       /* We must adjust parent link count when renaming directories */
+       if (is_dir) {
+               if (move) {
+                       /*
+                        * @old_dir loses a link because we are moving
+                        * @old_inode to a different directory.
+                        */
+                       drop_nlink(old_dir);
+                       /*
+                        * @new_dir only gains a link if we are not also
+                        * overwriting an existing directory.
+                        */
+                       if (!unlink)
+                               inc_nlink(new_dir);
+               } else {
+                       /*
+                        * @old_inode is not moving to a different directory,
+                        * but @old_dir still loses a link if we are
+                        * overwriting an existing directory.
+                        */
+                       if (unlink)
+                               drop_nlink(old_dir);
+               }
+       }
+
+       old_dir->i_size -= old_sz;
+       ubifs_inode(old_dir)->ui_size = old_dir->i_size;
+       old_dir->i_mtime = old_dir->i_ctime = time;
+       new_dir->i_mtime = new_dir->i_ctime = time;
+
+       /*
+        * And finally, if we unlinked a direntry which happened to have the
+        * same name as the moved direntry, we have to decrement @i_nlink of
+        * the unlinked inode and change its ctime.
+        */
+       if (unlink) {
+               /*
+                * Directories cannot have hard-links, so if this is a
+                * directory, decrement its @i_nlink twice because an empty
+                * directory has @i_nlink 2.
+                */
+               if (is_dir)
+                       drop_nlink(new_inode);
+               new_inode->i_ctime = time;
+               drop_nlink(new_inode);
+       } else {
+               /* No direntry is replaced, so @new_dir grows by a new one */
+               new_dir->i_size += new_sz;
+               ubifs_inode(new_dir)->ui_size = new_dir->i_size;
+       }
+
+       /*
+        * Do not ask 'ubifs_jnl_rename()' to flush write-buffer if @old_inode
+        * is dirty, because this will be done later on at the end of
+        * 'ubifs_rename()'.
+        */
+       if (IS_SYNC(old_inode)) {
+               sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
+               if (unlink && IS_SYNC(new_inode))
+                       sync = 1;
+       }
+       err = ubifs_jnl_rename(c, old_dir, old_dentry, new_dir, new_dentry,
+                              sync);
+       if (err)
+               goto out_cancel;
+
+       unlock_3_inodes(old_dir, new_dir, new_inode);
+       ubifs_release_budget(c, &req);
+
+       /*
+        * @old_inode is only marked dirty here, which is what @ino_req was
+        * allocated for. If the inode was dirty already, @ino_req is released
+        * straight away.
+        */
+       mutex_lock(&old_inode_ui->ui_mutex);
+       release = old_inode_ui->dirty;
+       mark_inode_dirty_sync(old_inode);
+       mutex_unlock(&old_inode_ui->ui_mutex);
+
+       if (release)
+               ubifs_release_budget(c, &ino_req);
+       if (IS_SYNC(old_inode))
+               err = old_inode->i_sb->s_op->write_inode(old_inode, 1);
+       return err;
+
+out_cancel:
+       /*
+        * Roll back the link count and directory size changes made above.
+        * The time stamps are not restored.
+        */
+       if (unlink) {
+               if (is_dir)
+                       inc_nlink(new_inode);
+               inc_nlink(new_inode);
+       } else {
+               new_dir->i_size -= new_sz;
+               ubifs_inode(new_dir)->ui_size = new_dir->i_size;
+       }
+       old_dir->i_size += old_sz;
+       ubifs_inode(old_dir)->ui_size = old_dir->i_size;
+       if (is_dir) {
+               if (move) {
+                       inc_nlink(old_dir);
+                       if (!unlink)
+                               drop_nlink(new_dir);
+               } else {
+                       if (unlink)
+                               inc_nlink(old_dir);
+               }
+       }
+       unlock_3_inodes(old_dir, new_dir, new_inode);
+       ubifs_release_budget(c, &ino_req);
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/*
+ * Fill @stat with the attributes of the inode behind @dentry. The inode
+ * fields are sampled with the inode's @ui_mutex held. Always returns zero.
+ */
+int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                 struct kstat *stat)
+{
+       struct inode *inode = dentry->d_inode;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       loff_t bytes;
+
+       mutex_lock(&ui->ui_mutex);
+
+       /* Identity and ownership */
+       stat->dev = inode->i_sb->s_dev;
+       stat->ino = inode->i_ino;
+       stat->rdev = inode->i_rdev;
+       stat->mode = inode->i_mode;
+       stat->nlink = inode->i_nlink;
+       stat->uid = inode->i_uid;
+       stat->gid = inode->i_gid;
+
+       /* Time stamps */
+       stat->atime = inode->i_atime;
+       stat->mtime = inode->i_mtime;
+       stat->ctime = inode->i_ctime;
+
+       /* Sizes */
+       stat->size = ui->ui_size;
+       stat->blksize = UBIFS_BLOCK_SIZE;
+
+       /*
+        * The 'stat()' system call was designed with block device based file
+        * systems in mind and does not fit UBIFS well, because UBIFS has no
+        * notion of "block". A directory, for example, takes less than 300
+        * bytes, and rounding that up to the block size would be a large
+        * mistake which makes utilities like 'du' report senseless numbers.
+        * So UBIFS does what JFFS2 does and reports zero blocks for
+        * everything but regular files, which makes more sense than
+        * reporting completely wrong sizes.
+        */
+       stat->blocks = 0;
+       if (S_ISREG(inode->i_mode)) {
+               bytes = stat->size;
+               bytes += ui->xattr_size;
+               /*
+                * User-space expects the count in 512-byte units, whatever
+                * block size has been reported in @stat->blksize.
+                */
+               stat->blocks = ALIGN(bytes, UBIFS_BLOCK_SIZE) >> 9;
+       }
+
+       mutex_unlock(&ui->ui_mutex);
+       return 0;
+}
+
+/*
+ * Inode operations table for directories. The extended attribute operations
+ * are present only if CONFIG_UBIFS_FS_XATTR is defined.
+ */
+struct inode_operations ubifs_dir_inode_operations = {
+       .lookup      = ubifs_lookup,
+       .create      = ubifs_create,
+       .link        = ubifs_link,
+       .symlink     = ubifs_symlink,
+       .unlink      = ubifs_unlink,
+       .mkdir       = ubifs_mkdir,
+       .rmdir       = ubifs_rmdir,
+       .mknod       = ubifs_mknod,
+       .rename      = ubifs_rename,
+       .setattr     = ubifs_setattr,
+       .getattr     = ubifs_getattr,
+#ifdef CONFIG_UBIFS_FS_XATTR
+       .setxattr    = ubifs_setxattr,
+       .getxattr    = ubifs_getxattr,
+       .listxattr   = ubifs_listxattr,
+       .removexattr = ubifs_removexattr,
+#endif
+};
+
+/*
+ * File operations table for directories. The compat ioctl handler is present
+ * only if CONFIG_COMPAT is defined.
+ */
+struct file_operations ubifs_dir_operations = {
+       .llseek         = ubifs_dir_llseek,
+       .release        = ubifs_dir_release,
+       .read           = generic_read_dir,
+       .readdir        = ubifs_readdir,
+       .fsync          = ubifs_fsync,
+       .unlocked_ioctl = ubifs_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = ubifs_compat_ioctl,
+#endif
+};
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c

new file mode 100644 (file)

index 0000000..005a3b8
--- /dev/null
+++ b/fs/ubifs/file.c
@@ -0,0 +1,1275 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file implements VFS file and inode operations of regular files, device
+ * nodes and symlinks as well as address space operations.
+ *
+ * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the
+ * page is dirty and is used for budgeting purposes - dirty pages should not be
+ * budgeted. The PG_checked flag is set if full budgeting is required for the
+ * page e.g., when it corresponds to a file hole or it is just beyond the file
+ * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to
+ * fail in this function, and the budget is released in 'ubifs_write_end()'. So
+ * the PG_private and PG_checked flags carry the information about how the page
+ * was budgeted, to make it possible to release the budget properly.
+ *
+ * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations
+ * we implement. However, this is not true for '->writepage()', which might be
+ * called with 'i_mutex' unlocked. For example, when pdflush is performing
+ * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the
+ * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is
+ * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim
+ * path". So, in '->writepage()' we are only guaranteed that the page is
+ * locked.
+ *
+ * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g.,
+ * readahead path does not have it locked ("sys_read -> generic_file_aio_read
+ * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is
+ * not set as well. However, UBIFS disables readahead.
+ *
+ * This, for example, means that there might be 2 concurrent '->writepage()'
+ * calls for the same inode, but for different dirty pages of that inode.
+ */
+
+#include "ubifs.h"
+#include <linux/mount.h>
+
+/*
+ * Read data block @block of @inode into the %UBIFS_BLOCK_SIZE bytes at @addr,
+ * using @dn as the buffer for the data node. If the block is not found
+ * (%-ENOENT), @addr is zero-filled and %-ENOENT is returned. Returns zero in
+ * case of success, %-EINVAL if the data node looks bad, and the error code of
+ * the lookup otherwise.
+ */
+static int read_block(struct inode *inode, void *addr, unsigned int block,
+                     struct ubifs_data_node *dn)
+{
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       int unpacked_len = UBIFS_BLOCK_SIZE;
+       unsigned int packed_len;
+       union ubifs_key key;
+       int ret, data_len;
+
+       data_key_init(c, &key, inode->i_ino, block);
+       ret = ubifs_tnc_lookup(c, &key, dn);
+       if (ret == -ENOENT)
+               /* Nothing in the index for this block, so it is a hole */
+               memset(addr, 0, UBIFS_BLOCK_SIZE);
+       if (ret)
+               return ret;
+
+       ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum);
+
+       data_len = le32_to_cpu(dn->size);
+       if (data_len <= 0 || data_len > UBIFS_BLOCK_SIZE)
+               goto bad_node;
+
+       packed_len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
+       ret = ubifs_decompress(&dn->data, packed_len, addr, &unpacked_len,
+                              le16_to_cpu(dn->compr_type));
+       if (ret || data_len != unpacked_len)
+               goto bad_node;
+
+       /*
+        * A block may hold less than %UBIFS_BLOCK_SIZE bytes even if it is
+        * not the last block of the file (e.g., after making a hole and
+        * appending data), so the rest of the buffer has to be zeroed.
+        */
+       if (data_len < UBIFS_BLOCK_SIZE)
+               memset(addr + data_len, 0, UBIFS_BLOCK_SIZE - data_len);
+
+       return 0;
+
+bad_node:
+       ubifs_err("bad data node (block %u, inode %lu)",
+                 block, inode->i_ino);
+       dbg_dump_node(c, dn);
+       return -EINVAL;
+}
+
+/*
+ * Fill @page with the data of the inode it belongs to, one UBIFS block at a
+ * time, using 'read_block()'. Blocks which are not found and blocks which lie
+ * beyond the inode size are zero-filled, and the page gets the PG_checked flag
+ * if there was at least one such block. In case of success the page is marked
+ * up-to-date and zero is returned; otherwise the page error flag is set and a
+ * negative error code is returned.
+ */
+static int do_readpage(struct page *page)
+{
+       void *addr;
+       int err = 0, i;
+       unsigned int block, beyond;
+       struct ubifs_data_node *dn;
+       struct inode *inode = page->mapping->host;
+       loff_t i_size = i_size_read(inode);
+
+       dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
+               inode->i_ino, page->index, i_size, page->flags);
+       ubifs_assert(!PageChecked(page));
+       ubifs_assert(!PagePrivate(page));
+
+       addr = kmap(page);
+
+       /*
+        * @block is the first block of the page, @beyond is the number of
+        * blocks needed to hold @i_size bytes, i.e. the first block number
+        * which is past the end of the inode.
+        */
+       block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+       beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
+       if (block >= beyond) {
+               /* Reading beyond inode */
+               SetPageChecked(page);
+               memset(addr, 0, PAGE_CACHE_SIZE);
+               goto out;
+       }
+
+       dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS);
+       if (!dn) {
+               err = -ENOMEM;
+               goto error;
+       }
+
+       /*
+        * Read the page block by block. %-ENOENT is remembered in @err but
+        * does not stop the loop, and @err is never reset to zero, so it is
+        * still %-ENOENT after the loop if any block was missing. Any other
+        * error stops the loop immediately.
+        */
+       i = 0;
+       while (1) {
+               int ret;
+
+               if (block >= beyond) {
+                       /* Reading beyond inode */
+                       err = -ENOENT;
+                       memset(addr, 0, UBIFS_BLOCK_SIZE);
+               } else {
+                       ret = read_block(inode, addr, block, dn);
+                       if (ret) {
+                               err = ret;
+                               if (err != -ENOENT)
+                                       break;
+                       }
+               }
+               if (++i >= UBIFS_BLOCKS_PER_PAGE)
+                       break;
+               block += 1;
+               addr += UBIFS_BLOCK_SIZE;
+       }
+       if (err) {
+               if (err == -ENOENT) {
+                       /* Not found, so it must be a hole */
+                       SetPageChecked(page);
+                       dbg_gen("hole");
+                       goto out_free;
+               }
+               ubifs_err("cannot read page %lu of inode %lu, error %d",
+                         page->index, inode->i_ino, err);
+               goto error;
+       }
+
+out_free:
+       kfree(dn);
+out:
+       /* Success, possibly with zero-filled holes */
+       SetPageUptodate(page);
+       ClearPageError(page);
+       flush_dcache_page(page);
+       kunmap(page);
+       return 0;
+
+error:
+       /* @dn is NULL here if the allocation failed */
+       kfree(dn);
+       ClearPageUptodate(page);
+       SetPageError(page);
+       flush_dcache_page(page);
+       kunmap(page);
+       return err;
+}
+
+/**
+ * release_new_page_budget - release budget of a new page.
+ * @c: UBIFS file-system description object
+ *
+ * This helper gives back a budget which is equivalent to what is budgeted for
+ * one new page of data. It builds the corresponding request with the
+ * @recalculate flag set and passes it to 'ubifs_release_budget()'.
+ */
+static void release_new_page_budget(struct ubifs_info *c)
+{
+       struct ubifs_budget_req new_page_req = {
+               .new_page = 1,
+               .recalculate = 1,
+       };
+
+       ubifs_release_budget(c, &new_page_req);
+}
+
+/**
+ * release_existing_page_budget - release budget of an existing page.
+ * @c: UBIFS file-system description object
+ *
+ * This helper gives back a budget which is equivalent to what is budgeted for
+ * changing one page of data which already exists on the flash media. The
+ * amount is taken from @c->page_budget.
+ */
+static void release_existing_page_budget(struct ubifs_info *c)
+{
+       struct ubifs_budget_req page_req = {
+               .dd_growth = c->page_budget,
+       };
+
+       ubifs_release_budget(c, &page_req);
+}
+
+/**
+ * write_begin_slow - slow path of 'ubifs_write_begin()'.
+ * @mapping: address space of the inode which is written to
+ * @pos: position in the file where the write starts
+ * @len: how many bytes are going to be written
+ * @pagep: the grabbed page is returned here
+ *
+ * This function budgets before grabbing the page, and then corrects the
+ * budget when it finds out what kind of page this is. If data is appended,
+ * the inode's @ui_mutex is left locked in case of success. Returns zero in
+ * case of success and a negative error code in case of failure, in which case
+ * no page, no lock and no budget is held.
+ */
+static int write_begin_slow(struct address_space *mapping,
+                           loff_t pos, unsigned len, struct page **pagep)
+{
+       struct inode *inode = mapping->host;
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+       struct ubifs_budget_req req = { .new_page = 1 };
+       int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
+       struct page *page;
+
+       dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
+               inode->i_ino, pos, len, inode->i_size);
+
+       /*
+        * At the slow path we have to budget before locking the page, because
+        * budgeting may force write-back, which would wait on locked pages and
+        * deadlock if we had the page locked. At this point we do not know
+        * anything about the page, so assume that this is a new page which is
+        * written to a hole. This corresponds to largest budget. Later the
+        * budget will be amended if this is not true.
+        */
+       if (appending)
+               /* We are appending data, budget for inode change */
+               req.dirtied_ino = 1;
+
+       err = ubifs_budget_space(c, &req);
+       if (unlikely(err))
+               return err;
+
+       page = __grab_cache_page(mapping, index);
+       if (unlikely(!page)) {
+               ubifs_release_budget(c, &req);
+               return -ENOMEM;
+       }
+
+       if (!PageUptodate(page)) {
+               /*
+                * %PAGE_CACHE_MASK selects the page-aligned part of @pos, so
+                * it has to be inverted to test the offset inside the page.
+                * If the whole page is overwritten, there is no need to read
+                * it.
+                */
+               if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+                       SetPageChecked(page);
+               else {
+                       err = do_readpage(page);
+                       if (err) {
+                               unlock_page(page);
+                               page_cache_release(page);
+                               /* Do not leak the budget acquired above */
+                               ubifs_release_budget(c, &req);
+                               return err;
+                       }
+               }
+
+               SetPageUptodate(page);
+               ClearPageError(page);
+       }
+
+       if (PagePrivate(page))
+               /*
+                * The page is dirty, which means it was budgeted twice:
+                *   o first time the budget was allocated by the task which
+                *     made the page dirty and set the PG_private flag;
+                *   o and then we budgeted for it for the second time at the
+                *     very beginning of this function.
+                *
+                * So what we have to do is to release the page budget we
+                * allocated.
+                */
+               release_new_page_budget(c);
+       else if (!PageChecked(page))
+               /*
+                * We are changing a page which already exists on the media.
+                * This means that changing the page does not make the amount
+                * of indexing information larger, and this part of the budget
+                * which we have already acquired may be released.
+                */
+               ubifs_convert_page_budget(c);
+
+       if (appending) {
+               struct ubifs_inode *ui = ubifs_inode(inode);
+
+               /*
+                * 'ubifs_write_end()' is optimized from the fast-path part of
+                * 'ubifs_write_begin()' and expects the @ui_mutex to be locked
+                * if data is appended.
+                */
+               mutex_lock(&ui->ui_mutex);
+               if (ui->dirty)
+                       /*
+                        * The inode is dirty already, so we may free the
+                        * budget we allocated.
+                        */
+                       ubifs_release_dirty_inode_budget(c, ui);
+       }
+
+       *pagep = page;
+       return 0;
+}
+
+/**
+ * allocate_budget - allocate budget for 'ubifs_write_begin()'.
+ * @c: UBIFS file-system description object
+ * @page: page to allocate budget for
+ * @ui: UBIFS inode object the page belongs to
+ * @appending: non-zero if the page is appended
+ *
+ * This helper of 'ubifs_write_begin()' works out what has to be budgeted for
+ * the write and requests it with the @fast flag set. What is needed depends
+ * on whether the page is dirty, whether it is marked with PG_checked, whether
+ * data is appended and whether the inode is dirty. If @appending is non-zero,
+ * this function returns with @ui->ui_mutex locked, and this is true for all
+ * return paths. Returns zero in case of success and %-ENOSPC in case of
+ * failure.
+ */
+static int allocate_budget(struct ubifs_info *c, struct page *page,
+                          struct ubifs_inode *ui, int appending)
+{
+       struct ubifs_budget_req req = { .fast = 1 };
+       int page_dirty = PagePrivate(page);
+
+       if (page_dirty && !appending)
+               /*
+                * A dirty page was budgeted when it became dirty, and the
+                * inode does not change because nothing is appended. So
+                * there is nothing to budget for.
+                */
+               return 0;
+
+       if (!page_dirty) {
+               /*
+                * A clean page with PG_checked set corresponds to a hole and
+                * is not on the media, so writing it adds indexing
+                * information and a new page has to be budgeted. Without
+                * PG_checked the page is on the media already and budgeting
+                * for a page change is enough.
+                */
+               if (PageChecked(page))
+                       req.new_page = 1;
+               else
+                       req.dirtied_page = 1;
+       }
+
+       if (appending) {
+               /*
+                * Appending changes the inode. @ui->ui_mutex is taken and
+                * left locked to keep write-back from flushing the inode and
+                * freeing its budget; 'ubifs_write_end()' will unlock it.
+                */
+               mutex_lock(&ui->ui_mutex);
+               if (!ui->dirty)
+                       /* Clean inode is about to become dirty */
+                       req.dirtied_ino = 1;
+               else if (page_dirty)
+                       /* Both the page and the inode have their budget */
+                       return 0;
+       }
+
+       return ubifs_budget_space(c, &req);
+}
+
+/*
+ * This function is called when a page of data is going to be written. Since
+ * the page of data will not necessarily go to the flash straight away, UBIFS
+ * has to reserve space on the media for it, which is done by means of
+ * budgeting.
+ *
+ * This is the hot-path of the file-system and we are trying to optimize it as
+ * much as possible. For this reason it is split into 2 parts - slow and fast.
+ *
+ * There are many budgeting cases:
+ *     o a new page is appended - we have to budget for a new page and for
+ *       changing the inode; however, if the inode is already dirty, there is
+ *       no need to budget for it;
+ *     o an existing clean page is changed - we have to budget for it; if the
+ *       page does not exist on the media (a hole), we have to budget for a
+ *       new page; otherwise, we may budget for changing an existing page; the
+ *       difference between these cases is that changing an existing page does
+ *       not introduce anything new to the FS indexing information, so it does
+ *       not grow, and smaller budget is acquired in this case;
+ *     o an existing dirty page is changed - no need to budget at all, because
+ *       the page budget has been acquired earlier, when the page was marked
+ *       dirty.
+ *
+ * UBIFS budgeting sub-system may force write-back if it thinks there is no
+ * space to reserve. This imposes some locking restrictions and makes it
+ * impossible to take into account the above cases, and makes it impossible to
+ * optimize budgeting.
+ *
+ * The solution for this is that the fast path of 'ubifs_write_begin()' assumes
+ * there is a plenty of flash space and the budget will be acquired quickly,
+ * without forcing write-back. The slow path does not make this assumption.
+ */
+static int ubifs_write_begin(struct file *file, struct address_space *mapping,
+                            loff_t pos, unsigned len, unsigned flags,
+                            struct page **pagep, void **fsdata)
+{
+       struct inode *inode = mapping->host;
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+       int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
+       struct page *page;
+
+
+       ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
+
+       if (unlikely(c->ro_media))
+               return -EROFS;
+
+       /* Try out the fast-path part first */
+       page = __grab_cache_page(mapping, index);
+       if (unlikely(!page))
+               return -ENOMEM;
+
+       if (!PageUptodate(page)) {
+               /* The page is not loaded from the flash */
+               if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+                       /*
+                        * We change whole page so no need to load it. But we
+                        * have to set the @PG_checked flag to make the further
+                        * code the page is new. This might be not true, but it
+                        * is better to budget more that to read the page from
+                        * the media.
+                        */
+                       SetPageChecked(page);
+               else {
+                       err = do_readpage(page);
+                       if (err) {
+                               unlock_page(page);
+                               page_cache_release(page);
+                               return err;
+                       }
+               }
+
+               SetPageUptodate(page);
+               ClearPageError(page);
+       }
+
+       err = allocate_budget(c, page, ui, appending);
+       if (unlikely(err)) {
+               ubifs_assert(err == -ENOSPC);
+               /*
+                * Budgeting failed which means it would have to force
+                * write-back but didn't, because we set the @fast flag in the
+                * request. Write-back cannot be done now, while we have the
+                * page locked, because it would deadlock. Unlock and free
+                * everything and fall-back to slow-path.
+                */
+               if (appending) {
+                       ubifs_assert(mutex_is_locked(&ui->ui_mutex));
+                       mutex_unlock(&ui->ui_mutex);
+               }
+               unlock_page(page);
+               page_cache_release(page);
+
+               return write_begin_slow(mapping, pos, len, pagep);
+       }
+
+       /*
+        * Whee, we aquired budgeting quickly - without involving
+        * garbage-collection, committing or forceing write-back. We return
+        * with @ui->ui_mutex locked if we are appending pages, and unlocked
+        * otherwise. This is an optimization (slightly hacky though).
+        */
+       *pagep = page;
+       return 0;
+
+}
+
+/**
+ * cancel_budget - give back the budget taken for a page write.
+ * @c: UBIFS file-system description object
+ * @page: page the budget was taken for
+ * @ui: UBIFS inode object the page belongs to
+ * @appending: non-zero if the write was appending to the inode
+ *
+ * This is a helper function for a page write operation. In case of appending
+ * it releases the dirty inode budget (unless the inode is already dirty) and
+ * unlocks @ui->ui_mutex. If @page does not have the @PG_private flag set, the
+ * page budget is released as well - the "new page" budget if @PG_checked is
+ * set and the "existing page" budget otherwise.
+ */
+static void cancel_budget(struct ubifs_info *c, struct page *page,
+                         struct ubifs_inode *ui, int appending)
+{
+       if (appending) {
+               if (!ui->dirty)
+                       ubifs_release_dirty_inode_budget(c, ui);
+               mutex_unlock(&ui->ui_mutex);
+       }
+
+       /* Nothing to release for a page which has @PG_private set */
+       if (PagePrivate(page))
+               return;
+
+       if (PageChecked(page))
+               release_new_page_budget(c);
+       else
+               release_existing_page_budget(c);
+}
+
+/*
+ * This function finishes a page write which was started by
+ * 'ubifs_write_begin()'. It returns @copied in case of success. If VFS copied
+ * less than a whole page which was not read from the media, it returns zero
+ * to make VFS repeat the operation, or the error code of 'do_readpage()'. The
+ * page is unlocked and released in all cases.
+ */
+static int ubifs_write_end(struct file *file, struct address_space *mapping,
+                          loff_t pos, unsigned len, unsigned copied,
+                          struct page *page, void *fsdata)
+{
+       struct inode *inode = mapping->host;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       loff_t end_pos = pos + len;
+       /* Same criterion as the one used in 'ubifs_write_begin()' */
+       int appending = !!(end_pos > inode->i_size);
+
+       dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
+               inode->i_ino, pos, page->index, len, copied, inode->i_size);
+
+       if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) {
+               /*
+                * VFS copied less data to the page than it intended and
+                * declared in its '->write_begin()' call via the @len
+                * argument. If the page was not up-to-date, and @len was
+                * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did
+                * not load it from the media (for optimization reasons). This
+                * means that part of the page contains garbage. So read the
+                * page now.
+                */
+               dbg_gen("copied %d instead of %d, read page and repeat",
+                       copied, len);
+               /* This also unlocks @ui->ui_mutex if we are appending */
+               cancel_budget(c, page, ui, appending);
+
+               /*
+                * Return 0 to force VFS to repeat the whole operation, or the
+                * error code if 'do_readpage()' fails.
+                */
+               copied = do_readpage(page);
+               goto out;
+       }
+
+       /* The page becomes dirty - account it unless this was done earlier */
+       if (!PagePrivate(page)) {
+               SetPagePrivate(page);
+               atomic_long_inc(&c->dirty_pg_cnt);
+               __set_page_dirty_nobuffers(page);
+       }
+
+       if (appending) {
+               i_size_write(inode, end_pos);
+               ui->ui_size = end_pos;
+               /*
+                * Note, we do not set @I_DIRTY_PAGES (which means that the
+                * inode has dirty pages), this has been done in
+                * '__set_page_dirty_nobuffers()'.
+                */
+               __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+               /* The mutex was left locked by 'ubifs_write_begin()' */
+               ubifs_assert(mutex_is_locked(&ui->ui_mutex));
+               mutex_unlock(&ui->ui_mutex);
+       }
+
+out:
+       unlock_page(page);
+       page_cache_release(page);
+       return copied;
+}
+
+/*
+ * The '->readpage()' address space operation. The return code of
+ * 'do_readpage()' is not propagated - the page is unlocked and zero is
+ * returned in any case.
+ */
+static int ubifs_readpage(struct file *file, struct page *page)
+{
+       (void)do_readpage(page);
+       unlock_page(page);
+
+       return 0;
+}
+
+/**
+ * do_writepage - write first @len bytes of a page to the journal.
+ * @page: page to write (has to have the @PG_private flag set)
+ * @len: how many bytes of the page to write
+ *
+ * This function marks the page as being under write-back and writes its data
+ * to the journal in chunks of at most @UBIFS_BLOCK_SIZE bytes. If writing
+ * fails, the page is marked with the error flag and UBIFS is switched to
+ * read-only mode. In any case the page budget is released, the dirty pages
+ * counter is decremented, the @PG_private and @PG_checked flags are cleared,
+ * the page is unlocked and the write-back is finished. Returns zero in case of
+ * success and the error code of 'ubifs_jnl_write_data()' in case of failure.
+ */
+static int do_writepage(struct page *page, int len)
+{
+       int err = 0, i, blen;
+       unsigned int block;
+       void *addr;
+       union ubifs_key key;
+       struct inode *inode = page->mapping->host;
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+
+#ifdef UBIFS_DEBUG
+       struct ubifs_inode *ui = ubifs_inode(inode);
+
+       /* The page index is compared to the index of the synced size */
+       spin_lock(&ui->ui_lock);
+       ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT);
+       spin_unlock(&ui->ui_lock);
+#endif
+
+       /* Update radix tree tags */
+       set_page_writeback(page);
+
+       addr = kmap(page);
+       block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+       i = 0;
+       while (len) {
+               blen = min_t(int, len, UBIFS_BLOCK_SIZE);
+               data_key_init(c, &key, inode->i_ino, block);
+               err = ubifs_jnl_write_data(c, inode, &key, addr, blen);
+               if (err)
+                       break;
+               /* Never write more blocks than one page contains */
+               if (++i >= UBIFS_BLOCKS_PER_PAGE)
+                       break;
+               block += 1;
+               addr += blen;
+               len -= blen;
+       }
+       if (err) {
+               SetPageError(page);
+               ubifs_err("cannot write page %lu of inode %lu, error %d",
+                         page->index, inode->i_ino, err);
+               ubifs_ro_mode(c, err);
+       }
+
+       /* The page is not dirty any more - give its budget back */
+       ubifs_assert(PagePrivate(page));
+       if (PageChecked(page))
+               release_new_page_budget(c);
+       else
+               release_existing_page_budget(c);
+
+       atomic_long_dec(&c->dirty_pg_cnt);
+       ClearPagePrivate(page);
+       ClearPageChecked(page);
+
+       kunmap(page);
+       unlock_page(page);
+       end_page_writeback(page);
+       return err;
+}
+
+/*
+ * When writing-back dirty inodes, VFS first writes-back pages belonging to the
+ * inode, then the inode itself. For UBIFS this may cause a problem. Consider a
+ * situation when we have an inode with size 0, then a megabyte of data is
+ * appended to the inode, then write-back starts and flushes some amount of the
+ * dirty pages, the journal becomes full, commit happens and finishes, and then
+ * an unclean reboot happens. When the file system is mounted next time, the
+ * inode size would still be 0, but there would be many pages which are beyond
+ * the inode size, they would be indexed and consume flash space. Because the
+ * journal has been committed, the replay would not be able to detect this
+ * situation and correct the inode size. This means UBIFS would have to scan
+ * whole index and correct all inode sizes, which is long and unacceptable.
+ *
+ * To prevent situations like this, UBIFS writes pages back only if they are
+ * within last synchronized inode size, i.e. the size which has been
+ * written to the flash media last time. Otherwise, UBIFS forces inode
+ * write-back, thus making sure the on-flash inode contains current inode size,
+ * and then keeps writing pages back.
+ *
+ * Some locking issues explanation. 'ubifs_writepage()' first is called with
+ * the page locked, and it locks @ui_mutex. However, write-back does not take
+ * inode @i_mutex, which means other VFS operations may be run on this inode at
+ * the same time. And the problematic one is truncation to smaller size, from
+ * where we have to call 'vmtruncate()', which first changes @inode->i_size,
+ * then drops the truncated pages. And while dropping the pages, it takes the
+ * page lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with
+ * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
+ * means that @inode->i_size is changed while @ui_mutex is unlocked.
+ *
+ * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
+ * inode size. How do we do this if @inode->i_size may become smaller while we
+ * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the
+ * @ui->ui_size "shadow" field which UBIFS uses instead of @inode->i_size
+ * internally and updates it under @ui_mutex.
+ *
+ * Q: why we do not worry that if we race with truncation, we may end up with a
+ * situation when the inode is truncated while we are in the middle of
+ * 'do_writepage()', so we do write beyond inode size?
+ * A: If we are in the middle of 'do_writepage()', truncation would be locked
+ * on the page lock and it would not write the truncated inode node to the
+ * journal before we have finished.
+ */
+static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
+{
+       struct inode *inode = page->mapping->host;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       loff_t i_size =  i_size_read(inode), synced_i_size;
+       /* Index of the page which contains @i_size */
+       pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+       /* @len is the amount of bytes of the last page which are in @i_size */
+       int err, len = i_size & (PAGE_CACHE_SIZE - 1);
+       void *kaddr;
+
+       dbg_gen("ino %lu, pg %lu, pg flags %#lx",
+               inode->i_ino, page->index, page->flags);
+       ubifs_assert(PagePrivate(page));
+
+       /* Is the page fully outside @i_size? (truncate in progress) */
+       if (page->index > end_index || (page->index == end_index && !len)) {
+               err = 0;
+               goto out_unlock;
+       }
+
+       /* @ui->synced_i_size is read under @ui->ui_lock */
+       spin_lock(&ui->ui_lock);
+       synced_i_size = ui->synced_i_size;
+       spin_unlock(&ui->ui_lock);
+
+       /* Is the page fully inside @i_size? */
+       if (page->index < end_index) {
+               /*
+                * If the page is not within the last synchronized inode size,
+                * write the inode first.
+                */
+               if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
+                       err = inode->i_sb->s_op->write_inode(inode, 1);
+                       if (err)
+                               goto out_unlock;
+                       /*
+                        * The inode has been written, but the write-buffer has
+                        * not been synchronized, so in case of an unclean
+                        * reboot we may end up with some pages beyond inode
+                        * size, but they would be in the journal (because
+                        * commit flushes write buffers) and recovery would deal
+                        * with this.
+                        */
+               }
+               /* 'do_writepage()' unlocks the page */
+               return do_writepage(page, PAGE_CACHE_SIZE);
+       }
+
+       /*
+        * The page straddles @i_size. It must be zeroed out on each and every
+        * writepage invocation because it may be mmapped. "A file is mapped
+        * in multiples of the page size. For a file that is not a multiple of
+        * the page size, the remaining memory is zeroed when mapped, and
+        * writes to that region are not written out to the file."
+        */
+       kaddr = kmap_atomic(page, KM_USER0);
+       memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
+       flush_dcache_page(page);
+       kunmap_atomic(kaddr, KM_USER0);
+
+       /* Write the inode first if its size was not synchronized yet */
+       if (i_size > synced_i_size) {
+               err = inode->i_sb->s_op->write_inode(inode, 1);
+               if (err)
+                       goto out_unlock;
+       }
+
+       return do_writepage(page, len);
+
+out_unlock:
+       unlock_page(page);
+       return err;
+}
+
+/**
+ * do_attr_changes - apply attribute changes to an inode.
+ * @inode: inode to change attributes for
+ * @attr: describes attributes to change
+ *
+ * This function copies the attributes which are marked as valid in @attr to
+ * @inode. Time-stamps are truncated to the time granularity of the
+ * super-block. When the mode is changed, the set-group-ID bit is dropped
+ * unless the caller is in the group of the inode or has the %CAP_FSETID
+ * capability.
+ */
+static void do_attr_changes(struct inode *inode, const struct iattr *attr)
+{
+       const unsigned int valid = attr->ia_valid;
+       const unsigned int gran = inode->i_sb->s_time_gran;
+       umode_t mode;
+
+       if (valid & ATTR_UID)
+               inode->i_uid = attr->ia_uid;
+       if (valid & ATTR_GID)
+               inode->i_gid = attr->ia_gid;
+
+       if (valid & ATTR_ATIME)
+               inode->i_atime = timespec_trunc(attr->ia_atime, gran);
+       if (valid & ATTR_MTIME)
+               inode->i_mtime = timespec_trunc(attr->ia_mtime, gran);
+       if (valid & ATTR_CTIME)
+               inode->i_ctime = timespec_trunc(attr->ia_ctime, gran);
+
+       if (!(valid & ATTR_MODE))
+               return;
+
+       /* Note, the group ID checked here may have just been changed above */
+       mode = attr->ia_mode;
+       if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+               mode &= ~S_ISGID;
+       inode->i_mode = mode;
+}
+
+/**
+ * do_truncation - truncate an inode.
+ * @c: UBIFS file-system description object
+ * @inode: inode to truncate
+ * @attr: inode attribute changes description
+ *
+ * This function implements VFS '->setattr()' call when the inode is truncated
+ * to a smaller size. It budgets for the change, calls 'vmtruncate()', writes
+ * the last page back if it is dirty and the new size is not block-aligned,
+ * and then writes the truncation to the journal under @ui->ui_mutex. Returns
+ * zero in case of success and a negative error code in case of failure.
+ */
+static int do_truncation(struct ubifs_info *c, struct inode *inode,
+                        const struct iattr *attr)
+{
+       int err;
+       struct ubifs_budget_req req;
+       loff_t old_size = inode->i_size, new_size = attr->ia_size;
+       /* Offset of the new end of file within its UBIFS block */
+       int offset = new_size & (UBIFS_BLOCK_SIZE - 1);
+       struct ubifs_inode *ui = ubifs_inode(inode);
+
+       dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
+       memset(&req, 0, sizeof(struct ubifs_budget_req));
+
+       /*
+        * If this is truncation to a smaller size, and we do not truncate on a
+        * block boundary, budget for changing one data block, because the last
+        * block will be re-written.
+        */
+       if (new_size & (UBIFS_BLOCK_SIZE - 1))
+               req.dirtied_page = 1;
+
+       req.dirtied_ino = 1;
+       /* A funny way to budget for truncation node */
+       req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       /* Note, @ui->ui_mutex is not locked at this point */
+       err = vmtruncate(inode, new_size);
+       if (err)
+               goto out_budg;
+
+       if (offset) {
+               pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
+               struct page *page;
+
+               page = find_lock_page(inode->i_mapping, index);
+               if (page) {
+                       if (PageDirty(page)) {
+                               /*
+                                * 'ubifs_jnl_truncate()' will try to truncate
+                                * the last data node, but it contains
+                                * out-of-date data because the page is dirty.
+                                * Write the page now, so that
+                                * 'ubifs_jnl_truncate()' will see an already
+                                * truncated (and up to date) data node.
+                                */
+                               ubifs_assert(PagePrivate(page));
+
+                               clear_page_dirty_for_io(page);
+                               /*
+                                * If a page contains several blocks, write
+                                * everything up to the new size, not only the
+                                * part which is in the last block.
+                                */
+                               if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
+                                       offset = new_size &
+                                                (PAGE_CACHE_SIZE - 1);
+                               /* 'do_writepage()' unlocks the page */
+                               err = do_writepage(page, offset);
+                               page_cache_release(page);
+                               if (err)
+                                       goto out_budg;
+                               /*
+                                * We could now tell 'ubifs_jnl_truncate()' not
+                                * to read the last block.
+                                */
+                       } else {
+                               /*
+                                * We could 'kmap()' the page and pass the data
+                                * to 'ubifs_jnl_truncate()' to save it from
+                                * having to read it.
+                                */
+                               unlock_page(page);
+                               page_cache_release(page);
+                       }
+               }
+       }
+
+       mutex_lock(&ui->ui_mutex);
+       /* 'vmtruncate()' changed @i_size, update @ui_size */
+       ui->ui_size = inode->i_size;
+       /* Truncation changes inode [mc]time */
+       inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
+       /* The other attributes may be changed at the same time as well */
+       do_attr_changes(inode, attr);
+
+       err = ubifs_jnl_truncate(c, inode, old_size, new_size);
+       mutex_unlock(&ui->ui_mutex);
+out_budg:
+       /* The budget is released on both the success and the failure paths */
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/**
+ * do_setattr - change inode attributes.
+ * @c: UBIFS file-system description object
+ * @inode: inode to change attributes for
+ * @attr: inode attribute changes description
+ *
+ * This function implements VFS '->setattr()' call for all cases except
+ * truncations to smaller size. The inode is only marked as dirty here; it is
+ * written out straight away only if it is synchronous. Returns zero in case of
+ * success and a negative error code in case of failure.
+ */
+static int do_setattr(struct ubifs_info *c, struct inode *inode,
+                     const struct iattr *attr)
+{
+       int err, release;
+       loff_t new_size = attr->ia_size;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       struct ubifs_budget_req req = { .dirtied_ino = 1,
+                                       .dirtied_ino_d = ui->data_len };
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       if (attr->ia_valid & ATTR_SIZE) {
+               dbg_gen("size %lld -> %lld", inode->i_size, new_size);
+               /* Note, 'vmtruncate()' is called with @ui_mutex unlocked */
+               err = vmtruncate(inode, new_size);
+               if (err)
+                       goto out;
+       }
+
+       mutex_lock(&ui->ui_mutex);
+       if (attr->ia_valid & ATTR_SIZE) {
+               /* Truncation changes inode [mc]time */
+               inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
+               /* 'vmtruncate()' changed @i_size, update @ui_size */
+               ui->ui_size = inode->i_size;
+       }
+
+       do_attr_changes(inode, attr);
+
+       /*
+        * Remember whether the inode was dirty before we mark it dirty. If it
+        * was, the budget acquired above is released below (presumably because
+        * a dirty inode has been budgeted for already - TODO confirm).
+        */
+       release = ui->dirty;
+       if (attr->ia_valid & ATTR_SIZE)
+               /*
+                * Inode length changed, so we have to make sure
+                * @I_DIRTY_DATASYNC is set.
+                */
+                __mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+       else
+               mark_inode_dirty_sync(inode);
+       mutex_unlock(&ui->ui_mutex);
+
+       if (release)
+               ubifs_release_budget(c, &req);
+       /* Synchronous inodes are written out straight away */
+       if (IS_SYNC(inode))
+               err = inode->i_sb->s_op->write_inode(inode, 1);
+       return err;
+
+out:
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/*
+ * The '->setattr()' inode operation. Truncations to a smaller size are handled
+ * by 'do_truncation()', all the other attribute changes by 'do_setattr()'.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+       struct inode *inode = dentry->d_inode;
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       int err, shrinking;
+
+       dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid);
+
+       err = inode_change_ok(inode, attr);
+       if (err)
+               return err;
+
+       err = dbg_check_synced_i_size(inode);
+       if (err)
+               return err;
+
+       shrinking = (attr->ia_valid & ATTR_SIZE) &&
+                   attr->ia_size < inode->i_size;
+       if (shrinking)
+               /* Truncation to a smaller size */
+               return do_truncation(c, inode, attr);
+
+       return do_setattr(c, inode, attr);
+}
+
+/*
+ * The '->invalidatepage()' address space operation. If the whole page is
+ * invalidated (@offset is zero), the page budget is released, the dirty pages
+ * counter is decremented and the @PG_private and @PG_checked flags are
+ * cleared. Nothing is done for a partially invalidated page.
+ */
+static void ubifs_invalidatepage(struct page *page, unsigned long offset)
+{
+       struct ubifs_info *c = page->mapping->host->i_sb->s_fs_info;
+
+       ubifs_assert(PagePrivate(page));
+
+       /* Partial page remains dirty */
+       if (offset != 0)
+               return;
+
+       if (!PageChecked(page))
+               release_existing_page_budget(c);
+       else
+               release_new_page_budget(c);
+
+       atomic_long_dec(&c->dirty_pg_cnt);
+       ClearPagePrivate(page);
+       ClearPageChecked(page);
+}
+
+/*
+ * The '->follow_link()' inode operation of symlinks. The link target is taken
+ * from the @data field of the UBIFS inode. Always returns %NULL.
+ */
+static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+       nd_set_link(nd, ubifs_inode(dentry->d_inode)->data);
+
+       return NULL;
+}
+
+/*
+ * The '->fsync()' file operation. Writes the inode (unless this is a data-only
+ * synchronization of an inode which does not have @I_DIRTY_DATASYNC set) and
+ * then synchronizes the write-buffers which contain nodes of this inode.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+       struct inode *inode = dentry->d_inode;
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       int write_ino;
+
+       dbg_gen("syncing inode %lu", inode->i_ino);
+
+       /*
+        * VFS has already synchronized dirty pages for this inode. Synchronize
+        * the inode unless this is a 'datasync()' call.
+        */
+       write_ino = !datasync || (inode->i_state & I_DIRTY_DATASYNC);
+       if (write_ino) {
+               int err = inode->i_sb->s_op->write_inode(inode, 1);
+
+               if (err)
+                       return err;
+       }
+
+       /*
+        * Nodes related to this inode may still sit in a write-buffer. Flush
+        * them.
+        */
+       return ubifs_sync_wbufs_by_inode(c, inode);
+}
+
+/**
+ * mctime_update_needed - check if mtime or ctime update is needed.
+ * @inode: the inode to do the check for
+ * @now: current time
+ *
+ * This helper function returns %1 if mtime or ctime of @inode differs from
+ * @now and %0 if both of them are equal to @now, in which case there is no
+ * need to update them. This is an optimization.
+ */
+static inline int mctime_update_needed(const struct inode *inode,
+                                      const struct timespec *now)
+{
+       int mtime_same = timespec_equal(&inode->i_mtime, now);
+       int ctime_same = timespec_equal(&inode->i_ctime, now);
+
+       return !(mtime_same && ctime_same);
+}
+
+/**
+ * update_mctime - update mtime and ctime of an inode.
+ * @c: UBIFS file-system description object
+ * @inode: inode to update
+ *
+ * This function sets mtime and ctime of the inode to current time and marks
+ * the inode as dirty, unless both time-stamps are already equal to current
+ * time. Returns zero in case of success and a negative error code if
+ * budgeting failed.
+ */
+static int update_mctime(struct ubifs_info *c, struct inode *inode)
+{
+       struct timespec now = ubifs_current_time(inode);
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       struct ubifs_budget_req req = { .dirtied_ino = 1,
+                                       .dirtied_ino_d = ui->data_len };
+       int err, was_dirty;
+
+       if (!mctime_update_needed(inode, &now))
+               return 0;
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       mutex_lock(&ui->ui_mutex);
+       inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
+       /* Sample the dirty state before the inode is marked as dirty */
+       was_dirty = ui->dirty;
+       mark_inode_dirty_sync(inode);
+       mutex_unlock(&ui->ui_mutex);
+
+       if (was_dirty)
+               ubifs_release_budget(c, &req);
+
+       return 0;
+}
+
+/*
+ * The '->aio_write()' file operation. Updates inode time-stamps, lets
+ * 'generic_file_aio_write()' do the write, and if something was written to a
+ * synchronous inode or an %O_SYNC file, synchronizes the write-buffers which
+ * contain nodes of this inode. Returns the amount of written bytes or a
+ * negative error code.
+ */
+static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
+                              unsigned long nr_segs, loff_t pos)
+{
+       struct file *filp = iocb->ki_filp;
+       struct inode *inode = filp->f_mapping->host;
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       ssize_t written;
+       int err;
+
+       err = update_mctime(c, inode);
+       if (err)
+               return err;
+
+       written = generic_file_aio_write(iocb, iov, nr_segs, pos);
+       if (written <= 0)
+               /* Error or nothing written - there is nothing to flush */
+               return written;
+
+       if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) {
+               err = ubifs_sync_wbufs_by_inode(c, inode);
+               if (err)
+                       return err;
+       }
+
+       return written;
+}
+
+/*
+ * The '->set_page_dirty()' address space operation. Returns what
+ * '__set_page_dirty_nobuffers()' returned.
+ */
+static int ubifs_set_page_dirty(struct page *page)
+{
+       int ret = __set_page_dirty_nobuffers(page);
+
+       /*
+        * An attempt to dirty a page without budgeting for it - should not
+        * happen.
+        */
+       ubifs_assert(ret == 0);
+
+       return ret;
+}
+
+/*
+ * The '->releasepage()' address space operation. Returns zero if the page is
+ * under write-back. Otherwise clears the @PG_private and @PG_checked flags
+ * and returns %1.
+ */
+static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
+{
+       /*
+        * An attempt to release a dirty page without budgeting for it - should
+        * not happen.
+        */
+       if (PageWriteback(page))
+               return 0;
+       ubifs_assert(PagePrivate(page));
+       /* Getting here is unexpected, so complain unconditionally */
+       ubifs_assert(0);
+       ClearPagePrivate(page);
+       ClearPageChecked(page);
+       return 1;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made
+ * writable. UBIFS must ensure page is budgeted for.
+ *
+ * Returns zero in case of success, %-EROFS if the media is read-only, the
+ * error code of 'ubifs_budget_space()' if budgeting failed, and %-EINVAL if
+ * the page has been truncated in the meanwhile.
+ */
+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       struct timespec now = ubifs_current_time(inode);
+       struct ubifs_budget_req req = { .new_page = 1 };
+       int err, update_time;
+
+       dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
+               i_size_read(inode));
+       ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
+
+       if (unlikely(c->ro_media))
+               return -EROFS;
+
+       /*
+        * We have not locked @page so far so we may budget for changing the
+        * page. Note, we cannot do this after we locked the page, because
+        * budgeting may cause write-back which would cause deadlock.
+        *
+        * At the moment we do not know whether the page is dirty or not, so we
+        * assume that it is not and budget for a new page. We could look at
+        * the @PG_private flag and figure this out, but we may race with write
+        * back and the page state may change by the time we lock it, so this
+        * would need additional care. We do not bother with this at the
+        * moment, although it might be good idea to do. Instead, we allocate
+        * budget for a new page and amend it later on if the page was in fact
+        * dirty.
+        *
+        * The budgeting-related logic of this function is similar to what we
+        * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there
+        * for more comments.
+        */
+       update_time = mctime_update_needed(inode, &now);
+       if (update_time)
+               /*
+                * We have to change inode time stamp which requires extra
+                * budgeting.
+                */
+               req.dirtied_ino = 1;
+
+       err = ubifs_budget_space(c, &req);
+       if (unlikely(err)) {
+               if (err == -ENOSPC)
+                       ubifs_warn("out of space for mmapped file "
+                                  "(inode number %lu)", inode->i_ino);
+               return err;
+       }
+
+       lock_page(page);
+       if (unlikely(page->mapping != inode->i_mapping ||
+                    page_offset(page) > i_size_read(inode))) {
+               /* Page got truncated out from underneath us */
+               err = -EINVAL;
+               goto out_unlock;
+       }
+
+       if (PagePrivate(page))
+               /* The page is dirty already - give the new page budget back */
+               release_new_page_budget(c);
+       else {
+               /*
+                * The page was clean. Unless it is marked as new with
+                * @PG_checked, amend the budget we have taken for a new page.
+                */
+               if (!PageChecked(page))
+                       ubifs_convert_page_budget(c);
+               SetPagePrivate(page);
+               atomic_long_inc(&c->dirty_pg_cnt);
+               __set_page_dirty_nobuffers(page);
+       }
+
+       if (update_time) {
+               int release;
+               struct ubifs_inode *ui = ubifs_inode(inode);
+
+               mutex_lock(&ui->ui_mutex);
+               inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
+               /* Sample the dirty state before the inode is marked dirty */
+               release = ui->dirty;
+               mark_inode_dirty_sync(inode);
+               mutex_unlock(&ui->ui_mutex);
+               if (release)
+                       ubifs_release_dirty_inode_budget(c, ui);
+       }
+
+       unlock_page(page);
+       return 0;
+
+out_unlock:
+       unlock_page(page);
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/*
+ * VM operations which 'ubifs_file_mmap()' installs for mapped files: generic
+ * fault handling plus the UBIFS '->page_mkwrite()' handler.
+ */
+static struct vm_operations_struct ubifs_file_vm_ops = {
+       .fault        = filemap_fault,
+       .page_mkwrite = ubifs_vm_page_mkwrite,
+};
+
+/*
+ * The '->mmap()' file operation. Lets 'generic_file_mmap()' set the mapping up
+ * and, if that succeeds, installs UBIFS VM operations. Returns zero in case of
+ * success and the error code of 'generic_file_mmap()' in case of failure.
+ */
+static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       /* 'generic_file_mmap()' takes care of NOMMU case */
+       int err = generic_file_mmap(file, vma);
+
+       if (!err)
+               vma->vm_ops = &ubifs_file_vm_ops;
+
+       return err;
+}
+
+/* Address space (page cache) operations implemented in this file */
+struct address_space_operations ubifs_file_address_operations = {
+       .readpage       = ubifs_readpage,
+       .writepage      = ubifs_writepage,
+       .write_begin    = ubifs_write_begin,
+       .write_end      = ubifs_write_end,
+       .invalidatepage = ubifs_invalidatepage,
+       .set_page_dirty = ubifs_set_page_dirty,
+       .releasepage    = ubifs_releasepage,
+};
+
+/*
+ * Inode operations table named for files; the extended attribute operations
+ * are present only if @CONFIG_UBIFS_FS_XATTR is defined.
+ */
+struct inode_operations ubifs_file_inode_operations = {
+       .setattr     = ubifs_setattr,
+       .getattr     = ubifs_getattr,
+#ifdef CONFIG_UBIFS_FS_XATTR
+       .setxattr    = ubifs_setxattr,
+       .getxattr    = ubifs_getxattr,
+       .listxattr   = ubifs_listxattr,
+       .removexattr = ubifs_removexattr,
+#endif
+};
+
+/* Inode operations table named for symlinks (no xattr operations here) */
+struct inode_operations ubifs_symlink_inode_operations = {
+       .readlink    = generic_readlink,
+       .follow_link = ubifs_follow_link,
+       .setattr     = ubifs_setattr,
+       .getattr     = ubifs_getattr,
+};
+
+/*
+ * File operations table named for files. Synchronous read and write go
+ * through the AIO entry points; only write, mmap, fsync and ioctl are
+ * UBIFS-specific.
+ */
+struct file_operations ubifs_file_operations = {
+       .llseek         = generic_file_llseek,
+       .read           = do_sync_read,
+       .write          = do_sync_write,
+       .aio_read       = generic_file_aio_read,
+       .aio_write      = ubifs_aio_write,
+       .mmap           = ubifs_file_mmap,
+       .fsync          = ubifs_fsync,
+       .unlocked_ioctl = ubifs_ioctl,
+       .splice_read    = generic_file_splice_read,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = ubifs_compat_ioctl,
+#endif
+};
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c

new file mode 100644 (file)

index 0000000..10394c5
--- /dev/null
+++ b/fs/ubifs/find.c
@@ -0,0 +1,975 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file contains functions for finding LEBs for various purposes e.g.
+ * garbage collection. In general, lprops category heaps and lists are used
+ * for fast access, falling back on scanning the LPT as a last resort.
+ */
+
+#include <linux/sort.h>
+#include "ubifs.h"
+
+/**
+ * struct scan_data - data provided to scan callback functions
+ * @min_space: minimum number of bytes for which to scan
+ * @pick_free: whether it is OK to scan for empty LEBs
+ * @lnum: LEB number found is returned here
+ * @exclude_index: whether to exclude index LEBs
+ *
+ * The address of an object of this type is passed to
+ * 'ubifs_lpt_scan_nolock()' together with one of the scan callbacks of this
+ * file. Callers set @lnum to %-1 before the scan and fill in only those of
+ * the remaining fields which the chosen callback actually reads.
+ */
+struct scan_data {
+       int min_space;
+       int pick_free;
+       int lnum;
+       int exclude_index;
+};
+
+/**
+ * valuable - determine whether LEB properties are valuable.
+ * @c: the UBIFS file-system description object
+ * @lprops: LEB properties
+ *
+ * This function returns %1 if the LEB properties should be added to the LEB
+ * properties tree in memory, and %0 otherwise. The decision depends on the
+ * category stored in @lprops->flags.
+ */
+static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops)
+{
+       const int cat = lprops->flags & LPROPS_CAT_MASK;
+
+       switch (cat) {
+       case LPROPS_FREEABLE:
+       case LPROPS_FRDI_IDX:
+               /* These categories are always valuable */
+               return 1;
+       case LPROPS_EMPTY:
+               /* Valuable while fewer than @c->lsave_cnt are available */
+               return c->lst.empty_lebs + c->freeable_cnt -
+                      c->lst.taken_empty_lebs < c->lsave_cnt;
+       case LPROPS_DIRTY:
+       case LPROPS_DIRTY_IDX:
+       case LPROPS_FREE: {
+               const struct ubifs_lpt_heap *hp = &c->lpt_heap[cat - 1];
+
+               /*
+                * Valuable if the category heap still has room, or if free
+                * plus dirty space reaches the dark watermark.
+                */
+               return hp->cnt < hp->max_cnt ||
+                      lprops->free + lprops->dirty >= c->dark_wm;
+       }
+       }
+       /* Any other category is not valuable */
+       return 0;
+}
+
+/**
+ * scan_for_dirty_cb - dirty space scan callback.
+ * @c: the UBIFS file-system description object
+ * @lprops: LEB properties to scan
+ * @in_tree: whether the LEB properties are in main memory
+ * @data: information passed to and from the caller of the scan
+ *
+ * Tells the LPT scan what to do with @lprops. The returned code is
+ * %LPT_SCAN_CONTINUE if the scan should go on, possibly combined with
+ * %LPT_SCAN_ADD if the LEB properties should be added to the tree in main
+ * memory, or %LPT_SCAN_ADD | %LPT_SCAN_STOP if a suitable LEB was found. In
+ * the latter case the LEB number is stored in @data->lnum.
+ */
+static int scan_for_dirty_cb(struct ubifs_info *c,
+                            const struct ubifs_lprops *lprops, int in_tree,
+                            struct scan_data *data)
+{
+       const int space = lprops->free + lprops->dirty;
+       int verdict = LPT_SCAN_CONTINUE;
+
+       /* LEBs that are currently in use are never considered */
+       if (lprops->flags & LPROPS_TAKEN)
+               return LPT_SCAN_CONTINUE;
+       /* Valuable LEB properties which are not in the tree get added */
+       if (!in_tree && valuable(c, lprops))
+               verdict |= LPT_SCAN_ADD;
+       /* Too little space, or an index LEB while those are excluded */
+       if (space < data->min_space ||
+           (data->exclude_index && (lprops->flags & LPROPS_INDEX)))
+               return verdict;
+       if (space == c->leb_size) {
+               /* Empty or freeable LEBs are fine only if asked for */
+               if (!data->pick_free)
+                       return verdict;
+       } else if (lprops->dirty < c->dead_wm) {
+               /* Not empty, and too little dirty space */
+               return verdict;
+       }
+       /* Finally we found space */
+       data->lnum = lprops->lnum;
+       return LPT_SCAN_ADD | LPT_SCAN_STOP;
+}
+
+/**
+ * scan_for_dirty - find an LEB with enough dirty space.
+ * @c: the UBIFS file-system description object
+ * @min_space: minimum amount free plus dirty space the returned LEB has to
+ *             have
+ * @pick_free: if it is OK to return a free or freeable LEB
+ * @exclude_index: whether to exclude index LEBs
+ *
+ * The free heap and the uncategorized list are checked first. Only if nothing
+ * suitable is found there and not all pnodes are in memory, the LPT is
+ * scanned. This function returns a pointer to the LEB properties found or an
+ * error pointer carrying a negative error code (%-ENOSPC if all pnodes are
+ * already in memory and nothing was found).
+ */
+static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
+                                                int min_space, int pick_free,
+                                                int exclude_index)
+{
+       struct ubifs_lpt_heap *free_heap = &c->lpt_heap[LPROPS_FREE - 1];
+       const struct ubifs_lprops *lp;
+       struct scan_data sd;
+       int idx, ret;
+
+       /* There may be an LEB with enough dirty space on the free heap */
+       for (idx = 0; idx < free_heap->cnt; idx++) {
+               lp = free_heap->arr[idx];
+               if (lp->free + lp->dirty >= min_space &&
+                   lp->dirty >= c->dead_wm)
+                       return lp;
+       }
+       /*
+        * A LEB may have fallen off of the bottom of the dirty heap, and ended
+        * up as uncategorized even though it has enough dirty space for us now,
+        * so check the uncategorized list. N.B. neither empty nor freeable LEBs
+        * can end up as uncategorized because they are kept on lists not
+        * finite-sized heaps.
+        */
+       list_for_each_entry(lp, &c->uncat_list, list) {
+               if (lp->flags & LPROPS_TAKEN)
+                       continue;
+               if (exclude_index && (lp->flags & LPROPS_INDEX))
+                       continue;
+               if (lp->free + lp->dirty >= min_space &&
+                   lp->dirty >= c->dead_wm)
+                       return lp;
+       }
+       /* Main memory is exhausted; scan the flash unless all is in memory */
+       if (c->pnodes_have >= c->pnode_cnt)
+               return ERR_PTR(-ENOSPC);
+
+       sd.lnum = -1;
+       sd.min_space = min_space;
+       sd.pick_free = pick_free;
+       sd.exclude_index = exclude_index;
+       ret = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
+                                   (ubifs_lpt_scan_callback)scan_for_dirty_cb,
+                                   &sd);
+       if (ret)
+               return ERR_PTR(ret);
+       ubifs_assert(sd.lnum >= c->main_first && sd.lnum < c->leb_cnt);
+       /* Remember the LEB number found by this scan */
+       c->lscan_lnum = sd.lnum;
+       lp = ubifs_lpt_lookup_dirty(c, sd.lnum);
+       if (IS_ERR(lp))
+               return lp;
+       /* The LEB found must satisfy everything the callback checked */
+       ubifs_assert(lp->lnum == sd.lnum);
+       ubifs_assert(lp->free + lp->dirty >= min_space);
+       ubifs_assert(lp->dirty >= c->dead_wm ||
+                    (pick_free &&
+                     lp->free + lp->dirty == c->leb_size));
+       ubifs_assert(!(lp->flags & LPROPS_TAKEN));
+       ubifs_assert(!exclude_index || !(lp->flags & LPROPS_INDEX));
+       return lp;
+}
+
+/**
+ * ubifs_find_dirty_leb - find a dirty LEB for the Garbage Collector.
+ * @c: the UBIFS file-system description object
+ * @ret_lp: a copy of the found LEB properties is stored here on success
+ * @min_space: minimum amount free plus dirty space the returned LEB has to
+ *             have
+ * @pick_free: controls whether it is OK to pick empty or index LEBs (%0, %1 or
+ *             %2, see below)
+ *
+ * This function tries to find a dirty logical eraseblock which has at least
+ * @min_space free and dirty space. It prefers to take an LEB from the dirty or
+ * dirty index heap, and it falls-back to LPT scanning if the heaps are empty
+ * or do not have an LEB which satisfies the @min_space criteria.
+ *
+ * Note:
+ *   o LEBs which have less than dead watermark of dirty space are never picked
+ *   by this function;
+ *
+ * Returns zero and the LEB properties of the found dirty LEB in case of
+ * success, %-ENOSPC if no dirty LEB was found and a negative error code in
+ * case of other failures. The returned LEB is marked as "taken".
+ *
+ * The additional @pick_free argument controls if this function has to return a
+ * free or freeable LEB if one is present. For example, GC must set it to %1,
+ * when called from the journal space reservation function, because the
+ * appearance of free space may coincide with the loss of enough dirty space
+ * for GC to succeed anyway.
+ *
+ * In contrast, if the Garbage Collector is called from budgeting, it should
+ * just make free space, not return LEBs which are already free or freeable.
+ *
+ * In addition @pick_free is set to %2 by the recovery process in order to
+ * recover gc_lnum in which case an index LEB must not be returned.
+ */
+int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
+                        int min_space, int pick_free)
+{
+       int err = 0, sum, exclude_index = pick_free == 2 ? 1 : 0;
+       const struct ubifs_lprops *lp = NULL, *idx_lp = NULL;
+       struct ubifs_lpt_heap *heap, *idx_heap;
+
+       ubifs_get_lprops(c);
+
+       if (pick_free) {
+               int lebs, rsvd_idx_lebs = 0;
+
+               /* The space accounting fields are read under @c->space_lock */
+               spin_lock(&c->space_lock);
+               lebs = c->lst.empty_lebs;
+               lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
+
+               /*
+                * Note, the index may consume more LEBs than have been reserved
+                * for it. It is OK because it might be consolidated by GC.
+                * But if the index takes fewer LEBs than are reserved for it,
+                * this function must avoid picking those reserved LEBs.
+                */
+               if (c->min_idx_lebs >= c->lst.idx_lebs) {
+                       rsvd_idx_lebs = c->min_idx_lebs -  c->lst.idx_lebs;
+                       exclude_index = 1;
+               }
+               spin_unlock(&c->space_lock);
+
+               /* Check if there are enough free LEBs for the index */
+               if (rsvd_idx_lebs < lebs) {
+                       /* OK, try to find an empty LEB */
+                       lp = ubifs_fast_find_empty(c);
+                       if (lp)
+                               goto found;
+
+                       /* Or a freeable LEB */
+                       lp = ubifs_fast_find_freeable(c);
+                       if (lp)
+                               goto found;
+               } else
+                       /*
+                        * We cannot pick free/freeable LEBs in the below code.
+                        */
+                       pick_free = 0;
+       } else {
+               /*
+                * Free LEBs are not wanted, so only find out whether index
+                * LEBs have to be excluded.
+                */
+               spin_lock(&c->space_lock);
+               exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs);
+               spin_unlock(&c->space_lock);
+       }
+
+       /* Look on the dirty and dirty index heaps */
+       heap = &c->lpt_heap[LPROPS_DIRTY - 1];
+       idx_heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
+
+       /* Only the first element of each heap is considered */
+       if (idx_heap->cnt && !exclude_index) {
+               idx_lp = idx_heap->arr[0];
+               sum = idx_lp->free + idx_lp->dirty;
+               /*
+                * Since we reserve twice as much space for the index than it
+                * actually takes, it does not make sense to pick indexing LEBs
+                * with less than half LEB of dirty space.
+                */
+               if (sum < min_space || sum < c->half_leb_size)
+                       idx_lp = NULL;
+       }
+
+       if (heap->cnt) {
+               lp = heap->arr[0];
+               if (lp->dirty + lp->free < min_space)
+                       lp = NULL;
+       }
+
+       /* Pick the LEB with most space; the index LEB wins a tie */
+       if (idx_lp && lp) {
+               if (idx_lp->free + idx_lp->dirty >= lp->free + lp->dirty)
+                       lp = idx_lp;
+       } else if (idx_lp && !lp)
+               lp = idx_lp;
+
+       if (lp) {
+               ubifs_assert(lp->dirty >= c->dead_wm);
+               goto found;
+       }
+
+       /* Did not find a dirty LEB on the dirty heaps, have to scan */
+       dbg_find("scanning LPT for a dirty LEB");
+       lp = scan_for_dirty(c, min_space, pick_free, exclude_index);
+       if (IS_ERR(lp)) {
+               err = PTR_ERR(lp);
+               goto out;
+       }
+       ubifs_assert(lp->dirty >= c->dead_wm ||
+                    (pick_free && lp->free + lp->dirty == c->leb_size));
+
+found:
+       dbg_find("found LEB %d, free %d, dirty %d, flags %#x",
+                lp->lnum, lp->free, lp->dirty, lp->flags);
+
+       /* Mark the LEB as taken; free and dirty space are left unchanged */
+       lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
+                            lp->flags | LPROPS_TAKEN, 0);
+       if (IS_ERR(lp)) {
+               err = PTR_ERR(lp);
+               goto out;
+       }
+
+       /* Hand a copy of the updated LEB properties back to the caller */
+       memcpy(ret_lp, lp, sizeof(struct ubifs_lprops));
+
+out:
+       ubifs_release_lprops(c);
+       return err;
+}
+
+/**
+ * scan_for_free_cb - free space scan callback.
+ * @c: the UBIFS file-system description object
+ * @lprops: LEB properties to scan
+ * @in_tree: whether the LEB properties are in main memory
+ * @data: information passed to and from the caller of the scan
+ *
+ * Tells the LPT scan what to do with @lprops. The returned code is
+ * %LPT_SCAN_CONTINUE if the scan should go on, possibly combined with
+ * %LPT_SCAN_ADD if the LEB properties should be added to the tree in main
+ * memory, or %LPT_SCAN_ADD | %LPT_SCAN_STOP if a suitable LEB was found. In
+ * the latter case the LEB number is stored in @data->lnum.
+ */
+static int scan_for_free_cb(struct ubifs_info *c,
+                           const struct ubifs_lprops *lprops, int in_tree,
+                           struct scan_data *data)
+{
+       int verdict = LPT_SCAN_CONTINUE;
+
+       /* LEBs that are currently in use are never considered */
+       if (lprops->flags & LPROPS_TAKEN)
+               return LPT_SCAN_CONTINUE;
+       /* Valuable LEB properties which are not in the tree get added */
+       if (!in_tree && valuable(c, lprops))
+               verdict |= LPT_SCAN_ADD;
+       /* Index LEBs and LEBs with too little free space are unsuitable */
+       if ((lprops->flags & LPROPS_INDEX) || lprops->free < data->min_space)
+               return verdict;
+       /* Empty LEBs are fine only if the caller allowed them */
+       if (lprops->free == c->leb_size && !data->pick_free)
+               return verdict;
+       /*
+        * LEBs that have only free and dirty space must not be allocated
+        * because they may have been unmapped already or they may have data
+        * that is obsolete only because of nodes that are still sitting in a
+        * wbuf.
+        */
+       if (lprops->dirty > 0 && lprops->free + lprops->dirty == c->leb_size)
+               return verdict;
+       /* Finally we found space */
+       data->lnum = lprops->lnum;
+       return LPT_SCAN_ADD | LPT_SCAN_STOP;
+}
+
+/**
+ * do_find_free_space - find a data LEB with free space.
+ * @c: the UBIFS file-system description object
+ * @min_space: minimum amount of free space required
+ * @pick_free: whether it is OK to scan for empty LEBs
+ * @squeeze: whether to try to find space in a non-empty LEB first
+ *
+ * The fast-find helpers, the dirty heap and the uncategorized list are tried
+ * first; the LPT is scanned only if they fail and not all pnodes are in
+ * memory. This function returns a pointer to the LEB properties found or an
+ * error pointer carrying a negative error code (%-ENOSPC if all pnodes are
+ * already in memory and nothing was found).
+ */
+static
+const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
+                                             int min_space, int pick_free,
+                                             int squeeze)
+{
+       struct ubifs_lpt_heap *dirty_heap = &c->lpt_heap[LPROPS_DIRTY - 1];
+       const struct ubifs_lprops *lp;
+       struct scan_data sd;
+       int idx, ret;
+
+       /* When squeezing, a non-empty LEB is tried before an empty one */
+       if (squeeze) {
+               lp = ubifs_fast_find_free(c);
+               if (lp && min_space <= lp->free)
+                       return lp;
+       }
+       if (pick_free) {
+               lp = ubifs_fast_find_empty(c);
+               if (lp)
+                       return lp;
+       }
+       /* When not squeezing, a non-empty LEB is tried after an empty one */
+       if (!squeeze) {
+               lp = ubifs_fast_find_free(c);
+               if (lp && min_space <= lp->free)
+                       return lp;
+       }
+       /* There may be an LEB with enough free space on the dirty heap */
+       for (idx = 0; idx < dirty_heap->cnt; idx++) {
+               lp = dirty_heap->arr[idx];
+               if (min_space <= lp->free)
+                       return lp;
+       }
+       /*
+        * A LEB may have fallen off of the bottom of the free heap, and ended
+        * up as uncategorized even though it has enough free space for us now,
+        * so check the uncategorized list. N.B. neither empty nor freeable LEBs
+        * can end up as uncategorized because they are kept on lists not
+        * finite-sized heaps.
+        */
+       list_for_each_entry(lp, &c->uncat_list, list) {
+               if (lp->flags & (LPROPS_TAKEN | LPROPS_INDEX))
+                       continue;
+               if (min_space <= lp->free)
+                       return lp;
+       }
+       /* Main memory is exhausted; scan the flash unless all is in memory */
+       if (c->pnodes_have >= c->pnode_cnt)
+               return ERR_PTR(-ENOSPC);
+
+       /* 'scan_for_free_cb()' does not read the @exclude_index field */
+       sd.lnum = -1;
+       sd.min_space = min_space;
+       sd.pick_free = pick_free;
+       ret = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
+                                   (ubifs_lpt_scan_callback)scan_for_free_cb,
+                                   &sd);
+       if (ret)
+               return ERR_PTR(ret);
+       ubifs_assert(sd.lnum >= c->main_first && sd.lnum < c->leb_cnt);
+       /* Remember the LEB number found by this scan */
+       c->lscan_lnum = sd.lnum;
+       lp = ubifs_lpt_lookup_dirty(c, sd.lnum);
+       if (IS_ERR(lp))
+               return lp;
+       /* The LEB found must satisfy everything the callback checked */
+       ubifs_assert(lp->lnum == sd.lnum);
+       ubifs_assert(lp->free >= min_space);
+       ubifs_assert(!(lp->flags & LPROPS_TAKEN));
+       ubifs_assert(!(lp->flags & LPROPS_INDEX));
+       return lp;
+}
+
+/**
+ * ubifs_find_free_space - find a data LEB with free space.
+ * @c: the UBIFS file-system description object
+ * @min_space: minimum amount of required free space
+ * @free: contains amount of free space in the LEB on exit
+ * @squeeze: whether to try to find space in a non-empty LEB first
+ *
+ * This function looks for an LEB with at least @min_space bytes of free space.
+ * It tries to find an empty LEB if possible. If no empty LEBs are available,
+ * this function searches for a non-empty data LEB. The returned LEB is marked
+ * as "taken".
+ *
+ * This function returns the found LEB number in case of success, %-ENOSPC if
+ * it failed to find a LEB with @min_space bytes of free space and other
+ * negative error codes in case of failure.
+ */
+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
+                         int squeeze)
+{
+       const struct ubifs_lprops *lprops;
+       int lebs, rsvd_idx_lebs, pick_free = 0, err, lnum, flags;
+
+       dbg_find("min_space %d", min_space);
+       ubifs_get_lprops(c);
+
+       /* Check if there are enough empty LEBs for commit */
+       spin_lock(&c->space_lock);
+       /* Number of LEBs still reserved for the index but not used by it */
+       if (c->min_idx_lebs > c->lst.idx_lebs)
+               rsvd_idx_lebs = c->min_idx_lebs -  c->lst.idx_lebs;
+       else
+               rsvd_idx_lebs = 0;
+       lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
+              c->lst.taken_empty_lebs;
+       ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs);
+       if (rsvd_idx_lebs < lebs)
+               /*
+                * OK to allocate an empty LEB, but we still don't want to go
+                * looking for one if there aren't any.
+                */
+               if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+                       pick_free = 1;
+                       /*
+                        * Because we release the space lock, we must account
+                        * for this allocation here. After the LEB properties
+                        * flags have been updated, we subtract one. Note, the
+                        * result of this is that lprops also decreases
+                        * @taken_empty_lebs in 'ubifs_change_lp()', so it is
+                        * off by one for a short period of time which may
+                        * introduce a small disturbance to budgeting
+                        * calculations, but this is harmless because at the
+                        * worst case this would make the budgeting subsystem
+                        * be more pessimistic than needed.
+                        *
+                        * Fundamentally, this is about serialization of the
+                        * budgeting and lprops subsystems. We could make the
+                        * @space_lock a mutex and avoid dropping it before
+                        * calling 'ubifs_change_lp()', but mutex is more
+                        * heavy-weight, and we want budgeting to be as fast as
+                        * possible.
+                        */
+                       c->lst.taken_empty_lebs += 1;
+               }
+       spin_unlock(&c->space_lock);
+
+       lprops = do_find_free_space(c, min_space, pick_free, squeeze);
+       if (IS_ERR(lprops)) {
+               err = PTR_ERR(lprops);
+               goto out;
+       }
+
+       /* Saved now because @lprops is re-assigned by 'ubifs_change_lp()' */
+       lnum = lprops->lnum;
+       flags = lprops->flags | LPROPS_TAKEN;
+
+       /* Mark the LEB as taken; free and dirty space are left unchanged */
+       lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, flags, 0);
+       if (IS_ERR(lprops)) {
+               err = PTR_ERR(lprops);
+               goto out;
+       }
+
+       /* Undo the temporary @taken_empty_lebs adjustment made above */
+       if (pick_free) {
+               spin_lock(&c->space_lock);
+               c->lst.taken_empty_lebs -= 1;
+               spin_unlock(&c->space_lock);
+       }
+
+       *free = lprops->free;
+       ubifs_release_lprops(c);
+
+       if (*free == c->leb_size) {
+               /*
+                * Ensure that empty LEBs have been unmapped. They may not have
+                * been, for example, because of an unclean unmount.  Also
+                * LEBs that were freeable LEBs (free + dirty == leb_size) will
+                * not have been unmapped.
+                *
+                * NOTE(review): if unmapping fails, the error is returned
+                * while the LEB stays flagged as taken - confirm that callers
+                * can cope with this.
+                */
+               err = ubifs_leb_unmap(c, lnum);
+               if (err)
+                       return err;
+       }
+
+       dbg_find("found LEB %d, free %d", lnum, *free);
+       ubifs_assert(*free >= min_space);
+       return lnum;
+
+out:
+       /* Error path: undo the temporary @taken_empty_lebs adjustment */
+       if (pick_free) {
+               spin_lock(&c->space_lock);
+               c->lst.taken_empty_lebs -= 1;
+               spin_unlock(&c->space_lock);
+       }
+       ubifs_release_lprops(c);
+       return err;
+}
+
+/**
+ * scan_for_idx_cb - callback used by the scan for a free LEB for the index.
+ * @c: the UBIFS file-system description object
+ * @lprops: LEB properties to scan
+ * @in_tree: whether the LEB properties are in main memory
+ * @data: information passed to and from the caller of the scan
+ *
+ * Tells the LPT scan what to do with @lprops. The returned code is
+ * %LPT_SCAN_CONTINUE if the scan should go on, possibly combined with
+ * %LPT_SCAN_ADD if the LEB properties should be added to the tree in main
+ * memory, or %LPT_SCAN_ADD | %LPT_SCAN_STOP if a suitable LEB was found. In
+ * the latter case the LEB number is stored in @data->lnum.
+ */
+static int scan_for_idx_cb(struct ubifs_info *c,
+                          const struct ubifs_lprops *lprops, int in_tree,
+                          struct scan_data *data)
+{
+       int verdict = LPT_SCAN_CONTINUE;
+
+       /* LEBs that are currently in use are never considered */
+       if (lprops->flags & LPROPS_TAKEN)
+               return LPT_SCAN_CONTINUE;
+       /* Valuable LEB properties which are not in the tree get added */
+       if (!in_tree && valuable(c, lprops))
+               verdict |= LPT_SCAN_ADD;
+       /* Skip index LEBs and LEBs that cannot be made empty */
+       if ((lprops->flags & LPROPS_INDEX) ||
+           lprops->free + lprops->dirty != c->leb_size)
+               return verdict;
+       /*
+        * We are allocating for the index so it is safe to allocate LEBs with
+        * only free and dirty space, because write buffers are sync'd at commit
+        * start.
+        */
+       data->lnum = lprops->lnum;
+       return LPT_SCAN_ADD | LPT_SCAN_STOP;
+}
+
+/**
+ * scan_for_leb_for_idx - scan for a free LEB for the index.
+ * @c: the UBIFS file-system description object
+ *
+ * This function scans the LPT using 'scan_for_idx_cb()', records the LEB
+ * number found in @c->lscan_lnum and looks the LEB properties up with
+ * 'ubifs_lpt_lookup_dirty()'. It returns a pointer to the LEB properties
+ * found, or an error pointer carrying the negative error code of the scan or
+ * of the look-up.
+ */
+static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
+{
+       /* Const like in the other scan helpers and like the return type */
+       const struct ubifs_lprops *lprops;
+       /*
+        * 'scan_for_idx_cb()' uses only @lnum; the remaining fields are
+        * zeroed so that no uninitialized data is handed to the scan.
+        */
+       struct scan_data data = { .lnum = -1 };
+       int err;
+
+       err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
+                                   (ubifs_lpt_scan_callback)scan_for_idx_cb,
+                                   &data);
+       if (err)
+               return ERR_PTR(err);
+       ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
+       /* Remember the LEB number found by this scan */
+       c->lscan_lnum = data.lnum;
+       lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
+       if (IS_ERR(lprops))
+               return lprops;
+       /* The LEB found must satisfy everything the callback checked */
+       ubifs_assert(lprops->lnum == data.lnum);
+       ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
+       ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
+       ubifs_assert(!(lprops->flags & LPROPS_INDEX));
+       return lprops;
+}
+
+/**
+ * ubifs_find_free_leb_for_idx - find a free LEB for the index.
+ * @c: the UBIFS file-system description object
+ *
+ * This function looks for a free LEB and returns that LEB number. The returned
+ * LEB is marked as "taken", "index".
+ *
+ * Only empty LEBs are allocated. This is for two reasons. First, the commit
+ * calculates the number of LEBs to allocate based on the assumption that they
+ * will be empty. Secondly, free space at the end of an index LEB is not
+ * guaranteed to be empty because it may have been used by the in-the-gaps
+ * method prior to an unclean unmount.
+ *
+ * If no LEB is found %-ENOSPC is returned. For other failures another negative
+ * error code is returned.
+ */
+int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
+{
+       const struct ubifs_lprops *lp;
+       int lnum = -1, ret, new_flags;
+
+       ubifs_get_lprops(c);
+
+       /* Prefer an empty LEB, then a freeable one */
+       lp = ubifs_fast_find_empty(c);
+       if (!lp)
+               lp = ubifs_fast_find_freeable(c);
+       if (!lp) {
+               ubifs_assert(c->freeable_cnt == 0);
+               /* Scan only if there are empty LEBs which are not taken */
+               if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+                       lp = scan_for_leb_for_idx(c);
+                       if (IS_ERR(lp)) {
+                               ret = PTR_ERR(lp);
+                               goto out_release;
+                       }
+               }
+       }
+       if (!lp) {
+               ret = -ENOSPC;
+               goto out_release;
+       }
+
+       lnum = lp->lnum;
+
+       dbg_find("found LEB %d, free %d, dirty %d, flags %#x",
+                lnum, lp->free, lp->dirty, lp->flags);
+
+       /* The whole LEB becomes free space of a taken index LEB */
+       new_flags = lp->flags | LPROPS_TAKEN | LPROPS_INDEX;
+       lp = ubifs_change_lp(c, lp, c->leb_size, 0, new_flags, 0);
+       if (IS_ERR(lp)) {
+               ret = PTR_ERR(lp);
+               goto out_release;
+       }
+
+       ubifs_release_lprops(c);
+
+       /*
+        * Ensure that empty LEBs have been unmapped. They may not have been,
+        * for example, because of an unclean unmount. Also LEBs that were
+        * freeable LEBs (free + dirty == leb_size) will not have been unmapped.
+        */
+       ret = ubifs_leb_unmap(c, lnum);
+       if (!ret)
+               return lnum;
+
+       /*
+        * Unmapping failed. Presumably this call drops the "taken" and "index"
+        * flags set above - confirm against 'ubifs_change_one_lp()'. Its
+        * return value is ignored and the unmap error is reported.
+        */
+       ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
+                           LPROPS_TAKEN | LPROPS_INDEX, 0);
+       return ret;
+
+out_release:
+       ubifs_release_lprops(c);
+       return ret;
+}
+
+/**
+ * cmp_dirty_idx - 'sort()' comparison callback for LEB properties pointers.
+ * @a: address of the first array element (a pointer to LEB properties)
+ * @b: address of the second array element (a pointer to LEB properties)
+ *
+ * The prototype is exactly the one 'sort()' expects, so the function can be
+ * passed without a function pointer cast. Returns a negative value, zero or a
+ * positive value if the sum of dirty and free space of the first LEB is less
+ * than, equal to or greater than that of the second LEB.
+ */
+static int cmp_dirty_idx(const void *a, const void *b)
+{
+       const struct ubifs_lprops * const *pa = a;
+       const struct ubifs_lprops * const *pb = b;
+
+       return (*pa)->dirty + (*pa)->free - (*pb)->dirty - (*pb)->free;
+}
+
+/**
+ * swap_dirty_idx - 'sort()' swap callback for LEB properties pointers.
+ * @a: address of the first array element
+ * @b: address of the second array element
+ * @size: element size (not used, the elements are always pointers)
+ *
+ * The prototype is exactly the one 'sort()' expects, so the function can be
+ * passed without a function pointer cast.
+ */
+static void swap_dirty_idx(void *a, void *b, int size)
+{
+       struct ubifs_lprops **pa = a, **pb = b;
+       struct ubifs_lprops *t = *pa;
+
+       *pa = *pb;
+       *pb = t;
+}
+
+/**
+ * ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos.
+ * @c: the UBIFS file-system description object
+ *
+ * This function is called each commit to create an array of LEB numbers of
+ * dirty index LEBs sorted in order of dirty and free space.  This is used by
+ * the in-the-gaps method of TNC commit.
+ *
+ * The result is stored in @c->dirty_idx. This function always returns zero.
+ */
+int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
+{
+       int i;
+
+       ubifs_get_lprops(c);
+       /* Copy the LPROPS_DIRTY_IDX heap */
+       c->dirty_idx.cnt = c->lpt_heap[LPROPS_DIRTY_IDX - 1].cnt;
+       memcpy(c->dirty_idx.arr, c->lpt_heap[LPROPS_DIRTY_IDX - 1].arr,
+              sizeof(void *) * c->dirty_idx.cnt);
+       /*
+        * Sort it so that the dirtiest is now at the end. The callbacks have
+        * the prototypes 'sort()' requires; calling a function through a
+        * pointer to an incompatible function type is undefined behaviour.
+        */
+       sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *),
+            cmp_dirty_idx, swap_dirty_idx);
+       dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt);
+       if (c->dirty_idx.cnt)
+               dbg_find("dirtiest index LEB is %d with dirty %d and free %d",
+                        c->dirty_idx.arr[c->dirty_idx.cnt - 1]->lnum,
+                        c->dirty_idx.arr[c->dirty_idx.cnt - 1]->dirty,
+                        c->dirty_idx.arr[c->dirty_idx.cnt - 1]->free);
+       /*
+        * Replace the lprops pointers with LEB numbers. From here on the array
+        * elements are integers stored in pointer-sized slots and must not be
+        * dereferenced.
+        */
+       for (i = 0; i < c->dirty_idx.cnt; i++)
+               c->dirty_idx.arr[i] = (void *)(size_t)c->dirty_idx.arr[i]->lnum;
+       ubifs_release_lprops(c);
+       return 0;
+}
+
+/**
+ * scan_dirty_idx_cb - callback used by the scan for a dirty index LEB.
+ * @c: the UBIFS file-system description object
+ * @lprops: LEB properties to scan
+ * @in_tree: whether the LEB properties are in main memory
+ * @data: information passed to and from the caller of the scan
+ *
+ * Tells the LPT scan what to do with @lprops. The returned code is
+ * %LPT_SCAN_CONTINUE if the scan should go on, possibly combined with
+ * %LPT_SCAN_ADD if the LEB properties should be added to the tree in main
+ * memory, or %LPT_SCAN_ADD | %LPT_SCAN_STOP if a suitable LEB was found. In
+ * the latter case the LEB number is stored in @data->lnum.
+ */
+static int scan_dirty_idx_cb(struct ubifs_info *c,
+                          const struct ubifs_lprops *lprops, int in_tree,
+                          struct scan_data *data)
+{
+       int verdict = LPT_SCAN_CONTINUE;
+
+       /* LEBs that are currently in use are never considered */
+       if (lprops->flags & LPROPS_TAKEN)
+               return LPT_SCAN_CONTINUE;
+       /* Valuable LEB properties which are not in the tree get added */
+       if (!in_tree && valuable(c, lprops))
+               verdict |= LPT_SCAN_ADD;
+       /* Only index LEBs with room for at least one index node qualify */
+       if (!(lprops->flags & LPROPS_INDEX) ||
+           lprops->free + lprops->dirty < c->min_idx_node_sz)
+               return verdict;
+       /* Finally we found space */
+       data->lnum = lprops->lnum;
+       return LPT_SCAN_ADD | LPT_SCAN_STOP;
+}
+
+/**
+ * find_dirty_idx_leb - find a dirty index LEB.
+ * @c: the UBIFS file-system description object
+ *
+ * This function returns LEB number upon success and a negative error code upon
+ * failure.  In particular, -ENOSPC is returned if a dirty index LEB is not
+ * found.
+ *
+ * The LEB properties which are already in memory (the dirty index heap, the
+ * @c->frdi_idx_list list and the @c->uncat_list list) are checked first, and
+ * the LPT is scanned only if that fails and not all pnodes are in memory. The
+ * LEB which is found is marked with the %LPROPS_TAKEN flag before its number
+ * is returned.
+ *
+ * Note that this function scans the entire LPT but it is called very rarely.
+ */
+static int find_dirty_idx_leb(struct ubifs_info *c)
+{
+       const struct ubifs_lprops *lprops;
+       struct ubifs_lpt_heap *heap;
+       struct scan_data data;
+       int err, i, ret;
+
+       /* Check all structures in memory first */
+       data.lnum = -1;
+       /* The same callback as for the LPT scan is used, with in_tree == 1 */
+       heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
+       for (i = 0; i < heap->cnt; i++) {
+               lprops = heap->arr[i];
+               ret = scan_dirty_idx_cb(c, lprops, 1, &data);
+               if (ret & LPT_SCAN_STOP)
+                       goto found;
+       }
+       list_for_each_entry(lprops, &c->frdi_idx_list, list) {
+               ret = scan_dirty_idx_cb(c, lprops, 1, &data);
+               if (ret & LPT_SCAN_STOP)
+                       goto found;
+       }
+       list_for_each_entry(lprops, &c->uncat_list, list) {
+               ret = scan_dirty_idx_cb(c, lprops, 1, &data);
+               if (ret & LPT_SCAN_STOP)
+                       goto found;
+       }
+       if (c->pnodes_have >= c->pnode_cnt)
+               /* All pnodes are in memory, so skip scan */
+               return -ENOSPC;
+       err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
+                                   (ubifs_lpt_scan_callback)scan_dirty_idx_cb,
+                                   &data);
+       if (err)
+               return err;
+found:
+       ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
+       /*
+        * Remember the LEB we found, 'c->lscan_lnum' is passed to the LPT scan
+        * above on later calls (presumably as the scan position - TODO confirm).
+        */
+       c->lscan_lnum = data.lnum;
+       lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
+       if (IS_ERR(lprops))
+               return PTR_ERR(lprops);
+       ubifs_assert(lprops->lnum == data.lnum);
+       ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz);
+       ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
+       ubifs_assert((lprops->flags & LPROPS_INDEX));
+
+       dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x",
+                lprops->lnum, lprops->free, lprops->dirty, lprops->flags);
+
+       /* Mark the LEB as taken, taken LEBs are excluded by the callback */
+       lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC,
+                                lprops->flags | LPROPS_TAKEN, 0);
+       if (IS_ERR(lprops))
+               return PTR_ERR(lprops);
+
+       return lprops->lnum;
+}
+
+/**
+ * get_idx_gc_leb - try to get a LEB number from trivial GC.
+ * @c: the UBIFS file-system description object
+ *
+ * This function takes a LEB off the list of garbage-collected index LEBs and
+ * sets the %LPROPS_INDEX flag in its LEB properties. Returns the LEB number in
+ * case of success and a negative error code in case of failure.
+ */
+static int get_idx_gc_leb(struct ubifs_info *c)
+{
+       const struct ubifs_lprops *lp;
+       int lnum, flags;
+
+       lnum = ubifs_get_idx_gc_leb(c);
+       if (lnum < 0)
+               return lnum;
+
+       /*
+        * This LEB was going to be unmapped once the commit finished, but we
+        * need it right now, for this very commit.
+        */
+       lp = ubifs_lpt_lookup_dirty(c, lnum);
+       if (unlikely(IS_ERR(lp)))
+               return PTR_ERR(lp);
+
+       flags = lp->flags | LPROPS_INDEX;
+       lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, flags, -1);
+       if (unlikely(IS_ERR(lp)))
+               return PTR_ERR(lp);
+
+       dbg_find("LEB %d, dirty %d and free %d flags %#x",
+                lp->lnum, lp->dirty, lp->free, lp->flags);
+       return lnum;
+}
+
+/**
+ * find_dirtiest_idx_leb - find dirtiest index LEB from dirtiest array.
+ * @c: the UBIFS file-system description object
+ *
+ * This function pops LEB numbers off the end of the @c->dirty_idx array until
+ * it finds a LEB which is still an index LEB and is not taken, and then marks
+ * that LEB as taken. Returns the LEB number in case of success, %-ENOSPC if
+ * the array is exhausted, and other negative error codes if looking up or
+ * changing the LEB properties fails.
+ */
+static int find_dirtiest_idx_leb(struct ubifs_info *c)
+{
+       const struct ubifs_lprops *lp;
+       int lnum;
+
+       while (1) {
+               if (!c->dirty_idx.cnt)
+                       return -ENOSPC;
+               /* The lprops pointers were replaced by LEB numbers */
+               lnum = (size_t)c->dirty_idx.arr[--c->dirty_idx.cnt];
+               lp = ubifs_lpt_lookup(c, lnum);
+               if (IS_ERR(lp))
+                       return PTR_ERR(lp);
+               /* The LEB may have been taken or stopped being an index LEB */
+               if ((lp->flags & LPROPS_TAKEN) || !(lp->flags & LPROPS_INDEX))
+                       continue;
+               lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
+                                    lp->flags | LPROPS_TAKEN, 0);
+               if (IS_ERR(lp))
+                       return PTR_ERR(lp);
+               break;
+       }
+       dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty,
+                lp->free, lp->flags);
+       /* Test the flags with '&' - with '|' the assertions are always true */
+       ubifs_assert(lp->flags & LPROPS_TAKEN);
+       ubifs_assert(lp->flags & LPROPS_INDEX);
+       return lnum;
+}
+
+/**
+ * ubifs_find_dirty_idx_leb - try to find dirtiest index LEB as at last commit.
+ * @c: the UBIFS file-system description object
+ *
+ * This function looks for an index LEB which is not taken and has the most
+ * free and dirty space, and which can be used without overwriting index nodes
+ * belonging to the last committed index. Three sources are tried in turn, each
+ * one only if the previous one reported %-ENOSPC. The result of the last
+ * source tried is returned: a LEB number or a negative error code.
+ */
+int ubifs_find_dirty_idx_leb(struct ubifs_info *c)
+{
+       int lnum;
+
+       ubifs_get_lprops(c);
+
+       /*
+        * Start with the array of the dirtiest index LEB numbers which was
+        * made at the start of the last commit.
+        */
+       lnum = find_dirtiest_idx_leb(c);
+       if (lnum == -ENOSPC) {
+               /* Then fall back to scanning the entire LPT */
+               lnum = find_dirty_idx_leb(c);
+
+               /* And as the last resort use index LEBs awaiting trivial GC */
+               if (lnum == -ENOSPC)
+                       lnum = get_idx_gc_leb(c);
+       }
+
+       ubifs_release_lprops(c);
+       return lnum;
+}
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c

new file mode 100644 (file)

index 0000000..d0f3dac
--- /dev/null
+++ b/fs/ubifs/gc.c
@@ -0,0 +1,773 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements garbage collection. The procedure for garbage collection
+ * is different depending on whether a LEB is an index LEB (contains index
+ * nodes) or not. For non-index LEBs, garbage collection finds a LEB which
+ * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete
+ * nodes to the journal, at which point the garbage-collected LEB is free to be
+ * reused. For index LEBs, garbage collection marks the non-obsolete index nodes
+ * dirty in the TNC, and after the next commit, the garbage-collected LEB is
+ * free to be reused. Garbage collection will cause the number of dirty index
+ * nodes to grow, however sufficient space is reserved for the index to ensure
+ * the commit will never run out of space.
+ */
+
+#include <linux/pagemap.h>
+#include "ubifs.h"
+
+/*
+ * GC tries to optimize the way it fits nodes to available space, and it sorts
+ * nodes a little. The below constants are watermarks which define "large",
+ * "medium", and "small" nodes.
+ */
+#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
+#define SMALL_NODE_WM  UBIFS_MAX_DENT_NODE_SZ
+
+/*
+ * GC may need to move more than one LEB to make progress. The below constants
+ * define "soft" and "hard" limits on the number of LEBs the garbage collector
+ * may move.
+ */
+#define SOFT_LEBS_LIMIT 4
+#define HARD_LEBS_LIMIT 32
+
+/**
+ * switch_gc_head - switch the garbage collection journal head.
+ * @c: UBIFS file-system description object
+ *
+ * This function switches the GC head to the next LEB which is reserved in
+ * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required,
+ * and other negative error code in case of failures.
+ */
+static int switch_gc_head(struct ubifs_info *c)
+{
+       int err, gc_lnum = c->gc_lnum;
+       struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+
+       ubifs_assert(gc_lnum != -1);
+       dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)",
+              wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum,
+              c->leb_size - wbuf->offs - wbuf->used);
+
+       /* Flush whatever is still pending for the current GC head LEB */
+       err = ubifs_wbuf_sync_nolock(wbuf);
+       if (err)
+               return err;
+
+       /*
+        * The GC write-buffer was synchronized, we may safely unmap
+        * 'c->gc_lnum'.
+        */
+       err = ubifs_leb_unmap(c, gc_lnum);
+       if (err)
+               return err;
+
+       err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
+       if (err)
+               return err;
+
+       /* The reserved LEB is the GC head now, so nothing is reserved anymore */
+       c->gc_lnum = -1;
+       err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM);
+       return err;
+}
+
+/**
+ * move_nodes - move nodes.
+ * @c: UBIFS file-system description object
+ * @sleb: describes nodes to move
+ *
+ * This function moves valid nodes from data LEB described by @sleb to the GC
+ * journal head. The obsolete nodes are dropped.
+ *
+ * When moving nodes we have to deal with classical bin-packing problem: the
+ * space in the current GC journal head LEB and in @c->gc_lnum are the "bins",
+ * where the nodes in the @sleb->nodes list are the elements which should be
+ * fit optimally to the bins. This function uses the "first fit decreasing"
+ * strategy, although it does not really sort the nodes but just split them on
+ * 3 classes - large, medium, and small, so they are roughly sorted.
+ *
+ * Every node which is taken off the @sleb->nodes list is owned by this
+ * function and is freed here, both on success and on failure.
+ *
+ * This function returns zero in case of success, %-EAGAIN if commit is
+ * required, and other negative error codes in case of other failures.
+ */
+static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
+{
+       struct ubifs_scan_node *snod, *tmp;
+       struct list_head large, medium, small;
+       struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+       int avail, err, min = INT_MAX;
+
+       INIT_LIST_HEAD(&large);
+       INIT_LIST_HEAD(&medium);
+       INIT_LIST_HEAD(&small);
+
+       list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+               struct list_head *lst;
+
+               ubifs_assert(snod->type != UBIFS_IDX_NODE);
+               ubifs_assert(snod->type != UBIFS_REF_NODE);
+               ubifs_assert(snod->type != UBIFS_CS_NODE);
+
+               /* Negative is an error, zero means the node is obsolete */
+               err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
+                                        snod->offs, 0);
+               if (err < 0)
+                       goto out;
+
+               lst = &snod->list;
+               list_del(lst);
+               if (!err) {
+                       /* The node is obsolete, remove it from the list */
+                       kfree(snod);
+                       continue;
+               }
+
+               /*
+                * Sort the list of nodes so that large nodes go first, and
+                * small nodes go last.
+                */
+               if (snod->len > MEDIUM_NODE_WM)
+                       list_add(lst, &large);
+               else if (snod->len > SMALL_NODE_WM)
+                       list_add(lst, &medium);
+               else
+                       list_add(lst, &small);
+
+               /* And find the smallest node */
+               if (snod->len < min)
+                       min = snod->len;
+       }
+
+       /*
+        * Join the three lists so that we'd have one roughly sorted list
+        * ('large' will be the head of the joined list). The source lists are
+        * re-initialized so that the error path may safely look at them.
+        */
+       list_splice_init(&medium, large.prev);
+       list_splice_init(&small, large.prev);
+
+       if (wbuf->lnum == -1) {
+               /*
+                * The GC journal head is not set, because it is the first GC
+                * invocation since mount.
+                */
+               err = switch_gc_head(c);
+               if (err)
+                       goto out;
+       }
+
+       /* Write nodes to their new location. Use the first-fit strategy */
+       while (1) {
+               avail = c->leb_size - wbuf->offs - wbuf->used;
+               list_for_each_entry_safe(snod, tmp, &large, list) {
+                       int new_lnum, new_offs;
+
+                       /* Not even the smallest node fits, stop trying */
+                       if (avail < min)
+                               break;
+
+                       if (snod->len > avail)
+                               /* This node does not fit */
+                               continue;
+
+                       cond_resched();
+
+                       /* Remember the position before the write changes it */
+                       new_lnum = wbuf->lnum;
+                       new_offs = wbuf->offs + wbuf->used;
+                       err = ubifs_wbuf_write_nolock(wbuf, snod->node,
+                                                     snod->len);
+                       if (err)
+                               goto out;
+                       err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
+                                               snod->offs, new_lnum, new_offs,
+                                               snod->len);
+                       if (err)
+                               goto out;
+
+                       avail = c->leb_size - wbuf->offs - wbuf->used;
+                       list_del(&snod->list);
+                       kfree(snod);
+               }
+
+               if (list_empty(&large))
+                       break;
+
+               /*
+                * Waste the rest of the space in the LEB and switch to the
+                * next LEB.
+                */
+               err = switch_gc_head(c);
+               if (err)
+                       goto out;
+       }
+
+       return 0;
+
+out:
+       /*
+        * If we failed before the lists were joined, 'medium' and 'small' may
+        * still own nodes which are no longer on the @sleb->nodes list, so
+        * gather them as well, otherwise they would be leaked.
+        */
+       list_splice_init(&medium, &large);
+       list_splice_init(&small, &large);
+       list_for_each_entry_safe(snod, tmp, &large, list) {
+               list_del(&snod->list);
+               kfree(snod);
+       }
+       return err;
+}
+
+/**
+ * gc_sync_wbufs - sync write-buffers for GC.
+ * @c: UBIFS file-system description object
+ *
+ * Nodes which obsolete other nodes have to be on the flash media, but they
+ * may still be sitting in a write-buffer. Suppose a node was written to a
+ * write-buffer and it obsoletes a node in a LEB which is being GC'd. If that
+ * LEB is erased before the write-buffer is sync'd and an unclean unmount
+ * follows, an existing node is lost. To prevent this, the write-buffers of
+ * all journal heads except the GC head are synchronized here.
+ *
+ * This function returns %0 on success or a negative error code on failure.
+ */
+static int gc_sync_wbufs(struct ubifs_info *c)
+{
+       int jhead;
+
+       for (jhead = 0; jhead < c->jhead_cnt; jhead++) {
+               int err;
+
+               /* The write-buffer of the GC head is not touched here */
+               if (jhead != GCHD) {
+                       err = ubifs_wbuf_sync(&c->jheads[jhead].wbuf);
+                       if (err)
+                               return err;
+               }
+       }
+       return 0;
+}
+
+/**
+ * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock.
+ * @c: UBIFS file-system description object
+ * @lp: describes the LEB to garbage collect
+ *
+ * This function garbage-collects an LEB and returns one of the @LEB_FREED,
+ * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is
+ * required, and other negative error codes in case of failures.
+ *
+ * The positive codes returned by this function are:
+ *   o %LEB_FREED_IDX if an indexing LEB was garbage-collected;
+ *   o %LEB_RETAINED if a data LEB was garbage-collected and kept as the new
+ *     @c->gc_lnum;
+ *   o %LEB_FREED if a data LEB was garbage-collected and unmapped.
+ */
+int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
+{
+       struct ubifs_scan_leb *sleb;
+       struct ubifs_scan_node *snod;
+       struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+       int err = 0, lnum = lp->lnum;
+
+       ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 ||
+                    c->need_recovery);
+       ubifs_assert(c->gc_lnum != lnum);
+       ubifs_assert(wbuf->lnum != lnum);
+
+       /*
+        * We scan the entire LEB even though we only really need to scan up to
+        * (c->leb_size - lp->free).
+        */
+       sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+       if (IS_ERR(sleb))
+               return PTR_ERR(sleb);
+
+       ubifs_assert(!list_empty(&sleb->nodes));
+       /* The type of the first node tells whether this is an indexing LEB */
+       snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
+
+       if (snod->type == UBIFS_IDX_NODE) {
+               struct ubifs_gced_idx_leb *idx_gc;
+
+               dbg_gc("indexing LEB %d (free %d, dirty %d)",
+                      lnum, lp->free, lp->dirty);
+               /* Index nodes are not moved, they are only marked as dirty */
+               list_for_each_entry(snod, &sleb->nodes, list) {
+                       struct ubifs_idx_node *idx = snod->node;
+                       int level = le16_to_cpu(idx->level);
+
+                       ubifs_assert(snod->type == UBIFS_IDX_NODE);
+                       key_read(c, ubifs_idx_key(c, idx), &snod->key);
+                       err = ubifs_dirty_idx_node(c, &snod->key, level, lnum,
+                                                  snod->offs);
+                       if (err)
+                               goto out;
+               }
+
+               idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
+               if (!idx_gc) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+
+               /* Not OK to unmap yet, 'ubifs_gc_start_commit()' changes that */
+               idx_gc->lnum = lnum;
+               idx_gc->unmap = 0;
+               list_add(&idx_gc->list, &c->idx_gc);
+
+               /*
+                * Don't release the LEB until after the next commit, because
+                * it may contain data which is needed for recovery. So
+                * although we freed this LEB, it will become usable only after
+                * the commit.
+                */
+               err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0,
+                                         LPROPS_INDEX, 1);
+               if (err)
+                       goto out;
+               err = LEB_FREED_IDX;
+       } else {
+               dbg_gc("data LEB %d (free %d, dirty %d)",
+                      lnum, lp->free, lp->dirty);
+
+               err = move_nodes(c, sleb);
+               if (err)
+                       goto out;
+
+               err = gc_sync_wbufs(c);
+               if (err)
+                       goto out;
+
+               err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
+               if (err)
+                       goto out;
+
+               if (c->gc_lnum == -1) {
+                       /* No LEB is reserved for GC, so keep this one for it */
+                       c->gc_lnum = lnum;
+                       err = LEB_RETAINED;
+               } else {
+                       err = ubifs_wbuf_sync_nolock(wbuf);
+                       if (err)
+                               goto out;
+
+                       err = ubifs_leb_unmap(c, lnum);
+                       if (err)
+                               goto out;
+
+                       err = LEB_FREED;
+               }
+       }
+
+out:
+       /* Both the success and the failure paths end up here */
+       ubifs_scan_destroy(sleb);
+       return err;
+}
+
+/**
+ * ubifs_garbage_collect - UBIFS garbage collector.
+ * @c: UBIFS file-system description object
+ * @anyway: do GC even if there are free LEBs
+ *
+ * This function does out-of-place garbage collection. The return codes are:
+ *   o positive LEB number if the LEB has been freed and may be used;
+ *   o %-EAGAIN if the caller has to run commit;
+ *   o %-ENOSPC if GC failed to make any progress;
+ *   o other negative error codes in case of other errors.
+ *
+ * Garbage collector writes data to the journal when GC'ing data LEBs, and just
+ * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point
+ * commit may be required. But commit cannot be run from inside GC, because the
+ * caller might be holding the commit lock, so %-EAGAIN is returned instead;
+ * And this error code means that the caller has to run commit, and re-run GC
+ * if there is still no free space.
+ *
+ * There are many reasons why this function may return %-EAGAIN:
+ * o the log is full and there is no space to write an LEB reference for
+ *   @c->gc_lnum;
+ * o the journal is too large and exceeds size limitations;
+ * o GC moved indexing LEBs, but they can be used only after the commit;
+ * o the shrinker fails to find clean znodes to free and requests the commit;
+ * o etc.
+ *
+ * Note, if the file-system is close to be full, this function may return
+ * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of
+ * the function. E.g., this happens if the limits on the journal size are too
+ * tough and GC writes too much to the journal before an LEB is freed. This
+ * might also mean that the journal is too large, and the TNC becomes too big,
+ * so that the shrinker is constantly called, finds no clean znodes to free,
+ * and requests commit. Well, this may also happen if the journal is all right,
+ * but another kernel process consumes too much memory. Anyway, infinite
+ * %-EAGAIN may happen, but in some extreme/misconfiguration cases.
+ */
+int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
+{
+       int i, err, ret, min_space = c->dead_wm;
+       struct ubifs_lprops lp;
+       struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+
+       ubifs_assert_cmt_locked(c);
+
+       if (ubifs_gc_should_commit(c))
+               return -EAGAIN;
+
+       mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+
+       if (c->ro_media) {
+               ret = -EROFS;
+               goto out_unlock;
+       }
+
+       /* We expect the write-buffer to be empty on entry */
+       ubifs_assert(!wbuf->used);
+
+       for (i = 0; ; i++) {
+               int space_before = c->leb_size - wbuf->offs - wbuf->used;
+               int space_after;
+
+               cond_resched();
+
+               /* Give the commit an opportunity to run */
+               if (ubifs_gc_should_commit(c)) {
+                       ret = -EAGAIN;
+                       break;
+               }
+
+               if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) {
+                       /*
+                        * We've done enough iterations. Indexing LEBs were
+                        * moved and will be available after the commit.
+                        */
+                       dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN");
+                       ubifs_commit_required(c);
+                       ret = -EAGAIN;
+                       break;
+               }
+
+               if (i > HARD_LEBS_LIMIT) {
+                       /*
+                        * We've moved too many LEBs and have not made
+                        * progress, give up.
+                        */
+                       dbg_gc("hard limit, -ENOSPC");
+                       ret = -ENOSPC;
+                       break;
+               }
+
+               /*
+                * Empty and freeable LEBs can turn up while we waited for
+                * the wbuf lock, or while we have been running GC. In that
+                * case, we should just return one of those instead of
+                * continuing to GC dirty LEBs. Hence we request
+                * 'ubifs_find_dirty_leb()' to return an empty LEB if it can.
+                */
+               ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1);
+               if (ret) {
+                       if (ret == -ENOSPC)
+                               dbg_gc("no more dirty LEBs");
+                       break;
+               }
+
+               dbg_gc("found LEB %d: free %d, dirty %d, sum %d "
+                      "(min. space %d)", lp.lnum, lp.free, lp.dirty,
+                      lp.free + lp.dirty, min_space);
+
+               if (lp.free + lp.dirty == c->leb_size) {
+                       /* An empty LEB was returned */
+                       dbg_gc("LEB %d is free, return it", lp.lnum);
+                       /*
+                        * ubifs_find_dirty_leb() doesn't return freeable index
+                        * LEBs.
+                        */
+                       ubifs_assert(!(lp.flags & LPROPS_INDEX));
+                       if (lp.free != c->leb_size) {
+                               /*
+                                * Write buffers must be sync'd before
+                                * unmapping freeable LEBs, because one of them
+                                * may contain data which obsoletes something
+                                * in 'lp.lnum'.
+                                */
+                               ret = gc_sync_wbufs(c);
+                               if (ret)
+                                       goto out;
+                               ret = ubifs_change_one_lp(c, lp.lnum,
+                                                         c->leb_size, 0, 0, 0,
+                                                         0);
+                               if (ret)
+                                       goto out;
+                       }
+                       ret = ubifs_leb_unmap(c, lp.lnum);
+                       if (ret)
+                               goto out;
+                       ret = lp.lnum;
+                       break;
+               }
+
+               /* Measure the GC head space now to detect progress later */
+               space_before = c->leb_size - wbuf->offs - wbuf->used;
+               if (wbuf->lnum == -1)
+                       space_before = 0;
+
+               ret = ubifs_garbage_collect_leb(c, &lp);
+               if (ret < 0) {
+                       if (ret == -EAGAIN || ret == -ENOSPC) {
+                               /*
+                                * These codes are not errors, so we have to
+                                * return the LEB to lprops. But if the
+                                * 'ubifs_return_leb()' function fails, its
+                                * failure code is propagated to the caller
+                                * instead of the original '-EAGAIN' or
+                                * '-ENOSPC'.
+                                */
+                               err = ubifs_return_leb(c, lp.lnum);
+                               if (err)
+                                       ret = err;
+                               break;
+                       }
+                       goto out;
+               }
+
+               if (ret == LEB_FREED) {
+                       /* An LEB has been freed and is ready for use */
+                       dbg_gc("LEB %d freed, return", lp.lnum);
+                       ret = lp.lnum;
+                       break;
+               }
+
+               if (ret == LEB_FREED_IDX) {
+                       /*
+                        * This was an indexing LEB and it cannot be
+                        * immediately used. And instead of requesting the
+                        * commit straight away, we try to garbage collect some
+                        * more.
+                        */
+                       dbg_gc("indexing LEB %d freed, continue", lp.lnum);
+                       continue;
+               }
+
+               ubifs_assert(ret == LEB_RETAINED);
+               space_after = c->leb_size - wbuf->offs - wbuf->used;
+               dbg_gc("LEB %d retained, freed %d bytes", lp.lnum,
+                      space_after - space_before);
+
+               if (space_after > space_before) {
+                       /* GC makes progress, keep working */
+                       min_space >>= 1;
+                       if (min_space < c->dead_wm)
+                               min_space = c->dead_wm;
+                       continue;
+               }
+
+               dbg_gc("did not make progress");
+
+               /*
+                * GC moved an LEB but has not made any progress. This means
+                * that the previous GC head LEB contained too little free space
+                * and the LEB which was GC'ed contained only large nodes which
+                * did not fit that space.
+                *
+                * We can do 2 things:
+                * 1. pick another LEB in a hope it'll contain a small node
+                *    which will fit the space we have at the end of current GC
+                *    head LEB, but there is no guarantee, so we try this out
+                *    unless we have already been working for too long;
+                * 2. request an LEB with more dirty space, which will force
+                *    'ubifs_find_dirty_leb()' to start scanning the lprops
+                *    table, instead of just picking one from the heap
+                *    (previously it already picked the dirtiest LEB).
+                */
+               if (i < SOFT_LEBS_LIMIT) {
+                       dbg_gc("try again");
+                       continue;
+               }
+
+               min_space <<= 1;
+               if (min_space > c->dark_wm)
+                       min_space = c->dark_wm;
+               dbg_gc("set min. space to %d", min_space);
+       }
+
+       /* Index LEBs were GC'ed, so a commit may help - ask for it instead */
+       if (ret == -ENOSPC && !list_empty(&c->idx_gc)) {
+               dbg_gc("no space, some index LEBs GC'ed, -EAGAIN");
+               ubifs_commit_required(c);
+               ret = -EAGAIN;
+       }
+
+       err = ubifs_wbuf_sync_nolock(wbuf);
+       if (!err)
+               err = ubifs_leb_unmap(c, c->gc_lnum);
+       if (err) {
+               ret = err;
+               goto out;
+       }
+out_unlock:
+       mutex_unlock(&wbuf->io_mutex);
+       return ret;
+
+out:
+       /* Real errors only: switch to read-only mode and give the LEB back */
+       ubifs_assert(ret < 0);
+       ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
+       ubifs_ro_mode(c, ret);
+       ubifs_wbuf_sync_nolock(wbuf);
+       mutex_unlock(&wbuf->io_mutex);
+       /*
+        * NOTE(review): 'lp' does not seem to be filled in if the loop above
+        * was left before 'ubifs_find_dirty_leb()' succeeded and then the
+        * final sync or unmap failed - confirm 'lp.lnum' is valid here.
+        */
+       ubifs_return_leb(c, lp.lnum);
+       return ret;
+}
+
+/**
+ * ubifs_gc_start_commit - garbage collection at start of commit.
+ * @c: UBIFS file-system description object
+ *
+ * If a LEB has only dirty and free space, then we may safely unmap it and make
+ * it free.  Note, we cannot do this with indexing LEBs because dirty space may
+ * correspond to index nodes that are required for recovery.  In that case, the
+ * LEB cannot be unmapped until after the next commit.
+ *
+ * This function returns %0 upon success and a negative error code upon failure.
+ */
+int ubifs_gc_start_commit(struct ubifs_info *c)
+{
+       struct ubifs_gced_idx_leb *idx_gc;
+       const struct ubifs_lprops *lp;
+       int err = 0, flags;
+
+       ubifs_get_lprops(c);
+
+       /*
+        * Unmap (non-index) freeable LEBs. Note that recovery requires that all
+        * wbufs are sync'd before this, which is done in 'do_commit()'.
+        */
+       while (1) {
+               lp = ubifs_fast_find_freeable(c);
+               if (unlikely(IS_ERR(lp))) {
+                       err = PTR_ERR(lp);
+                       goto out;
+               }
+               /* %NULL means there are no more freeable LEBs */
+               if (!lp)
+                       break;
+               ubifs_assert(!(lp->flags & LPROPS_TAKEN));
+               ubifs_assert(!(lp->flags & LPROPS_INDEX));
+               err = ubifs_leb_unmap(c, lp->lnum);
+               if (err)
+                       goto out;
+               /* The whole LEB is free space now and none of it is dirty */
+               lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
+               if (unlikely(IS_ERR(lp))) {
+                       err = PTR_ERR(lp);
+                       goto out;
+               }
+               ubifs_assert(!(lp->flags & LPROPS_TAKEN));
+               ubifs_assert(!(lp->flags & LPROPS_INDEX));
+       }
+
+       /* Mark GC'd index LEBs OK to unmap after this commit finishes */
+       list_for_each_entry(idx_gc, &c->idx_gc, list)
+               idx_gc->unmap = 1;
+
+       /* Record index freeable LEBs for unmapping after commit */
+       while (1) {
+               lp = ubifs_fast_find_frdi_idx(c);
+               if (unlikely(IS_ERR(lp))) {
+                       err = PTR_ERR(lp);
+                       goto out;
+               }
+               /* %NULL means there are no more freeable index LEBs */
+               if (!lp)
+                       break;
+               idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
+               if (!idx_gc) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               ubifs_assert(!(lp->flags & LPROPS_TAKEN));
+               ubifs_assert(lp->flags & LPROPS_INDEX);
+               /* Don't release the LEB until after the next commit */
+               /*
+                * Set the taken flag and clear the index flag. XOR clears it
+                * because the flag is expected to be set, see the assertion
+                * above.
+                */
+               flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
+               lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
+               if (unlikely(IS_ERR(lp))) {
+                       err = PTR_ERR(lp);
+                       kfree(idx_gc);
+                       goto out;
+               }
+               ubifs_assert(lp->flags & LPROPS_TAKEN);
+               ubifs_assert(!(lp->flags & LPROPS_INDEX));
+               idx_gc->lnum = lp->lnum;
+               idx_gc->unmap = 1;
+               list_add(&idx_gc->list, &c->idx_gc);
+       }
+out:
+       ubifs_release_lprops(c);
+       return err;
+}
+
+/**
+ * ubifs_gc_end_commit - garbage collection at end of commit.
+ * @c: UBIFS file-system description object
+ *
+ * This function finishes out-of-place garbage collection of index LEBs. Every
+ * LEB on the @c->idx_gc list which is marked as OK to unmap is unmapped, its
+ * LEB properties are updated, and its entry is removed from the list and
+ * freed. The work is done under the GC head write-buffer mutex. Returns %0 in
+ * case of success and a negative error code in case of failure.
+ */
+int ubifs_gc_end_commit(struct ubifs_info *c)
+{
+       struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+       struct ubifs_gced_idx_leb *cur, *next;
+       int err = 0;
+
+       mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+       list_for_each_entry_safe(cur, next, &c->idx_gc, list) {
+               /* Entries which are not yet OK to unmap stay on the list */
+               if (!cur->unmap)
+                       continue;
+
+               dbg_gc("LEB %d", cur->lnum);
+               err = ubifs_leb_unmap(c, cur->lnum);
+               if (!err)
+                       err = ubifs_change_one_lp(c, cur->lnum, LPROPS_NC,
+                                         LPROPS_NC, 0, LPROPS_TAKEN, -1);
+               if (err)
+                       break;
+
+               list_del(&cur->list);
+               kfree(cur);
+       }
+       mutex_unlock(&wbuf->io_mutex);
+       return err;
+}
+
+/**
+ * ubifs_destroy_idx_gc - destroy idx_gc list.
+ * @c: UBIFS file-system description object
+ *
+ * This function frees every entry of the @c->idx_gc list and decrements
+ * @c->idx_gc_cnt for each of them. It is called when unmounting or
+ * remounting read-only so locks are not needed.
+ */
+void ubifs_destroy_idx_gc(struct ubifs_info *c)
+{
+       struct ubifs_gced_idx_leb *idx_gc, *tmp;
+
+       list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list) {
+               c->idx_gc_cnt -= 1;
+               list_del(&idx_gc->list);
+               kfree(idx_gc);
+       }
+}
+
+/**
+ * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list.
+ * @c: UBIFS file-system description object
+ *
+ * This function removes the first entry of the @c->idx_gc list and returns
+ * its LEB number, or %-ENOSPC if the list is empty. Called during start
+ * commit so locks are not needed.
+ */
+int ubifs_get_idx_gc_leb(struct ubifs_info *c)
+{
+       struct ubifs_gced_idx_leb *first;
+       int lnum = -ENOSPC;
+
+       if (!list_empty(&c->idx_gc)) {
+               first = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb,
+                                  list);
+               lnum = first->lnum;
+               /* The caller updates c->idx_gc_cnt when it updates lprops */
+               list_del(&first->list);
+               kfree(first);
+       }
+       return lnum;
+}
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c

new file mode 100644 (file)

index 0000000..3374f91
--- /dev/null
+++ b/fs/ubifs/io.c
@@ -0,0 +1,914 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ * Copyright (C) 2006, 2007 University of Szeged, Hungary
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ *          Zoltan Sogor
+ */
+
+/*
+ * This file implements UBIFS I/O subsystem which provides various I/O-related
+ * helper functions (reading/writing/checking/validating nodes) and implements
+ * write-buffering support. Write buffers help to save space which otherwise
+ * would have been wasted for padding to the nearest minimal I/O unit boundary.
+ * Instead, data first goes to the write-buffer and is flushed when the
+ * buffer is full or when it is not used for some time (by timer). This is
+ * similar to the mechanism used by JFFS2.
+ *
+ * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
+ * mutexes defined inside these objects. Since sometimes upper-level code
+ * has to lock the write-buffer (e.g. journal space reservation code), many
+ * functions related to write-buffers have "nolock" suffix which means that the
+ * caller has to lock the write-buffer before calling this function.
+ *
+ * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not
+ * aligned, UBIFS starts the next node from the aligned address, and the padded
+ * bytes may contain any rubbish. In other words, UBIFS does not put padding
+ * bytes in those small gaps. Common headers of nodes store real node lengths,
+ * not aligned lengths. Indexing nodes also store real lengths in branches.
+ *
+ * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
+ * uses padding nodes or padding bytes, if the padding node does not fit.
+ *
+ * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes
+ * every time they are read from the flash media.
+ */
+
+#include <linux/crc32.h>
+#include "ubifs.h"
+
+/**
+ * ubifs_check_node - check node.
+ * @c: UBIFS file-system description object
+ * @buf: node to check
+ * @lnum: logical eraseblock number
+ * @offs: offset within the logical eraseblock
+ * @quiet: print no messages
+ *
+ * This function checks node magic number and CRC checksum. This function also
+ * validates node length to prevent UBIFS from becoming crazy when an attacker
+ * feeds it a file-system image with incorrect nodes. For example, too large
+ * node length in the common header could cause UBIFS to read memory outside of
+ * allocated buffer when checking the CRC checksum.
+ *
+ * The node type is validated against %UBIFS_NODE_TYPES_CNT and the node length
+ * against @c->leb_size and the per-type limits stored in @c->ranges[]. The CRC
+ * is calculated over the node contents starting 8 bytes into @buf.
+ *
+ * This function returns zero in case of success, %-EUCLEAN in case of bad CRC
+ * or magic, and %-EINVAL in case of a bad node type or a bad node length.
+ */
+int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
+                    int offs, int quiet)
+{
+       int err = -EINVAL, type, node_len;
+       uint32_t crc, node_crc, magic;
+       const struct ubifs_ch *ch = buf;
+
+       ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+       ubifs_assert(!(offs & 7) && offs < c->leb_size);
+
+       magic = le32_to_cpu(ch->magic);
+       if (magic != UBIFS_NODE_MAGIC) {
+               if (!quiet)
+                       ubifs_err("bad magic %#08x, expected %#08x",
+                                 magic, UBIFS_NODE_MAGIC);
+               err = -EUCLEAN;
+               goto out;
+       }
+
+       type = ch->node_type;
+       if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
+               if (!quiet)
+                       ubifs_err("bad node type %d", type);
+               goto out;
+       }
+
+       node_len = le32_to_cpu(ch->len);
+       if (node_len + offs > c->leb_size)
+               goto out_len;
+
+       if (c->ranges[type].max_len == 0) {
+               if (node_len != c->ranges[type].len)
+                       goto out_len;
+       } else if (node_len < c->ranges[type].min_len ||
+                  node_len > c->ranges[type].max_len)
+               goto out_len;
+
+       crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
+       node_crc = le32_to_cpu(ch->crc);
+       if (crc != node_crc) {
+               if (!quiet)
+                       ubifs_err("bad CRC: calculated %#08x, read %#08x",
+                                 crc, node_crc);
+               err = -EUCLEAN;
+               goto out;
+       }
+
+       return 0;
+
+out_len:
+       if (!quiet)
+               ubifs_err("bad node length %d", node_len);
+out:
+       if (!quiet) {
+               ubifs_err("bad node at LEB %d:%d", lnum, offs);
+               dbg_dump_node(c, buf);
+               dbg_dump_stack();
+       }
+       return err;
+}
+
+/**
+ * ubifs_pad - pad flash space.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to put padding to
+ * @pad: how many bytes to pad
+ *
+ * Flash may only be written in chunks of %c->min_io_size, so when less data
+ * has to be written the remainder of the min. I/O unit is padded. If @pad is
+ * at least %UBIFS_PAD_NODE_SZ bytes, a padding node is put at @buf and the
+ * space after it is zeroed; padding nodes help when the media is being
+ * scanned. Otherwise the padding node does not fit and the gap is filled with
+ * the %UBIFS_PADDING_BYTE pattern.
+ *
+ * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is
+ * used.
+ */
+void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
+{
+       struct ubifs_pad_node *pad_node = buf;
+       struct ubifs_ch *ch = buf;
+       uint32_t crc;
+
+       ubifs_assert(pad >= 0 && !(pad & 7));
+
+       if (pad < UBIFS_PAD_NODE_SZ) {
+               /* Too little space, padding node won't fit */
+               if (pad > 0)
+                       memset(buf, UBIFS_PADDING_BYTE, pad);
+               return;
+       }
+
+       /* From here on @pad is what remains after the padding node itself */
+       pad -= UBIFS_PAD_NODE_SZ;
+
+       ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
+       ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ);
+       ch->sqnum = 0;
+       ch->node_type = UBIFS_PAD_NODE;
+       ch->group_type = UBIFS_NO_NODE_GROUP;
+       ch->padding[0] = 0;
+       ch->padding[1] = 0;
+       pad_node->pad_len = cpu_to_le32(pad);
+
+       crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8);
+       ch->crc = cpu_to_le32(crc);
+
+       memset(buf + UBIFS_PAD_NODE_SZ, 0, pad);
+}
+
+/**
+ * next_sqnum - get next sequence number.
+ * @c: UBIFS file-system description object
+ *
+ * Increments @c->max_sqnum under @c->cnt_lock and returns the new value. A
+ * warning is printed once the value reaches %SQNUM_WARN_WATERMARK; once it
+ * reaches %SQNUM_WATERMARK an error is printed as well and ubifs_ro_mode() is
+ * called.
+ */
+static unsigned long long next_sqnum(struct ubifs_info *c)
+{
+       unsigned long long seq;
+
+       spin_lock(&c->cnt_lock);
+       c->max_sqnum += 1;
+       seq = c->max_sqnum;
+       spin_unlock(&c->cnt_lock);
+
+       if (likely(seq < SQNUM_WARN_WATERMARK))
+               return seq;
+
+       if (seq >= SQNUM_WATERMARK) {
+               ubifs_err("sequence number overflow %llu, end of life",
+                         seq);
+               ubifs_ro_mode(c, -EINVAL);
+       }
+       ubifs_warn("running out of sequence numbers, end of life soon");
+
+       return seq;
+}
+
+/**
+ * ubifs_prepare_node - prepare node to be written to flash.
+ * @c: UBIFS file-system description object
+ * @node: the node to prepare
+ * @len: node length
+ * @pad: if the buffer has to be padded
+ *
+ * This function gets the node at @node ready for the media: it assigns a new
+ * sequence number, fills the common header, calculates the node CRC and, if
+ * @pad is not zero, pads the space after the (8-byte aligned) node up to the
+ * next minimum I/O unit boundary.
+ */
+void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
+{
+       struct ubifs_ch *hdr = node;
+       unsigned long long seq = next_sqnum(c);
+       int aligned;
+
+       ubifs_assert(len >= UBIFS_CH_SZ);
+
+       hdr->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
+       hdr->len = cpu_to_le32(len);
+       hdr->sqnum = cpu_to_le64(seq);
+       hdr->group_type = UBIFS_NO_NODE_GROUP;
+       hdr->padding[0] = 0;
+       hdr->padding[1] = 0;
+
+       /* CRC covers everything past the first 8 bytes of the node */
+       hdr->crc = cpu_to_le32(crc32(UBIFS_CRC32_INIT, node + 8, len - 8));
+
+       if (!pad)
+               return;
+
+       aligned = ALIGN(len, 8);
+       ubifs_pad(c, node + aligned, ALIGN(aligned, c->min_io_size) - aligned);
+}
+
+/**
+ * ubifs_prep_grp_node - prepare node of a group to be written to flash.
+ * @c: UBIFS file-system description object
+ * @node: the node to prepare
+ * @len: node length
+ * @last: indicates the last node of the group
+ *
+ * This function gets the node at @node ready for the media: it assigns a new
+ * sequence number, fills the common header (marking the node as the last one
+ * of its group if @last is not zero) and calculates the node CRC. No padding
+ * is added.
+ */
+void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
+{
+       struct ubifs_ch *hdr = node;
+       unsigned long long seq = next_sqnum(c);
+
+       ubifs_assert(len >= UBIFS_CH_SZ);
+
+       hdr->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
+       hdr->len = cpu_to_le32(len);
+       hdr->sqnum = cpu_to_le64(seq);
+       hdr->group_type = last ? UBIFS_LAST_OF_NODE_GROUP :
+                                UBIFS_IN_NODE_GROUP;
+       hdr->padding[0] = 0;
+       hdr->padding[1] = 0;
+
+       /* CRC is taken last, once every other header field is final */
+       hdr->crc = cpu_to_le32(crc32(UBIFS_CRC32_INIT, node + 8, len - 8));
+}
+
+/**
+ * wbuf_timer_callback_nolock - write-buffer timer callback function.
+ * @data: timer data (write-buffer descriptor)
+ *
+ * This function is called when the write-buffer timer expires. It flags both
+ * the write-buffer and the file-system object as needing synchronization and
+ * wakes up the background thread.
+ */
+static void wbuf_timer_callback_nolock(unsigned long data)
+{
+       struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data;
+       struct ubifs_info *c = wbuf->c;
+
+       wbuf->need_sync = 1;
+       c->need_wbuf_sync = 1;
+       ubifs_wake_up_bgt(c);
+}
+
+/**
+ * new_wbuf_timer_nolock - start new write-buffer timer.
+ * @wbuf: write-buffer descriptor
+ *
+ * Arms the write-buffer timer to expire @wbuf->timeout jiffies from now. Does
+ * nothing if @wbuf->timeout is zero. The timer must not be pending.
+ */
+static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
+{
+       ubifs_assert(!timer_pending(&wbuf->timer));
+
+       if (wbuf->timeout) {
+               wbuf->timer.expires = jiffies + wbuf->timeout;
+               add_timer(&wbuf->timer);
+       }
+}
+
+/**
+ * cancel_wbuf_timer_nolock - cancel write-buffer timer.
+ * @wbuf: write-buffer descriptor
+ *
+ * Clears @wbuf->need_sync and deletes the write-buffer timer.
+ */
+static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
+{
+       /*
+        * If the syncer is waiting for the lock (from the background thread's
+        * context) and another task is changing write-buffer then the syncing
+        * should be canceled.
+        */
+       wbuf->need_sync = 0;
+       del_timer(&wbuf->timer);
+}
+
+/**
+ * ubifs_wbuf_sync_nolock - synchronize write-buffer.
+ * @wbuf: write-buffer to synchronize
+ *
+ * This function synchronizes write-buffer @wbuf and returns zero in case of
+ * success or a negative error code in case of failure.
+ *
+ * The write-buffer timer is cancelled first. If the write-buffer is empty or
+ * has not been seeked, nothing is written and zero is returned. If the media
+ * is read-only (@c->ro_media), %-EROFS is returned. Otherwise the unused part
+ * of the buffer is padded, one min. I/O unit is written to the flash media,
+ * the write-buffer is moved to the next min. I/O unit and, if
+ * @wbuf->sync_callback is set, it is called with the space left in the LEB
+ * after the new offset and the number of bytes which were padded; its return
+ * value is then returned.
+ */
+int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
+{
+       struct ubifs_info *c = wbuf->c;
+       int err, dirt;
+
+       cancel_wbuf_timer_nolock(wbuf);
+       if (!wbuf->used || wbuf->lnum == -1)
+               /* Write-buffer is empty or not seeked */
+               return 0;
+
+       dbg_io("LEB %d:%d, %d bytes",
+              wbuf->lnum, wbuf->offs, wbuf->used);
+       ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
+       ubifs_assert(!(wbuf->avail & 7));
+       ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
+
+       if (c->ro_media)
+               return -EROFS;
+
+       ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
+       err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
+                           c->min_io_size, wbuf->dtype);
+       if (err) {
+               ubifs_err("cannot write %d bytes to LEB %d:%d",
+                         c->min_io_size, wbuf->lnum, wbuf->offs);
+               dbg_dump_stack();
+               return err;
+       }
+
+       dirt = wbuf->avail;
+
+       spin_lock(&wbuf->lock);
+       wbuf->offs += c->min_io_size;
+       wbuf->avail = c->min_io_size;
+       wbuf->used = 0;
+       wbuf->next_ino = 0;
+       spin_unlock(&wbuf->lock);
+
+       if (wbuf->sync_callback)
+               err = wbuf->sync_callback(c, wbuf->lnum,
+                                         c->leb_size - wbuf->offs, dirt);
+       return err;
+}
+
+/**
+ * ubifs_wbuf_seek_nolock - seek write-buffer.
+ * @wbuf: write-buffer
+ * @lnum: logical eraseblock number to seek to
+ * @offs: logical eraseblock offset to seek to
+ * @dtype: data type
+ *
+ * This function points the write-buffer at logical eraseblock @lnum:@offs and
+ * records @dtype for subsequent writes. If the write-buffer still holds data,
+ * it is synchronized first. Returns zero in case of success and a negative
+ * error code in case of failure.
+ */
+int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
+                          int dtype)
+{
+       const struct ubifs_info *c = wbuf->c;
+       int err;
+
+       dbg_io("LEB %d:%d", lnum, offs);
+       ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
+       ubifs_assert(offs >= 0 && offs <= c->leb_size);
+       ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
+       ubifs_assert(lnum != wbuf->lnum);
+
+       /* Push out whatever is still buffered for the old position */
+       if (wbuf->used > 0) {
+               err = ubifs_wbuf_sync_nolock(wbuf);
+               if (err)
+                       return err;
+       }
+
+       spin_lock(&wbuf->lock);
+       wbuf->used = 0;
+       wbuf->avail = c->min_io_size;
+       wbuf->offs = offs;
+       wbuf->lnum = lnum;
+       spin_unlock(&wbuf->lock);
+
+       wbuf->dtype = dtype;
+       return 0;
+}
+
+/**
+ * ubifs_bg_wbufs_sync - synchronize write-buffers.
+ * @c: UBIFS file-system description object
+ *
+ * This function is called by background thread to synchronize write-buffers.
+ * It does nothing unless @c->need_wbuf_sync is set, and clears that flag
+ * before doing any work. Write-buffers whose @io_mutex is currently locked, or
+ * which do not have @need_sync set, are skipped. If the media is read-only
+ * (%-EROFS) or a write-buffer fails to synchronize (in which case
+ * ubifs_ro_mode() is called), the timers of all write-buffers are cancelled
+ * before returning the error.
+ *
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubifs_bg_wbufs_sync(struct ubifs_info *c)
+{
+       int err, i;
+
+       if (!c->need_wbuf_sync)
+               return 0;
+       c->need_wbuf_sync = 0;
+
+       if (c->ro_media) {
+               err = -EROFS;
+               goto out_timers;
+       }
+
+       dbg_io("synchronize");
+       for (i = 0; i < c->jhead_cnt; i++) {
+               struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
+
+               cond_resched();
+
+               /*
+                * If the mutex is locked then wbuf is being changed, so
+                * synchronization is not necessary.
+                */
+               if (mutex_is_locked(&wbuf->io_mutex))
+                       continue;
+
+               mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+               if (!wbuf->need_sync) {
+                       mutex_unlock(&wbuf->io_mutex);
+                       continue;
+               }
+
+               err = ubifs_wbuf_sync_nolock(wbuf);
+               mutex_unlock(&wbuf->io_mutex);
+               if (err) {
+                       ubifs_err("cannot sync write-buffer, error %d", err);
+                       ubifs_ro_mode(c, err);
+                       goto out_timers;
+               }
+       }
+
+       return 0;
+
+out_timers:
+       /* Cancel all timers to prevent repeated errors */
+       for (i = 0; i < c->jhead_cnt; i++) {
+               struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
+
+               mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+               cancel_wbuf_timer_nolock(wbuf);
+               mutex_unlock(&wbuf->io_mutex);
+       }
+       return err;
+}
+
+/**
+ * ubifs_wbuf_write_nolock - write data to flash via write-buffer.
+ * @wbuf: write-buffer
+ * @buf: node to write
+ * @len: node length
+ *
+ * This function writes data to flash via write-buffer @wbuf. This means that
+ * the last piece of the node won't reach the flash media immediately if it
+ * does not take whole minimal I/O unit. Instead, the node will sit in RAM
+ * until the write-buffer is synchronized (e.g., by timer).
+ *
+ * The caller has to hold @wbuf->io_mutex. The write-buffer timer is cancelled
+ * before any data is copied and is started again on success if some data is
+ * left in the write-buffer. After the data has been written or buffered,
+ * @wbuf->sync_callback (if set) is called with the amount of space left in
+ * the logical eraseblock.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. If the node cannot be written because there is no more
+ * space in this logical eraseblock, %-ENOSPC is returned. If the media is
+ * read-only (@c->ro_media), %-EROFS is returned.
+ */
+int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
+{
+       struct ubifs_info *c = wbuf->c;
+       int err, written, n, aligned_len = ALIGN(len, 8), offs;
+
+       dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len,
+              dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum,
+              wbuf->offs + wbuf->used);
+       ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
+       ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
+       ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
+       ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
+       ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
+
+       if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
+               err = -ENOSPC;
+               goto out;
+       }
+
+       cancel_wbuf_timer_nolock(wbuf);
+
+       if (c->ro_media)
+               return -EROFS;
+
+       if (aligned_len <= wbuf->avail) {
+               /*
+                * The node is not very large and fits entirely within
+                * write-buffer.
+                */
+               memcpy(wbuf->buf + wbuf->used, buf, len);
+
+               if (aligned_len == wbuf->avail) {
+                       dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum,
+                               wbuf->offs);
+                       err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
+                                           wbuf->offs, c->min_io_size,
+                                           wbuf->dtype);
+                       if (err)
+                               goto out;
+
+                       spin_lock(&wbuf->lock);
+                       wbuf->offs += c->min_io_size;
+                       wbuf->avail = c->min_io_size;
+                       wbuf->used = 0;
+                       wbuf->next_ino = 0;
+                       spin_unlock(&wbuf->lock);
+               } else {
+                       spin_lock(&wbuf->lock);
+                       wbuf->avail -= aligned_len;
+                       wbuf->used += aligned_len;
+                       spin_unlock(&wbuf->lock);
+               }
+
+               goto exit;
+       }
+
+       /*
+        * The node is large enough and does not fit entirely within current
+        * minimal I/O unit. We have to fill and flush write-buffer and switch
+        * to the next min. I/O unit.
+        */
+       dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs);
+       memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
+       err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
+                           c->min_io_size, wbuf->dtype);
+       if (err)
+               goto out;
+
+       offs = wbuf->offs + c->min_io_size;
+       len -= wbuf->avail;
+       aligned_len -= wbuf->avail;
+       written = wbuf->avail;
+
+       /*
+        * The remaining data may take more whole min. I/O units, so write the
+        * part of it which is a multiple of the min. I/O unit size directly to
+        * the flash media. We align node length to 8-byte boundary because we
+        * anyway flush wbuf if the remaining space is less than 8 bytes.
+        */
+       n = aligned_len >> c->min_io_shift;
+       if (n) {
+               n <<= c->min_io_shift;
+               dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
+               err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
+                                   wbuf->dtype);
+               if (err)
+                       goto out;
+               offs += n;
+               aligned_len -= n;
+               len -= n;
+               written += n;
+       }
+
+       spin_lock(&wbuf->lock);
+       if (aligned_len)
+               /*
+                * And now we have what's left and what does not take whole
+                * min. I/O unit, so write it to the write-buffer and we are
+                * done.
+                */
+               memcpy(wbuf->buf, buf + written, len);
+
+       wbuf->offs = offs;
+       wbuf->used = aligned_len;
+       wbuf->avail = c->min_io_size - aligned_len;
+       wbuf->next_ino = 0;
+       spin_unlock(&wbuf->lock);
+
+exit:
+       if (wbuf->sync_callback) {
+               int free = c->leb_size - wbuf->offs - wbuf->used;
+
+               err = wbuf->sync_callback(c, wbuf->lnum, free, 0);
+               if (err)
+                       goto out;
+       }
+
+       if (wbuf->used)
+               new_wbuf_timer_nolock(wbuf);
+
+       return 0;
+
+out:
+       ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
+                 len, wbuf->lnum, wbuf->offs, err);
+       dbg_dump_node(c, buf);
+       dbg_dump_stack();
+       dbg_dump_leb(c, wbuf->lnum);
+       return err;
+}
+
+/**
+ * ubifs_write_node - write node to the media.
+ * @c: UBIFS file-system description object
+ * @buf: the node to write
+ * @len: node length
+ * @lnum: logical eraseblock number
+ * @offs: offset within the logical eraseblock
+ * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
+ *
+ * This function prepares the node (magic number, sequence number, CRC
+ * checksum, padding up to the next minimal I/O unit) and writes it directly
+ * to the flash media. Because padding is appended in place, the @buf buffer
+ * has to be large enough to hold @len aligned to the minimal I/O unit size.
+ * Returns zero in case of success, %-EROFS if the media is read-only, and
+ * another negative error code in case of failure.
+ */
+int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
+                    int offs, int dtype)
+{
+       int aligned_len = ALIGN(len, c->min_io_size);
+       int err;
+
+       dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
+              lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len,
+              aligned_len);
+       ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+       ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
+
+       if (c->ro_media)
+               return -EROFS;
+
+       ubifs_prepare_node(c, buf, len, 1);
+
+       err = ubi_leb_write(c->ubi, lnum, buf, offs, aligned_len, dtype);
+       if (!err)
+               return 0;
+
+       ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
+                 aligned_len, lnum, offs, err);
+       dbg_dump_node(c, buf);
+       dbg_dump_stack();
+
+       return err;
+}
+
+/**
+ * ubifs_read_node_wbuf - read node from the media or write-buffer.
+ * @wbuf: wbuf to check for un-written data
+ * @buf: buffer to read to
+ * @type: node type
+ * @len: node length
+ * @lnum: logical eraseblock number
+ * @offs: offset within the logical eraseblock
+ *
+ * This function reads a node of known type and length, checks it and stores
+ * in @buf. If the node partially or fully sits in the write-buffer, this
+ * function takes data from the buffer, otherwise it reads the flash media.
+ * The part which sits in the write-buffer is copied under @wbuf->lock; the
+ * part which precedes the write-buffer (if any) is then read from the flash
+ * media, where a %-EBADMSG result from ubi_read() is not treated as a failure.
+ *
+ * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative
+ * error code in case of failure. In particular, %-EINVAL is returned if the
+ * node type or the node length found in the common header differs from @type
+ * or @len.
+ */
+int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
+                        int lnum, int offs)
+{
+       const struct ubifs_info *c = wbuf->c;
+       int err, rlen, overlap;
+       struct ubifs_ch *ch = buf;
+
+       dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
+       ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+       ubifs_assert(!(offs & 7) && offs < c->leb_size);
+       ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
+
+       spin_lock(&wbuf->lock);
+       overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
+       if (!overlap) {
+               /* We may safely unlock the write-buffer and read the data */
+               spin_unlock(&wbuf->lock);
+               return ubifs_read_node(c, buf, type, len, lnum, offs);
+       }
+
+       /* Don't read under wbuf */
+       rlen = wbuf->offs - offs;
+       if (rlen < 0)
+               rlen = 0;
+
+       /* Copy the rest from the write-buffer */
+       memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
+       spin_unlock(&wbuf->lock);
+
+       if (rlen > 0) {
+               /* Read everything that goes before write-buffer */
+               err = ubi_read(c->ubi, lnum, buf, offs, rlen);
+               if (err && err != -EBADMSG) {
+                       ubifs_err("failed to read node %d from LEB %d:%d, "
+                                 "error %d", type, lnum, offs, err);
+                       dbg_dump_stack();
+                       return err;
+               }
+       }
+
+       if (type != ch->node_type) {
+               ubifs_err("bad node type (%d but expected %d)",
+                         ch->node_type, type);
+               goto out;
+       }
+
+       err = ubifs_check_node(c, buf, lnum, offs, 0);
+       if (err) {
+               ubifs_err("expected node type %d", type);
+               return err;
+       }
+
+       rlen = le32_to_cpu(ch->len);
+       if (rlen != len) {
+               ubifs_err("bad node length %d, expected %d", rlen, len);
+               goto out;
+       }
+
+       return 0;
+
+out:
+       ubifs_err("bad node at LEB %d:%d", lnum, offs);
+       dbg_dump_node(c, buf);
+       dbg_dump_stack();
+       return -EINVAL;
+}
+
+/**
+ * ubifs_read_node - read node.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to read to
+ * @type: node type
+ * @len: node length (not aligned)
+ * @lnum: logical eraseblock number
+ * @offs: offset within the logical eraseblock
+ *
+ * This function reads a node of known type and length from the flash media,
+ * checks it and stores it in @buf. A %-EBADMSG result from ubi_read() is not
+ * treated as a failure; the node is still checked. Returns zero in case of
+ * success, %-EUCLEAN if CRC mismatched, %-EINVAL if the node type or length
+ * in the common header differs from @type or @len, and another negative error
+ * code in case of failure.
+ */
+int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
+                   int lnum, int offs)
+{
+       struct ubifs_ch *ch = buf;
+       int err, node_len;
+
+       dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
+       ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+       ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size);
+       ubifs_assert(!(offs & 7) && offs < c->leb_size);
+       ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
+
+       err = ubi_read(c->ubi, lnum, buf, offs, len);
+       if (err && err != -EBADMSG) {
+               ubifs_err("cannot read node %d from LEB %d:%d, error %d",
+                         type, lnum, offs, err);
+               return err;
+       }
+
+       if (ch->node_type != type) {
+               ubifs_err("bad node type (%d but expected %d)",
+                         ch->node_type, type);
+               goto out_bad;
+       }
+
+       err = ubifs_check_node(c, buf, lnum, offs, 0);
+       if (err) {
+               ubifs_err("expected node type %d", type);
+               return err;
+       }
+
+       node_len = le32_to_cpu(ch->len);
+       if (node_len == len)
+               return 0;
+
+       ubifs_err("bad node length %d, expected %d", node_len, len);
+
+out_bad:
+       ubifs_err("bad node at LEB %d:%d", lnum, offs);
+       dbg_dump_node(c, buf);
+       dbg_dump_stack();
+       return -EINVAL;
+}
+
+/**
+ * ubifs_wbuf_init - initialize write-buffer.
+ * @c: UBIFS file-system description object
+ * @wbuf: write-buffer to initialize
+ *
+ * This function allocates the data buffer and the inode number array of
+ * @wbuf and puts the write-buffer into its initial (empty, not seeked) state.
+ * Returns zero in case of success and %-ENOMEM in case of failure.
+ */
+int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
+{
+       size_t inodes_sz;
+
+       wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
+       if (!wbuf->buf)
+               return -ENOMEM;
+
+       inodes_sz = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
+       wbuf->inodes = kmalloc(inodes_sz, GFP_KERNEL);
+       if (!wbuf->inodes)
+               goto out_free_buf;
+
+       /* Back-pointer and locks */
+       wbuf->c = c;
+       mutex_init(&wbuf->io_mutex);
+       spin_lock_init(&wbuf->lock);
+
+       /* Empty buffer which has not been seeked anywhere yet */
+       wbuf->lnum = -1;
+       wbuf->offs = -1;
+       wbuf->used = 0;
+       wbuf->next_ino = 0;
+       wbuf->avail = c->min_io_size;
+       wbuf->dtype = UBI_UNKNOWN;
+       wbuf->sync_callback = NULL;
+
+       /* Timer which requests synchronization of the write-buffer */
+       init_timer(&wbuf->timer);
+       wbuf->timer.function = wbuf_timer_callback_nolock;
+       wbuf->timer.data = (unsigned long)wbuf;
+       wbuf->timeout = DEFAULT_WBUF_TIMEOUT;
+
+       return 0;
+
+out_free_buf:
+       kfree(wbuf->buf);
+       wbuf->buf = NULL;
+       return -ENOMEM;
+}
+
+/**
+ * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
+ * @wbuf: the write-buffer whereto add
+ * @inum: the inode number
+ *
+ * This function appends @inum to the inode array of the write-buffer, but
+ * only if the write-buffer currently holds some data. Nothing is done if the
+ * write-buffer has no data buffer.
+ */
+void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum)
+{
+       /* No buffer at all: NOR flash or something similar */
+       if (wbuf->buf) {
+               spin_lock(&wbuf->lock);
+               if (wbuf->used) {
+                       wbuf->inodes[wbuf->next_ino] = inum;
+                       wbuf->next_ino += 1;
+               }
+               spin_unlock(&wbuf->lock);
+       }
+}
+
+/**
+ * wbuf_has_ino - check if the wbuf contains data from the inode.
+ * @wbuf: the write-buffer
+ * @inum: the inode number
+ *
+ * This function searches the inode array of the write-buffer under
+ * @wbuf->lock. It returns %1 if @inum is recorded there, i.e. the
+ * write-buffer contains some data from the given inode, and %0 otherwise.
+ */
+static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum)
+{
+       int idx, found = 0;
+
+       spin_lock(&wbuf->lock);
+       for (idx = 0; idx < wbuf->next_ino && !found; idx++)
+               found = (wbuf->inodes[idx] == inum);
+       spin_unlock(&wbuf->lock);
+
+       return found;
+}
+
+/**
+ * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode.
+ * @c: UBIFS file-system description object
+ * @inode: inode to synchronize
+ *
+ * This function synchronizes every journal head write-buffer, except the GC
+ * head one, which contains nodes belonging to @inode. If synchronization
+ * fails, ubifs_ro_mode() is called and the error is returned. Returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode)
+{
+       int head, err = 0;
+
+       for (head = 0; head < c->jhead_cnt; head++) {
+               struct ubifs_wbuf *wbuf = &c->jheads[head].wbuf;
+
+               /*
+                * GC head is special, do not look at it. Even if the head
+                * contains something related to this inode, it is a _copy_ of
+                * corresponding on-flash node which sits somewhere else.
+                */
+               if (head == GCHD || !wbuf_has_ino(wbuf, inode->i_ino))
+                       continue;
+
+               /* Re-check under the mutex, the wbuf may have been flushed */
+               mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+               if (wbuf_has_ino(wbuf, inode->i_ino))
+                       err = ubifs_wbuf_sync_nolock(wbuf);
+               mutex_unlock(&wbuf->io_mutex);
+
+               if (!err)
+                       continue;
+
+               ubifs_ro_mode(c, err);
+               return err;
+       }
+
+       return 0;
+}
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c

new file mode 100644 (file)

index 0000000..5e82cff
--- /dev/null
+++ b/fs/ubifs/ioctl.c
@@ -0,0 +1,204 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ * Copyright (C) 2006, 2007 University of Szeged, Hungary
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Zoltan Sogor
+ *          Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/* This file implements EXT2-compatible extended attribute ioctl() calls */
+
+#include <linux/compat.h>
+#include <linux/smp_lock.h>
+#include <linux/mount.h>
+#include "ubifs.h"
+
+/**
+ * ubifs_set_inode_flags - set VFS inode flags.
+ * @inode: VFS inode to set flags for
+ *
+ * This function mirrors the sync, append, immutable and dirsync flags of the
+ * UBIFS inode object in the @inode->i_flags field of the VFS inode object.
+ * Other bits of @inode->i_flags are left untouched.
+ */
+void ubifs_set_inode_flags(struct inode *inode)
+{
+       unsigned int ubifs_fl = ubifs_inode(inode)->flags;
+       unsigned int vfs_fl = 0;
+
+       /* Work out which VFS flags have to be set */
+       if (ubifs_fl & UBIFS_SYNC_FL)
+               vfs_fl |= S_SYNC;
+       if (ubifs_fl & UBIFS_APPEND_FL)
+               vfs_fl |= S_APPEND;
+       if (ubifs_fl & UBIFS_IMMUTABLE_FL)
+               vfs_fl |= S_IMMUTABLE;
+       if (ubifs_fl & UBIFS_DIRSYNC_FL)
+               vfs_fl |= S_DIRSYNC;
+
+       /* Drop the old values of the four flags, then install the new ones */
+       inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC);
+       inode->i_flags |= vfs_fl;
+}
+
+/*
+ * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags.
+ * @ioctl_flags: flags to convert
+ *
+ * This function converts ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags
+ * (@UBIFS_COMPR_FL, etc). Flags which have no UBIFS counterpart are dropped.
+ */
+static int ioctl2ubifs(int ioctl_flags)
+{
+       int res = 0;
+
+       res |= (ioctl_flags & FS_COMPR_FL) ? UBIFS_COMPR_FL : 0;
+       res |= (ioctl_flags & FS_SYNC_FL) ? UBIFS_SYNC_FL : 0;
+       res |= (ioctl_flags & FS_APPEND_FL) ? UBIFS_APPEND_FL : 0;
+       res |= (ioctl_flags & FS_IMMUTABLE_FL) ? UBIFS_IMMUTABLE_FL : 0;
+       res |= (ioctl_flags & FS_DIRSYNC_FL) ? UBIFS_DIRSYNC_FL : 0;
+
+       return res;
+}
+
+/*
+ * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags.
+ * @ubifs_flags: flags to convert
+ *
+ * This function converts UBIFS inode flags (@UBIFS_COMPR_FL, etc) to ioctl
+ * flags (@FS_COMPR_FL, etc). It is the reverse of 'ioctl2ubifs()'.
+ */
+static int ubifs2ioctl(int ubifs_flags)
+{
+       int res = 0;
+
+       res |= (ubifs_flags & UBIFS_COMPR_FL) ? FS_COMPR_FL : 0;
+       res |= (ubifs_flags & UBIFS_SYNC_FL) ? FS_SYNC_FL : 0;
+       res |= (ubifs_flags & UBIFS_APPEND_FL) ? FS_APPEND_FL : 0;
+       res |= (ubifs_flags & UBIFS_IMMUTABLE_FL) ? FS_IMMUTABLE_FL : 0;
+       res |= (ubifs_flags & UBIFS_DIRSYNC_FL) ? FS_DIRSYNC_FL : 0;
+
+       return res;
+}
+
+/**
+ * setflags - change the flags of an inode.
+ * @inode: inode to change the flags of
+ * @flags: new flags in the ioctl (%FS_*_FL) representation
+ *
+ * This function budgets for one dirtied inode, then, under @ui->ui_mutex,
+ * replaces the UBIFS inode flags with the converted @flags, propagates them to
+ * the VFS inode, updates the inode change time and marks the inode dirty.
+ * Toggling %FS_APPEND_FL or %FS_IMMUTABLE_FL requires %CAP_LINUX_IMMUTABLE,
+ * otherwise %-EPERM is returned. If the inode is synchronous after the change,
+ * it is written out with 'write_inode_now()'. Returns zero in case of success
+ * and a negative error code in case of failure.
+ */
+static int setflags(struct inode *inode, int flags)
+{
+       int oldflags, err, release;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       struct ubifs_budget_req req = { .dirtied_ino = 1,
+                                       .dirtied_ino_d = ui->data_len };
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       /*
+        * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+        * the relevant capability.
+        */
+       mutex_lock(&ui->ui_mutex);
+       oldflags = ubifs2ioctl(ui->flags);
+       if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+               if (!capable(CAP_LINUX_IMMUTABLE)) {
+                       err = -EPERM;
+                       goto out_unlock;
+               }
+       }
+
+       ui->flags = ioctl2ubifs(flags);
+       ubifs_set_inode_flags(inode);
+       inode->i_ctime = ubifs_current_time(inode);
+       /*
+        * Sample the dirty state before the inode is marked dirty below.
+        * NOTE(review): presumably an already dirty inode holds its own
+        * budget, which is why the budget taken here is given back in that
+        * case - confirm against the budgeting code.
+        */
+       release = ui->dirty;
+       mark_inode_dirty_sync(inode);
+       mutex_unlock(&ui->ui_mutex);
+
+       if (release)
+               ubifs_release_budget(c, &req);
+       /* 'err' is still zero here unless the synchronous write-out fails */
+       if (IS_SYNC(inode))
+               err = write_inode_now(inode, 1);
+       return err;
+
+out_unlock:
+       /* Nothing was changed, so the budget is returned unconditionally */
+       ubifs_err("can't modify inode %lu attributes", inode->i_ino);
+       mutex_unlock(&ui->ui_mutex);
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/**
+ * ubifs_ioctl - handle UBIFS ioctl() requests.
+ * @file: file the ioctl was issued on
+ * @cmd: ioctl command
+ * @arg: ioctl argument; both supported commands use it as a user-space
+ *       pointer to an 'int' which holds %FS_*_FL flags
+ *
+ * This function supports %FS_IOC_GETFLAGS and %FS_IOC_SETFLAGS, all other
+ * commands are rejected with %-ENOTTY. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       int flags, err;
+       struct inode *inode = file->f_path.dentry->d_inode;
+
+       switch (cmd) {
+       case FS_IOC_GETFLAGS:
+               /* Report the UBIFS inode flags in ioctl representation */
+               flags = ubifs2ioctl(ubifs_inode(inode)->flags);
+
+               return put_user(flags, (int __user *) arg);
+
+       case FS_IOC_SETFLAGS: {
+               if (IS_RDONLY(inode))
+                       return -EROFS;
+
+               if (!is_owner_or_cap(inode))
+                       return -EACCES;
+
+               if (get_user(flags, (int __user *) arg))
+                       return -EFAULT;
+
+               /* The DIRSYNC flag is silently dropped for non-directories */
+               if (!S_ISDIR(inode->i_mode))
+                       flags &= ~FS_DIRSYNC_FL;
+
+               /*
+                * Make sure the file-system is read-write and make sure it
+                * will not become read-only while we are changing the flags.
+                */
+               err = mnt_want_write(file->f_path.mnt);
+               if (err)
+                       return err;
+               err = setflags(inode, flags);
+               mnt_drop_write(file->f_path.mnt);
+               return err;
+       }
+
+       default:
+               return -ENOTTY;
+       }
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * ubifs_compat_ioctl - 32-bit compatibility entry point for UBIFS ioctls.
+ *
+ * This function translates the 32-bit get/set flags commands to their native
+ * counterparts and passes them on to 'ubifs_ioctl()' with the argument
+ * converted by 'compat_ptr()'. All other commands yield %-ENOIOCTLCMD.
+ */
+long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       if (cmd == FS_IOC32_GETFLAGS)
+               cmd = FS_IOC_GETFLAGS;
+       else if (cmd == FS_IOC32_SETFLAGS)
+               cmd = FS_IOC_SETFLAGS;
+       else
+               return -ENOIOCTLCMD;
+
+       return ubifs_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c

new file mode 100644 (file)

index 0000000..283155a
--- /dev/null
+++ b/fs/ubifs/journal.c
@@ -0,0 +1,1387 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file implements UBIFS journal.
+ *
+ * The journal consists of 2 parts - the log and bud LEBs. The log has fixed
+ * length and position, while a bud logical eraseblock is any LEB in the main
+ * area. Buds contain file system data - data nodes, inode nodes, etc. The log
+ * contains only references to buds and some other stuff like commit
+ * start node. The idea is that when we commit the journal, we do
+ * not copy the data, the buds just become indexed. Since after the commit the
+ * nodes in bud eraseblocks become leaf nodes of the file system index tree, we
+ * use term "bud". Analogy is obvious, bud eraseblocks contain nodes which will
+ * become leafs in the future.
+ *
+ * The journal is multi-headed because we want to write data to the journal as
+ * optimally as possible. It is nice to have nodes belonging to the same inode
+ * in one LEB, so we may write data owned by different inodes to different
+ * journal heads, although at present only one data head is used.
+ *
+ * For recovery reasons, the base head contains all inode nodes, all directory
+ * entry nodes and all truncate nodes. This means that the other heads contain
+ * only data nodes.
+ *
+ * Bud LEBs may be half-indexed. For example, if the bud was not full at the
+ * time of commit, the bud is retained to continue to be used in the journal,
+ * even though the "front" of the LEB is now indexed. In that case, the log
+ * reference contains the offset where the bud starts for the purposes of the
+ * journal.
+ *
+ * The journal size has to be limited, because the larger the journal is, the
+ * longer it takes to mount UBIFS (scanning the journal) and the more memory it
+ * takes (indexing in the TNC).
+ *
+ * All the journal write operations like 'ubifs_jnl_update()' here, which write
+ * multiple UBIFS nodes to the journal at one go, are atomic with respect to
+ * unclean reboots. Should the unclean reboot happen, the recovery code drops
+ * all the nodes.
+ */
+
+#include "ubifs.h"
+
+/**
+ * zero_ino_node_unused - zero out unused fields of an on-flash inode node.
+ * @ino: the inode to zero out
+ *
+ * This function clears 4 bytes of @ino->padding1 and 26 bytes of
+ * @ino->padding2.
+ */
+static inline void zero_ino_node_unused(struct ubifs_ino_node *ino)
+{
+       /*
+        * NOTE(review): the literal lengths presumably equal the sizes of the
+        * padding arrays in 'struct ubifs_ino_node' - confirm.
+        */
+       memset(ino->padding1, 0, 4);
+       memset(ino->padding2, 0, 26);
+}
+
+/**
+ * zero_dent_node_unused - zero out unused fields of an on-flash directory
+ *                         entry node.
+ * @dent: the directory entry to zero out
+ *
+ * This function clears @dent->padding1 and 4 bytes of @dent->padding2.
+ */
+static inline void zero_dent_node_unused(struct ubifs_dent_node *dent)
+{
+       dent->padding1 = 0;
+       /*
+        * NOTE(review): the literal length presumably equals the size of the
+        * @padding2 array in 'struct ubifs_dent_node' - confirm.
+        */
+       memset(dent->padding2, 0, 4);
+}
+
+/**
+ * zero_data_node_unused - zero out unused fields of an on-flash data node.
+ * @data: the data node to zero out
+ *
+ * This function clears 2 bytes of @data->padding.
+ */
+static inline void zero_data_node_unused(struct ubifs_data_node *data)
+{
+       /*
+        * NOTE(review): the literal length presumably equals the size of the
+        * @padding array in 'struct ubifs_data_node' - confirm.
+        */
+       memset(data->padding, 0, 2);
+}
+
+/**
+ * zero_trun_node_unused - zero out unused fields of an on-flash truncation
+ *                         node.
+ * @trun: the truncation node to zero out
+ *
+ * This function clears 12 bytes of @trun->padding.
+ */
+static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
+{
+       /*
+        * NOTE(review): the literal length presumably equals the size of the
+        * @padding array in 'struct ubifs_trun_node' - confirm.
+        */
+       memset(trun->padding, 0, 12);
+}
+
+/**
+ * reserve_space - reserve space in the journal.
+ * @c: UBIFS file-system description object
+ * @jhead: journal head number
+ * @len: node length
+ *
+ * This function reserves space in journal head @jhead. If the reservation
+ * succeeded, the journal head stays locked (its write-buffer @io_mutex is
+ * held) and later has to be unlocked using 'release_head()'. On every failure
+ * path the mutex is released before returning. Returns zero in case of
+ * success, %-EROFS if the media is read-only, and other negative error codes
+ * in case of other failures. The callers in this file treat %-EAGAIN as a
+ * request to run commit and try again.
+ */
+static int reserve_space(struct ubifs_info *c, int jhead, int len)
+{
+       int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
+       struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
+
+       /*
+        * Typically, the base head has smaller nodes written to it, so it is
+        * better to try to allocate space at the ends of eraseblocks. This is
+        * what the squeeze parameter does.
+        */
+       squeeze = (jhead == BASEHD);
+again:
+       mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+
+       if (c->ro_media) {
+               err = -EROFS;
+               goto out_unlock;
+       }
+
+       /* Fast path: the current LEB of this head still has enough room */
+       avail = c->leb_size - wbuf->offs - wbuf->used;
+       if (wbuf->lnum != -1 && avail >= len)
+               return 0;
+
+       /*
+        * Write buffer wasn't seek'ed or there is not enough space - look for
+        * an LEB with some empty space.
+        */
+       lnum = ubifs_find_free_space(c, len, &free, squeeze);
+       if (lnum >= 0) {
+               /* Found an LEB, add it to the journal head */
+               offs = c->leb_size - free;
+               err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
+               if (err)
+                       goto out_return;
+               /* A new bud was successfully allocated and added to the log */
+               goto out;
+       }
+
+       err = lnum;
+       if (err != -ENOSPC)
+               goto out_unlock;
+
+       /*
+        * No free space, we have to run garbage collector to make
+        * some. But the write-buffer mutex has to be unlocked because
+        * GC also takes it.
+        */
+       dbg_jnl("no free space  jhead %d, run GC", jhead);
+       mutex_unlock(&wbuf->io_mutex);
+
+       lnum = ubifs_garbage_collect(c, 0);
+       if (lnum < 0) {
+               /* The mutex is not held here, so plain returns are fine */
+               err = lnum;
+               if (err != -ENOSPC)
+                       return err;
+
+               /*
+                * GC could not make a free LEB. But someone else may
+                * have allocated new bud for this journal head,
+                * because we dropped @wbuf->io_mutex, so try once
+                * again.
+                */
+               dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
+               if (retries++ < 2) {
+                       dbg_jnl("retry (%d)", retries);
+                       goto again;
+               }
+
+               dbg_jnl("return -ENOSPC");
+               return err;
+       }
+
+       /* GC produced an LEB; re-take the mutex and re-evaluate the head */
+       mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+       dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
+       avail = c->leb_size - wbuf->offs - wbuf->used;
+
+       if (wbuf->lnum != -1 && avail >= len) {
+               /*
+                * Someone else has switched the journal head and we have
+                * enough space now. This happens when more than one process is
+                * trying to write to the same journal head at the same time.
+                */
+               dbg_jnl("return LEB %d back, already have LEB %d:%d",
+                       lnum, wbuf->lnum, wbuf->offs + wbuf->used);
+               err = ubifs_return_leb(c, lnum);
+               if (err)
+                       goto out_unlock;
+               return 0;
+       }
+
+       err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
+       if (err)
+               goto out_return;
+       offs = 0;
+
+out:
+       /* Point the write-buffer at the newly added bud */
+       err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM);
+       if (err)
+               goto out_unlock;
+
+       return 0;
+
+out_unlock:
+       mutex_unlock(&wbuf->io_mutex);
+       return err;
+
+out_return:
+       /* An error occurred and the LEB has to be returned to lprops */
+       ubifs_assert(err < 0);
+       err1 = ubifs_return_leb(c, lnum);
+       if (err1 && err == -EAGAIN)
+               /*
+                * Return original error code only if it is not %-EAGAIN,
+                * which is not really an error. Otherwise, return the error
+                * code of 'ubifs_return_leb()'.
+                */
+               err = err1;
+       mutex_unlock(&wbuf->io_mutex);
+       return err;
+}
+
+/**
+ * write_node - write node to a journal head.
+ * @c: UBIFS file-system description object
+ * @jhead: journal head (must not be the GC head)
+ * @node: node to write
+ * @len: node length
+ * @lnum: LEB number the node is written to is returned here
+ * @offs: offset the node is written at is returned here
+ *
+ * This function prepares @node with 'ubifs_prepare_node()' and writes it to
+ * the write-buffer of journal head @jhead. The position reported via @lnum and
+ * @offs is the write-buffer position before the write. Returns zero in case of
+ * success and a negative error code in case of failure.
+ */
+static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
+                     int *lnum, int *offs)
+{
+       struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
+
+       ubifs_assert(jhead != GCHD);
+
+       /* The node lands right after what is already in the write-buffer */
+       *lnum = wbuf->lnum;
+       *offs = wbuf->offs + wbuf->used;
+       dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
+
+       ubifs_prepare_node(c, node, len, 0);
+       return ubifs_wbuf_write_nolock(wbuf, node, len);
+}
+
+/**
+ * write_head - write data to a journal head.
+ * @c: UBIFS file-system description object
+ * @jhead: journal head (must not be the GC head)
+ * @buf: buffer to write
+ * @len: length to write
+ * @lnum: LEB number the data is written to is returned here
+ * @offs: offset the data is written at is returned here
+ * @sync: non-zero if the write-buffer has to be synchronized
+ *
+ * This function is the same as 'write_node()' but it does not assume the
+ * buffer it is writing is a node, so it does not call 'ubifs_prepare_node()'
+ * for it. Returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
+                     int *lnum, int *offs, int sync)
+{
+       struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
+       int err;
+
+       ubifs_assert(jhead != GCHD);
+
+       /* The data lands right after what is already in the write-buffer */
+       *lnum = wbuf->lnum;
+       *offs = wbuf->offs + wbuf->used;
+       dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
+
+       err = ubifs_wbuf_write_nolock(wbuf, buf, len);
+       if (!err && sync)
+               err = ubifs_wbuf_sync_nolock(wbuf);
+
+       return err;
+}
+
+/**
+ * make_reservation - reserve journal space.
+ * @c: UBIFS file-system description object
+ * @jhead: journal head
+ * @len: how many bytes to reserve
+ *
+ * This function makes space reservation in journal head @jhead. The function
+ * takes the commit lock (@c->commit_sem, for reading) and locks the journal
+ * head, and the caller has to unlock the head and finish the reservation with
+ * 'finish_reservation()'. On failure neither lock is held on return. Returns
+ * zero in case of success and a negative error code in case of failure.
+ *
+ * Note, the journal head may be unlocked as soon as the data is written, while
+ * the commit lock has to be released after the data has been added to the
+ * TNC.
+ */
+static int make_reservation(struct ubifs_info *c, int jhead, int len)
+{
+       int err, cmt_retries = 0, nospc_retries = 0;
+
+again:
+       down_read(&c->commit_sem);
+       err = reserve_space(c, jhead, len);
+       if (!err)
+               /* Success: return with commit_sem and the head still held */
+               return 0;
+       up_read(&c->commit_sem);
+
+       if (err == -ENOSPC) {
+               /*
+                * GC could not make any progress. We should try to commit
+                * once because it could make some dirty space and GC would
+                * make progress, so make the error -EAGAIN so that the below
+                * will commit and re-try.
+                */
+               if (nospc_retries++ < 2) {
+                       dbg_jnl("no space, retry");
+                       err = -EAGAIN;
+               }
+
+               /*
+                * This means that the budgeting is incorrect. We always have
+                * to be able to write to the media, because all operations are
+                * budgeted. Deletions are not budgeted, though, but we reserve
+                * an extra LEB for them.
+                */
+       }
+
+       if (err != -EAGAIN)
+               goto out;
+
+       /*
+        * -EAGAIN means that the journal is full or too large, or the above
+        * code wants to do one commit. Do this and re-try.
+        */
+       if (cmt_retries > 128) {
+               /*
+                * This should not happen unless the journal size limitations
+                * are too tough.
+                */
+               ubifs_err("stuck in space allocation");
+               err = -ENOSPC;
+               goto out;
+       } else if (cmt_retries > 32)
+               ubifs_warn("too many space allocation re-tries (%d)",
+                          cmt_retries);
+
+       dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
+               cmt_retries);
+       cmt_retries += 1;
+
+       err = ubifs_run_commit(c);
+       if (err)
+               return err;
+       goto again;
+
+out:
+       ubifs_err("cannot reserve %d bytes in jhead %d, error %d",
+                 len, jhead, err);
+       if (err == -ENOSPC) {
+               /* These are budgeting problems, print useful information */
+               down_write(&c->commit_sem);
+               spin_lock(&c->space_lock);
+               dbg_dump_stack();
+               dbg_dump_budg(c);
+               spin_unlock(&c->space_lock);
+               dbg_dump_lprops(c);
+               /*
+                * NOTE(review): the value stored in 'cmt_retries' is never
+                * read afterwards; presumably the assignment only consumes
+                * the return value of 'dbg_check_lprops()' - confirm.
+                */
+               cmt_retries = dbg_check_lprops(c);
+               up_write(&c->commit_sem);
+       }
+       return err;
+}
+
+/**
+ * release_head - release a journal head.
+ * @c: UBIFS file-system description object
+ * @jhead: journal head
+ *
+ * This function unlocks the write-buffer mutex of journal head @jhead, which
+ * is left locked by a successful 'make_reservation()' call. It has to be
+ * called after each successful 'make_reservation()' invocation.
+ */
+static inline void release_head(struct ubifs_info *c, int jhead)
+{
+       struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
+
+       mutex_unlock(&wbuf->io_mutex);
+}
+
+/**
+ * finish_reservation - finish a reservation.
+ * @c: UBIFS file-system description object
+ *
+ * This function finishes journal space reservation by releasing the read side
+ * of @c->commit_sem, which 'make_reservation()' leaves held when it succeeds.
+ * It must be called after 'make_reservation()'.
+ */
+static void finish_reservation(struct ubifs_info *c)
+{
+       up_read(&c->commit_sem);
+}
+
+/**
+ * get_dent_type - translate VFS inode mode to UBIFS directory entry type.
+ * @mode: inode mode
+ *
+ * This function returns the %UBIFS_ITYPE_* constant which corresponds to the
+ * file type bits of @mode. An unknown file type triggers 'BUG()'.
+ */
+static int get_dent_type(int mode)
+{
+       int type = 0;
+
+       switch (mode & S_IFMT) {
+       case S_IFREG:
+               type = UBIFS_ITYPE_REG;
+               break;
+       case S_IFDIR:
+               type = UBIFS_ITYPE_DIR;
+               break;
+       case S_IFLNK:
+               type = UBIFS_ITYPE_LNK;
+               break;
+       case S_IFBLK:
+               type = UBIFS_ITYPE_BLK;
+               break;
+       case S_IFCHR:
+               type = UBIFS_ITYPE_CHR;
+               break;
+       case S_IFIFO:
+               type = UBIFS_ITYPE_FIFO;
+               break;
+       case S_IFSOCK:
+               type = UBIFS_ITYPE_SOCK;
+               break;
+       default:
+               BUG();
+       }
+
+       return type;
+}
+
+/**
+ * pack_inode - pack an inode node.
+ * @c: UBIFS file-system description object
+ * @ino: buffer in which to pack inode node
+ * @inode: inode to pack
+ * @last: indicates the last node of the group
+ * @last_reference: non-zero if this is a deletion inode
+ *
+ * This function fills the on-flash inode node @ino from @inode and finishes it
+ * with 'ubifs_prep_grp_node()'. The data attached to the inode is copied to
+ * the node unless @last_reference is non-zero.
+ */
+static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
+                      const struct inode *inode, int last,
+                      int last_reference)
+{
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       int node_len = UBIFS_INO_NODE_SZ;
+
+       /* Node type and key */
+       ino->ch.node_type = UBIFS_INO_NODE;
+       ino_key_init_flash(c, &ino->key, inode->i_ino);
+
+       /* Time stamps */
+       ino->atime_sec  = cpu_to_le64(inode->i_atime.tv_sec);
+       ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
+       ino->ctime_sec  = cpu_to_le64(inode->i_ctime.tv_sec);
+       ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       ino->mtime_sec  = cpu_to_le64(inode->i_mtime.tv_sec);
+       ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+
+       /* Attributes which come from the VFS inode */
+       ino->uid   = cpu_to_le32(inode->i_uid);
+       ino->gid   = cpu_to_le32(inode->i_gid);
+       ino->mode  = cpu_to_le32(inode->i_mode);
+       ino->nlink = cpu_to_le32(inode->i_nlink);
+
+       /* Attributes which come from the UBIFS inode */
+       ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum);
+       ino->flags       = cpu_to_le32(ui->flags);
+       ino->size        = cpu_to_le64(ui->ui_size);
+       ino->compr_type  = cpu_to_le16(ui->compr_type);
+       ino->data_len    = cpu_to_le32(ui->data_len);
+       ino->xattr_cnt   = cpu_to_le32(ui->xattr_cnt);
+       ino->xattr_size  = cpu_to_le32(ui->xattr_size);
+       ino->xattr_names = cpu_to_le32(ui->xattr_names);
+
+       zero_ino_node_unused(ino);
+
+       /*
+        * A deletion inode goes to the flash without the attached data, the
+        * data is not needed anymore.
+        */
+       if (!last_reference) {
+               memcpy(ino->data, ui->data, ui->data_len);
+               node_len += ui->data_len;
+       }
+
+       ubifs_prep_grp_node(c, ino, node_len, last);
+}
+
+/**
+ * mark_inode_clean - mark UBIFS inode as clean.
+ * @c: UBIFS file-system description object
+ * @ui: UBIFS inode to mark as clean
+ *
+ * If @ui is dirty, this helper function releases the dirty inode budget and
+ * clears the @ui->dirty flag; a clean inode is left alone. Note, VFS may still
+ * treat the inode as dirty and try to write it back, but 'ubifs_write_inode()'
+ * would just do nothing.
+ */
+static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui)
+{
+       /* Nothing to release and nothing to clear for a clean inode */
+       if (!ui->dirty)
+               return;
+
+       ubifs_release_dirty_inode_budget(c, ui);
+       ui->dirty = 0;
+}
+
+/**
+ * ubifs_jnl_update - update inode.
+ * @c: UBIFS file-system description object
+ * @dir: parent inode or host inode in case of extended attributes
+ * @nm: directory entry name
+ * @inode: inode to update
+ * @deletion: indicates a directory entry deletion i.e unlink or rmdir
+ * @xent: non-zero if the directory entry is an extended attribute entry
+ *
+ * This function updates an inode by writing a directory entry (or extended
+ * attribute entry), the inode itself, and the parent directory inode (or the
+ * host inode) to the journal.
+ *
+ * The function writes the host inode @dir last, which is important in case of
+ * extended attributes. Indeed, then we guarantee that if the host inode gets
+ * synchronized (with 'fsync()'), and the write-buffer it sits in gets flushed,
+ * the extended attribute inode gets flushed too. And this is exactly what the
+ * user expects - synchronizing the host inode synchronizes its extended
+ * attributes. Similarly, this guarantees that if @dir is synchronized, its
+ * directory entry corresponding to @nm gets synchronized too.
+ *
+ * If the inode (@inode) or the parent directory (@dir) are synchronous, this
+ * function synchronizes the write-buffer.
+ *
+ * The three nodes are built in one temporary buffer which is freed on every
+ * return path of this function.
+ *
+ * This function marks the @dir and @inode inodes as clean and returns zero on
+ * success. In case of failure, a negative error code is returned.
+ */
+int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
+                    const struct qstr *nm, const struct inode *inode,
+                    int deletion, int xent)
+{
+       int err, dlen, ilen, len, lnum, ino_offs, dent_offs;
+       int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir);
+       int last_reference = !!(deletion && inode->i_nlink == 0);
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       struct ubifs_inode *dir_ui = ubifs_inode(dir);
+       struct ubifs_dent_node *dent;
+       struct ubifs_ino_node *ino;
+       union ubifs_key dent_key, ino_key;
+
+       dbg_jnl("ino %lu, dent '%.*s', data len %d in dir ino %lu",
+               inode->i_ino, nm->len, nm->name, ui->data_len, dir->i_ino);
+       ubifs_assert(dir_ui->data_len == 0);
+       ubifs_assert(mutex_is_locked(&dir_ui->ui_mutex));
+
+       dlen = UBIFS_DENT_NODE_SZ + nm->len + 1;
+       ilen = UBIFS_INO_NODE_SZ;
+
+       /*
+        * If the last reference to the inode is being deleted, then there is
+        * no need to attach and write inode data, it is being deleted anyway.
+        * And if the inode is being deleted, no need to synchronize
+        * write-buffer even if the inode is synchronous.
+        */
+       if (!last_reference) {
+               ilen += ui->data_len;
+               sync |= IS_SYNC(inode);
+       }
+
+       /*
+        * One buffer holds all three nodes back to back: the directory entry,
+        * the inode, and the parent (host) inode, which has no attached data.
+        */
+       aligned_dlen = ALIGN(dlen, 8);
+       aligned_ilen = ALIGN(ilen, 8);
+       len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ;
+       dent = kmalloc(len, GFP_NOFS);
+       if (!dent)
+               return -ENOMEM;
+
+       /* Make reservation before allocating sequence numbers */
+       err = make_reservation(c, BASEHD, len);
+       if (err)
+               goto out_free;
+
+       if (!xent) {
+               dent->ch.node_type = UBIFS_DENT_NODE;
+               dent_key_init(c, &dent_key, dir->i_ino, nm);
+       } else {
+               dent->ch.node_type = UBIFS_XENT_NODE;
+               xent_key_init(c, &dent_key, dir->i_ino, nm);
+       }
+
+       key_write(c, &dent_key, dent->key);
+       /* A deletion entry carries inode number zero */
+       dent->inum = deletion ? 0 : cpu_to_le64(inode->i_ino);
+       dent->type = get_dent_type(inode->i_mode);
+       dent->nlen = cpu_to_le16(nm->len);
+       memcpy(dent->name, nm->name, nm->len);
+       dent->name[nm->len] = '\0';
+       zero_dent_node_unused(dent);
+       ubifs_prep_grp_node(c, dent, dlen, 0);
+
+       ino = (void *)dent + aligned_dlen;
+       pack_inode(c, ino, inode, 0, last_reference);
+       ino = (void *)ino + aligned_ilen;
+       pack_inode(c, ino, dir, 1, 0);
+
+       if (last_reference) {
+               err = ubifs_add_orphan(c, inode->i_ino);
+               if (err) {
+                       release_head(c, BASEHD);
+                       goto out_finish;
+               }
+       }
+
+       err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
+       if (err)
+               goto out_release;
+       if (!sync) {
+               struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
+
+               /* Record which inodes have nodes in the write-buffer */
+               ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino);
+               ubifs_wbuf_add_ino_nolock(wbuf, dir->i_ino);
+       }
+       release_head(c, BASEHD);
+       /* The buffer is not used below, 'out_ro' must not free it again */
+       kfree(dent);
+
+       if (deletion) {
+               err = ubifs_tnc_remove_nm(c, &dent_key, nm);
+               if (err)
+                       goto out_ro;
+               err = ubifs_add_dirt(c, lnum, dlen);
+       } else
+               err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm);
+       if (err)
+               goto out_ro;
+
+       /*
+        * Note, we do not remove the inode from TNC even if the last reference
+        * to it has just been deleted, because the inode may still be opened.
+        * Instead, the inode has been added to orphan lists and the orphan
+        * subsystem will take further care about it.
+        */
+       ino_key_init(c, &ino_key, inode->i_ino);
+       ino_offs = dent_offs + aligned_dlen;
+       err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen);
+       if (err)
+               goto out_ro;
+
+       ino_key_init(c, &ino_key, dir->i_ino);
+       ino_offs += aligned_ilen;
+       err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, UBIFS_INO_NODE_SZ);
+       if (err)
+               goto out_ro;
+
+       finish_reservation(c);
+       spin_lock(&ui->ui_lock);
+       ui->synced_i_size = ui->ui_size;
+       spin_unlock(&ui->ui_lock);
+       mark_inode_clean(c, ui);
+       mark_inode_clean(c, dir_ui);
+       return 0;
+
+out_finish:
+       finish_reservation(c);
+out_free:
+       kfree(dent);
+       return err;
+
+out_release:
+       release_head(c, BASEHD);
+       /*
+        * 'write_head()' failed, so the buffer has not been freed yet. Free
+        * it here and not in 'out_ro', which is also reached after the buffer
+        * has already been freed.
+        */
+       kfree(dent);
+out_ro:
+       ubifs_ro_mode(c, err);
+       if (last_reference)
+               ubifs_delete_orphan(c, inode->i_ino);
+       finish_reservation(c);
+       return err;
+}
+
+/**
+ * ubifs_jnl_write_data - write a data node to the journal.
+ * @c: UBIFS file-system description object
+ * @inode: inode the data node belongs to
+ * @key: node key
+ * @buf: buffer to write
+ * @len: data length (must not exceed %UBIFS_BLOCK_SIZE)
+ *
+ * This function compresses @buf into a newly built data node, writes the node
+ * to the data journal head and adds it to the TNC. Compression is attempted
+ * only if the inode has the %UBIFS_COMPR_FL flag set. Returns %0 if the data
+ * node was successfully written, and a negative error code in case of failure.
+ */
+int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
+                        const union ubifs_key *key, const void *buf, int len)
+{
+       struct ubifs_data_node *data;
+       int err, lnum, offs, compr_type, out_len;
+       /* Worst-case node size, used for the allocation only */
+       int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+
+       dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key),
+               key_block(c, key), len, DBGKEY(key));
+       ubifs_assert(len <= UBIFS_BLOCK_SIZE);
+
+       data = kmalloc(dlen, GFP_NOFS);
+       if (!data)
+               return -ENOMEM;
+
+       data->ch.node_type = UBIFS_DATA_NODE;
+       key_write(c, key, &data->key);
+       data->size = cpu_to_le32(len);
+       zero_data_node_unused(data);
+
+       /*
+        * The flag has to be tested with bitwise AND. A logical AND would
+        * only check that some flag is set, not the compression flag.
+        */
+       if (!(ui->flags & UBIFS_COMPR_FL))
+               /* Compression is disabled for this inode */
+               compr_type = UBIFS_COMPR_NONE;
+       else
+               compr_type = ui->compr_type;
+
+       out_len = dlen - UBIFS_DATA_NODE_SZ;
+       ubifs_compress(buf, len, &data->data, &out_len, &compr_type);
+       ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
+
+       /* From now on @dlen is the real length of the node */
+       dlen = UBIFS_DATA_NODE_SZ + out_len;
+       data->compr_type = cpu_to_le16(compr_type);
+
+       /* Make reservation before allocating sequence numbers */
+       err = make_reservation(c, DATAHD, dlen);
+       if (err)
+               goto out_free;
+
+       err = write_node(c, DATAHD, data, dlen, &lnum, &offs);
+       if (err)
+               goto out_release;
+       ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key));
+       release_head(c, DATAHD);
+
+       err = ubifs_tnc_add(c, key, lnum, offs, dlen);
+       if (err)
+               goto out_ro;
+
+       finish_reservation(c);
+       kfree(data);
+       return 0;
+
+out_release:
+       release_head(c, DATAHD);
+out_ro:
+       ubifs_ro_mode(c, err);
+       finish_reservation(c);
+out_free:
+       kfree(data);
+       return err;
+}
+
+/**
+ * ubifs_jnl_write_inode - flush inode to the journal.
+ * @c: UBIFS file-system description object
+ * @inode: inode to flush
+ * @deletion: non-zero if the inode has been deleted (its link count must
+ *            already be zero)
+ *
+ * This function writes inode @inode to the base head of the journal. If the
+ * inode is synchronous, it also synchronizes the write-buffer.
+ *
+ * When @deletion is set, only the bare inode node is written (no attached
+ * data), the inode is removed from the TNC with 'ubifs_tnc_remove_ino()', its
+ * orphan record is deleted and the node just written is reported to
+ * 'ubifs_add_dirt()'. Otherwise the TNC is updated to point to the new inode
+ * node. On success the synchronized inode size is set to the current size.
+ *
+ * Returns zero in case of success and a negative error code in case of
+ * failure. If the failure happens after the journal space was reserved, the
+ * file-system is switched to read-only mode.
+ */
+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
+                         int deletion)
+{
+       int err, len, lnum, offs, sync = 0;
+       struct ubifs_ino_node *ino;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+
+       dbg_jnl("ino %lu%s", inode->i_ino,
+               deletion ? " (last reference)" : "");
+       if (deletion)
+               ubifs_assert(inode->i_nlink == 0);
+
+       len = UBIFS_INO_NODE_SZ;
+       /*
+        * If the inode is being deleted, do not write the attached data. No
+        * need to synchronize the write-buffer either.
+        */
+       if (!deletion) {
+               len += ui->data_len;
+               sync = IS_SYNC(inode);
+       }
+       ino = kmalloc(len, GFP_NOFS);
+       if (!ino)
+               return -ENOMEM;
+
+       /* Make reservation before allocating sequence numbers */
+       err = make_reservation(c, BASEHD, len);
+       if (err)
+               goto out_free;
+
+       pack_inode(c, ino, inode, 1, deletion);
+       err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
+       if (err)
+               goto out_release;
+       if (!sync)
+               ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
+                                         inode->i_ino);
+       release_head(c, BASEHD);
+
+       if (deletion) {
+               err = ubifs_tnc_remove_ino(c, inode->i_ino);
+               if (err)
+                       goto out_ro;
+               ubifs_delete_orphan(c, inode->i_ino);
+               err = ubifs_add_dirt(c, lnum, len);
+       } else {
+               union ubifs_key key;
+
+               ino_key_init(c, &key, inode->i_ino);
+               err = ubifs_tnc_add(c, &key, lnum, offs, len);
+       }
+       if (err)
+               goto out_ro;
+
+       finish_reservation(c);
+       spin_lock(&ui->ui_lock);
+       ui->synced_i_size = ui->ui_size;
+       spin_unlock(&ui->ui_lock);
+       kfree(ino);
+       return 0;
+
+out_release:
+       release_head(c, BASEHD);
+out_ro:
+       ubifs_ro_mode(c, err);
+       finish_reservation(c);
+out_free:
+       kfree(ino);
+       return err;
+}
+
+/**
+ * ubifs_jnl_rename - rename a directory entry.
+ * @c: UBIFS file-system description object
+ * @old_dir: parent inode of directory entry to rename
+ * @old_dentry: directory entry to rename
+ * @new_dir: parent inode of the new directory entry
+ * @new_dentry: new directory entry (or directory entry to replace)
+ * @sync: non-zero if the write-buffer has to be synchronized
+ *
+ * This function implements the re-name operation which may involve writing up
+ * to 3 inodes and 2 directory entries. It marks the written inodes as clean
+ * and returns zero on success. In case of failure, a negative error code is
+ * returned.
+ *
+ * All nodes are built in one buffer and written to the base head with a single
+ * write, in this order: the new directory entry, a deletion entry for the old
+ * name, the inode of @new_dentry (only if it has one), the @old_dir inode and,
+ * if the entry moves to a different directory, the @new_dir inode. If the
+ * inode of @new_dentry has no links left, it is added to the orphan list
+ * before the write and written without its attached data.
+ */
+int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+                    const struct dentry *old_dentry,
+                    const struct inode *new_dir,
+                    const struct dentry *new_dentry, int sync)
+{
+       void *p;
+       union ubifs_key key;
+       struct ubifs_dent_node *dent, *dent2;
+       int err, dlen1, dlen2, ilen, lnum, offs, len;
+       const struct inode *old_inode = old_dentry->d_inode;
+       const struct inode *new_inode = new_dentry->d_inode;
+       int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
+       int last_reference = !!(new_inode && new_inode->i_nlink == 0);
+       int move = (old_dir != new_dir);
+       struct ubifs_inode *uninitialized_var(new_ui);
+
+       dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
+               old_dentry->d_name.len, old_dentry->d_name.name,
+               old_dir->i_ino, new_dentry->d_name.len,
+               new_dentry->d_name.name, new_dir->i_ino);
+       ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
+       ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
+       ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
+       ubifs_assert(mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex));
+
+       dlen1 = UBIFS_DENT_NODE_SZ + new_dentry->d_name.len + 1;
+       dlen2 = UBIFS_DENT_NODE_SZ + old_dentry->d_name.len + 1;
+       if (new_inode) {
+               new_ui = ubifs_inode(new_inode);
+               ubifs_assert(mutex_is_locked(&new_ui->ui_mutex));
+               ilen = UBIFS_INO_NODE_SZ;
+               if (!last_reference)
+                       ilen += new_ui->data_len;
+       } else
+               ilen = 0;
+
+       aligned_dlen1 = ALIGN(dlen1, 8);
+       aligned_dlen2 = ALIGN(dlen2, 8);
+       len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
+       if (old_dir != new_dir)
+               len += plen;
+       dent = kmalloc(len, GFP_NOFS);
+       if (!dent)
+               return -ENOMEM;
+
+       /* Make reservation before allocating sequence numbers */
+       err = make_reservation(c, BASEHD, len);
+       if (err)
+               goto out_free;
+
+       /*
+        * Make new dent: the new name in 'new_dir' which refers to the inode
+        * being renamed.
+        */
+       dent->ch.node_type = UBIFS_DENT_NODE;
+       dent_key_init_flash(c, &dent->key, new_dir->i_ino, &new_dentry->d_name);
+       dent->inum = cpu_to_le64(old_inode->i_ino);
+       dent->type = get_dent_type(old_inode->i_mode);
+       dent->nlen = cpu_to_le16(new_dentry->d_name.len);
+       memcpy(dent->name, new_dentry->d_name.name, new_dentry->d_name.len);
+       dent->name[new_dentry->d_name.len] = '\0';
+       zero_dent_node_unused(dent);
+       ubifs_prep_grp_node(c, dent, dlen1, 0);
+
+       /*
+        * Make deletion dent: the old name in 'old_dir' with a zero inode
+        * number.
+        */
+       dent2 = (void *)dent + aligned_dlen1;
+       dent2->ch.node_type = UBIFS_DENT_NODE;
+       dent_key_init_flash(c, &dent2->key, old_dir->i_ino,
+                           &old_dentry->d_name);
+       dent2->inum = 0;
+       dent2->type = DT_UNKNOWN;
+       dent2->nlen = cpu_to_le16(old_dentry->d_name.len);
+       memcpy(dent2->name, old_dentry->d_name.name, old_dentry->d_name.len);
+       dent2->name[old_dentry->d_name.len] = '\0';
+       zero_dent_node_unused(dent2);
+       ubifs_prep_grp_node(c, dent2, dlen2, 0);
+
+       p = (void *)dent2 + aligned_dlen2; /* inode nodes follow the dents */
+       if (new_inode) {
+               pack_inode(c, p, new_inode, 0, last_reference);
+               p += ALIGN(ilen, 8);
+       }
+
+       if (!move)
+               pack_inode(c, p, old_dir, 1, 0);
+       else {
+               pack_inode(c, p, old_dir, 0, 0);
+               p += ALIGN(plen, 8);
+               pack_inode(c, p, new_dir, 1, 0);
+       }
+
+       if (last_reference) {
+               err = ubifs_add_orphan(c, new_inode->i_ino);
+               if (err) {
+                       release_head(c, BASEHD);
+                       goto out_finish;
+               }
+       }
+
+       err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
+       if (err)
+               goto out_release;
+       if (!sync) {
+               struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
+
+               ubifs_wbuf_add_ino_nolock(wbuf, new_dir->i_ino);
+               ubifs_wbuf_add_ino_nolock(wbuf, old_dir->i_ino);
+               if (new_inode)
+                       ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
+                                                 new_inode->i_ino);
+       }
+       release_head(c, BASEHD);
+
+       dent_key_init(c, &key, new_dir->i_ino, &new_dentry->d_name);
+       err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, &new_dentry->d_name);
+       if (err)
+               goto out_ro;
+
+       err = ubifs_add_dirt(c, lnum, dlen2);
+       if (err)
+               goto out_ro;
+
+       dent_key_init(c, &key, old_dir->i_ino, &old_dentry->d_name);
+       err = ubifs_tnc_remove_nm(c, &key, &old_dentry->d_name);
+       if (err)
+               goto out_ro;
+
+       offs += aligned_dlen1 + aligned_dlen2; /* skip both dents */
+       if (new_inode) {
+               ino_key_init(c, &key, new_inode->i_ino);
+               err = ubifs_tnc_add(c, &key, lnum, offs, ilen);
+               if (err)
+                       goto out_ro;
+               offs += ALIGN(ilen, 8);
+       }
+
+       ino_key_init(c, &key, old_dir->i_ino);
+       err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+       if (err)
+               goto out_ro;
+
+       if (old_dir != new_dir) {
+               offs += ALIGN(plen, 8);
+               ino_key_init(c, &key, new_dir->i_ino);
+               err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+               if (err)
+                       goto out_ro;
+       }
+
+       finish_reservation(c);
+       if (new_inode) {
+               mark_inode_clean(c, new_ui);
+               spin_lock(&new_ui->ui_lock);
+               new_ui->synced_i_size = new_ui->ui_size;
+               spin_unlock(&new_ui->ui_lock);
+       }
+       mark_inode_clean(c, ubifs_inode(old_dir));
+       if (move)
+               mark_inode_clean(c, ubifs_inode(new_dir));
+       kfree(dent);
+       return 0;
+
+out_release:
+       release_head(c, BASEHD);
+out_ro:
+       ubifs_ro_mode(c, err);
+       if (last_reference)
+               ubifs_delete_orphan(c, new_inode->i_ino);
+out_finish:
+       finish_reservation(c);
+out_free:
+       kfree(dent);
+       return err;
+}
+
+/**
+ * recomp_data_node - re-compress a truncated data node.
+ * @dn: data node to re-compress
+ * @new_len: on input, the new (truncated) length of the uncompressed data; on
+ *           successful return, the new length of the whole data node
+ *
+ * This function is used when an inode is truncated and the last data node of
+ * the inode has to be re-compressed and re-written. The node is decompressed
+ * into a temporary buffer, the first *@new_len bytes are compressed back into
+ * @dn, and the size and compression type fields of @dn are updated.
+ *
+ * Returns zero in case of success and a negative error code in case of
+ * failure (%-ENOMEM or the error returned by 'ubifs_decompress()').
+ */
+static int recomp_data_node(struct ubifs_data_node *dn, int *new_len)
+{
+       void *buf;
+       int err, len, compr_type, out_len;
+
+       out_len = le32_to_cpu(dn->size);
+       buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS);
+       if (!buf)
+               return -ENOMEM;
+
+       len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
+       compr_type = le16_to_cpu(dn->compr_type);
+       err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type);
+       if (err)
+               goto out;
+
+       ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type);
+       ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
+       dn->compr_type = cpu_to_le16(compr_type);
+       dn->size = cpu_to_le32(*new_len);
+       *new_len = UBIFS_DATA_NODE_SZ + out_len;
+out:
+       kfree(buf);
+       return err;
+}
+
+/**
+ * ubifs_jnl_truncate - update the journal for a truncation.
+ * @c: UBIFS file-system description object
+ * @inode: inode to truncate
+ * @old_size: old size
+ * @new_size: new size
+ *
+ * When the size of a file decreases due to truncation, a truncation node is
+ * written, the journal tree is updated, and the last data block is re-written
+ * if it has been affected. The inode is also updated in order to synchronize
+ * the new inode size.
+ *
+ * The inode node, the truncation node and (if needed) the re-written last
+ * data node are placed in one buffer, in this order, and written to the base
+ * head with a single write. The truncation node is not added to the TNC; its
+ * space is reported to 'ubifs_add_dirt()' right away. The keys of the data
+ * blocks which lie between the new and the old end of the file are then
+ * removed from the TNC.
+ *
+ * This function marks the inode as clean and returns zero on success. In case
+ * of failure, a negative error code is returned.
+ */
+int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
+                      loff_t old_size, loff_t new_size)
+{
+       union ubifs_key key, to_key;
+       struct ubifs_ino_node *ino;
+       struct ubifs_trun_node *trun;
+       struct ubifs_data_node *uninitialized_var(dn);
+       int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode);
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       ino_t inum = inode->i_ino;
+       unsigned int blk;
+
+       dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size);
+       ubifs_assert(!ui->data_len);
+       ubifs_assert(S_ISREG(inode->i_mode));
+       ubifs_assert(mutex_is_locked(&ui->ui_mutex));
+
+       sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
+            UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR;
+       ino = kmalloc(sz, GFP_NOFS);
+       if (!ino)
+               return -ENOMEM;
+
+       trun = (void *)ino + UBIFS_INO_NODE_SZ;
+       trun->ch.node_type = UBIFS_TRUN_NODE;
+       trun->inum = cpu_to_le32(inum);
+       trun->old_size = cpu_to_le64(old_size);
+       trun->new_size = cpu_to_le64(new_size);
+       zero_trun_node_unused(trun);
+
+       dlen = new_size & (UBIFS_BLOCK_SIZE - 1);
+       if (dlen) {
+               /* Get last data block so it can be truncated */
+               dn = (void *)trun + UBIFS_TRUN_NODE_SZ;
+               blk = new_size >> UBIFS_BLOCK_SHIFT;
+               data_key_init(c, &key, inum, blk);
+               dbg_jnl("last block key %s", DBGKEY(&key));
+               err = ubifs_tnc_lookup(c, &key, dn);
+               if (err == -ENOENT)
+                       dlen = 0; /* Not found (so it is a hole) */
+               else if (err)
+                       goto out_free;
+               else {
+                       if (le32_to_cpu(dn->size) <= dlen)
+                               dlen = 0; /* Nothing to do */
+                       else {
+                               int compr_type = le16_to_cpu(dn->compr_type);
+
+                               if (compr_type != UBIFS_COMPR_NONE) {
+                                       err = recomp_data_node(dn, &dlen);
+                                       if (err)
+                                               goto out_free;
+                               } else {
+                                       dn->size = cpu_to_le32(dlen);
+                                       dlen += UBIFS_DATA_NODE_SZ;
+                               }
+                               zero_data_node_unused(dn);
+                       }
+               }
+       }
+
+       /*
+        * Must make reservation before allocating sequence numbers. From here
+        * on a non-zero 'dlen' is the length of the whole data node which has
+        * to be re-written, not the length of its data.
+        */
+       len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ;
+       if (dlen)
+               len += dlen;
+       err = make_reservation(c, BASEHD, len);
+       if (err)
+               goto out_free;
+
+       pack_inode(c, ino, inode, 0, 0);
+       ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
+       if (dlen)
+               ubifs_prep_grp_node(c, dn, dlen, 1);
+
+       err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
+       if (err)
+               goto out_release;
+       if (!sync)
+               ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum);
+       release_head(c, BASEHD);
+
+       if (dlen) {
+               sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ;
+               err = ubifs_tnc_add(c, &key, lnum, sz, dlen);
+               if (err)
+                       goto out_ro;
+       }
+
+       ino_key_init(c, &key, inum);
+       err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ);
+       if (err)
+               goto out_ro;
+
+       err = ubifs_add_dirt(c, lnum, UBIFS_TRUN_NODE_SZ);
+       if (err)
+               goto out_ro;
+
+       bit = new_size & (UBIFS_BLOCK_SIZE - 1);
+       blk = (new_size >> UBIFS_BLOCK_SHIFT) + (bit ? 1 : 0);
+       data_key_init(c, &key, inum, blk);
+
+       bit = old_size & (UBIFS_BLOCK_SIZE - 1);
+       blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1);
+       data_key_init(c, &to_key, inum, blk);
+
+       err = ubifs_tnc_remove_range(c, &key, &to_key);
+       if (err)
+               goto out_ro;
+
+       finish_reservation(c);
+       spin_lock(&ui->ui_lock);
+       ui->synced_i_size = ui->ui_size;
+       spin_unlock(&ui->ui_lock);
+       mark_inode_clean(c, ui);
+       kfree(ino);
+       return 0;
+
+out_release:
+       release_head(c, BASEHD);
+out_ro:
+       ubifs_ro_mode(c, err);
+       finish_reservation(c);
+out_free:
+       kfree(ino);
+       return err;
+}
+
+#ifdef CONFIG_UBIFS_FS_XATTR
+
+/**
+ * ubifs_jnl_delete_xattr - delete an extended attribute.
+ * @c: UBIFS file-system description object
+ * @host: host inode
+ * @inode: extended attribute inode
+ * @nm: extended attribute entry name
+ *
+ * This function deletes an extended attribute which is very similar to
+ * un-linking regular files - it writes a deletion xentry, a deletion inode and
+ * updates the target inode. The three nodes are written to the base head with
+ * a single write. Afterwards the xentry and all keys of the extended
+ * attribute inode are removed from the TNC, the space of the deletion xentry
+ * and of the deletion inode is reported to 'ubifs_add_dirt()', and the TNC is
+ * pointed to the new host inode node.
+ *
+ * Returns zero in case of success and a negative error code in case of
+ * failure. Any failure after the journal space was reserved switches the
+ * file-system to read-only mode.
+ */
+int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
+                          const struct inode *inode, const struct qstr *nm)
+{
+       int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen;
+       struct ubifs_dent_node *xent;
+       struct ubifs_ino_node *ino;
+       union ubifs_key xent_key, key1, key2;
+       int sync = IS_DIRSYNC(host);
+       struct ubifs_inode *host_ui = ubifs_inode(host);
+
+       dbg_jnl("host %lu, xattr ino %lu, name '%s', data len %d",
+               host->i_ino, inode->i_ino, nm->name,
+               ubifs_inode(inode)->data_len);
+       ubifs_assert(inode->i_nlink == 0);
+       ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
+
+       /*
+        * Since we are deleting the inode, we do not bother to attach any data
+        * to it and assume its length is %UBIFS_INO_NODE_SZ.
+        */
+       xlen = UBIFS_DENT_NODE_SZ + nm->len + 1;
+       aligned_xlen = ALIGN(xlen, 8);
+       hlen = host_ui->data_len + UBIFS_INO_NODE_SZ;
+       len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8);
+
+       xent = kmalloc(len, GFP_NOFS);
+       if (!xent)
+               return -ENOMEM;
+
+       /* Make reservation before allocating sequence numbers */
+       err = make_reservation(c, BASEHD, len);
+       if (err) {
+               kfree(xent);
+               return err;
+       }
+
+       xent->ch.node_type = UBIFS_XENT_NODE;
+       xent_key_init(c, &xent_key, host->i_ino, nm);
+       key_write(c, &xent_key, xent->key);
+       xent->inum = 0;
+       xent->type = get_dent_type(inode->i_mode);
+       xent->nlen = cpu_to_le16(nm->len);
+       memcpy(xent->name, nm->name, nm->len);
+       xent->name[nm->len] = '\0';
+       zero_dent_node_unused(xent);
+       ubifs_prep_grp_node(c, xent, xlen, 0);
+
+       ino = (void *)xent + aligned_xlen;
+       pack_inode(c, ino, inode, 0, 1);
+       ino = (void *)ino + UBIFS_INO_NODE_SZ;
+       pack_inode(c, ino, host, 1, 0);
+
+       err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
+       if (!sync && !err)
+               ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino);
+       release_head(c, BASEHD);
+       kfree(xent);
+       if (err)
+               goto out_ro;
+
+       /* Remove the extended attribute entry from TNC */
+       err = ubifs_tnc_remove_nm(c, &xent_key, nm);
+       if (err)
+               goto out_ro;
+       err = ubifs_add_dirt(c, lnum, xlen);
+       if (err)
+               goto out_ro;
+
+       /*
+        * Remove all nodes belonging to the extended attribute inode from TNC.
+        * Well, there actually must be only one node - the inode itself.
+        */
+       lowest_ino_key(c, &key1, inode->i_ino);
+       highest_ino_key(c, &key2, inode->i_ino);
+       err = ubifs_tnc_remove_range(c, &key1, &key2);
+       if (err)
+               goto out_ro;
+       err = ubifs_add_dirt(c, lnum, UBIFS_INO_NODE_SZ);
+       if (err)
+               goto out_ro;
+
+       /*
+        * And update TNC with the new host inode position.
+        *
+        * NOTE(review): the host inode node was packed at buffer offset
+        * 'aligned_xlen + UBIFS_INO_NODE_SZ', while the offset used below is
+        * 'xent_offs + len - hlen'. The two agree only when 'hlen' is 8-byte
+        * aligned - TODO confirm that 'host_ui->data_len' cannot break this.
+        */
+       ino_key_init(c, &key1, host->i_ino);
+       err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen);
+       if (err)
+               goto out_ro;
+
+       finish_reservation(c);
+       spin_lock(&host_ui->ui_lock);
+       host_ui->synced_i_size = host_ui->ui_size;
+       spin_unlock(&host_ui->ui_lock);
+       mark_inode_clean(c, host_ui);
+       return 0;
+
+out_ro:
+       ubifs_ro_mode(c, err);
+       finish_reservation(c);
+       return err;
+}
+
+/**
+ * ubifs_jnl_change_xattr - change an extended attribute.
+ * @c: UBIFS file-system description object
+ * @inode: extended attribute inode
+ * @host: host inode
+ *
+ * This function writes the updated version of an extended attribute inode and
+ * the host inode to the journal (to the base head). Both inode nodes are
+ * built in one buffer and written with a single write: the host inode node
+ * comes first and the extended attribute inode node follows it. On success
+ * the TNC is updated for both inodes and the host inode is marked as clean.
+ *
+ * NOTE(review): the previous version of this comment stated that the host
+ * inode is written after the extended attribute inode, which is the opposite
+ * of what the code does - confirm which order is intended.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. Any failure after the journal space was reserved switches
+ * the file-system to read-only mode.
+ */
+int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
+                          const struct inode *host)
+{
+       int err, len1, len2, aligned_len, aligned_len1, lnum, offs;
+       /*
+        * This has to be the UBIFS inode of @host, not of @inode: it supplies
+        * the length of the host inode node ('len1'), and it is the inode
+        * whose mutex is checked, whose synchronized size is updated and which
+        * is marked as clean below.
+        */
+       struct ubifs_inode *host_ui = ubifs_inode(host);
+       struct ubifs_ino_node *ino;
+       union ubifs_key key;
+       int sync = IS_DIRSYNC(host);
+
+       dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino);
+       ubifs_assert(host->i_nlink > 0);
+       ubifs_assert(inode->i_nlink > 0);
+       ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
+
+       /* 'len1' is the host inode node, 'len2' the xattr inode node */
+       len1 = UBIFS_INO_NODE_SZ + host_ui->data_len;
+       len2 = UBIFS_INO_NODE_SZ + ubifs_inode(inode)->data_len;
+       aligned_len1 = ALIGN(len1, 8);
+       aligned_len = aligned_len1 + ALIGN(len2, 8);
+
+       ino = kmalloc(aligned_len, GFP_NOFS);
+       if (!ino)
+               return -ENOMEM;
+
+       /* Make reservation before allocating sequence numbers */
+       err = make_reservation(c, BASEHD, aligned_len);
+       if (err)
+               goto out_free;
+
+       pack_inode(c, ino, host, 0, 0);
+       pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0);
+
+       /*
+        * NOTE(review): the write itself is never synchronous (the last
+        * argument is 0), and when 'sync' is set the inode numbers are not
+        * added to the write-buffer either - confirm this is intended.
+        */
+       err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
+       if (!sync && !err) {
+               struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
+
+               ubifs_wbuf_add_ino_nolock(wbuf, host->i_ino);
+               ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino);
+       }
+       release_head(c, BASEHD);
+       if (err)
+               goto out_ro;
+
+       ino_key_init(c, &key, host->i_ino);
+       err = ubifs_tnc_add(c, &key, lnum, offs, len1);
+       if (err)
+               goto out_ro;
+
+       ino_key_init(c, &key, inode->i_ino);
+       err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2);
+       if (err)
+               goto out_ro;
+
+       finish_reservation(c);
+       spin_lock(&host_ui->ui_lock);
+       host_ui->synced_i_size = host_ui->ui_size;
+       spin_unlock(&host_ui->ui_lock);
+       mark_inode_clean(c, host_ui);
+       kfree(ino);
+       return 0;
+
+out_ro:
+       ubifs_ro_mode(c, err);
+       finish_reservation(c);
+out_free:
+       kfree(ino);
+       return err;
+}
+
+#endif /* CONFIG_UBIFS_FS_XATTR */
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h

new file mode 100644 (file)

index 0000000..8f74760
--- /dev/null
+++ b/fs/ubifs/key.h
@@ -0,0 +1,533 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This header contains various key-related definitions and helper functions.
+ * UBIFS allows several key schemes, so we access key fields only via these
+ * helpers. At the moment only one key scheme is supported.
+ *
+ * Simple key scheme
+ * ~~~~~~~~~~~~~~~~~
+ *
+ * Keys are 64-bits long. First 32-bits are inode number (parent inode number
+ * in case of direntry key). Next 3 bits are node type. The last 29 bits are
+ * 4KiB offset in case of a data node, and direntry hash in case of a direntry
+ * node. We use "r5" hash borrowed from reiserfs.
+ */
+
+#ifndef __UBIFS_KEY_H__
+#define __UBIFS_KEY_H__
+
+/**
+ * key_r5_hash - R5 hash function (borrowed from reiserfs).
+ * @s: direntry name (has to be NUL-terminated)
+ * @len: name length (not used, the name is hashed up to its NUL terminator)
+ *
+ * Returns the hash of @s masked with %UBIFS_S_KEY_HASH_MASK. The values %0,
+ * %1 and %2 are never returned, they are mapped to %3, %4 and %5.
+ */
+static inline uint32_t key_r5_hash(const char *s, int len)
+{
+       uint32_t a = 0;
+       const signed char *str = (const signed char *)s;
+
+       while (*str) {
+               a += *str << 4;
+               a += *str >> 4;
+               a *= 11;
+               str++;
+       }
+
+       a &= UBIFS_S_KEY_HASH_MASK;
+
+       /*
+        * We use hash values as offset in directories, so values %0 and %1 are
+        * reserved for "." and "..". %2 is reserved for "end of readdir"
+        * marker. The hash is unsigned, so only the upper bound is checked.
+        */
+       if (unlikely(a <= 2))
+               a += 3;
+       return a;
+}
+
+/**
+ * key_test_hash - testing hash function.
+ * @str: direntry name
+ * @len: name length
+ *
+ * The hash is made of at most the first 4 bytes of @str, copied as they are
+ * and masked with %UBIFS_S_KEY_HASH_MASK. The values %0, %1 and %2 are never
+ * returned, they are mapped to %3, %4 and %5.
+ */
+static inline uint32_t key_test_hash(const char *str, int len)
+{
+       uint32_t a = 0;
+
+       len = min_t(uint32_t, len, 4);
+       memcpy(&a, str, len);
+       a &= UBIFS_S_KEY_HASH_MASK;
+       /* The hash is unsigned, so only the upper bound has to be checked */
+       if (unlikely(a <= 2))
+               a += 3;
+       return a;
+}
+
+/**
+ * ino_key_init - fill in the in-memory key of an inode node.
+ * @c: UBIFS file-system description object
+ * @key: key to fill in
+ * @inum: inode number
+ *
+ * The first 32-bit word of @key is set to @inum, the second one to
+ * %UBIFS_INO_KEY shifted left by %UBIFS_S_KEY_BLOCK_BITS.
+ */
+static inline void ino_key_init(const struct ubifs_info *c,
+                               union ubifs_key *key, ino_t inum)
+{
+       const uint32_t type_bits = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS;
+
+       key->u32[1] = type_bits;
+       key->u32[0] = inum;
+}
+
+/**
+ * ino_key_init_flash - fill in the on-flash key of an inode node.
+ * @c: UBIFS file-system description object
+ * @k: on-flash key to fill in
+ * @inum: inode number
+ *
+ * The first two 32-bit words are stored in little-endian byte order, the rest
+ * of the %UBIFS_MAX_KEY_LEN bytes of the key is zeroed.
+ */
+static inline void ino_key_init_flash(const struct ubifs_info *c, void *k,
+                                     ino_t inum)
+{
+       const uint32_t type_bits = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS;
+       union ubifs_key *flash_key = k;
+
+       flash_key->j32[0] = cpu_to_le32(inum);
+       flash_key->j32[1] = cpu_to_le32(type_bits);
+       /* Zero everything which follows the 8 bytes written above */
+       memset((char *)k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
+}
+
+/**
+ * lowest_ino_key - make the lowest possible key of an inode.
+ * @c: UBIFS file-system description object
+ * @key: where to store the key
+ * @inum: inode number
+ *
+ * The first 32-bit word of @key is set to @inum and the second one to zero.
+ */
+static inline void lowest_ino_key(const struct ubifs_info *c,
+                               union ubifs_key *key, ino_t inum)
+{
+       const uint32_t lowest = 0;
+
+       key->u32[1] = lowest;
+       key->u32[0] = inum;
+}
+
+/**
+ * highest_ino_key - make the highest possible key of an inode.
+ * @c: UBIFS file-system description object
+ * @key: where to store the key
+ * @inum: inode number
+ *
+ * The first 32-bit word of @key is set to @inum and all bits of the second
+ * one are set.
+ */
+static inline void highest_ino_key(const struct ubifs_info *c,
+                               union ubifs_key *key, ino_t inum)
+{
+       const uint32_t highest = 0xffffffff;
+
+       key->u32[1] = highest;
+       key->u32[0] = inum;
+}
+
+/**
+ * dent_key_init - fill in the in-memory key of a directory entry.
+ * @c: UBIFS file-system description object
+ * @key: key to fill in
+ * @inum: parent inode number
+ * @nm: direntry name and length
+ *
+ * The name is hashed with the hash function of @c. The first 32-bit word of
+ * @key is set to @inum, the second one combines the hash with
+ * %UBIFS_DENT_KEY shifted left by %UBIFS_S_KEY_HASH_BITS.
+ */
+static inline void dent_key_init(const struct ubifs_info *c,
+                                union ubifs_key *key, ino_t inum,
+                                const struct qstr *nm)
+{
+       const uint32_t type_bits = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS;
+       uint32_t hash;
+
+       hash = c->key_hash(nm->name, nm->len);
+       /* The hash must not have bits outside of the hash mask */
+       ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+       key->u32[1] = hash | type_bits;
+       key->u32[0] = inum;
+}
+
+/**
+ * dent_key_init_hash - fill in the in-memory key of a directory entry using
+ *                      an already calculated name hash.
+ * @c: UBIFS file-system description object
+ * @key: key to fill in
+ * @inum: parent inode number
+ * @hash: direntry name hash
+ *
+ * The first 32-bit word of @key is set to @inum, the second one combines
+ * @hash with %UBIFS_DENT_KEY shifted left by %UBIFS_S_KEY_HASH_BITS.
+ */
+static inline void dent_key_init_hash(const struct ubifs_info *c,
+                                     union ubifs_key *key, ino_t inum,
+                                     uint32_t hash)
+{
+       const uint32_t type_bits = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS;
+
+       /* The hash must not have bits outside of the hash mask */
+       ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+       key->u32[1] = hash | type_bits;
+       key->u32[0] = inum;
+}
+
+/**
+ * dent_key_init_flash - fill in the on-flash key of a directory entry.
+ * @c: UBIFS file-system description object
+ * @k: on-flash key to fill in
+ * @inum: parent inode number
+ * @nm: direntry name and length
+ *
+ * The name is hashed with the hash function of @c. The first two 32-bit words
+ * are stored in little-endian byte order, the rest of the
+ * %UBIFS_MAX_KEY_LEN bytes of the key is zeroed.
+ */
+static inline void dent_key_init_flash(const struct ubifs_info *c, void *k,
+                                      ino_t inum, const struct qstr *nm)
+{
+       const uint32_t type_bits = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS;
+       union ubifs_key *flash_key = k;
+       uint32_t hash;
+
+       hash = c->key_hash(nm->name, nm->len);
+       /* The hash must not have bits outside of the hash mask */
+       ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+       flash_key->j32[0] = cpu_to_le32(inum);
+       flash_key->j32[1] = cpu_to_le32(hash | type_bits);
+       /* Zero everything which follows the 8 bytes written above */
+       memset((char *)k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
+}
+
+/**
+ * lowest_dent_key - make the lowest possible directory entry key.
+ * @c: UBIFS file-system description object
+ * @key: where to store the key
+ * @inum: parent inode number
+ *
+ * The first 32-bit word of @key is set to @inum, the second one to
+ * %UBIFS_DENT_KEY shifted left by %UBIFS_S_KEY_HASH_BITS (no hash bits set).
+ */
+static inline void lowest_dent_key(const struct ubifs_info *c,
+                                  union ubifs_key *key, ino_t inum)
+{
+       const uint32_t type_bits = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS;
+
+       key->u32[1] = type_bits;
+       key->u32[0] = inum;
+}
+
+/**
+ * xent_key_init - fill in the in-memory key of an extended attribute entry.
+ * @c: UBIFS file-system description object
+ * @key: key to fill in
+ * @inum: host inode number
+ * @nm: extended attribute entry name and length
+ *
+ * The name is hashed with the hash function of @c. The first 32-bit word of
+ * @key is set to @inum, the second one combines the hash with
+ * %UBIFS_XENT_KEY shifted left by %UBIFS_S_KEY_HASH_BITS.
+ */
+static inline void xent_key_init(const struct ubifs_info *c,
+                                union ubifs_key *key, ino_t inum,
+                                const struct qstr *nm)
+{
+       const uint32_t type_bits = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS;
+       uint32_t hash;
+
+       hash = c->key_hash(nm->name, nm->len);
+       /* The hash must not have bits outside of the hash mask */
+       ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+       key->u32[1] = hash | type_bits;
+       key->u32[0] = inum;
+}
+
+/**
+ * xent_key_init_hash - fill in the in-memory key of an extended attribute
+ *                      entry using an already calculated name hash.
+ * @c: UBIFS file-system description object
+ * @key: key to fill in
+ * @inum: host inode number
+ * @hash: extended attribute entry name hash
+ *
+ * The first 32-bit word of @key is set to @inum, the second one combines
+ * @hash with %UBIFS_XENT_KEY shifted left by %UBIFS_S_KEY_HASH_BITS.
+ */
+static inline void xent_key_init_hash(const struct ubifs_info *c,
+                                     union ubifs_key *key, ino_t inum,
+                                     uint32_t hash)
+{
+       const uint32_t type_bits = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS;
+
+       /* The hash must not have bits outside of the hash mask */
+       ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+       key->u32[1] = hash | type_bits;
+       key->u32[0] = inum;
+}
+
+/**
+ * xent_key_init_flash - set up an on-flash extended attribute entry key.
+ * @c: UBIFS file-system description object
+ * @k: key to initialize
+ * @inum: host inode number
+ * @nm: extended attribute entry name and length
+ *
+ * Both key words are stored through 'cpu_to_le32()'. The remaining
+ * %UBIFS_MAX_KEY_LEN - 8 bytes of the key area are zeroed.
+ */
+static inline void xent_key_init_flash(const struct ubifs_info *c, void *k,
+                                      ino_t inum, const struct qstr *nm)
+{
+       union ubifs_key *key = k;
+       uint32_t name_hash, word;
+
+       name_hash = c->key_hash(nm->name, nm->len);
+       ubifs_assert(!(name_hash & ~UBIFS_S_KEY_HASH_MASK));
+       word = name_hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
+
+       key->j32[0] = cpu_to_le32(inum);
+       key->j32[1] = cpu_to_le32(word);
+       /* Clear everything past the two 32-bit words just written */
+       memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
+}
+
+/**
+ * lowest_xent_key - build the smallest extended attribute entry key.
+ * @c: UBIFS file-system description object
+ * @key: output key
+ * @inum: host inode number
+ *
+ * The resulting key has the extended attribute entry type in its second word
+ * and an all-zero hash part.
+ */
+static inline void lowest_xent_key(const struct ubifs_info *c,
+                                  union ubifs_key *key, ino_t inum)
+{
+       /* Second word: key type shifted above the (zero) hash bits */
+       key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS;
+       /* First word: the inode number */
+       key->u32[0] = inum;
+}
+
+/**
+ * data_key_init - set up an in-memory data key.
+ * @c: UBIFS file-system description object
+ * @key: key to initialize
+ * @inum: inode number
+ * @block: block number
+ *
+ * @block is asserted to fit into %UBIFS_S_KEY_BLOCK_MASK.
+ */
+static inline void data_key_init(const struct ubifs_info *c,
+                                union ubifs_key *key, ino_t inum,
+                                unsigned int block)
+{
+       ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
+
+       /* Word 1 combines the key type with the block number */
+       key->u32[1] = (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS) | block;
+       /* Word 0 is the inode number */
+       key->u32[0] = inum;
+}
+
+/**
+ * data_key_init_flash - set up an on-flash data key.
+ * @c: UBIFS file-system description object
+ * @k: key to initialize
+ * @inum: inode number
+ * @block: block number
+ *
+ * @block is asserted to fit into %UBIFS_S_KEY_BLOCK_MASK. Both key words are
+ * stored through 'cpu_to_le32()' and the remaining %UBIFS_MAX_KEY_LEN - 8
+ * bytes of the key area are zeroed.
+ */
+static inline void data_key_init_flash(const struct ubifs_info *c, void *k,
+                                      ino_t inum, unsigned int block)
+{
+       union ubifs_key *key = k;
+       unsigned int word;
+
+       ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
+       word = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS);
+
+       key->j32[0] = cpu_to_le32(inum);
+       key->j32[1] = cpu_to_le32(word);
+       /* Clear everything past the two 32-bit words just written */
+       memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
+}
+
+/**
+ * trun_key_init - set up a truncation node key.
+ * @c: UBIFS file-system description object
+ * @key: key to initialize
+ * @inum: inode number
+ *
+ * Truncation keys never appear on the media; this helper exists only for the
+ * needs of replay.
+ */
+static inline void trun_key_init(const struct ubifs_info *c,
+                                union ubifs_key *key, ino_t inum)
+{
+       /* Second word: key type only, block part left at zero */
+       key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS;
+       /* First word: the inode number */
+       key->u32[0] = inum;
+}
+
+/**
+ * key_type - extract the type of an in-memory key.
+ * @c: UBIFS file-system description object
+ * @key: key to get type of
+ *
+ * The type is whatever remains of the second key word after shifting out the
+ * low %UBIFS_S_KEY_BLOCK_BITS bits.
+ */
+static inline int key_type(const struct ubifs_info *c,
+                          const union ubifs_key *key)
+{
+       const uint32_t word = key->u32[1];
+
+       return word >> UBIFS_S_KEY_BLOCK_BITS;
+}
+
+/**
+ * key_type_flash - get type of an on-flash formatted key.
+ * @c: UBIFS file-system description object
+ * @k: key to get type of
+ *
+ * Returns the key type, i.e. what remains of the second key word after
+ * converting it with 'le32_to_cpu()' and shifting out the low
+ * %UBIFS_S_KEY_BLOCK_BITS bits.
+ */
+static inline int key_type_flash(const struct ubifs_info *c, const void *k)
+{
+       const union ubifs_key *key = k;
+
+       /*
+        * Read the word through the 'j32' view, like the other *_flash
+        * helpers do, so that the value handed to 'le32_to_cpu()' is the
+        * on-flash representation rather than the CPU-order 'u32' view.
+        */
+       return le32_to_cpu(key->j32[1]) >> UBIFS_S_KEY_BLOCK_BITS;
+}
+
+/**
+ * key_inum - read the inode number stored in an in-memory key.
+ * @c: UBIFS file-system description object
+ * @k: key to fetch inode number from
+ *
+ * The inode number is the first 32-bit word of the key.
+ */
+static inline ino_t key_inum(const struct ubifs_info *c, const void *k)
+{
+       return ((const union ubifs_key *)k)->u32[0];
+}
+
+/**
+ * key_inum_flash - read the inode number stored in an on-flash key.
+ * @c: UBIFS file-system description object
+ * @k: key to fetch inode number from
+ *
+ * The first key word is converted with 'le32_to_cpu()' before being returned.
+ */
+static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k)
+{
+       return le32_to_cpu(((const union ubifs_key *)k)->j32[0]);
+}
+
+/**
+ * key_hash - extract the name hash from an in-memory key.
+ * @c: UBIFS file-system description object
+ * @key: the key to get hash from
+ *
+ * The hash is the second key word masked with %UBIFS_S_KEY_HASH_MASK.
+ */
+static inline int key_hash(const struct ubifs_info *c,
+                          const union ubifs_key *key)
+{
+       const uint32_t word = key->u32[1];
+
+       return word & UBIFS_S_KEY_HASH_MASK;
+}
+
+/**
+ * key_hash_flash - extract the name hash from an on-flash formatted key.
+ * @c: UBIFS file-system description object
+ * @k: the key to get hash from
+ *
+ * The second key word is converted with 'le32_to_cpu()' and then masked with
+ * %UBIFS_S_KEY_HASH_MASK.
+ */
+static inline int key_hash_flash(const struct ubifs_info *c, const void *k)
+{
+       const union ubifs_key *key = k;
+       const uint32_t word = le32_to_cpu(key->j32[1]);
+
+       return word & UBIFS_S_KEY_HASH_MASK;
+}
+
+/**
+ * key_block - extract the data block number from an in-memory key.
+ * @c: UBIFS file-system description object
+ * @key: the key to get the block number from
+ *
+ * The block number is the second key word masked with
+ * %UBIFS_S_KEY_BLOCK_MASK.
+ */
+static inline unsigned int key_block(const struct ubifs_info *c,
+                                    const union ubifs_key *key)
+{
+       const uint32_t word = key->u32[1];
+
+       return word & UBIFS_S_KEY_BLOCK_MASK;
+}
+
+/**
+ * key_block_flash - get data block number from an on-flash formatted key.
+ * @c: UBIFS file-system description object
+ * @k: the key to get the block number from
+ *
+ * Returns the second key word, converted with 'le32_to_cpu()' and masked
+ * with %UBIFS_S_KEY_BLOCK_MASK.
+ */
+static inline unsigned int key_block_flash(const struct ubifs_info *c,
+                                          const void *k)
+{
+       const union ubifs_key *key = k;
+
+       /*
+        * Read the word through the 'j32' view, like the other *_flash
+        * helpers do, so that the value handed to 'le32_to_cpu()' is the
+        * on-flash representation rather than the CPU-order 'u32' view.
+        */
+       return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_BLOCK_MASK;
+}
+
+/**
+ * key_read - convert an on-flash key into the in-memory format.
+ * @c: UBIFS file-system description object
+ * @from: the key to transform
+ * @to: the key to store the result
+ *
+ * Each of the two 32-bit key words is passed through 'le32_to_cpu()'.
+ */
+static inline void key_read(const struct ubifs_info *c, const void *from,
+                           union ubifs_key *to)
+{
+       const union ubifs_key *src = from;
+
+       to->u32[1] = le32_to_cpu(src->j32[1]);
+       to->u32[0] = le32_to_cpu(src->j32[0]);
+}
+
+/**
+ * key_write - convert an in-memory key into the on-flash format.
+ * @c: UBIFS file-system description object
+ * @from: the key to transform
+ * @to: the key to store the result
+ *
+ * Each of the two 32-bit key words is passed through 'cpu_to_le32()' and the
+ * remaining %UBIFS_MAX_KEY_LEN - 8 bytes of the destination are zeroed.
+ */
+static inline void key_write(const struct ubifs_info *c,
+                            const union ubifs_key *from, void *to)
+{
+       union ubifs_key *dst = to;
+
+       dst->j32[1] = cpu_to_le32(from->u32[1]);
+       dst->j32[0] = cpu_to_le32(from->u32[0]);
+       /* Clear everything past the two 32-bit words just written */
+       memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8);
+}
+
+/**
+ * key_write_idx - convert an in-memory key into the format used in the index.
+ * @c: UBIFS file-system description object
+ * @from: the key to transform
+ * @to: the key to store the result
+ *
+ * Unlike 'key_write()', only the two 32-bit key words are written; nothing
+ * past them is touched.
+ */
+static inline void key_write_idx(const struct ubifs_info *c,
+                                const union ubifs_key *from, void *to)
+{
+       union ubifs_key *dst = to;
+
+       dst->j32[1] = cpu_to_le32(from->u32[1]);
+       dst->j32[0] = cpu_to_le32(from->u32[0]);
+}
+
+/**
+ * key_copy - duplicate an in-memory key.
+ * @c: UBIFS file-system description object
+ * @from: the key to copy from
+ * @to: the key to copy to
+ *
+ * Only the first 64-bit word of the key is copied.
+ */
+static inline void key_copy(const struct ubifs_info *c,
+                           const union ubifs_key *from, union ubifs_key *to)
+{
+       const uint64_t word = from->u64[0];
+
+       to->u64[0] = word;
+}
+
+/**
+ * keys_cmp - order two in-memory keys.
+ * @c: UBIFS file-system description object
+ * @key1: the first key to compare
+ * @key2: the second key to compare
+ *
+ * The keys are compared word by word: the first 32-bit word decides, and the
+ * second one is consulted only when the first words are equal. Returns %-1 if
+ * @key1 is less than @key2, %0 if the keys are equivalent and %1 if @key1 is
+ * greater than @key2.
+ */
+static inline int keys_cmp(const struct ubifs_info *c,
+                          const union ubifs_key *key1,
+                          const union ubifs_key *key2)
+{
+       if (key1->u32[0] != key2->u32[0])
+               return key1->u32[0] < key2->u32[0] ? -1 : 1;
+
+       if (key1->u32[1] != key2->u32[1])
+               return key1->u32[1] < key2->u32[1] ? -1 : 1;
+
+       return 0;
+}
+
+/**
+ * is_hash_key - tell whether a key is built from a name hash.
+ * @c: UBIFS file-system description object
+ * @key: key
+ *
+ * Hashed keys are vulnerable to hash collisions. This function returns %1 if
+ * @key is a directory entry or extended attribute entry key and %0 otherwise.
+ */
+static inline int is_hash_key(const struct ubifs_info *c,
+                             const union ubifs_key *key)
+{
+       switch (key_type(c, key)) {
+       case UBIFS_DENT_KEY:
+       case UBIFS_XENT_KEY:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/**
+ * key_max_inode_size - largest file size the current key format can address.
+ * @c: UBIFS file-system description object
+ *
+ * For the simple key format this is the number of distinct block numbers
+ * (2^%UBIFS_S_KEY_BLOCK_BITS) multiplied by %UBIFS_BLOCK_SIZE. Any other key
+ * format yields %0.
+ */
+static inline unsigned long long key_max_inode_size(const struct ubifs_info *c)
+{
+       if (c->key_fmt != UBIFS_SIMPLE_KEY_FMT)
+               return 0;
+
+       return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE;
+}
+#endif /* !__UBIFS_KEY_H__ */
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c

new file mode 100644 (file)

index 0000000..36857b9
--- /dev/null
+++ b/fs/ubifs/log.c
@@ -0,0 +1,805 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file is a part of UBIFS journal implementation and contains various
+ * functions which manipulate the log. The log is a fixed area on the flash
+ * which does not contain any data but refers to buds. The log is a part of the
+ * journal.
+ */
+
+#include "ubifs.h"
+
+/*
+ * 'dbg_check_bud_bytes()' is a real function only when UBIFS debugging is
+ * compiled in. Otherwise it is a macro which evaluates to 0, so callers may
+ * check its result unconditionally.
+ */
+#ifdef CONFIG_UBIFS_FS_DEBUG
+static int dbg_check_bud_bytes(struct ubifs_info *c);
+#else
+#define dbg_check_bud_bytes(c) 0
+#endif
+
+/**
+ * ubifs_search_bud - look a bud LEB up in the buds tree.
+ * @c: UBIFS file-system description object
+ * @lnum: logical eraseblock number to search
+ *
+ * The buds tree is walked under 'c->buds_lock'. Returns the bud description
+ * object whose LEB number is @lnum, or %NULL if there is no such bud.
+ */
+struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum)
+{
+       struct rb_node *node;
+       struct ubifs_bud *found = NULL;
+
+       spin_lock(&c->buds_lock);
+       node = c->buds.rb_node;
+       while (node) {
+               struct ubifs_bud *bud = rb_entry(node, struct ubifs_bud, rb);
+
+               if (lnum == bud->lnum) {
+                       found = bud;
+                       break;
+               }
+               /* The tree is ordered by LEB number */
+               node = lnum < bud->lnum ? node->rb_left : node->rb_right;
+       }
+       spin_unlock(&c->buds_lock);
+
+       return found;
+}
+
+/**
+ * ubifs_get_wbuf - find the write-buffer which serves a LEB, if any.
+ * @c: UBIFS file-system description object
+ * @lnum: logical eraseblock number to search
+ *
+ * Returns %NULL when there are no journal heads ('c->jheads' is %NULL) or when
+ * @lnum is not a bud. Otherwise returns the write-buffer of the journal head
+ * recorded in the bud.
+ */
+struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
+{
+       struct rb_node *node;
+       int jhead = 0, found = 0;
+
+       if (!c->jheads)
+               return NULL;
+
+       spin_lock(&c->buds_lock);
+       node = c->buds.rb_node;
+       while (node) {
+               struct ubifs_bud *bud = rb_entry(node, struct ubifs_bud, rb);
+
+               if (lnum == bud->lnum) {
+                       /* Remember the head number while still locked */
+                       jhead = bud->jhead;
+                       found = 1;
+                       break;
+               }
+               node = lnum < bud->lnum ? node->rb_left : node->rb_right;
+       }
+       spin_unlock(&c->buds_lock);
+
+       if (!found)
+               return NULL;
+       return &c->jheads[jhead].wbuf;
+}
+
+/**
+ * next_log_lnum - get the log LEB which follows a given one.
+ * @c: UBIFS file-system description object
+ * @lnum: current log LEB
+ *
+ * The log is circular: the LEB after 'c->log_last' is %UBIFS_LOG_LNUM.
+ */
+static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
+{
+       int next = lnum + 1;
+
+       return next > c->log_last ? UBIFS_LOG_LNUM : next;
+}
+
+/**
+ * empty_log_bytes - calculate amount of empty space in the log.
+ * @c: UBIFS file-system description object
+ *
+ * The log head and the log tail are converted into byte positions (LEB number
+ * times LEB size, plus the head offset for the head) and the distance from
+ * the head forward to the tail, wrapping at 'c->log_bytes', is returned.
+ */
+static inline long long empty_log_bytes(const struct ubifs_info *c)
+{
+       long long h, t;
+
+       h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
+       t = (long long)c->ltail_lnum * c->leb_size;
+
+       if (h > t)
+               return c->log_bytes - h + t;
+       else if (h != t)
+               return t - h;
+       else if (c->lhead_lnum != c->ltail_lnum)
+               /*
+                * Positions are equal although head and tail are in different
+                * LEBs: the head offset has reached the end of the LEB which
+                * immediately precedes the tail ('c->lhead_offs' is advanced
+                * without wrapping after a reference node is written). The
+                * log is full, not empty.
+                */
+               return 0;
+       else
+               /* Head sits at offset 0 of the tail LEB - log is empty */
+               return c->log_bytes;
+}
+
+/**
+ * ubifs_add_bud - insert a bud into the buds tree and its journal head list.
+ * @c: UBIFS file-system description object
+ * @bud: the bud to add
+ *
+ * Everything is done under 'c->buds_lock'. The bud is linked into the tree
+ * ordered by LEB number, appended to the buds list of its journal head when
+ * journal heads exist, and its space is added to 'c->bud_bytes'.
+ */
+void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
+{
+       struct rb_node **link, *parent = NULL;
+
+       spin_lock(&c->buds_lock);
+
+       /* Find the leaf position; a bud with the same LEB must not exist */
+       for (link = &c->buds.rb_node; *link; ) {
+               struct ubifs_bud *cur;
+
+               parent = *link;
+               cur = rb_entry(parent, struct ubifs_bud, rb);
+               ubifs_assert(bud->lnum != cur->lnum);
+               if (bud->lnum < cur->lnum)
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+       rb_link_node(&bud->rb, parent, link);
+       rb_insert_color(&bud->rb, &c->buds);
+
+       if (c->jheads) {
+               struct ubifs_jhead *head = &c->jheads[bud->jhead];
+
+               list_add_tail(&bud->list, &head->buds_list);
+       } else
+               ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY));
+
+       /*
+        * The bud is new and nothing has been written to it yet, but its
+        * space is accounted right away: the accounting exists to guarantee
+        * fixed mount time, and this bud will be read and scanned anyway.
+        */
+       c->bud_bytes += c->leb_size - bud->start;
+
+       dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum,
+               bud->start, bud->jhead, c->bud_bytes);
+       spin_unlock(&c->buds_lock);
+}
+
+/**
+ * ubifs_create_buds_lists - build journal head buds lists for remount rw.
+ * @c: UBIFS file-system description object
+ *
+ * Walks the whole buds tree under 'c->buds_lock' and appends every bud to the
+ * buds list of the journal head it belongs to.
+ */
+void ubifs_create_buds_lists(struct ubifs_info *c)
+{
+       struct rb_node *node;
+
+       spin_lock(&c->buds_lock);
+       for (node = rb_first(&c->buds); node; node = rb_next(node)) {
+               struct ubifs_bud *bud = rb_entry(node, struct ubifs_bud, rb);
+
+               list_add_tail(&bud->list, &c->jheads[bud->jhead].buds_list);
+       }
+       spin_unlock(&c->buds_lock);
+}
+
+/**
+ * ubifs_add_bud_to_log - add a new bud to the log.
+ * @c: UBIFS file-system description object
+ * @jhead: journal head the bud belongs to
+ * @lnum: LEB number of the bud
+ * @offs: starting offset of the bud
+ *
+ * This function writes a reference node for the new bud LEB @lnum to the log,
+ * and adds the bud to the buds tree. It also makes sure that log size does not
+ * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
+ * %-EAGAIN if commit is required, and a negative error code in case of
+ * failure (%-ENOMEM if memory could not be allocated, %-EROFS if the media is
+ * read-only).
+ */
+int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
+{
+       int err;
+       struct ubifs_bud *bud;
+       struct ubifs_ref_node *ref;
+
+       /* Allocate both objects up-front, before the log mutex is taken */
+       bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
+       if (!bud)
+               return -ENOMEM;
+       ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
+       if (!ref) {
+               kfree(bud);
+               return -ENOMEM;
+       }
+
+       mutex_lock(&c->log_mutex);
+
+       if (c->ro_media) {
+               err = -EROFS;
+               goto out_unlock;
+       }
+
+       /* Make sure we have enough space in the log */
+       if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
+               dbg_log("not enough log space - %lld, required %d",
+                       empty_log_bytes(c), c->min_log_bytes);
+               ubifs_commit_required(c);
+               err = -EAGAIN;
+               goto out_unlock;
+       }
+
+       /*
+        * Make sure the amount of space in buds will not exceed
+        * 'c->max_bud_bytes' limit, because we want to guarantee mount time
+        * limits.
+        *
+        * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
+        * because we are holding @c->log_mutex. All @c->bud_bytes changes take
+        * place when both @c->log_mutex and @c->buds_lock are held.
+        * NOTE(review): the original text said "@c->bud_bytes are locked",
+        * presumably meaning @c->buds_lock - confirm.
+        */
+       if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
+               dbg_log("bud bytes %lld (%lld max), require commit",
+                       c->bud_bytes, c->max_bud_bytes);
+               ubifs_commit_required(c);
+               err = -EAGAIN;
+               goto out_unlock;
+       }
+
+       /*
+        * If the journal is full enough - start background commit. Note, it is
+        * OK to read 'c->cmt_state' without spinlock because integer reads
+        * are atomic in the kernel.
+        */
+       if (c->bud_bytes >= c->bg_bud_bytes &&
+           c->cmt_state == COMMIT_RESTING) {
+               dbg_log("bud bytes %lld (%lld max), initiate BG commit",
+                       c->bud_bytes, c->max_bud_bytes);
+               ubifs_request_bg_commit(c);
+       }
+
+       bud->lnum = lnum;
+       bud->start = offs;
+       bud->jhead = jhead;
+
+       /* Fill in the reference node which describes the bud */
+       ref->ch.node_type = UBIFS_REF_NODE;
+       ref->lnum = cpu_to_le32(bud->lnum);
+       ref->offs = cpu_to_le32(bud->start);
+       ref->jhead = cpu_to_le32(jhead);
+
+       /* No room for one more reference node - move to the next log LEB */
+       if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
+               c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+               c->lhead_offs = 0;
+       }
+
+       if (c->lhead_offs == 0) {
+               /* Must ensure next log LEB has been unmapped */
+               err = ubifs_leb_unmap(c, c->lhead_lnum);
+               if (err)
+                       goto out_unlock;
+       }
+
+       if (bud->start == 0) {
+               /*
+                * Before writing the LEB reference which refers to an empty
+                * LEB to the log, we have to make sure it is mapped, because
+                * otherwise we'd risk referring to an LEB with garbage in
+                * case of an unclean reboot, because the target LEB might
+                * have been unmapped, but not yet physically erased.
+                */
+               err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM);
+               if (err)
+                       goto out_unlock;
+       }
+
+       dbg_log("write ref LEB %d:%d",
+               c->lhead_lnum, c->lhead_offs);
+       err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
+                              c->lhead_offs, UBI_SHORTTERM);
+       if (err)
+               goto out_unlock;
+
+       /* The log head moves past the node just written (no wrap here) */
+       c->lhead_offs += c->ref_node_alsz;
+
+       /* Only now does the bud become visible in the buds tree */
+       ubifs_add_bud(c, bud);
+
+       mutex_unlock(&c->log_mutex);
+       kfree(ref);
+       return 0;
+
+out_unlock:
+       /* On failure the bud was never added to the tree, so free it too */
+       mutex_unlock(&c->log_mutex);
+       kfree(ref);
+       kfree(bud);
+       return err;
+}
+
+/**
+ * remove_buds - remove used buds.
+ * @c: UBIFS file-system description object
+ *
+ * This function removes used buds from the buds tree. It does not remove the
+ * buds which are pointed to by journal heads. The space covered by the removed
+ * buds, and by the already used part of the preserved buds, is accumulated in
+ * 'c->cmt_bud_bytes'. Removed buds are moved to the 'c->old_buds' list.
+ */
+static void remove_buds(struct ubifs_info *c)
+{
+       struct rb_node *p;
+
+       ubifs_assert(list_empty(&c->old_buds));
+       c->cmt_bud_bytes = 0;
+       spin_lock(&c->buds_lock);
+       p = rb_first(&c->buds);
+       while (p) {
+               struct rb_node *p1 = p;
+               struct ubifs_bud *bud;
+               struct ubifs_wbuf *wbuf;
+
+               /* Advance first, because the current node may be erased */
+               p = rb_next(p);
+               bud = rb_entry(p1, struct ubifs_bud, rb);
+               wbuf = &c->jheads[bud->jhead].wbuf;
+
+               if (wbuf->lnum == bud->lnum) {
+                       /*
+                        * Do not remove buds which are pointed to by journal
+                        * heads (non-closed buds).
+                        */
+                       c->cmt_bud_bytes += wbuf->offs - bud->start;
+                       dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
+                               "cmt_bud_bytes %lld", bud->lnum, bud->start,
+                               bud->jhead, wbuf->offs - bud->start,
+                               c->cmt_bud_bytes);
+                       /* The bud now begins at the write-buffer offset */
+                       bud->start = wbuf->offs;
+               } else {
+                       c->cmt_bud_bytes += c->leb_size - bud->start;
+                       dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
+                               "cmt_bud_bytes %lld", bud->lnum, bud->start,
+                               bud->jhead, c->leb_size - bud->start,
+                               c->cmt_bud_bytes);
+                       rb_erase(p1, &c->buds);
+                       list_del(&bud->list);
+                       /*
+                        * If the commit does not finish, the recovery will need
+                        * to replay the journal, in which case the old buds
+                        * must be unchanged. Do not release them until post
+                        * commit i.e. do not allow them to be garbage
+                        * collected.
+                        */
+                       list_add(&bud->list, &c->old_buds);
+               }
+       }
+       spin_unlock(&c->buds_lock);
+}
+
+/**
+ * ubifs_log_start_commit - start commit.
+ * @c: UBIFS file-system description object
+ * @ltail_lnum: return new log tail LEB number
+ *
+ * The commit operation starts with writing a "commit start" node to the log,
+ * together with reference nodes for all journal heads, which will define the
+ * new journal after the commit has been finished. The commit start and
+ * reference nodes are written in one go to the nearest empty log LEB (hence,
+ * when commit is finished UBIFS may safely unmap all the previous log LEBs).
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
+{
+       void *buf;
+       struct ubifs_cs_node *cs;
+       struct ubifs_ref_node *ref;
+       int err, i, max_len, len;
+
+       err = dbg_check_bud_bytes(c);
+       if (err)
+               return err;
+
+       /*
+        * Room for the commit start node plus one reference node per journal
+        * head, rounded up to the minimal I/O unit size.
+        */
+       max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
+       max_len = ALIGN(max_len, c->min_io_size);
+       buf = cs = kmalloc(max_len, GFP_NOFS);
+       if (!buf)
+               return -ENOMEM;
+
+       cs->ch.node_type = UBIFS_CS_NODE;
+       cs->cmt_no = cpu_to_le64(c->cmt_no + 1);
+       ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
+
+       /*
+        * Note, we do not lock 'c->log_mutex' because this is the commit start
+        * phase and we are exclusively using the log. And we do not lock
+        * write-buffer because nobody can write to the file-system at this
+        * phase.
+        */
+
+       len = UBIFS_CS_NODE_SZ;
+       for (i = 0; i < c->jhead_cnt; i++) {
+               int lnum = c->jheads[i].wbuf.lnum;
+               int offs = c->jheads[i].wbuf.offs;
+
+               /*
+                * Skip heads whose write-buffer LEB number is -1 (presumably
+                * no LEB is assigned - TODO confirm) or whose offset is
+                * already at the end of the LEB.
+                */
+               if (lnum == -1 || offs == c->leb_size)
+                       continue;
+
+               dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
+               ref = buf + len;
+               ref->ch.node_type = UBIFS_REF_NODE;
+               ref->lnum = cpu_to_le32(lnum);
+               ref->offs = cpu_to_le32(offs);
+               ref->jhead = cpu_to_le32(i);
+
+               ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
+               len += UBIFS_REF_NODE_SZ;
+       }
+
+       /* Pad the tail of the buffer up to the minimal I/O unit boundary */
+       ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);
+
+       /* Switch to the next log LEB */
+       if (c->lhead_offs) {
+               c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+               c->lhead_offs = 0;
+       }
+
+       /* 'c->lhead_offs' is always zero here after the switch above */
+       if (c->lhead_offs == 0) {
+               /* Must ensure next LEB has been unmapped */
+               err = ubifs_leb_unmap(c, c->lhead_lnum);
+               if (err)
+                       goto out;
+       }
+
+       len = ALIGN(len, c->min_io_size);
+       dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
+       err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM);
+       if (err)
+               goto out;
+
+       /* The LEB which holds the commit start node becomes the new tail */
+       *ltail_lnum = c->lhead_lnum;
+
+       c->lhead_offs += len;
+       if (c->lhead_offs == c->leb_size) {
+               c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+               c->lhead_offs = 0;
+       }
+
+       remove_buds(c);
+
+       /*
+        * We have started the commit and now users may use the rest of the log
+        * for new writes.
+        */
+       c->min_log_bytes = 0;
+
+out:
+       /* The buffer is freed on both the success and the failure path */
+       kfree(buf);
+       return err;
+}
+
+/**
+ * ubifs_log_end_commit - finish the commit as far as the log is concerned.
+ * @c: UBIFS file-system description object
+ * @ltail_lnum: new log tail LEB number
+ *
+ * This function is called when the commit operation has finished. It moves
+ * the log tail to its new position, restores the log space reservation for
+ * the next commit and subtracts the committed bud space from 'c->bud_bytes'.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
+{
+       int ret;
+
+       /*
+        * UBIFS allows FS writes while the commit is running - writers are
+        * blocked only during the short commit start phase - so
+        * 'c->log_mutex' has to be taken here.
+        */
+       mutex_lock(&c->log_mutex);
+
+       dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
+               c->ltail_lnum, ltail_lnum);
+
+       /*
+        * The commit is over: from now on there must always be enough room
+        * in the log for the next commit.
+        */
+       c->min_log_bytes = c->leb_size;
+       c->ltail_lnum = ltail_lnum;
+
+       spin_lock(&c->buds_lock);
+       c->bud_bytes = c->bud_bytes - c->cmt_bud_bytes;
+       spin_unlock(&c->buds_lock);
+
+       ret = dbg_check_bud_bytes(c);
+       mutex_unlock(&c->log_mutex);
+
+       return ret;
+}
+
+/**
+ * ubifs_log_post_commit - clean-up to do once the commit is completed.
+ * @c: UBIFS file-system description object
+ * @old_ltail_lnum: old log tail LEB number
+ *
+ * Old buds are released only now, because they have to stay unchanged in case
+ * recovery is needed. For the same reason the log LEBs between the old and
+ * the new tail are unmapped only now - they might have been needed for
+ * recovery.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
+{
+       int lnum, err;
+
+       /* Give back the LEBs of all old buds and free the bud objects */
+       for (;;) {
+               struct ubifs_bud *bud;
+
+               if (list_empty(&c->old_buds))
+                       break;
+
+               bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
+               err = ubifs_return_leb(c, bud->lnum);
+               if (err)
+                       return err;
+               list_del(&bud->list);
+               kfree(bud);
+       }
+
+       err = 0;
+       mutex_lock(&c->log_mutex);
+       /* Unmap log LEBs from the old tail up to, excluding, the new tail */
+       lnum = old_ltail_lnum;
+       while (lnum != c->ltail_lnum) {
+               dbg_log("unmap log LEB %d", lnum);
+               err = ubifs_leb_unmap(c, lnum);
+               if (err)
+                       break;
+               lnum = next_log_lnum(c, lnum);
+       }
+       mutex_unlock(&c->log_mutex);
+
+       return err;
+}
+
+/**
+ * struct done_ref - references that have been done.
+ * @rb: rb-tree node
+ * @lnum: LEB number
+ *
+ * Objects of this type are linked into an rb-tree ordered by @lnum, which is
+ * used to detect references to a LEB that has already been seen.
+ */
+struct done_ref {
+       struct rb_node rb;
+       int lnum;
+};
+
+/**
+ * done_already - check whether a reference was seen, recording it if not.
+ * @done_tree: rb-tree to store references that have been done
+ * @lnum: LEB number of reference
+ *
+ * If @lnum is already present in @done_tree, %1 is returned. Otherwise a new
+ * entry for @lnum is allocated and inserted into the tree and %0 is returned.
+ * A negative error code (%-ENOMEM) is returned if the allocation fails.
+ */
+static int done_already(struct rb_root *done_tree, int lnum)
+{
+       struct rb_node **link = &done_tree->rb_node, *parent = NULL;
+       struct done_ref *dr;
+
+       while (*link) {
+               parent = *link;
+               dr = rb_entry(parent, struct done_ref, rb);
+               if (lnum == dr->lnum)
+                       return 1;
+               if (lnum < dr->lnum)
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+
+       /* Not seen before - remember it at the leaf position just found */
+       dr = kzalloc(sizeof(*dr), GFP_NOFS);
+       if (!dr)
+               return -ENOMEM;
+       dr->lnum = lnum;
+
+       rb_link_node(&dr->rb, parent, link);
+       rb_insert_color(&dr->rb, done_tree);
+
+       return 0;
+}
+
+/**
+ * destroy_done_tree - free every entry of the done tree.
+ * @done_tree: done tree to destroy
+ *
+ * The tree is torn down leaf by leaf: the walk descends until it reaches a
+ * node without children, detaches that node from its parent, frees it and
+ * continues from the parent.
+ */
+static void destroy_done_tree(struct rb_root *done_tree)
+{
+       struct rb_node *node = done_tree->rb_node;
+
+       while (node) {
+               struct rb_node *up;
+               struct done_ref *dr;
+
+               /* Go down for as long as there is a child to visit */
+               if (node->rb_left) {
+                       node = node->rb_left;
+                       continue;
+               }
+               if (node->rb_right) {
+                       node = node->rb_right;
+                       continue;
+               }
+
+               /* A leaf: unhook it from the parent, then free it */
+               dr = rb_entry(node, struct done_ref, rb);
+               up = rb_parent(node);
+               if (up) {
+                       if (up->rb_left == node)
+                               up->rb_left = NULL;
+                       else
+                               up->rb_right = NULL;
+               }
+               kfree(dr);
+               node = up;
+       }
+}
+
+/**
+ * add_node - append a node to the consolidated log.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to which to add
+ * @lnum: LEB number to which to write is passed and returned here
+ * @offs: offset to where to write is passed and returned here
+ * @node: node to add
+ *
+ * If @node does not fit into what is left of the current LEB, the buffer is
+ * padded, written out to LEB @lnum, and @lnum / @offs are moved to the start
+ * of the next log LEB. The node is then copied into @buf at @offs and @offs is
+ * advanced by the node length aligned to 8.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
+                   void *node)
+{
+       const struct ubifs_ch *hdr = node;
+       int node_len = le32_to_cpu(hdr->len);
+
+       if (node_len > c->leb_size - *offs) {
+               int err, padded = ALIGN(*offs, c->min_io_size);
+
+               /* Flush what has been gathered so far and start a new LEB */
+               ubifs_pad(c, buf + *offs, padded - *offs);
+               err = ubifs_leb_change(c, *lnum, buf, padded, UBI_SHORTTERM);
+               if (err)
+                       return err;
+               *lnum = next_log_lnum(c, *lnum);
+               *offs = 0;
+       }
+
+       memcpy(buf + *offs, node, node_len);
+       *offs += ALIGN(node_len, 8);
+       return 0;
+}
+
+/**
+ * ubifs_consolidate_log - consolidate the log.
+ * @c: UBIFS file-system description object
+ *
+ * Repeated failed commits could cause the log to be full, but at least 1 LEB is
+ * needed for commit. This function rewrites the log, tail to head, omitting
+ * duplicate reference nodes and all CS nodes but the first, leaving no gaps.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_consolidate_log(struct ubifs_info *c)
+{
+       struct ubifs_scan_leb *sleb;
+       struct ubifs_scan_node *snod;
+       struct rb_root done_tree = RB_ROOT;
+       int lnum, err, first = 1, write_lnum, offs = 0;
+       void *buf; /* image of the log LEB currently being rebuilt */
+
+       dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
+                 c->lhead_lnum);
+       buf = vmalloc(c->leb_size);
+       if (!buf)
+               return -ENOMEM;
+       lnum = c->ltail_lnum;
+       write_lnum = lnum; /* rewriting starts at the tail LEB as well */
+       while (1) {
+               sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+               if (IS_ERR(sleb)) {
+                       err = PTR_ERR(sleb);
+                       goto out_free;
+               }
+               list_for_each_entry(snod, &sleb->nodes, list) {
+                       switch (snod->type) {
+                       case UBIFS_REF_NODE: {
+                               struct ubifs_ref_node *ref = snod->node;
+                               int ref_lnum = le32_to_cpu(ref->lnum);
+
+                               err = done_already(&done_tree, ref_lnum);
+                               if (err < 0)
+                                       goto out_scan;
+                               if (err != 1) { /* presumably 1 == seen before */
+                                       err = add_node(c, buf, &write_lnum,
+                                                      &offs, snod->node);
+                                       if (err)
+                                               goto out_scan;
+                               }
+                               break;
+                       }
+                       case UBIFS_CS_NODE:
+                               if (!first)
+                                       break; /* only the first CS node is kept */
+                               err = add_node(c, buf, &write_lnum, &offs,
+                                              snod->node);
+                               if (err)
+                                       goto out_scan;
+                               first = 0;
+                               break;
+                       }
+               }
+               ubifs_scan_destroy(sleb);
+               if (lnum == c->lhead_lnum)
+                       break; /* the head LEB is the last one scanned */
+               lnum = next_log_lnum(c, lnum);
+       }
+       if (offs) { /* flush the partly filled last LEB image */
+               int sz = ALIGN(offs, c->min_io_size);
+
+               ubifs_pad(c, buf + offs, sz - offs);
+               err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM);
+               if (err)
+                       goto out_free;
+               offs = ALIGN(offs, c->min_io_size); /* head follows the padding */
+       }
+       destroy_done_tree(&done_tree);
+       vfree(buf);
+       if (write_lnum == c->lhead_lnum) { /* rewriting gained no LEB */
+               ubifs_err("log is too full");
+               return -EINVAL;
+       }
+       /* Unmap remaining LEBs, from the one after the new head to the old head */
+       lnum = write_lnum;
+       do {
+               lnum = next_log_lnum(c, lnum);
+               err = ubifs_leb_unmap(c, lnum);
+               if (err)
+                       return err;
+       } while (lnum != c->lhead_lnum);
+       c->lhead_lnum = write_lnum;
+       c->lhead_offs = offs;
+       dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
+       return 0;
+
+out_scan:
+       ubifs_scan_destroy(sleb);
+out_free:
+       destroy_done_tree(&done_tree);
+       vfree(buf);
+       return err;
+}
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+/**
+ * dbg_check_bud_bytes - verify the cached bud bytes counter.
+ * @c: UBIFS file-system description object
+ *
+ * This function recomputes the space covered by the buds on every 'c->jheads'
+ * bud list and compares it with 'c->bud_bytes'. Returns zero if they match (or
+ * if general checks are disabled) and %-EINVAL otherwise.
+ */
+static int dbg_check_bud_bytes(struct ubifs_info *c)
+{
+       long long total = 0;
+       struct ubifs_bud *b;
+       int jh, ret = 0;
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+               return 0;
+
+       spin_lock(&c->buds_lock);
+       for (jh = 0; jh < c->jhead_cnt; jh++)
+               list_for_each_entry(b, &c->jheads[jh].buds_list, list)
+                       total += c->leb_size - b->start;
+
+       if (total != c->bud_bytes) {
+               ubifs_err("bad bud_bytes %lld, calculated %lld",
+                         c->bud_bytes, total);
+               ret = -EINVAL;
+       }
+       spin_unlock(&c->buds_lock);
+
+       return ret;
+}
+
+#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c

new file mode 100644 (file)

index 0000000..2ba93da
--- /dev/null
+++ b/fs/ubifs/lprops.c
@@ -0,0 +1,1357 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements the functions that access LEB properties and their
+ * categories. LEBs are categorized based on the needs of UBIFS, and the
+ * categories are stored as either heaps or lists to provide a fast way of
+ * finding a LEB in a particular category. For example, UBIFS may need to find
+ * an empty LEB for the journal, or a very dirty LEB for garbage collection.
+ */
+
+#include "ubifs.h"
+
+/**
+ * get_heap_comp_val - get the key used to order @lprops in a category heap.
+ * @lprops: LEB properties
+ * @cat: LEB category (free: free space, dirty index: free + dirty, else dirty)
+ */
+static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat)
+{
+       if (cat == LPROPS_FREE)
+               return lprops->free;
+
+       if (cat == LPROPS_DIRTY_IDX)
+               return lprops->free + lprops->dirty;
+
+       /* %LPROPS_DIRTY and any other category compare by dirty space */
+       return lprops->dirty;
+}
+
+/**
+ * move_up_lpt_heap - sift a newly placed heap entry towards the root.
+ * @c: UBIFS file-system description object
+ * @heap: LEB category heap
+ * @lprops: LEB properties to move, already stored at 'lprops->hpos'
+ * @cat: LEB category
+ *
+ * The entry is swapped with its parent until it reaches the top or meets a
+ * parent whose value is not smaller.  The value compared is the one returned
+ * by 'get_heap_comp_val()' for @cat: free space, dirty space, or their sum
+ * for the dirty index category.
+ */
+static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
+                            struct ubifs_lprops *lprops, int cat)
+{
+       int pos = lprops->hpos;
+       int val;
+
+       if (!pos)
+               return; /* Nothing above the top of the heap */
+       val = get_heap_comp_val(lprops, cat);
+       /* Swap with the parent for as long as the parent is smaller */
+       while (pos) {
+               int parent = (pos - 1) / 2;
+               struct ubifs_lprops *plp = heap->arr[parent];
+
+               if (get_heap_comp_val(plp, cat) >= val)
+                       break;
+               /* Parent is smaller, so it drops into our slot */
+               plp->hpos = pos;
+               heap->arr[pos] = plp;
+               heap->arr[parent] = lprops;
+               lprops->hpos = parent;
+               pos = parent;
+       }
+}
+
+/**
+ * adjust_lpt_heap - move a changed heap entry up or down the heap.
+ * @c: UBIFS file-system description object
+ * @heap: LEB category heap
+ * @lprops: LEB properties to move
+ * @hpos: heap position of @lprops
+ * @cat: LEB category
+ *
+ * A changed entry is moved up while it is greater than its parent, otherwise
+ * down while a child is greater.  For LPT's category heaps the value compared
+ * is free space, dirty space or their sum, depending on the category.
+ */
+static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
+                           struct ubifs_lprops *lprops, int hpos, int cat)
+{
+       int val1, val2, val3, cpos;
+
+       val1 = get_heap_comp_val(lprops, cat); /* key of the moving entry */
+       /* Compare to parent and, if greater than parent, move up the heap */
+       if (hpos) {
+               int ppos = (hpos - 1) / 2;
+
+               val2 = get_heap_comp_val(heap->arr[ppos], cat);
+               if (val1 > val2) {
+                       /* Greater than parent so move up */
+                       while (1) {
+                               heap->arr[ppos]->hpos = hpos;
+                               heap->arr[hpos] = heap->arr[ppos];
+                               heap->arr[ppos] = lprops;
+                               lprops->hpos = ppos;
+                               hpos = ppos;
+                               if (!hpos)
+                                       return; /* reached the top */
+                               ppos = (hpos - 1) / 2;
+                               val2 = get_heap_comp_val(heap->arr[ppos], cat);
+                               if (val1 <= val2)
+                                       return; /* parent is not smaller */
+                               /* Still greater than parent so keep going */
+                       }
+               }
+       }
+       /* Not greater than parent, so compare to children */
+       while (1) {
+               /* Compare to left child */
+               cpos = hpos * 2 + 1;
+               if (cpos >= heap->cnt)
+                       return; /* no children at all */
+               val2 = get_heap_comp_val(heap->arr[cpos], cat);
+               if (val1 < val2) {
+                       /* Less than left child, so promote biggest child */
+                       if (cpos + 1 < heap->cnt) {
+                               val3 = get_heap_comp_val(heap->arr[cpos + 1],
+                                                        cat);
+                               if (val3 > val2)
+                                       cpos += 1; /* Right child is bigger */
+                       }
+                       heap->arr[cpos]->hpos = hpos;
+                       heap->arr[hpos] = heap->arr[cpos];
+                       heap->arr[cpos] = lprops;
+                       lprops->hpos = cpos;
+                       hpos = cpos;
+                       continue;
+               }
+               /* Compare to right child */
+               cpos += 1;
+               if (cpos >= heap->cnt)
+                       return; /* no right child */
+               val3 = get_heap_comp_val(heap->arr[cpos], cat);
+               if (val1 < val3) {
+                       /* Less than right child, so promote right child */
+                       heap->arr[cpos]->hpos = hpos;
+                       heap->arr[hpos] = heap->arr[cpos];
+                       heap->arr[cpos] = lprops;
+                       lprops->hpos = cpos;
+                       hpos = cpos;
+                       continue;
+               }
+               return; /* not smaller than either child */
+       }
+}
+
+/**
+ * add_to_lpt_heap - add LEB properties to a LEB category heap.
+ * @c: UBIFS file-system description object
+ * @lprops: LEB properties to add
+ * @cat: LEB category
+ *
+ * Returns %1 if @lprops is added to the heap for @cat (possibly evicting a
+ * smaller entry to the uncategorized list), or %0 if the full heap rejects it.
+ */
+static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops,
+                          int cat)
+{
+       struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
+
+       if (heap->cnt >= heap->max_cnt) {
+               const int b = LPT_HEAP_SZ / 2 - 1;
+               int cpos, val1, val2;
+
+               /* Compare to some other LEB on the bottom of heap */
+               /* Pick a position kind of randomly: pointer bits, in [b, 2 * b] */
+               cpos = (((size_t)lprops >> 4) & b) + b;
+               ubifs_assert(cpos >= b);
+               ubifs_assert(cpos < LPT_HEAP_SZ);
+               ubifs_assert(cpos < heap->cnt);
+
+               val1 = get_heap_comp_val(lprops, cat);
+               val2 = get_heap_comp_val(heap->arr[cpos], cat);
+               if (val1 > val2) {
+                       struct ubifs_lprops *lp;
+
+                       lp = heap->arr[cpos]; /* entry being evicted */
+                       lp->flags &= ~LPROPS_CAT_MASK;
+                       lp->flags |= LPROPS_UNCAT;
+                       list_add(&lp->list, &c->uncat_list);
+                       lprops->hpos = cpos;
+                       heap->arr[cpos] = lprops;
+                       move_up_lpt_heap(c, heap, lprops, cat);
+                       dbg_check_heap(c, heap, cat, lprops->hpos);
+                       return 1; /* Added to heap */
+               }
+               dbg_check_heap(c, heap, cat, -1);
+               return 0; /* Not added to heap */
+       } else {
+               lprops->hpos = heap->cnt++; /* append at the bottom */
+               heap->arr[lprops->hpos] = lprops;
+               move_up_lpt_heap(c, heap, lprops, cat);
+               dbg_check_heap(c, heap, cat, lprops->hpos);
+               return 1; /* Added to heap */
+       }
+}
+
+/**
+ * remove_from_lpt_heap - take LEB properties out of a LEB category heap.
+ * @c: UBIFS file-system description object
+ * @lprops: LEB properties to remove (the last heap entry fills its slot)
+ * @cat: LEB category
+ */
+static void remove_from_lpt_heap(struct ubifs_info *c,
+                                struct ubifs_lprops *lprops, int cat)
+{
+       struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
+       struct ubifs_lprops *last;
+       int hpos = lprops->hpos;
+
+       ubifs_assert(hpos >= 0 && hpos < heap->cnt);
+       ubifs_assert(heap->arr[hpos] == lprops);
+       if (hpos < --heap->cnt) {
+               last = heap->arr[heap->cnt];
+               last->hpos = hpos;
+               heap->arr[hpos] = last;
+               adjust_lpt_heap(c, heap, last, hpos, cat);
+       }
+       dbg_check_heap(c, heap, cat, -1);
+}
+
+/**
+ * lpt_heap_replace - replace lprops in a category heap.
+ * @c: UBIFS file-system description object
+ * @old_lprops: LEB properties to replace (not used to locate the slot)
+ * @new_lprops: LEB properties with which to replace
+ * @cat: LEB category
+ *
+ * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode)
+ * and the lprops that the pnode contains.  When that happens, the slot of the
+ * category heap that refers to those lprops must be made to point to the new
+ * lprops.  This function does that, using the position kept in @new_lprops.
+ */
+static void lpt_heap_replace(struct ubifs_info *c,
+                            struct ubifs_lprops *old_lprops,
+                            struct ubifs_lprops *new_lprops, int cat)
+{
+       struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
+
+       /* Presumably 'hpos' was copied from @old_lprops along with the rest */
+       /* of the lprops, so it already names the slot to repoint - confirm. */
+       heap->arr[new_lprops->hpos] = new_lprops;
+}
+
+/**
+ * ubifs_add_to_cat - add LEB properties to a category list or heap.
+ * @c: UBIFS file-system description object
+ * @lprops: LEB properties to add
+ * @cat: LEB category to which to add
+ *
+ * Categorizing enables fast finds. The category used is saved in the flags.
+ */
+void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
+                     int cat)
+{
+       switch (cat) {
+       case LPROPS_DIRTY:
+       case LPROPS_DIRTY_IDX:
+       case LPROPS_FREE:
+               if (add_to_lpt_heap(c, lprops, cat))
+                       break;
+               /* No more room on heap so make it uncategorized */
+               cat = LPROPS_UNCAT;
+               /* Fall through */
+       case LPROPS_UNCAT:
+               list_add(&lprops->list, &c->uncat_list);
+               break;
+       case LPROPS_EMPTY:
+               list_add(&lprops->list, &c->empty_list);
+               break;
+       case LPROPS_FREEABLE:
+               list_add(&lprops->list, &c->freeable_list);
+               c->freeable_cnt += 1; /* this list keeps a running count */
+               break;
+       case LPROPS_FRDI_IDX:
+               list_add(&lprops->list, &c->frdi_idx_list);
+               break;
+       default:
+               ubifs_assert(0); /* not a known category */
+       }
+       lprops->flags &= ~LPROPS_CAT_MASK; /* drop the previous category bits */
+       lprops->flags |= cat; /* may be %LPROPS_UNCAT after a full heap */
+}
+
+/**
+ * ubifs_remove_from_cat - remove LEB properties from a category list or heap.
+ * @c: UBIFS file-system description object
+ * @lprops: LEB properties to remove
+ * @cat: LEB category from which to remove
+ *
+ * The category bits in 'lprops->flags' are left untouched by this function.
+ */
+static void ubifs_remove_from_cat(struct ubifs_info *c,
+                                 struct ubifs_lprops *lprops, int cat)
+{
+       switch (cat) {
+       case LPROPS_DIRTY:
+       case LPROPS_DIRTY_IDX:
+       case LPROPS_FREE:
+               remove_from_lpt_heap(c, lprops, cat);
+               break;
+       case LPROPS_FREEABLE:
+               c->freeable_cnt -= 1; /* keep the freeable list count in step */
+               ubifs_assert(c->freeable_cnt >= 0);
+               /* Fall through */
+       case LPROPS_UNCAT:
+       case LPROPS_EMPTY:
+       case LPROPS_FRDI_IDX:
+               ubifs_assert(!list_empty(&lprops->list));
+               list_del(&lprops->list); /* all list categories unlink alike */
+               break;
+       default:
+               ubifs_assert(0); /* not a known category */
+       }
+}
+
+/**
+ * ubifs_replace_cat - replace lprops in a category list or heap.
+ * @c: UBIFS file-system description object
+ * @old_lprops: LEB properties to replace
+ * @new_lprops: LEB properties with which to replace
+ *
+ * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode)
+ * and the lprops that the pnode contains. When that happens, the category list
+ * or heap must refer to the copy. The category is read from @new_lprops.
+ */
+void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
+                      struct ubifs_lprops *new_lprops)
+{
+       const int cat = new_lprops->flags & LPROPS_CAT_MASK;
+
+       /* Heap categories: repoint the heap slot at the copy */
+       if (cat == LPROPS_DIRTY || cat == LPROPS_DIRTY_IDX ||
+           cat == LPROPS_FREE) {
+               lpt_heap_replace(c, old_lprops, new_lprops, cat);
+               return;
+       }
+
+       /* List categories: put the copy where the original was linked */
+       if (cat == LPROPS_UNCAT || cat == LPROPS_EMPTY ||
+           cat == LPROPS_FREEABLE || cat == LPROPS_FRDI_IDX) {
+               list_replace(&old_lprops->list, &new_lprops->list);
+               return;
+       }
+
+       /* No other category value is expected */
+       ubifs_assert(0);
+}
+
+/**
+ * ubifs_ensure_cat - ensure LEB properties are categorized.
+ * @c: UBIFS file-system description object
+ * @lprops: LEB properties
+ *
+ * A LEB may have fallen off of the bottom of a heap, and ended up as
+ * uncategorized even though it has enough space for us now. If that is the case
+ * this function tries to put the LEB back into the category it qualifies for.
+ */
+void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops)
+{
+       int new_cat;
+
+       if ((lprops->flags & LPROPS_CAT_MASK) != LPROPS_UNCAT)
+               return; /* already categorized */
+       new_cat = ubifs_categorize_lprops(c, lprops);
+       if (new_cat != LPROPS_UNCAT) {
+               ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT);
+               ubifs_add_to_cat(c, lprops, new_cat);
+       }
+}
+
+/**
+ * ubifs_categorize_lprops - work out which category LEB properties fit.
+ * @c: UBIFS file-system description object
+ * @lprops: LEB properties to categorize
+ *
+ * LEB properties are categorized to enable fast find operations. This function
+ * returns the LEB category to which the LEB properties belong. Note however
+ * that if the LEB category is stored as a heap and the heap is full, the
+ * LEB properties may have their category changed to %LPROPS_UNCAT.
+ */
+int ubifs_categorize_lprops(const struct ubifs_info *c,
+                           const struct ubifs_lprops *lprops)
+{
+       const int idx = !!(lprops->flags & LPROPS_INDEX);
+       const int spc = lprops->free + lprops->dirty;
+
+       if (lprops->flags & LPROPS_TAKEN)
+               return LPROPS_UNCAT;
+
+       if (lprops->free == c->leb_size) {
+               ubifs_assert(!(lprops->flags & LPROPS_INDEX));
+               return LPROPS_EMPTY;
+       }
+
+       /* Free plus dirty space covers the whole LEB */
+       if (spc == c->leb_size)
+               return idx ? LPROPS_FRDI_IDX : LPROPS_FREEABLE;
+
+       if (idx) {
+               if (spc < c->min_idx_node_sz)
+                       return LPROPS_UNCAT;
+               return LPROPS_DIRTY_IDX;
+       }
+
+       if (lprops->dirty >= c->dead_wm && lprops->dirty > lprops->free)
+               return LPROPS_DIRTY;
+       if (lprops->free > 0)
+               return LPROPS_FREE;
+
+       return LPROPS_UNCAT;
+}
+
+/**
+ * change_category - change LEB properties category.
+ * @c: UBIFS file-system description object
+ * @lprops: LEB properties to recategorize
+ *
+ * When LEB properties change they are moved to the category they now fit, or,
+ * if the category is an unchanged heap category, re-positioned on that heap.
+ */
+static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
+{
+       int old_cat = lprops->flags & LPROPS_CAT_MASK;
+       int new_cat = ubifs_categorize_lprops(c, lprops);
+
+       if (old_cat == new_cat) {
+               struct ubifs_lpt_heap *heap;
+
+               /* Index 'c->lpt_heap' only once @new_cat is a heap category */
+               if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT)
+                       return; /* Not on a heap */
+               heap = &c->lpt_heap[new_cat - 1];
+               adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat);
+       } else {
+               ubifs_remove_from_cat(c, lprops, old_cat);
+               ubifs_add_to_cat(c, lprops, new_cat);
+       }
+}
+
+/**
+ * ubifs_get_lprops - get reference to LEB properties.
+ * @c: the UBIFS file-system description object
+ *
+ * This function locks lprops by taking 'c->lp_mutex'. The lock has to be
+ * dropped again with 'ubifs_release_lprops()'.
+ */
+void ubifs_get_lprops(struct ubifs_info *c)
+{
+       mutex_lock(&c->lp_mutex);
+}
+
+/**
+ * calc_dark - calculate LEB dark space size.
+ * @c: the UBIFS file-system description object
+ * @spc: amount of free and dirty space in the LEB
+ *
+ * This function calculates amount of dark space in an LEB which has @spc bytes
+ * of free and dirty space. Returns the result of the calculation.
+ *
+ * Dark space is the space which is not always usable - it depends on which
+ * nodes are written in which order. E.g., if an LEB has only 512 free bytes,
+ * it is dark space, because it cannot fit a large data node. So UBIFS cannot
+ * count on this LEB and treat these 512 bytes as usable because it is not true
+ * if, for example, only big chunks of uncompressible data will be written to
+ * the FS.
+ */
+static int calc_dark(struct ubifs_info *c, int spc)
+{
+       ubifs_assert(!(spc & 7));
+
+       /* Below the dark space watermark all of the space is dark */
+       if (spc < c->dark_wm)
+               return spc;
+
+       /*
+        * With only slightly more space than the dark space watermark we can
+        * still safely assume that a node of the smallest size fits there.
+        */
+       if (spc - c->dark_wm >= MIN_WRITE_SZ)
+               return c->dark_wm;
+
+       return spc - MIN_WRITE_SZ;
+}
+
+/**
+ * is_lprops_dirty - test if the pnode holding @lprops is dirty and not COW.
+ * @c: the UBIFS file-system description object
+ * @lprops: LEB properties to test
+ */
+static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
+{
+       struct ubifs_pnode *pnode; /* pnode whose lprops[] embeds @lprops */
+       int pos; /* index of @lprops within that lprops[] array */
+
+       pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1);
+       pnode = (struct ubifs_pnode *)container_of(lprops - pos,
+                                                  struct ubifs_pnode,
+                                                  lprops[0]);
+       return !test_bit(COW_ZNODE, &pnode->flags) &&
+              test_bit(DIRTY_CNODE, &pnode->flags);
+}
+
+/**
+ * ubifs_change_lp - change LEB properties.
+ * @c: the UBIFS file-system description object
+ * @lp: LEB properties to change
+ * @free: new free space amount (rounded up to a multiple of 8)
+ * @dirty: new dirty space amount (rounded up to a multiple of 8)
+ * @flags: new flags
+ * @idx_gc_cnt: change to the count of idx_gc list
+ *
+ * This function changes LEB properties. This function does not change a LEB
+ * property (@free, @dirty or @flags) if the value passed is %LPROPS_NC.
+ *
+ * This function returns a pointer to the updated LEB properties on success
+ * and an error pointer on failure. N.B. the LEB properties may have had to
+ * be copied (due to COW) and consequently the pointer returned may not be the
+ * same as the pointer passed.
+ */
+const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
+                                          const struct ubifs_lprops *lp,
+                                          int free, int dirty, int flags,
+                                          int idx_gc_cnt)
+{
+       /*
+        * This is the only function that is allowed to change lprops, so we
+        * discard the const qualifier.
+        */
+       struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp;
+
+       dbg_lp("LEB %d, free %d, dirty %d, flags %d",
+              lprops->lnum, free, dirty, flags);
+
+       ubifs_assert(mutex_is_locked(&c->lp_mutex));
+       ubifs_assert(c->lst.empty_lebs >= 0 &&
+                    c->lst.empty_lebs <= c->main_lebs);
+       ubifs_assert(c->freeable_cnt >= 0);
+       ubifs_assert(c->freeable_cnt <= c->main_lebs);
+       ubifs_assert(c->lst.taken_empty_lebs >= 0);
+       ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs);
+       ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7));
+       ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7));
+       ubifs_assert(!(c->lst.total_used & 7));
+       ubifs_assert(free == LPROPS_NC || free >= 0);
+       ubifs_assert(dirty == LPROPS_NC || dirty >= 0);
+
+       if (!is_lprops_dirty(c, lprops)) {
+               lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum);
+               if (IS_ERR(lprops))
+                       return lprops; /* error pointer goes to the caller */
+       } else
+               ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum));
+
+       ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));
+
+       spin_lock(&c->space_lock);
+
+       if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
+               c->lst.taken_empty_lebs -= 1; /* re-counted after the change */
+
+       if (!(lprops->flags & LPROPS_INDEX)) {
+               int old_spc; /* free + dirty before the change */
+
+               old_spc = lprops->free + lprops->dirty;
+               if (old_spc < c->dead_wm)
+                       c->lst.total_dead -= old_spc;
+               else
+                       c->lst.total_dark -= calc_dark(c, old_spc);
+
+               c->lst.total_used -= c->leb_size - old_spc;
+       }
+
+       if (free != LPROPS_NC) {
+               free = ALIGN(free, 8);
+               c->lst.total_free += free - lprops->free;
+
+               /* Increase or decrease empty LEBs counter if needed */
+               if (free == c->leb_size) {
+                       if (lprops->free != c->leb_size)
+                               c->lst.empty_lebs += 1;
+               } else if (lprops->free == c->leb_size)
+                       c->lst.empty_lebs -= 1;
+               lprops->free = free;
+       }
+
+       if (dirty != LPROPS_NC) {
+               dirty = ALIGN(dirty, 8);
+               c->lst.total_dirty += dirty - lprops->dirty;
+               lprops->dirty = dirty;
+       }
+
+       if (flags != LPROPS_NC) {
+               /* Take care about indexing LEBs counter if needed */
+               if ((lprops->flags & LPROPS_INDEX)) {
+                       if (!(flags & LPROPS_INDEX))
+                               c->lst.idx_lebs -= 1;
+               } else if (flags & LPROPS_INDEX)
+                       c->lst.idx_lebs += 1;
+               lprops->flags = flags;
+       }
+
+       if (!(lprops->flags & LPROPS_INDEX)) {
+               int new_spc; /* free + dirty after the change */
+
+               new_spc = lprops->free + lprops->dirty;
+               if (new_spc < c->dead_wm)
+                       c->lst.total_dead += new_spc;
+               else
+                       c->lst.total_dark += calc_dark(c, new_spc);
+
+               c->lst.total_used += c->leb_size - new_spc;
+       }
+
+       if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
+               c->lst.taken_empty_lebs += 1;
+
+       change_category(c, lprops); /* re-file under the new values */
+
+       c->idx_gc_cnt += idx_gc_cnt;
+
+       spin_unlock(&c->space_lock);
+
+       return lprops;
+}
+
+/**
+ * ubifs_release_lprops - release lprops lock.
+ * @c: the UBIFS file-system description object
+ *
+ * This function has to be called after each 'ubifs_get_lprops()' call to
+ * unlock lprops. It drops 'c->lp_mutex', which must be held on entry.
+ */
+void ubifs_release_lprops(struct ubifs_info *c)
+{
+       ubifs_assert(mutex_is_locked(&c->lp_mutex));
+       ubifs_assert(c->lst.empty_lebs >= 0 &&
+                    c->lst.empty_lebs <= c->main_lebs);
+
+       mutex_unlock(&c->lp_mutex);
+}
+
+/**
+ * ubifs_get_lp_stats - copy out the lprops statistics.
+ * @c: UBIFS file-system description object
+ * @st: where to store a copy of 'c->lst', taken under 'c->space_lock'
+ */
+void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st)
+{
+       spin_lock(&c->space_lock);
+       memcpy(st, &c->lst, sizeof(*st));
+       spin_unlock(&c->space_lock);
+}
+
+/**
+ * ubifs_change_one_lp - change properties of a single LEB.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB to change properties for
+ * @free: amount of free space
+ * @dirty: amount of dirty space
+ * @flags_set: flags to set
+ * @flags_clean: flags to clean
+ * @idx_gc_cnt: change to the count of idx_gc list
+ *
+ * This function changes properties of LEB @lnum. It is a helper wrapper over
+ * 'ubifs_change_lp()' which hides lprops get/release and builds the new flags
+ * from the current ones plus @flags_set minus @flags_clean. Returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
+                       int flags_set, int flags_clean, int idx_gc_cnt)
+{
+       const struct ubifs_lprops *lp;
+       int ret = 0;
+
+       ubifs_get_lprops(c);
+
+       lp = ubifs_lpt_lookup_dirty(c, lnum);
+       if (!IS_ERR(lp)) {
+               int new_flags = (lp->flags | flags_set) & ~flags_clean;
+
+               lp = ubifs_change_lp(c, lp, free, dirty, new_flags,
+                                    idx_gc_cnt);
+       }
+
+       /* @lp is an error pointer if either the lookup or the change failed */
+       if (IS_ERR(lp))
+               ret = PTR_ERR(lp);
+
+       ubifs_release_lprops(c);
+       return ret;
+}
+
+/**
+ * ubifs_update_one_lp - update LEB properties.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB to change properties for
+ * @free: amount of free space
+ * @dirty: amount of dirty space to add
+ * @flags_set: flags to set
+ * @flags_clean: flags to clean
+ *
+ * This function is the same as 'ubifs_change_one_lp()' except that @dirty is
+ * added to the current dirty space instead of replacing it.
+ */
+int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
+                       int flags_set, int flags_clean)
+{
+       const struct ubifs_lprops *lp;
+       int ret = 0;
+
+       ubifs_get_lprops(c);
+
+       lp = ubifs_lpt_lookup_dirty(c, lnum);
+       if (!IS_ERR(lp)) {
+               int new_flags = (lp->flags | flags_set) & ~flags_clean;
+
+               lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty,
+                                    new_flags, 0);
+       }
+
+       /* @lp is an error pointer if either the lookup or the change failed */
+       if (IS_ERR(lp))
+               ret = PTR_ERR(lp);
+
+       ubifs_release_lprops(c);
+       return ret;
+}
+
+/**
+ * ubifs_read_one_lp - read LEB properties.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB to read properties for
+ * @lp: where to store read properties
+ *
+ * This helper function looks up the properties of LEB @lnum with lprops
+ * locked and copies them into @lp. Returns zero in case of success and a
+ * negative error code in case of failure, in which case @lp is not written.
+ */
+int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp)
+{
+       const struct ubifs_lprops *found;
+       int ret;
+
+       ubifs_get_lprops(c);
+
+       found = ubifs_lpt_lookup(c, lnum);
+       if (IS_ERR(found)) {
+               ret = PTR_ERR(found);
+       } else {
+               /* Give the caller its own copy of the properties */
+               memcpy(lp, found, sizeof(*lp));
+               ret = 0;
+       }
+
+       ubifs_release_lprops(c);
+       return ret;
+}
+
+/**
+ * ubifs_fast_find_free - try to find a LEB with free space quickly.
+ * @c: the UBIFS file-system description object
+ *
+ * This function returns the first entry of the %LPROPS_FREE heap, or %NULL if
+ * that heap is empty. @c->lp_mutex must be locked (asserted below).
+ */
+const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c)
+{
+       struct ubifs_lprops *lprops;
+       struct ubifs_lpt_heap *heap;
+
+       ubifs_assert(mutex_is_locked(&c->lp_mutex));
+
+       heap = &c->lpt_heap[LPROPS_FREE - 1];
+       if (heap->cnt == 0)
+               return NULL;
+
+       lprops = heap->arr[0];
+       ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
+       ubifs_assert(!(lprops->flags & LPROPS_INDEX));
+       return lprops;
+}
+
+/**
+ * ubifs_fast_find_empty - try to find an empty LEB quickly.
+ * @c: the UBIFS file-system description object
+ *
+ * This function returns the first entry of @c->empty_list, or %NULL if the
+ * list is empty. @c->lp_mutex must be locked (asserted below).
+ */
+const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c)
+{
+       struct ubifs_lprops *lprops;
+
+       ubifs_assert(mutex_is_locked(&c->lp_mutex));
+
+       if (list_empty(&c->empty_list))
+               return NULL;
+
+       lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list);
+       ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
+       ubifs_assert(!(lprops->flags & LPROPS_INDEX));
+       ubifs_assert(lprops->free == c->leb_size);
+       return lprops;
+}
+
+/**
+ * ubifs_fast_find_freeable - try to find a freeable LEB quickly.
+ * @c: the UBIFS file-system description object
+ *
+ * This function returns the first entry of @c->freeable_list, or %NULL if the
+ * list is empty. @c->lp_mutex must be locked (asserted below).
+ */
+const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c)
+{
+       struct ubifs_lprops *lprops;
+
+       ubifs_assert(mutex_is_locked(&c->lp_mutex));
+
+       if (list_empty(&c->freeable_list))
+               return NULL;
+
+       lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list);
+       ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
+       ubifs_assert(!(lprops->flags & LPROPS_INDEX));
+       ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
+       ubifs_assert(c->freeable_cnt > 0);
+       return lprops;
+}
+
+/**
+ * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly.
+ * @c: the UBIFS file-system description object
+ *
+ * This function returns the first entry of @c->frdi_idx_list, or %NULL if the
+ * list is empty. @c->lp_mutex must be locked (asserted below).
+ */
+const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c)
+{
+       struct ubifs_lprops *lprops;
+
+       ubifs_assert(mutex_is_locked(&c->lp_mutex));
+
+       if (list_empty(&c->frdi_idx_list))
+               return NULL;
+
+       lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list);
+       ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
+       ubifs_assert((lprops->flags & LPROPS_INDEX));
+       ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
+       return lprops;
+}
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+/**
+ * dbg_check_cats - check category heaps and lists.
+ * @c: UBIFS file-system description object
+ *
+ * Returns %0 if checking is disabled or all checks pass, otherwise %-EINVAL.
+ */
+int dbg_check_cats(struct ubifs_info *c)
+{
+       struct ubifs_lprops *lprops;
+       struct list_head *pos;
+       int i, cat;
+
+       if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
+               return 0;
+
+       list_for_each_entry(lprops, &c->empty_list, list) {
+               if (lprops->free != c->leb_size) {
+                       ubifs_err("non-empty LEB %d on empty list "
+                                 "(free %d dirty %d flags %d)", lprops->lnum,
+                                 lprops->free, lprops->dirty, lprops->flags);
+                       return -EINVAL;
+               }
+               if (lprops->flags & LPROPS_TAKEN) {
+                       ubifs_err("taken LEB %d on empty list "
+                                 "(free %d dirty %d flags %d)", lprops->lnum,
+                                 lprops->free, lprops->dirty, lprops->flags);
+                       return -EINVAL;
+               }
+       }
+
+       i = 0;
+       list_for_each_entry(lprops, &c->freeable_list, list) {
+               if (lprops->free + lprops->dirty != c->leb_size) {
+                       ubifs_err("non-freeable LEB %d on freeable list "
+                                 "(free %d dirty %d flags %d)", lprops->lnum,
+                                 lprops->free, lprops->dirty, lprops->flags);
+                       return -EINVAL;
+               }
+               if (lprops->flags & LPROPS_TAKEN) {
+                       ubifs_err("taken LEB %d on freeable list "
+                                 "(free %d dirty %d flags %d)", lprops->lnum,
+                                 lprops->free, lprops->dirty, lprops->flags);
+                       return -EINVAL;
+               }
+               i += 1;
+       }
+       if (i != c->freeable_cnt) {
+               ubifs_err("freeable list count %d expected %d", i,
+                         c->freeable_cnt);
+               return -EINVAL;
+       }
+
+       i = 0;
+       list_for_each(pos, &c->idx_gc)
+               i += 1;
+       if (i != c->idx_gc_cnt) {
+               ubifs_err("idx_gc list count %d expected %d", i,
+                         c->idx_gc_cnt);
+               return -EINVAL;
+       }
+
+       list_for_each_entry(lprops, &c->frdi_idx_list, list) {
+               if (lprops->free + lprops->dirty != c->leb_size) {
+                       ubifs_err("non-freeable LEB %d on frdi_idx list "
+                                 "(free %d dirty %d flags %d)", lprops->lnum,
+                                 lprops->free, lprops->dirty, lprops->flags);
+                       return -EINVAL;
+               }
+               if (lprops->flags & LPROPS_TAKEN) {
+                       ubifs_err("taken LEB %d on frdi_idx list "
+                                 "(free %d dirty %d flags %d)", lprops->lnum,
+                                 lprops->free, lprops->dirty, lprops->flags);
+                       return -EINVAL;
+               }
+               if (!(lprops->flags & LPROPS_INDEX)) {
+                       ubifs_err("non-index LEB %d on frdi_idx list "
+                                 "(free %d dirty %d flags %d)", lprops->lnum,
+                                 lprops->free, lprops->dirty, lprops->flags);
+                       return -EINVAL;
+               }
+       }
+
+       for (cat = 1; cat <= LPROPS_HEAP_CNT; cat++) {
+               struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
+
+               for (i = 0; i < heap->cnt; i++) {
+                       lprops = heap->arr[i];
+                       if (!lprops) {
+                               ubifs_err("null ptr in LPT heap cat %d", cat);
+                               return -EINVAL;
+                       }
+                       if (lprops->hpos != i) {
+                               ubifs_err("bad ptr in LPT heap cat %d", cat);
+                               return -EINVAL;
+                       }
+                       if (lprops->flags & LPROPS_TAKEN) {
+                               ubifs_err("taken LEB in LPT heap cat %d", cat);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
+                   int add_pos)
+{
+       int i = 0, j, err = 0;
+
+       if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
+               return;
+       /* Check every entry; only slot @add_pos may carry another category */
+       for (i = 0; i < heap->cnt; i++) {
+               struct ubifs_lprops *lprops = heap->arr[i];
+               struct ubifs_lprops *lp;
+
+               if (i != add_pos)
+                       if ((lprops->flags & LPROPS_CAT_MASK) != cat) {
+                               err = 1; /* category in flags is not @cat */
+                               goto out;
+                       }
+               if (lprops->hpos != i) {
+                       err = 2; /* recorded heap position is wrong */
+                       goto out;
+               }
+               lp = ubifs_lpt_lookup(c, lprops->lnum);
+               if (IS_ERR(lp)) {
+                       err = 3; /* LPT lookup failed */
+                       goto out;
+               }
+               if (lprops != lp) {
+                       dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
+                               (size_t)lprops, (size_t)lp, lprops->lnum,
+                               lp->lnum);
+                       err = 4; /* heap entry is not the LPT's object */
+                       goto out;
+               }
+               for (j = 0; j < i; j++) {
+                       lp = heap->arr[j];
+                       if (lp == lprops) {
+                               err = 5; /* same object appears twice */
+                               goto out;
+                       }
+                       if (lp->lnum == lprops->lnum) {
+                               err = 6; /* same LEB number appears twice */
+                               goto out;
+                       }
+               }
+       }
+out:
+       if (err) {
+               dbg_msg("failed cat %d hpos %d err %d", cat, i, err);
+               dbg_dump_stack();
+               dbg_dump_heap(c, heap, cat);
+       }
+}
+
+/**
+ * struct scan_check_data - data provided to scan callback function.
+ * @lst: LEB properties statistics accumulated by the callback
+ * @err: error code set by the callback when it stops the scan
+ */
+struct scan_check_data {
+       struct ubifs_lp_stats lst;
+       int err;
+};
+
+/**
+ * scan_check_cb - scan callback.
+ * @c: the UBIFS file-system description object
+ * @lp: LEB properties to scan
+ * @in_tree: whether the LEB properties are in main memory
+ * @data: information passed to and from the caller of the scan
+ *
+ * Cross-checks @lp against its category list/heap and against a scan of the
+ * LEB, accumulating totals in @data->lst. Returns %LPT_SCAN_CONTINUE to move
+ * on to the next LEB; on failure stores the error code in @data->err and
+ * returns %LPT_SCAN_STOP. %LPT_SCAN_ADD is never returned.
+ */
+static int scan_check_cb(struct ubifs_info *c,
+                        const struct ubifs_lprops *lp, int in_tree,
+                        struct scan_check_data *data)
+{
+       struct ubifs_scan_leb *sleb;
+       struct ubifs_scan_node *snod;
+       struct ubifs_lp_stats *lst = &data->lst;
+       int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty;
+
+       cat = lp->flags & LPROPS_CAT_MASK;
+       if (cat != LPROPS_UNCAT) {
+               cat = ubifs_categorize_lprops(c, lp);
+               if (cat != (lp->flags & LPROPS_CAT_MASK)) {
+                       ubifs_err("bad LEB category %d expected %d",
+                                 (lp->flags & LPROPS_CAT_MASK), cat);
+                       goto out;
+               }
+       }
+
+       /* Check lp is on its category list (if it has one) */
+       if (in_tree) {
+               struct list_head *list = NULL;
+
+               switch (cat) {
+               case LPROPS_EMPTY:
+                       list = &c->empty_list;
+                       break;
+               case LPROPS_FREEABLE:
+                       list = &c->freeable_list;
+                       break;
+               case LPROPS_FRDI_IDX:
+                       list = &c->frdi_idx_list;
+                       break;
+               case LPROPS_UNCAT:
+                       list = &c->uncat_list;
+                       break;
+               }
+               if (list) {
+                       struct ubifs_lprops *lprops;
+                       int found = 0;
+
+                       list_for_each_entry(lprops, list, list) {
+                               if (lprops == lp) {
+                                       found = 1;
+                                       break;
+                               }
+                       }
+                       if (!found) {
+                               ubifs_err("bad LPT list (category %d)", cat);
+                               goto out;
+                       }
+               }
+       }
+
+       /* Check lp is on its category heap; hpos must be a valid heap slot */
+       if (in_tree && cat > 0 && cat <= LPROPS_HEAP_CNT) {
+               struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
+
+               if (lp->hpos < 0 || lp->hpos >= heap->cnt ||
+                   lp != heap->arr[lp->hpos]) {
+                       ubifs_err("bad LPT heap (category %d)", cat);
+                       goto out;
+               }
+       }
+
+       sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
+       if (IS_ERR(sleb)) {
+               /*
+                * After an unclean unmount, empty and freeable LEBs
+                * may contain garbage.
+                */
+               if (lp->free == c->leb_size) {
+                       ubifs_err("scan errors were in empty LEB "
+                                 "- continuing checking");
+                       lst->empty_lebs += 1;
+                       lst->total_free += c->leb_size;
+                       lst->total_dark += calc_dark(c, c->leb_size);
+                       return LPT_SCAN_CONTINUE;
+               }
+
+               if (lp->free + lp->dirty == c->leb_size &&
+                   !(lp->flags & LPROPS_INDEX)) {
+                       ubifs_err("scan errors were in freeable LEB "
+                                 "- continuing checking");
+                       lst->total_free  += lp->free;
+                       lst->total_dirty += lp->dirty;
+                       lst->total_dark  +=  calc_dark(c, c->leb_size);
+                       return LPT_SCAN_CONTINUE;
+               }
+               data->err = PTR_ERR(sleb);
+               return LPT_SCAN_STOP;
+       }
+
+       is_idx = -1;
+       list_for_each_entry(snod, &sleb->nodes, list) {
+               int found, level = 0;
+
+               cond_resched();
+
+               if (is_idx == -1)
+                       is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0;
+
+               if (is_idx && snod->type != UBIFS_IDX_NODE) {
+                       ubifs_err("indexing node in data LEB %d:%d",
+                                 lnum, snod->offs);
+                       goto out_destroy;
+               }
+
+               if (snod->type == UBIFS_IDX_NODE) {
+                       struct ubifs_idx_node *idx = snod->node;
+
+                       key_read(c, ubifs_idx_key(c, idx), &snod->key);
+                       level = le16_to_cpu(idx->level);
+               }
+
+               found = ubifs_tnc_has_node(c, &snod->key, level, lnum,
+                                          snod->offs, is_idx);
+               if (found) {
+                       if (found < 0)
+                               goto out_destroy;
+                       used += ALIGN(snod->len, 8);
+               }
+       }
+
+       free = c->leb_size - sleb->endpt;
+       dirty = sleb->endpt - used;
+
+       if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
+           dirty < 0) {
+               ubifs_err("bad calculated accounting for LEB %d: "
+                         "free %d, dirty %d", lnum, free, dirty);
+               goto out_destroy;
+       }
+
+       if (lp->free + lp->dirty == c->leb_size &&
+           free + dirty == c->leb_size)
+               if ((is_idx && !(lp->flags & LPROPS_INDEX)) ||
+                   (!is_idx && free == c->leb_size) ||
+                   lp->free == c->leb_size) {
+                       /*
+                        * Empty or freeable LEBs could contain index
+                        * nodes from an uncompleted commit due to an
+                        * unclean unmount. Or they could be empty for
+                        * the same reason. Or it may simply not have been
+                        * unmapped.
+                        */
+                       free = lp->free;
+                       dirty = lp->dirty;
+                       is_idx = 0;
+               }
+
+       if (is_idx && lp->free + lp->dirty == free + dirty &&
+           lnum != c->ihead_lnum) {
+               /*
+                * After an unclean unmount, an index LEB could have a different
+                * amount of free space than the value recorded by lprops. That
+                * is because the in-the-gaps method may use free space or
+                * create free space (as a side-effect of using ubi_leb_change
+                * and not writing the whole LEB). The incorrect free space
+                * value is not a problem because the index is only ever
+                * allocated empty LEBs, so there will never be an attempt to
+                * write to the free space at the end of an index LEB - except
+                * by the in-the-gaps method for which it is not a problem.
+                */
+               free = lp->free;
+               dirty = lp->dirty;
+       }
+
+       if (lp->free != free || lp->dirty != dirty)
+               goto out_print;
+
+       if (is_idx && !(lp->flags & LPROPS_INDEX)) {
+               if (free == c->leb_size)
+                       /* Free but not unmapped LEB, it's fine */
+                       is_idx = 0;
+               else {
+                       ubifs_err("indexing node without indexing "
+                                 "flag");
+                       goto out_print;
+               }
+       }
+
+       if (!is_idx && (lp->flags & LPROPS_INDEX)) {
+               ubifs_err("data node with indexing flag");
+               goto out_print;
+       }
+
+       if (free == c->leb_size)
+               lst->empty_lebs += 1;
+
+       if (is_idx)
+               lst->idx_lebs += 1;
+
+       if (!(lp->flags & LPROPS_INDEX))
+               lst->total_used += c->leb_size - free - dirty;
+       lst->total_free += free;
+       lst->total_dirty += dirty;
+
+       if (!(lp->flags & LPROPS_INDEX)) {
+               int spc = free + dirty;
+
+               if (spc < c->dead_wm)
+                       lst->total_dead += spc;
+               else
+                       lst->total_dark += calc_dark(c, spc);
+       }
+
+       ubifs_scan_destroy(sleb);
+
+       return LPT_SCAN_CONTINUE;
+
+out_print:
+       ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
+                 "should be free %d, dirty %d",
+                 lnum, lp->free, lp->dirty, lp->flags, free, dirty);
+       dbg_dump_leb(c, lnum);
+out_destroy:
+       ubifs_scan_destroy(sleb);
+out:
+       data->err = -EINVAL;
+       return LPT_SCAN_STOP;
+}
+
+/**
+ * dbg_check_lprops - check all LEB properties.
+ * @c: UBIFS file-system description object
+ *
+ * This function checks all LEB properties and makes sure they are all correct.
+ * It returns zero if everything is fine, %-EINVAL if there is an inconsistency
+ * and other negative error codes in case of other errors. This function is
+ * called while the file system is locked (because of commit start), so no
+ * additional locking is required. Note that locking the LPT mutex would cause
+ * a circular lock dependency with the TNC mutex.
+ */
+int dbg_check_lprops(struct ubifs_info *c)
+{
+       int i, err;
+       struct scan_check_data data;
+       struct ubifs_lp_stats *lst = &data.lst;
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+               return 0;
+
+       /*
+        * As we are going to scan the media, the write buffers have to be
+        * synchronized.
+        */
+       for (i = 0; i < c->jhead_cnt; i++) {
+               err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+               if (err)
+                       return err;
+       }
+
+       memset(lst, 0, sizeof(struct ubifs_lp_stats));
+
+       data.err = 0;
+       err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
+                                   (ubifs_lpt_scan_callback)scan_check_cb,
+                                   &data);
+       if (err && err != -ENOSPC) /* -ENOSPC from the scan is tolerated */
+               goto out;
+       if (data.err) {
+               err = data.err;
+               goto out;
+       }
+
+       if (lst->empty_lebs != c->lst.empty_lebs ||
+           lst->idx_lebs != c->lst.idx_lebs ||
+           lst->total_free != c->lst.total_free ||
+           lst->total_dirty != c->lst.total_dirty ||
+           lst->total_used != c->lst.total_used) {
+               ubifs_err("bad overall accounting");
+               ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
+                         "total_free %lld, total_dirty %lld, total_used %lld",
+                         lst->empty_lebs, lst->idx_lebs, lst->total_free,
+                         lst->total_dirty, lst->total_used);
+               ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
+                         "total_free %lld, total_dirty %lld, total_used %lld",
+                         c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
+                         c->lst.total_dirty, c->lst.total_used);
+               err = -EINVAL;
+               goto out;
+       }
+
+       if (lst->total_dead != c->lst.total_dead ||
+           lst->total_dark != c->lst.total_dark) {
+               ubifs_err("bad dead/dark space accounting");
+               ubifs_err("calculated: total_dead %lld, total_dark %lld",
+                         lst->total_dead, lst->total_dark);
+               ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
+                         c->lst.total_dead, c->lst.total_dark);
+               err = -EINVAL;
+               goto out;
+       }
+
+       err = dbg_check_cats(c); /* finally cross-check the category lists/heaps */
+out:
+       return err;
+}
+
+#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c

new file mode 100644 (file)

index 0000000..9ff2463
--- /dev/null
+++ b/fs/ubifs/lpt.c
@@ -0,0 +1,2243 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements the LEB properties tree (LPT) area. The LPT area
+ * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and
+ * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits
+ * between the log and the orphan area.
+ *
+ * The LPT area is like a miniature self-contained file system. It is required
+ * that it never runs out of space, is fast to access and update, and scales
+ * logarithmically. The LEB properties tree is implemented as a wandering tree
+ * much like the TNC, and the LPT area has its own garbage collection.
+ *
+ * The LPT has two slightly different forms called the "small model" and the
+ * "big model". The small model is used when the entire LEB properties table
+ * can be written into a single eraseblock. In that case, garbage collection
+ * consists of just writing the whole table, which therefore makes all other
+ * eraseblocks reusable. In the case of the big model, dirty eraseblocks are
+ * selected for garbage collection, which consists of marking the nodes in
+ * that LEB as dirty, and then only the dirty nodes are written out. Also, in
+ * the case of the big model, a table of LEB numbers is saved so that the entire
+ * LPT does not need to be scanned looking for empty eraseblocks when UBIFS is
+ * first mounted.
+ */
+
+#include <linux/crc16.h>
+#include "ubifs.h"
+
+/**
+ * do_calc_lpt_geom - calculate sizes for the LPT area.
+ * @c: the UBIFS file-system description object
+ *
+ * Calculate the sizes of LPT bit fields, nodes, and tree, based on the
+ * properties of the flash and whether LPT is "big" (c->big_lpt).
+ */
+static void do_calc_lpt_geom(struct ubifs_info *c)
+{
+       int i, n, bits, per_leb_wastage, max_pnode_cnt;
+       long long sz, tot_wastage;
+
+       n = c->main_lebs + c->max_leb_cnt - c->leb_cnt;
+       max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT);
+
+       c->lpt_hght = 1;
+       n = UBIFS_LPT_FANOUT;
+       while (n < max_pnode_cnt) {
+               c->lpt_hght += 1;
+               n <<= UBIFS_LPT_FANOUT_SHIFT;
+       }
+
+       c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);
+
+       n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT);
+       c->nnode_cnt = n;
+       for (i = 1; i < c->lpt_hght; i++) {
+               n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT);
+               c->nnode_cnt += n;
+       }
+
+       c->space_bits = fls(c->leb_size) - 3;
+       c->lpt_lnum_bits = fls(c->lpt_lebs);
+       c->lpt_offs_bits = fls(c->leb_size - 1);
+       c->lpt_spc_bits = fls(c->leb_size);
+
+       n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT);
+       c->pcnt_bits = fls(n - 1);
+
+       c->lnum_bits = fls(c->max_leb_cnt - 1);
+
+       bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
+              (c->big_lpt ? c->pcnt_bits : 0) +
+              (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT;
+       c->pnode_sz = (bits + 7) / 8; /* bits rounded up to whole bytes */
+
+       bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
+              (c->big_lpt ? c->pcnt_bits : 0) +
+              (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT;
+       c->nnode_sz = (bits + 7) / 8;
+
+       bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
+              c->lpt_lebs * c->lpt_spc_bits * 2;
+       c->ltab_sz = (bits + 7) / 8;
+
+       bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
+              c->lnum_bits * c->lsave_cnt;
+       c->lsave_sz = (bits + 7) / 8;
+
+       /* Calculate the minimum LPT size */
+       c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
+       c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
+       c->lpt_sz += c->ltab_sz;
+       c->lpt_sz += c->lsave_sz;
+
+       /* Add wastage: one largest node per LEB, plus min. I/O alignment */
+       sz = c->lpt_sz;
+       per_leb_wastage = max_t(int, c->pnode_sz, c->nnode_sz);
+       sz += per_leb_wastage;
+       tot_wastage = per_leb_wastage;
+       while (sz > c->leb_size) {
+               sz += per_leb_wastage;
+               sz -= c->leb_size;
+               tot_wastage += per_leb_wastage;
+       }
+       tot_wastage += ALIGN(sz, c->min_io_size) - sz;
+       c->lpt_sz += tot_wastage;
+}
+
+/**
+ * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area.
+ * @c: the UBIFS file-system description object
+ *
+ * Returns %0 on success, or %-EINVAL if lpt_lebs is too small or ltab too big.
+ */
+int ubifs_calc_lpt_geom(struct ubifs_info *c)
+{
+       int lebs_needed;
+       uint64_t sz;
+
+       do_calc_lpt_geom(c);
+
+       /* Verify that lpt_lebs is big enough */
+       sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
+       sz += c->leb_size - 1;
+       do_div(sz, c->leb_size);
+       lebs_needed = sz;
+       if (lebs_needed > c->lpt_lebs) {
+               ubifs_err("too few LPT LEBs");
+               return -EINVAL;
+       }
+
+       /* Verify that ltab fits in a single LEB (ltab is a single node) */
+       if (c->ltab_sz > c->leb_size) {
+               ubifs_err("LPT ltab too big");
+               return -EINVAL;
+       }
+
+       c->check_lpt_free = c->big_lpt;
+
+       return 0;
+}
+
+/**
+ * calc_dflt_lpt_geom - calculate default LPT geometry.
+ * @c: the UBIFS file-system description object
+ * @main_lebs: number of main area LEBs is passed and returned here
+ * @big_lpt: whether the LPT area is "big" is returned here
+ *
+ * The size of the LPT area depends on parameters that themselves are dependent
+ * on the size of the LPT area. This function successively recalculates the LPT
+ * area geometry until the parameters and resultant geometry are consistent.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
+                             int *big_lpt)
+{
+       int i, lebs_needed;
+       uint64_t sz;
+
+       /* Start by assuming the minimum number of LPT LEBs */
+       c->lpt_lebs = UBIFS_MIN_LPT_LEBS;
+       c->main_lebs = *main_lebs - c->lpt_lebs;
+       if (c->main_lebs <= 0)
+               return -EINVAL;
+
+       /* And assume we will use the small LPT model */
+       c->big_lpt = 0;
+
+       /*
+        * Calculate the geometry based on assumptions above and then see if it
+        * makes sense
+        */
+       do_calc_lpt_geom(c);
+
+       /* Small LPT model must have lpt_sz <= leb_size */
+       if (c->lpt_sz > c->leb_size) {
+               /* Nope, so try again using big LPT model */
+               c->big_lpt = 1;
+               do_calc_lpt_geom(c);
+       }
+
+       /* Now check there are enough LPT LEBs */
+       for (i = 0; i < 64 ; i++) {
+               sz = c->lpt_sz * 4; /* Allow 4 times the size */
+               sz += c->leb_size - 1;
+               do_div(sz, c->leb_size);
+               lebs_needed = sz;
+               if (lebs_needed > c->lpt_lebs) {
+                       /* Not enough LPT LEBs so try again with more */
+                       c->lpt_lebs = lebs_needed;
+                       c->main_lebs = *main_lebs - c->lpt_lebs;
+                       if (c->main_lebs <= 0)
+                               return -EINVAL;
+                       do_calc_lpt_geom(c);
+                       continue;
+               }
+               if (c->ltab_sz > c->leb_size) {
+                       ubifs_err("LPT ltab too big");
+                       return -EINVAL;
+               }
+               *main_lebs = c->main_lebs;
+               *big_lpt = c->big_lpt;
+               return 0;
+       }
+       return -EINVAL; /* geometry did not settle within 64 attempts */
+}
+
+/**
+ * pack_bits - pack bit fields end-to-end.
+ * @addr: address at which to pack (passed and next address returned)
+ * @pos: bit position at which to pack (passed and next position returned)
+ * @val: value to pack
+ * @nrbits: number of bits of value to pack (1-32)
+ */
+static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits)
+{
+       uint8_t *p = *addr;
+       int b = *pos;
+
+       ubifs_assert(nrbits > 0);
+       ubifs_assert(nrbits <= 32);
+       ubifs_assert(*pos >= 0);
+       ubifs_assert(*pos < 8);
+       ubifs_assert((val >> nrbits) == 0 || nrbits == 32);
+       if (b) {
+               *p |= ((uint8_t)val) << b; /* OR into the partly used byte */
+               nrbits += b;
+               if (nrbits > 8) {
+                       *++p = (uint8_t)(val >>= (8 - b));
+                       if (nrbits > 16) {
+                               *++p = (uint8_t)(val >>= 8);
+                               if (nrbits > 24) {
+                                       *++p = (uint8_t)(val >>= 8);
+                                       if (nrbits > 32)
+                                               *++p = (uint8_t)(val >>= 8);
+                               }
+                       }
+               }
+       } else {
+               *p = (uint8_t)val;
+               if (nrbits > 8) {
+                       *++p = (uint8_t)(val >>= 8);
+                       if (nrbits > 16) {
+                               *++p = (uint8_t)(val >>= 8);
+                               if (nrbits > 24)
+                                       *++p = (uint8_t)(val >>= 8);
+                       }
+               }
+       }
+       b = nrbits & 7;
+       if (b == 0)
+               p++; /* last byte is full, so the next field starts after it */
+       *addr = p;
+       *pos = b;
+}
+
+/**
+ * ubifs_unpack_bits - unpack bit fields.
+ * @addr: address at which to unpack (passed and next address returned)
+ * @pos: bit position at which to unpack (passed and next position returned)
+ * @nrbits: number of bits of value to unpack (1-32)
+ *
+ * Only the bytes that the bit field actually occupies are read, so unpacking
+ * a field that ends at the last byte of a buffer does not access memory
+ * beyond that buffer.
+ *
+ * This function returns the value unpacked.
+ */
+uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
+{
+       const int k = 32 - nrbits;
+       uint8_t *p = *addr;
+       int b = *pos, i;
+       /* Number of bytes, starting at p, that hold some part of the field */
+       const int bytes = (nrbits + b + 7) >> 3;
+       uint32_t val = 0;
+
+       ubifs_assert(nrbits > 0);
+       ubifs_assert(nrbits <= 32);
+       ubifs_assert(*pos >= 0);
+       ubifs_assert(*pos < 8);
+       /* Gather the bytes after the first one, most significant byte first */
+       for (i = bytes - 1; i > 0; i--)
+               val = (val << 8) | p[i];
+       if (b) {
+               /* The first byte contributes only its bits from b upwards */
+               val <<= (8 - b);
+               val |= *p >> b;
+       } else
+               val = (val << 8) | *p;
+       /* Clear everything above the low nrbits bits */
+       val <<= k;
+       val >>= k;
+       nrbits += b;
+       b = nrbits & 7;
+       p += nrbits / 8;
+       *addr = p;
+       *pos = b;
+       /*
+        * Check against the requested field width (32 - k). A 32-bit field
+        * (k == 0) cannot overflow, and shifting by 32 would be undefined.
+        */
+       ubifs_assert(k == 0 || (val >> (32 - k)) == 0);
+       return val;
+}
+
+/**
+ * ubifs_pack_pnode - serialize a pnode into its packed form.
+ * @c: UBIFS file-system description object
+ * @buf: buffer that receives the packed pnode
+ * @pnode: pnode to serialize
+ *
+ * The bit fields are written after the bytes reserved for the CRC; the CRC of
+ * those fields is then stored at the very start of @buf.
+ */
+void ubifs_pack_pnode(struct ubifs_info *c, void *buf,
+                     struct ubifs_pnode *pnode)
+{
+       void *payload = buf + UBIFS_LPT_CRC_BYTES;
+       uint8_t *addr = payload;
+       int pos = 0, i;
+       uint16_t crc;
+
+       pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS);
+       if (c->big_lpt)
+               pack_bits(&addr, &pos, pnode->num, c->pcnt_bits);
+       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+               const struct ubifs_lprops *lp = &pnode->lprops[i];
+
+               /* Space values are stored with their low 3 bits dropped */
+               pack_bits(&addr, &pos, lp->free >> 3, c->space_bits);
+               pack_bits(&addr, &pos, lp->dirty >> 3, c->space_bits);
+               /* A single bit records whether LPROPS_INDEX is set */
+               pack_bits(&addr, &pos, (lp->flags & LPROPS_INDEX) ? 1 : 0, 1);
+       }
+       crc = crc16(-1, payload, c->pnode_sz - UBIFS_LPT_CRC_BYTES);
+       /* Rewind to the front of the buffer, where the CRC lives */
+       addr = buf;
+       pos = 0;
+       pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
+}
+
+/**
+ * ubifs_pack_nnode - serialize a nnode into its packed form.
+ * @c: UBIFS file-system description object
+ * @buf: buffer that receives the packed nnode
+ * @nnode: nnode to serialize
+ *
+ * The bit fields are written after the bytes reserved for the CRC; the CRC of
+ * those fields is then stored at the very start of @buf.
+ */
+void ubifs_pack_nnode(struct ubifs_info *c, void *buf,
+                     struct ubifs_nnode *nnode)
+{
+       void *payload = buf + UBIFS_LPT_CRC_BYTES;
+       uint8_t *addr = payload;
+       int pos = 0, i;
+       uint16_t crc;
+
+       pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS);
+       if (c->big_lpt)
+               pack_bits(&addr, &pos, nnode->num, c->pcnt_bits);
+       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+               const struct ubifs_nbranch *br = &nnode->nbranch[i];
+               /* LEB number zero is encoded as one past the last LPT LEB */
+               int lnum = br->lnum ? br->lnum : c->lpt_last + 1;
+
+               pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits);
+               pack_bits(&addr, &pos, br->offs, c->lpt_offs_bits);
+       }
+       crc = crc16(-1, payload, c->nnode_sz - UBIFS_LPT_CRC_BYTES);
+       /* Rewind to the front of the buffer, where the CRC lives */
+       addr = buf;
+       pos = 0;
+       pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
+}
+
+/**
+ * ubifs_pack_ltab - serialize the LPT's own lprops table.
+ * @c: UBIFS file-system description object
+ * @buf: buffer that receives the packed table
+ * @ltab: LPT's own lprops table, @c->lpt_lebs entries long
+ *
+ * The free and dirty values of every entry are packed after the bytes
+ * reserved for the CRC; the CRC is then stored at the start of @buf.
+ */
+void ubifs_pack_ltab(struct ubifs_info *c, void *buf,
+                    struct ubifs_lpt_lprops *ltab)
+{
+       void *payload = buf + UBIFS_LPT_CRC_BYTES;
+       uint8_t *addr = payload;
+       int pos = 0, i;
+       uint16_t crc;
+
+       pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS);
+       for (i = 0; i < c->lpt_lebs; i++) {
+               const struct ubifs_lpt_lprops *lp = ltab + i;
+
+               pack_bits(&addr, &pos, lp->free, c->lpt_spc_bits);
+               pack_bits(&addr, &pos, lp->dirty, c->lpt_spc_bits);
+       }
+       crc = crc16(-1, payload, c->ltab_sz - UBIFS_LPT_CRC_BYTES);
+       /* Rewind to the front of the buffer, where the CRC lives */
+       addr = buf;
+       pos = 0;
+       pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
+}
+
+/**
+ * ubifs_pack_lsave - serialize the LPT's save table.
+ * @c: UBIFS file-system description object
+ * @buf: buffer that receives the packed table
+ * @lsave: LPT's save table, @c->lsave_cnt LEB numbers long
+ *
+ * The LEB numbers are packed after the bytes reserved for the CRC; the CRC is
+ * then stored at the start of @buf.
+ */
+void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave)
+{
+       void *payload = buf + UBIFS_LPT_CRC_BYTES;
+       uint8_t *addr = payload;
+       int pos = 0, i = 0;
+       uint16_t crc;
+
+       pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS);
+       while (i < c->lsave_cnt)
+               pack_bits(&addr, &pos, lsave[i++], c->lnum_bits);
+       crc = crc16(-1, payload, c->lsave_sz - UBIFS_LPT_CRC_BYTES);
+       /* Rewind to the front of the buffer, where the CRC lives */
+       addr = buf;
+       pos = 0;
+       pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
+}
+
+/**
+ * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to which to add dirty space
+ * @dirty: amount of dirty space to add
+ *
+ * Nothing is done if either @lnum or @dirty is zero. Otherwise @lnum must lie
+ * within the LPT area (@c->lpt_first to @c->lpt_last inclusive).
+ */
+void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty)
+{
+       if (!dirty || !lnum)
+               return;
+       /* Check the range before the debug message indexes the table */
+       ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
+       dbg_lp("LEB %d add %d to %d",
+              lnum, dirty, c->ltab[lnum - c->lpt_first].dirty);
+       c->ltab[lnum - c->lpt_first].dirty += dirty;
+}
+
+/**
+ * set_ltab - set LPT LEB properties.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+ * @free: amount of free space
+ * @dirty: amount of dirty space
+ *
+ * @lnum must lie within the LPT area (@c->lpt_first to @c->lpt_last
+ * inclusive); it is used, relative to @c->lpt_first, as the index into
+ * @c->ltab.
+ */
+static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty)
+{
+       /* Check the range before the debug message indexes the table */
+       ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
+       dbg_lp("LEB %d free %d dirty %d to %d %d",
+              lnum, c->ltab[lnum - c->lpt_first].free,
+              c->ltab[lnum - c->lpt_first].dirty, free, dirty);
+       c->ltab[lnum - c->lpt_first].free = free;
+       c->ltab[lnum - c->lpt_first].dirty = dirty;
+}
+
+/**
+ * ubifs_add_nnode_dirt - account the space of a nnode as dirty.
+ * @c: UBIFS file-system description object
+ * @nnode: nnode for which to add dirt
+ *
+ * The size of a nnode is added to the dirty space of the LEB recorded for
+ * @nnode. For a nnode without a parent the ltab is additionally marked dirty,
+ * and its size accounted, if it was not marked already.
+ */
+void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode)
+{
+       struct ubifs_nnode *parent = nnode->parent;
+
+       if (parent) {
+               int lnum = parent->nbranch[nnode->iip].lnum;
+
+               ubifs_add_lpt_dirt(c, lnum, c->nnode_sz);
+               return;
+       }
+
+       /* No parent: the position of this nnode is kept in @c itself */
+       ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz);
+       if (c->lpt_drty_flgs & LTAB_DIRTY)
+               return;
+       c->lpt_drty_flgs |= LTAB_DIRTY;
+       ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz);
+}
+
+/**
+ * add_pnode_dirt - account the space of a pnode as dirty.
+ * @c: UBIFS file-system description object
+ * @pnode: pnode for which to add dirt
+ *
+ * The size of a pnode is added to the dirty space of the LEB that the parent's
+ * branch records for @pnode.
+ */
+static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode)
+{
+       int lnum = pnode->parent->nbranch[pnode->iip].lnum;
+
+       ubifs_add_lpt_dirt(c, lnum, c->pnode_sz);
+}
+
+/**
+ * calc_nnode_num - calculate nnode number.
+ * @row: the row in the tree (root is zero)
+ * @col: the column in the row (leftmost is zero)
+ *
+ * The nnode number uniquely identifies a nnode and can be used to walk the
+ * tree from the root down to that nnode.
+ *
+ * Starting from 1, one fanout-sized group of bits is appended to the number
+ * per row, taken from @col lowest group first. The resulting nnode number for
+ * the nnode at @row and @col is returned.
+ */
+static int calc_nnode_num(int row, int col)
+{
+       int num = 1;
+
+       for (; row; row--) {
+               /* Append the lowest bit group of @col, then drop it */
+               num = (num << UBIFS_LPT_FANOUT_SHIFT) |
+                     (col & (UBIFS_LPT_FANOUT - 1));
+               col >>= UBIFS_LPT_FANOUT_SHIFT;
+       }
+       return num;
+}
+
+/**
+ * calc_nnode_num_from_parent - calculate nnode number.
+ * @c: UBIFS file-system description object
+ * @parent: parent nnode (%NULL yields the number 1)
+ * @iip: index in parent
+ *
+ * The nnode number uniquely identifies a nnode and can be used to walk the
+ * tree from the root down to that nnode.
+ *
+ * This function derives and returns the nnode number from the parent's nnode
+ * number and the index in the parent.
+ */
+static int calc_nnode_num_from_parent(struct ubifs_info *c,
+                                     struct ubifs_nnode *parent, int iip)
+{
+       int shft, num;
+
+       if (parent == NULL)
+               return 1;
+       shft = UBIFS_LPT_FANOUT_SHIFT * (c->lpt_hght - parent->level);
+       /*
+        * Toggle bit @shft of the parent's number, then OR in the fanout
+        * plus @iip, shifted to that same position.
+        */
+       num = (parent->num ^ (1 << shft)) | ((UBIFS_LPT_FANOUT + iip) << shft);
+       return num;
+}
+
+/**
+ * calc_pnode_num_from_parent - calculate pnode number.
+ * @c: UBIFS file-system description object
+ * @parent: parent nnode
+ * @iip: index in parent
+ *
+ * The pnode number uniquely identifies a pnode and can be used to walk the
+ * tree from the root down to that pnode.
+ *
+ * This function derives and returns the pnode number from the parent's nnode
+ * number and the index in the parent.
+ */
+static int calc_pnode_num_from_parent(struct ubifs_info *c,
+                                     struct ubifs_nnode *parent, int iip)
+{
+       int num = 0, pnum = parent->num, levels = c->lpt_hght - 1;
+
+       /* Move one bit group per level from @pnum to @num, lowest first */
+       while (levels-- > 0) {
+               num = (num << UBIFS_LPT_FANOUT_SHIFT) |
+                     (pnum & (UBIFS_LPT_FANOUT - 1));
+               pnum >>= UBIFS_LPT_FANOUT_SHIFT;
+       }
+       /* The index in the parent forms the lowest bit group */
+       return (num << UBIFS_LPT_FANOUT_SHIFT) | iip;
+}
+
+/**
+ * ubifs_create_dflt_lpt - create default LPT.
+ * @c: UBIFS file-system description object
+ * @main_lebs: number of main area LEBs is passed and returned here
+ * @lpt_first: LEB number of first LPT LEB
+ * @lpt_lebs: number of LEBs for LPT is passed and returned here
+ * @big_lpt: use big LPT model is passed and returned here
+ *
+ * The nodes are packed one after another into a LEB-sized buffer in this
+ * order: all pnodes, then the nnodes one level at a time up to the root, then
+ * the save table (only if @big_lpt is set) and finally the LPT's own lprops
+ * table. Whenever the next node does not fit into the current LEB, the buffer
+ * is padded with 0xff bytes and written out with 'ubi_leb_change()', and
+ * packing continues in the next LEB.
+ *
+ * On the way the positions of the root nnode, the save table, the ltab and
+ * the LPT head are recorded in @c. All temporary allocations are released and
+ * @c->ltab is reset to %NULL before returning, on success and on failure.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
+                         int *lpt_lebs, int *big_lpt)
+{
+       int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row;
+       int blnum, boffs, bsz, bcnt;
+       struct ubifs_pnode *pnode = NULL;
+       struct ubifs_nnode *nnode = NULL;
+       void *buf = NULL, *p;
+       struct ubifs_lpt_lprops *ltab = NULL;
+       int *lsave = NULL;
+
+       err = calc_dflt_lpt_geom(c, main_lebs, big_lpt);
+       if (err)
+               return err;
+       *lpt_lebs = c->lpt_lebs;
+
+       /* Needed by 'ubifs_pack_nnode()' and 'set_ltab()' */
+       c->lpt_first = lpt_first;
+       /* Needed by 'set_ltab()' */
+       c->lpt_last = lpt_first + c->lpt_lebs - 1;
+       /* Needed by 'ubifs_pack_lsave()' */
+       c->main_first = c->leb_cnt - *main_lebs;
+
+       /*
+        * Allocate everything up front and check once; the 'out' label frees
+        * whatever was successfully allocated.
+        */
+       lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_KERNEL);
+       pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL);
+       nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL);
+       buf = vmalloc(c->leb_size);
+       ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
+       if (!pnode || !nnode || !buf || !ltab || !lsave) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       ubifs_assert(!c->ltab);
+       c->ltab = ltab; /* Needed by set_ltab */
+
+       /* Initialize LPT's own lprops */
+       for (i = 0; i < c->lpt_lebs; i++) {
+               ltab[i].free = c->leb_size;
+               ltab[i].dirty = 0;
+               ltab[i].tgc = 0;
+               ltab[i].cmt = 0;
+       }
+
+       lnum = lpt_first;
+       p = buf;
+       /* Number of leaf nodes (pnodes) */
+       cnt = c->pnode_cnt;
+
+       /*
+        * The first pnode contains the LEB properties for the LEBs that contain
+        * the root inode node and the root index node of the index tree.
+        */
+       node_sz = ALIGN(ubifs_idx_node_sz(c, 1), 8);
+       iopos = ALIGN(node_sz, c->min_io_size);
+       pnode->lprops[0].free = c->leb_size - iopos;
+       pnode->lprops[0].dirty = iopos - node_sz;
+       pnode->lprops[0].flags = LPROPS_INDEX;
+
+       node_sz = UBIFS_INO_NODE_SZ;
+       iopos = ALIGN(node_sz, c->min_io_size);
+       pnode->lprops[1].free = c->leb_size - iopos;
+       pnode->lprops[1].dirty = iopos - node_sz;
+
+       /* The remaining entries of the first pnode are entirely free */
+       for (i = 2; i < UBIFS_LPT_FANOUT; i++)
+               pnode->lprops[i].free = c->leb_size;
+
+       /* Add first pnode */
+       ubifs_pack_pnode(c, p, pnode);
+       p += c->pnode_sz;
+       len = c->pnode_sz;
+       pnode->num += 1;
+
+       /* Reset pnode values for remaining pnodes */
+       pnode->lprops[0].free = c->leb_size;
+       pnode->lprops[0].dirty = 0;
+       pnode->lprops[0].flags = 0;
+
+       pnode->lprops[1].free = c->leb_size;
+       pnode->lprops[1].dirty = 0;
+
+       /*
+        * To calculate the internal node branches, we keep information about
+        * the level below.
+        */
+       blnum = lnum; /* LEB number of level below */
+       boffs = 0; /* Offset of level below */
+       bcnt = cnt; /* Number of nodes in level below */
+       bsz = c->pnode_sz; /* Size of nodes in level below */
+
+       /* Add all remaining pnodes */
+       for (i = 1; i < cnt; i++) {
+               if (len + c->pnode_sz > c->leb_size) {
+                       /*
+                        * The next pnode does not fit: record the space of
+                        * this LEB, pad the buffer and write the LEB out.
+                        */
+                       alen = ALIGN(len, c->min_io_size);
+                       set_ltab(c, lnum, c->leb_size - alen, alen - len);
+                       memset(p, 0xff, alen - len);
+                       err = ubi_leb_change(c->ubi, lnum++, buf, alen,
+                                            UBI_SHORTTERM);
+                       if (err)
+                               goto out;
+                       p = buf;
+                       len = 0;
+               }
+               ubifs_pack_pnode(c, p, pnode);
+               p += c->pnode_sz;
+               len += c->pnode_sz;
+               /*
+                * pnodes are simply numbered left to right starting at zero,
+                * which means the pnode number can be used easily to traverse
+                * down the tree to the corresponding pnode.
+                */
+               pnode->num += 1;
+       }
+
+       /*
+        * Work out the row of the lowest nnode level: count how many times
+        * the fanout has to be multiplied up before it covers all pnodes.
+        */
+       row = 0;
+       for (i = UBIFS_LPT_FANOUT; cnt > i; i <<= UBIFS_LPT_FANOUT_SHIFT)
+               row += 1;
+       /* Add all nnodes, one level at a time */
+       while (1) {
+               /* Number of internal nodes (nnodes) at next level */
+               cnt = DIV_ROUND_UP(cnt, UBIFS_LPT_FANOUT);
+               for (i = 0; i < cnt; i++) {
+                       if (len + c->nnode_sz > c->leb_size) {
+                               /* Same flush as for the pnodes above */
+                               alen = ALIGN(len, c->min_io_size);
+                               set_ltab(c, lnum, c->leb_size - alen,
+                                           alen - len);
+                               memset(p, 0xff, alen - len);
+                               err = ubi_leb_change(c->ubi, lnum++, buf, alen,
+                                                    UBI_SHORTTERM);
+                               if (err)
+                                       goto out;
+                               p = buf;
+                               len = 0;
+                       }
+                       /* Only 1 nnode at this level, so it is the root */
+                       if (cnt == 1) {
+                               c->lpt_lnum = lnum;
+                               c->lpt_offs = len;
+                       }
+                       /* Set branches to the level below */
+                       for (j = 0; j < UBIFS_LPT_FANOUT; j++) {
+                               if (bcnt) {
+                                       /*
+                                        * Mirror the LEB switch made when the
+                                        * level below was laid out.
+                                        */
+                                       if (boffs + bsz > c->leb_size) {
+                                               blnum += 1;
+                                               boffs = 0;
+                                       }
+                                       nnode->nbranch[j].lnum = blnum;
+                                       nnode->nbranch[j].offs = boffs;
+                                       boffs += bsz;
+                                       bcnt--;
+                               } else {
+                                       /* No node below: empty branch */
+                                       nnode->nbranch[j].lnum = 0;
+                                       nnode->nbranch[j].offs = 0;
+                               }
+                       }
+                       nnode->num = calc_nnode_num(row, i);
+                       ubifs_pack_nnode(c, p, nnode);
+                       p += c->nnode_sz;
+                       len += c->nnode_sz;
+               }
+               /* Only 1 nnode at this level, so it is the root */
+               if (cnt == 1)
+                       break;
+               /* Update the information about the level below */
+               bcnt = cnt;
+               bsz = c->nnode_sz;
+               row -= 1;
+       }
+
+       if (*big_lpt) {
+               /* Need to add LPT's save table */
+               if (len + c->lsave_sz > c->leb_size) {
+                       alen = ALIGN(len, c->min_io_size);
+                       set_ltab(c, lnum, c->leb_size - alen, alen - len);
+                       memset(p, 0xff, alen - len);
+                       err = ubi_leb_change(c->ubi, lnum++, buf, alen,
+                                            UBI_SHORTTERM);
+                       if (err)
+                               goto out;
+                       p = buf;
+                       len = 0;
+               }
+
+               c->lsave_lnum = lnum;
+               c->lsave_offs = len;
+
+               /*
+                * Fill the table with consecutive main area LEB numbers; any
+                * slots beyond the main area get the first main LEB number.
+                */
+               for (i = 0; i < c->lsave_cnt && i < *main_lebs; i++)
+                       lsave[i] = c->main_first + i;
+               for (; i < c->lsave_cnt; i++)
+                       lsave[i] = c->main_first;
+
+               ubifs_pack_lsave(c, p, lsave);
+               p += c->lsave_sz;
+               len += c->lsave_sz;
+       }
+
+       /* Need to add LPT's own LEB properties table */
+       if (len + c->ltab_sz > c->leb_size) {
+               alen = ALIGN(len, c->min_io_size);
+               set_ltab(c, lnum, c->leb_size - alen, alen - len);
+               memset(p, 0xff, alen - len);
+               err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM);
+               if (err)
+                       goto out;
+               p = buf;
+               len = 0;
+       }
+
+       c->ltab_lnum = lnum;
+       c->ltab_offs = len;
+
+       /* Update ltab before packing it */
+       len += c->ltab_sz;
+       alen = ALIGN(len, c->min_io_size);
+       set_ltab(c, lnum, c->leb_size - alen, alen - len);
+
+       ubifs_pack_ltab(c, p, ltab);
+       p += c->ltab_sz;
+
+       /* Write remaining buffer */
+       memset(p, 0xff, alen - len);
+       err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM);
+       if (err)
+               goto out;
+
+       /* The LPT head is placed right behind the data just written */
+       c->nhead_lnum = lnum;
+       c->nhead_offs = ALIGN(len, c->min_io_size);
+
+       dbg_lp("space_bits %d", c->space_bits);
+       dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
+       dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
+       dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits);
+       dbg_lp("pcnt_bits %d", c->pcnt_bits);
+       dbg_lp("lnum_bits %d", c->lnum_bits);
+       dbg_lp("pnode_sz %d", c->pnode_sz);
+       dbg_lp("nnode_sz %d", c->nnode_sz);
+       dbg_lp("ltab_sz %d", c->ltab_sz);
+       dbg_lp("lsave_sz %d", c->lsave_sz);
+       dbg_lp("lsave_cnt %d", c->lsave_cnt);
+       dbg_lp("lpt_hght %d", c->lpt_hght);
+       dbg_lp("big_lpt %d", c->big_lpt);
+       dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
+       dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
+       dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
+       if (c->big_lpt)
+               dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
+out:
+       /* Reached on success too: the ltab was only borrowed by @c */
+       c->ltab = NULL;
+       kfree(lsave);
+       vfree(ltab);
+       vfree(buf);
+       kfree(nnode);
+       kfree(pnode);
+       return err;
+}
+
+/**
+ * update_cats - add LEB properties of a pnode to LEB category lists and heaps.
+ * @c: UBIFS file-system description object
+ * @pnode: pnode
+ *
+ * Each LEB properties entry of @pnode is handed to 'ubifs_add_to_cat()' with
+ * the category taken from its flags. Processing stops at the first entry
+ * whose LEB number is zero.
+ */
+static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode)
+{
+       int i;
+
+       for (i = 0; i < UBIFS_LPT_FANOUT && pnode->lprops[i].lnum; i++) {
+               int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK;
+
+               ubifs_add_to_cat(c, &pnode->lprops[i], cat);
+       }
+}
+
+/**
+ * replace_cats - replace a pnode's LEB properties in category lists and heaps.
+ * @c: UBIFS file-system description object
+ * @old_pnode: pnode copied
+ * @new_pnode: pnode copy
+ *
+ * During commit it is sometimes necessary to copy a pnode
+ * (see dirty_cow_pnode). When that happens, the references held by the
+ * category lists and heaps must be switched from the old entries to the new
+ * ones, which is what this function does. Processing stops at the first entry
+ * of @new_pnode whose LEB number is zero.
+ */
+static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode,
+                        struct ubifs_pnode *new_pnode)
+{
+       int i;
+
+       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+               struct ubifs_lprops *new_lp = &new_pnode->lprops[i];
+
+               if (!new_lp->lnum)
+                       break;
+               ubifs_replace_cat(c, &old_pnode->lprops[i], new_lp);
+       }
+}
+
+/**
+ * check_lpt_crc - check LPT node crc is correct.
+ * @buf: buffer containing node
+ * @len: length of node
+ *
+ * The CRC stored at the start of @buf is compared with the CRC calculated
+ * over the rest of the node.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int check_lpt_crc(void *buf, int len)
+{
+       uint8_t *addr = buf;
+       int pos = 0;
+       uint16_t stored, computed;
+
+       stored = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS);
+       computed = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
+                        len - UBIFS_LPT_CRC_BYTES);
+       if (stored == computed)
+               return 0;
+
+       ubifs_err("invalid crc in LPT node: crc %hx calc %hx", stored,
+                 computed);
+       dbg_dump_stack();
+       return -EINVAL;
+}
+
+/**
+ * check_lpt_type - check LPT node type is correct.
+ * @addr: address of type bit field is passed and returned updated here
+ * @pos: position of type bit field is passed and returned updated here
+ * @type: expected type
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int check_lpt_type(uint8_t **addr, int *pos, int type)
+{
+       int found = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS);
+
+       if (found == type)
+               return 0;
+
+       ubifs_err("invalid type (%d) in LPT node type %d", found, type);
+       dbg_dump_stack();
+       return -EINVAL;
+}
+
+/**
+ * unpack_pnode - unpack a pnode.
+ * @c: UBIFS file-system description object
+ * @buf: buffer containing packed pnode to unpack
+ * @pnode: pnode structure to fill
+ *
+ * The node type is checked first, then all fields are unpacked into @pnode
+ * and finally the CRC of the packed node is verified.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int unpack_pnode(struct ubifs_info *c, void *buf,
+                       struct ubifs_pnode *pnode)
+{
+       uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
+       int pos = 0, i, err;
+
+       err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE);
+       if (err)
+               return err;
+       if (c->big_lpt)
+               pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
+       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+               struct ubifs_lprops * const lp = &pnode->lprops[i];
+
+               /* Space values were packed with their low 3 bits dropped */
+               lp->free = ubifs_unpack_bits(&addr, &pos, c->space_bits);
+               lp->free <<= 3;
+               lp->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits);
+               lp->dirty <<= 3;
+
+               /* One bit says whether LPROPS_INDEX is set */
+               lp->flags = ubifs_unpack_bits(&addr, &pos, 1) ?
+                           LPROPS_INDEX : 0;
+               lp->flags |= ubifs_categorize_lprops(c, lp);
+       }
+       return check_lpt_crc(buf, c->pnode_sz);
+}
+
+/**
+ * unpack_nnode - unpack a nnode.
+ * @c: UBIFS file-system description object
+ * @buf: buffer containing packed nnode to unpack
+ * @nnode: nnode structure to fill
+ *
+ * The node type is checked first, then all branches are unpacked into @nnode
+ * and finally the CRC of the packed node is verified.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int unpack_nnode(struct ubifs_info *c, void *buf,
+                       struct ubifs_nnode *nnode)
+{
+       uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
+       int pos = 0, i, err;
+
+       err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE);
+       if (err)
+               return err;
+       if (c->big_lpt)
+               nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
+       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+               struct ubifs_nbranch *br = &nnode->nbranch[i];
+               int lnum = c->lpt_first +
+                          ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits);
+
+               /* One past the last LPT LEB stands for LEB number zero */
+               br->lnum = (lnum == c->lpt_last + 1) ? 0 : lnum;
+               br->offs = ubifs_unpack_bits(&addr, &pos, c->lpt_offs_bits);
+       }
+       return check_lpt_crc(buf, c->nnode_sz);
+}
+
+/**
+ * unpack_ltab - unpack the LPT's own lprops table.
+ * @c: UBIFS file-system description object
+ * @buf: buffer from which to unpack
+ *
+ * The entries are unpacked into @c->ltab. Every free and dirty value, and
+ * their sum, must lie within the LEB size.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int unpack_ltab(struct ubifs_info *c, void *buf)
+{
+       uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
+       int pos = 0, i, err;
+
+       err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB);
+       if (err)
+               return err;
+       for (i = 0; i < c->lpt_lebs; i++) {
+               struct ubifs_lpt_lprops *lp = &c->ltab[i];
+               int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits);
+               int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits);
+
+               if (free < 0 || free > c->leb_size)
+                       return -EINVAL;
+               if (dirty < 0 || dirty > c->leb_size)
+                       return -EINVAL;
+               if (free + dirty > c->leb_size)
+                       return -EINVAL;
+
+               lp->free = free;
+               lp->dirty = dirty;
+               lp->tgc = 0;
+               lp->cmt = 0;
+       }
+       return check_lpt_crc(buf, c->ltab_sz);
+}
+
+/**
+ * unpack_lsave - unpack the LPT's save table.
+ * @c: UBIFS file-system description object
+ * @buf: buffer from which to unpack
+ *
+ * The LEB numbers are unpacked into @c->lsave. Each of them must be at least
+ * @c->main_first and less than @c->leb_cnt.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int unpack_lsave(struct ubifs_info *c, void *buf)
+{
+       uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
+       int pos = 0, i, err;
+
+       err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE);
+       if (err)
+               return err;
+       for (i = 0; i < c->lsave_cnt; i++) {
+               int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits);
+
+               if (!(lnum >= c->main_first && lnum < c->leb_cnt))
+                       return -EINVAL;
+               c->lsave[i] = lnum;
+       }
+       return check_lpt_crc(buf, c->lsave_sz);
+}
+
+/**
+ * validate_nnode - validate a nnode.
+ * @c: UBIFS file-system description object
+ * @nnode: nnode to validate
+ * @parent: parent nnode (or NULL for the root nnode)
+ * @iip: index in parent
+ *
+ * With the big LPT model the nnode number must match the one derived from the
+ * parent. Every branch must either be empty (LEB number and offset both zero)
+ * or point into the LPT area at an offset that leaves room for a child node.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode,
+                         struct ubifs_nnode *parent, int iip)
+{
+       int i, lvl, max_offs;
+
+       if (c->big_lpt &&
+           nnode->num != calc_nnode_num_from_parent(c, parent, iip))
+               return -EINVAL;
+       lvl = parent ? parent->level - 1 : c->lpt_hght;
+       if (lvl < 1)
+               return -EINVAL;
+       /* At level 1 the children are pnodes, above that they are nnodes */
+       max_offs = c->leb_size - (lvl == 1 ? c->pnode_sz : c->nnode_sz);
+       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+               int lnum = nnode->nbranch[i].lnum;
+               int offs = nnode->nbranch[i].offs;
+
+               if (lnum != 0) {
+                       if (lnum < c->lpt_first || lnum > c->lpt_last)
+                               return -EINVAL;
+                       if (offs < 0 || offs > max_offs)
+                               return -EINVAL;
+               } else if (offs != 0)
+                       return -EINVAL;
+       }
+       return 0;
+}
+
+/**
+ * validate_pnode - validate a pnode.
+ * @c: UBIFS file-system description object
+ * @pnode: pnode to validate
+ * @parent: parent nnode
+ * @iip: index in parent
+ *
+ * With the big LPT model the pnode number must match the one derived from the
+ * parent. The free and dirty space of every entry must be multiples of 8
+ * within the LEB size, free space must also be a multiple of the minimum I/O
+ * size, and free plus dirty must not exceed the LEB size.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
+                         struct ubifs_nnode *parent, int iip)
+{
+       int i;
+
+       if (c->big_lpt &&
+           pnode->num != calc_pnode_num_from_parent(c, parent, iip))
+               return -EINVAL;
+       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+               const struct ubifs_lprops *lp = &pnode->lprops[i];
+               int free = lp->free;
+               int dirty = lp->dirty;
+
+               if (free < 0 || free > c->leb_size)
+                       return -EINVAL;
+               if (free % c->min_io_size || (free & 7))
+                       return -EINVAL;
+               if (dirty < 0 || dirty > c->leb_size || (dirty & 7))
+                       return -EINVAL;
+               if (dirty + free > c->leb_size)
+                       return -EINVAL;
+       }
+       return 0;
+}
+
+/**
+ * set_pnode_lnum - set LEB numbers on a pnode.
+ * @c: UBIFS file-system description object
+ * @pnode: pnode to update
+ *
+ * The LEB properties of @pnode receive consecutive LEB numbers, starting from
+ * a number derived from the pnode number and @c->main_first. Entries whose
+ * number would reach @c->leb_cnt are left untouched.
+ */
+static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode)
+{
+       int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first;
+       int i;
+
+       for (i = 0; i < UBIFS_LPT_FANOUT && lnum < c->leb_cnt; i++, lnum++)
+               pnode->lprops[i].lnum = lnum;
+}
+
+/**
+ * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory.
+ * @c: UBIFS file-system description object
+ * @parent: parent nnode (or NULL for the root)
+ * @iip: index in parent
+ *
+ * The position of the nnode is taken from the parent's branch, or from @c for
+ * the root. On success the new nnode is attached to that branch, or becomes
+ * @c->nroot.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
+{
+       struct ubifs_nbranch *branch = parent ? &parent->nbranch[iip] : NULL;
+       struct ubifs_nnode *nnode;
+       void *buf = c->lpt_nod_buf;
+       int lnum = parent ? branch->lnum : c->lpt_lnum;
+       int offs = parent ? branch->offs : c->lpt_offs;
+       int err;
+
+       nnode = kzalloc(sizeof(*nnode), GFP_NOFS);
+       if (!nnode) {
+               err = -ENOMEM;
+               goto out;
+       }
+       if (lnum != 0) {
+               err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz);
+               if (!err)
+                       err = unpack_nnode(c, buf, nnode);
+               if (err)
+                       goto out;
+       } else if (c->big_lpt) {
+               /*
+                * The nnode was never written, which just means that the LEB
+                * properties in the subtree below it describe empty LEBs. The
+                * zeroed nnode is used as though it had been read, so only its
+                * number needs to be filled in.
+                */
+               nnode->num = calc_nnode_num_from_parent(c, parent, iip);
+       }
+       err = validate_nnode(c, nnode, parent, iip);
+       if (err)
+               goto out;
+       /* Without the big LPT model the number is not stored in the node */
+       if (!c->big_lpt)
+               nnode->num = calc_nnode_num_from_parent(c, parent, iip);
+       if (!parent) {
+               c->nroot = nnode;
+               nnode->level = c->lpt_hght;
+       } else {
+               branch->nnode = nnode;
+               nnode->level = parent->level - 1;
+       }
+       nnode->parent = parent;
+       nnode->iip = iip;
+       return 0;
+
+out:
+       ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
+       kfree(nnode);
+       return err;
+}
+
+/**
+ * read_pnode - read a pnode from flash and link it to the tree in memory.
+ * @c: UBIFS file-system description object
+ * @parent: parent nnode
+ * @iip: index in parent
+ *
+ * The position of the pnode is taken from branch @iip of @parent. A branch
+ * LEB number of zero means the pnode was never written; it is then built in
+ * memory with every LEB described as completely free. On success the pnode is
+ * stored in the parent's branch and @c->pnodes_have is incremented.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
+{
+       struct ubifs_nbranch *branch;
+       struct ubifs_pnode *pnode;
+       void *buf = c->lpt_nod_buf;
+       int err, lnum, offs;
+
+       branch = &parent->nbranch[iip];
+       lnum = branch->lnum;
+       offs = branch->offs;
+       pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
+       if (!pnode) {
+               /*
+                * Return directly instead of jumping to 'out': there is no
+                * pnode to dump, and the dump helper must not be handed a
+                * NULL pointer.
+                */
+               return -ENOMEM;
+       }
+       if (lnum == 0) {
+               /*
+                * This pnode was not written which just means that the LEB
+                * properties in it describe empty LEBs. We make the pnode as
+                * though we had read it.
+                */
+               int i;
+
+               if (c->big_lpt)
+                       pnode->num = calc_pnode_num_from_parent(c, parent, iip);
+               for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+                       struct ubifs_lprops * const lprops = &pnode->lprops[i];
+
+                       lprops->free = c->leb_size;
+                       lprops->flags = ubifs_categorize_lprops(c, lprops);
+               }
+       } else {
+               err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz);
+               if (err)
+                       goto out;
+               err = unpack_pnode(c, buf, pnode);
+               if (err)
+                       goto out;
+       }
+       err = validate_pnode(c, pnode, parent, iip);
+       if (err)
+               goto out;
+       if (!c->big_lpt)
+               pnode->num = calc_pnode_num_from_parent(c, parent, iip);
+       /* Link the pnode into the in-memory tree */
+       branch->pnode = pnode;
+       pnode->parent = parent;
+       pnode->iip = iip;
+       set_pnode_lnum(c, pnode);
+       c->pnodes_have += 1;
+       return 0;
+
+out:
+       /* Only reached with a valid (allocated) pnode */
+       ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
+       dbg_dump_pnode(c, pnode, parent, iip);
+       dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
+       kfree(pnode);
+       return err;
+}
+
+/**
+ * read_ltab - read LPT's own lprops table.
+ * @c: UBIFS file-system description object
+ *
+ * Reads @c->ltab_sz bytes from position @c->ltab_lnum:@c->ltab_offs into a
+ * temporary buffer and unpacks them. The buffer is released before returning.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int read_ltab(struct ubifs_info *c)
+{
+       void *tab = vmalloc(c->ltab_sz);
+       int err;
+
+       if (!tab)
+               return -ENOMEM;
+
+       err = ubi_read(c->ubi, c->ltab_lnum, tab, c->ltab_offs, c->ltab_sz);
+       if (!err)
+               err = unpack_ltab(c, tab);
+
+       vfree(tab);
+       return err;
+}
+
+/**
+ * read_lsave - read LPT's save table.
+ * @c: UBIFS file-system description object
+ *
+ * Reads and unpacks the save table and then looks up every LEB number stored
+ * in @c->lsave, which brings the corresponding LEB properties into the
+ * in-memory tree. A failed lookup aborts the function and its error code is
+ * returned to the caller.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int read_lsave(struct ubifs_info *c)
+{
+       int err, i;
+       void *buf;
+
+       buf = vmalloc(c->lsave_sz);
+       if (!buf)
+               return -ENOMEM;
+       err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz);
+       if (err)
+               goto out;
+       err = unpack_lsave(c, buf);
+       if (err)
+               goto out;
+       for (i = 0; i < c->lsave_cnt; i++) {
+               int lnum = c->lsave[i];
+               struct ubifs_lprops *lprops;
+
+               /*
+                * Due to automatic resizing, the values in the lsave table
+                * could be beyond the volume size - just ignore them.
+                */
+               if (lnum >= c->leb_cnt)
+                       continue;
+               /* The lookup reports failures as an error pointer */
+               lprops = ubifs_lpt_lookup(c, lnum);
+               if (IS_ERR(lprops)) {
+                       err = PTR_ERR(lprops);
+                       goto out;
+               }
+       }
+out:
+       vfree(buf);
+       return err;
+}
+
+/**
+ * ubifs_get_nnode - get a nnode.
+ * @c: UBIFS file-system description object
+ * @parent: parent nnode (or NULL for the root)
+ * @iip: index in parent
+ *
+ * If branch @iip of @parent already has the nnode in memory it is returned as
+ * is, otherwise it is read with 'ubifs_read_nnode()' first.
+ *
+ * This function returns a pointer to the nnode on success or a negative error
+ * code on failure.
+ */
+struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
+                                   struct ubifs_nnode *parent, int iip)
+{
+       struct ubifs_nbranch *br = &parent->nbranch[iip];
+       int err;
+
+       if (!br->nnode) {
+               /* Not in memory yet, a successful read fills in the branch */
+               err = ubifs_read_nnode(c, parent, iip);
+               if (err)
+                       return ERR_PTR(err);
+       }
+       return br->nnode;
+}
+
+/**
+ * ubifs_get_pnode - get a pnode.
+ * @c: UBIFS file-system description object
+ * @parent: parent nnode
+ * @iip: index in parent
+ *
+ * If branch @iip of @parent already has the pnode in memory it is returned as
+ * is, otherwise it is read with 'read_pnode()' and passed to 'update_cats()'
+ * before being returned.
+ *
+ * This function returns a pointer to the pnode on success or a negative error
+ * code on failure.
+ */
+struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
+                                   struct ubifs_nnode *parent, int iip)
+{
+       struct ubifs_nbranch *br = &parent->nbranch[iip];
+       int err;
+
+       if (!br->pnode) {
+               /* Not in memory yet, a successful read fills in the branch */
+               err = read_pnode(c, parent, iip);
+               if (err)
+                       return ERR_PTR(err);
+               update_cats(c, br->pnode);
+       }
+       return br->pnode;
+}
+
+/**
+ * ubifs_lpt_lookup - lookup LEB properties in the LPT.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to lookup
+ *
+ * The tree is descended from the root (which is read first if it is not in
+ * memory yet) down to the pnode, picking the branch at each level from the
+ * bits of the LEB number relative to @c->main_first.
+ *
+ * This function returns a pointer to the LEB properties on success or a
+ * negative error code on failure.
+ */
+struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
+{
+       struct ubifs_nnode *nnode;
+       struct ubifs_pnode *pnode;
+       struct ubifs_lprops *lp;
+       int err, level, idx, pos, shift;
+
+       if (!c->nroot) {
+               err = ubifs_read_nnode(c, NULL, 0);
+               if (err)
+                       return ERR_PTR(err);
+       }
+
+       pos = lnum - c->main_first;
+       shift = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
+       nnode = c->nroot;
+
+       /* Walk the nnode levels, consuming one group of bits per level */
+       for (level = 1; level < c->lpt_hght;
+            level++, shift -= UBIFS_LPT_FANOUT_SHIFT) {
+               idx = (pos >> shift) & (UBIFS_LPT_FANOUT - 1);
+               nnode = ubifs_get_nnode(c, nnode, idx);
+               if (IS_ERR(nnode))
+                       return ERR_PTR(PTR_ERR(nnode));
+       }
+
+       idx = (pos >> shift) & (UBIFS_LPT_FANOUT - 1);
+       pnode = ubifs_get_pnode(c, nnode, idx);
+       if (IS_ERR(pnode))
+               return ERR_PTR(PTR_ERR(pnode));
+
+       /* The lowest bits select the LEB properties within the pnode */
+       lp = &pnode->lprops[pos & (UBIFS_LPT_FANOUT - 1)];
+       dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
+              lp->free, lp->dirty, lp->flags);
+       return lp;
+}
+
+/**
+ * dirty_cow_nnode - ensure a nnode is not being committed.
+ * @c: UBIFS file-system description object
+ * @nnode: nnode to check
+ *
+ * If %COW_CNODE is not set on @nnode, the nnode is marked dirty in place and
+ * returned. Otherwise a dirty copy is made and linked into the tree instead of
+ * @nnode, and @nnode itself is marked obsolete.
+ *
+ * Returns dirtied nnode on success or negative error code on failure.
+ */
+static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c,
+                                          struct ubifs_nnode *nnode)
+{
+       const size_t sz = sizeof(struct ubifs_nnode);
+       struct ubifs_nnode *copy;
+       int br;
+
+       if (!test_bit(COW_CNODE, &nnode->flags)) {
+               /* Not being committed: account for it only if newly dirtied */
+               if (test_and_set_bit(DIRTY_CNODE, &nnode->flags))
+                       return nnode;
+               c->dirty_nn_cnt += 1;
+               ubifs_add_nnode_dirt(c, nnode);
+               return nnode;
+       }
+
+       /* The nnode is being committed, so work on a copy of it */
+       copy = kmalloc(sz, GFP_NOFS);
+       if (unlikely(!copy))
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(copy, nnode, sz);
+       copy->cnext = NULL;
+       __set_bit(DIRTY_CNODE, &copy->flags);
+       __clear_bit(COW_CNODE, &copy->flags);
+
+       /* Re-parent the children that are in memory to the copy */
+       for (br = 0; br < UBIFS_LPT_FANOUT; br++)
+               if (copy->nbranch[br].cnode)
+                       copy->nbranch[br].cnode->parent = copy;
+
+       ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags));
+       __set_bit(OBSOLETE_CNODE, &nnode->flags);
+
+       c->dirty_nn_cnt += 1;
+       ubifs_add_nnode_dirt(c, nnode);
+
+       /* Put the copy where the original used to be */
+       if (nnode->parent)
+               nnode->parent->nbranch[copy->iip].nnode = copy;
+       else
+               c->nroot = copy;
+       return copy;
+}
+
+/**
+ * dirty_cow_pnode - ensure a pnode is not being committed.
+ * @c: UBIFS file-system description object
+ * @pnode: pnode to check
+ *
+ * If %COW_CNODE is not set on @pnode, the pnode is marked dirty in place and
+ * returned. Otherwise a dirty copy is made, 'replace_cats()' is called for the
+ * old and new pnode, the copy is linked into the parent instead of @pnode, and
+ * @pnode itself is marked obsolete.
+ *
+ * Returns dirtied pnode on success or negative error code on failure.
+ */
+static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c,
+                                          struct ubifs_pnode *pnode)
+{
+       const size_t sz = sizeof(struct ubifs_pnode);
+       struct ubifs_pnode *copy;
+
+       if (!test_bit(COW_CNODE, &pnode->flags)) {
+               /* Not being committed: account for it only if newly dirtied */
+               if (test_and_set_bit(DIRTY_CNODE, &pnode->flags))
+                       return pnode;
+               c->dirty_pn_cnt += 1;
+               add_pnode_dirt(c, pnode);
+               return pnode;
+       }
+
+       /* The pnode is being committed, so work on a copy of it */
+       copy = kmalloc(sz, GFP_NOFS);
+       if (unlikely(!copy))
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(copy, pnode, sz);
+       copy->cnext = NULL;
+       __set_bit(DIRTY_CNODE, &copy->flags);
+       __clear_bit(COW_CNODE, &copy->flags);
+       replace_cats(c, pnode, copy);
+
+       ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags));
+       __set_bit(OBSOLETE_CNODE, &pnode->flags);
+
+       c->dirty_pn_cnt += 1;
+       add_pnode_dirt(c, pnode);
+
+       /* Put the copy where the original used to be */
+       pnode->parent->nbranch[copy->iip].pnode = copy;
+       return copy;
+}
+
+/**
+ * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to lookup
+ *
+ * The descent is the same as in 'ubifs_lpt_lookup()', but every nnode on the
+ * path and the pnode itself are passed through 'dirty_cow_nnode()' and
+ * 'dirty_cow_pnode()', so the returned LEB properties belong to a dirty pnode.
+ *
+ * This function returns a pointer to the LEB properties on success or a
+ * negative error code on failure.
+ */
+struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
+{
+       struct ubifs_nnode *nnode;
+       struct ubifs_pnode *pnode;
+       struct ubifs_lprops *lp;
+       int err, level, idx, pos, shift;
+
+       if (!c->nroot) {
+               err = ubifs_read_nnode(c, NULL, 0);
+               if (err)
+                       return ERR_PTR(err);
+       }
+
+       /* The root is the first node of the path to be dirtied */
+       nnode = dirty_cow_nnode(c, c->nroot);
+       if (IS_ERR(nnode))
+               return ERR_PTR(PTR_ERR(nnode));
+
+       pos = lnum - c->main_first;
+       shift = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
+
+       /* Walk the nnode levels, dirtying each nnode as it is reached */
+       for (level = 1; level < c->lpt_hght;
+            level++, shift -= UBIFS_LPT_FANOUT_SHIFT) {
+               idx = (pos >> shift) & (UBIFS_LPT_FANOUT - 1);
+               nnode = ubifs_get_nnode(c, nnode, idx);
+               if (IS_ERR(nnode))
+                       return ERR_PTR(PTR_ERR(nnode));
+               nnode = dirty_cow_nnode(c, nnode);
+               if (IS_ERR(nnode))
+                       return ERR_PTR(PTR_ERR(nnode));
+       }
+
+       idx = (pos >> shift) & (UBIFS_LPT_FANOUT - 1);
+       pnode = ubifs_get_pnode(c, nnode, idx);
+       if (IS_ERR(pnode))
+               return ERR_PTR(PTR_ERR(pnode));
+       pnode = dirty_cow_pnode(c, pnode);
+       if (IS_ERR(pnode))
+               return ERR_PTR(PTR_ERR(pnode));
+
+       /* The lowest bits select the LEB properties within the pnode */
+       lp = &pnode->lprops[pos & (UBIFS_LPT_FANOUT - 1)];
+       dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
+              lp->free, lp->dirty, lp->flags);
+       ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags));
+       return lp;
+}
+
+/**
+ * lpt_init_rd - initialize the LPT for reading.
+ * @c: UBIFS file-system description object
+ *
+ * Allocates the ltab, the node buffer (large enough for either a nnode or a
+ * pnode), the category heaps and the dirty index heap, and then reads the
+ * ltab. Nothing that was allocated here is released by this function when it
+ * fails part-way.
+ * NOTE(review): presumably the caller frees the LPT data on error - confirm.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int lpt_init_rd(struct ubifs_info *c)
+{
+       const size_t heap_bytes = sizeof(void *) * LPT_HEAP_SZ;
+       int err, cat, nod_buf_sz;
+
+       c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
+       if (!c->ltab)
+               return -ENOMEM;
+
+       /* One buffer serves for reading both kinds of LPT node */
+       nod_buf_sz = max_t(int, c->nnode_sz, c->pnode_sz);
+       c->lpt_nod_buf = kmalloc(nod_buf_sz, GFP_KERNEL);
+       if (!c->lpt_nod_buf)
+               return -ENOMEM;
+
+       for (cat = 0; cat < LPROPS_HEAP_CNT; cat++) {
+               struct ubifs_lpt_heap *heap = &c->lpt_heap[cat];
+
+               heap->arr = kmalloc(heap_bytes, GFP_KERNEL);
+               if (!heap->arr)
+                       return -ENOMEM;
+               heap->cnt = 0;
+               heap->max_cnt = LPT_HEAP_SZ;
+       }
+
+       c->dirty_idx.arr = kmalloc(heap_bytes, GFP_KERNEL);
+       if (!c->dirty_idx.arr)
+               return -ENOMEM;
+       c->dirty_idx.cnt = 0;
+       c->dirty_idx.max_cnt = LPT_HEAP_SZ;
+
+       err = read_ltab(c);
+       if (err)
+               return err;
+
+       /* Dump the LPT geometry for debugging */
+       dbg_lp("space_bits %d", c->space_bits);
+       dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
+       dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
+       dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits);
+       dbg_lp("pcnt_bits %d", c->pcnt_bits);
+       dbg_lp("lnum_bits %d", c->lnum_bits);
+       dbg_lp("pnode_sz %d", c->pnode_sz);
+       dbg_lp("nnode_sz %d", c->nnode_sz);
+       dbg_lp("ltab_sz %d", c->ltab_sz);
+       dbg_lp("lsave_sz %d", c->lsave_sz);
+       dbg_lp("lsave_cnt %d", c->lsave_cnt);
+       dbg_lp("lpt_hght %d", c->lpt_hght);
+       dbg_lp("big_lpt %d", c->big_lpt);
+       dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
+       dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
+       dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
+       if (c->big_lpt)
+               dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
+
+       return 0;
+}
+
+/**
+ * lpt_init_wr - initialize the LPT for writing.
+ * @c: UBIFS file-system description object
+ *
+ * 'lpt_init_rd()' must have been called already.
+ *
+ * Allocates the commit copy of the ltab and the LPT buffer, reads the save
+ * table when @c->big_lpt is set, and unmaps every LPT LEB that the ltab
+ * records as completely free.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int lpt_init_wr(struct ubifs_info *c)
+{
+       int err, idx;
+
+       c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
+       if (!c->ltab_cmt)
+               return -ENOMEM;
+
+       c->lpt_buf = vmalloc(c->leb_size);
+       if (!c->lpt_buf)
+               return -ENOMEM;
+
+       if (c->big_lpt) {
+               c->lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_NOFS);
+               if (!c->lsave)
+                       return -ENOMEM;
+               err = read_lsave(c);
+               if (err)
+                       return err;
+       }
+
+       for (idx = 0; idx < c->lpt_lebs; idx++) {
+               /* Only LEBs recorded as entirely free are unmapped */
+               if (c->ltab[idx].free != c->leb_size)
+                       continue;
+               err = ubifs_leb_unmap(c, idx + c->lpt_first);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/**
+ * ubifs_lpt_init - initialize the LPT.
+ * @c: UBIFS file-system description object
+ * @rd: whether to initialize lpt for reading
+ * @wr: whether to initialize lpt for writing
+ *
+ * For mounting 'rw', @rd and @wr are both true. For mounting 'ro', @rd is true
+ * and @wr is false. For mounting from 'ro' to 'rw', @rd is false and @wr is
+ * true.
+ *
+ * Initialization for reading is done first; if it fails, initialization for
+ * writing is not attempted.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
+{
+       int err = 0;
+
+       if (rd)
+               err = lpt_init_rd(c);
+       if (!err && wr)
+               err = lpt_init_wr(c);
+
+       return err;
+}
+
+/**
+ * struct lpt_scan_node - somewhere to put nodes while we scan LPT.
+ * @nnode: where to keep a nnode
+ * @pnode: where to keep a pnode
+ * @cnode: where to keep a cnode
+ * @in_tree: is the node in the tree in memory
+ * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in
+ * the tree
+ * @ptr.pnode: ditto for pnode
+ * @ptr.cnode: ditto for cnode
+ *
+ * @nnode, @pnode and @cnode share the same storage (anonymous union), which is
+ * used as scratch space for a node that the scan had to read because it was
+ * not in the tree in memory. In that case @in_tree is %0 and @ptr points at
+ * this scratch space; otherwise @in_tree is %1 and @ptr points at the node in
+ * the tree.
+ */
+struct lpt_scan_node {
+       union {
+               struct ubifs_nnode nnode;
+               struct ubifs_pnode pnode;
+               struct ubifs_cnode cnode;
+       };
+       int in_tree;
+       union {
+               struct ubifs_nnode *nnode;
+               struct ubifs_pnode *pnode;
+               struct ubifs_cnode *cnode;
+       } ptr;
+};
+
+/**
+ * scan_get_nnode - for the scan, get a nnode from either the tree or flash.
+ * @c: the UBIFS file-system description object
+ * @path: where to put the nnode
+ * @parent: parent of the nnode
+ * @iip: index in parent of the nnode
+ *
+ * If the nnode is already in the tree in memory, @path only records a pointer
+ * to it. Otherwise the nnode is built in the scratch space of @path and is
+ * not linked into the tree.
+ *
+ * This function returns a pointer to the nnode on success or a negative error
+ * code on failure.
+ */
+static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
+                                         struct lpt_scan_node *path,
+                                         struct ubifs_nnode *parent, int iip)
+{
+       struct ubifs_nbranch *br = &parent->nbranch[iip];
+       struct ubifs_nnode *nnode;
+       int err;
+
+       if (br->nnode) {
+               /* Already in the tree, just point at it */
+               path->in_tree = 1;
+               path->ptr.nnode = br->nnode;
+               return br->nnode;
+       }
+
+       /* Build the nnode in the scratch space of the path element */
+       nnode = &path->nnode;
+       path->in_tree = 0;
+       path->ptr.nnode = nnode;
+       memset(nnode, 0, sizeof(struct ubifs_nnode));
+
+       if (br->lnum != 0) {
+               void *buf = c->lpt_nod_buf;
+
+               err = ubi_read(c->ubi, br->lnum, buf, br->offs, c->nnode_sz);
+               if (!err)
+                       err = unpack_nnode(c, buf, nnode);
+               if (err)
+                       return ERR_PTR(err);
+       } else if (c->big_lpt) {
+               /*
+                * A zero LEB number means this nnode was not written, which
+                * just means that the LEB properties in the subtree below it
+                * describe empty LEBs. The zeroed nnode is used as though it
+                * had been read; only its number has to be filled in.
+                */
+               nnode->num = calc_nnode_num_from_parent(c, parent, iip);
+       }
+
+       err = validate_nnode(c, nnode, parent, iip);
+       if (err)
+               return ERR_PTR(err);
+
+       if (!c->big_lpt)
+               nnode->num = calc_nnode_num_from_parent(c, parent, iip);
+       nnode->level = parent->level - 1;
+       nnode->parent = parent;
+       nnode->iip = iip;
+       return nnode;
+}
+
+/**
+ * scan_get_pnode - for the scan, get a pnode from either the tree or flash.
+ * @c: the UBIFS file-system description object
+ * @path: where to put the pnode
+ * @parent: parent of the pnode
+ * @iip: index in parent of the pnode
+ *
+ * If the pnode is already in the tree in memory, @path only records a pointer
+ * to it. Otherwise the pnode is built in the scratch space of @path and is
+ * not linked into the tree.
+ *
+ * This function returns a pointer to the pnode on success or a negative error
+ * code on failure.
+ */
+static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c,
+                                         struct lpt_scan_node *path,
+                                         struct ubifs_nnode *parent, int iip)
+{
+       struct ubifs_nbranch *br = &parent->nbranch[iip];
+       struct ubifs_pnode *pnode;
+       int err;
+
+       if (br->pnode) {
+               /* Already in the tree, just point at it */
+               path->in_tree = 1;
+               path->ptr.pnode = br->pnode;
+               return br->pnode;
+       }
+
+       /* Build the pnode in the scratch space of the path element */
+       pnode = &path->pnode;
+       path->in_tree = 0;
+       path->ptr.pnode = pnode;
+       memset(pnode, 0, sizeof(struct ubifs_pnode));
+
+       if (br->lnum != 0) {
+               void *buf = c->lpt_nod_buf;
+
+               ubifs_assert(br->lnum >= c->lpt_first &&
+                            br->lnum <= c->lpt_last);
+               ubifs_assert(br->offs >= 0 && br->offs < c->leb_size);
+               err = ubi_read(c->ubi, br->lnum, buf, br->offs, c->pnode_sz);
+               if (!err)
+                       err = unpack_pnode(c, buf, pnode);
+               if (err)
+                       return ERR_PTR(err);
+       } else {
+               /*
+                * A zero LEB number means this pnode was not written, which
+                * just means that the LEB properties in it describe empty
+                * LEBs. The pnode is filled in as though it had been read.
+                */
+               int slot;
+
+               if (c->big_lpt)
+                       pnode->num = calc_pnode_num_from_parent(c, parent, iip);
+               for (slot = 0; slot < UBIFS_LPT_FANOUT; slot++) {
+                       struct ubifs_lprops * const lp = &pnode->lprops[slot];
+
+                       lp->free = c->leb_size;
+                       lp->flags = ubifs_categorize_lprops(c, lp);
+               }
+       }
+
+       err = validate_pnode(c, pnode, parent, iip);
+       if (err)
+               return ERR_PTR(err);
+
+       if (!c->big_lpt)
+               pnode->num = calc_pnode_num_from_parent(c, parent, iip);
+       pnode->parent = parent;
+       pnode->iip = iip;
+       set_pnode_lnum(c, pnode);
+       return pnode;
+}
+
+/**
+ * ubifs_lpt_scan_nolock - scan the LPT.
+ * @c: the UBIFS file-system description object
+ * @start_lnum: LEB number from which to start scanning
+ * @end_lnum: LEB number at which to stop scanning
+ * @scan_cb: callback function called for each lprops
+ * @data: data to be passed to the callback function
+ *
+ * If @start_lnum is %-1 the scan starts at the LEB following @end_lnum,
+ * wrapping around to @c->main_first. The scan also wraps around to
+ * @c->main_first when it passes the last LEB.
+ *
+ * @scan_cb is called for each LEB properties visited. A negative return value
+ * aborts the scan and is returned as the error code. If the return value has
+ * %LPT_SCAN_ADD set, the nodes on the current path that are not yet in the
+ * tree in memory are copied and linked into it. If it has %LPT_SCAN_STOP set,
+ * the scan ends successfully. If @end_lnum is processed without the callback
+ * asking to stop, %-ENOSPC is returned.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum,
+                         ubifs_lpt_scan_callback scan_cb, void *data)
+{
+       int err = 0, i, h, iip, shft;
+       struct ubifs_nnode *nnode;
+       struct ubifs_pnode *pnode;
+       struct lpt_scan_node *path;
+
+       if (start_lnum == -1) {
+               start_lnum = end_lnum + 1;
+               if (start_lnum >= c->leb_cnt)
+                       start_lnum = c->main_first;
+       }
+
+       ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt);
+       ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt);
+
+       if (!c->nroot) {
+               err = ubifs_read_nnode(c, NULL, 0);
+               if (err)
+                       return err;
+       }
+
+       /* One path element per tree level: index 0 is the root */
+       path = kmalloc(sizeof(struct lpt_scan_node) * (c->lpt_hght + 1),
+                      GFP_NOFS);
+       if (!path)
+               return -ENOMEM;
+
+       path[0].ptr.nnode = c->nroot;
+       path[0].in_tree = 1;
+again:
+       /* Descend to the pnode containing start_lnum */
+       nnode = c->nroot;
+       i = start_lnum - c->main_first;
+       shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
+       for (h = 1; h < c->lpt_hght; h++) {
+               iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
+               shft -= UBIFS_LPT_FANOUT_SHIFT;
+               nnode = scan_get_nnode(c, path + h, nnode, iip);
+               if (IS_ERR(nnode)) {
+                       err = PTR_ERR(nnode);
+                       goto out;
+               }
+       }
+       /* Here h == c->lpt_hght, so path[h] is the element for the pnode */
+       iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
+       shft -= UBIFS_LPT_FANOUT_SHIFT;
+       pnode = scan_get_pnode(c, path + h, nnode, iip);
+       if (IS_ERR(pnode)) {
+               err = PTR_ERR(pnode);
+               goto out;
+       }
+       iip = (i & (UBIFS_LPT_FANOUT - 1));
+
+       /* Loop for each lprops */
+       while (1) {
+               struct ubifs_lprops *lprops = &pnode->lprops[iip];
+               int ret, lnum = lprops->lnum;
+
+               ret = scan_cb(c, lprops, path[h].in_tree, data);
+               if (ret < 0) {
+                       err = ret;
+                       goto out;
+               }
+               if (ret & LPT_SCAN_ADD) {
+                       /* Add all the nodes in path to the tree in memory */
+                       for (h = 1; h < c->lpt_hght; h++) {
+                               const size_t sz = sizeof(struct ubifs_nnode);
+                               struct ubifs_nnode *parent;
+
+                               if (path[h].in_tree)
+                                       continue;
+                               nnode = kmalloc(sz, GFP_NOFS);
+                               if (!nnode) {
+                                       err = -ENOMEM;
+                                       goto out;
+                               }
+                               memcpy(nnode, &path[h].nnode, sz);
+                               parent = nnode->parent;
+                               parent->nbranch[nnode->iip].nnode = nnode;
+                               path[h].ptr.nnode = nnode;
+                               path[h].in_tree = 1;
+                               /*
+                                * The node one level down still refers to the
+                                * scratch copy as its parent, so redirect it
+                                * to the copy that is now in the tree.
+                                */
+                               path[h + 1].cnode.parent = nnode;
+                       }
+                       /* The loop above leaves h == c->lpt_hght (the pnode) */
+                       if (path[h].in_tree)
+                               ubifs_ensure_cat(c, lprops);
+                       else {
+                               const size_t sz = sizeof(struct ubifs_pnode);
+                               struct ubifs_nnode *parent;
+
+                               pnode = kmalloc(sz, GFP_NOFS);
+                               if (!pnode) {
+                                       err = -ENOMEM;
+                                       goto out;
+                               }
+                               memcpy(pnode, &path[h].pnode, sz);
+                               parent = pnode->parent;
+                               parent->nbranch[pnode->iip].pnode = pnode;
+                               path[h].ptr.pnode = pnode;
+                               path[h].in_tree = 1;
+                               update_cats(c, pnode);
+                               c->pnodes_have += 1;
+                       }
+                       err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)
+                                                 c->nroot, 0, 0);
+                       if (err)
+                               goto out;
+                       err = dbg_check_cats(c);
+                       if (err)
+                               goto out;
+               }
+               if (ret & LPT_SCAN_STOP) {
+                       err = 0;
+                       break;
+               }
+               /* Get the next lprops */
+               if (lnum == end_lnum) {
+                       /*
+                        * We got to the end without finding what we were
+                        * looking for
+                        */
+                       err = -ENOSPC;
+                       goto out;
+               }
+               if (lnum + 1 >= c->leb_cnt) {
+                       /* Wrap-around to the beginning */
+                       start_lnum = c->main_first;
+                       goto again;
+               }
+               if (iip + 1 < UBIFS_LPT_FANOUT) {
+                       /* Next lprops is in the same pnode */
+                       iip += 1;
+                       continue;
+               }
+               /* We need to get the next pnode. Go up until we can go right */
+               iip = pnode->iip;
+               while (1) {
+                       h -= 1;
+                       ubifs_assert(h >= 0);
+                       nnode = path[h].ptr.nnode;
+                       if (iip + 1 < UBIFS_LPT_FANOUT)
+                               break;
+                       iip = nnode->iip;
+               }
+               /* Go right */
+               iip += 1;
+               /* Descend to the pnode */
+               h += 1;
+               for (; h < c->lpt_hght; h++) {
+                       nnode = scan_get_nnode(c, path + h, nnode, iip);
+                       if (IS_ERR(nnode)) {
+                               err = PTR_ERR(nnode);
+                               goto out;
+                       }
+                       /* Below the turn, always take the left-most branch */
+                       iip = 0;
+               }
+               pnode = scan_get_pnode(c, path + h, nnode, iip);
+               if (IS_ERR(pnode)) {
+                       err = PTR_ERR(pnode);
+                       goto out;
+               }
+               iip = 0;
+       }
+out:
+       kfree(path);
+       return err;
+}
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+/**
+ * dbg_chk_pnode - check a pnode.
+ * @c: the UBIFS file-system description object
+ * @pnode: pnode to check
+ * @col: pnode column
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
+                        int col)
+{
+       int i;
+
+       /* The pnode number must equal the column it was reached at */
+       if (pnode->num != col) {
+               dbg_err("pnode num %d expected %d parent num %d iip %d",
+                       pnode->num, col, pnode->parent->num, pnode->iip);
+               return -EINVAL;
+       }
+       /* Check each LEB properties entry held by this pnode */
+       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+               struct ubifs_lprops *lp, *lprops = &pnode->lprops[i];
+               /* LEB number that entry @i of this pnode is expected to hold */
+               int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + i +
+                          c->main_first;
+               int found, cat = lprops->flags & LPROPS_CAT_MASK;
+               struct ubifs_lpt_heap *heap;
+               struct list_head *list = NULL;
+
+               /* Entries at or beyond @c->leb_cnt are not checked */
+               if (lnum >= c->leb_cnt)
+                       continue;
+               if (lprops->lnum != lnum) {
+                       dbg_err("bad LEB number %d expected %d",
+                               lprops->lnum, lnum);
+                       return -EINVAL;
+               }
+               /*
+                * A taken LEB must be uncategorized; no further checks are
+                * made for it.
+                */
+               if (lprops->flags & LPROPS_TAKEN) {
+                       if (cat != LPROPS_UNCAT) {
+                               dbg_err("LEB %d taken but not uncat %d",
+                                       lprops->lnum, cat);
+                               return -EINVAL;
+                       }
+                       continue;
+               }
+               /* The category must be one permitted for the LEB's kind */
+               if (lprops->flags & LPROPS_INDEX) {
+                       switch (cat) {
+                       case LPROPS_UNCAT:
+                       case LPROPS_DIRTY_IDX:
+                       case LPROPS_FRDI_IDX:
+                               break;
+                       default:
+                               dbg_err("LEB %d index but cat %d",
+                                       lprops->lnum, cat);
+                               return -EINVAL;
+                       }
+               } else {
+                       switch (cat) {
+                       case LPROPS_UNCAT:
+                       case LPROPS_DIRTY:
+                       case LPROPS_FREE:
+                       case LPROPS_EMPTY:
+                       case LPROPS_FREEABLE:
+                               break;
+                       default:
+                               dbg_err("LEB %d not index but cat %d",
+                                       lprops->lnum, cat);
+                               return -EINVAL;
+                       }
+               }
+               /*
+                * Select the list to search for the list-based categories;
+                * @list stays %NULL for the heap-based ones.
+                */
+               switch (cat) {
+               case LPROPS_UNCAT:
+                       list = &c->uncat_list;
+                       break;
+               case LPROPS_EMPTY:
+                       list = &c->empty_list;
+                       break;
+               case LPROPS_FREEABLE:
+                       list = &c->freeable_list;
+                       break;
+               case LPROPS_FRDI_IDX:
+                       list = &c->frdi_idx_list;
+                       break;
+               }
+               /* The lprops must be present in its category's heap or list */
+               found = 0;
+               switch (cat) {
+               case LPROPS_DIRTY:
+               case LPROPS_DIRTY_IDX:
+               case LPROPS_FREE:
+                       /* Heaps are indexed by category number minus one */
+                       heap = &c->lpt_heap[cat - 1];
+                       if (lprops->hpos < heap->cnt &&
+                           heap->arr[lprops->hpos] == lprops)
+                               found = 1;
+                       break;
+               case LPROPS_UNCAT:
+               case LPROPS_EMPTY:
+               case LPROPS_FREEABLE:
+               case LPROPS_FRDI_IDX:
+                       list_for_each_entry(lp, list, list)
+                               if (lprops == lp) {
+                                       found = 1;
+                                       break;
+                               }
+                       break;
+               }
+               if (!found) {
+                       dbg_err("LEB %d cat %d not found in cat heap/list",
+                               lprops->lnum, cat);
+                       return -EINVAL;
+               }
+               /* Space accounting must be consistent with the category */
+               switch (cat) {
+               case LPROPS_EMPTY:
+                       if (lprops->free != c->leb_size) {
+                               dbg_err("LEB %d cat %d free %d dirty %d",
+                                       lprops->lnum, cat, lprops->free,
+                                       lprops->dirty);
+                               return -EINVAL;
+                       }
+                       /*
+                        * No break here: an empty LEB is also subjected to
+                        * the free + dirty check below.
+                        */
+               case LPROPS_FREEABLE:
+               case LPROPS_FRDI_IDX:
+                       if (lprops->free + lprops->dirty != c->leb_size) {
+                               dbg_err("LEB %d cat %d free %d dirty %d",
+                                       lprops->lnum, cat, lprops->free,
+                                       lprops->dirty);
+                               return -EINVAL;
+                       }
+               }
+       }
+       return 0;
+}
+
+/**
+ * dbg_check_lpt_nodes - check nnodes and pnodes.
+ * @c: the UBIFS file-system description object
+ * @cnode: next cnode (nnode or pnode) to check
+ * @row: row of cnode (root is zero)
+ * @col: column of cnode (leftmost is zero)
+ *
+ * This function walks the tree below @cnode iteratively (no recursion),
+ * visiting only the children whose in-memory cnode pointer is set. Each nnode
+ * has its number compared with the value calculated from its row and column,
+ * and each pnode is passed to 'dbg_chk_pnode()'. Nothing is checked unless
+ * %UBIFS_CHK_LPROPS is set in 'ubifs_chk_flags'.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
+                       int row, int col)
+{
+       struct ubifs_nnode *nnode, *nn;
+       struct ubifs_cnode *cn;
+       int num, iip = 0, err;
+
+       /* Checking is disabled unless the lprops check flag is set */
+       if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+               return 0;
+
+       while (cnode) {
+               ubifs_assert(row >= 0);
+               /* Remember the parent so that we can go back up afterwards */
+               nnode = cnode->parent;
+               if (cnode->level) {
+                       /* cnode is a nnode */
+                       num = calc_nnode_num(row, col);
+                       if (cnode->num != num) {
+                               dbg_err("nnode num %d expected %d "
+                                       "parent num %d iip %d", cnode->num, num,
+                                       (nnode ? nnode->num : 0), cnode->iip);
+                               return -EINVAL;
+                       }
+                       nn = (struct ubifs_nnode *)cnode;
+                       /* Look for the next in-memory child, starting at iip */
+                       while (iip < UBIFS_LPT_FANOUT) {
+                               cn = nn->nbranch[iip].cnode;
+                               if (cn) {
+                                       /* Go down */
+                                       row += 1;
+                                       col <<= UBIFS_LPT_FANOUT_SHIFT;
+                                       col += iip;
+                                       iip = 0;
+                                       cnode = cn;
+                                       break;
+                               }
+                               /* Go right */
+                               iip += 1;
+                       }
+                       /* A child was found: check it on the next iteration */
+                       if (iip < UBIFS_LPT_FANOUT)
+                               continue;
+               } else {
+                       struct ubifs_pnode *pnode;
+
+                       /* cnode is a pnode */
+                       pnode = (struct ubifs_pnode *)cnode;
+                       err = dbg_chk_pnode(c, pnode, col);
+                       if (err)
+                               return err;
+               }
+               /* Go up and to the right */
+               row -= 1;
+               col >>= UBIFS_LPT_FANOUT_SHIFT;
+               iip = cnode->iip + 1;
+               cnode = (struct ubifs_cnode *)nnode;
+       }
+       return 0;
+}
+
+#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c

new file mode 100644 (file)

index 0000000..5f0b83e
--- /dev/null
+++ b/fs/ubifs/lpt_commit.c
@@ -0,0 +1,1648 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements commit-related functionality of the LEB properties
+ * subsystem.
+ */
+
+#include <linux/crc16.h>
+#include "ubifs.h"
+
+/**
+ * first_dirty_cnode - find first dirty cnode.
+ * @nnode: nnode at which to start
+ *
+ * Starting at @nnode, this function repeatedly descends into the first child
+ * that is in memory and marked dirty. It returns the first dirty pnode reached
+ * that way, or, if the descent ends at an nnode that has no dirty children,
+ * that nnode itself (which may be @nnode). It never returns %NULL.
+ */
+static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode)
+{
+       struct ubifs_cnode *child;
+       int iip;
+
+       ubifs_assert(nnode);
+       for (;;) {
+               /* Find the first dirty child of the current nnode */
+               for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) {
+                       child = nnode->nbranch[iip].cnode;
+                       if (child && test_bit(DIRTY_CNODE, &child->flags))
+                               break;
+               }
+               /* No dirty children: the current nnode is the answer */
+               if (iip == UBIFS_LPT_FANOUT)
+                       return (struct ubifs_cnode *)nnode;
+               /* A dirty pnode ends the search */
+               if (child->level == 0)
+                       return child;
+               /* A dirty nnode: continue the descent from it */
+               nnode = (struct ubifs_nnode *)child;
+       }
+}
+
+/**
+ * next_dirty_cnode - find next dirty cnode.
+ * @cnode: cnode from which to begin searching
+ *
+ * The siblings to the right of @cnode are examined first; if none of them is
+ * dirty, the parent of @cnode is the next dirty cnode. This function returns
+ * the next dirty cnode or %NULL if @cnode has no parent.
+ */
+static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode)
+{
+       struct ubifs_nnode *parent;
+       struct ubifs_cnode *sibling;
+       int iip;
+
+       ubifs_assert(cnode);
+       parent = cnode->parent;
+       if (!parent)
+               return NULL;
+
+       iip = cnode->iip;
+       while (++iip < UBIFS_LPT_FANOUT) {
+               sibling = parent->nbranch[iip].cnode;
+               if (!sibling || !test_bit(DIRTY_CNODE, &sibling->flags))
+                       continue;
+               /* A dirty nnode: start from the first dirty cnode below it */
+               if (sibling->level)
+                       return first_dirty_cnode((struct ubifs_nnode *)sibling);
+               /* A dirty pnode */
+               return sibling;
+       }
+       /* No dirty sibling to the right, so the parent comes next */
+       return (struct ubifs_cnode *)parent;
+}
+
+/**
+ * get_cnodes_to_commit - create list of dirty cnodes to commit.
+ * @c: UBIFS file-system description object
+ *
+ * The dirty cnodes are linked through their 'cnext' fields into a circular
+ * list whose first element is stored in @c->lpt_cnext, and each of them gets
+ * the %COW_ZNODE flag set.
+ *
+ * This function returns the number of cnodes to commit.
+ */
+static int get_cnodes_to_commit(struct ubifs_info *c)
+{
+       struct ubifs_cnode *cur, *next;
+       int cnt;
+
+       /* Nothing to do if there is no root or the root is clean */
+       if (!c->nroot || !test_bit(DIRTY_CNODE, &c->nroot->flags))
+               return 0;
+
+       cur = first_dirty_cnode(c->nroot);
+       c->lpt_cnext = cur;
+       if (!cur)
+               return 0;
+
+       for (cnt = 1; ; cnt++) {
+               ubifs_assert(!test_bit(COW_ZNODE, &cur->flags));
+               __set_bit(COW_ZNODE, &cur->flags);
+               next = next_dirty_cnode(cur);
+               if (!next)
+                       break;
+               cur->cnext = next;
+               cur = next;
+       }
+       /* Link the last cnode back to the first one */
+       cur->cnext = c->lpt_cnext;
+
+       dbg_cmt("committing %d cnodes", cnt);
+       dbg_lp("committing %d cnodes", cnt);
+       ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt);
+       return cnt;
+}
+
+/**
+ * upd_ltab - update LPT LEB properties.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+ * @free: amount of free space
+ * @dirty: amount of dirty space to add
+ *
+ * The free space of the ltab entry for @lnum is set to @free, whereas @dirty
+ * is added to the dirty space already recorded there.
+ */
+static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty)
+{
+       int idx = lnum - c->lpt_first;
+
+       /* Check @lnum before it is used to index the ltab array */
+       ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
+       dbg_lp("LEB %d free %d dirty %d to %d +%d",
+              lnum, c->ltab[idx].free, c->ltab[idx].dirty, free, dirty);
+       c->ltab[idx].free = free;
+       c->ltab[idx].dirty += dirty;
+}
+
+/**
+ * alloc_lpt_leb - allocate an LPT LEB that is empty.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number is passed and returned here
+ *
+ * This function finds the next empty LEB in the ltab starting from the LEB
+ * after @lnum and wrapping around to the first LPT LEB. LEBs marked for
+ * trivial GC or already allocated for this commit are skipped. If an empty
+ * LEB is found, it is marked as allocated for commit, returned in @lnum and
+ * the function returns %0. Otherwise the function returns -ENOSPC.  Note
+ * however, that LPT is designed never to run out of space.
+ */
+static int alloc_lpt_leb(struct ubifs_info *c, int *lnum)
+{
+       int idx, start = *lnum - c->lpt_first + 1;
+
+       /* From the LEB after @lnum up to the end of the LPT area */
+       for (idx = start; idx < c->lpt_lebs; idx++)
+               if (!c->ltab[idx].tgc && !c->ltab[idx].cmt &&
+                   c->ltab[idx].free == c->leb_size)
+                       goto found;
+
+       /* Wrap around: from the first LPT LEB up to and including @lnum */
+       for (idx = 0; idx < start; idx++)
+               if (!c->ltab[idx].tgc && !c->ltab[idx].cmt &&
+                   c->ltab[idx].free == c->leb_size)
+                       goto found;
+
+       dbg_err("last LEB %d", *lnum);
+       dump_stack();
+       return -ENOSPC;
+
+found:
+       c->ltab[idx].cmt = 1;
+       *lnum = idx + c->lpt_first;
+       return 0;
+}
+
+/**
+ * layout_cnodes - layout cnodes for commit.
+ * @c: UBIFS file-system description object
+ *
+ * This function assigns an on-flash position (LEB number and offset) to every
+ * cnode on the @c->lpt_cnext list, to ltab and, if @c->big_lpt is set, to
+ * lsave. Positions are assigned sequentially from the LPT head
+ * (@c->nhead_lnum:@c->nhead_offs); when something does not fit into the
+ * current LEB, the ltab entry of that LEB is updated and a new LEB is
+ * allocated with 'alloc_lpt_leb()'. Nothing is written to the flash here.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int layout_cnodes(struct ubifs_info *c)
+{
+       int lnum, offs, len, alen, done_lsave, done_ltab, err;
+       struct ubifs_cnode *cnode;
+
+       cnode = c->lpt_cnext;
+       if (!cnode)
+               return 0;
+       lnum = c->nhead_lnum;
+       offs = c->nhead_offs;
+       /* Try to place lsave and ltab nicely */
+       /* lsave is only placed when big_lpt is set */
+       done_lsave = !c->big_lpt;
+       done_ltab = 0;
+       if (!done_lsave && offs + c->lsave_sz <= c->leb_size) {
+               done_lsave = 1;
+               c->lsave_lnum = lnum;
+               c->lsave_offs = offs;
+               offs += c->lsave_sz;
+       }
+
+       if (offs + c->ltab_sz <= c->leb_size) {
+               done_ltab = 1;
+               c->ltab_lnum = lnum;
+               c->ltab_offs = offs;
+               offs += c->ltab_sz;
+       }
+
+       /* Assign a position to each cnode on the list */
+       do {
+               if (cnode->level) {
+                       len = c->nnode_sz;
+                       c->dirty_nn_cnt -= 1;
+               } else {
+                       len = c->pnode_sz;
+                       c->dirty_pn_cnt -= 1;
+               }
+               /* Move on to a new LEB while the cnode does not fit */
+               while (offs + len > c->leb_size) {
+                       /*
+                        * Account for the old LEB: free space begins at the
+                        * aligned offset and the alignment padding is dirty.
+                        */
+                       alen = ALIGN(offs, c->min_io_size);
+                       upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+                       err = alloc_lpt_leb(c, &lnum);
+                       if (err)
+                               return err;
+                       offs = 0;
+                       ubifs_assert(lnum >= c->lpt_first &&
+                                    lnum <= c->lpt_last);
+                       /* Try to place lsave and ltab nicely */
+                       if (!done_lsave) {
+                               done_lsave = 1;
+                               c->lsave_lnum = lnum;
+                               c->lsave_offs = offs;
+                               offs += c->lsave_sz;
+                               /* Re-check whether the cnode fits now */
+                               continue;
+                       }
+                       if (!done_ltab) {
+                               done_ltab = 1;
+                               c->ltab_lnum = lnum;
+                               c->ltab_offs = offs;
+                               offs += c->ltab_sz;
+                               /* Re-check whether the cnode fits now */
+                               continue;
+                       }
+                       break;
+               }
+               /*
+                * Record the new position in the parent's branch, or in @c if
+                * the cnode has no parent.
+                */
+               if (cnode->parent) {
+                       cnode->parent->nbranch[cnode->iip].lnum = lnum;
+                       cnode->parent->nbranch[cnode->iip].offs = offs;
+               } else {
+                       c->lpt_lnum = lnum;
+                       c->lpt_offs = offs;
+               }
+               offs += len;
+               cnode = cnode->cnext;
+       } while (cnode && cnode != c->lpt_cnext);
+
+       /* Make sure to place LPT's save table */
+       if (!done_lsave) {
+               if (offs + c->lsave_sz > c->leb_size) {
+                       alen = ALIGN(offs, c->min_io_size);
+                       upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+                       err = alloc_lpt_leb(c, &lnum);
+                       if (err)
+                               return err;
+                       offs = 0;
+                       ubifs_assert(lnum >= c->lpt_first &&
+                                    lnum <= c->lpt_last);
+               }
+               done_lsave = 1;
+               c->lsave_lnum = lnum;
+               c->lsave_offs = offs;
+               offs += c->lsave_sz;
+       }
+
+       /* Make sure to place LPT's own lprops table */
+       if (!done_ltab) {
+               if (offs + c->ltab_sz > c->leb_size) {
+                       alen = ALIGN(offs, c->min_io_size);
+                       upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+                       err = alloc_lpt_leb(c, &lnum);
+                       if (err)
+                               return err;
+                       offs = 0;
+                       ubifs_assert(lnum >= c->lpt_first &&
+                                    lnum <= c->lpt_last);
+               }
+               done_ltab = 1;
+               c->ltab_lnum = lnum;
+               c->ltab_offs = offs;
+               offs += c->ltab_sz;
+       }
+
+       /* Account for the space used in the last LEB */
+       alen = ALIGN(offs, c->min_io_size);
+       upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+       return 0;
+}
+
+/**
+ * realloc_lpt_leb - allocate an LPT LEB that is empty.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number is passed and returned here
+ *
+ * This function duplicates exactly the results of the function alloc_lpt_leb.
+ * It is used during end commit to reallocate the same LEB numbers that were
+ * allocated by alloc_lpt_leb during start commit.
+ *
+ * The search begins at the LEB after @lnum and wraps around to the first LPT
+ * LEB. The first LEB found that is marked as allocated for commit has that
+ * mark cleared, is returned in @lnum and the function returns %0. Otherwise
+ * the function returns -ENOSPC. Note however, that LPT is designed never to
+ * run out of space.
+ */
+static int realloc_lpt_leb(struct ubifs_info *c, int *lnum)
+{
+       int idx, start = *lnum - c->lpt_first + 1;
+
+       /* From the LEB after @lnum up to the end of the LPT area */
+       for (idx = start; idx < c->lpt_lebs; idx++)
+               if (c->ltab[idx].cmt)
+                       goto found;
+
+       /* Wrap around: from the first LPT LEB up to and including @lnum */
+       for (idx = 0; idx < start; idx++)
+               if (c->ltab[idx].cmt)
+                       goto found;
+
+       dbg_err("last LEB %d", *lnum);
+       dump_stack();
+       return -ENOSPC;
+
+found:
+       c->ltab[idx].cmt = 0;
+       *lnum = idx + c->lpt_first;
+       return 0;
+}
+
+/**
+ * write_cnodes - write cnodes for commit.
+ * @c: UBIFS file-system description object
+ *
+ * This function packs the cnodes on the @c->lpt_cnext list into @c->lpt_buf,
+ * together with ltab and, if @c->big_lpt is set, lsave, and writes the buffer
+ * to the flash starting at the LPT head (@c->nhead_lnum:@c->nhead_offs). When
+ * something does not fit into the current LEB, the pending part of the buffer
+ * is written out and the next LEB is obtained with 'realloc_lpt_leb()'. The
+ * placement decisions are made in the same order as in 'layout_cnodes()'. On
+ * success the LPT head is updated to the end of the written data.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int write_cnodes(struct ubifs_info *c)
+{
+       int lnum, offs, len, from, err, wlen, alen, done_ltab, done_lsave;
+       struct ubifs_cnode *cnode;
+       void *buf = c->lpt_buf;
+
+       cnode = c->lpt_cnext;
+       if (!cnode)
+               return 0;
+       lnum = c->nhead_lnum;
+       offs = c->nhead_offs;
+       /* @from is the offset in the current LEB of the first unwritten byte */
+       from = offs;
+       /* Ensure empty LEB is unmapped */
+       if (offs == 0) {
+               err = ubifs_leb_unmap(c, lnum);
+               if (err)
+                       return err;
+       }
+       /* Try to place lsave and ltab nicely */
+       done_lsave = !c->big_lpt;
+       done_ltab = 0;
+       if (!done_lsave && offs + c->lsave_sz <= c->leb_size) {
+               done_lsave = 1;
+               ubifs_pack_lsave(c, buf + offs, c->lsave);
+               offs += c->lsave_sz;
+       }
+
+       if (offs + c->ltab_sz <= c->leb_size) {
+               done_ltab = 1;
+               ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
+               offs += c->ltab_sz;
+       }
+
+       /* Loop for each cnode */
+       do {
+               if (cnode->level)
+                       len = c->nnode_sz;
+               else
+                       len = c->pnode_sz;
+               /* Move on to a new LEB while the cnode does not fit */
+               while (offs + len > c->leb_size) {
+                       wlen = offs - from;
+                       if (wlen) {
+                               /* Pad with 0xff up to the min. I/O unit size */
+                               alen = ALIGN(wlen, c->min_io_size);
+                               memset(buf + offs, 0xff, alen - wlen);
+                               err = ubifs_leb_write(c, lnum, buf + from, from,
+                                                      alen, UBI_SHORTTERM);
+                               if (err)
+                                       return err;
+                       }
+                       err = realloc_lpt_leb(c, &lnum);
+                       if (err)
+                               return err;
+                       offs = 0;
+                       from = 0;
+                       ubifs_assert(lnum >= c->lpt_first &&
+                                    lnum <= c->lpt_last);
+                       err = ubifs_leb_unmap(c, lnum);
+                       if (err)
+                               return err;
+                       /* Try to place lsave and ltab nicely */
+                       if (!done_lsave) {
+                               done_lsave = 1;
+                               ubifs_pack_lsave(c, buf + offs, c->lsave);
+                               offs += c->lsave_sz;
+                               continue;
+                       }
+                       if (!done_ltab) {
+                               done_ltab = 1;
+                               ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
+                               offs += c->ltab_sz;
+                               continue;
+                       }
+                       break;
+               }
+               if (cnode->level)
+                       ubifs_pack_nnode(c, buf + offs,
+                                        (struct ubifs_nnode *)cnode);
+               else
+                       ubifs_pack_pnode(c, buf + offs,
+                                        (struct ubifs_pnode *)cnode);
+               /*
+                * The reason for the barriers is the same as in case of TNC.
+                * See comment in 'write_index()'. 'dirty_cow_nnode()' and
+                * 'dirty_cow_pnode()' are the functions for which this is
+                * important.
+                */
+               clear_bit(DIRTY_CNODE, &cnode->flags);
+               smp_mb__before_clear_bit();
+               clear_bit(COW_ZNODE, &cnode->flags);
+               smp_mb__after_clear_bit();
+               offs += len;
+               cnode = cnode->cnext;
+       } while (cnode && cnode != c->lpt_cnext);
+
+       /* Make sure to place LPT's save table */
+       if (!done_lsave) {
+               if (offs + c->lsave_sz > c->leb_size) {
+                       wlen = offs - from;
+                       alen = ALIGN(wlen, c->min_io_size);
+                       memset(buf + offs, 0xff, alen - wlen);
+                       err = ubifs_leb_write(c, lnum, buf + from, from, alen,
+                                             UBI_SHORTTERM);
+                       if (err)
+                               return err;
+                       err = realloc_lpt_leb(c, &lnum);
+                       if (err)
+                               return err;
+                       /*
+                        * The new LEB is written from its beginning, so @from
+                        * has to be reset together with @offs. Otherwise the
+                        * next write would use the start offset of the old LEB.
+                        */
+                       offs = 0;
+                       from = 0;
+                       ubifs_assert(lnum >= c->lpt_first &&
+                                    lnum <= c->lpt_last);
+                       err = ubifs_leb_unmap(c, lnum);
+                       if (err)
+                               return err;
+               }
+               done_lsave = 1;
+               ubifs_pack_lsave(c, buf + offs, c->lsave);
+               offs += c->lsave_sz;
+       }
+
+       /* Make sure to place LPT's own lprops table */
+       if (!done_ltab) {
+               if (offs + c->ltab_sz > c->leb_size) {
+                       wlen = offs - from;
+                       alen = ALIGN(wlen, c->min_io_size);
+                       memset(buf + offs, 0xff, alen - wlen);
+                       err = ubifs_leb_write(c, lnum, buf + from, from, alen,
+                                             UBI_SHORTTERM);
+                       if (err)
+                               return err;
+                       err = realloc_lpt_leb(c, &lnum);
+                       if (err)
+                               return err;
+                       /* As above: the new LEB is written from offset zero */
+                       offs = 0;
+                       from = 0;
+                       ubifs_assert(lnum >= c->lpt_first &&
+                                    lnum <= c->lpt_last);
+                       err = ubifs_leb_unmap(c, lnum);
+                       if (err)
+                               return err;
+               }
+               done_ltab = 1;
+               ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
+               offs += c->ltab_sz;
+       }
+
+       /* Write remaining data in buffer */
+       wlen = offs - from;
+       alen = ALIGN(wlen, c->min_io_size);
+       memset(buf + offs, 0xff, alen - wlen);
+       err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM);
+       if (err)
+               return err;
+       /* The LPT head now follows the data just written */
+       c->nhead_lnum = lnum;
+       c->nhead_offs = ALIGN(offs, c->min_io_size);
+
+       dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
+       dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
+       dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
+       if (c->big_lpt)
+               dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
+       return 0;
+}
+
+/**
+ * next_pnode - find next pnode.
+ * @c: UBIFS file-system description object
+ * @pnode: pnode
+ *
+ * This function returns the next pnode or %NULL if there are no more pnodes.
+ * If an nnode on the way to the next pnode cannot be obtained, the error
+ * pointer returned by 'ubifs_get_nnode()' is passed on to the caller.
+ */
+static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
+                                     struct ubifs_pnode *pnode)
+{
+       struct ubifs_nnode *nn = pnode->parent;
+       int idx = pnode->iip + 1;
+
+       /* Try the sibling immediately to the right */
+       if (idx < UBIFS_LPT_FANOUT) {
+               /* We assume here that LEB zero is never an LPT LEB */
+               if (!nn->nbranch[idx].lnum)
+                       return NULL;
+               return ubifs_get_pnode(c, nn, idx);
+       }
+
+       /* Climb until an ancestor has a branch to the right */
+       for (;;) {
+               idx = nn->iip + 1;
+               nn = nn->parent;
+               if (!nn)
+                       return NULL;
+               /* We assume here that LEB zero is never an LPT LEB */
+               if (idx < UBIFS_LPT_FANOUT && nn->nbranch[idx].lnum)
+                       break;
+       }
+
+       /* Step right, then follow the leftmost branches down to level 1 */
+       nn = ubifs_get_nnode(c, nn, idx);
+       while (!IS_ERR(nn) && nn->level > 1)
+               nn = ubifs_get_nnode(c, nn, 0);
+       if (IS_ERR(nn))
+               return (void *)nn;
+
+       return ubifs_get_pnode(c, nn, 0);
+}
+
+/**
+ * pnode_lookup - lookup a pnode in the LPT.
+ * @c: UBIFS file-system description object
+ * @i: pnode number (0 to main_lebs - 1)
+ *
+ * The root nnode is read first if it is not in memory yet, and then the tree
+ * is descended one level at a time, using successive groups of bits of @i to
+ * select the branch to follow.
+ *
+ * This function returns a pointer to the pnode on success or an error pointer
+ * (see 'ERR_PTR()') holding a negative error code on failure.
+ */
+static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i)
+{
+       const int mask = UBIFS_LPT_FANOUT - 1;
+       struct ubifs_nnode *nnode;
+       int err, levels, shift, slot;
+
+       if (!c->nroot) {
+               err = ubifs_read_nnode(c, NULL, 0);
+               if (err)
+                       return ERR_PTR(err);
+       }
+
+       i <<= UBIFS_LPT_FANOUT_SHIFT;
+       shift = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
+       nnode = c->nroot;
+       /* Walk down through the nnode levels below the root */
+       for (levels = c->lpt_hght - 1; levels > 0; levels--) {
+               slot = (i >> shift) & mask;
+               nnode = ubifs_get_nnode(c, nnode, slot);
+               if (IS_ERR(nnode))
+                       return ERR_PTR(PTR_ERR(nnode));
+               shift -= UBIFS_LPT_FANOUT_SHIFT;
+       }
+
+       /* The last group of bits selects the pnode itself */
+       slot = (i >> shift) & mask;
+       return ubifs_get_pnode(c, nnode, slot);
+}
+
+/**
+ * add_pnode_dirt - add dirty space to LPT LEB properties.
+ * @c: UBIFS file-system description object
+ * @pnode: pnode for which to add dirt
+ *
+ * The size of one pnode is added as dirty space to the LPT LEB recorded for
+ * @pnode in its parent's branch.
+ */
+static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode)
+{
+       struct ubifs_nnode *parent = pnode->parent;
+       int lnum = parent->nbranch[pnode->iip].lnum;
+
+       ubifs_add_lpt_dirt(c, lnum, c->pnode_sz);
+}
+
+/**
+ * do_make_pnode_dirty - mark a pnode dirty.
+ * @c: UBIFS file-system description object
+ * @pnode: pnode to mark dirty
+ *
+ * If @pnode is not dirty already, it is marked dirty and accounted for, and
+ * so is every ancestor up to the first one that is already dirty.
+ */
+static void do_make_pnode_dirty(struct ubifs_info *c, struct ubifs_pnode *pnode)
+{
+       struct ubifs_nnode *nn;
+
+       /* Assumes cnext list is empty i.e. not called during commit */
+       if (test_and_set_bit(DIRTY_CNODE, &pnode->flags))
+               return;
+
+       c->dirty_pn_cnt += 1;
+       add_pnode_dirt(c, pnode);
+       /* Mark parent and ancestors dirty too */
+       for (nn = pnode->parent; nn; nn = nn->parent) {
+               /* Stop at the first ancestor that was dirty already */
+               if (test_and_set_bit(DIRTY_CNODE, &nn->flags))
+                       break;
+               c->dirty_nn_cnt += 1;
+               ubifs_add_nnode_dirt(c, nn);
+       }
+}
+
+/**
+ * make_tree_dirty - mark the entire LEB properties tree dirty.
+ * @c: UBIFS file-system description object
+ *
+ * This function is used by the "small" LPT model to cause the entire LEB
+ * properties tree to be written.  The "small" LPT model does not use LPT
+ * garbage collection because it is more efficient to write the entire tree
+ * (because it is small).
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int make_tree_dirty(struct ubifs_info *c)
+{
+       struct ubifs_pnode *pnode;
+
+       pnode = pnode_lookup(c, 0);
+       /*
+        * 'pnode_lookup()' reports failure with an error pointer, which is not
+        * %NULL and so must be caught before the loop dereferences it.
+        */
+       if (IS_ERR(pnode))
+               return PTR_ERR(pnode);
+
+       while (pnode) {
+               do_make_pnode_dirty(c, pnode);
+               pnode = next_pnode(c, pnode);
+               if (IS_ERR(pnode))
+                       return PTR_ERR(pnode);
+       }
+       return 0;
+}
+
+/**
+ * need_write_all - determine if the LPT area is running out of free space.
+ * @c: UBIFS file-system description object
+ *
+ * The space counted as available is the remainder of the LPT head LEB plus
+ * every other LPT LEB that is either completely free or consists of free and
+ * dirty space only.
+ *
+ * This function returns %1 if the LPT area is running out of free space and %0
+ * if it is not.
+ */
+static int need_write_all(struct ubifs_info *c)
+{
+       long long avail = 0;
+       int i;
+
+       for (i = 0; i < c->lpt_lebs; i++) {
+               /* Only the space after the head counts for the head LEB */
+               if (i + c->lpt_first == c->nhead_lnum) {
+                       avail += c->leb_size - c->nhead_offs;
+                       continue;
+               }
+               if (c->ltab[i].free == c->leb_size ||
+                   c->ltab[i].free + c->ltab[i].dirty == c->leb_size)
+                       avail += c->leb_size;
+       }
+       /* Less than twice the size left */
+       return avail <= c->lpt_sz * 2;
+}
+
+/**
+ * lpt_tgc_start - start trivial garbage collection of LPT LEBs.
+ * @c: UBIFS file-system description object
+ *
+ * LPT trivial garbage collection is where a LPT LEB contains only dirty and
+ * free space and so may be reused as soon as the next commit is completed.
+ * This function is called during start commit to mark LPT LEBs for trivial GC.
+ * A marked LEB is recorded in the ltab as entirely free with no dirty space.
+ * The LPT head LEB is never marked.
+ */
+static void lpt_tgc_start(struct ubifs_info *c)
+{
+       int i, lnum;
+
+       for (i = 0; i < c->lpt_lebs; i++) {
+               lnum = i + c->lpt_first;
+               if (lnum == c->nhead_lnum)
+                       continue;
+               /* Only LEBs that have some dirty space are of interest */
+               if (c->ltab[i].dirty <= 0)
+                       continue;
+               /* Anything besides free and dirty space rules the LEB out */
+               if (c->ltab[i].free + c->ltab[i].dirty != c->leb_size)
+                       continue;
+               c->ltab[i].tgc = 1;
+               c->ltab[i].free = c->leb_size;
+               c->ltab[i].dirty = 0;
+               dbg_lp("LEB %d", lnum);
+       }
+}
+
+/**
+ * lpt_tgc_end - end trivial garbage collection of LPT LEBs.
+ * @c: UBIFS file-system description object
+ *
+ * LPT trivial garbage collection is where a LPT LEB contains only dirty and
+ * free space and so may be reused as soon as the next commit is completed.
+ * This function is called after the commit is completed (master node has been
+ * written) and unmaps LPT LEBs that were marked for trivial GC.
+ *
+ * This function returns %0 on success and the error code of the failed unmap
+ * operation otherwise.
+ */
+static int lpt_tgc_end(struct ubifs_info *c)
+{
+       int i, lnum, err;
+
+       for (i = 0; i < c->lpt_lebs; i++) {
+               if (!c->ltab[i].tgc)
+                       continue;
+               lnum = i + c->lpt_first;
+               err = ubifs_leb_unmap(c, lnum);
+               if (err)
+                       return err;
+               /* The mark is cleared only once the LEB has been unmapped */
+               c->ltab[i].tgc = 0;
+               dbg_lp("LEB %d", lnum);
+       }
+       return 0;
+}
+
+/**
+ * populate_lsave - fill the lsave array with important LEB numbers.
+ * @c: the UBIFS file-system description object
+ *
+ * This function is only called for the "big" model. It stores in @c->lsave a
+ * small number of LEB numbers of important LEBs, taken in order of decreasing
+ * importance: empty, freeable, freeable index, dirty index, dirty and finally
+ * free. Upon mount, this list of LEB numbers is read and their pnodes are
+ * brought into memory, which avoids having to scan the LPT straight away. For
+ * the "small" model it is assumed that scanning the LPT is no big deal.
+ *
+ * The lsave node is also marked dirty if it is not dirty already. Any slots
+ * left over are filled with @c->main_first.
+ */
+static void populate_lsave(struct ubifs_info *c)
+{
+       /* Heap categories to draw from, most important first */
+       const int heap_cat[] = { LPROPS_DIRTY_IDX, LPROPS_DIRTY, LPROPS_FREE };
+       struct ubifs_lprops *lp;
+       int h, pos, used = 0;
+
+       ubifs_assert(c->big_lpt);
+       if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) {
+               c->lpt_drty_flgs |= LSAVE_DIRTY;
+               ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
+       }
+
+       /* First the three lists ... */
+       list_for_each_entry(lp, &c->empty_list, list) {
+               c->lsave[used++] = lp->lnum;
+               if (used >= c->lsave_cnt)
+                       return;
+       }
+       list_for_each_entry(lp, &c->freeable_list, list) {
+               c->lsave[used++] = lp->lnum;
+               if (used >= c->lsave_cnt)
+                       return;
+       }
+       list_for_each_entry(lp, &c->frdi_idx_list, list) {
+               c->lsave[used++] = lp->lnum;
+               if (used >= c->lsave_cnt)
+                       return;
+       }
+
+       /* ... then the heaps, each one walked in array order */
+       for (h = 0; h < (int)(sizeof(heap_cat) / sizeof(heap_cat[0])); h++) {
+               struct ubifs_lpt_heap *heap = &c->lpt_heap[heap_cat[h] - 1];
+
+               for (pos = 0; pos < heap->cnt; pos++) {
+                       c->lsave[used++] = heap->arr[pos]->lnum;
+                       if (used >= c->lsave_cnt)
+                               return;
+               }
+       }
+
+       /* Fill it up completely */
+       while (used < c->lsave_cnt)
+               c->lsave[used++] = c->main_first;
+}
+
+/**
+ * nnode_lookup - lookup a nnode in the LPT.
+ * @c: UBIFS file-system description object
+ * @i: nnode number
+ *
+ * The root nnode is read first if it is not in memory yet. The tree is then
+ * descended from the root: at each step the low-order bits of @i select the
+ * branch to follow and @i is shifted right by %UBIFS_LPT_FANOUT_SHIFT; the
+ * descent stops once the shifted value is zero.
+ *
+ * This function returns a pointer to the nnode on success or a negative
+ * error code on failure.
+ */
+static struct ubifs_nnode *nnode_lookup(struct ubifs_info *c, int i)
+{
+       struct ubifs_nnode *cur;
+       int slot;
+
+       if (!c->nroot) {
+               int err = ubifs_read_nnode(c, NULL, 0);
+
+               if (err)
+                       return ERR_PTR(err);
+       }
+
+       cur = c->nroot;
+       slot = i & (UBIFS_LPT_FANOUT - 1);
+       i >>= UBIFS_LPT_FANOUT_SHIFT;
+       while (i) {
+               cur = ubifs_get_nnode(c, cur, slot);
+               if (IS_ERR(cur))
+                       break; /* hand the error pointer to the caller */
+               slot = i & (UBIFS_LPT_FANOUT - 1);
+               i >>= UBIFS_LPT_FANOUT_SHIFT;
+       }
+       return cur;
+}
+
+/**
+ * make_nnode_dirty - find a nnode and, if found, make it dirty.
+ * @c: UBIFS file-system description object
+ * @node_num: nnode number of nnode to make dirty
+ * @lnum: LEB number where nnode was written
+ * @offs: offset where nnode was written
+ *
+ * This function is used by LPT garbage collection.  LPT garbage collection is
+ * used only for the "big" LPT model (c->big_lpt == 1).  Garbage collection
+ * simply involves marking all the nodes in the LEB being garbage-collected as
+ * dirty.  The dirty nodes are written next commit, after which the LEB is free
+ * to be reused.
+ *
+ * The nnode is left alone when the position recorded for it (in its parent's
+ * branch, or in @c->lpt_lnum / @c->lpt_offs for the root) differs from
+ * @lnum:@offs, because the copy found in the LEB is then obsolete. Otherwise
+ * the nnode and its clean ancestors are marked dirty, stopping at the first
+ * ancestor that is dirty already.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int make_nnode_dirty(struct ubifs_info *c, int node_num, int lnum,
+                           int offs)
+{
+       struct ubifs_nnode *nnode;
+       int cur_lnum, cur_offs;
+
+       nnode = nnode_lookup(c, node_num);
+       if (IS_ERR(nnode))
+               return PTR_ERR(nnode);
+
+       /* Find out where the current copy of this nnode is recorded to be */
+       if (nnode->parent) {
+               struct ubifs_nbranch *br = &nnode->parent->nbranch[nnode->iip];
+
+               cur_lnum = br->lnum;
+               cur_offs = br->offs;
+       } else {
+               cur_lnum = c->lpt_lnum;
+               cur_offs = c->lpt_offs;
+       }
+       if (cur_lnum != lnum || cur_offs != offs)
+               return 0; /* nnode is obsolete */
+
+       /*
+        * Assumes cnext list is empty i.e. not called during commit.
+        * Walk from the nnode towards the root, dirtying as we go, until an
+        * already-dirty nnode (or the top of the tree) is reached.
+        */
+       for (; nnode; nnode = nnode->parent) {
+               if (test_and_set_bit(DIRTY_CNODE, &nnode->flags))
+                       break;
+               c->dirty_nn_cnt += 1;
+               ubifs_add_nnode_dirt(c, nnode);
+       }
+       return 0;
+}
+
+/**
+ * make_pnode_dirty - find a pnode and, if found, make it dirty.
+ * @c: UBIFS file-system description object
+ * @node_num: pnode number of pnode to make dirty
+ * @lnum: LEB number where pnode was written
+ * @offs: offset where pnode was written
+ *
+ * This function is used by LPT garbage collection.  LPT garbage collection is
+ * used only for the "big" LPT model (c->big_lpt == 1).  Garbage collection
+ * simply involves marking all the nodes in the LEB being garbage-collected as
+ * dirty.  The dirty nodes are written next commit, after which the LEB is free
+ * to be reused.
+ *
+ * The pnode is made dirty only if its parent's branch still records it at
+ * @lnum:@offs; otherwise nothing is done.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum,
+                           int offs)
+{
+       struct ubifs_nbranch *br;
+       struct ubifs_pnode *pnode = pnode_lookup(c, node_num);
+
+       if (IS_ERR(pnode))
+               return PTR_ERR(pnode);
+
+       br = &pnode->parent->nbranch[pnode->iip];
+       if (br->lnum == lnum && br->offs == offs)
+               do_make_pnode_dirty(c, pnode);
+       return 0;
+}
+
+/**
+ * make_ltab_dirty - make ltab node dirty.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number where ltab was written
+ * @offs: offset where ltab was written
+ *
+ * This function is used by LPT garbage collection.  LPT garbage collection is
+ * used only for the "big" LPT model (c->big_lpt == 1).  Garbage collection
+ * simply involves marking all the nodes in the LEB being garbage-collected as
+ * dirty.  The dirty nodes are written next commit, after which the LEB is free
+ * to be reused.
+ *
+ * Nothing is done if @lnum:@offs is not the recorded ltab position (the node
+ * is obsolete) or if the ltab is dirty already.
+ *
+ * This function always returns %0.
+ */
+static int make_ltab_dirty(struct ubifs_info *c, int lnum, int offs)
+{
+       int is_current = lnum == c->ltab_lnum && offs == c->ltab_offs;
+
+       if (is_current && !(c->lpt_drty_flgs & LTAB_DIRTY)) {
+               c->lpt_drty_flgs |= LTAB_DIRTY;
+               ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz);
+       }
+       return 0;
+}
+
+/**
+ * make_lsave_dirty - make lsave node dirty.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number where lsave was written
+ * @offs: offset where lsave was written
+ *
+ * This function is used by LPT garbage collection.  LPT garbage collection is
+ * used only for the "big" LPT model (c->big_lpt == 1).  Garbage collection
+ * simply involves marking all the nodes in the LEB being garbage-collected as
+ * dirty.  The dirty nodes are written next commit, after which the LEB is free
+ * to be reused.
+ *
+ * Nothing is done if @lnum:@offs is not the recorded lsave position (the node
+ * is obsolete) or if the lsave is dirty already.
+ *
+ * This function always returns %0.
+ */
+static int make_lsave_dirty(struct ubifs_info *c, int lnum, int offs)
+{
+       int is_current = lnum == c->lsave_lnum && offs == c->lsave_offs;
+
+       if (is_current && !(c->lpt_drty_flgs & LSAVE_DIRTY)) {
+               c->lpt_drty_flgs |= LSAVE_DIRTY;
+               ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
+       }
+       return 0;
+}
+
+/**
+ * make_node_dirty - make node dirty.
+ * @c: UBIFS file-system description object
+ * @node_type: LPT node type
+ * @node_num: node number
+ * @lnum: LEB number where node was written
+ * @offs: offset where node was written
+ *
+ * This function is used by LPT garbage collection.  LPT garbage collection is
+ * used only for the "big" LPT model (c->big_lpt == 1).  Garbage collection
+ * simply involves marking all the nodes in the LEB being garbage-collected as
+ * dirty.  The dirty nodes are written next commit, after which the LEB is free
+ * to be reused.
+ *
+ * The work is delegated to the helper matching @node_type; @node_num is only
+ * used for nnodes and pnodes.
+ *
+ * This function returns %0 on success and a negative error code on failure
+ * (%-EINVAL if @node_type is not a known LPT node type).
+ */
+static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num,
+                          int lnum, int offs)
+{
+       int err;
+
+       switch (node_type) {
+       case UBIFS_LPT_NNODE:
+               err = make_nnode_dirty(c, node_num, lnum, offs);
+               break;
+       case UBIFS_LPT_PNODE:
+               err = make_pnode_dirty(c, node_num, lnum, offs);
+               break;
+       case UBIFS_LPT_LTAB:
+               err = make_ltab_dirty(c, lnum, offs);
+               break;
+       case UBIFS_LPT_LSAVE:
+               err = make_lsave_dirty(c, lnum, offs);
+               break;
+       default:
+               err = -EINVAL;
+               break;
+       }
+       return err;
+}
+
+/**
+ * get_lpt_node_len - return the length of a node based on its type.
+ * @c: UBIFS file-system description object
+ * @node_type: LPT node type
+ *
+ * This function returns the size recorded in @c for the given node type, or
+ * %0 if @node_type is not a known LPT node type.
+ */
+static int get_lpt_node_len(struct ubifs_info *c, int node_type)
+{
+       int sz = 0;
+
+       switch (node_type) {
+       case UBIFS_LPT_NNODE:
+               sz = c->nnode_sz;
+               break;
+       case UBIFS_LPT_PNODE:
+               sz = c->pnode_sz;
+               break;
+       case UBIFS_LPT_LTAB:
+               sz = c->ltab_sz;
+               break;
+       case UBIFS_LPT_LSAVE:
+               sz = c->lsave_sz;
+               break;
+       }
+       return sz;
+}
+
+/**
+ * get_pad_len - return the length of padding in a buffer.
+ * @c: UBIFS file-system description object
+ * @buf: buffer (not examined)
+ * @len: length of buffer
+ *
+ * The buffer is taken to be the last @len bytes of a LEB, so it starts at LEB
+ * offset @c->leb_size - @len. This function returns the number of bytes from
+ * that offset up to the next multiple of @c->min_io_size, which is %0 if the
+ * offset is aligned already or if @c->min_io_size is %1.
+ */
+static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len)
+{
+       int start;
+
+       if (c->min_io_size == 1)
+               return 0;
+
+       start = c->leb_size - len;
+       return ALIGN(start, c->min_io_size) - start;
+}
+
+/**
+ * get_lpt_node_type - return type (and node number) of a node in a buffer.
+ * @c: UBIFS file-system description object
+ * @buf: buffer
+ * @node_num: node number is returned here
+ *
+ * The type and then the node number (@c->pcnt_bits wide) are unpacked from
+ * the bits which follow the %UBIFS_LPT_CRC_BYTES bytes at the start of @buf.
+ * The node number is unpacked whatever the type turns out to be.
+ */
+static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num)
+{
+       uint8_t *p = buf + UBIFS_LPT_CRC_BYTES; /* skip over the CRC */
+       int bit = 0;
+       int type = ubifs_unpack_bits(&p, &bit, UBIFS_LPT_TYPE_BITS);
+
+       *node_num = ubifs_unpack_bits(&p, &bit, c->pcnt_bits);
+       return type;
+}
+
+/**
+ * is_a_node - determine if a buffer contains a node.
+ * @c: UBIFS file-system description object
+ * @buf: buffer
+ * @len: length of buffer
+ *
+ * A node is recognised when the buffer is long enough to hold the CRC and the
+ * type field, the type is a known LPT node type whose length fits within
+ * @len, and the CRC stored at the start of the buffer matches the CRC
+ * calculated over the rest of the node.
+ *
+ * This function returns %1 if the buffer contains a node or %0 if it does not.
+ */
+static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
+{
+       uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
+       int pos = 0, node_type, node_len;
+       uint16_t crc, calc_crc;
+
+       /*
+        * The CRC and the type field must both lie within the buffer,
+        * otherwise unpacking them would read beyond its end - e.g. when the
+        * previous node ended exactly at the end of the LEB and @len is 0.
+        */
+       if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8)
+               return 0;
+       node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
+       if (node_type == UBIFS_LPT_NOT_A_NODE)
+               return 0;
+       /* A length of zero means the type is not a known one */
+       node_len = get_lpt_node_len(c, node_type);
+       if (!node_len || node_len > len)
+               return 0;
+       /* Rewind to the start of the buffer to fetch the stored CRC */
+       pos = 0;
+       addr = buf;
+       crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS);
+       /* The CRC covers everything in the node except the CRC itself */
+       calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
+                        node_len - UBIFS_LPT_CRC_BYTES);
+       if (crc != calc_crc)
+               return 0;
+       return 1;
+}
+
+
+/**
+ * lpt_gc_lnum - garbage collect a LPT LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to garbage collect
+ *
+ * LPT garbage collection is used only for the "big" LPT model
+ * (c->big_lpt == 1).  Garbage collection simply involves marking all the nodes
+ * in the LEB being garbage-collected as dirty.  The dirty nodes are written
+ * next commit, after which the LEB is free to be reused.
+ *
+ * The whole LEB is read into @c->lpt_buf and walked node by node, skipping
+ * padding, until neither a node nor padding is found. @c->lp_mutex is taken
+ * around the dirtying of each individual node.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int lpt_gc_lnum(struct ubifs_info *c, int lnum)
+{
+       void *p = c->lpt_buf;
+       int err, left = c->leb_size;
+
+       dbg_lp("LEB %d", lnum);
+       err = ubi_read(c->ubi, lnum, p, 0, c->leb_size);
+       if (err) {
+               ubifs_err("cannot read LEB %d, error %d", lnum, err);
+               return err;
+       }
+
+       for (;;) {
+               int node_type, node_num, node_len, offs;
+
+               if (!is_a_node(c, p, left)) {
+                       int pad_len = get_pad_len(c, p, left);
+
+                       /* Neither a node nor padding: we are done */
+                       if (!pad_len)
+                               return 0;
+                       p += pad_len;
+                       left -= pad_len;
+                       continue;
+               }
+
+               node_type = get_lpt_node_type(c, p, &node_num);
+               node_len = get_lpt_node_len(c, node_type);
+               /* Offset of this node within the LEB */
+               offs = c->leb_size - left;
+               ubifs_assert(node_len != 0);
+
+               mutex_lock(&c->lp_mutex);
+               err = make_node_dirty(c, node_type, node_num, lnum, offs);
+               mutex_unlock(&c->lp_mutex);
+               if (err)
+                       return err;
+
+               p += node_len;
+               left -= node_len;
+       }
+}
+
+/**
+ * lpt_gc - LPT garbage collection.
+ * @c: UBIFS file-system description object
+ *
+ * Select a LPT LEB for LPT garbage collection and call 'lpt_gc_lnum()'. The
+ * LEB chosen is the one with the most dirty space (the first such LEB if
+ * several tie), leaving out the LEB numbered @c->nhead_lnum and any LEB which
+ * consists only of dirty and free space.
+ *
+ * Returns %0 on success and a negative error code on failure (%-ENOSPC if no
+ * LEB could be selected).
+ */
+static int lpt_gc(struct ubifs_info *c)
+{
+       int idx, victim = -1, most_dirty = 0;
+
+       mutex_lock(&c->lp_mutex);
+       for (idx = 0; idx < c->lpt_lebs; idx++) {
+               ubifs_assert(!c->ltab[idx].tgc);
+               if (idx + c->lpt_first == c->nhead_lnum)
+                       continue;
+               if (c->ltab[idx].free + c->ltab[idx].dirty == c->leb_size)
+                       continue;
+               if (c->ltab[idx].dirty <= most_dirty)
+                       continue;
+               most_dirty = c->ltab[idx].dirty;
+               victim = idx + c->lpt_first;
+       }
+       mutex_unlock(&c->lp_mutex);
+
+       /* lpt_gc_lnum() is called with lp_mutex released */
+       return victim == -1 ? -ENOSPC : lpt_gc_lnum(c, victim);
+}
+
+/**
+ * ubifs_lpt_start_commit - UBIFS commit starts.
+ * @c: the UBIFS file-system description object
+ *
+ * This function has to be called when UBIFS starts the commit operation.
+ * This function "freezes" all currently dirty LEB properties and does not
+ * change them anymore. Further changes are saved and tracked separately
+ * because they are not part of this commit. This function returns zero in case
+ * of success and a negative error code in case of failure.
+ *
+ * @c->lp_mutex is taken for the duration of the function, except around calls
+ * to 'lpt_gc()', and is not held on return.
+ */
+int ubifs_lpt_start_commit(struct ubifs_info *c)
+{
+       int err, cnt;
+
+       dbg_lp("");
+
+       mutex_lock(&c->lp_mutex);
+       err = dbg_check_ltab(c);
+       if (err)
+               goto out;
+
+       if (c->check_lpt_free) {
+               /*
+                * We ensure there is enough free space in
+                * ubifs_lpt_post_commit() by marking nodes dirty. That
+                * information is lost when we unmount, so we need to check
+                * free space once after mounting as well.
+                */
+               c->check_lpt_free = 0;
+               while (need_write_all(c)) {
+                       /* lpt_gc() takes lp_mutex itself, so drop it first */
+                       mutex_unlock(&c->lp_mutex);
+                       err = lpt_gc(c);
+                       if (err)
+                               return err; /* lp_mutex is not held here */
+                       mutex_lock(&c->lp_mutex);
+               }
+       }
+
+       lpt_tgc_start(c);
+
+       if (!c->dirty_pn_cnt) {
+               dbg_cmt("no cnodes to commit");
+               err = 0;
+               goto out;
+       }
+
+       if (!c->big_lpt && need_write_all(c)) {
+               /* If needed, write everything */
+               err = make_tree_dirty(c);
+               if (err)
+                       goto out;
+               /* Mark LEBs for trivial GC again now the tree is dirty */
+               lpt_tgc_start(c);
+       }
+
+       if (c->big_lpt)
+               populate_lsave(c);
+
+       /* cnt is only used by the assertion below */
+       cnt = get_cnodes_to_commit(c);
+       ubifs_assert(cnt != 0);
+
+       err = layout_cnodes(c);
+       if (err)
+               goto out;
+
+       /* Copy the LPT's own lprops for end commit to write */
+       memcpy(c->ltab_cmt, c->ltab,
+              sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
+       c->lpt_drty_flgs &= ~(LTAB_DIRTY | LSAVE_DIRTY);
+
+out:
+       mutex_unlock(&c->lp_mutex);
+       return err;
+}
+
+/**
+ * free_obsolete_cnodes - free obsolete cnodes for commit end.
+ * @c: UBIFS file-system description object
+ *
+ * The circular cnext list which starts at @c->lpt_cnext is walked once. Each
+ * cnode flagged %OBSOLETE_CNODE is freed and every other cnode is unlinked
+ * from the list. @c->lpt_cnext is set to %NULL afterwards.
+ */
+static void free_obsolete_cnodes(struct ubifs_info *c)
+{
+       struct ubifs_cnode *head = c->lpt_cnext;
+       struct ubifs_cnode *cur, *following;
+
+       if (!head)
+               return;
+
+       following = head;
+       do {
+               cur = following;
+               /* Fetch the link before cur is freed or unlinked */
+               following = cur->cnext;
+               if (test_bit(OBSOLETE_CNODE, &cur->flags))
+                       kfree(cur);
+               else
+                       cur->cnext = NULL;
+       } while (following != head);
+
+       c->lpt_cnext = NULL;
+}
+
+/**
+ * ubifs_lpt_end_commit - finish the commit operation.
+ * @c: the UBIFS file-system description object
+ *
+ * This function has to be called when the commit operation finishes. It
+ * flushes the changes which were "frozen" by 'ubifs_lpt_start_commit()' to
+ * the media and then, under @c->lp_mutex, frees the obsolete cnodes. Nothing
+ * is done if there are no cnodes to commit. Returns zero in case of success
+ * and a negative error code in case of failure.
+ */
+int ubifs_lpt_end_commit(struct ubifs_info *c)
+{
+       int err;
+
+       dbg_lp("");
+
+       if (c->lpt_cnext == NULL)
+               return 0;
+
+       err = write_cnodes(c);
+       if (!err) {
+               mutex_lock(&c->lp_mutex);
+               free_obsolete_cnodes(c);
+               mutex_unlock(&c->lp_mutex);
+       }
+       return err;
+}
+
+/**
+ * ubifs_lpt_post_commit - post commit LPT trivial GC and LPT GC.
+ * @c: UBIFS file-system description object
+ *
+ * LPT trivial GC is completed after a commit. Also LPT GC is done after a
+ * commit for the "big" LPT model.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ * @c->lp_mutex is not held on return.
+ */
+int ubifs_lpt_post_commit(struct ubifs_info *c)
+{
+       int err;
+
+       mutex_lock(&c->lp_mutex);
+       /* Unmap the LEBs which were marked for trivial GC */
+       err = lpt_tgc_end(c);
+       if (err)
+               goto out;
+       if (c->big_lpt)
+               while (need_write_all(c)) {
+                       /* lpt_gc() takes lp_mutex itself, so drop it first */
+                       mutex_unlock(&c->lp_mutex);
+                       err = lpt_gc(c);
+                       if (err)
+                               return err; /* lp_mutex is not held here */
+                       mutex_lock(&c->lp_mutex);
+               }
+out:
+       mutex_unlock(&c->lp_mutex);
+       return err;
+}
+
+/**
+ * first_nnode - find the first nnode in memory.
+ * @c: UBIFS file-system description object
+ * @hght: height of tree where nnode found is returned here
+ *
+ * Starting at the root, the first in-memory child is followed at each level
+ * until a nnode with no in-memory nnode children is reached or the levels
+ * below @c->lpt_hght are exhausted.
+ *
+ * This function returns a pointer to the nnode found or %NULL if no nnode is
+ * found. This function is a helper to 'ubifs_lpt_free()'.
+ */
+static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght)
+{
+       struct ubifs_nnode *nnode = c->nroot;
+       int level, slot;
+
+       *hght = 0;
+       if (!nnode)
+               return NULL;
+
+       for (level = 1; level < c->lpt_hght; level++) {
+               struct ubifs_nnode *child = NULL;
+
+               /* Pick the lowest-numbered branch which is in memory */
+               for (slot = 0; slot < UBIFS_LPT_FANOUT && !child; slot++)
+                       child = nnode->nbranch[slot].nnode;
+               if (!child)
+                       break;
+               nnode = child;
+               *hght = level;
+       }
+       return nnode;
+}
+
+/**
+ * next_nnode - find the next nnode in memory.
+ * @c: UBIFS file-system description object
+ * @nnode: nnode from which to start.
+ * @hght: height of tree where nnode is, is passed and returned here
+ *
+ * If @nnode has a later sibling in memory, the walk moves to that sibling and
+ * then descends through first in-memory children as far as possible.
+ * Otherwise the walk moves up to the parent.
+ *
+ * This function returns a pointer to the nnode found or %NULL if no nnode is
+ * found. This function is a helper to 'ubifs_lpt_free()'.
+ */
+static struct ubifs_nnode *next_nnode(struct ubifs_info *c,
+                                     struct ubifs_nnode *nnode, int *hght)
+{
+       struct ubifs_nnode *up = nnode->parent;
+       int slot, level;
+
+       /* The root has no successor */
+       if (!up)
+               return NULL;
+
+       /* Look for a later sibling which is in memory */
+       if (nnode->iip == UBIFS_LPT_FANOUT - 1) {
+               nnode = NULL;
+       } else {
+               for (slot = nnode->iip + 1; slot < UBIFS_LPT_FANOUT; slot++) {
+                       nnode = up->nbranch[slot].nnode;
+                       if (nnode)
+                               break;
+               }
+       }
+       if (!nnode) {
+               /* No more siblings, so the parent is next */
+               *hght -= 1;
+               return up;
+       }
+
+       /* Go down from the sibling via first in-memory children */
+       for (level = *hght + 1; level < c->lpt_hght; level++) {
+               struct ubifs_nnode *child = NULL;
+
+               for (slot = 0; slot < UBIFS_LPT_FANOUT && !child; slot++)
+                       child = nnode->nbranch[slot].nnode;
+               if (!child)
+                       break;
+               nnode = child;
+               *hght = level;
+       }
+       return nnode;
+}
+
+/**
+ * ubifs_lpt_free - free resources owned by the LPT.
+ * @c: UBIFS file-system description object
+ * @wr_only: free only resources used for writing
+ *
+ * The resources used for writing (leftover cnodes, @c->ltab_cmt, @c->lpt_buf
+ * and @c->lsave) are always freed and their pointers cleared. Unless @wr_only
+ * is set, the in-memory tree, the heaps, the dirty index array, @c->ltab and
+ * @c->lpt_nod_buf are freed as well.
+ */
+void ubifs_lpt_free(struct ubifs_info *c, int wr_only)
+{
+       struct ubifs_nnode *nnode;
+       int n, hght;
+
+       /* Free write-only things first */
+
+       free_obsolete_cnodes(c); /* Leftover from a failed commit */
+
+       vfree(c->ltab_cmt);
+       c->ltab_cmt = NULL;
+       vfree(c->lpt_buf);
+       c->lpt_buf = NULL;
+       kfree(c->lsave);
+       c->lsave = NULL;
+
+       if (wr_only)
+               return;
+
+       /* Now free the rest */
+
+       /* Children of every nnode visited are freed; the root goes last */
+       for (nnode = first_nnode(c, &hght); nnode;
+            nnode = next_nnode(c, nnode, &hght))
+               for (n = 0; n < UBIFS_LPT_FANOUT; n++)
+                       kfree(nnode->nbranch[n].nnode);
+
+       for (n = 0; n < LPROPS_HEAP_CNT; n++)
+               kfree(c->lpt_heap[n].arr);
+       kfree(c->dirty_idx.arr);
+       kfree(c->nroot);
+       vfree(c->ltab);
+       kfree(c->lpt_nod_buf);
+}
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+/**
+ * dbg_is_all_ff - determine if a buffer contains only 0xff bytes.
+ * @buf: buffer
+ * @len: buffer length
+ *
+ * This function returns %1 if all @len bytes are 0xff (which includes the
+ * case of @len not being positive) and %0 otherwise.
+ */
+static int dbg_is_all_ff(uint8_t *buf, int len)
+{
+       while (len-- > 0)
+               if (*buf++ != 0xff)
+                       return 0;
+       return 1;
+}
+
+/**
+ * dbg_is_nnode_dirty - determine if a nnode is dirty.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB number where nnode was written
+ * @offs: offset where nnode was written
+ *
+ * The in-memory nnodes are searched for the one recorded at @lnum:@offs (in
+ * its parent's branch, or in @c->lpt_lnum / @c->lpt_offs for the root). This
+ * function returns %1 if that nnode is dirty or if no such nnode is found,
+ * and %0 if it is found and is not dirty.
+ */
+static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs)
+{
+       struct ubifs_nnode *nnode;
+       int hght;
+
+       /* Entire tree is in memory so first_nnode / next_nnode are ok */
+       for (nnode = first_nnode(c, &hght); nnode;
+            nnode = next_nnode(c, nnode, &hght)) {
+               int at_lnum, at_offs;
+
+               cond_resched();
+               if (nnode->parent) {
+                       struct ubifs_nbranch *br;
+
+                       br = &nnode->parent->nbranch[nnode->iip];
+                       at_lnum = br->lnum;
+                       at_offs = br->offs;
+               } else {
+                       at_lnum = c->lpt_lnum;
+                       at_offs = c->lpt_offs;
+               }
+               if (at_lnum == lnum && at_offs == offs)
+                       return test_bit(DIRTY_CNODE, &nnode->flags) ? 1 : 0;
+       }
+       /* No nnode lives at this position any more */
+       return 1;
+}
+
+/**
+ * dbg_is_pnode_dirty - determine if a pnode is dirty.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB number where pnode was written
+ * @offs: offset where pnode was written
+ *
+ * Every pnode is looked up in turn in search of the one whose parent's branch
+ * records it at @lnum:@offs. This function returns %1 if that pnode is dirty
+ * or if no such pnode is found, %0 if it is found and is not dirty, and a
+ * negative error code if a pnode lookup fails.
+ */
+static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs)
+{
+       int n, pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);
+
+       for (n = 0; n < pnode_cnt; n++) {
+               struct ubifs_nbranch *br;
+               struct ubifs_pnode *pnode;
+
+               cond_resched();
+               pnode = pnode_lookup(c, n);
+               if (IS_ERR(pnode))
+                       return PTR_ERR(pnode);
+               br = &pnode->parent->nbranch[pnode->iip];
+               if (br->lnum == lnum && br->offs == offs)
+                       return test_bit(DIRTY_CNODE, &pnode->flags) ? 1 : 0;
+       }
+       /* No pnode lives at this position any more */
+       return 1;
+}
+
+/**
+ * dbg_is_ltab_dirty - determine if a ltab node is dirty.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB number where ltab node was written
+ * @offs: offset where ltab node was written
+ *
+ * This function returns %1 if @lnum:@offs is not the recorded ltab position
+ * or if the ltab is flagged dirty, and %0 otherwise.
+ */
+static int dbg_is_ltab_dirty(struct ubifs_info *c, int lnum, int offs)
+{
+       if (lnum == c->ltab_lnum && offs == c->ltab_offs)
+               return (c->lpt_drty_flgs & LTAB_DIRTY) != 0;
+       return 1;
+}
+
+/**
+ * dbg_is_lsave_dirty - determine if a lsave node is dirty.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB number where lsave node was written
+ * @offs: offset where lsave node was written
+ *
+ * This function returns %1 if @lnum:@offs is not the recorded lsave position
+ * or if the lsave is flagged dirty, and %0 otherwise.
+ */
+static int dbg_is_lsave_dirty(struct ubifs_info *c, int lnum, int offs)
+{
+       if (lnum == c->lsave_lnum && offs == c->lsave_offs)
+               return (c->lpt_drty_flgs & LSAVE_DIRTY) != 0;
+       return 1;
+}
+
+/**
+ * dbg_is_node_dirty - determine if a node is dirty.
+ * @c: the UBIFS file-system description object
+ * @node_type: node type
+ * @lnum: LEB number where node was written
+ * @offs: offset where node was written
+ *
+ * This function returns whatever the checker for @node_type returns, or %1 if
+ * @node_type is not a known LPT node type.
+ */
+static int dbg_is_node_dirty(struct ubifs_info *c, int node_type, int lnum,
+                            int offs)
+{
+       if (node_type == UBIFS_LPT_NNODE)
+               return dbg_is_nnode_dirty(c, lnum, offs);
+       if (node_type == UBIFS_LPT_PNODE)
+               return dbg_is_pnode_dirty(c, lnum, offs);
+       if (node_type == UBIFS_LPT_LTAB)
+               return dbg_is_ltab_dirty(c, lnum, offs);
+       if (node_type == UBIFS_LPT_LSAVE)
+               return dbg_is_lsave_dirty(c, lnum, offs);
+       return 1;
+}
+
+/**
+ * dbg_check_ltab_lnum - check the ltab for a LPT LEB number.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB number to check
+ *
+ * The LEB is read into @c->dbg_buf and walked node by node. The sizes of the
+ * dirty nodes and of the padding are added up, and once the nodes run out the
+ * rest of the LEB is expected to contain only 0xff bytes. The free and dirty
+ * space found this way are compared with the values held in @c->ltab.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
+{
+       int err, idx, left = c->leb_size, dirty = 0;
+       void *p = c->dbg_buf;
+
+       dbg_lp("LEB %d", lnum);
+       err = ubi_read(c->ubi, lnum, p, 0, c->leb_size);
+       if (err) {
+               dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
+               return err;
+       }
+
+       for (;;) {
+               int node_type, node_num, node_len, pad_len;
+
+               if (is_a_node(c, p, left)) {
+                       node_type = get_lpt_node_type(c, p, &node_num);
+                       node_len = get_lpt_node_len(c, node_type);
+                       if (dbg_is_node_dirty(c, node_type, lnum,
+                                             c->leb_size - left) == 1)
+                               dirty += node_len;
+                       p += node_len;
+                       left -= node_len;
+                       continue;
+               }
+
+               /* Not a node: padding counts as dirty, anything else ends */
+               pad_len = get_pad_len(c, p, left);
+               if (!pad_len)
+                       break;
+               p += pad_len;
+               left -= pad_len;
+               dirty += pad_len;
+       }
+
+       /* What remains is free space: check it and the ltab accounting */
+       idx = lnum - c->lpt_first;
+       if (!dbg_is_all_ff(p, left)) {
+               dbg_msg("invalid empty space in LEB %d at %d",
+                       lnum, c->leb_size - left);
+               err = -EINVAL;
+       }
+       if (left != c->ltab[idx].free) {
+               dbg_msg("invalid free space in LEB %d "
+                       "(free %d, expected %d)",
+                       lnum, left, c->ltab[idx].free);
+               err = -EINVAL;
+       }
+       if (dirty != c->ltab[idx].dirty) {
+               dbg_msg("invalid dirty space in LEB %d "
+                       "(dirty %d, expected %d)",
+                       lnum, dirty, c->ltab[idx].dirty);
+               err = -EINVAL;
+       }
+       return err;
+}
+
+/**
+ * dbg_check_ltab - check the free and dirty space in the ltab.
+ * @c: the UBIFS file-system description object
+ *
+ * Nothing is checked unless %UBIFS_CHK_LPROPS is set in ubifs_chk_flags.
+ * Otherwise every pnode is looked up so that the whole tree is in memory, the
+ * nodes of the tree are checked, and then each LPT LEB from @c->lpt_first to
+ * @c->lpt_last is checked against the ltab.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int dbg_check_ltab(struct ubifs_info *c)
+{
+       int lnum, err, n, pnode_cnt;
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+               return 0;
+
+       /* Bring the entire tree into memory */
+       pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);
+       for (n = 0; n < pnode_cnt; n++) {
+               struct ubifs_pnode *pnode = pnode_lookup(c, n);
+
+               if (IS_ERR(pnode))
+                       return PTR_ERR(pnode);
+               cond_resched();
+       }
+
+       /* Check nodes */
+       err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)c->nroot, 0, 0);
+       if (err)
+               return err;
+
+       /* Check each LEB */
+       lnum = c->lpt_first;
+       while (lnum <= c->lpt_last) {
+               err = dbg_check_ltab_lnum(c, lnum);
+               if (err) {
+                       dbg_err("failed at LEB %d", lnum);
+                       return err;
+               }
+               lnum += 1;
+       }
+
+       dbg_lp("succeeded");
+       return 0;
+}
+
+#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c

new file mode 100644 (file)

index 0000000..71d5493
--- /dev/null
+++ b/fs/ubifs/master.c
@@ -0,0 +1,387 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/* This file implements reading and writing the master node */
+
+#include "ubifs.h"
+
+/**
+ * scan_for_master - search for the valid master node.
+ * @c: UBIFS file-system description object
+ *
+ * This function scans the master node LEBs and searches for the latest master
+ * node. Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int scan_for_master(struct ubifs_info *c)
+{
+       struct ubifs_scan_leb *sleb;
+       struct ubifs_scan_node *snod;
+       int lnum, offs = 0, nodes_cnt;
+
+       lnum = UBIFS_MST_LNUM;
+
+       sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+       if (IS_ERR(sleb))
+               return PTR_ERR(sleb);
+       nodes_cnt = sleb->nodes_cnt;
+       if (nodes_cnt > 0) {
+               snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
+                                 list);
+               if (snod->type != UBIFS_MST_NODE)
+                       goto out;
+               memcpy(c->mst_node, snod->node, snod->len);
+               offs = snod->offs;
+       }
+       ubifs_scan_destroy(sleb);
+
+       lnum += 1;
+
+       sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+       if (IS_ERR(sleb))
+               return PTR_ERR(sleb);
+       if (sleb->nodes_cnt != nodes_cnt)
+               goto out;
+       if (!sleb->nodes_cnt)
+               goto out;
+       snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
+       if (snod->type != UBIFS_MST_NODE)
+               goto out;
+       if (snod->offs != offs)
+               goto out;
+       if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
+                  (void *)snod->node + UBIFS_CH_SZ,
+                  UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
+               goto out;
+       c->mst_offs = offs;
+       ubifs_scan_destroy(sleb);
+       return 0;
+
+out:
+       ubifs_scan_destroy(sleb);
+       return -EINVAL;
+}
+
+/**
+ * validate_master - validate master node.
+ * @c: UBIFS file-system description object
+ *
+ * Sanity-check the values read from the master node. Returns zero if they are
+ * all right; else logs the number of the failed check and returns %-EINVAL.
+ */
+static int validate_master(const struct ubifs_info *c)
+{
+       long long main_sz;
+       int err;
+
+       if (c->max_sqnum >= SQNUM_WATERMARK) {
+               err = 1;
+               goto out;
+       }
+
+       if (c->cmt_no >= c->max_sqnum) {
+               err = 2;
+               goto out;
+       }
+
+       if (c->highest_inum >= INUM_WATERMARK) {
+               err = 3;
+               goto out;
+       }
+
+       if (c->lhead_lnum < UBIFS_LOG_LNUM ||
+           c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs ||
+           c->lhead_offs < 0 || c->lhead_offs >= c->leb_size ||
+           c->lhead_offs & (c->min_io_size - 1)) {
+               err = 4;
+               goto out;
+       }
+
+       if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first ||
+           c->zroot.offs >= c->leb_size || c->zroot.offs & 7) {
+               err = 5;
+               goto out;
+       }
+
+       if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len ||
+           c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) {
+               err = 6;
+               goto out;
+       }
+
+       if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) {
+               err = 7;
+               goto out;
+       }
+
+       if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first ||
+           c->ihead_offs % c->min_io_size || c->ihead_offs < 0 ||
+           c->ihead_offs > c->leb_size || c->ihead_offs & 7) {
+               err = 8;
+               goto out;
+       }
+
+       main_sz = (long long)c->main_lebs * c->leb_size;
+       if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
+               err = 9;
+               goto out;
+       }
+
+       if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last ||
+           c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) {
+               err = 10;
+               goto out;
+       }
+
+       if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last ||
+           c->nhead_offs < 0 || c->nhead_offs % c->min_io_size ||
+           c->nhead_offs > c->leb_size) {
+               err = 11;
+               goto out;
+       }
+
+       if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last ||
+           c->ltab_offs < 0 ||
+           c->ltab_offs + c->ltab_sz > c->leb_size) {
+               err = 12;
+               goto out;
+       }
+
+       if (c->big_lpt && (c->lsave_lnum < c->lpt_first ||
+           c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 ||
+           c->lsave_offs + c->lsave_sz > c->leb_size)) {
+               err = 13;
+               goto out;
+       }
+
+       if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) {
+               err = 14;
+               goto out;
+       }
+
+       if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) {
+               err = 15;
+               goto out;
+       }
+
+       if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) {
+               err = 16;
+               goto out;
+       }
+
+       if (c->lst.total_free < 0 || c->lst.total_free > main_sz ||
+           c->lst.total_free & 7) {
+               err = 17;
+               goto out;
+       }
+
+       if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) {
+               err = 18;
+               goto out;
+       }
+
+       if (c->lst.total_used < 0 || (c->lst.total_used & 7)) {
+               err = 19;
+               goto out;
+       }
+
+       if (c->lst.total_free + c->lst.total_dirty +
+           c->lst.total_used > main_sz) {
+               err = 20;
+               goto out;
+       }
+
+       if (c->lst.total_dead + c->lst.total_dark +
+           c->lst.total_used + c->old_idx_sz > main_sz) {
+               err = 21;
+               goto out;
+       }
+
+       if (c->lst.total_dead < 0 ||
+           c->lst.total_dead > c->lst.total_free + c->lst.total_dirty ||
+           c->lst.total_dead & 7) {
+               err = 22;
+               goto out;
+       }
+
+       if (c->lst.total_dark < 0 ||
+           c->lst.total_dark > c->lst.total_free + c->lst.total_dirty ||
+           c->lst.total_dark & 7) {
+               err = 23;
+               goto out;
+       }
+
+       return 0;
+
+out:
+       ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
+       dbg_dump_node(c, c->mst_node);
+       return -EINVAL;
+}
+
+/**
+ * ubifs_read_master - read master node.
+ * @c: UBIFS file-system description object
+ *
+ * This function finds and reads the master node during file-system mount. If
+ * the scan fails, it falls back to master node recovery. Returns zero in case
+ * of success and a negative error code in case of failure.
+ */
+int ubifs_read_master(struct ubifs_info *c)
+{
+       int err, old_leb_cnt;
+
+       c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL);
+       if (!c->mst_node)
+               return -ENOMEM;
+
+       err = scan_for_master(c);
+       if (err) {
+               err = ubifs_recover_master_node(c);
+               if (err)
+                       /*
+                        * Note, we do not free 'c->mst_node' here because the
+                        * unmount routine will take care of this.
+                        */
+                       return err;
+       }
+
+       /* Make sure that the recovery flag is clear */
+       c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY);
+
+       c->max_sqnum       = le64_to_cpu(c->mst_node->ch.sqnum);
+       c->highest_inum    = le64_to_cpu(c->mst_node->highest_inum);
+       c->cmt_no          = le64_to_cpu(c->mst_node->cmt_no);
+       c->zroot.lnum      = le32_to_cpu(c->mst_node->root_lnum);
+       c->zroot.offs      = le32_to_cpu(c->mst_node->root_offs);
+       c->zroot.len       = le32_to_cpu(c->mst_node->root_len);
+       c->lhead_lnum      = le32_to_cpu(c->mst_node->log_lnum);
+       c->gc_lnum         = le32_to_cpu(c->mst_node->gc_lnum);
+       c->ihead_lnum      = le32_to_cpu(c->mst_node->ihead_lnum);
+       c->ihead_offs      = le32_to_cpu(c->mst_node->ihead_offs);
+       c->old_idx_sz      = le64_to_cpu(c->mst_node->index_size);
+       c->lpt_lnum        = le32_to_cpu(c->mst_node->lpt_lnum);
+       c->lpt_offs        = le32_to_cpu(c->mst_node->lpt_offs);
+       c->nhead_lnum      = le32_to_cpu(c->mst_node->nhead_lnum);
+       c->nhead_offs      = le32_to_cpu(c->mst_node->nhead_offs);
+       c->ltab_lnum       = le32_to_cpu(c->mst_node->ltab_lnum);
+       c->ltab_offs       = le32_to_cpu(c->mst_node->ltab_offs);
+       c->lsave_lnum      = le32_to_cpu(c->mst_node->lsave_lnum);
+       c->lsave_offs      = le32_to_cpu(c->mst_node->lsave_offs);
+       c->lscan_lnum      = le32_to_cpu(c->mst_node->lscan_lnum);
+       c->lst.empty_lebs  = le32_to_cpu(c->mst_node->empty_lebs);
+       c->lst.idx_lebs    = le32_to_cpu(c->mst_node->idx_lebs);
+       old_leb_cnt        = le32_to_cpu(c->mst_node->leb_cnt);
+       c->lst.total_free  = le64_to_cpu(c->mst_node->total_free);
+       c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty);
+       c->lst.total_used  = le64_to_cpu(c->mst_node->total_used);
+       c->lst.total_dead  = le64_to_cpu(c->mst_node->total_dead);
+       c->lst.total_dark  = le64_to_cpu(c->mst_node->total_dark);
+
+       c->calc_idx_sz = c->old_idx_sz;
+
+       if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
+               c->no_orphs = 1;
+
+       if (old_leb_cnt != c->leb_cnt) {
+               /* The file system has been resized */
+               int growth = c->leb_cnt - old_leb_cnt;
+
+               if (c->leb_cnt < old_leb_cnt ||
+                   c->leb_cnt < UBIFS_MIN_LEB_CNT) {
+                       ubifs_err("bad leb_cnt on master node");
+                       dbg_dump_node(c, c->mst_node);
+                       return -EINVAL;
+               }
+
+               dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs",
+                       old_leb_cnt, c->leb_cnt);
+               c->lst.empty_lebs += growth;
+               c->lst.total_free += growth * (long long)c->leb_size;
+               c->lst.total_dark += growth * (long long)c->dark_wm;
+
+               /*
+                * Reflect changes back onto the master node. N.B. the master
+                * node gets written immediately whenever mounting (or
+                * remounting) in read-write mode, so we do not need to write it
+                * here.
+                */
+               c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt);
+               c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs);
+               c->mst_node->total_free = cpu_to_le64(c->lst.total_free);
+               c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark);
+       }
+
+       err = validate_master(c);
+       if (err)
+               return err;
+
+       err = dbg_old_index_check_init(c, &c->zroot);
+
+       return err;
+}
+
+/**
+ * ubifs_write_master - write master node.
+ * @c: UBIFS file-system description object
+ *
+ * This function writes the master node twice, to two consecutive LEBs, to
+ * enable recovery. The caller has to take the @c->mst_mutex lock before
+ * calling this function. Returns zero in case of success, %-EROFS if the
+ * media is read-only and a negative error code in case of other failures.
+ */
+int ubifs_write_master(struct ubifs_info *c)
+{
+       int err, lnum, offs, len;
+
+       if (c->ro_media)
+               return -EROFS;
+
+       lnum = UBIFS_MST_LNUM;
+       offs = c->mst_offs + c->mst_node_alsz;
+       len = UBIFS_MST_NODE_SZ;
+
+       if (offs + UBIFS_MST_NODE_SZ > c->leb_size) {
+               err = ubifs_leb_unmap(c, lnum);
+               if (err)
+                       return err;
+               offs = 0;
+       }
+
+       c->mst_offs = offs;
+       c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);
+
+       err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
+       if (err)
+               return err;
+
+       lnum += 1;
+
+       if (offs == 0) {
+               err = ubifs_leb_unmap(c, lnum);
+               if (err)
+                       return err;
+       }
+       err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
+
+       return err;
+}
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h

new file mode 100644 (file)

index 0000000..4beccfc
--- /dev/null
+++ b/fs/ubifs/misc.h
@@ -0,0 +1,342 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file contains miscellaneous helper functions.
+ */
+
+#ifndef __UBIFS_MISC_H__
+#define __UBIFS_MISC_H__
+
+/**
+ * ubifs_zn_dirty - test whether a znode is dirty.
+ * @znode: znode to check
+ *
+ * Returns %1 when the %DIRTY_ZNODE bit is set in @znode->flags, %0 otherwise.
+ */
+static inline int ubifs_zn_dirty(const struct ubifs_znode *znode)
+{
+       return test_bit(DIRTY_ZNODE, &znode->flags) ? 1 : 0;
+}
+
+/**
+ * ubifs_wake_up_bgt - wake up the background thread, if there is one.
+ * @c: UBIFS file-system description object
+ */
+static inline void ubifs_wake_up_bgt(struct ubifs_info *c)
+{
+       if (!c->bgt || c->need_bgt)
+               return;
+       c->need_bgt = 1;
+       wake_up_process(c->bgt);
+}
+
+/**
+ * ubifs_tnc_find_child - find next child in znode.
+ * @znode: znode to search at
+ * @start: the zbranch index to start at
+ *
+ * Walks the zbranches of @znode from index @start upwards and returns the
+ * first child znode found, or %NULL if none of them has one.
+ */
+static inline struct ubifs_znode *
+ubifs_tnc_find_child(struct ubifs_znode *znode, int start)
+{
+       int n;
+
+       for (n = start; n < znode->child_cnt; n++)
+               if (znode->zbranch[n].znode)
+                       return znode->zbranch[n].znode;
+
+       return NULL;
+}
+
+/**
+ * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object.
+ * @inode: VFS inode, which must be the 'vfs_inode' field of a UBIFS inode
+ */
+static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
+{
+       return container_of(inode, struct ubifs_inode, vfs_inode);
+}
+
+/**
+ * ubifs_ro_mode - switch UBIFS to read-only mode.
+ * @c: UBIFS file-system description object
+ * @err: error code which is the reason of switching to R/O mode
+ */
+static inline void ubifs_ro_mode(struct ubifs_info *c, int err)
+{
+       if (c->ro_media)
+               return;
+       c->ro_media = 1;
+       ubifs_warn("switched to read-only mode, error %d", err);
+       dbg_dump_stack();
+}
+
+/**
+ * ubifs_compr_present - check if compressor was compiled in.
+ * @compr_type: compressor type to check
+ *
+ * This function returns %1 if compressor of type @compr_type is present, and
+ * %0 if not.
+ */
+static inline int ubifs_compr_present(int compr_type)
+{
+       ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
+       return ubifs_compressors[compr_type]->capi_name ? 1 : 0;
+}
+
+/**
+ * ubifs_compr_name - get compressor name string by its type.
+ * @compr_type: compressor type
+ *
+ * This function returns the name of the compressor of type @compr_type.
+ */
+static inline const char *ubifs_compr_name(int compr_type)
+{
+       ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
+       return ubifs_compressors[compr_type]->name;
+}
+
+/**
+ * ubifs_wbuf_sync - synchronize write-buffer.
+ * @wbuf: write-buffer to synchronize
+ *
+ * This is the same as 'ubifs_wbuf_sync_nolock()' but it does not assume
+ * that the write-buffer is already locked.
+ */
+static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf)
+{
+       int err;
+
+       mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+       err = ubifs_wbuf_sync_nolock(wbuf);
+       mutex_unlock(&wbuf->io_mutex);
+       return err;
+}
+
+/**
+ * ubifs_leb_unmap - unmap an LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to unmap
+ *
+ * Returns %0 on success, %-EROFS on read-only media, or the UBI error code.
+ */
+static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
+{
+       int ret;
+
+       /* Never touch the flash once the media has been marked read-only */
+       if (c->ro_media)
+               return -EROFS;
+       ret = ubi_leb_unmap(c->ubi, lnum);
+       if (!ret)
+               return 0;
+
+       ubifs_err("unmap LEB %d failed, error %d", lnum, ret);
+       return ret;
+}
+
+/**
+ * ubifs_leb_write - write to a LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to write
+ * @buf: buffer to write from
+ * @offs: offset within LEB to write to
+ * @len: length to write
+ * @dtype: data type
+ *
+ * Returns %0 on success, %-EROFS on read-only media, or the UBI error code.
+ */
+static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
+                                 const void *buf, int offs, int len, int dtype)
+{
+       int ret;
+
+       /* Never touch the flash once the media has been marked read-only */
+       if (c->ro_media)
+               return -EROFS;
+       ret = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
+       if (!ret)
+               return 0;
+
+       ubifs_err("writing %d bytes at %d:%d, error %d",
+                 len, lnum, offs, ret);
+       return ret;
+}
+
+/**
+ * ubifs_leb_change - atomic LEB change.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to write
+ * @buf: buffer to write from
+ * @len: length to write
+ * @dtype: data type
+ *
+ * Returns %0 on success, %-EROFS on read-only media, or the UBI error code.
+ */
+static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
+                                  const void *buf, int len, int dtype)
+{
+       int ret;
+
+       /* Never touch the flash once the media has been marked read-only */
+       if (c->ro_media)
+               return -EROFS;
+       ret = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
+       if (!ret)
+               return 0;
+
+       ubifs_err("changing %d bytes in LEB %d, error %d",
+                 len, lnum, ret);
+       return ret;
+}
+
+/**
+ * ubifs_encode_dev - encode device node IDs.
+ * @dev: UBIFS device node information
+ * @rdev: device IDs to encode
+ *
+ * Encodes the major/minor numbers in @rdev into @dev, using the standard Linux
+ * "new" encoding when it is valid for @rdev and the "huge" one otherwise.
+ * Returns the size in bytes of the encoding which was stored.
+ */
+static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev)
+{
+       if (new_valid_dev(rdev)) {
+               dev->new = cpu_to_le32(new_encode_dev(rdev));
+               return sizeof(dev->new);
+       }
+
+       dev->huge = cpu_to_le64(huge_encode_dev(rdev));
+       return sizeof(dev->huge);
+}
+
+/**
+ * ubifs_add_dirt - add dirty space to LEB properties.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB to add dirty space for
+ * @dirty: dirty space to add
+ *
+ * This is a helper function which increases the amount of dirty space of LEB
+ * @lnum. Returns zero on success and a negative error code on failure.
+ */
+static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty)
+{
+       return ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0);
+}
+
+/**
+ * ubifs_return_leb - return LEB to lprops.
+ * @c: the UBIFS file-system description object
+ * @lnum: LEB to return
+ *
+ * This helper function clears the "taken" flag of a logical eraseblock in the
+ * lprops. Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static inline int ubifs_return_leb(struct ubifs_info *c, int lnum)
+{
+       return ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
+                                  LPROPS_TAKEN, 0);
+}
+
+/**
+ * ubifs_idx_node_sz - return index node size for a given number of children.
+ * @c: the UBIFS file-system description object
+ * @child_cnt: number of children of this index node
+ */
+static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt)
+{
+       return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt;
+}
+
+/**
+ * ubifs_idx_branch - return pointer to an index branch.
+ * @c: the UBIFS file-system description object
+ * @idx: index node
+ * @bnum: branch number (a branch takes %UBIFS_BRANCH_SZ + @c->key_len bytes)
+ */
+static inline
+struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c,
+                                     const struct ubifs_idx_node *idx,
+                                     int bnum)
+{
+       return (struct ubifs_branch *)((void *)idx->branches +
+                                      (UBIFS_BRANCH_SZ + c->key_len) * bnum);
+}
+
+/**
+ * ubifs_idx_key - return pointer to the first key of an index node.
+ * @c: the UBIFS file-system description object
+ * @idx: index node
+ */
+static inline void *ubifs_idx_key(const struct ubifs_info *c,
+                                 const struct ubifs_idx_node *idx)
+{
+       return (void *)((struct ubifs_branch *)idx->branches)->key;
+}
+
+/**
+ * ubifs_reported_space - calculate reported free space.
+ * @c: the UBIFS file-system description object
+ * @free: amount of free space
+ *
+ * This function calculates amount of free space which will be reported to
+ * user-space. User-space applications tend to expect that if the file-system
+ * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
+ * are able to write a file of size N. UBIFS attaches node headers to each data
+ * node and it has to write indexing nodes as well. This introduces additional
+ * overhead, and UBIFS has to report slightly less free space to meet the
+ * above expectation.
+ *
+ * This function assumes free space is made up of uncompressed data nodes and
+ * full index nodes (one per data node, doubled because we always allow enough
+ * space to write the index twice).
+ *
+ * Note, the calculation is pessimistic, which means that most of the time
+ * UBIFS reports less space than it actually has.
+ */
+static inline long long ubifs_reported_space(const struct ubifs_info *c,
+                                            uint64_t free)
+{
+       int divisor, factor;
+
+       divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1);
+       factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
+       do_div(free, divisor);
+
+       return free * factor;
+}
+
+/**
+ * ubifs_current_time - round current time to time granularity.
+ * @inode: inode (its superblock supplies the time granularity)
+ */
+static inline struct timespec ubifs_current_time(struct inode *inode)
+{
+       return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
+               current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
+}
+
+#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c

new file mode 100644 (file)

index 0000000..3afeb92
--- /dev/null
+++ b/fs/ubifs/orphan.c
@@ -0,0 +1,958 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author: Adrian Hunter
+ */
+
+#include "ubifs.h"
+
+/*
+ * An orphan is an inode number whose inode node has been committed to the index
+ * with a link count of zero. That happens when an open file is deleted
+ * (unlinked) and then a commit is run. In the normal course of events the inode
+ * would be deleted when the file is closed. However in the case of an unclean
+ * unmount, orphans need to be accounted for. After an unclean unmount, the
+ * orphans' inodes must be deleted which means either scanning the entire index
+ * looking for them, or keeping a list on flash somewhere. This unit implements
+ * the latter approach.
+ *
+ * The orphan area is a fixed number of LEBs situated between the LPT area and
+ * the main area. The number of orphan area LEBs is specified when the file
+ * system is created. The minimum number is 1. The size of the orphan area
+ * should be so that it can hold the maximum number of orphans that are expected
+ * to ever exist at one time.
+ *
+ * The number of orphans that can fit in a LEB is:
+ *
+ *         (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)
+ *
+ * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough.
+ *
+ * Orphans are accumulated in a rb-tree. When an inode's link count drops to
+ * zero, the inode number is added to the rb-tree. It is removed from the tree
+ * when the inode is deleted.  Any new orphans that are in the orphan tree when
+ * the commit is run, are written to the orphan area in 1 or more orph nodes.
+ * If the orphan area is full, it is consolidated to make space.  There is
+ * always enough space because validation prevents the user from creating more
+ * than the maximum number of orphans allowed.
+ */
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+static int dbg_check_orphans(struct ubifs_info *c);
+#else
+#define dbg_check_orphans(c) 0
+#endif
+
+/**
+ * ubifs_add_orphan - add an orphan.
+ * @c: UBIFS file-system description object
+ * @inum: orphan inode number
+ *
+ * Add an orphan. This function is called when an inode's link count drops to
+ * zero. Returns %0 on success and %-ENOMEM or %-ENFILE on failure.
+ */
+int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
+{
+       struct ubifs_orphan *orphan, *o;
+       struct rb_node **p, *parent = NULL;
+
+       orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS);
+       if (!orphan)
+               return -ENOMEM;
+       orphan->inum = inum;
+       orphan->new = 1;
+
+       spin_lock(&c->orphan_lock);
+       if (c->tot_orphans >= c->max_orphans) {
+               spin_unlock(&c->orphan_lock);
+               kfree(orphan);
+               return -ENFILE;
+       }
+       p = &c->orph_tree.rb_node;
+       while (*p) {
+               parent = *p;
+               o = rb_entry(parent, struct ubifs_orphan, rb);
+               if (inum < o->inum)
+                       p = &(*p)->rb_left;
+               else if (inum > o->inum)
+                       p = &(*p)->rb_right;
+               else {
+                       dbg_err("orphaned twice");
+                       spin_unlock(&c->orphan_lock);
+                       kfree(orphan);
+                       return 0;
+               }
+       }
+       c->tot_orphans += 1;
+       c->new_orphans += 1;
+       rb_link_node(&orphan->rb, parent, p);
+       rb_insert_color(&orphan->rb, &c->orph_tree);
+       list_add_tail(&orphan->list, &c->orph_list);
+       list_add_tail(&orphan->new_list, &c->orph_new);
+       spin_unlock(&c->orphan_lock);
+       dbg_gen("ino %lu", (unsigned long)inum);
+       return 0;
+}
+
+/**
+ * ubifs_delete_orphan - delete an orphan.
+ * @c: UBIFS file-system description object
+ * @inum: orphan inode number
+ *
+ * Delete an orphan. This function is called when an inode is deleted.
+ */
+void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
+{
+       struct ubifs_orphan *o;
+       struct rb_node *p;
+
+       spin_lock(&c->orphan_lock);
+       p = c->orph_tree.rb_node;
+       while (p) {
+               o = rb_entry(p, struct ubifs_orphan, rb);
+               if (inum < o->inum)
+                       p = p->rb_left;
+               else if (inum > o->inum)
+                       p = p->rb_right;
+               else {
+                       if (o->dnext) {
+                               spin_unlock(&c->orphan_lock);
+                               dbg_gen("deleted twice ino %lu", (unsigned long)inum);
+                               return;
+                       }
+                       if (o->cnext) {
+                               o->dnext = c->orph_dnext;
+                               c->orph_dnext = o;
+                               spin_unlock(&c->orphan_lock);
+                               dbg_gen("delete later ino %lu", (unsigned long)inum);
+                               return;
+                       }
+                       rb_erase(p, &c->orph_tree);
+                       list_del(&o->list);
+                       c->tot_orphans -= 1;
+                       if (o->new) {
+                               list_del(&o->new_list);
+                               c->new_orphans -= 1;
+                       }
+                       spin_unlock(&c->orphan_lock);
+                       kfree(o);
+                       dbg_gen("inum %lu", (unsigned long)inum);
+                       return;
+               }
+       }
+       spin_unlock(&c->orphan_lock);
+       dbg_err("missing orphan ino %lu", (unsigned long)inum);
+       dbg_dump_stack();
+}
+
+/**
+ * ubifs_orphan_start_commit - start commit of orphans.
+ * @c: UBIFS file-system description object
+ *
+ * Chains the new orphans onto the commit (cnext) list. Always returns %0.
+ */
+int ubifs_orphan_start_commit(struct ubifs_info *c)
+{
+       struct ubifs_orphan *orphan, **last;
+
+       spin_lock(&c->orphan_lock);
+       last = &c->orph_cnext;
+       list_for_each_entry(orphan, &c->orph_new, new_list) {
+               ubifs_assert(orphan->new);
+               orphan->new = 0;
+               *last = orphan;
+               last = &orphan->cnext;
+       }
+       *last = NULL; /* 'orphan' is not a valid entry after the loop */
+       c->cmt_orphans = c->new_orphans;
+       c->new_orphans = 0;
+       dbg_cmt("%d orphans to commit", c->cmt_orphans);
+       INIT_LIST_HEAD(&c->orph_new);
+       if (c->tot_orphans == 0)
+               c->no_orphs = 1;
+       else
+               c->no_orphs = 0;
+       spin_unlock(&c->orphan_lock);
+       return 0;
+}
+
+/**
+ * avail_orphs - calculate available space.
+ * @c: UBIFS file-system description object
+ *
+ * This function returns the number of orphans that can be written in the LEBs
+ * after the orphan head LEB plus the gap left at the end of the head LEB.
+ */
+static int avail_orphs(struct ubifs_info *c)
+{
+       int avail_lebs, avail, gap;
+
+       avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1;
+       avail = avail_lebs *
+              ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64));
+       gap = c->leb_size - c->ohead_offs;
+       if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64))
+               avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64);
+       return avail;
+}
+
+/**
+ * tot_avail_orphs - calculate half of the total orphan area capacity.
+ * @c: UBIFS file-system description object
+ *
+ * This function returns the number of orphans that can be written in half
+ * the total space. That leaves half the space for adding new orphans.
+ */
+static int tot_avail_orphs(struct ubifs_info *c)
+{
+       int avail_lebs, avail;
+
+       avail_lebs = c->orph_lebs;
+       avail = avail_lebs *
+              ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64));
+       return avail / 2;
+}
+
+/**
+ * do_write_orph_node - write a node
+ * @c: UBIFS file-system description object
+ * @len: length of node
+ * @atomic: write atomically
+ *
+ * This function writes the node held in the orphan buffer to the orphan head.
+ * If %atomic is not zero, the node is prepared and the whole orphan head LEB
+ * is replaced with it using 'ubifs_leb_change()'. Otherwise the node is
+ * written at the orphan head offset, and the LEB is un-mapped first if the
+ * offset is zero. On success, %0 is returned, otherwise a negative error code
+ * is returned.
+ */
+static int do_write_orph_node(struct ubifs_info *c, int len, int atomic)
+{
+       int err;
+
+       if (!atomic) {
+               if (c->ohead_offs == 0) {
+                       /* Ensure LEB has been unmapped */
+                       err = ubifs_leb_unmap(c, c->ohead_lnum);
+                       if (err)
+                               return err;
+               }
+               return ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum,
+                                       c->ohead_offs, UBI_SHORTTERM);
+       }
+
+       /* Atomic LEB change always starts at the beginning of the LEB */
+       ubifs_assert(c->ohead_offs == 0);
+       ubifs_prepare_node(c, c->orph_buf, len, 1);
+       return ubifs_leb_change(c, c->ohead_lnum, c->orph_buf,
+                               ALIGN(len, c->min_io_size), UBI_SHORTTERM);
+}
+
+/**
+ * write_orph_node - write an orph node
+ * @c: UBIFS file-system description object
+ * @atomic: write atomically
+ *
+ * This function builds an orph node from the cnext list and writes it to the
+ * orphan head. As many orphans as fit into the space left in the orphan head
+ * LEB are taken from the head of the cnext list; if the current LEB cannot
+ * hold even one inode number, the orphan head is moved to the start of the
+ * next LEB first. The node which empties the commit list is marked by setting
+ * the top bit of its commit number.
+ *
+ * On success, %0 is returned, otherwise a negative error code is returned
+ * (%-EINVAL if the orphan area is exhausted).
+ */
+static int write_orph_node(struct ubifs_info *c, int atomic)
+{
+       struct ubifs_orphan *orphan, *cnext;
+       struct ubifs_orph_node *orph;
+       int gap, err, len, cnt, i;
+
+       ubifs_assert(c->cmt_orphans > 0);
+       /* Space left in the orphan head LEB */
+       gap = c->leb_size - c->ohead_offs;
+       if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) {
+               /* No room for a header plus one inode number - next LEB */
+               c->ohead_lnum += 1;
+               c->ohead_offs = 0;
+               gap = c->leb_size;
+               if (c->ohead_lnum > c->orph_last) {
+                       /*
+                        * We limit the number of orphans so that this should
+                        * never happen.
+                        */
+                       ubifs_err("out of space in orphan area");
+                       return -EINVAL;
+               }
+       }
+       /* How many inode numbers fit, capped by how many are left to commit */
+       cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64);
+       if (cnt > c->cmt_orphans)
+               cnt = c->cmt_orphans;
+       len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64);
+       ubifs_assert(c->orph_buf);
+       orph = c->orph_buf;
+       orph->ch.node_type = UBIFS_ORPH_NODE;
+       spin_lock(&c->orphan_lock);
+       /*
+        * Detach the first 'cnt' orphans from the commit list and record their
+        * inode numbers in the node.
+        */
+       cnext = c->orph_cnext;
+       for (i = 0; i < cnt; i++) {
+               orphan = cnext;
+               orph->inos[i] = cpu_to_le64(orphan->inum);
+               cnext = orphan->cnext;
+               orphan->cnext = NULL;
+       }
+       c->orph_cnext = cnext;
+       c->cmt_orphans -= cnt;
+       spin_unlock(&c->orphan_lock);
+       if (c->cmt_orphans)
+               orph->cmt_no = cpu_to_le64(c->cmt_no + 1);
+       else
+               /* Mark the last node of the commit */
+               orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63));
+       ubifs_assert(c->ohead_offs + len <= c->leb_size);
+       ubifs_assert(c->ohead_lnum >= c->orph_first);
+       ubifs_assert(c->ohead_lnum <= c->orph_last);
+       err = do_write_orph_node(c, len, atomic);
+       /* Note, the orphan head is advanced even if the write has failed */
+       c->ohead_offs += ALIGN(len, c->min_io_size);
+       c->ohead_offs = ALIGN(c->ohead_offs, 8);
+       return err;
+}
+
+/**
+ * write_orph_nodes - write orph nodes until there are no more to commit
+ * @c: UBIFS file-system description object
+ * @atomic: write atomically
+ *
+ * This function writes orph nodes for all the orphans to commit. If @atomic is
+ * not zero, then every LEB of the orphan area which follows the orphan head
+ * LEB is un-mapped afterwards. On success, %0 is returned, otherwise a
+ * negative error code is returned.
+ */
+static int write_orph_nodes(struct ubifs_info *c, int atomic)
+{
+       int err;
+
+       while (c->cmt_orphans > 0) {
+               err = write_orph_node(c, atomic);
+               if (err)
+                       return err;
+       }
+       if (atomic) {
+               int lnum;
+
+               /* Unmap any unused LEBs after consolidation */
+               for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) {
+                       err = ubifs_leb_unmap(c, lnum);
+                       if (err)
+                               return err;
+               }
+       }
+       return 0;
+}
+
+/**
+ * consolidate - consolidate the orphan area.
+ * @c: UBIFS file-system description object
+ *
+ * This function enables consolidation by putting all the orphans into the list
+ * to commit. The list is in the order that the orphans were added, and the
+ * LEBs are written atomically in order, so at no time can orphans be lost by
+ * an unclean unmount.
+ *
+ * Orphans which are still flagged as new are left out of the list. On success
+ * the orphan head is reset to the beginning of the orphan area and
+ * 'c->cmt_orphans' is set to the number of orphans put to the list.
+ *
+ * This function returns %0 on success and a negative error code on failure
+ * (%-EINVAL if the non-new orphans do not fit into the available space).
+ */
+static int consolidate(struct ubifs_info *c)
+{
+       int tot_avail = tot_avail_orphs(c), err = 0;
+
+       spin_lock(&c->orphan_lock);
+       dbg_cmt("there is space for %d orphans and there are %d",
+               tot_avail, c->tot_orphans);
+       if (c->tot_orphans - c->new_orphans <= tot_avail) {
+               struct ubifs_orphan *orphan, **last;
+               int cnt = 0;
+
+               /* Change the cnext list to include all non-new orphans */
+               last = &c->orph_cnext;
+               list_for_each_entry(orphan, &c->orph_list, list) {
+                       if (orphan->new)
+                               continue;
+                       *last = orphan;
+                       last = &orphan->cnext;
+                       cnt += 1;
+               }
+               /*
+                * Terminate the list. Note, 'orphan' must not be dereferenced
+                * here: once the loop has finished it is derived from the list
+                * head and does not point to a real orphan object.
+                */
+               *last = NULL;
+               ubifs_assert(cnt == c->tot_orphans - c->new_orphans);
+               c->cmt_orphans = cnt;
+               c->ohead_lnum = c->orph_first;
+               c->ohead_offs = 0;
+       } else {
+               /*
+                * We limit the number of orphans so that this should
+                * never happen.
+                */
+               ubifs_err("out of space in orphan area");
+               err = -EINVAL;
+       }
+       spin_unlock(&c->orphan_lock);
+       return err;
+}
+
+/**
+ * commit_orphans - commit orphans.
+ * @c: UBIFS file-system description object
+ *
+ * This function writes the orphans to commit to the flash media. If there is
+ * less space available than the orphans to commit need, the orphan area is
+ * consolidated first and the orph nodes are then written atomically. On
+ * success, %0 is returned, otherwise a negative error code is returned.
+ */
+static int commit_orphans(struct ubifs_info *c)
+{
+       int err, atomic = 0;
+
+       ubifs_assert(c->cmt_orphans > 0);
+       if (avail_orphs(c) < c->cmt_orphans) {
+               /* Not enough space to write new orphans, so consolidate */
+               err = consolidate(c);
+               if (err)
+                       return err;
+               atomic = 1;
+       }
+       return write_orph_nodes(c, atomic);
+}
+
+/**
+ * erase_deleted - erase the orphans marked for deletion.
+ * @c: UBIFS file-system description object
+ *
+ * Orphans which are being committed cannot be deleted straight away, so they
+ * are put to the deletion list instead. The recovery adds killed orphans to
+ * the same list. This function walks the deletion list, removes every orphan
+ * on it from the RB-tree and from the orphan list, frees it, and finally
+ * empties the deletion list.
+ */
+static void erase_deleted(struct ubifs_info *c)
+{
+       struct ubifs_orphan *victim, *next;
+
+       spin_lock(&c->orphan_lock);
+       for (victim = c->orph_dnext; victim; victim = next) {
+               /* Fetch the link before the object is freed */
+               next = victim->dnext;
+               ubifs_assert(!victim->new);
+               rb_erase(&victim->rb, &c->orph_tree);
+               list_del(&victim->list);
+               c->tot_orphans--;
+               dbg_gen("deleting orphan ino %lu", victim->inum);
+               kfree(victim);
+       }
+       c->orph_dnext = NULL;
+       spin_unlock(&c->orphan_lock);
+}
+
+/**
+ * ubifs_orphan_end_commit - end commit of orphans.
+ * @c: UBIFS file-system description object
+ *
+ * This function commits the orphans to commit (if there are any), erases the
+ * orphans which sit on the deletion list and then runs the orphan debugging
+ * check. Returns %0 on success and a negative error code on failure.
+ */
+int ubifs_orphan_end_commit(struct ubifs_info *c)
+{
+       if (c->cmt_orphans) {
+               int err = commit_orphans(c);
+
+               if (err)
+                       return err;
+       }
+       erase_deleted(c);
+       return dbg_check_orphans(c);
+}
+
+/**
+ * clear_orphans - erase all LEBs used for orphans.
+ * @c: UBIFS file-system description object
+ *
+ * If recovery is not required, then the orphans from the previous session
+ * are not needed. This function un-maps every LEB of the orphan area and
+ * resets the orphan head to the beginning of the area. Returns %0 on success
+ * and a negative error code if un-mapping a LEB fails.
+ */
+static int clear_orphans(struct ubifs_info *c)
+{
+       int lnum = c->orph_first;
+
+       while (lnum <= c->orph_last) {
+               int err = ubifs_leb_unmap(c, lnum);
+
+               if (err)
+                       return err;
+               lnum += 1;
+       }
+       c->ohead_offs = 0;
+       c->ohead_lnum = c->orph_first;
+       return 0;
+}
+
+/**
+ * insert_dead_orphan - insert an orphan.
+ * @c: UBIFS file-system description object
+ * @inum: orphan inode number
+ *
+ * This function is a helper to the 'do_kill_orphans()' function. The orphan
+ * must be kept until the next commit, so it is added to the rb-tree, to the
+ * orphan list and to the deletion list. If @inum is already in the rb-tree,
+ * nothing is changed. Returns %0 on success and %-ENOMEM if the orphan object
+ * cannot be allocated.
+ */
+static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
+{
+       struct rb_node **link = &c->orph_tree.rb_node, *parent = NULL;
+       struct ubifs_orphan *node;
+
+       node = kzalloc(sizeof(*node), GFP_KERNEL);
+       if (!node)
+               return -ENOMEM;
+       node->inum = inum;
+
+       while (*link) {
+               struct ubifs_orphan *cur;
+
+               parent = *link;
+               cur = rb_entry(parent, struct ubifs_orphan, rb);
+               if (inum == cur->inum) {
+                       /* Already added - no problem */
+                       kfree(node);
+                       return 0;
+               }
+               if (inum < cur->inum)
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+       rb_link_node(&node->rb, parent, link);
+       rb_insert_color(&node->rb, &c->orph_tree);
+       list_add_tail(&node->list, &c->orph_list);
+       /* Push to the front of the deletion list */
+       node->dnext = c->orph_dnext;
+       c->orph_dnext = node;
+       c->tot_orphans += 1;
+       dbg_mnt("ino %lu, new %d, tot %d",
+               inum, c->new_orphans, c->tot_orphans);
+       return 0;
+}
+
+/**
+ * do_kill_orphans - remove orphan inodes from the index.
+ * @c: UBIFS file-system description object
+ * @sleb: scanned LEB
+ * @last_cmt_no: cmt_no of last orph node read is passed and returned here
+ * @outofdate: whether the LEB is out of date is returned here
+ * @last_flagged: whether the end orph node is encountered
+ *
+ * This function is a helper to the 'kill_orphans()' function. It goes through
+ * every orphan node in a LEB and for every inode number recorded, removes
+ * all keys for that inode from the TNC.
+ *
+ * This function returns %0 on success, including the case when the LEB turns
+ * out to be out of date (then %1 is stored in @outofdate and the rest of the
+ * LEB is not processed), and a negative error code on failure.
+ */
+static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+                          unsigned long long *last_cmt_no, int *outofdate,
+                          int *last_flagged)
+{
+       struct ubifs_scan_node *snod;
+       struct ubifs_orph_node *orph;
+       unsigned long long cmt_no;
+       ino_t inum;
+       int i, n, err, first = 1;
+
+       list_for_each_entry(snod, &sleb->nodes, list) {
+               if (snod->type != UBIFS_ORPH_NODE) {
+                       ubifs_err("invalid node type %d in orphan area at "
+                                 "%d:%d", snod->type, sleb->lnum, snod->offs);
+                       dbg_dump_node(c, snod->node);
+                       return -EINVAL;
+               }
+
+               orph = snod->node;
+
+               /* Check commit number */
+               /* The top bit is the "last node of the commit" flag - mask it */
+               cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX;
+               /*
+                * The commit number on the master node may be less, because
+                * of a failed commit. If there are several failed commits in a
+                * row, the commit number written on orph nodes will continue to
+                * increase (because the commit number is adjusted here) even
+                * though the commit number on the master node stays the same
+                * because the master node has not been re-written.
+                */
+               if (cmt_no > c->cmt_no)
+                       c->cmt_no = cmt_no;
+               if (cmt_no < *last_cmt_no && *last_flagged) {
+                       /*
+                        * The last orph node had a higher commit number and was
+                        * flagged as the last written for that commit number.
+                        * That makes this orph node, out of date.
+                        */
+                       /*
+                        * This is tolerated only for the first node of the LEB
+                        * (then the whole LEB is out of date), otherwise it is
+                        * treated as an error.
+                        */
+                       if (!first) {
+                               ubifs_err("out of order commit number %llu in "
+                                         "orphan node at %d:%d",
+                                         cmt_no, sleb->lnum, snod->offs);
+                               dbg_dump_node(c, snod->node);
+                               return -EINVAL;
+                       }
+                       dbg_rcvry("out of date LEB %d", sleb->lnum);
+                       *outofdate = 1;
+                       return 0;
+               }
+
+               if (first)
+                       first = 0;
+
+               /* Number of 64-bit inode numbers which follow the node header */
+               n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
+               for (i = 0; i < n; i++) {
+                       inum = le64_to_cpu(orph->inos[i]);
+                       dbg_rcvry("deleting orphaned inode %lu", inum);
+                       err = ubifs_tnc_remove_ino(c, inum);
+                       if (err)
+                               return err;
+                       err = insert_dead_orphan(c, inum);
+                       if (err)
+                               return err;
+               }
+
+               /* Remember the commit number and the flag for the next node */
+               *last_cmt_no = cmt_no;
+               if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) {
+                       dbg_rcvry("last orph node for commit %llu at %d:%d",
+                                 cmt_no, sleb->lnum, snod->offs);
+                       *last_flagged = 1;
+               } else
+                       *last_flagged = 0;
+       }
+
+       return 0;
+}
+
+/**
+ * kill_orphans - remove all orphan inodes from the index.
+ * @c: UBIFS file-system description object
+ *
+ * If recovery is required, then orphan inodes recorded during the previous
+ * session (which ended with an unclean unmount) must be deleted from the index.
+ * This is done by updating the TNC, but since the index is not updated until
+ * the next commit, the LEBs where the orphan information is recorded are not
+ * erased until the next commit.
+ *
+ * This function also sets the orphan head ('c->ohead_lnum', 'c->ohead_offs')
+ * according to the last orphan LEB which was processed and has a non-zero
+ * scan end point. Returns %0 on success and a negative error code on failure.
+ */
+static int kill_orphans(struct ubifs_info *c)
+{
+       unsigned long long last_cmt_no = 0;
+       int lnum, err = 0, outofdate = 0, last_flagged = 0;
+
+       /* Start with the orphan head at the beginning of the orphan area */
+       c->ohead_lnum = c->orph_first;
+       c->ohead_offs = 0;
+       /* Check no-orphans flag and skip this if no orphans */
+       if (c->no_orphs) {
+               dbg_rcvry("no orphans");
+               return 0;
+       }
+       /*
+        * Orph nodes always start at c->orph_first and are written to each
+        * successive LEB in turn. Generally unused LEBs will have been unmapped
+        * but may contain out of date orph nodes if the unmap didn't go
+        * through. In addition, the last orph node written for each commit is
+        * marked (top bit of orph->cmt_no is set to 1). It is possible that
+        * there are orph nodes from the next commit (i.e. the commit did not
+        * complete successfully). In that case, no orphans will have been lost
+        * due to the way that orphans are written, and any orphans added will
+        * be valid orphans anyway and so can be deleted.
+        */
+       for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
+               struct ubifs_scan_leb *sleb;
+
+               dbg_rcvry("LEB %d", lnum);
+               sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+               if (IS_ERR(sleb)) {
+                       /* Plain scan failed - fall back to LEB recovery */
+                       sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
+                       if (IS_ERR(sleb)) {
+                               err = PTR_ERR(sleb);
+                               break;
+                       }
+               }
+               err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate,
+                                     &last_flagged);
+               /* Stop at the first error or at the first out of date LEB */
+               if (err || outofdate) {
+                       ubifs_scan_destroy(sleb);
+                       break;
+               }
+               /*
+                * NOTE(review): 'sleb->endpt' is presumably the offset just
+                * past the last node found by the scan - confirm against
+                * 'ubifs_scan()'.
+                */
+               if (sleb->endpt) {
+                       c->ohead_lnum = lnum;
+                       c->ohead_offs = sleb->endpt;
+               }
+               ubifs_scan_destroy(sleb);
+       }
+       return err;
+}
+
+/**
+ * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them.
+ * @c: UBIFS file-system description object
+ * @unclean: indicates recovery from unclean unmount
+ * @read_only: indicates read only mount
+ *
+ * This function is called when mounting. It sets the limit on the number of
+ * orphans and, unless mounting read-only, allocates the orphan buffer. Then,
+ * if UBIFS was not unmounted cleanly, the inodes recorded as orphans are
+ * deleted; otherwise, unless mounting read-only, the orphan area is erased.
+ * Returns %0 on success and a negative error code on failure.
+ */
+int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only)
+{
+       c->max_orphans = tot_avail_orphs(c);
+
+       if (!read_only) {
+               c->orph_buf = vmalloc(c->leb_size);
+               if (!c->orph_buf)
+                       return -ENOMEM;
+       }
+
+       if (unclean)
+               return kill_orphans(c);
+       if (read_only)
+               /* Clean read-only mount - nothing to do */
+               return 0;
+       return clear_orphans(c);
+}
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+/**
+ * struct check_orphan - an inode number read from the orphan area.
+ * @rb: link in the RB-tree rooted at the 'root' field of &struct check_info
+ * @inum: orphan inode number, the key of the RB-tree
+ */
+struct check_orphan {
+       struct rb_node rb;
+       ino_t inum;
+};
+
+/**
+ * struct check_info - state of the orphan debugging check.
+ * @last_ino: inode number of the key most recently passed to
+ *            'dbg_orphan_check()'
+ * @tot_inos: incremented every time 'dbg_orphan_check()' sees a key whose
+ *            inode number differs from @last_ino
+ * @missing: count of inodes with zero 'nlink' which were found neither in
+ *           @root nor in the orphan RB-tree of the file-system
+ * @leaf_cnt: count of successful 'dbg_orphan_check()' invocations
+ * @node: buffer the inode nodes are read to
+ * @root: RB-tree of &struct check_orphan objects built by
+ *        'dbg_read_orphans()'
+ */
+struct check_info {
+       unsigned long last_ino;
+       unsigned long tot_inos;
+       unsigned long missing;
+       unsigned long long leaf_cnt;
+       struct ubifs_ino_node *node;
+       struct rb_root root;
+};
+
+/*
+ * Look @inum up in the orphan RB-tree of the file-system under the orphan
+ * lock. Returns %1 if it is there and %0 if not.
+ */
+static int dbg_find_orphan(struct ubifs_info *c, ino_t inum)
+{
+       struct rb_node *node;
+       int found = 0;
+
+       spin_lock(&c->orphan_lock);
+       node = c->orph_tree.rb_node;
+       while (node) {
+               struct ubifs_orphan *cur;
+
+               cur = rb_entry(node, struct ubifs_orphan, rb);
+               if (inum == cur->inum) {
+                       found = 1;
+                       break;
+               }
+               node = inum < cur->inum ? node->rb_left : node->rb_right;
+       }
+       spin_unlock(&c->orphan_lock);
+       return found;
+}
+
+/*
+ * Add @inum to the RB-tree of check orphans rooted at @root. It is not an
+ * error if @inum is in the tree already. Returns %0 on success and %-ENOMEM
+ * if the new tree entry cannot be allocated.
+ */
+static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum)
+{
+       struct rb_node **link = &root->rb_node, *parent = NULL;
+       struct check_orphan *entry;
+
+       entry = kzalloc(sizeof(*entry), GFP_NOFS);
+       if (!entry)
+               return -ENOMEM;
+       entry->inum = inum;
+
+       while (*link) {
+               struct check_orphan *cur;
+
+               parent = *link;
+               cur = rb_entry(parent, struct check_orphan, rb);
+               if (inum == cur->inum) {
+                       /* Duplicate - drop the new entry */
+                       kfree(entry);
+                       return 0;
+               }
+               if (inum < cur->inum)
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+       rb_link_node(&entry->rb, parent, link);
+       rb_insert_color(&entry->rb, root);
+       return 0;
+}
+
+/*
+ * Look @inum up in the RB-tree of check orphans rooted at @root. Returns %1
+ * if it is there and %0 if not.
+ */
+static int dbg_find_check_orphan(struct rb_root *root, ino_t inum)
+{
+       struct rb_node *node = root->rb_node;
+
+       while (node) {
+               struct check_orphan *cur;
+
+               cur = rb_entry(node, struct check_orphan, rb);
+               if (inum == cur->inum)
+                       return 1;
+               node = inum < cur->inum ? node->rb_left : node->rb_right;
+       }
+       return 0;
+}
+
+/*
+ * Free every &struct check_orphan object of the RB-tree rooted at @root. The
+ * tree is torn down without recursion: descend to a node which has no
+ * children, detach it from its parent, free it and carry on from the parent.
+ * Note, 'root->rb_node' itself is not reset by this function.
+ */
+static void dbg_free_check_tree(struct rb_root *root)
+{
+       struct rb_node *this = root->rb_node;
+       struct check_orphan *o;
+
+       while (this) {
+               /* Go down until a node without children is reached */
+               if (this->rb_left) {
+                       this = this->rb_left;
+                       continue;
+               } else if (this->rb_right) {
+                       this = this->rb_right;
+                       continue;
+               }
+               o = rb_entry(this, struct check_orphan, rb);
+               this = rb_parent(this);
+               if (this) {
+                       /* Clear the parent's link so the node is not revisited */
+                       if (this->rb_left == &o->rb)
+                               this->rb_left = NULL;
+                       else
+                               this->rb_right = NULL;
+               }
+               kfree(o);
+       }
+}
+
+/*
+ * Index walk call-back of the orphan check. @priv points to the
+ * &struct check_info object. Each time the inode number of the key differs
+ * from the previous one, the node is read and, if its 'nlink' is zero, the
+ * inode has to be found either among the orphans read from the flash or
+ * among the orphans of the file-system, otherwise it is counted as missing.
+ * Returns %0 on success and a negative error code if the node read fails.
+ */
+static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+                           void *priv)
+{
+       struct check_info *ci = priv;
+       ino_t inum = key_inum(c, &zbr->key);
+       int err;
+
+       if (inum == ci->last_ino) {
+               /* Another key of the inode which was checked already */
+               ci->leaf_cnt += 1;
+               return 0;
+       }
+
+       /* Lowest node type is the inode node, so it comes first */
+       if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
+               ubifs_err("found orphan node ino %lu, type %d", inum,
+                         key_type(c, &zbr->key));
+       ci->last_ino = inum;
+       ci->tot_inos += 1;
+       err = ubifs_tnc_read_node(c, zbr, ci->node);
+       if (err) {
+               ubifs_err("node read failed, error %d", err);
+               return err;
+       }
+       /* An inode without links must be recorded as an orphan */
+       if (ci->node->nlink == 0 &&
+           !dbg_find_check_orphan(&ci->root, inum) &&
+           !dbg_find_orphan(c, inum)) {
+               ubifs_err("missing orphan, ino %lu", inum);
+               ci->missing += 1;
+       }
+       ci->leaf_cnt += 1;
+       return 0;
+}
+
+/*
+ * Add every inode number recorded in the orph nodes of scanned LEB @sleb to
+ * the RB-tree of check orphans of @ci. Nodes of other types are skipped.
+ * Returns %0 on success and a negative error code on failure.
+ */
+static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb)
+{
+       struct ubifs_scan_node *snod;
+
+       list_for_each_entry(snod, &sleb->nodes, list) {
+               struct ubifs_orph_node *orph;
+               int cnt, k;
+
+               cond_resched();
+               if (snod->type != UBIFS_ORPH_NODE)
+                       continue;
+               orph = snod->node;
+               /* Number of 64-bit inode numbers which follow the header */
+               cnt = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
+               for (k = 0; k < cnt; k++) {
+                       ino_t inum = le64_to_cpu(orph->inos[k]);
+                       int err = dbg_ins_check_orphan(&ci->root, inum);
+
+                       if (err)
+                               return err;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Scan every LEB of the orphan area and collect the inode numbers found there
+ * in @ci. Nothing is scanned if the no-orphans flag is set. Returns %0 on
+ * success and a negative error code on failure.
+ */
+static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
+{
+       int lnum;
+
+       /* Check no-orphans flag and skip this if no orphans */
+       if (c->no_orphs)
+               return 0;
+
+       for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
+               struct ubifs_scan_leb *sleb;
+               int err;
+
+               sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
+               if (IS_ERR(sleb))
+                       return PTR_ERR(sleb);
+
+               err = dbg_read_orphans(ci, sleb);
+               ubifs_scan_destroy(sleb);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Check that every inode of the index which has zero 'nlink' is recorded as
+ * an orphan, either in the orphan area of the flash or in the orphan RB-tree
+ * of the file-system. The check is done only if the %UBIFS_CHK_ORPH flag is
+ * set. Returns %0 on success and a negative error code on failure (%-EINVAL
+ * if orphans are missing).
+ */
+static int dbg_check_orphans(struct ubifs_info *c)
+{
+       struct check_info ci;
+       int err;
+
+       if (!(ubifs_chk_flags & UBIFS_CHK_ORPH))
+               return 0;
+
+       ci.root = RB_ROOT;
+       ci.leaf_cnt = 0;
+       ci.missing  = 0;
+       ci.tot_inos = 0;
+       ci.last_ino = 0;
+       ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
+       if (!ci.node) {
+               ubifs_err("out of memory");
+               return -ENOMEM;
+       }
+
+       err = dbg_scan_orphans(c, &ci);
+       if (!err) {
+               err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci);
+               if (err) {
+                       ubifs_err("cannot scan TNC, error %d", err);
+               } else if (ci.missing) {
+                       ubifs_err("%lu missing orphan(s)", ci.missing);
+                       err = -EINVAL;
+               } else {
+                       dbg_cmt("last inode number is %lu", ci.last_ino);
+                       dbg_cmt("total number of inodes is %lu", ci.tot_inos);
+                       dbg_cmt("total number of leaf nodes is %llu",
+                               ci.leaf_cnt);
+               }
+       }
+
+       /* The check tree and the node buffer are released in any case */
+       dbg_free_check_tree(&ci.root);
+       kfree(ci.node);
+       return err;
+}
+
+#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c

new file mode 100644 (file)

index 0000000..77d26c1
--- /dev/null
+++ b/fs/ubifs/recovery.c
@@ -0,0 +1,1519 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements functions needed to recover from unclean un-mounts.
+ * When UBIFS is mounted, it checks a flag on the master node to determine if
+ * an un-mount was completed successfully. If not, the process of mounting
+ * incorporates additional checking and fixing of on-flash data structures.
+ * UBIFS always cleans away all remnants of an unclean un-mount, so that
+ * errors do not accumulate. However UBIFS defers recovery if it is mounted
+ * read-only, and the flash is not modified in that case.
+ */
+
+#include <linux/crc32.h>
+#include "ubifs.h"
+
+/**
+ * is_empty - determine whether a buffer is empty (contains all 0xff).
+ * @buf: buffer to check
+ * @len: length of buffer
+ *
+ * This function returns %1 if every one of the first @len bytes of @buf is
+ * 0xff (which is also the case if @len is not positive), otherwise %0 is
+ * returned.
+ */
+static int is_empty(void *buf, int len)
+{
+       uint8_t *cur = buf;
+
+       while (len-- > 0) {
+               if (*cur != 0xff)
+                       return 0;
+               cur++;
+       }
+       return 1;
+}
+
+/**
+ * get_master_node - get the last valid master node allowing for corruption.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+ * @pbuf: buffer containing the LEB read, is returned here
+ * @mst: master node, if found, is returned here
+ * @cor: corruption, if found, is returned here
+ *
+ * This function allocates a buffer, reads the LEB into it, and finds and
+ * returns the last valid master node allowing for one area of corruption.
+ * The corrupt area, if there is one, must be consistent with the assumption
+ * that it is the result of an unclean unmount while the master node was being
+ * written. Under those circumstances, it is valid to use the previously written
+ * master node.
+ *
+ * On success the buffer is returned in @pbuf and has to be freed by the
+ * caller, while @mst and @cor (which point into that buffer) are assigned only
+ * if a master node or corruption is actually found, so the caller is expected
+ * to initialize them. On failure the buffer is freed and %NULL is stored in
+ * @mst and @cor.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
+                          struct ubifs_mst_node **mst, void **cor)
+{
+       const int sz = c->mst_node_alsz;
+       int err, offs, len;
+       void *sbuf, *buf;
+
+       sbuf = vmalloc(c->leb_size);
+       if (!sbuf)
+               return -ENOMEM;
+
+       /* %-EBADMSG is tolerated - what has been read is examined anyway */
+       err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
+       if (err && err != -EBADMSG)
+               goto out_free;
+
+       /* Find the first position that is definitely not a node */
+       offs = 0;
+       buf = sbuf;
+       len = c->leb_size;
+       /* Step in units of 'sz' for as long as the node magic is found */
+       while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
+               struct ubifs_ch *ch = buf;
+
+               if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
+                       break;
+               offs += sz;
+               buf  += sz;
+               len  -= sz;
+       }
+       /* See if there was a valid master node before that */
+       if (offs) {
+               int ret;
+
+               /* Step back to the last position which had the node magic */
+               offs -= sz;
+               buf  -= sz;
+               len  += sz;
+               ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
+               if (ret != SCANNED_A_NODE && offs) {
+                       /* Could have been corruption so check one place back */
+                       offs -= sz;
+                       buf  -= sz;
+                       len  += sz;
+                       ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
+                       if (ret != SCANNED_A_NODE)
+                               /*
+                                * We accept only one area of corruption because
+                                * we are assuming that it was caused while
+                                * trying to write a master node.
+                                */
+                               goto out_err;
+               }
+               if (ret == SCANNED_A_NODE) {
+                       struct ubifs_ch *ch = buf;
+
+                       if (ch->node_type != UBIFS_MST_NODE)
+                               goto out_err;
+                       dbg_rcvry("found a master node at %d:%d", lnum, offs);
+                       *mst = buf;
+                       /* Move past the master node which has been found */
+                       offs += sz;
+                       buf  += sz;
+                       len  -= sz;
+               }
+       }
+       /* Check for corruption */
+       if (offs < c->leb_size) {
+               /* Only the 'sz' bytes after the master node may be corrupted */
+               if (!is_empty(buf, min_t(int, len, sz))) {
+                       *cor = buf;
+                       dbg_rcvry("found corruption at %d:%d", lnum, offs);
+               }
+               offs += sz;
+               buf  += sz;
+               len  -= sz;
+       }
+       /* Check remaining empty space */
+       if (offs < c->leb_size)
+               if (!is_empty(buf, len))
+                       goto out_err;
+       *pbuf = sbuf;
+       return 0;
+
+out_err:
+       err = -EINVAL;
+out_free:
+       vfree(sbuf);
+       *mst = NULL;
+       *cor = NULL;
+       return err;
+}
+
+/**
+ * write_rcvrd_mst_node - write recovered master node.
+ * @c: UBIFS file-system description object
+ * @mst: master node
+ *
+ * This function sets the %UBIFS_MST_RCVRY flag in @mst, prepares the node and
+ * writes it to both master LEBs (%UBIFS_MST_LNUM and %UBIFS_MST_LNUM + 1)
+ * using atomic LEB change. The original flags of @mst are restored before
+ * returning, whether the write succeeded or not.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int write_rcvrd_mst_node(struct ubifs_info *c,
+                               struct ubifs_mst_node *mst)
+{
+       int err, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
+       /* On-flash (little-endian) value, hence '__le32' and no conversion */
+       __le32 save_flags;
+
+       dbg_rcvry("recovery");
+
+       save_flags = mst->flags;
+       mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
+
+       ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
+       err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
+       if (err)
+               goto out;
+       err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
+out:
+       mst->flags = save_flags;
+       return err;
+}
+
+/**
+ * ubifs_recover_master_node - recover the master node.
+ * @c: UBIFS file-system description object
+ *
+ * This function recovers the master node from corruption that may occur due to
+ * an unclean unmount.
+ *
+ * Both master LEBs (%UBIFS_MST_LNUM and the one after it) are examined with
+ * get_master_node() and one of the master nodes found is selected according to
+ * where the nodes sit in their LEBs and whether corruption was seen. The
+ * selected node is copied to @c->mst_node. When mounted read-only a copy is
+ * also kept in @c->rcvrd_mst_node; otherwise the node is written out with
+ * write_rcvrd_mst_node().
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_recover_master_node(struct ubifs_info *c)
+{
+       void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
+       struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
+       const int sz = c->mst_node_alsz;
+       int err, offs1, offs2;
+
+       dbg_rcvry("recovery");
+
+       err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
+       if (err)
+               goto out_free;
+
+       err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
+       if (err)
+               goto out_free;
+
+       if (mst1) {
+               /* Offset of the master node within the first LEB's buffer */
+               offs1 = (void *)mst1 - buf1;
+               if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
+                   (offs1 == 0 && !cor1)) {
+                       /*
+                        * mst1 was written by recovery at offset 0 with no
+                        * corruption.
+                        */
+                       dbg_rcvry("recovery recovery");
+                       mst = mst1;
+               } else if (mst2) {
+                       offs2 = (void *)mst2 - buf2;
+                       if (offs1 == offs2) {
+                               /* Same offset, so must be the same */
+                               /* Only the part after the common header is compared */
+                               if (memcmp((void *)mst1 + UBIFS_CH_SZ,
+                                          (void *)mst2 + UBIFS_CH_SZ,
+                                          UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
+                                       goto out_err;
+                               mst = mst1;
+                       } else if (offs2 + sz == offs1) {
+                               /* 1st LEB was written, 2nd was not */
+                               if (cor1)
+                                       goto out_err;
+                               mst = mst1;
+                       } else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
+                               /* 1st LEB was unmapped and written, 2nd not */
+                               if (cor1)
+                                       goto out_err;
+                               mst = mst1;
+                       } else
+                               goto out_err;
+               } else {
+                       /*
+                        * 2nd LEB was unmapped and about to be written, so
+                        * there must be only one master node in the first LEB
+                        * and no corruption.
+                        */
+                       if (offs1 != 0 || cor1)
+                               goto out_err;
+                       mst = mst1;
+               }
+       } else {
+               if (!mst2)
+                       goto out_err;
+               /*
+                * 1st LEB was unmapped and about to be written, so there must
+                * be no room left in 2nd LEB.
+                */
+               offs2 = (void *)mst2 - buf2;
+               if (offs2 + sz + sz <= c->leb_size)
+                       goto out_err;
+               mst = mst2;
+       }
+
+       dbg_rcvry("recovered master node from LEB %d",
+                 (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
+
+       /* From here on the in-memory master node is the recovered one */
+       memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
+
+       if ((c->vfs_sb->s_flags & MS_RDONLY)) {
+               /* Read-only mode. Keep a copy for switching to rw mode */
+               c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
+               if (!c->rcvrd_mst_node) {
+                       err = -ENOMEM;
+                       goto out_free;
+               }
+               memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
+       } else {
+               /* Write the recovered master node */
+               /*
+                * NOTE(review): max_sqnum is set to one less than the sqnum of
+                * the recovered node; presumably preparing the node for writing
+                * advances it again - confirm against ubifs_prepare_node().
+                */
+               c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
+               err = write_rcvrd_mst_node(c, c->mst_node);
+               if (err)
+                       goto out_free;
+       }
+
+       vfree(buf2);
+       vfree(buf1);
+
+       return 0;
+
+out_err:
+       err = -EINVAL;
+out_free:
+       /*
+        * Every failure ends up here, including -ENOMEM and write errors, so
+        * the message and the node dumps below are printed for those as well.
+        */
+       ubifs_err("failed to recover master node");
+       if (mst1) {
+               dbg_err("dumping first master node");
+               dbg_dump_node(c, mst1);
+       }
+       if (mst2) {
+               dbg_err("dumping second master node");
+               dbg_dump_node(c, mst2);
+       }
+       vfree(buf2);
+       vfree(buf1);
+       return err;
+}
+
+/**
+ * ubifs_write_rcvrd_mst_node - write the recovered master node.
+ * @c: UBIFS file-system description object
+ *
+ * If a recovered master node is still held in @c->rcvrd_mst_node (it was kept
+ * there while mounted read-only), mark both it and @c->mst_node dirty and
+ * write it out because we are remounting rw. The kept copy is released only
+ * after it has been written successfully. Nothing is done when there is no
+ * kept copy.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
+{
+       int err = 0;
+
+       if (c->rcvrd_mst_node) {
+               c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
+               c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
+
+               err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
+               if (!err) {
+                       /* Written, the saved copy is no longer needed */
+                       kfree(c->rcvrd_mst_node);
+                       c->rcvrd_mst_node = NULL;
+               }
+       }
+       return err;
+}
+
+/**
+ * is_last_write - determine if an offset was in the last write to a LEB.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to check, its first byte corresponds to LEB offset @offs
+ * @offs: offset to check
+ *
+ * This function returns %1 if @offs was in the last write to the LEB whose data
+ * is in @buf, otherwise %0 is returned.
+ *
+ * If min_io_size is greater than one, everything from the next min_io_size
+ * boundary after @offs to the end of the LEB must be 0xff. If min_io_size is
+ * one, the data between @offs and the last non-0xff byte of the LEB must not
+ * be longer than %UBIFS_MAX_NODE_SZ.
+ */
+static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
+{
+       const uint8_t *bytes = buf;
+       int pos;
+
+       if (c->min_io_size == 1) {
+               int dirty_len = c->leb_size - offs;
+
+               /* Strip the trailing run of 0xff bytes */
+               while (dirty_len > 0 && bytes[dirty_len - 1] == 0xff)
+                       dirty_len -= 1;
+               /*
+                * What is left is the size of the corruption, which cannot be
+                * more than the size of 1 node if it was caused by an unclean
+                * unmount.
+                */
+               return dirty_len <= UBIFS_MAX_NODE_SZ;
+       }
+
+       /*
+        * 'offs' lies in the min. I/O unit that ends at the next
+        * c->min_io_size boundary. Everything after that unit should be empty
+        * space.
+        */
+       for (pos = ALIGN(offs + 1, c->min_io_size); pos < c->leb_size; pos++)
+               if (bytes[pos - offs] != 0xff)
+                       return 0;
+       return 1;
+}
+
+/**
+ * clean_buf - clean the data from an LEB sitting in a buffer.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to clean
+ * @lnum: LEB number to clean
+ * @offs: offset from which to clean
+ * @len: length of buffer
+ *
+ * If min_io_size is one, the rest of the LEB starting at @buf is simply set to
+ * 0xff and @buf, @offs and @len are left alone. Otherwise padding is added up
+ * to the next min_io_size boundary, @buf, @offs and @len are moved to that
+ * boundary, and everything from there to the end of the LEB is set to 0xff.
+ */
+static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
+                     int *offs, int *len)
+{
+       int boundary, gap;
+
+       lnum = lnum;
+       dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
+
+       if (c->min_io_size != 1) {
+               ubifs_assert(!(*offs & 7));
+
+               /* Pad the rest of the current min. I/O unit */
+               boundary = ALIGN(*offs, c->min_io_size);
+               gap = boundary - *offs;
+               ubifs_pad(c, *buf, gap);
+
+               /* Step over the padding and wipe what follows */
+               *buf += gap;
+               *offs = boundary;
+               *len -= gap;
+               memset(*buf, 0xff, c->leb_size - boundary);
+               return;
+       }
+
+       memset(*buf, 0xff, c->leb_size - *offs);
+}
+
+/**
+ * no_more_nodes - determine if there are no more nodes in a buffer.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to check
+ * @len: length of buffer
+ * @lnum: LEB number of the LEB from which @buf was read
+ * @offs: offset from which @buf was read
+ *
+ * @buf starts at a corrupt node. The scan begins after the first 8 bytes (or at
+ * the next min_io_size boundary if min_io_size is greater than one) and moves
+ * forward 8 bytes at a time looking for the node magic. A node found entirely
+ * inside what looks like the payload of the corrupt data node is ignored.
+ *
+ * This function scans @buf for more nodes and returns %0 if a node is found and
+ * %1 if no more nodes are found.
+ */
+static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
+                       int lnum, int offs)
+{
+       int skip, next_offs = 0;
+
+       if (len > UBIFS_DATA_NODE_SZ) {
+               struct ubifs_ch *ch = buf;
+               int dlen = le32_to_cpu(ch->len);
+
+               if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ &&
+                   dlen <= UBIFS_MAX_DATA_NODE_SZ)
+                       /* The corrupt node looks like a data node */
+                       next_offs = ALIGN(offs + dlen, 8);
+       }
+
+       if (c->min_io_size == 1)
+               skip = 8;
+       else
+               skip = ALIGN(offs + 1, c->min_io_size) - offs;
+
+       offs += skip;
+       buf += skip;
+       len -= skip;
+       while (len > 8) {
+               struct ubifs_ch *ch = buf;
+               uint32_t magic = le32_to_cpu(ch->magic);
+               int ret;
+
+               if (magic == UBIFS_NODE_MAGIC) {
+                       ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
+                       if (ret == SCANNED_A_NODE || ret > 0) {
+                               /*
+                                * There is a small chance this is just data in
+                                * a data node, so check that possibility. e.g.
+                                * this is part of a file that itself contains
+                                * a UBIFS image.
+                                */
+                               if (!next_offs ||
+                                   offs + le32_to_cpu(ch->len) > next_offs) {
+                                       dbg_rcvry("unexpected node at %d:%d",
+                                                 lnum, offs);
+                                       return 0;
+                               }
+                               /*
+                                * The node lies within the corrupt data node.
+                                * Fall through to step past it: looping again
+                                * without moving on would scan the very same
+                                * position forever.
+                                */
+                       }
+               }
+               offs += 8;
+               buf += 8;
+               len -= 8;
+       }
+       return 1;
+}
+
+/**
+ * fix_unclean_leb - fix an unclean LEB.
+ * @c: UBIFS file-system description object
+ * @sleb: scanned LEB information
+ * @start: offset where scan started
+ *
+ * When mounted read-only and not remounting read-write, the LEB is only queued
+ * on @c->unclean_leb_list together with the end offset of the last node being
+ * kept. Otherwise the LEB is fixed on the flash right away: it is unmapped if
+ * nothing is being kept, or rewritten up to that end offset rounded up to
+ * min_io_size.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+                          int start)
+{
+       int lnum = sleb->lnum, endpt = start;
+       int err, len, pad_len;
+
+       /* Work out where the last node being kept ends */
+       if (!list_empty(&sleb->nodes)) {
+               struct ubifs_scan_node *last;
+
+               last = list_entry(sleb->nodes.prev,
+                                 struct ubifs_scan_node, list);
+               endpt = last->offs + last->len;
+       }
+
+       if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
+               /* Nothing is written here, just remember the LEB */
+               struct ubifs_unclean_leb *ucleb;
+
+               dbg_rcvry("need to fix LEB %d start %d endpt %d",
+                         lnum, start, sleb->endpt);
+               ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
+               if (!ucleb)
+                       return -ENOMEM;
+               ucleb->lnum = lnum;
+               ucleb->endpt = endpt;
+               list_add_tail(&ucleb->list, &c->unclean_leb_list);
+               return 0;
+       }
+
+       /* Write the fixed LEB back to flash */
+       dbg_rcvry("fixing LEB %d start %d endpt %d",
+                 lnum, start, sleb->endpt);
+
+       /* Nothing is being kept, so the whole LEB can go */
+       if (endpt == 0)
+               return ubifs_leb_unmap(c, lnum);
+
+       /* Read the part of the LEB that precedes @start into the buffer */
+       if (start) {
+               err = ubi_read(c->ubi, lnum, sleb->buf, 0, start);
+               if (err)
+                       return err;
+       }
+
+       /* Pad from the 8-byte aligned end of the data up to min_io_size */
+       len = ALIGN(endpt, c->min_io_size);
+       pad_len = len - ALIGN(endpt, 8);
+       if (len > endpt && pad_len > 0)
+               ubifs_pad(c, sleb->buf + len - pad_len, pad_len);
+
+       return ubi_leb_change(c->ubi, lnum, sleb->buf, len, UBI_UNKNOWN);
+}
+
+/**
+ * drop_incomplete_group - drop nodes from an incomplete group.
+ * @sleb: scanned LEB information
+ * @offs: offset of dropped nodes is returned here
+ *
+ * Nodes are removed from the tail of the scanned node list for as long as the
+ * last node has group type %UBIFS_IN_NODE_GROUP. @offs is set to the offset of
+ * the most recently dropped node and is left untouched if nothing is dropped.
+ *
+ * This function returns %1 if nodes are dropped and %0 otherwise.
+ */
+static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
+{
+       int dropped;
+
+       /* 'dropped' only becomes 1 once a loop pass has removed a node */
+       for (dropped = 0; !list_empty(&sleb->nodes); dropped = 1) {
+               struct ubifs_scan_node *last;
+               struct ubifs_ch *hdr;
+
+               last = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
+                                 list);
+               hdr = last->node;
+               if (hdr->group_type != UBIFS_IN_NODE_GROUP)
+                       break;
+
+               dbg_rcvry("dropping node at %d:%d", sleb->lnum, last->offs);
+               *offs = last->offs;
+               list_del(&last->list);
+               kfree(last);
+               sleb->nodes_cnt -= 1;
+       }
+       return dropped;
+}
+
+/**
+ * ubifs_recover_leb - scan and recover a LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+ * @offs: offset
+ * @sbuf: LEB-sized buffer to use
+ * @grouped: nodes may be grouped for recovery
+ *
+ * This function does a scan of a LEB, but caters for errors that might have
+ * been caused by the unclean unmount from which we are attempting to recover.
+ *
+ * This function returns the scanned LEB information on success and an error
+ * pointer (see ERR_PTR()) carrying a negative error code on failure.
+ */
+struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
+                                        int offs, void *sbuf, int grouped)
+{
+       int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
+       int empty_chkd = 0, start = offs;
+       struct ubifs_scan_leb *sleb;
+       void *buf = sbuf + offs;
+
+       dbg_rcvry("%d:%d", lnum, offs);
+
+       sleb = ubifs_start_scan(c, lnum, offs, sbuf);
+       if (IS_ERR(sleb))
+               return sleb;
+
+       /* The scan start reported an ECC condition, so rewrite the LEB */
+       if (sleb->ecc)
+               need_clean = 1;
+
+       while (len >= 8) {
+               int ret;
+
+               dbg_scan("look at LEB %d:%d (%d bytes left)",
+                        lnum, offs, len);
+
+               cond_resched();
+
+               /*
+                * Scan quietly until there is an error from which we cannot
+                * recover
+                */
+               ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
+
+               if (ret == SCANNED_A_NODE) {
+                       /* A valid node, and not a padding node */
+                       struct ubifs_ch *ch = buf;
+                       int node_len;
+
+                       err = ubifs_add_snod(c, sleb, buf, offs);
+                       if (err)
+                               goto error;
+                       node_len = ALIGN(le32_to_cpu(ch->len), 8);
+                       offs += node_len;
+                       buf += node_len;
+                       len -= node_len;
+                       continue;
+               }
+
+               if (ret > 0) {
+                       /* Padding bytes or a valid padding node */
+                       offs += ret;
+                       buf += ret;
+                       len -= ret;
+                       continue;
+               }
+
+               if (ret == SCANNED_EMPTY_SPACE) {
+                       if (!is_empty(buf, len)) {
+                               /*
+                                * Leaving the loop here without setting
+                                * 'empty_chkd' makes the check after the loop
+                                * report the corrupt empty space.
+                                */
+                               if (!is_last_write(c, buf, offs))
+                                       break;
+                               clean_buf(c, &buf, lnum, &offs, &len);
+                               need_clean = 1;
+                       }
+                       empty_chkd = 1;
+                       break;
+               }
+
+               /*
+                * Garbage, a bad padding node or a corrupt node is tolerated
+                * only if the checks below pass; otherwise execution falls
+                * through to the noisy re-scan and then to the error exit.
+                */
+               if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
+                       if (is_last_write(c, buf, offs)) {
+                               clean_buf(c, &buf, lnum, &offs, &len);
+                               need_clean = 1;
+                               empty_chkd = 1;
+                               break;
+                       }
+
+               if (ret == SCANNED_A_CORRUPT_NODE)
+                       if (no_more_nodes(c, buf, len, lnum, offs)) {
+                               clean_buf(c, &buf, lnum, &offs, &len);
+                               need_clean = 1;
+                               empty_chkd = 1;
+                               break;
+                       }
+
+               if (quiet) {
+                       /* Redo the last scan but noisily */
+                       quiet = 0;
+                       continue;
+               }
+
+               switch (ret) {
+               case SCANNED_GARBAGE:
+                       dbg_err("garbage");
+                       goto corrupted;
+               case SCANNED_A_CORRUPT_NODE:
+               case SCANNED_A_BAD_PAD_NODE:
+                       dbg_err("bad node");
+                       goto corrupted;
+               default:
+                       dbg_err("unknown");
+                       goto corrupted;
+               }
+       }
+
+       /* The remaining space has not been verified inside the loop yet */
+       if (!empty_chkd && !is_empty(buf, len)) {
+               if (is_last_write(c, buf, offs)) {
+                       clean_buf(c, &buf, lnum, &offs, &len);
+                       need_clean = 1;
+               } else {
+                       ubifs_err("corrupt empty space at LEB %d:%d",
+                                 lnum, offs);
+                       goto corrupted;
+               }
+       }
+
+       /* Drop nodes from incomplete group */
+       if (grouped && drop_incomplete_group(sleb, &offs)) {
+               /* 'offs' now points at the dropped nodes, clean from there */
+               buf = sbuf + offs;
+               len = c->leb_size - offs;
+               clean_buf(c, &buf, lnum, &offs, &len);
+               need_clean = 1;
+       }
+
+       /* The end of the data is not on a min_io_size boundary */
+       if (offs % c->min_io_size) {
+               clean_buf(c, &buf, lnum, &offs, &len);
+               need_clean = 1;
+       }
+
+       ubifs_end_scan(c, sleb, lnum, offs);
+
+       if (need_clean) {
+               err = fix_unclean_leb(c, sleb, start);
+               if (err)
+                       goto error;
+       }
+
+       return sleb;
+
+corrupted:
+       ubifs_scanned_corruption(c, lnum, offs, buf);
+       err = -EUCLEAN;
+error:
+       ubifs_err("LEB %d scanning failed", lnum);
+       ubifs_scan_destroy(sleb);
+       return ERR_PTR(err);
+}
+
+/**
+ * get_cs_sqnum - get commit start sequence number.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of commit start node
+ * @offs: offset of commit start node
+ * @cs_sqnum: commit start sequence number is returned here
+ *
+ * The node at @lnum:@offs is read and must be a valid commit start node whose
+ * commit number equals @c->cmt_no. @cs_sqnum is written only on success.
+ *
+ * This function returns %0 on success and a negative error code on failure:
+ * %-ENOMEM if the temporary node buffer cannot be allocated, the read error if
+ * reading fails, and %-EINVAL if the node does not fit in the LEB or fails any
+ * of the checks.
+ */
+static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
+                       unsigned long long *cs_sqnum)
+{
+       struct ubifs_cs_node *cs_node = NULL;
+       int err, ret;
+
+       dbg_rcvry("at %d:%d", lnum, offs);
+       cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
+       if (!cs_node)
+               return -ENOMEM;
+       /* A whole commit start node must fit between @offs and the LEB end */
+       if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
+               goto out_err;
+       /* -EBADMSG is let through, the node is validated by the scan below */
+       err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
+       if (err && err != -EBADMSG)
+               goto out_free;
+       ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
+       if (ret != SCANNED_A_NODE) {
+               dbg_err("Not a valid node");
+               goto out_err;
+       }
+       if (cs_node->ch.node_type != UBIFS_CS_NODE) {
+               dbg_err("Not a CS node, type is %d", cs_node->ch.node_type);
+               goto out_err;
+       }
+       if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
+               dbg_err("CS node cmt_no %llu != current cmt_no %llu",
+                       (unsigned long long)le64_to_cpu(cs_node->cmt_no),
+                       c->cmt_no);
+               goto out_err;
+       }
+       *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
+       dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
+       kfree(cs_node);
+       return 0;
+
+out_err:
+       err = -EINVAL;
+out_free:
+       ubifs_err("failed to get CS sqnum");
+       kfree(cs_node);
+       return err;
+}
+
+/**
+ * ubifs_recover_log_leb - scan and recover a log LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+ * @offs: offset
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function does a scan of a LEB, but caters for errors that might have
+ * been caused by the unclean unmount from which we are attempting to recover.
+ * Unless the following log LEB is the log tail, that LEB is scanned first and
+ * recovery is refused if its first node is newer than the commit start.
+ *
+ * This function returns the scanned LEB information on success and an error
+ * pointer (see ERR_PTR()) carrying a negative error code on failure.
+ */
+struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
+                                            int offs, void *sbuf)
+{
+       int next_lnum = lnum + 1;
+
+       dbg_rcvry("LEB %d", lnum);
+
+       /* The log wraps around to its first LEB */
+       if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
+               next_lnum = UBIFS_LOG_LNUM;
+
+       if (next_lnum != c->ltail_lnum) {
+               struct ubifs_scan_leb *next_sleb;
+               int err = 0;
+
+               /*
+                * We can only recover at the end of the log, so check that the
+                * next log LEB is empty or out of date.
+                */
+               next_sleb = ubifs_scan(c, next_lnum, 0, sbuf);
+               if (IS_ERR(next_sleb))
+                       return next_sleb;
+
+               if (next_sleb->nodes_cnt) {
+                       unsigned long long cs_sqnum = c->cs_sqnum;
+                       struct ubifs_scan_node *first;
+
+                       first = list_entry(next_sleb->nodes.next,
+                                          struct ubifs_scan_node, list);
+                       /* Commit start sqnum not known yet, read it */
+                       if (cs_sqnum == 0)
+                               err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
+                       if (!err && first->sqnum > cs_sqnum) {
+                               ubifs_err("unrecoverable log corruption "
+                                         "in LEB %d", lnum);
+                               err = -EUCLEAN;
+                       }
+               }
+
+               /* The scan of the next LEB is released on every path */
+               ubifs_scan_destroy(next_sleb);
+               if (err)
+                       return ERR_PTR(err);
+       }
+
+       return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
+}
+
+/**
+ * recover_head - recover a head.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of head to recover
+ * @offs: offset of head to recover
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function ensures that there is no data on the flash at a head location.
+ * One min. I/O unit (512 bytes if min_io_size is one), cut short at the end of
+ * the LEB, is read at the head. If the read fails or any byte is not 0xff, the
+ * LEB is unmapped when @offs is zero, and otherwise rewritten with only its
+ * first @offs bytes.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int recover_head(const struct ubifs_info *c, int lnum, int offs,
+                       void *sbuf)
+{
+       int len, err, i, dirty;
+
+       len = c->min_io_size > 1 ? c->min_io_size : 512;
+       if (offs + len > c->leb_size)
+               len = c->leb_size - offs;
+       if (len == 0)
+               return 0;
+
+       /* Read at the head location and check it is empty flash */
+       err = ubi_read(c->ubi, lnum, sbuf, offs, len);
+       dirty = err ? 1 : 0;
+       if (!dirty) {
+               const uint8_t *bytes = sbuf;
+
+               for (i = 0; i < len; i++)
+                       if (bytes[i] != 0xff) {
+                               dirty = 1;
+                               break;
+                       }
+       }
+       if (!dirty)
+               return 0;
+
+       dbg_rcvry("cleaning head at %d:%d", lnum, offs);
+       if (offs == 0)
+               return ubifs_leb_unmap(c, lnum);
+
+       /* Keep what comes before the head and write just that back */
+       err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
+       if (err)
+               return err;
+       return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
+}
+
+/**
+ * ubifs_recover_inl_heads - recover index and LPT heads.
+ * @c: UBIFS file-system description object
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function ensures that there is no data on the flash at the index and
+ * LPT head locations. The index head is handled first and the LPT head is not
+ * touched if that fails.
+ *
+ * This deals with the recovery of a half-completed journal commit. UBIFS is
+ * careful never to overwrite the last version of the index or the LPT. Because
+ * the index and LPT are wandering trees, data from a half-completed commit will
+ * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
+ * assumed to be empty and will be unmapped anyway before use, or in the index
+ * and LPT heads.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
+{
+       int err;
+
+       ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
+
+       dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
+       err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
+       if (!err) {
+               dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum,
+                         c->nhead_offs);
+               err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
+       }
+       return err;
+}
+
+/**
+ * clean_an_unclean_leb - read and write a LEB to remove corruption.
+ * @c: UBIFS file-system description object
+ * @ucleb: unclean LEB information
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function reads a LEB up to a point pre-determined by the mount recovery,
+ * checks the nodes, and writes the result back to the flash, thereby cleaning
+ * off any following corruption, or non-fatal ECC errors. If that point is
+ * offset zero the LEB is just unmapped.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ * %-EUCLEAN is returned if empty space or anything that is not a valid node or
+ * padding is found in the part of the LEB that is supposed to be kept.
+ */
+static int clean_an_unclean_leb(const struct ubifs_info *c,
+                               struct ubifs_unclean_leb *ucleb, void *sbuf)
+{
+       int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
+       void *buf = sbuf;
+
+       dbg_rcvry("LEB %d len %d", lnum, len);
+
+       if (len == 0) {
+               /* Nothing to read, just unmap it */
+               err = ubifs_leb_unmap(c, lnum);
+               if (err)
+                       return err;
+               return 0;
+       }
+
+       /* -EBADMSG is let through, the data is checked node by node below */
+       err = ubi_read(c->ubi, lnum, buf, offs, len);
+       if (err && err != -EBADMSG)
+               return err;
+
+       while (len >= 8) {
+               int ret;
+
+               cond_resched();
+
+               /* Scan quietly until there is an error */
+               ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
+
+               if (ret == SCANNED_A_NODE) {
+                       /* A valid node, and not a padding node */
+                       struct ubifs_ch *ch = buf;
+                       int node_len;
+
+                       node_len = ALIGN(le32_to_cpu(ch->len), 8);
+                       offs += node_len;
+                       buf += node_len;
+                       len -= node_len;
+                       continue;
+               }
+
+               if (ret > 0) {
+                       /* Padding bytes or a valid padding node */
+                       offs += ret;
+                       buf += ret;
+                       len -= ret;
+                       continue;
+               }
+
+               if (ret == SCANNED_EMPTY_SPACE) {
+                       ubifs_err("unexpected empty space at %d:%d",
+                                 lnum, offs);
+                       return -EUCLEAN;
+               }
+
+               if (quiet) {
+                       /* Redo the last scan but noisily */
+                       quiet = 0;
+                       continue;
+               }
+
+               ubifs_scanned_corruption(c, lnum, offs, buf);
+               return -EUCLEAN;
+       }
+
+       /* Pad to min_io_size */
+       len = ALIGN(ucleb->endpt, c->min_io_size);
+       if (len > ucleb->endpt) {
+               int pad_len = len - ALIGN(ucleb->endpt, 8);
+
+               if (pad_len > 0) {
+                       /*
+                        * Pad the buffer that was read into and is written
+                        * back below, i.e. @sbuf, not whatever c->sbuf is.
+                        */
+                       buf = sbuf + len - pad_len;
+                       ubifs_pad(c, buf, pad_len);
+               }
+       }
+
+       /* Write back the LEB atomically */
+       err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
+       if (err)
+               return err;
+
+       dbg_rcvry("cleaned LEB %d", lnum);
+
+       return 0;
+}
+
+/**
+ * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
+ * @c: UBIFS file-system description object
+ * @sbuf: LEB-sized buffer to use
+ *
+ * This function works through @c->unclean_leb_list, i.e. the LEBs identified
+ * during recovery that need to be written but were not because UBIFS was
+ * mounted read-only. This happens when remounting to read-write mode. Each
+ * entry is removed from the list and freed once its LEB has been cleaned; on
+ * failure the failing entry and those after it stay on the list.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
+{
+       int err = 0;
+
+       dbg_rcvry("recovery");
+       while (!list_empty(&c->unclean_leb_list)) {
+               struct ubifs_unclean_leb *first;
+
+               first = list_entry(c->unclean_leb_list.next,
+                                  struct ubifs_unclean_leb, list);
+               err = clean_an_unclean_leb(c, first, sbuf);
+               if (err)
+                       break;
+               list_del(&first->list);
+               kfree(first);
+       }
+       return err;
+}
+
+/**
+ * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
+ * @c: UBIFS file-system description object
+ *
+ * Out-of-place garbage collection requires always one empty LEB with which to
+ * start garbage collection. The LEB number is recorded in c->gc_lnum and is
+ * written to the master node on unmounting. In the case of an unclean unmount
+ * the value of gc_lnum recorded in the master node is out of date and cannot
+ * be used. Instead, recovery must allocate an empty LEB for this purpose.
+ * However, there may not be enough empty space, in which case it must be
+ * possible to GC the dirtiest LEB into the GC head LEB.
+ *
+ * This function also runs the commit which causes the TNC updates from
+ * size-recovery and orphans to be written to the flash. That is important to
+ * ensure correct replay order for subsequent mounts.
+ *
+ * The function takes one of three paths:
+ *   1) an empty LEB is found: it is unmapped, becomes the GC LEB, and the
+ *      commit is run;
+ *   2) the dirtiest LEB fits in the GC head LEB: the commit is run first and
+ *      then that LEB is garbage-collected;
+ *   3) there is no usable GC head LEB ('find_free' label): a free LEB is
+ *      allocated via 'ubifs_find_free_leb_for_idx()' and the commit is run.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_rcvry_gc_commit(struct ubifs_info *c)
+{
+       struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
+       struct ubifs_lprops lp;
+       int lnum, err;
+
+       /* No GC LEB is known yet; one is chosen below */
+       c->gc_lnum = -1;
+       if (wbuf->lnum == -1) {
+               dbg_rcvry("no GC head LEB");
+               goto find_free;
+       }
+       /*
+        * See whether the used space in the dirtiest LEB fits in the GC head
+        * LEB.
+        */
+       if (wbuf->offs == c->leb_size) {
+               dbg_rcvry("no room in GC head LEB");
+               goto find_free;
+       }
+       /*
+        * NOTE(review): the meaning of the 'wbuf->offs' and '2' arguments is
+        * defined by 'ubifs_find_dirty_leb()', which is not visible here.
+        */
+       err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
+       if (err) {
+               if (err == -ENOSPC)
+                       dbg_err("could not find a dirty LEB");
+               return err;
+       }
+       ubifs_assert(!(lp.flags & LPROPS_INDEX));
+       lnum = lp.lnum;
+       if (lp.free + lp.dirty == c->leb_size) {
+               /* An empty LEB was returned */
+               if (lp.free != c->leb_size) {
+                       /* Account the whole LEB as free before unmapping it */
+                       err = ubifs_change_one_lp(c, lnum, c->leb_size,
+                                                 0, 0, 0, 0);
+                       if (err)
+                               return err;
+               }
+               err = ubifs_leb_unmap(c, lnum);
+               if (err)
+                       return err;
+               c->gc_lnum = lnum;
+               dbg_rcvry("allocated LEB %d for GC", lnum);
+               /* Run the commit */
+               dbg_rcvry("committing");
+               return ubifs_run_commit(c);
+       }
+       /*
+        * There was no empty LEB so the used space in the dirtiest LEB must fit
+        * in the GC head LEB.
+        *
+        * The used space is 'c->leb_size - lp.free - lp.dirty' and the room
+        * left in the GC head is 'c->leb_size - wbuf->offs', so it does not fit
+        * when 'lp.free + lp.dirty < wbuf->offs'.
+        */
+       if (lp.free + lp.dirty < wbuf->offs) {
+               dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
+                         lnum, wbuf->lnum, wbuf->offs);
+               err = ubifs_return_leb(c, lnum);
+               if (err)
+                       return err;
+               goto find_free;
+       }
+       /*
+        * We run the commit before garbage collection otherwise subsequent
+        * mounts will see the GC and orphan deletion in a different order.
+        */
+       dbg_rcvry("committing");
+       err = ubifs_run_commit(c);
+       if (err)
+               return err;
+       /*
+        * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
+        * - use locking to keep 'ubifs_assert()' happy.
+        */
+       dbg_rcvry("GC'ing LEB %d", lnum);
+       mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
+       err = ubifs_garbage_collect_leb(c, &lp);
+       if (err >= 0) {
+               /* Flush the write-buffer; a sync failure overrides the GC result */
+               int err2 = ubifs_wbuf_sync_nolock(wbuf);
+
+               if (err2)
+                       err = err2;
+       }
+       mutex_unlock(&wbuf->io_mutex);
+       if (err < 0) {
+               dbg_err("GC failed, error %d", err);
+               /* -EAGAIN is reported to the caller as -EINVAL */
+               if (err == -EAGAIN)
+                       err = -EINVAL;
+               return err;
+       }
+       /* Any non-negative result other than LEB_RETAINED is treated as an error */
+       if (err != LEB_RETAINED) {
+               dbg_err("GC returned %d", err);
+               return -EINVAL;
+       }
+       /*
+        * 'c->gc_lnum' was set to -1 at the top of this function and is not
+        * assigned on this path here.
+        * NOTE(review): presumably 'ubifs_garbage_collect_leb()' sets
+        * 'c->gc_lnum' when it returns LEB_RETAINED - confirm. The debug
+        * message below prints the local 'lnum', not 'c->gc_lnum'.
+        */
+       err = ubifs_leb_unmap(c, c->gc_lnum);
+       if (err)
+               return err;
+       dbg_rcvry("allocated LEB %d for GC", lnum);
+       return 0;
+
+find_free:
+       /*
+        * There is no GC head LEB or the free space in the GC head LEB is too
+        * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
+        * GC is not run.
+        */
+       lnum = ubifs_find_free_leb_for_idx(c);
+       if (lnum < 0) {
+               dbg_err("could not find an empty LEB");
+               return lnum;
+       }
+       /* And reset the index flag */
+       err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
+                                 LPROPS_INDEX, 0);
+       if (err)
+               return err;
+       c->gc_lnum = lnum;
+       dbg_rcvry("allocated LEB %d for GC", lnum);
+       /* Run the commit */
+       dbg_rcvry("committing");
+       return ubifs_run_commit(c);
+}
+
+/**
+ * struct size_entry - inode size information for recovery.
+ * @rb: link in the RB-tree of sizes
+ * @inum: inode number
+ * @i_size: size on inode
+ * @d_size: maximum size based on data nodes
+ * @exists: indicates whether the inode exists
+ * @inode: inode if pinned in memory awaiting rw mode to fix it
+ *
+ * Entries are kept in the @c->size_tree RB-tree and are ordered by @inum.
+ * @d_size grows with data nodes and is overwritten by truncation nodes (see
+ * 'ubifs_recover_size_accum()'). When @inode is set, a reference to it is held
+ * and is dropped with 'iput()' when the entry is disposed of.
+ */
+struct size_entry {
+       struct rb_node rb;
+       ino_t inum;
+       loff_t i_size;
+       loff_t d_size;
+       int exists;
+       struct inode *inode;
+};
+
+/**
+ * add_ino - add an entry to the size tree.
+ * @c: UBIFS file-system description object
+ * @inum: inode number
+ * @i_size: size on inode
+ * @d_size: maximum size based on data nodes
+ * @exists: indicates whether the inode exists
+ *
+ * A new &struct size_entry is allocated and linked into @c->size_tree, which
+ * is ordered by inode number. No check for an already present @inum is made
+ * here: an equal inode number is simply placed in the right sub-tree.
+ *
+ * This function returns %0 on success and %-ENOMEM if the entry could not be
+ * allocated.
+ */
+static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
+                  loff_t d_size, int exists)
+{
+       struct rb_node **link = &c->size_tree.rb_node;
+       struct rb_node *up = NULL;
+       struct size_entry *entry;
+
+       /* Descend to the empty slot where the new node belongs */
+       while (*link) {
+               up = *link;
+               entry = rb_entry(up, struct size_entry, rb);
+               link = (inum < entry->inum) ? &up->rb_left : &up->rb_right;
+       }
+
+       entry = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       entry->exists = exists;
+       entry->d_size = d_size;
+       entry->i_size = i_size;
+       entry->inum = inum;
+
+       /* Hook the node in and let the RB-tree re-balance itself */
+       rb_link_node(&entry->rb, up, link);
+       rb_insert_color(&entry->rb, &c->size_tree);
+
+       return 0;
+}
+
+/**
+ * find_ino - find an entry on the size tree.
+ * @c: UBIFS file-system description object
+ * @inum: inode number
+ *
+ * This function returns the size tree entry for inode number @inum, or %NULL
+ * if the tree has no entry for it.
+ */
+static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
+{
+       struct rb_node *node;
+
+       for (node = c->size_tree.rb_node; node; ) {
+               struct size_entry *cur;
+
+               cur = rb_entry(node, struct size_entry, rb);
+               if (inum == cur->inum)
+                       return cur;
+               /* Smaller inode numbers are to the left, larger to the right */
+               node = (inum < cur->inum) ? node->rb_left : node->rb_right;
+       }
+       return NULL;
+}
+
+/**
+ * remove_ino - remove an entry from the size tree.
+ * @c: UBIFS file-system description object
+ * @inum: inode number
+ *
+ * If the size tree has an entry for @inum, it is unlinked from the tree and
+ * freed. Nothing is done if there is no such entry.
+ */
+static void remove_ino(struct ubifs_info *c, ino_t inum)
+{
+       struct size_entry *victim;
+
+       victim = find_ino(c, inum);
+       if (victim) {
+               rb_erase(&victim->rb, &c->size_tree);
+               kfree(victim);
+       }
+}
+
+/**
+ * ubifs_destroy_size_tree - free resources related to the size tree.
+ * @c: UBIFS file-system description object
+ *
+ * The tree is torn down leaf by leaf without re-balancing: the walk descends
+ * to a node that has no children, drops the inode reference it may hold,
+ * detaches it from its parent and frees it, then carries on from the parent.
+ * Finally @c->size_tree is reset to an empty tree.
+ */
+void ubifs_destroy_size_tree(struct ubifs_info *c)
+{
+       struct rb_node *node = c->size_tree.rb_node;
+
+       while (node) {
+               struct rb_node *up;
+               struct size_entry *leaf;
+
+               /* Keep going down until a node without children is reached */
+               if (node->rb_left) {
+                       node = node->rb_left;
+                       continue;
+               }
+               if (node->rb_right) {
+                       node = node->rb_right;
+                       continue;
+               }
+
+               leaf = rb_entry(node, struct size_entry, rb);
+               if (leaf->inode)
+                       iput(leaf->inode);
+
+               /* Detach the leaf from its parent, if it has one */
+               up = rb_parent(node);
+               if (up) {
+                       if (up->rb_left == node)
+                               up->rb_left = NULL;
+                       else
+                               up->rb_right = NULL;
+               }
+               kfree(leaf);
+               node = up;
+       }
+       c->size_tree = RB_ROOT;
+}
+
+/**
+ * ubifs_recover_size_accum - accumulate inode sizes for recovery.
+ * @c: UBIFS file-system description object
+ * @key: node key
+ * @deletion: node is for a deletion
+ * @new_size: inode size
+ *
+ * This function has two purposes:
+ *     1) to ensure there are no data nodes that fall outside the inode size
+ *     2) to ensure there are no data nodes for inodes that do not exist
+ * To accomplish those purposes, an RB-tree is constructed containing an entry
+ * for each inode number in the journal that has not been deleted. Each entry
+ * records the size from the inode node, the maximum size of any data node
+ * (also altered by truncations) and a flag telling whether an inode node for
+ * that inode number was present in the journal.
+ *
+ * How the tree is updated depends on the key type:
+ *     - inode key: a deletion drops the entry; otherwise the entry's inode
+ *       size is set to @new_size and it is marked as existing (the entry is
+ *       created if needed);
+ *     - data key: the entry's data size is raised to @new_size if that is
+ *       larger (the entry is created as "not existing" if needed);
+ *     - truncation key: the data size of an existing entry is set to
+ *       @new_size.
+ * Other key types are ignored.
+ *
+ * Note that there is still the possibility that there are data nodes that have
+ * been committed that are beyond the inode size, however the only way to find
+ * them would be to scan the entire index. Alternatively, some provision could
+ * be made to record the size of inodes at the start of commit, which would seem
+ * very cumbersome for a scenario that is quite unlikely and the only negative
+ * consequence of which is wasted space.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
+                            int deletion, loff_t new_size)
+{
+       ino_t inum = key_inum(c, key);
+       struct size_entry *entry;
+
+       switch (key_type(c, key)) {
+       case UBIFS_INO_KEY:
+               if (deletion) {
+                       remove_ino(c, inum);
+                       return 0;
+               }
+               entry = find_ino(c, inum);
+               if (!entry)
+                       return add_ino(c, inum, new_size, 0, 1);
+               entry->i_size = new_size;
+               entry->exists = 1;
+               return 0;
+
+       case UBIFS_DATA_KEY:
+               entry = find_ino(c, inum);
+               if (!entry)
+                       return add_ino(c, inum, 0, new_size, 0);
+               if (new_size > entry->d_size)
+                       entry->d_size = new_size;
+               return 0;
+
+       case UBIFS_TRUN_KEY:
+               entry = find_ino(c, inum);
+               if (entry)
+                       entry->d_size = new_size;
+               return 0;
+       }
+
+       /* Any other key type has no influence on the size tree */
+       return 0;
+}
+
+/**
+ * fix_size_in_place - fix inode size in place on flash.
+ * @c: UBIFS file-system description object
+ * @e: inode size information for recovery
+ *
+ * The inode node for @e->inum is located through the TNC. If its recorded
+ * size is smaller than @e->d_size, the whole LEB holding it is read into
+ * @c->sbuf, the size field and the node CRC are updated in the buffer, and the
+ * LEB is written back with 'ubi_leb_change()'.
+ *
+ * This function returns %0 on success (including the case when nothing had to
+ * be changed) and a negative error code on failure.
+ */
+static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
+{
+       struct ubifs_ino_node *ino = c->sbuf;
+       unsigned char *p;
+       union ubifs_key key;
+       int err, lnum, offs, len;
+       loff_t i_size;
+       uint32_t crc;
+
+       /* Locate the inode node LEB number and offset */
+       ino_key_init(c, &key, e->inum);
+       err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
+       if (err)
+               goto out;
+       /*
+        * If the size recorded on the inode node is greater than or equal to
+        * the size that was calculated from nodes in the journal then don't
+        * change the inode.
+        */
+       i_size = le64_to_cpu(ino->size);
+       if (i_size >= e->d_size)
+               return 0;
+       /* Read the LEB */
+       err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
+       if (err)
+               goto out;
+       /* Change the size field and recalculate the CRC */
+       ino = c->sbuf + offs;
+       ino->size = cpu_to_le64(e->d_size);
+       len = le32_to_cpu(ino->ch.len);
+       /*
+        * The CRC covers the node except for its first 8 bytes.
+        * NOTE(review): presumably those are the magic and CRC fields of the
+        * common header - confirm against the node layout.
+        */
+       crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
+       ino->ch.crc = cpu_to_le32(crc);
+       /* Work out where data in the LEB ends and free space begins */
+       p = c->sbuf;
+       len = c->leb_size - 1;
+       /*
+        * Scan backwards over trailing 0xff bytes. There is no lower bound on
+        * 'len' here - NOTE(review): this relies on the LEB containing at
+        * least one non-0xff byte, which is expected because the inode node
+        * was just found in it.
+        */
+       while (p[len] == 0xff)
+               len -= 1;
+       /* Round the amount of data up to a multiple of the min. I/O unit size */
+       len = ALIGN(len + 1, c->min_io_size);
+       /* Atomically write the fixed LEB back again */
+       err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
+       if (err)
+               goto out;
+       dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs,
+                 i_size, e->d_size);
+       return 0;
+
+out:
+       /* Note: this prints 'e->i_size', not the 'i_size' read from the flash */
+       ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
+                  e->inum, e->i_size, e->d_size, err);
+       return err;
+}
+
+/**
+ * ubifs_recover_size - recover inode size.
+ * @c: UBIFS file-system description object
+ *
+ * This function attempts to fix inode size discrepancies identified by the
+ * 'ubifs_recover_size_accum()' function. It walks the size tree in inode
+ * number order and for each entry:
+ *     - if no inode node was seen in the journal, the inode is looked up in
+ *       the TNC; if it is not there either, all nodes of that inode are
+ *       removed from the TNC;
+ *     - if the inode exists and its size is smaller than the size implied by
+ *       its data nodes, the size is fixed: when mounted read-only (and the
+ *       inode is not pinned yet) the in-memory inode is corrected and pinned
+ *       via the entry, otherwise the inode node is fixed in place on flash.
+ * Entries are removed from the tree and freed as they are processed, except
+ * for entries which pin an inode - those stay in the tree.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int ubifs_recover_size(struct ubifs_info *c)
+{
+       struct rb_node *this = rb_first(&c->size_tree);
+
+       while (this) {
+               struct size_entry *e;
+               int err;
+
+               e = rb_entry(this, struct size_entry, rb);
+               if (!e->exists) {
+                       union ubifs_key key;
+
+                       /* No inode node in the journal - consult the TNC */
+                       ino_key_init(c, &key, e->inum);
+                       err = ubifs_tnc_lookup(c, &key, c->sbuf);
+                       if (err && err != -ENOENT)
+                               return err;
+                       if (err == -ENOENT) {
+                               /* Remove data nodes that have no inode */
+                               dbg_rcvry("removing ino %lu",
+                                         (unsigned long)e->inum);
+                               err = ubifs_tnc_remove_ino(c, e->inum);
+                               if (err)
+                                       return err;
+                       } else {
+                               struct ubifs_ino_node *ino = c->sbuf;
+
+                               e->exists = 1;
+                               e->i_size = le64_to_cpu(ino->size);
+                       }
+               }
+               if (e->exists && e->i_size < e->d_size) {
+                       if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
+                               /* Fix the inode size and pin it in memory */
+                               struct inode *inode;
+
+                               inode = ubifs_iget(c->vfs_sb, e->inum);
+                               if (IS_ERR(inode))
+                                       return PTR_ERR(inode);
+                               if (inode->i_size < e->d_size) {
+                                       /* Report old size -> new size */
+                                       dbg_rcvry("ino %lu size %lld -> %lld",
+                                                 (unsigned long)e->inum,
+                                                 inode->i_size, e->d_size);
+                                       inode->i_size = e->d_size;
+                                       ubifs_inode(inode)->ui_size = e->d_size;
+                                       /*
+                                        * Keep the reference and the tree
+                                        * entry - they are needed later to
+                                        * fix the size on flash.
+                                        */
+                                       e->inode = inode;
+                                       this = rb_next(this);
+                                       continue;
+                               }
+                               /* The in-memory size is fine, drop the inode */
+                               iput(inode);
+                       } else {
+                               /* Fix the size in place */
+                               err = fix_size_in_place(c, e);
+                               if (err)
+                                       return err;
+                               if (e->inode)
+                                       iput(e->inode);
+                       }
+               }
+               /* Advance before erasing, the entry is about to be freed */
+               this = rb_next(this);
+               rb_erase(&e->rb, &c->size_tree);
+               kfree(e);
+       }
+       return 0;
+}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c

new file mode 100644 (file)

index 0000000..7399692
--- /dev/null
+++ b/fs/ubifs/replay.c
@@ -0,0 +1,1075 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file contains journal replay code. It runs when the file-system is being
+ * mounted and requires no locking.
+ *
+ * The larger the journal, the longer it takes to scan and hence the longer it
+ * takes to mount UBIFS. This is why the journal has a limited size, which may
+ * be changed depending on the system requirements. On the other hand, a larger
+ * journal gives faster I/O because the index is written less frequently, so
+ * this is a trade-off. Also, the journal is indexed by the in-memory index
+ * (TNC), so the larger the journal, the more memory its index may consume.
+ */
+
+#include "ubifs.h"
+
+/*
+ * Replay flags. These are bit values which are stored in the 'flags' field of
+ * 'struct replay_entry' and tested with bitwise AND.
+ *
+ * REPLAY_DELETION: node was deleted
+ * REPLAY_REF: node is a reference node (such an entry is applied by updating
+ *             the LEB properties of the bud instead of the TNC)
+ */
+enum {
+       REPLAY_DELETION = 1,
+       REPLAY_REF = 2,
+};
+
+/**
+ * struct replay_entry - replay tree entry.
+ * @lnum: logical eraseblock number of the node
+ * @offs: node offset
+ * @len: node length
+ * @sqnum: node sequence number
+ * @flags: replay flags
+ * @rb: links the replay tree
+ * @key: node key
+ * @nm: directory entry name
+ * @old_size: truncation old size
+ * @new_size: truncation new size
+ * @free: amount of free space in a bud
+ * @dirty: amount of dirty space in a bud from padding and deletion nodes
+ *
+ * UBIFS journal replay must compare node sequence numbers, which means it must
+ * build a tree of node information to insert into the TNC.
+ *
+ * @nm, the @old_size/@new_size pair and the @free/@dirty pair share storage in
+ * an anonymous union, so only one of them is meaningful for a given entry.
+ */
+struct replay_entry {
+       int lnum;
+       int offs;
+       int len;
+       unsigned long long sqnum;
+       int flags;
+       struct rb_node rb;
+       union ubifs_key key;
+       union {
+               /* Entries with a hash key; 'nm.name' is separately allocated */
+               struct qstr nm;
+               /* Entries added by 'insert_node()' */
+               struct {
+                       loff_t old_size;
+                       loff_t new_size;
+               };
+               /* Entries with the REPLAY_REF flag, see 'set_bud_lprops()' */
+               struct {
+                       int free;
+                       int dirty;
+               };
+       };
+};
+
+/**
+ * struct bud_entry - entry in the list of buds to replay.
+ * @list: next bud in the list
+ * @bud: bud description object
+ * @free: free bytes in the bud
+ * @sqnum: reference node sequence number
+ *
+ * NOTE(review): the code which fills in and consumes these entries is not
+ * part of this excerpt; presumably @sqnum is the sequence number of the
+ * reference node which added @bud to the journal - confirm at the users.
+ */
+struct bud_entry {
+       struct list_head list;
+       struct ubifs_bud *bud;
+       int free;
+       unsigned long long sqnum;
+};
+
+/**
+ * set_bud_lprops - set free and dirty space used by a bud.
+ * @c: UBIFS file-system description object
+ * @r: replay entry of bud
+ *
+ * The LEB properties of LEB @r->lnum are updated with the free space found by
+ * the scan (@r->free) and with the dirty space from the replay entry added to
+ * the (possibly adjusted) dirty space already recorded in lprops. The LEB is
+ * also marked with the %LPROPS_TAKEN flag.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
+{
+       const struct ubifs_lprops *lp;
+       int err = 0, dirty;
+
+       ubifs_get_lprops(c);
+
+       lp = ubifs_lpt_lookup_dirty(c, r->lnum);
+       if (IS_ERR(lp)) {
+               err = PTR_ERR(lp);
+               goto out;
+       }
+
+       dirty = lp->dirty;
+       if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
+               /*
+                * The LEB was added to the journal with a starting offset of
+                * zero which means the LEB must have been empty. The LEB
+                * property values should be lp->free == c->leb_size and
+                * lp->dirty == 0, but that is not the case. The reason is that
+                * the LEB was garbage collected. The garbage collector resets
+                * the free and dirty space without recording it anywhere except
+                * lprops, so if there is not a commit then lprops does not have
+                * that information next time the file system is mounted.
+                *
+                * We do not need to adjust free space because the scan has told
+                * us the exact value which is recorded in the replay entry as
+                * r->free.
+                *
+                * However we do need to subtract from the dirty space the
+                * amount of space that the garbage collector reclaimed, which
+                * is the whole LEB minus the amount of space that was free.
+                */
+               dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
+                       lp->free, lp->dirty);
+               dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
+                       lp->free, lp->dirty);
+               dirty -= c->leb_size - lp->free;
+               /*
+                * If the replay order was perfect the dirty space would now be
+                * zero. The order is not perfect because the journal heads
+                * race with each other. This is not a problem but it does mean
+                * that the dirty space may temporarily exceed c->leb_size
+                * during the replay.
+                */
+               if (dirty != 0)
+                       dbg_msg("LEB %d lp: %d free %d dirty "
+                               "replay: %d free %d dirty", r->lnum, lp->free,
+                               lp->dirty, r->free, r->dirty);
+       }
+       /* Record the new free/dirty values and mark the LEB as taken */
+       lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
+                            lp->flags | LPROPS_TAKEN, 0);
+       if (IS_ERR(lp)) {
+               err = PTR_ERR(lp);
+               goto out;
+       }
+out:
+       /* Pairs with 'ubifs_get_lprops()' above on every path */
+       ubifs_release_lprops(c);
+       return err;
+}
+
+/**
+ * trun_remove_range - apply a replay entry for a truncation to the TNC.
+ * @c: UBIFS file-system description object
+ * @r: replay entry of truncation
+ *
+ * The range of data blocks which the truncation from @r->old_size down to
+ * @r->new_size cut off is worked out, and the data keys of that range are
+ * removed from the TNC. Returns whatever 'ubifs_tnc_remove_range()' returns.
+ */
+static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
+{
+       union ubifs_key min_key, max_key;
+       unsigned min_blk = r->new_size / UBIFS_BLOCK_SIZE;
+       unsigned max_blk = r->old_size / UBIFS_BLOCK_SIZE;
+       ino_t ino = key_inum(c, &r->key);
+
+       /* A new size inside a block keeps that block, so start at the next */
+       if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
+               min_blk += 1;
+
+       /* An old size on a block boundary ends in the previous block */
+       if (!(r->old_size & (UBIFS_BLOCK_SIZE - 1)))
+               max_blk -= 1;
+
+       data_key_init(c, &min_key, ino, min_blk);
+       data_key_init(c, &max_key, ino, max_blk);
+
+       return ubifs_tnc_remove_range(c, &min_key, &max_key);
+}
+
+/**
+ * apply_replay_entry - apply a replay entry to the TNC.
+ * @c: UBIFS file-system description object
+ * @r: replay entry to apply
+ *
+ * Reference node entries update the LEB properties of their bud. Entries with
+ * a hash key are added to or removed from the TNC together with their name.
+ * All other entries are added to or removed from the TNC by key and, if the
+ * file-system needs recovery, are also fed to 'ubifs_recover_size_accum()'.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
+{
+       int err, deletion = !!(r->flags & REPLAY_DELETION);
+       ino_t inum;
+
+       dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
+               r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
+
+       /* Set c->replay_sqnum to help deal with dangling branches. */
+       c->replay_sqnum = r->sqnum;
+
+       /* A reference node only affects the lprops of the bud */
+       if (r->flags & REPLAY_REF)
+               return set_bud_lprops(c, r);
+
+       /* Hashed keys need the name to tell colliding entries apart */
+       if (is_hash_key(c, &r->key)) {
+               if (deletion)
+                       return ubifs_tnc_remove_nm(c, &r->key, &r->nm);
+               return ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
+                                       r->len, &r->nm);
+       }
+
+       if (deletion) {
+               switch (key_type(c, &r->key)) {
+               case UBIFS_INO_KEY:
+                       inum = key_inum(c, &r->key);
+                       err = ubifs_tnc_remove_ino(c, inum);
+                       break;
+               case UBIFS_TRUN_KEY:
+                       err = trun_remove_range(c, r);
+                       break;
+               default:
+                       err = ubifs_tnc_remove(c, &r->key);
+                       break;
+               }
+       } else {
+               err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, r->len);
+       }
+       if (err)
+               return err;
+
+       if (!c->need_recovery)
+               return 0;
+
+       return ubifs_recover_size_accum(c, &r->key, deletion, r->new_size);
+}
+
+/**
+ * destroy_replay_tree - destroy the replay tree.
+ * @c: UBIFS file-system description object
+ *
+ * Every entry of the replay tree is freed, including the name buffer of
+ * entries which have a hash key, and @c->replay_tree is reset to an empty
+ * tree. The tree is taken apart leaf by leaf without re-balancing.
+ */
+static void destroy_replay_tree(struct ubifs_info *c)
+{
+       struct rb_node *node = c->replay_tree.rb_node;
+
+       while (node) {
+               struct rb_node *up;
+               struct replay_entry *leaf;
+
+               /* Keep going down until a node without children is reached */
+               if (node->rb_left) {
+                       node = node->rb_left;
+                       continue;
+               }
+               if (node->rb_right) {
+                       node = node->rb_right;
+                       continue;
+               }
+
+               leaf = rb_entry(node, struct replay_entry, rb);
+
+               /* Detach the leaf from its parent, if it has one */
+               up = rb_parent(node);
+               if (up) {
+                       if (up->rb_left == node)
+                               up->rb_left = NULL;
+                       else
+                               up->rb_right = NULL;
+               }
+
+               /* Only hash-key entries own a name buffer */
+               if (is_hash_key(c, &leaf->key))
+                       kfree(leaf->nm.name);
+               kfree(leaf);
+               node = up;
+       }
+       c->replay_tree = RB_ROOT;
+}
+
+/**
+ * apply_replay_tree - apply the replay tree to the TNC.
+ * @c: UBIFS file-system description object
+ *
+ * The entries of the replay tree are applied one by one in tree order,
+ * stopping at the first entry which fails. The CPU is offered to other tasks
+ * before each entry is applied.
+ *
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int apply_replay_tree(struct ubifs_info *c)
+{
+       struct rb_node *node;
+
+       for (node = rb_first(&c->replay_tree); node; node = rb_next(node)) {
+               struct replay_entry *entry;
+               int err;
+
+               cond_resched();
+
+               entry = rb_entry(node, struct replay_entry, rb);
+               err = apply_replay_entry(c, entry);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
+/**
+ * insert_node - insert a node to the replay tree.
+ * @c: UBIFS file-system description object
+ * @lnum: node logical eraseblock number
+ * @offs: node offset
+ * @len: node length
+ * @key: node key
+ * @sqnum: sequence number
+ * @deletion: non-zero if this is a deletion
+ * @used: number of bytes in use in a LEB
+ * @old_size: truncation old size
+ * @new_size: truncation new size
+ *
+ * This function inserts a scanned non-direntry node to the replay tree. The
+ * replay tree is an RB-tree containing @struct replay_entry elements which are
+ * indexed by the sequence number. The replay tree is applied at the very end
+ * of the replay process. Since the tree is sorted in sequence number order,
+ * the older modifications are applied first.
+ *
+ * As a side effect, @c->highest_inum is raised to the inode number of @key if
+ * that is not smaller, and, unless this is a deletion, @used is increased by
+ * @len aligned up to 8 bytes.
+ *
+ * This function returns zero in case of success, %-EINVAL if the tree already
+ * has an entry with sequence number @sqnum, and %-ENOMEM if the new entry
+ * could not be allocated.
+ */
+static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
+                      union ubifs_key *key, unsigned long long sqnum,
+                      int deletion, int *used, loff_t old_size,
+                      loff_t new_size)
+{
+       struct rb_node **link = &c->replay_tree.rb_node;
+       struct rb_node *up = NULL;
+       struct replay_entry *entry;
+
+       if (key_inum(c, key) >= c->highest_inum)
+               c->highest_inum = key_inum(c, key);
+
+       dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+
+       /* Find the slot for @sqnum; sequence numbers must be unique */
+       while (*link) {
+               up = *link;
+               entry = rb_entry(up, struct replay_entry, rb);
+               if (sqnum == entry->sqnum) {
+                       ubifs_err("duplicate sqnum in replay");
+                       return -EINVAL;
+               }
+               link = (sqnum < entry->sqnum) ? &up->rb_left : &up->rb_right;
+       }
+
+       entry = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       /* Deletions do not occupy live space in the LEB */
+       if (!deletion)
+               *used += ALIGN(len, 8);
+
+       entry->sqnum = sqnum;
+       entry->lnum = lnum;
+       entry->offs = offs;
+       entry->len = len;
+       entry->flags = (deletion ? REPLAY_DELETION : 0);
+       entry->old_size = old_size;
+       entry->new_size = new_size;
+       key_copy(c, key, &entry->key);
+
+       rb_link_node(&entry->rb, up, link);
+       rb_insert_color(&entry->rb, &c->replay_tree);
+       return 0;
+}
+
+/**
+ * insert_dent - insert a directory entry node into the replay tree.
+ * @c: UBIFS file-system description object
+ * @lnum: node logical eraseblock number
+ * @offs: node offset
+ * @len: node length
+ * @key: node key
+ * @name: directory entry name
+ * @nlen: directory entry name length
+ * @sqnum: sequence number
+ * @deletion: non-zero if this is a deletion
+ * @used: number of bytes in use in a LEB
+ *
+ * This function inserts a scanned directory entry node to the replay tree.
+ * The entry gets its own NUL-terminated copy of the @nlen bytes of @name. As
+ * a side effect, @c->highest_inum is raised to the inode number of @key if
+ * that is not smaller, and, unless this is a deletion, @used is increased by
+ * @len aligned up to 8 bytes.
+ *
+ * Returns zero in case of success, %-EINVAL if the tree already has an entry
+ * with sequence number @sqnum, and %-ENOMEM if memory could not be allocated.
+ *
+ * This function is also used for extended attribute entries because they are
+ * implemented as directory entry nodes.
+ */
+static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
+                      union ubifs_key *key, const char *name, int nlen,
+                      unsigned long long sqnum, int deletion, int *used)
+{
+       struct rb_node **link = &c->replay_tree.rb_node;
+       struct rb_node *up = NULL;
+       struct replay_entry *entry;
+       char *copy;
+
+       if (key_inum(c, key) >= c->highest_inum)
+               c->highest_inum = key_inum(c, key);
+
+       dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+
+       /* Find the slot for @sqnum; sequence numbers must be unique */
+       while (*link) {
+               up = *link;
+               entry = rb_entry(up, struct replay_entry, rb);
+               if (sqnum == entry->sqnum) {
+                       ubifs_err("duplicate sqnum in replay");
+                       return -EINVAL;
+               }
+               link = (sqnum < entry->sqnum) ? &up->rb_left : &up->rb_right;
+       }
+
+       entry = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+       copy = kmalloc(nlen + 1, GFP_KERNEL);
+       if (!copy) {
+               kfree(entry);
+               return -ENOMEM;
+       }
+
+       /* Deletions do not occupy live space in the LEB */
+       if (!deletion)
+               *used += ALIGN(len, 8);
+
+       /* Private, NUL-terminated copy of the name */
+       memcpy(copy, name, nlen);
+       copy[nlen] = '\0';
+
+       entry->sqnum = sqnum;
+       entry->lnum = lnum;
+       entry->offs = offs;
+       entry->len = len;
+       entry->nm.len = nlen;
+       entry->nm.name = copy;
+       entry->flags = (deletion ? REPLAY_DELETION : 0);
+       key_copy(c, key, &entry->key);
+
+       ubifs_assert(!*link);
+       rb_link_node(&entry->rb, up, link);
+       rb_insert_color(&entry->rb, &c->replay_tree);
+       return 0;
+}
+
+/**
+ * ubifs_validate_entry - validate directory or extended attribute entry node.
+ * @c: UBIFS file-system description object
+ * @dent: the node to validate
+ *
+ * This function validates directory or extended attribute entry node @dent.
+ * The node length, the entry type, the name (its length, its terminating zero
+ * and the absence of embedded zeroes) and the target inode number are checked
+ * first, then the type of the key.
+ *
+ * Returns zero if the node is all right and a %-EINVAL if not.
+ */
+int ubifs_validate_entry(struct ubifs_info *c,
+                        const struct ubifs_dent_node *dent)
+{
+       int ktype = key_type_flash(c, dent->key);
+       int nlen = le16_to_cpu(dent->nlen);
+       int bad;
+
+       /*
+        * The checks are chained with '||' so that a later one is evaluated
+        * only if all earlier ones passed - e.g. the name is not looked at
+        * unless its length is within limits.
+        */
+       bad = le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1;
+       bad = bad || dent->type >= UBIFS_ITYPES_CNT;
+       bad = bad || nlen > UBIFS_MAX_NLEN;
+       bad = bad || dent->name[nlen] != 0;
+       bad = bad || strnlen(dent->name, nlen) != nlen;
+       bad = bad || le64_to_cpu(dent->inum) > MAX_INUM;
+       if (bad) {
+               ubifs_err("bad %s node", ktype == UBIFS_DENT_KEY ?
+                         "directory entry" : "extended attribute entry");
+               return -EINVAL;
+       }
+
+       if (ktype == UBIFS_DENT_KEY || ktype == UBIFS_XENT_KEY)
+               return 0;
+
+       ubifs_err("bad key type %d", ktype);
+       return -EINVAL;
+}
+
+/**
+ * replay_bud - replay a bud logical eraseblock.
+ * @c: UBIFS file-system description object
+ * @lnum: bud logical eraseblock number to replay
+ * @offs: bud start offset
+ * @jhead: journal head to which this bud belongs
+ * @free: amount of free space in the bud is returned here
+ * @dirty: amount of dirty space from padding and deletion nodes is returned
+ * here
+ *
+ * The bud is scanned (or recovered, if @c->need_recovery is set) and every
+ * node found in it is inserted into the replay tree. @free and @dirty are
+ * only written when all nodes have been processed successfully.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
+                     int *free, int *dirty)
+{
+       int err = 0, used = 0;
+       struct ubifs_scan_leb *sleb;
+       struct ubifs_scan_node *snod;
+       struct ubifs_bud *bud;
+
+       dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
+       if (c->need_recovery)
+               sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
+       else
+               sleb = ubifs_scan(c, lnum, offs, c->sbuf);
+       if (IS_ERR(sleb))
+               return PTR_ERR(sleb);
+
+       /*
+        * The bud does not have to start from offset zero - the beginning of
+        * the 'lnum' LEB may contain previously committed data. One of the
+        * things we have to do in replay is to correctly update lprops with
+        * newer information about this LEB.
+        *
+        * At this point lprops thinks that this LEB has 'c->leb_size - offs'
+        * bytes of free space because it only contains information about
+        * committed data.
+        *
+        * But we know that real amount of free space is 'c->leb_size -
+        * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
+        * 'sleb->endpt' is used by bud data. We have to correctly calculate
+        * how much of these data are dirty and update lprops with this
+        * information.
+        *
+        * The dirt in that LEB region is comprised of padding nodes, deletion
+        * nodes, truncation nodes and nodes which are obsoleted by subsequent
+        * nodes in this LEB. So instead of calculating clean space, we
+        * calculate used space ('used' variable).
+        */
+
+       list_for_each_entry(snod, &sleb->nodes, list) {
+               int deletion = 0;
+
+               cond_resched();
+
+               if (snod->sqnum >= SQNUM_WATERMARK) {
+                       ubifs_err("file system's life ended");
+                       goto out_dump;
+               }
+
+               /* Track the highest sequence number seen so far */
+               if (snod->sqnum > c->max_sqnum)
+                       c->max_sqnum = snod->sqnum;
+
+               switch (snod->type) {
+               case UBIFS_INO_NODE:
+               {
+                       struct ubifs_ino_node *ino = snod->node;
+                       loff_t new_size = le64_to_cpu(ino->size);
+
+                       /* An inode with no links is replayed as a deletion */
+                       if (le32_to_cpu(ino->nlink) == 0)
+                               deletion = 1;
+                       err = insert_node(c, lnum, snod->offs, snod->len,
+                                         &snod->key, snod->sqnum, deletion,
+                                         &used, 0, new_size);
+                       break;
+               }
+               case UBIFS_DATA_NODE:
+               {
+                       struct ubifs_data_node *dn = snod->node;
+                       /*
+                        * Force the offset arithmetic to 'loff_t' so that
+                        * "block number times block size" cannot wrap in a
+                        * narrower type for data nodes of large files.
+                        */
+                       loff_t new_size = le32_to_cpu(dn->size) +
+                                         (loff_t)key_block(c, &snod->key) *
+                                         UBIFS_BLOCK_SIZE;
+
+                       err = insert_node(c, lnum, snod->offs, snod->len,
+                                         &snod->key, snod->sqnum, deletion,
+                                         &used, 0, new_size);
+                       break;
+               }
+               case UBIFS_DENT_NODE:
+               case UBIFS_XENT_NODE:
+               {
+                       struct ubifs_dent_node *dent = snod->node;
+
+                       err = ubifs_validate_entry(c, dent);
+                       if (err)
+                               goto out_dump;
+
+                       /* An entry with zero inode number is a deletion */
+                       err = insert_dent(c, lnum, snod->offs, snod->len,
+                                         &snod->key, dent->name,
+                                         le16_to_cpu(dent->nlen), snod->sqnum,
+                                         !le64_to_cpu(dent->inum), &used);
+                       break;
+               }
+               case UBIFS_TRUN_NODE:
+               {
+                       struct ubifs_trun_node *trun = snod->node;
+                       loff_t old_size = le64_to_cpu(trun->old_size);
+                       loff_t new_size = le64_to_cpu(trun->new_size);
+                       union ubifs_key key;
+
+                       /* Validate truncation node */
+                       if (old_size < 0 || old_size > c->max_inode_sz ||
+                           new_size < 0 || new_size > c->max_inode_sz ||
+                           old_size <= new_size) {
+                               ubifs_err("bad truncation node");
+                               goto out_dump;
+                       }
+
+                       /*
+                        * Create a fake truncation key just to use the same
+                        * functions which expect nodes to have keys.
+                        */
+                       trun_key_init(c, &key, le32_to_cpu(trun->inum));
+                       err = insert_node(c, lnum, snod->offs, snod->len,
+                                         &key, snod->sqnum, 1, &used,
+                                         old_size, new_size);
+                       break;
+               }
+               default:
+                       ubifs_err("unexpected node type %d in bud LEB %d:%d",
+                                 snod->type, lnum, snod->offs);
+                       err = -EINVAL;
+                       goto out_dump;
+               }
+               if (err)
+                       goto out;
+       }
+
+       bud = ubifs_search_bud(c, lnum);
+       if (!bud)
+               BUG();
+
+       ubifs_assert(sleb->endpt - offs >= used);
+       ubifs_assert(sleb->endpt % c->min_io_size == 0);
+
+       /*
+        * Position the journal head write-buffer right after the scanned data,
+        * but only if at least one more min. I/O unit fits into the LEB and
+        * the file-system is not mounted read-only.
+        */
+       if (sleb->endpt + c->min_io_size <= c->leb_size &&
+           !(c->vfs_sb->s_flags & MS_RDONLY))
+               err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
+                                            sleb->endpt, UBI_SHORTTERM);
+
+       *dirty = sleb->endpt - offs - used;
+       *free = c->leb_size - sleb->endpt;
+
+out:
+       ubifs_scan_destroy(sleb);
+       return err;
+
+out_dump:
+       ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
+       dbg_dump_node(c, snod->node);
+       ubifs_scan_destroy(sleb);
+       return -EINVAL;
+}
+
+/**
+ * insert_ref_node - insert a reference node to the replay tree.
+ * @c: UBIFS file-system description object
+ * @lnum: node logical eraseblock number
+ * @offs: node offset
+ * @sqnum: sequence number
+ * @free: amount of free space in bud
+ * @dirty: amount of dirty space from padding and deletion nodes
+ *
+ * The replay tree is ordered by sequence number. This function adds a new
+ * %REPLAY_REF entry for @sqnum and returns zero in case of success or a
+ * negative error code in case of failure (%-EINVAL if an entry with the same
+ * sequence number is already in the tree, %-ENOMEM if allocation fails).
+ */
+static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
+                          unsigned long long sqnum, int free, int dirty)
+{
+       struct rb_node **link = &c->replay_tree.rb_node, *parent = NULL;
+       struct replay_entry *entry;
+
+       dbg_mnt("add ref LEB %d:%d", lnum, offs);
+
+       /* Descend to the leaf position where the new entry belongs */
+       while (*link) {
+               parent = *link;
+               entry = rb_entry(parent, struct replay_entry, rb);
+               if (sqnum == entry->sqnum) {
+                       ubifs_err("duplicate sqnum in replay tree");
+                       return -EINVAL;
+               }
+               if (sqnum < entry->sqnum)
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       entry->flags = REPLAY_REF;
+       entry->sqnum = sqnum;
+       entry->lnum = lnum;
+       entry->offs = offs;
+       entry->free = free;
+       entry->dirty = dirty;
+
+       rb_link_node(&entry->rb, parent, link);
+       rb_insert_color(&entry->rb, &c->replay_tree);
+       return 0;
+}
+
+/**
+ * replay_buds - replay all buds.
+ * @c: UBIFS file-system description object
+ *
+ * Walks the @c->replay_buds list in order. Each bud is replayed and then a
+ * reference entry carrying the free and dirty space of that bud is inserted
+ * into the replay tree. Returns zero in case of success, or the error code of
+ * the first step that failed.
+ */
+static int replay_buds(struct ubifs_info *c)
+{
+       struct bud_entry *entry;
+       int uninitialized_var(free), uninitialized_var(dirty);
+
+       list_for_each_entry(entry, &c->replay_buds, list) {
+               const struct ubifs_bud *bud = entry->bud;
+               int err;
+
+               err = replay_bud(c, bud->lnum, bud->start, bud->jhead,
+                                &free, &dirty);
+               if (!err)
+                       err = insert_ref_node(c, bud->lnum, bud->start,
+                                             entry->sqnum, free, dirty);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/**
+ * destroy_bud_list - destroy the list of buds to replay.
+ * @c: UBIFS file-system description object
+ *
+ * Unlinks and frees every entry of @c->replay_buds. Only the list entries
+ * are freed here, the bud objects they point to are left untouched.
+ */
+static void destroy_bud_list(struct ubifs_info *c)
+{
+       struct bud_entry *entry, *next;
+
+       /* The "safe" iterator is needed because entries are freed on the go */
+       list_for_each_entry_safe(entry, next, &c->replay_buds, list) {
+               list_del(&entry->list);
+               kfree(entry);
+       }
+}
+
+/**
+ * add_replay_bud - add a bud to the list of buds to replay.
+ * @c: UBIFS file-system description object
+ * @lnum: bud logical eraseblock number to replay
+ * @offs: bud start offset
+ * @jhead: journal head to which this bud belongs
+ * @sqnum: reference node sequence number
+ *
+ * A new bud object is allocated, handed over to 'ubifs_add_bud()', and a
+ * list entry pointing to it is appended to @c->replay_buds. This function
+ * returns zero in case of success and %-ENOMEM if memory allocation fails.
+ */
+static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
+                         unsigned long long sqnum)
+{
+       struct ubifs_bud *new_bud;
+       struct bud_entry *entry;
+
+       dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
+
+       new_bud = kmalloc(sizeof(*new_bud), GFP_KERNEL);
+       if (!new_bud)
+               return -ENOMEM;
+
+       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               goto out_free;
+
+       new_bud->lnum = lnum;
+       new_bud->start = offs;
+       new_bud->jhead = jhead;
+       ubifs_add_bud(c, new_bud);
+
+       entry->bud = new_bud;
+       entry->sqnum = sqnum;
+       list_add_tail(&entry->list, &c->replay_buds);
+
+       return 0;
+
+out_free:
+       /* Nothing has been published yet, so just drop the bud */
+       kfree(new_bud);
+       return -ENOMEM;
+}
+
+/**
+ * validate_ref - validate a reference node.
+ * @c: UBIFS file-system description object
+ * @ref: the reference node to validate
+ *
+ * This function returns %1 if a bud reference already exists for the LEB. %0 is
+ * returned if the reference node is new, otherwise %-EINVAL is returned if
+ * validation failed.
+ */
+static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
+{
+       struct ubifs_bud *bud;
+       int leb = le32_to_cpu(ref->lnum);
+       unsigned int start = le32_to_cpu(ref->offs);
+       unsigned int head = le32_to_cpu(ref->jhead);
+
+       if (head >= c->jhead_cnt)
+               return -EINVAL;
+
+       /* The referred LEB has to lie between 'main_first' and 'leb_cnt' */
+       if (leb >= c->leb_cnt || leb < c->main_first)
+               return -EINVAL;
+
+       /*
+        * ref->offs may point to the end of LEB when the journal head points
+        * to the end of LEB and we write reference node for it during commit.
+        * So an offset equal to 'c->leb_size' is accepted and only offsets
+        * beyond it, or not aligned to the min. I/O unit size, are rejected.
+        */
+       if (start > c->leb_size || (start & (c->min_io_size - 1)))
+               return -EINVAL;
+
+       /* Make sure we have not already looked at this bud */
+       bud = ubifs_search_bud(c, leb);
+       if (!bud)
+               return 0;
+
+       if (bud->jhead == head && bud->start <= start)
+               return 1;
+
+       ubifs_err("bud at LEB %d:%d was already referred", leb, start);
+       return -EINVAL;
+}
+
+/**
+ * replay_log_leb - replay a log logical eraseblock.
+ * @c: UBIFS file-system description object
+ * @lnum: log logical eraseblock to replay
+ * @offs: offset to start replaying from
+ * @sbuf: scan buffer
+ *
+ * The LEB is scanned; every reference node which passes validation and does
+ * not refer to an already known bud is queued with 'add_replay_bud()'. The
+ * log head position in @c is updated when the scan ended at a non-zero
+ * offset or the current head offset is not below the LEB size.
+ *
+ * This function replays a log LEB and returns zero in case of success, %1 if
+ * this is the last LEB in the log, and a negative error code in case of
+ * failure.
+ */
+static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
+{
+       int err;
+       struct ubifs_scan_leb *sleb;
+       struct ubifs_scan_node *snod;
+       const struct ubifs_cs_node *node;
+
+       dbg_mnt("replay log LEB %d:%d", lnum, offs);
+       sleb = ubifs_scan(c, lnum, offs, sbuf);
+       if (IS_ERR(sleb)) {
+               /* A failed scan is retried via recovery, if that is allowed */
+               if (c->need_recovery)
+                       sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
+               if (IS_ERR(sleb))
+                       return PTR_ERR(sleb);
+       }
+
+       /* An empty LEB means the end of the log has been reached */
+       if (sleb->nodes_cnt == 0) {
+               err = 1;
+               goto out;
+       }
+
+       node = sleb->buf;
+
+       snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
+       if (c->cs_sqnum == 0) {
+               /*
+                * This is the first log LEB we are looking at, make sure that
+                * the first node is a commit start node. Also record its
+                * sequence number so that UBIFS can determine where the log
+                * ends, because all nodes which belong to the current log
+                * have higher sequence numbers.
+                */
+               if (snod->type != UBIFS_CS_NODE) {
+                       dbg_err("first log node at LEB %d:%d is not CS node",
+                               lnum, offs);
+                       goto out_dump;
+               }
+               if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
+                       dbg_err("first CS node at LEB %d:%d has wrong "
+                               "commit number %llu expected %llu",
+                               lnum, offs,
+                               (unsigned long long)le64_to_cpu(node->cmt_no),
+                               c->cmt_no);
+                       goto out_dump;
+               }
+
+               c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
+               dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
+       }
+
+       if (snod->sqnum < c->cs_sqnum) {
+               /*
+                * This means that we reached end of log and now
+                * look to the older log data, which was already
+                * committed but the eraseblock was not erased (UBIFS
+                * only unmaps it). So this basically means we have to
+                * exit with "end of log" code.
+                */
+               err = 1;
+               goto out;
+       }
+
+       /* Make sure the first node sits at offset zero of the LEB */
+       if (snod->offs != 0) {
+               dbg_err("first node is not at zero offset");
+               goto out_dump;
+       }
+
+       list_for_each_entry(snod, &sleb->nodes, list) {
+
+               cond_resched();
+
+               if (snod->sqnum >= SQNUM_WATERMARK) {
+                       ubifs_err("file system's life ended");
+                       goto out_dump;
+               }
+
+               if (snod->sqnum < c->cs_sqnum) {
+                       dbg_err("bad sqnum %llu, commit sqnum %llu",
+                               snod->sqnum, c->cs_sqnum);
+                       goto out_dump;
+               }
+
+               /* Track the highest sequence number seen so far */
+               if (snod->sqnum > c->max_sqnum)
+                       c->max_sqnum = snod->sqnum;
+
+               switch (snod->type) {
+               case UBIFS_REF_NODE: {
+                       const struct ubifs_ref_node *ref = snod->node;
+
+                       err = validate_ref(c, ref);
+                       if (err == 1)
+                               break; /* Already have this bud */
+                       if (err)
+                               goto out_dump;
+
+                       err = add_replay_bud(c, le32_to_cpu(ref->lnum),
+                                            le32_to_cpu(ref->offs),
+                                            le32_to_cpu(ref->jhead),
+                                            snod->sqnum);
+                       if (err)
+                               goto out;
+
+                       break;
+               }
+               case UBIFS_CS_NODE:
+                       /* Make sure it sits at the beginning of LEB */
+                       if (snod->offs != 0) {
+                               ubifs_err("unexpected node in log");
+                               goto out_dump;
+                       }
+                       break;
+               default:
+                       ubifs_err("unexpected node in log");
+                       goto out_dump;
+               }
+       }
+
+       if (sleb->endpt || c->lhead_offs >= c->leb_size) {
+               c->lhead_lnum = lnum;
+               c->lhead_offs = sleb->endpt;
+       }
+
+       /* A scan which ended at offset zero is reported as "end of log" */
+       err = !sleb->endpt;
+out:
+       ubifs_scan_destroy(sleb);
+       return err;
+
+out_dump:
+       ubifs_err("log error detected while replaying the log at LEB %d:%d",
+                 lnum, offs + snod->offs);
+       dbg_dump_node(c, snod->node);
+       ubifs_scan_destroy(sleb);
+       return -EINVAL;
+}
+
+/**
+ * take_ihead - update the status of the index head in lprops to 'taken'.
+ * @c: UBIFS file-system description object
+ *
+ * Looks up the LEB properties of @c->ihead_lnum, remembers the amount of free
+ * space and sets the %LPROPS_TAKEN flag on that LEB. The whole sequence runs
+ * between 'ubifs_get_lprops()' and 'ubifs_release_lprops()'.
+ *
+ * This function returns the amount of free space in the index head LEB or a
+ * negative error code.
+ */
+static int take_ihead(struct ubifs_info *c)
+{
+       const struct ubifs_lprops *lprops;
+       int ret;
+
+       ubifs_get_lprops(c);
+
+       lprops = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
+       if (IS_ERR(lprops)) {
+               ret = PTR_ERR(lprops);
+               goto out_release;
+       }
+
+       /* Sample the free space before the properties are changed */
+       ret = lprops->free;
+
+       lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC,
+                                lprops->flags | LPROPS_TAKEN, 0);
+       if (IS_ERR(lprops))
+               ret = PTR_ERR(lprops);
+
+out_release:
+       ubifs_release_lprops(c);
+       return ret;
+}
+
+/**
+ * ubifs_replay_journal - replay journal.
+ * @c: UBIFS file-system description object
+ *
+ * This function scans the journal, replays and cleans it up. It makes sure all
+ * memory data structures related to uncommitted journal are built (dirty TNC
+ * tree, tree of buds, modified lprops, etc). Returns zero in case of success
+ * and a negative error code in case of failure.
+ */
+int ubifs_replay_journal(struct ubifs_info *c)
+{
+       int err, i, lnum, offs, free;
+       void *sbuf;
+
+       BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
+
+       /* Update the status of the index head in lprops to 'taken' */
+       free = take_ihead(c);
+       if (free < 0)
+               return free; /* Error code */
+
+       /* The index head offset has to agree with the lprops free space */
+       if (c->leb_size - free != c->ihead_offs) {
+               ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
+                         c->ihead_offs);
+               return -EINVAL;
+       }
+
+       sbuf = vmalloc(c->leb_size);
+       if (!sbuf)
+               return -ENOMEM;
+
+       dbg_mnt("start replaying the journal");
+
+       c->replaying = 1;
+
+       c->ltail_lnum = c->lhead_lnum;
+       lnum = c->ltail_lnum;
+       offs = c->lhead_offs;
+
+       /* Visit at most 'c->log_lebs' log LEBs, starting from the log head */
+       i = 0;
+       while (i < c->log_lebs) {
+               if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
+                       /*
+                        * The log is logically circular, we reached the last
+                        * LEB, switch to the first one.
+                        */
+                       lnum = UBIFS_LOG_LNUM;
+                       offs = 0;
+               }
+
+               err = replay_log_leb(c, lnum, offs, sbuf);
+               if (err) {
+                       if (err == 1)
+                               /* We hit the end of the log */
+                               break;
+                       goto out;
+               }
+
+               /* Only the first LEB is replayed from a non-zero offset */
+               offs = 0;
+               lnum += 1;
+               i += 1;
+       }
+
+       err = replay_buds(c);
+       if (err)
+               goto out;
+
+       err = apply_replay_tree(c);
+       if (err)
+               goto out;
+
+       ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
+       dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
+               "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
+               c->highest_inum);
+out:
+       destroy_replay_tree(c);
+       destroy_bud_list(c);
+       vfree(sbuf);
+       c->replaying = 0;
+       return err;
+}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c

new file mode 100644 (file)

index 0000000..2bf753b
--- /dev/null
+++ b/fs/ubifs/sb.c
@@ -0,0 +1,629 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file implements UBIFS superblock. The superblock is stored at the first
+ * LEB of the volume and is never changed by UBIFS. Only user-space tools may
+ * change it. The superblock node mostly contains geometry information.
+ */
+
+#include "ubifs.h"
+#include <linux/random.h>
+
+/*
+ * Default journal size in logical eraseblocks as a percent of total
+ * flash size.
+ */
+#define DEFAULT_JNL_PERCENT 5
+
+/* Default maximum journal size in bytes */
+#define DEFAULT_MAX_JNL (32*1024*1024)
+
+/* Default indexing tree fanout */
+#define DEFAULT_FANOUT 8
+
+/* Default number of data journal heads */
+#define DEFAULT_JHEADS_CNT 1
+
+/* Default positions of different LEBs in the main area */
+#define DEFAULT_IDX_LEB  0
+#define DEFAULT_DATA_LEB 1
+#define DEFAULT_GC_LEB   2
+
+/* Default number of LEB numbers in LPT's save table */
+#define DEFAULT_LSAVE_CNT 256
+
+/* Default reserved pool size as a percent of maximum free space */
+#define DEFAULT_RP_PERCENT 5
+
+/* The default maximum size of reserved pool in bytes */
+#define DEFAULT_MAX_RP_SIZE (5*1024*1024)
+
+/* Default time granularity in nanoseconds */
+#define DEFAULT_TIME_GRAN 1000000000
+
+/**
+ * create_default_filesystem - format empty UBI volume.
+ * @c: UBIFS file-system description object
+ *
+ * This function creates default empty file-system. Returns zero in case of
+ * success and a negative error code in case of failure.
+ */
+static int create_default_filesystem(struct ubifs_info *c)
+{
+       struct ubifs_sb_node *sup;
+       struct ubifs_mst_node *mst;
+       struct ubifs_idx_node *idx;
+       struct ubifs_branch *br;
+       struct ubifs_ino_node *ino;
+       struct ubifs_cs_node *cs;
+       union ubifs_key key;
+       int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
+       int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
+       int min_leb_cnt = UBIFS_MIN_LEB_CNT;
+       uint64_t tmp64, main_bytes;
+
+       /* Some functions called from here depend on the @c->key_len field */
+       c->key_len = UBIFS_SK_LEN;
+
+       /*
+        * First of all, we have to calculate default file-system geometry -
+        * log size, journal size, etc.
+        */
+       if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT)
+               /* We can first multiply then divide and have no overflow */
+               jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100;
+       else
+               jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT;
+
+       if (jnl_lebs < UBIFS_MIN_JNL_LEBS)
+               jnl_lebs = UBIFS_MIN_JNL_LEBS;
+       if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL)
+               jnl_lebs = DEFAULT_MAX_JNL / c->leb_size;
+
+       /*
+        * The log should be large enough to fit reference nodes for all bud
+        * LEBs. Because buds do not have to start from the beginning of LEBs
+        * (half of the LEB may contain committed data), the log should
+        * generally be larger, make it twice as large.
+        */
+       tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1;
+       log_lebs = tmp / c->leb_size;
+       /* Plus one LEB reserved for commit */
+       log_lebs += 1;
+       if (c->leb_cnt - min_leb_cnt > 8) {
+               /* And some extra space to allow writes while committing */
+               log_lebs += 1;
+               min_leb_cnt += 1;
+       }
+
+       max_buds = jnl_lebs - log_lebs;
+       if (max_buds < UBIFS_MIN_BUD_LEBS)
+               max_buds = UBIFS_MIN_BUD_LEBS;
+
+       /*
+        * Orphan nodes are stored in a separate area. One node can store a lot
+        * of orphan inode numbers, but when new orphan comes we just add a new
+        * orphan node. At some point the nodes are consolidated into one
+        * orphan node.
+        */
+       orph_lebs = UBIFS_MIN_ORPH_LEBS;
+#ifdef CONFIG_UBIFS_FS_DEBUG
+       if (c->leb_cnt - min_leb_cnt > 1)
+               /*
+                * For debugging purposes it is better to have at least 2
+                * orphan LEBs, because the orphan subsystem would need to do
+                * consolidations and would be stressed more.
+                */
+               orph_lebs += 1;
+#endif
+
+       main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs;
+       main_lebs -= orph_lebs;
+
+       lpt_first = UBIFS_LOG_LNUM + log_lebs;
+       c->lsave_cnt = DEFAULT_LSAVE_CNT;
+       c->max_leb_cnt = c->leb_cnt;
+       err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs,
+                                   &big_lpt);
+       if (err)
+               return err;
+
+       dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first,
+               lpt_first + lpt_lebs - 1);
+
+       main_first = c->leb_cnt - main_lebs;
+
+       /* Create default superblock */
+       tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
+       sup = kzalloc(tmp, GFP_KERNEL);
+       if (!sup)
+               return -ENOMEM;
+
+       tmp64 = (uint64_t)max_buds * c->leb_size;
+       if (big_lpt)
+               sup_flags |= UBIFS_FLG_BIGLPT;
+
+       sup->ch.node_type  = UBIFS_SB_NODE;
+       sup->key_hash      = UBIFS_KEY_HASH_R5;
+       sup->flags         = cpu_to_le32(sup_flags);
+       sup->min_io_size   = cpu_to_le32(c->min_io_size);
+       sup->leb_size      = cpu_to_le32(c->leb_size);
+       sup->leb_cnt       = cpu_to_le32(c->leb_cnt);
+       sup->max_leb_cnt   = cpu_to_le32(c->max_leb_cnt);
+       sup->max_bud_bytes = cpu_to_le64(tmp64);
+       sup->log_lebs      = cpu_to_le32(log_lebs);
+       sup->lpt_lebs      = cpu_to_le32(lpt_lebs);
+       sup->orph_lebs     = cpu_to_le32(orph_lebs);
+       sup->jhead_cnt     = cpu_to_le32(DEFAULT_JHEADS_CNT);
+       sup->fanout        = cpu_to_le32(DEFAULT_FANOUT);
+       sup->lsave_cnt     = cpu_to_le32(c->lsave_cnt);
+       sup->fmt_version   = cpu_to_le32(UBIFS_FORMAT_VERSION);
+       sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
+       sup->time_gran     = cpu_to_le32(DEFAULT_TIME_GRAN);
+
+       generate_random_uuid(sup->uuid);
+
+       main_bytes = (uint64_t)main_lebs * c->leb_size;
+       tmp64 = main_bytes * DEFAULT_RP_PERCENT;
+       do_div(tmp64, 100);
+       if (tmp64 > DEFAULT_MAX_RP_SIZE)
+               tmp64 = DEFAULT_MAX_RP_SIZE;
+       sup->rp_size = cpu_to_le64(tmp64);
+
+       err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);
+       kfree(sup);
+       if (err)
+               return err;
+
+       dbg_gen("default superblock created at LEB 0:0");
+
+       /* Create default master node */
+       mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);
+       if (!mst)
+               return -ENOMEM;
+
+       mst->ch.node_type = UBIFS_MST_NODE;
+       mst->log_lnum     = cpu_to_le32(UBIFS_LOG_LNUM);
+       mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO);
+       mst->cmt_no       = 0;
+       mst->root_lnum    = cpu_to_le32(main_first + DEFAULT_IDX_LEB);
+       mst->root_offs    = 0;
+       tmp = ubifs_idx_node_sz(c, 1);
+       mst->root_len     = cpu_to_le32(tmp);
+       mst->gc_lnum      = cpu_to_le32(main_first + DEFAULT_GC_LEB);
+       mst->ihead_lnum   = cpu_to_le32(main_first + DEFAULT_IDX_LEB);
+       mst->ihead_offs   = cpu_to_le32(ALIGN(tmp, c->min_io_size));
+       mst->index_size   = cpu_to_le64(ALIGN(tmp, 8));
+       mst->lpt_lnum     = cpu_to_le32(c->lpt_lnum);
+       mst->lpt_offs     = cpu_to_le32(c->lpt_offs);
+       mst->nhead_lnum   = cpu_to_le32(c->nhead_lnum);
+       mst->nhead_offs   = cpu_to_le32(c->nhead_offs);
+       mst->ltab_lnum    = cpu_to_le32(c->ltab_lnum);
+       mst->ltab_offs    = cpu_to_le32(c->ltab_offs);
+       mst->lsave_lnum   = cpu_to_le32(c->lsave_lnum);
+       mst->lsave_offs   = cpu_to_le32(c->lsave_offs);
+       mst->lscan_lnum   = cpu_to_le32(main_first);
+       mst->empty_lebs   = cpu_to_le32(main_lebs - 2);
+       mst->idx_lebs     = cpu_to_le32(1);
+       mst->leb_cnt      = cpu_to_le32(c->leb_cnt);
+
+       /* Calculate lprops statistics */
+       tmp64 = main_bytes;
+       tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);
+       tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
+       mst->total_free = cpu_to_le64(tmp64);
+
+       tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);
+       ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) -
+                         UBIFS_INO_NODE_SZ;
+       tmp64 += ino_waste;
+       tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8);
+       mst->total_dirty = cpu_to_le64(tmp64);
+
+       /*  The indexing LEB does not contribute to dark space */
+       tmp64 = (c->main_lebs - 1) * c->dark_wm;
+       mst->total_dark = cpu_to_le64(tmp64);
+
+       mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ);
+
+       err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0,
+                              UBI_UNKNOWN);
+       if (err) {
+               kfree(mst);
+               return err;
+       }
+       err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0,
+                              UBI_UNKNOWN);
+       kfree(mst);
+       if (err)
+               return err;
+
+       dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM);
+
+       /* Create the root indexing node */
+       tmp = ubifs_idx_node_sz(c, 1);
+       idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);
+       if (!idx)
+               return -ENOMEM;
+
+       c->key_fmt = UBIFS_SIMPLE_KEY_FMT;
+       c->key_hash = key_r5_hash;
+
+       idx->ch.node_type = UBIFS_IDX_NODE;
+       idx->child_cnt = cpu_to_le16(1);
+       ino_key_init(c, &key, UBIFS_ROOT_INO);
+       br = ubifs_idx_branch(c, idx, 0);
+       key_write_idx(c, &key, &br->key);
+       br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB);
+       br->len  = cpu_to_le32(UBIFS_INO_NODE_SZ);
+       err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0,
+                              UBI_UNKNOWN);
+       kfree(idx);
+       if (err)
+               return err;
+
+       dbg_gen("default root indexing node created LEB %d:0",
+               main_first + DEFAULT_IDX_LEB);
+
+       /* Create default root inode */
+       tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
+       ino = kzalloc(tmp, GFP_KERNEL);
+       if (!ino)
+               return -ENOMEM;
+
+       ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO);
+       ino->ch.node_type = UBIFS_INO_NODE;
+       ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);
+       ino->nlink = cpu_to_le32(2);
+       tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
+       ino->atime_sec   = tmp;
+       ino->ctime_sec   = tmp;
+       ino->mtime_sec   = tmp;
+       ino->atime_nsec  = 0;
+       ino->ctime_nsec  = 0;
+       ino->mtime_nsec  = 0;
+       ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO);
+       ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ);
+
+       /* Set compression enabled by default */
+       ino->flags = cpu_to_le32(UBIFS_COMPR_FL);
+
+       err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
+                              main_first + DEFAULT_DATA_LEB, 0,
+                              UBI_UNKNOWN);
+       kfree(ino);
+       if (err)
+               return err;
+
+       dbg_gen("root inode created at LEB %d:0",
+               main_first + DEFAULT_DATA_LEB);
+
+       /*
+        * The first node in the log has to be the commit start node. This is
+        * always the case during normal file-system operation. Write a fake
+        * commit start node to the log.
+        */
+       tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size);
+       cs = kzalloc(tmp, GFP_KERNEL);
+       if (!cs)
+               return -ENOMEM;
+
+       cs->ch.node_type = UBIFS_CS_NODE;
+       err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM,
+                              0, UBI_UNKNOWN);
+       kfree(cs);
+
+       ubifs_msg("default file-system created");
+       return 0;
+}
+
+/**
+ * validate_sb - validate superblock node.
+ * @c: UBIFS file-system description object
+ * @sup: superblock node
+ *
+ * This function validates superblock node @sup. Since most of data was read
+ * from the superblock and stored in @c, the function validates fields in @c
+ * instead. Returns zero in case of success and %-EINVAL in case of validation
+ * failure.
+ *
+ * On failure the function prints "bad superblock, error N" and dumps @sup. N
+ * identifies the check which failed. Checks which print their own detailed
+ * message first (min. I/O unit, LEB size and LEB count mismatches) leave N at
+ * its initial value of 1.
+ */
+static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
+{
+       long long max_bytes;
+       int err = 1, min_leb_cnt;
+
+       /* The key hash function must have been selected by the caller */
+       if (!c->key_hash) {
+               err = 2;
+               goto failed;
+       }
+
+       if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) {
+               err = 3;
+               goto failed;
+       }
+
+       if (le32_to_cpu(sup->min_io_size) != c->min_io_size) {
+               ubifs_err("min. I/O unit mismatch: %d in superblock, %d real",
+                         le32_to_cpu(sup->min_io_size), c->min_io_size);
+               goto failed;
+       }
+
+       if (le32_to_cpu(sup->leb_size) != c->leb_size) {
+               ubifs_err("LEB size mismatch: %d in superblock, %d real",
+                         le32_to_cpu(sup->leb_size), c->leb_size);
+               goto failed;
+       }
+
+       /*
+        * Every area must have at least its minimum size. Note, this is the
+        * only place where 'c->main_lebs' has to be checked against
+        * %UBIFS_MIN_MAIN_LEBS - it is not changed below.
+        */
+       if (c->log_lebs < UBIFS_MIN_LOG_LEBS ||
+           c->lpt_lebs < UBIFS_MIN_LPT_LEBS ||
+           c->orph_lebs < UBIFS_MIN_ORPH_LEBS ||
+           c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
+               err = 4;
+               goto failed;
+       }
+
+       /*
+        * Calculate the minimum allowed LEB count. This is very similar to
+        * %UBIFS_MIN_LEB_CNT, but we take into account the real values we
+        * have just read from the superblock.
+        */
+       min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs;
+       min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
+
+       if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
+               ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, "
+                         "%d minimum required", c->leb_cnt, c->vi.size,
+                         min_leb_cnt);
+               goto failed;
+       }
+
+       if (c->max_leb_cnt < c->leb_cnt) {
+               ubifs_err("max. LEB count %d less than LEB count %d",
+                         c->max_leb_cnt, c->leb_cnt);
+               goto failed;
+       }
+
+       /* The journal must fit at least the minimum amount of bud LEBs */
+       if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS ||
+           c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) {
+               err = 8;
+               goto failed;
+       }
+
+       if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 ||
+           c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) {
+               err = 9;
+               goto failed;
+       }
+
+       /* An indexing node with full fanout has to fit into one LEB */
+       if (c->fanout < UBIFS_MIN_FANOUT ||
+           ubifs_idx_node_sz(c, c->fanout) > c->leb_size) {
+               err = 10;
+               goto failed;
+       }
+
+       if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT &&
+           c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS -
+           c->log_lebs - c->lpt_lebs - c->orph_lebs)) {
+               err = 11;
+               goto failed;
+       }
+
+       /* All the areas together have to cover exactly 'c->leb_cnt' LEBs */
+       if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs +
+           c->orph_lebs + c->main_lebs != c->leb_cnt) {
+               err = 12;
+               goto failed;
+       }
+
+       if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) {
+               err = 13;
+               goto failed;
+       }
+
+       /* The reserved pool cannot be larger than the main area */
+       max_bytes = c->main_lebs * (long long)c->leb_size;
+       if (c->rp_size < 0 || max_bytes < c->rp_size) {
+               err = 14;
+               goto failed;
+       }
+
+       if (le32_to_cpu(sup->time_gran) > 1000000000 ||
+           le32_to_cpu(sup->time_gran) < 1) {
+               err = 15;
+               goto failed;
+       }
+
+       return 0;
+
+failed:
+       ubifs_err("bad superblock, error %d", err);
+       dbg_dump_node(c, sup);
+       return -EINVAL;
+}
+
+/**
+ * ubifs_read_sb_node - read superblock node.
+ * @c: UBIFS file-system description object
+ *
+ * This function allocates a buffer of the superblock node size aligned to the
+ * min. I/O unit size and reads the superblock node into it. Returns a pointer
+ * to the allocated superblock node in case of success and an error pointer in
+ * case of failure (%-ENOMEM or the error returned by 'ubifs_read_node()').
+ */
+struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
+{
+       int err, buf_len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
+       struct ubifs_sb_node *sup = kmalloc(buf_len, GFP_NOFS);
+
+       if (!sup)
+               return ERR_PTR(-ENOMEM);
+
+       err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ,
+                             UBIFS_SB_LNUM, 0);
+       if (!err)
+               return sup;
+
+       /* Reading failed - do not leak the buffer */
+       kfree(sup);
+       return ERR_PTR(err);
+}
+
+/**
+ * ubifs_write_sb_node - write superblock node.
+ * @c: UBIFS file-system description object
+ * @sup: superblock node read with 'ubifs_read_sb_node()'
+ *
+ * This function prepares superblock node @sup and atomically changes the
+ * superblock LEB so that it contains the node. Returns %0 on success and a
+ * negative error code on failure.
+ */
+int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup)
+{
+       int aligned_sz;
+
+       ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1);
+
+       /* The whole min. I/O unit aligned buffer is written */
+       aligned_sz = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
+       return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, aligned_sz,
+                               UBI_LONGTERM);
+}
+
+/**
+ * ubifs_read_superblock - read superblock.
+ * @c: UBIFS file-system description object
+ *
+ * This function finds, reads and checks the superblock. If an empty UBI volume
+ * is being mounted, this function creates default superblock. Returns zero in
+ * case of success, and a negative error code in case of failure.
+ *
+ * The superblock fields are copied to @c, then the positions of the log, LPT,
+ * orphan and main areas are derived from them, and finally the result is
+ * checked with 'validate_sb()'. If the LEB count in the superblock is smaller
+ * than both the UBI volume size and the max. LEB count, 'c->leb_cnt' is
+ * increased and, unless the file-system is mounted read-only, the superblock
+ * is re-written with the new LEB count. The superblock buffer which was read
+ * is freed before this function returns.
+ */
+int ubifs_read_superblock(struct ubifs_info *c)
+{
+       int err, sup_flags;
+       struct ubifs_sb_node *sup;
+
+       if (c->empty) {
+               err = create_default_filesystem(c);
+               if (err)
+                       return err;
+       }
+
+       sup = ubifs_read_sb_node(c);
+       if (IS_ERR(sup))
+               return PTR_ERR(sup);
+
+       /*
+        * The software supports all previous versions but not future versions,
+        * due to the unavailability of time-travelling equipment.
+        */
+       c->fmt_version = le32_to_cpu(sup->fmt_version);
+       if (c->fmt_version > UBIFS_FORMAT_VERSION) {
+               ubifs_err("on-flash format version is %d, but software only "
+                         "supports up to version %d", c->fmt_version,
+                         UBIFS_FORMAT_VERSION);
+               err = -EINVAL;
+               goto out;
+       }
+
+       if (c->fmt_version < 3) {
+               ubifs_err("on-flash format version %d is not supported",
+                         c->fmt_version);
+               err = -EINVAL;
+               goto out;
+       }
+
+       switch (sup->key_hash) { /* no default: other values leave @c as is */
+       case UBIFS_KEY_HASH_R5:
+               c->key_hash = key_r5_hash;
+               c->key_hash_type = UBIFS_KEY_HASH_R5;
+               break;
+
+       case UBIFS_KEY_HASH_TEST:
+               c->key_hash = key_test_hash;
+               c->key_hash_type = UBIFS_KEY_HASH_TEST;
+               break;
+       };
+
+       c->key_fmt = sup->key_fmt;
+
+       switch (c->key_fmt) {
+       case UBIFS_SIMPLE_KEY_FMT:
+               c->key_len = UBIFS_SK_LEN;
+               break;
+       default:
+               ubifs_err("unsupported key format");
+               err = -EINVAL;
+               goto out;
+       }
+
+       c->leb_cnt       = le32_to_cpu(sup->leb_cnt);
+       c->max_leb_cnt   = le32_to_cpu(sup->max_leb_cnt);
+       c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes);
+       c->log_lebs      = le32_to_cpu(sup->log_lebs);
+       c->lpt_lebs      = le32_to_cpu(sup->lpt_lebs);
+       c->orph_lebs     = le32_to_cpu(sup->orph_lebs);
+       c->jhead_cnt     = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
+       c->fanout        = le32_to_cpu(sup->fanout);
+       c->lsave_cnt     = le32_to_cpu(sup->lsave_cnt);
+       c->default_compr = le16_to_cpu(sup->default_compr);
+       c->rp_size       = le64_to_cpu(sup->rp_size);
+       c->rp_uid        = le32_to_cpu(sup->rp_uid);
+       c->rp_gid        = le32_to_cpu(sup->rp_gid);
+       sup_flags        = le32_to_cpu(sup->flags);
+
+       c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
+
+       memcpy(&c->uuid, &sup->uuid, 16);
+
+       c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
+
+       /*
+        * Automatically increase file system size to the maximum size. In the
+        * read-only case only 'c->leb_cnt' is changed and 'c->old_leb_cnt'
+        * keeps the LEB count which is stored in the superblock.
+        */
+       c->old_leb_cnt = c->leb_cnt;
+       if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
+               c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
+               if (c->vfs_sb->s_flags & MS_RDONLY)
+                       dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
+                               c->old_leb_cnt, c->leb_cnt);
+               else {
+                       dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs",
+                               c->old_leb_cnt, c->leb_cnt);
+                       sup->leb_cnt = cpu_to_le32(c->leb_cnt);
+                       err = ubifs_write_sb_node(c, sup);
+                       if (err)
+                               goto out;
+                       c->old_leb_cnt = c->leb_cnt;
+               }
+       }
+
+       c->log_bytes = (long long)c->log_lebs * c->leb_size;
+       c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1;
+       c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs;
+       c->lpt_last = c->lpt_first + c->lpt_lebs - 1;
+       c->orph_first = c->lpt_last + 1;
+       c->orph_last = c->orph_first + c->orph_lebs - 1;
+       c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
+       c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
+       c->main_first = c->leb_cnt - c->main_lebs;
+       c->report_rp_size = ubifs_reported_space(c, c->rp_size);
+
+       err = validate_sb(c, sup);
+out:
+       kfree(sup);
+       return err;
+}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c

new file mode 100644 (file)

index 0000000..acf5c5f
--- /dev/null
+++ b/fs/ubifs/scan.c
@@ -0,0 +1,362 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements the scan which is a general-purpose function for
+ * determining what nodes are in an eraseblock. The scan is used to replay the
+ * journal, to do garbage collection, for the TNC in-the-gaps method, and by
+ * debugging functions.
+ */
+
+#include "ubifs.h"
+
+/**
+ * scan_padding_bytes - scan for padding bytes.
+ * @buf: buffer to scan
+ * @len: length of buffer
+ *
+ * This function counts the padding bytes at the beginning of @buf, looking at
+ * no more than %UBIFS_PAD_NODE_SZ bytes and no more than @len bytes. Returns
+ * the number of padding bytes on success and %SCANNED_GARBAGE if there are no
+ * padding bytes or their count is not a multiple of 8.
+ */
+static int scan_padding_bytes(void *buf, int len)
+{
+       const uint8_t *bytes = buf;
+       int limit = min_t(int, UBIFS_PAD_NODE_SZ, len);
+       int cnt;
+
+       dbg_scan("not a node");
+
+       /* Stop at the first byte which is not a padding byte */
+       for (cnt = 0; cnt < limit; cnt++)
+               if (bytes[cnt] != UBIFS_PADDING_BYTE)
+                       break;
+
+       if (cnt == 0 || cnt % 8 != 0)
+               return SCANNED_GARBAGE;
+
+       dbg_scan("%d padding bytes", cnt);
+
+       return cnt;
+}
+
+/**
+ * ubifs_scan_a_node - scan for a node or padding.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to scan
+ * @len: length of buffer
+ * @lnum: logical eraseblock number
+ * @offs: offset within the logical eraseblock
+ * @quiet: print no messages
+ *
+ * This function returns a scanning code to indicate what was scanned.
+ *
+ * The magic of the common header at @buf decides what is done. 0xFFFFFFFF is
+ * reported as %SCANNED_EMPTY_SPACE. Anything else which is not
+ * %UBIFS_NODE_MAGIC is passed to 'scan_padding_bytes()' and its result is
+ * returned. If the magic is fine but @len is smaller than %UBIFS_CH_SZ,
+ * %SCANNED_GARBAGE is returned. Then the node is checked with
+ * 'ubifs_check_node()' and %SCANNED_A_CORRUPT_NODE is returned if the check
+ * fails. For a valid padding node the function returns the node length plus
+ * the padding length, i.e. the number of bytes to skip, and
+ * %SCANNED_A_BAD_PAD_NODE is returned if the padding node is inconsistent.
+ * Any other valid node results in %SCANNED_A_NODE.
+ */
+int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
+                     int offs, int quiet)
+{
+       struct ubifs_ch *ch = buf;
+       uint32_t magic;
+
+       magic = le32_to_cpu(ch->magic);
+
+       if (magic == 0xFFFFFFFF) {
+               dbg_scan("hit empty space");
+               return SCANNED_EMPTY_SPACE;
+       }
+
+       if (magic != UBIFS_NODE_MAGIC)
+               return scan_padding_bytes(buf, len);
+
+       if (len < UBIFS_CH_SZ)
+               return SCANNED_GARBAGE;
+
+       dbg_scan("scanning %s", dbg_ntype(ch->node_type));
+
+       if (ubifs_check_node(c, buf, lnum, offs, quiet))
+               return SCANNED_A_CORRUPT_NODE;
+
+       if (ch->node_type == UBIFS_PAD_NODE) {
+               struct ubifs_pad_node *pad = buf;
+               int pad_len = le32_to_cpu(pad->pad_len);
+               int node_len = le32_to_cpu(ch->len);
+
+               /*
+                * Validate the padding node: the padding must not be negative
+                * and must not go beyond the end of the LEB.
+                */
+               if (pad_len < 0 ||
+                   offs + node_len + pad_len > c->leb_size) {
+                       if (!quiet) {
+                               ubifs_err("bad pad node at LEB %d:%d",
+                                         lnum, offs);
+                               dbg_dump_node(c, pad);
+                       }
+                       return SCANNED_A_BAD_PAD_NODE;
+               }
+
+               /* Make sure the node pads to 8-byte boundary */
+               if ((node_len + pad_len) & 7) {
+                       if (!quiet) {
+                               dbg_err("bad padding length %d - %d",
+                                       offs, offs + node_len + pad_len);
+                       }
+                       return SCANNED_A_BAD_PAD_NODE;
+               }
+
+               dbg_scan("%d bytes padded, offset now %d",
+                        pad_len, ALIGN(offs + node_len + pad_len, 8));
+
+               return node_len + pad_len;
+       }
+
+       return SCANNED_A_NODE;
+}
+
+/**
+ * ubifs_start_scan - create LEB scanning information at start of scan.
+ * @c: UBIFS file-system description object
+ * @lnum: logical eraseblock number
+ * @offs: offset to start at (usually zero)
+ * @sbuf: scan buffer (must be c->leb_size)
+ *
+ * This function allocates LEB scanning information and reads the contents of
+ * LEB @lnum starting from offset @offs to the same offset of @sbuf. If UBI
+ * reports %-EBADMSG, the data is still used, but the 'ecc' flag of the
+ * scanning information is set. Returns the scanning information on success
+ * and an error pointer on failure.
+ */
+struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
+                                       int offs, void *sbuf)
+{
+       int err, rd_len = c->leb_size - offs;
+       struct ubifs_scan_leb *sleb;
+
+       dbg_scan("scan LEB %d:%d", lnum, offs);
+
+       sleb = kzalloc(sizeof(*sleb), GFP_NOFS);
+       if (!sleb)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&sleb->nodes);
+       sleb->buf = sbuf;
+       sleb->lnum = lnum;
+
+       err = ubi_read(c->ubi, lnum, sbuf + offs, offs, rd_len);
+       switch (err) {
+       case 0:
+               break;
+       case -EBADMSG:
+               /* Not fatal, just remember about it */
+               sleb->ecc = 1;
+               break;
+       default:
+               ubifs_err("cannot read %d bytes from LEB %d:%d,"
+                         " error %d", rd_len, lnum, offs, err);
+               kfree(sleb);
+               return ERR_PTR(err);
+       }
+
+       return sleb;
+}
+
+/**
+ * ubifs_end_scan - update LEB scanning information at end of scan.
+ * @c: UBIFS file-system description object
+ * @sleb: scanning information
+ * @lnum: logical eraseblock number
+ * @offs: offset at which scanning stopped
+ *
+ * This function stores offset @offs, aligned to the min. I/O unit size, in
+ * the 'endpt' field of @sleb. @offs is asserted to be aligned already. The
+ * function returns nothing.
+ */
+void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+                   int lnum, int offs)
+{
+       lnum = lnum; /* NOTE(review): presumably avoids an "unused" warning when dbg_scan() is compiled out - confirm */
+       dbg_scan("stop scanning LEB %d at offset %d", lnum, offs);
+       ubifs_assert(offs % c->min_io_size == 0);
+
+       sleb->endpt = ALIGN(offs, c->min_io_size);
+}
+
+/**
+ * ubifs_add_snod - add a scanned node to LEB scanning information.
+ * @c: UBIFS file-system description object
+ * @sleb: scanning information
+ * @buf: buffer containing node
+ * @offs: offset of node on flash
+ *
+ * This function allocates a scanned node description for the node in @buf,
+ * fills it from the common header (and from the key for keyed nodes), and
+ * adds it to the tail of the node list of @sleb. Returns %0 on success and
+ * %-ENOMEM if the description could not be allocated.
+ */
+int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+                  void *buf, int offs)
+{
+       struct ubifs_scan_node *snod;
+       struct ubifs_ino_node *keyed = buf;
+       struct ubifs_ch *hdr = buf;
+
+       snod = kzalloc(sizeof(*snod), GFP_NOFS);
+       if (!snod)
+               return -ENOMEM;
+
+       snod->node = buf;
+       snod->offs = offs;
+       snod->type = hdr->node_type;
+       snod->len = le32_to_cpu(hdr->len);
+       snod->sqnum = le64_to_cpu(hdr->sqnum);
+
+       /* All keyed nodes have the key at the same place */
+       if (hdr->node_type == UBIFS_INO_NODE ||
+           hdr->node_type == UBIFS_DENT_NODE ||
+           hdr->node_type == UBIFS_XENT_NODE ||
+           hdr->node_type == UBIFS_DATA_NODE ||
+           hdr->node_type == UBIFS_TRUN_NODE)
+               key_read(c, &keyed->key, &snod->key);
+
+       sleb->nodes_cnt += 1;
+       list_add_tail(&snod->list, &sleb->nodes);
+       return 0;
+}
+
+/**
+ * ubifs_scanned_corruption - print information after UBIFS scanned corruption.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of corruption
+ * @offs: offset of corruption
+ * @buf: buffer containing corruption
+ *
+ * This function prints an error message and, unless 'dbg_failure_mode' is
+ * set, dumps the data from @buf: everything up to the end of the LEB, but not
+ * more than 4096 bytes.
+ */
+void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
+                             void *buf)
+{
+       int dump_len = c->leb_size - offs;
+
+       ubifs_err("corrupted data at LEB %d:%d", lnum, offs);
+       if (dbg_failure_mode)
+               return;
+
+       /* Limit the size of the dump */
+       dump_len = dump_len > 4096 ? 4096 : dump_len;
+
+       dbg_err("first %d bytes from LEB %d:%d", dump_len, lnum, offs);
+       print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf,
+                      dump_len, 1);
+}
+
+/**
+ * ubifs_scan - scan a logical eraseblock.
+ * @c: UBIFS file-system description object
+ * @lnum: logical eraseblock number
+ * @offs: offset to start at (usually zero)
+ * @sbuf: scan buffer (must be c->leb_size)
+ *
+ * This function scans LEB number @lnum and returns complete information about
+ * its contents. Returns an error code in case of failure.
+ *
+ * More precisely, the scanning information is returned on success and an
+ * error pointer is returned on failure: %-EUCLEAN if corruption was found,
+ * otherwise the error of 'ubifs_start_scan()' or 'ubifs_add_snod()'. Nodes are
+ * scanned until empty space is hit or less than 8 bytes are left, then the
+ * rest of the LEB is checked to contain only 0xFF bytes.
+ */
+struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
+                                 int offs, void *sbuf)
+{
+       void *buf = sbuf + offs;
+       int err, len = c->leb_size - offs;
+       struct ubifs_scan_leb *sleb;
+
+       sleb = ubifs_start_scan(c, lnum, offs, sbuf);
+       if (IS_ERR(sleb))
+               return sleb;
+
+       while (len >= 8) {
+               struct ubifs_ch *ch = buf;
+               int node_len, ret;
+
+               dbg_scan("look at LEB %d:%d (%d bytes left)",
+                        lnum, offs, len);
+
+               cond_resched();
+
+               ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
+
+               if (ret > 0) {
+                       /*
+                        * Padding bytes or a valid padding node, @ret is the
+                        * number of bytes to skip.
+                        */
+                       offs += ret;
+                       buf += ret;
+                       len -= ret;
+                       continue;
+               }
+
+               if (ret == SCANNED_EMPTY_SPACE)
+                       /* Empty space is checked later */
+                       break;
+
+               switch (ret) {
+               case SCANNED_GARBAGE:
+                       dbg_err("garbage");
+                       goto corrupted;
+               case SCANNED_A_NODE:
+                       break;
+               case SCANNED_A_CORRUPT_NODE:
+               case SCANNED_A_BAD_PAD_NODE:
+                       dbg_err("bad node");
+                       goto corrupted;
+               default:
+                       dbg_err("unknown");
+                       goto corrupted;
+               }
+
+               err = ubifs_add_snod(c, sleb, buf, offs);
+               if (err)
+                       goto error;
+
+               node_len = ALIGN(le32_to_cpu(ch->len), 8); /* step over the node, 8-byte aligned */
+               offs += node_len;
+               buf += node_len;
+               len -= node_len;
+       }
+
+       if (offs % c->min_io_size) /* scanning must stop at a min. I/O unit boundary */
+               goto corrupted;
+
+       ubifs_end_scan(c, sleb, lnum, offs);
+
+       for (; len > 4; offs += 4, buf = buf + 4, len -= 4) /* skip 0xFF words */
+               if (*(uint32_t *)buf != 0xffffffff)
+                       break;
+       for (; len; offs++, buf++, len--) /* check the rest byte by byte */
+               if (*(uint8_t *)buf != 0xff) {
+                       ubifs_err("corrupt empty space at LEB %d:%d",
+                                 lnum, offs);
+                       goto corrupted;
+               }
+
+       return sleb;
+
+corrupted:
+       ubifs_scanned_corruption(c, lnum, offs, buf);
+       err = -EUCLEAN;
+error:
+       ubifs_err("LEB %d scanning failed", lnum);
+       ubifs_scan_destroy(sleb);
+       return ERR_PTR(err);
+}
+
+/**
+ * ubifs_scan_destroy - destroy LEB scanning information.
+ * @sleb: scanning information to free
+ *
+ * This function removes every scanned node from the node list of @sleb and
+ * frees it, then frees @sleb itself.
+ */
+void ubifs_scan_destroy(struct ubifs_scan_leb *sleb)
+{
+       struct ubifs_scan_node *snod, *tmp;
+
+       /* The "safe" iterator is needed because entries are deleted */
+       list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+               list_del(&snod->list);
+               kfree(snod);
+       }
+       kfree(sleb);
+}
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c

new file mode 100644 (file)

index 0000000..f248533
--- /dev/null
+++ b/fs/ubifs/shrinker.c
@@ -0,0 +1,322 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file implements UBIFS shrinker which evicts clean znodes from the TNC
+ * tree when Linux VM needs more RAM.
+ *
+ * We do not implement any LRU lists to find oldest znodes to free because it
+ * would add additional overhead to the file system fast paths. So the shrinker
+ * just walks the TNC tree when searching for znodes to free.
+ *
+ * If the root of a TNC sub-tree is clean and old enough, then the children are
+ * also clean and old enough. So the shrinker walks the TNC in level order and
+ * dumps entire sub-trees.
+ *
+ * The age of znodes is just the time-stamp when they were last looked at.
+ * The current shrinker first tries to evict old znodes, then young ones.
+ *
+ * Since the shrinker is global, it has to protect against races with FS
+ * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
+ */
+
+#include "ubifs.h"
+
+/* List of all UBIFS file-system instances (linked via 'infos_list') */
+LIST_HEAD(ubifs_infos);
+
+/*
+ * We number each shrinker run and record the number on the ubifs_info structure
+ * so that we can easily work out which ubifs_info structures have already been
+ * done by the current run. Run number zero is never handed out, see
+ * 'shrink_tnc_trees()'.
+ */
+static unsigned int shrinker_run_no;
+
+/* Protects 'ubifs_infos' list */
+DEFINE_SPINLOCK(ubifs_infos_lock);
+
+/* Global clean znode counter (for all mounted UBIFS instances) */
+atomic_long_t ubifs_clean_zn_cnt;
+
+/**
+ * shrink_tnc - shrink TNC tree.
+ * @c: UBIFS file-system description object
+ * @nr: number of znodes to free
+ * @age: the age of znodes to free
+ * @contention: if any contention, this is set to %1
+ *
+ * This function traverses TNC tree and frees clean znodes. It does not free
+ * clean znodes which are younger than @age. Returns number of freed znodes.
+ *
+ * The caller has to hold both 'c->umount_mutex' and 'c->tnc_mutex' (this is
+ * asserted). The traversal stops when at least @nr znodes have been freed,
+ * when there are no more clean znodes, or when the whole tree has been
+ * walked. @contention is set when a znode which is still in the 'c->cnext'
+ * list is met.
+ */
+static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
+{
+       int total_freed = 0;
+       struct ubifs_znode *znode, *zprev;
+       int time = get_seconds();
+
+       ubifs_assert(mutex_is_locked(&c->umount_mutex));
+       ubifs_assert(mutex_is_locked(&c->tnc_mutex));
+
+       if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
+               return 0;
+
+       /*
+        * Traverse the TNC tree in levelorder manner, so that it is possible
+        * to destroy large sub-trees. Indeed, if a znode is old, then all its
+        * children are older or of the same age.
+        *
+        * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
+        * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
+        * changed only when the 'c->tnc_mutex' is held.
+        */
+       zprev = NULL;
+       znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
+       while (znode && total_freed < nr &&
+              atomic_long_read(&c->clean_zn_cnt) > 0) {
+               int freed;
+
+               /*
+                * If the znode is clean, but it is in the 'c->cnext' list, this
+                * means that this znode has just been written to flash as a
+                * part of commit and was marked clean. They will be removed
+                * from the list at end commit. We cannot change the list,
+                * because it is not protected by any mutex (design decision to
+                * make commit really independent and parallel to main I/O). So
+                * we just skip these znodes.
+                *
+                * Note, the 'clean_zn_cnt' counters are not updated until
+                * after the commit, so the UBIFS shrinker does not report
+                * the znodes which are in the 'c->cnext' list as freeable.
+                *
+                * Also note, if the root of a sub-tree is not in 'c->cnext',
+                * then the whole sub-tree is not in 'c->cnext' as well, so it
+                * is safe to dump whole sub-tree.
+                */
+
+               if (znode->cnext) {
+                       /*
+                        * Very soon these znodes will be removed from the list
+                        * and become freeable.
+                        */
+                       *contention = 1;
+               } else if (!ubifs_zn_dirty(znode) &&
+                          abs(time - znode->time) >= age) {
+                       if (znode->parent) /* detach the sub-tree from its parent first */
+                               znode->parent->zbranch[znode->iip].znode = NULL;
+                       else
+                               c->zroot.znode = NULL;
+
+                       freed = ubifs_destroy_tnc_subtree(znode);
+                       atomic_long_sub(freed, &ubifs_clean_zn_cnt);
+                       atomic_long_sub(freed, &c->clean_zn_cnt);
+                       ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
+                       total_freed += freed;
+                       znode = zprev; /* @znode is gone, continue from the previous one */
+               }
+
+               if (unlikely(!c->zroot.znode)) /* the whole tree has been freed */
+                       break;
+
+               zprev = znode;
+               znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
+               cond_resched();
+       }
+
+       return total_freed;
+}
+
+/**
+ * shrink_tnc_trees - shrink UBIFS TNC trees.
+ * @nr: number of znodes to free
+ * @age: the age of znodes to free
+ * @contention: if any contention, this is set to %1
+ *
+ * This function walks the list of mounted UBIFS file-systems and frees clean
+ * znodes which are older than @age, until at least @nr znodes are freed.
+ * Returns the number of freed znodes.
+ *
+ * File-systems whose 'umount_mutex' or 'tnc_mutex' cannot be taken without
+ * waiting are skipped and @contention is set. The 'ubifs_infos_lock' spinlock
+ * is dropped while a file-system is being shrunk.
+ */
+static int shrink_tnc_trees(int nr, int age, int *contention)
+{
+       struct ubifs_info *c;
+       struct list_head *p;
+       unsigned int run_no;
+       int freed = 0;
+
+       spin_lock(&ubifs_infos_lock);
+       do {
+               run_no = ++shrinker_run_no;
+       } while (run_no == 0); /* run number 0 is never used */
+       /* Iterate over all mounted UBIFS file-systems and try to shrink them */
+       p = ubifs_infos.next;
+       while (p != &ubifs_infos) {
+               c = list_entry(p, struct ubifs_info, infos_list);
+               /*
+                * We move the ones we do to the end of the list, so we stop
+                * when we see one we have already done.
+                */
+               if (c->shrinker_run_no == run_no)
+                       break;
+               if (!mutex_trylock(&c->umount_mutex)) {
+                       /* Some un-mount is in progress, try next FS */
+                       *contention = 1;
+                       p = p->next;
+                       continue;
+               }
+               /*
+                * We're holding 'c->umount_mutex', so the file-system won't go
+                * away.
+                */
+               if (!mutex_trylock(&c->tnc_mutex)) {
+                       mutex_unlock(&c->umount_mutex);
+                       *contention = 1;
+                       p = p->next;
+                       continue;
+               }
+               spin_unlock(&ubifs_infos_lock);
+               /*
+                * OK, now we have TNC locked, the file-system cannot go away -
+                * it is safe to reap the cache.
+                */
+               c->shrinker_run_no = run_no;
+               freed += shrink_tnc(c, nr, age, contention);
+               mutex_unlock(&c->tnc_mutex);
+               spin_lock(&ubifs_infos_lock);
+               /* Get the next list element before we move this one */
+               p = p->next;
+               /*
+                * Move this one to the end of the list to provide some
+                * fairness.
+                */
+               list_del(&c->infos_list);
+               list_add_tail(&c->infos_list, &ubifs_infos);
+               mutex_unlock(&c->umount_mutex);
+               if (freed >= nr)
+                       break;
+       }
+       spin_unlock(&ubifs_infos_lock);
+       return freed;
+}
+
+/**
+ * kick_a_thread - kick a background thread to start commit.
+ *
+ * This function kicks a background thread to start background commit. Returns
+ * %-1 if a thread was kicked or there is another reason to assume the memory
+ * will soon be freed or become freeable (an un-mount or a commit is already
+ * in progress). If there are no dirty znodes, returns %0.
+ *
+ * The list of file-systems is walked under 'ubifs_infos_lock'; each
+ * file-system is inspected with its 'umount_mutex' held, taken with
+ * 'mutex_trylock()'.
+ */
+static int kick_a_thread(void)
+{
+       int i;
+       struct ubifs_info *c;
+
+       /*
+        * Iterate over all mounted UBIFS file-systems and find out if there is
+        * already an ongoing commit operation there. If no, then iterate for
+        * the second time and initiate background commit.
+        */
+       spin_lock(&ubifs_infos_lock);
+       for (i = 0; i < 2; i++) {
+               list_for_each_entry(c, &ubifs_infos, infos_list) {
+                       long dirty_zn_cnt;
+
+                       if (!mutex_trylock(&c->umount_mutex)) {
+                               /*
+                                * Some un-mount is in progress, it will
+                                * certainly free memory, so just return.
+                                */
+                               spin_unlock(&ubifs_infos_lock);
+                               return -1;
+                       }
+
+                       dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
+
+                       /* Nothing to commit here, or commit is not possible */
+                       if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
+                           c->ro_media) {
+                               mutex_unlock(&c->umount_mutex);
+                               continue;
+                       }
+
+                       /* The commit state is not "resting", do not kick */
+                       if (c->cmt_state != COMMIT_RESTING) {
+                               spin_unlock(&ubifs_infos_lock);
+                               mutex_unlock(&c->umount_mutex);
+                               return -1;
+                       }
+
+                       if (i == 1) {
+                               /*
+                                * Second pass: put this file-system at the
+                                * end of the list and request a background
+                                * commit on it. The walk ends here, so moving
+                                * the current element is safe.
+                                */
+                               list_move_tail(&c->infos_list, &ubifs_infos);
+                               spin_unlock(&ubifs_infos_lock);
+
+                               ubifs_request_bg_commit(c);
+                               mutex_unlock(&c->umount_mutex);
+                               return -1;
+                       }
+                       mutex_unlock(&c->umount_mutex);
+               }
+       }
+       spin_unlock(&ubifs_infos_lock);
+
+       return 0;
+}
+
+/*
+ * TNC shrinker entry point. When @nr is zero, only the current number of clean
+ * znodes is reported. Otherwise up to three passes are made over the mounted
+ * file-systems - old znodes first, then young ones, then any age - until at
+ * least @nr znodes have been freed. Returns the number of freed znodes, or
+ * %-1 if nothing was freed but lock contention was seen. If there are no clean
+ * znodes at all, the result of 'kick_a_thread()' is returned. The @gfp_mask
+ * argument is not used.
+ */
+int ubifs_shrinker(int nr, gfp_t gfp_mask)
+{
+       long clean_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
+       int contention = 0;
+       int total;
+
+       if (!nr)
+               return clean_cnt;
+
+       if (clean_cnt == 0) {
+               /*
+                * There are no clean znodes, so there is nothing to reap. The
+                * only thing left to do is to kick a background thread to
+                * start commit, which will probably produce clean (and thus
+                * freeable) znodes. Returning -1 makes the VM call us again
+                * later.
+                */
+               dbg_tnc("no clean znodes, kick a thread");
+               return kick_a_thread();
+       }
+
+       total = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
+
+       if (total < nr) {
+               dbg_tnc("not enough old znodes, try to free young ones");
+               total += shrink_tnc_trees(nr - total, YOUNG_ZNODE_AGE,
+                                         &contention);
+       }
+
+       if (total < nr) {
+               dbg_tnc("not enough young znodes, free all");
+               total += shrink_tnc_trees(nr - total, 0, &contention);
+
+               if (total == 0 && contention) {
+                       dbg_tnc("freed nothing, but contention");
+                       return -1;
+               }
+       }
+
+       dbg_tnc("%d znodes were freed, requested %d", total, nr);
+       return total;
+}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c

new file mode 100644 (file)

index 0000000..00eb9c6
--- /dev/null
+++ b/fs/ubifs/super.c
@@ -0,0 +1,1951 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file implements UBIFS initialization and VFS superblock operations. Some
+ * initialization stuff which is rather large and complex is placed at
+ * corresponding subsystems, but most of it is here.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/random.h>
+#include <linux/kthread.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include "ubifs.h"
+
+/*
+ * Slab cache for UBIFS inodes ('ubifs_alloc_inode()' allocates from it and
+ * 'ubifs_destroy_inode()' frees back to it)
+ */
+struct kmem_cache *ubifs_inode_slab;
+
+/* UBIFS TNC shrinker description, 'ubifs_shrinker()' is the shrink call-back */
+static struct shrinker ubifs_shrinker_info = {
+       .shrink = ubifs_shrinker,
+       .seeks = DEFAULT_SEEKS,
+};
+
+/**
+ * validate_inode - sanity-check a freshly read inode.
+ * @c: UBIFS file-system description object
+ * @inode: the inode to validate
+ *
+ * Helper for 'ubifs_iget()'. Checks the inode size, compression type, extended
+ * attribute counters, inline data length and the xattr flag of a newly built
+ * inode. A compression type which is known but not compiled in only produces
+ * a warning. Returns zero if the inode looks sane, a small positive number
+ * identifying the failed check, or whatever 'dbg_check_dir_size()' returns.
+ */
+static int validate_inode(struct ubifs_info *c, const struct inode *inode)
+{
+       const struct ubifs_inode *ui = ubifs_inode(inode);
+       const int is_reg = (inode->i_mode & S_IFMT) == S_IFREG;
+
+       /* Check 1: the size must not exceed the file-system limit */
+       if (inode->i_size > c->max_inode_sz) {
+               ubifs_err("inode is too large (%lld)",
+                         (long long)inode->i_size);
+               return 1;
+       }
+
+       /* Check 2: the compression type must be a known one */
+       if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
+               ubifs_err("unknown compression type %d", ui->compr_type);
+               return 2;
+       }
+
+       /* Check 3: extended attribute names have to fit the list limit */
+       if (ui->xattr_names + ui->xattr_cnt > XATTR_LIST_MAX)
+               return 3;
+
+       /* Check 4: the inline data length must be within bounds */
+       if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA)
+               return 4;
+
+       /* Check 5: only regular files may carry the xattr flag */
+       if (ui->xattr && !is_reg)
+               return 5;
+
+       if (!ubifs_compr_present(ui->compr_type)) {
+               ubifs_warn("inode %lu uses '%s' compression, but it was not "
+                          "compiled in", inode->i_ino,
+                          ubifs_compr_name(ui->compr_type));
+       }
+
+       return dbg_check_dir_size(c, inode);
+}
+
+struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
+{
+       int err;
+       union ubifs_key key;
+       struct ubifs_ino_node *ino;
+       struct ubifs_info *c = sb->s_fs_info;
+       struct inode *inode;
+       struct ubifs_inode *ui;
+       /*
+        * Look up inode @inum with 'iget_locked()'. If it is not new (%I_NEW
+        * is not set) it is returned as is. Otherwise the inode node is read
+        * with 'ubifs_tnc_lookup()', the VFS and UBIFS inode fields are filled
+        * in from it and validated, and the operations are set according to
+        * the file type. Returns the inode or an error pointer. The small
+        * positive 'err' values only identify the failed check in the log,
+        * validation failures are returned as %-EINVAL.
+        */
+       dbg_gen("inode %lu", inum);
+
+       inode = iget_locked(sb, inum);
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+       if (!(inode->i_state & I_NEW))
+               return inode; /* not a new inode, return it as is */
+       ui = ubifs_inode(inode);
+
+       ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
+       if (!ino) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       ino_key_init(c, &key, inode->i_ino);
+
+       err = ubifs_tnc_lookup(c, &key, ino);
+       if (err)
+               goto out_ino;
+
+       inode->i_flags |= (S_NOCMTIME | S_NOATIME);
+       inode->i_nlink = le32_to_cpu(ino->nlink);
+       inode->i_uid   = le32_to_cpu(ino->uid);
+       inode->i_gid   = le32_to_cpu(ino->gid);
+       inode->i_atime.tv_sec  = (int64_t)le64_to_cpu(ino->atime_sec);
+       inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
+       inode->i_mtime.tv_sec  = (int64_t)le64_to_cpu(ino->mtime_sec);
+       inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec);
+       inode->i_ctime.tv_sec  = (int64_t)le64_to_cpu(ino->ctime_sec);
+       inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec);
+       inode->i_mode = le32_to_cpu(ino->mode);
+       inode->i_size = le64_to_cpu(ino->size);
+
+       ui->data_len    = le32_to_cpu(ino->data_len);
+       ui->flags       = le32_to_cpu(ino->flags);
+       ui->compr_type  = le16_to_cpu(ino->compr_type);
+       ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum);
+       ui->xattr_cnt   = le32_to_cpu(ino->xattr_cnt);
+       ui->xattr_size  = le32_to_cpu(ino->xattr_size);
+       ui->xattr_names = le32_to_cpu(ino->xattr_names);
+       ui->synced_i_size = ui->ui_size = inode->i_size;
+
+       ui->xattr = (ui->flags & UBIFS_XATTR_FL) ? 1 : 0;
+
+       err = validate_inode(c, inode);
+       if (err)
+               goto out_invalid;
+
+       /* Disable readahead */
+       inode->i_mapping->backing_dev_info = &c->bdi;
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFREG:
+               inode->i_mapping->a_ops = &ubifs_file_address_operations;
+               inode->i_op = &ubifs_file_inode_operations;
+               inode->i_fop = &ubifs_file_operations;
+               if (ui->xattr) { /* inline data is copied and NUL-terminated */
+                       ui->data = kmalloc(ui->data_len + 1, GFP_NOFS);
+                       if (!ui->data) {
+                               err = -ENOMEM;
+                               goto out_ino;
+                       }
+                       memcpy(ui->data, ino->data, ui->data_len);
+                       ((char *)ui->data)[ui->data_len] = '\0';
+               } else if (ui->data_len != 0) {
+                       err = 10;
+                       goto out_invalid;
+               }
+               break;
+       case S_IFDIR:
+               inode->i_op  = &ubifs_dir_inode_operations;
+               inode->i_fop = &ubifs_dir_operations;
+               if (ui->data_len != 0) {
+                       err = 11;
+                       goto out_invalid;
+               }
+               break;
+       case S_IFLNK:
+               inode->i_op = &ubifs_symlink_inode_operations;
+               if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) {
+                       err = 12;
+                       goto out_invalid;
+               }
+               ui->data = kmalloc(ui->data_len + 1, GFP_NOFS);
+               if (!ui->data) {
+                       err = -ENOMEM;
+                       goto out_ino;
+               }
+               memcpy(ui->data, ino->data, ui->data_len);
+               ((char *)ui->data)[ui->data_len] = '\0';
+               break;
+       case S_IFBLK:
+       case S_IFCHR:
+       {
+               dev_t rdev;
+               union ubifs_dev_desc *dev;
+
+               ui->data = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
+               if (!ui->data) {
+                       err = -ENOMEM;
+                       goto out_ino;
+               }
+
+               dev = (union ubifs_dev_desc *)ino->data;
+               if (ui->data_len == sizeof(dev->new))
+                       rdev = new_decode_dev(le32_to_cpu(dev->new));
+               else if (ui->data_len == sizeof(dev->huge))
+                       rdev = huge_decode_dev(le64_to_cpu(dev->huge));
+               else {
+                       err = 13;
+                       goto out_invalid;
+               }
+               memcpy(ui->data, ino->data, ui->data_len);
+               inode->i_op = &ubifs_file_inode_operations;
+               init_special_inode(inode, inode->i_mode, rdev);
+               break;
+       }
+       case S_IFSOCK:
+       case S_IFIFO:
+               inode->i_op = &ubifs_file_inode_operations;
+               init_special_inode(inode, inode->i_mode, 0);
+               if (ui->data_len != 0) {
+                       err = 14;
+                       goto out_invalid;
+               }
+               break;
+       default:
+               err = 15;
+               goto out_invalid;
+       }
+
+       kfree(ino);
+       ubifs_set_inode_flags(inode);
+       unlock_new_inode(inode);
+       return inode;
+
+out_invalid: /* the positive code is only logged, -EINVAL is returned */
+       ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err);
+       dbg_dump_node(c, ino);
+       dbg_dump_inode(c, inode);
+       err = -EINVAL;
+out_ino:
+       kfree(ino);
+out:
+       ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err);
+       iget_failed(inode);
+       return ERR_PTR(err);
+}
+
+/**
+ * ubifs_alloc_inode - allocate a UBIFS inode object.
+ * @sb: VFS superblock (not used here)
+ *
+ * Allocates a 'struct ubifs_inode' from 'ubifs_inode_slab', zeroes everything
+ * that follows the embedded VFS inode and initializes the inode mutex and
+ * spinlock. Returns a pointer to the embedded VFS inode, or %NULL if the
+ * allocation failed.
+ */
+static struct inode *ubifs_alloc_inode(struct super_block *sb)
+{
+       struct ubifs_inode *ui;
+
+       ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS);
+       if (!ui)
+               return NULL;
+
+       /*
+        * Zero only the part which follows the first 'sizeof(struct inode)'
+        * bytes. This assumes that 'vfs_inode' is the first member of
+        * 'struct ubifs_inode'.
+        */
+       memset((void *)ui + sizeof(struct inode), 0,
+              sizeof(struct ubifs_inode) - sizeof(struct inode));
+       mutex_init(&ui->ui_mutex);
+       spin_lock_init(&ui->ui_lock);
+       return &ui->vfs_inode;
+}
+
+/**
+ * ubifs_destroy_inode - free a UBIFS inode object.
+ * @inode: VFS inode embedded in the UBIFS inode to free
+ *
+ * Frees the inline data buffer attached to the inode (if any) and returns the
+ * UBIFS inode object to 'ubifs_inode_slab'.
+ */
+static void ubifs_destroy_inode(struct inode *inode)
+{
+       struct ubifs_inode *ui = ubifs_inode(inode);
+
+       kfree(ui->data);
+       /*
+        * Free the object which was actually allocated from the slab, i.e. the
+        * containing 'struct ubifs_inode', instead of relying on 'vfs_inode'
+        * being at offset zero.
+        */
+       kmem_cache_free(ubifs_inode_slab, ui);
+}
+
+/*
+ * Write a dirty inode to the journal with 'ubifs_jnl_write_inode()', clear its
+ * dirty flag and release the dirty inode budget. Returns zero if the inode is
+ * bad or not dirty, otherwise the return code of the journal write. The @wait
+ * argument is not used.
+ *
+ * Note, the dirty flag is cleared and the budget is released even if the
+ * journal write failed.
+ *
+ * Note, Linux write-back code calls this without 'i_mutex'.
+ */
+static int ubifs_write_inode(struct inode *inode, int wait)
+{
+       int err;
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+
+       ubifs_assert(!ui->xattr);
+       if (is_bad_inode(inode))
+               return 0;
+
+       mutex_lock(&ui->ui_mutex);
+       /*
+        * Due to races between write-back forced by budgeting
+        * (see 'sync_some_inodes()') and pdflush write-back, the inode may
+        * have already been synchronized, do not do this again. This might
+        * also happen if it was synchronized in a VFS operation, e.g.
+        * 'ubifs_link()'.
+        */
+       if (!ui->dirty) {
+               mutex_unlock(&ui->ui_mutex);
+               return 0;
+       }
+
+       dbg_gen("inode %lu", inode->i_ino);
+       err = ubifs_jnl_write_inode(c, inode, 0);
+       if (err)
+               ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
+
+       ui->dirty = 0;
+       mutex_unlock(&ui->ui_mutex);
+       ubifs_release_dirty_inode_budget(c, ui);
+       return err;
+}
+
+/*
+ * Delete an inode: drop its page cache, set its size to zero and write it to
+ * the journal with the third argument of 'ubifs_jnl_write_inode()' set to 1.
+ * Extended attribute inodes and bad inodes are only cleared.
+ */
+static void ubifs_delete_inode(struct inode *inode)
+{
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       int ret;
+
+       /*
+        * Deletion of extended attribute inodes is fully handled in
+        * 'ubifs_removexattr()'. Such inodes are special and have limited
+        * usage, so nothing has to be done for them here.
+        */
+       if (ui->xattr)
+               goto out;
+
+       dbg_gen("inode %lu", inode->i_ino);
+       ubifs_assert(!atomic_read(&inode->i_count));
+       ubifs_assert(inode->i_nlink == 0);
+
+       truncate_inode_pages(&inode->i_data, 0);
+       if (is_bad_inode(inode))
+               goto out;
+
+       inode->i_size = 0;
+       ui->ui_size = 0;
+
+       ret = ubifs_jnl_write_inode(c, inode, 1);
+       if (ret) {
+               /*
+                * In the worst case a lost orphan inode wastes some space, so
+                * just printing an error message is good enough.
+                */
+               ubifs_err("can't write inode %lu, error %d", inode->i_ino, ret);
+       }
+out:
+       clear_inode(inode);
+}
+
+/*
+ * Mark a UBIFS inode as dirty. The caller has to hold 'ui_mutex' (this is
+ * asserted). Nothing is done if the inode is already marked dirty.
+ */
+static void ubifs_dirty_inode(struct inode *inode)
+{
+       struct ubifs_inode *ui = ubifs_inode(inode);
+
+       ubifs_assert(mutex_is_locked(&ui->ui_mutex));
+
+       if (ui->dirty)
+               return;
+
+       ui->dirty = 1;
+       dbg_gen("inode %lu",  inode->i_ino);
+}
+
+/*
+ * Fill @buf with file-system statistics. The free space comes from
+ * 'ubifs_budg_get_free_space()'; the space available to users is the free
+ * space minus 'c->report_rp_size', but never below zero. The file counters
+ * are reported as zero. Always returns zero.
+ */
+static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+       struct ubifs_info *c = dentry->d_sb->s_fs_info;
+       unsigned long long free_bytes = ubifs_budg_get_free_space(c);
+
+       dbg_gen("free space %lld bytes (%lld blocks)",
+               free_bytes, free_bytes >> UBIFS_BLOCK_SHIFT);
+
+       /* Constant fields */
+       buf->f_type = UBIFS_SUPER_MAGIC;
+       buf->f_bsize = UBIFS_BLOCK_SIZE;
+       buf->f_namelen = UBIFS_MAX_NLEN;
+       buf->f_files = 0;
+       buf->f_ffree = 0;
+
+       /* Space accounting, in blocks */
+       buf->f_blocks = c->block_cnt;
+       buf->f_bfree = free_bytes >> UBIFS_BLOCK_SHIFT;
+       buf->f_bavail = 0;
+       if (free_bytes > c->report_rp_size)
+               buf->f_bavail = (free_bytes - c->report_rp_size) >>
+                               UBIFS_BLOCK_SHIFT;
+
+       return 0;
+}
+
+/*
+ * Print the UBIFS-specific mount options to @s. Only the un-mount mode is
+ * shown, and only if 'c->mount_opts.unmount_mode' is 1 or 2. Always returns
+ * zero.
+ */
+static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
+{
+       struct ubifs_info *c = mnt->mnt_sb->s_fs_info;
+
+       switch (c->mount_opts.unmount_mode) {
+       case 2:
+               seq_printf(s, ",fast_unmount");
+               break;
+       case 1:
+               seq_printf(s, ",norm_unmount");
+               break;
+       default:
+               /* Any other value - print nothing */
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * Synchronize the write-buffers of all journal heads. Every head is synced
+ * even if an earlier one failed; the first error seen is returned, or zero if
+ * there were no errors or there are no journal heads. The @wait argument is
+ * not used.
+ */
+static int ubifs_sync_fs(struct super_block *sb, int wait)
+{
+       struct ubifs_info *c = sb->s_fs_info;
+       int first_err = 0;
+       int hd;
+
+       if (!c->jheads)
+               return 0;
+
+       for (hd = 0; hd < c->jhead_cnt; hd++) {
+               int err = ubifs_wbuf_sync(&c->jheads[hd].wbuf);
+
+               /* Remember only the first failure */
+               if (!first_err)
+                       first_err = err;
+       }
+
+       /*
+        * We ought to call sync for c->ubi but it does not have one. If it had
+        * it would in turn call mtd->sync, however mtd operations are
+        * synchronous anyway, so we don't lose any sleep here.
+        */
+       return first_err;
+}
+
+/**
+ * init_constants_early - initialize UBIFS constants.
+ * @c: UBIFS file-system description object
+ *
+ * This function initializes UBIFS constants which do not need the superblock
+ * to be read. It also checks that the UBI volume satisfies basic UBIFS
+ * requirements. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int init_constants_early(struct ubifs_info *c)
+{
+       if (c->vi.corrupted) {
+               ubifs_warn("UBI volume is corrupted - read-only mode");
+               c->ro_media = 1;
+       }
+
+       if (c->di.ro_mode) {
+               ubifs_msg("read-only UBI device");
+               c->ro_media = 1;
+       }
+
+       if (c->vi.vol_type == UBI_STATIC_VOLUME) {
+               ubifs_msg("static UBI volume - read-only mode");
+               c->ro_media = 1;
+       }
+
+       c->leb_cnt = c->vi.size;
+       c->leb_size = c->vi.usable_leb_size;
+       c->half_leb_size = c->leb_size / 2;
+       c->min_io_size = c->di.min_io_size;
+       c->min_io_shift = fls(c->min_io_size) - 1;
+
+       if (c->leb_size < UBIFS_MIN_LEB_SZ) {
+               ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
+                         c->leb_size, UBIFS_MIN_LEB_SZ);
+               return -EINVAL;
+       }
+
+       if (c->leb_cnt < UBIFS_MIN_LEB_CNT) {
+               ubifs_err("too few LEBs (%d), min. is %d",
+                         c->leb_cnt, UBIFS_MIN_LEB_CNT);
+               return -EINVAL;
+       }
+
+       if (!is_power_of_2(c->min_io_size)) {
+               ubifs_err("bad min. I/O size %d", c->min_io_size);
+               return -EINVAL;
+       }
+
+       /*
+        * UBIFS aligns all nodes to an 8-byte boundary, so to make functions
+        * in io.c simpler, assume minimum I/O unit size to be 8 bytes if it
+        * is less than 8.
+        */
+       if (c->min_io_size < 8) {
+               c->min_io_size = 8;
+               c->min_io_shift = 3;
+       }
+
+       c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
+       c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size);
+
+       /*
+        * Initialize node length ranges which are mostly needed for node
+        * length validation. Node types with a fixed size get 'len', the
+        * others get a 'min_len'/'max_len' pair.
+        */
+       c->ranges[UBIFS_PAD_NODE].len  = UBIFS_PAD_NODE_SZ;
+       c->ranges[UBIFS_SB_NODE].len   = UBIFS_SB_NODE_SZ;
+       c->ranges[UBIFS_MST_NODE].len  = UBIFS_MST_NODE_SZ;
+       c->ranges[UBIFS_REF_NODE].len  = UBIFS_REF_NODE_SZ;
+       c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ;
+       c->ranges[UBIFS_CS_NODE].len   = UBIFS_CS_NODE_SZ;
+
+       c->ranges[UBIFS_INO_NODE].min_len  = UBIFS_INO_NODE_SZ;
+       c->ranges[UBIFS_INO_NODE].max_len  = UBIFS_MAX_INO_NODE_SZ;
+       c->ranges[UBIFS_ORPH_NODE].min_len =
+                               UBIFS_ORPH_NODE_SZ + sizeof(__le64);
+       c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size;
+       c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ;
+       c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ;
+       c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ;
+       c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ;
+       c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ;
+       c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ;
+       /*
+        * Minimum indexing node size is amended later when superblock is
+        * read and the key length is known.
+        */
+       c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ;
+       /*
+        * Maximum indexing node size is amended later when superblock is
+        * read and the fanout is known.
+        */
+       c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX;
+
+       /*
+        * Initialize dead and dark LEB space watermarks.
+        *
+        * Dead space is the space which cannot be used. Its watermark is
+        * equivalent to min. I/O unit or minimum node size if it is greater
+        * than min. I/O unit.
+        *
+        * Dark space is the space which might be used, or might not, depending
+        * on which node should be written to the LEB. Its watermark is
+        * equivalent to maximum UBIFS node size.
+        */
+       c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
+       c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
+
+       return 0;
+}
+
+/**
+ * bud_wbuf_callback - bud LEB write-buffer synchronization call-back.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB the write-buffer was synchronized to
+ * @free: how many free bytes left in this LEB
+ * @pad: how many bytes were padded
+ *
+ * This is a callback function which is called by the I/O unit when the
+ * write-buffer is synchronized. We need this to correctly maintain space
+ * accounting in bud logical eraseblocks. This function returns zero in case of
+ * success and a negative error code in case of failure.
+ *
+ * The function only forwards @lnum, @free and @pad to 'ubifs_update_one_lp()',
+ * with the two remaining arguments set to zero, and returns its result.
+ *
+ * This function actually belongs to the journal, but we keep it here because
+ * we want to keep it static.
+ */
+static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
+{
+       return ubifs_update_one_lp(c, lnum, free, pad, 0, 0);
+}
+
+/**
+ * init_constants_late - initialize UBIFS constants.
+ * @c: UBIFS file-system description object
+ *
+ * This is a helper function which initializes various UBIFS constants after
+ * the superblock has been read. It also checks various UBIFS parameters and
+ * makes sure they are all right. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+static int init_constants_late(struct ubifs_info *c)
+{
+       int tmp, err;
+       uint64_t tmp64;
+
+       c->main_bytes = (long long)c->main_lebs * c->leb_size;
+       c->max_znode_sz = sizeof(struct ubifs_znode) +
+                               c->fanout * sizeof(struct ubifs_zbranch);
+
+       tmp = ubifs_idx_node_sz(c, 1);
+       c->ranges[UBIFS_IDX_NODE].min_len = tmp;
+       c->min_idx_node_sz = ALIGN(tmp, 8);
+
+       tmp = ubifs_idx_node_sz(c, c->fanout);
+       c->ranges[UBIFS_IDX_NODE].max_len = tmp;
+       c->max_idx_node_sz = ALIGN(tmp, 8);
+
+       /* Make sure LEB size is large enough to fit full commit */
+       tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt;
+       tmp = ALIGN(tmp, c->min_io_size);
+       if (tmp > c->leb_size) {
+               dbg_err("too small LEB size %d, at least %d needed",
+                       c->leb_size, tmp);
+               return -EINVAL;
+       }
+
+       /*
+        * Make sure that the log is large enough to fit reference nodes for
+        * all buds plus one reserved LEB.
+        */
+       tmp64 = c->max_bud_bytes;
+       tmp = do_div(tmp64, c->leb_size); /* tmp is the remainder */
+       c->max_bud_cnt = tmp64 + !!tmp; /* i.e., the division is rounded up */
+       tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1);
+       tmp /= c->leb_size;
+       tmp += 1;
+       if (c->log_lebs < tmp) {
+               dbg_err("too small log %d LEBs, required min. %d LEBs",
+                       c->log_lebs, tmp);
+               return -EINVAL;
+       }
+
+       /*
+        * When budgeting we assume worst-case scenarios when the pages are not
+        * compressed and direntries are of the maximum size.
+        *
+        * Note, data, which may be stored in inodes is budgeted separately, so
+        * it is not included into 'c->inode_budget'.
+        */
+       c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
+       c->inode_budget = UBIFS_INO_NODE_SZ;
+       c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;
+
+       /*
+        * When the amount of flash space used by buds becomes
+        * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit.
+        * The writers are unblocked when the commit is finished. To avoid
+        * blocking the writers, UBIFS initiates background commit in advance,
+        * when number of bud bytes becomes above the limit defined below
+        * (13/16 of 'c->max_bud_bytes').
+        */
+       c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4;
+
+       /*
+        * Ensure minimum journal size. All the bytes in the journal heads are
+        * considered to be used, when calculating the current journal usage.
+        * Consequently, if the journal is too small, UBIFS will treat it as
+        * always full.
+        */
+       tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1;
+       if (c->bg_bud_bytes < tmp64)
+               c->bg_bud_bytes = tmp64;
+       if (c->max_bud_bytes < tmp64 + c->leb_size)
+               c->max_bud_bytes = tmp64 + c->leb_size;
+
+       err = ubifs_calc_lpt_geom(c);
+       if (err)
+               return err;
+
+       c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+
+       /*
+        * Calculate total amount of FS blocks. This number is not used
+        * internally because it does not make much sense for UBIFS, but it is
+        * necessary to report something for the 'statfs()' call.
+        *
+        * Subtract the LEB reserved for GC and the LEB which is reserved for
+        * deletions.
+        *
+        * Review 'ubifs_calc_available()' if changing this calculation.
+        */
+       tmp64 = c->main_lebs - 2;
+       tmp64 *= (uint64_t)c->leb_size - c->dark_wm;
+       tmp64 = ubifs_reported_space(c, tmp64);
+       c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
+
+       return 0;
+}
+
+/**
+ * take_gc_lnum - reserve GC LEB.
+ * @c: UBIFS file-system description object
+ *
+ * This function makes sure the LEB reserved for garbage collection is
+ * unmapped and flagged as "taken" in lprops. The free space of that LEB is set
+ * to the LEB size and its dirty space to zero, because lprops may hold stale
+ * information if the file-system was un-mounted before it was committed.
+ * Returns zero in case of success and a negative error code in case of
+ * failure (%-EINVAL if there is no GC LEB at all).
+ */
+static int take_gc_lnum(struct ubifs_info *c)
+{
+       int ret;
+
+       if (c->gc_lnum == -1) {
+               ubifs_err("no LEB for GC");
+               return -EINVAL;
+       }
+
+       ret = ubifs_leb_unmap(c, c->gc_lnum);
+       if (ret)
+               return ret;
+
+       /* Let lprops know that this LEB is taken */
+       return ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0,
+                                  LPROPS_TAKEN, 0, 0);
+}
+
+/**
+ * alloc_wbufs - allocate write-buffers.
+ * @c: UBIFS file-system description object
+ *
+ * This helper function allocates the journal heads array and initializes the
+ * write-buffer of every journal head. Returns zero in case of success and a
+ * negative error code in case of failure (%-ENOMEM if the array cannot be
+ * allocated, otherwise the error returned by 'ubifs_wbuf_init()'). In case of
+ * failure everything allocated here is released and @c->jheads is set to
+ * %NULL, so calling 'free_wbufs()' afterwards is harmless.
+ */
+static int alloc_wbufs(struct ubifs_info *c)
+{
+       int i, err;
+
+       /* 'kcalloc()' zeroes the array and checks the size multiplication */
+       c->jheads = kcalloc(c->jhead_cnt, sizeof(struct ubifs_jhead),
+                           GFP_KERNEL);
+       if (!c->jheads)
+               return -ENOMEM;
+
+       /* Initialize journal heads */
+       for (i = 0; i < c->jhead_cnt; i++) {
+               INIT_LIST_HEAD(&c->jheads[i].buds_list);
+               err = ubifs_wbuf_init(c, &c->jheads[i].wbuf);
+               if (err)
+                       goto out_free;
+
+               c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
+               c->jheads[i].wbuf.jhead = i;
+       }
+
+       c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
+       /*
+        * Garbage Collector head likely contains long-term data and
+        * does not need to be synchronized by timer.
+        */
+       c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
+       c->jheads[GCHD].wbuf.timeout = 0;
+
+       return 0;
+
+out_free:
+       /*
+        * Release the write-buffers which were successfully initialized, the
+        * same way 'free_wbufs()' does. The head which failed is not touched -
+        * presumably 'ubifs_wbuf_init()' cleans up after itself (TODO:
+        * confirm).
+        */
+       while (--i >= 0) {
+               kfree(c->jheads[i].wbuf.buf);
+               kfree(c->jheads[i].wbuf.inodes);
+       }
+       kfree(c->jheads);
+       c->jheads = NULL;
+       return err;
+}
+
+/**
+ * free_wbufs - free write-buffers.
+ * @c: UBIFS file-system description object
+ *
+ * Releases the buffers of every journal head and the journal heads array
+ * itself, then sets @c->jheads to %NULL. Does nothing if there are no journal
+ * heads.
+ */
+static void free_wbufs(struct ubifs_info *c)
+{
+       int hd;
+
+       if (!c->jheads)
+               return;
+
+       for (hd = 0; hd < c->jhead_cnt; hd++) {
+               struct ubifs_wbuf *wbuf = &c->jheads[hd].wbuf;
+
+               kfree(wbuf->buf);
+               kfree(wbuf->inodes);
+       }
+
+       kfree(c->jheads);
+       c->jheads = NULL;
+}
+
+/**
+ * free_orphans - free orphans.
+ * @c: UBIFS file-system description object
+ *
+ * Frees the orphans chained through @c->orph_dnext, then whatever is still
+ * left on @c->orph_list (an error message is printed for each such orphan),
+ * and finally the orphan buffer.
+ */
+static void free_orphans(struct ubifs_info *c)
+{
+       struct ubifs_orphan *o;
+
+       /* Orphans linked via 'dnext' */
+       for (o = c->orph_dnext; o; o = c->orph_dnext) {
+               c->orph_dnext = o->dnext;
+               list_del(&o->list);
+               kfree(o);
+       }
+
+       /* Nothing should be left on the list by now, but free it anyway */
+       while (!list_empty(&c->orph_list)) {
+               o = list_entry(c->orph_list.next, struct ubifs_orphan, list);
+               list_del(&o->list);
+               kfree(o);
+               dbg_err("orphan list not empty at unmount");
+       }
+
+       vfree(c->orph_buf);
+       c->orph_buf = NULL;
+}
+
+/**
+ * free_buds - free per-bud objects.
+ * @c: UBIFS file-system description object
+ *
+ * This function walks the @c->buds RB-tree without recursion: it descends
+ * (left child first, then right child) until it reaches a node which has no
+ * children, frees that node, clears the parent's link to it and continues
+ * from the parent. Note, @c->buds.rb_node itself is not reset here.
+ */
+static void free_buds(struct ubifs_info *c)
+{
+       struct rb_node *this = c->buds.rb_node;
+       struct ubifs_bud *bud;
+
+       while (this) {
+               if (this->rb_left)
+                       this = this->rb_left;
+               else if (this->rb_right)
+                       this = this->rb_right;
+               else { /* a leaf - free it and go back to its parent */
+                       bud = rb_entry(this, struct ubifs_bud, rb);
+                       this = rb_parent(this);
+                       if (this) { /* unlink the leaf so it is not visited again */
+                               if (this->rb_left == &bud->rb)
+                                       this->rb_left = NULL;
+                               else
+                                       this->rb_right = NULL;
+                       }
+                       kfree(bud);
+               }
+       }
+}
+
+/**
+ * check_volume_empty - check if the UBI volume is empty.
+ * @c: UBIFS file-system description object
+ *
+ * The volume is considered empty if none of its LEBs is mapped. The verdict is
+ * stored in @c->empty. The scan stops at the first mapped LEB. Returns zero in
+ * case of success and a negative error code if 'ubi_is_mapped()' fails.
+ */
+static int check_volume_empty(struct ubifs_info *c)
+{
+       int lnum;
+
+       /* Assume the volume is empty until a mapped LEB is found */
+       c->empty = 1;
+
+       for (lnum = 0; lnum < c->leb_cnt; lnum++) {
+               int mapped = ubi_is_mapped(c->ubi, lnum);
+
+               if (unlikely(mapped < 0))
+                       return mapped;
+
+               if (mapped == 1) {
+                       c->empty = 0;
+                       return 0;
+               }
+
+               cond_resched();
+       }
+
+       return 0;
+}
+
+/*
+ * UBIFS mount options.
+ *
+ * Opt_fast_unmount: do not run a journal commit before un-mounting
+ * Opt_norm_unmount: run a journal commit before un-mounting
+ * Opt_err: just end of array marker
+ */
+enum {
+       Opt_fast_unmount,
+       Opt_norm_unmount,
+       Opt_err,
+};
+
+/*
+ * Maps the textual mount option names to the tokens above. This table is
+ * handed to 'match_token()' by 'ubifs_parse_options()'; the entry with the
+ * %NULL pattern terminates it.
+ */
+static match_table_t tokens = {
+       {Opt_fast_unmount, "fast_unmount"},
+       {Opt_norm_unmount, "norm_unmount"},
+       {Opt_err, NULL},
+};
+
+/**
+ * ubifs_parse_options - parse mount parameters.
+ * @c: UBIFS file-system description object
+ * @options: comma-separated list of parameters to parse, may be %NULL
+ * @is_remount: non-zero if this is FS re-mount (not used at the moment)
+ *
+ * This function parses UBIFS mount options and stores the result in
+ * @c->mount_opts and @c->fast_unmount. Note, @options is split in place by
+ * 'strsep()'. Returns zero in case of success and %-EINVAL if an option is not
+ * recognized.
+ */
+static int ubifs_parse_options(struct ubifs_info *c, char *options,
+                              int is_remount)
+{
+       substring_t args[MAX_OPT_ARGS];
+       char *opt;
+
+       if (!options)
+               return 0;
+
+       for (opt = strsep(&options, ","); opt; opt = strsep(&options, ",")) {
+               /* Tolerate empty fields, e.g. the one in "a,,b" */
+               if (*opt == '\0')
+                       continue;
+
+               switch (match_token(opt, tokens, args)) {
+               case Opt_norm_unmount:
+                       c->mount_opts.unmount_mode = 1;
+                       c->fast_unmount = 0;
+                       break;
+               case Opt_fast_unmount:
+                       c->mount_opts.unmount_mode = 2;
+                       c->fast_unmount = 1;
+                       break;
+               default:
+                       ubifs_err("unrecognized mount option \"%s\" "
+                                 "or missing value", opt);
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * destroy_journal - destroy journal data structures.
+ * @c: UBIFS file-system description object
+ *
+ * This function frees the objects on the unclean LEB list and on the old buds
+ * list (some of which may have been created by recovery functions), then
+ * destroys the index GC list, the size tree, the TNC and the buds tree.
+ */
+static void destroy_journal(struct ubifs_info *c)
+{
+       struct ubifs_unclean_leb *ucleb;
+       struct ubifs_bud *bud;
+       struct list_head *pos;
+
+       /* Pop and free unclean LEB descriptors one by one */
+       while (!list_empty(&c->unclean_leb_list)) {
+               pos = c->unclean_leb_list.next;
+               ucleb = list_entry(pos, struct ubifs_unclean_leb, list);
+               list_del(pos);
+               kfree(ucleb);
+       }
+
+       /* The same for the old buds */
+       while (!list_empty(&c->old_buds)) {
+               pos = c->old_buds.next;
+               bud = list_entry(pos, struct ubifs_bud, list);
+               list_del(pos);
+               kfree(bud);
+       }
+
+       ubifs_destroy_idx_gc(c);
+       ubifs_destroy_size_tree(c);
+       ubifs_tnc_close(c);
+       free_buds(c);
+}
+
+/**
+ * mount_ubifs - mount UBIFS file-system.
+ * @c: UBIFS file-system description object
+ *
+ * This function mounts UBIFS file system. Returns zero in case of success and
+ * a negative error code in case of failure.
+ *
+ * Note, if the function fails half way through, it releases the resources it
+ * has allocated so far (see the 'out_*' labels at the end) before returning
+ * the error. The freed pointers in @c are not reset to %NULL, though.
+ */
+static int mount_ubifs(struct ubifs_info *c)
+{
+       struct super_block *sb = c->vfs_sb;
+       int err, mounted_read_only = (sb->s_flags & MS_RDONLY);
+       long long x;
+       size_t sz;
+
+       err = init_constants_early(c);
+       if (err)
+               return err;
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+       c->dbg_buf = vmalloc(c->leb_size);
+       if (!c->dbg_buf)
+               return -ENOMEM;
+#endif
+
+       err = check_volume_empty(c);
+       if (err)
+               goto out_free;
+
+       if (c->empty && (mounted_read_only || c->ro_media)) {
+               /*
+                * This UBI volume is empty, and read-only, or the file system
+                * is mounted read-only - we cannot format it.
+                */
+               ubifs_err("can't format empty UBI volume: read-only %s",
+                         c->ro_media ? "UBI volume" : "mount");
+               err = -EROFS;
+               goto out_free;
+       }
+
+       if (c->ro_media && !mounted_read_only) {
+               ubifs_err("cannot mount read-write - read-only media");
+               err = -EROFS;
+               goto out_free;
+       }
+
+       /*
+        * The requirement for the buffer is that it should fit indexing B-tree
+        * height amount of integers. We assume the height of the TNC tree will
+        * never exceed 64.
+        */
+       /* Pre-set the error code for the allocation failures below */
+       err = -ENOMEM;
+       c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL);
+       if (!c->bottom_up_buf)
+               goto out_free;
+
+       c->sbuf = vmalloc(c->leb_size);
+       if (!c->sbuf)
+               goto out_free;
+
+       /* This buffer is allocated only for read-write mounts */
+       if (!mounted_read_only) {
+               c->ileb_buf = vmalloc(c->leb_size);
+               if (!c->ileb_buf)
+                       goto out_free;
+       }
+
+       err = ubifs_read_superblock(c);
+       if (err)
+               goto out_free;
+
+       /*
+        * Make sure the compressor which is set as the default one in the
+        * superblock was actually compiled in.
+        */
+       if (!ubifs_compr_present(c->default_compr)) {
+               ubifs_warn("'%s' compressor is set by superblock, but not "
+                          "compiled in", ubifs_compr_name(c->default_compr));
+               c->default_compr = UBIFS_COMPR_NONE;
+       }
+
+       dbg_failure_mode_registration(c);
+
+       err = init_constants_late(c);
+       if (err)
+               goto out_dereg;
+
+       /* Size the buffer for two index nodes, each aligned to min. I/O size */
+       sz = ALIGN(c->max_idx_node_sz, c->min_io_size);
+       sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);
+       c->cbuf = kmalloc(sz, GFP_NOFS);
+       if (!c->cbuf) {
+               err = -ENOMEM;
+               goto out_dereg;
+       }
+
+       /* Write-buffers and the background thread are for R/W mounts only */
+       if (!mounted_read_only) {
+               err = alloc_wbufs(c);
+               if (err)
+                       goto out_cbuf;
+
+               /* Create background thread */
+               sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num,
+                       c->vi.vol_id);
+               c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
+               /* Treat a NULL result as an error so one check covers both */
+               if (!c->bgt)
+                       c->bgt = ERR_PTR(-EINVAL);
+               if (IS_ERR(c->bgt)) {
+                       err = PTR_ERR(c->bgt);
+                       c->bgt = NULL;
+                       ubifs_err("cannot spawn \"%s\", error %d",
+                                 c->bgt_name, err);
+                       goto out_wbufs;
+               }
+               wake_up_process(c->bgt);
+       }
+
+       err = ubifs_read_master(c);
+       if (err)
+               goto out_master;
+
+       if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
+               ubifs_msg("recovery needed");
+               c->need_recovery = 1;
+               if (!mounted_read_only) {
+                       err = ubifs_recover_inl_heads(c, c->sbuf);
+                       if (err)
+                               goto out_master;
+               }
+       } else if (!mounted_read_only) {
+               /*
+                * Set the "dirty" flag so that if we reboot uncleanly we
+                * will notice this immediately on the next mount.
+                */
+               c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
+               err = ubifs_write_master(c);
+               if (err)
+                       goto out_master;
+       }
+
+       err = ubifs_lpt_init(c, 1, !mounted_read_only);
+       if (err)
+               goto out_lpt;
+
+       err = dbg_check_idx_size(c, c->old_idx_sz);
+       if (err)
+               goto out_lpt;
+
+       err = ubifs_replay_journal(c);
+       if (err)
+               goto out_journal;
+
+       err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
+       if (err)
+               goto out_orphans;
+
+       if (!mounted_read_only) {
+               int lnum;
+
+               /* Check for enough free space */
+               if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
+                       ubifs_err("insufficient available space");
+                       err = -EINVAL;
+                       goto out_orphans;
+               }
+
+               /* Check for enough log space */
+               /* 'lnum' is the log LEB after the head, wrapping around */
+               lnum = c->lhead_lnum + 1;
+               if (lnum >= UBIFS_LOG_LNUM + c->log_lebs)
+                       lnum = UBIFS_LOG_LNUM;
+               if (lnum == c->ltail_lnum) {
+                       err = ubifs_consolidate_log(c);
+                       if (err)
+                               goto out_orphans;
+               }
+
+               if (c->need_recovery) {
+                       err = ubifs_recover_size(c);
+                       if (err)
+                               goto out_orphans;
+                       err = ubifs_rcvry_gc_commit(c);
+               } else
+                       err = take_gc_lnum(c);
+               /* Covers both 'ubifs_rcvry_gc_commit()' and 'take_gc_lnum()' */
+               if (err)
+                       goto out_orphans;
+
+               err = dbg_check_lprops(c);
+               if (err)
+                       goto out_orphans;
+       } else if (c->need_recovery) {
+               err = ubifs_recover_size(c);
+               if (err)
+                       goto out_orphans;
+       }
+
+       /* Add this file-system to the global list of UBIFS instances */
+       spin_lock(&ubifs_infos_lock);
+       list_add_tail(&c->infos_list, &ubifs_infos);
+       spin_unlock(&ubifs_infos_lock);
+
+       if (c->need_recovery) {
+               if (mounted_read_only)
+                       ubifs_msg("recovery deferred");
+               else {
+                       c->need_recovery = 0;
+                       ubifs_msg("recovery completed");
+               }
+       }
+
+       err = dbg_check_filesystem(c);
+       if (err)
+               goto out_infos;
+
+       ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num,
+                 c->vi.vol_id);
+       if (mounted_read_only)
+               ubifs_msg("mounted read-only");
+       x = (long long)c->main_lebs * c->leb_size;
+       ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
+                 x, x >> 10, x >> 20, c->main_lebs);
+       x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
+       ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
+                 x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
+       ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
+       ubifs_msg("media format %d, latest format %d",
+                 c->fmt_version, UBIFS_FORMAT_VERSION);
+
+       dbg_msg("compiled on:         " __DATE__ " at " __TIME__);
+       dbg_msg("min. I/O unit size:  %d bytes", c->min_io_size);
+       dbg_msg("LEB size:            %d bytes (%d KiB)",
+               c->leb_size, c->leb_size / 1024);
+       dbg_msg("data journal heads:  %d",
+               c->jhead_cnt - NONDATA_JHEADS_CNT);
+       dbg_msg("UUID:                %02X%02X%02X%02X-%02X%02X"
+              "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
+              c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3],
+              c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],
+              c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],
+              c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);
+       dbg_msg("fast unmount:        %d", c->fast_unmount);
+       dbg_msg("big_lpt              %d", c->big_lpt);
+       dbg_msg("log LEBs:            %d (%d - %d)",
+               c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
+       dbg_msg("LPT area LEBs:       %d (%d - %d)",
+               c->lpt_lebs, c->lpt_first, c->lpt_last);
+       dbg_msg("orphan area LEBs:    %d (%d - %d)",
+               c->orph_lebs, c->orph_first, c->orph_last);
+       dbg_msg("main area LEBs:      %d (%d - %d)",
+               c->main_lebs, c->main_first, c->leb_cnt - 1);
+       dbg_msg("index LEBs:          %d", c->lst.idx_lebs);
+       dbg_msg("total index bytes:   %lld (%lld KiB, %lld MiB)",
+               c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);
+       dbg_msg("key hash type:       %d", c->key_hash_type);
+       dbg_msg("tree fanout:         %d", c->fanout);
+       dbg_msg("reserved GC LEB:     %d", c->gc_lnum);
+       dbg_msg("first main LEB:      %d", c->main_first);
+       dbg_msg("dead watermark:      %d", c->dead_wm);
+       dbg_msg("dark watermark:      %d", c->dark_wm);
+       x = (long long)c->main_lebs * c->dark_wm;
+       dbg_msg("max. dark space:     %lld (%lld KiB, %lld MiB)",
+               x, x >> 10, x >> 20);
+       dbg_msg("maximum bud bytes:   %lld (%lld KiB, %lld MiB)",
+               c->max_bud_bytes, c->max_bud_bytes >> 10,
+               c->max_bud_bytes >> 20);
+       dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)",
+               c->bg_bud_bytes, c->bg_bud_bytes >> 10,
+               c->bg_bud_bytes >> 20);
+       dbg_msg("current bud bytes    %lld (%lld KiB, %lld MiB)",
+               c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20);
+       dbg_msg("max. seq. number:    %llu", c->max_sqnum);
+       dbg_msg("commit number:       %llu", c->cmt_no);
+
+       return 0;
+
+       /*
+        * Error unwinding. The labels are ordered so that jumping to one of
+        * them undoes the steps which had succeeded before the jump, in
+        * reverse order, and then falls through to the labels below it.
+        */
+out_infos:
+       spin_lock(&ubifs_infos_lock);
+       list_del(&c->infos_list);
+       spin_unlock(&ubifs_infos_lock);
+out_orphans:
+       free_orphans(c);
+out_journal:
+       destroy_journal(c);
+out_lpt:
+       ubifs_lpt_free(c, 0);
+out_master:
+       kfree(c->mst_node);
+       kfree(c->rcvrd_mst_node);
+       /* The background thread exists only for read-write mounts */
+       if (c->bgt)
+               kthread_stop(c->bgt);
+out_wbufs:
+       free_wbufs(c);
+out_cbuf:
+       kfree(c->cbuf);
+out_dereg:
+       dbg_failure_mode_deregistration(c);
+out_free:
+       vfree(c->ileb_buf);
+       vfree(c->sbuf);
+       kfree(c->bottom_up_buf);
+       UBIFS_DBG(vfree(c->dbg_buf));
+       return err;
+}
+
+/**
+ * ubifs_umount - un-mount UBIFS file-system.
+ * @c: UBIFS file-system description object
+ *
+ * This function releases the resources of a mounted file-system. It is called
+ * from 'ubifs_put_super()' and from the error path of 'ubifs_fill_super()',
+ * so some of the resources may not have been allocated (e.g., the background
+ * thread does not exist on read-only mounts) - this has to be tolerated.
+ */
+static void ubifs_umount(struct ubifs_info *c)
+{
+       dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num,
+               c->vi.vol_id);
+
+       /* Remove this file-system from the global list of UBIFS instances */
+       spin_lock(&ubifs_infos_lock);
+       list_del(&c->infos_list);
+       spin_unlock(&ubifs_infos_lock);
+
+       /* The background thread has to go before its data is destroyed */
+       if (c->bgt)
+               kthread_stop(c->bgt);
+
+       destroy_journal(c);
+       free_wbufs(c);
+       free_orphans(c);
+       ubifs_lpt_free(c, 0);
+
+       /* Buffers which were allocated with 'kmalloc()' */
+       kfree(c->mst_node);
+       kfree(c->rcvrd_mst_node);
+       kfree(c->cbuf);
+       kfree(c->bottom_up_buf);
+
+       /* Buffers which were allocated with 'vmalloc()' */
+       vfree(c->ileb_buf);
+       vfree(c->sbuf);
+       UBIFS_DBG(vfree(c->dbg_buf));
+
+       dbg_failure_mode_deregistration(c);
+}
+
+/**
+ * ubifs_remount_rw - re-mount in read-write mode.
+ * @c: UBIFS file-system description object
+ *
+ * UBIFS avoids allocating many unnecessary resources when mounted in read-only
+ * mode. This function allocates the needed resources and re-mounts UBIFS in
+ * read-write mode.
+ *
+ * Returns zero in case of success and a negative error code in case of
+ * failure. On every failure after @c->umount_mutex was taken, the resources
+ * set up by this function are released again, @c->remounting_rw is cleared and
+ * the mutex is dropped; the %MS_RDONLY flag is cleared only on success.
+ */
+static int ubifs_remount_rw(struct ubifs_info *c)
+{
+       int err, lnum;
+
+       /* Nothing was locked or allocated yet, so a plain return is fine */
+       if (c->ro_media)
+               return -EINVAL;
+
+       mutex_lock(&c->umount_mutex);
+       c->remounting_rw = 1;
+
+       /* Check for enough free space */
+       if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
+               ubifs_err("insufficient available space");
+               err = -EINVAL;
+               goto out;
+       }
+
+       /* The volume size has changed - store the new LEB count on the media */
+       if (c->old_leb_cnt != c->leb_cnt) {
+               struct ubifs_sb_node *sup;
+
+               sup = ubifs_read_sb_node(c);
+               if (IS_ERR(sup)) {
+                       err = PTR_ERR(sup);
+                       goto out;
+               }
+               sup->leb_cnt = cpu_to_le32(c->leb_cnt);
+               /*
+                * NOTE(review): 'sup' is never freed here. If
+                * 'ubifs_read_sb_node()' returns an allocated buffer which is
+                * owned by the caller, this leaks it - TODO confirm.
+                */
+               err = ubifs_write_sb_node(c, sup);
+               if (err)
+                       goto out;
+       }
+
+       if (c->need_recovery) {
+               ubifs_msg("completing deferred recovery");
+               err = ubifs_write_rcvrd_mst_node(c);
+               if (err)
+                       goto out;
+               err = ubifs_recover_size(c);
+               if (err)
+                       goto out;
+               err = ubifs_clean_lebs(c, c->sbuf);
+               if (err)
+                       goto out;
+               err = ubifs_recover_inl_heads(c, c->sbuf);
+               if (err)
+                       goto out;
+       }
+
+       /* Mark the master node dirty, unless it already is */
+       if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) {
+               c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
+               err = ubifs_write_master(c);
+               if (err)
+                       goto out;
+       }
+
+       c->ileb_buf = vmalloc(c->leb_size);
+       if (!c->ileb_buf) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       err = ubifs_lpt_init(c, 0, 1);
+       if (err)
+               goto out;
+
+       err = alloc_wbufs(c);
+       if (err)
+               goto out;
+
+       ubifs_create_buds_lists(c);
+
+       /* Create background thread */
+       c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
+       /* Treat a NULL result as an error so one check covers both */
+       if (!c->bgt)
+               c->bgt = ERR_PTR(-EINVAL);
+       if (IS_ERR(c->bgt)) {
+               err = PTR_ERR(c->bgt);
+               c->bgt = NULL;
+               ubifs_err("cannot spawn \"%s\", error %d",
+                         c->bgt_name, err);
+               /*
+                * Must go through 'out': the mutex is held and the buffers
+                * allocated above have to be released.
+                */
+               goto out;
+       }
+       wake_up_process(c->bgt);
+
+       c->orph_buf = vmalloc(c->leb_size);
+       if (!c->orph_buf) {
+               /* Same as above - unwind instead of returning directly */
+               err = -ENOMEM;
+               goto out;
+       }
+
+       /* Check for enough log space */
+       lnum = c->lhead_lnum + 1;
+       if (lnum >= UBIFS_LOG_LNUM + c->log_lebs)
+               lnum = UBIFS_LOG_LNUM;
+       if (lnum == c->ltail_lnum) {
+               err = ubifs_consolidate_log(c);
+               if (err)
+                       goto out;
+       }
+
+       if (c->need_recovery)
+               err = ubifs_rcvry_gc_commit(c);
+       else
+               err = take_gc_lnum(c);
+       if (err)
+               goto out;
+
+       if (c->need_recovery) {
+               c->need_recovery = 0;
+               ubifs_msg("deferred recovery completed");
+       }
+
+       dbg_gen("re-mounted read-write");
+       c->vfs_sb->s_flags &= ~MS_RDONLY;
+       c->remounting_rw = 0;
+       mutex_unlock(&c->umount_mutex);
+       return 0;
+
+out:
+       /* Undo whatever was set up above; the frees tolerate NULL pointers */
+       vfree(c->orph_buf);
+       c->orph_buf = NULL;
+       if (c->bgt) {
+               kthread_stop(c->bgt);
+               c->bgt = NULL;
+       }
+       free_wbufs(c);
+       vfree(c->ileb_buf);
+       c->ileb_buf = NULL;
+       ubifs_lpt_free(c, 1);
+       c->remounting_rw = 0;
+       mutex_unlock(&c->umount_mutex);
+       return err;
+}
+
+/**
+ * commit_on_unmount - commit the journal when un-mounting.
+ * @c: UBIFS file-system description object
+ *
+ * This function is called during un-mounting and re-mounting read-only. It
+ * runs a commit unless the "fast unmount" mode is enabled, and it skips the
+ * commit if the journal contains too few data, i.e., if the amount of bud
+ * bytes does not exceed one LEB.
+ */
+static void commit_on_unmount(struct ubifs_info *c)
+{
+       long long jrn_bytes;
+
+       if (c->fast_unmount)
+               return;
+
+       /* Take a consistent snapshot of the journal size */
+       spin_lock(&c->buds_lock);
+       jrn_bytes = c->bud_bytes;
+       spin_unlock(&c->buds_lock);
+
+       if (jrn_bytes > c->leb_size)
+               ubifs_run_commit(c);
+}
+
+/**
+ * ubifs_remount_ro - re-mount in read-only mode.
+ * @c: UBIFS file-system description object
+ *
+ * We rely on VFS to have stopped writing. Possibly the background thread could
+ * be running a commit, however kthread_stop will wait in that case. After the
+ * write-buffers are synchronized and the master node is marked clean, the
+ * resources which are needed only in read-write mode are released.
+ */
+static void ubifs_remount_ro(struct ubifs_info *c)
+{
+       int err, jhead;
+
+       ubifs_assert(!c->need_recovery);
+       commit_on_unmount(c);
+
+       mutex_lock(&c->umount_mutex);
+
+       /* Stop the background thread first */
+       if (c->bgt) {
+               kthread_stop(c->bgt);
+               c->bgt = NULL;
+       }
+
+       /* Synchronize write-buffers and kill their timers */
+       for (jhead = 0; jhead < c->jhead_cnt; jhead++) {
+               ubifs_wbuf_sync(&c->jheads[jhead].wbuf);
+               del_timer_sync(&c->jheads[jhead].wbuf.timer);
+       }
+
+       if (!c->ro_media) {
+               /* Mark the master node clean and save the GC LEB number */
+               c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
+               c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
+               c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
+
+               err = ubifs_write_master(c);
+               if (err)
+                       ubifs_ro_mode(c, err);
+       }
+
+       /* Release what is needed only in read-write mode */
+       ubifs_destroy_idx_gc(c);
+       free_wbufs(c);
+
+       vfree(c->orph_buf);
+       c->orph_buf = NULL;
+
+       vfree(c->ileb_buf);
+       c->ileb_buf = NULL;
+
+       ubifs_lpt_free(c, 1);
+       mutex_unlock(&c->umount_mutex);
+}
+
+/*
+ * The VFS '->put_super()' operation of UBIFS. For read-write mounts it stops
+ * the background thread, synchronizes the write-buffers and writes a clean
+ * master node, then it releases everything including the UBIFS file-system
+ * description object itself.
+ */
+static void ubifs_put_super(struct super_block *sb)
+{
+       struct ubifs_info *c = sb->s_fs_info;
+       int jhead, err;
+
+       ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
+                 c->vi.vol_id);
+       /*
+        * The following asserts are only valid if there has not been a failure
+        * of the media. For example, there will be dirty inodes if we failed
+        * to write them back because of I/O errors.
+        */
+       ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
+       ubifs_assert(c->budg_idx_growth == 0);
+       ubifs_assert(c->budg_data_growth == 0);
+
+       /*
+        * The 'c->umount_mutex' prevents races between UBIFS memory shrinker
+        * and file system un-mount. Namely, it prevents the shrinker from
+        * picking this superblock for shrinking - it will be just skipped if
+        * the mutex is locked.
+        */
+       mutex_lock(&c->umount_mutex);
+
+       /* Read-only mounts have nothing to stop, flush or write */
+       if (c->vfs_sb->s_flags & MS_RDONLY)
+               goto umount;
+
+       /*
+        * First of all kill the background thread to make sure it does not
+        * interfere with un-mounting and freeing resources.
+        */
+       if (c->bgt) {
+               kthread_stop(c->bgt);
+               c->bgt = NULL;
+       }
+
+       /* Synchronize write-buffers */
+       if (c->jheads) {
+               for (jhead = 0; jhead < c->jhead_cnt; jhead++) {
+                       ubifs_wbuf_sync(&c->jheads[jhead].wbuf);
+                       del_timer_sync(&c->jheads[jhead].wbuf.timer);
+               }
+       }
+
+       /*
+        * On fatal errors c->ro_media is set to 1, in which case we do not
+        * write the master node.
+        */
+       if (c->ro_media)
+               goto umount;
+
+       /*
+        * We are being cleanly unmounted which means the orphans were killed -
+        * indicate this in the master node. Also save the reserved GC LEB
+        * number.
+        */
+       c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
+       c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
+       c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
+       err = ubifs_write_master(c);
+       if (err) {
+               /*
+                * Recovery will attempt to fix the master area next mount, so
+                * we just print a message and continue to unmount normally.
+                */
+               ubifs_err("failed to write master node, "
+                         "error %d", err);
+       }
+
+umount:
+       ubifs_umount(c);
+       bdi_destroy(&c->bdi);
+       ubi_close_volume(c->ubi);
+       mutex_unlock(&c->umount_mutex);
+       kfree(c);
+}
+
+/*
+ * The VFS '->remount_fs()' operation of UBIFS. Parses the new mount options
+ * and switches between the read-only and read-write modes if the %MS_RDONLY
+ * flag in @flags differs from the one of the super-block. Returns zero in case
+ * of success and a negative error code in case of failure.
+ */
+static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+       struct ubifs_info *c = sb->s_fs_info;
+       int err, is_ro, want_ro;
+
+       dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags);
+
+       err = ubifs_parse_options(c, data, 1);
+       if (err) {
+               ubifs_err("invalid or unknown remount parameter");
+               return err;
+       }
+
+       is_ro = (sb->s_flags & MS_RDONLY) != 0;
+       want_ro = (*flags & MS_RDONLY) != 0;
+
+       /* R/O -> R/W transition may fail, R/W -> R/O cannot */
+       if (is_ro && !want_ro)
+               return ubifs_remount_rw(c);
+
+       if (!is_ro && want_ro)
+               ubifs_remount_ro(c);
+
+       return 0;
+}
+
+/*
+ * UBIFS super-block operations. 'ubifs_fill_super()' installs this table into
+ * the 's_op' field of the VFS super-block.
+ */
+struct super_operations ubifs_super_operations = {
+       .alloc_inode   = ubifs_alloc_inode,
+       .destroy_inode = ubifs_destroy_inode,
+       .put_super     = ubifs_put_super,
+       .write_inode   = ubifs_write_inode,
+       .delete_inode  = ubifs_delete_inode,
+       .statfs        = ubifs_statfs,
+       .dirty_inode   = ubifs_dirty_inode,
+       .remount_fs    = ubifs_remount_fs,
+       .show_options  = ubifs_show_options,
+       .sync_fs       = ubifs_sync_fs,
+};
+
+/**
+ * open_ubi - parse UBI device name string and open the UBI device.
+ * @name: UBI volume name
+ * @mode: UBI volume open mode
+ *
+ * There are several ways to specify UBI volumes when mounting UBIFS:
+ * o ubiX_Y    - UBI device number X, volume Y;
+ * o ubiY      - UBI device number 0, volume Y;
+ * o ubiX:NAME - mount UBI device X, volume with name NAME;
+ * o ubi:NAME  - mount UBI device 0, volume with name NAME.
+ *
+ * Alternative '!' separator may be used instead of ':' (because some shells
+ * like busybox may interpret ':' as an NFS host name separator). The numbers
+ * are parsed by 'simple_strtoul()' with base 0. This function returns ubi
+ * volume object in case of success and an error pointer in case of failure;
+ * %-EINVAL is returned if @name is %NULL, empty or malformed.
+ */
+static struct ubi_volume_desc *open_ubi(const char *name, int mode)
+{
+       int dev, vol;
+       char *endptr;
+
+       /* The mount source may be absent or empty - do not dereference it */
+       if (!name || !*name)
+               return ERR_PTR(-EINVAL);
+
+       /* The checks short-circuit, so a short name is never over-read */
+       if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
+               return ERR_PTR(-EINVAL);
+
+       /* ubi:NAME method */
+       if ((name[3] == ':' || name[3] == '!') && name[4] != '\0')
+               return ubi_open_volume_nm(0, name + 4, mode);
+
+       if (!isdigit(name[3]))
+               return ERR_PTR(-EINVAL);
+
+       dev = simple_strtoul(name + 3, &endptr, 0);
+
+       /* ubiY method - the parsed number is the volume ID on UBI device 0 */
+       if (*endptr == '\0')
+               return ubi_open_volume(0, dev, mode);
+
+       /* ubiX_Y method */
+       if (*endptr == '_' && isdigit(endptr[1])) {
+               vol = simple_strtoul(endptr + 1, &endptr, 0);
+               if (*endptr != '\0')
+                       return ERR_PTR(-EINVAL);
+               return ubi_open_volume(dev, vol, mode);
+       }
+
+       /* ubiX:NAME method */
+       if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0')
+               return ubi_open_volume_nm(dev, ++endptr, mode);
+
+       return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Allocate and initialize the UBIFS file-system description object, mount the
+ * file-system and set up the VFS super-block @sb. On entry @sb->s_fs_info
+ * holds the UBI volume descriptor which was opened by the caller; it is
+ * replaced with the UBIFS description object. @data is the mount options
+ * string, @silent is not used. Returns zero in case of success and a negative
+ * error code in case of failure.
+ */
+static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
+{
+       struct ubi_volume_desc *ubi = sb->s_fs_info;
+       struct ubifs_info *c;
+       struct inode *root;
+       int err;
+
+       c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
+       if (!c)
+               return -ENOMEM;
+
+       spin_lock_init(&c->cnt_lock);
+       spin_lock_init(&c->cs_lock);
+       spin_lock_init(&c->buds_lock);
+       spin_lock_init(&c->space_lock);
+       spin_lock_init(&c->orphan_lock);
+       init_rwsem(&c->commit_sem);
+       mutex_init(&c->lp_mutex);
+       mutex_init(&c->tnc_mutex);
+       mutex_init(&c->log_mutex);
+       mutex_init(&c->mst_mutex);
+       mutex_init(&c->umount_mutex);
+       init_waitqueue_head(&c->cmt_wq);
+       c->buds = RB_ROOT;
+       c->old_idx = RB_ROOT;
+       c->size_tree = RB_ROOT;
+       c->orph_tree = RB_ROOT;
+       INIT_LIST_HEAD(&c->infos_list);
+       INIT_LIST_HEAD(&c->idx_gc);
+       INIT_LIST_HEAD(&c->replay_list);
+       INIT_LIST_HEAD(&c->replay_buds);
+       INIT_LIST_HEAD(&c->uncat_list);
+       INIT_LIST_HEAD(&c->empty_list);
+       INIT_LIST_HEAD(&c->freeable_list);
+       INIT_LIST_HEAD(&c->frdi_idx_list);
+       INIT_LIST_HEAD(&c->unclean_leb_list);
+       INIT_LIST_HEAD(&c->old_buds);
+       INIT_LIST_HEAD(&c->orph_list);
+       INIT_LIST_HEAD(&c->orph_new);
+
+       c->highest_inum = UBIFS_FIRST_INO;
+       get_random_bytes(&c->vfs_gen, sizeof(int));
+       c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
+
+       ubi_get_volume_info(ubi, &c->vi);
+       ubi_get_device_info(c->vi.ubi_num, &c->di);
+
+       /* Re-open the UBI device in read-write mode */
+       c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
+       if (IS_ERR(c->ubi)) {
+               err = PTR_ERR(c->ubi);
+               goto out_free;
+       }
+
+       /*
+        * UBIFS provides 'backing_dev_info' in order to disable readahead. For
+        * UBIFS, I/O is not deferred, it is done immediately in readpage,
+        * which means the user would have to wait not just for their own I/O
+        * but the readahead I/O as well i.e. completely pointless.
+        *
+        * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
+        */
+       c->bdi.capabilities = BDI_CAP_MAP_COPY;
+       c->bdi.unplug_io_fn = default_unplug_io_fn;
+       err  = bdi_init(&c->bdi);
+       if (err)
+               goto out_close;
+
+       err = ubifs_parse_options(c, data, 0);
+       if (err)
+               goto out_bdi;
+
+       c->vfs_sb = sb;
+
+       sb->s_fs_info = c;
+       sb->s_magic = UBIFS_SUPER_MAGIC;
+       sb->s_blocksize = UBIFS_BLOCK_SIZE;
+       sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT;
+       sb->s_dev = c->vi.cdev;
+       sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c);
+       if (c->max_inode_sz > MAX_LFS_FILESIZE)
+               sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
+       sb->s_op = &ubifs_super_operations;
+
+       mutex_lock(&c->umount_mutex);
+       err = mount_ubifs(c);
+       if (err) {
+               ubifs_assert(err < 0);
+               goto out_unlock;
+       }
+
+       /* Read the root inode */
+       root = ubifs_iget(sb, UBIFS_ROOT_INO);
+       if (IS_ERR(root)) {
+               err = PTR_ERR(root);
+               goto out_umount;
+       }
+
+       sb->s_root = d_alloc_root(root);
+       if (!sb->s_root) {
+               /*
+                * 'err' is still zero after the successful 'mount_ubifs()', so
+                * set an error code - otherwise success would be reported for
+                * a super-block which has just been torn down.
+                */
+               err = -ENOMEM;
+               goto out_iput;
+       }
+
+       mutex_unlock(&c->umount_mutex);
+
+       return 0;
+
+out_iput:
+       iput(root);
+out_umount:
+       ubifs_umount(c);
+out_unlock:
+       mutex_unlock(&c->umount_mutex);
+out_bdi:
+       bdi_destroy(&c->bdi);
+out_close:
+       ubi_close_volume(c->ubi);
+out_free:
+       kfree(c);
+       return err;
+}
+
+/*
+ * 'sget()' comparison callback: returns non-zero if super-block @sb belongs to
+ * the device number @data points to.
+ */
+static int sb_test(struct super_block *sb, void *data)
+{
+       return sb->s_dev == *(dev_t *)data;
+}
+
+/*
+ * 'sget()' initialization callback: stores the device number @data points to
+ * in super-block @sb. Always returns zero.
+ */
+static int sb_set(struct super_block *sb, void *data)
+{
+       sb->s_dev = *(dev_t *)data;
+
+       return 0;
+}
+
+/*
+ * ubifs_get_sb - find or create the super block for a UBI volume.
+ * @fs_type: file-system type, passed through to 'sget()'
+ * @flags: mount flags
+ * @name: UBI volume name, passed to 'open_ubi()'
+ * @data: mount options, passed through to 'ubifs_fill_super()'
+ * @mnt: VFS mount the super block is attached to
+ *
+ * Returns the result of 'simple_set_mnt()' on success and a negative error
+ * code on failure.
+ */
+static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
+                       const char *name, void *data, struct vfsmount *mnt)
+{
+       struct ubi_volume_desc *ubi;
+       struct ubi_volume_info vi;
+       struct super_block *sb;
+       int err;
+
+       dbg_gen("name %s, flags %#x", name, flags);
+
+       /*
+        * Get UBI device number and volume ID. Mount it read-only so far
+        * because this might be a new mount point, and UBI allows only one
+        * read-write user at a time.
+        */
+       ubi = open_ubi(name, UBI_READONLY);
+       if (IS_ERR(ubi)) {
+               ubifs_err("cannot open \"%s\", error %d",
+                         name, (int)PTR_ERR(ubi));
+               return PTR_ERR(ubi);
+       }
+       ubi_get_volume_info(ubi, &vi);
+
+       dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);
+
+       /* Super blocks are keyed by the volume character device number */
+       sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev);
+       if (IS_ERR(sb)) {
+               err = PTR_ERR(sb);
+               goto out_close;
+       }
+
+       if (sb->s_root) {
+               /* A new mount point for already mounted UBIFS */
+               dbg_gen("this ubi volume is already mounted");
+               /* The new mount must agree with the existing one on R/O-ness */
+               if ((flags ^ sb->s_flags) & MS_RDONLY) {
+                       err = -EBUSY;
+                       goto out_deact;
+               }
+       } else {
+               sb->s_flags = flags;
+               /*
+                * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
+                * replaced by 'c'.
+                */
+               sb->s_fs_info = ubi;
+               err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
+               if (err)
+                       goto out_deact;
+               /* We do not support atime */
+               sb->s_flags |= MS_ACTIVE | MS_NOATIME;
+       }
+
+       /* 'fill_super()' opens ubi again so we must close it here */
+       ubi_close_volume(ubi);
+
+       return simple_set_mnt(mnt, sb);
+
+out_deact:
+       /*
+        * Release @sb->s_umount before dropping the reference (assumes 'sget()'
+        * returned with it write-locked - TODO confirm).
+        */
+       up_write(&sb->s_umount);
+       deactivate_super(sb);
+out_close:
+       ubi_close_volume(ubi);
+       return err;
+}
+
+/*
+ * ubifs_kill_sb - '->kill_sb()' handler of the UBIFS file-system type.
+ * @sb: super block which is being shut down
+ */
+static void ubifs_kill_sb(struct super_block *sb)
+{
+       struct ubifs_info *c = sb->s_fs_info;
+       int mounted_rw = sb->s_root && !(sb->s_flags & MS_RDONLY);
+
+       /*
+        * The commit is done from here rather than from 'ubifs_put_super()'
+        * so that it runs outside of the BKL. It is needed only when the file
+        * system was fully mounted and is writable.
+        */
+       if (mounted_rw)
+               commit_on_unmount(c);
+
+       /* The actual un-mounting work happens in put_super() */
+       generic_shutdown_super(sb);
+}
+
+/*
+ * UBIFS file-system type description. It is registered in 'ubifs_init()' and
+ * unregistered in 'ubifs_exit()'.
+ */
+static struct file_system_type ubifs_fs_type = {
+       .name    = "ubifs",
+       .owner   = THIS_MODULE,
+       .get_sb  = ubifs_get_sb,
+       .kill_sb = ubifs_kill_sb
+};
+
+/*
+ * Constructor of the UBIFS inode slab cache: performs the one-time
+ * initialization of the VFS inode embedded into each 'struct ubifs_inode'
+ * object.
+ */
+static void inode_slab_ctor(struct kmem_cache *cachep, void *obj)
+{
+       inode_init_once(&((struct ubifs_inode *)obj)->vfs_inode);
+}
+
+/*
+ * ubifs_init - module initialization.
+ *
+ * Verifies on-flash structure sizes at build time, checks the page cache size,
+ * registers the file system, creates the inode slab cache, registers the
+ * shrinker and initializes the compressors. Returns zero on success and a
+ * negative error code on failure.
+ *
+ * NOTE(review): the file system is registered before the inode slab cache and
+ * the compressors are set up. Presumably a mount request could arrive in that
+ * window; consider registering the file system as the last step.
+ */
+static int __init ubifs_init(void)
+{
+       int err;
+
+       /* The common header layout is part of the on-flash format */
+       BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24);
+
+       /* Make sure node sizes are 8-byte aligned */
+       BUILD_BUG_ON(UBIFS_CH_SZ        & 7);
+       BUILD_BUG_ON(UBIFS_INO_NODE_SZ  & 7);
+       BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7);
+       BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7);
+       BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7);
+       BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7);
+       BUILD_BUG_ON(UBIFS_SB_NODE_SZ   & 7);
+       BUILD_BUG_ON(UBIFS_MST_NODE_SZ  & 7);
+       BUILD_BUG_ON(UBIFS_REF_NODE_SZ  & 7);
+       BUILD_BUG_ON(UBIFS_CS_NODE_SZ   & 7);
+       BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7);
+
+       BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7);
+       BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7);
+       BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7);
+       BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ  & 7);
+       BUILD_BUG_ON(UBIFS_MAX_NODE_SZ      & 7);
+       BUILD_BUG_ON(MIN_WRITE_SZ           & 7);
+
+       /* Check min. node size */
+       BUILD_BUG_ON(UBIFS_INO_NODE_SZ  < MIN_WRITE_SZ);
+       BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ);
+       BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ);
+       BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ);
+
+       /* No node type may exceed the global maximum node size */
+       BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ);
+       BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ);
+       BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ);
+       BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ  > UBIFS_MAX_NODE_SZ);
+
+       /* Defined node sizes */
+       BUILD_BUG_ON(UBIFS_SB_NODE_SZ  != 4096);
+       BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512);
+       BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160);
+       BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64);
+
+       /*
+        * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to
+        * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
+        */
+       if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
+               ubifs_err("VFS page cache size is %u bytes, but UBIFS requires"
+                         " at least 4096 bytes",
+                         (unsigned int)PAGE_CACHE_SIZE);
+               return -EINVAL;
+       }
+
+       err = register_filesystem(&ubifs_fs_type);
+       if (err) {
+               ubifs_err("cannot register file system, error %d", err);
+               return err;
+       }
+
+       /* 'kmem_cache_create()' reports failure via NULL, so preset the code */
+       err = -ENOMEM;
+       ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
+                               sizeof(struct ubifs_inode), 0,
+                               SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
+                               &inode_slab_ctor);
+       if (!ubifs_inode_slab)
+               goto out_reg;
+
+       register_shrinker(&ubifs_shrinker_info);
+
+       err = ubifs_compressors_init();
+       if (err)
+               goto out_compr;
+
+       return 0;
+
+       /* Error unwinding: undo the steps above in reverse order */
+out_compr:
+       unregister_shrinker(&ubifs_shrinker_info);
+       kmem_cache_destroy(ubifs_inode_slab);
+out_reg:
+       unregister_filesystem(&ubifs_fs_type);
+       return err;
+}
+/* late_initcall to let compressors initialize first */
+late_initcall(ubifs_init);
+
+/*
+ * ubifs_exit - module clean-up.
+ *
+ * Releases what 'ubifs_init()' set up, in the reverse order: compressors,
+ * shrinker, inode slab cache and finally the file-system registration.
+ */
+static void __exit ubifs_exit(void)
+{
+       /* No file-system instance and no clean znode may be left by now */
+       ubifs_assert(list_empty(&ubifs_infos));
+       ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
+
+       ubifs_compressors_exit();
+       unregister_shrinker(&ubifs_shrinker_info);
+       kmem_cache_destroy(ubifs_inode_slab);
+       unregister_filesystem(&ubifs_fs_type);
+}
+module_exit(ubifs_exit);
+
+/* Module meta-data */
+MODULE_LICENSE("GPL");
+MODULE_VERSION(__stringify(UBIFS_VERSION));
+MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter");
+MODULE_DESCRIPTION("UBIFS - UBI File System");
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c

new file mode 100644 (file)

index 0000000..e909f4a
--- /dev/null
+++ b/fs/ubifs/tnc.c
@@ -0,0 +1,2956 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements TNC (Tree Node Cache) which caches indexing nodes of
+ * the UBIFS B-tree.
+ *
+ * At the moment the locking rules of the TNC tree are quite simple and
+ * straightforward. We just have a mutex and lock it when we traverse the
+ * tree. If a znode is not in memory, we read it from flash while still having
+ * the mutex locked.
+ */
+
+#include <linux/crc32.h>
+#include "ubifs.h"
+
+/*
+ * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions.
+ * @NAME_LESS: name referred to by the zbranch is less than the given name
+ * @NAME_MATCHES: names match
+ * @NAME_GREATER: name referred to by the zbranch is greater than the given
+ *                name
+ * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media
+ *
+ * These constants were introduced to improve readability.
+ */
+enum {
+       NAME_LESS    = 0,
+       NAME_MATCHES = 1,
+       NAME_GREATER = 2,
+       NOT_ON_MEDIA = 3,
+};
+
+/**
+ * insert_old_idx - record an index node obsoleted since the last commit start.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of obsoleted index node
+ * @offs: offset of obsoleted index node
+ *
+ * Returns %0 on success, and a negative error code on failure.
+ *
+ * A complete and intact version of the index - the "old index", i.e. the index
+ * as of the last successful commit - has to be present on flash at all times,
+ * because recovery relies on it. Its nodes may well be dirty, but they must
+ * not be erased before the next commit succeeds (which is when the new index
+ * takes over the role of the old index).
+ *
+ * Consequently, garbage collection and the in-the-gaps commit method have to
+ * be able to tell whether an index node belongs to the old index. For most
+ * nodes a TNC look-up with 'lookup_znode()' is enough. But some old index
+ * nodes may have been deleted from the current index, or changed so much that
+ * they cannot be found easily. Such nodes are remembered in an RB-tree, and
+ * this function adds an entry to it. The tree is ordered by LEB number and
+ * then by offset, since this pair uniquely identifies an old index node.
+ */
+static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
+{
+       struct rb_node **link = &c->old_idx.rb_node;
+       struct rb_node *parent = NULL;
+       struct ubifs_old_idx *new_idx;
+
+       new_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS);
+       if (unlikely(!new_idx))
+               return -ENOMEM;
+       new_idx->lnum = lnum;
+       new_idx->offs = offs;
+
+       /* Walk down to the leaf position, comparing LEB number then offset */
+       while (*link) {
+               struct ubifs_old_idx *cur;
+
+               parent = *link;
+               cur = rb_entry(parent, struct ubifs_old_idx, rb);
+
+               if (lnum != cur->lnum) {
+                       if (lnum < cur->lnum)
+                               link = &parent->rb_left;
+                       else
+                               link = &parent->rb_right;
+               } else if (offs != cur->offs) {
+                       if (offs < cur->offs)
+                               link = &parent->rb_left;
+                       else
+                               link = &parent->rb_right;
+               } else {
+                       /* Already recorded: complain, but it is not fatal */
+                       ubifs_err("old idx added twice!");
+                       kfree(new_idx);
+                       return 0;
+               }
+       }
+
+       rb_link_node(&new_idx->rb, parent, link);
+       rb_insert_color(&new_idx->rb, &c->old_idx);
+       return 0;
+}
+
+/**
+ * insert_old_idx_znode - record a znode obsoleted since last commit start.
+ * @c: UBIFS file-system description object
+ * @znode: znode of obsoleted index node
+ *
+ * The position of @znode is taken from the parent's branch which refers to
+ * it, or from @c->zroot if @znode has no parent. Nothing is recorded if the
+ * length stored there is zero.
+ *
+ * Returns %0 on success, and a negative error code on failure.
+ */
+int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode)
+{
+       struct ubifs_zbranch *zbr;
+
+       if (!znode->parent) {
+               if (!c->zroot.len)
+                       return 0;
+               return insert_old_idx(c, c->zroot.lnum, c->zroot.offs);
+       }
+
+       zbr = &znode->parent->zbranch[znode->iip];
+       if (!zbr->len)
+               return 0;
+       return insert_old_idx(c, zbr->lnum, zbr->offs);
+}
+
+/**
+ * ins_clr_old_idx_znode - record an obsoleted znode and forget its position.
+ * @c: UBIFS file-system description object
+ * @znode: znode of obsoleted index node
+ *
+ * This function is like 'insert_old_idx_znode()', but once the position has
+ * been recorded in the old index RB-tree, the LEB number, offset and length
+ * stored in the referring branch (or in @c->zroot if @znode has no parent) are
+ * reset to zero. Nothing is done if the stored length is already zero.
+ *
+ * Returns %0 on success, and a negative error code on failure.
+ */
+static int ins_clr_old_idx_znode(struct ubifs_info *c,
+                                struct ubifs_znode *znode)
+{
+       struct ubifs_zbranch *zbr;
+       int err;
+
+       if (!znode->parent) {
+               /* No parent: the position is kept in @c->zroot */
+               if (!c->zroot.len)
+                       return 0;
+
+               err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs);
+               if (err)
+                       return err;
+
+               c->zroot.lnum = 0;
+               c->zroot.offs = 0;
+               c->zroot.len = 0;
+               return 0;
+       }
+
+       zbr = &znode->parent->zbranch[znode->iip];
+       if (!zbr->len)
+               return 0;
+
+       err = insert_old_idx(c, zbr->lnum, zbr->offs);
+       if (err)
+               return err;
+
+       zbr->lnum = 0;
+       zbr->offs = 0;
+       zbr->len = 0;
+       return 0;
+}
+
+/**
+ * destroy_old_idx - destroy the old_idx RB-tree.
+ * @c: UBIFS file-system description object
+ *
+ * During start commit, the old_idx RB-tree is used to avoid overwriting index
+ * nodes that were in the index last commit but have since been deleted.  This
+ * is necessary for recovery i.e. the old index must be kept intact until the
+ * new index is successfully written.  The old-idx RB-tree is used for the
+ * in-the-gaps method of writing index nodes and is destroyed every commit.
+ *
+ * The tree is torn down iteratively: walk down to a node without children,
+ * free it, unhook it from its parent and continue from the parent.
+ */
+void destroy_old_idx(struct ubifs_info *c)
+{
+       struct rb_node *this = c->old_idx.rb_node;
+       struct ubifs_old_idx *old_idx;
+
+       while (this) {
+               /* Descend until a node with no children is reached */
+               if (this->rb_left) {
+                       this = this->rb_left;
+                       continue;
+               } else if (this->rb_right) {
+                       this = this->rb_right;
+                       continue;
+               }
+               old_idx = rb_entry(this, struct ubifs_old_idx, rb);
+               /* Step back to the parent before the node is freed */
+               this = rb_parent(this);
+               if (this) {
+                       /* Unhook the node so that it is not visited again */
+                       if (this->rb_left == &old_idx->rb)
+                               this->rb_left = NULL;
+                       else
+                               this->rb_right = NULL;
+               }
+               kfree(old_idx);
+       }
+       /* Every node has been freed, leave an empty tree behind */
+       c->old_idx = RB_ROOT;
+}
+
+/**
+ * copy_znode - copy a dirty znode.
+ * @c: UBIFS file-system description object
+ * @znode: znode to copy
+ *
+ * A dirty znode being committed may not be changed, so it is copied.
+ *
+ * Besides allocating the copy, this function marks the original @znode as
+ * obsolete, makes the in-memory children point to the copy as their parent
+ * and increments @c->dirty_zn_cnt. Returns the copy on success and
+ * %ERR_PTR(-ENOMEM) if the copy could not be allocated.
+ */
+static struct ubifs_znode *copy_znode(struct ubifs_info *c,
+                                     struct ubifs_znode *znode)
+{
+       struct ubifs_znode *zn;
+
+       zn = kmalloc(c->max_znode_sz, GFP_NOFS);
+       if (unlikely(!zn))
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(zn, znode, c->max_znode_sz);
+       /* The copy starts out dirty, not copy-on-write and with no 'cnext' */
+       zn->cnext = NULL;
+       __set_bit(DIRTY_ZNODE, &zn->flags);
+       __clear_bit(COW_ZNODE, &zn->flags);
+
+       /* The original is superseded by the copy from now on */
+       ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
+       __set_bit(OBSOLETE_ZNODE, &znode->flags);
+
+       if (znode->level != 0) {
+               int i;
+               const int n = zn->child_cnt;
+
+               /* The children now have new parent */
+               for (i = 0; i < n; i++) {
+                       struct ubifs_zbranch *zbr = &zn->zbranch[i];
+
+                       /* Only children which are loaded have a znode */
+                       if (zbr->znode)
+                               zbr->znode->parent = zn;
+               }
+       }
+
+       atomic_long_inc(&c->dirty_zn_cnt);
+       return zn;
+}
+
+/**
+ * add_idx_dirt - add dirt due to a dirty znode.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of index node
+ * @dirt: size of index node
+ *
+ * This function subtracts @dirt, aligned up to 8 bytes, from the calculated
+ * index size (@c->calc_idx_sz) and adds @dirt to the dirty space of LEB @lnum
+ * via 'ubifs_add_dirt()'. Returns whatever 'ubifs_add_dirt()' returns.
+ */
+static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt)
+{
+       int err;
+
+       c->calc_idx_sz -= ALIGN(dirt, 8);
+       err = ubifs_add_dirt(c, lnum, dirt);
+
+       return err;
+}
+
+/**
+ * undo_copy_znode - roll back the effects of 'copy_znode()'.
+ * @c: UBIFS file-system description object
+ * @znode: original znode which was copied
+ * @zn: the copy created by 'copy_znode()'
+ *
+ * This helper is used when the copy cannot be linked into the tree. It makes
+ * the in-memory children point back to @znode, clears the obsolete flag of
+ * @znode, drops the dirty znode count taken for the copy and frees @zn.
+ */
+static void undo_copy_znode(struct ubifs_info *c, struct ubifs_znode *znode,
+                           struct ubifs_znode *zn)
+{
+       if (znode->level != 0) {
+               int i;
+               const int n = zn->child_cnt;
+
+               for (i = 0; i < n; i++) {
+                       struct ubifs_zbranch *zbr = &zn->zbranch[i];
+
+                       if (zbr->znode)
+                               zbr->znode->parent = znode;
+               }
+       }
+
+       __clear_bit(OBSOLETE_ZNODE, &znode->flags);
+       atomic_long_dec(&c->dirty_zn_cnt);
+       kfree(zn);
+}
+
+/**
+ * dirty_cow_znode - ensure a znode is not being committed.
+ * @c: UBIFS file-system description object
+ * @zbr: branch of znode to check
+ *
+ * If the znode is not being committed, it is simply marked dirty (if it was
+ * not dirty yet). Otherwise the znode is copied, the on-flash position of the
+ * old node is recorded in the old index RB-tree, and @zbr is re-pointed to the
+ * copy.
+ *
+ * Returns dirtied znode on success or negative error code on failure.
+ */
+static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
+                                          struct ubifs_zbranch *zbr)
+{
+       struct ubifs_znode *znode = zbr->znode;
+       struct ubifs_znode *zn;
+       int err;
+
+       if (!test_bit(COW_ZNODE, &znode->flags)) {
+               /* znode is not being committed */
+               if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
+                       /* Clean -> dirty transition: fix up the accounting */
+                       atomic_long_inc(&c->dirty_zn_cnt);
+                       atomic_long_dec(&c->clean_zn_cnt);
+                       atomic_long_dec(&ubifs_clean_zn_cnt);
+                       err = add_idx_dirt(c, zbr->lnum, zbr->len);
+                       if (unlikely(err))
+                               return ERR_PTR(err);
+               }
+               return znode;
+       }
+
+       zn = copy_znode(c, znode);
+       if (unlikely(IS_ERR(zn)))
+               return zn;
+
+       if (zbr->len) {
+               err = insert_old_idx(c, zbr->lnum, zbr->offs);
+               if (unlikely(err)) {
+                       /*
+                        * The copy is not linked into the tree yet, so nobody
+                        * else would ever free it. Undo the copy so that the
+                        * TNC is left exactly as it was before this call.
+                        */
+                       undo_copy_znode(c, znode, zn);
+                       return ERR_PTR(err);
+               }
+               err = add_idx_dirt(c, zbr->lnum, zbr->len);
+       } else
+               err = 0;
+
+       /*
+        * The copy replaces the original in the tree even if 'add_idx_dirt()'
+        * failed. The copy has not been written yet, so it has no position.
+        */
+       zbr->znode = zn;
+       zbr->lnum = 0;
+       zbr->offs = 0;
+       zbr->len = 0;
+
+       if (unlikely(err))
+               return ERR_PTR(err);
+       return zn;
+}
+
+/**
+ * lnc_add - add a leaf node to the leaf node cache.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch of leaf node
+ * @node: leaf node
+ *
+ * Leaf nodes are the non-index nodes: directory entry nodes or data nodes.
+ * The leaf node cache (LNC) exists to avoid re-reading the same leaf node over
+ * and over again. VFS caches most things, but the file system itself has to
+ * cache directory entries for readdir and for resolving hash collisions. The
+ * current LNC implementation is extremely simple; it allows for error returns
+ * which are not used now but may be needed by a more complex implementation.
+ *
+ * Note, @node itself is not put to LNC. A copy of it is allocated and the copy
+ * is what gets cached. The reason is that @node was allocated outside of the
+ * TNC subsystem and is going to be used with @c->tnc_mutex unlocked after
+ * return from the TNC subsystem, while LNC may change at any time, e.g. be
+ * freed by the shrinker.
+ *
+ * Returns zero on success, including the case when the copy could not be
+ * allocated, and a negative error code if @node fails validation.
+ */
+static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+                  const void *node)
+{
+       const struct ubifs_dent_node *dent = node;
+       void *copy;
+       int err;
+
+       ubifs_assert(!zbr->leaf);
+       ubifs_assert(zbr->len != 0);
+       ubifs_assert(is_hash_key(c, &zbr->key));
+
+       err = ubifs_validate_entry(c, dent);
+       if (err) {
+               dbg_dump_stack();
+               dbg_dump_node(c, dent);
+               return err;
+       }
+
+       copy = kmalloc(zbr->len, GFP_NOFS);
+       if (copy) {
+               memcpy(copy, node, zbr->len);
+               zbr->leaf = copy;
+       }
+
+       /* The cache is optional, so an allocation failure is not an error */
+       return 0;
+}
+
+/**
+ * lnc_add_directly - add a leaf node to the leaf-node-cache.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch of leaf node
+ * @node: leaf node
+ *
+ * Unlike 'lnc_add()', this function does not make a copy of @node: after
+ * successful validation @node itself becomes the cached leaf of @zbr. Returns
+ * zero on success and a negative error code if @node fails validation, in
+ * which case @zbr is left untouched.
+ */
+static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+                           void *node)
+{
+       int err;
+
+       ubifs_assert(!zbr->leaf);
+       ubifs_assert(zbr->len != 0);
+
+       err = ubifs_validate_entry(c, node);
+       if (!err) {
+               zbr->leaf = node;
+               return 0;
+       }
+
+       dbg_dump_stack();
+       dbg_dump_node(c, node);
+       return err;
+}
+
+/**
+ * lnc_free - remove a leaf node from the leaf node cache.
+ * @zbr: zbranch of leaf node
+ *
+ * Frees the cached leaf of @zbr, if there is one, and resets the pointer.
+ */
+static void lnc_free(struct ubifs_zbranch *zbr)
+{
+       if (zbr->leaf) {
+               kfree(zbr->leaf);
+               zbr->leaf = NULL;
+       }
+}
+
+/**
+ * tnc_read_node_nm - read a "hashed" leaf node.
+ * @c: UBIFS file-system description object
+ * @zbr: key and position of the node
+ * @node: node is returned here
+ *
+ * This function reads a "hashed" node defined by @zbr from the leaf node cache
+ * (if it is there) or from the flash media, in which case the node is also
+ * added to LNC. Returns zero in case of success or a negative error code in
+ * case of failure.
+ */
+static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+                           void *node)
+{
+       int err;
+
+       ubifs_assert(is_hash_key(c, &zbr->key));
+
+       if (!zbr->leaf) {
+               /* Not cached: read from the media and put to LNC */
+               err = ubifs_tnc_read_node(c, zbr, node);
+               if (err)
+                       return err;
+               return lnc_add(c, zbr, node);
+       }
+
+       /* Cached: serve the request from the leaf node cache */
+       ubifs_assert(zbr->len != 0);
+       memcpy(node, zbr->leaf, zbr->len);
+       return 0;
+}
+
+/**
+ * try_read_node - read a node if it is a node.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to read to
+ * @type: node type
+ * @len: node length (not aligned)
+ * @lnum: LEB number of node to read
+ * @offs: offset of node to read
+ *
+ * This function reads @len bytes at LEB @lnum, offset @offs into @buf and
+ * checks whether they form a valid node of type @type and length @len. It
+ * returns %1 if a node is present and %0 if it is not. A negative error code
+ * is returned for I/O errors. This does the same job as 'ubifs_read_node()',
+ * except that absence of a node is not an error - the return code tells
+ * whether a node was read.
+ */
+static int try_read_node(const struct ubifs_info *c, void *buf, int type,
+                        int len, int lnum, int offs)
+{
+       const struct ubifs_ch *hdr = buf;
+       uint32_t calc_crc, stored_crc;
+       int err, node_len;
+
+       dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
+
+       err = ubi_read(c->ubi, lnum, buf, offs, len);
+       if (err) {
+               ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
+                         type, lnum, offs, err);
+               return err;
+       }
+
+       /* The common header must carry the expected magic and node type */
+       if (le32_to_cpu(hdr->magic) != UBIFS_NODE_MAGIC ||
+           hdr->node_type != type)
+               return 0;
+
+       node_len = le32_to_cpu(hdr->len);
+       if (node_len != len)
+               return 0;
+
+       /* The first 8 bytes of the node are not covered by the CRC */
+       calc_crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
+       stored_crc = le32_to_cpu(hdr->crc);
+
+       return calc_crc == stored_crc ? 1 : 0;
+}
+
+/**
+ * fallible_read_node - try to read a leaf node.
+ * @c: UBIFS file-system description object
+ * @key:  key of node to read
+ * @zbr:  position of node
+ * @node: node returned
+ *
+ * This function tries to read the node @zbr refers to. The node counts as read
+ * only if it is valid and its key is equal to @key. Returns %1 if the node is
+ * read, %0 if the node is not present, and a negative error code in the case
+ * of error.
+ */
+static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
+                             struct ubifs_zbranch *zbr, void *node)
+{
+       int found;
+
+       dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key));
+
+       found = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
+                             zbr->offs);
+       if (found == 1) {
+               struct ubifs_dent_node *dent = node;
+               union ubifs_key on_flash_key;
+
+               /* The key is stored at the same place in all nodes */
+               key_read(c, &dent->key, &on_flash_key);
+               if (keys_cmp(c, key, &on_flash_key))
+                       found = 0;
+       }
+
+       if (!found)
+               dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
+                       zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
+
+       return found;
+}
+
+/**
+ * matches_name - determine if a direntry or xattr entry matches a given name.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch of dent
+ * @nm: name to match
+ *
+ * This function checks if xentry/direntry referred by zbranch @zbr matches name
+ * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by
+ * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case
+ * of failure, a negative error code is returned.
+ *
+ * If the entry is not in the leaf node cache yet, it is read from the media
+ * and the freshly allocated buffer is put to the cache.
+ */
+static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+                       const struct qstr *nm)
+{
+       struct ubifs_dent_node *dent;
+       int nlen, err;
+
+       /* If possible, match against the dent in the leaf node cache */
+       if (!zbr->leaf) {
+               dent = kmalloc(zbr->len, GFP_NOFS);
+               if (!dent)
+                       return -ENOMEM;
+
+               err = ubifs_tnc_read_node(c, zbr, dent);
+               if (err)
+                       goto out_free;
+
+               /* Add the node to the leaf node cache */
+               err = lnc_add_directly(c, zbr, dent);
+               if (err)
+                       goto out_free;
+               /* From now on 'dent' is referred by @zbr->leaf, do not free */
+       } else
+               dent = zbr->leaf;
+
+       /* Compare the common prefix first, the lengths break the tie */
+       nlen = le16_to_cpu(dent->nlen);
+       err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
+       if (err == 0) {
+               if (nlen == nm->len)
+                       return NAME_MATCHES;
+               else if (nlen < nm->len)
+                       return NAME_LESS;
+               else
+                       return NAME_GREATER;
+       } else if (err < 0)
+               return NAME_LESS;
+       else
+               return NAME_GREATER;
+
+       /* Reached only via 'goto', i.e. when 'dent' did not get to the cache */
+out_free:
+       kfree(dent);
+       return err;
+}
+
+/**
+ * get_znode - get a TNC znode that may not be loaded yet.
+ * @c: UBIFS file-system description object
+ * @znode: parent znode
+ * @n: znode branch slot number
+ *
+ * If the child in slot @n of @znode is already in memory it is returned right
+ * away, otherwise the result of 'ubifs_load_znode()' is returned. So this
+ * function returns the znode or a negative error code.
+ */
+static struct ubifs_znode *get_znode(struct ubifs_info *c,
+                                    struct ubifs_znode *znode, int n)
+{
+       struct ubifs_zbranch *br = &znode->zbranch[n];
+
+       if (!br->znode)
+               return ubifs_load_znode(c, br, znode, n);
+       return br->znode;
+}
+
+/**
+ * tnc_next - find next TNC entry.
+ * @c: UBIFS file-system description object
+ * @zn: znode is passed and returned here
+ * @n: znode branch slot number is passed and returned here
+ *
+ * This function returns %0 if the next TNC entry is found, %-ENOENT if there is
+ * no next entry, or a negative error code otherwise. @zn and @n are updated
+ * only when %0 is returned.
+ */
+static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
+{
+       struct ubifs_znode *znode = *zn;
+       int nn = *n;
+
+       /* Easy case: the next entry is in the same znode */
+       nn += 1;
+       if (nn < znode->child_cnt) {
+               *n = nn;
+               return 0;
+       }
+       /* Go up until an ancestor with a branch to the right is found */
+       while (1) {
+               struct ubifs_znode *zp;
+
+               zp = znode->parent;
+               if (!zp)
+                       return -ENOENT;
+               nn = znode->iip + 1;
+               znode = zp;
+               if (nn < znode->child_cnt) {
+                       znode = get_znode(c, znode, nn);
+                       if (IS_ERR(znode))
+                               return PTR_ERR(znode);
+                       /* Go down to level 0, always taking the first branch */
+                       while (znode->level != 0) {
+                               znode = get_znode(c, znode, 0);
+                               if (IS_ERR(znode))
+                                       return PTR_ERR(znode);
+                       }
+                       nn = 0;
+                       break;
+               }
+       }
+       *zn = znode;
+       *n = nn;
+       return 0;
+}
+
+/**
+ * tnc_prev - find previous TNC entry.
+ * @c: UBIFS file-system description object
+ * @zn: znode is passed and returned here
+ * @n: znode branch slot number is passed and returned here
+ *
+ * This function returns %0 if the previous TNC entry is found, %-ENOENT if
+ * there is no previous entry, or a negative error code otherwise. @zn and @n
+ * are updated only when %0 is returned.
+ */
+static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
+{
+       struct ubifs_znode *znode = *zn;
+       int nn = *n;
+
+       /* Easy case: the previous entry is in the same znode */
+       if (nn > 0) {
+               *n = nn - 1;
+               return 0;
+       }
+       /* Go up until an ancestor with a branch to the left is found */
+       while (1) {
+               struct ubifs_znode *zp;
+
+               zp = znode->parent;
+               if (!zp)
+                       return -ENOENT;
+               nn = znode->iip - 1;
+               znode = zp;
+               if (nn >= 0) {
+                       znode = get_znode(c, znode, nn);
+                       if (IS_ERR(znode))
+                               return PTR_ERR(znode);
+                       /* Go down to level 0, always taking the last branch */
+                       while (znode->level != 0) {
+                               nn = znode->child_cnt - 1;
+                               znode = get_znode(c, znode, nn);
+                               if (IS_ERR(znode))
+                                       return PTR_ERR(znode);
+                       }
+                       nn = znode->child_cnt - 1;
+                       break;
+               }
+       }
+       *zn = znode;
+       *n = nn;
+       return 0;
+}
+
+/**
+ * resolve_collision - resolve a collision.
+ * @c: UBIFS file-system description object
+ * @key: key of a directory or extended attribute entry
+ * @zn: znode is returned here
+ * @n: zbranch number is passed and returned here
+ * @nm: name of the entry
+ *
+ * This function is called for "hashed" keys to make sure that the found key
+ * really corresponds to the looked up node (directory or extended attribute
+ * entry). It returns %1 and sets @zn and @n if the collision is resolved.
+ * %0 is returned if @nm is not found and @zn and @n are set to the previous
+ * entry, i.e. to the entry after which @nm could follow if it were in TNC.
+ * This means that @n may be set to %-1 if the leftmost key in @zn is the
+ * previous one. A negative error code is returned on failures.
+ */
+static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
+                            struct ubifs_znode **zn, int *n,
+                            const struct qstr *nm)
+{
+       int err;
+
+       /* Start with the branch the caller landed on */
+       err = matches_name(c, &(*zn)->zbranch[*n], nm);
+       if (unlikely(err < 0))
+               return err;
+       if (err == NAME_MATCHES)
+               return 1;
+
+       if (err == NAME_GREATER) {
+               /* Look left */
+               while (1) {
+                       /* Note, this walks @zn and @n themselves */
+                       err = tnc_prev(c, zn, n);
+                       if (err == -ENOENT) {
+                               /*
+                                * There is no previous branch, so @nm is not
+                                * in TNC and would go before slot 0, which is
+                                * reported as @n = -1.
+                                */
+                               ubifs_assert(*n == 0);
+                               *n = -1;
+                               return 0;
+                       }
+                       if (err < 0)
+                               return err;
+                       /* A different key ends the run of colliding entries */
+                       if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
+                               /*
+                                * We have found the branch after which we would
+                                * like to insert, but inserting in this znode
+                                * may still be wrong. Consider the following 3
+                                * znodes, in the case where we are resolving a
+                                * collision with Key2.
+                                *
+                                *                  znode zp
+                                *            ----------------------
+                                * level 1     |  Key0  |  Key1  |
+                                *            -----------------------
+                                *                 |            |
+                                *       znode za  |            |  znode zb
+                                *          ------------      ------------
+                                * level 0  |  Key0  |        |  Key2  |
+                                *          ------------      ------------
+                                *
+                                * The lookup finds Key2 in znode zb. Let's say
+                                * there is no match and the name is greater so
+                                * we look left. When we find Key0, we end up
+                                * here. If we return now, we will insert into
+                                * znode za at slot n = 1.  But that is invalid
+                                * according to the parent's keys.  Key2 must
+                                * be inserted into znode zb.
+                                *
+                                * Note, this problem is not relevant for the
+                                * case when we go right, because
+                                * 'tnc_insert()' would correct the parent key.
+                                */
+                               if (*n == (*zn)->child_cnt - 1) {
+                                       /*
+                                        * We stopped on the last slot of a
+                                        * znode: step back to the right and
+                                        * point before the first slot there.
+                                        */
+                                       err = tnc_next(c, zn, n);
+                                       if (err) {
+                                               /* Should be impossible */
+                                               ubifs_assert(0);
+                                               if (err == -ENOENT)
+                                                       err = -EINVAL;
+                                               return err;
+                                       }
+                                       ubifs_assert(*n == 0);
+                                       *n = -1;
+                               }
+                               return 0;
+                       }
+                       err = matches_name(c, &(*zn)->zbranch[*n], nm);
+                       if (err < 0)
+                               return err;
+                       /* This entry sorts before @nm, so @nm would follow it */
+                       if (err == NAME_LESS)
+                               return 0;
+                       if (err == NAME_MATCHES)
+                               return 1;
+                       ubifs_assert(err == NAME_GREATER);
+               }
+       } else {
+               /*
+                * Walk on local copies, so that @zn and @n are only advanced
+                * to entries which are known not to be greater than @nm.
+                */
+               int nn = *n;
+               struct ubifs_znode *znode = *zn;
+
+               /* Look right */
+               while (1) {
+                       err = tnc_next(c, &znode, &nn);
+                       if (err == -ENOENT)
+                               return 0;
+                       if (err < 0)
+                               return err;
+                       /* A different key ends the run of colliding entries */
+                       if (keys_cmp(c, &znode->zbranch[nn].key, key))
+                               return 0;
+                       err = matches_name(c, &znode->zbranch[nn], nm);
+                       if (err < 0)
+                               return err;
+                       /* @zn and @n still refer to the previous entry here */
+                       if (err == NAME_GREATER)
+                               return 0;
+                       *zn = znode;
+                       *n = nn;
+                       if (err == NAME_MATCHES)
+                               return 1;
+                       ubifs_assert(err == NAME_LESS);
+               }
+       }
+}
+
+/**
+ * fallible_matches_name - compare the name of a dent with a given name.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch of dent
+ * @nm: name to match
+ *
+ * This is the "fallible" counterpart of 'matches_name()': it does not panic
+ * if the direntry/xentry referred by @zbr does not exist on the media.
+ *
+ * The name of the xentry/direntry referred by zbranch @zbr is compared with
+ * @nm. Returns %NAME_MATCHES if the names are equal, %NAME_LESS if the name
+ * referred by @zbr is less than @nm, %NAME_GREATER if it is greater than @nm,
+ * and %NOT_ON_MEDIA if the xentry/direntry referred by @zbr does not exist on
+ * the media. A negative error code is returned in case of failure.
+ */
+static int fallible_matches_name(struct ubifs_info *c,
+                                struct ubifs_zbranch *zbr,
+                                const struct qstr *nm)
+{
+       struct ubifs_dent_node *dent = zbr->leaf;
+       int nlen, ret;
+
+       if (!dent) {
+               /* No cached leaf for this branch, so read the dent */
+               dent = kmalloc(zbr->len, GFP_NOFS);
+               if (!dent)
+                       return -ENOMEM;
+
+               ret = fallible_read_node(c, &zbr->key, zbr, dent);
+               if (ret <= 0) {
+                       /* Zero means that the node was not present */
+                       kfree(dent);
+                       return ret ? ret : NOT_ON_MEDIA;
+               }
+               ubifs_assert(ret == 1);
+
+               /* The buffer is released here only if adding it fails */
+               ret = lnc_add_directly(c, zbr, dent);
+               if (ret) {
+                       kfree(dent);
+                       return ret;
+               }
+       }
+
+       /* Compare the common prefix first, the lengths break the tie */
+       nlen = le16_to_cpu(dent->nlen);
+       ret = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
+       if (ret < 0)
+               return NAME_LESS;
+       if (ret > 0)
+               return NAME_GREATER;
+       if (nlen == nm->len)
+               return NAME_MATCHES;
+       return nlen < nm->len ? NAME_LESS : NAME_GREATER;
+}
+
+/**
+ * fallible_resolve_collision - resolve a collision even if nodes are missing.
+ * @c: UBIFS file-system description object
+ * @key: key
+ * @zn: znode is returned here
+ * @n: branch number is passed and returned here
+ * @nm: name of directory entry
+ * @adding: indicates caller is adding a key to the TNC
+ *
+ * This is a "fallible" version of the 'resolve_collision()' function which
+ * does not panic if one of the nodes referred to by TNC does not exist on the
+ * media. This may happen when replaying the journal if a deleted node was
+ * Garbage-collected and the commit was not done. A branch that refers to a node
+ * that is not present is called a dangling branch. The following are the return
+ * codes for this function:
+ *  o if @nm was found, %1 is returned and @zn and @n are set to the found
+ *    branch;
+ *  o if we are @adding and @nm was not found, %0 is returned;
+ *  o if we are not @adding and @nm was not found, but a dangling branch was
+ *    found, then %1 is returned and @zn and @n are set to the dangling branch;
+ *  o if we are not @adding and neither @nm nor a dangling branch was found,
+ *    %0 is returned;
+ *  o a negative error code is returned in case of failure.
+ */
+static int fallible_resolve_collision(struct ubifs_info *c,
+                                     const union ubifs_key *key,
+                                     struct ubifs_znode **zn, int *n,
+                                     const struct qstr *nm, int adding)
+{
+       /*
+        * @o_znode and @o_n remember the most recently seen dangling branch.
+        * @o_n is only meaningful when @o_znode is not %NULL. @znode and @nn
+        * keep the starting position for the walk to the right.
+        */
+       struct ubifs_znode *o_znode = NULL, *znode = *zn;
+       int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n;
+
+       cmp = fallible_matches_name(c, &znode->zbranch[nn], nm);
+       if (unlikely(cmp < 0))
+               return cmp;
+       if (cmp == NAME_MATCHES)
+               return 1;
+       if (cmp == NOT_ON_MEDIA) {
+               o_znode = znode;
+               o_n = nn;
+               /*
+                * We are unlucky and hit a dangling branch straight away.
+                * Now we do not really know where to go to find the needed
+                * branch - to the left or to the right. Well, let's try left.
+                */
+               unsure = 1;
+       } else if (!adding)
+               unsure = 1; /* Remove a dangling branch wherever it is */
+
+       if (cmp == NAME_GREATER || unsure) {
+               /* Look left */
+               while (1) {
+                       /* Note, this walks @zn and @n themselves */
+                       err = tnc_prev(c, zn, n);
+                       if (err == -ENOENT) {
+                               /* No previous branch, point before slot 0 */
+                               ubifs_assert(*n == 0);
+                               *n = -1;
+                               break;
+                       }
+                       if (err < 0)
+                               return err;
+                       /* A different key ends the run of colliding entries */
+                       if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
+                               /* See comments in 'resolve_collision()' */
+                               if (*n == (*zn)->child_cnt - 1) {
+                                       err = tnc_next(c, zn, n);
+                                       if (err) {
+                                               /* Should be impossible */
+                                               ubifs_assert(0);
+                                               if (err == -ENOENT)
+                                                       err = -EINVAL;
+                                               return err;
+                                       }
+                                       ubifs_assert(*n == 0);
+                                       *n = -1;
+                               }
+                               break;
+                       }
+                       err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm);
+                       if (err < 0)
+                               return err;
+                       if (err == NAME_MATCHES)
+                               return 1;
+                       if (err == NOT_ON_MEDIA) {
+                               /* Remember the dangling branch and go on */
+                               o_znode = *zn;
+                               o_n = *n;
+                               continue;
+                       }
+                       /* When not adding, scan all colliding entries */
+                       if (!adding)
+                               continue;
+                       if (err == NAME_LESS)
+                               break;
+                       else
+                               unsure = 0;
+               }
+       }
+
+       if (cmp == NAME_LESS || unsure) {
+               /* Look right */
+               /* Restart from the branch we were originally given */
+               *zn = znode;
+               *n = nn;
+               while (1) {
+                       err = tnc_next(c, &znode, &nn);
+                       if (err == -ENOENT)
+                               break;
+                       if (err < 0)
+                               return err;
+                       /* A different key ends the run of colliding entries */
+                       if (keys_cmp(c, &znode->zbranch[nn].key, key))
+                               break;
+                       err = fallible_matches_name(c, &znode->zbranch[nn], nm);
+                       if (err < 0)
+                               return err;
+                       /* @zn and @n still refer to the previous entry here */
+                       if (err == NAME_GREATER)
+                               break;
+                       *zn = znode;
+                       *n = nn;
+                       if (err == NAME_MATCHES)
+                               return 1;
+                       if (err == NOT_ON_MEDIA) {
+                               o_znode = znode;
+                               o_n = nn;
+                       }
+               }
+       }
+
+       /* Never match a dangling branch when adding */
+       if (adding || !o_znode)
+               return 0;
+
+       /* Not adding: report the remembered dangling branch as the match */
+       dbg_mnt("dangling match LEB %d:%d len %d %s",
+               o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs,
+               o_znode->zbranch[o_n].len, DBGKEY(key));
+       *zn = o_znode;
+       *n = o_n;
+       return 1;
+}
+
+/**
+ * matches_position - check whether a zbranch refers to a given flash address.
+ * @zbr: zbranch of dent
+ * @lnum: LEB number of dent to match
+ * @offs: offset of dent to match
+ *
+ * Returns %1 if @zbr refers to @lnum:@offs, and %0 in any other case.
+ */
+static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs)
+{
+       return zbr->lnum == lnum && zbr->offs == offs;
+}
+
+/**
+ * resolve_collision_directly - resolve a collision using a flash address.
+ * @c: UBIFS file-system description object
+ * @key: key of directory entry
+ * @zn: znode is passed and returned here
+ * @n: zbranch number is passed and returned here
+ * @lnum: LEB number of dent node to match
+ * @offs: offset of dent node to match
+ *
+ * This function is used for "hashed" keys when the flash address of the
+ * wanted directory or extended attribute entry node is known (@lnum:@offs).
+ * In that case colliding entries can be told apart by position alone, without
+ * reading them and matching full names. Entries with key @key are scanned to
+ * the left of the passed branch first, then to the right of it.
+ *
+ * Returns %1 and sets @zn and @n if the branch referring to @lnum:@offs was
+ * found. Returns %0 if it was not found; @zn and @n are then set to the last
+ * branch with key @key which was visited while scanning to the right, or are
+ * left unchanged if there is no such branch. A negative error code is
+ * returned in case of failure.
+ */
+static int resolve_collision_directly(struct ubifs_info *c,
+                                     const union ubifs_key *key,
+                                     struct ubifs_znode **zn, int *n,
+                                     int lnum, int offs)
+{
+       struct ubifs_znode *cur = *zn;
+       int slot = *n, ret;
+
+       if (matches_position(&cur->zbranch[slot], lnum, offs))
+               return 1;
+
+       /* Scan to the left; @zn and @n change only when a match is found */
+       for (;;) {
+               ret = tnc_prev(c, &cur, &slot);
+               if (ret == -ENOENT)
+                       break;
+               if (ret < 0)
+                       return ret;
+               if (keys_cmp(c, &cur->zbranch[slot].key, key))
+                       break;
+               if (!matches_position(&cur->zbranch[slot], lnum, offs))
+                       continue;
+               *zn = cur;
+               *n = slot;
+               return 1;
+       }
+
+       /* Scan to the right, starting again from the passed branch */
+       cur = *zn;
+       slot = *n;
+       for (;;) {
+               ret = tnc_next(c, &cur, &slot);
+               if (ret == -ENOENT)
+                       return 0;
+               if (ret < 0)
+                       return ret;
+               if (keys_cmp(c, &cur->zbranch[slot].key, key))
+                       return 0;
+               *zn = cur;
+               *n = slot;
+               if (matches_position(&cur->zbranch[slot], lnum, offs))
+                       return 1;
+       }
+}
+
+/**
+ * dirty_cow_bottom_up - dirty a znode and its ancestors.
+ * @c: UBIFS file-system description object
+ * @znode: znode to dirty
+ *
+ * If we do not have a unique key that resides in a znode, then we cannot
+ * dirty that znode from the top down (i.e. by using lookup_level0_dirty)
+ * This function records the path back to the last dirty ancestor, and then
+ * dirties the znodes on that path.
+ *
+ * Returns the znode returned by the last 'dirty_cow_znode()' call, which may
+ * be an error pointer. An error pointer carrying %-ENOMEM is returned if the
+ * path buffer could not be allocated.
+ */
+static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
+                                              struct ubifs_znode *znode)
+{
+       struct ubifs_znode *zp;
+       /* @path is a stack of slot numbers (iip), @p is its fill level */
+       int *path = c->bottom_up_buf, p = 0;
+
+       ubifs_assert(c->zroot.znode);
+       ubifs_assert(znode);
+       if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) {
+               /* Replace the buffer with one slot per level of the tree */
+               kfree(c->bottom_up_buf);
+               c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int),
+                                          GFP_NOFS);
+               if (!c->bottom_up_buf)
+                       return ERR_PTR(-ENOMEM);
+               path = c->bottom_up_buf;
+       }
+       if (c->zroot.znode->level) {
+               /* Go up until parent is dirty */
+               while (1) {
+                       int n;
+
+                       zp = znode->parent;
+                       if (!zp)
+                               break;
+                       n = znode->iip;
+                       ubifs_assert(p < c->zroot.znode->level);
+                       path[p++] = n;
+                       /*
+                        * NOTE(review): the comment above talks about the
+                        * parent, but the dirty flag tested here is the one
+                        * of @znode, not of @zp - confirm this is intended.
+                        */
+                       if (!zp->cnext && ubifs_zn_dirty(znode))
+                               break;
+                       znode = zp;
+               }
+       }
+
+       /* Come back down, dirtying as we go */
+       while (1) {
+               struct ubifs_zbranch *zbr;
+
+               zp = znode->parent;
+               if (zp) {
+                       /* Pop the slot of @znode in its parent */
+                       ubifs_assert(path[p - 1] >= 0);
+                       ubifs_assert(path[p - 1] < zp->child_cnt);
+                       zbr = &zp->zbranch[path[--p]];
+                       znode = dirty_cow_znode(c, zbr);
+               } else {
+                       /* No parent, so this has to be the root znode */
+                       ubifs_assert(znode == c->zroot.znode);
+                       znode = dirty_cow_znode(c, &c->zroot);
+               }
+               /* Stop on error or when the recorded path is exhausted */
+               if (unlikely(IS_ERR(znode)) || !p)
+                       break;
+               ubifs_assert(path[p - 1] >= 0);
+               ubifs_assert(path[p - 1] < znode->child_cnt);
+               /* Descend to the next child on the recorded path */
+               znode = znode->zbranch[path[p - 1]].znode;
+       }
+
+       return znode;
+}
+
+/**
+ * ubifs_lookup_level0 - search for zero-level znode.
+ * @c: UBIFS file-system description object
+ * @key:  key to lookup
+ * @zn: znode is returned here
+ * @n: znode branch slot number is returned here
+ *
+ * This function looks up the TNC tree and searches for the zero-level znode
+ * which refers key @key. The found zero-level znode is returned in @zn. There
+ * are 3 cases:
+ *   o exact match, i.e. the found zero-level znode contains key @key, then %1
+ *     is returned and slot number of the matched branch is stored in @n;
+ *   o not exact match, which means that zero-level znode does not contain
+ *     @key, then %0 is returned and slot number of the closest branch is
+ *     stored in @n;
+ *   o @key is so small that it is even less than the lowest key of the
+ *     leftmost zero-level node, then %0 is returned and %-1 is stored in @n.
+ *
+ * Note, when the TNC tree is traversed, some znodes may be absent, then this
+ * function reads corresponding indexing nodes and inserts them to TNC. In
+ * case of failure, a negative error code is returned.
+ */
+int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
+                       struct ubifs_znode **zn, int *n)
+{
+       int err, exact;
+       struct ubifs_znode *znode;
+       unsigned long time = get_seconds();
+
+       dbg_tnc("search key %s", DBGKEY(key));
+
+       znode = c->zroot.znode;
+       if (unlikely(!znode)) {
+               /* The root znode is not in TNC yet, load it */
+               znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
+               if (IS_ERR(znode))
+                       return PTR_ERR(znode);
+       }
+
+       znode->time = time;
+
+       /* Descend from the root until a zero-level znode is reached */
+       while (1) {
+               struct ubifs_zbranch *zbr;
+
+               exact = ubifs_search_zbranch(c, znode, key, n);
+
+               if (znode->level == 0)
+                       break;
+
+               /* A negative slot means "before the first", follow slot 0 */
+               if (*n < 0)
+                       *n = 0;
+               zbr = &znode->zbranch[*n];
+
+               if (zbr->znode) {
+                       znode->time = time;
+                       znode = zbr->znode;
+                       continue;
+               }
+
+               /* znode is not in TNC cache, load it from the media */
+               znode = ubifs_load_znode(c, zbr, znode, *n);
+               if (IS_ERR(znode))
+                       return PTR_ERR(znode);
+       }
+
+       *zn = znode;
+       /*
+        * Only a not found "hashed" key which falls before the first slot of
+        * this znode needs the look to the left done below.
+        */
+       if (exact || !is_hash_key(c, key) || *n != -1) {
+               dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n);
+               return exact;
+       }
+
+       /*
+        * Here is a tricky place. We have not found the key and this is a
+        * "hashed" key, which may collide. The rest of the code deals with
+        * situations like this:
+        *
+        *                  | 3 | 5 |
+        *                  /       \
+        *          | 3 | 5 |      | 6 | 7 | (x)
+        *
+        * Or a more complex example:
+        *
+        *                | 1 | 5 |
+        *                /       \
+        *       | 1 | 3 |         | 5 | 8 |
+        *              \           /
+        *          | 5 | 5 |   | 6 | 7 | (x)
+        *
+        * In the examples, if we are looking for key "5", we may reach nodes
+        * marked with "(x)". In this case what we have to do is to look at the
+        * left and see if there is "5" key there. If there is, we have to
+        * return it.
+        *
+        * Note, this whole situation is possible because we allow to have
+        * elements which are equivalent to the next key in the parent in the
+        * children of current znode. For example, this happens if we split a
+        * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something
+        * like this:
+        *                      | 3 | 5 |
+        *                       /     \
+        *                | 3 | 5 |   | 5 | 6 | 7 |
+        *                              ^
+        * And this becomes what is at the first "picture" after key "5" marked
+        * with "^" is removed. What could be done is we could prohibit
+        * splitting in the middle of the colliding sequence. Also, when
+        * removing the leftmost key, we would have to correct the key of the
+        * parent node, which would introduce additional complications. Namely,
+        * if we changed the leftmost key of the parent znode, the garbage
+        * collector would be unable to find it (GC is doing this when GC'ing
+        * indexing LEBs). Although we already have an additional RB-tree where
+        * we save such changed znodes (see 'ins_clr_old_idx_znode()') until
+        * after the commit. But anyway, this does not look easy to implement
+        * so we did not try this.
+        */
+       err = tnc_prev(c, &znode, n);
+       if (err == -ENOENT) {
+               /* There is nothing to the left, so the key is not in TNC */
+               dbg_tnc("found 0, lvl %d, n -1", znode->level);
+               *n = -1;
+               return 0;
+       }
+       if (unlikely(err < 0))
+               return err;
+       if (keys_cmp(c, key, &znode->zbranch[*n].key)) {
+               /*
+                * The previous entry has a different key. Note, @zn still
+                * refers to the znode found by the descent above.
+                */
+               dbg_tnc("found 0, lvl %d, n -1", znode->level);
+               *n = -1;
+               return 0;
+       }
+
+       /* The previous entry has the key we are looking for, return it */
+       dbg_tnc("found 1, lvl %d, n %d", znode->level, *n);
+       *zn = znode;
+       return 1;
+}
+
+/**
+ * lookup_level0_dirty - search for zero-level znode dirtying.
+ * @c: UBIFS file-system description object
+ * @key:  key to lookup
+ * @zn: znode is returned here
+ * @n: znode branch slot number is returned here
+ *
+ * This function looks up the TNC tree and searches for the zero-level znode
+ * which refers key @key. The found zero-level znode is returned in @zn. There
+ * are 3 cases:
+ *   o exact match, i.e. the found zero-level znode contains key @key, then %1
+ *     is returned and slot number of the matched branch is stored in @n;
+ *   o not exact match, which means that zero-level znode does not contain @key
+ *     then %0 is returned and slot number of the closest branch is stored in
+ *     @n;
+ *   o @key is so small that it is even less than the lowest key of the
+ *     leftmost zero-level node, then %0 is returned and %-1 is stored in @n.
+ *
+ * Additionally all znodes in the path from the root to the located zero-level
+ * znode are marked as dirty.
+ *
+ * Note, when the TNC tree is traversed, some znodes may be absent, then this
+ * function reads corresponding indexing nodes and inserts them to TNC. In
+ * case of failure, a negative error code is returned.
+ */
+static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
+                              struct ubifs_znode **zn, int *n)
+{
+       int err, exact;
+       struct ubifs_znode *znode;
+       unsigned long time = get_seconds();
+
+       dbg_tnc("search and dirty key %s", DBGKEY(key));
+
+       znode = c->zroot.znode;
+       if (unlikely(!znode)) {
+               /* The root znode is not in TNC yet, load it */
+               znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
+               if (IS_ERR(znode))
+                       return PTR_ERR(znode);
+       }
+
+       /* Continue with the znode which 'dirty_cow_znode()' returns */
+       znode = dirty_cow_znode(c, &c->zroot);
+       if (IS_ERR(znode))
+               return PTR_ERR(znode);
+
+       znode->time = time;
+
+       /* Descend from the root, dirtying every znode on the way down */
+       while (1) {
+               struct ubifs_zbranch *zbr;
+
+               exact = ubifs_search_zbranch(c, znode, key, n);
+
+               if (znode->level == 0)
+                       break;
+
+               /* A negative slot means "before the first", follow slot 0 */
+               if (*n < 0)
+                       *n = 0;
+               zbr = &znode->zbranch[*n];
+
+               if (zbr->znode) {
+                       znode->time = time;
+                       znode = dirty_cow_znode(c, zbr);
+                       if (IS_ERR(znode))
+                               return PTR_ERR(znode);
+                       continue;
+               }
+
+               /* znode is not in TNC cache, load it from the media */
+               znode = ubifs_load_znode(c, zbr, znode, *n);
+               if (IS_ERR(znode))
+                       return PTR_ERR(znode);
+               znode = dirty_cow_znode(c, zbr);
+               if (IS_ERR(znode))
+                       return PTR_ERR(znode);
+       }
+
+       *zn = znode;
+       if (exact || !is_hash_key(c, key) || *n != -1) {
+               dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n);
+               return exact;
+       }
+
+       /*
+        * See huge comment at 'ubifs_lookup_level0()' what is the rest of the
+        * code.
+        */
+       err = tnc_prev(c, &znode, n);
+       if (err == -ENOENT) {
+               *n = -1;
+               dbg_tnc("found 0, lvl %d, n -1", znode->level);
+               return 0;
+       }
+       if (unlikely(err < 0))
+               return err;
+       if (keys_cmp(c, key, &znode->zbranch[*n].key)) {
+               *n = -1;
+               dbg_tnc("found 0, lvl %d, n -1", znode->level);
+               return 0;
+       }
+
+       /*
+        * The entry was found in a znode reached by 'tnc_prev()', i.e. not by
+        * the dirtying descent above, so dirty it from the bottom up if it is
+        * not dirty or has @cnext set.
+        */
+       if (znode->cnext || !ubifs_zn_dirty(znode)) {
+               znode = dirty_cow_bottom_up(c, znode);
+               if (IS_ERR(znode))
+                       return PTR_ERR(znode);
+       }
+
+       dbg_tnc("found 1, lvl %d, n %d", znode->level, *n);
+       *zn = znode;
+       return 1;
+}
+
+/**
+ * ubifs_tnc_lookup - look up a file-system node.
+ * @c: UBIFS file-system description object
+ * @key: node key to lookup
+ * @node: the node is returned here
+ *
+ * This function looks up and reads the node with key @key. The caller has to
+ * make sure the @node buffer is large enough to fit the node. Returns zero in
+ * case of success, %-ENOENT if the node was not found, and a negative error
+ * code in case of failure.
+ */
+int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
+                    void *node)
+{
+       int ret, slot;
+       struct ubifs_znode *zn;
+       struct ubifs_zbranch copy;
+
+       mutex_lock(&c->tnc_mutex);
+       ret = ubifs_lookup_level0(c, key, &zn, &slot);
+       if (ret <= 0) {
+               /* Zero means "not found", negative values are errors */
+               if (!ret)
+                       ret = -ENOENT;
+               goto out_unlock;
+       }
+
+       if (!is_hash_key(c, key)) {
+               /* Read from a private copy of the zbranch, without the lock */
+               copy = zn->zbranch[slot];
+               mutex_unlock(&c->tnc_mutex);
+               return ubifs_tnc_read_node(c, &copy, node);
+       }
+
+       /*
+        * In this case the leaf node cache gets used, so we pass the
+        * address of the zbranch and keep the mutex locked
+        */
+       ret = tnc_read_node_nm(c, &zn->zbranch[slot], node);
+
+out_unlock:
+       mutex_unlock(&c->tnc_mutex);
+       return ret;
+}
+
+/**
+ * ubifs_tnc_locate - look up a file-system node and return it and its location.
+ * @c: UBIFS file-system description object
+ * @key: node key to lookup
+ * @node: the node is returned here
+ * @lnum: LEB number is returned here
+ * @offs: offset is returned here
+ *
+ * This function does the same as 'ubifs_tnc_lookup()', and additionally
+ * returns the location of the node in @lnum and @offs. The location is stored
+ * before the node is read, so it is set even if reading the node fails.
+ * Returns zero in case of success, %-ENOENT if the node was not found, and a
+ * negative error code in case of failure.
+ */
+int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
+                    void *node, int *lnum, int *offs)
+{
+       int ret, slot;
+       struct ubifs_znode *zn;
+       struct ubifs_zbranch copy, *br;
+
+       mutex_lock(&c->tnc_mutex);
+       ret = ubifs_lookup_level0(c, key, &zn, &slot);
+       if (ret <= 0) {
+               /* Zero means "not found", negative values are errors */
+               if (!ret)
+                       ret = -ENOENT;
+               goto out_unlock;
+       }
+
+       if (!is_hash_key(c, key)) {
+               /* Read from a private copy of the zbranch, without the lock */
+               copy = zn->zbranch[slot];
+               mutex_unlock(&c->tnc_mutex);
+
+               *lnum = copy.lnum;
+               *offs = copy.offs;
+               return ubifs_tnc_read_node(c, &copy, node);
+       }
+
+       /*
+        * In this case the leaf node cache gets used, so we pass the
+        * address of the zbranch and keep the mutex locked
+        */
+       br = &zn->zbranch[slot];
+       *lnum = br->lnum;
+       *offs = br->offs;
+       ret = tnc_read_node_nm(c, br, node);
+
+out_unlock:
+       mutex_unlock(&c->tnc_mutex);
+       return ret;
+}
+
+/**
+ * do_lookup_nm - look up a "hashed" node.
+ * @c: UBIFS file-system description object
+ * @key: node key to lookup
+ * @node: the node is returned here
+ * @nm: node name
+ *
+ * This function looks up and reads a node which contains name hash in the key.
+ * Since the hash may have collisions, there may be many nodes with the same
+ * key, so we have to sequentially look to all of them until the needed one is
+ * found. This function returns zero in case of success, %-ENOENT if the node
+ * was not found, and a negative error code in case of failure.
+ */
+static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
+                       void *node, const struct qstr *nm)
+{
+       int found, n, err;
+       struct ubifs_znode *znode;
+
+       dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
+       mutex_lock(&c->tnc_mutex);
+       found = ubifs_lookup_level0(c, key, &znode, &n);
+       if (!found) {
+               err = -ENOENT;
+               goto out_unlock;
+       } else if (found < 0) {
+               err = found;
+               goto out_unlock;
+       }
+
+       ubifs_assert(n >= 0);
+
+       /* Walk the colliding entries until the one named @nm is found */
+       err = resolve_collision(c, key, &znode, &n, nm);
+       dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n);
+       if (unlikely(err < 0))
+               goto out_unlock;
+       if (err == 0) {
+               err = -ENOENT;
+               goto out_unlock;
+       }
+
+       /*
+        * The leaf node cache gets used when reading nodes with "hashed"
+        * keys, so pass the address of the real zbranch and keep the mutex
+        * locked, exactly as 'ubifs_tnc_lookup()' does. Reading via an
+        * on-stack copy of the zbranch after dropping the mutex would use
+        * the cached leaf without the lock, and anything the read adds to
+        * the copy would never reach the TNC.
+        */
+       err = tnc_read_node_nm(c, &znode->zbranch[n], node);
+
+out_unlock:
+       mutex_unlock(&c->tnc_mutex);
+       return err;
+}
+
+/**
+ * ubifs_tnc_lookup_nm - look up a "hashed" node.
+ * @c: UBIFS file-system description object
+ * @key: node key to lookup
+ * @node: the node is returned here
+ * @nm: node name
+ *
+ * This function looks up and reads a node which contains name hash in the key.
+ * The hash may have collisions, so several nodes may share the same key. The
+ * node found by a plain key lookup is tried first, and only if its name is
+ * not @nm the colliding nodes are examined one by one. Returns zero in case
+ * of success, %-ENOENT if the node was not found, and a negative error code
+ * in case of failure.
+ */
+int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
+                       void *node, const struct qstr *nm)
+{
+       const struct ubifs_dent_node *found = node;
+       int ret, nlen;
+
+       /*
+        * Collisions are assumed to be rare, so most of the time
+        * 'ubifs_tnc_lookup()' gives us the right direntry straight away.
+        */
+       ret = ubifs_tnc_lookup(c, key, node);
+       if (ret)
+               return ret;
+
+       nlen = le16_to_cpu(found->nlen);
+       if (nm->len != nlen || memcmp(found->name, nm->name, nlen))
+               /*
+                * Bad luck, this is an entry with a colliding name hash, so
+                * the direntries with this key have to be checked in turn.
+                */
+               return do_lookup_nm(c, key, node, nm);
+
+       return 0;
+}
+
+/**
+ * correct_parent_keys - propagate a lowered first key up to the ancestors.
+ * @c: UBIFS file-system description object
+ * @znode: znode whose slot-zero key has changed
+ *
+ * Helper for 'tnc_insert()'. Starting from the parent of @znode, every
+ * ancestor whose first key is greater than the first key of @znode gets that
+ * key copied into its slot zero and is marked with the 'alt' flag. The walk
+ * stops at the first ancestor whose key does not need lowering, which has no
+ * parent, or which is not the leftmost child of its own parent.
+ */
+static void correct_parent_keys(const struct ubifs_info *c,
+                               struct ubifs_znode *znode)
+{
+       union ubifs_key *new_key;
+       struct ubifs_znode *anc;
+
+       ubifs_assert(znode->parent);
+       ubifs_assert(znode->iip == 0);
+
+       new_key = &znode->zbranch[0].key;
+       for (anc = znode->parent; ; anc = anc->parent) {
+               union ubifs_key *anc_key = &anc->zbranch[0].key;
+
+               if (keys_cmp(c, new_key, anc_key) >= 0)
+                       break;
+
+               key_copy(c, new_key, anc_key);
+               anc->alt = 1;
+
+               /* The next level up is affected only through its slot zero */
+               if (!anc->parent || anc->iip)
+                       break;
+       }
+}
+
+/**
+ * insert_zbranch - place a zbranch into a given slot of a znode.
+ * @znode: znode into which to insert
+ * @zbr: zbranch to insert
+ * @n: slot number the new zbranch has to occupy
+ *
+ * Helper for 'tnc_insert()'. The zbranch array of a znode is kept free of
+ * "gaps", so the zbranches in slot @n and above are shifted one position to
+ * the right to make room. For non-leaf znodes the 'iip' field of every moved
+ * child znode, and of the inserted one, is set to its new slot number.
+ */
+static void insert_zbranch(struct ubifs_znode *znode,
+                          const struct ubifs_zbranch *zbr, int n)
+{
+       const int internal = znode->level != 0;
+       int slot;
+
+       ubifs_assert(ubifs_zn_dirty(znode));
+
+       /* Open up slot @n by moving the tail one position to the right */
+       for (slot = znode->child_cnt; slot > n; slot--) {
+               struct ubifs_zbranch *dst = &znode->zbranch[slot];
+
+               *dst = znode->zbranch[slot - 1];
+               if (internal && dst->znode)
+                       dst->znode->iip = slot;
+       }
+       if (internal && zbr->znode)
+               zbr->znode->iip = n;
+
+       znode->zbranch[n] = *zbr;
+       znode->child_cnt += 1;
+
+       /*
+        * An insertion at slot zero may have changed the lower bound of the
+        * key range of this znode. Should the znode be split later on, the
+        * upper bound may change too, possibly to something below the
+        * original lower bound. In that case the znode can no longer be found
+        * in the TNC by the key stored in the on-flash index node, it would
+        * be assumed obsolete and might get overwritten. After an unclean
+        * unmount the old index would then be used, and it would be broken.
+        *
+        * Hence znodes which had an insertion at slot zero are marked here,
+        * and if they are split their lnum/offs are added to the old_idx
+        * tree.
+        */
+       if (n == 0)
+               znode->alt = 1;
+}
+
+/**
+ * tnc_insert - insert a node into TNC.
+ * @c: UBIFS file-system description object
+ * @znode: znode to insert into
+ * @zbr: branch to insert
+ * @n: slot number to insert new zbranch to
+ *
+ * This function inserts a new node described by @zbr into znode @znode. If
+ * znode does not have a free slot for new zbranch, it is split. Parent znodes
+ * are split as well if needed. Returns zero in case of success or a negative
+ * error code in case of failure.
+ *
+ * Note, @zbr is also used as scratch space: when a split has to be propagated
+ * to the parent, @zbr is overwritten to describe the newly created znode.
+ */
+static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
+                     struct ubifs_zbranch *zbr, int n)
+{
+       struct ubifs_znode *zn, *zi, *zp;
+       int i, keep, move, appending = 0;
+       union ubifs_key *key = &zbr->key;
+
+       ubifs_assert(n >= 0 && n <= c->fanout);
+
+       /* Implement naive insert for now */
+again:
+       zp = znode->parent;
+       if (znode->child_cnt < c->fanout) {
+               ubifs_assert(n != c->fanout);
+               dbg_tnc("inserted at %d level %d, key %s", n, znode->level,
+                       DBGKEY(key));
+
+               insert_zbranch(znode, zbr, n);
+
+               /* Ensure parent's key is correct */
+               if (n == 0 && zp && znode->iip == 0)
+                       correct_parent_keys(c, znode);
+
+               return 0;
+       }
+
+       /*
+        * Unfortunately, @znode does not have more empty slots and we have to
+        * split it.
+        */
+       dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key));
+
+       if (znode->alt)
+               /*
+                * We can no longer be sure of finding this znode by key, so we
+                * record it in the old_idx tree.
+                */
+               ins_clr_old_idx_znode(c, znode);
+
+       zn = kzalloc(c->max_znode_sz, GFP_NOFS);
+       if (!zn)
+               return -ENOMEM;
+       zn->parent = zp;
+       zn->level = znode->level;
+
+       /* Decide where to split */
+       if (znode->level == 0 && n == c->fanout &&
+           key_type(c, key) == UBIFS_DATA_KEY) {
+               union ubifs_key *key1;
+
+               /*
+                * If this is an inode which is being appended - do not split
+                * it because no other zbranches can be inserted between
+                * zbranches of consecutive data nodes anyway.
+                */
+               key1 = &znode->zbranch[n - 1].key;
+               if (key_inum(c, key1) == key_inum(c, key) &&
+                   key_type(c, key1) == UBIFS_DATA_KEY &&
+                   key_block(c, key1) == key_block(c, key) - 1)
+                       appending = 1;
+       }
+
+       if (appending) {
+               keep = c->fanout;
+               move = 0;
+       } else {
+               keep = (c->fanout + 1) / 2;
+               move = c->fanout - keep;
+       }
+
+       /*
+        * Although we don't at present, we could look at the neighbors and see
+        * if we can move some zbranches there.
+        */
+
+       if (n < keep) {
+               /*
+                * Insert into existing znode. One more zbranch is moved to the
+                * new znode so that the old one has a free slot for @zbr.
+                */
+               zi = znode;
+               move += 1;
+               keep -= 1;
+       } else {
+               /*
+                * Insert into new znode, so the slot number becomes relative
+                * to the first zbranch of the new znode.
+                */
+               zi = zn;
+               n -= keep;
+               /* Re-parent */
+               if (zn->level != 0)
+                       zbr->znode->parent = zn;
+       }
+
+       __set_bit(DIRTY_ZNODE, &zn->flags);
+       atomic_long_inc(&c->dirty_zn_cnt);
+
+       zn->child_cnt = move;
+       znode->child_cnt = keep;
+
+       dbg_tnc("moving %d, keeping %d", move, keep);
+
+       /* Move zbranch */
+       for (i = 0; i < move; i++) {
+               zn->zbranch[i] = znode->zbranch[keep + i];
+               /* Re-parent */
+               if (zn->level != 0)
+                       if (zn->zbranch[i].znode) {
+                               zn->zbranch[i].znode->parent = zn;
+                               zn->zbranch[i].znode->iip = i;
+                       }
+       }
+
+       /* Insert new key and branch */
+       dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key));
+
+       insert_zbranch(zi, zbr, n);
+
+       /* Insert new znode (produced by splitting) into the parent */
+       if (zp) {
+               i = n;
+               /* Locate insertion point */
+               n = znode->iip + 1;
+               if (appending && n != c->fanout)
+                       appending = 0;
+
+               if (i == 0 && zi == znode && znode->iip == 0)
+                       correct_parent_keys(c, znode);
+
+               /*
+                * Tail recursion: @zbr is re-used to describe the new znode
+                * and the insertion is repeated one level up.
+                */
+               zbr->key = zn->zbranch[0].key;
+               zbr->znode = zn;
+               zbr->lnum = 0;
+               zbr->offs = 0;
+               zbr->len = 0;
+               znode = zp;
+
+               goto again;
+       }
+
+       /*
+        * We have to split root znode.
+        *
+        * NOTE(review): if the allocation below fails, @zn has already been
+        * populated but is neither linked into the tree nor freed in this
+        * function - looks like a leak and an inconsistent tree, confirm how
+        * callers are expected to recover.
+        */
+       dbg_tnc("creating new zroot at level %d", znode->level + 1);
+
+       zi = kzalloc(c->max_znode_sz, GFP_NOFS);
+       if (!zi)
+               return -ENOMEM;
+
+       zi->child_cnt = 2;
+       zi->level = znode->level + 1;
+
+       __set_bit(DIRTY_ZNODE, &zi->flags);
+       atomic_long_inc(&c->dirty_zn_cnt);
+
+       zi->zbranch[0].key = znode->zbranch[0].key;
+       zi->zbranch[0].znode = znode;
+       zi->zbranch[0].lnum = c->zroot.lnum;
+       zi->zbranch[0].offs = c->zroot.offs;
+       zi->zbranch[0].len = c->zroot.len;
+       zi->zbranch[1].key = zn->zbranch[0].key;
+       zi->zbranch[1].znode = zn;
+
+       c->zroot.lnum = 0;
+       c->zroot.offs = 0;
+       c->zroot.len = 0;
+       c->zroot.znode = zi;
+
+       zn->parent = zi;
+       zn->iip = 1;
+       znode->parent = zi;
+       znode->iip = 0;
+
+       return 0;
+}
+
+/**
+ * ubifs_tnc_add - add a node to TNC.
+ * @c: UBIFS file-system description object
+ * @key: key of the node being added
+ * @lnum: LEB number the node lives in
+ * @offs: offset of the node within the LEB
+ * @len: length of the node
+ *
+ * If @key is not in the index yet, a new leaf branch is inserted for it. If it
+ * is already there, the space of the node it used to refer to is accounted as
+ * dirty and the branch is re-pointed to @lnum:@offs. Returns %0 on success or
+ * negative error code on failure.
+ */
+int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
+                 int offs, int len)
+{
+       struct ubifs_zbranch new_zbr, *old_zbr;
+       struct ubifs_znode *znode;
+       int ret, n, err = 0;
+
+       mutex_lock(&c->tnc_mutex);
+       dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key));
+
+       ret = lookup_level0_dirty(c, key, &znode, &n);
+       switch (ret) {
+       case 0:
+               /* The key is new - insert a leaf branch right after slot n */
+               new_zbr.znode = NULL;
+               new_zbr.lnum = lnum;
+               new_zbr.offs = offs;
+               new_zbr.len = len;
+               key_copy(c, key, &new_zbr.key);
+               err = tnc_insert(c, znode, &new_zbr, n + 1);
+               break;
+       case 1:
+               /* The key exists - the node it referred to becomes obsolete */
+               old_zbr = &znode->zbranch[n];
+               lnc_free(old_zbr);
+               err = ubifs_add_dirt(c, old_zbr->lnum, old_zbr->len);
+               old_zbr->lnum = lnum;
+               old_zbr->offs = offs;
+               old_zbr->len = len;
+               break;
+       default:
+               err = ret;
+               break;
+       }
+
+       if (!err)
+               err = dbg_check_tnc(c, 0);
+       mutex_unlock(&c->tnc_mutex);
+
+       return err;
+}
+
+/**
+ * ubifs_tnc_replace - replace a node in the TNC only if the old node is found.
+ * @c: UBIFS file-system description object
+ * @key: key to add
+ * @old_lnum: LEB number of old node
+ * @old_offs: old node offset
+ * @lnum: LEB number of node
+ * @offs: node offset
+ * @len: node length
+ *
+ * This function replaces a node with key @key in the TNC only if the old node
+ * is found. This function is called by garbage collection when nodes are
+ * moved. The index entry is updated only if it still refers to
+ * @old_lnum:@old_offs; for "hashed" keys the colliding entries are searched
+ * for that position as well. If no such entry exists, the space of the new
+ * node (@lnum, @len) is accounted as dirty instead.
+ * Returns %0 on success or negative error code on failure.
+ */
+int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
+                     int old_lnum, int old_offs, int lnum, int offs, int len)
+{
+       int found, n, err = 0;
+       struct ubifs_znode *znode;
+
+       mutex_lock(&c->tnc_mutex);
+       dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum,
+               old_offs, lnum, offs, len, DBGKEY(key));
+       found = lookup_level0_dirty(c, key, &znode, &n);
+       if (found < 0) {
+               err = found;
+               goto out_unlock;
+       }
+
+       if (found == 1) {
+               struct ubifs_zbranch *zbr = &znode->zbranch[n];
+
+               found = 0;
+               if (zbr->lnum == old_lnum && zbr->offs == old_offs) {
+                       lnc_free(zbr);
+                       err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
+                       if (err)
+                               goto out_unlock;
+                       zbr->lnum = lnum;
+                       zbr->offs = offs;
+                       zbr->len = len;
+                       found = 1;
+               } else if (is_hash_key(c, key)) {
+                       found = resolve_collision_directly(c, key, &znode, &n,
+                                                          old_lnum, old_offs);
+                       dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d",
+                               found, znode, n, old_lnum, old_offs);
+                       if (found < 0) {
+                               err = found;
+                               goto out_unlock;
+                       }
+
+                       if (found) {
+                               /*
+                                * Ensure the znode is dirtied, collision
+                                * resolution may have moved us to a different
+                                * znode.
+                                */
+                               if (znode->cnext || !ubifs_zn_dirty(znode)) {
+                                           znode = dirty_cow_bottom_up(c,
+                                                                       znode);
+                                           if (IS_ERR(znode)) {
+                                                   err = PTR_ERR(znode);
+                                                   goto out_unlock;
+                                           }
+                               }
+                               zbr = &znode->zbranch[n];
+                               lnc_free(zbr);
+                               err = ubifs_add_dirt(c, zbr->lnum,
+                                                    zbr->len);
+                               if (err)
+                                       goto out_unlock;
+                               zbr->lnum = lnum;
+                               zbr->offs = offs;
+                               zbr->len = len;
+                       }
+               }
+       }
+
+       if (!found)
+               err = ubifs_add_dirt(c, lnum, len);
+
+       if (!err)
+               err = dbg_check_tnc(c, 0);
+
+out_unlock:
+       mutex_unlock(&c->tnc_mutex);
+       return err;
+}
+
+/**
+ * ubifs_tnc_add_nm - add a "hashed" node to TNC.
+ * @c: UBIFS file-system description object
+ * @key: key to add
+ * @lnum: LEB number of node
+ * @offs: node offset
+ * @len: node length
+ * @nm: node name
+ *
+ * This is the same as 'ubifs_tnc_add()' but it should be used with keys which
+ * may have collisions, like directory entry keys. If an entry with name @nm
+ * already exists it is re-pointed to the new node, otherwise a new entry is
+ * inserted. While replaying (@c->replaying is set), the insertion is followed
+ * by an attempt to remove a possible dangling branch with the same key.
+ * Returns %0 on success or negative error code on failure.
+ */
+int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
+                    int lnum, int offs, int len, const struct qstr *nm)
+{
+       int found, n, err = 0;
+       struct ubifs_znode *znode;
+
+       mutex_lock(&c->tnc_mutex);
+       dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name,
+               DBGKEY(key));
+       found = lookup_level0_dirty(c, key, &znode, &n);
+       if (found < 0) {
+               err = found;
+               goto out_unlock;
+       }
+
+       if (found == 1) {
+               if (c->replaying)
+                       found = fallible_resolve_collision(c, key, &znode, &n,
+                                                          nm, 1);
+               else
+                       found = resolve_collision(c, key, &znode, &n, nm);
+               dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n);
+               if (found < 0) {
+                       err = found;
+                       goto out_unlock;
+               }
+
+               /*
+                * Ensure the znode is dirtied, collision resolution may have
+                * moved us to a different znode.
+                */
+               if (znode->cnext || !ubifs_zn_dirty(znode)) {
+                           znode = dirty_cow_bottom_up(c, znode);
+                           if (IS_ERR(znode)) {
+                                   err = PTR_ERR(znode);
+                                   goto out_unlock;
+                           }
+               }
+
+               if (found == 1) {
+                       struct ubifs_zbranch *zbr = &znode->zbranch[n];
+
+                       lnc_free(zbr);
+                       err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
+                       zbr->lnum = lnum;
+                       zbr->offs = offs;
+                       zbr->len = len;
+                       goto out_unlock;
+               }
+       }
+
+       if (!found) {
+               struct ubifs_zbranch zbr;
+
+               zbr.znode = NULL;
+               zbr.lnum = lnum;
+               zbr.offs = offs;
+               zbr.len = len;
+               key_copy(c, key, &zbr.key);
+               err = tnc_insert(c, znode, &zbr, n + 1);
+               if (err)
+                       goto out_unlock;
+               if (c->replaying) {
+                       /*
+                        * We did not find it in the index so there may be a
+                        * dangling branch still in the index. So we remove it
+                        * by passing 'ubifs_tnc_remove_nm()' the same key but
+                        * an unmatchable name.
+                        *
+                        * Note, 'ubifs_tnc_remove_nm()' takes @c->tnc_mutex
+                        * itself, so the mutex is dropped before calling it.
+                        */
+                       struct qstr noname = { .len = 0, .name = "" };
+
+                       err = dbg_check_tnc(c, 0);
+                       mutex_unlock(&c->tnc_mutex);
+                       if (err)
+                               return err;
+                       return ubifs_tnc_remove_nm(c, key, &noname);
+               }
+       }
+
+out_unlock:
+       if (!err)
+               err = dbg_check_tnc(c, 0);
+       mutex_unlock(&c->tnc_mutex);
+       return err;
+}
+
+/**
+ * tnc_delete - delete a node from TNC.
+ * @c: UBIFS file-system description object
+ * @znode: znode to delete from
+ * @n: zbranch slot number to delete
+ *
+ * This function deletes a leaf node from @n-th slot of @znode. The space of
+ * the deleted node is accounted as dirty. If @znode becomes empty, it is
+ * removed from its parent, and so are all ancestors which would become empty
+ * in turn; if the root znode ends up with a single child, the tree is
+ * collapsed. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
+{
+       struct ubifs_zbranch *zbr;
+       struct ubifs_znode *zp;
+       int i, err;
+
+       /* Delete without merge for now */
+       ubifs_assert(znode->level == 0);
+       ubifs_assert(n >= 0 && n < c->fanout);
+       dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key));
+
+       zbr = &znode->zbranch[n];
+       lnc_free(zbr);
+
+       err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
+       if (err) {
+               dbg_dump_znode(c, znode);
+               return err;
+       }
+
+       /* We do not "gap" zbranch slots */
+       for (i = n; i < znode->child_cnt - 1; i++)
+               znode->zbranch[i] = znode->zbranch[i + 1];
+       znode->child_cnt -= 1;
+
+       if (znode->child_cnt > 0)
+               return 0;
+
+       /*
+        * This was the last zbranch, we have to delete this znode from the
+        * parent.
+        */
+
+       do {
+               ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
+               ubifs_assert(ubifs_zn_dirty(znode));
+
+               zp = znode->parent;
+               n = znode->iip;
+
+               atomic_long_dec(&c->dirty_zn_cnt);
+
+               err = insert_old_idx_znode(c, znode);
+               if (err)
+                       return err;
+
+               if (znode->cnext) {
+                       __set_bit(OBSOLETE_ZNODE, &znode->flags);
+                       atomic_long_inc(&c->clean_zn_cnt);
+                       atomic_long_inc(&ubifs_clean_zn_cnt);
+               } else
+                       kfree(znode);
+               znode = zp;
+       } while (znode->child_cnt == 1); /* while removing last child */
+
+       /*
+        * Remove slot @n from znode - at this point @n is the 'iip' of the
+        * last child znode disposed of by the loop above.
+        */
+       znode->child_cnt -= 1;
+       ubifs_assert(znode->level != 0);
+       for (i = n; i < znode->child_cnt; i++) {
+               znode->zbranch[i] = znode->zbranch[i + 1];
+               if (znode->zbranch[i].znode)
+                       znode->zbranch[i].znode->iip = i;
+       }
+
+       /*
+        * If this is the root and it has only 1 child then
+        * collapse the tree.
+        */
+       if (!znode->parent) {
+               while (znode->child_cnt == 1 && znode->level != 0) {
+                       zp = znode;
+                       zbr = &znode->zbranch[0];
+                       znode = get_znode(c, znode, 0);
+                       if (IS_ERR(znode))
+                               return PTR_ERR(znode);
+                       znode = dirty_cow_znode(c, zbr);
+                       if (IS_ERR(znode))
+                               return PTR_ERR(znode);
+                       znode->parent = NULL;
+                       znode->iip = 0;
+                       if (c->zroot.len) {
+                               err = insert_old_idx(c, c->zroot.lnum,
+                                                    c->zroot.offs);
+                               if (err)
+                                       return err;
+                       }
+                       c->zroot.lnum = zbr->lnum;
+                       c->zroot.offs = zbr->offs;
+                       c->zroot.len = zbr->len;
+                       c->zroot.znode = znode;
+                       ubifs_assert(!test_bit(OBSOLETE_ZNODE,
+                                    &zp->flags));
+                       ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags));
+                       atomic_long_dec(&c->dirty_zn_cnt);
+
+                       if (zp->cnext) {
+                               __set_bit(OBSOLETE_ZNODE, &zp->flags);
+                               atomic_long_inc(&c->clean_zn_cnt);
+                               atomic_long_inc(&ubifs_clean_zn_cnt);
+                       } else
+                               kfree(zp);
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * ubifs_tnc_remove - drop the index entry of a node from TNC.
+ * @c: UBIFS file-system description object
+ * @key: key of the node whose index entry has to go
+ *
+ * A key which is not present in the index is not treated as an error. Returns
+ * %0 on success or negative error code on failure.
+ */
+int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key)
+{
+       struct ubifs_znode *znode;
+       int n, err;
+
+       mutex_lock(&c->tnc_mutex);
+       dbg_tnc("key %s", DBGKEY(key));
+
+       err = lookup_level0_dirty(c, key, &znode, &n);
+       if (err >= 0) {
+               /* Only an exact match (1) leaves something to delete */
+               err = (err == 1) ? tnc_delete(c, znode, n) : 0;
+               if (!err)
+                       err = dbg_check_tnc(c, 0);
+       }
+
+       mutex_unlock(&c->tnc_mutex);
+       return err;
+}
+
+/**
+ * ubifs_tnc_remove_nm - drop the index entry of a "hashed" node from TNC.
+ * @c: UBIFS file-system description object
+ * @key: key of the node
+ * @nm: name of the directory entry
+ *
+ * The entries sharing @key are searched for the one named @nm and only that
+ * one is deleted. Finding no such entry is not treated as an error. Returns %0
+ * on success or negative error code on failure.
+ */
+int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
+                       const struct qstr *nm)
+{
+       struct ubifs_znode *znode;
+       int n, err;
+
+       mutex_lock(&c->tnc_mutex);
+       dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key));
+
+       err = lookup_level0_dirty(c, key, &znode, &n);
+       if (err <= 0)
+               /* Either a failure or the key is not in the index at all */
+               goto out_unlock;
+
+       err = c->replaying ?
+             fallible_resolve_collision(c, key, &znode, &n, nm, 0) :
+             resolve_collision(c, key, &znode, &n, nm);
+       dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n);
+       if (err <= 0)
+               /* Either a failure or no entry carries this name */
+               goto out_unlock;
+
+       /* The znode we ended up at must be dirty before it is modified */
+       if (znode->cnext || !ubifs_zn_dirty(znode)) {
+               znode = dirty_cow_bottom_up(c, znode);
+               if (IS_ERR(znode)) {
+                       err = PTR_ERR(znode);
+                       goto out_unlock;
+               }
+       }
+       err = tnc_delete(c, znode, n);
+
+out_unlock:
+       if (!err)
+               err = dbg_check_tnc(c, 0);
+       mutex_unlock(&c->tnc_mutex);
+       return err;
+}
+
+/**
+ * key_in_range - check whether a key lies within an inclusive key range.
+ * @c: UBIFS file-system description object
+ * @key: key to check
+ * @from_key: lowest key of the range
+ * @to_key: highest key of the range
+ *
+ * Both bounds belong to the range. This function returns %1 if @key is in the
+ * range and %0 otherwise.
+ */
+static int key_in_range(struct ubifs_info *c, union ubifs_key *key,
+                       union ubifs_key *from_key, union ubifs_key *to_key)
+{
+       return keys_cmp(c, key, from_key) >= 0 &&
+              keys_cmp(c, key, to_key) <= 0;
+}
+
+/**
+ * ubifs_tnc_remove_range - remove index entries in range.
+ * @c: UBIFS file-system description object
+ * @from_key: lowest key to remove
+ * @to_key: highest key to remove
+ *
+ * This function removes index entries starting at @from_key and ending at
+ * @to_key (both inclusive, see 'key_in_range()'). The range is processed one
+ * level 0 znode at a time: all matching entries of the znode are removed and
+ * then the lookup is repeated until no entry in the range is left. This
+ * function returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
+                          union ubifs_key *to_key)
+{
+       int i, n, k, err = 0;
+       struct ubifs_znode *znode;
+       union ubifs_key *key;
+
+       mutex_lock(&c->tnc_mutex);
+       while (1) {
+               /* Find first level 0 znode that contains keys to remove */
+               err = ubifs_lookup_level0(c, from_key, &znode, &n);
+               if (err < 0)
+                       goto out_unlock;
+
+               if (err)
+                       key = from_key;
+               else {
+                       err = tnc_next(c, &znode, &n);
+                       if (err == -ENOENT) {
+                               err = 0;
+                               goto out_unlock;
+                       }
+                       if (err < 0)
+                               goto out_unlock;
+                       key = &znode->zbranch[n].key;
+                       if (!key_in_range(c, key, from_key, to_key)) {
+                               err = 0;
+                               goto out_unlock;
+                       }
+               }
+
+               /* Ensure the znode is dirtied */
+               if (znode->cnext || !ubifs_zn_dirty(znode)) {
+                           znode = dirty_cow_bottom_up(c, znode);
+                           if (IS_ERR(znode)) {
+                                   err = PTR_ERR(znode);
+                                   goto out_unlock;
+                           }
+               }
+
+               /*
+                * Remove all keys in range except the first. Here @k counts
+                * the entries following slot @n which have to go.
+                */
+               for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) {
+                       key = &znode->zbranch[i].key;
+                       if (!key_in_range(c, key, from_key, to_key))
+                               break;
+                       lnc_free(&znode->zbranch[i]);
+                       err = ubifs_add_dirt(c, znode->zbranch[i].lnum,
+                                            znode->zbranch[i].len);
+                       if (err) {
+                               dbg_dump_znode(c, znode);
+                               goto out_unlock;
+                       }
+                       dbg_tnc("removing %s", DBGKEY(key));
+               }
+               if (k) {
+                       for (i = n + 1 + k; i < znode->child_cnt; i++)
+                               znode->zbranch[i - k] = znode->zbranch[i];
+                       znode->child_cnt -= k;
+               }
+
+               /*
+                * Now delete the first - 'tnc_delete()' also takes care of
+                * the znode if it becomes empty.
+                */
+               err = tnc_delete(c, znode, n);
+               if (err)
+                       goto out_unlock;
+       }
+
+out_unlock:
+       if (!err)
+               err = dbg_check_tnc(c, 0);
+       mutex_unlock(&c->tnc_mutex);
+       return err;
+}
+
+/**
+ * ubifs_tnc_remove_ino - remove an inode from TNC.
+ * @c: UBIFS file-system description object
+ * @inum: inode number to remove
+ *
+ * This function removes inode @inum and all the extended attributes associated
+ * with the inode from TNC and returns zero in case of success or a negative
+ * error code in case of failure.
+ */
+int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
+{
+       union ubifs_key key1, key2;
+       struct ubifs_dent_node *xent, *pxent = NULL;
+       struct qstr nm = { .name = NULL };
+
+       dbg_tnc("ino %lu", (unsigned long)inum);
+
+       /*
+        * Walk all extended attribute entries and remove them together with
+        * corresponding extended attribute inodes.
+        */
+       lowest_xent_key(c, &key1, inum);
+       while (1) {
+               ino_t xattr_inum;
+               int err;
+
+               xent = ubifs_tnc_next_ent(c, &key1, &nm);
+               if (IS_ERR(xent)) {
+                       err = PTR_ERR(xent);
+                       if (err == -ENOENT)
+                               break;
+                       /* Do not leak the entry kept from the previous turn */
+                       kfree(pxent);
+                       return err;
+               }
+
+               xattr_inum = le64_to_cpu(xent->inum);
+               dbg_tnc("xent '%s', ino %lu", xent->name,
+                       (unsigned long)xattr_inum);
+
+               nm.name = xent->name;
+               nm.len = le16_to_cpu(xent->nlen);
+               err = ubifs_tnc_remove_nm(c, &key1, &nm);
+               if (err) {
+                       kfree(pxent);
+                       kfree(xent);
+                       return err;
+               }
+
+               lowest_ino_key(c, &key1, xattr_inum);
+               highest_ino_key(c, &key2, xattr_inum);
+               err = ubifs_tnc_remove_range(c, &key1, &key2);
+               if (err) {
+                       kfree(pxent);
+                       kfree(xent);
+                       return err;
+               }
+
+               /*
+                * @nm.name points into @xent and is needed by the next
+                * 'ubifs_tnc_next_ent()' call, so @xent is kept alive for one
+                * more iteration and only the entry before it is freed.
+                */
+               kfree(pxent);
+               pxent = xent;
+               key_read(c, &xent->key, &key1);
+       }
+
+       kfree(pxent);
+       lowest_ino_key(c, &key1, inum);
+       highest_ino_key(c, &key2, inum);
+
+       return ubifs_tnc_remove_range(c, &key1, &key2);
+}
+
+/**
+ * ubifs_tnc_next_ent - walk directory or extended attribute entries.
+ * @c: UBIFS file-system description object
+ * @key: key of last entry
+ * @nm: name of last entry found; must not be %NULL, but @nm->name may be
+ *
+ * This function finds and reads the next directory or extended attribute entry
+ * after the given key (@key) if there is one. @nm is used to resolve
+ * collisions.
+ *
+ * If the name of the current entry is not known and only the key is known,
+ * @nm->name has to be %NULL. In this case the semantics of this function is a
+ * little bit different and it returns the entry corresponding to this key, not
+ * the next one. If the key was not found, the closest "right" entry is
+ * returned.
+ *
+ * If the first entry has to be found, @key has to contain the lowest possible
+ * key value for this inode and @nm->name has to be %NULL.
+ *
+ * This function returns the found directory or extended attribute entry node
+ * in case of success (the caller has to free it with 'kfree()'), %-ENOENT is
+ * returned if no entry was found, and a negative error code is returned in
+ * case of failure. Errors are returned as 'ERR_PTR()' values.
+ */
+struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
+                                          union ubifs_key *key,
+                                          const struct qstr *nm)
+{
+       int n, err, type = key_type(c, key);
+       struct ubifs_znode *znode;
+       struct ubifs_dent_node *dent;
+       struct ubifs_zbranch *zbr;
+       union ubifs_key *dkey;
+
+       dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key));
+       ubifs_assert(is_hash_key(c, key));
+
+       mutex_lock(&c->tnc_mutex);
+       err = ubifs_lookup_level0(c, key, &znode, &n);
+       if (unlikely(err < 0))
+               goto out_unlock;
+
+       if (nm->name) {
+               if (err) {
+                       /* Handle collisions */
+                       err = resolve_collision(c, key, &znode, &n, nm);
+                       dbg_tnc("rc returned %d, znode %p, n %d",
+                               err, znode, n);
+                       if (unlikely(err < 0))
+                               goto out_unlock;
+               }
+
+               /* Now find next entry */
+               err = tnc_next(c, &znode, &n);
+               if (unlikely(err))
+                       goto out_unlock;
+       } else {
+               /*
+                * The full name of the entry was not given, in which case the
+                * behavior of this function is a little different and it
+                * returns current entry, not the next one.
+                */
+               if (!err) {
+                       /*
+                        * However, the given key does not exist in the TNC
+                        * tree and @znode/@n variables contain the closest
+                        * "preceding" element. Switch to the next one.
+                        */
+                       err = tnc_next(c, &znode, &n);
+                       if (err)
+                               goto out_unlock;
+               }
+       }
+
+       zbr = &znode->zbranch[n];
+
+       /*
+        * The above 'tnc_next()' call could lead us to the next inode, check
+        * this. This is done before allocating the buffer, so that the normal
+        * end of a walk neither allocates memory nor may fail with %-ENOMEM.
+        */
+       dkey = &zbr->key;
+       if (key_inum(c, dkey) != key_inum(c, key) ||
+           key_type(c, dkey) != type) {
+               err = -ENOENT;
+               goto out_unlock;
+       }
+
+       dent = kmalloc(zbr->len, GFP_NOFS);
+       if (unlikely(!dent)) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       err = tnc_read_node_nm(c, zbr, dent);
+       if (unlikely(err))
+               goto out_free;
+
+       mutex_unlock(&c->tnc_mutex);
+       return dent;
+
+out_free:
+       kfree(dent);
+out_unlock:
+       mutex_unlock(&c->tnc_mutex);
+       return ERR_PTR(err);
+}
+
+/**
+ * tnc_destroy_cnext - free obsolete znodes left on the commit list.
+ * @c: UBIFS file-system description object
+ *
+ * This function walks the list which starts at @c->cnext and is linked via
+ * the 'cnext' znode field, and frees every znode which has the
+ * %OBSOLETE_ZNODE flag set. A non-empty list is only expected if the commit
+ * has failed.
+ */
+static void tnc_destroy_cnext(struct ubifs_info *c)
+{
+       struct ubifs_znode *next = c->cnext;
+
+       if (!next)
+               return;
+       ubifs_assert(c->cmt_state == COMMIT_BROKEN);
+
+       for (;;) {
+               struct ubifs_znode *cur = next;
+
+               /* Fetch the link first because @cur may be freed below */
+               next = cur->cnext;
+               if (test_bit(OBSOLETE_ZNODE, &cur->flags))
+                       kfree(cur);
+               /* Stop at the end of the list or when we wrapped around */
+               if (!next || next == c->cnext)
+                       break;
+       }
+}
+
+/**
+ * ubifs_tnc_close - shut the TNC down and release what it holds.
+ * @c: UBIFS file-system description object
+ *
+ * This function destroys left-overs of a failed commit, the whole znode tree
+ * (if the root znode is present), the @c->gap_lebs and @c->ilebs arrays, and
+ * the old index.
+ */
+void ubifs_tnc_close(struct ubifs_info *c)
+{
+       struct ubifs_znode *root;
+
+       tnc_destroy_cnext(c);
+
+       root = c->zroot.znode;
+       if (root) {
+               long freed = ubifs_destroy_tnc_subtree(root);
+
+               /* Keep the global clean znode counter in sync */
+               atomic_long_sub(freed, &ubifs_clean_zn_cnt);
+       }
+
+       kfree(c->gap_lebs);
+       kfree(c->ilebs);
+       destroy_old_idx(c);
+}
+
+/**
+ * left_znode - find the left neighbour of a znode on the same level.
+ * @c: UBIFS file-system description object
+ * @znode: znode to start from
+ *
+ * This function climbs up from @znode until it reaches a parent in which
+ * there is a branch to the left of the one we came from, and then descends
+ * along the rightmost branches back to the level of @znode. Returns the znode
+ * found, %NULL if there is no znode to the left, or an error pointer if
+ * 'get_znode()' has failed.
+ */
+static struct ubifs_znode *left_znode(struct ubifs_info *c,
+                                     struct ubifs_znode *znode)
+{
+       const int target = znode->level;
+       int idx;
+
+       /* Go up until we can go left */
+       do {
+               idx = znode->iip - 1;
+               znode = znode->parent;
+               if (!znode)
+                       return NULL;
+       } while (idx < 0);
+
+       /* Step into the left sibling branch ... */
+       znode = get_znode(c, znode, idx);
+       /* ... and follow the rightmost branches down to the start level */
+       while (!IS_ERR(znode) && znode->level != target)
+               znode = get_znode(c, znode, znode->child_cnt - 1);
+
+       return znode;
+}
+
+/**
+ * right_znode - find the right neighbour of a znode on the same level.
+ * @c: UBIFS file-system description object
+ * @znode: znode to start from
+ *
+ * This function climbs up from @znode until it reaches a parent in which
+ * there is a branch to the right of the one we came from, and then descends
+ * along the leftmost branches back to the level of @znode. Returns the znode
+ * found, %NULL if there is no znode to the right, or an error pointer if
+ * 'get_znode()' has failed.
+ */
+static struct ubifs_znode *right_znode(struct ubifs_info *c,
+                                      struct ubifs_znode *znode)
+{
+       const int target = znode->level;
+       int idx;
+
+       /* Go up until we can go right */
+       for (;;) {
+               idx = znode->iip + 1;
+               znode = znode->parent;
+               if (!znode)
+                       return NULL;
+               if (idx < znode->child_cnt)
+                       break;
+       }
+
+       /* Step into the right sibling branch ... */
+       znode = get_znode(c, znode, idx);
+       /* ... and follow the leftmost branches down to the start level */
+       while (!IS_ERR(znode) && znode->level != target)
+               znode = get_znode(c, znode, 0);
+
+       return znode;
+}
+
+/**
+ * lookup_znode - find a particular indexing node from TNC.
+ * @c: UBIFS file-system description object
+ * @key: index node key to lookup
+ * @level: index node level
+ * @lnum: index node LEB number
+ * @offs: index node offset
+ *
+ * This function searches an indexing node by its first key @key and its
+ * address @lnum:@offs. It looks up the indexing tree by pulling all indexing
+ * nodes it traverses to TNC. This function is called for indexing nodes which
+ * were found on the media by scanning, for example when garbage-collecting or
+ * when doing in-the-gaps commit. This means that the indexing node which is
+ * looked for does not have to have exactly the same leftmost key @key, because
+ * the leftmost key may have been changed, in which case TNC will contain a
+ * dirty znode which still refers to the same @lnum:@offs. This function is
+ * clever enough to recognize such indexing nodes.
+ *
+ * Note, if a znode was deleted or changed too much, then this function will
+ * not find it. For situations like this UBIFS has the old index RB-tree
+ * (indexed by @lnum:@offs).
+ *
+ * This function returns a pointer to the znode found or %NULL if it is not
+ * found. A negative error code is returned on failure.
+ */
+static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
+                                       union ubifs_key *key, int level,
+                                       int lnum, int offs)
+{
+       struct ubifs_znode *znode, *zn;
+       int n, nn;
+
+       /*
+        * The arguments have probably been read off flash, so don't assume
+        * they are valid.
+        */
+       if (level < 0)
+               return ERR_PTR(-EINVAL);
+
+       /* Get the root znode */
+       znode = c->zroot.znode;
+       if (!znode) {
+               znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
+               if (IS_ERR(znode))
+                       return znode;
+       }
+       /* Check if it is the one we are looking for */
+       if (c->zroot.lnum == lnum && c->zroot.offs == offs)
+               return znode;
+       /*
+        * Descend to the parent level i.e. (level + 1). The root did not
+        * match, so a node at the root's level or above cannot be found.
+        */
+       if (level >= znode->level)
+               return NULL;
+       while (1) {
+               ubifs_search_zbranch(c, znode, key, &n);
+               if (n < 0) {
+                       /*
+                        * We reached a znode where the leftmost key is greater
+                        * than the key we are searching for. This is the same
+                        * situation as the one described in a huge comment at
+                        * the end of the 'ubifs_lookup_level0()' function. And
+                        * for exactly the same reasons we have to try to look
+                        * left before giving up.
+                        */
+                       znode = left_znode(c, znode);
+                       if (!znode)
+                               return NULL;
+                       if (IS_ERR(znode))
+                               return znode;
+                       ubifs_search_zbranch(c, znode, key, &n);
+                       ubifs_assert(n >= 0);
+               }
+               /* Stop at the level of the would-be parent, @n is its branch */
+               if (znode->level == level + 1)
+                       break;
+               znode = get_znode(c, znode, n);
+               if (IS_ERR(znode))
+                       return znode;
+       }
+       /* Check if the child is the one we are looking for */
+       if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs)
+               return get_znode(c, znode, n);
+       /* If the key is unique, there is nowhere else to look */
+       if (!is_hash_key(c, key))
+               return NULL;
+       /*
+        * The key is not unique and so may be also in the znodes to either
+        * side.
+        */
+       /* Remember the position so that the scan to the right starts here */
+       zn = znode;
+       nn = n;
+       /* Look left */
+       while (1) {
+               /* Move one branch to the left */
+               if (n)
+                       n -= 1;
+               else {
+                       znode = left_znode(c, znode);
+                       if (!znode)
+                               break;
+                       if (IS_ERR(znode))
+                               return znode;
+                       n = znode->child_cnt - 1;
+               }
+               /* Check it */
+               if (znode->zbranch[n].lnum == lnum &&
+                   znode->zbranch[n].offs == offs)
+                       return get_znode(c, znode, n);
+               /* Stop if the key is less than the one we are looking for */
+               if (keys_cmp(c, &znode->zbranch[n].key, key) < 0)
+                       break;
+       }
+       /* Back to the middle */
+       znode = zn;
+       n = nn;
+       /* Look right */
+       while (1) {
+               /* Move one branch to the right */
+               if (++n >= znode->child_cnt) {
+                       znode = right_znode(c, znode);
+                       if (!znode)
+                               break;
+                       if (IS_ERR(znode))
+                               return znode;
+                       n = 0;
+               }
+               /* Check it */
+               if (znode->zbranch[n].lnum == lnum &&
+                   znode->zbranch[n].offs == offs)
+                       return get_znode(c, znode, n);
+               /* Stop if the key is greater than the one we are looking for */
+               if (keys_cmp(c, &znode->zbranch[n].key, key) > 0)
+                       break;
+       }
+       /* Neither the middle position nor its neighbours matched */
+       return NULL;
+}
+
+/**
+ * is_idx_node_in_tnc - check whether the TNC refers to an index node.
+ * @c: UBIFS file-system description object
+ * @key: key of index node
+ * @level: index node level
+ * @lnum: LEB number of index node
+ * @offs: offset of index node
+ *
+ * This function looks the index node up with 'lookup_znode()' and classifies
+ * the result: %0 means the TNC does not refer to the index node, %1 means it
+ * does and the corresponding znode is dirty, %2 means it does and the
+ * corresponding znode is clean. A negative error code is returned in case of
+ * failure.
+ *
+ * Note, the @key argument has to be the key of the first child. Also note,
+ * this function relies on the fact that 0:0 is never a valid LEB number and
+ * offset for a main-area node.
+ */
+int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
+                      int lnum, int offs)
+{
+       struct ubifs_znode *zn = lookup_znode(c, key, level, lnum, offs);
+
+       if (IS_ERR(zn))
+               return PTR_ERR(zn);
+       if (!zn)
+               return 0;
+       if (ubifs_zn_dirty(zn))
+               return 1;
+       return 2;
+}
+
+/**
+ * is_leaf_node_in_tnc - determine if a non-indexing node is in the TNC.
+ * @c: UBIFS file-system description object
+ * @key: node key
+ * @lnum: node LEB number
+ * @offs: node offset
+ *
+ * This function returns %1 if the node is referred to in the TNC, %0 if it is
+ * not, and a negative error code in case of failure.
+ *
+ * Note, this function relies on the fact that 0:0 is never a valid LEB number
+ * and offset for a main-area node.
+ */
+static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key,
+                              int lnum, int offs)
+{
+       struct ubifs_zbranch *zbr;
+       struct ubifs_znode *znode, *zn;
+       int n, found, err, nn;
+       /* Only hashed keys may be shared by several nodes (collisions) */
+       const int unique = !is_hash_key(c, key);
+
+       found = ubifs_lookup_level0(c, key, &znode, &n);
+       if (found < 0)
+               return found; /* Error code */
+       if (!found)
+               return 0;
+       zbr = &znode->zbranch[n];
+       if (lnum == zbr->lnum && offs == zbr->offs)
+               return 1; /* Found it */
+       if (unique)
+               return 0;
+       /*
+        * Because the key is not unique, we have to look left
+        * and right as well
+        */
+       /* Remember the position so that the scan to the right starts here */
+       zn = znode;
+       nn = n;
+       /* Look left */
+       while (1) {
+               /* 'tnc_prev()' moves @znode/@n in place */
+               err = tnc_prev(c, &znode, &n);
+               if (err == -ENOENT)
+                       break;
+               if (err)
+                       return err;
+               /* A different key means we left the run of colliding keys */
+               if (keys_cmp(c, key, &znode->zbranch[n].key))
+                       break;
+               zbr = &znode->zbranch[n];
+               if (lnum == zbr->lnum && offs == zbr->offs)
+                       return 1; /* Found it */
+       }
+       /* Look right */
+       znode = zn;
+       n = nn;
+       while (1) {
+               /* 'tnc_next()' moves @znode/@n in place */
+               err = tnc_next(c, &znode, &n);
+               if (err) {
+                       /* Running off the end just means "not found" */
+                       if (err == -ENOENT)
+                               return 0;
+                       return err;
+               }
+               /* A different key means we left the run of colliding keys */
+               if (keys_cmp(c, key, &znode->zbranch[n].key))
+                       break;
+               zbr = &znode->zbranch[n];
+               if (lnum == zbr->lnum && offs == zbr->offs)
+                       return 1; /* Found it */
+       }
+       return 0;
+}
+
+/**
+ * ubifs_tnc_has_node - check whether the TNC refers to a node.
+ * @c: UBIFS file-system description object
+ * @key: node key
+ * @level: index node level (if it is an index node)
+ * @lnum: node LEB number
+ * @offs: node offset
+ * @is_idx: non-zero if the node is an index node
+ *
+ * This function takes the TNC mutex and returns %1 if the node is in the
+ * TNC, %0 if it is not, and a negative error code in case of failure. For
+ * index nodes, @key has to be the key of the first child, and an index node
+ * which is referred to by a dirty znode is reported as not being in the TNC.
+ */
+int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level,
+                      int lnum, int offs, int is_idx)
+{
+       int ret;
+
+       mutex_lock(&c->tnc_mutex);
+       if (!is_idx) {
+               ret = is_leaf_node_in_tnc(c, key, lnum, offs);
+       } else {
+               ret = is_idx_node_in_tnc(c, key, level, lnum, offs);
+               switch (ret) {
+               case 2:
+                       /* The index node was found and it was clean */
+                       ret = 1;
+                       break;
+               case 1:
+                       /* The index node was found but it was dirty */
+                       ret = 0;
+                       break;
+               default:
+                       /* Error codes are passed through untouched */
+                       if (ret >= 0)
+                               BUG_ON(ret != 0);
+                       break;
+               }
+       }
+       mutex_unlock(&c->tnc_mutex);
+
+       return ret;
+}
+
+/**
+ * ubifs_dirty_idx_node - mark an index node as dirty.
+ * @c: UBIFS file-system description object
+ * @key: index node key
+ * @level: index node level
+ * @lnum: index node LEB number
+ * @offs: index node offset
+ *
+ * This function looks the index node up under the TNC mutex and, if it is
+ * found, dirties it with 'dirty_cow_bottom_up()' so that it can be garbage
+ * collected. Nothing is done if the index node is not found. The @key
+ * argument has to be the key of the first child. This function relies on the
+ * fact that 0:0 is never a valid LEB number and offset for a main-area node.
+ * Returns %0 on success and a negative error code on failure.
+ */
+int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level,
+                        int lnum, int offs)
+{
+       struct ubifs_znode *zn;
+       int ret = 0;
+
+       mutex_lock(&c->tnc_mutex);
+
+       zn = lookup_znode(c, key, level, lnum, offs);
+       /* Dirty the znode only if the lookup produced a real one */
+       if (zn && !IS_ERR(zn))
+               zn = dirty_cow_bottom_up(c, zn);
+       /* Covers failures of both the lookup and the dirtying */
+       if (IS_ERR(zn))
+               ret = PTR_ERR(zn);
+
+       mutex_unlock(&c->tnc_mutex);
+       return ret;
+}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c

new file mode 100644 (file)

index 0000000..8117e65
--- /dev/null
+++ b/fs/ubifs/tnc_commit.c
@@ -0,0 +1,1103 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/* This file implements TNC functions for committing */
+
+#include "ubifs.h"
+
+/**
+ * make_idx_node - make an index node for fill-the-gaps method of TNC commit.
+ * @c: UBIFS file-system description object
+ * @idx: buffer in which to place new index node
+ * @znode: znode from which to make new index node
+ * @lnum: LEB number where new index node will be written
+ * @offs: offset where new index node will be written
+ * @len: length of new index node
+ *
+ * This function fills @idx from @znode, records the new position
+ * @lnum:@offs:@len in the parent branch (or in @c->zroot if @znode has no
+ * parent) and clears the dirty and copy-on-write flags of @znode. It returns
+ * whatever 'insert_old_idx_znode()' returned; note that the rest of the work
+ * is done even if that call has failed.
+ */
+static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
+                        struct ubifs_znode *znode, int lnum, int offs, int len)
+{
+       struct ubifs_znode *zp;
+       int i, err;
+
+       /* Make index node */
+       idx->ch.node_type = UBIFS_IDX_NODE;
+       idx->child_cnt = cpu_to_le16(znode->child_cnt);
+       idx->level = cpu_to_le16(znode->level);
+       /* Copy every branch of the znode, converting to little-endian */
+       for (i = 0; i < znode->child_cnt; i++) {
+               struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
+               struct ubifs_zbranch *zbr = &znode->zbranch[i];
+
+               key_write_idx(c, &zbr->key, &br->key);
+               br->lnum = cpu_to_le32(zbr->lnum);
+               br->offs = cpu_to_le32(zbr->offs);
+               br->len = cpu_to_le32(zbr->len);
+               /* A bad reference is only reported, it is not an error here */
+               if (!zbr->lnum || !zbr->len) {
+                       ubifs_err("bad ref in znode");
+                       dbg_dump_znode(c, znode);
+                       if (zbr->znode)
+                               dbg_dump_znode(c, zbr->znode);
+               }
+       }
+       ubifs_prepare_node(c, idx, len, 0);
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+       znode->lnum = lnum;
+       znode->offs = offs;
+       znode->len = len;
+#endif
+
+       /* The result is returned at the very end, after all the updates */
+       err = insert_old_idx_znode(c, znode);
+
+       /* Update the parent */
+       zp = znode->parent;
+       if (zp) {
+               struct ubifs_zbranch *zbr;
+
+               zbr = &zp->zbranch[znode->iip];
+               zbr->lnum = lnum;
+               zbr->offs = offs;
+               zbr->len = len;
+       } else {
+               /* No parent means this is the root, update the root branch */
+               c->zroot.lnum = lnum;
+               c->zroot.offs = offs;
+               c->zroot.len = len;
+       }
+       c->calc_idx_sz += ALIGN(len, 8);
+
+       atomic_long_dec(&c->dirty_zn_cnt);
+
+       ubifs_assert(ubifs_zn_dirty(znode));
+       ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
+
+       __clear_bit(DIRTY_ZNODE, &znode->flags);
+       __clear_bit(COW_ZNODE, &znode->flags);
+
+       return err;
+}
+
+/**
+ * fill_gap - put index nodes into a gap of a dirty index LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number that gap appears in
+ * @gap_start: offset of start of gap
+ * @gap_end: offset of end of gap
+ * @dirt: the amount of padding written is added to this
+ *
+ * This function takes znodes from @c->enext one by one and makes index nodes
+ * for them in @c->ileb_buf for as long as they are smaller than what is left
+ * of the gap. The rest of the gap is padded; if the gap ends at the end of
+ * the LEB, padding goes only up to the next min. I/O unit boundary and
+ * @c->ileb_len is set accordingly.
+ *
+ * This function returns the number of index nodes written into the gap, or
+ * the negative error code returned by 'make_idx_node()'.
+ */
+static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end,
+                   int *dirt)
+{
+       int room, pos, pad_len, nodes = 0;
+
+       ubifs_assert((gap_start & 7) == 0);
+       ubifs_assert((gap_end & 7) == 0);
+       ubifs_assert(gap_end >= gap_start);
+
+       room = gap_end - gap_start;
+       if (room == 0)
+               return 0;
+
+       pos = gap_start;
+       while (c->enext) {
+               struct ubifs_znode *znode = c->enext;
+               int len = ubifs_idx_node_sz(c, znode->child_cnt);
+               int alen, err;
+
+               /* The next znode does not fit, leave it for another gap */
+               if (len >= room)
+                       break;
+
+               alen = ALIGN(len, 8);
+               ubifs_assert(alen <= room);
+               err = make_idx_node(c, c->ileb_buf + pos, znode, lnum, pos,
+                                   len);
+               if (err)
+                       return err;
+
+               room -= alen;
+               pos += alen;
+               /* Advance, %NULL once we are back at the head of the list */
+               c->enext = znode->cnext;
+               if (c->enext == c->cnext)
+                       c->enext = NULL;
+               nodes += 1;
+       }
+
+       if (gap_end != c->leb_size) {
+               /* Pad to end of gap */
+               pad_len = room;
+       } else {
+               /* Pad to end of min_io_size */
+               c->ileb_len = ALIGN(pos, c->min_io_size);
+               pad_len = c->ileb_len - pos;
+       }
+       dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d",
+              lnum, gap_start, gap_end, gap_end - gap_start, nodes, pad_len);
+       ubifs_pad(c, c->ileb_buf + pos, pad_len);
+       *dirt += pad_len;
+
+       return nodes;
+}
+
+/**
+ * find_old_idx - look an index node up in the old index RB-tree.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of obsoleted index node
+ * @offs: offset of obsoleted index node
+ *
+ * This function searches the @c->old_idx RB-tree, which is ordered by LEB
+ * number first and by offset second. Returns %1 if an entry with exactly
+ * @lnum:@offs is found and %0 otherwise.
+ */
+static int find_old_idx(struct ubifs_info *c, int lnum, int offs)
+{
+       struct rb_node *node = c->old_idx.rb_node;
+
+       while (node) {
+               struct ubifs_old_idx *entry;
+               int go_left;
+
+               entry = rb_entry(node, struct ubifs_old_idx, rb);
+               if (lnum != entry->lnum)
+                       go_left = lnum < entry->lnum;
+               else if (offs != entry->offs)
+                       go_left = offs < entry->offs;
+               else
+                       return 1;
+
+               node = go_left ? node->rb_left : node->rb_right;
+       }
+
+       return 0;
+}
+
+/**
+ * is_idx_node_in_use - check whether an index node may be overwritten.
+ * @c: UBIFS file-system description object
+ * @key: key of index node
+ * @level: index node level
+ * @lnum: LEB number of index node
+ * @offs: offset of index node
+ *
+ * This function returns %0 if the index node identified by @key / @lnum /
+ * @offs is referred to neither by the TNC nor by the old index, i.e. it is
+ * obsolete. It returns %1 if the TNC refers to it via a dirty znode or if it
+ * is found only in the old index, and %2 if the TNC refers to it via a clean
+ * znode. A negative error code is returned on failure.
+ */
+static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
+                             int level, int lnum, int offs)
+{
+       const int tnc_state = is_idx_node_in_tnc(c, key, level, lnum, offs);
+
+       /* Only the "not in TNC" verdict is double-checked in the old index */
+       if (tnc_state == 0 && find_old_idx(c, lnum, offs))
+               return 1;
+
+       /* An error code, %0, %1 or %2 exactly as the TNC check reported it */
+       return tnc_state;
+}
+
+/**
+ * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
+ * @c: UBIFS file-system description object
+ * @p: return LEB number here
+ *
+ * This function lays out new index nodes for dirty znodes using in-the-gaps
+ * method of TNC commit.
+ * This function merely puts the next znode into the next gap, making no attempt
+ * to try to maximise the number of znodes that fit.
+ * This function returns the number of index nodes written into the gaps, or a
+ * negative error code on failure.
+ *
+ * Note, @p is assigned as soon as a dirty index LEB has been found, i.e. also
+ * on the later error paths and when nothing could be written into the LEB.
+ */
+static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
+{
+       struct ubifs_scan_leb *sleb;
+       struct ubifs_scan_node *snod;
+       int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written;
+
+       tot_written = 0;
+       /* Get an index LEB with lots of obsolete index nodes */
+       lnum = ubifs_find_dirty_idx_leb(c);
+       if (lnum < 0)
+               /*
+                * There also may be dirt in the index head that could be
+                * filled, however we do not check there at present.
+                */
+               return lnum; /* Error code */
+       *p = lnum;
+       dbg_gc("LEB %d", lnum);
+       /*
+        * Scan the index LEB.  We use the generic scan for this even though
+        * it is more comprehensive and less efficient than is needed for this
+        * purpose.
+        */
+       sleb = ubifs_scan(c, lnum, 0, c->ileb_buf);
+       c->ileb_len = 0;
+       if (IS_ERR(sleb))
+               return PTR_ERR(sleb);
+       gap_start = 0;
+       list_for_each_entry(snod, &sleb->nodes, list) {
+               struct ubifs_idx_node *idx;
+               int in_use, level;
+
+               ubifs_assert(snod->type == UBIFS_IDX_NODE);
+               idx = snod->node;
+               key_read(c, ubifs_idx_key(c, idx), &snod->key);
+               level = le16_to_cpu(idx->level);
+               /* Determine if the index node is in use (not obsolete) */
+               in_use = is_idx_node_in_use(c, &snod->key, level, lnum,
+                                           snod->offs);
+               if (in_use < 0) {
+                       ubifs_scan_destroy(sleb);
+                       return in_use; /* Error code */
+               }
+               if (in_use) {
+                       /* %1 means "in use but dirty", count it as dirt */
+                       if (in_use == 1)
+                               dirt += ALIGN(snod->len, 8);
+                       /*
+                        * The obsolete index nodes form gaps that can be
+                        * overwritten.  This gap has ended because we have
+                        * found an index node that is still in use
+                        * i.e. not obsolete
+                        */
+                       gap_end = snod->offs;
+                       /* Try to fill gap */
+                       written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
+                       if (written < 0) {
+                               ubifs_scan_destroy(sleb);
+                               return written; /* Error code */
+                       }
+                       tot_written += written;
+                       /* The next gap may start right after this node */
+                       gap_start = ALIGN(snod->offs + snod->len, 8);
+               }
+       }
+       ubifs_scan_destroy(sleb);
+       /* The last gap stretches from the last used node to the LEB end */
+       c->ileb_len = c->leb_size;
+       gap_end = c->leb_size;
+       /* Try to fill gap */
+       written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
+       if (written < 0)
+               return written; /* Error code */
+       tot_written += written;
+       if (tot_written == 0) {
+               struct ubifs_lprops lp;
+
+               /* Nothing was laid out, so the LEB is not re-written */
+               dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
+               err = ubifs_read_one_lp(c, lnum, &lp);
+               if (err)
+                       return err;
+               if (lp.free == c->leb_size) {
+                       /*
+                        * We must have snatched this LEB from the idx_gc list
+                        * so we need to correct the free and dirty space.
+                        */
+                       err = ubifs_change_one_lp(c, lnum,
+                                                 c->leb_size - c->ileb_len,
+                                                 dirt, 0, 0, 0);
+                       if (err)
+                               return err;
+               }
+               return 0;
+       }
+       /* Update the LEB properties, then write the new LEB contents out */
+       err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt,
+                                 0, 0, 0);
+       if (err)
+               return err;
+       err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len,
+                              UBI_SHORTTERM);
+       if (err)
+               return err;
+       dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
+       return tot_written;
+}
+
+/**
+ * get_leb_cnt - estimate how many empty LEBs a commit needs.
+ * @c: UBIFS file-system description object
+ * @cnt: number of znodes to commit
+ *
+ * This function returns the number of empty LEBs needed to commit @cnt znodes
+ * to the current index head. Every index node is assumed to have the maximum
+ * size, so the result is not exact and may be more than needed.
+ */
+static int get_leb_cnt(struct ubifs_info *c, int cnt)
+{
+       /* How many maximum-size index nodes still fit into the index head */
+       const int in_head = (c->leb_size - c->ihead_offs) / c->max_idx_node_sz;
+       /* How many maximum-size index nodes fit into one empty LEB */
+       const int per_leb = c->leb_size / c->max_idx_node_sz;
+       int remaining = cnt - in_head;
+
+       if (remaining < 0)
+               remaining = 0;
+
+       return DIV_ROUND_UP(remaining, per_leb);
+}
+
+/**
+ * layout_in_gaps - in-the-gaps method of committing TNC.
+ * @c: UBIFS file-system description object
+ * @cnt: number of dirty znodes to commit.
+ *
+ * This function lays out new index nodes for dirty znodes using in-the-gaps
+ * method of TNC commit. The numbers of the LEBs which were used are stored in
+ * the @c->gap_lebs array, which is allocated here and is terminated by %-1.
+ * LEBs are processed until the remaining znodes fit into the @c->ileb_cnt
+ * empty LEBs, or until no more dirty index LEBs can be found (%-ENOSPC), in
+ * which case the commit is attempted anyway.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ * On failure @c->gap_lebs is freed and set to %NULL.
+ */
+static int layout_in_gaps(struct ubifs_info *c, int cnt)
+{
+       int err, leb_needed_cnt, written, *p;
+
+       dbg_gc("%d znodes to write", cnt);
+
+       /* One slot per index LEB plus one for the %-1 terminator */
+       c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS);
+       if (!c->gap_lebs)
+               return -ENOMEM;
+
+       p = c->gap_lebs;
+       do {
+               /*
+                * @p is an 'int *', so the pointer arithmetic already scales
+                * by sizeof(int). The last slot is kept for the terminator.
+                */
+               ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs);
+               written = layout_leb_in_gaps(c, p);
+               if (written < 0) {
+                       err = written;
+                       if (err == -ENOSPC) {
+                               if (!dbg_force_in_the_gaps_enabled) {
+                                       /*
+                                        * Do not print scary warnings if the
+                                        * debugging option which forces
+                                        * in-the-gaps is enabled.
+                                        */
+                                       ubifs_err("out of space");
+                                       spin_lock(&c->space_lock);
+                                       dbg_dump_budg(c);
+                                       spin_unlock(&c->space_lock);
+                                       dbg_dump_lprops(c);
+                               }
+                               /* Try to commit anyway */
+                               err = 0;
+                               break;
+                       }
+                       kfree(c->gap_lebs);
+                       c->gap_lebs = NULL;
+                       return err;
+               }
+               p++;
+               cnt -= written;
+               leb_needed_cnt = get_leb_cnt(c, cnt);
+               dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt,
+                      leb_needed_cnt, c->ileb_cnt);
+       } while (leb_needed_cnt > c->ileb_cnt);
+
+       /* Terminate the list of LEB numbers */
+       *p = -1;
+       return 0;
+}
+
+/**
+ * layout_in_empty_space - layout index nodes in empty space.
+ * @c: UBIFS file-system description object
+ *
+ * This function lays out new index nodes for dirty znodes using empty LEBs.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int layout_in_empty_space(struct ubifs_info *c)
+{
+       struct ubifs_znode *znode, *cnext, *zp;
+       int lnum, offs, len, next_len, buf_len, buf_offs, used, avail;
+       int wlen, blen, err;
+
+       cnext = c->enext;
+       if (!cnext)
+               return 0;
+
+       lnum = c->ihead_lnum;
+       buf_offs = c->ihead_offs;
+
+       buf_len = ubifs_idx_node_sz(c, c->fanout);
+       buf_len = ALIGN(buf_len, c->min_io_size);
+       used = 0;
+       avail = buf_len;
+
+       /* Ensure there is enough room for first write */
+       next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
+       if (buf_offs + next_len > c->leb_size)
+               lnum = -1;
+
+       while (1) {
+               znode = cnext;
+
+               len = ubifs_idx_node_sz(c, znode->child_cnt);
+
+               /* Determine the index node position */
+               if (lnum == -1) {
+                       if (c->ileb_nxt >= c->ileb_cnt) {
+                               ubifs_err("out of space");
+                               return -ENOSPC;
+                       }
+                       lnum = c->ilebs[c->ileb_nxt++];
+                       buf_offs = 0;
+                       used = 0;
+                       avail = buf_len;
+               }
+
+               offs = buf_offs + used;
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+               znode->lnum = lnum;
+               znode->offs = offs;
+               znode->len = len;
+#endif
+
+               /* Update the parent */
+               zp = znode->parent;
+               if (zp) {
+                       struct ubifs_zbranch *zbr;
+                       int i;
+
+                       i = znode->iip;
+                       zbr = &zp->zbranch[i];
+                       zbr->lnum = lnum;
+                       zbr->offs = offs;
+                       zbr->len = len;
+               } else {
+                       c->zroot.lnum = lnum;
+                       c->zroot.offs = offs;
+                       c->zroot.len = len;
+               }
+               c->calc_idx_sz += ALIGN(len, 8);
+
+               /*
+                * Once lprops is updated, we can decrease the dirty znode count
+                * but it is easier to just do it here.
+                */
+               atomic_long_dec(&c->dirty_zn_cnt);
+
+               /*
+                * Calculate the next index node length to see if there is
+                * enough room for it
+                */
+               cnext = znode->cnext;
+               if (cnext == c->cnext)
+                       next_len = 0;
+               else
+                       next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
+
+               if (c->min_io_size == 1) {
+                       buf_offs += ALIGN(len, 8);
+                       if (next_len) {
+                               if (buf_offs + next_len <= c->leb_size)
+                                       continue;
+                               err = ubifs_update_one_lp(c, lnum, 0,
+                                               c->leb_size - buf_offs, 0, 0);
+                               if (err)
+                                       return err;
+                               lnum = -1;
+                               continue;
+                       }
+                       err = ubifs_update_one_lp(c, lnum,
+                                       c->leb_size - buf_offs, 0, 0, 0);
+                       if (err)
+                               return err;
+                       break;
+               }
+
+               /* Update buffer positions */
+               wlen = used + len;
+               used += ALIGN(len, 8);
+               avail -= ALIGN(len, 8);
+
+               if (next_len != 0 &&
+                   buf_offs + used + next_len <= c->leb_size &&
+                   avail > 0)
+                       continue;
+
+               if (avail <= 0 && next_len &&
+                   buf_offs + used + next_len <= c->leb_size)
+                       blen = buf_len;
+               else
+                       blen = ALIGN(wlen, c->min_io_size);
+
+               /* The buffer is full or there are no more znodes to do */
+               buf_offs += blen;
+               if (next_len) {
+                       if (buf_offs + next_len > c->leb_size) {
+                               err = ubifs_update_one_lp(c, lnum,
+                                       c->leb_size - buf_offs, blen - used,
+                                       0, 0);
+                               if (err)
+                                       return err;
+                               lnum = -1;
+                       }
+                       used -= blen;
+                       if (used < 0)
+                               used = 0;
+                       avail = buf_len - used;
+                       continue;
+               }
+               err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs,
+                                         blen - used, 0, 0);
+               if (err)
+                       return err;
+               break;
+       }
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+       c->new_ihead_lnum = lnum;
+       c->new_ihead_offs = buf_offs;
+#endif
+
+       return 0;
+}
+
+/**
+ * layout_commit - determine positions of index nodes to commit.
+ * @c: UBIFS file-system description object
+ * @no_space: indicates that insufficient empty LEBs were allocated
+ * @cnt: number of znodes to commit
+ *
+ * Calculate and update the positions of index nodes to commit.  If there were
+ * an insufficient number of empty LEBs allocated, then index nodes are placed
+ * into the gaps created by obsolete index nodes in non-empty index LEBs.  For
+ * this purpose, an obsolete index node is one that was not in the index as at
+ * the end of the last commit.  To write "in-the-gaps" requires that those index
+ * LEBs are updated atomically in-place.
+ */
+static int layout_commit(struct ubifs_info *c, int no_space, int cnt)
+{
+       int err;
+
+       if (no_space) {
+               err = layout_in_gaps(c, cnt);
+               if (err)
+                       return err;
+       }
+       err = layout_in_empty_space(c);
+       return err;
+}
+
+/**
+ * find_first_dirty - find first dirty znode.
+ * @znode: znode to begin searching from
+ */
+static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode)
+{
+       int i, cont;
+
+       if (!znode)
+               return NULL;
+
+       while (1) {
+               if (znode->level == 0) {
+                       if (ubifs_zn_dirty(znode))
+                               return znode;
+                       return NULL;
+               }
+               cont = 0;
+               for (i = 0; i < znode->child_cnt; i++) {
+                       struct ubifs_zbranch *zbr = &znode->zbranch[i];
+
+                       if (zbr->znode && ubifs_zn_dirty(zbr->znode)) {
+                               znode = zbr->znode;
+                               cont = 1;
+                               break;
+                       }
+               }
+               if (!cont) {
+                       if (ubifs_zn_dirty(znode))
+                               return znode;
+                       return NULL;
+               }
+       }
+}
+
+/**
+ * find_next_dirty - find next dirty znode.
+ * @znode: znode to begin searching from
+ */
+static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode)
+{
+       int n = znode->iip + 1;
+
+       znode = znode->parent;
+       if (!znode)
+               return NULL;
+       for (; n < znode->child_cnt; n++) {
+               struct ubifs_zbranch *zbr = &znode->zbranch[n];
+
+               if (zbr->znode && ubifs_zn_dirty(zbr->znode))
+                       return find_first_dirty(zbr->znode);
+       }
+       return znode;
+}
+
+/**
+ * get_znodes_to_commit - create list of dirty znodes to commit.
+ * @c: UBIFS file-system description object
+ *
+ * This function returns the number of znodes to commit.
+ */
+static int get_znodes_to_commit(struct ubifs_info *c)
+{
+       struct ubifs_znode *znode, *cnext;
+       int cnt = 0;
+
+       c->cnext = find_first_dirty(c->zroot.znode);
+       znode = c->enext = c->cnext;
+       if (!znode) {
+               dbg_cmt("no znodes to commit");
+               return 0;
+       }
+       cnt += 1;
+       while (1) {
+               ubifs_assert(!test_bit(COW_ZNODE, &znode->flags));
+               __set_bit(COW_ZNODE, &znode->flags);
+               znode->alt = 0;
+               cnext = find_next_dirty(znode);
+               if (!cnext) {
+                       znode->cnext = c->cnext;
+                       break;
+               }
+               znode->cnext = cnext;
+               znode = cnext;
+               cnt += 1;
+       }
+       dbg_cmt("committing %d znodes", cnt);
+       ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt));
+       return cnt;
+}
+
+/**
+ * alloc_idx_lebs - allocate empty LEBs to be used to commit.
+ * @c: UBIFS file-system description object
+ * @cnt: number of znodes to commit
+ *
+ * This function returns %-ENOSPC if it cannot allocate a sufficient number of
+ * empty LEBs.  %0 is returned on success, otherwise a negative error code
+ * is returned.
+ */
+static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
+{
+       int i, leb_cnt, lnum;
+
+       c->ileb_cnt = 0;
+       c->ileb_nxt = 0;
+       leb_cnt = get_leb_cnt(c, cnt);
+       dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt);
+       if (!leb_cnt)
+               return 0;
+       c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS);
+       if (!c->ilebs)
+               return -ENOMEM;
+       for (i = 0; i < leb_cnt; i++) {
+               lnum = ubifs_find_free_leb_for_idx(c);
+               if (lnum < 0)
+                       return lnum;
+               c->ilebs[c->ileb_cnt++] = lnum;
+               dbg_cmt("LEB %d", lnum);
+       }
+       if (dbg_force_in_the_gaps())
+               return -ENOSPC;
+       return 0;
+}
+
+/**
+ * free_unused_idx_lebs - free unused LEBs that were allocated for the commit.
+ * @c: UBIFS file-system description object
+ *
+ * It is possible that we allocate more empty LEBs for the commit than we need.
+ * This function frees the surplus.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int free_unused_idx_lebs(struct ubifs_info *c)
+{
+       int i, err = 0, lnum, er;
+
+       for (i = c->ileb_nxt; i < c->ileb_cnt; i++) {
+               lnum = c->ilebs[i];
+               dbg_cmt("LEB %d", lnum);
+               er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
+                                        LPROPS_INDEX | LPROPS_TAKEN, 0);
+               if (!err)
+                       err = er;
+       }
+       return err;
+}
+
+/**
+ * free_idx_lebs - free unused LEBs after commit end.
+ * @c: UBIFS file-system description object
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+static int free_idx_lebs(struct ubifs_info *c)
+{
+       int err;
+
+       err = free_unused_idx_lebs(c);
+       kfree(c->ilebs);
+       c->ilebs = NULL;
+       return err;
+}
+
+/**
+ * ubifs_tnc_start_commit - start TNC commit.
+ * @c: UBIFS file-system description object
+ * @zroot: new index root position is returned here
+ *
+ * This function prepares the list of indexing nodes to commit and lays out
+ * their positions on flash. If there is not enough free space it uses the
+ * in-gap commit method. Returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
+{
+       int err = 0, cnt;
+
+       mutex_lock(&c->tnc_mutex);
+       err = dbg_check_tnc(c, 1);
+       if (err)
+               goto out;
+       cnt = get_znodes_to_commit(c);
+       if (cnt != 0) {
+               int no_space = 0;
+
+               err = alloc_idx_lebs(c, cnt);
+               if (err == -ENOSPC)
+                       no_space = 1;
+               else if (err)
+                       goto out_free;
+               err = layout_commit(c, no_space, cnt);
+               if (err)
+                       goto out_free;
+               ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
+               err = free_unused_idx_lebs(c);
+               if (err)
+                       goto out;
+       }
+       destroy_old_idx(c);
+       memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch));
+
+       err = ubifs_save_dirty_idx_lnums(c);
+       if (err)
+               goto out;
+
+       spin_lock(&c->space_lock);
+       /*
+        * Although we have not finished committing yet, update size of the
+        * committed index ('c->old_idx_sz') and zero out the index growth
+        * budget. It is OK to do this now, because we've reserved all the
+        * space which is needed to commit the index, and it is safe for the
+        * budgeting subsystem to assume the index is already committed,
+        * even though it is not.
+        */
+       c->old_idx_sz = c->calc_idx_sz;
+       c->budg_uncommitted_idx = 0;
+       spin_unlock(&c->space_lock);
+       mutex_unlock(&c->tnc_mutex);
+
+       dbg_cmt("number of index LEBs %d", c->lst.idx_lebs);
+       dbg_cmt("size of index %llu", c->calc_idx_sz);
+       return err;
+
+out_free:
+       free_idx_lebs(c);
+out:
+       mutex_unlock(&c->tnc_mutex);
+       return err;
+}
+
+/**
+ * write_index - write index nodes.
+ * @c: UBIFS file-system description object
+ *
+ * This function writes the index nodes whose positions were laid out in the
+ * layout_in_empty_space function.
+ */
+static int write_index(struct ubifs_info *c)
+{
+       struct ubifs_idx_node *idx;
+       struct ubifs_znode *znode, *cnext;
+       int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
+       int avail, wlen, err, lnum_pos = 0;
+
+       cnext = c->enext;
+       if (!cnext)
+               return 0;
+
+       /*
+        * Always write index nodes to the index head so that index nodes and
+        * other types of nodes are never mixed in the same erase block.
+        */
+       lnum = c->ihead_lnum;
+       buf_offs = c->ihead_offs;
+
+       /* Allocate commit buffer */
+       buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size);
+       used = 0;
+       avail = buf_len;
+
+       /* Ensure there is enough room for first write */
+       next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
+       if (buf_offs + next_len > c->leb_size) {
+               err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0,
+                                         LPROPS_TAKEN);
+               if (err)
+                       return err;
+               lnum = -1;
+       }
+
+       while (1) {
+               cond_resched();
+
+               znode = cnext;
+               idx = c->cbuf + used;
+
+               /* Make index node */
+               idx->ch.node_type = UBIFS_IDX_NODE;
+               idx->child_cnt = cpu_to_le16(znode->child_cnt);
+               idx->level = cpu_to_le16(znode->level);
+               for (i = 0; i < znode->child_cnt; i++) {
+                       struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
+                       struct ubifs_zbranch *zbr = &znode->zbranch[i];
+
+                       key_write_idx(c, &zbr->key, &br->key);
+                       br->lnum = cpu_to_le32(zbr->lnum);
+                       br->offs = cpu_to_le32(zbr->offs);
+                       br->len = cpu_to_le32(zbr->len);
+                       if (!zbr->lnum || !zbr->len) {
+                               ubifs_err("bad ref in znode");
+                               dbg_dump_znode(c, znode);
+                               if (zbr->znode)
+                                       dbg_dump_znode(c, zbr->znode);
+                       }
+               }
+               len = ubifs_idx_node_sz(c, znode->child_cnt);
+               ubifs_prepare_node(c, idx, len, 0);
+
+               /* Determine the index node position */
+               if (lnum == -1) {
+                       lnum = c->ilebs[lnum_pos++];
+                       buf_offs = 0;
+                       used = 0;
+                       avail = buf_len;
+               }
+               offs = buf_offs + used;
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+               if (lnum != znode->lnum || offs != znode->offs ||
+                   len != znode->len) {
+                       ubifs_err("inconsistent znode posn");
+                       return -EINVAL;
+               }
+#endif
+
+               /* Grab some stuff from znode while we still can */
+               cnext = znode->cnext;
+
+               ubifs_assert(ubifs_zn_dirty(znode));
+               ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
+
+               /*
+                * It is important that other threads should see %DIRTY_ZNODE
+                * flag cleared before %COW_ZNODE. Specifically, it matters in
+                * the 'dirty_cow_znode()' function. This is the reason for the
+                * first barrier. Also, we want the bit changes to be seen by
+                * other threads ASAP, to avoid unnecessary copying, which is
+                * the reason for the second barrier.
+                */
+               clear_bit(DIRTY_ZNODE, &znode->flags);
+               smp_mb__before_clear_bit();
+               clear_bit(COW_ZNODE, &znode->flags);
+               smp_mb__after_clear_bit();
+
+               /* Do not access znode from this point on */
+
+               /* Update buffer positions */
+               wlen = used + len;
+               used += ALIGN(len, 8);
+               avail -= ALIGN(len, 8);
+
+               /*
+                * Calculate the next index node length to see if there is
+                * enough room for it
+                */
+               if (cnext == c->cnext)
+                       next_len = 0;
+               else
+                       next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
+
+               if (c->min_io_size == 1) {
+                       /*
+                        * Write the prepared index node immediately if there is
+                        * no minimum IO size
+                        */
+                       err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
+                                             wlen, UBI_SHORTTERM);
+                       if (err)
+                               return err;
+                       buf_offs += ALIGN(wlen, 8);
+                       if (next_len) {
+                               used = 0;
+                               avail = buf_len;
+                               if (buf_offs + next_len > c->leb_size) {
+                                       err = ubifs_update_one_lp(c, lnum,
+                                               LPROPS_NC, 0, 0, LPROPS_TAKEN);
+                                       if (err)
+                                               return err;
+                                       lnum = -1;
+                               }
+                               continue;
+                       }
+               } else {
+                       int blen, nxt_offs = buf_offs + used + next_len;
+
+                       if (next_len && nxt_offs <= c->leb_size) {
+                               if (avail > 0)
+                                       continue;
+                               else
+                                       blen = buf_len;
+                       } else {
+                               wlen = ALIGN(wlen, 8);
+                               blen = ALIGN(wlen, c->min_io_size);
+                               ubifs_pad(c, c->cbuf + wlen, blen - wlen);
+                       }
+                       /*
+                        * The buffer is full or there are no more znodes
+                        * to do
+                        */
+                       err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
+                                             blen, UBI_SHORTTERM);
+                       if (err)
+                               return err;
+                       buf_offs += blen;
+                       if (next_len) {
+                               if (nxt_offs > c->leb_size) {
+                                       err = ubifs_update_one_lp(c, lnum,
+                                               LPROPS_NC, 0, 0, LPROPS_TAKEN);
+                                       if (err)
+                                               return err;
+                                       lnum = -1;
+                               }
+                               used -= blen;
+                               if (used < 0)
+                                       used = 0;
+                               avail = buf_len - used;
+                               memmove(c->cbuf, c->cbuf + blen, used);
+                               continue;
+                       }
+               }
+               break;
+       }
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+       if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) {
+               ubifs_err("inconsistent ihead");
+               return -EINVAL;
+       }
+#endif
+
+       c->ihead_lnum = lnum;
+       c->ihead_offs = buf_offs;
+
+       return 0;
+}
+
+/**
+ * free_obsolete_znodes - free obsolete znodes.
+ * @c: UBIFS file-system description object
+ *
+ * At the end of commit end, obsolete znodes are freed.
+ */
+static void free_obsolete_znodes(struct ubifs_info *c)
+{
+       struct ubifs_znode *znode, *cnext;
+
+       cnext = c->cnext;
+       do {
+               znode = cnext;
+               cnext = znode->cnext;
+               if (test_bit(OBSOLETE_ZNODE, &znode->flags))
+                       kfree(znode);
+               else {
+                       znode->cnext = NULL;
+                       atomic_long_inc(&c->clean_zn_cnt);
+                       atomic_long_inc(&ubifs_clean_zn_cnt);
+               }
+       } while (cnext != c->cnext);
+}
+
+/**
+ * return_gap_lebs - return LEBs used by the in-gap commit method.
+ * @c: UBIFS file-system description object
+ *
+ * This function clears the "taken" flag for the LEBs which were used by the
+ * "commit in-the-gaps" method.
+ */
+static int return_gap_lebs(struct ubifs_info *c)
+{
+       int *p, err;
+
+       if (!c->gap_lebs)
+               return 0;
+
+       dbg_cmt("");
+       for (p = c->gap_lebs; *p != -1; p++) {
+               err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0,
+                                         LPROPS_TAKEN, 0);
+               if (err)
+                       return err;
+       }
+
+       kfree(c->gap_lebs);
+       c->gap_lebs = NULL;
+       return 0;
+}
+
+/**
+ * ubifs_tnc_end_commit - update the TNC for commit end.
+ * @c: UBIFS file-system description object
+ *
+ * Write the dirty znodes.
+ */
+int ubifs_tnc_end_commit(struct ubifs_info *c)
+{
+       int err;
+
+       if (!c->cnext)
+               return 0;
+
+       err = return_gap_lebs(c);
+       if (err)
+               return err;
+
+       err = write_index(c);
+       if (err)
+               return err;
+
+       mutex_lock(&c->tnc_mutex);
+
+       dbg_cmt("TNC height is %d", c->zroot.znode->level + 1);
+
+       free_obsolete_znodes(c);
+
+       c->cnext = NULL;
+       kfree(c->ilebs);
+       c->ilebs = NULL;
+
+       mutex_unlock(&c->tnc_mutex);
+
+       return 0;
+}
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c

new file mode 100644 (file)

index 0000000..a25c1cc
--- /dev/null
+++ b/fs/ubifs/tnc_misc.c
@@ -0,0 +1,494 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file contains miscellaneous TNC-related functions shared between
+ * different files. This file does not form any logically separate TNC
+ * sub-system. The file was created because there is a lot of TNC code and
+ * putting it all in one file would make that file too big and unreadable.
+ */
+
+#include "ubifs.h"
+
+/**
+ * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal.
+ * @zr: root of the subtree to traverse
+ * @znode: previous znode
+ *
+ * This function implements levelorder TNC traversal. The LNC is ignored.
+ * Returns the next element or %NULL if @znode is already the last one.
+ */
+struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
+                                             struct ubifs_znode *znode)
+{
+       int level, iip, level_search = 0;
+       struct ubifs_znode *zn;
+
+       ubifs_assert(zr);
+
+       if (unlikely(!znode))
+               return zr;
+
+       if (unlikely(znode == zr)) {
+               if (znode->level == 0)
+                       return NULL;
+               return ubifs_tnc_find_child(zr, 0);
+       }
+
+       level = znode->level;
+
+       iip = znode->iip;
+       while (1) {
+               ubifs_assert(znode->level <= zr->level);
+
+               /*
+                * First walk up until there is a znode with next branch to
+                * look at.
+                */
+               while (znode->parent != zr && iip >= znode->parent->child_cnt) {
+                       znode = znode->parent;
+                       iip = znode->iip;
+               }
+
+               if (unlikely(znode->parent == zr &&
+                            iip >= znode->parent->child_cnt)) {
+                       /* This level is done, switch to the lower one */
+                       level -= 1;
+                       if (level_search || level < 0)
+                               /*
+                                * We were already looking for znode at lower
+                                * level ('level_search'). As we are here
+                                * again, it just does not exist. Or all levels
+                                * were finished ('level < 0').
+                                */
+                               return NULL;
+
+                       level_search = 1;
+                       iip = -1;
+                       znode = ubifs_tnc_find_child(zr, 0);
+                       ubifs_assert(znode);
+               }
+
+               /* Switch to the next index */
+               zn = ubifs_tnc_find_child(znode->parent, iip + 1);
+               if (!zn) {
+                       /* No more children to look at, we have to walk up */
+                       iip = znode->parent->child_cnt;
+                       continue;
+               }
+
+               /* Walk back down to the level we came from ('level') */
+               while (zn->level != level) {
+                       znode = zn;
+                       zn = ubifs_tnc_find_child(zn, 0);
+                       if (!zn) {
+                               /*
+                                * This path is not too deep so it does not
+                                * reach 'level'. Try next path.
+                                */
+                               iip = znode->iip;
+                               break;
+                       }
+               }
+
+               if (zn) {
+                       ubifs_assert(zn->level >= 0);
+                       return zn;
+               }
+       }
+}
+
+/**
+ * ubifs_search_zbranch - search znode branch.
+ * @c: UBIFS file-system description object
+ * @znode: znode to search in
+ * @key: key to search for
+ * @n: znode branch slot number is returned here
+ *
+ * This is a helper function which searches for a branch with key @key in
+ * @znode using binary search. The result of the search may be:
+ *   o exact match, then %1 is returned, and the slot number of the branch is
+ *     stored in @n;
+ *   o no exact match, then %0 is returned and the slot number of the left
+ *     closest branch is returned in @n; if all keys in this znode are greater
+ *     than @key, then %-1 is returned in @n.
+ */
+int ubifs_search_zbranch(const struct ubifs_info *c,
+                        const struct ubifs_znode *znode,
+                        const union ubifs_key *key, int *n)
+{
+       int beg = 0, end = znode->child_cnt, uninitialized_var(mid);
+       int uninitialized_var(cmp);
+       const struct ubifs_zbranch *zbr = &znode->zbranch[0];
+
+       ubifs_assert(end > beg);
+
+       while (end > beg) {
+               mid = (beg + end) >> 1;
+               cmp = keys_cmp(c, key, &zbr[mid].key);
+               if (cmp > 0)
+                       beg = mid + 1;
+               else if (cmp < 0)
+                       end = mid;
+               else {
+                       *n = mid;
+                       return 1;
+               }
+       }
+
+       *n = end - 1;
+
+       /* The insert point is after *n */
+       ubifs_assert(*n >= -1 && *n < znode->child_cnt);
+       if (*n == -1)
+               ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0);
+       else
+               ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0);
+       if (*n + 1 < znode->child_cnt)
+               ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0);
+
+       return 0;
+}
+
+/**
+ * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal.
+ * @znode: znode to start at (root of the sub-tree to traverse)
+ *
+ * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is
+ * ignored.
+ */
+struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode)
+{
+       if (unlikely(!znode))
+               return NULL;
+
+       while (znode->level > 0) {
+               struct ubifs_znode *child;
+
+               child = ubifs_tnc_find_child(znode, 0);
+               if (!child)
+                       return znode;
+               znode = child;
+       }
+
+       return znode;
+}
+
+/**
+ * ubifs_tnc_postorder_next - advance a postorder TNC traversal by one znode.
+ * @znode: znode visited last
+ *
+ * Returns the znode which follows @znode in postorder, or %NULL when @znode
+ * has no parent, i.e. the traversal is complete. The LNC is ignored.
+ */
+struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode)
+{
+       struct ubifs_znode *parent, *sibling;
+
+       ubifs_assert(znode);
+       parent = znode->parent;
+       if (unlikely(!parent))
+               return NULL;
+
+       /* Look for a child of the parent positioned after this one */
+       sibling = ubifs_tnc_find_child(parent, znode->iip + 1);
+       if (sibling)
+               return ubifs_tnc_postorder_first(sibling);
+
+       /* No further children, so the parent itself comes next */
+       return parent;
+}
+
+/**
+ * ubifs_destroy_tnc_subtree - free every znode hanging off a sub-tree root.
+ * @znode: root of the sub-tree to free
+ *
+ * Walks the sub-tree in postorder and releases everything attached to it,
+ * including @znode itself. Returns the number of clean znodes that were freed.
+ */
+long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode)
+{
+       struct ubifs_znode *cur = ubifs_tnc_postorder_first(znode);
+       long nr_clean = 0;
+       int i;
+
+       ubifs_assert(cur);
+       for (;;) {
+               for (i = 0; i < cur->child_cnt; i++) {
+                       struct ubifs_zbranch *zbr = &cur->zbranch[i];
+
+                       if (!zbr->znode)
+                               continue;
+                       if (cur->level > 0 && !ubifs_zn_dirty(zbr->znode))
+                               nr_clean += 1;
+                       cond_resched();
+                       kfree(zbr->znode);
+               }
+
+               if (cur == znode)
+                       break;
+               cur = ubifs_tnc_postorder_next(cur);
+       }
+
+       /* Back at the sub-tree root: account for it and free it last */
+       if (!ubifs_zn_dirty(cur))
+               nr_clean += 1;
+       kfree(cur);
+       return nr_clean;
+}
+
+/**
+ * read_znode - read an indexing node from flash and fill znode.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB of the indexing node to read
+ * @offs: node offset
+ * @len: node length
+ * @znode: znode to read to
+ *
+ * This function reads an indexing node from the flash media and fills znode
+ * with the read data. Returns zero in case of success and a negative error
+ * code in case of failure. The read indexing node is validated and if anything
+ * is wrong with it, this function prints complaint messages (the number of
+ * the failed check included) and returns %-EINVAL.
+ */
+static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
+                     struct ubifs_znode *znode)
+{
+       int i, err, type, cmp;
+       struct ubifs_idx_node *idx;
+
+       idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
+       if (!idx)
+               return -ENOMEM;
+
+       err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
+       if (err < 0) {
+               kfree(idx);
+               return err;
+       }
+
+       znode->child_cnt = le16_to_cpu(idx->child_cnt);
+       znode->level = le16_to_cpu(idx->level);
+
+       dbg_tnc("LEB %d:%d, level %d, %d branch",
+               lnum, offs, znode->level, znode->child_cnt);
+
+       if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) {
+               dbg_err("current fanout %d, branch count %d",
+                       c->fanout, znode->child_cnt);
+               dbg_err("max levels %d, znode level %d",
+                       UBIFS_MAX_LEVELS, znode->level);
+               err = 1;
+               goto out_dump;
+       }
+
+       for (i = 0; i < znode->child_cnt; i++) {
+               const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
+               struct ubifs_zbranch *zbr = &znode->zbranch[i];
+
+               key_read(c, &br->key, &zbr->key);
+               zbr->lnum = le32_to_cpu(br->lnum);
+               zbr->offs = le32_to_cpu(br->offs);
+               zbr->len  = le32_to_cpu(br->len);
+               zbr->znode = NULL;
+
+               /* Validate branch */
+
+               if (zbr->lnum < c->main_first ||
+                   zbr->lnum >= c->leb_cnt || zbr->offs < 0 ||
+                   zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) {
+                       dbg_err("bad branch %d", i);
+                       err = 2;
+                       goto out_dump;
+               }
+
+               type = key_type(c, &zbr->key);
+               switch (type) {
+               case UBIFS_INO_KEY:
+               case UBIFS_DATA_KEY:
+               case UBIFS_DENT_KEY:
+               case UBIFS_XENT_KEY:
+                       break;
+               default:
+                       dbg_err("bad key type at slot %d: %s", i,
+                               DBGKEY(&zbr->key));
+                       err = 3;
+                       goto out_dump;
+               }
+
+               if (znode->level)
+                       continue;
+
+               if (c->ranges[type].max_len == 0) {
+                       if (zbr->len != c->ranges[type].len) {
+                               dbg_err("bad target node (type %d) length (%d)",
+                                       type, zbr->len);
+                               dbg_err("have to be %d", c->ranges[type].len);
+                               err = 4;
+                               goto out_dump;
+                       }
+               } else if (zbr->len < c->ranges[type].min_len ||
+                          zbr->len > c->ranges[type].max_len) {
+                       dbg_err("bad target node (type %d) length (%d)",
+                               type, zbr->len);
+                       dbg_err("have to be in range of %d-%d",
+                               c->ranges[type].min_len,
+                               c->ranges[type].max_len);
+                       err = 5;
+                       goto out_dump;
+               }
+       }
+
+       /*
+        * Ensure that the next key is greater or equivalent to the
+        * previous one.
+        */
+       for (i = 0; i < znode->child_cnt - 1; i++) {
+               const union ubifs_key *key1, *key2;
+
+               key1 = &znode->zbranch[i].key;
+               key2 = &znode->zbranch[i + 1].key;
+
+               cmp = keys_cmp(c, key1, key2);
+               if (cmp > 0) {
+                       dbg_err("bad key order (keys %d and %d)", i, i + 1);
+                       err = 6;
+                       goto out_dump;
+               } else if (cmp == 0 && !is_hash_key(c, key1)) {
+                       /* These can only be keys with colliding hash */
+                       dbg_err("keys %d and %d are not hashed but equivalent",
+                               i, i + 1);
+                       err = 7;
+                       goto out_dump;
+               }
+       }
+
+       kfree(idx);
+       return 0;
+
+out_dump:
+       ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
+       dbg_dump_node(c, idx);
+       kfree(idx);
+       return -EINVAL;
+}
+
+/**
+ * ubifs_load_znode - bring a znode into the TNC cache.
+ * @c: UBIFS file-system description object
+ * @zbr: branch which refers to the znode to load
+ * @parent: parent of the znode being loaded
+ * @iip: index of the znode in its parent
+ *
+ * Allocates a znode, fills it from the indexing node which @zbr points to,
+ * accounts it as a clean znode and attaches it to @zbr. Returns the new znode
+ * in case of success and a negative error code wrapped by 'ERR_PTR()' in case
+ * of failure.
+ */
+struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
+                                    struct ubifs_zbranch *zbr,
+                                    struct ubifs_znode *parent, int iip)
+{
+       struct ubifs_znode *new_zn;
+       int ret;
+
+       ubifs_assert(!zbr->znode);
+
+       /*
+        * The znode size depends on the fanout, which is stored in the
+        * superblock, hence no slab cache is presently used for znodes.
+        */
+       new_zn = kzalloc(c->max_znode_sz, GFP_NOFS);
+       if (!new_zn)
+               return ERR_PTR(-ENOMEM);
+
+       ret = read_znode(c, zbr->lnum, zbr->offs, zbr->len, new_zn);
+       if (ret) {
+               kfree(new_zn);
+               return ERR_PTR(ret);
+       }
+
+       /*
+        * Account the new znode in both the per-FS and the global clean znode
+        * counters. The two may be inconsistent for a short time (we might be
+        * preempted between the increments), which is OK because the global
+        * one is only used in shrinker.
+        */
+       atomic_long_inc(&c->clean_zn_cnt);
+       atomic_long_inc(&ubifs_clean_zn_cnt);
+
+       /* Hook the znode up to its branch and parent */
+       zbr->znode = new_zn;
+       new_zn->parent = parent;
+       new_zn->time = get_seconds();
+       new_zn->iip = iip;
+
+       return new_zn;
+}
+
+/**
+ * ubifs_tnc_read_node - read a leaf node from the flash media.
+ * @c: UBIFS file-system description object
+ * @zbr: key and position of the node
+ * @node: node is returned here
+ *
+ * This function reads a node defined by @zbr from the flash media. Returns
+ * zero in case of success or a negative error code in case of failure
+ * (%-EINVAL if the key stored in the read node does not match @zbr).
+ */
+int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+                       void *node)
+{
+       union ubifs_key key1, *key = &zbr->key;
+       int err, type = key_type(c, key);
+       struct ubifs_wbuf *wbuf;
+
+       /*
+        * 'zbr' has to point to on-flash node. The node may sit in a bud and
+        * may even be in a write buffer, so we have to take care about this.
+        */
+       wbuf = ubifs_get_wbuf(c, zbr->lnum);
+       if (wbuf)
+               err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len,
+                                          zbr->lnum, zbr->offs);
+       else
+               err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum,
+                                     zbr->offs);
+
+       if (err) {
+               dbg_tnc("key %s", DBGKEY(key));
+               return err;
+       }
+
+       /* Read the node's own key and make sure it is the one looked for */
+       key_read(c, node + UBIFS_KEY_OFFSET, &key1);
+       if (keys_cmp(c, key, &key1)) {
+               ubifs_err("bad key in node at LEB %d:%d",
+                         zbr->lnum, zbr->offs);
+               dbg_tnc("looked for key %s found node's key %s",
+                       DBGKEY(key), DBGKEY1(&key1));
+               dbg_dump_node(c, node);
+               return -EINVAL;
+       }
+
+       return 0;
+}
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h

new file mode 100644 (file)

index 0000000..0cc7da9
--- /dev/null
+++ b/fs/ubifs/ubifs-media.h
@@ -0,0 +1,745 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file describes UBIFS on-flash format and contains definitions of all the
+ * relevant data structures and constants.
+ *
+ * All UBIFS on-flash objects are stored in the form of nodes. All nodes start
+ * with the UBIFS node magic number and have the same common header. Nodes
+ * always sit at 8-byte aligned positions on the media and node header sizes are
+ * also 8-byte aligned (except for the indexing node and the padding node).
+ */
+
+#ifndef __UBIFS_MEDIA_H__
+#define __UBIFS_MEDIA_H__
+
+/* UBIFS node magic number (must not have the padding byte first or last) */
+#define UBIFS_NODE_MAGIC  0x06101831
+
+/* UBIFS on-flash format version */
+#define UBIFS_FORMAT_VERSION 4
+
+/* Minimum logical eraseblock size in bytes */
+#define UBIFS_MIN_LEB_SZ (15*1024)
+
+/* Initial CRC32 value used when calculating CRC checksums */
+#define UBIFS_CRC32_INIT 0xFFFFFFFFU
+
+/*
+ * UBIFS does not try to compress data if its length is less than the below
+ * constant.
+ */
+#define UBIFS_MIN_COMPR_LEN 128
+
+/* Root inode number */
+#define UBIFS_ROOT_INO 1
+
+/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */
+#define UBIFS_FIRST_INO 64
+
+/*
+ * Maximum file name and extended attribute length (must be a multiple of 8,
+ * minus 1).
+ */
+#define UBIFS_MAX_NLEN 255
+
+/* Maximum number of data journal heads */
+#define UBIFS_MAX_JHEADS 1
+
+/*
+ * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system,
+ * which means that it does not treat the underlying media as consisting of
+ * blocks like in case of hard drives. Do not be confused. UBIFS block is just
+ * the maximum amount of data which one data node can have or which can be
+ * attached to an inode node.
+ */
+#define UBIFS_BLOCK_SIZE  4096
+#define UBIFS_BLOCK_SHIFT 12
+#define UBIFS_BLOCK_MASK  0x00000FFF
+
+/* UBIFS padding byte pattern (must not be first or last byte of node magic) */
+#define UBIFS_PADDING_BYTE 0xCE
+
+/* Maximum possible key length */
+#define UBIFS_MAX_KEY_LEN 16
+
+/* Key length ("simple" format) */
+#define UBIFS_SK_LEN 8
+
+/* Minimum index tree fanout */
+#define UBIFS_MIN_FANOUT 2
+
+/* Maximum number of levels in UBIFS indexing B-tree */
+#define UBIFS_MAX_LEVELS 512
+
+/* Maximum amount of data attached to an inode in bytes */
+#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE
+
+/* LEB Properties Tree fanout (must be power of 2) and fanout shift */
+#define UBIFS_LPT_FANOUT 4
+#define UBIFS_LPT_FANOUT_SHIFT 2
+
+/* LEB Properties Tree bit field sizes */
+#define UBIFS_LPT_CRC_BITS 16
+#define UBIFS_LPT_CRC_BYTES 2
+#define UBIFS_LPT_TYPE_BITS 4
+
+/* The key is always at the same position in all keyed nodes */
+#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key)
+
+/*
+ * LEB Properties Tree node types.
+ *
+ * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties)
+ * UBIFS_LPT_NNODE: LPT internal node
+ * UBIFS_LPT_LTAB: LPT's own lprops table
+ * UBIFS_LPT_LSAVE: LPT's save table (big model only)
+ * UBIFS_LPT_NODE_CNT: count of LPT node types
+ * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type
+ */
+enum {
+       UBIFS_LPT_PNODE,
+       UBIFS_LPT_NNODE,
+       UBIFS_LPT_LTAB,
+       UBIFS_LPT_LSAVE,
+       UBIFS_LPT_NODE_CNT,
+       UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1,
+};
+
+/*
+ * UBIFS inode types.
+ *
+ * UBIFS_ITYPE_REG: regular file
+ * UBIFS_ITYPE_DIR: directory
+ * UBIFS_ITYPE_LNK: soft link
+ * UBIFS_ITYPE_BLK: block device node
+ * UBIFS_ITYPE_CHR: character device node
+ * UBIFS_ITYPE_FIFO: fifo
+ * UBIFS_ITYPE_SOCK: socket
+ * UBIFS_ITYPES_CNT: count of supported file types
+ */
+enum {
+       UBIFS_ITYPE_REG,
+       UBIFS_ITYPE_DIR,
+       UBIFS_ITYPE_LNK,
+       UBIFS_ITYPE_BLK,
+       UBIFS_ITYPE_CHR,
+       UBIFS_ITYPE_FIFO,
+       UBIFS_ITYPE_SOCK,
+       UBIFS_ITYPES_CNT,
+};
+
+/*
+ * Supported key hash functions.
+ *
+ * UBIFS_KEY_HASH_R5: R5 hash
+ * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name
+ */
+enum {
+       UBIFS_KEY_HASH_R5,
+       UBIFS_KEY_HASH_TEST,
+};
+
+/*
+ * Supported key formats.
+ *
+ * UBIFS_SIMPLE_KEY_FMT: simple key format
+ */
+enum {
+       UBIFS_SIMPLE_KEY_FMT,
+};
+
+/*
+ * The simple key format uses 29 bits for storing UBIFS block number and hash
+ * value.
+ */
+#define UBIFS_S_KEY_BLOCK_BITS 29
+#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF
+#define UBIFS_S_KEY_HASH_BITS  UBIFS_S_KEY_BLOCK_BITS
+#define UBIFS_S_KEY_HASH_MASK  UBIFS_S_KEY_BLOCK_MASK
+
+/*
+ * Key types.
+ *
+ * UBIFS_INO_KEY: inode node key
+ * UBIFS_DATA_KEY: data node key
+ * UBIFS_DENT_KEY: directory entry node key
+ * UBIFS_XENT_KEY: extended attribute entry key
+ * UBIFS_KEY_TYPES_CNT: number of supported key types
+ */
+enum {
+       UBIFS_INO_KEY,
+       UBIFS_DATA_KEY,
+       UBIFS_DENT_KEY,
+       UBIFS_XENT_KEY,
+       UBIFS_KEY_TYPES_CNT,
+};
+
+/* Count of LEBs reserved for the superblock area */
+#define UBIFS_SB_LEBS 1
+/* Count of LEBs reserved for the master area */
+#define UBIFS_MST_LEBS 2
+
+/* First LEB of the superblock area */
+#define UBIFS_SB_LNUM 0
+/* First LEB of the master area */
+#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS)
+/* First LEB of the log area */
+#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS)
+
+/*
+ * The below constants define the absolute minimum values for various UBIFS
+ * media areas. Many of them actually depend on flash geometry and the FS
+ * configuration (number of journal heads, orphan LEBs, etc). This means that
+ * the smallest volume size which can be used for UBIFS cannot be pre-defined
+ * by these constants. The file-system that meets the below limitation will not
+ * necessarily mount. UBIFS does run-time calculations and validates the FS
+ * size.
+ */
+
+/* Minimum number of logical eraseblocks in the log */
+#define UBIFS_MIN_LOG_LEBS 2
+/* Minimum number of bud logical eraseblocks (one for each head) */
+#define UBIFS_MIN_BUD_LEBS 3
+/* Minimum number of journal logical eraseblocks */
+#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS)
+/* Minimum number of LPT area logical eraseblocks */
+#define UBIFS_MIN_LPT_LEBS 2
+/* Minimum number of orphan area logical eraseblocks */
+#define UBIFS_MIN_ORPH_LEBS 1
+/*
+ * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1
+ * for GC, 1 for deletions, and at least 1 for committed data).
+ */
+#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5)
+
+/* Minimum number of logical eraseblocks */
+#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \
+                          UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \
+                          UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS)
+
+/* Node sizes (N.B. multiples of 8, except the indexing and padding nodes) */
+#define UBIFS_CH_SZ        sizeof(struct ubifs_ch)
+#define UBIFS_INO_NODE_SZ  sizeof(struct ubifs_ino_node)
+#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node)
+#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node)
+#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node)
+#define UBIFS_PAD_NODE_SZ  sizeof(struct ubifs_pad_node)
+#define UBIFS_SB_NODE_SZ   sizeof(struct ubifs_sb_node)
+#define UBIFS_MST_NODE_SZ  sizeof(struct ubifs_mst_node)
+#define UBIFS_REF_NODE_SZ  sizeof(struct ubifs_ref_node)
+#define UBIFS_IDX_NODE_SZ  sizeof(struct ubifs_idx_node)
+#define UBIFS_CS_NODE_SZ   sizeof(struct ubifs_cs_node)
+#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node)
+/* Extended attribute entry nodes are identical to directory entry nodes */
+#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ
+/* Only this does not have to be multiple of 8 bytes */
+#define UBIFS_BRANCH_SZ    sizeof(struct ubifs_branch)
+
+/* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */
+#define UBIFS_MAX_DATA_NODE_SZ  (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE)
+#define UBIFS_MAX_INO_NODE_SZ   (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA)
+#define UBIFS_MAX_DENT_NODE_SZ  (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1)
+#define UBIFS_MAX_XENT_NODE_SZ  UBIFS_MAX_DENT_NODE_SZ
+
+/* The largest UBIFS node */
+#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ
+
+/*
+ * On-flash inode flags.
+ *
+ * UBIFS_COMPR_FL: use compression for this inode
+ * UBIFS_SYNC_FL:  I/O on this inode has to be synchronous
+ * UBIFS_IMMUTABLE_FL: inode is immutable
+ * UBIFS_APPEND_FL: writes to the inode may only append data
+ * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous
+ * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value
+ *
+ * Note, these are on-flash flags which correspond to ioctl flags
+ * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not
+ * have to be the same.
+ */
+enum {
+       UBIFS_COMPR_FL     = 0x01,
+       UBIFS_SYNC_FL      = 0x02,
+       UBIFS_IMMUTABLE_FL = 0x04,
+       UBIFS_APPEND_FL    = 0x08,
+       UBIFS_DIRSYNC_FL   = 0x10,
+       UBIFS_XATTR_FL     = 0x20,
+};
+
+/* Inode flag bits used by UBIFS */
+#define UBIFS_FL_MASK 0x0000001F
+
+/*
+ * UBIFS compression algorithms.
+ *
+ * UBIFS_COMPR_NONE: no compression
+ * UBIFS_COMPR_LZO: LZO compression
+ * UBIFS_COMPR_ZLIB: ZLIB compression
+ * UBIFS_COMPR_TYPES_CNT: count of supported compression types
+ */
+enum {
+       UBIFS_COMPR_NONE,
+       UBIFS_COMPR_LZO,
+       UBIFS_COMPR_ZLIB,
+       UBIFS_COMPR_TYPES_CNT,
+};
+
+/*
+ * UBIFS node types.
+ *
+ * UBIFS_INO_NODE: inode node
+ * UBIFS_DATA_NODE: data node
+ * UBIFS_DENT_NODE: directory entry node
+ * UBIFS_XENT_NODE: extended attribute node
+ * UBIFS_TRUN_NODE: truncation node
+ * UBIFS_PAD_NODE: padding node
+ * UBIFS_SB_NODE: superblock node
+ * UBIFS_MST_NODE: master node
+ * UBIFS_REF_NODE: LEB reference node
+ * UBIFS_IDX_NODE: index node
+ * UBIFS_CS_NODE: commit start node
+ * UBIFS_ORPH_NODE: orphan node
+ * UBIFS_NODE_TYPES_CNT: count of supported node types
+ *
+ * Note, we index arrays by these numbers, so keep them low and contiguous.
+ * Node type constants for inodes, direntries and so on have to be the same as
+ * corresponding key type constants.
+ */
+enum {
+       UBIFS_INO_NODE,
+       UBIFS_DATA_NODE,
+       UBIFS_DENT_NODE,
+       UBIFS_XENT_NODE,
+       UBIFS_TRUN_NODE,
+       UBIFS_PAD_NODE,
+       UBIFS_SB_NODE,
+       UBIFS_MST_NODE,
+       UBIFS_REF_NODE,
+       UBIFS_IDX_NODE,
+       UBIFS_CS_NODE,
+       UBIFS_ORPH_NODE,
+       UBIFS_NODE_TYPES_CNT,
+};
+
+/*
+ * Master node flags.
+ *
+ * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty
+ * UBIFS_MST_NO_ORPHS: no orphan inodes present
+ * UBIFS_MST_RCVRY: written by recovery
+ */
+enum {
+       UBIFS_MST_DIRTY = 1,
+       UBIFS_MST_NO_ORPHS = 2,
+       UBIFS_MST_RCVRY = 4,
+};
+
+/*
+ * Node group type (used by recovery to recover whole group or none).
+ *
+ * UBIFS_NO_NODE_GROUP: this node is not part of a group
+ * UBIFS_IN_NODE_GROUP: this node is a part of a group
+ * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group
+ */
+enum {
+       UBIFS_NO_NODE_GROUP = 0,
+       UBIFS_IN_NODE_GROUP,
+       UBIFS_LAST_OF_NODE_GROUP,
+};
+
+/*
+ * Superblock flags.
+ *
+ * UBIFS_FLG_BIGLPT: the "big" LPT model is used if set
+ */
+enum {
+       UBIFS_FLG_BIGLPT = 0x02,
+};
+
+/**
+ * struct ubifs_ch - common header node.
+ * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC)
+ * @crc: CRC-32 checksum of the node header
+ * @sqnum: sequence number
+ * @len: full node length
+ * @node_type: node type
+ * @group_type: node group type
+ * @padding: reserved for future, zeroes
+ *
+ * Every UBIFS node starts with this common part. If the node has a key, the
+ * key always goes next.
+ */
+struct ubifs_ch {
+       __le32 magic;
+       __le32 crc;
+       __le64 sqnum;
+       __le32 len;
+       __u8 node_type;
+       __u8 group_type;
+       __u8 padding[2];
+} __attribute__ ((packed));
+
+/**
+ * union ubifs_dev_desc - device node descriptor.
+ * @new: new type device descriptor
+ * @huge: huge type device descriptor
+ *
+ * This data structure describes major/minor numbers of a device node. If an
+ * inode is a device node then its data contains an object of this type. UBIFS
+ * uses standard Linux "new" and "huge" device node encodings.
+ */
+union ubifs_dev_desc {
+       __le32 new;
+       __le64 huge;
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_ino_node - inode node.
+ * @ch: common header
+ * @key: node key
+ * @creat_sqnum: sequence number at time of creation
+ * @size: inode size in bytes (amount of uncompressed data)
+ * @atime_sec: access time seconds
+ * @ctime_sec: creation time seconds
+ * @mtime_sec: modification time seconds
+ * @atime_nsec: access time nanoseconds
+ * @ctime_nsec: creation time nanoseconds
+ * @mtime_nsec: modification time nanoseconds
+ * @nlink: number of hard links
+ * @uid: owner ID
+ * @gid: group ID
+ * @mode: access flags
+ * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc)
+ * @data_len: inode data length
+ * @xattr_cnt: count of extended attributes this inode has
+ * @xattr_size: summarized size of all extended attributes in bytes
+ * @padding1: reserved for future, zeroes
+ * @xattr_names: sum of lengths of all extended attribute names belonging to
+ *               this inode
+ * @compr_type: compression type used for this inode
+ * @padding2: reserved for future, zeroes
+ * @data: data attached to the inode
+ *
+ * Note, even though inode compression type is defined by @compr_type, some
+ * nodes of this inode may be compressed with different compressor - this
+ * happens if compression type is changed while the inode already has data
+ * nodes. But @compr_type will be used for further writes to the inode.
+ *
+ * Note, do not forget to amend 'zero_ino_node_unused()' function when changing
+ * the padding fields.
+ */
+struct ubifs_ino_node {
+       struct ubifs_ch ch;
+       __u8 key[UBIFS_MAX_KEY_LEN];
+       __le64 creat_sqnum;
+       __le64 size;
+       __le64 atime_sec;
+       __le64 ctime_sec;
+       __le64 mtime_sec;
+       __le32 atime_nsec;
+       __le32 ctime_nsec;
+       __le32 mtime_nsec;
+       __le32 nlink;
+       __le32 uid;
+       __le32 gid;
+       __le32 mode;
+       __le32 flags;
+       __le32 data_len;
+       __le32 xattr_cnt;
+       __le32 xattr_size;
+       __u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */
+       __le32 xattr_names;
+       __le16 compr_type;
+       __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
+       __u8 data[];
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_dent_node - directory entry node.
+ * @ch: common header
+ * @key: node key
+ * @inum: target inode number
+ * @padding1: reserved for future, zeroes
+ * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc)
+ * @nlen: name length
+ * @padding2: reserved for future, zeroes
+ * @name: zero-terminated name
+ *
+ * Note, do not forget to amend 'zero_dent_node_unused()' function when
+ * changing the padding fields.
+ */
+struct ubifs_dent_node {
+       struct ubifs_ch ch;
+       __u8 key[UBIFS_MAX_KEY_LEN];
+       __le64 inum;
+       __u8 padding1;
+       __u8 type;
+       __le16 nlen;
+       __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
+       __u8 name[];
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_data_node - data node.
+ * @ch: common header
+ * @key: node key
+ * @size: uncompressed data size in bytes
+ * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc)
+ * @padding: reserved for future, zeroes
+ * @data: data
+ *
+ * Note, do not forget to amend 'zero_data_node_unused()' function when
+ * changing the padding fields.
+ */
+struct ubifs_data_node {
+       struct ubifs_ch ch;
+       __u8 key[UBIFS_MAX_KEY_LEN];
+       __le32 size;
+       __le16 compr_type;
+       __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
+       __u8 data[];
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_trun_node - truncation node.
+ * @ch: common header
+ * @inum: truncated inode number
+ * @padding: reserved for future, zeroes
+ * @old_size: size before truncation
+ * @new_size: size after truncation
+ *
+ * This node exists only in the journal and never goes to the main area. Note,
+ * do not forget to amend 'zero_trun_node_unused()' function when changing the
+ * padding fields.
+ */
+struct ubifs_trun_node {
+       struct ubifs_ch ch;
+       __le32 inum;
+       __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
+       __le64 old_size;
+       __le64 new_size;
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_pad_node - padding node.
+ * @ch: common header
+ * @pad_len: how many bytes after this node are unused (because padded)
+ *           (this is the only field which follows the common header)
+ */
+struct ubifs_pad_node {
+       struct ubifs_ch ch;
+       __le32 pad_len;
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_sb_node - superblock node.
+ * @ch: common header
+ * @padding: reserved for future, zeroes
+ * @key_hash: type of hash function used in keys
+ * @key_fmt: format of the key
+ * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc)
+ * @min_io_size: minimal input/output unit size
+ * @leb_size: logical eraseblock size in bytes
+ * @leb_cnt: count of LEBs used by file-system
+ * @max_leb_cnt: maximum count of LEBs used by file-system
+ * @max_bud_bytes: maximum amount of data stored in buds
+ * @log_lebs: log size in logical eraseblocks
+ * @lpt_lebs: number of LEBs used for lprops table
+ * @orph_lebs: number of LEBs used for recording orphans
+ * @jhead_cnt: count of journal heads
+ * @fanout: tree fanout (max. number of links per indexing node)
+ * @lsave_cnt: number of LEB numbers in LPT's save table
+ * @fmt_version: UBIFS on-flash format version
+ * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
+ * @padding1: reserved for future, zeroes
+ * @rp_uid: reserve pool UID
+ * @rp_gid: reserve pool GID
+ * @rp_size: size of the reserved pool in bytes
+ * @time_gran: time granularity in nanoseconds
+ * @uuid: UUID generated when the file system image was created
+ * @padding2: reserved for future, zeroes
+ */
+struct ubifs_sb_node {
+       struct ubifs_ch ch;
+       __u8 padding[2];
+       __u8 key_hash;
+       __u8 key_fmt;
+       __le32 flags;
+       __le32 min_io_size;
+       __le32 leb_size;
+       __le32 leb_cnt;
+       __le32 max_leb_cnt;
+       __le64 max_bud_bytes;
+       __le32 log_lebs;
+       __le32 lpt_lebs;
+       __le32 orph_lebs;
+       __le32 jhead_cnt;
+       __le32 fanout;
+       __le32 lsave_cnt;
+       __le32 fmt_version;
+       __le16 default_compr;
+       __u8 padding1[2];
+       __le32 rp_uid;
+       __le32 rp_gid;
+       __le64 rp_size;
+       __le32 time_gran;
+       __u8 uuid[16];
+       __u8 padding2[3972];
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_mst_node - master node.
+ * @ch: common header
+ * @highest_inum: highest inode number in the committed index
+ * @cmt_no: commit number
+ * @flags: various flags (%UBIFS_MST_DIRTY, etc)
+ * @log_lnum: start of the log
+ * @root_lnum: LEB number of the root indexing node
+ * @root_offs: offset within @root_lnum
+ * @root_len: root indexing node length
+ * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was
+ * not reserved and should be reserved on mount)
+ * @ihead_lnum: LEB number of index head
+ * @ihead_offs: offset of index head
+ * @index_size: size of index on flash
+ * @total_free: total free space in bytes
+ * @total_dirty: total dirty space in bytes
+ * @total_used: total used space in bytes (includes only data LEBs)
+ * @total_dead: total dead space in bytes (includes only data LEBs)
+ * @total_dark: total dark space in bytes (includes only data LEBs)
+ * @lpt_lnum: LEB number of LPT root nnode
+ * @lpt_offs: offset of LPT root nnode
+ * @nhead_lnum: LEB number of LPT head
+ * @nhead_offs: offset of LPT head
+ * @ltab_lnum: LEB number of LPT's own lprops table
+ * @ltab_offs: offset of LPT's own lprops table
+ * @lsave_lnum: LEB number of LPT's save table (big model only)
+ * @lsave_offs: offset of LPT's save table (big model only)
+ * @lscan_lnum: LEB number of last LPT scan
+ * @empty_lebs: number of empty logical eraseblocks
+ * @idx_lebs: number of indexing logical eraseblocks
+ * @leb_cnt: count of LEBs used by file-system
+ * @padding: reserved for future, zeroes
+ */
+struct ubifs_mst_node {
+       struct ubifs_ch ch;
+       __le64 highest_inum;
+       __le64 cmt_no;
+       __le32 flags;
+       __le32 log_lnum;
+       __le32 root_lnum;
+       __le32 root_offs;
+       __le32 root_len;
+       __le32 gc_lnum;
+       __le32 ihead_lnum;
+       __le32 ihead_offs;
+       __le64 index_size;
+       __le64 total_free;
+       __le64 total_dirty;
+       __le64 total_used;
+       __le64 total_dead;
+       __le64 total_dark;
+       __le32 lpt_lnum;
+       __le32 lpt_offs;
+       __le32 nhead_lnum;
+       __le32 nhead_offs;
+       __le32 ltab_lnum;
+       __le32 ltab_offs;
+       __le32 lsave_lnum;
+       __le32 lsave_offs;
+       __le32 lscan_lnum;
+       __le32 empty_lebs;
+       __le32 idx_lebs;
+       __le32 leb_cnt;
+       __u8 padding[344];
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_ref_node - logical eraseblock reference node.
+ * @ch: common header
+ * @lnum: the referred logical eraseblock number
+ * @offs: start offset in the referred LEB
+ * @jhead: journal head number
+ * @padding: reserved for future, zeroes
+ */
+struct ubifs_ref_node {
+       struct ubifs_ch ch;
+       __le32 lnum;
+       __le32 offs;
+       __le32 jhead;
+       __u8 padding[28];
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_branch - key/reference/length branch
+ * @lnum: LEB number of the target node
+ * @offs: offset within @lnum
+ * @len: target node length
+ * @key: key
+ */
+struct ubifs_branch {
+       __le32 lnum;
+       __le32 offs;
+       __le32 len;
+       __u8 key[];
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_idx_node - indexing node.
+ * @ch: common header
+ * @child_cnt: number of child index nodes
+ * @level: tree level
+ * @branches: LEB number / offset / length / key branches
+ */
+struct ubifs_idx_node {
+       struct ubifs_ch ch;
+       __le16 child_cnt;
+       __le16 level;
+       __u8 branches[];
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_cs_node - commit start node.
+ * @ch: common header
+ * @cmt_no: commit number
+ */
+struct ubifs_cs_node {
+       struct ubifs_ch ch;
+       __le64 cmt_no;
+} __attribute__ ((packed));
+
+/**
+ * struct ubifs_orph_node - orphan node.
+ * @ch: common header
+ * @cmt_no: commit number (also top bit is set on the last node of the commit)
+ * @inos: inode numbers of orphans
+ */
+struct ubifs_orph_node {
+       struct ubifs_ch ch;
+       __le64 cmt_no;
+       __le64 inos[];
+} __attribute__ ((packed));
+
+#endif /* __UBIFS_MEDIA_H__ */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h

new file mode 100644 (file)

index 0000000..e4f89f2
--- /dev/null
+++ b/fs/ubifs/ubifs.h
@@ -0,0 +1,1649 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/* Implementation version 0.7 */
+
+#ifndef __UBIFS_H__
+#define __UBIFS_H__
+
+#include <asm/div64.h>
+#include <linux/statfs.h>
+#include <linux/fs.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/mtd/ubi.h>
+#include <linux/pagemap.h>
+#include <linux/backing-dev.h>
+#include "ubifs-media.h"
+
+/* Version of this UBIFS implementation */
+#define UBIFS_VERSION 1
+
+/* Normal UBIFS messages */
+#define ubifs_msg(fmt, ...) \
+               printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__)
+/* UBIFS error messages */
+#define ubifs_err(fmt, ...)                                                  \
+       printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \
+              __func__, ##__VA_ARGS__)
+/* UBIFS warning messages */
+#define ubifs_warn(fmt, ...)                                         \
+       printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \
+              current->pid, __func__, ##__VA_ARGS__)
+
+/* UBIFS file system VFS magic number */
+#define UBIFS_SUPER_MAGIC 0x24051905
+
+/* Number of UBIFS blocks per VFS page */
+#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE)
+#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT)
+
+/* "File system end of life" sequence number watermark */
+#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
+#define SQNUM_WATERMARK      0xFFFFFFFFFF000000ULL
+
+/* Minimum amount of data UBIFS writes to the flash */
+#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8)
+
+/*
+ * Currently we do not support inode number overlapping and re-using, so this
+ * watermark defines dangerous inode number level. This should be fixed later,
+ * although it is difficult to exceed current limit. Another option is to use
+ * 64-bit inode numbers, but this means more overhead.
+ */
+#define INUM_WARN_WATERMARK 0xFFF00000
+#define INUM_WATERMARK      0xFFFFFF00
+
+/* Largest key size supported in this implementation */
+#define CUR_MAX_KEY_LEN UBIFS_SK_LEN
+
+/* Maximum number of entries in each LPT (LEB category) heap */
+#define LPT_HEAP_SZ 256
+
+/*
+ * Background thread name pattern. The numbers are UBI device and volume
+ * numbers.
+ */
+#define BGT_NAME_PATTERN "ubifs_bgt%d_%d"
+
+/* Default write-buffer synchronization timeout (5 secs) */
+#define DEFAULT_WBUF_TIMEOUT (5 * HZ)
+
+/* Maximum possible inode number (only 32-bit inodes are supported now) */
+#define MAX_INUM 0xFFFFFFFF
+
+/* Number of non-data journal heads */
+#define NONDATA_JHEADS_CNT 2
+
+/* Garbage collector head */
+#define GCHD   0
+/* Base journal head number */
+#define BASEHD 1
+/* First "general purpose" journal head */
+#define DATAHD 2
+
+/* 'No change' value for 'ubifs_change_lp()' */
+#define LPROPS_NC 0x80000001
+
+/*
+ * There is no notion of truncation key because truncation nodes do not exist
+ * in TNC. However, when replaying, it is handy to introduce fake "truncation"
+ * keys for truncation nodes because the code becomes simpler. So we define
+ * %UBIFS_TRUN_KEY type.
+ */
+#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
+
+/*
+ * How much a directory entry/extended attribute entry adds to the parent/host
+ * inode.
+ */
+#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8)
+
+/* How much an extended attribute adds to the host inode */
+#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8)
+
+/*
+ * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered
+ * "old", and znode which were touched last 'YOUNG_ZNODE_AGE' seconds ago are
+ * considered "young". This is used by shrinker when selecting znode to trim
+ * off.
+ */
+#define OLD_ZNODE_AGE 20
+#define YOUNG_ZNODE_AGE 5
+
+/*
+ * Some compressors, like LZO, may end up with more data than the input buffer.
+ * So UBIFS always allocates larger output buffer, to be sure the compressor
+ * will not corrupt memory in case of worst case compression.
+ */
+#define WORST_COMPR_FACTOR 2
+
+/* Maximum expected tree height for use by bottom_up_buf */
+#define BOTTOM_UP_HEIGHT 64
+
+/*
+ * Lockdep classes for UBIFS inode @ui_mutex.
+ */
+enum {
+       WB_MUTEX_1 = 0,
+       WB_MUTEX_2 = 1,
+       WB_MUTEX_3 = 2,
+};
+
+/*
+ * Znode flags (actually, bit numbers which store the flags).
+ *
+ * DIRTY_ZNODE: znode is dirty
+ * COW_ZNODE: znode is being committed and a new instance of this znode has to
+ *            be created before changing this znode
+ * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is
+ *                 still in the commit list and the ongoing commit operation
+ *                 will commit it, and delete this znode after it is done
+ */
+enum {
+       DIRTY_ZNODE    = 0,
+       COW_ZNODE      = 1,
+       OBSOLETE_ZNODE = 2,
+};
+
+/*
+ * Commit states.
+ *
+ * COMMIT_RESTING: commit is not wanted
+ * COMMIT_BACKGROUND: background commit has been requested
+ * COMMIT_REQUIRED: commit is required
+ * COMMIT_RUNNING_BACKGROUND: background commit is running
+ * COMMIT_RUNNING_REQUIRED: commit is running and it is required
+ * COMMIT_BROKEN: commit failed
+ */
+enum {
+       COMMIT_RESTING = 0,
+       COMMIT_BACKGROUND,
+       COMMIT_REQUIRED,
+       COMMIT_RUNNING_BACKGROUND,
+       COMMIT_RUNNING_REQUIRED,
+       COMMIT_BROKEN,
+};
+
+/*
+ * 'ubifs_scan_a_node()' return values.
+ *
+ * SCANNED_GARBAGE:  scanned garbage
+ * SCANNED_EMPTY_SPACE: scanned empty space
+ * SCANNED_A_NODE: scanned a valid node
+ * SCANNED_A_CORRUPT_NODE: scanned a corrupted node
+ * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length
+ *
+ * Greater than zero means: 'scanned that number of padding bytes'
+ */
+enum {
+       SCANNED_GARBAGE        = 0,
+       SCANNED_EMPTY_SPACE    = -1,
+       SCANNED_A_NODE         = -2,
+       SCANNED_A_CORRUPT_NODE = -3,
+       SCANNED_A_BAD_PAD_NODE = -4,
+};
+
+/*
+ * LPT cnode flag bits.
+ *
+ * DIRTY_CNODE: cnode is dirty
+ * COW_CNODE: cnode is being committed and must be copied before writing
+ * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted),
+ * so it can (and must) be freed when the commit is finished
+ */
+enum {
+       DIRTY_CNODE    = 0,
+       COW_CNODE      = 1,
+       OBSOLETE_CNODE = 2,
+};
+
+/*
+ * Dirty flag bits (lpt_drty_flgs) for LPT special nodes.
+ *
+ * LTAB_DIRTY: ltab node is dirty
+ * LSAVE_DIRTY: lsave node is dirty
+ */
+enum {
+       LTAB_DIRTY  = 1,
+       LSAVE_DIRTY = 2,
+};
+
+/*
+ * Return codes used by the garbage collector.
+ * @LEB_FREED: the logical eraseblock was freed and is ready to use
+ * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit
+ * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes
+ */
+enum {
+       LEB_FREED,
+       LEB_FREED_IDX,
+       LEB_RETAINED,
+};
+
+/**
+ * struct ubifs_old_idx - index node obsoleted since last commit start.
+ * @rb: rb-tree node
+ * @lnum: LEB number of obsoleted index node
+ * @offs: offset of obsoleted index node
+ */
+struct ubifs_old_idx {
+       struct rb_node rb;
+       int lnum;
+       int offs;
+};
+
+/* The below union makes it easier to deal with keys */
+union ubifs_key {
+       uint8_t u8[CUR_MAX_KEY_LEN];
+       uint32_t u32[CUR_MAX_KEY_LEN/4];
+       uint64_t u64[CUR_MAX_KEY_LEN/8];
+       __le32 j32[CUR_MAX_KEY_LEN/4];
+};
+
+/**
+ * struct ubifs_scan_node - UBIFS scanned node information.
+ * @list: list of scanned nodes
+ * @key: key of node scanned (if it has one)
+ * @sqnum: sequence number
+ * @type: type of node scanned
+ * @offs: offset within the LEB of the node scanned
+ * @len: length of node scanned
+ * @node: raw node
+ */
+struct ubifs_scan_node {
+       struct list_head list;
+       union ubifs_key key;
+       unsigned long long sqnum;
+       int type;
+       int offs;
+       int len;
+       void *node;
+};
+
+/**
+ * struct ubifs_scan_leb - UBIFS scanned LEB information.
+ * @lnum: logical eraseblock number
+ * @nodes_cnt: number of nodes scanned
+ * @nodes: list of struct ubifs_scan_node
+ * @endpt: end point (and therefore the start of empty space)
+ * @ecc: read returned -EBADMSG
+ * @buf: buffer containing entire LEB scanned
+ */
+struct ubifs_scan_leb {
+       int lnum;
+       int nodes_cnt;
+       struct list_head nodes;
+       int endpt;
+       int ecc;
+       void *buf;
+};
+
+/**
+ * struct ubifs_gced_idx_leb - garbage-collected indexing LEB.
+ * @list: list
+ * @lnum: LEB number
+ * @unmap: OK to unmap this LEB
+ *
+ * This data structure is used to temporarily store garbage-collected indexing
+ * LEBs - they are not released immediately, but only after the next commit.
+ * This is needed to guarantee recoverability.
+ */
+struct ubifs_gced_idx_leb {
+       struct list_head list;
+       int lnum;
+       int unmap;
+};
+
+/**
+ * struct ubifs_inode - UBIFS in-memory inode description.
+ * @vfs_inode: VFS inode description object
+ * @creat_sqnum: sequence number at time of creation
+ * @xattr_size: summarized size of all extended attributes in bytes
+ * @xattr_cnt: count of extended attributes this inode has
+ * @xattr_names: sum of lengths of all extended attribute names belonging to
+ *               this inode
+ * @dirty: non-zero if the inode is dirty
+ * @xattr: non-zero if this is an extended attribute inode
+ * @ui_mutex: serializes inode write-back with the rest of VFS operations,
+ *            serializes "clean <-> dirty" state changes, protects @dirty,
+ *            @ui_size, and @xattr_size
+ * @ui_lock: protects @synced_i_size
+ * @synced_i_size: synchronized size of inode, i.e. the value of inode size
+ *                 currently stored on the flash; used only for regular file
+ *                 inodes
+ * @ui_size: inode size used by UBIFS when writing to flash
+ * @flags: inode flags (@UBIFS_COMPR_FL, etc)
+ * @compr_type: default compression type used for this inode
+ * @data_len: length of the data attached to the inode
+ * @data: inode's data
+ *
+ * @ui_mutex exists for two main reasons. First, it prevents inodes from being
+ * written back while UBIFS is changing them in the middle of a VFS
+ * operation. This way UBIFS makes sure the inode fields are consistent. For
+ * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and
+ * write-back must not write any of them before we have finished.
+ *
+ * The second reason is budgeting - UBIFS has to budget all operations. If an
+ * operation is going to mark an inode dirty, it has to allocate budget for
+ * this. It cannot just mark it dirty because there is no guarantee there will
+ * be enough flash space to write the inode back later. This means UBIFS has
+ * to have full control over inode "clean <-> dirty" transitions (and pages
+ * actually). But unfortunately, VFS marks inodes dirty in many places, and it
+ * does not ask the file-system if it is allowed to do so (there is a notifier,
+ * but it is not enough), i.e., there is no mechanism to synchronize with this.
+ * So UBIFS has its own inode dirty flag and its own mutex to serialize
+ * "clean <-> dirty" transitions.
+ *
+ * The @synced_i_size field is used to make sure we never write pages which are
+ * beyond last synchronized inode size. See 'ubifs_writepage()' for more
+ * information.
+ *
+ * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
+ * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
+ * make sure @inode->i_size is always changed under @ui_mutex, because it
+ * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock
+ * with 'ubifs_writepage()' (see file.c). All the other inode fields are
+ * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
+ * could consider to rework locking and base it on "shadow" fields.
+ */
+struct ubifs_inode {
+       struct inode vfs_inode;
+       unsigned long long creat_sqnum;
+       unsigned int xattr_size;
+       unsigned int xattr_cnt;
+       unsigned int xattr_names;
+       unsigned int dirty:1;
+       unsigned int xattr:1;
+       struct mutex ui_mutex;
+       spinlock_t ui_lock;
+       loff_t synced_i_size;
+       loff_t ui_size;
+       int flags;
+       int compr_type;
+       int data_len;
+       void *data;
+};
+
+/**
+ * struct ubifs_unclean_leb - records a LEB recovered under read-only mode.
+ * @list: list
+ * @lnum: LEB number of recovered LEB
+ * @endpt: offset where recovery ended
+ *
+ * This structure records a LEB identified during recovery that needs to be
+ * cleaned but was not because UBIFS was mounted read-only. The information
+ * is used to clean the LEB when remounting to read-write mode.
+ */
+struct ubifs_unclean_leb {
+       struct list_head list;
+       int lnum;
+       int endpt;
+};
+
+/*
+ * LEB properties flags.
+ *
+ * LPROPS_UNCAT: not categorized
+ * LPROPS_DIRTY: dirty > 0, not index
+ * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index
+ * LPROPS_FREE: free > 0, not empty, not index
+ * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs
+ * LPROPS_EMPTY: LEB is empty, not taken
+ * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken
+ * LPROPS_FRDI_IDX: free + dirty == leb_size and index, may be taken
+ * LPROPS_CAT_MASK: mask for the LEB categories above
+ * LPROPS_TAKEN: LEB was taken (this flag is not saved on the media)
+ * LPROPS_INDEX: LEB contains indexing nodes (this flag also exists on flash)
+ */
+enum {
+       LPROPS_UNCAT     =  0,
+       LPROPS_DIRTY     =  1,
+       LPROPS_DIRTY_IDX =  2,
+       LPROPS_FREE      =  3,
+       LPROPS_HEAP_CNT  =  3,
+       LPROPS_EMPTY     =  4,
+       LPROPS_FREEABLE  =  5,
+       LPROPS_FRDI_IDX  =  6,
+       LPROPS_CAT_MASK  = 15,
+       LPROPS_TAKEN     = 16,
+       LPROPS_INDEX     = 32,
+};
+
+/**
+ * struct ubifs_lprops - logical eraseblock properties.
+ * @free: amount of free space in bytes
+ * @dirty: amount of dirty space in bytes
+ * @flags: LEB properties flags (see above)
+ * @lnum: LEB number
+ * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE)
+ * @hpos: heap position in heap of same-category lprops (other categories)
+ */
+struct ubifs_lprops {
+       int free;
+       int dirty;
+       int flags;
+       int lnum;
+       union {
+               struct list_head list;
+               int hpos;
+       };
+};
+
+/**
+ * struct ubifs_lpt_lprops - LPT logical eraseblock properties.
+ * @free: amount of free space in bytes
+ * @dirty: amount of dirty space in bytes
+ * @tgc: trivial GC flag (1 => unmap after commit end)
+ * @cmt: commit flag (1 => reserved for commit)
+ */
+struct ubifs_lpt_lprops {
+       int free;
+       int dirty;
+       unsigned tgc : 1;
+       unsigned cmt : 1;
+};
+
+/**
+ * struct ubifs_lp_stats - statistics of eraseblocks in the main area.
+ * @empty_lebs: number of empty LEBs
+ * @taken_empty_lebs: number of taken empty LEBs
+ * @idx_lebs: number of indexing LEBs
+ * @total_free: total free space in bytes
+ * @total_dirty: total dirty space in bytes
+ * @total_used: total used space in bytes (includes only data LEBs)
+ * @total_dead: total dead space in bytes (includes only data LEBs)
+ * @total_dark: total dark space in bytes (includes only data LEBs)
+ *
+ * N.B. total_dirty and total_used are different to other total_* fields,
+ * because they account _all_ LEBs, not just data LEBs.
+ *
+ * 'taken_empty_lebs' counts the LEBs that are in the transient state of having
+ * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed
+ * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used
+ * by itself (in which case 'unused_lebs' would be a better name). In the case
+ * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC,
+ * but unlike other empty LEBs that are 'taken', it may not be written straight
+ * away (i.e. before the next commit start or unmount), so either gc_lnum must
+ * be specially accounted for, or the current approach followed i.e. count it
+ * under 'taken_empty_lebs'.
+ */
+struct ubifs_lp_stats {
+       int empty_lebs;
+       int taken_empty_lebs;
+       int idx_lebs;
+       long long total_free;
+       long long total_dirty;
+       long long total_used;
+       long long total_dead;
+       long long total_dark;
+};
+
+struct ubifs_nnode;
+
+/**
+ * struct ubifs_cnode - LEB Properties Tree common node.
+ * @parent: parent nnode
+ * @cnext: next cnode to commit
+ * @flags: flags (%DIRTY_CNODE, %COW_CNODE or %OBSOLETE_CNODE)
+ * @iip: index in parent
+ * @level: level in the tree (zero for pnodes, greater than zero for nnodes)
+ * @num: node number
+ */
+struct ubifs_cnode {
+       struct ubifs_nnode *parent;
+       struct ubifs_cnode *cnext;
+       unsigned long flags;
+       int iip;
+       int level;
+       int num;
+};
+
+/**
+ * struct ubifs_pnode - LEB Properties Tree leaf node.
+ * @parent: parent nnode
+ * @cnext: next cnode to commit
+ * @flags: flags (%DIRTY_CNODE, %COW_CNODE or %OBSOLETE_CNODE)
+ * @iip: index in parent
+ * @level: level in the tree (always zero for pnodes)
+ * @num: node number
+ * @lprops: LEB properties array
+ */
+struct ubifs_pnode {
+       struct ubifs_nnode *parent;
+       struct ubifs_cnode *cnext;
+       unsigned long flags;
+       int iip;
+       int level;
+       int num;
+       struct ubifs_lprops lprops[UBIFS_LPT_FANOUT];
+};
+
+/**
+ * struct ubifs_nbranch - LEB Properties Tree internal node branch.
+ * @lnum: LEB number of child
+ * @offs: offset of child
+ * @nnode: nnode child
+ * @pnode: pnode child
+ * @cnode: cnode child
+ */
+struct ubifs_nbranch {
+       int lnum;
+       int offs;
+       union {
+               struct ubifs_nnode *nnode;
+               struct ubifs_pnode *pnode;
+               struct ubifs_cnode *cnode;
+       };
+};
+
+/**
+ * struct ubifs_nnode - LEB Properties Tree internal node.
+ * @parent: parent nnode
+ * @cnext: next cnode to commit
+ * @flags: flags (%DIRTY_CNODE, %COW_CNODE or %OBSOLETE_CNODE)
+ * @iip: index in parent
+ * @level: level in the tree (always greater than zero for nnodes)
+ * @num: node number
+ * @nbranch: branches to child nodes
+ */
+struct ubifs_nnode {
+       struct ubifs_nnode *parent;
+       struct ubifs_cnode *cnext;
+       unsigned long flags;
+       int iip;
+       int level;
+       int num;
+       struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT];
+};
+
+/**
+ * struct ubifs_lpt_heap - heap of categorized lprops.
+ * @arr: heap array
+ * @cnt: number in heap
+ * @max_cnt: maximum number allowed in heap
+ *
+ * There are %LPROPS_HEAP_CNT heaps.
+ */
+struct ubifs_lpt_heap {
+       struct ubifs_lprops **arr;
+       int cnt;
+       int max_cnt;
+};
+
+/*
+ * Return codes for LPT scan callback function.
+ *
+ * LPT_SCAN_CONTINUE: continue scanning
+ * LPT_SCAN_ADD: add the LEB properties scanned to the tree in memory
+ * LPT_SCAN_STOP: stop scanning
+ */
+enum {
+       LPT_SCAN_CONTINUE = 0,
+       LPT_SCAN_ADD = 1,
+       LPT_SCAN_STOP = 2,
+};
+
+struct ubifs_info;
+
+/* Callback used by the 'ubifs_lpt_scan_nolock()' function */
+typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
+                                      const struct ubifs_lprops *lprops,
+                                      int in_tree, void *data);
+
+/**
+ * struct ubifs_wbuf - UBIFS write-buffer.
+ * @c: UBIFS file-system description object
+ * @buf: write-buffer (of min. flash I/O unit size)
+ * @lnum: logical eraseblock number the write-buffer points to
+ * @offs: write-buffer offset in this logical eraseblock
+ * @avail: number of bytes available in the write-buffer
+ * @used:  number of used bytes in the write-buffer
+ * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
+ * %UBI_UNKNOWN)
+ * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
+ *         up by 'mutex_lock_nested()').
+ * @sync_callback: write-buffer synchronization callback
+ * @io_mutex: serializes write-buffer I/O
+ * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes
+ *        fields
+ * @timer: write-buffer timer
+ * @timeout: timer expire interval in jiffies
+ * @need_sync: it is set if its timer expired and needs sync
+ * @next_ino: points to the next position of the following inode number
+ * @inodes: stores the inode numbers of the nodes which are in wbuf
+ *
+ * The write-buffer synchronization callback is called when the write-buffer is
+ * synchronized in order to notify how much space was wasted due to
+ * write-buffer padding and how much free space is left in the LEB.
+ *
+ * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under
+ * spin-lock or mutex because they are written under both mutex and spin-lock.
+ * @buf is appended to under mutex but overwritten under both mutex and
+ * spin-lock. Thus the data between @buf and @buf + @used can be read under
+ * spinlock.
+ */
+struct ubifs_wbuf {
+       struct ubifs_info *c;
+       void *buf;
+       int lnum;
+       int offs;
+       int avail;
+       int used;
+       int dtype;
+       int jhead;
+       int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
+       struct mutex io_mutex;
+       spinlock_t lock;
+       struct timer_list timer;
+       int timeout;
+       int need_sync;
+       int next_ino;
+       ino_t *inodes;
+};
+
+/**
+ * struct ubifs_bud - bud logical eraseblock.
+ * @lnum: logical eraseblock number
+ * @start: where the (uncommitted) bud data starts
+ * @jhead: journal head number this bud belongs to
+ * @list: link in the list buds belonging to the same journal head
+ * @rb: link in the tree of all buds
+ */
+struct ubifs_bud {
+       int lnum;
+       int start;
+       int jhead;
+       struct list_head list;
+       struct rb_node rb;
+};
+
+/**
+ * struct ubifs_jhead - journal head.
+ * @wbuf: head's write-buffer
+ * @buds_list: list of bud LEBs belonging to this journal head
+ *
+ * Note, the @buds_list list is protected by the @c->buds_lock.
+ */
+struct ubifs_jhead {
+       struct ubifs_wbuf wbuf;
+       struct list_head buds_list;
+};
+
+/**
+ * struct ubifs_zbranch - key/coordinate/length branch stored in znodes.
+ * @key: key
+ * @znode: znode address in memory
+ * @lnum: LEB number of the indexing node
+ * @offs: offset of the indexing node within @lnum
+ * @len: target node length
+ */
+struct ubifs_zbranch {
+       union ubifs_key key;
+       union {
+               struct ubifs_znode *znode;
+               void *leaf;
+       };
+       int lnum;
+       int offs;
+       int len;
+};
+
+/**
+ * struct ubifs_znode - in-memory representation of an indexing node.
+ * @parent: parent znode or NULL if it is the root
+ * @cnext: next znode to commit
+ * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE)
+ * @time: last access time (seconds)
+ * @level: level of the entry in the TNC tree
+ * @child_cnt: count of child znodes
+ * @iip: index in parent's zbranch array
+ * @alt: lower bound of key range has altered i.e. child inserted at slot 0
+ * @lnum: LEB number of the corresponding indexing node
+ * @offs: offset of the corresponding indexing node
+ * @len: length  of the corresponding indexing node
+ * @zbranch: array of znode branches (@c->fanout elements)
+ */
+struct ubifs_znode {
+       struct ubifs_znode *parent;
+       struct ubifs_znode *cnext;
+       unsigned long flags;
+       unsigned long time;
+       int level;
+       int child_cnt;
+       int iip;
+       int alt;
+#ifdef CONFIG_UBIFS_FS_DEBUG
+       int lnum, offs, len;
+#endif
+       struct ubifs_zbranch zbranch[];
+};
+
+/**
+ * struct ubifs_node_range - node length range description data structure.
+ * @len: fixed node length
+ * @min_len: minimum possible node length
+ * @max_len: maximum possible node length
+ *
+ * If @max_len is %0, the node has fixed length @len.
+ */
+struct ubifs_node_range {
+       union {
+               int len;
+               int min_len;
+       };
+       int max_len;
+};
+
+/**
+ * struct ubifs_compressor - UBIFS compressor description structure.
+ * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc)
+ * @cc: cryptoapi compressor handle
+ * @comp_mutex: mutex used during compression
+ * @decomp_mutex: mutex used during decompression
+ * @name: compressor name
+ * @capi_name: cryptoapi compressor name
+ */
+struct ubifs_compressor {
+       int compr_type;
+       struct crypto_comp *cc;
+       struct mutex *comp_mutex;
+       struct mutex *decomp_mutex;
+       const char *name;
+       const char *capi_name;
+};
+
+/**
+ * struct ubifs_budget_req - budget requirements of an operation.
+ *
+ * @fast: non-zero if the budgeting should try to acquire budget quickly and
+ *        should not try to call write-back
+ * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields
+ *               have to be re-calculated
+ * @new_page: non-zero if the operation adds a new page
+ * @dirtied_page: non-zero if the operation makes a page dirty
+ * @new_dent: non-zero if the operation adds a new directory entry
+ * @mod_dent: non-zero if the operation removes or modifies an existing
+ *            directory entry
+ * @new_ino: non-zero if the operation adds a new inode
+ * @new_ino_d: how much data newly created inode contains
+ * @dirtied_ino: how many inodes the operation makes dirty
+ * @dirtied_ino_d: how much data dirtied inode contains
+ * @idx_growth: how much the index will supposedly grow
+ * @data_growth: how much new data the operation will supposedly add
+ * @dd_growth: how much data that makes other data dirty the operation will
+ *             supposedly add
+ *
+ * @idx_growth, @data_growth and @dd_growth are not used in budget request. The
+ * budgeting subsystem caches index and data growth values there to avoid
+ * re-calculating them when the budget is released. However, if @idx_growth is
+ * %-1, it is calculated by the release function using other fields.
+ *
+ * An inode may contain 4KiB of data at max., thus the width of @new_ino_d
+ * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made
+ * dirty by the re-name operation.
+ */
+struct ubifs_budget_req {
+       unsigned int fast:1;
+       unsigned int recalculate:1;
+       unsigned int new_page:1;
+       unsigned int dirtied_page:1;
+       unsigned int new_dent:1;
+       unsigned int mod_dent:1;
+       unsigned int new_ino:1;
+       unsigned int new_ino_d:13;
+#ifndef UBIFS_DEBUG
+       unsigned int dirtied_ino:4;
+       unsigned int dirtied_ino_d:15;
+#else
+       /* Not bit-fields to check for overflows */
+       unsigned int dirtied_ino;
+       unsigned int dirtied_ino_d;
+#endif
+       int idx_growth;
+       int data_growth;
+       int dd_growth;
+};
+
+/**
+ * struct ubifs_orphan - stores the inode number of an orphan.
+ * @rb: rb-tree node of rb-tree of orphans sorted by inode number
+ * @list: list head of list of orphans in order added
+ * @new_list: list head of list of orphans added since the last commit
+ * @cnext: next orphan to commit
+ * @dnext: next orphan to delete
+ * @inum: inode number
+ * @new: %1 => added since the last commit, otherwise %0
+ */
+struct ubifs_orphan {
+       struct rb_node rb;
+       struct list_head list;
+       struct list_head new_list;
+       struct ubifs_orphan *cnext;
+       struct ubifs_orphan *dnext;
+       ino_t inum;
+       int new;
+};
+
+/**
+ * struct ubifs_mount_opts - UBIFS-specific mount options information.
+ * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
+ */
+struct ubifs_mount_opts {
+       unsigned int unmount_mode:2;
+};
+
+/**
+ * struct ubifs_info - UBIFS file-system description data structure
+ * (per-superblock).
+ * @vfs_sb: VFS @struct super_block object
+ * @bdi: backing device info object to make VFS happy and disable readahead
+ *
+ * @highest_inum: highest used inode number
+ * @vfs_gen: VFS inode generation counter
+ * @max_sqnum: current global sequence number
+ * @cmt_no: commit number (last successfully completed commit)
+ * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters
+ * @fmt_version: UBIFS on-flash format version
+ * @uuid: UUID from super block
+ *
+ * @lhead_lnum: log head logical eraseblock number
+ * @lhead_offs: log head offset
+ * @ltail_lnum: log tail logical eraseblock number (offset is always 0)
+ * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and
+ *             @bud_bytes
+ * @min_log_bytes: minimum required number of bytes in the log
+ * @cmt_bud_bytes: used during commit to temporarily store the amount of bytes
+ *                 in committed buds
+ *
+ * @buds: tree of all buds indexed by bud LEB number
+ * @bud_bytes: how many bytes of flash is used by buds
+ * @buds_lock: protects the @buds tree, @bud_bytes, and per-journal head bud
+ *             lists
+ * @jhead_cnt: count of journal heads
+ * @jheads: journal heads (head zero is base head)
+ * @max_bud_bytes: maximum number of bytes allowed in buds
+ * @bg_bud_bytes: number of bud bytes when background commit is initiated
+ * @old_buds: buds to be released after commit ends
+ * @max_bud_cnt: maximum number of buds
+ *
+ * @commit_sem: synchronizes committer with other processes
+ * @cmt_state: commit state
+ * @cs_lock: commit state lock
+ * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
+ * @fast_unmount: do not run journal commit before un-mounting
+ * @big_lpt: flag that LPT is too big to write whole during commit
+ * @check_lpt_free: flag that indicates LPT GC may be needed
+ * @nospace: non-zero if the file-system does not have flash space (used as
+ *           optimization)
+ * @nospace_rp: the same as @nospace, but additionally means that even reserved
+ *              pool is full
+ *
+ * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
+ *             @calc_idx_sz
+ * @zroot: zbranch which points to the root index node and znode
+ * @cnext: next znode to commit
+ * @enext: next znode to commit to empty space
+ * @gap_lebs: array of LEBs used by the in-gaps commit method
+ * @cbuf: commit buffer
+ * @ileb_buf: buffer for commit in-the-gaps method
+ * @ileb_len: length of data in ileb_buf
+ * @ihead_lnum: LEB number of index head
+ * @ihead_offs: offset of index head
+ * @ilebs: pre-allocated index LEBs
+ * @ileb_cnt: number of pre-allocated index LEBs
+ * @ileb_nxt: next pre-allocated index LEBs
+ * @old_idx: tree of index nodes obsoleted since the last commit start
+ * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c
+ * @new_ihead_lnum: used by debugging to check ihead_lnum
+ * @new_ihead_offs: used by debugging to check ihead_offs
+ *
+ * @mst_node: master node
+ * @mst_offs: offset of valid master node
+ * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
+ *
+ * @log_lebs: number of logical eraseblocks in the log
+ * @log_bytes: log size in bytes
+ * @log_last: last LEB of the log
+ * @lpt_lebs: number of LEBs used for lprops table
+ * @lpt_first: first LEB of the lprops table area
+ * @lpt_last: last LEB of the lprops table area
+ * @orph_lebs: number of LEBs used for the orphan area
+ * @orph_first: first LEB of the orphan area
+ * @orph_last: last LEB of the orphan area
+ * @main_lebs: count of LEBs in the main area
+ * @main_first: first LEB of the main area
+ * @main_bytes: main area size in bytes
+ * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
+ *
+ * @key_hash_type: type of the key hash
+ * @key_hash: direntry key hash function
+ * @key_fmt: key format
+ * @key_len: key length
+ * @fanout: fanout of the index tree (number of links per indexing node)
+ *
+ * @min_io_size: minimal input/output unit size
+ * @min_io_shift: number of bits in @min_io_size minus one
+ * @leb_size: logical eraseblock size in bytes
+ * @half_leb_size: half LEB size
+ * @leb_cnt: count of logical eraseblocks
+ * @max_leb_cnt: maximum count of logical eraseblocks
+ * @old_leb_cnt: count of logical eraseblocks before re-size
+ * @ro_media: the underlying UBI volume is read-only
+ *
+ * @dirty_pg_cnt: number of dirty pages (not used)
+ * @dirty_zn_cnt: number of dirty znodes
+ * @clean_zn_cnt: number of clean znodes
+ *
+ * @budg_idx_growth: amount of bytes budgeted for index growth
+ * @budg_data_growth: amount of bytes budgeted for cached data
+ * @budg_dd_growth: amount of bytes budgeted for cached data that will make
+ *                  other data dirty
+ * @budg_uncommitted_idx: bytes that were budgeted for growth of the index,
+ *                        but which still have to be taken into account because
+ *                        the index has not been committed so far
+ * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
+ *              @budg_uncommitted_idx, @min_idx_lebs, @old_idx_sz, and @lst
+ * @min_idx_lebs: minimum number of LEBs required for the index
+ * @old_idx_sz: size of index on flash
+ * @calc_idx_sz: temporary variable which is used to calculate new index size
+ *               (contains accurate new index size at end of TNC commit start)
+ * @lst: lprops statistics
+ *
+ * @page_budget: budget for a page
+ * @inode_budget: budget for an inode
+ * @dent_budget: budget for a directory entry
+ *
+ * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
+ *                 I/O unit
+ * @mst_node_alsz: master node aligned size
+ * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
+ * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
+ * @max_inode_sz: maximum possible inode size in bytes
+ * @max_znode_sz: size of znode in bytes
+ * @dead_wm: LEB dead space watermark
+ * @dark_wm: LEB dark space watermark
+ * @block_cnt: count of 4KiB blocks on the FS
+ *
+ * @ranges: UBIFS node length ranges
+ * @ubi: UBI volume descriptor
+ * @di: UBI device information
+ * @vi: UBI volume information
+ *
+ * @orph_tree: rb-tree of orphan inode numbers
+ * @orph_list: list of orphan inode numbers in order added
+ * @orph_new: list of orphan inode numbers added since last commit
+ * @orph_cnext: next orphan to commit
+ * @orph_dnext: next orphan to delete
+ * @orphan_lock: lock for orph_tree and orph_new
+ * @orph_buf: buffer for orphan nodes
+ * @new_orphans: number of orphans since last commit
+ * @cmt_orphans: number of orphans being committed
+ * @tot_orphans: number of orphans in the rb_tree
+ * @max_orphans: maximum number of orphans allowed
+ * @ohead_lnum: orphan head LEB number
+ * @ohead_offs: orphan head offset
+ * @no_orphs: non-zero if there are no orphans
+ *
+ * @bgt: UBIFS background thread
+ * @bgt_name: background thread name
+ * @need_bgt: if background thread should run
+ * @need_wbuf_sync: if write-buffers have to be synchronized
+ *
+ * @gc_lnum: LEB number used for garbage collection
+ * @sbuf: a buffer of LEB size used by GC and replay for scanning
+ * @idx_gc: list of index LEBs that have been garbage collected
+ * @idx_gc_cnt: number of elements on the idx_gc list
+ *
+ * @infos_list: links all 'ubifs_info' objects
+ * @umount_mutex: serializes shrinker and un-mount
+ * @shrinker_run_no: shrinker run number
+ *
+ * @space_bits: number of bits needed to record free or dirty space
+ * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT
+ * @lpt_offs_bits: number of bits needed to record an offset in the LPT
+ * @lpt_spc_bits: number of bits needed to record space in the LPT
+ * @pcnt_bits: number of bits needed to record pnode or nnode number
+ * @lnum_bits: number of bits needed to record LEB number
+ * @nnode_sz: size of on-flash nnode
+ * @pnode_sz: size of on-flash pnode
+ * @ltab_sz: size of on-flash LPT lprops table
+ * @lsave_sz: size of on-flash LPT save table
+ * @pnode_cnt: number of pnodes
+ * @nnode_cnt: number of nnodes
+ * @lpt_hght: height of the LPT
+ * @pnodes_have: number of pnodes in memory
+ *
+ * @lp_mutex: protects lprops table and all the other lprops-related fields
+ * @lpt_lnum: LEB number of the root nnode of the LPT
+ * @lpt_offs: offset of the root nnode of the LPT
+ * @nhead_lnum: LEB number of LPT head
+ * @nhead_offs: offset of LPT head
+ * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab
+ * @dirty_nn_cnt: number of dirty nnodes
+ * @dirty_pn_cnt: number of dirty pnodes
+ * @lpt_sz: LPT size
+ * @lpt_nod_buf: buffer for an on-flash nnode or pnode
+ * @lpt_buf: buffer of LEB size used by LPT
+ * @nroot: address in memory of the root nnode of the LPT
+ * @lpt_cnext: next LPT node to commit
+ * @lpt_heap: array of heaps of categorized lprops
+ * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at
+ *             previous commit start
+ * @uncat_list: list of un-categorized LEBs
+ * @empty_list: list of empty LEBs
+ * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size)
+ * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size)
+ * @freeable_cnt: number of freeable LEBs in @freeable_list
+ *
+ * @ltab_lnum: LEB number of LPT's own lprops table
+ * @ltab_offs: offset of LPT's own lprops table
+ * @ltab: LPT's own lprops table
+ * @ltab_cmt: LPT's own lprops table (commit copy)
+ * @lsave_cnt: number of LEB numbers in LPT's save table
+ * @lsave_lnum: LEB number of LPT's save table
+ * @lsave_offs: offset of LPT's save table
+ * @lsave: LPT's save table
+ * @lscan_lnum: LEB number of last LPT scan
+ *
+ * @rp_size: size of the reserved pool in bytes
+ * @report_rp_size: size of the reserved pool reported to user-space
+ * @rp_uid: reserved pool user ID
+ * @rp_gid: reserved pool group ID
+ *
+ * @empty: if the UBI device is empty
+ * @replay_tree: temporary tree used during journal replay
+ * @replay_list: temporary list used during journal replay
+ * @replay_buds: list of buds to replay
+ * @cs_sqnum: sequence number of first node in the log (commit start node)
+ * @replay_sqnum: sequence number of node currently being replayed
+ * @need_recovery: file-system needs recovery
+ * @replaying: set to %1 during journal replay
+ * @unclean_leb_list: LEBs to recover when mounting ro to rw
+ * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
+ * @size_tree: inode size information for recovery
+ * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
+ * @mount_opts: UBIFS-specific mount options
+ *
+ * @dbg_buf: a buffer of LEB size used for debugging purposes
+ * @old_zroot: old index root - used by 'dbg_check_old_index()'
+ * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
+ * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
+ * @failure_mode: failure mode for recovery testing
+ * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
+ * @fail_timeout: time in jiffies when delay of failure mode expires
+ * @fail_cnt: current number of calls to failure mode I/O functions
+ * @fail_cnt_max: number of calls by which to delay failure mode
+ */
+struct ubifs_info {
+       struct super_block *vfs_sb;
+       struct backing_dev_info bdi;
+
+       ino_t highest_inum;
+       unsigned int vfs_gen;
+       unsigned long long max_sqnum;
+       unsigned long long cmt_no;
+       spinlock_t cnt_lock;
+       int fmt_version;
+       unsigned char uuid[16];
+
+       int lhead_lnum;
+       int lhead_offs;
+       int ltail_lnum;
+       struct mutex log_mutex;
+       int min_log_bytes;
+       long long cmt_bud_bytes;
+
+       struct rb_root buds;
+       long long bud_bytes;
+       spinlock_t buds_lock;
+       int jhead_cnt;
+       struct ubifs_jhead *jheads;
+       long long max_bud_bytes;
+       long long bg_bud_bytes;
+       struct list_head old_buds;
+       int max_bud_cnt;
+
+       struct rw_semaphore commit_sem;
+       int cmt_state;
+       spinlock_t cs_lock;
+       wait_queue_head_t cmt_wq;
+       unsigned int fast_unmount:1;
+       unsigned int big_lpt:1;
+       unsigned int check_lpt_free:1;
+       unsigned int nospace:1;
+       unsigned int nospace_rp:1;
+
+       struct mutex tnc_mutex;
+       struct ubifs_zbranch zroot;
+       struct ubifs_znode *cnext;
+       struct ubifs_znode *enext;
+       int *gap_lebs;
+       void *cbuf;
+       void *ileb_buf;
+       int ileb_len;
+       int ihead_lnum;
+       int ihead_offs;
+       int *ilebs;
+       int ileb_cnt;
+       int ileb_nxt;
+       struct rb_root old_idx;
+       int *bottom_up_buf;
+#ifdef CONFIG_UBIFS_FS_DEBUG
+       int new_ihead_lnum;
+       int new_ihead_offs;
+#endif /* CONFIG_UBIFS_FS_DEBUG */
+
+       struct ubifs_mst_node *mst_node;
+       int mst_offs;
+       struct mutex mst_mutex;
+
+       int log_lebs;
+       long long log_bytes;
+       int log_last;
+       int lpt_lebs;
+       int lpt_first;
+       int lpt_last;
+       int orph_lebs;
+       int orph_first;
+       int orph_last;
+       int main_lebs;
+       int main_first;
+       long long main_bytes;
+       int default_compr;
+
+       uint8_t key_hash_type;
+       uint32_t (*key_hash)(const char *str, int len);
+       int key_fmt;
+       int key_len;
+       int fanout;
+
+       int min_io_size;
+       int min_io_shift;
+       int leb_size;
+       int half_leb_size;
+       int leb_cnt;
+       int max_leb_cnt;
+       int old_leb_cnt;
+       int ro_media;
+
+       atomic_long_t dirty_pg_cnt;
+       atomic_long_t dirty_zn_cnt;
+       atomic_long_t clean_zn_cnt;
+
+       long long budg_idx_growth;
+       long long budg_data_growth;
+       long long budg_dd_growth;
+       long long budg_uncommitted_idx;
+       spinlock_t space_lock;
+       int min_idx_lebs;
+       unsigned long long old_idx_sz;
+       unsigned long long calc_idx_sz;
+       struct ubifs_lp_stats lst;
+
+       int page_budget;
+       int inode_budget;
+       int dent_budget;
+
+       int ref_node_alsz;
+       int mst_node_alsz;
+       int min_idx_node_sz;
+       int max_idx_node_sz;
+       long long max_inode_sz;
+       int max_znode_sz;
+       int dead_wm;
+       int dark_wm;
+       int block_cnt;
+
+       struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT];
+       struct ubi_volume_desc *ubi;
+       struct ubi_device_info di;
+       struct ubi_volume_info vi;
+
+       struct rb_root orph_tree;
+       struct list_head orph_list;
+       struct list_head orph_new;
+       struct ubifs_orphan *orph_cnext;
+       struct ubifs_orphan *orph_dnext;
+       spinlock_t orphan_lock;
+       void *orph_buf;
+       int new_orphans;
+       int cmt_orphans;
+       int tot_orphans;
+       int max_orphans;
+       int ohead_lnum;
+       int ohead_offs;
+       int no_orphs;
+
+       struct task_struct *bgt;
+       char bgt_name[sizeof(BGT_NAME_PATTERN) + 9];
+       int need_bgt;
+       int need_wbuf_sync;
+
+       int gc_lnum;
+       void *sbuf;
+       struct list_head idx_gc;
+       int idx_gc_cnt;
+
+       struct list_head infos_list;
+       struct mutex umount_mutex;
+       unsigned int shrinker_run_no;
+
+       int space_bits;
+       int lpt_lnum_bits;
+       int lpt_offs_bits;
+       int lpt_spc_bits;
+       int pcnt_bits;
+       int lnum_bits;
+       int nnode_sz;
+       int pnode_sz;
+       int ltab_sz;
+       int lsave_sz;
+       int pnode_cnt;
+       int nnode_cnt;
+       int lpt_hght;
+       int pnodes_have;
+
+       struct mutex lp_mutex;
+       int lpt_lnum;
+       int lpt_offs;
+       int nhead_lnum;
+       int nhead_offs;
+       int lpt_drty_flgs;
+       int dirty_nn_cnt;
+       int dirty_pn_cnt;
+       long long lpt_sz;
+       void *lpt_nod_buf;
+       void *lpt_buf;
+       struct ubifs_nnode *nroot;
+       struct ubifs_cnode *lpt_cnext;
+       struct ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT];
+       struct ubifs_lpt_heap dirty_idx;
+       struct list_head uncat_list;
+       struct list_head empty_list;
+       struct list_head freeable_list;
+       struct list_head frdi_idx_list;
+       int freeable_cnt;
+
+       int ltab_lnum;
+       int ltab_offs;
+       struct ubifs_lpt_lprops *ltab;
+       struct ubifs_lpt_lprops *ltab_cmt;
+       int lsave_cnt;
+       int lsave_lnum;
+       int lsave_offs;
+       int *lsave;
+       int lscan_lnum;
+
+       long long rp_size;
+       long long report_rp_size;
+       uid_t rp_uid;
+       gid_t rp_gid;
+
+       /* The below fields are used only during mounting and re-mounting */
+       int empty;
+       struct rb_root replay_tree;
+       struct list_head replay_list;
+       struct list_head replay_buds;
+       unsigned long long cs_sqnum;
+       unsigned long long replay_sqnum;
+       int need_recovery;
+       int replaying;
+       struct list_head unclean_leb_list;
+       struct ubifs_mst_node *rcvrd_mst_node;
+       struct rb_root size_tree;
+       int remounting_rw;
+       struct ubifs_mount_opts mount_opts;
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+       void *dbg_buf;
+       struct ubifs_zbranch old_zroot;
+       int old_zroot_level;
+       unsigned long long old_zroot_sqnum;
+       int failure_mode;
+       int fail_delay;
+       unsigned long fail_timeout;
+       unsigned int fail_cnt;
+       unsigned int fail_cnt_max;
+#endif /* CONFIG_UBIFS_FS_DEBUG */
+};
+
+extern struct list_head ubifs_infos;
+extern spinlock_t ubifs_infos_lock;
+extern atomic_long_t ubifs_clean_zn_cnt;
+extern struct kmem_cache *ubifs_inode_slab;
+extern struct super_operations ubifs_super_operations;
+extern struct address_space_operations ubifs_file_address_operations;
+extern struct file_operations ubifs_file_operations;
+extern struct inode_operations ubifs_file_inode_operations;
+extern struct file_operations ubifs_dir_operations;
+extern struct inode_operations ubifs_dir_inode_operations;
+extern struct inode_operations ubifs_symlink_inode_operations;
+extern struct backing_dev_info ubifs_backing_dev_info;
+extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
+
+/* io.c */
+int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
+int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
+                          int dtype);
+int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf);
+int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
+                   int lnum, int offs);
+int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
+                        int lnum, int offs);
+int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
+                    int offs, int dtype);
+int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
+                    int offs, int quiet);
+void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
+void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
+int ubifs_io_init(struct ubifs_info *c);
+void ubifs_pad(const struct ubifs_info *c, void *buf, int pad);
+int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf);
+int ubifs_bg_wbufs_sync(struct ubifs_info *c);
+void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum);
+int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode);
+
+/* scan.c */
+struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
+                                 int offs, void *sbuf);
+void ubifs_scan_destroy(struct ubifs_scan_leb *sleb);
+int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
+                     int offs, int quiet);
+struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
+                                       int offs, void *sbuf);
+void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+                   int lnum, int offs);
+int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+                  void *buf, int offs);
+void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
+                             void *buf);
+
+/* log.c */
+void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud);
+void ubifs_create_buds_lists(struct ubifs_info *c);
+int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs);
+struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum);
+struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum);
+int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum);
+int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum);
+int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum);
+int ubifs_consolidate_log(struct ubifs_info *c);
+
+/* journal.c */
+int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
+                    const struct qstr *nm, const struct inode *inode,
+                    int deletion, int xent);
+int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
+                        const union ubifs_key *key, const void *buf, int len);
+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
+                         int last_reference);
+int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+                    const struct dentry *old_dentry,
+                    const struct inode *new_dir,
+                    const struct dentry *new_dentry, int sync);
+int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
+                      loff_t old_size, loff_t new_size);
+int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
+                          const struct inode *inode, const struct qstr *nm);
+int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1,
+                          const struct inode *inode2);
+
+/* budget.c */
+int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req);
+void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req);
+void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
+                                     struct ubifs_inode *ui);
+int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode,
+                         struct ubifs_budget_req *req);
+void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
+                               struct ubifs_budget_req *req);
+void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
+                        struct ubifs_budget_req *req);
+long long ubifs_budg_get_free_space(struct ubifs_info *c);
+int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
+void ubifs_convert_page_budget(struct ubifs_info *c);
+long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
+
+/* find.c */
+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
+                         int squeeze);
+int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
+int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
+                        int min_space, int pick_free);
+int ubifs_find_dirty_idx_leb(struct ubifs_info *c);
+int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
+
+/* tnc.c */
+int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
+                       struct ubifs_znode **zn, int *n);
+int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
+                    void *node);
+int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
+                       void *node, const struct qstr *nm);
+int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
+                    void *node, int *lnum, int *offs);
+int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
+                 int offs, int len);
+int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
+                     int old_lnum, int old_offs, int lnum, int offs, int len);
+int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
+                    int lnum, int offs, int len, const struct qstr *nm);
+int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key);
+int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
+                       const struct qstr *nm);
+int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
+                          union ubifs_key *to_key);
+int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum);
+struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
+                                          union ubifs_key *key,
+                                          const struct qstr *nm);
+void ubifs_tnc_close(struct ubifs_info *c);
+int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level,
+                      int lnum, int offs, int is_idx);
+int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level,
+                        int lnum, int offs);
+/* Shared by tnc.c for tnc_commit.c */
+void destroy_old_idx(struct ubifs_info *c);
+int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
+                      int lnum, int offs);
+int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode);
+
+/* tnc_misc.c */
+struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
+                                             struct ubifs_znode *znode);
+int ubifs_search_zbranch(const struct ubifs_info *c,
+                        const struct ubifs_znode *znode,
+                        const union ubifs_key *key, int *n);
+struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode);
+struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode);
+long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr);
+struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
+                                    struct ubifs_zbranch *zbr,
+                                    struct ubifs_znode *parent, int iip);
+int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+                       void *node);
+
+/* tnc_commit.c */
+int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
+int ubifs_tnc_end_commit(struct ubifs_info *c);
+
+/* shrinker.c */
+int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+
+/* commit.c */
+int ubifs_bg_thread(void *info);
+void ubifs_commit_required(struct ubifs_info *c);
+void ubifs_request_bg_commit(struct ubifs_info *c);
+int ubifs_run_commit(struct ubifs_info *c);
+void ubifs_recovery_commit(struct ubifs_info *c);
+int ubifs_gc_should_commit(struct ubifs_info *c);
+void ubifs_wait_for_commit(struct ubifs_info *c);
+
+/* master.c */
+int ubifs_read_master(struct ubifs_info *c);
+int ubifs_write_master(struct ubifs_info *c);
+
+/* sb.c */
+int ubifs_read_superblock(struct ubifs_info *c);
+struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
+int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
+
+/* replay.c */
+int ubifs_validate_entry(struct ubifs_info *c,
+                        const struct ubifs_dent_node *dent);
+int ubifs_replay_journal(struct ubifs_info *c);
+
+/* gc.c */
+int ubifs_garbage_collect(struct ubifs_info *c, int anyway);
+int ubifs_gc_start_commit(struct ubifs_info *c);
+int ubifs_gc_end_commit(struct ubifs_info *c);
+void ubifs_destroy_idx_gc(struct ubifs_info *c);
+int ubifs_get_idx_gc_leb(struct ubifs_info *c);
+int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp);
+
+/* orphan.c */
+int ubifs_add_orphan(struct ubifs_info *c, ino_t inum);
+void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum);
+int ubifs_orphan_start_commit(struct ubifs_info *c);
+int ubifs_orphan_end_commit(struct ubifs_info *c);
+int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only);
+
+/* lpt.c */
+int ubifs_calc_lpt_geom(struct ubifs_info *c);
+int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
+                         int *lpt_lebs, int *big_lpt);
+int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr);
+struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum);
+struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum);
+int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum,
+                         ubifs_lpt_scan_callback scan_cb, void *data);
+
+/* Shared by lpt.c for lpt_commit.c */
+void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave);
+void ubifs_pack_ltab(struct ubifs_info *c, void *buf,
+                    struct ubifs_lpt_lprops *ltab);
+void ubifs_pack_pnode(struct ubifs_info *c, void *buf,
+                     struct ubifs_pnode *pnode);
+void ubifs_pack_nnode(struct ubifs_info *c, void *buf,
+                     struct ubifs_nnode *nnode);
+struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
+                                   struct ubifs_nnode *parent, int iip);
+struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
+                                   struct ubifs_nnode *parent, int iip);
+int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip);
+void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
+void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
+uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits);
+struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
+
+/* lpt_commit.c */
+int ubifs_lpt_start_commit(struct ubifs_info *c);
+int ubifs_lpt_end_commit(struct ubifs_info *c);
+int ubifs_lpt_post_commit(struct ubifs_info *c);
+void ubifs_lpt_free(struct ubifs_info *c, int wr_only);
+
+/* lprops.c */
+void ubifs_get_lprops(struct ubifs_info *c);
+const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
+                                          const struct ubifs_lprops *lp,
+                                          int free, int dirty, int flags,
+                                          int idx_gc_cnt);
+void ubifs_release_lprops(struct ubifs_info *c);
+void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats);
+void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
+                     int cat);
+void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
+                      struct ubifs_lprops *new_lprops);
+void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops);
+int ubifs_categorize_lprops(const struct ubifs_info *c,
+                           const struct ubifs_lprops *lprops);
+int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
+                       int flags_set, int flags_clean, int idx_gc_cnt);
+int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
+                       int flags_set, int flags_clean);
+int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp);
+const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c);
+const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c);
+const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c);
+const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
+
+/* file.c */
+int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
+int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
+
+/* dir.c */
+struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
+                             int mode);
+int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                 struct kstat *stat);
+
+/* xattr.c */
+int ubifs_setxattr(struct dentry *dentry, const char *name,
+                  const void *value, size_t size, int flags);
+ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
+                      size_t size);
+ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+int ubifs_removexattr(struct dentry *dentry, const char *name);
+
+/* super.c */
+struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
+
+/* recovery.c */
+int ubifs_recover_master_node(struct ubifs_info *c);
+int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
+struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
+                                        int offs, void *sbuf, int grouped);
+struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
+                                            int offs, void *sbuf);
+int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
+int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf);
+int ubifs_rcvry_gc_commit(struct ubifs_info *c);
+int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
+                            int deletion, loff_t new_size);
+int ubifs_recover_size(struct ubifs_info *c);
+void ubifs_destroy_size_tree(struct ubifs_info *c);
+
+/* ioctl.c */
+long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+void ubifs_set_inode_flags(struct inode *inode);
+#ifdef CONFIG_COMPAT
+long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+#endif
+
+/* compressor.c */
+int __init ubifs_compressors_init(void);
+void __exit ubifs_compressors_exit(void);
+void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
+                   int *compr_type);
+int ubifs_decompress(const void *buf, int len, void *out, int *out_len,
+                    int compr_type);
+
+#include "debug.h"
+#include "misc.h"
+#include "key.h"
+
+#endif /* !__UBIFS_H__ */
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c

new file mode 100644 (file)

index 0000000..1388a07
--- /dev/null
+++ b/fs/ubifs/xattr.c
@@ -0,0 +1,581 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file implements UBIFS extended attributes support.
+ *
+ * Extended attributes are implemented as regular inodes with attached data,
+ * which limits extended attribute size to UBIFS block size (4KiB). Names of
+ * extended attributes are described by extended attribute entries (xentries),
+ * which are almost identical to directory entries, but have different key type.
+ *
+ * In other words, the situation with extended attributes is very similar to
+ * directories. Indeed, any inode (but of course not xattr inodes) may have a
+ * number of associated xentries, just like directory inodes have associated
+ * directory entries. Extended attribute entries store the name of the extended
+ * attribute, the host inode number, and the extended attribute inode number.
+ * Similarly, direntries store the name, the parent and the target inode
+ * numbers. Thus, most of the common UBIFS mechanisms may be re-used for
+ * extended attributes.
+ *
+ * The number of extended attributes is not limited, but there is Linux
+ * limitation on the maximum possible size of the list of all extended
+ * attributes associated with an inode (%XATTR_LIST_MAX), so UBIFS makes sure
+ * the sum of all extended attribute names of the inode does not exceed that
+ * limit.
+ *
+ * Extended attributes are synchronous, which means they are written to the
+ * flash media synchronously and there is no write-back for extended attribute
+ * inodes. The extended attribute values are not stored in compressed form on
+ * the media.
+ *
+ * Since extended attributes are represented by regular inodes, they are cached
+ * in the VFS inode cache. The xentries are cached in the LNC cache (see
+ * tnc.c).
+ *
+ * ACL support is not implemented.
+ */
+
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include "ubifs.h"
+
+/*
+ * Limit the number of extended attributes per inode so that the total size
+ * (xattr_size) is guaranteed to fit in an 'unsigned int'.
+ */
+#define MAX_XATTRS_PER_INODE 65535
+
+/*
+ * Extended attribute type constants.
+ *
+ * USER_XATTR: user extended attribute ("user.*")
+ * TRUSTED_XATTR: trusted extended attribute ("trusted.*")
+ * SECURITY_XATTR: security extended attribute ("security.*")
+ */
+enum {
+       USER_XATTR,
+       TRUSTED_XATTR,
+       SECURITY_XATTR,
+};
+
+/*
+ * Operation tables for extended attribute inodes. They are declared without
+ * initializers, so every callback in them is %NULL; 'create_xattr()' installs
+ * them on new xattr inodes to re-define all operations to be "nothing".
+ */
+static struct inode_operations none_inode_operations;
+static struct address_space_operations none_address_operations;
+static struct file_operations none_file_operations;
+
+/**
+ * create_xattr - create an extended attribute.
+ * @c: UBIFS file-system description object
+ * @host: host inode
+ * @nm: extended attribute name
+ * @value: extended attribute value
+ * @size: size of extended attribute value
+ *
+ * This is a helper function which creates an extended attribute of name @nm
+ * and value @value for inode @host. The host inode is also updated on flash
+ * because the ctime and extended attribute accounting data changes. If the
+ * journal update fails, every host accounting field changed here (xattr_cnt,
+ * xattr_size and xattr_names) is rolled back. This function returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+static int create_xattr(struct ubifs_info *c, struct inode *host,
+                       const struct qstr *nm, const void *value, int size)
+{
+       int err;
+       struct inode *inode;
+       struct ubifs_inode *ui, *host_ui = ubifs_inode(host);
+       struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
+                                       .new_ino_d = size, .dirtied_ino = 1,
+                                       .dirtied_ino_d = host_ui->data_len};
+
+       if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE)
+               return -ENOSPC;
+       /*
+        * Linux limits the maximum size of the extended attribute names list
+        * to %XATTR_LIST_MAX. This means we should not allow creating more
+        * extended attributes if the name list becomes larger. This limitation
+        * is artificial for UBIFS, though.
+        */
+       if (host_ui->xattr_names + host_ui->xattr_cnt +
+                                       nm->len + 1 > XATTR_LIST_MAX)
+               return -ENOSPC;
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO);
+       if (IS_ERR(inode)) {
+               err = PTR_ERR(inode);
+               goto out_budg;
+       }
+
+       mutex_lock(&host_ui->ui_mutex);
+       /* Re-define all operations to be "nothing" */
+       inode->i_mapping->a_ops = &none_address_operations;
+       inode->i_op = &none_inode_operations;
+       inode->i_fop = &none_file_operations;
+
+       inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
+       ui = ubifs_inode(inode);
+       ui->xattr = 1;
+       ui->flags |= UBIFS_XATTR_FL;
+       ui->data = kmalloc(size, GFP_NOFS);
+       if (!ui->data) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       memcpy(ui->data, value, size);
+       host->i_ctime = ubifs_current_time(host);
+       host_ui->xattr_cnt += 1;
+       host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
+       host_ui->xattr_size += CALC_XATTR_BYTES(size);
+       host_ui->xattr_names += nm->len;
+
+       /*
+        * We do not use i_size_write() because nobody can race with us as we
+        * are holding host @host->i_mutex - every xattr operation for this
+        * inode is serialized by it.
+        */
+       inode->i_size = ui->ui_size = size;
+       ui->data_len = size;
+       err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
+       if (err)
+               goto out_cancel;
+       mutex_unlock(&host_ui->ui_mutex);
+
+       ubifs_release_budget(c, &req);
+       insert_inode_hash(inode);
+       iput(inode);
+       return 0;
+
+out_cancel:
+       /* Undo every accounting change made above, including the names sum */
+       host_ui->xattr_cnt -= 1;
+       host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
+       host_ui->xattr_size -= CALC_XATTR_BYTES(size);
+       host_ui->xattr_names -= nm->len;
+out_unlock:
+       mutex_unlock(&host_ui->ui_mutex);
+       make_bad_inode(inode);
+       iput(inode);
+out_budg:
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/**
+ * change_xattr - change an extended attribute.
+ * @c: UBIFS file-system description object
+ * @host: host inode
+ * @inode: extended attribute inode
+ * @value: extended attribute value
+ * @size: size of extended attribute value
+ *
+ * This helper function changes the value of extended attribute @inode with new
+ * data from @value. The new buffer is allocated before any state is modified,
+ * so an allocation failure leaves the old value and the host accounting
+ * untouched. If the journal update fails, the host's xattr_size is restored
+ * using the old value length and the xattr inode is marked bad. Returns zero
+ * in case of success and a negative error code in case of failure.
+ */
+static int change_xattr(struct ubifs_info *c, struct inode *host,
+                       struct inode *inode, const void *value, int size)
+{
+       int err, old_len;
+       void *buf;
+       struct ubifs_inode *host_ui = ubifs_inode(host);
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       struct ubifs_budget_req req = { .dirtied_ino = 2,
+                               .dirtied_ino_d = size + host_ui->data_len };
+
+       ubifs_assert(ui->data_len == inode->i_size);
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       /*
+        * Allocate and fill the new value first: if this fails we must not
+        * have freed the old data or changed any accounting yet.
+        */
+       buf = kmalloc(size, GFP_NOFS);
+       if (!buf) {
+               err = -ENOMEM;
+               goto out_budg;
+       }
+       memcpy(buf, value, size);
+
+       mutex_lock(&host_ui->ui_mutex);
+       host->i_ctime = ubifs_current_time(host);
+       /* Remember the old length - it is needed to roll back on failure */
+       old_len = ui->data_len;
+       host_ui->xattr_size -= CALC_XATTR_BYTES(old_len);
+       host_ui->xattr_size += CALC_XATTR_BYTES(size);
+
+       kfree(ui->data);
+       ui->data = buf;
+       inode->i_size = ui->ui_size = size;
+       ui->data_len = size;
+
+       /*
+        * It is important to write the host inode after the xattr inode
+        * because if the host inode gets synchronized (via 'fsync()'), then
+        * the extended attribute inode gets synchronized, because it goes
+        * before the host inode in the write-buffer.
+        */
+       err = ubifs_jnl_change_xattr(c, inode, host);
+       if (err)
+               goto out_cancel;
+       mutex_unlock(&host_ui->ui_mutex);
+
+       ubifs_release_budget(c, &req);
+       return 0;
+
+out_cancel:
+       host_ui->xattr_size -= CALC_XATTR_BYTES(size);
+       host_ui->xattr_size += CALC_XATTR_BYTES(old_len);
+       make_bad_inode(inode);
+       mutex_unlock(&host_ui->ui_mutex);
+out_budg:
+       ubifs_release_budget(c, &req);
+       return err;
+}
+
+/**
+ * check_namespace - check extended attribute name-space.
+ * @nm: extended attribute name
+ *
+ * Validates that @nm is not longer than %UBIFS_MAX_NLEN and that it starts
+ * with one of the supported prefixes ("trusted.", "user." or "security.")
+ * followed by at least one more character. Returns the name-space index
+ * (%TRUSTED_XATTR, %USER_XATTR or %SECURITY_XATTR) in case of success,
+ * %-ENAMETOOLONG if the name is too long, %-EINVAL if nothing follows the
+ * prefix, and %-EOPNOTSUPP if the prefix is not recognized.
+ */
+static int check_namespace(const struct qstr *nm)
+{
+       if (nm->len > UBIFS_MAX_NLEN)
+               return -ENAMETOOLONG;
+
+       if (strncmp(nm->name, XATTR_TRUSTED_PREFIX,
+                   XATTR_TRUSTED_PREFIX_LEN) == 0) {
+               /* A bare prefix with an empty attribute name is invalid */
+               if (nm->name[XATTR_TRUSTED_PREFIX_LEN] == '\0')
+                       return -EINVAL;
+               return TRUSTED_XATTR;
+       }
+
+       if (strncmp(nm->name, XATTR_USER_PREFIX,
+                   XATTR_USER_PREFIX_LEN) == 0) {
+               if (nm->name[XATTR_USER_PREFIX_LEN] == '\0')
+                       return -EINVAL;
+               return USER_XATTR;
+       }
+
+       if (strncmp(nm->name, XATTR_SECURITY_PREFIX,
+                   XATTR_SECURITY_PREFIX_LEN) == 0) {
+               if (nm->name[XATTR_SECURITY_PREFIX_LEN] == '\0')
+                       return -EINVAL;
+               return SECURITY_XATTR;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+/**
+ * iget_xattr - get an extended attribute inode.
+ * @c: UBIFS file-system description object
+ * @inum: extended attribute inode number
+ *
+ * Looks the inode up with 'ubifs_iget()' and verifies that it is flagged as
+ * an extended attribute inode. Returns the inode on success; the caller has
+ * to release it with 'iput()'. Returns an error pointer if the look-up
+ * fails, or %-EINVAL (as an error pointer) if the inode is not an xattr
+ * inode.
+ */
+static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum)
+{
+       struct inode *xino = ubifs_iget(c->vfs_sb, inum);
+
+       if (IS_ERR(xino)) {
+               ubifs_err("dead extended attribute entry, error %d",
+                         (int)PTR_ERR(xino));
+               return xino;
+       }
+
+       if (!ubifs_inode(xino)->xattr) {
+               /* The entry points to an inode which is not an xattr inode */
+               ubifs_err("corrupt extended attribute entry");
+               iput(xino);
+               return ERR_PTR(-EINVAL);
+       }
+
+       return xino;
+}
+
+/**
+ * ubifs_setxattr - set an extended attribute.
+ * @dentry: dentry of the host inode
+ * @name: full extended attribute name, including the name-space prefix
+ * @value: extended attribute value
+ * @size: size of @value in bytes
+ * @flags: %XATTR_CREATE and/or %XATTR_REPLACE, or zero
+ *
+ * Creates the extended attribute if it does not exist yet, otherwise changes
+ * its value. Returns zero in case of success and a negative error code in
+ * case of failure: %-ERANGE if @size exceeds %UBIFS_MAX_INO_DATA, %-ENODATA
+ * if %XATTR_REPLACE was requested for a missing attribute, and %-EEXIST if
+ * %XATTR_CREATE was requested for an existing one.
+ */
+int ubifs_setxattr(struct dentry *dentry, const char *name,
+                  const void *value, size_t size, int flags)
+{
+       struct inode *host = dentry->d_inode, *xino;
+       struct ubifs_info *c = host->i_sb->s_fs_info;
+       struct qstr nm = { .name = name, .len = strlen(name) };
+       struct ubifs_dent_node *xent;
+       union ubifs_key key;
+       int err, ns;
+
+       dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
+               host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+
+       if (size > UBIFS_MAX_INO_DATA)
+               return -ERANGE;
+
+       ns = check_namespace(&nm);
+       if (ns < 0)
+               return ns;
+
+       xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
+       if (!xent)
+               return -ENOMEM;
+
+       /*
+        * The extended attribute entries are stored in LNC, so multiple
+        * look-ups do not involve reading the flash.
+        */
+       xent_key_init(c, &key, host->i_ino, &nm);
+       err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
+       if (err == -ENOENT) {
+               /* No such xattr yet: create it unless asked not to */
+               if (flags & XATTR_REPLACE)
+                       err = -ENODATA;
+               else
+                       err = create_xattr(c, host, &nm, value, size);
+               goto out_free;
+       }
+       if (err)
+               goto out_free;
+
+       /* The xattr exists: replace it unless asked not to */
+       if (flags & XATTR_CREATE) {
+               err = -EEXIST;
+               goto out_free;
+       }
+
+       xino = iget_xattr(c, le64_to_cpu(xent->inum));
+       if (IS_ERR(xino)) {
+               err = PTR_ERR(xino);
+               goto out_free;
+       }
+
+       err = change_xattr(c, host, xino, value, size);
+       iput(xino);
+
+out_free:
+       kfree(xent);
+       return err;
+}
+
+/**
+ * ubifs_getxattr - read the value of an extended attribute.
+ * @dentry: dentry of the host inode
+ * @name: full extended attribute name, including the name-space prefix
+ * @buf: buffer to copy the value to, or %NULL to only query the length
+ * @size: size of @buf in bytes
+ *
+ * Returns the length of the attribute value in case of success and a
+ * negative error code in case of failure: %-ENODATA if the attribute does
+ * not exist and %-ERANGE if @buf is too small to hold the value. The host
+ * inode's @i_mutex is taken for the duration of the look-up and copy.
+ */
+ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
+                      size_t size)
+{
+       struct inode *host = dentry->d_inode, *xino;
+       struct ubifs_info *c = host->i_sb->s_fs_info;
+       struct qstr nm = { .name = name, .len = strlen(name) };
+       struct ubifs_inode *xui;
+       struct ubifs_dent_node *xent;
+       union ubifs_key key;
+       int err;
+
+       dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name,
+               host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+
+       err = check_namespace(&nm);
+       if (err < 0)
+               return err;
+
+       xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
+       if (!xent)
+               return -ENOMEM;
+
+       mutex_lock(&host->i_mutex);
+       xent_key_init(c, &key, host->i_ino, &nm);
+       err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
+       if (err == -ENOENT)
+               err = -ENODATA;
+       if (err)
+               goto out_unlock;
+
+       xino = iget_xattr(c, le64_to_cpu(xent->inum));
+       if (IS_ERR(xino)) {
+               err = PTR_ERR(xino);
+               goto out_unlock;
+       }
+
+       xui = ubifs_inode(xino);
+       ubifs_assert(xino->i_size == xui->data_len);
+       ubifs_assert(ubifs_inode(host)->xattr_size > xui->data_len);
+
+       if (buf && xui->data_len > size) {
+               dbg_err("buffer size %zd, xattr len %d",
+                       size, xui->data_len);
+               err = -ERANGE;
+       } else {
+               /* With a %NULL @buf we only report the value length */
+               if (buf)
+                       memcpy(buf, xui->data, xui->data_len);
+               err = xui->data_len;
+       }
+
+       iput(xino);
+out_unlock:
+       mutex_unlock(&host->i_mutex);
+       kfree(xent);
+       return err;
+}
+
+/**
+ * ubifs_listxattr - list extended attribute names of an inode.
+ * @dentry: dentry of the host inode
+ * @buffer: buffer to store the null-terminated names to, or %NULL to only
+ *          query the required buffer size
+ * @size: size of @buffer in bytes
+ *
+ * Walks all extended attribute entries of the host inode and copies their
+ * names, each followed by a null byte, to @buffer. Names in the "trusted"
+ * name-space are copied only if the caller has %CAP_SYS_ADMIN. Returns the
+ * number of bytes written (or the required buffer size if @buffer is %NULL)
+ * in case of success and a negative error code in case of failure; %-ERANGE
+ * means @buffer is too small.
+ */
+ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+       union ubifs_key key;
+       struct inode *host = dentry->d_inode;
+       struct ubifs_info *c = host->i_sb->s_fs_info;
+       struct ubifs_inode *host_ui = ubifs_inode(host);
+       struct ubifs_dent_node *xent, *pxent = NULL;
+       int err, len, written = 0;
+       struct qstr nm = { .name = NULL };
+
+       dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino,
+               dentry->d_name.len, dentry->d_name.name, size);
+
+       len = host_ui->xattr_names + host_ui->xattr_cnt;
+       if (!buffer)
+               /*
+                * We should return the minimum buffer size which will fit a
+                * null-terminated list of all the extended attribute names.
+                */
+               return len;
+
+       if (len > size)
+               return -ERANGE;
+
+       lowest_xent_key(c, &key, host->i_ino);
+
+       mutex_lock(&host->i_mutex);
+       /* The loop is left only via 'break', and @err is set on each of them */
+       while (1) {
+               int type;
+
+               xent = ubifs_tnc_next_ent(c, &key, &nm);
+               if (unlikely(IS_ERR(xent))) {
+                       err = PTR_ERR(xent);
+                       break;
+               }
+
+               nm.name = xent->name;
+               nm.len = le16_to_cpu(xent->nlen);
+
+               type = check_namespace(&nm);
+               if (unlikely(type < 0)) {
+                       /*
+                        * @xent has not been handed over to @pxent yet, so it
+                        * has to be freed here or it would be leaked.
+                        */
+                       kfree(xent);
+                       err = type;
+                       break;
+               }
+
+               /* Show trusted namespace only for "power" users */
+               if (type != TRUSTED_XATTR || capable(CAP_SYS_ADMIN)) {
+                       memcpy(buffer + written, nm.name, nm.len + 1);
+                       written += nm.len + 1;
+               }
+
+               /*
+                * Keep the current entry alive until the next iteration,
+                * because @nm.name points into it.
+                */
+               kfree(pxent);
+               pxent = xent;
+               key_read(c, &xent->key, &key);
+       }
+       mutex_unlock(&host->i_mutex);
+
+       kfree(pxent);
+       /* %-ENOENT is the normal loop termination: no more entries */
+       if (err != -ENOENT) {
+               ubifs_err("cannot find next direntry, error %d", err);
+               return err;
+       }
+
+       ubifs_assert(written <= size);
+       return written;
+}
+
+/**
+ * remove_xattr - remove an extended attribute.
+ * @c: UBIFS file-system description object
+ * @host: host inode
+ * @inode: extended attribute inode
+ * @nm: extended attribute name
+ *
+ * This helper function updates the extended attribute accounting of the host
+ * inode and deletes the extended attribute via the journal. If the journal
+ * operation fails, every accounting field changed here (xattr_cnt,
+ * xattr_size and xattr_names) is rolled back and the xattr inode is marked
+ * bad. Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int remove_xattr(struct ubifs_info *c, struct inode *host,
+                       struct inode *inode, const struct qstr *nm)
+{
+       int err;
+       struct ubifs_inode *host_ui = ubifs_inode(host);
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1,
+                                       .dirtied_ino_d = host_ui->data_len };
+
+       ubifs_assert(ui->data_len == inode->i_size);
+
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               return err;
+
+       mutex_lock(&host_ui->ui_mutex);
+       host->i_ctime = ubifs_current_time(host);
+       host_ui->xattr_cnt -= 1;
+       host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
+       host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
+       host_ui->xattr_names -= nm->len;
+
+       err = ubifs_jnl_delete_xattr(c, host, inode, nm);
+       if (err)
+               goto out_cancel;
+       mutex_unlock(&host_ui->ui_mutex);
+
+       ubifs_release_budget(c, &req);
+       return 0;
+
+out_cancel:
+       /* Undo every accounting change made above, including the names sum */
+       host_ui->xattr_cnt += 1;
+       host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
+       host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
+       host_ui->xattr_names += nm->len;
+       mutex_unlock(&host_ui->ui_mutex);
+       ubifs_release_budget(c, &req);
+       make_bad_inode(inode);
+       return err;
+}
+
+/**
+ * ubifs_removexattr - remove an extended attribute.
+ * @dentry: dentry of the host inode
+ * @name: full extended attribute name, including the name-space prefix
+ *
+ * Looks the extended attribute up, drops the link count of its inode to zero
+ * and removes it with 'remove_xattr()'. The host inode's @i_mutex is asserted
+ * to be held. Returns zero in case of success and a negative error code in
+ * case of failure; %-ENODATA means the attribute does not exist.
+ */
+int ubifs_removexattr(struct dentry *dentry, const char *name)
+{
+       struct inode *host = dentry->d_inode, *xino;
+       struct ubifs_info *c = host->i_sb->s_fs_info;
+       struct qstr nm = { .name = name, .len = strlen(name) };
+       struct ubifs_dent_node *xent;
+       union ubifs_key key;
+       int err;
+
+       dbg_gen("xattr '%s', ino %lu ('%.*s')", name,
+               host->i_ino, dentry->d_name.len, dentry->d_name.name);
+       ubifs_assert(mutex_is_locked(&host->i_mutex));
+
+       err = check_namespace(&nm);
+       if (err < 0)
+               return err;
+
+       xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
+       if (!xent)
+               return -ENOMEM;
+
+       xent_key_init(c, &key, host->i_ino, &nm);
+       err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
+       if (err == -ENOENT)
+               err = -ENODATA;
+       if (err)
+               goto out_free;
+
+       xino = iget_xattr(c, le64_to_cpu(xent->inum));
+       if (IS_ERR(xino)) {
+               err = PTR_ERR(xino);
+               goto out_free;
+       }
+
+       ubifs_assert(xino->i_nlink == 1);
+       xino->i_nlink = 0;
+       err = remove_xattr(c, host, xino, &nm);
+       if (err)
+               /* Removal failed, so the xattr inode must stay linked */
+               xino->i_nlink = 1;
+
+       /* If @i_nlink is 0, 'iput()' will delete the inode */
+       iput(xino);
+
+out_free:
+       kfree(xent);
+       return err;
+}
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h

index 28fe8bae103703405c08029ae1dafa40ff586613..4eb75a88795ad21da088d394f5e08faa898e04ad 100644 (file)
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -63,7 +63,7 @@
  
  /* Current ACPICA subsystem version in YYYYMMDD format */
  
-#define ACPI_CA_VERSION                 0x20080321
+#define ACPI_CA_VERSION                0x20080609
  
  /*
   * OS name, used for the _OS object.  The _OS object is essentially obsolete,
diff --git a/include/acpi/acdisasm.h b/include/acpi/acdisasm.h

index 788f887820126ede28352c5824957f4964a7bcc8..f53faca8ec8055cc367968aa9982ddd8ca0fcad0 100644 (file)
--- a/include/acpi/acdisasm.h
+++ b/include/acpi/acdisasm.h
@@ -162,6 +162,7 @@ extern struct acpi_dmtable_info acpi_dm_table_info_dmar_hdr[];
  extern struct acpi_dmtable_info acpi_dm_table_info_dmar_scope[];
  extern struct acpi_dmtable_info acpi_dm_table_info_dmar0[];
  extern struct acpi_dmtable_info acpi_dm_table_info_dmar1[];
+extern struct acpi_dmtable_info acpi_dm_table_info_dmar2[];
  extern struct acpi_dmtable_info acpi_dm_table_info_ecdt[];
  extern struct acpi_dmtable_info acpi_dm_table_info_einj[];
  extern struct acpi_dmtable_info acpi_dm_table_info_einj0[];
diff --git a/include/acpi/acdispat.h b/include/acpi/acdispat.h

index 910f018d92c74333a8a340e0f27f5a98951f4175..21a73a105d0ab2920962c4febb1e6a2d795dd263 100644 (file)
--- a/include/acpi/acdispat.h
+++ b/include/acpi/acdispat.h
@@ -221,7 +221,7 @@ acpi_ds_method_error(acpi_status status, struct acpi_walk_state *walk_state);
   * dsinit
   */
  acpi_status
-acpi_ds_initialize_objects(acpi_native_uint table_index,
+acpi_ds_initialize_objects(u32 table_index,
                            struct acpi_namespace_node *start_node);
  
  /*
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h

index 1f591171bf317d2e84c4baf7e6e971f37235d978..e5a890ffeb02c26ec55f73ff4a0ff58912306087 100644 (file)
--- a/include/acpi/acexcep.h
+++ b/include/acpi/acexcep.h
@@ -108,8 +108,9 @@
  #define AE_BAD_HEX_CONSTANT             (acpi_status) (0x0007 | AE_CODE_PROGRAMMER)
  #define AE_BAD_OCTAL_CONSTANT           (acpi_status) (0x0008 | AE_CODE_PROGRAMMER)
  #define AE_BAD_DECIMAL_CONSTANT         (acpi_status) (0x0009 | AE_CODE_PROGRAMMER)
+#define AE_MISSING_ARGUMENTS           (acpi_status) (0x000A | AE_CODE_PROGRAMMER)
  
-#define AE_CODE_PGM_MAX                 0x0009
+#define AE_CODE_PGM_MAX                0x000A
  
  /*
   * Acpi table exceptions
@@ -225,6 +226,7 @@ char const *acpi_gbl_exception_names_env[] = {
  };
  
  char const *acpi_gbl_exception_names_pgm[] = {
+       NULL,
         "AE_BAD_PARAMETER",
         "AE_BAD_CHARACTER",
         "AE_BAD_PATHNAME",
@@ -233,10 +235,12 @@ char const *acpi_gbl_exception_names_pgm[] = {
         "AE_ALIGNMENT",
         "AE_BAD_HEX_CONSTANT",
         "AE_BAD_OCTAL_CONSTANT",
-       "AE_BAD_DECIMAL_CONSTANT"
+       "AE_BAD_DECIMAL_CONSTANT",
+       "AE_MISSING_ARGUMENTS"
  };
  
  char const *acpi_gbl_exception_names_tbl[] = {
+       NULL,
         "AE_BAD_SIGNATURE",
         "AE_BAD_HEADER",
         "AE_BAD_CHECKSUM",
@@ -246,6 +250,7 @@ char const *acpi_gbl_exception_names_tbl[] = {
  };
  
  char const *acpi_gbl_exception_names_aml[] = {
+       NULL,
         "AE_AML_ERROR",
         "AE_AML_PARSE",
         "AE_AML_BAD_OPCODE",
@@ -283,6 +288,7 @@ char const *acpi_gbl_exception_names_aml[] = {
  };
  
  char const *acpi_gbl_exception_names_ctrl[] = {
+       NULL,
         "AE_CTRL_RETURN_VALUE",
         "AE_CTRL_PENDING",
         "AE_CTRL_TERMINATE",
diff --git a/include/acpi/acglobal.h b/include/acpi/acglobal.h

index 74ad971241dbb6cb46a733dc02dc0b894eb59e64..15dda46b70d1c1e36c949d9555efc7a805ccf321 100644 (file)
--- a/include/acpi/acglobal.h
+++ b/include/acpi/acglobal.h
@@ -140,7 +140,7 @@ ACPI_EXTERN u32 acpi_gbl_trace_flags;
   */
  ACPI_EXTERN struct acpi_internal_rsdt acpi_gbl_root_table_list;
  ACPI_EXTERN struct acpi_table_fadt acpi_gbl_FADT;
-extern acpi_native_uint acpi_gbl_permanent_mmap;
+extern u8 acpi_gbl_permanent_mmap;
  
  /* These addresses are calculated from FADT address values */
  
diff --git a/include/acpi/achware.h b/include/acpi/achware.h

index d4fb9bbc903c0a3f417ea195d8650af87757ef8b..97a72b19327683843bc95219d643406b04426b59 100644 (file)
--- a/include/acpi/achware.h
+++ b/include/acpi/achware.h
@@ -87,6 +87,8 @@ acpi_status acpi_hw_clear_acpi_status(void);
  /*
   * hwgpe - GPE support
   */
+acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info);
+
  acpi_status
  acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info *gpe_event_info);
  
@@ -100,11 +102,9 @@ acpi_status
  acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
                         struct acpi_gpe_block_info *gpe_block);
  
-#ifdef ACPI_FUTURE_USAGE
  acpi_status
  acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
                        acpi_event_status * event_status);
-#endif                         /* ACPI_FUTURE_USAGE */
  
  acpi_status acpi_hw_disable_all_gpes(void);
  
diff --git a/include/acpi/acinterp.h b/include/acpi/acinterp.h

index e249ce5d33003be88eee37914286a9c4975346d3..e8db7a3143a5c1244ee904152be7187d8e1e2e0b 100644 (file)
--- a/include/acpi/acinterp.h
+++ b/include/acpi/acinterp.h
@@ -366,10 +366,7 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth);
  
  void
  acpi_ex_dump_operands(union acpi_operand_object **operands,
-                     acpi_interpreter_mode interpreter_mode,
-                     char *ident,
-                     u32 num_levels,
-                     char *note, char *module_name, u32 line_number);
+                     const char *opcode_name, u32 num_opcodes);
  
  #ifdef ACPI_FUTURE_USAGE
  void
diff --git a/include/acpi/aclocal.h b/include/acpi/aclocal.h

index c5cdc32ac2f8f623deb3515bd367a4fca2ee5da7..b221c8583dddf22966e65b3e974d508e17f43079 100644 (file)
--- a/include/acpi/aclocal.h
+++ b/include/acpi/aclocal.h
@@ -98,8 +98,8 @@ union acpi_parse_object;
  
  static char *acpi_gbl_mutex_names[ACPI_NUM_MUTEX] = {
         "ACPI_MTX_Interpreter",
-       "ACPI_MTX_Tables",
         "ACPI_MTX_Namespace",
+       "ACPI_MTX_Tables",
         "ACPI_MTX_Events",
         "ACPI_MTX_Caches",
         "ACPI_MTX_Memory",
@@ -282,8 +282,8 @@ struct acpi_predefined_names {
  /* Info structure used to convert external<->internal namestrings */
  
  struct acpi_namestring_info {
-       char *external_name;
-       char *next_external_char;
+       const char *external_name;
+       const char *next_external_char;
         char *internal_name;
         u32 length;
         u32 num_segments;
diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h

index fb41a3b802fc500e7eafb419cc84063f9d1dd748..57ab9e9d7593e6213ff2342f55d2dbaaab92ff80 100644 (file)
--- a/include/acpi/acmacros.h
+++ b/include/acpi/acmacros.h
@@ -80,12 +80,12 @@
   */
  #define ACPI_CAST_PTR(t, p)             ((t *) (acpi_uintptr_t) (p))
  #define ACPI_CAST_INDIRECT_PTR(t, p)    ((t **) (acpi_uintptr_t) (p))
-#define ACPI_ADD_PTR(t,a,b)             ACPI_CAST_PTR (t, (ACPI_CAST_PTR (u8,(a)) + (acpi_native_uint)(b)))
-#define ACPI_PTR_DIFF(a,b)              (acpi_native_uint) (ACPI_CAST_PTR (u8,(a)) - ACPI_CAST_PTR (u8,(b)))
+#define ACPI_ADD_PTR(t, a, b)          ACPI_CAST_PTR (t, (ACPI_CAST_PTR (u8,(a)) + (acpi_size)(b)))
+#define ACPI_PTR_DIFF(a, b)            (acpi_size) (ACPI_CAST_PTR (u8,(a)) - ACPI_CAST_PTR (u8,(b)))
  
  /* Pointer/Integer type conversions */
  
-#define ACPI_TO_POINTER(i)              ACPI_ADD_PTR (void,(void *) NULL,(acpi_native_uint) i)
+#define ACPI_TO_POINTER(i)             ACPI_ADD_PTR (void, (void *) NULL, (acpi_size) i)
  #define ACPI_TO_INTEGER(p)              ACPI_PTR_DIFF (p,(void *) NULL)
  #define ACPI_OFFSET(d,f)                (acpi_size) ACPI_PTR_DIFF (&(((d *)0)->f),(void *) NULL)
  #define ACPI_PHYSADDR_TO_PTR(i)         ACPI_TO_POINTER(i)
@@ -296,22 +296,22 @@ struct acpi_integer_overlay {
  /*
   * Rounding macros (Power of two boundaries only)
   */
-#define ACPI_ROUND_DOWN(value,boundary)     (((acpi_native_uint)(value)) & \
-                                                                                               (~(((acpi_native_uint) boundary)-1)))
+#define ACPI_ROUND_DOWN(value, boundary)     (((acpi_size)(value)) & \
+                                               (~(((acpi_size) boundary)-1)))
  
-#define ACPI_ROUND_UP(value,boundary)       ((((acpi_native_uint)(value)) + \
-                                                                                               (((acpi_native_uint) boundary)-1)) & \
-                                                                                               (~(((acpi_native_uint) boundary)-1)))
+#define ACPI_ROUND_UP(value, boundary)      ((((acpi_size)(value)) + \
+                                               (((acpi_size) boundary)-1)) & \
+                                               (~(((acpi_size) boundary)-1)))
  
-/* Note: sizeof(acpi_native_uint) evaluates to either 2, 4, or 8 */
+/* Note: sizeof(acpi_size) evaluates to either 4 or 8 (32- vs 64-bit mode) */
  
  #define ACPI_ROUND_DOWN_TO_32BIT(a)         ACPI_ROUND_DOWN(a,4)
  #define ACPI_ROUND_DOWN_TO_64BIT(a)         ACPI_ROUND_DOWN(a,8)
-#define ACPI_ROUND_DOWN_TO_NATIVE_WORD(a)   ACPI_ROUND_DOWN(a,sizeof(acpi_native_uint))
+#define ACPI_ROUND_DOWN_TO_NATIVE_WORD(a)   ACPI_ROUND_DOWN(a,sizeof(acpi_size))
  
  #define ACPI_ROUND_UP_TO_32BIT(a)           ACPI_ROUND_UP(a,4)
  #define ACPI_ROUND_UP_TO_64BIT(a)           ACPI_ROUND_UP(a,8)
-#define ACPI_ROUND_UP_TO_NATIVE_WORD(a)     ACPI_ROUND_UP(a,sizeof(acpi_native_uint))
+#define ACPI_ROUND_UP_TO_NATIVE_WORD(a)     ACPI_ROUND_UP(a,sizeof(acpi_size))
  
  #define ACPI_ROUND_BITS_UP_TO_BYTES(a)      ACPI_DIV_8((a) + 7)
  #define ACPI_ROUND_BITS_DOWN_TO_BYTES(a)    ACPI_DIV_8((a))
@@ -322,7 +322,7 @@ struct acpi_integer_overlay {
  
  #define ACPI_ROUND_UP_TO(value,boundary)    (((value) + ((boundary)-1)) / (boundary))
  
-#define ACPI_IS_MISALIGNED(value)           (((acpi_native_uint)value) & (sizeof(acpi_native_uint)-1))
+#define ACPI_IS_MISALIGNED(value)          (((acpi_size)value) & (sizeof(acpi_size)-1))
  
  /*
   * Bitmask creation
@@ -414,7 +414,7 @@ struct acpi_integer_overlay {
   * error messages. The __FILE__ macro is not very useful for this, because it
   * often includes the entire pathname to the module
   */
-#define ACPI_MODULE_NAME(name)          static char ACPI_UNUSED_VAR *_acpi_module_name = name;
+#define ACPI_MODULE_NAME(name)         static const char ACPI_UNUSED_VAR _acpi_module_name[] = name;
  #else
  #define ACPI_MODULE_NAME(name)
  #endif
@@ -467,19 +467,17 @@ struct acpi_integer_overlay {
  /*
   * If ACPI_GET_FUNCTION_NAME was not defined in the compiler-dependent header,
   * define it now. This is the case where there the compiler does not support
- * a __FUNCTION__ macro or equivalent. We save the function name on the
- * local stack.
+ * a __FUNCTION__ macro or equivalent.
   */
  #ifndef ACPI_GET_FUNCTION_NAME
  #define ACPI_GET_FUNCTION_NAME          _acpi_function_name
  /*
   * The Name parameter should be the procedure name as a quoted string.
- * This is declared as a local string ("MyFunctionName") so that it can
- * be also used by the function exit macros below.
+ * The function name is also used by the function exit macros below.
   * Note: (const char) is used to be compatible with the debug interfaces
   * and macros such as __FUNCTION__.
   */
-#define ACPI_FUNCTION_NAME(name)        const char *_acpi_function_name = #name;
+#define ACPI_FUNCTION_NAME(name)       static const char _acpi_function_name[] = #name;
  
  #else
  /* Compiler supports __FUNCTION__ (or equivalent) -- Ignore this macro */
@@ -599,7 +597,7 @@ struct acpi_integer_overlay {
  /* Stack and buffer dumping */
  
  #define ACPI_DUMP_STACK_ENTRY(a)        acpi_ex_dump_operand((a),0)
-#define ACPI_DUMP_OPERANDS(a,b,c,d,e)   acpi_ex_dump_operands(a,b,c,d,e,_acpi_module_name,__LINE__)
+#define ACPI_DUMP_OPERANDS(a,b,c)      acpi_ex_dump_operands(a,b,c)
  
  #define ACPI_DUMP_ENTRY(a,b)            acpi_ns_dump_entry (a,b)
  #define ACPI_DUMP_PATHNAME(a,b,c,d)     acpi_ns_dump_pathname(a,b,c,d)
@@ -635,7 +633,7 @@ struct acpi_integer_overlay {
  #define ACPI_FUNCTION_VALUE_EXIT(s)    do { } while(0)
  #define ACPI_FUNCTION_ENTRY()          do { } while(0)
  #define ACPI_DUMP_STACK_ENTRY(a)       do { } while(0)
-#define ACPI_DUMP_OPERANDS(a,b,c,d,e)  do { } while(0)
+#define ACPI_DUMP_OPERANDS(a,b,c)      do { } while(0)
  #define ACPI_DUMP_ENTRY(a,b)           do { } while(0)
  #define ACPI_DUMP_TABLES(a,b)          do { } while(0)
  #define ACPI_DUMP_PATHNAME(a,b,c,d)    do { } while(0)
diff --git a/include/acpi/acnamesp.h b/include/acpi/acnamesp.h

index 713b30903fe54c2260e8ba07098359b6fadfddd6..9ed70a05058003034e275e8c8f13acff3124c2ed 100644 (file)
--- a/include/acpi/acnamesp.h
+++ b/include/acpi/acnamesp.h
@@ -86,8 +86,7 @@ acpi_status acpi_ns_initialize_devices(void);
  acpi_status acpi_ns_load_namespace(void);
  
  acpi_status
-acpi_ns_load_table(acpi_native_uint table_index,
-                  struct acpi_namespace_node *node);
+acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node);
  
  /*
   * nswalk - walk the namespace
@@ -108,12 +107,11 @@ struct acpi_namespace_node *acpi_ns_get_next_node(acpi_object_type type, struct
   * nsparse - table parsing
   */
  acpi_status
-acpi_ns_parse_table(acpi_native_uint table_index,
-                   struct acpi_namespace_node *start_node);
+acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node);
  
  acpi_status
-acpi_ns_one_complete_parse(acpi_native_uint pass_number,
-                          acpi_native_uint table_index,
+acpi_ns_one_complete_parse(u32 pass_number,
+                          u32 table_index,
                            struct acpi_namespace_node *start_node);
  
  /*
@@ -201,7 +199,7 @@ acpi_ns_pattern_match(struct acpi_namespace_node *obj_node, char *search_for);
  
  acpi_status
  acpi_ns_get_node(struct acpi_namespace_node *prefix_node,
-                char *external_pathname,
+                const char *external_pathname,
                  u32 flags, struct acpi_namespace_node **out_node);
  
  acpi_size acpi_ns_get_pathname_length(struct acpi_namespace_node *node);
@@ -265,28 +263,30 @@ acpi_object_type acpi_ns_get_type(struct acpi_namespace_node *node);
  u32 acpi_ns_local(acpi_object_type type);
  
  void
-acpi_ns_report_error(char *module_name,
+acpi_ns_report_error(const char *module_name,
                      u32 line_number,
-                    char *internal_name, acpi_status lookup_status);
+                    const char *internal_name, acpi_status lookup_status);
  
  void
-acpi_ns_report_method_error(char *module_name,
+acpi_ns_report_method_error(const char *module_name,
                             u32 line_number,
-                           char *message,
+                           const char *message,
                             struct acpi_namespace_node *node,
-                           char *path, acpi_status lookup_status);
+                           const char *path, acpi_status lookup_status);
  
-void acpi_ns_print_node_pathname(struct acpi_namespace_node *node, char *msg);
+void
+acpi_ns_print_node_pathname(struct acpi_namespace_node *node, const char *msg);
  
  acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info);
  
  void acpi_ns_get_internal_name_length(struct acpi_namestring_info *info);
  
-acpi_status acpi_ns_internalize_name(char *dotted_name, char **converted_name);
+acpi_status
+acpi_ns_internalize_name(const char *dotted_name, char **converted_name);
  
  acpi_status
  acpi_ns_externalize_name(u32 internal_name_length,
-                        char *internal_name,
+                        const char *internal_name,
                          u32 * converted_name_length, char **converted_name);
  
  struct acpi_namespace_node *acpi_ns_map_handle_to_node(acpi_handle handle);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h

index 2f1c68c7a7270ea7098a39509690eb5d03614c1d..a5ac0bc7f52eb1cc1849fb29afda135fb9dda6c1 100644 (file)
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -259,6 +259,7 @@ struct acpi_device_perf {
  /* Wakeup Management */
  struct acpi_device_wakeup_flags {
         u8 valid:1;             /* Can successfully enable wakeup? */
+       u8 prepared:1;          /* Has the wake-up capability been enabled? */
         u8 run_wake:1;          /* Run-Wake GPE devices */
  };
  
@@ -335,6 +336,8 @@ void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context);
  int acpi_bus_get_status(struct acpi_device *device);
  int acpi_bus_get_power(acpi_handle handle, int *state);
  int acpi_bus_set_power(acpi_handle handle, int state);
+bool acpi_bus_power_manageable(acpi_handle handle);
+bool acpi_bus_can_wakeup(acpi_handle handle);
  #ifdef CONFIG_ACPI_PROC_EVENT
  int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data);
  int acpi_bus_generate_proc_event4(const char *class, const char *bid, u8 type, int data);
@@ -376,14 +379,19 @@ acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int);
  #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle))
  
  #ifdef CONFIG_PM_SLEEP
-int acpi_pm_device_sleep_state(struct device *, int, int *);
+int acpi_pm_device_sleep_state(struct device *, int *);
+int acpi_pm_device_sleep_wake(struct device *, bool);
  #else /* !CONFIG_PM_SLEEP */
-static inline int acpi_pm_device_sleep_state(struct device *d, int w, int *p)
+static inline int acpi_pm_device_sleep_state(struct device *d, int *p)
  {
         if (p)
                 *p = ACPI_STATE_D0;
         return ACPI_STATE_D3;
  }
+static inline int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
+{
+       return -ENODEV;
+}
  #endif /* !CONFIG_PM_SLEEP */
  
  #endif                         /* CONFIG_ACPI */
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h

index 9757a040a5057e82678490bd35713488a6eab893..e5f38e5ce86fc5a4e28ec624e529ee1db59c16c0 100644 (file)
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -87,7 +87,9 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_device *device, int domain,
     -------------------------------------------------------------------------- */
  
  #ifdef CONFIG_ACPI_POWER
-int acpi_enable_wakeup_device_power(struct acpi_device *dev);
+int acpi_device_sleep_wake(struct acpi_device *dev,
+                           int enable, int sleep_state, int dev_state);
+int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state);
  int acpi_disable_wakeup_device_power(struct acpi_device *dev);
  int acpi_power_get_inferred_state(struct acpi_device *device);
  int acpi_power_transition(struct acpi_device *device, int state);
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h

index d4a560d2deb61f05f3fe56d2b4affe515926c931..3f93a6b4e17fe61ccd6735dd84a78b89edc8dc0d 100644 (file)
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -144,7 +144,7 @@ void acpi_os_release_mutex(acpi_mutex handle);
  void *acpi_os_allocate(acpi_size size);
  
  void __iomem *acpi_os_map_memory(acpi_physical_address where,
-                                acpi_native_uint length);
+                               acpi_size length);
  
  void acpi_os_unmap_memory(void __iomem * logical_address, acpi_size size);
  
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h

index 2c3806e6546f278656ffb0342486fccc28c7643f..94d94e126e9f4a3b825309e01766df0b973c088d 100644 (file)
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -98,7 +98,7 @@ void acpi_free(void *address);
   */
  acpi_status acpi_reallocate_root_table(void);
  
-acpi_status acpi_find_root_pointer(acpi_native_uint * rsdp_address);
+acpi_status acpi_find_root_pointer(acpi_size *rsdp_address);
  
  acpi_status acpi_load_tables(void);
  
@@ -108,15 +108,15 @@ acpi_status acpi_unload_table_id(acpi_owner_id id);
  
  acpi_status
  acpi_get_table_header(acpi_string signature,
-                     acpi_native_uint instance,
+                     u32 instance,
                       struct acpi_table_header *out_table_header);
  
  acpi_status
  acpi_get_table(acpi_string signature,
-              acpi_native_uint instance, struct acpi_table_header **out_table);
+              u32 instance, struct acpi_table_header **out_table);
  
  acpi_status
-acpi_get_table_by_index(acpi_native_uint table_index,
+acpi_get_table_by_index(u32 table_index,
                         struct acpi_table_header **out_table);
  
  acpi_status
@@ -248,9 +248,7 @@ acpi_status acpi_disable_event(u32 event, u32 flags);
  
  acpi_status acpi_clear_event(u32 event);
  
-#ifdef ACPI_FUTURE_USAGE
  acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status);
-#endif                         /*  ACPI_FUTURE_USAGE  */
  
  acpi_status acpi_set_gpe_type(acpi_handle gpe_device, u32 gpe_number, u8 type);
  
@@ -260,12 +258,10 @@ acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u32 flags);
  
  acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number, u32 flags);
  
-#ifdef ACPI_FUTURE_USAGE
  acpi_status
  acpi_get_gpe_status(acpi_handle gpe_device,
                     u32 gpe_number,
                     u32 flags, acpi_event_status * event_status);
-#endif                         /*  ACPI_FUTURE_USAGE  */
  
  acpi_status
  acpi_install_gpe_block(acpi_handle gpe_device,
diff --git a/include/acpi/acstruct.h b/include/acpi/acstruct.h

index a907c67d651e778f535c749a3ae0b92031c9bc4f..7980a26bad356240038584568b6b0ce404c562fe 100644 (file)
--- a/include/acpi/acstruct.h
+++ b/include/acpi/acstruct.h
@@ -108,7 +108,6 @@ struct acpi_walk_state {
         union acpi_operand_object **caller_return_desc;
         union acpi_generic_state *control_state;        /* List of control states (nested IFs) */
         struct acpi_namespace_node *deferred_node;      /* Used when executing deferred opcodes */
-       struct acpi_gpe_event_info *gpe_event_info;     /* Info for GPE (_Lxx/_Exx methods only */
         union acpi_operand_object *implicit_return_obj;
         struct acpi_namespace_node *method_call_node;   /* Called method Node */
         union acpi_parse_object *method_call_op;        /* method_call Op if running a method */
@@ -143,7 +142,7 @@ struct acpi_init_walk_info {
         u16 package_init;
         u16 object_count;
         acpi_owner_id owner_id;
-       acpi_native_uint table_index;
+       u32 table_index;
  };
  
  struct acpi_get_devices_info {
@@ -189,17 +188,12 @@ struct acpi_evaluate_info {
         union acpi_operand_object **parameters;
         struct acpi_namespace_node *resolved_node;
         union acpi_operand_object *return_object;
+       u8 param_count;
         u8 pass_number;
-       u8 parameter_type;
         u8 return_object_type;
         u8 flags;
  };
  
-/* Types for parameter_type above */
-
-#define ACPI_PARAM_ARGS                 0
-#define ACPI_PARAM_GPE                  1
-
  /* Values for Flags above */
  
  #define ACPI_IGNORE_RETURN_VALUE        1
diff --git a/include/acpi/actables.h b/include/acpi/actables.h

index 4b36a55b0b3bceed7ba441b9a9121825abd631f3..0cbe1b9ab52249e38adcd12c5fe0c8009d8ebbd9 100644 (file)
--- a/include/acpi/actables.h
+++ b/include/acpi/actables.h
@@ -49,7 +49,7 @@ acpi_status acpi_allocate_root_table(u32 initial_table_count);
  /*
   * tbfadt - FADT parse/convert/validate
   */
-void acpi_tb_parse_fadt(acpi_native_uint table_index, u8 flags);
+void acpi_tb_parse_fadt(u32 table_index, u8 flags);
  
  void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length);
  
@@ -58,8 +58,7 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length);
   */
  acpi_status
  acpi_tb_find_table(char *signature,
-                  char *oem_id,
-                  char *oem_table_id, acpi_native_uint * table_index);
+                  char *oem_id, char *oem_table_id, u32 *table_index);
  
  /*
   * tbinstal - Table removal and deletion
@@ -69,30 +68,28 @@ acpi_status acpi_tb_resize_root_table_list(void);
  acpi_status acpi_tb_verify_table(struct acpi_table_desc *table_desc);
  
  acpi_status
-acpi_tb_add_table(struct acpi_table_desc *table_desc,
-                 acpi_native_uint * table_index);
+acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index);
  
  acpi_status
  acpi_tb_store_table(acpi_physical_address address,
                     struct acpi_table_header *table,
-                   u32 length, u8 flags, acpi_native_uint * table_index);
+                   u32 length, u8 flags, u32 *table_index);
  
  void acpi_tb_delete_table(struct acpi_table_desc *table_desc);
  
  void acpi_tb_terminate(void);
  
-void acpi_tb_delete_namespace_by_owner(acpi_native_uint table_index);
+void acpi_tb_delete_namespace_by_owner(u32 table_index);
  
-acpi_status acpi_tb_allocate_owner_id(acpi_native_uint table_index);
+acpi_status acpi_tb_allocate_owner_id(u32 table_index);
  
-acpi_status acpi_tb_release_owner_id(acpi_native_uint table_index);
+acpi_status acpi_tb_release_owner_id(u32 table_index);
  
-acpi_status
-acpi_tb_get_owner_id(acpi_native_uint table_index, acpi_owner_id * owner_id);
+acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id *owner_id);
  
-u8 acpi_tb_is_table_loaded(acpi_native_uint table_index);
+u8 acpi_tb_is_table_loaded(u32 table_index);
  
-void acpi_tb_set_table_loaded_flag(acpi_native_uint table_index, u8 is_loaded);
+void acpi_tb_set_table_loaded_flag(u32 table_index, u8 is_loaded);
  
  /*
   * tbutils - table manager utilities
@@ -103,14 +100,14 @@ void
  acpi_tb_print_table_header(acpi_physical_address address,
                            struct acpi_table_header *header);
  
-u8 acpi_tb_checksum(u8 * buffer, acpi_native_uint length);
+u8 acpi_tb_checksum(u8 *buffer, u32 length);
  
  acpi_status
  acpi_tb_verify_checksum(struct acpi_table_header *table, u32 length);
  
  void
  acpi_tb_install_table(acpi_physical_address address,
-                     u8 flags, char *signature, acpi_native_uint table_index);
+                     u8 flags, char *signature, u32 table_index);
  
  acpi_status
  acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags);
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h

index 9af239bd1153fad17bdaee0b0bfbfcbc671bab80..d38f9be2f6eeda77b42f48e492060ed6a9f3000b 100644 (file)
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -300,6 +300,7 @@ struct acpi_table_dbgp {
  /*******************************************************************************
   *
   * DMAR - DMA Remapping table
+ *       From "Intel Virtualization Technology for Directed I/O", Sept. 2007
   *
   ******************************************************************************/
  
@@ -310,6 +311,10 @@ struct acpi_table_dmar {
         u8 reserved[10];
  };
  
+/* Flags */
+
+#define ACPI_DMAR_INTR_REMAP       (1)
+
  /* DMAR subtable header */
  
  struct acpi_dmar_header {
@@ -382,6 +387,20 @@ struct acpi_dmar_reserved_memory {
  
  #define ACPI_DMAR_ALLOW_ALL         (1)
  
+
+/* 2: Root Port ATS Capability Reporting Structure */
+
+struct acpi_dmar_atsr {
+       struct acpi_dmar_header header;
+       u8 flags;
+       u8 reserved;
+       u16 segment;
+};
+
+/* Flags */
+
+#define ACPI_DMAR_ALL_PORTS        (1)
+
  /*******************************************************************************
   *
   * ECDT - Embedded Controller Boot Resources Table
@@ -1156,9 +1175,9 @@ struct acpi_srat_mem_affinity {
         u16 reserved;           /* Reserved, must be zero */
         u64 base_address;
         u64 length;
-       u32 memory_type;        /* See acpi_address_range_id */
+       u32 reserved1;
         u32 flags;
-       u64 reserved1;          /* Reserved, must be zero */
+       u64 reserved2;         /* Reserved, must be zero */
  };
  
  /* Flags */
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h

index dfea2d440488548a7fccff43e998b3ec0ef346d6..4ea4f40bf894c22e0a1f99b745bd503e6a996f24 100644 (file)
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -110,10 +110,10 @@
   * usually used for memory allocation, efficient loop counters, and array
   * indexes. The types are similar to the size_t type in the C library and are
   * required because there is no C type that consistently represents the native
- * data width.
+ * data width. ACPI_SIZE is needed because there is no guarantee that a
+ * kernel-level C library is present.
   *
   * ACPI_SIZE        16/32/64-bit unsigned value
- * ACPI_NATIVE_UINT 16/32/64-bit unsigned value
   * ACPI_NATIVE_INT  16/32/64-bit signed value
   *
   */
@@ -147,9 +147,9 @@ typedef int INT32;
  
  /*! [End] no source code translation !*/
  
-typedef u64 acpi_native_uint;
  typedef s64 acpi_native_int;
  
+typedef u64 acpi_size;
  typedef u64 acpi_io_address;
  typedef u64 acpi_physical_address;
  
@@ -186,9 +186,9 @@ typedef int INT32;
  
  /*! [End] no source code translation !*/
  
-typedef u32 acpi_native_uint;
  typedef s32 acpi_native_int;
  
+typedef u32 acpi_size;
  typedef u32 acpi_io_address;
  typedef u32 acpi_physical_address;
  
@@ -202,10 +202,6 @@ typedef u32 acpi_physical_address;
  #error unknown ACPI_MACHINE_WIDTH
  #endif
  
-/* Variable-width type, used instead of clib size_t */
-
-typedef acpi_native_uint acpi_size;
-
  /*******************************************************************************
   *
   * OS-dependent and compiler-dependent types
@@ -219,7 +215,7 @@ typedef acpi_native_uint acpi_size;
  /* Value returned by acpi_os_get_thread_id */
  
  #ifndef acpi_thread_id
-#define acpi_thread_id                  acpi_native_uint
+#define acpi_thread_id                 acpi_size
  #endif
  
  /* Object returned from acpi_os_create_lock */
@@ -231,7 +227,7 @@ typedef acpi_native_uint acpi_size;
  /* Flags for acpi_os_acquire_lock/acpi_os_release_lock */
  
  #ifndef acpi_cpu_flags
-#define acpi_cpu_flags                  acpi_native_uint
+#define acpi_cpu_flags                 acpi_size
  #endif
  
  /* Object returned from acpi_os_create_cache */
diff --git a/include/acpi/acutils.h b/include/acpi/acutils.h

index b42cadf0730256bb68e17f6ff8ad376f7d57dc23..69f8888771fffa6cdf532117ac31bc14eb5997ff 100644 (file)
--- a/include/acpi/acutils.h
+++ b/include/acpi/acutils.h
@@ -172,7 +172,7 @@ char *acpi_ut_strstr(char *string1, char *string2);
  
  void *acpi_ut_memcpy(void *dest, const void *src, acpi_size count);
  
-void *acpi_ut_memset(void *dest, acpi_native_uint value, acpi_size count);
+void *acpi_ut_memset(void *dest, u8 value, acpi_size count);
  
  int acpi_ut_to_upper(int c);
  
@@ -245,41 +245,45 @@ void acpi_ut_track_stack_ptr(void);
  
  void
  acpi_ut_trace(u32 line_number,
-             const char *function_name, char *module_name, u32 component_id);
+             const char *function_name,
+             const char *module_name, u32 component_id);
  
  void
  acpi_ut_trace_ptr(u32 line_number,
                   const char *function_name,
-                 char *module_name, u32 component_id, void *pointer);
+                 const char *module_name, u32 component_id, void *pointer);
  
  void
  acpi_ut_trace_u32(u32 line_number,
                   const char *function_name,
-                 char *module_name, u32 component_id, u32 integer);
+                 const char *module_name, u32 component_id, u32 integer);
  
  void
  acpi_ut_trace_str(u32 line_number,
                   const char *function_name,
-                 char *module_name, u32 component_id, char *string);
+                 const char *module_name, u32 component_id, char *string);
  
  void
  acpi_ut_exit(u32 line_number,
-            const char *function_name, char *module_name, u32 component_id);
+            const char *function_name,
+            const char *module_name, u32 component_id);
  
  void
  acpi_ut_status_exit(u32 line_number,
                     const char *function_name,
-                   char *module_name, u32 component_id, acpi_status status);
+                   const char *module_name,
+                   u32 component_id, acpi_status status);
  
  void
  acpi_ut_value_exit(u32 line_number,
                    const char *function_name,
-                  char *module_name, u32 component_id, acpi_integer value);
+                  const char *module_name,
+                  u32 component_id, acpi_integer value);
  
  void
  acpi_ut_ptr_exit(u32 line_number,
                  const char *function_name,
-                char *module_name, u32 component_id, u8 * ptr);
+                const char *module_name, u32 component_id, u8 *ptr);
  
  void acpi_ut_dump_buffer(u8 * buffer, u32 count, u32 display, u32 component_id);
  
@@ -297,33 +301,35 @@ void ACPI_INTERNAL_VAR_XFACE
  acpi_ut_debug_print(u32 requested_debug_level,
                     u32 line_number,
                     const char *function_name,
-                   char *module_name,
-                   u32 component_id, char *format, ...) ACPI_PRINTF_LIKE(6);
+                   const char *module_name,
+                   u32 component_id,
+                   const char *format, ...) ACPI_PRINTF_LIKE(6);
  
  void ACPI_INTERNAL_VAR_XFACE
  acpi_ut_debug_print_raw(u32 requested_debug_level,
                         u32 line_number,
                         const char *function_name,
-                       char *module_name,
+                       const char *module_name,
                         u32 component_id,
-                       char *format, ...) ACPI_PRINTF_LIKE(6);
+                       const char *format, ...) ACPI_PRINTF_LIKE(6);
  
  void ACPI_INTERNAL_VAR_XFACE
-acpi_ut_error(char *module_name,
-             u32 line_number, char *format, ...) ACPI_PRINTF_LIKE(3);
+acpi_ut_error(const char *module_name,
+             u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
  
  void ACPI_INTERNAL_VAR_XFACE
-acpi_ut_exception(char *module_name,
+acpi_ut_exception(const char *module_name,
                   u32 line_number,
-                 acpi_status status, char *format, ...) ACPI_PRINTF_LIKE(4);
+                 acpi_status status,
+                 const char *format, ...) ACPI_PRINTF_LIKE(4);
  
  void ACPI_INTERNAL_VAR_XFACE
-acpi_ut_warning(char *module_name,
-               u32 line_number, char *format, ...) ACPI_PRINTF_LIKE(3);
+acpi_ut_warning(const char *module_name,
+               u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
  
  void ACPI_INTERNAL_VAR_XFACE
-acpi_ut_info(char *module_name,
-            u32 line_number, char *format, ...) ACPI_PRINTF_LIKE(3);
+acpi_ut_info(const char *module_name,
+            u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
  
  /*
   * utdelete - Object deletion and reference counts
@@ -376,13 +382,14 @@ acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest);
  /*
   * utobject - internal object create/delete/cache routines
   */
-union acpi_operand_object *acpi_ut_create_internal_object_dbg(char *module_name,
+union acpi_operand_object *acpi_ut_create_internal_object_dbg(const char
+                                                             *module_name,
                                                               u32 line_number,
                                                               u32 component_id,
                                                               acpi_object_type
                                                               type);
  
-void *acpi_ut_allocate_object_desc_dbg(char *module_name,
+void *acpi_ut_allocate_object_desc_dbg(const char *module_name,
                                        u32 line_number, u32 component_id);
  
  #define acpi_ut_create_internal_object(t) acpi_ut_create_internal_object_dbg (_acpi_module_name,__LINE__,_COMPONENT,t)
@@ -476,7 +483,7 @@ u8 acpi_ut_valid_acpi_name(u32 name);
  
  acpi_name acpi_ut_repair_name(char *name);
  
-u8 acpi_ut_valid_acpi_char(char character, acpi_native_uint position);
+u8 acpi_ut_valid_acpi_char(char character, u32 position);
  
  acpi_status
  acpi_ut_strtoul64(char *string, u32 base, acpi_integer * ret_integer);
@@ -543,26 +550,29 @@ acpi_status
  acpi_ut_initialize_buffer(struct acpi_buffer *buffer,
                           acpi_size required_length);
  
-void *acpi_ut_allocate(acpi_size size, u32 component, char *module, u32 line);
+void *acpi_ut_allocate(acpi_size size,
+                      u32 component, const char *module, u32 line);
  
  void *acpi_ut_allocate_zeroed(acpi_size size,
-                             u32 component, char *module, u32 line);
+                             u32 component, const char *module, u32 line);
  
  #ifdef ACPI_DBG_TRACK_ALLOCATIONS
  void *acpi_ut_allocate_and_track(acpi_size size,
-                                u32 component, char *module, u32 line);
+                                u32 component, const char *module, u32 line);
  
  void *acpi_ut_allocate_zeroed_and_track(acpi_size size,
-                                       u32 component, char *module, u32 line);
+                                       u32 component,
+                                       const char *module, u32 line);
  
  void
-acpi_ut_free_and_track(void *address, u32 component, char *module, u32 line);
+acpi_ut_free_and_track(void *address,
+                      u32 component, const char *module, u32 line);
  
  #ifdef ACPI_FUTURE_USAGE
  void acpi_ut_dump_allocation_info(void);
  #endif                         /* ACPI_FUTURE_USAGE */
  
-void acpi_ut_dump_allocations(u32 component, char *module);
+void acpi_ut_dump_allocations(u32 component, const char *module);
  
  acpi_status
  acpi_ut_create_list(char *list_name,
diff --git a/include/acpi/processor.h b/include/acpi/processor.h

index 06ebb6ef72aa7c57689a850b4c8692016f982c7f..3795590e152abbc54209e71b7688d4ed7639339f 100644 (file)
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -255,7 +255,7 @@ extern void acpi_processor_unregister_performance(struct
  int acpi_processor_notify_smm(struct module *calling_module);
  
  /* for communication between multiple parts of the processor kernel module */
-extern struct acpi_processor *processors[NR_CPUS];
+DECLARE_PER_CPU(struct acpi_processor *, processors);
  extern struct acpi_processor_errata errata;
  
  void arch_acpi_processor_init_pdc(struct acpi_processor *pr);
diff --git a/include/acpi/reboot.h b/include/acpi/reboot.h

index 8857f57e0b7804d0e931c8d05876913b09b052a0..0419184ce8867664164de63b2a232da985af718f 100644 (file)
--- a/include/acpi/reboot.h
+++ b/include/acpi/reboot.h
@@ -1,9 +1,11 @@
+#ifndef __ACPI_REBOOT_H
+#define __ACPI_REBOOT_H
+
+#ifdef CONFIG_ACPI
+extern void acpi_reboot(void);
+#else
+static inline void acpi_reboot(void) { }
+#endif
  
-/*
- * Dummy placeholder to make the EFI patches apply to the x86 tree.
- * Andrew/Len, please just kill this file if you encounter it.
- */
-#ifndef acpi_reboot
-# define acpi_reboot() do { } while (0)
  #endif
  
diff --git a/include/asm-arm/arch-at91/at91_mci.h b/include/asm-arm/arch-at91/at91_mci.h

index 1551fc24eb43b6bdaa14e590ae1a9ed175513c64..400ec10014b45b8ee2b5782ef84e8339305e12b4 100644 (file)
--- a/include/asm-arm/arch-at91/at91_mci.h
+++ b/include/asm-arm/arch-at91/at91_mci.h
@@ -75,6 +75,10 @@
  #define                        AT91_MCI_TRTYP_MULTIPLE (1 << 19)
  #define                        AT91_MCI_TRTYP_STREAM   (2 << 19)
  
+#define AT91_MCI_BLKR          0x18            /* Block Register */
+#define                AT91_MCI_BLKR_BCNT(n)   ((0xffff & (n)) << 0)   /* Block count */
+#define                AT91_MCI_BLKR_BLKLEN(n) ((0xffff & (n)) << 16)  /* Block length */
+
  #define AT91_MCI_RSPR(n)       (0x20 + ((n) * 4))      /* Response Registers 0-3 */
  #define AT91_MCR_RDR           0x30            /* Receive Data Register */
  #define AT91_MCR_TDR           0x34            /* Transmit Data Register */
diff --git a/include/asm-arm/arch-s3c2410/regs-sdi.h b/include/asm-arm/arch-s3c2410/regs-sdi.h

index bb9d30b72952c88e684c17ca42e1147367084652..bfb222fa4abbba6f81694ad61a259ab03c31e7e0 100644 (file)
--- a/include/asm-arm/arch-s3c2410/regs-sdi.h
+++ b/include/asm-arm/arch-s3c2410/regs-sdi.h
@@ -28,9 +28,15 @@
  #define S3C2410_SDIDCNT               (0x30)
  #define S3C2410_SDIDSTA               (0x34)
  #define S3C2410_SDIFSTA               (0x38)
+
  #define S3C2410_SDIDATA               (0x3C)
  #define S3C2410_SDIIMSK               (0x40)
  
+#define S3C2440_SDIDATA               (0x40)
+#define S3C2440_SDIIMSK               (0x3C)
+
+#define S3C2440_SDICON_SDRESET        (1<<8)
+#define S3C2440_SDICON_MMCCLOCK       (1<<5)
  #define S3C2410_SDICON_BYTEORDER      (1<<4)
  #define S3C2410_SDICON_SDIOIRQ        (1<<3)
  #define S3C2410_SDICON_RWAITEN        (1<<2)
@@ -42,7 +48,8 @@
  #define S3C2410_SDICMDCON_LONGRSP     (1<<10)
  #define S3C2410_SDICMDCON_WAITRSP     (1<<9)
  #define S3C2410_SDICMDCON_CMDSTART    (1<<8)
-#define S3C2410_SDICMDCON_INDEX       (0xff)
+#define S3C2410_SDICMDCON_SENDERHOST  (1<<6)
+#define S3C2410_SDICMDCON_INDEX       (0x3f)
  
  #define S3C2410_SDICMDSTAT_CRCFAIL    (1<<12)
  #define S3C2410_SDICMDSTAT_CMDSENT    (1<<11)
@@ -51,6 +58,9 @@
  #define S3C2410_SDICMDSTAT_XFERING    (1<<8)
  #define S3C2410_SDICMDSTAT_INDEX      (0xff)
  
+#define S3C2440_SDIDCON_DS_BYTE       (0<<22)
+#define S3C2440_SDIDCON_DS_HALFWORD   (1<<22)
+#define S3C2440_SDIDCON_DS_WORD       (2<<22)
  #define S3C2410_SDIDCON_IRQPERIOD     (1<<21)
  #define S3C2410_SDIDCON_TXAFTERRESP   (1<<20)
  #define S3C2410_SDIDCON_RXAFTERCMD    (1<<19)
@@ -59,6 +69,7 @@
  #define S3C2410_SDIDCON_WIDEBUS       (1<<16)
  #define S3C2410_SDIDCON_DMAEN         (1<<15)
  #define S3C2410_SDIDCON_STOP          (1<<14)
+#define S3C2440_SDIDCON_DATSTART      (1<<14)
  #define S3C2410_SDIDCON_DATMODE              (3<<12)
  #define S3C2410_SDIDCON_BLKNUM        (0x7ff)
  
@@ -68,6 +79,7 @@
  #define S3C2410_SDIDCON_XFER_RXSTART  (2<<12)
  #define S3C2410_SDIDCON_XFER_TXSTART  (3<<12)
  
+#define S3C2410_SDIDCON_BLKNUM_MASK   (0xFFF)
  #define S3C2410_SDIDCNT_BLKNUM_SHIFT  (12)
  
  #define S3C2410_SDIDSTA_RDYWAITREQ    (1<<10)
@@ -82,10 +94,12 @@
  #define S3C2410_SDIDSTA_TXDATAON      (1<<1)
  #define S3C2410_SDIDSTA_RXDATAON      (1<<0)
  
+#define S3C2440_SDIFSTA_FIFORESET      (1<<16)
+#define S3C2440_SDIFSTA_FIFOFAIL       (3<<14)  /* 3 is correct (2 bits) */
  #define S3C2410_SDIFSTA_TFDET          (1<<13)
  #define S3C2410_SDIFSTA_RFDET          (1<<12)
-#define S3C2410_SDIFSTA_TXHALF         (1<<11)
-#define S3C2410_SDIFSTA_TXEMPTY        (1<<10)
+#define S3C2410_SDIFSTA_TFHALF         (1<<11)
+#define S3C2410_SDIFSTA_TFEMPTY        (1<<10)
  #define S3C2410_SDIFSTA_RFLAST         (1<<9)
  #define S3C2410_SDIFSTA_RFFULL         (1<<8)
  #define S3C2410_SDIFSTA_RFHALF         (1<<7)
diff --git a/include/asm-arm/plat-s3c24xx/mci.h b/include/asm-arm/plat-s3c24xx/mci.h

new file mode 100644 (file)

index 0000000..2d0852a
--- /dev/null
+++ b/include/asm-arm/plat-s3c24xx/mci.h
@@ -0,0 +1,15 @@
+#ifndef _ARCH_MCI_H
+#define _ARCH_MCI_H
+
+struct s3c24xx_mci_pdata {
+       unsigned int    wprotect_invert : 1;
+       unsigned int    detect_invert : 1;   /* set => detect active high. */
+
+       unsigned int    gpio_detect;
+       unsigned int    gpio_wprotect;
+       unsigned long   ocr_avail;
+       void            (*set_power)(unsigned char power_mode,
+                                    unsigned short vdd);
+};
+
+#endif /* _ARCH_MCI_H */
diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h

index b4cddfaca90ec7d42b7637eb7a46c86887d8657f..a3783861cdd269ab80101e1a2acdf162f8fdecef 100644 (file)
--- a/include/asm-avr32/arch-at32ap/board.h
+++ b/include/asm-avr32/arch-at32ap/board.h
@@ -77,7 +77,11 @@ struct i2c_board_info;
  struct platform_device *at32_add_device_twi(unsigned int id,
                                             struct i2c_board_info *b,
                                             unsigned int n);
-struct platform_device *at32_add_device_mci(unsigned int id);
+
+struct mci_platform_data;
+struct platform_device *
+at32_add_device_mci(unsigned int id, struct mci_platform_data *data);
+
  struct platform_device *at32_add_device_ac97c(unsigned int id);
  struct platform_device *at32_add_device_abdac(unsigned int id);
  struct platform_device *at32_add_device_psif(unsigned int id);
diff --git a/include/asm-avr32/atmel-mci.h b/include/asm-avr32/atmel-mci.h

new file mode 100644 (file)

index 0000000..c2ea6e1
--- /dev/null
+++ b/include/asm-avr32/atmel-mci.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_AVR32_ATMEL_MCI_H
+#define __ASM_AVR32_ATMEL_MCI_H
+
+struct mci_platform_data {
+       int                     detect_pin;
+       int                     wp_pin;
+};
+
+#endif /* __ASM_AVR32_ATMEL_MCI_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h

index b20409404c7dac724761f2e4df39d228f25b9026..729f6b0a60e9d1655cfa5005236c5bf11fb83d5c 100644 (file)
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -86,6 +86,12 @@
                 VMLINUX_SYMBOL(__start_pci_fixups_resume) = .;          \
                 *(.pci_fixup_resume)                                    \
                 VMLINUX_SYMBOL(__end_pci_fixups_resume) = .;            \
+               VMLINUX_SYMBOL(__start_pci_fixups_resume_early) = .;    \
+               *(.pci_fixup_resume_early)                              \
+               VMLINUX_SYMBOL(__end_pci_fixups_resume_early) = .;      \
+               VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .;         \
+               *(.pci_fixup_suspend)                                   \
+               VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .;           \
         }                                                               \
                                                                         \
         /* Built-in firmware blobs */                                   \
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h

index 6aff126fc07ea31802ec249613e9237c16472875..f88fa054d01d5f64c684fefafc76190b59aa36e3 100644 (file)
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -763,6 +763,8 @@ prefetchw (const void *x)
  #define spin_lock_prefetch(x)  prefetchw(x)
  
  extern unsigned long boot_option_idle_override;
+extern unsigned long idle_halt;
+extern unsigned long idle_nomwait;
  
  #endif /* !__ASSEMBLY__ */
  
diff --git a/include/asm-mips/mach-au1x00/au1100_mmc.h b/include/asm-mips/mach-au1x00/au1100_mmc.h

index 9e0028f60a43bb1b0be30cb91dc3cd01924bd27b..c35e209184907fd9f7bdab54bed0457aa36bcaa1 100644 (file)
--- a/include/asm-mips/mach-au1x00/au1100_mmc.h
+++ b/include/asm-mips/mach-au1x00/au1100_mmc.h
@@ -38,15 +38,15 @@
  #ifndef __ASM_AU1100_MMC_H
  #define __ASM_AU1100_MMC_H
  
-
-#define NUM_AU1100_MMC_CONTROLLERS     2
-
-#if defined(CONFIG_SOC_AU1100)
-#define AU1100_SD_IRQ  AU1100_SD_INT
-#elif defined(CONFIG_SOC_AU1200)
-#define AU1100_SD_IRQ  AU1200_SD_INT
-#endif
-
+#include <linux/leds.h>
+
+struct au1xmmc_platform_data {
+       int(*cd_setup)(void *mmc_host, int on);
+       int(*card_inserted)(void *mmc_host);
+       int(*card_readonly)(void *mmc_host);
+       void(*set_power)(void *mmc_host, int state);
+       struct led_classdev *led;
+};
  
  #define SD0_BASE       0xB0600000
  #define SD1_BASE       0xB0680000
diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild

index 09f312501eb5f82828b3231ecadc06e9356411d0..bb5e9edb9825391bcb152efe3ed79c27b8d5623f 100644 (file)
--- a/include/asm-s390/Kbuild
+++ b/include/asm-s390/Kbuild
@@ -8,9 +8,9 @@ header-y += ucontext.h
  header-y += vtoc.h
  header-y += zcrypt.h
  header-y += kvm.h
-header-y += schid.h
  header-y += chsc.h
  
  unifdef-y += cmb.h
  unifdef-y += debug.h
  unifdef-y += chpid.h
+unifdef-y += schid.h
diff --git a/include/asm-s390/chpid.h b/include/asm-s390/chpid.h

index 606844d0a5c34c84af18863a204c21d4e157e4d3..dfe3c7f3439a71f1cedb51ea0ca5ae71575ca329 100644 (file)
--- a/include/asm-s390/chpid.h
+++ b/include/asm-s390/chpid.h
@@ -20,6 +20,9 @@ struct chp_id {
         u8 id;
  } __attribute__((packed));
  
+#ifdef __KERNEL__
+#include <asm/cio.h>
+
  static inline void chp_id_init(struct chp_id *chpid)
  {
         memset(chpid, 0, sizeof(struct chp_id));
@@ -40,9 +43,6 @@ static inline void chp_id_next(struct chp_id *chpid)
         }
  }
  
-#ifdef __KERNEL__
-#include <asm/cio.h>
-
  static inline int chp_id_is_valid(struct chp_id *chpid)
  {
         return (chpid->cssid <= __MAX_CSSID);
diff --git a/include/asm-s390/qdio.h b/include/asm-s390/qdio.h

index 11240342a0f4f54a6e7e66b3e03667400d4a9e35..6813772171f2d7b49826fb49d3f1333edbb88082 100644 (file)
--- a/include/asm-s390/qdio.h
+++ b/include/asm-s390/qdio.h
@@ -1,404 +1,382 @@
  /*
   * linux/include/asm-s390/qdio.h
   *
- * Linux for S/390 QDIO base support, Hipersocket base support
- * version 2
- *
- * Copyright 2000,2002 IBM Corporation
+ * Copyright 2000,2008 IBM Corp.
   * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
+ *           Jan Glauber <jang@linux.vnet.ibm.com>
   *
   */
  #ifndef __QDIO_H__
  #define __QDIO_H__
  
-/* note, that most of the typedef's are from ingo. */
-
  #include <linux/interrupt.h>
  #include <asm/cio.h>
  #include <asm/ccwdev.h>
  
-#define QDIO_NAME "qdio "
-
-#ifndef __s390x__
-#define QDIO_32_BIT
-#endif /* __s390x__ */
-
-/**** CONSTANTS, that are relied on without using these symbols *****/
-#define QDIO_MAX_QUEUES_PER_IRQ 32 /* used in width of unsigned int */
-/************************ END of CONSTANTS **************************/
-#define QDIO_MAX_BUFFERS_PER_Q 128 /* must be a power of 2 (%x=&(x-1)*/
-#define QDIO_BUF_ORDER 7 /* 2**this == number of pages used for sbals in 1 q */
-#define QDIO_MAX_ELEMENTS_PER_BUFFER 16
-#define SBAL_SIZE 256
-
-#define QDIO_QETH_QFMT 0
-#define QDIO_ZFCP_QFMT 1
-#define QDIO_IQDIO_QFMT 2
-#define QDIO_IQDIO_QFMT_ASYNCH 3
-
-struct qdio_buffer_element{
-       unsigned int flags;
-       unsigned int length;
-#ifdef QDIO_32_BIT
-       void *reserved;
-#endif /* QDIO_32_BIT */
-       void *addr;
-} __attribute__ ((packed,aligned(16)));
-
-struct qdio_buffer{
-       volatile struct qdio_buffer_element element[16];
-} __attribute__ ((packed,aligned(256)));
-
-
-/* params are: ccw_device, status, qdio_error, siga_error,
-   queue_number, first element processed, number of elements processed,
-   int_parm */
-typedef void qdio_handler_t(struct ccw_device *,unsigned int,unsigned int,
-                           unsigned int,unsigned int,int,int,unsigned long);
-
-
-#define QDIO_STATUS_INBOUND_INT 0x01
-#define QDIO_STATUS_OUTBOUND_INT 0x02
-#define QDIO_STATUS_LOOK_FOR_ERROR 0x04
-#define QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR 0x08
-#define QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR 0x10
-#define QDIO_STATUS_ACTIVATE_CHECK_CONDITION 0x20
-
-#define QDIO_SIGA_ERROR_ACCESS_EXCEPTION 0x10
-#define QDIO_SIGA_ERROR_B_BIT_SET 0x20
-
-/* for qdio_initialize */
-#define QDIO_INBOUND_0COPY_SBALS 0x01
-#define QDIO_OUTBOUND_0COPY_SBALS 0x02
-#define QDIO_USE_OUTBOUND_PCIS 0x04
-
-/* for qdio_cleanup */
-#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01
-#define QDIO_FLAG_CLEANUP_USING_HALT 0x02
-
-struct qdio_initialize {
-       struct ccw_device *cdev;
-       unsigned char q_format;
-       unsigned char adapter_name[8];
-               unsigned int qib_param_field_format; /*adapter dependent*/
-       /* pointer to 128 bytes or NULL, if no param field */
-       unsigned char *qib_param_field; /* adapter dependent */
-       /* pointer to no_queues*128 words of data or NULL */
-       unsigned long *input_slib_elements;
-       unsigned long *output_slib_elements;
-       unsigned int min_input_threshold;
-       unsigned int max_input_threshold;
-       unsigned int min_output_threshold;
-       unsigned int max_output_threshold;
-       unsigned int no_input_qs;
-       unsigned int no_output_qs;
-       qdio_handler_t *input_handler;
-       qdio_handler_t *output_handler;
-       unsigned long int_parm;
-       unsigned long flags;
-       void **input_sbal_addr_array; /* addr of n*128 void ptrs */
-       void **output_sbal_addr_array; /* addr of n*128 void ptrs */
-};
-
-extern int qdio_initialize(struct qdio_initialize *init_data);
-extern int qdio_allocate(struct qdio_initialize *init_data);
-extern int qdio_establish(struct qdio_initialize *init_data);
-
-extern int qdio_activate(struct ccw_device *,int flags);
-
-#define QDIO_STATE_MUST_USE_OUTB_PCI   0x00000001
-#define QDIO_STATE_INACTIVE            0x00000002 /* after qdio_cleanup */
-#define QDIO_STATE_ESTABLISHED                 0x00000004 /* after qdio_initialize */
-#define QDIO_STATE_ACTIVE              0x00000008 /* after qdio_activate */
-#define QDIO_STATE_STOPPED             0x00000010 /* after queues went down */
-extern unsigned long qdio_get_status(int irq);
-
-
-#define QDIO_FLAG_SYNC_INPUT     0x01
-#define QDIO_FLAG_SYNC_OUTPUT    0x02
-#define QDIO_FLAG_UNDER_INTERRUPT 0x04
-#define QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT 0x08 /* no effect on
-                                                    adapter interrupts */
-#define QDIO_FLAG_DONT_SIGA 0x10
-#define QDIO_FLAG_PCI_OUT   0x20
-
-extern int do_QDIO(struct ccw_device*, unsigned int flags, 
-                  unsigned int queue_number,
-                  unsigned int qidx,unsigned int count,
-                  struct qdio_buffer *buffers);
-
-extern int qdio_get_ssqd_pct(struct ccw_device*);
-extern int qdio_synchronize(struct ccw_device*, unsigned int flags,
-                           unsigned int queue_number);
-
-extern int qdio_cleanup(struct ccw_device*, int how);
-extern int qdio_shutdown(struct ccw_device*, int how);
-extern int qdio_free(struct ccw_device*);
-
-unsigned char qdio_get_slsb_state(struct ccw_device*, unsigned int flag,
-                                 unsigned int queue_number,
-                                 unsigned int qidx);
-
-extern void qdio_init_scrubber(void);
-
+#define QDIO_MAX_QUEUES_PER_IRQ                32
+#define QDIO_MAX_BUFFERS_PER_Q         128
+#define QDIO_MAX_BUFFERS_MASK          (QDIO_MAX_BUFFERS_PER_Q - 1)
+#define QDIO_MAX_ELEMENTS_PER_BUFFER   16
+#define QDIO_SBAL_SIZE                 256
+
+#define QDIO_QETH_QFMT                 0
+#define QDIO_ZFCP_QFMT                 1
+#define QDIO_IQDIO_QFMT                        2
+
+/**
+ * struct qdesfmt0 - queue descriptor, format 0
+ * @sliba: storage list information block address
+ * @sla: storage list address
+ * @slsba: storage list state block address
+ * @akey: access key for DLIB
+ * @bkey: access key for SL
+ * @ckey: access key for SBALs
+ * @dkey: access key for SLSB
+ */
  struct qdesfmt0 {
-#ifdef QDIO_32_BIT
-       unsigned long res1;             /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long sliba;            /* storage-list-information-block
-                                          address */
-#ifdef QDIO_32_BIT
-       unsigned long res2;             /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long sla;              /* storage-list address */
-#ifdef QDIO_32_BIT
-       unsigned long res3;             /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long slsba;            /* storage-list-state-block address */
-       unsigned int  res4;             /* reserved */
-       unsigned int  akey  :  4;       /* access key for DLIB */
-       unsigned int  bkey  :  4;       /* access key for SL */
-       unsigned int  ckey  :  4;       /* access key for SBALs */
-       unsigned int  dkey  :  4;       /* access key for SLSB */
-       unsigned int  res5  : 16;       /* reserved */
+       u64 sliba;
+       u64 sla;
+       u64 slsba;
+       u32      : 32;
+       u32 akey : 4;
+       u32 bkey : 4;
+       u32 ckey : 4;
+       u32 dkey : 4;
+       u32      : 16;
  } __attribute__ ((packed));
  
-/*
- * Queue-Description record (QDR)
+/**
+ * struct qdr - queue description record (QDR)
+ * @qfmt: queue format
+ * @pfmt: implementation dependent parameter format
+ * @ac: adapter characteristics
+ * @iqdcnt: input queue descriptor count
+ * @oqdcnt: output queue descriptor count
+ * @iqdsz: input queue descriptor size
+ * @oqdsz: output queue descriptor size
+ * @qiba: queue information block address
+ * @qkey: queue information block key
+ * @qdf0: queue descriptions
   */
  struct qdr {
-       unsigned int  qfmt    :  8;     /* queue format */
-       unsigned int  pfmt    :  8;     /* impl. dep. parameter format */
-       unsigned int  res1    :  8;     /* reserved */
-       unsigned int  ac      :  8;     /* adapter characteristics */
-       unsigned int  res2    :  8;     /* reserved */
-       unsigned int  iqdcnt  :  8;     /* input-queue-descriptor count */
-       unsigned int  res3    :  8;     /* reserved */
-       unsigned int  oqdcnt  :  8;     /* output-queue-descriptor count */
-       unsigned int  res4    :  8;     /* reserved */
-       unsigned int  iqdsz   :  8;     /* input-queue-descriptor size */
-       unsigned int  res5    :  8;     /* reserved */
-       unsigned int  oqdsz   :  8;     /* output-queue-descriptor size */
-       unsigned int  res6[9];          /* reserved */
-#ifdef QDIO_32_BIT
-       unsigned long res7;             /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long qiba;             /* queue-information-block address */
-       unsigned int  res8;             /* reserved */
-       unsigned int  qkey    :  4;     /* queue-information-block key */
-       unsigned int  res9    : 28;     /* reserved */
-/*     union _qd {*/ /* why this? */
-               struct qdesfmt0 qdf0[126];
-/*     } qd;*/
-} __attribute__ ((packed,aligned(4096)));
-
-
-/*
- * queue information block (QIB)
- */
-#define QIB_AC_INBOUND_PCI_SUPPORTED   0x80
-#define QIB_AC_OUTBOUND_PCI_SUPPORTED  0x40
+       u32 qfmt   : 8;
+       u32 pfmt   : 8;
+       u32        : 8;
+       u32 ac     : 8;
+       u32        : 8;
+       u32 iqdcnt : 8;
+       u32        : 8;
+       u32 oqdcnt : 8;
+       u32        : 8;
+       u32 iqdsz  : 8;
+       u32        : 8;
+       u32 oqdsz  : 8;
+       /* private: */
+       u32 res[9];
+       /* public: */
+       u64 qiba;
+       u32        : 32;
+       u32 qkey   : 4;
+       u32        : 28;
+       struct qdesfmt0 qdf0[126];
+} __attribute__ ((packed, aligned(4096)));
+
+#define QIB_AC_OUTBOUND_PCI_SUPPORTED  0x40
  #define QIB_RFLAGS_ENABLE_QEBSM                0x80
  
+/**
+ * struct qib - queue information block (QIB)
+ * @qfmt: queue format
+ * @pfmt: implementation dependent parameter format
+ * @rflags: QEBSM
+ * @ac: adapter characteristics
+ * @isliba: absolute address of first input SLIB
+ * @osliba: absolute address of first output SLIB
+ * @ebcnam: adapter identifier in EBCDIC
+ * @parm: implementation dependent parameters
+ */
  struct qib {
-       unsigned int  qfmt    :  8;     /* queue format */
-       unsigned int  pfmt    :  8;     /* impl. dep. parameter format */
-       unsigned int  rflags  :  8;     /* QEBSM */
-       unsigned int  ac      :  8;     /* adapter characteristics */
-       unsigned int  res2;             /* reserved */
-#ifdef QDIO_32_BIT
-       unsigned long res3;             /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long isliba;           /* absolute address of 1st
-                                          input SLIB */
-#ifdef QDIO_32_BIT
-       unsigned long res4;             /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long osliba;           /* absolute address of 1st
-                                          output SLIB */
-       unsigned int  res5;             /* reserved */
-       unsigned int  res6;             /* reserved */
-       unsigned char ebcnam[8];        /* adapter identifier in EBCDIC */
-       unsigned char res7[88];         /* reserved */
-       unsigned char parm[QDIO_MAX_BUFFERS_PER_Q];
-                                       /* implementation dependent
-                                          parameters */
-} __attribute__ ((packed,aligned(256)));
-
-
-/*
- * storage-list-information block element (SLIBE)
+       u32 qfmt   : 8;
+       u32 pfmt   : 8;
+       u32 rflags : 8;
+       u32 ac     : 8;
+       u32        : 32;
+       u64 isliba;
+       u64 osliba;
+       u32        : 32;
+       u32        : 32;
+       u8 ebcnam[8];
+       /* private: */
+       u8 res[88];
+       /* public: */
+       u8 parm[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct slibe - storage list information block element (SLIBE)
+ * @parms: implementation dependent parameters
   */
  struct slibe {
-#ifdef QDIO_32_BIT
-       unsigned long res;              /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long parms;            /* implementation dependent
-                                          parameters */
+       u64 parms;
  };
  
-/*
- * storage-list-information block (SLIB)
+/**
+ * struct slib - storage list information block (SLIB)
+ * @nsliba: next SLIB address (if any)
+ * @sla: SL address
+ * @slsba: SLSB address
+ * @slibe: SLIB elements
   */
  struct slib {
-#ifdef QDIO_32_BIT
-       unsigned long res1;             /* reserved */
-#endif /* QDIO_32_BIT */
-        unsigned long nsliba;           /* next SLIB address (if any) */
-#ifdef QDIO_32_BIT
-       unsigned long res2;             /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long sla;              /* SL address */
-#ifdef QDIO_32_BIT
-       unsigned long res3;             /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long slsba;            /* SLSB address */
-       unsigned char res4[1000];       /* reserved */
-       struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];    /* SLIB elements */
-} __attribute__ ((packed,aligned(2048)));
-
+       u64 nsliba;
+       u64 sla;
+       u64 slsba;
+       /* private: */
+       u8 res[1000];
+       /* public: */
+       struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(2048)));
+
+/**
+ * struct sbal_flags - storage block address list flags
+ * @last: last entry
+ * @cont: contiguous storage
+ * @frag: fragmentation
+ */
  struct sbal_flags {
-       unsigned char res1  : 1;   /* reserved */
-       unsigned char last  : 1;   /* last entry */
-       unsigned char cont  : 1;   /* contiguous storage */
-       unsigned char res2  : 1;   /* reserved */
-       unsigned char frag  : 2;   /* fragmentation (s.below) */
-       unsigned char res3  : 2;   /* reserved */
+       u8      : 1;
+       u8 last : 1;
+       u8 cont : 1;
+       u8      : 1;
+       u8 frag : 2;
+       u8      : 2;
  } __attribute__ ((packed));
  
-#define SBAL_FLAGS_FIRST_FRAG       0x04000000UL
-#define SBAL_FLAGS_MIDDLE_FRAG      0x08000000UL
-#define SBAL_FLAGS_LAST_FRAG        0x0c000000UL
-#define SBAL_FLAGS_LAST_ENTRY       0x40000000UL
-#define SBAL_FLAGS_CONTIGUOUS       0x20000000UL
+#define SBAL_FLAGS_FIRST_FRAG          0x04000000UL
+#define SBAL_FLAGS_MIDDLE_FRAG         0x08000000UL
+#define SBAL_FLAGS_LAST_FRAG           0x0c000000UL
+#define SBAL_FLAGS_LAST_ENTRY          0x40000000UL
+#define SBAL_FLAGS_CONTIGUOUS          0x20000000UL
  
-#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL
+#define SBAL_FLAGS0_DATA_CONTINUATION  0x20UL
  
  /* Awesome OpenFCP extensions */
-#define SBAL_FLAGS0_TYPE_STATUS       0x00UL
-#define SBAL_FLAGS0_TYPE_WRITE        0x08UL
-#define SBAL_FLAGS0_TYPE_READ         0x10UL
-#define SBAL_FLAGS0_TYPE_WRITE_READ   0x18UL
-#define SBAL_FLAGS0_MORE_SBALS       0x04UL
-#define SBAL_FLAGS0_COMMAND           0x02UL
-#define SBAL_FLAGS0_LAST_SBAL         0x00UL
-#define SBAL_FLAGS0_ONLY_SBAL         SBAL_FLAGS0_COMMAND
-#define SBAL_FLAGS0_MIDDLE_SBAL       SBAL_FLAGS0_MORE_SBALS
-#define SBAL_FLAGS0_FIRST_SBAL        SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND
-/* Naught of interest beyond this point */
-
-#define SBAL_FLAGS0_PCI                0x40
+#define SBAL_FLAGS0_TYPE_STATUS                0x00UL
+#define SBAL_FLAGS0_TYPE_WRITE         0x08UL
+#define SBAL_FLAGS0_TYPE_READ          0x10UL
+#define SBAL_FLAGS0_TYPE_WRITE_READ    0x18UL
+#define SBAL_FLAGS0_MORE_SBALS         0x04UL
+#define SBAL_FLAGS0_COMMAND            0x02UL
+#define SBAL_FLAGS0_LAST_SBAL          0x00UL
+#define SBAL_FLAGS0_ONLY_SBAL          SBAL_FLAGS0_COMMAND
+#define SBAL_FLAGS0_MIDDLE_SBAL                SBAL_FLAGS0_MORE_SBALS
+#define SBAL_FLAGS0_FIRST_SBAL (SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND)
+#define SBAL_FLAGS0_PCI                        0x40
+
+/**
+ * struct sbal_sbalf_0 - sbal flags for sbale 0
+ * @pci: PCI indicator
+ * @cont: data continuation
+ * @sbtype: storage-block type (FCP)
+ */
  struct sbal_sbalf_0 {
-       unsigned char res1  : 1;   /* reserved */
-       unsigned char pci   : 1;   /* PCI indicator */
-       unsigned char cont  : 1;   /* data continuation */
-       unsigned char sbtype: 2;   /* storage-block type (OpenFCP) */
-       unsigned char res2  : 3;   /* reserved */
+       u8        : 1;
+       u8 pci    : 1;
+       u8 cont   : 1;
+       u8 sbtype : 2;
+       u8        : 3;
  } __attribute__ ((packed));
  
+/**
+ * struct sbal_sbalf_1 - sbal flags for sbale 1
+ * @key: storage key
+ */
  struct sbal_sbalf_1 {
-       unsigned char res1  : 4;   /* reserved */
-       unsigned char key   : 4;   /* storage key */
+       u8     : 4;
+       u8 key : 4;
  } __attribute__ ((packed));
  
+/**
+ * struct sbal_sbalf_14 - sbal flags for sbale 14
+ * @erridx: error index
+ */
  struct sbal_sbalf_14 {
-       unsigned char res1   : 4;  /* reserved */
-       unsigned char erridx : 4;  /* error index */
+       u8        : 4;
+       u8 erridx : 4;
  } __attribute__ ((packed));
  
+/**
+ * struct sbal_sbalf_15 - sbal flags for sbale 15
+ * @reason: reason for error state
+ */
  struct sbal_sbalf_15 {
-       unsigned char reason;      /* reserved */
+       u8 reason;
  } __attribute__ ((packed));
  
+/**
+ * union sbal_sbalf - storage block address list flags
+ * @i0: sbalf0
+ * @i1: sbalf1
+ * @i14: sbalf14
+ * @i15: sbalf15
+ * @value: raw value
+ */
  union sbal_sbalf {
         struct sbal_sbalf_0  i0;
         struct sbal_sbalf_1  i1;
         struct sbal_sbalf_14 i14;
         struct sbal_sbalf_15 i15;
-       unsigned char value;
+       u8 value;
  };
  
-struct sbal_element {
-       union {
-               struct sbal_flags  bits;       /* flags */
-               unsigned char value;
-       } flags;
-       unsigned int  res1  : 16;   /* reserved */
-       union sbal_sbalf  sbalf;       /* SBAL flags */
-       unsigned int  res2  : 16;  /* reserved */
-       unsigned int  count : 16;  /* data count */
-#ifdef QDIO_32_BIT
-       unsigned long res3;        /* reserved */
-#endif /* QDIO_32_BIT */
-       unsigned long addr;        /* absolute data address */
-} __attribute__ ((packed,aligned(16)));
+/**
+ * struct qdio_buffer_element - SBAL entry
+ * @flags: flags
+ * @length: length
+ * @addr: address
+ */
+struct qdio_buffer_element {
+       u32 flags;
+       u32 length;
+#ifdef CONFIG_32BIT
+       /* private: */
+       void *reserved;
+       /* public: */
+#endif
+       void *addr;
+} __attribute__ ((packed, aligned(16)));
  
-/*
- * strorage-block access-list (SBAL)
+/**
+ * struct qdio_buffer - storage block address list (SBAL)
+ * @element: SBAL entries
   */
-struct sbal {
-       struct sbal_element element[QDIO_MAX_ELEMENTS_PER_BUFFER];
-} __attribute__ ((packed,aligned(256)));
+struct qdio_buffer {
+       struct qdio_buffer_element element[QDIO_MAX_ELEMENTS_PER_BUFFER];
+} __attribute__ ((packed, aligned(256)));
  
-/*
- * storage-list (SL)
+/**
+ * struct sl_element - storage list entry
+ * @sbal: absolute SBAL address
   */
  struct sl_element {
-#ifdef QDIO_32_BIT
-        unsigned long res;     /* reserved */
-#endif /* QDIO_32_BIT */
-        unsigned long sbal;    /* absolute SBAL address */
+#ifdef CONFIG_32BIT
+       /* private: */
+       unsigned long reserved;
+       /* public: */
+#endif
+       unsigned long sbal;
  } __attribute__ ((packed));
  
+/**
+ * struct sl - storage list (SL)
+ * @element: SL entries
+ */
  struct sl {
         struct sl_element element[QDIO_MAX_BUFFERS_PER_Q];
-} __attribute__ ((packed,aligned(1024)));
+} __attribute__ ((packed, aligned(1024)));
  
-/*
- * storage-list-state block (SLSB)
+/**
+ * struct slsb - storage list state block (SLSB)
+ * @val: state per buffer
   */
-struct slsb_flags {
-       unsigned char owner  : 2;   /* SBAL owner */
-       unsigned char type   : 1;   /* buffer type */
-       unsigned char state  : 5;   /* processing state */
+struct slsb {
+       u8 val[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(256)));
+
+struct qdio_ssqd_desc {
+       u8 flags;
+       u8:8;
+       u16 sch;
+       u8 qfmt;
+       u8 parm;
+       u8 qdioac1;
+       u8 sch_class;
+       u8 pcnt;
+       u8 icnt;
+       u8:8;
+       u8 ocnt;
+       u8:8;
+       u8 mbccnt;
+       u16 qdioac2;
+       u64 sch_token;
+       u64:64;
  } __attribute__ ((packed));
  
+/* params are: ccw_device, qdio_error, queue_number,
+   first element processed, number of elements processed, int_parm */
+typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
+                           int, int, unsigned long);
  
-struct slsb {
-       union {
-               unsigned char val[QDIO_MAX_BUFFERS_PER_Q];
-               struct slsb_flags flags[QDIO_MAX_BUFFERS_PER_Q];
-       } acc;
-} __attribute__ ((packed,aligned(256)));
+/* qdio errors reported to the upper-layer program */
+#define QDIO_ERROR_SIGA_ACCESS_EXCEPTION       0x10
+#define QDIO_ERROR_SIGA_BUSY                   0x20
+#define QDIO_ERROR_ACTIVATE_CHECK_CONDITION    0x40
+#define QDIO_ERROR_SLSB_STATE                  0x80
  
-/*
- * SLSB values
+/* for qdio_initialize */
+#define QDIO_INBOUND_0COPY_SBALS               0x01
+#define QDIO_OUTBOUND_0COPY_SBALS              0x02
+#define QDIO_USE_OUTBOUND_PCIS                 0x04
+
+/* for qdio_cleanup */
+#define QDIO_FLAG_CLEANUP_USING_CLEAR          0x01
+#define QDIO_FLAG_CLEANUP_USING_HALT           0x02
+
+/**
+ * struct qdio_initialize - qdio initialization data
+ * @cdev: associated ccw device
+ * @q_format: queue format
+ * @adapter_name: name for the adapter
+ * @qib_param_field_format: format for qib_param_field
+ * @qib_param_field: pointer to 128 bytes or NULL, if no param field
+ * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL
+ * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL
+ * @no_input_qs: number of input queues
+ * @no_output_qs: number of output queues
+ * @input_handler: handler to be called for input queues
+ * @output_handler: handler to be called for output queues
+ * @int_parm: interruption parameter
+ * @flags: initialization flags
+ * @input_sbal_addr_array:  address of no_input_qs * 128 pointers
+ * @output_sbal_addr_array: address of no_output_qs * 128 pointers
   */
-#define SLSB_OWNER_PROG              1
-#define SLSB_OWNER_CU                2
-
-#define SLSB_TYPE_INPUT              0
-#define SLSB_TYPE_OUTPUT             1
-
-#define SLSB_STATE_NOT_INIT          0
-#define SLSB_STATE_EMPTY             1
-#define SLSB_STATE_PRIMED            2
-#define SLSB_STATE_HALTED          0xe
-#define SLSB_STATE_ERROR           0xf
-
-#define SLSB_P_INPUT_NOT_INIT     0x80
-#define SLSB_P_INPUT_PROCESSING          0x81
-#define SLSB_CU_INPUT_EMPTY       0x41
-#define SLSB_P_INPUT_PRIMED       0x82
-#define SLSB_P_INPUT_HALTED       0x8E
-#define SLSB_P_INPUT_ERROR        0x8F
-
-#define SLSB_P_OUTPUT_NOT_INIT    0xA0
-#define SLSB_P_OUTPUT_EMPTY       0xA1
-#define SLSB_CU_OUTPUT_PRIMED     0x62
-#define SLSB_P_OUTPUT_HALTED      0xAE
-#define SLSB_P_OUTPUT_ERROR       0xAF
-
-#define SLSB_ERROR_DURING_LOOKUP  0xFF
+struct qdio_initialize {
+       struct ccw_device *cdev;
+       unsigned char q_format;
+       unsigned char adapter_name[8];
+       unsigned int qib_param_field_format;
+       unsigned char *qib_param_field;
+       unsigned long *input_slib_elements;
+       unsigned long *output_slib_elements;
+       unsigned int no_input_qs;
+       unsigned int no_output_qs;
+       qdio_handler_t *input_handler;
+       qdio_handler_t *output_handler;
+       unsigned long int_parm;
+       unsigned long flags;
+       void **input_sbal_addr_array;
+       void **output_sbal_addr_array;
+};
+
+#define QDIO_STATE_INACTIVE            0x00000002 /* after qdio_cleanup */
+#define QDIO_STATE_ESTABLISHED         0x00000004 /* after qdio_establish */
+#define QDIO_STATE_ACTIVE              0x00000008 /* after qdio_activate */
+#define QDIO_STATE_STOPPED             0x00000010 /* after queues went down */
+
+#define QDIO_FLAG_SYNC_INPUT           0x01
+#define QDIO_FLAG_SYNC_OUTPUT          0x02
+#define QDIO_FLAG_PCI_OUT              0x10
+
+extern int qdio_initialize(struct qdio_initialize *init_data);
+extern int qdio_allocate(struct qdio_initialize *init_data);
+extern int qdio_establish(struct qdio_initialize *init_data);
+extern int qdio_activate(struct ccw_device *);
+
+extern int do_QDIO(struct ccw_device*, unsigned int flags,
+                  int q_nr, int qidx, int count);
+extern int qdio_cleanup(struct ccw_device*, int how);
+extern int qdio_shutdown(struct ccw_device*, int how);
+extern int qdio_free(struct ccw_device *);
+extern struct qdio_ssqd_desc *qdio_get_ssqd_desc(struct ccw_device *cdev);
  
  #endif /* __QDIO_H__ */
diff --git a/include/asm-s390/schid.h b/include/asm-s390/schid.h

index 5017ffa78e049f17caa116490789f68f9594ff7d..7bdc0fe15691df26c41722e574817c449bb9f65f 100644 (file)
--- a/include/asm-s390/schid.h
+++ b/include/asm-s390/schid.h
@@ -10,6 +10,7 @@ struct subchannel_id {
         __u32 sch_no : 16;
  } __attribute__ ((packed, aligned(4)));
  
+#ifdef __KERNEL__
  
  /* Helper function for sane state of pre-allocated subchannel_id. */
  static inline void
@@ -25,4 +26,6 @@ schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2)
         return !memcmp(schid1, schid2, sizeof(struct subchannel_id));
  }
  
+#endif /* __KERNEL__ */
+
  #endif /* ASM_SCHID_H */
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h

index f09ee3f729778dee8be898577303dbe4b988abc7..4ba14e463e838ad6366673eceea62377ccb96c99 100644 (file)
--- a/include/asm-s390/setup.h
+++ b/include/asm-s390/setup.h
@@ -65,6 +65,7 @@ extern unsigned long machine_flags;
  
  #define MACHINE_FLAG_VM                (1UL << 0)
  #define MACHINE_FLAG_IEEE      (1UL << 1)
+#define MACHINE_FLAG_P390      (1UL << 2)
  #define MACHINE_FLAG_CSP       (1UL << 3)
  #define MACHINE_FLAG_MVPG      (1UL << 4)
  #define MACHINE_FLAG_DIAG44    (1UL << 5)
@@ -77,7 +78,6 @@ extern unsigned long machine_flags;
  
  #define MACHINE_IS_VM          (machine_flags & MACHINE_FLAG_VM)
  #define MACHINE_IS_KVM         (machine_flags & MACHINE_FLAG_KVM)
-#define MACHINE_IS_P390                (machine_flags & MACHINE_FLAG_P390)
  #define MACHINE_HAS_DIAG9C     (machine_flags & MACHINE_FLAG_DIAG9C)
  
  #ifndef __s390x__
diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h

index 5b21485be573e12eb845a845fdffc463506b9c98..80c775d9fe205810257fb8dc0013b997b5d61969 100644 (file)
--- a/include/asm-x86/pci-direct.h
+++ b/include/asm-x86/pci-direct.h
@@ -11,7 +11,11 @@ extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset);
  extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset);
  extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val);
  extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val);
+extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val);
  
  extern int early_pci_allowed(void);
  
+extern unsigned int pci_early_dump_regs;
+extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
+extern void early_dump_pci_devices(void);
  #endif
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h

index 7f738270459276a305758ac8a3edce974b1985ba..55402d2ab9380e3f621c889503af1cec0abd9851 100644 (file)
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -727,6 +727,8 @@ extern int                  force_mwait;
  extern void select_idle_routine(const struct cpuinfo_x86 *c);
  
  extern unsigned long           boot_option_idle_override;
+extern unsigned long           idle_halt;
+extern unsigned long           idle_nomwait;
  
  extern void enable_sep_cpu(void);
  extern int sysenter_setup(void);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h

index 0601075d09a13565d0b1ea94e9d0bba1947e95df..a171776393767ab0e23411c4ff2d8a89f15676e2 100644 (file)
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -235,6 +235,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n,
  int acpi_check_mem_region(resource_size_t start, resource_size_t n,
                       const char *name);
  
+#ifdef CONFIG_PM_SLEEP
+void __init acpi_old_suspend_ordering(void);
+#endif /* CONFIG_PM_SLEEP */
  #else  /* CONFIG_ACPI */
  
  static inline int early_acpi_boot_init(void)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 32a441b05fd5fbedf33748e7151cec38ea27dd90..88d68081a0f159a8fe19bfea00cb95aca161715d 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -985,6 +985,9 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw)
  
  static inline int blk_integrity_rq(struct request *rq)
  {
+       if (rq->bio == NULL)
+               return 0;
+
         return bio_integrity(rq->bio);
  }
  
diff --git a/include/linux/device.h b/include/linux/device.h

index 6a2d04c011bc1850e4f8191cc3749699952e2b7c..f71a78d123aef9d7b3b03276921d10286e1eceed 100644 (file)
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -68,6 +68,8 @@ struct bus_type {
         int (*resume_early)(struct device *dev);
         int (*resume)(struct device *dev);
  
+       struct pm_ext_ops *pm;
+
         struct bus_type_private *p;
  };
  
@@ -131,6 +133,8 @@ struct device_driver {
         int (*resume) (struct device *dev);
         struct attribute_group **groups;
  
+       struct pm_ops *pm;
+
         struct driver_private *p;
  };
  
@@ -197,6 +201,8 @@ struct class {
  
         int (*suspend)(struct device *dev, pm_message_t state);
         int (*resume)(struct device *dev);
+
+       struct pm_ops *pm;
  };
  
  extern int __must_check class_register(struct class *class);
@@ -248,8 +254,11 @@ struct device_type {
         struct attribute_group **groups;
         int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
         void (*release)(struct device *dev);
+
         int (*suspend)(struct device *dev, pm_message_t state);
         int (*resume)(struct device *dev);
+
+       struct pm_ops *pm;
  };
  
  /* interface for exporting device attributes */
diff --git a/include/linux/freezer.h b/include/linux/freezer.h

index 08934995c7ab5463e5f4064f4e2fff5ccfb4af94..deddeedf32571f12015de116f729ae666f74ede4 100644 (file)
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -127,6 +127,15 @@ static inline void set_freezable(void)
         current->flags &= ~PF_NOFREEZE;
  }
  
+/*
+ * Tell the freezer that the current task should be frozen by it and that it
+ * should send a fake signal to the task to freeze it.
+ */
+static inline void set_freezable_with_signal(void)
+{
+       current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG);
+}
+
  /*
   * Freezer-friendly wrappers around wait_event_interruptible() and
   * wait_event_interruptible_timeout(), originally defined in <linux/wait.h>
@@ -174,6 +183,7 @@ static inline void freezer_do_not_count(void) {}
  static inline void freezer_count(void) {}
  static inline int freezer_should_skip(struct task_struct *p) { return 0; }
  static inline void set_freezable(void) {}
+static inline void set_freezable_with_signal(void) {}
  
  #define wait_event_freezable(wq, condition)                            \
                 wait_event_interruptible(wq, condition)
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 52e510a0aec2715baa3bacef354cff909f4e79ae..c6455dadb21bcde812f97046e5996f5b82c5c437 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1729,6 +1729,8 @@ static inline void invalidate_remote_inode(struct inode *inode)
  extern int invalidate_inode_pages2(struct address_space *mapping);
  extern int invalidate_inode_pages2_range(struct address_space *mapping,
                                          pgoff_t start, pgoff_t end);
+extern void generic_sync_sb_inodes(struct super_block *sb,
+                               struct writeback_control *wbc);
  extern int write_inode_now(struct inode *, int);
  extern int filemap_fdatawrite(struct address_space *);
  extern int filemap_flush(struct address_space *);
diff --git a/include/linux/ide.h b/include/linux/ide.h

index ac4eeb2932efb03a61484d1badbfd34620957a28..4726126f5a59875a831084062e4430ddf73b79a3 100644 (file)
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -138,6 +138,12 @@ struct ide_io_ports {
  #define WAIT_CMD       (10*HZ) /* 10sec  - maximum wait for an IRQ to happen */
  #define WAIT_MIN_SLEEP (2*HZ/100)      /* 20msec - minimum sleep time */
  
+/*
+ * Op codes for special requests to be handled by ide_special_rq().
+ * Values should be in the range of 0x20 to 0x3f.
+ */
+#define REQ_DRIVE_RESET                0x20
+
  /*
   * Check for an interrupt and acknowledge the interrupt status
   */
@@ -171,7 +177,7 @@ typedef struct hw_regs_s {
         int             irq;                    /* our irq number */
         ide_ack_intr_t  *ack_intr;              /* acknowledge interrupt */
         hwif_chipset_t  chipset;
-       struct device   *dev;
+       struct device   *dev, *parent;
  } hw_regs_t;
  
  void ide_init_port_data(struct hwif_s *, unsigned int);
@@ -405,8 +411,8 @@ typedef struct ide_drive_s {
  struct ide_port_info;
  
  struct ide_port_ops {
-       /* host specific initialization of devices on a port */
-       void    (*port_init_devs)(struct hwif_s *);
+       /* host specific initialization of a device */
+       void    (*init_dev)(ide_drive_t *);
         /* routine to program host for PIO mode */
         void    (*set_pio_mode)(ide_drive_t *, const u8);
         /* routine to program host for DMA mode */
@@ -565,8 +571,6 @@ typedef struct hwgroup_s {
         unsigned int sleeping   : 1;
                 /* BOOL: polling active & poll_timeout field valid */
         unsigned int polling    : 1;
-               /* BOOL: in a polling reset situation. Must not trigger another reset yet */
-       unsigned int resetting  : 1;
  
                 /* current drive */
         ide_drive_t *drive;
@@ -786,7 +790,6 @@ struct ide_driver_s {
         ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t);
         int             (*end_request)(ide_drive_t *, int, int);
         ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8);
-       ide_startstop_t (*abort)(ide_drive_t *, struct request *rq);
         struct device_driver    gen_driver;
         int             (*probe)(ide_drive_t *);
         void            (*remove)(ide_drive_t *);
@@ -801,18 +804,6 @@ struct ide_driver_s {
  
  int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long);
  
-/*
- * ide_hwifs[] is the master data structure used to keep track
- * of just about everything in ide.c.  Whenever possible, routines
- * should be using pointers to a drive (ide_drive_t *) or
- * pointers to a hwif (ide_hwif_t *), rather than indexing this
- * structure directly (the allocation/layout may change!).
- *
- */
-#ifndef _IDE_C
-extern ide_hwif_t      ide_hwifs[];            /* master data repository */
-#endif
-
  extern int ide_vlb_clk;
  extern int ide_pci_clk;
  
@@ -840,10 +831,6 @@ ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8);
  
  ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
  
-ide_startstop_t __ide_abort(ide_drive_t *, struct request *);
-
-extern ide_startstop_t ide_abort(ide_drive_t *, const char *);
-
  extern void ide_fix_driveid(struct hd_driveid *);
  
  extern void ide_fixstring(u8 *, const int, const int);
@@ -1271,16 +1258,43 @@ static inline int ide_dev_is_sata(struct hd_driveid *id)
  u64 ide_get_lba_addr(struct ide_taskfile *, int);
  u8 ide_dump_status(ide_drive_t *, const char *, u8);
  
-typedef struct ide_pio_timings_s {
-       int     setup_time;     /* Address setup (ns) minimum */
-       int     active_time;    /* Active pulse (ns) minimum */
-       int     cycle_time;     /* Cycle time (ns) minimum = */
-                               /* active + recovery (+ setup for some chips) */
-} ide_pio_timings_t;
+struct ide_timing {
+       u8  mode;
+       u8  setup;      /* t1 */
+       u16 act8b;      /* t2 for 8-bit io */
+       u16 rec8b;      /* t2i for 8-bit io */
+       u16 cyc8b;      /* t0 for 8-bit io */
+       u16 active;     /* t2 or tD */
+       u16 recover;    /* t2i or tK */
+       u16 cycle;      /* t0 */
+       u16 udma;       /* t2CYCTYP/2 */
+};
+
+enum {
+       IDE_TIMING_SETUP        = (1 << 0),
+       IDE_TIMING_ACT8B        = (1 << 1),
+       IDE_TIMING_REC8B        = (1 << 2),
+       IDE_TIMING_CYC8B        = (1 << 3),
+       IDE_TIMING_8BIT         = IDE_TIMING_ACT8B | IDE_TIMING_REC8B |
+                                 IDE_TIMING_CYC8B,
+       IDE_TIMING_ACTIVE       = (1 << 4),
+       IDE_TIMING_RECOVER      = (1 << 5),
+       IDE_TIMING_CYCLE        = (1 << 6),
+       IDE_TIMING_UDMA         = (1 << 7),
+       IDE_TIMING_ALL          = IDE_TIMING_SETUP | IDE_TIMING_8BIT |
+                                 IDE_TIMING_ACTIVE | IDE_TIMING_RECOVER |
+                                 IDE_TIMING_CYCLE | IDE_TIMING_UDMA,
+};
+
+struct ide_timing *ide_timing_find_mode(u8);
+u16 ide_pio_cycle_time(ide_drive_t *, u8);
+void ide_timing_merge(struct ide_timing *, struct ide_timing *,
+                     struct ide_timing *, unsigned int);
+int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int);
+
+int ide_scan_pio_blacklist(char *);
  
-unsigned int ide_pio_cycle_time(ide_drive_t *, u8);
  u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8);
-extern const ide_pio_timings_t ide_pio_timings[6];
  
  int ide_set_pio_mode(ide_drive_t *, u8);
  int ide_set_dma_mode(ide_drive_t *, u8);
diff --git a/include/linux/inet.h b/include/linux/inet.h

index 1354080cf8cfa611d85f4594c2e7a74b2867f4ef..4cca05c9678e760a07c0c8dcd88f73cc90ddfdc1 100644 (file)
--- a/include/linux/inet.h
+++ b/include/linux/inet.h
@@ -44,6 +44,13 @@
  
  #include <linux/types.h>
  
+/*
+ * These mimic similar macros defined in user-space for inet_ntop(3).
+ * See /usr/include/netinet/in.h .
+ */
+#define INET_ADDRSTRLEN                (16)
+#define INET6_ADDRSTRLEN       (48)
+
  extern __be32 in_aton(const char *str);
  extern int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end);
  extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h

index 9927a88674a3bd3b5c1a5d4fe346f8a9889f6cc9..93c45acf249ab20cc19d43c94021a25691be9c76 100644 (file)
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -140,8 +140,8 @@ extern struct group_info init_groups;
                 .nr_cpus_allowed = NR_CPUS,                             \
         },                                                              \
         .tasks          = LIST_HEAD_INIT(tsk.tasks),                    \
-       .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children),          \
-       .ptrace_list    = LIST_HEAD_INIT(tsk.ptrace_list),              \
+       .ptraced        = LIST_HEAD_INIT(tsk.ptraced),                  \
+       .ptrace_entry   = LIST_HEAD_INIT(tsk.ptrace_entry),             \
         .real_parent    = &tsk,                                         \
         .parent         = &tsk,                                         \
         .children       = LIST_HEAD_INIT(tsk.children),                 \
diff --git a/include/linux/ioport.h b/include/linux/ioport.h

index c6801bffe76d335af9e8ac2db0fe68a07d3e9823..2cd07cc29687c49bd1cf8a0883cf5c72bbb4e762 100644 (file)
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -59,6 +59,7 @@ struct resource_list {
  #define IORESOURCE_IRQ_HIGHLEVEL       (1<<2)
  #define IORESOURCE_IRQ_LOWLEVEL                (1<<3)
  #define IORESOURCE_IRQ_SHAREABLE       (1<<4)
+#define IORESOURCE_IRQ_OPTIONAL        (1<<5)
  
  /* PnP DMA specific bits (IORESOURCE_BITS) */
  #define IORESOURCE_DMA_TYPE_MASK       (3<<0)
@@ -88,6 +89,10 @@ struct resource_list {
  #define IORESOURCE_MEM_SHADOWABLE      (1<<5)  /* dup: IORESOURCE_SHADOWABLE */
  #define IORESOURCE_MEM_EXPANSIONROM    (1<<6)
  
+/* PnP I/O specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_IO_16BIT_ADDR       (1<<0)
+#define IORESOURCE_IO_FIXED            (1<<1)
+
  /* PCI ROM control bits (IORESOURCE_BITS) */
  #define IORESOURCE_ROM_ENABLE          (1<<0)  /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */
  #define IORESOURCE_ROM_SHADOW          (1<<1)  /* ROM is copy at C000:0 */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h

index d0c3abed74c2cbb5cbc71c6bd563bd04dad03fdc..143cebf0586f140fd267bf360d69654281584d11 100644 (file)
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -135,6 +135,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *,
         struct mmc_command *, int);
  
  extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *);
+extern unsigned int mmc_align_data_size(struct mmc_card *, unsigned int);
  
  extern int __mmc_claim_host(struct mmc_host *host, atomic_t *abort);
  extern void mmc_release_host(struct mmc_host *host);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h

index 7ab962fa1d738ffd540e4a260bb6822830b802da..10a2080086ca9e6402eec37178bd8dea384f3f92 100644 (file)
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -51,8 +51,30 @@ struct mmc_ios {
  
  struct mmc_host_ops {
         void    (*request)(struct mmc_host *host, struct mmc_request *req);
+       /*
+        * Avoid calling these three functions too often or in a "fast path",
+        * since the underlying controller might implement them in an expensive
+        * and/or slow way.
+        *
+        * Also note that these functions might sleep, so don't call them
+        * in atomic contexts!
+        *
+        * Return values for the get_ro callback should be:
+        *   0 for a read/write card
+        *   1 for a read-only card
+        *   -ENOSYS when not supported (equal to NULL callback)
+        *   or a negative errno value when something bad happened
+        *
+        * Return values for the get_cd callback should be:
+        *   0 for an absent card
+        *   1 for a present card
+        *   -ENOSYS when not supported (equal to NULL callback)
+        *   or a negative errno value when something bad happened
+        */
         void    (*set_ios)(struct mmc_host *host, struct mmc_ios *ios);
         int     (*get_ro)(struct mmc_host *host);
+       int     (*get_cd)(struct mmc_host *host);
+
         void    (*enable_sdio_irq)(struct mmc_host *host, int enable);
  };
  
@@ -89,11 +111,11 @@ struct mmc_host {
         unsigned long           caps;           /* Host capabilities */
  
  #define MMC_CAP_4_BIT_DATA     (1 << 0)        /* Can the host do 4 bit transfers */
-#define MMC_CAP_MULTIWRITE     (1 << 1)        /* Can accurately report bytes sent to card on error */
-#define MMC_CAP_MMC_HIGHSPEED  (1 << 2)        /* Can do MMC high-speed timing */
-#define MMC_CAP_SD_HIGHSPEED   (1 << 3)        /* Can do SD high-speed timing */
-#define MMC_CAP_SDIO_IRQ       (1 << 4)        /* Can signal pending SDIO IRQs */
-#define MMC_CAP_SPI            (1 << 5)        /* Talks only SPI protocols */
+#define MMC_CAP_MMC_HIGHSPEED  (1 << 1)        /* Can do MMC high-speed timing */
+#define MMC_CAP_SD_HIGHSPEED   (1 << 2)        /* Can do SD high-speed timing */
+#define MMC_CAP_SDIO_IRQ       (1 << 3)        /* Can signal pending SDIO IRQs */
+#define MMC_CAP_SPI            (1 << 4)        /* Talks only SPI protocols */
+#define MMC_CAP_NEEDS_POLL     (1 << 5)        /* Needs polling for card-detection */
  
         /* host specific block data */
         unsigned int            max_seg_size;   /* see blk_queue_max_segment_size */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h

index 4236fbf0b6fbb831f4f617352e43ba3716d5188e..14b81f3e523292395ad2e1d8e00bf007a132920f 100644 (file)
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -16,7 +16,6 @@
   * Based strongly on code by:
   *
   * Author: Yong-iL Joh <tolkien@mizi.com>
- * Date  : $Date: 2002/06/18 12:37:30 $
   *
   * Author:  Andrew Christian
   *          15 May 2002
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h

index b050f4d7b41f05af47bf98c342c0b4528d110e19..07bee4a0d457e71c50eb9a7bbded7f69fd3d8d94 100644 (file)
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -1,7 +1,7 @@
  /*
   *  include/linux/mmc/sdio_func.h
   *
- *  Copyright 2007 Pierre Ossman
+ *  Copyright 2007-2008 Pierre Ossman
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
@@ -46,6 +46,8 @@ struct sdio_func {
         unsigned                max_blksize;    /* maximum block size */
         unsigned                cur_blksize;    /* current block size */
  
+       unsigned                enable_timeout; /* max enable timeout in msec */
+
         unsigned int            state;          /* function state */
  #define SDIO_STATE_PRESENT     (1<<0)          /* present in sysfs */
  
@@ -120,23 +122,22 @@ extern int sdio_set_block_size(struct sdio_func *func, unsigned blksz);
  extern int sdio_claim_irq(struct sdio_func *func, sdio_irq_handler_t *handler);
  extern int sdio_release_irq(struct sdio_func *func);
  
-extern unsigned char sdio_readb(struct sdio_func *func,
-       unsigned int addr, int *err_ret);
-extern unsigned short sdio_readw(struct sdio_func *func,
-       unsigned int addr, int *err_ret);
-extern unsigned long sdio_readl(struct sdio_func *func,
-       unsigned int addr, int *err_ret);
+extern unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz);
+
+extern u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret);
+extern u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret);
+extern u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret);
  
  extern int sdio_memcpy_fromio(struct sdio_func *func, void *dst,
         unsigned int addr, int count);
  extern int sdio_readsb(struct sdio_func *func, void *dst,
         unsigned int addr, int count);
  
-extern void sdio_writeb(struct sdio_func *func, unsigned char b,
+extern void sdio_writeb(struct sdio_func *func, u8 b,
         unsigned int addr, int *err_ret);
-extern void sdio_writew(struct sdio_func *func, unsigned short b,
+extern void sdio_writew(struct sdio_func *func, u16 b,
         unsigned int addr, int *err_ret);
-extern void sdio_writel(struct sdio_func *func, unsigned long b,
+extern void sdio_writel(struct sdio_func *func, u32 b,
         unsigned int addr, int *err_ret);
  
  extern int sdio_memcpy_toio(struct sdio_func *func, unsigned int addr,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h

index 27d6a8d98cef02477abf89c69337a1f77fbc208b..29d26191873428cecde2b29f3fd68b855684f614 100644 (file)
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -12,9 +12,19 @@
  #include <linux/magic.h>
  
  /* Default timeout values */
+#define NFS_DEF_UDP_TIMEO      (11)
+#define NFS_DEF_UDP_RETRANS    (3)
+#define NFS_DEF_TCP_TIMEO      (600)
+#define NFS_DEF_TCP_RETRANS    (2)
+
  #define NFS_MAX_UDP_TIMEOUT    (60*HZ)
  #define NFS_MAX_TCP_TIMEOUT    (600*HZ)
  
+#define NFS_DEF_ACREGMIN       (3)
+#define NFS_DEF_ACREGMAX       (60)
+#define NFS_DEF_ACDIRMIN       (30)
+#define NFS_DEF_ACDIRMAX       (60)
+
  /*
   * When flushing a cluster of dirty pages, there can be different
   * strategies:
diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h

new file mode 100644 (file)

index 0000000..1cb9a3f
--- /dev/null
+++ b/include/linux/nfs_iostat.h
@@ -0,0 +1,119 @@
+/*
+ *  User-space visible declarations for NFS client per-mount
+ *  point statistics
+ *
+ *  Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
+ *
+ *  NFS client per-mount statistics provide information about the
+ *  health of the NFS client and the health of each NFS mount point.
+ *  Generally these are not for detailed problem diagnosis, but
+ *  simply to indicate that there is a problem.
+ *
+ *  These counters are not meant to be human-readable, but are meant
+ *  to be integrated into system monitoring tools such as "sar" and
+ *  "iostat".  As such, the counters are sampled by the tools over
+ *  time, and are never zeroed after a file system is mounted.
+ *  Moving averages can be computed by the tools by taking the
+ *  difference between two instantaneous samples and dividing that
+ *  by the time between the samples.
+ */
+
+#ifndef _LINUX_NFS_IOSTAT
+#define _LINUX_NFS_IOSTAT
+
+#define NFS_IOSTAT_VERS                "1.0"
+
+/*
+ * NFS byte counters
+ *
+ * 1.  SERVER - the number of payload bytes read from or written
+ *     to the server by the NFS client via an NFS READ or WRITE
+ *     request.
+ *
+ * 2.  NORMAL - the number of bytes read or written by applications
+ *     via the read(2) and write(2) system call interfaces.
+ *
+ * 3.  DIRECT - the number of bytes read or written from files
+ *     opened with the O_DIRECT flag.
+ *
+ * These counters give a view of the data throughput into and out
+ * of the NFS client.  Comparing the number of bytes requested by
+ * an application with the number of bytes the client requests from
+ * the server can provide an indication of client efficiency
+ * (per-op, cache hits, etc).
+ *
+ * These counters can also help characterize which access methods
+ * are in use.  DIRECT by itself shows whether there is any O_DIRECT
+ * traffic.  NORMAL + DIRECT shows how much data is going through
+ * the system call interface.  A large amount of SERVER traffic
+ * without much NORMAL or DIRECT traffic shows that applications
+ * are using mapped files.
+ *
+ * NFS page counters
+ *
+ * These count the number of pages read or written via nfs_readpage(),
+ * nfs_readpages(), or their write equivalents.
+ *
+ * NB: When adding new byte counters, please include the measured
+ * units in the name of each byte counter to help users of this
+ * interface determine what exactly is being counted.
+ */
+enum nfs_stat_bytecounters {
+       NFSIOS_NORMALREADBYTES = 0,
+       NFSIOS_NORMALWRITTENBYTES,
+       NFSIOS_DIRECTREADBYTES,
+       NFSIOS_DIRECTWRITTENBYTES,
+       NFSIOS_SERVERREADBYTES,
+       NFSIOS_SERVERWRITTENBYTES,
+       NFSIOS_READPAGES,
+       NFSIOS_WRITEPAGES,
+       __NFSIOS_BYTESMAX,
+};
+
+/*
+ * NFS event counters
+ *
+ * These counters provide a low-overhead way of monitoring client
+ * activity without enabling NFS trace debugging.  The counters
+ * show the rate at which VFS requests are made, and how often the
+ * client invalidates its data and attribute caches.  This allows
+ * system administrators to monitor such things as how close-to-open
+ * is working, and answer questions such as "why are there so many
+ * GETATTR requests on the wire?"
+ *
+ * They also count anomalous events such as short reads and writes,
+ * silly renames due to close-after-delete, and operations that
+ * change the size of a file (such operations can often be the
+ * source of data corruption if applications aren't using file
+ * locking properly).
+ */
+enum nfs_stat_eventcounters {
+       NFSIOS_INODEREVALIDATE = 0,
+       NFSIOS_DENTRYREVALIDATE,
+       NFSIOS_DATAINVALIDATE,
+       NFSIOS_ATTRINVALIDATE,
+       NFSIOS_VFSOPEN,
+       NFSIOS_VFSLOOKUP,
+       NFSIOS_VFSACCESS,
+       NFSIOS_VFSUPDATEPAGE,
+       NFSIOS_VFSREADPAGE,
+       NFSIOS_VFSREADPAGES,
+       NFSIOS_VFSWRITEPAGE,
+       NFSIOS_VFSWRITEPAGES,
+       NFSIOS_VFSGETDENTS,
+       NFSIOS_VFSSETATTR,
+       NFSIOS_VFSFLUSH,
+       NFSIOS_VFSFSYNC,
+       NFSIOS_VFSLOCK,
+       NFSIOS_VFSRELEASE,
+       NFSIOS_CONGESTIONWAIT,
+       NFSIOS_SETATTRTRUNC,
+       NFSIOS_EXTENDWRITE,
+       NFSIOS_SILLYRENAME,
+       NFSIOS_SHORTREAD,
+       NFSIOS_SHORTWRITE,
+       NFSIOS_DELAY,
+       __NFSIOS_COUNTSMAX,
+};
+
+#endif /* _LINUX_NFS_IOSTAT */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h

index a1676e19e49170dc41607ae824b5c911c83886c1..3c60685d972b595a5be6366328276a9c5a1cbcdb 100644 (file)
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -27,9 +27,12 @@
  /*
   * Valid flags for a dirty buffer
   */
-#define PG_BUSY                        0
-#define PG_NEED_COMMIT         1
-#define PG_NEED_RESCHED                2
+enum {
+       PG_BUSY = 0,
+       PG_CLEAN,
+       PG_NEED_COMMIT,
+       PG_NEED_RESCHED,
+};
  
  struct nfs_inode;
  struct nfs_page {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h

index 24263bb8e0bebdc62892cdb6b1a2527e0a0023cc..8c77c11224d1e0b009d6250b8789287d5a628b35 100644 (file)
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -829,9 +829,8 @@ struct nfs_rpc_ops {
         int     (*write_done)  (struct rpc_task *, struct nfs_write_data *);
         void    (*commit_setup) (struct nfs_write_data *, struct rpc_message *);
         int     (*commit_done) (struct rpc_task *, struct nfs_write_data *);
-       int     (*file_open)   (struct inode *, struct file *);
-       int     (*file_release) (struct inode *, struct file *);
         int     (*lock)(struct file *, int, struct file_lock *);
+       int     (*lock_check_bounds)(const struct file_lock *);
         void    (*clear_acl_cache)(struct inode *);
  };
  
diff --git a/include/linux/pci.h b/include/linux/pci.h

index d18b1dd49faba947acd56a77e1ccbb55df562b0f..a6a088e1a8049bf7b15fe5dac91b87f9a938133a 100644 (file)
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -17,8 +17,7 @@
  #ifndef LINUX_PCI_H
  #define LINUX_PCI_H
  
-/* Include the pci register defines */
-#include <linux/pci_regs.h>
+#include <linux/pci_regs.h>    /* The pci register defines */
  
  /*
   * The PCI interface treats multi-function devices as independent
@@ -49,12 +48,22 @@
  #include <linux/list.h>
  #include <linux/compiler.h>
  #include <linux/errno.h>
+#include <linux/kobject.h>
  #include <asm/atomic.h>
  #include <linux/device.h>
  
  /* Include the ID list */
  #include <linux/pci_ids.h>
  
+/* pci_slot represents a physical slot */
+struct pci_slot {
+       struct pci_bus *bus;            /* The bus this slot is on */
+       struct list_head list;          /* node in list of slots on this bus */
+       struct hotplug_slot *hotplug;   /* Hotplug info (migrate over time) */
+       unsigned char number;           /* PCI_SLOT(pci_dev->devfn) */
+       struct kobject kobj;
+};
+
  /* File state for mmap()s on /proc/bus/pci/X/Y */
  enum pci_mmap_state {
         pci_mmap_io,
@@ -142,6 +151,7 @@ struct pci_dev {
  
         void            *sysdata;       /* hook for sys-specific extension */
         struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
+       struct pci_slot *slot;          /* Physical slot this device is in */
  
         unsigned int    devfn;          /* encoded device & function index */
         unsigned short  vendor;
@@ -167,6 +177,13 @@ struct pci_dev {
         pci_power_t     current_state;  /* Current operating state. In ACPI-speak,
                                            this is D0-D3, D0 being fully functional,
                                            and D3 being off. */
+       int             pm_cap;         /* PM capability offset in the
+                                          configuration space */
+       unsigned int    pme_support:5;  /* Bitmask of states from which PME#
+                                          can be generated */
+       unsigned int    d1_support:1;   /* Low power state D1 is supported */
+       unsigned int    d2_support:1;   /* Low power state D2 is supported */
+       unsigned int    no_d1d2:1;      /* Only allow D0 and D3 */
  
  #ifdef CONFIG_PCIEASPM
         struct pcie_link_state  *link_state;    /* ASPM link state. */
@@ -191,7 +208,6 @@ struct pci_dev {
         unsigned int    is_added:1;
         unsigned int    is_busmaster:1; /* device is busmaster */
         unsigned int    no_msi:1;       /* device may not use msi */
-       unsigned int    no_d1d2:1;   /* only allow d0 or d3 */
         unsigned int    block_ucfg_access:1;    /* userspace config space access is blocked */
         unsigned int    broken_parity_status:1; /* Device generates false positive parity */
         unsigned int    msi_enabled:1;
@@ -267,6 +283,7 @@ struct pci_bus {
         struct list_head children;      /* list of child buses */
         struct list_head devices;       /* list of devices on this bus */
         struct pci_dev  *self;          /* bridge device as seen by parent */
+       struct list_head slots;         /* list of slots on this bus */
         struct resource *resource[PCI_BUS_NUM_RESOURCES];
                                         /* address space routed to this bus */
  
@@ -328,7 +345,7 @@ struct pci_bus_region {
  struct pci_dynids {
         spinlock_t lock;            /* protects list, index */
         struct list_head list;      /* for IDs added at runtime */
-       unsigned int use_driver_data:1; /* pci_driver->driver_data is used */
+       unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */
  };
  
  /* ---------------------------------------------------------------- */
@@ -390,7 +407,7 @@ struct pci_driver {
         int  (*resume_early) (struct pci_dev *dev);
         int  (*resume) (struct pci_dev *dev);                   /* Device woken up */
         void (*shutdown) (struct pci_dev *dev);
-
+       struct pm_ext_ops *pm;
         struct pci_error_handlers *err_handler;
         struct device_driver    driver;
         struct pci_dynids dynids;
@@ -489,6 +506,10 @@ struct pci_bus *pci_create_bus(struct device *parent, int bus,
                                struct pci_ops *ops, void *sysdata);
  struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
                                 int busnr);
+struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
+                                const char *name);
+void pci_destroy_slot(struct pci_slot *slot);
+void pci_update_slot_number(struct pci_slot *slot, int slot_nr);
  int pci_scan_slot(struct pci_bus *bus, int devfn);
  struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn);
  void pci_device_add(struct pci_dev *dev, struct pci_bus *bus);
@@ -618,6 +639,8 @@ int pci_restore_state(struct pci_dev *dev);
  int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
  pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
  int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);
+int pci_prepare_to_sleep(struct pci_dev *dev);
+int pci_back_from_sleep(struct pci_dev *dev);
  
  /* Functions for PCI Hotplug drivers to use */
  int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
@@ -839,6 +862,11 @@ static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask)
         return -EIO;
  }
  
+static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask)
+{
+       return -EIO;
+}
+
  static inline int pci_set_dma_max_seg_size(struct pci_dev *dev,
                                         unsigned int size)
  {
@@ -977,9 +1005,9 @@ static inline void pci_set_drvdata(struct pci_dev *pdev, void *data)
  /* If you want to know what to call your pci_dev, ask this function.
   * Again, it's a wrapper around the generic device.
   */
-static inline char *pci_name(struct pci_dev *pdev)
+static inline const char *pci_name(struct pci_dev *pdev)
  {
-       return pdev->dev.bus_id;
+       return dev_name(&pdev->dev);
  }
  
  
@@ -1014,7 +1042,9 @@ enum pci_fixup_pass {
         pci_fixup_header,       /* After reading configuration header */
         pci_fixup_final,        /* Final phase of device fixups */
         pci_fixup_enable,       /* pci_enable_device() time */
-       pci_fixup_resume,       /* pci_enable_device() time */
+       pci_fixup_resume,       /* pci_device_resume() */
+       pci_fixup_suspend,      /* pci_device_suspend() */
+       pci_fixup_resume_early, /* pci_device_resume_early() */
  };
  
  /* Anonymous variables would be nice... */
@@ -1036,6 +1066,12 @@ enum pci_fixup_pass {
  #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook)                 \
         DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,                    \
                         resume##vendor##device##hook, vendor, device, hook)
+#define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook)           \
+       DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,              \
+                       resume_early##vendor##device##hook, vendor, device, hook)
+#define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook)                        \
+       DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,                   \
+                       suspend##vendor##device##hook, vendor, device, hook)
  
  
  void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
@@ -1060,7 +1096,10 @@ extern int pci_pci_problems;
  extern unsigned long pci_cardbus_io_size;
  extern unsigned long pci_cardbus_mem_size;
  
-extern int pcibios_add_platform_entries(struct pci_dev *dev);
+int pcibios_add_platform_entries(struct pci_dev *dev);
+void pcibios_disable_device(struct pci_dev *dev);
+int pcibios_set_pcie_reset_state(struct pci_dev *dev,
+                                enum pcie_reset_state state);
  
  #ifdef CONFIG_PCI_MMCONFIG
  extern void __init pci_mmcfg_early_init(void);
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h

index 8f67e8f2a3cc0c0e24d32ebd77cde24bb94281a5..a08cd06b541a190975d04ee7d1cdd8af73e0a9f5 100644 (file)
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -95,9 +95,6 @@ struct hotplug_slot_attribute {
   * @get_adapter_status: Called to get see if an adapter is present in the slot or not.
   *     If this field is NULL, the value passed in the struct hotplug_slot_info
   *     will be used when this value is requested by a user.
- * @get_address: Called to get pci address of a slot.
- *     If this field is NULL, the value passed in the struct hotplug_slot_info
- *     will be used when this value is requested by a user.
   * @get_max_bus_speed: Called to get the max bus speed for a slot.
   *     If this field is NULL, the value passed in the struct hotplug_slot_info
   *     will be used when this value is requested by a user.
@@ -120,7 +117,6 @@ struct hotplug_slot_ops {
         int (*get_attention_status)     (struct hotplug_slot *slot, u8 *value);
         int (*get_latch_status)         (struct hotplug_slot *slot, u8 *value);
         int (*get_adapter_status)       (struct hotplug_slot *slot, u8 *value);
-       int (*get_address)              (struct hotplug_slot *slot, u32 *value);
         int (*get_max_bus_speed)        (struct hotplug_slot *slot, enum pci_bus_speed *value);
         int (*get_cur_bus_speed)        (struct hotplug_slot *slot, enum pci_bus_speed *value);
  };
@@ -140,7 +136,6 @@ struct hotplug_slot_info {
         u8      attention_status;
         u8      latch_status;
         u8      adapter_status;
-       u32     address;
         enum pci_bus_speed      max_bus_speed;
         enum pci_bus_speed      cur_bus_speed;
  };
@@ -166,15 +161,14 @@ struct hotplug_slot {
  
         /* Variables below this are for use only by the hotplug pci core. */
         struct list_head                slot_list;
-       struct kobject                  kobj;
+       struct pci_slot                 *pci_slot;
  };
  #define to_hotplug_slot(n) container_of(n, struct hotplug_slot, kobj)
  
-extern int pci_hp_register             (struct hotplug_slot *slot);
-extern int pci_hp_deregister           (struct hotplug_slot *slot);
+extern int pci_hp_register(struct hotplug_slot *, struct pci_bus *, int nr);
+extern int pci_hp_deregister(struct hotplug_slot *slot);
  extern int __must_check pci_hp_change_slot_info        (struct hotplug_slot *slot,
                                                  struct hotplug_slot_info *info);
-extern struct kset *pci_hotplug_slots_kset;
  
  /* PCI Setting Record (Type 0) */
  struct hpp_type0 {
@@ -227,9 +221,9 @@ struct hotplug_params {
  #include <acpi/acpi.h>
  #include <acpi/acpi_bus.h>
  #include <acpi/actypes.h>
-extern acpi_status acpi_run_oshp(acpi_handle handle);
  extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
                                 struct hotplug_params *hpp);
+int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags);
  int acpi_root_bridge(acpi_handle handle);
  #endif
  #endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h

index 1cf4084b51e82edf9442903c1050d7c2d7c1378e..6be6a7943d8b9a02bc466fb6500e09b38b8df3c0 100644 (file)
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2190,6 +2190,7 @@
  #define PCI_DEVICE_ID_JMICRON_JMB366   0x2366
  #define PCI_DEVICE_ID_JMICRON_JMB368   0x2368
  #define PCI_DEVICE_ID_JMICRON_JMB38X_SD        0x2381
+#define PCI_DEVICE_ID_JMICRON_JMB38X_MMC 0x2382
  #define PCI_DEVICE_ID_JMICRON_JMB38X_MS        0x2383
  
  #define PCI_VENDOR_ID_KORENIX          0x1982
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h

index c0c1223c91945111ca4ebbfe8d022de8461ab0ad..19958b929905beb2af8a0a6ca6802510379c232a 100644 (file)
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -231,6 +231,7 @@
  #define  PCI_PM_CAP_PME_D2     0x2000  /* PME# from D2 */
  #define  PCI_PM_CAP_PME_D3     0x4000  /* PME# from D3 (hot) */
  #define  PCI_PM_CAP_PME_D3cold 0x8000  /* PME# from D3 (cold) */
+#define  PCI_PM_CAP_PME_SHIFT  11      /* Start of the PME Mask in PMC */
  #define PCI_PM_CTRL            4       /* PM control and status register */
  #define  PCI_PM_CTRL_STATE_MASK        0x0003  /* Current power state (D0 to D3) */
  #define  PCI_PM_CTRL_NO_SOFT_RESET     0x0004  /* No reset for D3hot->D0 */
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h

index 3261681c82a45bb4ec68551bd6bb40bbef1935e3..95ac21ab3a092369f5f41537614eb9890161d70e 100644 (file)
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -53,6 +53,7 @@ struct platform_driver {
         int (*suspend_late)(struct platform_device *, pm_message_t state);
         int (*resume_early)(struct platform_device *);
         int (*resume)(struct platform_device *);
+       struct pm_ext_ops *pm;
         struct device_driver driver;
  };
  
diff --git a/include/linux/pm.h b/include/linux/pm.h

index 39a7ee859b671cfc57f5fcd9e063fc3031e96f84..4ad9de94449a67d598ea51bbb9a322fdfbce8b11 100644 (file)
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -112,7 +112,9 @@ typedef struct pm_message {
         int event;
  } pm_message_t;
  
-/*
+/**
+ * struct pm_ops - device PM callbacks
+ *
   * Several driver power state transitions are externally visible, affecting
   * the state of pending I/O queues and (for drivers that touch hardware)
   * interrupts, wakeups, DMA, and other hardware state.  There may also be
@@ -120,6 +122,284 @@ typedef struct pm_message {
   * to the rest of the driver stack (such as a driver that's ON gating off
   * clocks which are not in active use).
   *
+ * The externally visible transitions are handled with the help of the following
+ * callbacks included in this structure:
+ *
+ * @prepare: Prepare the device for the upcoming transition, but do NOT change
+ *     its hardware state.  Prevent new children of the device from being
+ *     registered after @prepare() returns (the driver's subsystem and
+ *     generally the rest of the kernel is supposed to prevent new calls to the
+ *     probe method from being made too once @prepare() has succeeded).  If
+ *     @prepare() detects a situation it cannot handle (e.g. registration of a
+ *     child already in progress), it may return -EAGAIN, so that the PM core
+ *     can execute it once again (e.g. after the new child has been registered)
+ *     to recover from the race condition.  This method is executed for all
+ *     kinds of suspend transitions and is followed by one of the suspend
+ *     callbacks: @suspend(), @freeze(), or @poweroff().
+ *     The PM core executes @prepare() for all devices before starting to
+ *     execute suspend callbacks for any of them, so drivers may assume all of
+ *     the other devices to be present and functional while @prepare() is being
+ *     executed.  In particular, it is safe to make GFP_KERNEL memory
+ *     allocations from within @prepare().  However, drivers may NOT assume
+ *     anything about the availability of the user space at that time and it
+ *     is not correct to request firmware from within @prepare() (it's too
+ *     late to do that).  [To work around this limitation, drivers may
+ *     register suspend and hibernation notifiers that are executed before the
+ *     freezing of tasks.]
+ *
+ * @complete: Undo the changes made by @prepare().  This method is executed for
+ *     all kinds of resume transitions, following one of the resume callbacks:
+ *     @resume(), @thaw(), @restore().  Also called if the state transition
+ *     fails before the driver's suspend callback (@suspend(), @freeze(),
+ *     @poweroff()) can be executed (e.g. if the suspend callback fails for one
+ *     of the other devices that the PM core has unsuccessfully attempted to
+ *     suspend earlier).
+ *     The PM core executes @complete() after it has executed the appropriate
+ *     resume callback for all devices.
+ *
+ * @suspend: Executed before putting the system into a sleep state in which the
+ *     contents of main memory are preserved.  Quiesce the device, put it into
+ *     a low power state appropriate for the upcoming system state (such as
+ *     PCI_D3hot), and enable wakeup events as appropriate.
+ *
+ * @resume: Executed after waking the system up from a sleep state in which the
+ *     contents of main memory were preserved.  Put the device into the
+ *     appropriate state, according to the information saved in memory by the
+ *     preceding @suspend().  The driver starts working again, responding to
+ *     hardware events and software requests.  The hardware may have gone
+ *     through a power-off reset, or it may have maintained state from the
+ *     previous suspend() which the driver may rely on while resuming.  On most
+ *     platforms, there are no restrictions on availability of resources like
+ *     clocks during @resume().
+ *
+ * @freeze: Hibernation-specific, executed before creating a hibernation image.
+ *     Quiesce operations so that a consistent image can be created, but do NOT
+ *     otherwise put the device into a low power device state and do NOT emit
+ *     system wakeup events.  Save in main memory the device settings to be
+ *     used by @restore() during the subsequent resume from hibernation or by
+ *     the subsequent @thaw(), if the creation of the image or the restoration
+ *     of main memory contents from it fails.
+ *
+ * @thaw: Hibernation-specific, executed after creating a hibernation image OR
+ *     if the creation of the image fails.  Also executed after a failing
+ *     attempt to restore the contents of main memory from such an image.
+ *     Undo the changes made by the preceding @freeze(), so the device can be
+ *     operated in the same way as immediately before the call to @freeze().
+ *
+ * @poweroff: Hibernation-specific, executed after saving a hibernation image.
+ *     Quiesce the device, put it into a low power state appropriate for the
+ *     upcoming system state (such as PCI_D3hot), and enable wakeup events as
+ *     appropriate.
+ *
+ * @restore: Hibernation-specific, executed after restoring the contents of main
+ *     memory from a hibernation image.  Driver starts working again,
+ *     responding to hardware events and software requests.  Drivers may NOT
+ *     make ANY assumptions about the hardware state right prior to @restore().
+ *     On most platforms, there are no restrictions on availability of
+ *     resources like clocks during @restore().
+ *
+ * All of the above callbacks, except for @complete(), return error codes.
+ * However, the error codes returned by the resume operations, @resume(),
+ * @thaw(), and @restore(), do not cause the PM core to abort the resume
+ * transition during which they are returned.  The error codes returned in
+ * those cases are only printed by the PM core to the system logs for debugging
+ * purposes.  Still, it is recommended that drivers only return error codes
+ * from their resume methods in case of an unrecoverable failure (i.e. when the
+ * device being handled refuses to resume and becomes unusable) to allow us to
+ * modify the PM core in the future, so that it can avoid attempting to handle
+ * devices that failed to resume and their children.
+ *
+ * It is allowed to unregister devices while the above callbacks are being
+ * executed.  However, it is not allowed to unregister a device from within any
+ * of its own callbacks.
+ */
+
+struct pm_ops {
+       int (*prepare)(struct device *dev);
+       void (*complete)(struct device *dev);
+       int (*suspend)(struct device *dev);
+       int (*resume)(struct device *dev);
+       int (*freeze)(struct device *dev);
+       int (*thaw)(struct device *dev);
+       int (*poweroff)(struct device *dev);
+       int (*restore)(struct device *dev);
+};
+
+/**
+ * struct pm_ext_ops - extended device PM callbacks
+ *
+ * Some devices require certain operations related to suspend and hibernation
+ * to be carried out with interrupts disabled.  Thus, 'struct pm_ext_ops' below
+ * is defined, adding callbacks to be executed with interrupts disabled to
+ * 'struct pm_ops'.
+ *
+ * The following callbacks included in 'struct pm_ext_ops' are executed with
+ * the nonboot CPUs switched off and with interrupts disabled on the only
+ * functional CPU.  They also are executed with the PM core list of devices
+ * locked, so they must NOT unregister any devices.
+ *
+ * @suspend_noirq: Complete the operations of ->suspend() by carrying out any
+ *     actions required for suspending the device that need interrupts to be
+ *     disabled
+ *
+ * @resume_noirq: Prepare for the execution of ->resume() by carrying out any
+ *     actions required for resuming the device that need interrupts to be
+ *     disabled
+ *
+ * @freeze_noirq: Complete the operations of ->freeze() by carrying out any
+ *     actions required for freezing the device that need interrupts to be
+ *     disabled
+ *
+ * @thaw_noirq: Prepare for the execution of ->thaw() by carrying out any
+ *     actions required for thawing the device that need interrupts to be
+ *     disabled
+ *
+ * @poweroff_noirq: Complete the operations of ->poweroff() by carrying out any
+ *     actions required for handling the device that need interrupts to be
+ *     disabled
+ *
+ * @restore_noirq: Prepare for the execution of ->restore() by carrying out any
+ *     actions required for restoring the operations of the device that need
+ *     interrupts to be disabled
+ *
+ * All of the above callbacks return error codes, but the error codes returned
+ * by the resume operations, @resume_noirq(), @thaw_noirq(), and
+ * @restore_noirq(), do not cause the PM core to abort the resume transition
+ * during which they are returned.  The error codes returned in those cases are
+ * only printed by the PM core to the system logs for debugging purposes.
+ * Still, as stated above, it is recommended that drivers only return error
+ * codes from their resume methods if the device being handled fails to resume
+ * and is not usable any more.
+ */
+
+struct pm_ext_ops {
+       struct pm_ops base;
+       int (*suspend_noirq)(struct device *dev);
+       int (*resume_noirq)(struct device *dev);
+       int (*freeze_noirq)(struct device *dev);
+       int (*thaw_noirq)(struct device *dev);
+       int (*poweroff_noirq)(struct device *dev);
+       int (*restore_noirq)(struct device *dev);
+};
+
+/**
+ * PM_EVENT_ messages
+ *
+ * The following PM_EVENT_ messages are defined for the internal use of the PM
+ * core, in order to provide a mechanism allowing the high level suspend and
+ * hibernation code to convey the necessary information to the device PM core
+ * code:
+ *
+ * ON          No transition.
+ *
+ * FREEZE      System is going to hibernate, call ->prepare() and ->freeze()
+ *             for all devices.
+ *
+ * SUSPEND     System is going to suspend, call ->prepare() and ->suspend()
+ *             for all devices.
+ *
+ * HIBERNATE   Hibernation image has been saved, call ->prepare() and
+ *             ->poweroff() for all devices.
+ *
+ * QUIESCE     Contents of main memory are going to be restored from a (loaded)
+ *             hibernation image, call ->prepare() and ->freeze() for all
+ *             devices.
+ *
+ * RESUME      System is resuming, call ->resume() and ->complete() for all
+ *             devices.
+ *
+ * THAW                Hibernation image has been created, call ->thaw() and
+ *             ->complete() for all devices.
+ *
+ * RESTORE     Contents of main memory have been restored from a hibernation
+ *             image, call ->restore() and ->complete() for all devices.
+ *
+ * RECOVER     Creation of a hibernation image or restoration of the main
+ *             memory contents from a hibernation image has failed, call
+ *             ->thaw() and ->complete() for all devices.
+ */
+
+#define PM_EVENT_ON            0x0000
+#define PM_EVENT_FREEZE        0x0001
+#define PM_EVENT_SUSPEND       0x0002
+#define PM_EVENT_HIBERNATE     0x0004
+#define PM_EVENT_QUIESCE       0x0008
+#define PM_EVENT_RESUME                0x0010
+#define PM_EVENT_THAW          0x0020
+#define PM_EVENT_RESTORE       0x0040
+#define PM_EVENT_RECOVER       0x0080
+
+#define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE)
+
+#define PMSG_FREEZE    ((struct pm_message){ .event = PM_EVENT_FREEZE, })
+#define PMSG_QUIESCE   ((struct pm_message){ .event = PM_EVENT_QUIESCE, })
+#define PMSG_SUSPEND   ((struct pm_message){ .event = PM_EVENT_SUSPEND, })
+#define PMSG_HIBERNATE ((struct pm_message){ .event = PM_EVENT_HIBERNATE, })
+#define PMSG_RESUME    ((struct pm_message){ .event = PM_EVENT_RESUME, })
+#define PMSG_THAW      ((struct pm_message){ .event = PM_EVENT_THAW, })
+#define PMSG_RESTORE   ((struct pm_message){ .event = PM_EVENT_RESTORE, })
+#define PMSG_RECOVER   ((struct pm_message){ .event = PM_EVENT_RECOVER, })
+#define PMSG_ON                ((struct pm_message){ .event = PM_EVENT_ON, })
+
+/**
+ * Device power management states
+ *
+ * These state labels are used internally by the PM core to indicate the current
+ * status of a device with respect to the PM core operations.
+ *
+ * DPM_ON              Device is regarded as operational.  Set this way
+ *                     initially and when ->complete() is about to be called.
+ *                     Also set when ->prepare() fails.
+ *
+ * DPM_PREPARING       Device is going to be prepared for a PM transition.  Set
+ *                     when ->prepare() is about to be called.
+ *
+ * DPM_RESUMING                Device is going to be resumed.  Set when ->resume(),
+ *                     ->thaw(), or ->restore() is about to be called.
+ *
+ * DPM_SUSPENDING      Device has been prepared for a power transition.  Set
+ *                     when ->prepare() has just succeeded.
+ *
+ * DPM_OFF             Device is regarded as inactive.  Set immediately after
+ *                     ->suspend(), ->freeze(), or ->poweroff() has succeeded.
+ *                     Also set when ->resume_noirq(), ->thaw_noirq(), or
+ *                     ->restore_noirq() is about to be called.
+ *
+ * DPM_OFF_IRQ         Device is in a "deep sleep".  Set immediately after
+ *                     ->suspend_noirq(), ->freeze_noirq(), or
+ *                     ->poweroff_noirq() has just succeeded.
+ */
+
+enum dpm_state {
+       DPM_INVALID,
+       DPM_ON,
+       DPM_PREPARING,
+       DPM_RESUMING,
+       DPM_SUSPENDING,
+       DPM_OFF,
+       DPM_OFF_IRQ,
+};
+
+struct dev_pm_info {
+       pm_message_t            power_state;
+       unsigned                can_wakeup:1;
+       unsigned                should_wakeup:1;
+       enum dpm_state          status;         /* Owned by the PM core */
+#ifdef CONFIG_PM_SLEEP
+       struct list_head        entry;
+#endif
+};
+
+/*
+ * The PM_EVENT_ messages are also used by drivers implementing the legacy
+ * suspend framework, based on the ->suspend() and ->resume() callbacks common
+ * for suspend and hibernation transitions, according to the rules below.
+ */
+
+/* Necessary, because several drivers use PM_EVENT_PRETHAW */
+#define PM_EVENT_PRETHAW PM_EVENT_QUIESCE
+
+/*
   * One transition is triggered by resume(), after a suspend() call; the
   * message is implicit:
   *
@@ -164,35 +444,13 @@ typedef struct pm_message {
   * or from system low-power states such as standby or suspend-to-RAM.
   */
  
-#define PM_EVENT_ON 0
-#define PM_EVENT_FREEZE 1
-#define PM_EVENT_SUSPEND 2
-#define PM_EVENT_HIBERNATE 4
-#define PM_EVENT_PRETHAW 8
-
-#define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE)
-
-#define PMSG_FREEZE    ((struct pm_message){ .event = PM_EVENT_FREEZE, })
-#define PMSG_PRETHAW   ((struct pm_message){ .event = PM_EVENT_PRETHAW, })
-#define PMSG_SUSPEND   ((struct pm_message){ .event = PM_EVENT_SUSPEND, })
-#define PMSG_HIBERNATE ((struct pm_message){ .event = PM_EVENT_HIBERNATE, })
-#define PMSG_ON                ((struct pm_message){ .event = PM_EVENT_ON, })
-
-struct dev_pm_info {
-       pm_message_t            power_state;
-       unsigned                can_wakeup:1;
-       unsigned                should_wakeup:1;
-       bool                    sleeping:1;     /* Owned by the PM core */
-#ifdef CONFIG_PM_SLEEP
-       struct list_head        entry;
-#endif
-};
+#ifdef CONFIG_PM_SLEEP
+extern void device_pm_lock(void);
+extern void device_power_up(pm_message_t state);
+extern void device_resume(pm_message_t state);
  
+extern void device_pm_unlock(void);
  extern int device_power_down(pm_message_t state);
-extern void device_power_up(void);
-extern void device_resume(void);
-
-#ifdef CONFIG_PM_SLEEP
  extern int device_suspend(pm_message_t state);
  extern int device_prepare_suspend(pm_message_t state);
  
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h

index f0d0b2cb8d205ce6de169cae5c60c7a3166ef11a..0aae7776185e63523a3eac6535adfbd8f9524e28 100644 (file)
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -35,6 +35,11 @@ static inline void device_init_wakeup(struct device *dev, int val)
         dev->power.can_wakeup = dev->power.should_wakeup = !!val;
  }
  
+static inline void device_set_wakeup_capable(struct device *dev, int val)
+{
+       dev->power.can_wakeup = !!val;
+}
+
  static inline int device_can_wakeup(struct device *dev)
  {
         return dev->power.can_wakeup;
@@ -47,21 +52,7 @@ static inline void device_set_wakeup_enable(struct device *dev, int val)
  
  static inline int device_may_wakeup(struct device *dev)
  {
-       return dev->power.can_wakeup & dev->power.should_wakeup;
-}
-
-/*
- * Platform hook to activate device wakeup capability, if that's not already
- * handled by enable_irq_wake() etc.
- * Returns zero on success, else negative errno
- */
-extern int (*platform_enable_wakeup)(struct device *dev, int is_on);
-
-static inline int call_platform_enable_wakeup(struct device *dev, int is_on)
-{
-       if (platform_enable_wakeup)
-               return (*platform_enable_wakeup)(dev, is_on);
-       return 0;
+       return dev->power.can_wakeup && dev->power.should_wakeup;
  }
  
  #else /* !CONFIG_PM */
@@ -72,6 +63,8 @@ static inline void device_init_wakeup(struct device *dev, int val)
         dev->power.can_wakeup = !!val;
  }
  
+static inline void device_set_wakeup_capable(struct device *dev, int val) { }
+
  static inline int device_can_wakeup(struct device *dev)
  {
         return dev->power.can_wakeup;
@@ -80,11 +73,6 @@ static inline int device_can_wakeup(struct device *dev)
  #define device_set_wakeup_enable(dev, val)     do {} while (0)
  #define device_may_wakeup(dev)                 0
  
-static inline int call_platform_enable_wakeup(struct device *dev, int is_on)
-{
-       return 0;
-}
-
  #endif /* !CONFIG_PM */
  
  #endif /* _LINUX_PM_WAKEUP_H */
diff --git a/include/linux/pnp.h b/include/linux/pnp.h

index 63b128d512fb5f413085b8b13d144d97cf41477f..1ce54b63085dd09b1ee5826694294969fe2a1f78 100644 (file)
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -1,6 +1,8 @@
  /*
   * Linux Plug and Play Support
   * Copyright by Adam Belay <ambx1@neo.rr.com>
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *     Bjorn Helgaas <bjorn.helgaas@hp.com>
   */
  
  #ifndef _LINUX_PNP_H
@@ -15,7 +17,6 @@
  
  struct pnp_protocol;
  struct pnp_dev;
-struct pnp_resource_table;
  
  /*
   * Resource Management
@@ -24,7 +25,14 @@ struct resource *pnp_get_resource(struct pnp_dev *, unsigned int, unsigned int);
  
  static inline int pnp_resource_valid(struct resource *res)
  {
-       if (res && !(res->flags & IORESOURCE_UNSET))
+       if (res)
+               return 1;
+       return 0;
+}
+
+static inline int pnp_resource_enabled(struct resource *res)
+{
+       if (res && !(res->flags & IORESOURCE_DISABLED))
                 return 1;
         return 0;
  }
@@ -40,19 +48,31 @@ static inline resource_size_t pnp_resource_len(struct resource *res)
  static inline resource_size_t pnp_port_start(struct pnp_dev *dev,
                                              unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_IO, bar)->start;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar);
+
+       if (pnp_resource_valid(res))
+               return res->start;
+       return 0;
  }
  
  static inline resource_size_t pnp_port_end(struct pnp_dev *dev,
                                            unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_IO, bar)->end;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar);
+
+       if (pnp_resource_valid(res))
+               return res->end;
+       return 0;
  }
  
  static inline unsigned long pnp_port_flags(struct pnp_dev *dev,
                                            unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_IO, bar)->flags;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar);
+
+       if (pnp_resource_valid(res))
+               return res->flags;
+       return IORESOURCE_IO | IORESOURCE_AUTO;
  }
  
  static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar)
@@ -63,25 +83,41 @@ static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar)
  static inline resource_size_t pnp_port_len(struct pnp_dev *dev,
                                            unsigned int bar)
  {
-       return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_IO, bar));
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar);
+
+       if (pnp_resource_valid(res))
+               return pnp_resource_len(res);
+       return 0;
  }
  
  
  static inline resource_size_t pnp_mem_start(struct pnp_dev *dev,
                                             unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_MEM, bar)->start;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar);
+
+       if (pnp_resource_valid(res))
+               return res->start;
+       return 0;
  }
  
  static inline resource_size_t pnp_mem_end(struct pnp_dev *dev,
                                           unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_MEM, bar)->end;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar);
+
+       if (pnp_resource_valid(res))
+               return res->end;
+       return 0;
  }
  
  static inline unsigned long pnp_mem_flags(struct pnp_dev *dev, unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_MEM, bar)->flags;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar);
+
+       if (pnp_resource_valid(res))
+               return res->flags;
+       return IORESOURCE_MEM | IORESOURCE_AUTO;
  }
  
  static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar)
@@ -92,18 +128,30 @@ static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar)
  static inline resource_size_t pnp_mem_len(struct pnp_dev *dev,
                                           unsigned int bar)
  {
-       return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_MEM, bar));
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar);
+
+       if (pnp_resource_valid(res))
+               return pnp_resource_len(res);
+       return 0;
  }
  
  
  static inline resource_size_t pnp_irq(struct pnp_dev *dev, unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->start;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_IRQ, bar);
+
+       if (pnp_resource_valid(res))
+               return res->start;
+       return -1;
  }
  
  static inline unsigned long pnp_irq_flags(struct pnp_dev *dev, unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->flags;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_IRQ, bar);
+
+       if (pnp_resource_valid(res))
+               return res->flags;
+       return IORESOURCE_IRQ | IORESOURCE_AUTO;
  }
  
  static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar)
@@ -114,12 +162,20 @@ static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar)
  
  static inline resource_size_t pnp_dma(struct pnp_dev *dev, unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_DMA, bar)->start;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_DMA, bar);
+
+       if (pnp_resource_valid(res))
+               return res->start;
+       return -1;
  }
  
  static inline unsigned long pnp_dma_flags(struct pnp_dev *dev, unsigned int bar)
  {
-       return pnp_get_resource(dev, IORESOURCE_DMA, bar)->flags;
+       struct resource *res = pnp_get_resource(dev, IORESOURCE_DMA, bar);
+
+       if (pnp_resource_valid(res))
+               return res->flags;
+       return IORESOURCE_DMA | IORESOURCE_AUTO;
  }
  
  static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar)
@@ -128,57 +184,6 @@ static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar)
  }
  
  
-#define PNP_PORT_FLAG_16BITADDR        (1<<0)
-#define PNP_PORT_FLAG_FIXED    (1<<1)
-
-struct pnp_port {
-       unsigned short min;     /* min base number */
-       unsigned short max;     /* max base number */
-       unsigned char align;    /* align boundary */
-       unsigned char size;     /* size of range */
-       unsigned char flags;    /* port flags */
-       unsigned char pad;      /* pad */
-       struct pnp_port *next;  /* next port */
-};
-
-#define PNP_IRQ_NR 256
-struct pnp_irq {
-       DECLARE_BITMAP(map, PNP_IRQ_NR);        /* bitmask for IRQ lines */
-       unsigned char flags;    /* IRQ flags */
-       unsigned char pad;      /* pad */
-       struct pnp_irq *next;   /* next IRQ */
-};
-
-struct pnp_dma {
-       unsigned char map;      /* bitmask for DMA channels */
-       unsigned char flags;    /* DMA flags */
-       struct pnp_dma *next;   /* next port */
-};
-
-struct pnp_mem {
-       unsigned int min;       /* min base number */
-       unsigned int max;       /* max base number */
-       unsigned int align;     /* align boundary */
-       unsigned int size;      /* size of range */
-       unsigned char flags;    /* memory flags */
-       unsigned char pad;      /* pad */
-       struct pnp_mem *next;   /* next memory resource */
-};
-
-#define PNP_RES_PRIORITY_PREFERRED     0
-#define PNP_RES_PRIORITY_ACCEPTABLE    1
-#define PNP_RES_PRIORITY_FUNCTIONAL    2
-#define PNP_RES_PRIORITY_INVALID       65535
-
-struct pnp_option {
-       unsigned short priority;        /* priority */
-       struct pnp_port *port;          /* first port */
-       struct pnp_irq *irq;            /* first IRQ */
-       struct pnp_dma *dma;            /* first DMA */
-       struct pnp_mem *mem;            /* first memory resource */
-       struct pnp_option *next;        /* used to chain dependent resources */
-};
-
  /*
   * Device Management
   */
@@ -246,9 +251,9 @@ struct pnp_dev {
  
         int active;
         int capabilities;
-       struct pnp_option *independent;
-       struct pnp_option *dependent;
-       struct pnp_resource_table *res;
+       unsigned int num_dependent_sets;
+       struct list_head resources;
+       struct list_head options;
  
         char name[PNP_NAME_LEN];        /* contains a human-readable name */
         int flags;                      /* used by protocols */
@@ -425,6 +430,8 @@ void pnp_unregister_card_driver(struct pnp_card_driver *drv);
  extern struct list_head pnp_cards;
  
  /* resource management */
+int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t base,
+                       resource_size_t size);
  int pnp_auto_config_dev(struct pnp_dev *dev);
  int pnp_start_dev(struct pnp_dev *dev);
  int pnp_stop_dev(struct pnp_dev *dev);
@@ -452,6 +459,9 @@ static inline int pnp_register_card_driver(struct pnp_card_driver *drv) { return
  static inline void pnp_unregister_card_driver(struct pnp_card_driver *drv) { }
  
  /* resource management */
+static inline int pnp_possible_config(struct pnp_dev *dev, int type,
+                                     resource_size_t base,
+                                     resource_size_t size) { return 0; }
  static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; }
  static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; }
  static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; }
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 21349173d148034cb20bf6f3d987ac9b0a9310cf..1941d8b5cf11cb9731342c5519ac58436e192eab 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1062,12 +1062,6 @@ struct task_struct {
  #endif
  
         struct list_head tasks;
-       /*
-        * ptrace_list/ptrace_children forms the list of my children
-        * that were stolen by a ptracer.
-        */
-       struct list_head ptrace_children;
-       struct list_head ptrace_list;
  
         struct mm_struct *mm, *active_mm;
  
@@ -1089,18 +1083,25 @@ struct task_struct {
         /* 
          * pointers to (original) parent process, youngest child, younger sibling,
          * older sibling, respectively.  (p->father can be replaced with 
-        * p->parent->pid)
+        * p->real_parent->pid)
          */
-       struct task_struct *real_parent; /* real parent process (when being debugged) */
-       struct task_struct *parent;     /* parent process */
+       struct task_struct *real_parent; /* real parent process */
+       struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
         /*
-        * children/sibling forms the list of my children plus the
-        * tasks I'm ptracing.
+        * children/sibling forms the list of my natural children
          */
         struct list_head children;      /* list of my children */
         struct list_head sibling;       /* linkage in my parent's children list */
         struct task_struct *group_leader;       /* threadgroup leader */
  
+       /*
+        * ptraced is the list of tasks this task is using ptrace on.
+        * This includes both natural children and PTRACE_ATTACH targets.
+        * p->ptrace_entry is p's link on the p->parent->ptraced list.
+        */
+       struct list_head ptraced;
+       struct list_head ptrace_entry;
+
         /* PID/PID hash table linkage. */
         struct pid_link pids[PIDTYPE_MAX];
         struct list_head thread_group;
@@ -1494,6 +1495,7 @@ static inline void put_task_struct(struct task_struct *t)
  #define PF_MEMPOLICY   0x10000000      /* Non-default NUMA mempolicy */
  #define PF_MUTEX_TESTER        0x20000000      /* Thread belongs to the rt mutex tester */
  #define PF_FREEZER_SKIP        0x40000000      /* Freezer should not count it as freezeable */
+#define PF_FREEZER_NOSIG 0x80000000    /* Freezer won't send signals to it */
  
  /*
   * Only the _current_ task can read/write to tsk->flags, but other
@@ -1875,9 +1877,6 @@ extern void wait_task_inactive(struct task_struct * p);
  #define wait_task_inactive(p)  do { } while (0)
  #endif
  
-#define remove_parent(p)       list_del_init(&(p)->sibling)
-#define add_parent(p)          list_add_tail(&(p)->sibling,&(p)->parent->children)
-
  #define next_task(p)   list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
  
  #define for_each_process(p) \
diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h

index d5ca78b93a3bbd9f9e78191cb5f7314622259045..a3626aedaec9350f9f17b19a12a6ade89a561b5c 100644 (file)
--- a/include/linux/spi/mmc_spi.h
+++ b/include/linux/spi/mmc_spi.h
@@ -23,6 +23,15 @@ struct mmc_spi_platform_data {
         /* sense switch on sd cards */
         int (*get_ro)(struct device *);
  
+       /*
+        * If board does not use CD interrupts, driver can optimize polling
+        * using this function.
+        */
+       int (*get_cd)(struct device *);
+
+       /* Capabilities to pass into mmc core (e.g. MMC_CAP_NEEDS_POLL). */
+       unsigned long caps;
+
         /* how long to debounce card detect, in msecs */
         u16 detect_delay;
  
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h

index 6fff7f82ef1275e70495493759cc56019951f1ad..e5bfe01ee305feb20e70845fb7e7f14e108bd345 100644 (file)
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -42,7 +42,8 @@ struct rpc_clnt {
  
         unsigned int            cl_softrtry : 1,/* soft timeouts */
                                 cl_discrtry : 1,/* disconnect before retry */
-                               cl_autobind : 1;/* use getport() */
+                               cl_autobind : 1,/* use getport() */
+                               cl_chatty   : 1;/* be verbose */
  
         struct rpc_rtt *        cl_rtt;         /* RTO estimator data */
         const struct rpc_timeout *cl_timeout;   /* Timeout strategy */
@@ -114,6 +115,7 @@ struct rpc_create_args {
  #define RPC_CLNT_CREATE_NONPRIVPORT    (1UL << 3)
  #define RPC_CLNT_CREATE_NOPING         (1UL << 4)
  #define RPC_CLNT_CREATE_DISCRTRY       (1UL << 5)
+#define RPC_CLNT_CREATE_QUIET          (1UL << 6)
  
  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
  struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
@@ -123,6 +125,9 @@ void                rpc_shutdown_client(struct rpc_clnt *);
  void           rpc_release_client(struct rpc_clnt *);
  
  int            rpcb_register(u32, u32, int, unsigned short, int *);
+int            rpcb_v4_register(const u32 program, const u32 version,
+                                const struct sockaddr *address,
+                                const char *netid, int *result);
  int            rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
  void           rpcb_getport_async(struct rpc_task *);
  
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h

index d1a5c8c1a0f1ceef22eba19980667052bfedf10e..64981a2f1cae12e1b9f384b758cbc1718ee09721 100644 (file)
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -135,7 +135,6 @@ struct rpc_task_setup {
  #define RPC_IS_SWAPPER(t)      ((t)->tk_flags & RPC_TASK_SWAPPER)
  #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS)
  #define RPC_ASSASSINATED(t)    ((t)->tk_flags & RPC_TASK_KILLED)
-#define RPC_DO_CALLBACK(t)     ((t)->tk_callback != NULL)
  #define RPC_IS_SOFT(t)         ((t)->tk_flags & RPC_TASK_SOFT)
  
  #define RPC_TASK_RUNNING       0
diff --git a/include/linux/suspend.h b/include/linux/suspend.h

index a6977423baf737ed67d4847cdce4c89fc24452e0..e8e69159af71c2ed546fb97f25316bcd5de1ea0f 100644 (file)
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -86,6 +86,11 @@ typedef int __bitwise suspend_state_t;
   *     that implement @begin(), but platforms implementing @begin() should
   *     also provide a @end() which cleans up transitions aborted before
   *     @enter().
+ *
+ * @recover: Recover the platform from a suspend failure.
+ *     Called by the PM core if the suspending of devices fails.
+ *     This callback is optional and should only be implemented by platforms
+ *     which require special recovery actions in that situation.
   */
  struct platform_suspend_ops {
         int (*valid)(suspend_state_t state);
@@ -94,6 +99,7 @@ struct platform_suspend_ops {
         int (*enter)(suspend_state_t state);
         void (*finish)(void);
         void (*end)(void);
+       void (*recover)(void);
  };
  
  #ifdef CONFIG_SUSPEND
@@ -149,7 +155,7 @@ extern void mark_free_pages(struct zone *zone);
   * The methods in this structure allow a platform to carry out special
   * operations required by it during a hibernation transition.
   *
- * All the methods below must be implemented.
+ * All the methods below, except for @recover(), must be implemented.
   *
   * @begin: Tell the platform driver that we're starting hibernation.
   *     Called right after shrinking memory and before freezing devices.
@@ -189,6 +195,11 @@ extern void mark_free_pages(struct zone *zone);
   * @restore_cleanup: Clean up after a failing image restoration.
   *     Called right after the nonboot CPUs have been enabled and before
   *     thawing devices (runs with IRQs on).
+ *
+ * @recover: Recover the platform from a failure to suspend devices.
+ *     Called by the PM core if the suspending of devices during hibernation
+ *     fails.  This callback is optional and should only be implemented by
+ *     platforms which require special recovery actions in that situation.
   */
  struct platform_hibernation_ops {
         int (*begin)(void);
@@ -200,6 +211,7 @@ struct platform_hibernation_ops {
         void (*leave)(void);
         int (*pre_restore)(void);
         void (*restore_cleanup)(void);
+       void (*recover)(void);
  };
  
  #ifdef CONFIG_HIBERNATION
diff --git a/init/do_mounts.c b/init/do_mounts.c

index 660c1e50c91b7a622b2e65bf1095cd4e982b58af..a1de1bf3d6b9d5694cf71b4c4a9dc03193636b96 100644 (file)
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -372,7 +372,8 @@ void __init prepare_namespace(void)
  
         if (saved_root_name[0]) {
                 root_device_name = saved_root_name;
-               if (!strncmp(root_device_name, "mtd", 3)) {
+               if (!strncmp(root_device_name, "mtd", 3) ||
+                   !strncmp(root_device_name, "ubi", 3)) {
                         mount_block_root(root_device_name, root_mountflags);
                         goto out;
                 }
diff --git a/kernel/Makefile b/kernel/Makefile

index 0a7ed838984b034a0fcb843dc765f6941c769e72..985ddb7da4d02ccde3fa2acf1b4c9a5764778acb 100644 (file)
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
             hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
             notifier.o ksysfs.o pm_qos_params.o sched_clock.o
  
-CFLAGS_REMOVE_sched.o = -mno-spe
-
  ifdef CONFIG_FTRACE
  # Do not trace debug files and internal ftrace files
  CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
  CFLAGS_REMOVE_rtmutex-debug.o = -pg
  CFLAGS_REMOVE_cgroup-debug.o = -pg
  CFLAGS_REMOVE_sched_clock.o = -pg
+CFLAGS_REMOVE_sched.o = -mno-spe -pg
  endif
  
  obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
diff --git a/kernel/cpu.c b/kernel/cpu.c

index b11f06dc149add3a29c072e2ec9356499dd9dbc1..cfb1d43ab801b69f24c47c0cdb17332c25f6f99e 100644 (file)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -299,6 +299,7 @@ int __ref cpu_down(unsigned int cpu)
         cpu_maps_update_done();
         return err;
  }
+EXPORT_SYMBOL(cpu_down);
  #endif /*CONFIG_HOTPLUG_CPU*/
  
  /* Requires cpu_add_remove_lock to be held */
diff --git a/kernel/exit.c b/kernel/exit.c

index ceb258782835a1073774e0337e0bc4d85f7a160f..93d2711b938123d34f1c0011e8f91a589b465615 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -71,7 +71,7 @@ static void __unhash_process(struct task_struct *p)
                 __get_cpu_var(process_counts)--;
         }
         list_del_rcu(&p->thread_group);
-       remove_parent(p);
+       list_del_init(&p->sibling);
  }
  
  /*
@@ -152,6 +152,18 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
         put_task_struct(container_of(rhp, struct task_struct, rcu));
  }
  
+/*
+ * Do final ptrace-related cleanup of a zombie being reaped.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_release_task(struct task_struct *p)
+{
+       BUG_ON(!list_empty(&p->ptraced));       /* @p must not still have tasks on its ptraced list */
+       ptrace_unlink(p);
+       BUG_ON(!list_empty(&p->ptrace_entry));  /* after unlinking, @p must be off any ptraced list */
+}
+
  void release_task(struct task_struct * p)
  {
         struct task_struct *leader;
@@ -160,8 +172,7 @@ repeat:
         atomic_dec(&p->user->processes);
         proc_flush_task(p);
         write_lock_irq(&tasklist_lock);
-       ptrace_unlink(p);
-       BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
+       ptrace_release_task(p);
         __exit_signal(p);
  
         /*
@@ -315,9 +326,8 @@ static void reparent_to_kthreadd(void)
  
         ptrace_unlink(current);
         /* Reparent to init */
-       remove_parent(current);
         current->real_parent = current->parent = kthreadd_task;
-       add_parent(current);
+       list_move_tail(&current->sibling, &current->real_parent->children);
  
         /* Set the exit signal to SIGCHLD so we signal init on exit */
         current->exit_signal = SIGCHLD;
@@ -692,37 +702,97 @@ static void exit_mm(struct task_struct * tsk)
         mmput(mm);
  }
  
-static void
-reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
+/*
+ * Return nonzero if @parent's children should reap themselves.
+ *
+ * Called with write_lock_irq(&tasklist_lock) held.
+ */
+static int ignoring_children(struct task_struct *parent)
  {
-       if (p->pdeath_signal)
-               /* We already hold the tasklist_lock here.  */
-               group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+       int ret;
+       struct sighand_struct *psig = parent->sighand;
+       unsigned long flags;
+       spin_lock_irqsave(&psig->siglock, flags);       /* read the SIGCHLD action under siglock */
+       ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||      /* action[] is indexed by signo - 1 */
+              (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
+       spin_unlock_irqrestore(&psig->siglock, flags);
+       return ret;     /* nonzero: SIGCHLD is SIG_IGN or has SA_NOCLDWAIT set */
+}
  
-       /* Move the child from its dying parent to the new one.  */
-       if (unlikely(traced)) {
-               /* Preserve ptrace links if someone else is tracing this child.  */
-               list_del_init(&p->ptrace_list);
-               if (ptrace_reparented(p))
-                       list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
-       } else {
-               /* If this child is being traced, then we're the one tracing it
-                * anyway, so let go of it.
+/*
+ * Detach all tasks we were using ptrace on.
+ * Any that need to be release_task'd are put on the @dead list.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
+{
+       struct task_struct *p, *n;
+       int ign = -1;   /* < 0: ignoring_children(parent) not evaluated yet */
+
+       list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
+               __ptrace_unlink(p);
+
+               if (p->exit_state != EXIT_ZOMBIE)       /* only zombies need the handling below */
+                       continue;
+
+               /*
+                * If it's a zombie, our attachedness prevented normal
+                * parent notification or self-reaping.  Do notification
+                * now if it would have happened earlier.  If it should
+                * reap itself, add it to the @dead list.  We can't call
+                * release_task() here because we already hold tasklist_lock.
+                *
+                * If it's our own child, there is no notification to do.
+                * But if our normal children self-reap, then this child
+                * was prevented by ptrace and we must reap it now.
                  */
-               p->ptrace = 0;
-               remove_parent(p);
-               p->parent = p->real_parent;
-               add_parent(p);
+               if (!task_detached(p) && thread_group_empty(p)) {
+                       if (!same_thread_group(p->real_parent, parent))
+                               do_notify_parent(p, p->exit_signal);
+                       else {
+                               if (ign < 0)    /* evaluate at most once per call */
+                                       ign = ignoring_children(parent);
+                               if (ign)
+                                       p->exit_signal = -1;    /* NOTE(review): presumably makes task_detached(p) true below - confirm */
+                       }
+               }
  
-               if (task_is_traced(p)) {
+               if (task_detached(p)) {
                         /*
-                        * If it was at a trace stop, turn it into
-                        * a normal stop since it's no longer being
-                        * traced.
+                        * Mark it as in the process of being reaped.
                          */
-                       ptrace_untrace(p);
+                       p->exit_state = EXIT_DEAD;
+                       list_add(&p->ptrace_entry, dead);       /* ptrace_entry now links @p on @dead */
                 }
         }
+}
+
+/*
+ * Finish up exit-time ptrace cleanup.
+ *
+ * Called without locks.
+ */
+static void ptrace_exit_finish(struct task_struct *parent,
+                              struct list_head *dead)
+{
+       struct task_struct *p, *n;
+
+       BUG_ON(!list_empty(&parent->ptraced));  /* @parent's ptraced list must already be empty */
+
+       list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+               list_del_init(&p->ptrace_entry);        /* take @p off @dead before it is released */
+               release_task(p);
+       }
+}
+
+static void reparent_thread(struct task_struct *p, struct task_struct *father)
+{
+       if (p->pdeath_signal)
+               /* We already hold the tasklist_lock here.  */
+               group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+
+       list_move_tail(&p->sibling, &p->real_parent->children);
  
         /* If this is a threaded reparent there is no need to
          * notify anyone anything has happened.
@@ -737,7 +807,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
         /* If we'd notified the old parent about this child's death,
          * also notify the new parent.
          */
-       if (!traced && p->exit_state == EXIT_ZOMBIE &&
+       if (!ptrace_reparented(p) &&
+           p->exit_state == EXIT_ZOMBIE &&
             !task_detached(p) && thread_group_empty(p))
                 do_notify_parent(p, p->exit_signal);
  
@@ -754,12 +825,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
  static void forget_original_parent(struct task_struct *father)
  {
         struct task_struct *p, *n, *reaper = father;
-       struct list_head ptrace_dead;
-
-       INIT_LIST_HEAD(&ptrace_dead);
+       LIST_HEAD(ptrace_dead);
  
         write_lock_irq(&tasklist_lock);
  
+       /*
+        * First clean up ptrace if we were using it.
+        */
+       ptrace_exit(father, &ptrace_dead);
+
         do {
                 reaper = next_thread(reaper);
                 if (reaper == father) {
@@ -768,58 +842,19 @@ static void forget_original_parent(struct task_struct *father)
                 }
         } while (reaper->flags & PF_EXITING);
  
-       /*
-        * There are only two places where our children can be:
-        *
-        * - in our child list
-        * - in our ptraced child list
-        *
-        * Search them and reparent children.
-        */
         list_for_each_entry_safe(p, n, &father->children, sibling) {
-               int ptrace;
-
-               ptrace = p->ptrace;
-
-               /* if father isn't the real parent, then ptrace must be enabled */
-               BUG_ON(father != p->real_parent && !ptrace);
-
-               if (father == p->real_parent) {
-                       /* reparent with a reaper, real father it's us */
-                       p->real_parent = reaper;
-                       reparent_thread(p, father, 0);
-               } else {
-                       /* reparent ptraced task to its real parent */
-                       __ptrace_unlink (p);
-                       if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) &&
-                           thread_group_empty(p))
-                               do_notify_parent(p, p->exit_signal);
-               }
-
-               /*
-                * if the ptraced child is a detached zombie we must collect
-                * it before we exit, or it will remain zombie forever since
-                * we prevented it from self-reap itself while it was being
-                * traced by us, to be able to see it in wait4.
-                */
-               if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p)))
-                       list_add(&p->ptrace_list, &ptrace_dead);
-       }
-
-       list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
                 p->real_parent = reaper;
-               reparent_thread(p, father, 1);
+               if (p->parent == father) {
+                       BUG_ON(p->ptrace);
+                       p->parent = p->real_parent;
+               }
+               reparent_thread(p, father);
         }
  
         write_unlock_irq(&tasklist_lock);
         BUG_ON(!list_empty(&father->children));
-       BUG_ON(!list_empty(&father->ptrace_children));
-
-       list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
-               list_del_init(&p->ptrace_list);
-               release_task(p);
-       }
  
+       ptrace_exit_finish(father, &ptrace_dead);
  }
  
  /*
@@ -1180,13 +1215,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
                         return 0;
         }
  
-       /*
-        * Do not consider detached threads that are
-        * not ptraced:
-        */
-       if (task_detached(p) && !p->ptrace)
-               return 0;
-
         /* Wait for all children (clone and not) if __WALL is set;
          * otherwise, wait for clone children *only* if __WCLONE is
          * set; otherwise, wait for non-clone children *only*.  (Note:
@@ -1197,14 +1225,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
                 return 0;
  
         err = security_task_wait(p);
-       if (likely(!err))
-               return 1;
+       if (err)
+               return err;
  
-       if (type != PIDTYPE_PID)
-               return 0;
-       /* This child was explicitly requested, abort */
-       read_unlock(&tasklist_lock);
-       return err;
+       return 1;
  }
  
  static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
@@ -1238,7 +1262,7 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
   * the lock and this task is uninteresting.  If we return nonzero, we have
   * released the lock and the system call should return.
   */
-static int wait_task_zombie(struct task_struct *p, int noreap,
+static int wait_task_zombie(struct task_struct *p, int options,
                             struct siginfo __user *infop,
                             int __user *stat_addr, struct rusage __user *ru)
  {
@@ -1246,7 +1270,10 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
         int retval, status, traced;
         pid_t pid = task_pid_vnr(p);
  
-       if (unlikely(noreap)) {
+       if (!likely(options & WEXITED))
+               return 0;
+
+       if (unlikely(options & WNOWAIT)) {
                 uid_t uid = p->uid;
                 int exit_code = p->exit_code;
                 int why, status;
@@ -1396,21 +1423,24 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
   * the lock and this task is uninteresting.  If we return nonzero, we have
   * released the lock and the system call should return.
   */
-static int wait_task_stopped(struct task_struct *p,
-                            int noreap, struct siginfo __user *infop,
+static int wait_task_stopped(int ptrace, struct task_struct *p,
+                            int options, struct siginfo __user *infop,
                              int __user *stat_addr, struct rusage __user *ru)
  {
         int retval, exit_code, why;
         uid_t uid = 0; /* unneeded, required by compiler */
         pid_t pid;
  
+       if (!(options & WUNTRACED))
+               return 0;
+
         exit_code = 0;
         spin_lock_irq(&p->sighand->siglock);
  
         if (unlikely(!task_is_stopped_or_traced(p)))
                 goto unlock_sig;
  
-       if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0)
+       if (!ptrace && p->signal->group_stop_count > 0)
                 /*
                  * A group stop is in progress and this is the group leader.
                  * We won't report until all threads have stopped.
@@ -1421,7 +1451,7 @@ static int wait_task_stopped(struct task_struct *p,
         if (!exit_code)
                 goto unlock_sig;
  
-       if (!noreap)
+       if (!unlikely(options & WNOWAIT))
                 p->exit_code = 0;
  
         uid = p->uid;
@@ -1439,10 +1469,10 @@ unlock_sig:
          */
         get_task_struct(p);
         pid = task_pid_vnr(p);
-       why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
+       why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
         read_unlock(&tasklist_lock);
  
-       if (unlikely(noreap))
+       if (unlikely(options & WNOWAIT))
                 return wait_noreap_copyout(p, pid, uid,
                                            why, exit_code,
                                            infop, ru);
@@ -1476,7 +1506,7 @@ unlock_sig:
   * the lock and this task is uninteresting.  If we return nonzero, we have
   * released the lock and the system call should return.
   */
-static int wait_task_continued(struct task_struct *p, int noreap,
+static int wait_task_continued(struct task_struct *p, int options,
                                struct siginfo __user *infop,
                                int __user *stat_addr, struct rusage __user *ru)
  {
@@ -1484,6 +1514,9 @@ static int wait_task_continued(struct task_struct *p, int noreap,
         pid_t pid;
         uid_t uid;
  
+       if (!unlikely(options & WCONTINUED))
+               return 0;
+
         if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
                 return 0;
  
@@ -1493,7 +1526,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
                 spin_unlock_irq(&p->sighand->siglock);
                 return 0;
         }
-       if (!noreap)
+       if (!unlikely(options & WNOWAIT))
                 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
         spin_unlock_irq(&p->sighand->siglock);
  
@@ -1519,89 +1552,161 @@ static int wait_task_continued(struct task_struct *p, int noreap,
         return retval;
  }
  
+/*
+ * Consider @p for a wait by @parent.  @ptrace is nonzero when @p was
+ * found on the ->ptraced list rather than on ->children.
+ * -ECHILD should be in *@notask_error before the first call.
+ * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns zero if the search for a child should continue;
+ * then *@notask_error is 0 if @p is an eligible child,
+ * or another error from security_task_wait(), or still -ECHILD.
+ */
+static int wait_consider_task(struct task_struct *parent, int ptrace,
+                             struct task_struct *p, int *notask_error,
+                             enum pid_type type, struct pid *pid, int options,
+                             struct siginfo __user *infop,
+                             int __user *stat_addr, struct rusage __user *ru)
+{
+       int ret = eligible_child(type, pid, options, p);
+       if (!ret)
+               return ret;
+
+       if (unlikely(ret < 0)) {
+               /*
+                * If we have not yet seen any eligible child, let this
+                * error replace -ECHILD so the user looks at security
+                * policy, not for wait bugs.  @p itself is off limits:
+                * do not examine or reap it, move on to the next one.
+                */
+               if (*notask_error)
+                       *notask_error = ret;
+               return 0;
+       }
+
+       if (likely(!ptrace) && unlikely(p->ptrace)) {
+               /*
+                * This child is hidden by ptrace.
+                * We aren't allowed to see it now, but eventually we will.
+                */
+               *notask_error = 0;
+               return 0;
+       }
+
+       if (p->exit_state == EXIT_DEAD)
+               return 0;
+
+       /*
+        * We don't reap group leaders with subthreads.
+        */
+       if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
+               return wait_task_zombie(p, options, infop, stat_addr, ru);
+
+       /*
+        * It's stopped or running now, so it might
+        * later continue, exit, or stop again.
+        */
+       *notask_error = 0;
+
+       if (task_is_stopped_or_traced(p))
+               return wait_task_stopped(ptrace, p, options,
+                                        infop, stat_addr, ru);
+
+       return wait_task_continued(p, options, infop, stat_addr, ru);
+}
+
+/*
+ * Do the work of do_wait() for one thread in the group, @tsk.
+ * Every non-detached entry on @tsk->children is passed to
+ * wait_consider_task() with its ptrace argument clear.
+ *
+ * -ECHILD should be in *@notask_error before the first call.
+ * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns zero if the search for a child should continue; then
+ * *@notask_error is 0 if there were any eligible children,
+ * or another error from security_task_wait(), or still -ECHILD.
+ */
+static int do_wait_thread(struct task_struct *tsk, int *notask_error,
+                         enum pid_type type, struct pid *pid, int options,
+                         struct siginfo __user *infop, int __user *stat_addr,
+                         struct rusage __user *ru)
+{
+       struct task_struct *p;
+
+       list_for_each_entry(p, &tsk->children, sibling) {
+               /*
+                * Do not consider detached threads.
+                */
+               if (!task_detached(p)) {
+                       int ret = wait_consider_task(tsk, 0, p, notask_error,
+                                                    type, pid, options,
+                                                    infop, stat_addr, ru);
+                       /* Nonzero is final: stop scanning and pass it up. */
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Counterpart of do_wait_thread() for the tasks on @tsk->ptraced.
+ * Each entry is passed to wait_consider_task() with its ptrace
+ * argument set.  As in do_wait_thread(), the first nonzero result
+ * is returned at once and zero means the search should continue.
+ */
+static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
+                         enum pid_type type, struct pid *pid, int options,
+                         struct siginfo __user *infop, int __user *stat_addr,
+                         struct rusage __user *ru)
+{
+       struct task_struct *p;
+
+       /*
+        * Traditionally we see ptrace'd stopped tasks regardless of options.
+        * Only this function's copy of @options gains WUNTRACED.
+        */
+       options |= WUNTRACED;
+
+       list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
+               int ret = wait_consider_task(tsk, 1, p, notask_error,
+                                            type, pid, options,
+                                            infop, stat_addr, ru);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
  static long do_wait(enum pid_type type, struct pid *pid, int options,
                     struct siginfo __user *infop, int __user *stat_addr,
                     struct rusage __user *ru)
  {
         DECLARE_WAITQUEUE(wait, current);
         struct task_struct *tsk;
-       int flag, retval;
+       int retval;
  
         add_wait_queue(&current->signal->wait_chldexit,&wait);
  repeat:
-       /* If there is nothing that can match our critier just get out */
+       /*
+        * If there is nothing that can match our criteria just get out.
+        * We will clear @retval to zero if we see any child that might later
+        * match our criteria, even if we are not able to reap it yet.
+        */
         retval = -ECHILD;
         if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
                 goto end;
  
-       /*
-        * We will set this flag if we see any child that might later
-        * match our criteria, even if we are not able to reap it yet.
-        */
-       flag = retval = 0;
         current->state = TASK_INTERRUPTIBLE;
         read_lock(&tasklist_lock);
         tsk = current;
         do {
-               struct task_struct *p;
-
-               list_for_each_entry(p, &tsk->children, sibling) {
-                       int ret = eligible_child(type, pid, options, p);
-                       if (!ret)
-                               continue;
-
-                       if (unlikely(ret < 0)) {
-                               retval = ret;
-                       } else if (task_is_stopped_or_traced(p)) {
-                               /*
-                                * It's stopped now, so it might later
-                                * continue, exit, or stop again.
-                                */
-                               flag = 1;
-                               if (!(p->ptrace & PT_PTRACED) &&
-                                   !(options & WUNTRACED))
-                                       continue;
-
-                               retval = wait_task_stopped(p,
-                                               (options & WNOWAIT), infop,
-                                               stat_addr, ru);
-                       } else if (p->exit_state == EXIT_ZOMBIE &&
-                                       !delay_group_leader(p)) {
-                               /*
-                                * We don't reap group leaders with subthreads.
-                                */
-                               if (!likely(options & WEXITED))
-                                       continue;
-                               retval = wait_task_zombie(p,
-                                               (options & WNOWAIT), infop,
-                                               stat_addr, ru);
-                       } else if (p->exit_state != EXIT_DEAD) {
-                               /*
-                                * It's running now, so it might later
-                                * exit, stop, or stop and then continue.
-                                */
-                               flag = 1;
-                               if (!unlikely(options & WCONTINUED))
-                                       continue;
-                               retval = wait_task_continued(p,
-                                               (options & WNOWAIT), infop,
-                                               stat_addr, ru);
-                       }
-                       if (retval != 0) /* tasklist_lock released */
-                               goto end;
-               }
-               if (!flag) {
-                       list_for_each_entry(p, &tsk->ptrace_children,
-                                                               ptrace_list) {
-                               flag = eligible_child(type, pid, options, p);
-                               if (!flag)
-                                       continue;
-                               if (likely(flag > 0))
-                                       break;
-                               retval = flag;
-                               goto end;
-                       }
+               int tsk_result = do_wait_thread(tsk, &retval,
+                                               type, pid, options,
+                                               infop, stat_addr, ru);
+               if (!tsk_result)
+                       tsk_result = ptrace_do_wait(tsk, &retval,
+                                                   type, pid, options,
+                                                   infop, stat_addr, ru);
+               if (tsk_result) {
+                       /*
+                        * tasklist_lock is unlocked and we have a final result.
+                        */
+                       retval = tsk_result;
+                       goto end;
                 }
+
                 if (options & __WNOTHREAD)
                         break;
                 tsk = next_thread(tsk);
@@ -1609,16 +1714,14 @@ repeat:
         } while (tsk != current);
         read_unlock(&tasklist_lock);
  
-       if (flag) {
-               if (options & WNOHANG)
-                       goto end;
+       if (!retval && !(options & WNOHANG)) {
                 retval = -ERESTARTSYS;
-               if (signal_pending(current))
-                       goto end;
-               schedule();
-               goto repeat;
+               if (!signal_pending(current)) {
+                       schedule();
+                       goto repeat;
+               }
         }
-       retval = -ECHILD;
+
  end:
         current->state = TASK_RUNNING;
         remove_wait_queue(&current->signal->wait_chldexit,&wait);
diff --git a/kernel/fork.c b/kernel/fork.c

index 4bd2f516401f5409c76772a314e014c1357c47f7..adefc1131f274082960de7f375a67d4949cdfe27 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1125,8 +1125,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
          */
         p->group_leader = p;
         INIT_LIST_HEAD(&p->thread_group);
-       INIT_LIST_HEAD(&p->ptrace_children);
-       INIT_LIST_HEAD(&p->ptrace_list);
+       INIT_LIST_HEAD(&p->ptrace_entry);
+       INIT_LIST_HEAD(&p->ptraced);
  
         /* Now that the task is set up, run cgroup callbacks if
          * necessary. We need to run them before the task is visible
@@ -1198,7 +1198,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         }
  
         if (likely(p->pid)) {
-               add_parent(p);
+               list_add_tail(&p->sibling, &p->real_parent->children);
                 if (unlikely(p->ptrace & PT_PTRACED))
                         __ptrace_link(p, current->parent);
  
diff --git a/kernel/kthread.c b/kernel/kthread.c

index 97747cdd37c98034f25e8f7dcafecdb328367e50..ac3fb73266412eaaaad2a904cb5bbe7917a159b7 100644 (file)
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -235,7 +235,7 @@ int kthreadd(void *unused)
         set_user_nice(tsk, KTHREAD_NICE_LEVEL);
         set_cpus_allowed(tsk, CPU_MASK_ALL);
  
-       current->flags |= PF_NOFREEZE;
+       current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
  
         for (;;) {
                 set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/power/disk.c b/kernel/power/disk.c

index 14a656cdc6523bf64d9c5b64c3a628529b56bab5..f011e0870b52b316032295bc9b6ffcc45a4358ea 100644 (file)
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -179,6 +179,17 @@ static void platform_restore_cleanup(int platform_mode)
                 hibernation_ops->restore_cleanup();
  }
  
+/**
+ *     platform_recover - recover the platform from a failure to suspend
+ *     devices.
+ *     @platform_mode: if zero, nothing is done; otherwise the
+ *             hibernation_ops->recover() callback is invoked, provided
+ *             hibernation_ops is set and supplies that callback
+ */
+
+static void platform_recover(int platform_mode)
+{
+       if (platform_mode && hibernation_ops && hibernation_ops->recover)
+               hibernation_ops->recover();
+}
+
  /**
   *     create_image - freeze devices that need to be frozen with interrupts
   *     off, create the hibernation image and thaw those devices.  Control
@@ -193,6 +204,7 @@ static int create_image(int platform_mode)
         if (error)
                 return error;
  
+       device_pm_lock();
         local_irq_disable();
         /* At this point, device_suspend() has been called, but *not*
          * device_power_down(). We *must* call device_power_down() now.
@@ -224,9 +236,11 @@ static int create_image(int platform_mode)
         /* NOTE:  device_power_up() is just a resume() for devices
          * that suspended with irqs off ... no overall powerup.
          */
-       device_power_up();
+       device_power_up(in_suspend ?
+               (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
   Enable_irqs:
         local_irq_enable();
+       device_pm_unlock();
         return error;
  }
  
@@ -255,10 +269,10 @@ int hibernation_snapshot(int platform_mode)
         suspend_console();
         error = device_suspend(PMSG_FREEZE);
         if (error)
-               goto Resume_console;
+               goto Recover_platform;
  
         if (hibernation_test(TEST_DEVICES))
-               goto Resume_devices;
+               goto Recover_platform;
  
         error = platform_pre_snapshot(platform_mode);
         if (error || hibernation_test(TEST_PLATFORM))
@@ -280,12 +294,16 @@ int hibernation_snapshot(int platform_mode)
   Finish:
         platform_finish(platform_mode);
   Resume_devices:
-       device_resume();
- Resume_console:
+       device_resume(in_suspend ?
+               (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
         resume_console();
   Close:
         platform_end(platform_mode);
         return error;
+
+ Recover_platform:
+       platform_recover(platform_mode);
+       goto Resume_devices;
  }
  
  /**
@@ -300,8 +318,9 @@ static int resume_target_kernel(void)
  {
         int error;
  
+       device_pm_lock();
         local_irq_disable();
-       error = device_power_down(PMSG_PRETHAW);
+       error = device_power_down(PMSG_QUIESCE);
         if (error) {
                 printk(KERN_ERR "PM: Some devices failed to power down, "
                         "aborting resume\n");
@@ -329,9 +348,10 @@ static int resume_target_kernel(void)
         swsusp_free();
         restore_processor_state();
         touch_softlockup_watchdog();
-       device_power_up();
+       device_power_up(PMSG_RECOVER);
   Enable_irqs:
         local_irq_enable();
+       device_pm_unlock();
         return error;
  }
  
@@ -350,7 +370,7 @@ int hibernation_restore(int platform_mode)
  
         pm_prepare_console();
         suspend_console();
-       error = device_suspend(PMSG_PRETHAW);
+       error = device_suspend(PMSG_QUIESCE);
         if (error)
                 goto Finish;
  
@@ -362,7 +382,7 @@ int hibernation_restore(int platform_mode)
                 enable_nonboot_cpus();
         }
         platform_restore_cleanup(platform_mode);
-       device_resume();
+       device_resume(PMSG_RECOVER);
   Finish:
         resume_console();
         pm_restore_console();
@@ -392,8 +412,11 @@ int hibernation_platform_enter(void)
  
         suspend_console();
         error = device_suspend(PMSG_HIBERNATE);
-       if (error)
-               goto Resume_console;
+       if (error) {
+               if (hibernation_ops->recover)
+                       hibernation_ops->recover();
+               goto Resume_devices;
+       }
  
         error = hibernation_ops->prepare();
         if (error)
@@ -403,6 +426,7 @@ int hibernation_platform_enter(void)
         if (error)
                 goto Finish;
  
+       device_pm_lock();
         local_irq_disable();
         error = device_power_down(PMSG_HIBERNATE);
         if (!error) {
@@ -411,6 +435,7 @@ int hibernation_platform_enter(void)
                 while (1);
         }
         local_irq_enable();
+       device_pm_unlock();
  
         /*
          * We don't need to reenable the nonboot CPUs or resume consoles, since
@@ -419,8 +444,7 @@ int hibernation_platform_enter(void)
   Finish:
         hibernation_ops->finish();
   Resume_devices:
-       device_resume();
- Resume_console:
+       device_resume(PMSG_RESTORE);
         resume_console();
   Close:
         hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c

index 6a6d5eb3524e7d2f219aeaa894e70981b3a706fb..3398f4651aa150b370a6eeef95305ca56ff39a0b 100644 (file)
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -228,6 +228,7 @@ static int suspend_enter(suspend_state_t state)
  {
         int error = 0;
  
+       device_pm_lock();
         arch_suspend_disable_irqs();
         BUG_ON(!irqs_disabled());
  
@@ -239,10 +240,11 @@ static int suspend_enter(suspend_state_t state)
         if (!suspend_test(TEST_CORE))
                 error = suspend_ops->enter(state);
  
-       device_power_up();
+       device_power_up(PMSG_RESUME);
   Done:
         arch_suspend_enable_irqs();
         BUG_ON(irqs_disabled());
+       device_pm_unlock();
         return error;
  }
  
@@ -267,11 +269,11 @@ int suspend_devices_and_enter(suspend_state_t state)
         error = device_suspend(PMSG_SUSPEND);
         if (error) {
                 printk(KERN_ERR "PM: Some devices failed to suspend\n");
-               goto Resume_console;
+               goto Recover_platform;
         }
  
         if (suspend_test(TEST_DEVICES))
-               goto Resume_devices;
+               goto Recover_platform;
  
         if (suspend_ops->prepare) {
                 error = suspend_ops->prepare();
@@ -291,13 +293,17 @@ int suspend_devices_and_enter(suspend_state_t state)
         if (suspend_ops->finish)
                 suspend_ops->finish();
   Resume_devices:
-       device_resume();
- Resume_console:
+       device_resume(PMSG_RESUME);
         resume_console();
   Close:
         if (suspend_ops->end)
                 suspend_ops->end();
         return error;
+
+ Recover_platform:
+       if (suspend_ops->recover)
+               suspend_ops->recover();
+       goto Resume_devices;
  }
  
  /**
diff --git a/kernel/power/process.c b/kernel/power/process.c

index f1d0b345c9ba86a24ac48ee29cc4cb6f800b1c94..5fb87652f2149dd3a34f94e61e9a961303611e59 100644 (file)
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -19,9 +19,6 @@
   */
  #define TIMEOUT        (20 * HZ)
  
-#define FREEZER_KERNEL_THREADS 0
-#define FREEZER_USER_SPACE 1
-
  static inline int freezeable(struct task_struct * p)
  {
         if ((p == current) ||
@@ -84,63 +81,53 @@ static void fake_signal_wake_up(struct task_struct *p)
         spin_unlock_irqrestore(&p->sighand->siglock, flags);
  }
  
-static int has_mm(struct task_struct *p)
+static inline bool should_send_signal(struct task_struct *p)
  {
-       return (p->mm && !(p->flags & PF_BORROWED_MM));
+       return !(p->flags & PF_FREEZER_NOSIG);
  }
  
  /**
   *     freeze_task - send a freeze request to given task
   *     @p: task to send the request to
- *     @with_mm_only: if set, the request will only be sent if the task has its
- *             own mm
- *     Return value: 0, if @with_mm_only is set and the task has no mm of its
- *             own or the task is frozen, 1, otherwise
+ *     @sig_only: if set, the request will only be sent if the task has the
+ *             PF_FREEZER_NOSIG flag unset
+ *     Return value: 'false', if @sig_only is set and the task has
+ *             PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
   *
- *     The freeze request is sent by seting the tasks's TIF_FREEZE flag and
+ *     The freeze request is sent by setting the task's TIF_FREEZE flag and
   *     either sending a fake signal to it or waking it up, depending on whether
- *     or not it has its own mm (ie. it is a user land task).  If @with_mm_only
- *     is set and the task has no mm of its own (ie. it is a kernel thread),
- *     its TIF_FREEZE flag should not be set.
- *
- *     The task_lock() is necessary to prevent races with exit_mm() or
- *     use_mm()/unuse_mm() from occuring.
+ *     or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
+ *     has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
+ *     TIF_FREEZE flag will not be set.
   */
-static int freeze_task(struct task_struct *p, int with_mm_only)
+static bool freeze_task(struct task_struct *p, bool sig_only)
  {
-       int ret = 1;
+       /*
+        * We first check if the task is freezing and next if it has already
+        * been frozen to avoid the race with frozen_process() which first marks
+        * the task as frozen and next clears its TIF_FREEZE.
+        */
+       if (!freezing(p)) {
+               rmb();
+               if (frozen(p))
+                       return false;
  
-       task_lock(p);
-       if (freezing(p)) {
-               if (has_mm(p)) {
-                       if (!signal_pending(p))
-                               fake_signal_wake_up(p);
-               } else {
-                       if (with_mm_only)
-                               ret = 0;
-                       else
-                               wake_up_state(p, TASK_INTERRUPTIBLE);
-               }
+               if (!sig_only || should_send_signal(p))
+                       set_freeze_flag(p);
+               else
+                       return false;
+       }
+
+       if (should_send_signal(p)) {
+               if (!signal_pending(p))
+                       fake_signal_wake_up(p);
+       } else if (sig_only) {
+               return false;
         } else {
-               rmb();
-               if (frozen(p)) {
-                       ret = 0;
-               } else {
-                       if (has_mm(p)) {
-                               set_freeze_flag(p);
-                               fake_signal_wake_up(p);
-                       } else {
-                               if (with_mm_only) {
-                                       ret = 0;
-                               } else {
-                                       set_freeze_flag(p);
-                                       wake_up_state(p, TASK_INTERRUPTIBLE);
-                               }
-                       }
-               }
+               wake_up_state(p, TASK_INTERRUPTIBLE);
         }
-       task_unlock(p);
-       return ret;
+
+       return true;
  }
  
  static void cancel_freezing(struct task_struct *p)
@@ -156,7 +143,7 @@ static void cancel_freezing(struct task_struct *p)
         }
  }
  
-static int try_to_freeze_tasks(int freeze_user_space)
+static int try_to_freeze_tasks(bool sig_only)
  {
         struct task_struct *g, *p;
         unsigned long end_time;
@@ -175,7 +162,7 @@ static int try_to_freeze_tasks(int freeze_user_space)
                         if (frozen(p) || !freezeable(p))
                                 continue;
  
-                       if (!freeze_task(p, freeze_user_space))
+                       if (!freeze_task(p, sig_only))
                                 continue;
  
                         /*
@@ -235,13 +222,13 @@ int freeze_processes(void)
         int error;
  
         printk("Freezing user space processes ... ");
-       error = try_to_freeze_tasks(FREEZER_USER_SPACE);
+       error = try_to_freeze_tasks(true);
         if (error)
                 goto Exit;
         printk("done.\n");
  
         printk("Freezing remaining freezable tasks ... ");
-       error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS);
+       error = try_to_freeze_tasks(false);
         if (error)
                 goto Exit;
         printk("done.");
@@ -251,7 +238,7 @@ int freeze_processes(void)
         return error;
  }
  
-static void thaw_tasks(int thaw_user_space)
+static void thaw_tasks(bool nosig_only)
  {
         struct task_struct *g, *p;
  
@@ -260,7 +247,7 @@ static void thaw_tasks(int thaw_user_space)
                 if (!freezeable(p))
                         continue;
  
-               if (!p->mm == thaw_user_space)
+               if (nosig_only && should_send_signal(p))
                         continue;
  
                 thaw_process(p);
@@ -271,8 +258,8 @@ static void thaw_tasks(int thaw_user_space)
  void thaw_processes(void)
  {
         printk("Restarting tasks ... ");
-       thaw_tasks(FREEZER_KERNEL_THREADS);
-       thaw_tasks(FREEZER_USER_SPACE);
+       thaw_tasks(true);
+       thaw_tasks(false);
         schedule();
         printk("done.\n");
  }
diff --git a/kernel/power/user.c b/kernel/power/user.c

index f5512cb3aa86f61e98caec04c82a01a0dc9f6dcf..a6332a3132620b4563e3be8a0fbfb9639b1e5405 100644 (file)
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,6 +23,7 @@
  #include <linux/console.h>
  #include <linux/cpu.h>
  #include <linux/freezer.h>
+#include <linux/smp_lock.h>
  
  #include <asm/uaccess.h>
  
@@ -69,16 +70,22 @@ static int snapshot_open(struct inode *inode, struct file *filp)
         struct snapshot_data *data;
         int error;
  
-       if (!atomic_add_unless(&snapshot_device_available, -1, 0))
-               return -EBUSY;
+       mutex_lock(&pm_mutex);
+
+       if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+               error = -EBUSY;
+               goto Unlock;
+       }
  
         if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
                 atomic_inc(&snapshot_device_available);
-               return -ENOSYS;
+               error = -ENOSYS;
+               goto Unlock;
         }
         if(create_basic_memory_bitmaps()) {
                 atomic_inc(&snapshot_device_available);
-               return -ENOMEM;
+               error = -ENOMEM;
+               goto Unlock;
         }
         nonseekable_open(inode, filp);
         data = &snapshot_state;
@@ -98,33 +105,36 @@ static int snapshot_open(struct inode *inode, struct file *filp)
                 if (error)
                         pm_notifier_call_chain(PM_POST_HIBERNATION);
         }
-       if (error) {
+       if (error)
                 atomic_inc(&snapshot_device_available);
-               return error;
-       }
         data->frozen = 0;
         data->ready = 0;
         data->platform_support = 0;
  
-       return 0;
+ Unlock:
+       mutex_unlock(&pm_mutex);
+
+       return error;
  }
  
  static int snapshot_release(struct inode *inode, struct file *filp)
  {
         struct snapshot_data *data;
  
+       mutex_lock(&pm_mutex);
+
         swsusp_free();
         free_basic_memory_bitmaps();
         data = filp->private_data;
         free_all_swap_pages(data->swap);
-       if (data->frozen) {
-               mutex_lock(&pm_mutex);
+       if (data->frozen)
                 thaw_processes();
-               mutex_unlock(&pm_mutex);
-       }
         pm_notifier_call_chain(data->mode == O_WRONLY ?
                         PM_POST_HIBERNATION : PM_POST_RESTORE);
         atomic_inc(&snapshot_device_available);
+
+       mutex_unlock(&pm_mutex);
+
         return 0;
  }
  
@@ -134,9 +144,13 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
         struct snapshot_data *data;
         ssize_t res;
  
+       mutex_lock(&pm_mutex);
+
         data = filp->private_data;
-       if (!data->ready)
-               return -ENODATA;
+       if (!data->ready) {
+               res = -ENODATA;
+               goto Unlock;
+       }
         res = snapshot_read_next(&data->handle, count);
         if (res > 0) {
                 if (copy_to_user(buf, data_of(data->handle), res))
@@ -144,6 +158,10 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
                 else
                         *offp = data->handle.offset;
         }
+
+ Unlock:
+       mutex_unlock(&pm_mutex);
+
         return res;
  }
  
@@ -153,6 +171,8 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
         struct snapshot_data *data;
         ssize_t res;
  
+       mutex_lock(&pm_mutex);
+
         data = filp->private_data;
         res = snapshot_write_next(&data->handle, count);
         if (res > 0) {
@@ -161,11 +181,14 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
                 else
                         *offp = data->handle.offset;
         }
+
+       mutex_unlock(&pm_mutex);
+
         return res;
  }
  
-static int snapshot_ioctl(struct inode *inode, struct file *filp,
-                          unsigned int cmd, unsigned long arg)
+static long snapshot_ioctl(struct file *filp, unsigned int cmd,
+                                                       unsigned long arg)
  {
         int error = 0;
         struct snapshot_data *data;
@@ -179,6 +202,9 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
  
+       if (!mutex_trylock(&pm_mutex))
+               return -EBUSY;
+
         data = filp->private_data;
  
         switch (cmd) {
@@ -186,7 +212,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
         case SNAPSHOT_FREEZE:
                 if (data->frozen)
                         break;
-               mutex_lock(&pm_mutex);
                 printk("Syncing filesystems ... ");
                 sys_sync();
                 printk("done.\n");
@@ -194,7 +219,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
                 error = freeze_processes();
                 if (error)
                         thaw_processes();
-               mutex_unlock(&pm_mutex);
                 if (!error)
                         data->frozen = 1;
                 break;
@@ -202,9 +226,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
         case SNAPSHOT_UNFREEZE:
                 if (!data->frozen || data->ready)
                         break;
-               mutex_lock(&pm_mutex);
                 thaw_processes();
-               mutex_unlock(&pm_mutex);
                 data->frozen = 0;
                 break;
  
@@ -307,16 +329,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
                         error = -EPERM;
                         break;
                 }
-               if (!mutex_trylock(&pm_mutex)) {
-                       error = -EBUSY;
-                       break;
-               }
                 /*
                  * Tasks are frozen and the notifiers have been called with
                  * PM_HIBERNATION_PREPARE
                  */
                 error = suspend_devices_and_enter(PM_SUSPEND_MEM);
-               mutex_unlock(&pm_mutex);
                 break;
  
         case SNAPSHOT_PLATFORM_SUPPORT:
@@ -390,6 +407,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
  
         }
  
+       mutex_unlock(&pm_mutex);
+
         return error;
  }
  
@@ -399,7 +418,7 @@ static const struct file_operations snapshot_fops = {
         .read = snapshot_read,
         .write = snapshot_write,
         .llseek = no_llseek,
-       .ioctl = snapshot_ioctl,
+       .unlocked_ioctl = snapshot_ioctl,
  };
  
  static struct miscdevice snapshot_device = {
diff --git a/kernel/ptrace.c b/kernel/ptrace.c

index e337390fce011dc0ea1d5eba1cf678553fb93b71..8392a9da64504054bf804e5809f902bc296f676f 100644 (file)
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -33,13 +33,9 @@
   */
  void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
  {
-       BUG_ON(!list_empty(&child->ptrace_list));
-       if (child->parent == new_parent)
-               return;
-       list_add(&child->ptrace_list, &child->parent->ptrace_children);
-       remove_parent(child);
+       BUG_ON(!list_empty(&child->ptrace_entry));
+       list_add(&child->ptrace_entry, &new_parent->ptraced);
         child->parent = new_parent;
-       add_parent(child);
  }
   
  /*
@@ -73,12 +69,8 @@ void __ptrace_unlink(struct task_struct *child)
         BUG_ON(!child->ptrace);
  
         child->ptrace = 0;
-       if (ptrace_reparented(child)) {
-               list_del_init(&child->ptrace_list);
-               remove_parent(child);
-               child->parent = child->real_parent;
-               add_parent(child);
-       }
+       child->parent = child->real_parent;
+       list_del_init(&child->ptrace_entry);
  
         if (task_is_traced(child))
                 ptrace_untrace(child);
@@ -492,15 +484,34 @@ int ptrace_traceme(void)
         /*
          * Are we already being traced?
          */
+repeat:
         task_lock(current);
         if (!(current->ptrace & PT_PTRACED)) {
+               /*
+                * See ptrace_attach() comments about the locking here.
+                */
+               unsigned long flags;
+               if (!write_trylock_irqsave(&tasklist_lock, flags)) {
+                       task_unlock(current);
+                       do {
+                               cpu_relax();
+                       } while (!write_can_lock(&tasklist_lock));
+                       goto repeat;
+               }
+
                 ret = security_ptrace(current->parent, current,
                                       PTRACE_MODE_ATTACH);
+
                 /*
                  * Set the ptrace bit in the process ptrace flags.
+                * Then link us on our parent's ptraced list.
                  */
-               if (!ret)
+               if (!ret) {
                         current->ptrace |= PT_PTRACED;
+                       __ptrace_link(current, current->real_parent);
+               }
+
+               write_unlock_irqrestore(&tasklist_lock, flags);
         }
         task_unlock(current);
         return ret;
diff --git a/lib/Makefile b/lib/Makefile

index 2c62a9c06fbe5adf638ec304dda3d13546fdb036..818c4d4555188b509e015c198acb191e2f7a15b4 100644 (file)
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -2,21 +2,17 @@
  # Makefile for some libs needed in the kernel.
  #
  
+ifdef CONFIG_FTRACE
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
+endif
+
  lib-y := ctype.o string.o vsprintf.o cmdline.o \
          rbtree.o radix-tree.o dump_stack.o \
          idr.o int_sqrt.o extable.o prio_tree.o \
          sha1.o irq_regs.o reciprocal_div.o argv_split.o \
          proportions.o prio_heap.o ratelimit.o
  
-ifdef CONFIG_FTRACE
-# Do not profile string.o, since it may be used in early boot or vdso
-CFLAGS_REMOVE_string.o = -pg
-# Also do not profile any debug utilities
-CFLAGS_REMOVE_spinlock_debug.o = -pg
-CFLAGS_REMOVE_list_debug.o = -pg
-CFLAGS_REMOVE_debugobjects.o = -pg
-endif
-
  lib-$(CONFIG_MMU) += ioremap.o
  lib-$(CONFIG_SMP) += cpumask.o
  
diff --git a/lib/kobject.c b/lib/kobject.c

index 718e5101c263596224f7f35b523cea27e093ecd6..dcade0543bd2a2395ede8f91bf33438348e54f7e 100644 (file)
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -439,6 +439,7 @@ out:
  
         return error;
  }
+EXPORT_SYMBOL_GPL(kobject_rename);
  
  /**
   * kobject_move - move object to another parent
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c

index cc12d5f5d5da57e61db43d1d11bf6e2498432970..834a83199bdf5ec07d778b6346191fd2fb7ccf22 100644 (file)
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -63,22 +63,11 @@ static const struct rpc_credops gss_nullops;
  # define RPCDBG_FACILITY       RPCDBG_AUTH
  #endif
  
-#define NFS_NGROUPS    16
-
-#define GSS_CRED_SLACK         1024            /* XXX: unused */
+#define GSS_CRED_SLACK         1024
  /* length of a krb5 verifier (48), plus data added before arguments when
   * using integrity (two 4-byte integers): */
  #define GSS_VERF_SLACK         100
  
-/* XXX this define must match the gssd define
-* as it is passed to gssd to signal the use of
-* machine creds should be part of the shared rpc interface */
-
-#define CA_RUN_AS_MACHINE  0x00000200
-
-/* dump the buffer in `emacs-hexl' style */
-#define isprint(c)      ((c > 0x1f) && (c < 0x7f))
-
  struct gss_auth {
         struct kref kref;
         struct rpc_auth rpc_auth;
@@ -146,7 +135,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
         q = (const void *)((const char *)p + len);
         if (unlikely(q > end || q < p))
                 return ERR_PTR(-EFAULT);
-       dest->data = kmemdup(p, len, GFP_KERNEL);
+       dest->data = kmemdup(p, len, GFP_NOFS);
         if (unlikely(dest->data == NULL))
                 return ERR_PTR(-ENOMEM);
         dest->len = len;
@@ -171,7 +160,7 @@ gss_alloc_context(void)
  {
         struct gss_cl_ctx *ctx;
  
-       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       ctx = kzalloc(sizeof(*ctx), GFP_NOFS);
         if (ctx != NULL) {
                 ctx->gc_proc = RPC_GSS_PROC_DATA;
                 ctx->gc_seq = 1;        /* NetApp 6.4R1 doesn't accept seq. no. 0 */
@@ -272,7 +261,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
         return NULL;
  }
  
-/* Try to add a upcall to the pipefs queue.
+/* Try to add an upcall to the pipefs queue.
   * If an upcall owned by our uid already exists, then we return a reference
   * to that upcall instead of adding the new upcall.
   */
@@ -341,7 +330,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid)
  {
         struct gss_upcall_msg *gss_msg;
  
-       gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL);
+       gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
         if (gss_msg != NULL) {
                 INIT_LIST_HEAD(&gss_msg->list);
                 rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
@@ -493,7 +482,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
  {
         const void *p, *end;
         void *buf;
-       struct rpc_clnt *clnt;
         struct gss_upcall_msg *gss_msg;
         struct inode *inode = filp->f_path.dentry->d_inode;
         struct gss_cl_ctx *ctx;
@@ -503,11 +491,10 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
         if (mlen > MSG_BUF_MAXSIZE)
                 goto out;
         err = -ENOMEM;
-       buf = kmalloc(mlen, GFP_KERNEL);
+       buf = kmalloc(mlen, GFP_NOFS);
         if (!buf)
                 goto out;
  
-       clnt = RPC_I(inode)->private;
         err = -EFAULT;
         if (copy_from_user(buf, src, mlen))
                 goto err;
@@ -806,7 +793,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
         dprintk("RPC:       gss_create_cred for uid %d, flavor %d\n",
                 acred->uid, auth->au_flavor);
  
-       if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL)))
+       if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS)))
                 goto out_err;
  
         rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c

index 60c3dba545d7c8287cc17a00251e7fe43dad9e27..ef45eba22485cf355c8fdd17382943d7ff7ec602 100644 (file)
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -70,7 +70,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
         q = (const void *)((const char *)p + len);
         if (unlikely(q > end || q < p))
                 return ERR_PTR(-EFAULT);
-       res->data = kmemdup(p, len, GFP_KERNEL);
+       res->data = kmemdup(p, len, GFP_NOFS);
         if (unlikely(res->data == NULL))
                 return ERR_PTR(-ENOMEM);
         res->len = len;
@@ -131,7 +131,7 @@ gss_import_sec_context_kerberos(const void *p,
         struct  krb5_ctx *ctx;
         int tmp;
  
-       if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
+       if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
                 goto out_err;
  
         p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c

index 5deb4b6e451404acc126eef5b9e2e855cef1fcb0..035e1dd6af1b22f46a040df1386ebef83109bb34 100644 (file)
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -76,7 +76,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
         q = (const void *)((const char *)p + len);
         if (unlikely(q > end || q < p))
                 return ERR_PTR(-EFAULT);
-       res->data = kmemdup(p, len, GFP_KERNEL);
+       res->data = kmemdup(p, len, GFP_NOFS);
         if (unlikely(res->data == NULL))
                 return ERR_PTR(-ENOMEM);
         return q;
@@ -90,7 +90,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len,
         struct  spkm3_ctx *ctx;
         int     version;
  
-       if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
+       if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
                 goto out_err;
  
         p = simple_get_bytes(p, end, &version, sizeof(version));
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c

index 6cdd241ad267f7631f8a349aece2f91686fc230e..3308157436d2931dcef73de989a7ed85ae8824da 100644 (file)
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -90,7 +90,7 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
  int
  decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
  {
-       if (!(out->data = kzalloc(explen,GFP_KERNEL)))
+       if (!(out->data = kzalloc(explen,GFP_NOFS)))
                 return 0;
         out->len = explen;
         memcpy(out->data, in, enclen);
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c

index 44920b90bdc45a438faca8b2aa8d5cfb388a26c7..46b2647c5bd28bb253394e96c39823d401d6d473 100644 (file)
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
         dprintk("RPC:       allocating UNIX cred for uid %d gid %d\n",
                         acred->uid, acred->gid);
  
-       if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
+       if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS)))
                 return ERR_PTR(-ENOMEM);
  
         rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c

index 8945307556ec32c1a02da6f505748cb5f813b490..76739e928d0da5ff99b748ac61538e2bfc03fbf4 100644 (file)
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -25,6 +25,7 @@
  
  #include <linux/module.h>
  #include <linux/types.h>
+#include <linux/kallsyms.h>
  #include <linux/mm.h>
  #include <linux/slab.h>
  #include <linux/smp_lock.h>
@@ -58,7 +59,6 @@ static void   call_start(struct rpc_task *task);
  static void    call_reserve(struct rpc_task *task);
  static void    call_reserveresult(struct rpc_task *task);
  static void    call_allocate(struct rpc_task *task);
-static void    call_encode(struct rpc_task *task);
  static void    call_decode(struct rpc_task *task);
  static void    call_bind(struct rpc_task *task);
  static void    call_bind_status(struct rpc_task *task);
@@ -70,9 +70,9 @@ static void   call_refreshresult(struct rpc_task *task);
  static void    call_timeout(struct rpc_task *task);
  static void    call_connect(struct rpc_task *task);
  static void    call_connect_status(struct rpc_task *task);
-static __be32 *        call_header(struct rpc_task *task);
-static __be32 *        call_verify(struct rpc_task *task);
  
+static __be32  *rpc_encode_header(struct rpc_task *task);
+static __be32  *rpc_verify_header(struct rpc_task *task);
  static int     rpc_ping(struct rpc_clnt *clnt, int flags);
  
  static void rpc_register_client(struct rpc_clnt *clnt)
@@ -324,6 +324,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
                 clnt->cl_autobind = 1;
         if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
                 clnt->cl_discrtry = 1;
+       if (!(args->flags & RPC_CLNT_CREATE_QUIET))
+               clnt->cl_chatty = 1;
  
         return clnt;
  }
@@ -690,6 +692,21 @@ rpc_restart_call(struct rpc_task *task)
  }
  EXPORT_SYMBOL_GPL(rpc_restart_call);
  
+#ifdef RPC_DEBUG
+static const char *rpc_proc_name(const struct rpc_task *task)
+{
+       const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+
+       if (proc) {
+               if (proc->p_name)
+                       return proc->p_name;
+               else
+                       return "NULL";
+       } else
+               return "no proc";
+}
+#endif
+
  /*
   * 0.  Initial state
   *
@@ -701,9 +718,9 @@ call_start(struct rpc_task *task)
  {
         struct rpc_clnt *clnt = task->tk_client;
  
-       dprintk("RPC: %5u call_start %s%d proc %d (%s)\n", task->tk_pid,
+       dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
                         clnt->cl_protname, clnt->cl_vers,
-                       task->tk_msg.rpc_proc->p_proc,
+                       rpc_proc_name(task),
                         (RPC_IS_ASYNC(task) ? "async" : "sync"));
  
         /* Increment call count */
@@ -861,7 +878,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
   * 3.  Encode arguments of an RPC call
   */
  static void
-call_encode(struct rpc_task *task)
+rpc_xdr_encode(struct rpc_task *task)
  {
         struct rpc_rqst *req = task->tk_rqstp;
         kxdrproc_t      encode;
@@ -876,23 +893,19 @@ call_encode(struct rpc_task *task)
                          (char *)req->rq_buffer + req->rq_callsize,
                          req->rq_rcvsize);
  
-       /* Encode header and provided arguments */
-       encode = task->tk_msg.rpc_proc->p_encode;
-       if (!(p = call_header(task))) {
-               printk(KERN_INFO "RPC: call_header failed, exit EIO\n");
+       p = rpc_encode_header(task);
+       if (p == NULL) {
+               printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n");
                 rpc_exit(task, -EIO);
                 return;
         }
+
+       encode = task->tk_msg.rpc_proc->p_encode;
         if (encode == NULL)
                 return;
  
         task->tk_status = rpcauth_wrap_req(task, encode, req, p,
                         task->tk_msg.rpc_argp);
-       if (task->tk_status == -ENOMEM) {
-               /* XXX: Is this sane? */
-               rpc_delay(task, 3*HZ);
-               task->tk_status = -EAGAIN;
-       }
  }
  
  /*
@@ -929,11 +942,9 @@ call_bind_status(struct rpc_task *task)
         }
  
         switch (task->tk_status) {
-       case -EAGAIN:
-               dprintk("RPC: %5u rpcbind waiting for another request "
-                               "to finish\n", task->tk_pid);
-               /* avoid busy-waiting here -- could be a network outage. */
-               rpc_delay(task, 5*HZ);
+       case -ENOMEM:
+               dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid);
+               rpc_delay(task, HZ >> 2);
                 goto retry_timeout;
         case -EACCES:
                 dprintk("RPC: %5u remote rpcbind: RPC program/version "
@@ -1046,10 +1057,16 @@ call_transmit(struct rpc_task *task)
         /* Encode here so that rpcsec_gss can use correct sequence number. */
         if (rpc_task_need_encode(task)) {
                 BUG_ON(task->tk_rqstp->rq_bytes_sent != 0);
-               call_encode(task);
+               rpc_xdr_encode(task);
                 /* Did the encode result in an error condition? */
-               if (task->tk_status != 0)
+               if (task->tk_status != 0) {
+                       /* Was the error nonfatal? */
+                       if (task->tk_status == -EAGAIN)
+                               rpc_delay(task, HZ >> 4);
+                       else
+                               rpc_exit(task, task->tk_status);
                         return;
+               }
         }
         xprt_transmit(task);
         if (task->tk_status < 0)
@@ -1132,7 +1149,8 @@ call_status(struct rpc_task *task)
                 rpc_exit(task, status);
                 break;
         default:
-               printk("%s: RPC call returned error %d\n",
+               if (clnt->cl_chatty)
+                       printk("%s: RPC call returned error %d\n",
                                clnt->cl_protname, -status);
                 rpc_exit(task, status);
         }
@@ -1157,7 +1175,8 @@ call_timeout(struct rpc_task *task)
         task->tk_timeouts++;
  
         if (RPC_IS_SOFT(task)) {
-               printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
+               if (clnt->cl_chatty)
+                       printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
                                 clnt->cl_protname, clnt->cl_server);
                 rpc_exit(task, -EIO);
                 return;
@@ -1165,7 +1184,8 @@ call_timeout(struct rpc_task *task)
  
         if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
                 task->tk_flags |= RPC_CALL_MAJORSEEN;
-               printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
+               if (clnt->cl_chatty)
+                       printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
                         clnt->cl_protname, clnt->cl_server);
         }
         rpc_force_rebind(clnt);
@@ -1196,8 +1216,9 @@ call_decode(struct rpc_task *task)
                         task->tk_pid, task->tk_status);
  
         if (task->tk_flags & RPC_CALL_MAJORSEEN) {
-               printk(KERN_NOTICE "%s: server %s OK\n",
-                       clnt->cl_protname, clnt->cl_server);
+               if (clnt->cl_chatty)
+                       printk(KERN_NOTICE "%s: server %s OK\n",
+                               clnt->cl_protname, clnt->cl_server);
                 task->tk_flags &= ~RPC_CALL_MAJORSEEN;
         }
  
@@ -1224,8 +1245,7 @@ call_decode(struct rpc_task *task)
                 goto out_retry;
         }
  
-       /* Verify the RPC header */
-       p = call_verify(task);
+       p = rpc_verify_header(task);
         if (IS_ERR(p)) {
                 if (p == ERR_PTR(-EAGAIN))
                         goto out_retry;
@@ -1243,7 +1263,7 @@ call_decode(struct rpc_task *task)
         return;
  out_retry:
         task->tk_status = 0;
-       /* Note: call_verify() may have freed the RPC slot */
+       /* Note: rpc_verify_header() may have freed the RPC slot */
         if (task->tk_rqstp == req) {
                 req->rq_received = req->rq_rcv_buf.len = 0;
                 if (task->tk_client->cl_discrtry)
@@ -1290,11 +1310,8 @@ call_refreshresult(struct rpc_task *task)
         return;
  }
  
-/*
- * Call header serialization
- */
  static __be32 *
-call_header(struct rpc_task *task)
+rpc_encode_header(struct rpc_task *task)
  {
         struct rpc_clnt *clnt = task->tk_client;
         struct rpc_rqst *req = task->tk_rqstp;
@@ -1314,11 +1331,8 @@ call_header(struct rpc_task *task)
         return p;
  }
  
-/*
- * Reply header verification
- */
  static __be32 *
-call_verify(struct rpc_task *task)
+rpc_verify_header(struct rpc_task *task)
  {
         struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
         int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
@@ -1392,7 +1406,7 @@ call_verify(struct rpc_task *task)
                         task->tk_action = call_bind;
                         goto out_retry;
                 case RPC_AUTH_TOOWEAK:
-                       printk(KERN_NOTICE "call_verify: server %s requires stronger "
+                       printk(KERN_NOTICE "RPC: server %s requires stronger "
                                "authentication.\n", task->tk_client->cl_server);
                         break;
                 default:
@@ -1431,10 +1445,10 @@ call_verify(struct rpc_task *task)
                 error = -EPROTONOSUPPORT;
                 goto out_err;
         case RPC_PROC_UNAVAIL:
-               dprintk("RPC: %5u %s: proc %p unsupported by program %u, "
+               dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
                                 "version %u on server %s\n",
                                 task->tk_pid, __func__,
-                               task->tk_msg.rpc_proc,
+                               rpc_proc_name(task),
                                 task->tk_client->cl_prog,
                                 task->tk_client->cl_vers,
                                 task->tk_client->cl_server);
@@ -1517,44 +1531,53 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
  EXPORT_SYMBOL_GPL(rpc_call_null);
  
  #ifdef RPC_DEBUG
+static void rpc_show_header(void)
+{
+       printk(KERN_INFO "-pid- flgs status -client- --rqstp- "
+               "-timeout ---ops--\n");
+}
+
+static void rpc_show_task(const struct rpc_clnt *clnt,
+                         const struct rpc_task *task)
+{
+       const char *rpc_waitq = "none";
+       char *p, action[KSYM_SYMBOL_LEN];
+
+       if (RPC_IS_QUEUED(task))
+               rpc_waitq = rpc_qname(task->tk_waitqueue);
+
+       /* map tk_action pointer to a function name; then trim off
+        * the "+0x0 [sunrpc]" */
+       sprint_symbol(action, (unsigned long)task->tk_action);
+       p = strchr(action, '+');
+       if (p)
+               *p = '\0';
+
+       printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n",
+               task->tk_pid, task->tk_flags, task->tk_status,
+               clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
+               clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
+               action, rpc_waitq);
+}
+
  void rpc_show_tasks(void)
  {
         struct rpc_clnt *clnt;
-       struct rpc_task *t;
+       struct rpc_task *task;
+       int header = 0;
  
         spin_lock(&rpc_client_lock);
-       if (list_empty(&all_clients))
-               goto out;
-       printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
-               "-rpcwait -action- ---ops--\n");
         list_for_each_entry(clnt, &all_clients, cl_clients) {
-               if (list_empty(&clnt->cl_tasks))
-                       continue;
                 spin_lock(&clnt->cl_lock);
-               list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
-                       const char *rpc_waitq = "none";
-                       int proc;
-
-                       if (t->tk_msg.rpc_proc)
-                               proc = t->tk_msg.rpc_proc->p_proc;
-                       else
-                               proc = -1;
-
-                       if (RPC_IS_QUEUED(t))
-                               rpc_waitq = rpc_qname(t->tk_waitqueue);
-
-                       printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
-                               t->tk_pid, proc,
-                               t->tk_flags, t->tk_status,
-                               t->tk_client,
-                               (t->tk_client ? t->tk_client->cl_prog : 0),
-                               t->tk_rqstp, t->tk_timeout,
-                               rpc_waitq,
-                               t->tk_action, t->tk_ops);
+               list_for_each_entry(task, &clnt->cl_tasks, tk_task) {
+                       if (!header) {
+                               rpc_show_header();
+                               header++;
+                       }
+                       rpc_show_task(clnt, task);
                 }
                 spin_unlock(&clnt->cl_lock);
         }
-out:
         spin_unlock(&rpc_client_lock);
  }
  #endif
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c

index e6fb21b19b86b6421011f76400e74e5d2a919acc..24db2b4d12d3e43b3faf944262a10105ecaca898 100644 (file)
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -32,6 +32,10 @@
  #define RPCBIND_PROGRAM                (100000u)
  #define RPCBIND_PORT           (111u)
  
+#define RPCBVERS_2             (2u)
+#define RPCBVERS_3             (3u)
+#define RPCBVERS_4             (4u)
+
  enum {
         RPCBPROC_NULL,
         RPCBPROC_SET,
@@ -64,6 +68,7 @@ enum {
  #define RPCB_MAXOWNERLEN       sizeof(RPCB_OWNER_STRING)
  
  static void                    rpcb_getport_done(struct rpc_task *, void *);
+static void                    rpcb_map_release(void *data);
  static struct rpc_program      rpcb_program;
  
  struct rpcbind_args {
@@ -76,41 +81,73 @@ struct rpcbind_args {
         const char *            r_netid;
         const char *            r_addr;
         const char *            r_owner;
+
+       int                     r_status;
  };
  
  static struct rpc_procinfo rpcb_procedures2[];
  static struct rpc_procinfo rpcb_procedures3[];
+static struct rpc_procinfo rpcb_procedures4[];
  
  struct rpcb_info {
-       int                     rpc_vers;
+       u32                     rpc_vers;
         struct rpc_procinfo *   rpc_proc;
  };
  
  static struct rpcb_info rpcb_next_version[];
  static struct rpcb_info rpcb_next_version6[];
  
+static const struct rpc_call_ops rpcb_getport_ops = {
+       .rpc_call_done          = rpcb_getport_done,
+       .rpc_release            = rpcb_map_release,
+};
+
+static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status)
+{
+       xprt_clear_binding(xprt);
+       rpc_wake_up_status(&xprt->binding, status);
+}
+
  static void rpcb_map_release(void *data)
  {
         struct rpcbind_args *map = data;
  
+       rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status);
         xprt_put(map->r_xprt);
         kfree(map);
  }
  
-static const struct rpc_call_ops rpcb_getport_ops = {
-       .rpc_call_done          = rpcb_getport_done,
-       .rpc_release            = rpcb_map_release,
+static const struct sockaddr_in rpcb_inaddr_loopback = {
+       .sin_family             = AF_INET,
+       .sin_addr.s_addr        = htonl(INADDR_LOOPBACK),
+       .sin_port               = htons(RPCBIND_PORT),
  };
  
-static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status)
+static const struct sockaddr_in6 rpcb_in6addr_loopback = {
+       .sin6_family            = AF_INET6,
+       .sin6_addr              = IN6ADDR_LOOPBACK_INIT,
+       .sin6_port              = htons(RPCBIND_PORT),
+};
+
+static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
+                                         size_t addrlen, u32 version)
  {
-       xprt_clear_binding(xprt);
-       rpc_wake_up_status(&xprt->binding, status);
+       struct rpc_create_args args = {
+               .protocol       = XPRT_TRANSPORT_UDP,
+               .address        = addr,
+               .addrsize       = addrlen,
+               .servername     = "localhost",
+               .program        = &rpcb_program,
+               .version        = version,
+               .authflavor     = RPC_AUTH_UNIX,
+               .flags          = RPC_CLNT_CREATE_NOPING,
+       };
+
+       return rpc_create(&args);
  }
  
  static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
-                                   size_t salen, int proto, u32 version,
-                                   int privileged)
+                                   size_t salen, int proto, u32 version)
  {
         struct rpc_create_args args = {
                 .protocol       = proto,
@@ -120,7 +157,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
                 .program        = &rpcb_program,
                 .version        = version,
                 .authflavor     = RPC_AUTH_UNIX,
-               .flags          = RPC_CLNT_CREATE_NOPING,
+               .flags          = (RPC_CLNT_CREATE_NOPING |
+                                       RPC_CLNT_CREATE_NONPRIVPORT),
         };
  
         switch (srvaddr->sa_family) {
@@ -134,29 +172,72 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
                 return NULL;
         }
  
-       if (!privileged)
-               args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
         return rpc_create(&args);
  }
  
+static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
+                             u32 version, struct rpc_message *msg,
+                             int *result)
+{
+       struct rpc_clnt *rpcb_clnt;
+       int error = 0;
+
+       *result = 0;
+
+       rpcb_clnt = rpcb_create_local(addr, addrlen, version);
+       if (!IS_ERR(rpcb_clnt)) {
+               error = rpc_call_sync(rpcb_clnt, msg, 0);
+               rpc_shutdown_client(rpcb_clnt);
+       } else
+               error = PTR_ERR(rpcb_clnt);
+
+       if (error < 0)
+               printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+                               "server (errno %d).\n", -error);
+       dprintk("RPC:       registration status %d/%d\n", error, *result);
+
+       return error;
+}
+
  /**
   * rpcb_register - set or unset a port registration with the local rpcbind svc
   * @prog: RPC program number to bind
   * @vers: RPC version number to bind
- * @prot: transport protocol to use to make this request
+ * @prot: transport protocol to register
   * @port: port value to register
- * @okay: result code
+ * @okay: OUT: result code
+ *
+ * RPC services invoke this function to advertise their contact
+ * information via the system's rpcbind daemon.  RPC services
+ * invoke this function once for each [program, version, transport]
+ * tuple they wish to advertise.
+ *
+ * Callers may also unregister RPC services that are no longer
+ * available by setting the passed-in port to zero.  This removes
+ * all registered transports for [program, version] from the local
+ * rpcbind database.
+ *
+ * Returns zero if the registration request was dispatched
+ * successfully and a reply was received.  The rpcbind daemon's
+ * boolean result code is stored in *okay.
+ *
+ * Returns an errno value and sets *okay to zero if there was
+ * some problem that prevented the rpcbind request from being
+ * dispatched, or if the rpcbind daemon did not respond within
+ * the timeout.
   *
- * port == 0 means unregister, port != 0 means register.
+ * This function uses rpcbind protocol version 2 to contact the
+ * local rpcbind daemon.
   *
- * This routine supports only rpcbind version 2.
+ * Registration works over both AF_INET and AF_INET6, and services
+ * registered via this function are advertised as available for any
+ * address.  If the local rpcbind daemon is listening on AF_INET6,
+ * services registered via this function will be advertised on
+ * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6
+ * addresses).
   */
  int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
  {
-       struct sockaddr_in sin = {
-               .sin_family             = AF_INET,
-               .sin_addr.s_addr        = htonl(INADDR_LOOPBACK),
-       };
         struct rpcbind_args map = {
                 .r_prog         = prog,
                 .r_vers         = vers,
@@ -164,32 +245,159 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
                 .r_port         = port,
         };
         struct rpc_message msg = {
-               .rpc_proc       = &rpcb_procedures2[port ?
-                                       RPCBPROC_SET : RPCBPROC_UNSET],
                 .rpc_argp       = &map,
                 .rpc_resp       = okay,
         };
-       struct rpc_clnt *rpcb_clnt;
-       int error = 0;
  
         dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "
                         "rpcbind\n", (port ? "" : "un"),
                         prog, vers, prot, port);
  
-       rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
-                               sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1);
-       if (IS_ERR(rpcb_clnt))
-               return PTR_ERR(rpcb_clnt);
+       msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET];
+       if (port)
+               msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
  
-       error = rpc_call_sync(rpcb_clnt, &msg, 0);
+       return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
+                                       sizeof(rpcb_inaddr_loopback),
+                                       RPCBVERS_2, &msg, okay);
+}
  
-       rpc_shutdown_client(rpcb_clnt);
-       if (error < 0)
-               printk(KERN_WARNING "RPC: failed to contact local rpcbind "
-                               "server (errno %d).\n", -error);
-       dprintk("RPC:       registration status %d/%d\n", error, *okay);
+/*
+ * Fill in AF_INET family-specific arguments to register
+ */
+static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
+                               struct rpc_message *msg)
+{
+       struct rpcbind_args *map = msg->rpc_argp;
+       unsigned short port = ntohs(address_to_register->sin_port);
+       char buf[32];
+
+       /* Construct AF_INET universal address */
+       snprintf(buf, sizeof(buf),
+                       NIPQUAD_FMT".%u.%u",
+                       NIPQUAD(address_to_register->sin_addr.s_addr),
+                       port >> 8, port & 0xff);
+       map->r_addr = buf;
+
+       dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with "
+               "local rpcbind\n", (port ? "" : "un"),
+                       map->r_prog, map->r_vers,
+                       map->r_addr, map->r_netid);
+
+       msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
+       if (port)
+               msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+
+       return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
+                                       sizeof(rpcb_inaddr_loopback),
+                                       RPCBVERS_4, msg, msg->rpc_resp);
+}
  
-       return error;
+/*
+ * Fill in AF_INET6 family-specific arguments to register
+ */
+static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
+                               struct rpc_message *msg)
+{
+       struct rpcbind_args *map = msg->rpc_argp;
+       unsigned short port = ntohs(address_to_register->sin6_port);
+       char buf[64];
+
+       /* Construct AF_INET6 universal address */
+       snprintf(buf, sizeof(buf),
+                       NIP6_FMT".%u.%u",
+                       NIP6(address_to_register->sin6_addr),
+                       port >> 8, port & 0xff);
+       map->r_addr = buf;
+
+       dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with "
+               "local rpcbind\n", (port ? "" : "un"),
+                       map->r_prog, map->r_vers,
+                       map->r_addr, map->r_netid);
+
+       msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
+       if (port)
+               msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+
+       return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
+                                       sizeof(rpcb_in6addr_loopback),
+                                       RPCBVERS_4, msg, msg->rpc_resp);
+}
+
+/**
+ * rpcb_v4_register - set or unset a port registration with the local rpcbind
+ * @program: RPC program number of service to (un)register
+ * @version: RPC version number of service to (un)register
+ * @address: address family, IP address, and port to (un)register
+ * @netid: netid of transport protocol to (un)register
+ * @result: result code from rpcbind RPC call
+ *
+ * RPC services invoke this function to advertise their contact
+ * information via the system's rpcbind daemon.  RPC services
+ * invoke this function once for each [program, version, address,
+ * netid] tuple they wish to advertise.
+ *
+ * Callers may also unregister RPC services that are no longer
+ * available by setting the port number in the passed-in address
+ * to zero.  Callers pass a netid of "" to unregister all
+ * transport netids associated with [program, version, address].
+ *
+ * Returns zero if the registration request was dispatched
+ * successfully and a reply was received.  The rpcbind daemon's
+ * result code is stored in *result.
+ *
+ * Returns an errno value and sets *result to zero if there was
+ * some problem that prevented the rpcbind request from being
+ * dispatched, or if the rpcbind daemon did not respond within
+ * the timeout.
+ *
+ * This function uses rpcbind protocol version 4 to contact the
+ * local rpcbind daemon.  The local rpcbind daemon must support
+ * version 4 of the rpcbind protocol in order for these functions
+ * to register a service successfully.
+ *
+ * Supported netids include "udp" and "tcp" for UDP and TCP over
+ * IPv4, and "udp6" and "tcp6" for UDP and TCP over IPv6,
+ * respectively.
+ *
+ * The contents of @address determine the address family and the
+ * port to be registered.  The usual practice is to pass INADDR_ANY
+ * as the raw address, but specifying a non-zero address is also
+ * supported by this API if the caller wishes to advertise an RPC
+ * service on a specific network interface.
+ *
+ * Note that passing in INADDR_ANY does not create the same service
+ * registration as IN6ADDR_ANY.  The former advertises an RPC
+ * service on any IPv4 address, but not on IPv6.  The latter
+ * advertises the service on all IPv4 and IPv6 addresses.
+ */
+int rpcb_v4_register(const u32 program, const u32 version,
+                    const struct sockaddr *address, const char *netid,
+                    int *result)
+{
+       struct rpcbind_args map = {
+               .r_prog         = program,
+               .r_vers         = version,
+               .r_netid        = netid,
+               .r_owner        = RPCB_OWNER_STRING,
+       };
+       struct rpc_message msg = {
+               .rpc_argp       = &map,
+               .rpc_resp       = result,
+       };
+
+       *result = 0;
+
+       switch (address->sa_family) {
+       case AF_INET:
+               return rpcb_register_netid4((struct sockaddr_in *)address,
+                                           &msg);
+       case AF_INET6:
+               return rpcb_register_netid6((struct sockaddr_in6 *)address,
+                                           &msg);
+       }
+
+       return -EAFNOSUPPORT;
  }
  
  /**
@@ -227,7 +435,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
                 __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
  
         rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
-                               sizeof(*sin), prot, 2, 0);
+                               sizeof(*sin), prot, RPCBVERS_2);
         if (IS_ERR(rpcb_clnt))
                 return PTR_ERR(rpcb_clnt);
  
@@ -289,17 +497,16 @@ void rpcb_getport_async(struct rpc_task *task)
         /* Autobind on cloned rpc clients is discouraged */
         BUG_ON(clnt->cl_parent != clnt);
  
+       /* Put self on the wait queue to ensure we get notified if
+        * some other task is already attempting to bind the port */
+       rpc_sleep_on(&xprt->binding, task, NULL);
+
         if (xprt_test_and_set_binding(xprt)) {
-               status = -EAGAIN;       /* tell caller to check again */
                 dprintk("RPC: %5u %s: waiting for another binder\n",
                         task->tk_pid, __func__);
-               goto bailout_nowake;
+               return;
         }
  
-       /* Put self on queue before sending rpcbind request, in case
-        * rpcb_getport_done completes before we return from rpc_run_task */
-       rpc_sleep_on(&xprt->binding, task, NULL);
-
         /* Someone else may have bound if we slept */
         if (xprt_bound(xprt)) {
                 status = 0;
@@ -338,7 +545,7 @@ void rpcb_getport_async(struct rpc_task *task)
                 task->tk_pid, __func__, bind_version);
  
         rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot,
-                               bind_version, 0);
+                               bind_version);
         if (IS_ERR(rpcb_clnt)) {
                 status = PTR_ERR(rpcb_clnt);
                 dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
@@ -361,15 +568,15 @@ void rpcb_getport_async(struct rpc_task *task)
         map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
         map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
         map->r_owner = RPCB_OWNER_STRING;       /* ignored for GETADDR */
+       map->r_status = -EIO;
  
         child = rpcb_call_async(rpcb_clnt, map, proc);
         rpc_release_client(rpcb_clnt);
         if (IS_ERR(child)) {
-               status = -EIO;
                 /* rpcb_map_release() has freed the arguments */
                 dprintk("RPC: %5u %s: rpc_run_task failed\n",
                         task->tk_pid, __func__);
-               goto bailout_nofree;
+               return;
         }
         rpc_put_task(child);
  
@@ -378,7 +585,6 @@ void rpcb_getport_async(struct rpc_task *task)
  
  bailout_nofree:
         rpcb_wake_rpcbind_waiters(xprt, status);
-bailout_nowake:
         task->tk_status = status;
  }
  EXPORT_SYMBOL_GPL(rpcb_getport_async);
@@ -417,9 +623,13 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
         dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n",
                         child->tk_pid, status, map->r_port);
  
-       rpcb_wake_rpcbind_waiters(xprt, status);
+       map->r_status = status;
  }
  
+/*
+ * XDR functions for rpcbind
+ */
+
  static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
                                struct rpcbind_args *rpcb)
  {
@@ -438,7 +648,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
                                unsigned short *portp)
  {
         *portp = (unsigned short) ntohl(*p++);
-       dprintk("RPC:      rpcb_decode_getport result %u\n",
+       dprintk("RPC:       rpcb_decode_getport result %u\n",
                         *portp);
         return 0;
  }
@@ -447,8 +657,8 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
                            unsigned int *boolp)
  {
         *boolp = (unsigned int) ntohl(*p++);
-       dprintk("RPC:      rpcb_decode_set result %u\n",
-                       *boolp);
+       dprintk("RPC:       rpcb_decode_set: call %s\n",
+                       (*boolp ? "succeeded" : "failed"));
         return 0;
  }
  
@@ -571,52 +781,60 @@ out_err:
  static struct rpc_procinfo rpcb_procedures2[] = {
         PROC(SET,               mapping,        set),
         PROC(UNSET,             mapping,        set),
-       PROC(GETADDR,           mapping,        getport),
+       PROC(GETPORT,           mapping,        getport),
  };
  
  static struct rpc_procinfo rpcb_procedures3[] = {
-       PROC(SET,               mapping,        set),
-       PROC(UNSET,             mapping,        set),
+       PROC(SET,               getaddr,        set),
+       PROC(UNSET,             getaddr,        set),
         PROC(GETADDR,           getaddr,        getaddr),
  };
  
  static struct rpc_procinfo rpcb_procedures4[] = {
-       PROC(SET,               mapping,        set),
-       PROC(UNSET,             mapping,        set),
+       PROC(SET,               getaddr,        set),
+       PROC(UNSET,             getaddr,        set),
+       PROC(GETADDR,           getaddr,        getaddr),
         PROC(GETVERSADDR,       getaddr,        getaddr),
  };
  
  static struct rpcb_info rpcb_next_version[] = {
-#ifdef CONFIG_SUNRPC_BIND34
-       { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
-       { 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
-#endif
-       { 2, &rpcb_procedures2[RPCBPROC_GETPORT] },
-       { 0, NULL },
+       {
+               .rpc_vers       = RPCBVERS_2,
+               .rpc_proc       = &rpcb_procedures2[RPCBPROC_GETPORT],
+       },
+       {
+               .rpc_proc       = NULL,
+       },
  };
  
  static struct rpcb_info rpcb_next_version6[] = {
-#ifdef CONFIG_SUNRPC_BIND34
-       { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
-       { 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
-#endif
-       { 0, NULL },
+       {
+               .rpc_vers       = RPCBVERS_4,
+               .rpc_proc       = &rpcb_procedures4[RPCBPROC_GETADDR],
+       },
+       {
+               .rpc_vers       = RPCBVERS_3,
+               .rpc_proc       = &rpcb_procedures3[RPCBPROC_GETADDR],
+       },
+       {
+               .rpc_proc       = NULL,
+       },
  };
  
  static struct rpc_version rpcb_version2 = {
-       .number         = 2,
+       .number         = RPCBVERS_2,
         .nrprocs        = RPCB_HIGHPROC_2,
         .procs          = rpcb_procedures2
  };
  
  static struct rpc_version rpcb_version3 = {
-       .number         = 3,
+       .number         = RPCBVERS_3,
         .nrprocs        = RPCB_HIGHPROC_3,
         .procs          = rpcb_procedures3
  };
  
  static struct rpc_version rpcb_version4 = {
-       .number         = 4,
+       .number         = RPCBVERS_4,
         .nrprocs        = RPCB_HIGHPROC_4,
         .procs          = rpcb_procedures4
  };
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c

index 6eab9bf94baf3ebef9c2b1300ffe8505c3694a78..385f427bedad5e8e8ca51088cb51963da4f2019c 100644 (file)
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -576,9 +576,7 @@ EXPORT_SYMBOL_GPL(rpc_delay);
   */
  static void rpc_prepare_task(struct rpc_task *task)
  {
-       lock_kernel();
         task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
-       unlock_kernel();
  }
  
  /*
@@ -588,9 +586,7 @@ void rpc_exit_task(struct rpc_task *task)
  {
         task->tk_action = NULL;
         if (task->tk_ops->rpc_call_done != NULL) {
-               lock_kernel();
                 task->tk_ops->rpc_call_done(task, task->tk_calldata);
-               unlock_kernel();
                 if (task->tk_action != NULL) {
                         WARN_ON(RPC_ASSASSINATED(task));
                         /* Always release the RPC slot and buffer memory */
@@ -602,11 +598,8 @@ EXPORT_SYMBOL_GPL(rpc_exit_task);
  
  void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
  {
-       if (ops->rpc_release != NULL) {
-               lock_kernel();
+       if (ops->rpc_release != NULL)
                 ops->rpc_release(calldata);
-               unlock_kernel();
-       }
  }
  
  /*
@@ -626,19 +619,15 @@ static void __rpc_execute(struct rpc_task *task)
                 /*
                  * Execute any pending callback.
                  */
-               if (RPC_DO_CALLBACK(task)) {
-                       /* Define a callback save pointer */
+               if (task->tk_callback) {
                         void (*save_callback)(struct rpc_task *);
  
                         /*
-                        * If a callback exists, save it, reset it,
-                        * call it.
-                        * The save is needed to stop from resetting
-                        * another callback set within the callback handler
-                        * - Dave
+                        * We set tk_callback to NULL before calling it,
+                        * in case it sets the tk_callback field itself:
                          */
-                       save_callback=task->tk_callback;
-                       task->tk_callback=NULL;
+                       save_callback = task->tk_callback;
+                       task->tk_callback = NULL;
                         save_callback(task);
                 }
  
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c

index e1770f7ba0b3ec8a9e1d861ff8a2b950000fc8bb..99a52aabe332a70d881a427dd1beba36e9d1d87a 100644 (file)
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -690,7 +690,7 @@ static void xprt_connect_status(struct rpc_task *task)
  {
         struct rpc_xprt *xprt = task->tk_xprt;
  
-       if (task->tk_status >= 0) {
+       if (task->tk_status == 0) {
                 xprt->stat.connect_count++;
                 xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
                 dprintk("RPC: %5u xprt_connect_status: connection established\n",
@@ -699,12 +699,6 @@ static void xprt_connect_status(struct rpc_task *task)
         }
  
         switch (task->tk_status) {
-       case -ECONNREFUSED:
-       case -ECONNRESET:
-               dprintk("RPC: %5u xprt_connect_status: server %s refused "
-                               "connection\n", task->tk_pid,
-                               task->tk_client->cl_server);
-               break;
         case -ENOTCONN:
                 dprintk("RPC: %5u xprt_connect_status: connection broken\n",
                                 task->tk_pid);
@@ -878,6 +872,7 @@ void xprt_transmit(struct rpc_task *task)
                 return;
  
         req->rq_connect_cookie = xprt->connect_cookie;
+       req->rq_xtime = jiffies;
         status = xprt->ops->send_request(task);
         if (status == 0) {
                 dprintk("RPC: %5u xmit complete\n", task->tk_pid);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c

index ddbe981ab516a48e42ee289bf0e8ceca88ff7a7e..4486c59c3aca87d6102a372255246a326b9efdc6 100644 (file)
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -579,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task)
                                 req->rq_svec->iov_base,
                                 req->rq_svec->iov_len);
  
-       req->rq_xtime = jiffies;
         status = xs_sendpages(transport->sock,
                               xs_addr(xprt),
                               xprt->addrlen, xdr,
@@ -671,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task)
          * to cope with writespace callbacks arriving _after_ we have
          * called sendmsg(). */
         while (1) {
-               req->rq_xtime = jiffies;
                 status = xs_sendpages(transport->sock,
                                         NULL, 0, xdr, req->rq_bytes_sent);
  
diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst

index 3d2f4609578fdbb2f2f84d3b117925e8691d2d78..c972c0f54ce0aba88771ef2a727b425ad50abd83 100644 (file)
--- a/scripts/Makefile.fwinst
+++ b/scripts/Makefile.fwinst
@@ -28,18 +28,39 @@ endif
  installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all))
  installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/.
  
+# Workaround for make < 3.81, where .SECONDEXPANSION doesn't work.
+PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs
+$(INSTALL_FW_PATH)/$$(%): install-all-dirs
+       @true
+install-all-dirs: $(installed-fw-dirs)
+       @true
+
  quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@)
        cmd_install = $(INSTALL) -m0644 $< $@
  
  $(installed-fw-dirs):
         $(call cmd,mkdir)
  
-$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)/
+$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)
         $(call cmd,install)
  
-.PHONY: __fw_install __fw_modinst FORCE
+PHONY +=  __fw_install __fw_modinst FORCE
+
+.PHONY: $(PHONY)
  
  __fw_install: $(installed-fw)
  __fw_modinst: $(mod-fw)
  
  FORCE:
+
+# Read all saved command lines and dependencies for the $(targets) we
+# may be building using $(if_changed{,_dep}). As an optimization, we
+# don't need to read them if the target does not exist; we will rebuild
+# anyway in that case.
+
+targets := $(wildcard $(sort $(targets)))
+cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
+
+ifneq ($(cmd_files),)
+  include $(cmd_files)
+endif
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 17 Jul 2008 17:38:59 +0000 (10:38 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 17 Jul 2008 17:38:59 +0000 (10:38 -0700)