Merge branch 'devicetree/merge' of git://git.secretlab.ca/git/linux-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 4 Nov 2011 14:49:29 +0000 (07:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 4 Nov 2011 14:49:29 +0000 (07:49 -0700)
* 'devicetree/merge' of git://git.secretlab.ca/git/linux-2.6:
  dt: add empty of_machine_is_compatible
  ahci: add DT binding for Calxeda AHCI controller
  dt/platform: minor cleanup
  dt: add empty of_alias_get_id() for non-dt builds

932 files changed:
Documentation/DMA-API.txt
Documentation/cgroups/memory.txt
Documentation/device-mapper/dm-log.txt
Documentation/device-mapper/persistent-data.txt [new file with mode: 0644]
Documentation/device-mapper/thin-provisioning.txt [new file with mode: 0644]
Documentation/devicetree/bindings/virtio/mmio.txt [new file with mode: 0644]
Documentation/feature-removal-schedule.txt
Documentation/filesystems/Locking
Documentation/filesystems/ext3.txt
Documentation/filesystems/ext4.txt
Documentation/hwspinlock.txt
Documentation/networking/ipvs-sysctl.txt
Documentation/rapidio/rapidio.txt
Documentation/rapidio/tsi721.txt [new file with mode: 0644]
Documentation/virtual/uml/UserModeLinux-HOWTO.txt
MAINTAINERS
arch/arm/mach-imx/mach-mx27_3ds.c
arch/arm/mach-imx/mach-mx31_3ds.c
arch/arm/mach-imx/mach-mx31lite.c
arch/arm/mach-imx/mach-mx31moboard.c
arch/arm/mach-imx/mach-pcm038.c
arch/arm/mach-mx5/mx51_efika.c
arch/arm/mach-omap2/Kconfig
arch/arm/mach-omap2/board-omap3beagle.c
arch/arm/mach-omap2/hwspinlock.c
arch/arm/mach-u300/i2c.c
arch/arm/mach-u300/include/mach/irqs.h
arch/arm/mach-ux500/board-u5500.c
arch/arm/mach-ux500/cpu.c
arch/ia64/Kconfig
arch/ia64/configs/generic_defconfig
arch/ia64/configs/gensparse_defconfig
arch/ia64/configs/tiger_defconfig
arch/ia64/configs/xen_domu_defconfig
arch/ia64/configs/zx1_defconfig
arch/ia64/sn/kernel/sn2/sn_hwperf.c
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/alchemy/Kconfig
arch/mips/alchemy/common/Makefile
arch/mips/alchemy/common/dbdma.c
arch/mips/alchemy/common/dma.c
arch/mips/alchemy/common/gpiolib-au1000.c [deleted file]
arch/mips/alchemy/common/gpiolib.c [new file with mode: 0644]
arch/mips/alchemy/common/pci.c [deleted file]
arch/mips/alchemy/common/platform.c
arch/mips/alchemy/common/power.c
arch/mips/alchemy/common/setup.c
arch/mips/alchemy/devboards/db1200/platform.c
arch/mips/alchemy/devboards/db1x00/board_setup.c
arch/mips/alchemy/devboards/db1x00/platform.c
arch/mips/alchemy/devboards/pb1100/platform.c
arch/mips/alchemy/devboards/pb1200/platform.c
arch/mips/alchemy/devboards/pb1500/board_setup.c
arch/mips/alchemy/devboards/pb1500/platform.c
arch/mips/alchemy/devboards/pb1550/board_setup.c
arch/mips/alchemy/devboards/pb1550/platform.c
arch/mips/alchemy/gpr/board_setup.c
arch/mips/alchemy/gpr/platform.c
arch/mips/alchemy/mtx-1/board_setup.c
arch/mips/alchemy/mtx-1/platform.c
arch/mips/alchemy/xxs1500/board_setup.c
arch/mips/alchemy/xxs1500/platform.c
arch/mips/include/asm/cacheflush.h
arch/mips/include/asm/cpu.h
arch/mips/include/asm/io.h
arch/mips/include/asm/mach-au1x00/au1000.h
arch/mips/include/asm/mach-au1x00/au1xxx.h [deleted file]
arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
arch/mips/include/asm/mach-au1x00/au1xxx_ide.h
arch/mips/include/asm/mach-au1x00/au1xxx_psc.h
arch/mips/include/asm/mach-au1x00/gpio-au1000.h
arch/mips/include/asm/mach-au1x00/gpio.h
arch/mips/include/asm/mach-db1x00/db1200.h
arch/mips/include/asm/mach-db1x00/db1x00.h
arch/mips/include/asm/mach-pb1x00/pb1200.h
arch/mips/include/asm/mach-pb1x00/pb1550.h
arch/mips/include/asm/mipsprom.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/asm/prom.h
arch/mips/include/asm/regdef.h
arch/mips/jz4740/gpio.c
arch/mips/jz4740/irq.c
arch/mips/jz4740/irq.h
arch/mips/jz4740/pm.c
arch/mips/kernel/Makefile
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/perf_event.c
arch/mips/kernel/perf_event_mipsxx.c
arch/mips/kernel/scall32-o32.S
arch/mips/mm/c-octeon.c
arch/mips/mm/c-r3k.c
arch/mips/mm/c-r4k.c
arch/mips/mm/c-tx39.c
arch/mips/mm/cache.c
arch/mips/mm/tlb-r3k.c
arch/mips/mm/tlb-r4k.c
arch/mips/netlogic/Platform
arch/mips/netlogic/xlr/setup.c
arch/mips/netlogic/xlr/smp.c
arch/mips/netlogic/xlr/smpboot.S
arch/mips/pci/Makefile
arch/mips/pci/fixup-au1000.c [deleted file]
arch/mips/pci/ops-au1000.c [deleted file]
arch/mips/pci/pci-alchemy.c [new file with mode: 0644]
arch/mips/pmc-sierra/msp71xx/msp_setup.c
arch/mips/pmc-sierra/yosemite/py-console.c
arch/mips/pnx8550/common/prom.c
arch/mips/sgi-ip27/ip27-irq.c
arch/powerpc/mm/gup.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/sysdev/fsl_rio.c
arch/s390/hypfs/inode.c
arch/s390/mm/gup.c
arch/sh/Kconfig
arch/sh/Makefile
arch/sh/boards/board-espt.c
arch/sh/boards/board-secureedge5410.c
arch/sh/boards/board-sh7757lcr.c
arch/sh/boards/mach-cayman/irq.c
arch/sh/boards/mach-ecovec24/setup.c
arch/sh/boards/mach-hp6xx/hp6xx_apm.c
arch/sh/boards/mach-se/7724/setup.c
arch/sh/boards/mach-sh7763rdp/setup.c
arch/sh/boot/Makefile
arch/sh/drivers/dma/dma-g2.c
arch/sh/drivers/dma/dma-pvr2.c
arch/sh/drivers/dma/dma-sh.c
arch/sh/drivers/dma/dmabrg.c
arch/sh/drivers/pci/pci-sh5.c
arch/sh/drivers/pci/pci-sh7780.c
arch/sh/drivers/push-switch.c
arch/sh/include/asm/page.h
arch/sh/kernel/cpu/sh4a/clock-sh7757.c
arch/sh/kernel/cpu/sh4a/smp-shx3.c
arch/sh/kernel/setup.c
arch/sh/kernel/topology.c
arch/sh/kernel/vmlinux.lds.S
arch/sh/mm/init.c
arch/sparc/mm/gup.c
arch/um/Kconfig.char
arch/um/Kconfig.rest
arch/um/Kconfig.um
arch/um/Kconfig.x86 [deleted file]
arch/um/Makefile
arch/um/Makefile-i386 [deleted file]
arch/um/Makefile-x86_64 [deleted file]
arch/um/drivers/chan.h [new file with mode: 0644]
arch/um/drivers/chan_kern.c
arch/um/drivers/chan_user.c
arch/um/drivers/chan_user.h [new file with mode: 0644]
arch/um/drivers/cow_sys.h
arch/um/drivers/daemon_user.c
arch/um/drivers/fd.c
arch/um/drivers/harddog_user.c
arch/um/drivers/line.c
arch/um/drivers/line.h [new file with mode: 0644]
arch/um/drivers/mconsole.h [new file with mode: 0644]
arch/um/drivers/mconsole_kern.h [new file with mode: 0644]
arch/um/drivers/mconsole_user.c
arch/um/drivers/net_user.c
arch/um/drivers/pcap_user.c
arch/um/drivers/port_user.c
arch/um/drivers/pty.c
arch/um/drivers/slip_user.c
arch/um/drivers/slirp_user.c
arch/um/drivers/ssl.c
arch/um/drivers/stdio_console.c
arch/um/drivers/tty.c
arch/um/drivers/ubd_kern.c
arch/um/drivers/ubd_user.c
arch/um/drivers/ubd_user.h [new file with mode: 0644]
arch/um/drivers/umcast_user.c
arch/um/drivers/vde_user.c
arch/um/drivers/xterm.c
arch/um/include/asm/Kbuild [new file with mode: 0644]
arch/um/include/asm/apic.h [deleted file]
arch/um/include/asm/arch_hweight.h [deleted file]
arch/um/include/asm/bug.h [deleted file]
arch/um/include/asm/checksum.h [deleted file]
arch/um/include/asm/cputime.h [deleted file]
arch/um/include/asm/desc.h [deleted file]
arch/um/include/asm/device.h [deleted file]
arch/um/include/asm/emergency-restart.h [deleted file]
arch/um/include/asm/ftrace.h [deleted file]
arch/um/include/asm/futex.h [deleted file]
arch/um/include/asm/hardirq.h [deleted file]
arch/um/include/asm/hw_irq.h [deleted file]
arch/um/include/asm/irq_regs.h [deleted file]
arch/um/include/asm/irq_vectors.h [deleted file]
arch/um/include/asm/irqflags.h
arch/um/include/asm/kdebug.h [deleted file]
arch/um/include/asm/mmu.h
arch/um/include/asm/mmu_context.h
arch/um/include/asm/page.h
arch/um/include/asm/page_offset.h [deleted file]
arch/um/include/asm/pda.h [deleted file]
arch/um/include/asm/percpu.h [deleted file]
arch/um/include/asm/ptrace-generic.h
arch/um/include/asm/required-features.h [deleted file]
arch/um/include/asm/sections.h [deleted file]
arch/um/include/asm/segment.h [deleted file]
arch/um/include/asm/system.h [deleted file]
arch/um/include/asm/topology.h [deleted file]
arch/um/include/asm/uaccess.h
arch/um/include/asm/xor.h [deleted file]
arch/um/include/shared/as-layout.h
arch/um/include/shared/chan_kern.h [deleted file]
arch/um/include/shared/chan_user.h [deleted file]
arch/um/include/shared/common-offsets.h
arch/um/include/shared/initrd.h [deleted file]
arch/um/include/shared/kern.h
arch/um/include/shared/kern_util.h
arch/um/include/shared/ldt.h [deleted file]
arch/um/include/shared/line.h [deleted file]
arch/um/include/shared/mconsole.h [deleted file]
arch/um/include/shared/mconsole_kern.h [deleted file]
arch/um/include/shared/mem_kern.h [deleted file]
arch/um/include/shared/os.h
arch/um/include/shared/process.h [deleted file]
arch/um/include/shared/ptrace_user.h
arch/um/include/shared/skas_ptregs.h [deleted file]
arch/um/include/shared/syscall.h [deleted file]
arch/um/include/shared/task.h [deleted file]
arch/um/include/shared/tlb.h [deleted file]
arch/um/include/shared/ubd_user.h [deleted file]
arch/um/include/shared/um_malloc.h
arch/um/include/shared/um_mmu.h [deleted file]
arch/um/include/shared/um_uaccess.h [deleted file]
arch/um/include/shared/user.h
arch/um/kernel/Makefile
arch/um/kernel/exec.c
arch/um/kernel/gmon_syms.c
arch/um/kernel/initrd.c
arch/um/kernel/irq.c
arch/um/kernel/ksyms.c
arch/um/kernel/mem.c
arch/um/kernel/physmem.c
arch/um/kernel/process.c
arch/um/kernel/signal.c
arch/um/kernel/skas/clone.c
arch/um/kernel/skas/uaccess.c
arch/um/kernel/tlb.c
arch/um/kernel/trap.c
arch/um/kernel/uaccess.c [deleted file]
arch/um/kernel/um_arch.c
arch/um/os-Linux/Makefile
arch/um/os-Linux/aio.c
arch/um/os-Linux/drivers/ethertap_user.c
arch/um/os-Linux/drivers/tuntap_user.c
arch/um/os-Linux/elf_aux.c
arch/um/os-Linux/file.c
arch/um/os-Linux/helper.c
arch/um/os-Linux/internal.h [new file with mode: 0644]
arch/um/os-Linux/irq.c
arch/um/os-Linux/main.c
arch/um/os-Linux/mem.c
arch/um/os-Linux/process.c
arch/um/os-Linux/sigio.c
arch/um/os-Linux/signal.c
arch/um/os-Linux/skas/mem.c
arch/um/os-Linux/skas/process.c
arch/um/os-Linux/start_up.c
arch/um/os-Linux/sys-i386/Makefile [deleted file]
arch/um/os-Linux/sys-i386/registers.c [deleted file]
arch/um/os-Linux/sys-i386/signal.c [deleted file]
arch/um/os-Linux/sys-i386/task_size.c [deleted file]
arch/um/os-Linux/sys-i386/tls.c [deleted file]
arch/um/os-Linux/sys-x86_64/Makefile [deleted file]
arch/um/os-Linux/sys-x86_64/prctl.c [deleted file]
arch/um/os-Linux/sys-x86_64/registers.c [deleted file]
arch/um/os-Linux/sys-x86_64/signal.c [deleted file]
arch/um/os-Linux/sys-x86_64/task_size.c [deleted file]
arch/um/os-Linux/time.c
arch/um/os-Linux/tls.c [deleted file]
arch/um/os-Linux/tty.c
arch/um/os-Linux/uaccess.c [deleted file]
arch/um/os-Linux/umid.c
arch/um/os-Linux/util.c
arch/um/scripts/Makefile.rules
arch/um/sys-i386/Makefile [deleted file]
arch/um/sys-i386/asm/archparam.h [deleted file]
arch/um/sys-i386/asm/elf.h [deleted file]
arch/um/sys-i386/asm/module.h [deleted file]
arch/um/sys-i386/asm/processor.h [deleted file]
arch/um/sys-i386/asm/ptrace.h [deleted file]
arch/um/sys-i386/atomic64_cx8_32.S [deleted file]
arch/um/sys-i386/bug.c [deleted file]
arch/um/sys-i386/bugs.c [deleted file]
arch/um/sys-i386/checksum.S [deleted file]
arch/um/sys-i386/delay.c [deleted file]
arch/um/sys-i386/elfcore.c [deleted file]
arch/um/sys-i386/fault.c [deleted file]
arch/um/sys-i386/ksyms.c [deleted file]
arch/um/sys-i386/ldt.c [deleted file]
arch/um/sys-i386/mem.c [deleted file]
arch/um/sys-i386/ptrace.c [deleted file]
arch/um/sys-i386/ptrace_user.c [deleted file]
arch/um/sys-i386/setjmp.S [deleted file]
arch/um/sys-i386/shared/sysdep/archsetjmp.h [deleted file]
arch/um/sys-i386/shared/sysdep/barrier.h [deleted file]
arch/um/sys-i386/shared/sysdep/checksum.h [deleted file]
arch/um/sys-i386/shared/sysdep/faultinfo.h [deleted file]
arch/um/sys-i386/shared/sysdep/host_ldt.h [deleted file]
arch/um/sys-i386/shared/sysdep/kernel-offsets.h [deleted file]
arch/um/sys-i386/shared/sysdep/ptrace.h [deleted file]
arch/um/sys-i386/shared/sysdep/ptrace_user.h [deleted file]
arch/um/sys-i386/shared/sysdep/sc.h [deleted file]
arch/um/sys-i386/shared/sysdep/sigcontext.h [deleted file]
arch/um/sys-i386/shared/sysdep/skas_ptrace.h [deleted file]
arch/um/sys-i386/shared/sysdep/stub.h [deleted file]
arch/um/sys-i386/shared/sysdep/syscalls.h [deleted file]
arch/um/sys-i386/shared/sysdep/system.h [deleted file]
arch/um/sys-i386/shared/sysdep/tls.h [deleted file]
arch/um/sys-i386/shared/sysdep/vm-flags.h [deleted file]
arch/um/sys-i386/signal.c [deleted file]
arch/um/sys-i386/stub.S [deleted file]
arch/um/sys-i386/stub_segv.c [deleted file]
arch/um/sys-i386/sys_call_table.S [deleted file]
arch/um/sys-i386/syscalls.c [deleted file]
arch/um/sys-i386/sysrq.c [deleted file]
arch/um/sys-i386/tls.c [deleted file]
arch/um/sys-i386/user-offsets.c [deleted file]
arch/um/sys-x86_64/Makefile [deleted file]
arch/um/sys-x86_64/asm/archparam.h [deleted file]
arch/um/sys-x86_64/asm/elf.h [deleted file]
arch/um/sys-x86_64/asm/module.h [deleted file]
arch/um/sys-x86_64/asm/processor.h [deleted file]
arch/um/sys-x86_64/asm/ptrace.h [deleted file]
arch/um/sys-x86_64/bug.c [deleted file]
arch/um/sys-x86_64/bugs.c [deleted file]
arch/um/sys-x86_64/delay.c [deleted file]
arch/um/sys-x86_64/fault.c [deleted file]
arch/um/sys-x86_64/ksyms.c [deleted file]
arch/um/sys-x86_64/mem.c [deleted file]
arch/um/sys-x86_64/ptrace.c [deleted file]
arch/um/sys-x86_64/ptrace_user.c [deleted file]
arch/um/sys-x86_64/setjmp.S [deleted file]
arch/um/sys-x86_64/shared/sysdep/archsetjmp.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/barrier.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/checksum.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/faultinfo.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/host_ldt.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/ptrace.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/ptrace_user.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/sc.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/sigcontext.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/stub.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/syscalls.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/system.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/tls.h [deleted file]
arch/um/sys-x86_64/shared/sysdep/vm-flags.h [deleted file]
arch/um/sys-x86_64/signal.c [deleted file]
arch/um/sys-x86_64/stub.S [deleted file]
arch/um/sys-x86_64/stub_segv.c [deleted file]
arch/um/sys-x86_64/syscall_table.c [deleted file]
arch/um/sys-x86_64/syscalls.c [deleted file]
arch/um/sys-x86_64/sysrq.c [deleted file]
arch/um/sys-x86_64/tls.c [deleted file]
arch/um/sys-x86_64/user-offsets.c [deleted file]
arch/um/sys-x86_64/vdso/Makefile [deleted file]
arch/um/sys-x86_64/vdso/checkundef.sh [deleted file]
arch/um/sys-x86_64/vdso/um_vdso.c [deleted file]
arch/um/sys-x86_64/vdso/vdso-layout.lds.S [deleted file]
arch/um/sys-x86_64/vdso/vdso-note.S [deleted file]
arch/um/sys-x86_64/vdso/vdso.S [deleted file]
arch/um/sys-x86_64/vdso/vdso.lds.S [deleted file]
arch/um/sys-x86_64/vdso/vma.c [deleted file]
arch/x86/Makefile.um [new file with mode: 0644]
arch/x86/include/asm/intel_scu_ipc.h
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/mm/gup.c
arch/x86/platform/mrst/mrst.c
arch/x86/um/Kconfig [new file with mode: 0644]
arch/x86/um/Makefile [new file with mode: 0644]
arch/x86/um/asm/apic.h [new file with mode: 0644]
arch/x86/um/asm/arch_hweight.h [new file with mode: 0644]
arch/x86/um/asm/archparam.h [new file with mode: 0644]
arch/x86/um/asm/checksum.h [new file with mode: 0644]
arch/x86/um/asm/checksum_32.h [new file with mode: 0644]
arch/x86/um/asm/checksum_64.h [new file with mode: 0644]
arch/x86/um/asm/desc.h [new file with mode: 0644]
arch/x86/um/asm/elf.h [new file with mode: 0644]
arch/x86/um/asm/irq_vectors.h [new file with mode: 0644]
arch/x86/um/asm/mm_context.h [new file with mode: 0644]
arch/x86/um/asm/module.h [new file with mode: 0644]
arch/x86/um/asm/processor.h [new file with mode: 0644]
arch/x86/um/asm/processor_32.h [new file with mode: 0644]
arch/x86/um/asm/processor_64.h [new file with mode: 0644]
arch/x86/um/asm/ptrace.h [new file with mode: 0644]
arch/x86/um/asm/ptrace_32.h [new file with mode: 0644]
arch/x86/um/asm/ptrace_64.h [new file with mode: 0644]
arch/x86/um/asm/required-features.h [new file with mode: 0644]
arch/x86/um/asm/segment.h [new file with mode: 0644]
arch/x86/um/asm/system.h [new file with mode: 0644]
arch/x86/um/asm/vm-flags.h [new file with mode: 0644]
arch/x86/um/bug.c [new file with mode: 0644]
arch/x86/um/bugs_32.c [new file with mode: 0644]
arch/x86/um/bugs_64.c [new file with mode: 0644]
arch/x86/um/checksum_32.S [new file with mode: 0644]
arch/x86/um/delay.c [new file with mode: 0644]
arch/x86/um/elfcore.c [new file with mode: 0644]
arch/x86/um/fault.c [new file with mode: 0644]
arch/x86/um/ksyms.c [new file with mode: 0644]
arch/x86/um/ldt.c [new file with mode: 0644]
arch/x86/um/mem_32.c [new file with mode: 0644]
arch/x86/um/mem_64.c [new file with mode: 0644]
arch/x86/um/os-Linux/Makefile [new file with mode: 0644]
arch/x86/um/os-Linux/mcontext.c [new file with mode: 0644]
arch/x86/um/os-Linux/prctl.c [new file with mode: 0644]
arch/x86/um/os-Linux/registers.c [new file with mode: 0644]
arch/x86/um/os-Linux/task_size.c [new file with mode: 0644]
arch/x86/um/os-Linux/tls.c [new file with mode: 0644]
arch/x86/um/ptrace_32.c [new file with mode: 0644]
arch/x86/um/ptrace_64.c [new file with mode: 0644]
arch/x86/um/ptrace_user.c [new file with mode: 0644]
arch/x86/um/setjmp_32.S [new file with mode: 0644]
arch/x86/um/setjmp_64.S [new file with mode: 0644]
arch/x86/um/shared/sysdep/archsetjmp.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/archsetjmp_32.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/archsetjmp_64.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/faultinfo.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/faultinfo_32.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/faultinfo_64.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/kernel-offsets.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/mcontext.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/ptrace.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/ptrace_32.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/ptrace_64.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/ptrace_user.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/skas_ptrace.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/stub.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/stub_32.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/stub_64.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/syscalls.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/syscalls_32.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/syscalls_64.h [new file with mode: 0644]
arch/x86/um/shared/sysdep/tls.h [new file with mode: 0644]
arch/x86/um/signal.c [new file with mode: 0644]
arch/x86/um/stub_32.S [new file with mode: 0644]
arch/x86/um/stub_64.S [new file with mode: 0644]
arch/x86/um/stub_segv.c [new file with mode: 0644]
arch/x86/um/sys_call_table_32.S [new file with mode: 0644]
arch/x86/um/sys_call_table_64.c [new file with mode: 0644]
arch/x86/um/syscalls_32.c [new file with mode: 0644]
arch/x86/um/syscalls_64.c [new file with mode: 0644]
arch/x86/um/sysrq_32.c [new file with mode: 0644]
arch/x86/um/sysrq_64.c [new file with mode: 0644]
arch/x86/um/tls_32.c [new file with mode: 0644]
arch/x86/um/tls_64.c [new file with mode: 0644]
arch/x86/um/user-offsets.c [new file with mode: 0644]
arch/x86/um/vdso/Makefile [new file with mode: 0644]
arch/x86/um/vdso/checkundef.sh [new file with mode: 0644]
arch/x86/um/vdso/um_vdso.c [new file with mode: 0644]
arch/x86/um/vdso/vdso-layout.lds.S [new file with mode: 0644]
arch/x86/um/vdso/vdso-note.S [new file with mode: 0644]
arch/x86/um/vdso/vdso.S [new file with mode: 0644]
arch/x86/um/vdso/vdso.lds.S [new file with mode: 0644]
arch/x86/um/vdso/vma.c [new file with mode: 0644]
drivers/block/virtio_blk.c
drivers/char/Kconfig
drivers/char/agp/hp-agp.c
drivers/char/hw_random/Kconfig
drivers/char/ttyprintk.c
drivers/char/virtio_console.c
drivers/cpufreq/db8500-cpufreq.c
drivers/cpufreq/e_powersaver.c
drivers/cpufreq/exynos4210-cpufreq.c
drivers/edac/Kconfig
drivers/edac/Makefile
drivers/edac/edac_core.h
drivers/edac/edac_mce.c [deleted file]
drivers/edac/i7300_edac.c
drivers/edac/i7core_edac.c
drivers/edac/sb_edac.c [new file with mode: 0644]
drivers/gpio/Kconfig
drivers/hid/hid-apple.c
drivers/hid/hid-core.c
drivers/hid/hid-ids.h
drivers/hid/hid-multitouch.c
drivers/hid/hid-roccat.c
drivers/hwmon/mc13783-adc.c
drivers/hwspinlock/Kconfig
drivers/hwspinlock/Makefile
drivers/hwspinlock/hwspinlock_core.c
drivers/hwspinlock/hwspinlock_internal.h
drivers/hwspinlock/omap_hwspinlock.c
drivers/hwspinlock/u8500_hsem.c [new file with mode: 0644]
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-au1550.c
drivers/ide/Kconfig
drivers/ide/au1xxx-ide.c
drivers/input/Kconfig
drivers/input/misc/Kconfig
drivers/input/misc/Makefile
drivers/input/misc/mc13783-pwrbutton.c [new file with mode: 0644]
drivers/input/touchscreen/mc13783_ts.c
drivers/isdn/Kconfig
drivers/isdn/hisax/l3dss1.c
drivers/leds/leds-asic3.c
drivers/leds/leds-mc13783.c
drivers/md/Kconfig
drivers/md/Makefile
drivers/md/dm-bufio.c [new file with mode: 0644]
drivers/md/dm-bufio.h [new file with mode: 0644]
drivers/md/dm-ioctl.c
drivers/md/dm-kcopyd.c
drivers/md/dm-log-userspace-base.c
drivers/md/dm-raid.c
drivers/md/dm-table.c
drivers/md/dm-thin-metadata.c [new file with mode: 0644]
drivers/md/dm-thin-metadata.h [new file with mode: 0644]
drivers/md/dm-thin.c [new file with mode: 0644]
drivers/md/dm.c
drivers/md/dm.h
drivers/md/persistent-data/Kconfig [new file with mode: 0644]
drivers/md/persistent-data/Makefile [new file with mode: 0644]
drivers/md/persistent-data/dm-block-manager.c [new file with mode: 0644]
drivers/md/persistent-data/dm-block-manager.h [new file with mode: 0644]
drivers/md/persistent-data/dm-btree-internal.h [new file with mode: 0644]
drivers/md/persistent-data/dm-btree-remove.c [new file with mode: 0644]
drivers/md/persistent-data/dm-btree-spine.c [new file with mode: 0644]
drivers/md/persistent-data/dm-btree.c [new file with mode: 0644]
drivers/md/persistent-data/dm-btree.h [new file with mode: 0644]
drivers/md/persistent-data/dm-persistent-data-internal.h [new file with mode: 0644]
drivers/md/persistent-data/dm-space-map-checker.c [new file with mode: 0644]
drivers/md/persistent-data/dm-space-map-checker.h [new file with mode: 0644]
drivers/md/persistent-data/dm-space-map-common.c [new file with mode: 0644]
drivers/md/persistent-data/dm-space-map-common.h [new file with mode: 0644]
drivers/md/persistent-data/dm-space-map-disk.c [new file with mode: 0644]
drivers/md/persistent-data/dm-space-map-disk.h [new file with mode: 0644]
drivers/md/persistent-data/dm-space-map-metadata.c [new file with mode: 0644]
drivers/md/persistent-data/dm-space-map-metadata.h [new file with mode: 0644]
drivers/md/persistent-data/dm-space-map.h [new file with mode: 0644]
drivers/md/persistent-data/dm-transaction-manager.c [new file with mode: 0644]
drivers/md/persistent-data/dm-transaction-manager.h [new file with mode: 0644]
drivers/media/radio/Kconfig
drivers/mfd/Kconfig
drivers/mfd/Makefile
drivers/mfd/aat2870-core.c
drivers/mfd/ab3100-core.c
drivers/mfd/ab3550-core.c [deleted file]
drivers/mfd/ab5500-core.c [new file with mode: 0644]
drivers/mfd/ab5500-core.h [new file with mode: 0644]
drivers/mfd/ab5500-debugfs.c [new file with mode: 0644]
drivers/mfd/ab5500-debugfs.h [new file with mode: 0644]
drivers/mfd/ab8500-core.c
drivers/mfd/ab8500-gpadc.c
drivers/mfd/asic3.c
drivers/mfd/da903x.c
drivers/mfd/db5500-prcmu-regs.h [deleted file]
drivers/mfd/db5500-prcmu.c
drivers/mfd/db8500-prcmu-regs.h [deleted file]
drivers/mfd/db8500-prcmu.c
drivers/mfd/dbx500-prcmu-regs.h [new file with mode: 0644]
drivers/mfd/intel_msic.c [new file with mode: 0644]
drivers/mfd/jz4740-adc.c
drivers/mfd/max8997.c
drivers/mfd/mc13xxx-core.c
drivers/mfd/menelaus.c
drivers/mfd/pcf50633-core.c
drivers/mfd/tc3589x.c
drivers/mfd/timberdale.c
drivers/mfd/tps65912-core.c
drivers/mfd/twl-core.c
drivers/mfd/twl4030-irq.c
drivers/mfd/twl4030-madc.c
drivers/mfd/twl6030-irq.c
drivers/mfd/wm831x-irq.c
drivers/mfd/wm8994-core.c
drivers/misc/vmw_balloon.c
drivers/mmc/host/Kconfig
drivers/mmc/host/au1xmmc.c
drivers/mmc/host/omap_hsmmc.c
drivers/mtd/maps/lantiq-flash.c
drivers/mtd/mtdchar.c
drivers/mtd/nand/Kconfig
drivers/mtd/nand/au1550nd.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_procfs.c
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/amd/au1000_eth.c
drivers/net/ethernet/amd/au1000_eth.h
drivers/net/ethernet/apple/Kconfig
drivers/net/ethernet/apple/Makefile
drivers/net/ethernet/apple/mac89x0.c [deleted file]
drivers/net/ethernet/cirrus/Kconfig
drivers/net/ethernet/cirrus/Makefile
drivers/net/ethernet/cirrus/mac89x0.c [new file with mode: 0644]
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_hw.h
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/i825xx/3c505.c
drivers/net/ethernet/intel/e100.c
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/igb/e1000_phy.c
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/realtek/Kconfig
drivers/net/ethernet/realtek/Makefile
drivers/net/ethernet/realtek/sc92031.c [deleted file]
drivers/net/ethernet/silan/Kconfig [new file with mode: 0644]
drivers/net/ethernet/silan/Makefile [new file with mode: 0644]
drivers/net/ethernet/silan/sc92031.c [new file with mode: 0644]
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/irda/Kconfig
drivers/net/rionet.c
drivers/net/virtio_net.c
drivers/net/wireless/ath/Kconfig
drivers/net/wireless/rtlwifi/Kconfig
drivers/power/Kconfig
drivers/power/ds2780_battery.c
drivers/pps/clients/Kconfig
drivers/pps/clients/Makefile
drivers/pps/clients/pps-gpio.c [new file with mode: 0644]
drivers/pps/clients/pps-ktimer.c
drivers/pps/clients/pps_parport.c
drivers/pps/kapi.c
drivers/rapidio/Kconfig
drivers/rapidio/Makefile
drivers/rapidio/devices/Kconfig [new file with mode: 0644]
drivers/rapidio/devices/Makefile [new file with mode: 0644]
drivers/rapidio/devices/tsi721.c [new file with mode: 0644]
drivers/rapidio/devices/tsi721.h [new file with mode: 0644]
drivers/rapidio/rio-scan.c
drivers/regulator/db8500-prcmu.c
drivers/regulator/mc13783-regulator.c
drivers/rtc/Kconfig
drivers/rtc/class.c
drivers/rtc/rtc-ds1307.c
drivers/rtc/rtc-mc13xxx.c
drivers/sh/intc/chip.c
drivers/sh/intc/userimask.c
drivers/sh/pfc.c
drivers/spi/Kconfig
drivers/staging/pohmelfs/inode.c
drivers/tty/Kconfig
drivers/usb/Kconfig
drivers/usb/host/Makefile
drivers/usb/host/alchemy-common.c [new file with mode: 0644]
drivers/usb/host/ehci-au1xxx.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/ohci-au1xxx.c
drivers/video/Kconfig
drivers/virtio/Kconfig
drivers/virtio/Makefile
drivers/virtio/virtio_mmio.c [new file with mode: 0644]
drivers/virtio/virtio_pci.c
drivers/w1/slaves/w1_ds2760.c
drivers/w1/slaves/w1_ds2780.c
drivers/w1/slaves/w1_ds2780.h
drivers/w1/w1_int.c
drivers/w1/w1_io.c
drivers/watchdog/Kconfig
fs/9p/vfs_inode.c
fs/9p/vfs_inode_dotl.c
fs/adfs/inode.c
fs/affs/amigaffs.c
fs/affs/inode.c
fs/affs/namei.c
fs/afs/fsclient.c
fs/afs/inode.c
fs/aio.c
fs/autofs4/inode.c
fs/befs/linuxvfs.c
fs/bfs/dir.c
fs/bfs/inode.c
fs/binfmt_elf.c
fs/binfmt_misc.c
fs/btrfs/delayed-inode.c
fs/btrfs/disk-io.c
fs/btrfs/inode.c
fs/btrfs/tree-log.c
fs/ceph/caps.c
fs/ceph/inode.c
fs/cifs/cifsencrypt.c
fs/cifs/cifsfs.h
fs/cifs/cifsproto.h
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/link.c
fs/cifs/sess.c
fs/cifs/smbencrypt.c
fs/coda/coda_linux.c
fs/coda/dir.c
fs/dcache.c
fs/devpts/inode.c
fs/ecryptfs/inode.c
fs/efs/inode.c
fs/exofs/inode.c
fs/ext2/balloc.c
fs/ext2/ialloc.c
fs/ext2/inode.c
fs/ext2/super.c
fs/ext3/balloc.c
fs/ext3/fsync.c
fs/ext3/ialloc.c
fs/ext3/inode.c
fs/ext3/ioctl.c
fs/ext3/namei.c
fs/ext3/super.c
fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/ext4_extents.h
fs/ext4/ext4_jbd2.c
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/fsync.c
fs/ext4/ialloc.c
fs/ext4/indirect.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/mballoc.h
fs/ext4/migrate.c
fs/ext4/mmp.c
fs/ext4/move_extent.c
fs/ext4/namei.c
fs/ext4/page-io.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/fat/inode.c
fs/fat/namei_msdos.c
fs/fat/namei_vfat.c
fs/freevxfs/vxfs_inode.c
fs/fuse/control.c
fs/fuse/inode.c
fs/gfs2/glops.c
fs/hfs/btree.c
fs/hfs/dir.c
fs/hfs/inode.c
fs/hfsplus/dir.c
fs/hfsplus/inode.c
fs/hostfs/hostfs_kern.c
fs/hostfs/hostfs_user.c
fs/hpfs/dir.c
fs/hpfs/inode.c
fs/hpfs/namei.c
fs/hppfs/hppfs.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/isofs/inode.c
fs/isofs/rock.c
fs/jbd/journal.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/recovery.c
fs/jbd2/transaction.c
fs/jffs2/dir.c
fs/jffs2/fs.c
fs/jfs/jfs_imap.c
fs/jfs/jfs_inode.c
fs/jfs/namei.c
fs/jfs/super.c
fs/libfs.c
fs/logfs/dir.c
fs/logfs/inode.c
fs/logfs/readwrite.c
fs/minix/inode.c
fs/namei.c
fs/ncpfs/inode.c
fs/nfs/inode.c
fs/nfsd/nfs4xdr.c
fs/nilfs2/inode.c
fs/nilfs2/namei.c
fs/ntfs/inode.c
fs/ocfs2/dir.c
fs/ocfs2/dlmglue.c
fs/ocfs2/inode.c
fs/ocfs2/namei.c
fs/openpromfs/inode.c
fs/proc/base.c
fs/proc/generic.c
fs/proc/inode.c
fs/proc/proc_sysctl.c
fs/qnx4/inode.c
fs/quota/quota.c
fs/ramfs/inode.c
fs/reiserfs/inode.c
fs/reiserfs/namei.c
fs/romfs/super.c
fs/squashfs/inode.c
fs/stack.c
fs/stat.c
fs/super.c
fs/sysfs/inode.c
fs/sysv/inode.c
fs/ubifs/super.c
fs/ubifs/xattr.c
fs/udf/balloc.c
fs/udf/directory.c
fs/udf/inode.c
fs/udf/lowlevel.c
fs/udf/misc.c
fs/udf/namei.c
fs/udf/partition.c
fs/udf/super.c
fs/udf/truncate.c
fs/udf/udf_sb.h
fs/udf/udfdecl.h
fs/udf/udftime.c
fs/udf/unicode.c
fs/ufs/ialloc.c
fs/ufs/inode.c
fs/xfs/xfs_iops.c
include/linux/aio.h
include/linux/cgroup.h
include/linux/dcache.h
include/linux/device-mapper.h
include/linux/dm-ioctl.h
include/linux/dm-kcopyd.h
include/linux/dm-log-userspace.h
include/linux/dma-mapping.h
include/linux/edac.h
include/linux/edac_mce.h [deleted file]
include/linux/ext2_fs.h
include/linux/ext3_fs.h
include/linux/ext3_fs_sb.h
include/linux/fs.h
include/linux/hwspinlock.h
include/linux/i2c/twl4030-madc.h
include/linux/jbd.h
include/linux/jbd2.h
include/linux/jbd_common.h [new file with mode: 0644]
include/linux/magic.h
include/linux/memcontrol.h
include/linux/mfd/ab5500/ab5500.h [new file with mode: 0644]
include/linux/mfd/ab8500/gpadc.h
include/linux/mfd/abx500.h
include/linux/mfd/db5500-prcmu.h
include/linux/mfd/db8500-prcmu.h
include/linux/mfd/dbx500-prcmu.h [new file with mode: 0644]
include/linux/mfd/intel_msic.h [new file with mode: 0644]
include/linux/mfd/max8997-private.h
include/linux/mfd/mc13783.h
include/linux/mfd/mc13xxx.h
include/linux/mfd/pcf50633/core.h
include/linux/mfd/wm831x/core.h
include/linux/mfd/wm8994/core.h
include/linux/mfd/wm8994/pdata.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/namei.h
include/linux/netfilter_ipv4/Kbuild
include/linux/netfilter_ipv4/nf_nat.h [new file with mode: 0644]
include/linux/pps-gpio.h [new file with mode: 0644]
include/linux/rio_ids.h
include/linux/sem.h
include/linux/sh_pfc.h
include/linux/skbuff.h
include/linux/sysctl.h
include/linux/utsname.h
include/linux/virtio.h
include/linux/virtio_config.h
include/linux/virtio_mmio.h [new file with mode: 0644]
include/linux/virtio_ring.h
include/net/ip_vs.h
include/net/netfilter/nf_conntrack_tuple.h
include/net/netfilter/nf_nat.h
include/net/tcp.h
include/net/udp.h
include/trace/events/ext4.h
init/Kconfig
init/do_mounts.c
init/do_mounts_rd.c
ipc/sem.c
kernel/cgroup.c
kernel/cpuset.c
kernel/events/core.c
kernel/sys.c
kernel/utsname_sysctl.c
lib/idr.c
mm/huge_memory.c
mm/internal.h
mm/memcontrol.c
mm/memory.c
mm/page_cgroup.c
mm/shmem.c
mm/swap.c
mm/vmscan.c
net/8021q/vlan_dev.c
net/bridge/netfilter/ebt_ulog.c
net/core/neighbour.c
net/core/skbuff.c
net/dccp/ipv4.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/ipt_ULOG.c
net/ipv4/netfilter/nf_nat_snmp_basic.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv4/udplite.c
net/ipv6/netfilter.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/ipv6/udplite.c
net/l2tp/l2tp_core.c
net/netfilter/core.c
net/netfilter/ipset/ip_set_core.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_dh.c
net/netfilter/ipvs/ip_vs_ftp.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/ipvs/ip_vs_nfct.c
net/netfilter/ipvs/ip_vs_proto.c
net/netfilter/ipvs/ip_vs_proto_sctp.c
net/netfilter/ipvs/ip_vs_proto_tcp.c
net/netfilter/ipvs/ip_vs_proto_udp.c
net/netfilter/ipvs/ip_vs_sh.c
net/netfilter/ipvs/ip_vs_wrr.c
net/netfilter/ipvs/ip_vs_xmit.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nfnetlink_log.c
net/netfilter/xt_IDLETIMER.c
net/netfilter/xt_hashlimit.c
net/packet/af_packet.c
net/x25/af_x25.c
security/integrity/ima/Kconfig
security/tomoyo/common.c
sound/Kconfig
sound/mips/Kconfig
sound/soc/au1x/Kconfig

index fe2326906610d15c25b928000d343b7917046bce..66bd97a95f10e18fc9b2a4bff7acb691957cc6a3 100644 (file)
@@ -50,6 +50,13 @@ specify the GFP_ flags (see kmalloc) for the allocation (the
 implementation may choose to ignore flags that affect the location of
 the returned memory, like GFP_DMA).
 
+void *
+dma_zalloc_coherent(struct device *dev, size_t size,
+                            dma_addr_t *dma_handle, gfp_t flag)
+
+Wraps dma_alloc_coherent() and also zeroes the returned memory if the
+allocation attempt succeeded.
+
 void
 dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
                           dma_addr_t dma_handle)
index 06eb6d957c83097b85fd15e87e94b8ed7edfe1cf..cc0ebc5241b39f2e9513d89a39d880771cab9c67 100644 (file)
@@ -418,7 +418,6 @@ total_unevictable   - sum of all children's "unevictable"
 
 # The following additional stats are dependent on CONFIG_DEBUG_VM.
 
-inactive_ratio         - VM internal parameter. (see mm/page_alloc.c)
 recent_rotated_anon    - VM internal parameter. (see mm/vmscan.c)
 recent_rotated_file    - VM internal parameter. (see mm/vmscan.c)
 recent_scanned_anon    - VM internal parameter. (see mm/vmscan.c)
index 994dd75475a63a851809c00f972b9c2bbdfba8e1..c155ac569c440b49c336d763a3ca30d25afde9db 100644 (file)
@@ -48,7 +48,7 @@ kernel and userspace, 'connector' is used as the interface for
 communication.
 
 There are currently two userspace log implementations that leverage this
-framework - "clustered_disk" and "clustered_core".  These implementations
+framework - "clustered-disk" and "clustered-core".  These implementations
 provide a cluster-coherent log for shared-storage.  Device-mapper mirroring
 can be used in a shared-storage environment when the cluster log implementations
 are employed.
diff --git a/Documentation/device-mapper/persistent-data.txt b/Documentation/device-mapper/persistent-data.txt
new file mode 100644 (file)
index 0000000..0e5df9b
--- /dev/null
@@ -0,0 +1,84 @@
+Introduction
+============
+
+The more-sophisticated device-mapper targets require complex metadata
+that is managed in kernel.  In late 2010 we were seeing that various
+different targets were rolling their own data structures, for example:
+
+- Mikulas Patocka's multisnap implementation
+- Heinz Mauelshagen's thin provisioning target
+- Another btree-based caching target posted to dm-devel
+- Another multi-snapshot target based on a design of Daniel Phillips
+
+Maintaining these data structures takes a lot of work, so if possible
+we'd like to reduce the number.
+
+The persistent-data library is an attempt to provide a re-usable
+framework for people who want to store metadata in device-mapper
+targets.  It's currently used by the thin-provisioning target and an
+upcoming hierarchical storage target.
+
+Overview
+========
+
+The main documentation is in the header files which can all be found
+under drivers/md/persistent-data.
+
+The block manager
+-----------------
+
+dm-block-manager.[hc]
+
+This provides access to the data on disk in fixed-size blocks.  There
+is a read/write locking interface to prevent concurrent accesses, and
+to keep data that is being used in the cache.
+
+Clients of persistent-data are unlikely to use this directly.
+
+The transaction manager
+-----------------------
+
+dm-transaction-manager.[hc]
+
+This restricts access to blocks and enforces copy-on-write semantics.
+The only way you can get hold of a writable block through the
+transaction manager is by shadowing an existing block (ie. doing
+copy-on-write) or allocating a fresh one.  Shadowing is elided within
+the same transaction so performance is reasonable.  The commit method
+ensures that all data is flushed before it writes the superblock.
+On power failure your metadata will be as it was when last committed.
+
+The Space Maps
+--------------
+
+dm-space-map.h
+dm-space-map-metadata.[hc]
+dm-space-map-disk.[hc]
+
+On-disk data structures that keep track of reference counts of blocks.
+They also act as the allocator of new blocks.  There are currently two
+implementations: a simpler one for managing blocks on a different
+device (eg. thinly-provisioned data blocks); and one for managing
+the metadata space.  The latter is complicated by the need to store
+its own data within the space it's managing.
+
+The data structures
+-------------------
+
+dm-btree.[hc]
+dm-btree-remove.c
+dm-btree-spine.c
+dm-btree-internal.h
+
+Currently there is only one data structure, a hierarchical btree.
+There are plans to add more.  For example, something with an
+array-like interface would see a lot of use.
+
+The btree is 'hierarchical' in that you can define it to be composed
+of nested btrees, and take multiple keys.  For example, the
+thin-provisioning target uses a btree with two levels of nesting.
+The first maps a device id to a mapping tree, and that in turn maps a
+virtual block to a physical block.
+
+Values stored in the btrees can have arbitrary size.  Keys are always
+64 bits, although nesting allows you to use multiple keys.
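
As a rough sketch of the nested-key idea described above (assuming the
dm_btree_lookup() interface from dm-btree.h and a dm_btree_info set up
with two levels; this is not code from the patch), a thin-target-style
lookup boils down to:

    #include "dm-btree.h"

    /* Map (device id, virtual block) to a little-endian physical block. */
    static int example_thin_lookup(struct dm_btree_info *info,
                                   dm_block_t root, uint64_t dev_id,
                                   uint64_t virt_block, __le64 *phys_le)
    {
            /* One 64-bit key per nesting level. */
            uint64_t keys[2] = { dev_id, virt_block };

            return dm_btree_lookup(info, root, keys, phys_le);
    }
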
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
new file mode 100644 (file)
index 0000000..801d9d1
--- /dev/null
@@ -0,0 +1,285 @@
+Introduction
+============
+
+This document describes a collection of device-mapper targets that
+between them implement thin-provisioning and snapshots.
+
+The main highlight of this implementation, compared to the previous
+implementation of snapshots, is that it allows many virtual devices to
+be stored on the same data volume.  This simplifies administration and
+allows the sharing of data between volumes, thus reducing disk usage.
+
+Another significant feature is support for an arbitrary depth of
+recursive snapshots (snapshots of snapshots of snapshots ...).  The
+previous implementation of snapshots did this by chaining together
+lookup tables, and so performance was O(depth).  This new
+implementation uses a single data structure to avoid this degradation
+with depth.  Fragmentation may still be an issue, however, in some
+scenarios.
+
+Metadata is stored on a separate device from data, giving the
+administrator some freedom, for example to:
+
+- Improve metadata resilience by storing metadata on a mirrored volume
+  but data on a non-mirrored one.
+
+- Improve performance by storing the metadata on SSD.
+
+Status
+======
+
+These targets are very much still in the EXPERIMENTAL state.  Please
+do not yet rely on them in production.  But do experiment and offer us
+feedback.  Different use cases will have different performance
+characteristics, for example due to fragmentation of the data volume.
+
+If you find this software is not performing as expected please mail
+dm-devel@redhat.com with details and we'll try our best to improve
+things for you.
+
+Userspace tools for checking and repairing the metadata are under
+development.
+
+Cookbook
+========
+
+This section describes some quick recipes for using thin provisioning.
+They use the dmsetup program to control the device-mapper driver
+directly.  End users will be advised to use a higher-level volume
+manager such as LVM2 once support has been added.
+
+Pool device
+-----------
+
+The pool device ties together the metadata volume and the data volume.
+It maps I/O linearly to the data volume and updates the metadata via
+two mechanisms:
+
+- Function calls from the thin targets
+
+- Device-mapper 'messages' from userspace which, amongst other things,
+  control the creation of new virtual devices.
+
+Setting up a fresh pool device
+------------------------------
+
+Setting up a pool device requires a valid metadata device and a
+data device.  If you do not have an existing metadata device you can
+make one by zeroing the first 4k to indicate empty metadata.
+
+    dd if=/dev/zero of=$metadata_dev bs=4096 count=1
+
+The amount of metadata you need will vary according to how many blocks
+are shared between thin devices (i.e. through snapshots).  If you have
+less sharing than average you'll need a larger-than-average metadata device.
+
+As a guide, we suggest you calculate the number of bytes to use in the
+metadata device as 48 * $data_dev_size / $data_block_size but round it up
+to 2MB if the answer is smaller.  The largest size supported is 16GB.
+
+If you're creating large numbers of snapshots which are recording large
+amounts of change, you may find you need to increase this.
+
+Reloading a pool table
+----------------------
+
+You may reload a pool's table; indeed, this is how the pool is resized
+if it runs out of space.  (N.B. While specifying a different metadata
+device when reloading is not forbidden at the moment, things will go
+wrong if it does not route I/O to exactly the same on-disk location as
+previously.)
+
+Using an existing pool device
+-----------------------------
+
+    dmsetup create pool \
+       --table "0 20971520 thin-pool $metadata_dev $data_dev \
+                $data_block_size $low_water_mark"
+
+$data_block_size gives the smallest unit of disk space that can be
+allocated at a time, expressed in units of 512-byte sectors.  People
+primarily interested in thin provisioning may want to use a value such
+as 1024 (512KB).  People doing lots of snapshotting may want a smaller value
+such as 128 (64KB).  If you are not zeroing newly-allocated data,
+a larger $data_block_size in the region of 256000 (128MB) is suggested.
+$data_block_size must be the same for the lifetime of the
+metadata device.
+
+$low_water_mark is expressed in blocks of size $data_block_size.  If
+free space on the data device drops below this level then a dm event
+will be triggered, which a userspace daemon should catch, allowing it to
+extend the pool device.  Only one such event will be sent.
+Resuming a device with a new table itself triggers an event so the
+userspace daemon can use this to detect a situation where a new table
+already exceeds the threshold.
+
+Thin provisioning
+-----------------
+
+i) Creating a new thinly-provisioned volume.
+
+  To create a new thinly-provisioned volume you must send a message to an
+  active pool device, /dev/mapper/pool in this example.
+
+    dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+  Here '0' is an identifier for the volume, a 24-bit number.  It's up
+  to the caller to allocate and manage these identifiers.  If the
+  identifier is already in use, the message will fail with -EEXIST.
+
+ii) Using a thinly-provisioned volume.
+
+  Thinly-provisioned volumes are activated using the 'thin' target:
+
+    dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0"
+
+  The last parameter is the identifier for the thinp device.
+
+Internal snapshots
+------------------
+
+i) Creating an internal snapshot.
+
+  Snapshots are created with another message to the pool.
+
+  N.B.  If the origin device that you wish to snapshot is active, you
+  must suspend it before creating the snapshot to avoid corruption.
+  This is NOT enforced at the moment, so please be careful!
+
+    dmsetup suspend /dev/mapper/thin
+    dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
+    dmsetup resume /dev/mapper/thin
+
+  Here '1' is the identifier for the volume, a 24-bit number.  '0' is the
+  identifier for the origin device.
+
+ii) Using an internal snapshot.
+
+  Once created, the user doesn't have to worry about any connection
+  between the origin and the snapshot.  Indeed the snapshot is no
+  different from any other thinly-provisioned device and can be
+  snapshotted itself via the same method.  It's perfectly legal to
+  have only one of them active, and there's no ordering requirement on
+  activating or removing them both.  (This differs from conventional
+  device-mapper snapshots.)
+
+  Activate it exactly the same way as any other thinly-provisioned volume:
+
+    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
+
+Deactivation
+------------
+
+All devices using a pool must be deactivated before the pool itself
+can be.
+
+    dmsetup remove thin
+    dmsetup remove snap
+    dmsetup remove pool
+
+Reference
+=========
+
+'thin-pool' target
+------------------
+
+i) Constructor
+
+    thin-pool <metadata dev> <data dev> <data block size (sectors)> \
+             <low water mark (blocks)> [<number of feature args> [<arg>]*]
+
+    Optional feature arguments:
+    - 'skip_block_zeroing': skips the zeroing of newly-provisioned blocks.
+
+    Data block size must be between 64KB (128 sectors) and 1GB
+    (2097152 sectors) inclusive.
+
+
+ii) Status
+
+    <transaction id> <used metadata blocks>/<total metadata blocks>
+    <used data blocks>/<total data blocks> <held metadata root>
+
+
+    transaction id:
+       A 64-bit number used by userspace to help synchronise with metadata
+       from volume managers.
+
+    used data blocks / total data blocks
+       If the number of free blocks drops below the pool's low water mark a
+       dm event will be sent to userspace.  This event is edge-triggered and
+       it will occur only once after each resume so volume manager writers
+       should register for the event and then check the target's status.
+
+    held metadata root:
+       The location, in sectors, of the metadata root that has been
+       'held' for userspace read access.  '-' indicates there is no
+       held root.  This feature is not yet implemented so '-' is
+       always returned.
+
+iii) Messages
+
+    create_thin <dev id>
+
+       Create a new thinly-provisioned device.
+       <dev id> is an arbitrary unique 24-bit identifier chosen by
+       the caller.
+
+    create_snap <dev id> <origin id>
+
+       Create a new snapshot of another thinly-provisioned device.
+       <dev id> is an arbitrary unique 24-bit identifier chosen by
+       the caller.
+       <origin id> is the identifier of the thinly-provisioned device
+       of which the new device will be a snapshot.
+
+    delete <dev id>
+
+       Deletes a thin device.  Irreversible.
+
+    trim <dev id> <new size in sectors>
+
+       Delete mappings from the end of a thin device.  Irreversible.
+       You might want to use this if you're reducing the size of
+       your thinly-provisioned device.  In many cases, due to the
+       sharing of blocks between devices, it is not possible to
+       determine in advance how much space 'trim' will release.  (In
+       future a userspace tool might be able to perform this
+       calculation.)
+
+    set_transaction_id <current id> <new id>
+
+       Userland volume managers, such as LVM, need a way to
+       synchronise their external metadata with the internal metadata of the
+       pool target.  The thin-pool target offers to store an
+       arbitrary 64-bit transaction id and return it on the target's
+       status line.  To avoid races you must provide what you think
+       the current transaction id is when you change it with this
+       compare-and-swap message.
+
+'thin' target
+-------------
+
+i) Constructor
+
+    thin <pool dev> <dev id>
+
+    pool dev:
+       the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
+
+    dev id:
+       the internal device identifier of the device to be
+       activated.
+
+The pool doesn't store any size against the thin devices.  If you
+load a thin target that is smaller than you've been using previously,
+then you'll have no access to blocks mapped beyond the end.  If you
+load a target that is bigger than before, then extra blocks will be
+provisioned as and when needed.
+
+If you wish to reduce the size of your thin device and potentially
+regain some space then send the 'trim' message to the pool.
+
+ii) Status
+
+     <nr mapped sectors> <highest mapped sector>
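
The metadata sizing guide from the cookbook above reduces to simple
arithmetic.  A hypothetical helper (not part of the patch; clamp_t() is
the kernel's clamping macro) might read:

    #include <linux/kernel.h>

    /*
     * Suggested metadata device size in bytes: roughly 48 bytes per data
     * block, raised to at least 2MB and capped at the 16GB maximum.
     * Both arguments are in 512-byte sectors.
     */
    static u64 example_metadata_bytes(u64 data_dev_size, u64 data_block_size)
    {
            u64 nr_blocks = data_dev_size / data_block_size;

            return clamp_t(u64, 48 * nr_blocks, 2ULL << 20, 16ULL << 30);
    }
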
diff --git a/Documentation/devicetree/bindings/virtio/mmio.txt b/Documentation/devicetree/bindings/virtio/mmio.txt
new file mode 100644 (file)
index 0000000..5069c1b
--- /dev/null
@@ -0,0 +1,17 @@
+* virtio memory mapped device
+
+See http://ozlabs.org/~rusty/virtio-spec/ for more details.
+
+Required properties:
+
+- compatible:  "virtio,mmio" compatibility string
+- reg:         control registers base address and size including configuration space
+- interrupts:  interrupt generated by the device
+
+Example:
+
+       virtio_block@3000 {
+               compatible = "virtio,mmio";
+               reg = <0x3000 0x100>;
+               interrupts = <41>;
+	};
index 7c799fc5b88e3525e611066d7be26c06226fd3db..3d849122b5b1bf345eac90465c988ff6bf1a0b68 100644 (file)
@@ -133,41 +133,6 @@ Who:       Pavel Machek <pavel@ucw.cz>
 
 ---------------------------
 
-What:  sys_sysctl
-When:  September 2010
-Option: CONFIG_SYSCTL_SYSCALL
-Why:   The same information is available in a more convenient from
-       /proc/sys, and none of the sysctl variables appear to be
-       important performance wise.
-
-       Binary sysctls are a long standing source of subtle kernel
-       bugs and security issues.
-
-       When I looked several months ago all I could find after
-       searching several distributions were 5 user space programs and
-       glibc (which falls back to /proc/sys) using this syscall.
-
-       The man page for sysctl(2) documents it as unusable for user
-       space programs.
-
-       sysctl(2) is not generally ABI compatible to a 32bit user
-       space application on a 64bit and a 32bit kernel.
-
-       For the last several months the policy has been no new binary
-       sysctls and no one has put forward an argument to use them.
-
-       Binary sysctls issues seem to keep happening appearing so
-       properly deprecating them (with a warning to user space) and a
-       2 year grace warning period will mean eventually we can kill
-       them and end the pain.
-
-       In the mean time individual binary sysctls can be dealt with
-       in a piecewise fashion.
-
-Who:   Eric Biederman <ebiederm@xmission.com>
-
----------------------------
-
 What:  /proc/<pid>/oom_adj
 When:  August 2012
 Why:   /proc/<pid>/oom_adj allows userspace to influence the oom killer's
index 653380793a6cf9d7e7cca9ba6b4da3384ce69623..d819ba16a0c7eb71c8b85201ef0bc59da37a4837 100644 (file)
@@ -29,6 +29,7 @@ d_hash                no              no              no              maybe
 d_compare:     yes             no              no              maybe
 d_delete:      no              yes             no              no
 d_release:     no              no              yes             no
+d_prune:        no              yes             no              no
 d_iput:                no              no              yes             no
 d_dname:       no              no              no              no
 d_automount:   no              no              yes             no
index 22f3a0eda1d22e430ebe350d7e952099b9a9e880..b100adc38adb9af3b03d831afa26f9318b3e2855 100644 (file)
@@ -73,14 +73,6 @@ nobarrier    (*)     This also requires an IO stack which can support
                        also be used to enable or disable barriers, for
                        consistency with other ext3 mount options.
 
-orlov          (*)     This enables the new Orlov block allocator. It is
-                       enabled by default.
-
-oldalloc               This disables the Orlov block allocator and enables
-                       the old block allocator.  Orlov should have better
-                       performance - we'd like to get some feedback if it's
-                       the contrary for you.
-
 user_xattr             Enables Extended User Attributes.  Additionally, you
                        need to have extended attribute support enabled in the
                        kernel configuration (CONFIG_EXT3_FS_XATTR).  See the
index 232a575a0c4857249edc5aa76a235ce4e258082f..4917cf24a5e0885518cf06a12e53d4057f5c91fa 100644 (file)
@@ -160,7 +160,9 @@ noload                      if the filesystem was not unmounted cleanly,
                        lead to any number of problems.
 
 data=journal           All data are committed into the journal prior to being
-                       written into the main file system.
+                       written into the main file system.  Enabling
+                       this mode will disable delayed allocation and
+                       O_DIRECT support.
 
 data=ordered   (*)     All data are forced directly out to the main file
                        system prior to its metadata being committed to the
@@ -201,30 +203,19 @@ inode_readahead_blks=n    This tuning parameter controls the maximum
                        table readahead algorithm will pre-read into
                        the buffer cache.  The default value is 32 blocks.
 
-orlov          (*)     This enables the new Orlov block allocator. It is
-                       enabled by default.
-
-oldalloc               This disables the Orlov block allocator and enables
-                       the old block allocator.  Orlov should have better
-                       performance - we'd like to get some feedback if it's
-                       the contrary for you.
-
-user_xattr             Enables Extended User Attributes.  Additionally, you
-                       need to have extended attribute support enabled in the
-                       kernel configuration (CONFIG_EXT4_FS_XATTR).  See the
-                       attr(5) manual page and http://acl.bestbits.at/ to
-                       learn more about extended attributes.
-
-nouser_xattr           Disables Extended User Attributes.
-
-acl                    Enables POSIX Access Control Lists support.
-                       Additionally, you need to have ACL support enabled in
-                       the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL).
-                       See the acl(5) manual page and http://acl.bestbits.at/
-                       for more information.
+nouser_xattr           Disables Extended User Attributes. If you have extended
+                       attribute support enabled in the kernel configuration
+                       (CONFIG_EXT4_FS_XATTR), extended attribute support
+                       is enabled by default on mount. See the attr(5) manual
+                       page and http://acl.bestbits.at/ for more information
+                       about extended attributes.
 
 noacl                  This option disables POSIX Access Control List
-                       support.
+                       support. If ACL support is enabled in the kernel
+                       configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL is
+                       enabled by default on mount. See the acl(5) manual
+                       page and http://acl.bestbits.at/ for more information
+                       about ACLs.
 
 bsddf          (*)     Make 'df' act like BSD.
 minixdf                        Make 'df' act like Minix.
@@ -419,8 +410,8 @@ written to the journal first, and then to its final location.
 In the event of a crash, the journal can be replayed, bringing both data and
 metadata into a consistent state.  This mode is the slowest except when data
 needs to be read from and written to disk at the same time where it
-outperforms all others modes.  Currently ext4 does not have delayed
-allocation support if this data journalling mode is selected.
+outperforms all other modes.  Enabling this mode will disable delayed
+allocation and O_DIRECT support.
 
 /proc entries
 =============
index 7dcd1a4e726c40ceea1283dfcc328c494f12d26d..a903ee5e977643e945539d66d4e11e8c6354fb89 100644 (file)
@@ -39,23 +39,20 @@ independent, drivers.
      in case an unused hwspinlock isn't available. Users of this
      API will usually want to communicate the lock's id to the remote core
      before it can be used to achieve synchronization.
-     Can be called from an atomic context (this function will not sleep) but
-     not from within interrupt context.
+     Should be called from a process context (might sleep).
 
   struct hwspinlock *hwspin_lock_request_specific(unsigned int id);
    - assign a specific hwspinlock id and return its address, or NULL
      if that hwspinlock is already in use. Usually board code will
      be calling this function in order to reserve specific hwspinlock
      ids for predefined purposes.
-     Can be called from an atomic context (this function will not sleep) but
-     not from within interrupt context.
+     Should be called from a process context (might sleep).
 
   int hwspin_lock_free(struct hwspinlock *hwlock);
    - free a previously-assigned hwspinlock; returns 0 on success, or an
      appropriate error code on failure (e.g. -EINVAL if the hwspinlock
      is already free).
-     Can be called from an atomic context (this function will not sleep) but
-     not from within interrupt context.
+     Should be called from a process context (might sleep).
 
   int hwspin_lock_timeout(struct hwspinlock *hwlock, unsigned int timeout);
    - lock a previously-assigned hwspinlock with a timeout limit (specified in
@@ -230,45 +227,62 @@ int hwspinlock_example2(void)
 
 4. API for implementors
 
-  int hwspin_lock_register(struct hwspinlock *hwlock);
+  int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
+               const struct hwspinlock_ops *ops, int base_id, int num_locks);
    - to be called from the underlying platform-specific implementation, in
-     order to register a new hwspinlock instance. Can be called from an atomic
-     context (this function will not sleep) but not from within interrupt
-     context. Returns 0 on success, or appropriate error code on failure.
+     order to register a new hwspinlock device (which is usually a bank of
+     numerous locks). Should be called from a process context (this function
+     might sleep).
+     Returns 0 on success, or appropriate error code on failure.
 
-  struct hwspinlock *hwspin_lock_unregister(unsigned int id);
+  int hwspin_lock_unregister(struct hwspinlock_device *bank);
    - to be called from the underlying vendor-specific implementation, in order
-     to unregister an existing (and unused) hwspinlock instance.
-     Can be called from an atomic context (will not sleep) but not from
-     within interrupt context.
+     to unregister an hwspinlock device (which is usually a bank of numerous
+     locks).
+     Should be called from a process context (this function might sleep).
      Returns 0 on success, or an appropriate error code on failure (e.g.
      -EBUSY if the hwspinlock is still in use).
 
-5. struct hwspinlock
+5. Important structs
 
-This struct represents an hwspinlock instance. It is registered by the
-underlying hwspinlock implementation using the hwspin_lock_register() API.
+struct hwspinlock_device is a device which usually contains a bank
+of hardware locks. It is registered by the underlying hwspinlock
+implementation using the hwspin_lock_register() API.
 
 /**
- * struct hwspinlock - vendor-specific hwspinlock implementation
- *
- * @dev: underlying device, will be used with runtime PM api
- * @ops: vendor-specific hwspinlock handlers
- * @id: a global, unique, system-wide, index of the lock.
- * @lock: initialized and used by hwspinlock core
- * @owner: underlying implementation module, used to maintain module ref count
+ * struct hwspinlock_device - a device which usually spans numerous hwspinlocks
+ * @dev: underlying device, will be used to invoke runtime PM api
+ * @ops: platform-specific hwspinlock handlers
+ * @base_id: id index of the first lock in this device
+ * @num_locks: number of locks in this device
+ * @lock: dynamically allocated array of 'struct hwspinlock'
  */
-struct hwspinlock {
+struct hwspinlock_device {
        struct device *dev;
        const struct hwspinlock_ops *ops;
-       int id;
+       int base_id;
+       int num_locks;
+       struct hwspinlock lock[0];
+};
+
+struct hwspinlock_device contains an array of hwspinlock structs, each
+of which represents a single hardware lock:
+
+/**
+ * struct hwspinlock - this struct represents a single hwspinlock instance
+ * @bank: the hwspinlock_device structure which owns this lock
+ * @lock: initialized and used by hwspinlock core
+ * @priv: private data, owned by the underlying platform-specific hwspinlock drv
+ */
+struct hwspinlock {
+       struct hwspinlock_device *bank;
        spinlock_t lock;
-       struct module *owner;
+       void *priv;
 };
 
-The underlying implementation is responsible to assign the dev, ops, id and
-owner members. The lock member, OTOH, is initialized and used by the hwspinlock
-core.
+When registering a bank of locks, the hwspinlock driver only needs to
+set the priv members of the locks. The rest of the members are set and
+initialized by the hwspinlock core itself.
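
To make this concrete, below is a hypothetical sketch of a platform
driver probe registering a bank of eight locks with the API above;
my_hwspinlock_ops and my_lock_get_priv are invented names standing in
for a real driver's ops table and per-lock private data:

  static int my_hwspinlock_probe(struct platform_device *pdev)
  {
          struct hwspinlock_device *bank;
          int i, ret, num_locks = 8;

          bank = kzalloc(sizeof(*bank) +
                         num_locks * sizeof(struct hwspinlock), GFP_KERNEL);
          if (!bank)
                  return -ENOMEM;

          /* the driver only sets priv; the core initializes the rest */
          for (i = 0; i < num_locks; i++)
                  bank->lock[i].priv = my_lock_get_priv(pdev, i);

          ret = hwspin_lock_register(bank, &pdev->dev, &my_hwspinlock_ops,
                                     0, num_locks);
          if (ret)
                  kfree(bank);
          return ret;
  }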
 
 6. Implementation callbacks
 
index 4ccdbca038115140a4aa95d668065935b29cdd8a..f2a2488f1bf33d8290384c4823fccee2b39afe36 100644 (file)
@@ -15,6 +15,23 @@ amemthresh - INTEGER
         enabled and the variable is automatically set to 2, otherwise
         the strategy is disabled and the variable is  set  to 1.
 
+conntrack - BOOLEAN
+       0 - disabled (default)
+       not 0 - enabled
+
+       If set, maintain connection tracking entries for
+       connections handled by IPVS.
+
+       This should be enabled if connections handled by IPVS are to be
+       also handled by stateful firewall rules. That is, iptables rules
+       that make use of connection tracking.  It is a performance
+       optimisation to disable this setting otherwise.
+
+       Connections handled by the IPVS FTP application module
+       will have connection tracking entries regardless of this setting.
+
+       Only available when IPVS is compiled with CONFIG_IP_VS_NFCT enabled.
+
 cache_bypass - BOOLEAN
         0 - disabled (default)
         not 0 - enabled
@@ -39,7 +56,7 @@ debug_level - INTEGER
        11         - IPVS packet handling (ip_vs_in/ip_vs_out)
        12 or more - packet traversal
 
-       Only available when IPVS is compiled with the CONFIG_IPVS_DEBUG
+       Only available when IPVS is compiled with CONFIG_IP_VS_DEBUG enabled.
 
        Higher debugging levels include the messages for lower debugging
        levels, so setting debug level 2, includes level 0, 1 and 2
@@ -123,13 +140,11 @@ nat_icmp_send - BOOLEAN
 secure_tcp - INTEGER
         0  - disabled (default)
 
-        The secure_tcp defense is to use a more complicated state
-        transition table and some possible short timeouts of each
-        state. In the VS/NAT, it delays the entering the ESTABLISHED
-        until the real server starts to send data and ACK packet
-        (after 3-way handshake).
+       The secure_tcp defense is to use a more complicated TCP state
+       transition table. For VS/NAT, it also delays entering the
+       TCP ESTABLISHED state until the three-way handshake is completed.
 
-        The value definition is the same as that of drop_entry or
+        The value definition is the same as that of drop_entry and
         drop_packet.
 
 sync_threshold - INTEGER
@@ -141,3 +156,36 @@ sync_threshold - INTEGER
         synchronized, every time the number of its incoming packets
         modulus 50 equals the threshold. The range of the threshold is
         from 0 to 49.
+
+snat_reroute - BOOLEAN
+       0 - disabled
+       not 0 - enabled (default)
+
+       If enabled, recalculate the route of SNATed packets from
+       realservers so that they are routed as if they originate from the
+       director. Otherwise they are routed as if they are forwarded by the
+       director.
+
+       If policy routing is in effect then it is possible that a packet
+       originating from the director is routed differently to a packet
+       being forwarded by the director.
+
+       If policy routing is not in effect then the recalculated route will
+       always be the same as the original route so it is an optimisation
+       to disable snat_reroute and avoid the recalculation.
+
+sync_version - INTEGER
+       default 1
+
+       The version of the synchronisation protocol used when sending
+       synchronisation messages.
+
+       0 selects the original synchronisation protocol (version 0). This
+       should be used when sending synchronisation messages to a legacy
+       system that only understands the original synchronisation protocol.
+
+       1 selects the current synchronisation protocol (version 1). This
+       should be used where possible.
+
+       Kernels with this sync_version entry are able to receive messages
+       of both version 0 and version 1 of the synchronisation protocol.
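
As a usage sketch (illustrative, not part of this patch), the following
program flips two of these settings from userspace through procfs; it
assumes an IPVS-enabled kernel and, for conntrack, CONFIG_IP_VS_NFCT:

  #include <stdio.h>

  /* Sketch: write IPVS sysctls under /proc/sys/net/ipv4/vs/. */
  static int write_vs_sysctl(const char *name, const char *val)
  {
          char path[128];
          FILE *f;

          snprintf(path, sizeof(path), "/proc/sys/net/ipv4/vs/%s", name);
          f = fopen(path, "w");
          if (!f)
                  return -1;
          fputs(val, f);
          return fclose(f);
  }

  int main(void)
  {
          /* prefer sync protocol version 1, enable conntrack entries */
          if (write_vs_sysctl("sync_version", "1") ||
              write_vs_sysctl("conntrack", "1")) {
                  perror("sysctl write");
                  return 1;
          }
          return 0;
  }
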
index be70ee15f8cab5d1b10ad1f792cb0650925dc0b2..c75694b35d08b7f6f70290a1b715658f4d6c7156 100644 (file)
@@ -144,7 +144,7 @@ and the default device ID in order to access the device on the active port.
 
 After the host has completed enumeration of the entire network it releases
 devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint
-in the system, it sets the Master Enable bit in the Port General Control CSR
+in the system, it sets the Discovered bit in the Port General Control CSR
 to indicate that enumeration is completed and agents are allowed to execute
 passive discovery of the network.
 
diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/rapidio/tsi721.txt
new file mode 100644 (file)
index 0000000..335f3c6
--- /dev/null
@@ -0,0 +1,49 @@
+RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge.
+=========================================================================
+
+I. Overview
+
+This driver implements all currently defined RapidIO mport callback functions.
+It supports maintenance read and write operations, inbound and outbound RapidIO
+doorbells, inbound maintenance port-writes and RapidIO messaging.
+
+To generate SRIO maintenance transactions this driver uses one of the Tsi721
+DMA channels. This mechanism provides access to a larger range of hop counts
+and destination IDs without the need to change the outbound window translation.
+
+RapidIO messaging support uses dedicated messaging channels for each mailbox.
+For inbound messages this driver uses destination ID matching to forward messages
+into the corresponding message queue. Messaging callbacks are implemented to be
+fully compatible with the RIONET driver (Ethernet over RapidIO messaging services).
+
+II. Known problems
+
+  None.
+
+III. To do
+
+ Add DMA data transfers (non-messaging).
+ Add inbound region (SRIO-to-PCIe) mapping.
+
+IV. Version History
+
+  1.0.0 - Initial driver release.
+
+V.  License
+-----------------------------------------------
+
+  Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the Free
+  Software Foundation; either version 2 of the License, or (at your option)
+  any later version.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
index 5d0fc8bfcdb9b608bccf03855c33a4d01386d270..77dfecf4e2d6e7ec310b145496f494f9aac25b08 100644 (file)
 
   ______________________________________________________________________
 
-  1\b1.\b.  I\bIn\bnt\btr\bro\bod\bdu\buc\bct\bti\bio\bon\bn
+  1.  Introduction
 
   Welcome to User Mode Linux.  It's going to be fun.
 
 
 
-  1\b1.\b.1\b1.\b.  H\bHo\bow\bw i\bis\bs U\bUs\bse\ber\br M\bMo\bod\bde\be L\bLi\bin\bnu\bux\bx D\bDi\bif\bff\bfe\ber\bre\ben\bnt\bt?\b?
+  1.1.  How is User Mode Linux Different?
 
   Normally, the Linux Kernel talks straight to your hardware (video
   card, keyboard, hard drives, etc), and any programs which run ask the
 
 
 
-  1\b1.\b.2\b2.\b.  W\bWh\bhy\by W\bWo\bou\bul\bld\bd I\bI W\bWa\ban\bnt\bt U\bUs\bse\ber\br M\bMo\bod\bde\be L\bLi\bin\bnu\bux\bx?\b?
+  1.2.  Why Would I Want User Mode Linux?
 
 
   1. If User Mode Linux crashes, your host kernel is still fine.
 
 
 
-  2\b2.\b.  C\bCo\bom\bmp\bpi\bil\bli\bin\bng\bg t\bth\bhe\be k\bke\ber\brn\bne\bel\bl a\ban\bnd\bd m\bmo\bod\bdu\bul\ble\bes\bs
+  2.  Compiling the kernel and modules
 
 
 
 
-  2\b2.\b.1\b1.\b.  C\bCo\bom\bmp\bpi\bil\bli\bin\bng\bg t\bth\bhe\be k\bke\ber\brn\bne\bel\bl
+  2.1.  Compiling the kernel
 
 
   Compiling the user mode kernel is just like compiling any other
   bug fixes and enhancements that have gone into subsequent releases.
 
 
-  2\b2.\b.2\b2.\b.  C\bCo\bom\bmp\bpi\bil\bli\bin\bng\bg a\ban\bnd\bd i\bin\bns\bst\bta\bal\bll\bli\bin\bng\bg k\bke\ber\brn\bne\bel\bl m\bmo\bod\bdu\bul\ble\bes\bs
+  2.2.  Compiling and installing kernel modules
 
   UML modules are built in the same way as the native kernel (with the
   exception of the 'ARCH=um' that you always need for UML):
 
 
 
-  2\b2.\b.3\b3.\b.  C\bCo\bom\bmp\bpi\bil\bli\bin\bng\bg a\ban\bnd\bd i\bin\bns\bst\bta\bal\bll\bli\bin\bng\bg u\bum\bml\bl_\b_u\but\bti\bil\bli\bit\bti\bie\bes\bs
+  2.3.  Compiling and installing uml_utilities
 
   Many features of the UML kernel require a user-space helper program,
   so a uml_utilities package is distributed separately from the kernel
   patch which provides these helpers. Included within this is:
 
-  +\bo  port-helper - Used by consoles which connect to xterms or ports
+  o  port-helper - Used by consoles which connect to xterms or ports
 
-  +\bo  tunctl - Configuration tool to create and delete tap devices
+  o  tunctl - Configuration tool to create and delete tap devices
 
-  +\bo  uml_net - Setuid binary for automatic tap device configuration
+  o  uml_net - Setuid binary for automatic tap device configuration
 
-  +\bo  uml_switch - User-space virtual switch required for daemon
+  o  uml_switch - User-space virtual switch required for daemon
      transport
 
      The uml_utilities tree is compiled with:
 
 
 
-  3\b3.\b.  R\bRu\bun\bnn\bni\bin\bng\bg U\bUM\bML\bL a\ban\bnd\bd l\blo\bog\bgg\bgi\bin\bng\bg i\bin\bn
+  3.  Running UML and logging in
 
 
 
-  3\b3.\b.1\b1.\b.  R\bRu\bun\bnn\bni\bin\bng\bg U\bUM\bML\bL
+  3.1.  Running UML
 
   It runs on 2.2.15 or later, and all 2.4 kernels.
 
 
 
 
-  3\b3.\b.2\b2.\b.  L\bLo\bog\bgg\bgi\bin\bng\bg i\bin\bn
+  3.2.  Logging in
 
 
 
 
   There are a couple of other ways to log in:
 
-  +\bo  On a virtual console
+  o  On a virtual console
 
 
 
 
 
 
-  +\bo  Over the serial line
+  o  Over the serial line
 
 
      In the boot output, find a line that looks like:
 
 
 
-  +\bo  Over the net
+  o  Over the net
 
 
      If the network is running, then you can telnet to the virtual
   down and the process will exit.
 
 
-  3\b3.\b.3\b3.\b.  E\bEx\bxa\bam\bmp\bpl\ble\bes\bs
+  3.3.  Examples
 
   Here are some examples of UML in action:
 
-  +\bo  A login session <http://user-mode-linux.sourceforge.net/login.html>
+  o  A login session <http://user-mode-linux.sourceforge.net/login.html>
 
-  +\bo  A virtual network <http://user-mode-linux.sourceforge.net/net.html>
+  o  A virtual network <http://user-mode-linux.sourceforge.net/net.html>
 
 
 
 
 
 
-  4\b4.\b.  U\bUM\bML\bL o\bon\bn 2\b2G\bG/\b/2\b2G\bG h\bho\bos\bst\bts\bs
+  4.  UML on 2G/2G hosts
 
 
 
 
-  4\b4.\b.1\b1.\b.  I\bIn\bnt\btr\bro\bod\bdu\buc\bct\bti\bio\bon\bn
+  4.1.  Introduction
 
 
   Most Linux machines are configured so that the kernel occupies the
 
 
 
-  4\b4.\b.2\b2.\b.  T\bTh\bhe\be p\bpr\bro\bob\bbl\ble\bem\bm
+  4.2.  The problem
 
 
   The prebuilt UML binaries on this site will not run on 2G/2G hosts
 
 
 
-  4\b4.\b.3\b3.\b.  T\bTh\bhe\be s\bso\bol\blu\but\bti\bio\bon\bn
+  4.3.  The solution
 
 
   The fix for this is to rebuild UML from source after enabling
 
 
 
-  5\b5.\b.  S\bSe\bet\btt\bti\bin\bng\bg u\bup\bp s\bse\ber\bri\bia\bal\bl l\bli\bin\bne\bes\bs a\ban\bnd\bd c\bco\bon\bns\bso\bol\ble\bes\bs
+  5.  Setting up serial lines and consoles
 
 
   It is possible to attach UML serial lines and consoles to many types
   You can attach them to host ptys, ttys, file descriptors, and ports.
   This allows you to do things like
 
-  +\bo  have a UML console appear on an unused host console,
+  o  have a UML console appear on an unused host console,
 
-  +\bo  hook two virtual machines together by having one attach to a pty
+  o  hook two virtual machines together by having one attach to a pty
      and having the other attach to the corresponding tty
 
-  +\bo  make a virtual machine accessible from the net by attaching a
+  o  make a virtual machine accessible from the net by attaching a
      console to a port on the host.
 
 
 
 
 
-  5\b5.\b.1\b1.\b.  S\bSp\bpe\bec\bci\bif\bfy\byi\bin\bng\bg t\bth\bhe\be d\bde\bev\bvi\bic\bce\be
+  5.1.  Specifying the device
 
   Devices are specified with "con" or "ssl" (console or serial line,
   respectively), optionally with a device number if you are talking
 
 
 
-  5\b5.\b.2\b2.\b.  S\bSp\bpe\bec\bci\bif\bfy\byi\bin\bng\bg t\bth\bhe\be c\bch\bha\ban\bnn\bne\bel\bl
+  5.2.  Specifying the channel
 
   There are a number of different types of channels to attach a UML
   device to, each with a different way of specifying exactly what to
   attach to.
 
-  +\bo  pseudo-terminals - device=pty pts terminals - device=pts
+  o  pseudo-terminals - device=pty pts terminals - device=pts
 
 
      This will cause UML to allocate a free host pseudo-terminal for the
      log.  You access it by attaching a terminal program to the
      corresponding tty:
 
-  +\bo  screen /dev/pts/n
+  o  screen /dev/pts/n
 
-  +\bo  screen /dev/ttyxx
+  o  screen /dev/ttyxx
 
-  +\bo  minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
+  o  minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
      devices
 
-  +\bo  kermit - start it up, 'open' the device, then 'connect'
+  o  kermit - start it up, 'open' the device, then 'connect'
 
 
 
 
 
-  +\bo  terminals - device=tty:tty device file
+  o  terminals - device=tty:tty device file
 
 
      This will make UML attach the device to the specified tty (i.e
 
 
 
-  +\bo  xterms - device=xterm
+  o  xterms - device=xterm
 
 
      UML will run an xterm and the device will be attached to it.
 
 
 
-  +\bo  Port - device=port:port number
+  o  Port - device=port:port number
 
 
      This will attach the UML devices to the specified host port.
 
 
 
-  +\bo  already-existing file descriptors - device=file descriptor
+  o  already-existing file descriptors - device=file descriptor
 
 
      If you set up a file descriptor on the UML command line, you can
 
 
 
-  +\bo  Nothing - device=null
+  o  Nothing - device=null
 
 
      This allows the device to be opened, in contrast to 'none', but
 
 
 
-  +\bo  None - device=none
+  o  None - device=none
 
 
      This causes the device to disappear.
 
 
 
-  will cause serial line 3 to accept input on the host's /dev/tty3 and
+  will cause serial line 3 to accept input on the host's /dev/tty2 and
   display output on an xterm.  That's a silly example - the most common
   use of this syntax is to reattach the main console to stdin and stdout
   as shown above.
 
 
 
-  5\b5.\b.3\b3.\b.  E\bEx\bxa\bam\bmp\bpl\ble\bes\bs
+  5.3.  Examples
 
   There are a number of interesting things you can do with this
   capability.
   prompt of the other virtual machine.
 
 
-  6\b6.\b.  S\bSe\bet\btt\bti\bin\bng\bg u\bup\bp t\bth\bhe\be n\bne\bet\btw\bwo\bor\brk\bk
+  6.  Setting up the network
 
 
 
   There are currently five transport types available for a UML virtual
   machine to exchange packets with other hosts:
 
-  +\bo  ethertap
+  o  ethertap
 
-  +\bo  TUN/TAP
+  o  TUN/TAP
 
-  +\bo  Multicast
+  o  Multicast
 
-  +\bo  a switch daemon
+  o  a switch daemon
 
-  +\bo  slip
+  o  slip
 
-  +\bo  slirp
+  o  slirp
 
-  +\bo  pcap
+  o  pcap
 
      The TUN/TAP, ethertap, slip, and slirp transports allow a UML
      instance to exchange packets with the host.  They may be directed
   With so many host transports, which one should you use?  Here's when
   you should use each one:
 
-  +\bo  ethertap - if you want access to the host networking and it is
+  o  ethertap - if you want access to the host networking and it is
      running 2.2
 
-  +\bo  TUN/TAP - if you want access to the host networking and it is
+  o  TUN/TAP - if you want access to the host networking and it is
      running 2.4.  Also, the TUN/TAP transport is able to use a
      preconfigured device, allowing it to avoid using the setuid uml_net
      helper, which is a security advantage.
 
-  +\bo  Multicast - if you want a purely virtual network and you don't want
+  o  Multicast - if you want a purely virtual network and you don't want
      to set up anything but the UML
 
-  +\bo  a switch daemon - if you want a purely virtual network and you
+  o  a switch daemon - if you want a purely virtual network and you
      don't mind running the daemon in order to get somewhat better
      performance
 
-  +\bo  slip - there is no particular reason to run the slip backend unless
+  o  slip - there is no particular reason to run the slip backend unless
      ethertap and TUN/TAP are just not available for some reason
 
-  +\bo  slirp - if you don't have root access on the host to setup
+  o  slirp - if you don't have root access on the host to setup
      networking, or if you don't want to allocate an IP to your UML
 
-  +\bo  pcap - not much use for actual network connectivity, but great for
+  o  pcap - not much use for actual network connectivity, but great for
      monitoring traffic on the host
 
      Ethertap is available on 2.4 and works fine.  TUN/TAP is preferred
      exploit the helper's root privileges.
 
 
-  6\b6.\b.1\b1.\b.  G\bGe\ben\bne\ber\bra\bal\bl s\bse\bet\btu\bup\bp
+  6.1.  General setup
 
   First, you must have the virtual network enabled in your UML.  If you are
   running a prebuilt kernel from this site, everything is already
 
 
 
-  6\b6.\b.2\b2.\b.  U\bUs\bse\ber\brs\bsp\bpa\bac\bce\be d\bda\bae\bem\bmo\bon\bns\bs
+  6.2.  Userspace daemons
 
   You will likely need the setuid helper, or the switch daemon, or both.
   They are both installed with the RPM and deb, so if you've installed
 
 
 
-  6\b6.\b.3\b3.\b.  S\bSp\bpe\bec\bci\bif\bfy\byi\bin\bng\bg e\bet\bth\bhe\ber\brn\bne\bet\bt a\bad\bdd\bdr\bre\bes\bss\bse\bes\bs
+  6.3.  Specifying ethernet addresses
 
   Below, you will see that the TUN/TAP, ethertap, and daemon interfaces
   allow you to specify hardware addresses for the virtual ethernet
   sufficient to guarantee a unique hardware address for the device.  A
   couple of exceptions are:
 
-  +\bo  Another set of virtual ethernet devices are on the same network and
+  o  Another set of virtual ethernet devices are on the same network and
      they are assigned hardware addresses using a different scheme which
      may conflict with the UML IP address-based scheme
 
-  +\bo  You aren't going to use the device for IP networking, so you don't
+  o  You aren't going to use the device for IP networking, so you don't
      assign the device an IP address
 
      If you let the driver provide the hardware address, you should make
 
 
 
-  6\b6.\b.4\b4.\b.  U\bUM\bML\bL i\bin\bnt\bte\ber\brf\bfa\bac\bce\be s\bse\bet\btu\bup\bp
+  6.4.  UML interface setup
 
   Once the network devices have been described on the command line, you
   should boot UML and log in.
 
 
 
-  6\b6.\b.5\b5.\b.  M\bMu\bul\blt\bti\bic\bca\bas\bst\bt
+  6.5.  Multicast
 
   The simplest way to set up a virtual network between multiple UMLs is
   to use the mcast transport.  This was written by Harald Welte and is
 
 
 
-  6\b6.\b.6\b6.\b.  T\bTU\bUN\bN/\b/T\bTA\bAP\bP w\bwi\bit\bth\bh t\bth\bhe\be u\bum\bml\bl_\b_n\bne\bet\bt h\bhe\bel\blp\bpe\ber\br
+  6.6.  TUN/TAP with the uml_net helper
 
   TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the
   host.  The TUN/TAP backend has been in UML since 2.4.9-3um.
   There are a couple potential problems with running the TUN/TAP
   transport on a 2.4 host kernel
 
-  +\bo  TUN/TAP seems not to work on 2.4.3 and earlier.  Upgrade the host
+  o  TUN/TAP seems not to work on 2.4.3 and earlier.  Upgrade the host
      kernel or use the ethertap transport.
 
-  +\bo  With an upgraded kernel, TUN/TAP may fail with
+  o  With an upgraded kernel, TUN/TAP may fail with
 
 
        File descriptor in bad state
 
 
 
-  6\b6.\b.7\b7.\b.  T\bTU\bUN\bN/\b/T\bTA\bAP\bP w\bwi\bit\bth\bh a\ba p\bpr\bre\bec\bco\bon\bnf\bfi\big\bgu\bur\bre\bed\bd t\bta\bap\bp d\bde\bev\bvi\bic\bce\be
+  6.7.  TUN/TAP with a preconfigured tap device
 
   If you prefer not to have UML use uml_net (which is somewhat
   insecure), with UML 2.4.17-11, you can set up a TUN/TAP device
   there is no need for root assistance.  Setting up the device is done
   as follows:
 
-  +\bo  Create the device with tunctl (available from the UML utilities
+  o  Create the device with tunctl (available from the UML utilities
      tarball)
 
 
   where uid is the user id or username that UML will be run as.  This
   will tell you what device was created.
 
-  +\bo  Configure the device IP (change IP addresses and device name to
+  o  Configure the device IP (change IP addresses and device name to
      suit)
 
 
 
 
 
-  +\bo  Set up routing and arping if desired - this is my recipe, there are
+  o  Set up routing and arping if desired - this is my recipe, there are
      other ways of doing the same thing
 
 
   utility which reads the information from a config file and sets up
   devices at boot time.
 
-  +\bo  Rather than using up two IPs and ARPing for one of them, you can
+  o  Rather than using up two IPs and ARPing for one of them, you can
      also provide direct access to your LAN by the UML by using a
      bridge.
 
   Note that 'br0' should be setup using ifconfig with the existing IP
   address of eth0, as eth0 no longer has its own IP.
 
-  +\bo
+  o
 
 
      Also, the /dev/net/tun device must be writable by the user running
   devices and chgrp /dev/net/tun to that group with mode 664 or 660.
 
 
-  +\bo  Once the device is set up, run UML with 'eth0=tuntap,device name'
+  o  Once the device is set up, run UML with 'eth0=tuntap,device name'
      (i.e. 'eth0=tuntap,tap0') on the command line (or do it with the
      mconsole config command).
 
-  +\bo  Bring the eth device up in UML and you're in business.
+  o  Bring the eth device up in UML and you're in business.
 
      If you don't want that tap device any more, you can make it non-
      persistent with
 
 
 
-  6\b6.\b.8\b8.\b.  E\bEt\bth\bhe\ber\brt\bta\bap\bp
+  6.8.  Ethertap
 
   Ethertap is the general mechanism on 2.2 for userspace processes to
   exchange packets with the kernel.
 
 
 
-  6\b6.\b.9\b9.\b.  T\bTh\bhe\be s\bsw\bwi\bit\btc\bch\bh d\bda\bae\bem\bmo\bon\bn
+  6.9.  The switch daemon
 
-  N\bNo\bot\bte\be: This is the daemon formerly known as uml_router, but which was
+  Note: This is the daemon formerly known as uml_router, but which was
   renamed so the network weenies of the world would stop growling at me.
 
 
 
 
 
-  6\b6.\b.1\b10\b0.\b.  S\bSl\bli\bip\bp
+  6.10.  Slip
 
   Slip is another, less general, mechanism for a process to communicate
   with the host networking.  In contrast to the ethertap interface,
 
 
 
-  6\b6.\b.1\b11\b1.\b.  S\bSl\bli\bir\brp\bp
+  6.11.  Slirp
 
   slirp uses an external program, usually /usr/bin/slirp, to provide IP
   only networking connectivity through the host. This is similar to IP
 
 
 
-  6\b6.\b.1\b12\b2.\b.  p\bpc\bca\bap\bp
+  6.12.  pcap
 
   The pcap transport is attached to a UML ethernet device on the command
   line or with uml_mconsole with the following syntax:
 
 
 
-  6\b6.\b.1\b13\b3.\b.  S\bSe\bet\btt\bti\bin\bng\bg u\bup\bp t\bth\bhe\be h\bho\bos\bst\bt y\byo\bou\bur\brs\bse\bel\blf\bf
+  6.13.  Setting up the host yourself
 
   If you don't specify an address for the host side of the ethertap or
   slip device, UML won't do any setup on the host.  So this is what is
   192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your
   own network):
 
-  +\bo  The device needs to be configured with its IP address.  Tap devices
+  o  The device needs to be configured with its IP address.  Tap devices
      are also configured with an mtu of 1484.  Slip devices are
      configured with a point-to-point address pointing at the UML ip
      address.
 
 
 
-  +\bo  If a tap device is being set up, a route is set to the UML IP.
+  o  If a tap device is being set up, a route is set to the UML IP.
 
 
        UML# route add -host 192.168.0.250 gw 192.168.0.251
 
 
 
-  +\bo  To allow other hosts on your network to see the virtual machine,
+  o  To allow other hosts on your network to see the virtual machine,
      proxy arp is set up for it.
 
 
 
 
 
-  +\bo  Finally, the host is set up to route packets.
+  o  Finally, the host is set up to route packets.
 
 
        host#  echo 1 > /proc/sys/net/ipv4/ip_forward
 
 
 
-  7\b7.\b.  S\bSh\bha\bar\bri\bin\bng\bg F\bFi\bil\ble\bes\bsy\bys\bst\bte\bem\bms\bs b\bbe\bet\btw\bwe\bee\ben\bn V\bVi\bir\brt\btu\bua\bal\bl M\bMa\bac\bch\bhi\bin\bne\bes\bs
+  7.  Sharing Filesystems between Virtual Machines
 
 
 
 
-  7\b7.\b.1\b1.\b.  A\bA w\bwa\bar\brn\bni\bin\bng\bg
+  7.1.  A warning
 
   Don't attempt to share filesystems simply by booting two UMLs from the
   same file.  That's the same thing as booting two physical machines
 
 
 
-  7\b7.\b.2\b2.\b.  U\bUs\bsi\bin\bng\bg l\bla\bay\bye\ber\bre\bed\bd b\bbl\blo\boc\bck\bk d\bde\bev\bvi\bic\bce\bes\bs
+  7.2.  Using layered block devices
 
   The way to share a filesystem between two virtual machines is to use
   the copy-on-write (COW) layering capability of the ubd block driver.
 
 
 
-  7\b7.\b.3\b3.\b.  N\bNo\bot\bte\be!\b!
+  7.3.  Note!
 
   When checking the size of the COW file in order to see the gobs of
   space that you're saving, make sure you use 'ls -ls' to see the actual
 
 
 
-  7\b7.\b.4\b4.\b.  A\bAn\bno\bot\bth\bhe\ber\br w\bwa\bar\brn\bni\bin\bng\bg
+  7.4.  Another warning
 
   Once a filesystem is being used as a readonly backing file for a COW
   file, do not boot directly from it or modify it in any way.  Doing so
 
 
 
-  7\b7.\b.5\b5.\b.  u\bum\bml\bl_\b_m\bmo\boo\bo :\b: M\bMe\ber\brg\bgi\bin\bng\bg a\ba C\bCO\bOW\bW f\bfi\bil\ble\be w\bwi\bit\bth\bh i\bit\bts\bs b\bba\bac\bck\bki\bin\bng\bg f\bfi\bil\ble\be
+  7.5.  uml_moo : Merging a COW file with its backing file
 
   Depending on how you use UML and COW devices, it may be advisable to
   merge the changes in the COW file into the backing file every once in
 
 
 
-  8\b8.\b.  C\bCr\bre\bea\bat\bti\bin\bng\bg f\bfi\bil\ble\bes\bsy\bys\bst\bte\bem\bms\bs
+  8.  Creating filesystems
 
 
   You may want to create and mount new UML filesystems, either because
   should be easy to translate to the filesystem of your choice.
 
 
-  8\b8.\b.1\b1.\b.  C\bCr\bre\bea\bat\bte\be t\bth\bhe\be f\bfi\bil\ble\bes\bsy\bys\bst\bte\bem\bm f\bfi\bil\ble\be
+  8.1.  Create the filesystem file
 
   dd is your friend.  All you need to do is tell dd to create an empty
   file of the appropriate size.  I usually make it sparse to save time
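
For illustration only, the same sparse file can be created from a short
program; the name and size below are placeholders, equivalent in effect
to the dd invocation this section describes:

  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  /* Sketch: create a sparse 512 MB backing file for a UML filesystem
   * (placeholder name and size). */
  int main(void)
  {
          int fd = open("root_fs.new", O_WRONLY | O_CREAT | O_TRUNC, 0644);

          if (fd < 0 || ftruncate(fd, 512L * 1024 * 1024) < 0) {
                  perror("root_fs.new");
                  return 1;
          }
          return close(fd) ? 1 : 0;
  }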
 
 
 
-  8\b8.\b.2\b2.\b.  A\bAs\bss\bsi\big\bgn\bn t\bth\bhe\be f\bfi\bil\ble\be t\bto\bo a\ba U\bUM\bML\bL d\bde\bev\bvi\bic\bce\be
+  8.2.  Assign the file to a UML device
 
   Add an argument like the following to the UML command line:
 
 
 
 
-  8\b8.\b.3\b3.\b.  C\bCr\bre\bea\bat\bti\bin\bng\bg a\ban\bnd\bd m\bmo\bou\bun\bnt\bti\bin\bng\bg t\bth\bhe\be f\bfi\bil\ble\bes\bsy\bys\bst\bte\bem\bm
+  8.3.  Creating and mounting the filesystem
 
   Make sure that the filesystem is available, either by being built into
   the kernel, or available as a module, then boot up UML and log in.  If
 
 
 
-  9\b9.\b.  H\bHo\bos\bst\bt f\bfi\bil\ble\be a\bac\bcc\bce\bes\bss\bs
+  9.  Host file access
 
 
   If you want to access files on the host machine from inside UML, you
   files contained in it just as you would on the host.
 
 
-  9\b9.\b.1\b1.\b.  U\bUs\bsi\bin\bng\bg h\bho\bos\bst\btf\bfs\bs
+  9.1.  Using hostfs
 
   To begin with, make sure that hostfs is available inside the virtual
   machine with
 
 
 
-  9\b9.\b.2\b2.\b.  h\bho\bos\bst\btf\bfs\bs a\bas\bs t\bth\bhe\be r\bro\boo\bot\bt f\bfi\bil\ble\bes\bsy\bys\bst\bte\bem\bm
+  9.2.  hostfs as the root filesystem
 
   It's possible to boot from a directory hierarchy on the host using
   hostfs rather than using the standard filesystem in a file.
   UML should then boot as it does normally.
 
 
-  9\b9.\b.3\b3.\b.  B\bBu\bui\bil\bld\bdi\bin\bng\bg h\bho\bos\bst\btf\bfs\bs
+  9.3.  Building hostfs
 
   If you need to build hostfs because it's not in your kernel, you have
   two choices:
 
 
 
-  +\bo  Compiling hostfs into the kernel:
+  o  Compiling hostfs into the kernel:
 
 
      Reconfigure the kernel and set the 'Host filesystem' option under
 
 
-  +\bo  Compiling hostfs as a module:
+  o  Compiling hostfs as a module:
 
 
      Reconfigure the kernel and set the 'Host filesystem' option under
 
 
 
-  1\b10\b0.\b.  T\bTh\bhe\be M\bMa\ban\bna\bag\bge\bem\bme\ben\bnt\bt C\bCo\bon\bns\bso\bol\ble\be
+  10.  The Management Console
 
 
 
 
   There are a number of things you can do with the mconsole interface:
 
-  +\bo  get the kernel version
+  o  get the kernel version
 
-  +\bo  add and remove devices
+  o  add and remove devices
 
-  +\bo  halt or reboot the machine
+  o  halt or reboot the machine
 
-  +\bo  Send SysRq commands
+  o  Send SysRq commands
 
-  +\bo  Pause and resume the UML
+  o  Pause and resume the UML
 
 
   You need the mconsole client (uml_mconsole) which is present in CVS
 
   You'll get a prompt, at which you can run one of these commands:
 
-  +\bo  version
+  o  version
 
-  +\bo  halt
+  o  halt
 
-  +\bo  reboot
+  o  reboot
 
-  +\bo  config
+  o  config
 
-  +\bo  remove
+  o  remove
 
-  +\bo  sysrq
+  o  sysrq
 
-  +\bo  help
+  o  help
 
-  +\bo  cad
+  o  cad
 
-  +\bo  stop
+  o  stop
 
-  +\bo  go
+  o  go
 
 
-  1\b10\b0.\b.1\b1.\b.  v\bve\ber\brs\bsi\bio\bon\bn
+  10.1.  version
 
   This takes no arguments.  It prints the UML version.
 
 
 
 
-  1\b10\b0.\b.2\b2.\b.  h\bha\bal\blt\bt a\ban\bnd\bd r\bre\beb\bbo\boo\bot\bt
+  10.2.  halt and reboot
 
   These take no arguments.  They shut the machine down immediately, with
   no syncing of disks and no clean shutdown of userspace.  So, they are
 
 
 
-  1\b10\b0.\b.3\b3.\b.  c\bco\bon\bnf\bfi\big\bg
+  10.3.  config
 
   "config" adds a new device to the virtual machine.  Currently the ubd
   and network drivers support this.  It takes one argument, which is the
 
 
 
-  1\b10\b0.\b.4\b4.\b.  r\bre\bem\bmo\bov\bve\be
+  10.4.  remove
 
   "remove" deletes a device from the system.  Its argument is just the
   name of the device to be removed. The device must be idle in whatever
 
 
 
-  1\b10\b0.\b.5\b5.\b.  s\bsy\bys\bsr\brq\bq
+  10.5.  sysrq
 
   This takes one argument, which is a single letter.  It calls the
   generic kernel's SysRq driver, which does whatever is called for by
 
 
 
-  1\b10\b0.\b.6\b6.\b.  h\bhe\bel\blp\bp
+  10.6.  help
 
   "help" returns a string listing the valid commands and what each one
   does.
 
 
 
-  1\b10\b0.\b.7\b7.\b.  c\bca\bad\bd
+  10.7.  cad
 
   This invokes the Ctl-Alt-Del action on init.  What exactly this ends
   up doing is up to /etc/inittab.  Normally, it reboots the machine.
 
 
 
-  1\b10\b0.\b.8\b8.\b.  s\bst\bto\bop\bp
+  10.8.  stop
 
   This puts the UML in a loop reading mconsole requests until a 'go'
   mconsole command is received. This is very useful for making backups
 
 
 
-  1\b10\b0.\b.9\b9.\b.  g\bgo\bo
+  10.9.  go
 
   This resumes a UML after being paused by a 'stop' command. Note that
   when the UML has resumed, TCP connections may have timed out and if
 
 
 
-  1\b11\b1.\b.  K\bKe\ber\brn\bne\bel\bl d\bde\beb\bbu\bug\bgg\bgi\bin\bng\bg
+  11.  Kernel debugging
 
 
-  N\bNo\bot\bte\be:\b: The interface that makes debugging, as described here, possible
+  Note: The interface that makes debugging, as described here, possible
   is present in 2.4.0-test6 kernels and later.
 
 
 
 
 
-  1\b11\b1.\b.1\b1.\b.  S\bSt\bta\bar\brt\bti\bin\bng\bg t\bth\bhe\be k\bke\ber\brn\bne\bel\bl u\bun\bnd\bde\ber\br g\bgd\bdb\bb
+  11.1.  Starting the kernel under gdb
 
   You can have the kernel running under the control of gdb from the
   beginning by putting 'debug' on the command line.  You will get an
   There is a transcript of a debugging session  here <debug-
   session.html> , with breakpoints being set in the scheduler and in an
   interrupt handler.
-  1\b11\b1.\b.2\b2.\b.  E\bEx\bxa\bam\bmi\bin\bni\bin\bng\bg s\bsl\ble\bee\bep\bpi\bin\bng\bg p\bpr\bro\boc\bce\bes\bss\bse\bes\bs
+  11.2.  Examining sleeping processes
 
   Not every bug is evident in the currently running process.  Sometimes,
   processes hang in the kernel when they shouldn't because they've
 
   Now what you do is this:
 
-  +\bo  detach from the current thread
+  o  detach from the current thread
 
 
        (UML gdb)  det
 
 
 
-  +\bo  attach to the thread you are interested in
+  o  attach to the thread you are interested in
 
 
        (UML gdb)  att <host pid>
 
 
 
-  +\bo  look at its stack and anything else of interest
+  o  look at its stack and anything else of interest
 
 
        (UML gdb)  bt
   Note that you can't do anything at this point that requires that a
   process execute, e.g. calling a function
 
-  +\bo  when you're done looking at that process, reattach to the current
+  o  when you're done looking at that process, reattach to the current
      thread and continue it
 
 
 
 
 
-  1\b11\b1.\b.3\b3.\b.  R\bRu\bun\bnn\bni\bin\bng\bg d\bdd\bdd\bd o\bon\bn U\bUM\bML\bL
+  11.3.  Running ddd on UML
 
   ddd works on UML, but requires a special kludge.  The process goes
   like this:
 
-  +\bo  Start ddd
+  o  Start ddd
 
 
        host% ddd linux
 
 
 
-  +\bo  With ps, get the pid of the gdb that ddd started.  You can ask the
+  o  With ps, get the pid of the gdb that ddd started.  You can ask the
      gdb to tell you, but for some reason that confuses things and
      causes a hang.
 
-  +\bo  run UML with 'debug=parent gdb-pid=<pid>' added to the command line
+  o  run UML with 'debug=parent gdb-pid=<pid>' added to the command line
      - it will just sit there after you hit return
 
-  +\bo  type 'att 1' to the ddd gdb and you will see something like
+  o  type 'att 1' to the ddd gdb and you will see something like
 
 
        0xa013dc51 in __kill ()
 
 
 
-  +\bo  At this point, type 'c', UML will boot up, and you can use ddd just
+  o  At this point, type 'c', UML will boot up, and you can use ddd just
      as you do on any other process.
 
 
 
-  1\b11\b1.\b.4\b4.\b.  D\bDe\beb\bbu\bug\bgg\bgi\bin\bng\bg m\bmo\bod\bdu\bul\ble\bes\bs
+  11.4.  Debugging modules
 
   gdb has support for debugging code which is dynamically loaded into
   the process.  This support is what is needed to debug kernel modules
 
 
 
-  1\b11\b1.\b.5\b5.\b.  A\bAt\btt\bta\bac\bch\bhi\bin\bng\bg g\bgd\bdb\bb t\bto\bo t\bth\bhe\be k\bke\ber\brn\bne\bel\bl
+  11.5.  Attaching gdb to the kernel
 
   If you don't have the kernel running under gdb, you can attach gdb to
   it later by sending the tracing thread a SIGUSR1.  The first line of
 
 
 
-  1\b11\b1.\b.6\b6.\b.  U\bUs\bsi\bin\bng\bg a\bal\blt\bte\ber\brn\bna\bat\bte\be d\bde\beb\bbu\bug\bgg\bge\ber\brs\bs
+  11.6.  Using alternate debuggers
 
   UML has support for attaching to an already running debugger rather
   than starting gdb itself.  This is present in CVS as of 17 Apr 2001.
   An example of an alternate debugger is strace.  You can strace the
   actual kernel as follows:
 
-  +\bo  Run the following in a shell
+  o  Run the following in a shell
 
 
        host%
 
 
 
-  +\bo  Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
+  o  Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
      by the previous command
 
-  +\bo  Hit return in the shell, and UML will start running, and strace
+  o  Hit return in the shell, and UML will start running, and strace
      output will start accumulating in the output file.
 
      Note that this is different from running
 
 
 
-  1\b12\b2.\b.  K\bKe\ber\brn\bne\bel\bl d\bde\beb\bbu\bug\bgg\bgi\bin\bng\bg e\bex\bxa\bam\bmp\bpl\ble\bes\bs
+  12.  Kernel debugging examples
 
-  1\b12\b2.\b.1\b1.\b.  T\bTh\bhe\be c\bca\bas\bse\be o\bof\bf t\bth\bhe\be h\bhu\bun\bng\bg f\bfs\bsc\bck\bk
+  12.1.  The case of the hung fsck
 
   When booting up the kernel, fsck failed, and dropped me into a shell
   to fix things up.  I ran fsck -y, which hung:
 
   The interesting things here are :
 
-  +\bo  There are two segfaults on this stack (frames 9 and 14)
+  o  There are two segfaults on this stack (frames 9 and 14)
 
-  +\bo  The first faulting address (frame 11) is 0x50000800
+  o  The first faulting address (frame 11) is 0x50000800
 
   (gdb) p (void *)1342179328
   $16 = (void *) 0x50000800
   on will be somewhat clearer.
 
 
-  1\b12\b2.\b.2\b2.\b.  E\bEp\bpi\bis\bso\bod\bde\be 2\b2:\b: T\bTh\bhe\be c\bca\bas\bse\be o\bof\bf t\bth\bhe\be h\bhu\bun\bng\bg f\bfs\bsc\bck\bk
+  12.2.  Episode 2: The case of the hung fsck
 
   After setting a trap in the SEGV handler for accesses to the signal
   thread's stack, I reran the kernel.
 
 
 
-  1\b13\b3.\b.  W\bWh\bha\bat\bt t\bto\bo d\bdo\bo w\bwh\bhe\ben\bn U\bUM\bML\bL d\bdo\boe\bes\bsn\bn'\b't\bt w\bwo\bor\brk\bk
+  13.  What to do when UML doesn't work
 
 
 
 
-  1\b13\b3.\b.1\b1.\b.  S\bSt\btr\bra\ban\bng\bge\be c\bco\bom\bmp\bpi\bil\bla\bat\bti\bio\bon\bn e\ber\brr\bro\bor\brs\bs w\bwh\bhe\ben\bn y\byo\bou\bu b\bbu\bui\bil\bld\bd f\bfr\bro\bom\bm s\bso\bou\bur\brc\bce\be
+  13.1.  Strange compilation errors when you build from source
 
   As of test11, it is necessary to have "ARCH=um" in the environment or
   on the make command line for all steps in building UML, including
 
 
 
-  1\b13\b3.\b.3\b3.\b.  A\bA v\bva\bar\bri\bie\bet\bty\by o\bof\bf p\bpa\ban\bni\bic\bcs\bs a\ban\bnd\bd h\bha\ban\bng\bgs\bs w\bwi\bit\bth\bh /\b/t\btm\bmp\bp o\bon\bn a\ba r\bre\bei\bis\bse\ber\brf\bfs\bs  f\bfi\bil\ble\bes\bsy\bys\bs-\b-
-  t\bte\bem\bm
+  13.3.  A variety of panics and hangs with /tmp on a reiserfs  filesys-
+  tem
 
   I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27.
   Panics preceded by
 
 
 
-  1\b13\b3.\b.4\b4.\b.  T\bTh\bhe\be c\bco\bom\bmp\bpi\bil\ble\be f\bfa\bai\bil\bls\bs w\bwi\bit\bth\bh e\ber\brr\bro\bor\brs\bs a\bab\bbo\bou\but\bt c\bco\bon\bnf\bfl\bli\bic\bct\bti\bin\bng\bg t\bty\byp\bpe\bes\bs f\bfo\bor\br
-  '\b'o\bop\bpe\ben\bn'\b',\b, '\b'd\bdu\bup\bp'\b',\b, a\ban\bnd\bd '\b'w\bwa\bai\bit\btp\bpi\bid\bd'\b'
+  13.4.  The compile fails with errors about conflicting types for
+  'open', 'dup', and 'waitpid'
 
   This happens when you build in /usr/src/linux.  The UML build makes
   the include/asm link point to include/asm-um.  /usr/include/asm points
 
 
 
-  1\b13\b3.\b.5\b5.\b.  U\bUM\bML\bL d\bdo\boe\bes\bsn\bn'\b't\bt w\bwo\bor\brk\bk w\bwh\bhe\ben\bn /\b/t\btm\bmp\bp i\bis\bs a\ban\bn N\bNF\bFS\bS f\bfi\bil\ble\bes\bsy\bys\bst\bte\bem\bm
+  13.5.  UML doesn't work when /tmp is an NFS filesystem
 
   This seems to be a situation similar to the ReiserFS problem above.
   Some versions of NFS seem not to handle mmap correctly, which UML
   depends on.  The workaround is to have /tmp be a non-NFS directory.
 
 
-  1\b13\b3.\b.6\b6.\b.  U\bUM\bML\bL h\bha\ban\bng\bgs\bs o\bon\bn b\bbo\boo\bot\bt w\bwh\bhe\ben\bn c\bco\bom\bmp\bpi\bil\ble\bed\bd w\bwi\bit\bth\bh g\bgp\bpr\bro\bof\bf s\bsu\bup\bpp\bpo\bor\brt\bt
+  13.6.  UML hangs on boot when compiled with gprof support
 
   If you build UML with gprof support and, early in the boot, it does
   this
 
 
 
-  1\b13\b3.\b.7\b7.\b.  s\bsy\bys\bsl\blo\bog\bgd\bd d\bdi\bie\bes\bs w\bwi\bit\bth\bh a\ba S\bSI\bIG\bGT\bTE\bER\bRM\bM o\bon\bn s\bst\bta\bar\brt\btu\bup\bp
+  13.7.  syslogd dies with a SIGTERM on startup
 
   The exact boot error depends on the distribution that you're booting,
   but Debian produces this:
 
 
 
-  1\b13\b3.\b.8\b8.\b.  T\bTU\bUN\bN/\b/T\bTA\bAP\bP n\bne\bet\btw\bwo\bor\brk\bki\bin\bng\bg d\bdo\boe\bes\bsn\bn'\b't\bt w\bwo\bor\brk\bk o\bon\bn a\ba 2\b2.\b.4\b4 h\bho\bos\bst\bt
+  13.8.  TUN/TAP networking doesn't work on a 2.4 host
 
   There are a couple of problems which were pointed out
   <http://www.geocrawler.com/lists/3/SourceForge/597/0/> by Tim
   Robinson <timro at trkr dot net>
 
-  +\bo  It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
+  o  It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
      The fix is to upgrade to something more recent and then read the
      next item.
 
-  +\bo  If you see
+  o  If you see
 
 
        File descriptor in bad state
 
 
 
-  1\b13\b3.\b.9\b9.\b.  Y\bYo\bou\bu c\bca\ban\bn n\bne\bet\btw\bwo\bor\brk\bk t\bto\bo t\bth\bhe\be h\bho\bos\bst\bt b\bbu\but\bt n\bno\bot\bt t\bto\bo o\bot\bth\bhe\ber\br m\bma\bac\bch\bhi\bin\bne\bes\bs o\bon\bn t\bth\bhe\be
-  n\bne\bet\bt
+  13.9.  You can network to the host but not to other machines on the
+  net
 
   If you can connect to the host, and the host can connect to UML, but
   you cannot connect to any other machines, then you may need to enable
 
 
 
-  1\b13\b3.\b.1\b10\b0.\b.  I\bI h\bha\bav\bve\be n\bno\bo r\bro\boo\bot\bt a\ban\bnd\bd I\bI w\bwa\ban\bnt\bt t\bto\bo s\bsc\bcr\bre\bea\bam\bm
+  13.10.  I have no root and I want to scream
 
   Thanks to Birgit Wahlich for telling me about this strange one.  It
   turns out that there's a limit of six environment variables on the
 
 
 
-  1\b13\b3.\b.1\b11\b1.\b.  U\bUM\bML\bL b\bbu\bui\bil\bld\bd c\bco\bon\bnf\bfl\bli\bic\bct\bt b\bbe\bet\btw\bwe\bee\ben\bn p\bpt\btr\bra\bac\bce\be.\b.h\bh a\ban\bnd\bd u\buc\bco\bon\bnt\bte\bex\bxt\bt.\b.h\bh
+  13.11.  UML build conflict between ptrace.h and ucontext.h
 
   On some older systems, /usr/include/asm/ptrace.h and
   /usr/include/sys/ucontext.h define the same names.  So, when they're
 
 
 
-  1\b13\b3.\b.1\b12\b2.\b.  T\bTh\bhe\be U\bUM\bML\bL B\bBo\bog\bgo\boM\bMi\bip\bps\bs i\bis\bs e\bex\bxa\bac\bct\btl\bly\by h\bha\bal\blf\bf t\bth\bhe\be h\bho\bos\bst\bt'\b's\bs B\bBo\bog\bgo\boM\bMi\bip\bps\bs
+  13.12.  The UML BogoMips is exactly half the host's BogoMips
 
   On i386 kernels, there are two ways of running the loop that is used
   to calculate the BogoMips rating, using the TSC if it's there or using
 
 
 
-  1\b13\b3.\b.1\b13\b3.\b.  W\bWh\bhe\ben\bn y\byo\bou\bu r\bru\bun\bn U\bUM\bML\bL,\b, i\bit\bt i\bim\bmm\bme\bed\bdi\bia\bat\bte\bel\bly\by s\bse\beg\bgf\bfa\bau\bul\blt\bts\bs
+  13.13.  When you run UML, it immediately segfaults
 
   If the host is configured with the 2G/2G address space split, that's
   why.  See ``UML on 2G/2G hosts''  for the details on getting UML to
 
 
 
-  1\b13\b3.\b.1\b14\b4.\b.  x\bxt\bte\ber\brm\bms\bs a\bap\bpp\bpe\bea\bar\br,\b, t\bth\bhe\ben\bn i\bim\bmm\bme\bed\bdi\bia\bat\bte\bel\bly\by d\bdi\bis\bsa\bap\bpp\bpe\bea\bar\br
+  13.14.  xterms appear, then immediately disappear
 
   If you're running an up to date kernel with an old release of
   uml_utilities, the port-helper program will not work properly, so
 
 
 
-  1\b13\b3.\b.1\b15\b5.\b.  A\bAn\bny\by o\bot\bth\bhe\ber\br p\bpa\ban\bni\bic\bc,\b, h\bha\ban\bng\bg,\b, o\bor\br s\bst\btr\bra\ban\bng\bge\be b\bbe\beh\bha\bav\bvi\bio\bor\br
+  13.15.  Any other panic, hang, or strange behavior
 
   If you're seeing truly strange behavior, such as hangs or panics that
   happen in random places, or you try running the debugger to see what's
 
   If you want to be super-helpful, read ``Diagnosing Problems'' and
   follow the instructions contained therein.
-  1\b14\b4.\b.  D\bDi\bia\bag\bgn\bno\bos\bsi\bin\bng\bg P\bPr\bro\bob\bbl\ble\bem\bms\bs
+  14.  Diagnosing Problems
 
 
   If you get UML to crash, hang, or otherwise misbehave, you should
   ``Kernel debugging''  UML first.
 
 
-  1\b14\b4.\b.1\b1.\b.  C\bCa\bas\bse\be 1\b1 :\b: N\bNo\bor\brm\bma\bal\bl k\bke\ber\brn\bne\bel\bl p\bpa\ban\bni\bic\bcs\bs
+  14.1.  Case 1 : Normal kernel panics
 
   The most common case is for a normal thread to panic.  To debug this,
   you will need to run it under the debugger (add 'debug' to the command
   to get that information from the faulting ip.
 
 
-  1\b14\b4.\b.2\b2.\b.  C\bCa\bas\bse\be 2\b2 :\b: T\bTr\bra\bac\bci\bin\bng\bg t\bth\bhr\bre\bea\bad\bd p\bpa\ban\bni\bic\bcs\bs
+  14.2.  Case 2 : Tracing thread panics
 
   The less common and more painful case is when the tracing thread
   panics.  In this case, the kernel debugger will be useless because it
   backtrace in and wait for our crack debugging team to fix the problem.
 
 
-  1\b14\b4.\b.3\b3.\b.  C\bCa\bas\bse\be 3\b3 :\b: T\bTr\bra\bac\bci\bin\bng\bg t\bth\bhr\bre\bea\bad\bd p\bpa\ban\bni\bic\bcs\bs c\bca\bau\bus\bse\bed\bd b\bby\by o\bot\bth\bhe\ber\br t\bth\bhr\bre\bea\bad\bds\bs
+  14.3.  Case 3 : Tracing thread panics caused by other threads
 
   However, there are cases where the misbehavior of another thread
   caused the problem.  The most common panic of this type is:
 
 
 
-  1\b14\b4.\b.4\b4.\b.  C\bCa\bas\bse\be 4\b4 :\b: H\bHa\ban\bng\bgs\bs
+  14.4.  Case 4 : Hangs
 
   Hangs seem to be fairly rare, but they sometimes happen.  When a hang
   happens, we need a backtrace from the offending process.  Run the
 
 
 
-  1\b15\b5.\b.  T\bTh\bha\ban\bnk\bks\bs
+  15.  Thanks
 
 
   A number of people have helped this project in various ways, and this
   bookkeeping lapses and I forget about contributions.
 
 
-  1\b15\b5.\b.1\b1.\b.  C\bCo\bod\bde\be a\ban\bnd\bd D\bDo\boc\bcu\bum\bme\ben\bnt\bta\bat\bti\bio\bon\bn
+  15.1.  Code and Documentation
 
   Rusty Russell <rusty at linuxcare.com.au>  -
 
-  +\bo  wrote the  HOWTO <http://user-mode-
+  o  wrote the  HOWTO <http://user-mode-
      linux.sourceforge.net/UserModeLinux-HOWTO.html>
 
-  +\bo  prodded me into making this project official and putting it on
+  o  prodded me into making this project official and putting it on
      SourceForge
 
-  +\bo  came up with the way cool UML logo <http://user-mode-
+  o  came up with the way cool UML logo <http://user-mode-
      linux.sourceforge.net/uml-small.png>
 
-  +\bo  redid the config process
+  o  redid the config process
 
 
   Peter Moulder <reiter at netspace.net.au>  - Fixed my config and build
 
   Bill Stearns <wstearns at pobox.com>  -
 
-  +\bo  HOWTO updates
+  o  HOWTO updates
 
-  +\bo  lots of bug reports
+  o  lots of bug reports
 
-  +\bo  lots of testing
+  o  lots of testing
 
-  +\bo  dedicated a box (uml.ists.dartmouth.edu) to support UML development
+  o  dedicated a box (uml.ists.dartmouth.edu) to support UML development
 
-  +\bo  wrote the mkrootfs script, which allows bootable filesystems of
+  o  wrote the mkrootfs script, which allows bootable filesystems of
      RPM-based distributions to be cranked out
 
-  +\bo  cranked out a large number of filesystems with said script
+  o  cranked out a large number of filesystems with said script
 
 
   Jim Leu <jleu at mindspring.com>  - Wrote the virtual ethernet driver
 
   David Coulson <http://davidcoulson.net>  -
 
-  +\bo  Set up the usermodelinux.org <http://usermodelinux.org>  site,
+  o  Set up the usermodelinux.org <http://usermodelinux.org>  site,
      which is a great way of keeping the UML user community on top of
      UML goings-on.
 
-  +\bo  Site documentation and updates
+  o  Site documentation and updates
 
-  +\bo  Nifty little UML management daemon  UMLd
+  o  Nifty little UML management daemon  UMLd
      <http://uml.openconsultancy.com/umld/>
 
-  +\bo  Lots of testing and bug reports
+  o  Lots of testing and bug reports
 
 
 
 
-  1\b15\b5.\b.2\b2.\b.  F\bFl\blu\bus\bsh\bhi\bin\bng\bg o\bou\but\bt b\bbu\bug\bgs\bs
+  15.2.  Flushing out bugs
 
 
 
-  +\bo  Yuri Pudgorodsky
+  o  Yuri Pudgorodsky
 
-  +\bo  Gerald Britton
+  o  Gerald Britton
 
-  +\bo  Ian Wehrman
+  o  Ian Wehrman
 
-  +\bo  Gord Lamb
+  o  Gord Lamb
 
-  +\bo  Eugene Koontz
+  o  Eugene Koontz
 
-  +\bo  John H. Hartman
+  o  John H. Hartman
 
-  +\bo  Anders Karlsson
+  o  Anders Karlsson
 
-  +\bo  Daniel Phillips
+  o  Daniel Phillips
 
-  +\bo  John Fremlin
+  o  John Fremlin
 
-  +\bo  Rainer Burgstaller
+  o  Rainer Burgstaller
 
-  +\bo  James Stevenson
+  o  James Stevenson
 
-  +\bo  Matt Clay
+  o  Matt Clay
 
-  +\bo  Cliff Jefferies
+  o  Cliff Jefferies
 
-  +\bo  Geoff Hoff
+  o  Geoff Hoff
 
-  +\bo  Lennert Buytenhek
+  o  Lennert Buytenhek
 
-  +\bo  Al Viro
+  o  Al Viro
 
-  +\bo  Frank Klingenhoefer
+  o  Frank Klingenhoefer
 
-  +\bo  Livio Baldini Soares
+  o  Livio Baldini Soares
 
-  +\bo  Jon Burgess
+  o  Jon Burgess
 
-  +\bo  Petru Paler
+  o  Petru Paler
 
-  +\bo  Paul
+  o  Paul
 
-  +\bo  Chris Reahard
+  o  Chris Reahard
 
-  +\bo  Sverker Nilsson
+  o  Sverker Nilsson
 
-  +\bo  Gong Su
+  o  Gong Su
 
-  +\bo  johan verrept
+  o  johan verrept
 
-  +\bo  Bjorn Eriksson
+  o  Bjorn Eriksson
 
-  +\bo  Lorenzo Allegrucci
+  o  Lorenzo Allegrucci
 
-  +\bo  Muli Ben-Yehuda
+  o  Muli Ben-Yehuda
 
-  +\bo  David Mansfield
+  o  David Mansfield
 
-  +\bo  Howard Goff
+  o  Howard Goff
 
-  +\bo  Mike Anderson
+  o  Mike Anderson
 
-  +\bo  John Byrne
+  o  John Byrne
 
-  +\bo  Sapan J. Batia
+  o  Sapan J. Batia
 
-  +\bo  Iris Huang
+  o  Iris Huang
 
-  +\bo  Jan Hudec
+  o  Jan Hudec
 
-  +\bo  Voluspa
+  o  Voluspa
 
 
 
 
-  1\b15\b5.\b.3\b3.\b.  B\bBu\bug\bgl\ble\bet\bts\bs a\ban\bnd\bd c\bcl\ble\bea\ban\bn-\b-u\bup\bps\bs
+  15.3.  Buglets and clean-ups
 
 
 
-  +\bo  Dave Zarzycki
+  o  Dave Zarzycki
 
-  +\bo  Adam Lazur
+  o  Adam Lazur
 
-  +\bo  Boria Feigin
+  o  Boria Feigin
 
-  +\bo  Brian J. Murrell
+  o  Brian J. Murrell
 
-  +\bo  JS
+  o  JS
 
-  +\bo  Roman Zippel
+  o  Roman Zippel
 
-  +\bo  Wil Cooley
+  o  Wil Cooley
 
-  +\bo  Ayelet Shemesh
+  o  Ayelet Shemesh
 
-  +\bo  Will Dyson
+  o  Will Dyson
 
-  +\bo  Sverker Nilsson
+  o  Sverker Nilsson
 
-  +\bo  dvorak
+  o  dvorak
 
-  +\bo  v.naga srinivas
+  o  v.naga srinivas
 
-  +\bo  Shlomi Fish
+  o  Shlomi Fish
 
-  +\bo  Roger Binns
+  o  Roger Binns
 
-  +\bo  johan verrept
+  o  johan verrept
 
-  +\bo  MrChuoi
+  o  MrChuoi
 
-  +\bo  Peter Cleve
+  o  Peter Cleve
 
-  +\bo  Vincent Guffens
+  o  Vincent Guffens
 
-  +\bo  Nathan Scott
+  o  Nathan Scott
 
-  +\bo  Patrick Caulfield
+  o  Patrick Caulfield
 
-  +\bo  jbearce
+  o  jbearce
 
-  +\bo  Catalin Marinas
+  o  Catalin Marinas
 
-  +\bo  Shane Spencer
+  o  Shane Spencer
 
-  +\bo  Zou Min
+  o  Zou Min
 
 
-  +\bo  Ryan Boder
+  o  Ryan Boder
 
-  +\bo  Lorenzo Colitti
+  o  Lorenzo Colitti
 
-  +\bo  Gwendal Grignou
+  o  Gwendal Grignou
 
-  +\bo  Andre' Breiler
+  o  Andre' Breiler
 
-  +\bo  Tsutomu Yasuda
+  o  Tsutomu Yasuda
 
 
 
-  1\b15\b5.\b.4\b4.\b.  C\bCa\bas\bse\be S\bSt\btu\bud\bdi\bie\bes\bs
+  15.4.  Case Studies
 
 
-  +\bo  Jon Wright
+  o  Jon Wright
 
-  +\bo  William McEwan
+  o  William McEwan
 
-  +\bo  Michael Richardson
+  o  Michael Richardson
 
 
 
-  1\b15\b5.\b.5\b5.\b.  O\bOt\bth\bhe\ber\br c\bco\bon\bnt\btr\bri\bib\bbu\but\bti\bio\bon\bns\bs
+  15.5.  Other contributions
 
 
   Bill Carr <Bill.Carr at compaq.com>  made the Red Hat mkrootfs script
index c406f9ba1923d377466e6453a5b21e936d4e9f53..a6afe342f0fc767fb4b90bb48a37786bc1ade371 100644 (file)
@@ -1716,6 +1716,7 @@ F:        include/linux/can.h
 F:     include/linux/can/core.h
 F:     include/linux/can/bcm.h
 F:     include/linux/can/raw.h
+F:     include/linux/can/gw.h
 
 CAN NETWORK DRIVERS
 M:     Wolfgang Grandegger <wg@grandegger.com>
@@ -2467,8 +2468,6 @@ L:        linux-edac@vger.kernel.org
 W:     bluesmoke.sourceforge.net
 S:     Maintained
 F:     drivers/edac/i7core_edac.c
-F:     drivers/edac/edac_mce.c
-F:     include/linux/edac_mce.h
 
 EDAC-I82975X
 M:     Ranganathan Desikan <ravi@jetztechnologies.com>
@@ -2492,6 +2491,13 @@ W:       bluesmoke.sourceforge.net
 S:     Maintained
 F:     drivers/edac/r82600_edac.c
 
+EDAC-SBRIDGE
+M:     Mauro Carvalho Chehab <mchehab@redhat.com>
+L:     linux-edac@vger.kernel.org
+W:     bluesmoke.sourceforge.net
+S:     Maintained
+F:     drivers/edac/sb_edac.c
+
 EDIROL UA-101/UA-1000 DRIVER
 M:     Clemens Ladisch <clemens@ladisch.de>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -3013,6 +3019,13 @@ F:       Documentation/hw_random.txt
 F:     drivers/char/hw_random/
 F:     include/linux/hw_random.h
 
+HARDWARE SPINLOCK CORE
+M:     Ohad Ben-Cohen <ohad@wizery.com>
+S:     Maintained
+F:     Documentation/hwspinlock.txt
+F:     drivers/hwspinlock/hwspinlock_*
+F:     include/linux/hwspinlock.h
+
 HARMONY SOUND DRIVER
 M:     Kyle McMartin <kyle@mcmartin.ca>
 L:     linux-parisc@vger.kernel.org
@@ -3205,8 +3218,7 @@ IA64 (Itanium) PLATFORM
 M:     Tony Luck <tony.luck@intel.com>
 M:     Fenghua Yu <fenghua.yu@intel.com>
 L:     linux-ia64@vger.kernel.org
-W:     http://www.ia64-linux.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux.git
 S:     Maintained
 F:     arch/ia64/
 
@@ -4465,11 +4477,9 @@ F:       Documentation/networking/vxge.txt
 F:     drivers/net/ethernet/neterion/
 
 NETFILTER/IPTABLES/IPCHAINS
-P:     Rusty Russell
-P:     Marc Boucher
-P:     James Morris
 P:     Harald Welte
 P:     Jozsef Kadlecsik
+M:     Pablo Neira Ayuso <pablo@netfilter.org>
 M:     Patrick McHardy <kaber@trash.net>
 L:     netfilter-devel@vger.kernel.org
 L:     netfilter@vger.kernel.org
@@ -4710,6 +4720,13 @@ S:       Maintained
 F:     drivers/video/omap2/
 F:     Documentation/arm/OMAP/DSS
 
+OMAP HARDWARE SPINLOCK SUPPORT
+M:     Ohad Ben-Cohen <ohad@wizery.com>
+L:     linux-omap@vger.kernel.org
+S:     Maintained
+F:     drivers/hwspinlock/omap_hwspinlock.c
+F:     arch/arm/mach-omap2/hwspinlock.c
+
 OMAP MMC SUPPORT
 M:     Jarkko Lavinen <jarkko.lavinen@nokia.com>
 L:     linux-omap@vger.kernel.org
@@ -6683,7 +6700,6 @@ F:        drivers/net/ethernet/8390/ne-h8300.c
 
 UDF FILESYSTEM
 M:     Jan Kara <jack@suse.cz>
-W:     http://linux-udf.sourceforge.net
 S:     Maintained
 F:     Documentation/filesystems/udf.txt
 F:     fs/udf/
index cfa84178eb261636ee2fe45714953024d26dcdb3..ba232d79fa81a2f87e2402fa03d203243f25f44e 100644
@@ -293,8 +293,7 @@ static struct mc13xxx_platform_data mc13783_pdata = {
                .num_regulators = ARRAY_SIZE(mx27_3ds_regulators),
 
        },
-       .flags  = MC13783_USE_REGULATOR | MC13783_USE_TOUCHSCREEN |
-       MC13783_USE_RTC,
+       .flags  = MC13XXX_USE_TOUCHSCREEN | MC13XXX_USE_RTC,
 };
 
 /* SPI */
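
The board-file updates above and below follow one scheme: the chip-specific MC13783_USE_* flags give way to the family-wide MC13XXX_USE_* names, and MC13XXX_USE_REGULATOR disappears entirely, apparently because the mc13xxx core now derives regulator support from a populated .regulators block rather than from a flag. A minimal sketch of the resulting platform-data shape, assuming the mc13xxx definitions in this tree (the empty regulator array is a hypothetical placeholder):

    #include <linux/kernel.h>
    #include <linux/mfd/mc13xxx.h>

    /* Hypothetical board regulators; a real board lists its init_data here. */
    static struct mc13xxx_regulator_init_data board_regulators[] = { };

    static struct mc13xxx_platform_data board_pmic = {
            .regulators = {
                    .regulators     = board_regulators,
                    .num_regulators = ARRAY_SIZE(board_regulators),
            },
            /* No MC13XXX_USE_REGULATOR: filling in .regulators is enough.
             * Only the remaining subdevices still need explicit flags. */
            .flags  = MC13XXX_USE_TOUCHSCREEN | MC13XXX_USE_RTC,
    };
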
index 60f1fda6ce97b33c9778c4e821cb0ebb11792d8f..b8c54b840185edc2bd4fd7b344e22bdc29187bd8 100644
@@ -492,7 +492,7 @@ static struct mc13xxx_platform_data mc13783_pdata = {
                .regulators = mx31_3ds_regulators,
                .num_regulators = ARRAY_SIZE(mx31_3ds_regulators),
        },
-       .flags  = MC13783_USE_REGULATOR | MC13783_USE_TOUCHSCREEN,
+       .flags  = MC13XXX_USE_TOUCHSCREEN,
 };
 
 /* SPI */
index c97c26d814ed1640df98300a90dc281380734541..05f1c71ba409f77cc0032a5c14b9f2285d18d93a 100644
@@ -112,8 +112,7 @@ static const struct spi_imx_master spi1_pdata __initconst = {
 };
 
 static struct mc13xxx_platform_data mc13783_pdata __initdata = {
-       .flags  = MC13XXX_USE_RTC |
-                 MC13XXX_USE_REGULATOR,
+       .flags = MC13XXX_USE_RTC,
 };
 
 static struct spi_board_info mc13783_spi_dev __initdata = {
index fff7791b7e7c8731a3575ead8d2eb020d617bccc..07034f44466a8c30f2c8ff3aaedca60513a61827 100644
@@ -31,6 +31,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/err.h>
+#include <linux/input.h>
 
 #include <linux/usb/otg.h>
 #include <linux/usb/ulpi.h>
@@ -225,7 +226,7 @@ static struct mc13xxx_regulator_init_data moboard_regulators[] = {
        },
 };
 
-static struct mc13783_led_platform_data moboard_led[] = {
+static struct mc13xxx_led_platform_data moboard_led[] = {
        {
                .id = MC13783_LED_R1,
                .name = "coreboard-led-4:red",
@@ -258,7 +259,7 @@ static struct mc13783_led_platform_data moboard_led[] = {
        },
 };
 
-static struct mc13783_leds_platform_data moboard_leds = {
+static struct mc13xxx_leds_platform_data moboard_leds = {
        .num_leds = ARRAY_SIZE(moboard_led),
        .led = moboard_led,
        .flags = MC13783_LED_SLEWLIMTC,
@@ -267,14 +268,20 @@ static struct mc13783_leds_platform_data moboard_leds = {
        .tc2_period = MC13783_LED_PERIOD_10MS,
 };
 
+static struct mc13xxx_buttons_platform_data moboard_buttons = {
+       .b1on_flags = MC13783_BUTTON_DBNC_750MS | MC13783_BUTTON_ENABLE |
+                       MC13783_BUTTON_POL_INVERT,
+       .b1on_key = KEY_POWER,
+};
+
 static struct mc13xxx_platform_data moboard_pmic = {
        .regulators = {
                .regulators = moboard_regulators,
                .num_regulators = ARRAY_SIZE(moboard_regulators),
        },
        .leds = &moboard_leds,
-       .flags = MC13XXX_USE_REGULATOR | MC13XXX_USE_RTC |
-               MC13XXX_USE_ADC | MC13XXX_USE_LED,
+       .buttons = &moboard_buttons,
+       .flags = MC13XXX_USE_RTC | MC13XXX_USE_ADC,
 };
 
 static struct spi_board_info moboard_spi_board_info[] __initdata = {
index 100bc733ce93511d62d1e8f71fb8e47f1073c6df..a17e9c7dfca0f4dfde76b49ba4639510b714fe7e 100644
@@ -268,8 +268,7 @@ static struct mc13xxx_platform_data pcm038_pmic = {
                .regulators = pcm038_regulators,
                .num_regulators = ARRAY_SIZE(pcm038_regulators),
        },
-       .flags = MC13783_USE_ADC | MC13783_USE_REGULATOR |
-                MC13783_USE_TOUCHSCREEN,
+       .flags = MC13XXX_USE_ADC | MC13XXX_USE_TOUCHSCREEN,
 };
 
 static struct spi_board_info pcm038_spi_board_info[] __initdata = {
index b004e178417d1be13d96961638fe27d95b4871d8..ec6ca91b299b629039ff3ac13ed7942494b3b830 100644
@@ -565,7 +565,7 @@ static struct mc13xxx_regulator_init_data mx51_efika_regulators[] = {
 };
 
 static struct mc13xxx_platform_data mx51_efika_mc13892_data = {
-       .flags = MC13XXX_USE_RTC | MC13XXX_USE_REGULATOR,
+       .flags = MC13XXX_USE_RTC,
        .regulators = {
                .num_regulators = ARRAY_SIZE(mx51_efika_regulators),
                .regulators = mx51_efika_regulators,
index 497e9dc2795890d0887fc524a1f3c66ac3310918..503414718905862d12e104b7940bfc7dc1524971 100644
@@ -14,7 +14,6 @@ config ARCH_OMAP2PLUS_TYPICAL
        select SERIAL_OMAP_CONSOLE
        select I2C
        select I2C_OMAP
-       select MFD_SUPPORT
        select MENELAUS if ARCH_OMAP2
        select TWL4030_CORE if ARCH_OMAP3 || ARCH_OMAP4
        select TWL4030_POWER if ARCH_OMAP3 || ARCH_OMAP4
index 70261bcda3f90e57d480cb1b2d13154c3239e69d..4a71cb7e42d4b02517d30be11a0151b93ef1e18f 100644
@@ -378,7 +378,8 @@ static struct i2c_board_info __initdata beagle_i2c_eeprom[] = {
 static int __init omap3_beagle_i2c_init(void)
 {
        omap3_pmic_get_config(&beagle_twldata,
-                       TWL_COMMON_PDATA_USB | TWL_COMMON_PDATA_AUDIO,
+                       TWL_COMMON_PDATA_USB | TWL_COMMON_PDATA_MADC |
+                       TWL_COMMON_PDATA_AUDIO,
                        TWL_COMMON_REGULATOR_VDAC | TWL_COMMON_REGULATOR_VPLL2);
 
        beagle_twldata.vpll2->constraints.name = "VDVI";
@@ -444,9 +445,15 @@ static struct platform_device keys_gpio = {
        },
 };
 
+static struct platform_device madc_hwmon = {
+       .name   = "twl4030_madc_hwmon",
+       .id     = -1,
+};
+
 static struct platform_device *omap3_beagle_devices[] __initdata = {
        &leds_gpio,
        &keys_gpio,
+       &madc_hwmon,
 };
 
 static const struct usbhs_omap_board_data usbhs_bdata __initconst = {
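
Two additions cooperate in the Beagle change: TWL_COMMON_PDATA_MADC asks the shared TWL setup helper to configure the PMIC's monitoring ADC, and the bare twl4030_madc_hwmon platform device gives the hwmon bridge driver a name to bind against. A device like that, with no resources and .id = -1, can equally be registered at run time; a sketch with minimal error handling:

    #include <linux/err.h>
    #include <linux/init.h>
    #include <linux/platform_device.h>

    static int __init madc_hwmon_register(void)
    {
            struct platform_device *pdev;

            /* Name-only device: the driver matches on the string, so no
             * resources or platform data are needed. */
            pdev = platform_device_register_simple("twl4030_madc_hwmon",
                                                   -1, NULL, 0);
            return IS_ERR(pdev) ? PTR_ERR(pdev) : 0;
    }
    device_initcall(madc_hwmon_register);
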
index 36e21091b06aa534e9724850e0dfc36769e36f26..454dfce125ca3e31337b2f2eb3c760ded2c51c60 100644
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/err.h>
+#include <linux/hwspinlock.h>
 
 #include <plat/omap_hwmod.h>
 #include <plat/omap_device.h>
 
+static struct hwspinlock_pdata omap_hwspinlock_pdata __initdata = {
+       .base_id = 0,
+};
+
 int __init hwspinlocks_init(void)
 {
        int retval = 0;
@@ -40,7 +45,9 @@ int __init hwspinlocks_init(void)
        if (oh == NULL)
                return -EINVAL;
 
-       pdev = omap_device_build(dev_name, 0, oh, NULL, 0, NULL, 0, false);
+       pdev = omap_device_build(dev_name, 0, oh, &omap_hwspinlock_pdata,
+                               sizeof(struct hwspinlock_pdata),
+                               NULL, 0, false);
        if (IS_ERR(pdev)) {
                pr_err("Can't build omap_device for %s:%s\n", dev_name,
                                                                oh_name);
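
The new pdata argument matters because base_id anchors this bank in the hwspinlock core's global lock numbering (the framework whose MAINTAINERS entry appears above). Consumers then ask for locks by global id; a sketch of that side of the API, following Documentation/hwspinlock.txt (lock id 0 and the 10 ms timeout are arbitrary examples):

    #include <linux/hwspinlock.h>

    static int example_touch_shared_resource(void)
    {
            struct hwspinlock *hwlock;
            int ret;

            hwlock = hwspin_lock_request_specific(0);   /* global lock id */
            if (!hwlock)
                    return -EBUSY;

            /* Busy-wait for up to 10 ms; IRQs stay disabled while held. */
            ret = hwspin_lock_timeout_irq(hwlock, 10);
            if (!ret) {
                    /* ... touch the resource shared with the remote core ... */
                    hwspin_unlock_irq(hwlock);
            }

            hwspin_lock_free(hwlock);
            return ret;
    }
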
index f0394baa11fa03ec0864bb66462039b88a5fae9d..5140deeddf7be45264de5257fb79dd6ada842382 100644
@@ -256,57 +256,8 @@ static struct ab3100_platform_data ab3100_plf_data = {
 };
 #endif
 
-#ifdef CONFIG_AB3550_CORE
-static struct abx500_init_settings ab3550_init_settings[] = {
-       {
-               .bank = 0,
-               .reg = AB3550_IMR1,
-               .setting = 0xff
-       },
-       {
-               .bank = 0,
-               .reg = AB3550_IMR2,
-               .setting = 0xff
-       },
-       {
-               .bank = 0,
-               .reg = AB3550_IMR3,
-               .setting = 0xff
-       },
-       {
-               .bank = 0,
-               .reg = AB3550_IMR4,
-               .setting = 0xff
-       },
-       {
-               .bank = 0,
-               .reg = AB3550_IMR5,
-               /* The two most significant bits are not used */
-               .setting = 0x3f
-       },
-};
-
-static struct ab3550_platform_data ab3550_plf_data = {
-       .irq = {
-               .base = IRQ_AB3550_BASE,
-               .count = (IRQ_AB3550_END - IRQ_AB3550_BASE + 1),
-       },
-       .dev_data = {
-       },
-       .init_settings = ab3550_init_settings,
-       .init_settings_sz = ARRAY_SIZE(ab3550_init_settings),
-};
-#endif
-
 static struct i2c_board_info __initdata bus0_i2c_board_info[] = {
-#if defined(CONFIG_AB3550_CORE)
-       {
-               .type = "ab3550",
-               .addr = 0x4A,
-               .irq = IRQ_U300_IRQ0_EXT,
-               .platform_data = &ab3550_plf_data,
-       },
-#elif defined(CONFIG_AB3100_CORE)
+#ifdef CONFIG_AB3100_CORE
        {
                .type = "ab3100",
                .addr = 0x48,
index d270fea32926dcf7687013f4cb0fbe0bb4c70694..db3fbfa1d6e97749e43ccb19d060c99a81fadb35 100644
 #define IRQ_U300_GPIO_END              (U300_VIC_IRQS_END)
 #endif
 
-/* Optional AB3550 mixsig chip */
-#ifdef CONFIG_AB3550_CORE
-#define IRQ_AB3550_BASE                        (IRQ_U300_GPIO_END)
-#define IRQ_AB3550_END                 (IRQ_AB3550_BASE + 38)
-#else
-#define IRQ_AB3550_END                 (IRQ_U300_GPIO_END)
-#endif
-
-#define NR_IRQS                                (IRQ_AB3550_END)
+#define NR_IRQS                                (IRQ_U300_GPIO_END)
 
 #endif
index e014aa749b039e6ef7da7f9d34e7c7f6ea65c9b8..82025ba70c0301b11326ac9f6ce3b2da6bcbefa4 100644
@@ -10,6 +10,7 @@
 #include <linux/amba/bus.h>
 #include <linux/irq.h>
 #include <linux/i2c.h>
+#include <linux/mfd/ab5500/ab5500.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
@@ -87,7 +88,6 @@ static struct lm3530_platform_data u5500_als_platform_data = {
        .brt_val = 0x7F,        /* Max brightness */
 };
 
-
 static struct i2c_board_info __initdata u5500_i2c2_devices[] = {
        {
                /* Backlight */
@@ -101,6 +101,30 @@ static void __init u5500_i2c_init(void)
        db5500_add_i2c2(&u5500_i2c2_data);
        i2c_register_board_info(2, ARRAY_AND_SIZE(u5500_i2c2_devices));
 }
+
+static struct ab5500_platform_data ab5500_plf_data = {
+       .irq = {
+               .base = 0,
+               .count = 0,
+       },
+       .init_settings = NULL,
+       .init_settings_sz = 0,
+       .pm_power_off = false,
+};
+
+static struct platform_device ab5500_device = {
+       .name = "ab5500-core",
+       .id = 0,
+       .dev = {
+               .platform_data = &ab5500_plf_data,
+       },
+       .num_resources = 0,
+};
+
+static struct platform_device *u5500_platform_devices[] __initdata = {
+       &ab5500_device,
+};
+
 static void __init u5500_uart_init(void)
 {
        db5500_add_uart0(NULL);
@@ -115,6 +139,9 @@ static void __init u5500_init_machine(void)
        u5500_i2c_init();
        u5500_sdi_init();
        u5500_uart_init();
+
+       platform_add_devices(u5500_platform_devices,
+               ARRAY_SIZE(u5500_platform_devices));
 }
 
 MACHINE_START(U5500, "ST-Ericsson U5500 Platform")
index 1405d0eb7edb1ff11de52d87553c8346c6f638d1..f4185749437533f5acd930dcc612113f57556eef 100644
@@ -47,6 +47,6 @@ void __init ux500_init_irq(void)
        if (cpu_is_u5500())
                db5500_prcmu_early_init();
        if (cpu_is_u8500())
-               prcmu_early_init();
+               db8500_prcmu_early_init();
        clk_init();
 }
index 3ff7785b3beb4d5d042b60e07ce25dc793c8c10f..27489b6dd5334a2297b24c69536293935728afa3 100644
@@ -92,7 +92,7 @@ config HAVE_SETUP_PER_CPU_AREA
        def_bool y
 
 config GENERIC_GPIO
-       def_bool y
+       bool
 
 config DMI
        bool
index 43ab1cd097a56ea3a9b68f5229b1a19c66288f22..954d81e2e837648d80a67fb9648dad29dd6f39c4 100644
@@ -87,6 +87,7 @@ CONFIG_NET_ETHERNET=y
 CONFIG_NET_TULIP=y
 CONFIG_TULIP=m
 CONFIG_NET_PCI=y
+CONFIG_NET_VENDOR_INTEL=y
 CONFIG_E100=m
 CONFIG_E1000=y
 CONFIG_IGB=y
index b11fa880e4b60dd72a8f0b41d99f26364cf553ab..91c41ecfa6d9e0fad551ef268cab3f01200af2d8 100644
@@ -75,6 +75,7 @@ CONFIG_NET_ETHERNET=y
 CONFIG_NET_TULIP=y
 CONFIG_TULIP=m
 CONFIG_NET_PCI=y
+CONFIG_NET_VENDOR_INTEL=y
 CONFIG_E100=m
 CONFIG_E1000=y
 CONFIG_TIGON3=y
index 137a453dd5080ad9fc59cbdf62639d8b16c824c0..a5a9e02e60a05c94e9b9a4c6c35a6cf78eecf429 100644
@@ -77,6 +77,7 @@ CONFIG_NET_ETHERNET=y
 CONFIG_NET_TULIP=y
 CONFIG_TULIP=m
 CONFIG_NET_PCI=y
+CONFIG_NET_VENDOR_INTEL=y
 CONFIG_E100=m
 CONFIG_E1000=y
 CONFIG_TIGON3=y
index 2bf76e418bc569bf2d53d150460a79ec3f2b009a..37b9b422caadf9c89196aa63781d9cdb2a05830d 100644
@@ -80,6 +80,7 @@ CONFIG_NET_ETHERNET=y
 CONFIG_NET_TULIP=y
 CONFIG_TULIP=m
 CONFIG_NET_PCI=y
+CONFIG_NET_VENDOR_INTEL=y
 CONFIG_E100=m
 CONFIG_E1000=y
 CONFIG_TIGON3=y
index 1d42827f9fa44df9a40f667edcad92a1250d6bba..fc7aba07c2b4f7064efa3e0e9704382e5495fdf6 100644
@@ -57,6 +57,7 @@ CONFIG_TULIP_MMIO=y
 CONFIG_TULIP_NAPI=y
 CONFIG_TULIP_NAPI_HW_MITIGATION=y
 CONFIG_NET_PCI=y
+CONFIG_NET_VENDOR_INTEL=y
 CONFIG_E100=y
 CONFIG_E1000=y
 CONFIG_TIGON3=y
index 30862c0358cd7a3644f9dd204e203bb2d7efa239..2de41d44266ebc2e860ad185e88cb4ff07e0eab7 100644
@@ -615,11 +615,15 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
                }
        }
 
-       if (cpu == SN_HWPERF_ARG_ANY_CPU || cpu == get_cpu()) {
-               /* don't care, or already on correct cpu */
+       if (cpu == SN_HWPERF_ARG_ANY_CPU) {
+               /* don't care which cpu */
                sn_hwperf_call_sal(op_info);
-       }
-       else {
+       } else if (cpu == get_cpu()) {
+               /* already on correct cpu */
+               sn_hwperf_call_sal(op_info);
+               put_cpu();
+       } else {
+               put_cpu();
                if (use_ipi) {
                        /* use an interprocessor interrupt to call SAL */
                        smp_call_function_single(cpu, sn_hwperf_call_sal,
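
The rewritten branches fix a preemption-count leak: get_cpu() disables preemption, so every path it dominates needs a matching put_cpu(), including the one that forwards the call to another CPU. The pattern in isolation (a sketch; do_work stands in for the SAL call, and the IPI-vs-migrate detail is omitted):

    #include <linux/smp.h>

    static void do_work(void *info)
    {
            /* ... work that must run on the target cpu ... */
    }

    static void run_on(int cpu, void *info)
    {
            if (cpu == get_cpu()) {
                    do_work(info);
                    put_cpu();      /* re-enable preemption */
            } else {
                    put_cpu();      /* drop it before waiting on another cpu */
                    smp_call_function_single(cpu, do_work, info, 1);
            }
    }
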
index 4cbc6d8de210377b612a2c0ccf7b1f7378120262..62b9677c39a10bf17857f0f6b5621977771302f7 100644
@@ -47,6 +47,8 @@ config MIPS_ALCHEMY
        select GENERIC_GPIO
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select SYS_SUPPORTS_ZBOOT
+       select USB_ARCH_HAS_OHCI
+       select USB_ARCH_HAS_EHCI
 
 config AR7
        bool "Texas Instruments AR7"
@@ -206,6 +208,7 @@ config MACH_JZ4740
        select SYS_HAS_EARLY_PRINTK
        select HAVE_PWM
        select HAVE_CLK
+       select GENERIC_IRQ_CHIP
 
 config LANTIQ
        bool "Lantiq based platforms"
@@ -2092,7 +2095,7 @@ config NODES_SHIFT
 
 config HW_PERF_EVENTS
        bool "Enable hardware performance counter support for perf events"
-       depends on PERF_EVENTS && !MIPS_MT_SMTC && OPROFILE=n && CPU_MIPS32
+       depends on PERF_EVENTS && !MIPS_MT_SMTC && OPROFILE=n && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON)
        default y
        help
          Enable hardware performance counter support for perf events. If
index 53e3514ba10e93c249d837f460fb86fbe3879f52..9b4cb00407d7c468f607213f88dd729731a588be 100644
@@ -226,7 +226,7 @@ LDFLAGS                     += -m $(ld-emul)
 ifdef CONFIG_MIPS
 CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -xc /dev/null | \
        egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \
-       sed -e 's/^\#define /-D/' -e "s/ /='/" -e "s/$$/'/")
+       sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/")
 ifdef CONFIG_64BIT
 CHECKFLAGS             += -m64
 endif
@@ -295,7 +295,9 @@ endif
 
 install:
        $(Q)install -D -m 755 vmlinux $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE)
+ifdef CONFIG_SYS_SUPPORTS_ZBOOT
        $(Q)install -D -m 755 vmlinuz $(INSTALL_PATH)/vmlinuz-$(KERNELRELEASE)
+endif
        $(Q)install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE)
        $(Q)install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE)
 
index 2ccfd4a135bce633d3fcefba360faeceeae3724e..2a68be6a1b97ed953bdfe619d6ab16d5d924d320 100644
@@ -18,20 +18,20 @@ config MIPS_MTX1
        bool "4G Systems MTX-1 board"
        select DMA_NONCOHERENT
        select HW_HAS_PCI
-       select SOC_AU1500
+       select ALCHEMY_GPIOINT_AU1000
        select SYS_SUPPORTS_LITTLE_ENDIAN
        select SYS_HAS_EARLY_PRINTK
 
 config MIPS_BOSPORUS
        bool "Alchemy Bosporus board"
-       select SOC_AU1500
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_NONCOHERENT
        select SYS_SUPPORTS_LITTLE_ENDIAN
        select SYS_HAS_EARLY_PRINTK
 
 config MIPS_DB1000
        bool "Alchemy DB1000 board"
-       select SOC_AU1000
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_NONCOHERENT
        select HW_HAS_PCI
        select SYS_SUPPORTS_LITTLE_ENDIAN
@@ -39,14 +39,14 @@ config MIPS_DB1000
 
 config MIPS_DB1100
        bool "Alchemy DB1100 board"
-       select SOC_AU1100
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_NONCOHERENT
        select SYS_SUPPORTS_LITTLE_ENDIAN
        select SYS_HAS_EARLY_PRINTK
 
 config MIPS_DB1200
        bool "Alchemy DB1200 board"
-       select SOC_AU1200
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_COHERENT
        select MIPS_DISABLE_OBSOLETE_IDE
        select SYS_SUPPORTS_LITTLE_ENDIAN
@@ -54,7 +54,7 @@ config MIPS_DB1200
 
 config MIPS_DB1500
        bool "Alchemy DB1500 board"
-       select SOC_AU1500
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_NONCOHERENT
        select HW_HAS_PCI
        select MIPS_DISABLE_OBSOLETE_IDE
@@ -64,7 +64,7 @@ config MIPS_DB1500
 
 config MIPS_DB1550
        bool "Alchemy DB1550 board"
-       select SOC_AU1550
+       select ALCHEMY_GPIOINT_AU1000
        select HW_HAS_PCI
        select DMA_NONCOHERENT
        select MIPS_DISABLE_OBSOLETE_IDE
@@ -74,13 +74,13 @@ config MIPS_DB1550
 config MIPS_MIRAGE
        bool "Alchemy Mirage board"
        select DMA_NONCOHERENT
-       select SOC_AU1500
+       select ALCHEMY_GPIOINT_AU1000
        select SYS_SUPPORTS_LITTLE_ENDIAN
        select SYS_HAS_EARLY_PRINTK
 
 config MIPS_PB1000
        bool "Alchemy PB1000 board"
-       select SOC_AU1000
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_NONCOHERENT
        select HW_HAS_PCI
        select SWAP_IO_SPACE
@@ -89,7 +89,7 @@ config MIPS_PB1000
 
 config MIPS_PB1100
        bool "Alchemy PB1100 board"
-       select SOC_AU1100
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_NONCOHERENT
        select HW_HAS_PCI
        select SWAP_IO_SPACE
@@ -98,7 +98,7 @@ config MIPS_PB1100
 
 config MIPS_PB1200
        bool "Alchemy PB1200 board"
-       select SOC_AU1200
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_NONCOHERENT
        select MIPS_DISABLE_OBSOLETE_IDE
        select SYS_SUPPORTS_LITTLE_ENDIAN
@@ -106,7 +106,7 @@ config MIPS_PB1200
 
 config MIPS_PB1500
        bool "Alchemy PB1500 board"
-       select SOC_AU1500
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_NONCOHERENT
        select HW_HAS_PCI
        select SYS_SUPPORTS_LITTLE_ENDIAN
@@ -114,7 +114,7 @@ config MIPS_PB1500
 
 config MIPS_PB1550
        bool "Alchemy PB1550 board"
-       select SOC_AU1550
+       select ALCHEMY_GPIOINT_AU1000
        select DMA_NONCOHERENT
        select HW_HAS_PCI
        select MIPS_DISABLE_OBSOLETE_IDE
@@ -124,13 +124,13 @@ config MIPS_PB1550
 config MIPS_XXS1500
        bool "MyCable XXS1500 board"
        select DMA_NONCOHERENT
-       select SOC_AU1500
+       select ALCHEMY_GPIOINT_AU1000
        select SYS_SUPPORTS_LITTLE_ENDIAN
        select SYS_HAS_EARLY_PRINTK
 
 config MIPS_GPR
        bool "Trapeze ITS GPR board"
-       select SOC_AU1550
+       select ALCHEMY_GPIOINT_AU1000
        select HW_HAS_PCI
        select DMA_NONCOHERENT
        select MIPS_DISABLE_OBSOLETE_IDE
@@ -138,23 +138,3 @@ config MIPS_GPR
        select SYS_HAS_EARLY_PRINTK
 
 endchoice
-
-config SOC_AU1000
-       bool
-       select ALCHEMY_GPIOINT_AU1000
-
-config SOC_AU1100
-       bool
-       select ALCHEMY_GPIOINT_AU1000
-
-config SOC_AU1500
-       bool
-       select ALCHEMY_GPIOINT_AU1000
-
-config SOC_AU1550
-       bool
-       select ALCHEMY_GPIOINT_AU1000
-
-config SOC_AU1200
-       bool
-       select ALCHEMY_GPIOINT_AU1000
index 27811fe341d6e732353a55243ee7b49d916eb482..811ece7b22e304fd9d04a8f38c1c6a241267b058 100644
@@ -12,9 +12,5 @@ obj-$(CONFIG_ALCHEMY_GPIOINT_AU1000) += irq.o
 
 # optional gpiolib support
 ifeq ($(CONFIG_ALCHEMY_GPIO_INDIRECT),)
- ifeq ($(CONFIG_GPIOLIB),y)
-  obj-$(CONFIG_ALCHEMY_GPIOINT_AU1000) += gpiolib-au1000.o
- endif
+ obj-$(CONFIG_GPIOLIB) += gpiolib.o
 endif
-
-obj-$(CONFIG_PCI)              += pci.o
index 3a5abb54d5052980c879e81d8ce983e576c2c9b1..0e63ee487d6d1315b6f06c069631cb383c932eb7 100644
@@ -40,8 +40,6 @@
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
 
-#if defined(CONFIG_SOC_AU1550) || defined(CONFIG_SOC_AU1200)
-
 /*
  * The Descriptor Based DMA supports up to 16 channels.
  *
@@ -62,120 +60,96 @@ static dbdma_global_t *dbdma_gptr =
                        (dbdma_global_t *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
 static int dbdma_initialized;
 
-static dbdev_tab_t dbdev_tab[] = {
-#ifdef CONFIG_SOC_AU1550
+static dbdev_tab_t *dbdev_tab;
+
+static dbdev_tab_t au1550_dbdev_tab[] __initdata = {
        /* UARTS */
-       { DSCR_CMD0_UART0_TX, DEV_FLAGS_OUT, 0, 8, 0x11100004, 0, 0 },
-       { DSCR_CMD0_UART0_RX, DEV_FLAGS_IN, 0, 8, 0x11100000, 0, 0 },
-       { DSCR_CMD0_UART3_TX, DEV_FLAGS_OUT, 0, 8, 0x11400004, 0, 0 },
-       { DSCR_CMD0_UART3_RX, DEV_FLAGS_IN, 0, 8, 0x11400000, 0, 0 },
+       { AU1550_DSCR_CMD0_UART0_TX, DEV_FLAGS_OUT, 0, 8, 0x11100004, 0, 0 },
+       { AU1550_DSCR_CMD0_UART0_RX, DEV_FLAGS_IN,  0, 8, 0x11100000, 0, 0 },
+       { AU1550_DSCR_CMD0_UART3_TX, DEV_FLAGS_OUT, 0, 8, 0x11400004, 0, 0 },
+       { AU1550_DSCR_CMD0_UART3_RX, DEV_FLAGS_IN,  0, 8, 0x11400000, 0, 0 },
 
        /* EXT DMA */
-       { DSCR_CMD0_DMA_REQ0, 0, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_DMA_REQ1, 0, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_DMA_REQ2, 0, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_DMA_REQ3, 0, 0, 0, 0x00000000, 0, 0 },
+       { AU1550_DSCR_CMD0_DMA_REQ0, 0, 0, 0, 0x00000000, 0, 0 },
+       { AU1550_DSCR_CMD0_DMA_REQ1, 0, 0, 0, 0x00000000, 0, 0 },
+       { AU1550_DSCR_CMD0_DMA_REQ2, 0, 0, 0, 0x00000000, 0, 0 },
+       { AU1550_DSCR_CMD0_DMA_REQ3, 0, 0, 0, 0x00000000, 0, 0 },
 
        /* USB DEV */
-       { DSCR_CMD0_USBDEV_RX0, DEV_FLAGS_IN, 4, 8, 0x10200000, 0, 0 },
-       { DSCR_CMD0_USBDEV_TX0, DEV_FLAGS_OUT, 4, 8, 0x10200004, 0, 0 },
-       { DSCR_CMD0_USBDEV_TX1, DEV_FLAGS_OUT, 4, 8, 0x10200008, 0, 0 },
-       { DSCR_CMD0_USBDEV_TX2, DEV_FLAGS_OUT, 4, 8, 0x1020000c, 0, 0 },
-       { DSCR_CMD0_USBDEV_RX3, DEV_FLAGS_IN, 4, 8, 0x10200010, 0, 0 },
-       { DSCR_CMD0_USBDEV_RX4, DEV_FLAGS_IN, 4, 8, 0x10200014, 0, 0 },
-
-       /* PSC 0 */
-       { DSCR_CMD0_PSC0_TX, DEV_FLAGS_OUT, 0, 0, 0x11a0001c, 0, 0 },
-       { DSCR_CMD0_PSC0_RX, DEV_FLAGS_IN, 0, 0, 0x11a0001c, 0, 0 },
-
-       /* PSC 1 */
-       { DSCR_CMD0_PSC1_TX, DEV_FLAGS_OUT, 0, 0, 0x11b0001c, 0, 0 },
-       { DSCR_CMD0_PSC1_RX, DEV_FLAGS_IN, 0, 0, 0x11b0001c, 0, 0 },
-
-       /* PSC 2 */
-       { DSCR_CMD0_PSC2_TX, DEV_FLAGS_OUT, 0, 0, 0x10a0001c, 0, 0 },
-       { DSCR_CMD0_PSC2_RX, DEV_FLAGS_IN, 0, 0, 0x10a0001c, 0, 0 },
-
-       /* PSC 3 */
-       { DSCR_CMD0_PSC3_TX, DEV_FLAGS_OUT, 0, 0, 0x10b0001c, 0, 0 },
-       { DSCR_CMD0_PSC3_RX, DEV_FLAGS_IN, 0, 0, 0x10b0001c, 0, 0 },
-
-       { DSCR_CMD0_PCI_WRITE, 0, 0, 0, 0x00000000, 0, 0 },     /* PCI */
-       { DSCR_CMD0_NAND_FLASH, 0, 0, 0, 0x00000000, 0, 0 },    /* NAND */
+       { AU1550_DSCR_CMD0_USBDEV_RX0, DEV_FLAGS_IN,  4, 8, 0x10200000, 0, 0 },
+       { AU1550_DSCR_CMD0_USBDEV_TX0, DEV_FLAGS_OUT, 4, 8, 0x10200004, 0, 0 },
+       { AU1550_DSCR_CMD0_USBDEV_TX1, DEV_FLAGS_OUT, 4, 8, 0x10200008, 0, 0 },
+       { AU1550_DSCR_CMD0_USBDEV_TX2, DEV_FLAGS_OUT, 4, 8, 0x1020000c, 0, 0 },
+       { AU1550_DSCR_CMD0_USBDEV_RX3, DEV_FLAGS_IN,  4, 8, 0x10200010, 0, 0 },
+       { AU1550_DSCR_CMD0_USBDEV_RX4, DEV_FLAGS_IN,  4, 8, 0x10200014, 0, 0 },
+
+       /* PSCs */
+       { AU1550_DSCR_CMD0_PSC0_TX, DEV_FLAGS_OUT, 0, 0, 0x11a0001c, 0, 0 },
+       { AU1550_DSCR_CMD0_PSC0_RX, DEV_FLAGS_IN,  0, 0, 0x11a0001c, 0, 0 },
+       { AU1550_DSCR_CMD0_PSC1_TX, DEV_FLAGS_OUT, 0, 0, 0x11b0001c, 0, 0 },
+       { AU1550_DSCR_CMD0_PSC1_RX, DEV_FLAGS_IN,  0, 0, 0x11b0001c, 0, 0 },
+       { AU1550_DSCR_CMD0_PSC2_TX, DEV_FLAGS_OUT, 0, 0, 0x10a0001c, 0, 0 },
+       { AU1550_DSCR_CMD0_PSC2_RX, DEV_FLAGS_IN,  0, 0, 0x10a0001c, 0, 0 },
+       { AU1550_DSCR_CMD0_PSC3_TX, DEV_FLAGS_OUT, 0, 0, 0x10b0001c, 0, 0 },
+       { AU1550_DSCR_CMD0_PSC3_RX, DEV_FLAGS_IN,  0, 0, 0x10b0001c, 0, 0 },
+
+       { AU1550_DSCR_CMD0_PCI_WRITE,  0, 0, 0, 0x00000000, 0, 0 },  /* PCI */
+       { AU1550_DSCR_CMD0_NAND_FLASH, 0, 0, 0, 0x00000000, 0, 0 }, /* NAND */
 
        /* MAC 0 */
-       { DSCR_CMD0_MAC0_RX, DEV_FLAGS_IN, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_MAC0_TX, DEV_FLAGS_OUT, 0, 0, 0x00000000, 0, 0 },
+       { AU1550_DSCR_CMD0_MAC0_RX, DEV_FLAGS_IN,  0, 0, 0x00000000, 0, 0 },
+       { AU1550_DSCR_CMD0_MAC0_TX, DEV_FLAGS_OUT, 0, 0, 0x00000000, 0, 0 },
 
        /* MAC 1 */
-       { DSCR_CMD0_MAC1_RX, DEV_FLAGS_IN, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_MAC1_TX, DEV_FLAGS_OUT, 0, 0, 0x00000000, 0, 0 },
-
-#endif /* CONFIG_SOC_AU1550 */
+       { AU1550_DSCR_CMD0_MAC1_RX, DEV_FLAGS_IN,  0, 0, 0x00000000, 0, 0 },
+       { AU1550_DSCR_CMD0_MAC1_TX, DEV_FLAGS_OUT, 0, 0, 0x00000000, 0, 0 },
 
-#ifdef CONFIG_SOC_AU1200
-       { DSCR_CMD0_UART0_TX, DEV_FLAGS_OUT, 0, 8, 0x11100004, 0, 0 },
-       { DSCR_CMD0_UART0_RX, DEV_FLAGS_IN, 0, 8, 0x11100000, 0, 0 },
-       { DSCR_CMD0_UART1_TX, DEV_FLAGS_OUT, 0, 8, 0x11200004, 0, 0 },
-       { DSCR_CMD0_UART1_RX, DEV_FLAGS_IN, 0, 8, 0x11200000, 0, 0 },
-
-       { DSCR_CMD0_DMA_REQ0, 0, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_DMA_REQ1, 0, 0, 0, 0x00000000, 0, 0 },
+       { DSCR_CMD0_THROTTLE, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+       { DSCR_CMD0_ALWAYS,   DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+};
 
-       { DSCR_CMD0_MAE_BE, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_MAE_FE, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_MAE_BOTH, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_LCD, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+static dbdev_tab_t au1200_dbdev_tab[] __initdata = {
+       { AU1200_DSCR_CMD0_UART0_TX, DEV_FLAGS_OUT, 0, 8, 0x11100004, 0, 0 },
+       { AU1200_DSCR_CMD0_UART0_RX, DEV_FLAGS_IN,  0, 8, 0x11100000, 0, 0 },
+       { AU1200_DSCR_CMD0_UART1_TX, DEV_FLAGS_OUT, 0, 8, 0x11200004, 0, 0 },
+       { AU1200_DSCR_CMD0_UART1_RX, DEV_FLAGS_IN,  0, 8, 0x11200000, 0, 0 },
 
-       { DSCR_CMD0_SDMS_TX0, DEV_FLAGS_OUT, 4, 8, 0x10600000, 0, 0 },
-       { DSCR_CMD0_SDMS_RX0, DEV_FLAGS_IN, 4, 8, 0x10600004, 0, 0 },
-       { DSCR_CMD0_SDMS_TX1, DEV_FLAGS_OUT, 4, 8, 0x10680000, 0, 0 },
-       { DSCR_CMD0_SDMS_RX1, DEV_FLAGS_IN, 4, 8, 0x10680004, 0, 0 },
+       { AU1200_DSCR_CMD0_DMA_REQ0, 0, 0, 0, 0x00000000, 0, 0 },
+       { AU1200_DSCR_CMD0_DMA_REQ1, 0, 0, 0, 0x00000000, 0, 0 },
 
-       { DSCR_CMD0_AES_RX, DEV_FLAGS_IN , 4, 32, 0x10300008, 0, 0 },
-       { DSCR_CMD0_AES_TX, DEV_FLAGS_OUT, 4, 32, 0x10300004, 0, 0 },
+       { AU1200_DSCR_CMD0_MAE_BE, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+       { AU1200_DSCR_CMD0_MAE_FE, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+       { AU1200_DSCR_CMD0_MAE_BOTH, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+       { AU1200_DSCR_CMD0_LCD, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
 
-       { DSCR_CMD0_PSC0_TX, DEV_FLAGS_OUT, 0, 16, 0x11a0001c, 0, 0 },
-       { DSCR_CMD0_PSC0_RX, DEV_FLAGS_IN, 0, 16, 0x11a0001c, 0, 0 },
-       { DSCR_CMD0_PSC0_SYNC, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+       { AU1200_DSCR_CMD0_SDMS_TX0, DEV_FLAGS_OUT, 4, 8, 0x10600000, 0, 0 },
+       { AU1200_DSCR_CMD0_SDMS_RX0, DEV_FLAGS_IN,  4, 8, 0x10600004, 0, 0 },
+       { AU1200_DSCR_CMD0_SDMS_TX1, DEV_FLAGS_OUT, 4, 8, 0x10680000, 0, 0 },
+       { AU1200_DSCR_CMD0_SDMS_RX1, DEV_FLAGS_IN,  4, 8, 0x10680004, 0, 0 },
 
-       { DSCR_CMD0_PSC1_TX, DEV_FLAGS_OUT, 0, 16, 0x11b0001c, 0, 0 },
-       { DSCR_CMD0_PSC1_RX, DEV_FLAGS_IN, 0, 16, 0x11b0001c, 0, 0 },
-       { DSCR_CMD0_PSC1_SYNC, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+       { AU1200_DSCR_CMD0_AES_RX, DEV_FLAGS_IN , 4, 32, 0x10300008, 0, 0 },
+       { AU1200_DSCR_CMD0_AES_TX, DEV_FLAGS_OUT, 4, 32, 0x10300004, 0, 0 },
 
-       { DSCR_CMD0_CIM_RXA, DEV_FLAGS_IN, 0, 32, 0x14004020, 0, 0 },
-       { DSCR_CMD0_CIM_RXB, DEV_FLAGS_IN, 0, 32, 0x14004040, 0, 0 },
-       { DSCR_CMD0_CIM_RXC, DEV_FLAGS_IN, 0, 32, 0x14004060, 0, 0 },
-       { DSCR_CMD0_CIM_SYNC, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+       { AU1200_DSCR_CMD0_PSC0_TX,   DEV_FLAGS_OUT, 0, 16, 0x11a0001c, 0, 0 },
+       { AU1200_DSCR_CMD0_PSC0_RX,   DEV_FLAGS_IN,  0, 16, 0x11a0001c, 0, 0 },
+       { AU1200_DSCR_CMD0_PSC0_SYNC, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
+       { AU1200_DSCR_CMD0_PSC1_TX,   DEV_FLAGS_OUT, 0, 16, 0x11b0001c, 0, 0 },
+       { AU1200_DSCR_CMD0_PSC1_RX,   DEV_FLAGS_IN,  0, 16, 0x11b0001c, 0, 0 },
+       { AU1200_DSCR_CMD0_PSC1_SYNC, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
 
-       { DSCR_CMD0_NAND_FLASH, DEV_FLAGS_IN, 0, 0, 0x00000000, 0, 0 },
+       { AU1200_DSCR_CMD0_CIM_RXA,  DEV_FLAGS_IN, 0, 32, 0x14004020, 0, 0 },
+       { AU1200_DSCR_CMD0_CIM_RXB,  DEV_FLAGS_IN, 0, 32, 0x14004040, 0, 0 },
+       { AU1200_DSCR_CMD0_CIM_RXC,  DEV_FLAGS_IN, 0, 32, 0x14004060, 0, 0 },
+       { AU1200_DSCR_CMD0_CIM_SYNC, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
 
-#endif /* CONFIG_SOC_AU1200 */
+       { AU1200_DSCR_CMD0_NAND_FLASH, DEV_FLAGS_IN, 0, 0, 0x00000000, 0, 0 },
 
        { DSCR_CMD0_THROTTLE, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
-       { DSCR_CMD0_ALWAYS, DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
-
-       /* Provide 16 user definable device types */
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
-       { ~0, 0, 0, 0, 0, 0, 0 },
+       { DSCR_CMD0_ALWAYS,   DEV_FLAGS_ANYUSE, 0, 0, 0x00000000, 0, 0 },
 };
 
-#define DBDEV_TAB_SIZE ARRAY_SIZE(dbdev_tab)
-
+/* 32 predefined plus 32 custom */
+#define DBDEV_TAB_SIZE         64
 
 static chan_tab_t *chan_tab_ptr[NUM_DBDMA_CHANS];
 
@@ -1028,38 +1002,43 @@ static struct syscore_ops alchemy_dbdma_syscore_ops = {
        .resume         = alchemy_dbdma_resume,
 };
 
-static int __init au1xxx_dbdma_init(void)
+static int __init dbdma_setup(unsigned int irq, dbdev_tab_t *idtable)
 {
-       int irq_nr, ret;
+       int ret;
+
+       dbdev_tab = kzalloc(sizeof(dbdev_tab_t) * DBDEV_TAB_SIZE, GFP_KERNEL);
+       if (!dbdev_tab)
+               return -ENOMEM;
+
+       memcpy(dbdev_tab, idtable, 32 * sizeof(dbdev_tab_t));
+       for (ret = 32; ret < DBDEV_TAB_SIZE; ret++)
+               dbdev_tab[ret].dev_id = ~0;
 
        dbdma_gptr->ddma_config = 0;
        dbdma_gptr->ddma_throttle = 0;
        dbdma_gptr->ddma_inten = 0xffff;
        au_sync();
 
-       switch (alchemy_get_cputype()) {
-       case ALCHEMY_CPU_AU1550:
-               irq_nr = AU1550_DDMA_INT;
-               break;
-       case ALCHEMY_CPU_AU1200:
-               irq_nr = AU1200_DDMA_INT;
-               break;
-       default:
-               return -ENODEV;
-       }
-
-       ret = request_irq(irq_nr, dbdma_interrupt, IRQF_DISABLED,
-                       "Au1xxx dbdma", (void *)dbdma_gptr);
+       ret = request_irq(irq, dbdma_interrupt, IRQF_DISABLED, "dbdma",
+                         (void *)dbdma_gptr);
        if (ret)
                printk(KERN_ERR "Cannot grab DBDMA interrupt!\n");
        else {
                dbdma_initialized = 1;
-               printk(KERN_INFO "Alchemy DBDMA initialized\n");
                register_syscore_ops(&alchemy_dbdma_syscore_ops);
        }
 
        return ret;
 }
-subsys_initcall(au1xxx_dbdma_init);
 
-#endif /* defined(CONFIG_SOC_AU1550) || defined(CONFIG_SOC_AU1200) */
+static int __init alchemy_dbdma_init(void)
+{
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1550:
+               return dbdma_setup(AU1550_DDMA_INT, au1550_dbdev_tab);
+       case ALCHEMY_CPU_AU1200:
+               return dbdma_setup(AU1200_DDMA_INT, au1200_dbdev_tab);
+       }
+       return 0;
+}
+subsys_initcall(alchemy_dbdma_init);
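
The dbdma rework trades the old #ifdef-selected static table for one built at boot: dbdma_setup() copies the per-SoC id list into the first 32 slots and marks the remainder free with a ~0 dev_id, preserving the 32 user-definable entries the static initializer used to spell out. The allocation idiom on its own (a sketch; struct id_entry stands in for dbdev_tab_t):

    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/string.h>
    #include <linux/types.h>

    struct id_entry {
            u32 dev_id;
            /* ... remaining per-device fields ... */
    };

    #define TAB_SIZE        64      /* 32 predefined + 32 custom slots */

    static struct id_entry *tab;

    static int __init tab_setup(const struct id_entry *preset)
    {
            int i;

            tab = kzalloc(sizeof(*tab) * TAB_SIZE, GFP_KERNEL);
            if (!tab)
                    return -ENOMEM;

            memcpy(tab, preset, 32 * sizeof(*tab));  /* predefined ids */
            for (i = 32; i < TAB_SIZE; i++)
                    tab[i].dev_id = ~0;              /* mark slot free/custom */
            return 0;
    }
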
index 347980e79a892048a8d7097ac5561634af5e950e..9b624e2c0fcf8be6703debf45fa948e1d5e6eff0 100644
@@ -40,8 +40,6 @@
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1000_dma.h>
 
-#if defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1500) || \
-    defined(CONFIG_SOC_AU1100)
 /*
  * A note on resource allocation:
  *
@@ -88,12 +86,12 @@ static const struct dma_dev {
        { AU1000_AC97_PHYS_ADDR + 0x08, DMA_DW16 | DMA_DR },    /* AC97 RX c */
        { AU1000_UART3_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC },    /* UART3_TX */
        { AU1000_UART3_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* UART3_RX */
-       { AU1000_USBD_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* EP0RD */
-       { AU1000_USBD_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC }, /* EP0WR */
-       { AU1000_USBD_PHYS_ADDR + 0x08, DMA_DW8 | DMA_NC }, /* EP2WR */
-       { AU1000_USBD_PHYS_ADDR + 0x0c, DMA_DW8 | DMA_NC }, /* EP3WR */
-       { AU1000_USBD_PHYS_ADDR + 0x10, DMA_DW8 | DMA_NC | DMA_DR }, /* EP4RD */
-       { AU1000_USBD_PHYS_ADDR + 0x14, DMA_DW8 | DMA_NC | DMA_DR }, /* EP5RD */
+       { AU1000_USB_UDC_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* EP0RD */
+       { AU1000_USB_UDC_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC }, /* EP0WR */
+       { AU1000_USB_UDC_PHYS_ADDR + 0x08, DMA_DW8 | DMA_NC }, /* EP2WR */
+       { AU1000_USB_UDC_PHYS_ADDR + 0x0c, DMA_DW8 | DMA_NC }, /* EP3WR */
+       { AU1000_USB_UDC_PHYS_ADDR + 0x10, DMA_DW8 | DMA_NC | DMA_DR }, /* EP4RD */
+       { AU1000_USB_UDC_PHYS_ADDR + 0x14, DMA_DW8 | DMA_NC | DMA_DR }, /* EP5RD */
        /* on Au1500, these 2 are DMA_REQ2/3 (GPIO208/209) instead! */
        { AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC},      /* I2S TX */
        { AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC | DMA_DR}, /* I2S RX */
@@ -170,13 +168,13 @@ int request_au1000_dma(int dev_id, const char *dev_str,
        const struct dma_dev *dev;
        int i, ret;
 
-#if defined(CONFIG_SOC_AU1100)
-       if (dev_id < 0 || dev_id >= (DMA_NUM_DEV + DMA_NUM_DEV_BANK2))
-               return -EINVAL;
-#else
-       if (dev_id < 0 || dev_id >= DMA_NUM_DEV)
-               return -EINVAL;
-#endif
+       if (alchemy_get_cputype() == ALCHEMY_CPU_AU1100) {
+               if (dev_id < 0 || dev_id >= (DMA_NUM_DEV + DMA_NUM_DEV_BANK2))
+                       return -EINVAL;
+       } else {
+               if (dev_id < 0 || dev_id >= DMA_NUM_DEV)
+                       return -EINVAL;
+       }
 
        for (i = 0; i < NUM_AU1000_DMA_CHANNELS; i++)
                if (au1000_dma_table[i].dev_id < 0)
@@ -239,30 +237,28 @@ EXPORT_SYMBOL(free_au1000_dma);
 
 static int __init au1000_dma_init(void)
 {
-        int base, i;
-
-        switch (alchemy_get_cputype()) {
-        case ALCHEMY_CPU_AU1000:
-                base = AU1000_DMA_INT_BASE;
-                break;
-        case ALCHEMY_CPU_AU1500:
-                base = AU1500_DMA_INT_BASE;
-                break;
-        case ALCHEMY_CPU_AU1100:
-                base = AU1100_DMA_INT_BASE;
-                break;
-        default:
-                goto out;
-        }
-
-        for (i = 0; i < NUM_AU1000_DMA_CHANNELS; i++)
-                au1000_dma_table[i].irq = base + i;
-
-        printk(KERN_INFO "Alchemy DMA initialized\n");
+       int base, i;
+
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1000:
+               base = AU1000_DMA_INT_BASE;
+               break;
+       case ALCHEMY_CPU_AU1500:
+               base = AU1500_DMA_INT_BASE;
+               break;
+       case ALCHEMY_CPU_AU1100:
+               base = AU1100_DMA_INT_BASE;
+               break;
+       default:
+               goto out;
+       }
+
+       for (i = 0; i < NUM_AU1000_DMA_CHANNELS; i++)
+               au1000_dma_table[i].irq = base + i;
+
+       printk(KERN_INFO "Alchemy DMA initialized\n");
 
 out:
-        return 0;
+       return 0;
 }
 arch_initcall(au1000_dma_init);
-
-#endif /* AU1000 AU1500 AU1100 */
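
dma.c gets the same single-image treatment: the AU1100-only second DMA bank is now detected at run time, so the dev_id bounds check no longer needs compile-time variants. Reduced to the idiom (using the constants from au1000_dma.h):

    #include <asm/mach-au1x00/au1000.h>
    #include <asm/mach-au1x00/au1000_dma.h>

    static int dma_dev_id_valid(int dev_id)
    {
            int max = DMA_NUM_DEV;

            if (alchemy_get_cputype() == ALCHEMY_CPU_AU1100)
                    max += DMA_NUM_DEV_BANK2;       /* AU1100's extra bank */

            return dev_id >= 0 && dev_id < max;
    }
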
diff --git a/arch/mips/alchemy/common/gpiolib-au1000.c b/arch/mips/alchemy/common/gpiolib-au1000.c
deleted file mode 100644
index c8e1a94..0000000
--- a/arch/mips/alchemy/common/gpiolib-au1000.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- *  Copyright (C) 2007-2009, OpenWrt.org, Florian Fainelli <florian@openwrt.org>
- *     GPIOLIB support for Au1000, Au1500, Au1100, Au1550 and Au12x0.
- *
- *  This program is free software; you can redistribute         it and/or modify it
- *  under  the terms of         the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED          ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,          INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED          TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA, OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN         CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
- *  Notes :
- *     au1000 SoC have only one GPIO block : GPIO1
- *     Au1100, Au15x0, Au12x0 have a second one : GPIO2
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
-
-#include <asm/mach-au1x00/au1000.h>
-#include <asm/mach-au1x00/gpio.h>
-
-static int gpio2_get(struct gpio_chip *chip, unsigned offset)
-{
-       return alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
-}
-
-static void gpio2_set(struct gpio_chip *chip, unsigned offset, int value)
-{
-       alchemy_gpio2_set_value(offset + ALCHEMY_GPIO2_BASE, value);
-}
-
-static int gpio2_direction_input(struct gpio_chip *chip, unsigned offset)
-{
-       return alchemy_gpio2_direction_input(offset + ALCHEMY_GPIO2_BASE);
-}
-
-static int gpio2_direction_output(struct gpio_chip *chip, unsigned offset,
-                                 int value)
-{
-       return alchemy_gpio2_direction_output(offset + ALCHEMY_GPIO2_BASE,
-                                               value);
-}
-
-static int gpio2_to_irq(struct gpio_chip *chip, unsigned offset)
-{
-       return alchemy_gpio2_to_irq(offset + ALCHEMY_GPIO2_BASE);
-}
-
-
-static int gpio1_get(struct gpio_chip *chip, unsigned offset)
-{
-       return alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
-}
-
-static void gpio1_set(struct gpio_chip *chip,
-                               unsigned offset, int value)
-{
-       alchemy_gpio1_set_value(offset + ALCHEMY_GPIO1_BASE, value);
-}
-
-static int gpio1_direction_input(struct gpio_chip *chip, unsigned offset)
-{
-       return alchemy_gpio1_direction_input(offset + ALCHEMY_GPIO1_BASE);
-}
-
-static int gpio1_direction_output(struct gpio_chip *chip,
-                                       unsigned offset, int value)
-{
-       return alchemy_gpio1_direction_output(offset + ALCHEMY_GPIO1_BASE,
-                                            value);
-}
-
-static int gpio1_to_irq(struct gpio_chip *chip, unsigned offset)
-{
-       return alchemy_gpio1_to_irq(offset + ALCHEMY_GPIO1_BASE);
-}
-
-struct gpio_chip alchemy_gpio_chip[] = {
-       [0] = {
-               .label                  = "alchemy-gpio1",
-               .direction_input        = gpio1_direction_input,
-               .direction_output       = gpio1_direction_output,
-               .get                    = gpio1_get,
-               .set                    = gpio1_set,
-               .to_irq                 = gpio1_to_irq,
-               .base                   = ALCHEMY_GPIO1_BASE,
-               .ngpio                  = ALCHEMY_GPIO1_NUM,
-       },
-       [1] = {
-               .label                  = "alchemy-gpio2",
-               .direction_input        = gpio2_direction_input,
-               .direction_output       = gpio2_direction_output,
-               .get                    = gpio2_get,
-               .set                    = gpio2_set,
-               .to_irq                 = gpio2_to_irq,
-               .base                   = ALCHEMY_GPIO2_BASE,
-               .ngpio                  = ALCHEMY_GPIO2_NUM,
-       },
-};
-
-static int __init alchemy_gpiolib_init(void)
-{
-       gpiochip_add(&alchemy_gpio_chip[0]);
-       if (alchemy_get_cputype() != ALCHEMY_CPU_AU1000)
-               gpiochip_add(&alchemy_gpio_chip[1]);
-
-       return 0;
-}
-arch_initcall(alchemy_gpiolib_init);
diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c
new file mode 100644
index 0000000..91fb4d9
--- /dev/null
+++ b/arch/mips/alchemy/common/gpiolib.c
@@ -0,0 +1,133 @@
+/*
+ *  Copyright (C) 2007-2009, OpenWrt.org, Florian Fainelli <florian@openwrt.org>
+ *     GPIOLIB support for Alchemy chips.
+ *
+ *  This program is free software; you can redistribute         it and/or modify it
+ *  under  the terms of         the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED          ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,          INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED          TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA, OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN         CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  Notes :
+ *     This file must ONLY be built when CONFIG_GPIOLIB=y and
+ *      CONFIG_ALCHEMY_GPIO_INDIRECT=n, otherwise compilation will fail!
+ *     au1000 SoC have only one GPIO block : GPIO1
+ *     Au1100, Au15x0, Au12x0 have a second one : GPIO2
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/gpio.h>
+#include <asm/mach-au1x00/gpio-au1000.h>
+
+static int gpio2_get(struct gpio_chip *chip, unsigned offset)
+{
+       return alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
+}
+
+static void gpio2_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+       alchemy_gpio2_set_value(offset + ALCHEMY_GPIO2_BASE, value);
+}
+
+static int gpio2_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+       return alchemy_gpio2_direction_input(offset + ALCHEMY_GPIO2_BASE);
+}
+
+static int gpio2_direction_output(struct gpio_chip *chip, unsigned offset,
+                                 int value)
+{
+       return alchemy_gpio2_direction_output(offset + ALCHEMY_GPIO2_BASE,
+                                               value);
+}
+
+static int gpio2_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+       return alchemy_gpio2_to_irq(offset + ALCHEMY_GPIO2_BASE);
+}
+
+
+static int gpio1_get(struct gpio_chip *chip, unsigned offset)
+{
+       return alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
+}
+
+static void gpio1_set(struct gpio_chip *chip,
+                               unsigned offset, int value)
+{
+       alchemy_gpio1_set_value(offset + ALCHEMY_GPIO1_BASE, value);
+}
+
+static int gpio1_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+       return alchemy_gpio1_direction_input(offset + ALCHEMY_GPIO1_BASE);
+}
+
+static int gpio1_direction_output(struct gpio_chip *chip,
+                                       unsigned offset, int value)
+{
+       return alchemy_gpio1_direction_output(offset + ALCHEMY_GPIO1_BASE,
+                                            value);
+}
+
+static int gpio1_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+       return alchemy_gpio1_to_irq(offset + ALCHEMY_GPIO1_BASE);
+}
+
+struct gpio_chip alchemy_gpio_chip[] = {
+       [0] = {
+               .label                  = "alchemy-gpio1",
+               .direction_input        = gpio1_direction_input,
+               .direction_output       = gpio1_direction_output,
+               .get                    = gpio1_get,
+               .set                    = gpio1_set,
+               .to_irq                 = gpio1_to_irq,
+               .base                   = ALCHEMY_GPIO1_BASE,
+               .ngpio                  = ALCHEMY_GPIO1_NUM,
+       },
+       [1] = {
+               .label                  = "alchemy-gpio2",
+               .direction_input        = gpio2_direction_input,
+               .direction_output       = gpio2_direction_output,
+               .get                    = gpio2_get,
+               .set                    = gpio2_set,
+               .to_irq                 = gpio2_to_irq,
+               .base                   = ALCHEMY_GPIO2_BASE,
+               .ngpio                  = ALCHEMY_GPIO2_NUM,
+       },
+};
+
+static int __init alchemy_gpiochip_init(void)
+{
+       int ret = 0;
+
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1000:
+               ret = gpiochip_add(&alchemy_gpio_chip[0]);
+               break;
+       case ALCHEMY_CPU_AU1500...ALCHEMY_CPU_AU1200:
+               ret = gpiochip_add(&alchemy_gpio_chip[0]);
+               ret |= gpiochip_add(&alchemy_gpio_chip[1]);
+               break;
+       }
+       return ret;
+}
+arch_initcall(alchemy_gpiochip_init);
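
With the banks registered through gpiochip_add(), the rest of the kernel reaches these pins via the generic gpiolib consumer calls rather than the alchemy_gpio*_ helpers. A sketch (the pin number and label are hypothetical):

    #include <linux/gpio.h>
    #include <linux/init.h>

    static int __init example_pin_init(void)
    {
            int ret;

            ret = gpio_request(13, "example-pin");  /* hypothetical pin number */
            if (ret)
                    return ret;

            return gpio_direction_output(13, 1);    /* drive it high */
    }
    arch_initcall(example_pin_init);
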
diff --git a/arch/mips/alchemy/common/pci.c b/arch/mips/alchemy/common/pci.c
deleted file mode 100644
index 7866cf5..0000000
--- a/arch/mips/alchemy/common/pci.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * BRIEF MODULE DESCRIPTION
- *     Alchemy/AMD Au1x00 PCI support.
- *
- * Copyright 2001-2003, 2007-2008 MontaVista Software Inc.
- * Author: MontaVista Software, Inc. <source@mvista.com>
- *
- * Copyright (C) 2004 by Ralf Baechle (ralf@linux-mips.org)
- *
- *  Support for all devices (greater than 16) added by David Gathright.
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/pci.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-
-#include <asm/mach-au1x00/au1000.h>
-
-/* TBD */
-static struct resource pci_io_resource = {
-       .start  = PCI_IO_START,
-       .end    = PCI_IO_END,
-       .name   = "PCI IO space",
-       .flags  = IORESOURCE_IO
-};
-
-static struct resource pci_mem_resource = {
-       .start  = PCI_MEM_START,
-       .end    = PCI_MEM_END,
-       .name   = "PCI memory space",
-       .flags  = IORESOURCE_MEM
-};
-
-extern struct pci_ops au1x_pci_ops;
-
-static struct pci_controller au1x_controller = {
-       .pci_ops        = &au1x_pci_ops,
-       .io_resource    = &pci_io_resource,
-       .mem_resource   = &pci_mem_resource,
-};
-
-#if defined(CONFIG_SOC_AU1500) || defined(CONFIG_SOC_AU1550)
-static unsigned long virt_io_addr;
-#endif
-
-static int __init au1x_pci_setup(void)
-{
-       extern void au1x_pci_cfg_init(void);
-
-#if defined(CONFIG_SOC_AU1500) || defined(CONFIG_SOC_AU1550)
-       virt_io_addr = (unsigned long)ioremap(Au1500_PCI_IO_START,
-                       Au1500_PCI_IO_END - Au1500_PCI_IO_START + 1);
-
-       if (!virt_io_addr) {
-               printk(KERN_ERR "Unable to ioremap pci space\n");
-               return 1;
-       }
-       au1x_controller.io_map_base = virt_io_addr;
-
-#ifdef CONFIG_DMA_NONCOHERENT
-       {
-               /*
-                *  Set the NC bit in controller for Au1500 pre-AC silicon
-                */
-               u32 prid = read_c0_prid();
-
-               if ((prid & 0xFF000000) == 0x01000000 && prid < 0x01030202) {
-                       au_writel((1 << 16) | au_readl(Au1500_PCI_CFG),
-                                 Au1500_PCI_CFG);
-                       printk(KERN_INFO "Non-coherent PCI accesses enabled\n");
-               }
-       }
-#endif
-
-       set_io_port_base(virt_io_addr);
-#endif
-
-       au1x_pci_cfg_init();
-
-       register_pci_controller(&au1x_controller);
-       return 0;
-}
-
-arch_initcall(au1x_pci_setup);
index f72c48d4804c30989e78324639ec516f6d1aea75..c8e5d72a582684ff79e022a0c0b9b3313a700034 100644
@@ -18,7 +18,7 @@
 #include <linux/serial_8250.h>
 #include <linux/slab.h>
 
-#include <asm/mach-au1x00/au1xxx.h>
+#include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
 #include <asm/mach-au1x00/au1100_mmc.h>
 #include <asm/mach-au1x00/au1xxx_eth.h>
@@ -111,270 +111,87 @@ static void __init alchemy_setup_uarts(int ctype)
                printk(KERN_INFO "Alchemy: failed to register UARTs\n");
 }
 
-/* OHCI (USB full speed host controller) */
-static struct resource au1xxx_usb_ohci_resources[] = {
-       [0] = {
-               .start          = USB_OHCI_BASE,
-               .end            = USB_OHCI_BASE + USB_OHCI_LEN - 1,
-               .flags          = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start          = FOR_PLATFORM_C_USB_HOST_INT,
-               .end            = FOR_PLATFORM_C_USB_HOST_INT,
-               .flags          = IORESOURCE_IRQ,
-       },
-};
-
-/* The dmamask must be set for OHCI to work */
-static u64 ohci_dmamask = DMA_BIT_MASK(32);
-
-static struct platform_device au1xxx_usb_ohci_device = {
-       .name           = "au1xxx-ohci",
-       .id             = 0,
-       .dev = {
-               .dma_mask               = &ohci_dmamask,
-               .coherent_dma_mask      = DMA_BIT_MASK(32),
-       },
-       .num_resources  = ARRAY_SIZE(au1xxx_usb_ohci_resources),
-       .resource       = au1xxx_usb_ohci_resources,
-};
-
-/*** AU1100 LCD controller ***/
-
-#ifdef CONFIG_FB_AU1100
-static struct resource au1100_lcd_resources[] = {
-       [0] = {
-               .start          = LCD_PHYS_ADDR,
-               .end            = LCD_PHYS_ADDR + 0x800 - 1,
-               .flags          = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start          = AU1100_LCD_INT,
-               .end            = AU1100_LCD_INT,
-               .flags          = IORESOURCE_IRQ,
-       }
-};
-
-static u64 au1100_lcd_dmamask = DMA_BIT_MASK(32);
-
-static struct platform_device au1100_lcd_device = {
-       .name           = "au1100-lcd",
-       .id             = 0,
-       .dev = {
-               .dma_mask               = &au1100_lcd_dmamask,
-               .coherent_dma_mask      = DMA_BIT_MASK(32),
-       },
-       .num_resources  = ARRAY_SIZE(au1100_lcd_resources),
-       .resource       = au1100_lcd_resources,
-};
-#endif
-
-#ifdef CONFIG_SOC_AU1200
-/* EHCI (USB high speed host controller) */
-static struct resource au1xxx_usb_ehci_resources[] = {
-       [0] = {
-               .start          = USB_EHCI_BASE,
-               .end            = USB_EHCI_BASE + USB_EHCI_LEN - 1,
-               .flags          = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start          = AU1200_USB_INT,
-               .end            = AU1200_USB_INT,
-               .flags          = IORESOURCE_IRQ,
-       },
-};
-
-static u64 ehci_dmamask = DMA_BIT_MASK(32);
-
-static struct platform_device au1xxx_usb_ehci_device = {
-       .name           = "au1xxx-ehci",
-       .id             = 0,
-       .dev = {
-               .dma_mask               = &ehci_dmamask,
-               .coherent_dma_mask      = DMA_BIT_MASK(32),
-       },
-       .num_resources  = ARRAY_SIZE(au1xxx_usb_ehci_resources),
-       .resource       = au1xxx_usb_ehci_resources,
-};
-
-/* Au1200 UDC (USB gadget controller) */
-static struct resource au1xxx_usb_gdt_resources[] = {
-       [0] = {
-               .start          = USB_UDC_BASE,
-               .end            = USB_UDC_BASE + USB_UDC_LEN - 1,
-               .flags          = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start          = AU1200_USB_INT,
-               .end            = AU1200_USB_INT,
-               .flags          = IORESOURCE_IRQ,
-       },
-};
-
-static u64 udc_dmamask = DMA_BIT_MASK(32);
 
-static struct platform_device au1xxx_usb_gdt_device = {
-       .name           = "au1xxx-udc",
-       .id             = 0,
-       .dev = {
-               .dma_mask               = &udc_dmamask,
-               .coherent_dma_mask      = DMA_BIT_MASK(32),
-       },
-       .num_resources  = ARRAY_SIZE(au1xxx_usb_gdt_resources),
-       .resource       = au1xxx_usb_gdt_resources,
-};
+/* The dmamask must be set for OHCI/EHCI to work */
+static u64 alchemy_ohci_dmamask = DMA_BIT_MASK(32);
+static u64 __maybe_unused alchemy_ehci_dmamask = DMA_BIT_MASK(32);
 
-/* Au1200 UOC (USB OTG controller) */
-static struct resource au1xxx_usb_otg_resources[] = {
-       [0] = {
-               .start          = USB_UOC_BASE,
-               .end            = USB_UOC_BASE + USB_UOC_LEN - 1,
-               .flags          = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start          = AU1200_USB_INT,
-               .end            = AU1200_USB_INT,
-               .flags          = IORESOURCE_IRQ,
-       },
+static unsigned long alchemy_ohci_data[][2] __initdata = {
+       [ALCHEMY_CPU_AU1000] = { AU1000_USB_OHCI_PHYS_ADDR, AU1000_USB_HOST_INT },
+       [ALCHEMY_CPU_AU1500] = { AU1000_USB_OHCI_PHYS_ADDR, AU1500_USB_HOST_INT },
+       [ALCHEMY_CPU_AU1100] = { AU1000_USB_OHCI_PHYS_ADDR, AU1100_USB_HOST_INT },
+       [ALCHEMY_CPU_AU1550] = { AU1550_USB_OHCI_PHYS_ADDR, AU1550_USB_HOST_INT },
+       [ALCHEMY_CPU_AU1200] = { AU1200_USB_OHCI_PHYS_ADDR, AU1200_USB_INT },
 };
 
-static u64 uoc_dmamask = DMA_BIT_MASK(32);
-
-static struct platform_device au1xxx_usb_otg_device = {
-       .name           = "au1xxx-uoc",
-       .id             = 0,
-       .dev = {
-               .dma_mask               = &uoc_dmamask,
-               .coherent_dma_mask      = DMA_BIT_MASK(32),
-       },
-       .num_resources  = ARRAY_SIZE(au1xxx_usb_otg_resources),
-       .resource       = au1xxx_usb_otg_resources,
+static unsigned long alchemy_ehci_data[][2] __initdata = {
+       [ALCHEMY_CPU_AU1200] = { AU1200_USB_EHCI_PHYS_ADDR, AU1200_USB_INT },
 };
 
-static struct resource au1200_lcd_resources[] = {
-       [0] = {
-               .start          = LCD_PHYS_ADDR,
-               .end            = LCD_PHYS_ADDR + 0x800 - 1,
-               .flags          = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start          = AU1200_LCD_INT,
-               .end            = AU1200_LCD_INT,
-               .flags          = IORESOURCE_IRQ,
+static int __init _new_usbres(struct resource **r, struct platform_device **d)
+{
+       *r = kzalloc(sizeof(struct resource) * 2, GFP_KERNEL);
+       if (!*r)
+               return -ENOMEM;
+       *d = kzalloc(sizeof(struct platform_device), GFP_KERNEL);
+       if (!*d) {
+               kfree(*r);
+               return -ENOMEM;
        }
-};
-
-static u64 au1200_lcd_dmamask = DMA_BIT_MASK(32);
-
-static struct platform_device au1200_lcd_device = {
-       .name           = "au1200-lcd",
-       .id             = 0,
-       .dev = {
-               .dma_mask               = &au1200_lcd_dmamask,
-               .coherent_dma_mask      = DMA_BIT_MASK(32),
-       },
-       .num_resources  = ARRAY_SIZE(au1200_lcd_resources),
-       .resource       = au1200_lcd_resources,
-};
 
-static u64 au1xxx_mmc_dmamask =  DMA_BIT_MASK(32);
+       (*d)->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+       (*d)->num_resources = 2;
+       (*d)->resource = *r;
 
-extern struct au1xmmc_platform_data au1xmmc_platdata[2];
+       return 0;
+}
 
-static struct resource au1200_mmc0_resources[] = {
-       [0] = {
-               .start          = AU1100_SD0_PHYS_ADDR,
-               .end            = AU1100_SD0_PHYS_ADDR + 0xfff,
-               .flags          = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start          = AU1200_SD_INT,
-               .end            = AU1200_SD_INT,
-               .flags          = IORESOURCE_IRQ,
-       },
-       [2] = {
-               .start          = DSCR_CMD0_SDMS_TX0,
-               .end            = DSCR_CMD0_SDMS_TX0,
-               .flags          = IORESOURCE_DMA,
-       },
-       [3] = {
-               .start          = DSCR_CMD0_SDMS_RX0,
-               .end            = DSCR_CMD0_SDMS_RX0,
-               .flags          = IORESOURCE_DMA,
-       }
-};
+static void __init alchemy_setup_usb(int ctype)
+{
+       struct resource *res;
+       struct platform_device *pdev;
 
-static struct platform_device au1200_mmc0_device = {
-       .name = "au1xxx-mmc",
-       .id = 0,
-       .dev = {
-               .dma_mask               = &au1xxx_mmc_dmamask,
-               .coherent_dma_mask      = DMA_BIT_MASK(32),
-               .platform_data          = &au1xmmc_platdata[0],
-       },
-       .num_resources  = ARRAY_SIZE(au1200_mmc0_resources),
-       .resource       = au1200_mmc0_resources,
-};
+       /* setup OHCI0.  Every variant has one */
+       if (_new_usbres(&res, &pdev))
+               return;
 
-#ifndef CONFIG_MIPS_DB1200
-static struct resource au1200_mmc1_resources[] = {
-       [0] = {
-               .start          = AU1100_SD1_PHYS_ADDR,
-               .end            = AU1100_SD1_PHYS_ADDR + 0xfff,
-               .flags          = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start          = AU1200_SD_INT,
-               .end            = AU1200_SD_INT,
-               .flags          = IORESOURCE_IRQ,
-       },
-       [2] = {
-               .start          = DSCR_CMD0_SDMS_TX1,
-               .end            = DSCR_CMD0_SDMS_TX1,
-               .flags          = IORESOURCE_DMA,
-       },
-       [3] = {
-               .start          = DSCR_CMD0_SDMS_RX1,
-               .end            = DSCR_CMD0_SDMS_RX1,
-               .flags          = IORESOURCE_DMA,
+       res[0].start = alchemy_ohci_data[ctype][0];
+       res[0].end = res[0].start + 0x100 - 1;
+       res[0].flags = IORESOURCE_MEM;
+       res[1].start = alchemy_ohci_data[ctype][1];
+       res[1].end = res[1].start;
+       res[1].flags = IORESOURCE_IRQ;
+       pdev->name = "au1xxx-ohci";
+       pdev->id = 0;
+       pdev->dev.dma_mask = &alchemy_ohci_dmamask;
+
+       if (platform_device_register(pdev))
+               printk(KERN_INFO "Alchemy USB: cannot add OHCI0\n");
+
+
+       /* setup EHCI0: Au1200 */
+       if (ctype == ALCHEMY_CPU_AU1200) {
+               if (_new_usbres(&res, &pdev))
+                       return;
+
+               res[0].start = alchemy_ehci_data[ctype][0];
+               res[0].end = res[0].start + 0x100 - 1;
+               res[0].flags = IORESOURCE_MEM;
+               res[1].start = alchemy_ehci_data[ctype][1];
+               res[1].end = res[1].start;
+               res[1].flags = IORESOURCE_IRQ;
+               pdev->name = "au1xxx-ehci";
+               pdev->id = 0;
+               pdev->dev.dma_mask = &alchemy_ehci_dmamask;
+
+               if (platform_device_register(pdev))
+                       printk(KERN_INFO "Alchemy USB: cannot add EHCI0\n");
        }
-};
-
-static struct platform_device au1200_mmc1_device = {
-       .name = "au1xxx-mmc",
-       .id = 1,
-       .dev = {
-               .dma_mask               = &au1xxx_mmc_dmamask,
-               .coherent_dma_mask      = DMA_BIT_MASK(32),
-               .platform_data          = &au1xmmc_platdata[1],
-       },
-       .num_resources  = ARRAY_SIZE(au1200_mmc1_resources),
-       .resource       = au1200_mmc1_resources,
-};
-#endif /* #ifndef CONFIG_MIPS_DB1200 */
-#endif /* #ifdef CONFIG_SOC_AU1200 */
-
-/* All Alchemy demoboards with I2C have this #define in their headers */
-#ifdef SMBUS_PSC_BASE
-static struct resource pbdb_smbus_resources[] = {
-       {
-               .start  = CPHYSADDR(SMBUS_PSC_BASE),
-               .end    = CPHYSADDR(SMBUS_PSC_BASE + 0xfffff),
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-static struct platform_device pbdb_smbus_device = {
-       .name           = "au1xpsc_smbus",
-       .id             = 0,    /* bus number */
-       .num_resources  = ARRAY_SIZE(pbdb_smbus_resources),
-       .resource       = pbdb_smbus_resources,
-};
-#endif
+}
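
The helper above builds platform devices at runtime from kzalloc()'d memory instead of static tables. For comparison, here is a minimal sketch of the same pattern written with the core platform_device_alloc()/platform_device_add() helpers, which copy the resource table and release everything on failure; this is an illustration of the idiom only, not code from this patch, and the register_one_ohci() name is made up:

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>

static int __init register_one_ohci(unsigned long base, int irq, u64 *dmamask)
{
	struct resource res[2] = {
		{ .start = base, .end = base + 0x100 - 1, .flags = IORESOURCE_MEM },
		{ .start = irq,  .end = irq, .flags = IORESOURCE_IRQ },
	};
	struct platform_device *pdev;
	int ret;

	pdev = platform_device_alloc("au1xxx-ohci", 0);
	if (!pdev)
		return -ENOMEM;

	/* platform_device_add_resources() copies res[], so handing in
	 * a stack array is safe here */
	ret = platform_device_add_resources(pdev, res, ARRAY_SIZE(res));
	if (ret)
		goto out;

	pdev->dev.dma_mask = dmamask;
	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);

	ret = platform_device_add(pdev);
out:
	if (ret)
		platform_device_put(pdev); /* frees pdev and the copied resources */
	return ret;
}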
 
 /* Macro to help defining the Ethernet MAC resources */
-#define MAC_RES_COUNT  3       /* MAC regs base, MAC enable reg, MAC INT */
-#define MAC_RES(_base, _enable, _irq)                  \
+#define MAC_RES_COUNT  4       /* MAC regs, MAC en, MAC INT, MACDMA regs */
+#define MAC_RES(_base, _enable, _irq, _macdma)         \
        {                                               \
                .start  = _base,                        \
                .end    = _base + 0xffff,               \
@@ -389,28 +206,37 @@ static struct platform_device pbdb_smbus_device = {
                .start  = _irq,                         \
                .end    = _irq,                         \
                .flags  = IORESOURCE_IRQ                \
+       },                                              \
+       {                                               \
+               .start  = _macdma,                      \
+               .end    = _macdma + 0x1ff,              \
+               .flags  = IORESOURCE_MEM,               \
        }
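
With the added fourth entry, each MAC's platform device now also carries its MACDMA register block, as announced in the MAC_RES_COUNT comment above. A sketch of how a driver probe could claim it; the function name is hypothetical, and note that platform_get_resource() indexes within a resource type, so the MACDMA block is the third IORESOURCE_MEM entry, index 2:

#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>

static int example_eth_probe(struct platform_device *pdev)
{
	struct resource *macdma;

	/* MEM entries in MAC_RES() order: 0 = MAC regs, 1 = MAC enable,
	 * 2 = MACDMA regs; the IRQ entry does not count here */
	macdma = platform_get_resource(pdev, IORESOURCE_MEM, 2);
	if (!macdma)
		return -ENODEV;

	/* a real driver would ioremap macdma->start..macdma->end here */
	return 0;
}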
 
 static struct resource au1xxx_eth0_resources[][MAC_RES_COUNT] __initdata = {
        [ALCHEMY_CPU_AU1000] = {
                MAC_RES(AU1000_MAC0_PHYS_ADDR,
                        AU1000_MACEN_PHYS_ADDR,
-                       AU1000_MAC0_DMA_INT)
+                       AU1000_MAC0_DMA_INT,
+                       AU1000_MACDMA0_PHYS_ADDR)
        },
        [ALCHEMY_CPU_AU1500] = {
                MAC_RES(AU1500_MAC0_PHYS_ADDR,
                        AU1500_MACEN_PHYS_ADDR,
-                       AU1500_MAC0_DMA_INT)
+                       AU1500_MAC0_DMA_INT,
+                       AU1000_MACDMA0_PHYS_ADDR)
        },
        [ALCHEMY_CPU_AU1100] = {
                MAC_RES(AU1000_MAC0_PHYS_ADDR,
                        AU1000_MACEN_PHYS_ADDR,
-                       AU1100_MAC0_DMA_INT)
+                       AU1100_MAC0_DMA_INT,
+                       AU1000_MACDMA0_PHYS_ADDR)
        },
        [ALCHEMY_CPU_AU1550] = {
                MAC_RES(AU1000_MAC0_PHYS_ADDR,
                        AU1000_MACEN_PHYS_ADDR,
-                       AU1550_MAC0_DMA_INT)
+                       AU1550_MAC0_DMA_INT,
+                       AU1000_MACDMA0_PHYS_ADDR)
        },
 };
 
@@ -429,17 +255,20 @@ static struct resource au1xxx_eth1_resources[][MAC_RES_COUNT] __initdata = {
        [ALCHEMY_CPU_AU1000] = {
                MAC_RES(AU1000_MAC1_PHYS_ADDR,
                        AU1000_MACEN_PHYS_ADDR + 4,
-                       AU1000_MAC1_DMA_INT)
+                       AU1000_MAC1_DMA_INT,
+                       AU1000_MACDMA1_PHYS_ADDR)
        },
        [ALCHEMY_CPU_AU1500] = {
                MAC_RES(AU1500_MAC1_PHYS_ADDR,
                        AU1500_MACEN_PHYS_ADDR + 4,
-                       AU1500_MAC1_DMA_INT)
+                       AU1500_MAC1_DMA_INT,
+                       AU1000_MACDMA1_PHYS_ADDR)
        },
        [ALCHEMY_CPU_AU1550] = {
                MAC_RES(AU1000_MAC1_PHYS_ADDR,
                        AU1000_MACEN_PHYS_ADDR + 4,
-                       AU1550_MAC1_DMA_INT)
+                       AU1550_MAC1_DMA_INT,
+                       AU1000_MACDMA1_PHYS_ADDR)
        },
 };
 
@@ -521,36 +350,15 @@ static void __init alchemy_setup_macs(int ctype)
        }
 }
 
-static struct platform_device *au1xxx_platform_devices[] __initdata = {
-       &au1xxx_usb_ohci_device,
-#ifdef CONFIG_FB_AU1100
-       &au1100_lcd_device,
-#endif
-#ifdef CONFIG_SOC_AU1200
-       &au1xxx_usb_ehci_device,
-       &au1xxx_usb_gdt_device,
-       &au1xxx_usb_otg_device,
-       &au1200_lcd_device,
-       &au1200_mmc0_device,
-#ifndef CONFIG_MIPS_DB1200
-       &au1200_mmc1_device,
-#endif
-#endif
-#ifdef SMBUS_PSC_BASE
-       &pbdb_smbus_device,
-#endif
-};
-
 static int __init au1xxx_platform_init(void)
 {
-       int err, ctype = alchemy_get_cputype();
+       int ctype = alchemy_get_cputype();
 
        alchemy_setup_uarts(ctype);
        alchemy_setup_macs(ctype);
+       alchemy_setup_usb(ctype);
 
-       err = platform_add_devices(au1xxx_platform_devices,
-                                  ARRAY_SIZE(au1xxx_platform_devices));
-       return err;
+       return 0;
 }
 
 arch_initcall(au1xxx_platform_init);
index b86324a42601105f7b540d25c3b89c1245b69b5e..bdd6651e9a4f3d4f0f7fc59f5f38be6e79b1a738 100644
@@ -37,8 +37,6 @@
 #include <asm/uaccess.h>
 #include <asm/mach-au1x00/au1000.h>
 
-#ifdef CONFIG_PM
-
 /*
  * We need to save/restore a bunch of core registers that are
  * either volatile or reset to some state across a processor sleep.
@@ -49,7 +47,6 @@
  * We only have to save/restore registers that aren't otherwise
  * done as part of a driver pm_* function.
  */
-static unsigned int sleep_usb[2];
 static unsigned int sleep_sys_clocks[5];
 static unsigned int sleep_sys_pinfunc;
 static unsigned int sleep_static_memctlr[4][3];
@@ -57,31 +54,6 @@ static unsigned int sleep_static_memctlr[4][3];
 
 static void save_core_regs(void)
 {
-#ifndef CONFIG_SOC_AU1200
-       /* Shutdown USB host/device. */
-       sleep_usb[0] = au_readl(USB_HOST_CONFIG);
-
-       /* There appears to be some undocumented reset register.... */
-       au_writel(0, 0xb0100004);
-       au_sync();
-       au_writel(0, USB_HOST_CONFIG);
-       au_sync();
-
-       sleep_usb[1] = au_readl(USBD_ENABLE);
-       au_writel(0, USBD_ENABLE);
-       au_sync();
-
-#else  /* AU1200 */
-
-       /* enable access to OTG mmio so we can save OTG CAP/MUX.
-        * FIXME: write an OTG driver and move this stuff there!
-        */
-       au_writel(au_readl(USB_MSR_BASE + 4) | (1 << 6), USB_MSR_BASE + 4);
-       au_sync();
-       sleep_usb[0] = au_readl(0xb4020020);    /* OTG_CAP */
-       sleep_usb[1] = au_readl(0xb4020024);    /* OTG_MUX */
-#endif
-
        /* Clocks and PLLs. */
        sleep_sys_clocks[0] = au_readl(SYS_FREQCTRL0);
        sleep_sys_clocks[1] = au_readl(SYS_FREQCTRL1);
@@ -125,22 +97,6 @@ static void restore_core_regs(void)
        au_writel(sleep_sys_pinfunc, SYS_PINFUNC);
        au_sync();
 
-#ifndef CONFIG_SOC_AU1200
-       au_writel(sleep_usb[0], USB_HOST_CONFIG);
-       au_writel(sleep_usb[1], USBD_ENABLE);
-       au_sync();
-#else
-       /* enable access to OTG memory */
-       au_writel(au_readl(USB_MSR_BASE + 4) | (1 << 6), USB_MSR_BASE + 4);
-       au_sync();
-
-       /* restore OTG caps and port mux. */
-       au_writel(sleep_usb[0], 0xb4020020 + 0);        /* OTG_CAP */
-       au_sync();
-       au_writel(sleep_usb[1], 0xb4020020 + 4);        /* OTG_MUX */
-       au_sync();
-#endif
-
        /* Restore the static memory controller configuration. */
        au_writel(sleep_static_memctlr[0][0], MEM_STCFG0);
        au_writel(sleep_static_memctlr[0][1], MEM_STTIME0);
@@ -174,5 +130,3 @@ void au_sleep(void)
 
        restore_core_regs();
 }
-
-#endif /* CONFIG_PM */
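
The deleted block moves USB state out of the core sleep path, following the rule stated in the comment above: registers that a driver's own pm callbacks save and restore no longer belong here. As a hedged sketch of where such state now lives, assuming a fictional register at offset 0x04 of a driver-mapped region (all names below are made up):

#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/pm.h>

static void __iomem *mmio;	/* assumed mapped in the driver's probe() */
static u32 saved_cfg;

static int example_suspend(struct device *dev)
{
	saved_cfg = __raw_readl(mmio + 0x04);	/* hypothetical register */
	return 0;
}

static int example_resume(struct device *dev)
{
	__raw_writel(saved_cfg, mmio + 0x04);
	wmb();
	return 0;
}

static const struct dev_pm_ops example_pm_ops = {
	.suspend = example_suspend,
	.resume  = example_resume,
};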
index 1b887c86841754133e2539a2e5f4c9748ed2e7df..37ffd997c6165e2c0ebfab28b82d0c8872156a87 100644
@@ -73,8 +73,8 @@ void __init plat_mem_setup(void)
 /* This routine should be valid for all Au1x based boards */
 phys_t __fixup_bigphys_addr(phys_t phys_addr, phys_t size)
 {
-       u32 start = (u32)Au1500_PCI_MEM_START;
-       u32 end   = (u32)Au1500_PCI_MEM_END;
+       unsigned long start = ALCHEMY_PCI_MEMWIN_START;
+       unsigned long end = ALCHEMY_PCI_MEMWIN_END;
 
        /* Don't fixup 36-bit addresses */
        if ((phys_addr >> 32) != 0)
@@ -82,7 +82,7 @@ phys_t __fixup_bigphys_addr(phys_t phys_addr, phys_t size)
 
        /* Check for PCI memory window */
        if (phys_addr >= start && (phys_addr + size - 1) <= end)
-               return (phys_t)((phys_addr - start) + Au1500_PCI_MEM_START);
+               return (phys_t)(AU1500_PCI_MEM_PHYS_ADDR + phys_addr);
 
        /* default nop */
        return phys_addr;
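
A worked example of the fixup, assuming ALCHEMY_PCI_MEMWIN_START = 0x40000000 and AU1500_PCI_MEM_PHYS_ADDR = 0x400000000ULL (the values implied by the old code, where the (u32) cast truncated Au1500_PCI_MEM_START down to 0x40000000): a request for physical address 0x40123000 falls inside the window, so the function returns 0x400000000 + 0x40123000 = 0x440123000, the 36-bit address the PCI memory aperture actually decodes. That is arithmetically identical to the old (phys_addr - start) + Au1500_PCI_MEM_START, just without the truncating cast.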
index dda090bf74e6bb1a9f47a8492324fd167b5b06ad..c61867c93c4ab49c44c6c9b7df8387cc2f2adac3 100644
@@ -213,7 +213,12 @@ static struct resource db1200_ide_res[] = {
                .start  = DB1200_IDE_INT,
                .end    = DB1200_IDE_INT,
                .flags  = IORESOURCE_IRQ,
-       }
+       },
+       [2] = {
+               .start  = AU1200_DSCR_CMD0_DMA_REQ1,
+               .end    = AU1200_DSCR_CMD0_DMA_REQ1,
+               .flags  = IORESOURCE_DMA,
+       },
 };
 
 static u64 ide_dmamask = DMA_BIT_MASK(32);
@@ -328,23 +333,85 @@ static struct led_classdev db1200_mmc_led = {
        .brightness_set = db1200_mmcled_set,
 };
 
-/* needed by arch/mips/alchemy/common/platform.c */
-struct au1xmmc_platform_data au1xmmc_platdata[] = {
+static struct au1xmmc_platform_data db1200mmc_platdata = {
+       .cd_setup       = db1200_mmc_cd_setup,
+       .set_power      = db1200_mmc_set_power,
+       .card_inserted  = db1200_mmc_card_inserted,
+       .card_readonly  = db1200_mmc_card_readonly,
+       .led            = &db1200_mmc_led,
+};
+
+static struct resource au1200_mmc0_resources[] = {
        [0] = {
-               .cd_setup       = db1200_mmc_cd_setup,
-               .set_power      = db1200_mmc_set_power,
-               .card_inserted  = db1200_mmc_card_inserted,
-               .card_readonly  = db1200_mmc_card_readonly,
-               .led            = &db1200_mmc_led,
+               .start  = AU1100_SD0_PHYS_ADDR,
+               .end    = AU1100_SD0_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AU1200_SD_INT,
+               .end    = AU1200_SD_INT,
+               .flags  = IORESOURCE_IRQ,
+       },
+       [2] = {
+               .start  = AU1200_DSCR_CMD0_SDMS_TX0,
+               .end    = AU1200_DSCR_CMD0_SDMS_TX0,
+               .flags  = IORESOURCE_DMA,
+       },
+       [3] = {
+               .start  = AU1200_DSCR_CMD0_SDMS_RX0,
+               .end    = AU1200_DSCR_CMD0_SDMS_RX0,
+               .flags  = IORESOURCE_DMA,
+       }
+};
+
+static u64 au1xxx_mmc_dmamask = DMA_BIT_MASK(32);
+
+static struct platform_device db1200_mmc0_dev = {
+       .name           = "au1xxx-mmc",
+       .id             = 0,
+       .dev = {
+               .dma_mask               = &au1xxx_mmc_dmamask,
+               .coherent_dma_mask      = DMA_BIT_MASK(32),
+               .platform_data          = &db1200mmc_platdata,
+       },
+       .num_resources  = ARRAY_SIZE(au1200_mmc0_resources),
+       .resource       = au1200_mmc0_resources,
+};
+
+/**********************************************************************/
+
+static struct resource au1200_lcd_res[] = {
+       [0] = {
+               .start  = AU1200_LCD_PHYS_ADDR,
+               .end    = AU1200_LCD_PHYS_ADDR + 0x800 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AU1200_LCD_INT,
+               .end    = AU1200_LCD_INT,
+               .flags  = IORESOURCE_IRQ,
+       }
+};
+
+static u64 au1200_lcd_dmamask = DMA_BIT_MASK(32);
+
+static struct platform_device au1200_lcd_dev = {
+       .name           = "au1200-lcd",
+       .id             = 0,
+       .dev = {
+               .dma_mask               = &au1200_lcd_dmamask,
+               .coherent_dma_mask      = DMA_BIT_MASK(32),
        },
+       .num_resources  = ARRAY_SIZE(au1200_lcd_res),
+       .resource       = au1200_lcd_res,
 };
 
 /**********************************************************************/
 
 static struct resource au1200_psc0_res[] = {
        [0] = {
-               .start  = PSC0_PHYS_ADDR,
-               .end    = PSC0_PHYS_ADDR + 0x000fffff,
+               .start  = AU1550_PSC0_PHYS_ADDR,
+               .end    = AU1550_PSC0_PHYS_ADDR + 0xfff,
                .flags  = IORESOURCE_MEM,
        },
        [1] = {
@@ -353,13 +420,13 @@ static struct resource au1200_psc0_res[] = {
                .flags  = IORESOURCE_IRQ,
        },
        [2] = {
-               .start  = DSCR_CMD0_PSC0_TX,
-               .end    = DSCR_CMD0_PSC0_TX,
+               .start  = AU1200_DSCR_CMD0_PSC0_TX,
+               .end    = AU1200_DSCR_CMD0_PSC0_TX,
                .flags  = IORESOURCE_DMA,
        },
        [3] = {
-               .start  = DSCR_CMD0_PSC0_RX,
-               .end    = DSCR_CMD0_PSC0_RX,
+               .start  = AU1200_DSCR_CMD0_PSC0_RX,
+               .end    = AU1200_DSCR_CMD0_PSC0_RX,
                .flags  = IORESOURCE_DMA,
        },
 };
@@ -401,8 +468,8 @@ static struct platform_device db1200_spi_dev = {
 
 static struct resource au1200_psc1_res[] = {
        [0] = {
-               .start  = PSC1_PHYS_ADDR,
-               .end    = PSC1_PHYS_ADDR + 0x000fffff,
+               .start  = AU1550_PSC1_PHYS_ADDR,
+               .end    = AU1550_PSC1_PHYS_ADDR + 0xfff,
                .flags  = IORESOURCE_MEM,
        },
        [1] = {
@@ -411,13 +478,13 @@ static struct resource au1200_psc1_res[] = {
                .flags  = IORESOURCE_IRQ,
        },
        [2] = {
-               .start  = DSCR_CMD0_PSC1_TX,
-               .end    = DSCR_CMD0_PSC1_TX,
+               .start  = AU1200_DSCR_CMD0_PSC1_TX,
+               .end    = AU1200_DSCR_CMD0_PSC1_TX,
                .flags  = IORESOURCE_DMA,
        },
        [3] = {
-               .start  = DSCR_CMD0_PSC1_RX,
-               .end    = DSCR_CMD0_PSC1_RX,
+               .start  = AU1200_DSCR_CMD0_PSC1_RX,
+               .end    = AU1200_DSCR_CMD0_PSC1_RX,
                .flags  = IORESOURCE_DMA,
        },
 };
@@ -449,6 +516,8 @@ static struct platform_device db1200_audiodma_dev = {
 static struct platform_device *db1200_devs[] __initdata = {
        NULL,           /* PSC0, selected by S6.8 */
        &db1200_ide_dev,
+       &db1200_mmc0_dev,
+       &au1200_lcd_dev,
        &db1200_eth_dev,
        &db1200_rtc_dev,
        &db1200_nand_dev,
@@ -526,32 +595,28 @@ static int __init db1200_dev_init(void)
 
        /* Audio PSC clock is supplied externally. (FIXME: platdata!!) */
        __raw_writel(PSC_SEL_CLK_SERCLK,
-               (void __iomem *)KSEG1ADDR(PSC1_PHYS_ADDR) + PSC_SEL_OFFSET);
+               (void __iomem *)KSEG1ADDR(AU1550_PSC1_PHYS_ADDR) + PSC_SEL_OFFSET);
        wmb();
 
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
-                                   DB1200_PC0_INT,
-                                   DB1200_PC0_INSERT_INT,
-                                   /*DB1200_PC0_STSCHG_INT*/0,
-                                   DB1200_PC0_EJECT_INT,
-                                   0);
-
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR + 0x004000000,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x004400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x004000000,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x004400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x004000000,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x004010000 - 1,
-                                   DB1200_PC1_INT,
-                                   DB1200_PC1_INSERT_INT,
-                                   /*DB1200_PC1_STSCHG_INT*/0,
-                                   DB1200_PC1_EJECT_INT,
-                                   1);
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
+               DB1200_PC0_INT, DB1200_PC0_INSERT_INT,
+               /*DB1200_PC0_STSCHG_INT*/0, DB1200_PC0_EJECT_INT, 0);
+
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x004000000,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x004400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x004000000,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x004400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x004000000,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x004010000 - 1,
+               DB1200_PC1_INT, DB1200_PC1_INSERT_INT,
+               /*DB1200_PC1_STSCHG_INT*/0, DB1200_PC1_EJECT_INT, 1);
 
        swapped = bcsr_read(BCSR_STATUS) & BCSR_STATUS_DB1200_SWAPBOOT;
        db1x_register_norflash(64 << 20, 2, swapped);
index 5c956fe8760ffc9e99c5b928eaedfb6af7ef2cbe..7cd36e631f6c4a6c00387ad28d8d936faadf0416 100644
 
 #include <prom.h>
 
-#ifdef CONFIG_MIPS_DB1500
-char irq_tab_alchemy[][5] __initdata = {
-       [12] = { -1, AU1500_PCI_INTA, 0xff, 0xff, 0xff }, /* IDSEL 12 - HPT371   */
-       [13] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, AU1500_PCI_INTC, AU1500_PCI_INTD }, /* IDSEL 13 - PCI slot */
-};
-
-#endif
-
-
-#ifdef CONFIG_MIPS_DB1550
-char irq_tab_alchemy[][5] __initdata = {
-       [11] = { -1, AU1550_PCI_INTC, 0xff, 0xff, 0xff }, /* IDSEL 11 - on-board HPT371 */
-       [12] = { -1, AU1550_PCI_INTB, AU1550_PCI_INTC, AU1550_PCI_INTD, AU1550_PCI_INTA }, /* IDSEL 12 - PCI slot 2 (left) */
-       [13] = { -1, AU1550_PCI_INTA, AU1550_PCI_INTB, AU1550_PCI_INTC, AU1550_PCI_INTD }, /* IDSEL 13 - PCI slot 1 (right) */
-};
-#endif
-
-
 #ifdef CONFIG_MIPS_BOSPORUS
 char irq_tab_alchemy[][5] __initdata = {
        [11] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, 0xff, 0xff }, /* IDSEL 11 - miniPCI  */
@@ -91,12 +73,6 @@ const char *get_system_type(void)
 
 
 #ifdef CONFIG_MIPS_MIRAGE
-char irq_tab_alchemy[][5] __initdata = {
-       [11] = { -1, AU1500_PCI_INTD, 0xff, 0xff, 0xff }, /* IDSEL 11 - SMI VGX */
-       [12] = { -1, 0xff, 0xff, AU1500_PCI_INTC, 0xff }, /* IDSEL 12 - PNX1300 */
-       [13] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, 0xff, 0xff }, /* IDSEL 13 - miniPCI */
-};
-
 static void mirage_power_off(void)
 {
        alchemy_gpio_direction_output(210, 1);
@@ -158,9 +134,7 @@ void __init board_setup(void)
        /* initialize board register space */
        bcsr_init(bcsr1, bcsr2);
 
-       /* Not valid for Au1550 */
-#if defined(CONFIG_IRDA) && \
-   (defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1100))
+#if defined(CONFIG_IRDA) && defined(CONFIG_AU1000_FIR)
        {
                u32 pin_func;
 
index 7057d28f73016dffb4136fb7d5856c1e19f66dac..9e6b3d442acdef7aec6ecba7caeda67d5dce3944 100644
 
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1000_dma.h>
-#include <asm/mach-au1x00/au1xxx.h>
 #include <asm/mach-db1x00/bcsr.h>
 #include "../platform.h"
 
+struct pci_dev;
+
 /* DB1xxx PCMCIA interrupt sources:
  * CD0/1       GPIO0/3
  * STSCHG0/1   GPIO1/4
 #endif
 #endif
 
+#ifdef CONFIG_PCI
+#ifdef CONFIG_MIPS_DB1500
+static int db1xxx_map_pci_irq(const struct pci_dev *d, u8 slot, u8 pin)
+{
+       if ((slot < 12) || (slot > 13) || pin == 0)
+               return -1;
+       if (slot == 12)
+               return (pin == 1) ? AU1500_PCI_INTA : 0xff;
+       if (slot == 13) {
+               switch (pin) {
+               case 1: return AU1500_PCI_INTA;
+               case 2: return AU1500_PCI_INTB;
+               case 3: return AU1500_PCI_INTC;
+               case 4: return AU1500_PCI_INTD;
+               }
+       }
+       return -1;
+}
+#endif
+
+#ifdef CONFIG_MIPS_DB1550
+static int db1xxx_map_pci_irq(const struct pci_dev *d, u8 slot, u8 pin)
+{
+       if ((slot < 11) || (slot > 13) || pin == 0)
+               return -1;
+       if (slot == 11)
+               return (pin == 1) ? AU1550_PCI_INTC : 0xff;
+       if (slot == 12) {
+               switch (pin) {
+               case 1: return AU1550_PCI_INTB;
+               case 2: return AU1550_PCI_INTC;
+               case 3: return AU1550_PCI_INTD;
+               case 4: return AU1550_PCI_INTA;
+               }
+       }
+       if (slot == 13) {
+               switch (pin) {
+               case 1: return AU1550_PCI_INTA;
+               case 2: return AU1550_PCI_INTB;
+               case 3: return AU1550_PCI_INTC;
+               case 4: return AU1550_PCI_INTD;
+               }
+       }
+       return -1;
+}
+#endif
+
+#ifdef CONFIG_MIPS_BOSPORUS
+static int db1xxx_map_pci_irq(const struct pci_dev *d, u8 slot, u8 pin)
+{
+       if ((slot < 11) || (slot > 13) || pin == 0)
+               return -1;
+       if (slot == 12)
+               return (pin == 1) ? AU1500_PCI_INTA : 0xff;
+       if (slot == 11) {
+               switch (pin) {
+               case 1: return AU1500_PCI_INTA;
+               case 2: return AU1500_PCI_INTB;
+               default: return 0xff;
+               }
+       }
+       if (slot == 13) {
+               switch (pin) {
+               case 1: return AU1500_PCI_INTA;
+               case 2: return AU1500_PCI_INTB;
+               case 3: return AU1500_PCI_INTC;
+               case 4: return AU1500_PCI_INTD;
+               }
+       }
+       return -1;
+}
+#endif
+
+#ifdef CONFIG_MIPS_MIRAGE
+static int db1xxx_map_pci_irq(const struct pci_dev *d, u8 slot, u8 pin)
+{
+       if ((slot < 11) || (slot > 13) || pin == 0)
+               return -1;
+       if (slot == 11)
+               return (pin == 1) ? AU1500_PCI_INTD : 0xff;
+       if (slot == 12)
+               return (pin == 3) ? AU1500_PCI_INTC : 0xff;
+       if (slot == 13) {
+               switch (pin) {
+               case 1: return AU1500_PCI_INTA;
+               case 2: return AU1500_PCI_INTB;
+               default: return 0xff;
+               }
+       }
+       return -1;
+}
+#endif
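
All four board variants above now express their PCI interrupt routing as a callback instead of the old irq_tab_alchemy[][5] tables. A sketch of the consuming side, showing how a MIPS pcibios_map_irq() could defer to the board callback; only the struct alchemy_pci_platdata contract comes from this patch, while the pd pointer and its capture at probe time are assumptions:

#include <linux/pci.h>
#include <asm/mach-au1x00/au1000.h>	/* struct alchemy_pci_platdata */

/* assumed: saved by the alchemy-pci host driver's probe() */
static struct alchemy_pci_platdata *pd;

int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
	if (pd && pd->board_map_irq)
		return pd->board_map_irq(dev, slot, pin);
	return -1;
}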
+
+static struct resource alchemy_pci_host_res[] = {
+       [0] = {
+               .start  = AU1500_PCI_PHYS_ADDR,
+               .end    = AU1500_PCI_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+};
+
+static struct alchemy_pci_platdata db1xxx_pci_pd = {
+       .board_map_irq  = db1xxx_map_pci_irq,
+};
+
+static struct platform_device db1xxx_pci_host_dev = {
+       .dev.platform_data = &db1xxx_pci_pd,
+       .name           = "alchemy-pci",
+       .id             = 0,
+       .num_resources  = ARRAY_SIZE(alchemy_pci_host_res),
+       .resource       = alchemy_pci_host_res,
+};
+
+static int __init db15x0_pci_init(void)
+{
+       return platform_device_register(&db1xxx_pci_host_dev);
+}
+/* must be arch_initcall; MIPS PCI scans busses in a subsys_initcall */
+arch_initcall(db15x0_pci_init);
+#endif
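
For context on the arch_initcall comment: initcalls run strictly by level (core, postcore, arch, subsys, fs, device, late), so an arch_initcall is guaranteed to execute before the MIPS PCI core scans busses from its subsys_initcall. A plain device_initcall() would register the alchemy-pci host device too late for the scan to find it.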
+
+#ifdef CONFIG_MIPS_DB1100
+static struct resource au1100_lcd_resources[] = {
+       [0] = {
+               .start  = AU1100_LCD_PHYS_ADDR,
+               .end    = AU1100_LCD_PHYS_ADDR + 0x800 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AU1100_LCD_INT,
+               .end    = AU1100_LCD_INT,
+               .flags  = IORESOURCE_IRQ,
+       }
+};
+
+static u64 au1100_lcd_dmamask = DMA_BIT_MASK(32);
+
+static struct platform_device au1100_lcd_device = {
+       .name           = "au1100-lcd",
+       .id             = 0,
+       .dev = {
+               .dma_mask               = &au1100_lcd_dmamask,
+               .coherent_dma_mask      = DMA_BIT_MASK(32),
+       },
+       .num_resources  = ARRAY_SIZE(au1100_lcd_resources),
+       .resource       = au1100_lcd_resources,
+};
+#endif
+
 static struct resource alchemy_ac97c_res[] = {
        [0] = {
                .start  = AU1000_AC97_PHYS_ADDR,
@@ -130,29 +281,28 @@ static struct platform_device db1x00_audio_dev = {
 static int __init db1xxx_dev_init(void)
 {
 #ifdef DB1XXX_HAS_PCMCIA
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
-                                   DB1XXX_PCMCIA_CARD0,
-                                   DB1XXX_PCMCIA_CD0,
-                                   /*DB1XXX_PCMCIA_STSCHG0*/0,
-                                   0,
-                                   0);
-
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR + 0x004000000,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x004400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x004000000,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x004400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x004000000,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x004010000 - 1,
-                                   DB1XXX_PCMCIA_CARD1,
-                                   DB1XXX_PCMCIA_CD1,
-                                   /*DB1XXX_PCMCIA_STSCHG1*/0,
-                                   0,
-                                   1);
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
+               DB1XXX_PCMCIA_CARD0, DB1XXX_PCMCIA_CD0,
+               /*DB1XXX_PCMCIA_STSCHG0*/0, 0, 0);
+
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x004000000,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x004400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x004000000,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x004400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x004000000,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x004010000 - 1,
+               DB1XXX_PCMCIA_CARD1, DB1XXX_PCMCIA_CD1,
+               /*DB1XXX_PCMCIA_STSCHG1*/0, 0, 1);
+#endif
+#ifdef CONFIG_MIPS_DB1100
+       platform_device_register(&au1100_lcd_device);
 #endif
        db1x_register_norflash(BOARD_FLASH_SIZE, BOARD_FLASH_WIDTH, F_SWAPPED);
 
index 2c8dc29759fdd9cac6f6feded744332400af04d8..9c57c01a68c4ac91d7a68727b4937a7e1e9cf088 100644
  */
 
 #include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-db1x00/bcsr.h>
 
 #include "../platform.h"
 
+static struct resource au1100_lcd_resources[] = {
+       [0] = {
+               .start  = AU1100_LCD_PHYS_ADDR,
+               .end    = AU1100_LCD_PHYS_ADDR + 0x800 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AU1100_LCD_INT,
+               .end    = AU1100_LCD_INT,
+               .flags  = IORESOURCE_IRQ,
+       }
+};
+
+static u64 au1100_lcd_dmamask = DMA_BIT_MASK(32);
+
+static struct platform_device au1100_lcd_device = {
+       .name           = "au1100-lcd",
+       .id             = 0,
+       .dev = {
+               .dma_mask               = &au1100_lcd_dmamask,
+               .coherent_dma_mask      = DMA_BIT_MASK(32),
+       },
+       .num_resources  = ARRAY_SIZE(au1100_lcd_resources),
+       .resource       = au1100_lcd_resources,
+};
+
 static int __init pb1100_dev_init(void)
 {
        int swapped;
 
        /* PCMCIA. single socket, identical to Pb1500 */
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
-                                   AU1100_GPIO11_INT,   /* card */
-                                   AU1100_GPIO9_INT,    /* insert */
-                                   /*AU1100_GPIO10_INT*/0, /* stschg */
-                                   0,                   /* eject */
-                                   0);                  /* id */
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
+               AU1100_GPIO11_INT, AU1100_GPIO9_INT,     /* card / insert */
+               /*AU1100_GPIO10_INT*/0, 0, 0); /* stschg / eject / id */
 
        swapped = bcsr_read(BCSR_STATUS) &  BCSR_STATUS_DB1000_SWAPBOOT;
        db1x_register_norflash(64 * 1024 * 1024, 4, swapped);
+       platform_device_register(&au1100_lcd_device);
 
        return 0;
 }
index 3ef2dceeb796a8abf807fdd71471f0527419136d..54f7f7b0676e1d105a4d9f03527e4e10a520f183 100644
 #include <linux/platform_device.h>
 #include <linux/smc91x.h>
 
-#include <asm/mach-au1x00/au1xxx.h>
+#include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1100_mmc.h>
+#include <asm/mach-au1x00/au1xxx_dbdma.h>
 #include <asm/mach-db1x00/bcsr.h>
+#include <asm/mach-pb1x00/pb1200.h>
 
 #include "../platform.h"
 
@@ -88,7 +90,7 @@ static int pb1200mmc1_card_inserted(void *mmc_host)
        return (bcsr_read(BCSR_SIGSTAT) & BCSR_INT_SD1INSERT) ? 1 : 0;
 }
 
-const struct au1xmmc_platform_data au1xmmc_platdata[2] = {
+static struct au1xmmc_platform_data pb1200mmc_platdata[2] = {
        [0] = {
                .set_power      = pb1200mmc0_set_power,
                .card_inserted  = pb1200mmc0_card_inserted,
@@ -105,6 +107,79 @@ const struct au1xmmc_platform_data au1xmmc_platdata[2] = {
        },
 };
 
+static u64 au1xxx_mmc_dmamask = DMA_BIT_MASK(32);
+
+static struct resource au1200_mmc0_res[] = {
+       [0] = {
+               .start  = AU1100_SD0_PHYS_ADDR,
+               .end    = AU1100_SD0_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AU1200_SD_INT,
+               .end    = AU1200_SD_INT,
+               .flags  = IORESOURCE_IRQ,
+       },
+       [2] = {
+               .start  = AU1200_DSCR_CMD0_SDMS_TX0,
+               .end    = AU1200_DSCR_CMD0_SDMS_TX0,
+               .flags  = IORESOURCE_DMA,
+       },
+       [3] = {
+               .start  = AU1200_DSCR_CMD0_SDMS_RX0,
+               .end    = AU1200_DSCR_CMD0_SDMS_RX0,
+               .flags  = IORESOURCE_DMA,
+       }
+};
+
+static struct platform_device pb1200_mmc0_dev = {
+       .name           = "au1xxx-mmc",
+       .id             = 0,
+       .dev = {
+               .dma_mask               = &au1xxx_mmc_dmamask,
+               .coherent_dma_mask      = DMA_BIT_MASK(32),
+               .platform_data          = &pb1200mmc_platdata[0],
+       },
+       .num_resources  = ARRAY_SIZE(au1200_mmc0_res),
+       .resource       = au1200_mmc0_res,
+};
+
+static struct resource au1200_mmc1_res[] = {
+       [0] = {
+               .start  = AU1100_SD1_PHYS_ADDR,
+               .end    = AU1100_SD1_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AU1200_SD_INT,
+               .end    = AU1200_SD_INT,
+               .flags  = IORESOURCE_IRQ,
+       },
+       [2] = {
+               .start  = AU1200_DSCR_CMD0_SDMS_TX1,
+               .end    = AU1200_DSCR_CMD0_SDMS_TX1,
+               .flags  = IORESOURCE_DMA,
+       },
+       [3] = {
+               .start  = AU1200_DSCR_CMD0_SDMS_RX1,
+               .end    = AU1200_DSCR_CMD0_SDMS_RX1,
+               .flags  = IORESOURCE_DMA,
+       }
+};
+
+static struct platform_device pb1200_mmc1_dev = {
+       .name           = "au1xxx-mmc",
+       .id             = 1,
+       .dev = {
+               .dma_mask               = &au1xxx_mmc_dmamask,
+               .coherent_dma_mask      = DMA_BIT_MASK(32),
+               .platform_data          = &pb1200mmc_platdata[1],
+       },
+       .num_resources  = ARRAY_SIZE(au1200_mmc1_res),
+       .resource       = au1200_mmc1_res,
+};
+
+
 static struct resource ide_resources[] = {
        [0] = {
                .start  = IDE_PHYS_ADDR,
@@ -115,7 +190,12 @@ static struct resource ide_resources[] = {
                .start  = IDE_INT,
                .end    = IDE_INT,
                .flags  = IORESOURCE_IRQ
-       }
+       },
+       [2] = {
+               .start  = AU1200_DSCR_CMD0_DMA_REQ1,
+               .end    = AU1200_DSCR_CMD0_DMA_REQ1,
+               .flags  = IORESOURCE_DMA,
+       },
 };
 
 static u64 ide_dmamask = DMA_BIT_MASK(32);
@@ -161,38 +241,94 @@ static struct platform_device smc91c111_device = {
        .resource       = smc91c111_resources
 };
 
+static struct resource au1200_psc0_res[] = {
+       [0] = {
+               .start  = AU1550_PSC0_PHYS_ADDR,
+               .end    = AU1550_PSC0_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AU1200_PSC0_INT,
+               .end    = AU1200_PSC0_INT,
+               .flags  = IORESOURCE_IRQ,
+       },
+       [2] = {
+               .start  = AU1200_DSCR_CMD0_PSC0_TX,
+               .end    = AU1200_DSCR_CMD0_PSC0_TX,
+               .flags  = IORESOURCE_DMA,
+       },
+       [3] = {
+               .start  = AU1200_DSCR_CMD0_PSC0_RX,
+               .end    = AU1200_DSCR_CMD0_PSC0_RX,
+               .flags  = IORESOURCE_DMA,
+       },
+};
+
+static struct platform_device pb1200_i2c_dev = {
+       .name           = "au1xpsc_smbus",
+       .id             = 0,    /* bus number */
+       .num_resources  = ARRAY_SIZE(au1200_psc0_res),
+       .resource       = au1200_psc0_res,
+};
+
+static struct resource au1200_lcd_res[] = {
+       [0] = {
+               .start  = AU1200_LCD_PHYS_ADDR,
+               .end    = AU1200_LCD_PHYS_ADDR + 0x800 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AU1200_LCD_INT,
+               .end    = AU1200_LCD_INT,
+               .flags  = IORESOURCE_IRQ,
+       }
+};
+
+static u64 au1200_lcd_dmamask = DMA_BIT_MASK(32);
+
+static struct platform_device au1200_lcd_dev = {
+       .name           = "au1200-lcd",
+       .id             = 0,
+       .dev = {
+               .dma_mask               = &au1200_lcd_dmamask,
+               .coherent_dma_mask      = DMA_BIT_MASK(32),
+       },
+       .num_resources  = ARRAY_SIZE(au1200_lcd_res),
+       .resource       = au1200_lcd_res,
+};
+
 static struct platform_device *board_platform_devices[] __initdata = {
        &ide_device,
-       &smc91c111_device
+       &smc91c111_device,
+       &pb1200_i2c_dev,
+       &pb1200_mmc0_dev,
+       &pb1200_mmc1_dev,
+       &au1200_lcd_dev,
 };
 
 static int __init board_register_devices(void)
 {
        int swapped;
 
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
-                                   PB1200_PC0_INT,
-                                   PB1200_PC0_INSERT_INT,
-                                   /*PB1200_PC0_STSCHG_INT*/0,
-                                   PB1200_PC0_EJECT_INT,
-                                   0);
-
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR + 0x008000000,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x008400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x008000000,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x008400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x008000000,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x008010000 - 1,
-                                   PB1200_PC1_INT,
-                                   PB1200_PC1_INSERT_INT,
-                                   /*PB1200_PC1_STSCHG_INT*/0,
-                                   PB1200_PC1_EJECT_INT,
-                                   1);
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
+               PB1200_PC0_INT, PB1200_PC0_INSERT_INT,
+               /*PB1200_PC0_STSCHG_INT*/0, PB1200_PC0_EJECT_INT, 0);
+
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x008000000,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x008400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x008000000,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x008400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x008000000,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x008010000 - 1,
+               PB1200_PC1_INT, PB1200_PC1_INSERT_INT,
+               /*PB1200_PC1_STSCHG_INT*/0, PB1200_PC1_EJECT_INT, 1);
 
        swapped = bcsr_read(BCSR_STATUS) &  BCSR_STATUS_DB1200_SWAPBOOT;
        db1x_register_norflash(128 * 1024 * 1024, 2, swapped);
index 3b4fa32069692fbd384dbb9890ac98d7c2505261..37c1883b5ea99af433727ddbe170df23ec97ff62 100644
 
 #include <prom.h>
 
-
-char irq_tab_alchemy[][5] __initdata = {
-       [12] = { -1, AU1500_PCI_INTA, 0xff, 0xff, 0xff },   /* IDSEL 12 - HPT370        */
-       [13] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, AU1500_PCI_INTC, AU1500_PCI_INTD },   /* IDSEL 13 - PCI slot */
-};
-
-
 const char *get_system_type(void)
 {
        return "Alchemy Pb1500";
@@ -101,20 +94,18 @@ void __init board_setup(void)
 #endif /* defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) */
 
 #ifdef CONFIG_PCI
-       /* Setup PCI bus controller */
-       au_writel(0, Au1500_PCI_CMEM);
-       au_writel(0x00003fff, Au1500_CFG_BASE);
-#if defined(__MIPSEB__)
-       au_writel(0xf | (2 << 6) | (1 << 4), Au1500_PCI_CFG);
-#else
-       au_writel(0xf, Au1500_PCI_CFG);
-#endif
-       au_writel(0xf0000000, Au1500_PCI_MWMASK_DEV);
-       au_writel(0, Au1500_PCI_MWBASE_REV_CCL);
-       au_writel(0x02a00356, Au1500_PCI_STATCMD);
-       au_writel(0x00003c04, Au1500_PCI_HDRTYPE);
-       au_writel(0x00000008, Au1500_PCI_MBAR);
-       au_sync();
+       {
+               void __iomem *base =
+                               (void __iomem *)KSEG1ADDR(AU1500_PCI_PHYS_ADDR);
+               /* Setup PCI bus controller */
+               __raw_writel(0x00003fff, base + PCI_REG_CMEM);
+               __raw_writel(0xf0000000, base + PCI_REG_MWMASK_DEV);
+               __raw_writel(0, base + PCI_REG_MWBASE_REV_CCL);
+               __raw_writel(0x02a00356, base + PCI_REG_STATCMD);
+               __raw_writel(0x00003c04, base + PCI_REG_PARAM);
+               __raw_writel(0x00000008, base + PCI_REG_MBAR);
+               wmb();
+       }
 #endif
 
        /* Enable sys bus clock divider when IDLE state or no bus activity. */
index d443bc7aa76e3140052f9d2fb846a893647d3a5c..1e52a01bac00334f31b0c82524552122248eac1a 100644
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
+#include <linux/dma-mapping.h>
 #include <linux/init.h>
+#include <linux/platform_device.h>
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-db1x00/bcsr.h>
 
 #include "../platform.h"
 
+static int pb1500_map_pci_irq(const struct pci_dev *d, u8 slot, u8 pin)
+{
+       if ((slot < 12) || (slot > 13) || pin == 0)
+               return -1;
+       if (slot == 12)
+               return (pin == 1) ? AU1500_PCI_INTA : 0xff;
+       if (slot == 13) {
+               switch (pin) {
+               case 1: return AU1500_PCI_INTA;
+               case 2: return AU1500_PCI_INTB;
+               case 3: return AU1500_PCI_INTC;
+               case 4: return AU1500_PCI_INTD;
+               }
+       }
+       return -1;
+}
+
+static struct resource alchemy_pci_host_res[] = {
+       [0] = {
+               .start  = AU1500_PCI_PHYS_ADDR,
+               .end    = AU1500_PCI_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+};
+
+static struct alchemy_pci_platdata pb1500_pci_pd = {
+       .board_map_irq  = pb1500_map_pci_irq,
+       .pci_cfg_set    = PCI_CONFIG_AEN | PCI_CONFIG_R2H | PCI_CONFIG_R1H |
+                         PCI_CONFIG_CH |
+#if defined(__MIPSEB__)
+                         PCI_CONFIG_SIC_HWA_DAT | PCI_CONFIG_SM,
+#else
+                         0,
+#endif
+};
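
The pci_cfg_set field makes the old magic numbers self-describing: the 0xf that board_setup() used to write to Au1500_PCI_CFG decomposes into PCI_CONFIG_AEN | PCI_CONFIG_R2H | PCI_CONFIG_R1H | PCI_CONFIG_CH, and the extra (2 << 6) | (1 << 4) written on big-endian kernels matches PCI_CONFIG_SIC_HWA_DAT | PCI_CONFIG_SM, exactly the bits selected under __MIPSEB__ above.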
+
+static struct platform_device pb1500_pci_host = {
+       .dev.platform_data = &pb1500_pci_pd,
+       .name           = "alchemy-pci",
+       .id             = 0,
+       .num_resources  = ARRAY_SIZE(alchemy_pci_host_res),
+       .resource       = alchemy_pci_host_res,
+};
+
 static int __init pb1500_dev_init(void)
 {
        int swapped;
 
-       /* PCMCIA. single socket, identical to Pb1500 */
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
-                                   AU1500_GPIO11_INT,   /* card */
-                                   AU1500_GPIO9_INT,    /* insert */
-                                   /*AU1500_GPIO10_INT*/0, /* stschg */
-                                   0,                   /* eject */
-                                   0);                  /* id */
+       /* PCMCIA. single socket, identical to Pb1100 */
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
+               AU1500_GPIO11_INT, AU1500_GPIO9_INT,     /* card / insert */
+               /*AU1500_GPIO10_INT*/0, 0, 0); /* stschg / eject / id */
 
        swapped = bcsr_read(BCSR_STATUS) &  BCSR_STATUS_DB1000_SWAPBOOT;
        db1x_register_norflash(64 * 1024 * 1024, 4, swapped);
+       platform_device_register(&pb1500_pci_host);
 
        return 0;
 }
-device_initcall(pb1500_dev_init);
+arch_initcall(pb1500_dev_init);
index b790213848bd3f31086fbe940a4d1456b70c1617..0f62d1e3df244a51afcd0754e4604144f64b1d11 100644
 
 #include <prom.h>
 
-
-char irq_tab_alchemy[][5] __initdata = {
-       [12] = { -1, AU1550_PCI_INTB, AU1550_PCI_INTC, AU1550_PCI_INTD, AU1550_PCI_INTA }, /* IDSEL 12 - PCI slot 2 (left)  */
-       [13] = { -1, AU1550_PCI_INTA, AU1550_PCI_INTB, AU1550_PCI_INTC, AU1550_PCI_INTD }, /* IDSEL 13 - PCI slot 1 (right) */
-};
-
 const char *get_system_type(void)
 {
        return "Alchemy Pb1550";
index d7150d0f49c0d932b589eb16426be61ff08b9c8f..a4604b8a349ef222d7a7d2a5c83ab0536f524ed0 100644
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
+#include <linux/dma-mapping.h>
 #include <linux/init.h>
-
+#include <linux/platform_device.h>
 #include <asm/mach-au1x00/au1000.h>
+#include <asm/mach-au1x00/au1xxx_dbdma.h>
 #include <asm/mach-pb1x00/pb1550.h>
 #include <asm/mach-db1x00/bcsr.h>
 
 #include "../platform.h"
 
+static int pb1550_map_pci_irq(const struct pci_dev *d, u8 slot, u8 pin)
+{
+       if ((slot < 12) || (slot > 13) || pin == 0)
+               return -1;
+       if (slot == 12) {
+               switch (pin) {
+               case 1: return AU1500_PCI_INTB;
+               case 2: return AU1500_PCI_INTC;
+               case 3: return AU1500_PCI_INTD;
+               case 4: return AU1500_PCI_INTA;
+               }
+       }
+       if (slot == 13) {
+               switch (pin) {
+               case 1: return AU1500_PCI_INTA;
+               case 2: return AU1500_PCI_INTB;
+               case 3: return AU1500_PCI_INTC;
+               case 4: return AU1500_PCI_INTD;
+               }
+       }
+       return -1;
+}
+
+static struct resource alchemy_pci_host_res[] = {
+       [0] = {
+               .start  = AU1500_PCI_PHYS_ADDR,
+               .end    = AU1500_PCI_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+};
+
+static struct alchemy_pci_platdata pb1550_pci_pd = {
+       .board_map_irq  = pb1550_map_pci_irq,
+};
+
+static struct platform_device pb1550_pci_host = {
+       .dev.platform_data = &pb1550_pci_pd,
+       .name           = "alchemy-pci",
+       .id             = 0,
+       .num_resources  = ARRAY_SIZE(alchemy_pci_host_res),
+       .resource       = alchemy_pci_host_res,
+};
+
+static struct resource au1550_psc2_res[] = {
+       [0] = {
+               .start  = AU1550_PSC2_PHYS_ADDR,
+               .end    = AU1550_PSC2_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AU1550_PSC2_INT,
+               .end    = AU1550_PSC2_INT,
+               .flags  = IORESOURCE_IRQ,
+       },
+       [2] = {
+               .start  = AU1550_DSCR_CMD0_PSC2_TX,
+               .end    = AU1550_DSCR_CMD0_PSC2_TX,
+               .flags  = IORESOURCE_DMA,
+       },
+       [3] = {
+               .start  = AU1550_DSCR_CMD0_PSC2_RX,
+               .end    = AU1550_DSCR_CMD0_PSC2_RX,
+               .flags  = IORESOURCE_DMA,
+       },
+};
+
+static struct platform_device pb1550_i2c_dev = {
+       .name           = "au1xpsc_smbus",
+       .id             = 0,    /* bus number */
+       .num_resources  = ARRAY_SIZE(au1550_psc2_res),
+       .resource       = au1550_psc2_res,
+};
+
 static int __init pb1550_dev_init(void)
 {
        int swapped;
@@ -37,33 +112,29 @@ static int __init pb1550_dev_init(void)
        * drivers are used to shared irqs and b) statuschange isn't really use-
        * ful anyway.
        */
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
-                                   AU1550_GPIO201_205_INT,
-                                   AU1550_GPIO0_INT,
-                                   0,
-                                   0,
-                                   0);
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
+               AU1550_GPIO201_205_INT, AU1550_GPIO0_INT, 0, 0, 0);
 
-       db1x_register_pcmcia_socket(PCMCIA_ATTR_PHYS_ADDR + 0x008000000,
-                                   PCMCIA_ATTR_PHYS_ADDR + 0x008400000 - 1,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x008000000,
-                                   PCMCIA_MEM_PHYS_ADDR  + 0x008400000 - 1,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x008000000,
-                                   PCMCIA_IO_PHYS_ADDR   + 0x008010000 - 1,
-                                   AU1550_GPIO201_205_INT,
-                                   AU1550_GPIO1_INT,
-                                   0,
-                                   0,
-                                   1);
+       db1x_register_pcmcia_socket(
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x008000000,
+               AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x008400000 - 1,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x008000000,
+               AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x008400000 - 1,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x008000000,
+               AU1000_PCMCIA_IO_PHYS_ADDR   + 0x008010000 - 1,
+               AU1550_GPIO201_205_INT, AU1550_GPIO1_INT, 0, 0, 1);
 
        swapped = bcsr_read(BCSR_STATUS) & BCSR_STATUS_PB1550_SWAPBOOT;
        db1x_register_norflash(128 * 1024 * 1024, 4, swapped);
+       platform_device_register(&pb1550_pci_host);
+       platform_device_register(&pb1550_i2c_dev);
 
        return 0;
 }
-device_initcall(pb1550_dev_init);
+arch_initcall(pb1550_dev_init);
index 5f8f0691ed2d4fe4d01776f88e4b70685d6efbfe..dea45c78fdcd61cbf4100f911ca965dea1e5d3d1 100644
 
 #include <prom.h>
 
-char irq_tab_alchemy[][5] __initdata = {
-       [0] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, 0xff, 0xff },
-};
-
 static void gpr_reset(char *c)
 {
        /* switch System-LED to orange (red# and green# on) */
@@ -76,12 +72,4 @@ void __init board_setup(void)
 
        /* Take away Reset of UMTS-card */
        alchemy_gpio_direction_output(215, 1);
-
-#ifdef CONFIG_PCI
-#if defined(__MIPSEB__)
-       au_writel(0xf | (2 << 6) | (1 << 4), Au1500_PCI_CFG);
-#else
-       au_writel(0xf, Au1500_PCI_CFG);
-#endif
-#endif
 }
index 14b46629cfc81cb1a23c5665f759cb89b76cbfa0..982ce85db60de51d15f05f908f5960594f106726 100644 (file)
@@ -167,6 +167,45 @@ static struct i2c_board_info gpr_i2c_info[] __initdata = {
        }
 };
 
+static struct resource alchemy_pci_host_res[] = {
+       [0] = {
+               .start  = AU1500_PCI_PHYS_ADDR,
+               .end    = AU1500_PCI_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+};
+
+static int gpr_map_pci_irq(const struct pci_dev *d, u8 slot, u8 pin)
+{
+       if ((slot == 0) && (pin == 1))
+               return AU1550_PCI_INTA;
+       else if ((slot == 0) && (pin == 2))
+               return AU1550_PCI_INTB;
+
+       return -1;
+}
+
+static struct alchemy_pci_platdata gpr_pci_pd = {
+       .board_map_irq  = gpr_map_pci_irq,
+       .pci_cfg_set    = PCI_CONFIG_AEN | PCI_CONFIG_R2H | PCI_CONFIG_R1H |
+                         PCI_CONFIG_CH |
+#if defined(__MIPSEB__)
+                         PCI_CONFIG_SIC_HWA_DAT | PCI_CONFIG_SM,
+#else
+                         0,
+#endif
+};
+
+static struct platform_device gpr_pci_host_dev = {
+       .dev.platform_data = &gpr_pci_pd,
+       .name           = "alchemy-pci",
+       .id             = 0,
+       .num_resources  = ARRAY_SIZE(alchemy_pci_host_res),
+       .resource       = alchemy_pci_host_res,
+};
+
 static struct platform_device *gpr_devices[] __initdata = {
        &gpr_wdt_device,
        &gpr_mtd_device,
@@ -174,6 +213,14 @@ static struct platform_device *gpr_devices[] __initdata = {
        &gpr_led_devices,
 };
 
+static int __init gpr_pci_init(void)
+{
+       return platform_device_register(&gpr_pci_host_dev);
+}
+/* must be arch_initcall; MIPS PCI scans busses in a subsys_initcall */
+arch_initcall(gpr_pci_init);
+
 static int __init gpr_dev_init(void)
 {
        i2c_register_board_info(0, gpr_i2c_info, ARRAY_SIZE(gpr_i2c_info));
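With the global irq_tab_alchemy table gone, IRQ routing is handed to the host driver through the board_map_irq hook in struct alchemy_pci_platdata. The "alchemy-pci" driver itself is not part of this excerpt; a plausible sketch of the consumer side, with the platdata plumbing assumed:

#include <linux/pci.h>
#include <asm/mach-au1x00/au1000.h>	/* struct alchemy_pci_platdata */

/* Hedged sketch: how the host driver might consult the board hook. */
static int example_map_irq(struct alchemy_pci_platdata *pd,
			   const struct pci_dev *d, u8 slot, u8 pin)
{
	if (pd && pd->board_map_irq)
		return pd->board_map_irq(d, slot, pin); /* e.g. gpr_map_pci_irq */
	return -1;	/* no route known for this slot/pin */
}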
index 3ae984cf98cf25ce8c695f6c06808d7dc6379647..851a5ab4c8f2e8fb21289d3ea6065f0bfdd374f3 100644 (file)
 
 #include <prom.h>
 
-char irq_tab_alchemy[][5] __initdata = {
-       [0] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTA, 0xff, 0xff }, /* IDSEL 00 - AdapterA-Slot0 (top) */
-       [1] = { -1, AU1500_PCI_INTB, AU1500_PCI_INTA, 0xff, 0xff }, /* IDSEL 01 - AdapterA-Slot1 (bottom) */
-       [2] = { -1, AU1500_PCI_INTC, AU1500_PCI_INTD, 0xff, 0xff }, /* IDSEL 02 - AdapterB-Slot0 (top) */
-       [3] = { -1, AU1500_PCI_INTD, AU1500_PCI_INTC, 0xff, 0xff }, /* IDSEL 03 - AdapterB-Slot1 (bottom) */
-       [4] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, 0xff, 0xff }, /* IDSEL 04 - AdapterC-Slot0 (top) */
-       [5] = { -1, AU1500_PCI_INTB, AU1500_PCI_INTA, 0xff, 0xff }, /* IDSEL 05 - AdapterC-Slot1 (bottom) */
-       [6] = { -1, AU1500_PCI_INTC, AU1500_PCI_INTD, 0xff, 0xff }, /* IDSEL 06 - AdapterD-Slot0 (top) */
-       [7] = { -1, AU1500_PCI_INTD, AU1500_PCI_INTC, 0xff, 0xff }, /* IDSEL 07 - AdapterD-Slot1 (bottom) */
-};
-
-extern int (*board_pci_idsel)(unsigned int devsel, int assert);
-int mtx1_pci_idsel(unsigned int devsel, int assert);
-
 static void mtx1_reset(char *c)
 {
        /* Jump to the reset vector */
@@ -74,15 +60,6 @@ void __init board_setup(void)
        alchemy_gpio_direction_output(204, 0);
 #endif /* defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) */
 
-#ifdef CONFIG_PCI
-#if defined(__MIPSEB__)
-       au_writel(0xf | (2 << 6) | (1 << 4), Au1500_PCI_CFG);
-#else
-       au_writel(0xf, Au1500_PCI_CFG);
-#endif
-       board_pci_idsel = mtx1_pci_idsel;
-#endif
-
        /* Initialize sys_pinfunc */
        au_writel(SYS_PF_NI2, SYS_PINFUNC);
 
@@ -104,23 +81,6 @@ void __init board_setup(void)
        printk(KERN_INFO "4G Systems MTX-1 Board\n");
 }
 
-int
-mtx1_pci_idsel(unsigned int devsel, int assert)
-{
-       /* This function is only necessary to support a proprietary Cardbus
-        * adapter on the mtx-1 "singleboard" variant. It triggers a custom
-        * logic chip connected to EXT_IO3 (GPIO1) to suppress IDSEL signals.
-        */
-       if (assert && devsel != 0)
-               /* Suppress signal to Cardbus */
-               alchemy_gpio_set_value(1, 0);   /* set EXT_IO3 OFF */
-       else
-               alchemy_gpio_set_value(1, 1);   /* set EXT_IO3 ON */
-
-       udelay(1);
-       return 1;
-}
-
 static int __init mtx1_init_irq(void)
 {
        irq_set_irq_type(AU1500_GPIO204_INT, IRQF_TRIGGER_HIGH);
index 55628e390fd7460c4a9d25ede7ffb7cd1c6addfe..cc47b6868ca3f845fb851f2885862024ba96e86d 100644 (file)
@@ -135,7 +135,69 @@ static struct platform_device mtx1_mtd = {
        .resource       = &mtx1_mtd_resource,
 };
 
+static struct resource alchemy_pci_host_res[] = {
+       [0] = {
+               .start  = AU1500_PCI_PHYS_ADDR,
+               .end    = AU1500_PCI_PHYS_ADDR + 0xfff,
+               .flags  = IORESOURCE_MEM,
+       },
+};
+
+static int mtx1_pci_idsel(unsigned int devsel, int assert)
+{
+       /* This function is only necessary to support a proprietary Cardbus
+        * adapter on the mtx-1 "singleboard" variant. It triggers a custom
+        * logic chip connected to EXT_IO3 (GPIO1) to suppress IDSEL signals.
+        */
+       if (assert && devsel != 0)
+               /* Suppress signal to Cardbus */
+               alchemy_gpio_set_value(1, 0);   /* set EXT_IO3 OFF */
+       else
+               alchemy_gpio_set_value(1, 1);   /* set EXT_IO3 ON */
+
+       udelay(1);
+       return 1;
+}
+
+static const char mtx1_irqtab[][5] = {
+       [0] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTA, 0xff, 0xff }, /* IDSEL 00 - AdapterA-Slot0 (top) */
+       [1] = { -1, AU1500_PCI_INTB, AU1500_PCI_INTA, 0xff, 0xff }, /* IDSEL 01 - AdapterA-Slot1 (bottom) */
+       [2] = { -1, AU1500_PCI_INTC, AU1500_PCI_INTD, 0xff, 0xff }, /* IDSEL 02 - AdapterB-Slot0 (top) */
+       [3] = { -1, AU1500_PCI_INTD, AU1500_PCI_INTC, 0xff, 0xff }, /* IDSEL 03 - AdapterB-Slot1 (bottom) */
+       [4] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, 0xff, 0xff }, /* IDSEL 04 - AdapterC-Slot0 (top) */
+       [5] = { -1, AU1500_PCI_INTB, AU1500_PCI_INTA, 0xff, 0xff }, /* IDSEL 05 - AdapterC-Slot1 (bottom) */
+       [6] = { -1, AU1500_PCI_INTC, AU1500_PCI_INTD, 0xff, 0xff }, /* IDSEL 06 - AdapterD-Slot0 (top) */
+       [7] = { -1, AU1500_PCI_INTD, AU1500_PCI_INTC, 0xff, 0xff }, /* IDSEL 07 - AdapterD-Slot1 (bottom) */
+};
+
+static int mtx1_map_pci_irq(const struct pci_dev *d, u8 slot, u8 pin)
+{
+       return mtx1_irqtab[slot][pin];
+}
+
+static struct alchemy_pci_platdata mtx1_pci_pd = {
+       .board_map_irq   = mtx1_map_pci_irq,
+       .board_pci_idsel = mtx1_pci_idsel,
+       .pci_cfg_set     = PCI_CONFIG_AEN | PCI_CONFIG_R2H | PCI_CONFIG_R1H |
+                          PCI_CONFIG_CH |
+#if defined(__MIPSEB__)
+                          PCI_CONFIG_SIC_HWA_DAT | PCI_CONFIG_SM,
+#else
+                          0,
+#endif
+};
+
+static struct platform_device mtx1_pci_host = {
+       .dev.platform_data = &mtx1_pci_pd,
+       .name           = "alchemy-pci",
+       .id             = 0,
+       .num_resources  = ARRAY_SIZE(alchemy_pci_host_res),
+       .resource       = alchemy_pci_host_res,
+};
+
 static struct platform_device *mtx1_devs[] __initdata = {
+       &mtx1_pci_host,
        &mtx1_gpio_leds,
        &mtx1_wdt,
        &mtx1_button,
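The pci_cfg_set word built above (note the extra byte-swap bits on big-endian) is meant to be OR-ed into the controller's CONFIG register by the host driver, with pci_cfg_clr as the matching clear mask. A hedged sketch of that application, assuming an already ioremap()ed register base; PCI_REG_CONFIG and the PCI_CONFIG_* bits are introduced in au1000.h later in this patch:

#include <linux/io.h>
#include <asm/mach-au1x00/au1000.h>

/* Hedged sketch: apply the board's set/clear masks to PCI CONFIG. */
static void example_apply_cfg(void __iomem *base,
			      struct alchemy_pci_platdata *pd)
{
	u32 val = __raw_readl(base + PCI_REG_CONFIG);

	val |= pd->pci_cfg_set;		/* e.g. PCI_CONFIG_AEN | ... */
	val &= ~pd->pci_cfg_clr;
	__raw_writel(val, base + PCI_REG_CONFIG);
	wmb();
}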
index 81e57fad07ab4bd5567bec068d70c0b18955b0fb..3fa83f72e014d71ace39113f7ad6803ddeaa2ba4 100644 (file)
@@ -70,14 +70,6 @@ void __init board_setup(void)
        /* Enable DTR (MCR bit 0) = USB power up */
        __raw_writel(1, (void __iomem *)KSEG1ADDR(AU1000_UART3_PHYS_ADDR + 0x18));
        wmb();
-
-#ifdef CONFIG_PCI
-#if defined(__MIPSEB__)
-       au_writel(0xf | (2 << 6) | (1 << 4), Au1500_PCI_CFG);
-#else
-       au_writel(0xf, Au1500_PCI_CFG);
-#endif
-#endif
 }
 
 static int __init xxs1500_init_irq(void)
index e87c45cde61b07f51db731cb9fc1e250a1d85e71..06a3a459b8aa29b78bdd8a874b8b573f273f394b 100644 (file)
@@ -27,20 +27,20 @@ static struct resource xxs1500_pcmcia_res[] = {
        {
                .name   = "pcmcia-io",
                .flags  = IORESOURCE_MEM,
-               .start  = PCMCIA_IO_PHYS_ADDR,
-               .end    = PCMCIA_IO_PHYS_ADDR + 0x000400000 - 1,
+               .start  = AU1000_PCMCIA_IO_PHYS_ADDR,
+               .end    = AU1000_PCMCIA_IO_PHYS_ADDR + 0x000400000 - 1,
        },
        {
                .name   = "pcmcia-attr",
                .flags  = IORESOURCE_MEM,
-               .start  = PCMCIA_ATTR_PHYS_ADDR,
-               .end    = PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
+               .start  = AU1000_PCMCIA_ATTR_PHYS_ADDR,
+               .end    = AU1000_PCMCIA_ATTR_PHYS_ADDR + 0x000400000 - 1,
        },
        {
                .name   = "pcmcia-mem",
                .flags  = IORESOURCE_MEM,
-               .start  = PCMCIA_MEM_PHYS_ADDR,
-               .end    = PCMCIA_MEM_PHYS_ADDR + 0x000400000 - 1,
+               .start  = AU1000_PCMCIA_MEM_PHYS_ADDR,
+               .end    = AU1000_PCMCIA_MEM_PHYS_ADDR + 0x000400000 - 1,
        },
 };
 
index 40bb9fde205ff9756812134cc826120f5d8bcb30..69468ded282820efd9547f7aa3837562e56db016 100644 (file)
@@ -114,4 +114,28 @@ unsigned long run_uncached(void *func);
 extern void *kmap_coherent(struct page *page, unsigned long addr);
 extern void kunmap_coherent(void);
 
+#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+static inline void flush_kernel_dcache_page(struct page *page)
+{
+       BUG_ON(cpu_has_dc_aliases && PageHighMem(page));
+}
+
+/*
+ * For now flush_kernel_vmap_range and invalidate_kernel_vmap_range both do a
+ * cache writeback and invalidate operation.
+ */
+extern void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size);
+
+static inline void flush_kernel_vmap_range(void *vaddr, int size)
+{
+       if (cpu_has_dc_aliases)
+               __flush_kernel_vmap_range((unsigned long) vaddr, size);
+}
+
+static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
+{
+       if (cpu_has_dc_aliases)
+               __flush_kernel_vmap_range((unsigned long) vaddr, size);
+}
+
 #endif /* _ASM_CACHEFLUSH_H */
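Both helpers deliberately perform a writeback-plus-invalidate, and only when the D-cache can alias. A usage sketch of the driver-side pattern these hooks exist for (not part of the patch): bracketing device access to a vmap()/vmalloc() alias of page-cache pages:

#include <linux/highmem.h>

/* Hypothetical driver helper: the device fills a vmap()ed buffer. */
static void example_pio_read(void *vmap_addr, int len)
{
	/* write back any CPU-dirtied lines in the aliased mapping */
	flush_kernel_vmap_range(vmap_addr, len);

	/* ... device fills the buffer via another mapping (not shown) ... */

	/* discard stale cache lines before the CPU reads the new data */
	invalidate_kernel_vmap_range(vmap_addr, len);
}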
index 5f95a4bfc73558c4e155ca986dd999ceed50497b..2f7f41873f242ef9abd77002976ebced8cbdea3c 100644 (file)
 #define PRID_IMP_CAVIUM_CN50XX 0x0600
 #define PRID_IMP_CAVIUM_CN52XX 0x0700
 #define PRID_IMP_CAVIUM_CN63XX 0x9000
+#define PRID_IMP_CAVIUM_CN68XX 0x9100
+#define PRID_IMP_CAVIUM_CN66XX 0x9200
+#define PRID_IMP_CAVIUM_CN61XX 0x9300
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_INGENIC
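These PRID values live in the implementation field of the coprocessor 0 PRId register. A short sketch of how probe code typically keys off them, assuming the conventional 0xff00 implementation-field mask:

/* Hedged sketch: recognizing the newly added Cavium parts. */
static int example_is_new_octeon(unsigned int prid)
{
	switch (prid & 0xff00) {	/* implementation field, assumed mask */
	case PRID_IMP_CAVIUM_CN61XX:
	case PRID_IMP_CAVIUM_CN66XX:
	case PRID_IMP_CAVIUM_CN68XX:
		return 1;
	}
	return 0;
}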
index b04e4de5dd2e0a2cf39074cd82b8d67f3eb81cce..a58f22998a86507729e3d72d379ef6e23ddad019 100644 (file)
@@ -329,14 +329,10 @@ static inline void pfx##write##bwlq(type val,                             \
                        "dsrl32 %L0, %L0, 0"                    "\n\t"  \
                        "dsll32 %M0, %M0, 0"                    "\n\t"  \
                        "or     %L0, %L0, %M0"                  "\n\t"  \
-                       ".set   push"                           "\n\t"  \
-                       ".set   noreorder"                      "\n\t"  \
-                       ".set   nomacro"                        "\n\t"  \
                        "sd     %L0, %2"                        "\n\t"  \
-                       ".set   pop"                            "\n\t"  \
                        ".set   mips0"                          "\n"    \
                        : "=r" (__tmp)                                  \
-                       : "0" (__val), "R" (*__mem));                   \
+                       : "0" (__val), "m" (*__mem));                   \
                if (irq)                                                \
                        local_irq_restore(__flags);                     \
        } else                                                          \
@@ -359,16 +355,12 @@ static inline type pfx##read##bwlq(const volatile void __iomem *mem)      \
                        local_irq_save(__flags);                        \
                __asm__ __volatile__(                                   \
                        ".set   mips3"          "\t\t# __readq" "\n\t"  \
-                       ".set   push"                           "\n\t"  \
-                       ".set   noreorder"                      "\n\t"  \
-                       ".set   nomacro"                        "\n\t"  \
                        "ld     %L0, %1"                        "\n\t"  \
-                       ".set   pop"                            "\n\t"  \
                        "dsra32 %M0, %L0, 0"                    "\n\t"  \
                        "sll    %L0, %L0, 0"                    "\n\t"  \
                        ".set   mips0"                          "\n"    \
                        : "=r" (__val)                                  \
-                       : "R" (*__mem));                                \
+                       : "m" (*__mem));                                \
                if (irq)                                                \
                        local_irq_restore(__flags);                     \
        } else {                                                        \
index f260ebed713b4cf72e00eb8db2dca24a83c42b6f..de24ec57dd2f6181200caa0be4136c45c1066748 100644 (file)
@@ -245,6 +245,23 @@ void alchemy_sleep_au1000(void);
 void alchemy_sleep_au1550(void);
 void au_sleep(void);
 
+/* USB: drivers/usb/host/alchemy-common.c */
+enum alchemy_usb_block {
+       ALCHEMY_USB_OHCI0,
+       ALCHEMY_USB_UDC0,
+       ALCHEMY_USB_EHCI0,
+       ALCHEMY_USB_OTG0,
+};
+int alchemy_usb_control(int block, int enable);
+
+/* PCI controller platform data */
+struct alchemy_pci_platdata {
+       int (*board_map_irq)(const struct pci_dev *d, u8 slot, u8 pin);
+       int (*board_pci_idsel)(unsigned int devsel, int assert);
+       /* bits to set/clear in PCI_CONFIG register */
+       unsigned long pci_cfg_set;
+       unsigned long pci_cfg_clr;
+};
 
 /* SOC Interrupt numbers */
 
@@ -575,38 +592,95 @@ enum soc_au1200_ints {
 #endif /* !defined (_LANGUAGE_ASSEMBLY) */
 
 /*
- * SDRAM register offsets
+ * Physical base addresses for integrated peripherals
+ * 0..au1000 1..au1500 2..au1100 3..au1550 4..au1200
  */
-#if defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1500) || \
-    defined(CONFIG_SOC_AU1100)
-#define MEM_SDMODE0            0x0000
-#define MEM_SDMODE1            0x0004
-#define MEM_SDMODE2            0x0008
-#define MEM_SDADDR0            0x000C
-#define MEM_SDADDR1            0x0010
-#define MEM_SDADDR2            0x0014
-#define MEM_SDREFCFG           0x0018
-#define MEM_SDPRECMD           0x001C
-#define MEM_SDAUTOREF          0x0020
-#define MEM_SDWRMD0            0x0024
-#define MEM_SDWRMD1            0x0028
-#define MEM_SDWRMD2            0x002C
-#define MEM_SDSLEEP            0x0030
-#define MEM_SDSMCKE            0x0034
 
-/*
- * MEM_SDMODE register content definitions
- */
+#define AU1000_AC97_PHYS_ADDR          0x10000000 /* 012 */
+#define AU1000_USB_OHCI_PHYS_ADDR      0x10100000 /* 012 */
+#define AU1000_USB_UDC_PHYS_ADDR       0x10200000 /* 0123 */
+#define AU1000_IRDA_PHYS_ADDR          0x10300000 /* 02 */
+#define AU1200_AES_PHYS_ADDR           0x10300000 /* 4 */
+#define AU1000_IC0_PHYS_ADDR           0x10400000 /* 01234 */
+#define AU1000_MAC0_PHYS_ADDR          0x10500000 /* 023 */
+#define AU1000_MAC1_PHYS_ADDR          0x10510000 /* 023 */
+#define AU1000_MACEN_PHYS_ADDR         0x10520000 /* 023 */
+#define AU1100_SD0_PHYS_ADDR           0x10600000 /* 24 */
+#define AU1100_SD1_PHYS_ADDR           0x10680000 /* 24 */
+#define AU1550_PSC2_PHYS_ADDR          0x10A00000 /* 3 */
+#define AU1550_PSC3_PHYS_ADDR          0x10B00000 /* 3 */
+#define AU1000_I2S_PHYS_ADDR           0x11000000 /* 02 */
+#define AU1500_MAC0_PHYS_ADDR          0x11500000 /* 1 */
+#define AU1500_MAC1_PHYS_ADDR          0x11510000 /* 1 */
+#define AU1500_MACEN_PHYS_ADDR         0x11520000 /* 1 */
+#define AU1000_UART0_PHYS_ADDR         0x11100000 /* 01234 */
+#define AU1200_SWCNT_PHYS_ADDR         0x1110010C /* 4 */
+#define AU1000_UART1_PHYS_ADDR         0x11200000 /* 0234 */
+#define AU1000_UART2_PHYS_ADDR         0x11300000 /* 0 */
+#define AU1000_UART3_PHYS_ADDR         0x11400000 /* 0123 */
+#define AU1000_SSI0_PHYS_ADDR          0x11600000 /* 02 */
+#define AU1000_SSI1_PHYS_ADDR          0x11680000 /* 02 */
+#define AU1500_GPIO2_PHYS_ADDR         0x11700000 /* 1234 */
+#define AU1000_IC1_PHYS_ADDR           0x11800000 /* 01234 */
+#define AU1000_SYS_PHYS_ADDR           0x11900000 /* 01234 */
+#define AU1550_PSC0_PHYS_ADDR          0x11A00000 /* 34 */
+#define AU1550_PSC1_PHYS_ADDR          0x11B00000 /* 34 */
+#define AU1000_MEM_PHYS_ADDR           0x14000000 /* 01234 */
+#define AU1000_STATIC_MEM_PHYS_ADDR    0x14001000 /* 01234 */
+#define AU1000_DMA_PHYS_ADDR           0x14002000 /* 012 */
+#define AU1550_DBDMA_PHYS_ADDR         0x14002000 /* 34 */
+#define AU1550_DBDMA_CONF_PHYS_ADDR    0x14003000 /* 34 */
+#define AU1000_MACDMA0_PHYS_ADDR       0x14004000 /* 0123 */
+#define AU1000_MACDMA1_PHYS_ADDR       0x14004200 /* 0123 */
+#define AU1200_CIM_PHYS_ADDR           0x14004000 /* 4 */
+#define AU1500_PCI_PHYS_ADDR           0x14005000 /* 13 */
+#define AU1550_PE_PHYS_ADDR            0x14008000 /* 3 */
+#define AU1200_MAEBE_PHYS_ADDR         0x14010000 /* 4 */
+#define AU1200_MAEFE_PHYS_ADDR         0x14012000 /* 4 */
+#define AU1550_USB_OHCI_PHYS_ADDR      0x14020000 /* 3 */
+#define AU1200_USB_CTL_PHYS_ADDR       0x14020000 /* 4 */
+#define AU1200_USB_OTG_PHYS_ADDR       0x14020020 /* 4 */
+#define AU1200_USB_OHCI_PHYS_ADDR      0x14020100 /* 4 */
+#define AU1200_USB_EHCI_PHYS_ADDR      0x14020200 /* 4 */
+#define AU1200_USB_UDC_PHYS_ADDR       0x14022000 /* 4 */
+#define AU1100_LCD_PHYS_ADDR           0x15000000 /* 2 */
+#define AU1200_LCD_PHYS_ADDR           0x15000000 /* 4 */
+#define AU1500_PCI_MEM_PHYS_ADDR       0x400000000ULL /* 13 */
+#define AU1500_PCI_IO_PHYS_ADDR                0x500000000ULL /* 13 */
+#define AU1500_PCI_CONFIG0_PHYS_ADDR   0x600000000ULL /* 13 */
+#define AU1500_PCI_CONFIG1_PHYS_ADDR   0x680000000ULL /* 13 */
+#define AU1000_PCMCIA_IO_PHYS_ADDR     0xF00000000ULL /* 01234 */
+#define AU1000_PCMCIA_ATTR_PHYS_ADDR   0xF40000000ULL /* 01234 */
+#define AU1000_PCMCIA_MEM_PHYS_ADDR    0xF80000000ULL /* 01234 */
+
+
+/* Au1000 SDRAM memory controller register offsets */
+#define AU1000_MEM_SDMODE0             0x0000
+#define AU1000_MEM_SDMODE1             0x0004
+#define AU1000_MEM_SDMODE2             0x0008
+#define AU1000_MEM_SDADDR0             0x000C
+#define AU1000_MEM_SDADDR1             0x0010
+#define AU1000_MEM_SDADDR2             0x0014
+#define AU1000_MEM_SDREFCFG            0x0018
+#define AU1000_MEM_SDPRECMD            0x001C
+#define AU1000_MEM_SDAUTOREF           0x0020
+#define AU1000_MEM_SDWRMD0             0x0024
+#define AU1000_MEM_SDWRMD1             0x0028
+#define AU1000_MEM_SDWRMD2             0x002C
+#define AU1000_MEM_SDSLEEP             0x0030
+#define AU1000_MEM_SDSMCKE             0x0034
+
+/* MEM_SDMODE register content definitions */
 #define MEM_SDMODE_F           (1 << 22)
 #define MEM_SDMODE_SR          (1 << 21)
 #define MEM_SDMODE_BS          (1 << 20)
 #define MEM_SDMODE_RS          (3 << 18)
 #define MEM_SDMODE_CS          (7 << 15)
-#define MEM_SDMODE_TRAS        (15 << 11)
-#define MEM_SDMODE_TMRD        (3 << 9)
+#define MEM_SDMODE_TRAS                (15 << 11)
+#define MEM_SDMODE_TMRD                (3 << 9)
 #define MEM_SDMODE_TWR         (3 << 7)
 #define MEM_SDMODE_TRP         (3 << 5)
-#define MEM_SDMODE_TRCD        (3 << 3)
+#define MEM_SDMODE_TRCD                (3 << 3)
 #define MEM_SDMODE_TCL         (7 << 0)
 
 #define MEM_SDMODE_BS_2Bank    (0 << 20)
@@ -628,173 +702,43 @@ enum soc_au1200_ints {
 #define MEM_SDMODE_TRCD_N(N)   ((N) << 3)
 #define MEM_SDMODE_TCL_N(N)    ((N) << 0)
 
-/*
- * MEM_SDADDR register contents definitions
- */
+/* MEM_SDADDR register contents definitions */
 #define MEM_SDADDR_E           (1 << 20)
-#define MEM_SDADDR_CSBA        (0x03FF << 10)
+#define MEM_SDADDR_CSBA                (0x03FF << 10)
 #define MEM_SDADDR_CSMASK      (0x03FF << 0)
 #define MEM_SDADDR_CSBA_N(N)   ((N) & (0x03FF << 22) >> 12)
 #define MEM_SDADDR_CSMASK_N(N) ((N)&(0x03FF << 22) >> 22)
 
-/*
- * MEM_SDREFCFG register content definitions
- */
+/* MEM_SDREFCFG register content definitions */
 #define MEM_SDREFCFG_TRC       (15 << 28)
 #define MEM_SDREFCFG_TRPM      (3 << 26)
 #define MEM_SDREFCFG_E         (1 << 25)
-#define MEM_SDREFCFG_RE        (0x1ffffff << 0)
+#define MEM_SDREFCFG_RE                (0x1ffffff << 0)
 #define MEM_SDREFCFG_TRC_N(N)  ((N) << MEM_SDREFCFG_TRC)
 #define MEM_SDREFCFG_TRPM_N(N) ((N) << MEM_SDREFCFG_TRPM)
 #define MEM_SDREFCFG_REF_N(N)  (N)
-#endif
-
-/***********************************************************************/
 
-/*
- * Au1550 SDRAM Register Offsets
- */
-
-/***********************************************************************/
-
-#if defined(CONFIG_SOC_AU1550) || defined(CONFIG_SOC_AU1200)
-#define MEM_SDMODE0            0x0800
-#define MEM_SDMODE1            0x0808
-#define MEM_SDMODE2            0x0810
-#define MEM_SDADDR0            0x0820
-#define MEM_SDADDR1            0x0828
-#define MEM_SDADDR2            0x0830
-#define MEM_SDCONFIGA          0x0840
-#define MEM_SDCONFIGB          0x0848
-#define MEM_SDSTAT             0x0850
-#define MEM_SDERRADDR          0x0858
-#define MEM_SDSTRIDE0          0x0860
-#define MEM_SDSTRIDE1          0x0868
-#define MEM_SDSTRIDE2          0x0870
-#define MEM_SDWRMD0            0x0880
-#define MEM_SDWRMD1            0x0888
-#define MEM_SDWRMD2            0x0890
-#define MEM_SDPRECMD           0x08C0
-#define MEM_SDAUTOREF          0x08C8
-#define MEM_SDSREF             0x08D0
-#define MEM_SDSLEEP            MEM_SDSREF
-
-#endif
-
-/*
- * Physical base addresses for integrated peripherals
- * 0..au1000 1..au1500 2..au1100 3..au1550 4..au1200
- */
-
-#define AU1000_AC97_PHYS_ADDR          0x10000000 /* 012 */
-#define AU1000_USBD_PHYS_ADDR          0x10200000 /* 0123 */
-#define AU1000_IC0_PHYS_ADDR           0x10400000 /* 01234 */
-#define AU1000_MAC0_PHYS_ADDR          0x10500000 /* 023 */
-#define AU1000_MAC1_PHYS_ADDR          0x10510000 /* 023 */
-#define AU1000_MACEN_PHYS_ADDR         0x10520000 /* 023 */
-#define AU1100_SD0_PHYS_ADDR           0x10600000 /* 24 */
-#define AU1100_SD1_PHYS_ADDR           0x10680000 /* 24 */
-#define AU1000_I2S_PHYS_ADDR           0x11000000 /* 02 */
-#define AU1500_MAC0_PHYS_ADDR          0x11500000 /* 1 */
-#define AU1500_MAC1_PHYS_ADDR          0x11510000 /* 1 */
-#define AU1500_MACEN_PHYS_ADDR         0x11520000 /* 1 */
-#define AU1000_UART0_PHYS_ADDR         0x11100000 /* 01234 */
-#define AU1000_UART1_PHYS_ADDR         0x11200000 /* 0234 */
-#define AU1000_UART2_PHYS_ADDR         0x11300000 /* 0 */
-#define AU1000_UART3_PHYS_ADDR         0x11400000 /* 0123 */
-#define AU1500_GPIO2_PHYS_ADDR         0x11700000 /* 1234 */
-#define AU1000_IC1_PHYS_ADDR           0x11800000 /* 01234 */
-#define AU1000_SYS_PHYS_ADDR           0x11900000 /* 01234 */
-#define AU1000_DMA_PHYS_ADDR           0x14002000 /* 012 */
-#define AU1550_DBDMA_PHYS_ADDR         0x14002000 /* 34 */
-#define AU1550_DBDMA_CONF_PHYS_ADDR    0x14003000 /* 34 */
-#define AU1000_MACDMA0_PHYS_ADDR       0x14004000 /* 0123 */
-#define AU1000_MACDMA1_PHYS_ADDR       0x14004200 /* 0123 */
-
-
-#ifdef CONFIG_SOC_AU1000
-#define        MEM_PHYS_ADDR           0x14000000
-#define        STATIC_MEM_PHYS_ADDR    0x14001000
-#define        USBH_PHYS_ADDR          0x10100000
-#define        IRDA_PHYS_ADDR          0x10300000
-#define        SSI0_PHYS_ADDR          0x11600000
-#define        SSI1_PHYS_ADDR          0x11680000
-#define PCMCIA_IO_PHYS_ADDR    0xF00000000ULL
-#define PCMCIA_ATTR_PHYS_ADDR  0xF40000000ULL
-#define PCMCIA_MEM_PHYS_ADDR   0xF80000000ULL
-#endif
-
-/********************************************************************/
-
-#ifdef CONFIG_SOC_AU1500
-#define        MEM_PHYS_ADDR           0x14000000
-#define        STATIC_MEM_PHYS_ADDR    0x14001000
-#define        USBH_PHYS_ADDR          0x10100000
-#define PCI_PHYS_ADDR          0x14005000
-#define PCI_MEM_PHYS_ADDR      0x400000000ULL
-#define PCI_IO_PHYS_ADDR       0x500000000ULL
-#define PCI_CONFIG0_PHYS_ADDR  0x600000000ULL
-#define PCI_CONFIG1_PHYS_ADDR  0x680000000ULL
-#define PCMCIA_IO_PHYS_ADDR    0xF00000000ULL
-#define PCMCIA_ATTR_PHYS_ADDR  0xF40000000ULL
-#define PCMCIA_MEM_PHYS_ADDR   0xF80000000ULL
-#endif
-
-/********************************************************************/
-
-#ifdef CONFIG_SOC_AU1100
-#define        MEM_PHYS_ADDR           0x14000000
-#define        STATIC_MEM_PHYS_ADDR    0x14001000
-#define        USBH_PHYS_ADDR          0x10100000
-#define        IRDA_PHYS_ADDR          0x10300000
-#define        SSI0_PHYS_ADDR          0x11600000
-#define        SSI1_PHYS_ADDR          0x11680000
-#define LCD_PHYS_ADDR          0x15000000
-#define PCMCIA_IO_PHYS_ADDR    0xF00000000ULL
-#define PCMCIA_ATTR_PHYS_ADDR  0xF40000000ULL
-#define PCMCIA_MEM_PHYS_ADDR   0xF80000000ULL
-#endif
-
-/***********************************************************************/
-
-#ifdef CONFIG_SOC_AU1550
-#define        MEM_PHYS_ADDR           0x14000000
-#define        STATIC_MEM_PHYS_ADDR    0x14001000
-#define        USBH_PHYS_ADDR          0x14020000
-#define PCI_PHYS_ADDR          0x14005000
-#define PE_PHYS_ADDR           0x14008000
-#define PSC0_PHYS_ADDR         0x11A00000
-#define PSC1_PHYS_ADDR         0x11B00000
-#define PSC2_PHYS_ADDR         0x10A00000
-#define PSC3_PHYS_ADDR         0x10B00000
-#define PCI_MEM_PHYS_ADDR      0x400000000ULL
-#define PCI_IO_PHYS_ADDR       0x500000000ULL
-#define PCI_CONFIG0_PHYS_ADDR  0x600000000ULL
-#define PCI_CONFIG1_PHYS_ADDR  0x680000000ULL
-#define PCMCIA_IO_PHYS_ADDR    0xF00000000ULL
-#define PCMCIA_ATTR_PHYS_ADDR  0xF40000000ULL
-#define PCMCIA_MEM_PHYS_ADDR   0xF80000000ULL
-#endif
-
-/***********************************************************************/
-
-#ifdef CONFIG_SOC_AU1200
-#define        MEM_PHYS_ADDR           0x14000000
-#define        STATIC_MEM_PHYS_ADDR    0x14001000
-#define AES_PHYS_ADDR          0x10300000
-#define CIM_PHYS_ADDR          0x14004000
-#define USBM_PHYS_ADDR         0x14020000
-#define        USBH_PHYS_ADDR          0x14020100
-#define PSC0_PHYS_ADDR         0x11A00000
-#define PSC1_PHYS_ADDR         0x11B00000
-#define LCD_PHYS_ADDR          0x15000000
-#define SWCNT_PHYS_ADDR                0x1110010C
-#define MAEFE_PHYS_ADDR                0x14012000
-#define MAEBE_PHYS_ADDR                0x14010000
-#define PCMCIA_IO_PHYS_ADDR    0xF00000000ULL
-#define PCMCIA_ATTR_PHYS_ADDR  0xF40000000ULL
-#define PCMCIA_MEM_PHYS_ADDR   0xF80000000ULL
-#endif
+/* Au1550 SDRAM Register Offsets */
+#define AU1550_MEM_SDMODE0             0x0800
+#define AU1550_MEM_SDMODE1             0x0808
+#define AU1550_MEM_SDMODE2             0x0810
+#define AU1550_MEM_SDADDR0             0x0820
+#define AU1550_MEM_SDADDR1             0x0828
+#define AU1550_MEM_SDADDR2             0x0830
+#define AU1550_MEM_SDCONFIGA           0x0840
+#define AU1550_MEM_SDCONFIGB           0x0848
+#define AU1550_MEM_SDSTAT              0x0850
+#define AU1550_MEM_SDERRADDR           0x0858
+#define AU1550_MEM_SDSTRIDE0           0x0860
+#define AU1550_MEM_SDSTRIDE1           0x0868
+#define AU1550_MEM_SDSTRIDE2           0x0870
+#define AU1550_MEM_SDWRMD0             0x0880
+#define AU1550_MEM_SDWRMD1             0x0888
+#define AU1550_MEM_SDWRMD2             0x0890
+#define AU1550_MEM_SDPRECMD            0x08C0
+#define AU1550_MEM_SDAUTOREF           0x08C8
+#define AU1550_MEM_SDSREF              0x08D0
+#define AU1550_MEM_SDSLEEP             AU1550_MEM_SDSREF
 
 /* Static Bus Controller */
 #define MEM_STCFG0             0xB4001000
@@ -813,81 +757,14 @@ enum soc_au1200_ints {
 #define MEM_STTIME3            0xB4001034
 #define MEM_STADDR3            0xB4001038
 
-#if defined(CONFIG_SOC_AU1550) || defined(CONFIG_SOC_AU1200)
 #define MEM_STNDCTL            0xB4001100
 #define MEM_STSTAT             0xB4001104
 
 #define MEM_STNAND_CMD         0x0
 #define MEM_STNAND_ADDR        0x4
 #define MEM_STNAND_DATA        0x20
-#endif
-
 
 
-
-/* Au1000 */
-#ifdef CONFIG_SOC_AU1000
-
-#define USB_OHCI_BASE          0x10100000      /* phys addr for ioremap */
-#define USB_HOST_CONFIG        0xB017FFFC
-#define FOR_PLATFORM_C_USB_HOST_INT AU1000_USB_HOST_INT
-#endif /* CONFIG_SOC_AU1000 */
-
-/* Au1500 */
-#ifdef CONFIG_SOC_AU1500
-
-#define USB_OHCI_BASE          0x10100000      /* phys addr for ioremap */
-#define USB_HOST_CONFIG        0xB017fffc
-#define FOR_PLATFORM_C_USB_HOST_INT AU1500_USB_HOST_INT
-#endif /* CONFIG_SOC_AU1500 */
-
-/* Au1100 */
-#ifdef CONFIG_SOC_AU1100
-
-#define USB_OHCI_BASE          0x10100000      /* phys addr for ioremap */
-#define USB_HOST_CONFIG        0xB017FFFC
-#define FOR_PLATFORM_C_USB_HOST_INT AU1100_USB_HOST_INT
-#endif /* CONFIG_SOC_AU1100 */
-
-#ifdef CONFIG_SOC_AU1550
-
-#define USB_OHCI_BASE          0x14020000      /* phys addr for ioremap */
-#define USB_OHCI_LEN           0x00060000
-#define USB_HOST_CONFIG        0xB4027ffc
-#define FOR_PLATFORM_C_USB_HOST_INT AU1550_USB_HOST_INT
-#endif /* CONFIG_SOC_AU1550 */
-
-
-#ifdef CONFIG_SOC_AU1200
-
-#define USB_UOC_BASE           0x14020020
-#define USB_UOC_LEN            0x20
-#define USB_OHCI_BASE          0x14020100
-#define USB_OHCI_LEN           0x100
-#define USB_EHCI_BASE          0x14020200
-#define USB_EHCI_LEN           0x100
-#define USB_UDC_BASE           0x14022000
-#define USB_UDC_LEN            0x2000
-#define USB_MSR_BASE           0xB4020000
-#define USB_MSR_MCFG           4
-#define USBMSRMCFG_OMEMEN      0
-#define USBMSRMCFG_OBMEN       1
-#define USBMSRMCFG_EMEMEN      2
-#define USBMSRMCFG_EBMEN       3
-#define USBMSRMCFG_DMEMEN      4
-#define USBMSRMCFG_DBMEN       5
-#define USBMSRMCFG_GMEMEN      6
-#define USBMSRMCFG_OHCCLKEN    16
-#define USBMSRMCFG_EHCCLKEN    17
-#define USBMSRMCFG_UDCCLKEN    18
-#define USBMSRMCFG_PHYPLLEN    19
-#define USBMSRMCFG_RDCOMB      30
-#define USBMSRMCFG_PFEN        31
-
-#define FOR_PLATFORM_C_USB_HOST_INT AU1200_USB_INT
-
-#endif /* CONFIG_SOC_AU1200 */
-
 /* Programmable Counters 0 and 1 */
 #define SYS_BASE               0xB1900000
 #define SYS_COUNTER_CNTRL      (SYS_BASE + 0x14)
@@ -958,56 +835,6 @@ enum soc_au1200_ints {
 #  define I2S_CONTROL_D        (1 << 1)
 #  define I2S_CONTROL_CE       (1 << 0)
 
-/* USB Host Controller */
-#ifndef USB_OHCI_LEN
-#define USB_OHCI_LEN           0x00100000
-#endif
-
-#ifndef CONFIG_SOC_AU1200
-
-/* USB Device Controller */
-#define USBD_EP0RD             0xB0200000
-#define USBD_EP0WR             0xB0200004
-#define USBD_EP2WR             0xB0200008
-#define USBD_EP3WR             0xB020000C
-#define USBD_EP4RD             0xB0200010
-#define USBD_EP5RD             0xB0200014
-#define USBD_INTEN             0xB0200018
-#define USBD_INTSTAT           0xB020001C
-#  define USBDEV_INT_SOF       (1 << 12)
-#  define USBDEV_INT_HF_BIT    6
-#  define USBDEV_INT_HF_MASK   (0x3f << USBDEV_INT_HF_BIT)
-#  define USBDEV_INT_CMPLT_BIT 0
-#  define USBDEV_INT_CMPLT_MASK (0x3f << USBDEV_INT_CMPLT_BIT)
-#define USBD_CONFIG            0xB0200020
-#define USBD_EP0CS             0xB0200024
-#define USBD_EP2CS             0xB0200028
-#define USBD_EP3CS             0xB020002C
-#define USBD_EP4CS             0xB0200030
-#define USBD_EP5CS             0xB0200034
-#  define USBDEV_CS_SU         (1 << 14)
-#  define USBDEV_CS_NAK        (1 << 13)
-#  define USBDEV_CS_ACK        (1 << 12)
-#  define USBDEV_CS_BUSY       (1 << 11)
-#  define USBDEV_CS_TSIZE_BIT  1
-#  define USBDEV_CS_TSIZE_MASK (0x3ff << USBDEV_CS_TSIZE_BIT)
-#  define USBDEV_CS_STALL      (1 << 0)
-#define USBD_EP0RDSTAT         0xB0200040
-#define USBD_EP0WRSTAT         0xB0200044
-#define USBD_EP2WRSTAT         0xB0200048
-#define USBD_EP3WRSTAT         0xB020004C
-#define USBD_EP4RDSTAT         0xB0200050
-#define USBD_EP5RDSTAT         0xB0200054
-#  define USBDEV_FSTAT_FLUSH   (1 << 6)
-#  define USBDEV_FSTAT_UF      (1 << 5)
-#  define USBDEV_FSTAT_OF      (1 << 4)
-#  define USBDEV_FSTAT_FCNT_BIT 0
-#  define USBDEV_FSTAT_FCNT_MASK (0x0f << USBDEV_FSTAT_FCNT_BIT)
-#define USBD_ENABLE            0xB0200058
-#  define USBDEV_ENABLE        (1 << 1)
-#  define USBDEV_CE            (1 << 0)
-
-#endif /* !CONFIG_SOC_AU1200 */
 
 /* Ethernet Controllers  */
 
@@ -1322,7 +1149,6 @@ enum soc_au1200_ints {
 #  define SYS_PF_MUST_BE_SET   ((1 << 5) | (1 << 2))
 
 /* Au1200 only */
-#ifdef CONFIG_SOC_AU1200
 #define SYS_PINFUNC_DMA        (1 << 31)
 #define SYS_PINFUNC_S0A        (1 << 30)
 #define SYS_PINFUNC_S1A        (1 << 29)
@@ -1350,7 +1176,6 @@ enum soc_au1200_ints {
 #define SYS_PINFUNC_P0B        (1 << 4)
 #define SYS_PINFUNC_U0T        (1 << 3)
 #define SYS_PINFUNC_S1B        (1 << 2)
-#endif
 
 /* Power Management */
 #define SYS_SCRATCH0           0xB1900018
@@ -1406,12 +1231,12 @@ enum soc_au1200_ints {
 #  define SYS_CS_MI2_MASK      (0x7 << SYS_CS_MI2_BIT)
 #  define SYS_CS_DI2           (1 << 16)
 #  define SYS_CS_CI2           (1 << 15)
-#ifdef CONFIG_SOC_AU1100
+
 #  define SYS_CS_ML_BIT        7
 #  define SYS_CS_ML_MASK       (0x7 << SYS_CS_ML_BIT)
 #  define SYS_CS_DL            (1 << 6)
 #  define SYS_CS_CL            (1 << 5)
-#else
+
 #  define SYS_CS_MUH_BIT       12
 #  define SYS_CS_MUH_MASK      (0x7 << SYS_CS_MUH_BIT)
 #  define SYS_CS_DUH           (1 << 11)
@@ -1420,7 +1245,7 @@ enum soc_au1200_ints {
 #  define SYS_CS_MUD_MASK      (0x7 << SYS_CS_MUD_BIT)
 #  define SYS_CS_DUD           (1 << 6)
 #  define SYS_CS_CUD           (1 << 5)
-#endif
+
 #  define SYS_CS_MIR_BIT       2
 #  define SYS_CS_MIR_MASK      (0x7 << SYS_CS_MIR_BIT)
 #  define SYS_CS_DIR           (1 << 1)
@@ -1467,58 +1292,30 @@ enum soc_au1200_ints {
 #  define AC97C_RS             (1 << 1)
 #  define AC97C_CE             (1 << 0)
 
-#if defined(CONFIG_SOC_AU1500) || defined(CONFIG_SOC_AU1550)
-/* Au1500 PCI Controller */
-#define Au1500_CFG_BASE        0xB4005000      /* virtual, KSEG1 addr */
-#define Au1500_PCI_CMEM        (Au1500_CFG_BASE + 0)
-#define Au1500_PCI_CFG         (Au1500_CFG_BASE + 4)
-#  define PCI_ERROR            ((1 << 22) | (1 << 23) | (1 << 24) | \
-                                (1 << 25) | (1 << 26) | (1 << 27))
-#define Au1500_PCI_B2BMASK_CCH (Au1500_CFG_BASE + 8)
-#define Au1500_PCI_B2B0_VID    (Au1500_CFG_BASE + 0xC)
-#define Au1500_PCI_B2B1_ID     (Au1500_CFG_BASE + 0x10)
-#define Au1500_PCI_MWMASK_DEV  (Au1500_CFG_BASE + 0x14)
-#define Au1500_PCI_MWBASE_REV_CCL (Au1500_CFG_BASE + 0x18)
-#define Au1500_PCI_ERR_ADDR    (Au1500_CFG_BASE + 0x1C)
-#define Au1500_PCI_SPEC_INTACK (Au1500_CFG_BASE + 0x20)
-#define Au1500_PCI_ID          (Au1500_CFG_BASE + 0x100)
-#define Au1500_PCI_STATCMD     (Au1500_CFG_BASE + 0x104)
-#define Au1500_PCI_CLASSREV    (Au1500_CFG_BASE + 0x108)
-#define Au1500_PCI_HDRTYPE     (Au1500_CFG_BASE + 0x10C)
-#define Au1500_PCI_MBAR        (Au1500_CFG_BASE + 0x110)
-
-#define Au1500_PCI_HDR         0xB4005100      /* virtual, KSEG1 addr */
 
-/*
- * All of our structures, like PCI resource, have 32-bit members.
- * Drivers are expected to do an ioremap on the PCI MEM resource, but it's
- * hard to store 0x4 0000 0000 in a 32-bit type.  We require a small patch
- * to __ioremap to check for addresses between (u32)Au1500_PCI_MEM_START and
- * (u32)Au1500_PCI_MEM_END and change those to the full 36-bit PCI MEM
- * addresses.  For PCI I/O, it's simpler because we get to do the ioremap
- * ourselves and then adjust the device's resources.
+/* The PCI chip selects are outside the 32bit space, and since we can't
+ * just program the 36bit addresses into BARs, we have to take a chunk
+ * out of the 32bit space and reserve it for PCI.  When these addresses
+ * are ioremap()ed, they'll be fixed up to the real 36bit address before
+ * being passed to the real ioremap function.
  */
-#define Au1500_EXT_CFG         0x600000000ULL
-#define Au1500_EXT_CFG_TYPE1   0x680000000ULL
-#define Au1500_PCI_IO_START    0x500000000ULL
-#define Au1500_PCI_IO_END      0x5000FFFFFULL
-#define Au1500_PCI_MEM_START   0x440000000ULL
-#define Au1500_PCI_MEM_END     0x44FFFFFFFULL
+#define ALCHEMY_PCI_MEMWIN_START       (AU1500_PCI_MEM_PHYS_ADDR >> 4)
+#define ALCHEMY_PCI_MEMWIN_END         (ALCHEMY_PCI_MEMWIN_START + 0x0FFFFFFF)
 
-#define PCI_IO_START   0x00001000
-#define PCI_IO_END     0x000FFFFF
-#define PCI_MEM_START  0x40000000
-#define PCI_MEM_END    0x4FFFFFFF
+/* for PCI IO it's simpler because we get to do the ioremap ourselves and then
+ * adjust the device's resources.
+ */
+#define ALCHEMY_PCI_IOWIN_START                0x00001000
+#define ALCHEMY_PCI_IOWIN_END          0x0000FFFF
 
-#define PCI_FIRST_DEVFN (0 << 3)
-#define PCI_LAST_DEVFN (19 << 3)
+#ifdef CONFIG_PCI
 
 #define IOPORT_RESOURCE_START  0x00001000      /* skip legacy probing */
 #define IOPORT_RESOURCE_END    0xffffffff
 #define IOMEM_RESOURCE_START   0x10000000
 #define IOMEM_RESOURCE_END     0xfffffffffULL
 
-#else /* Au1000 and Au1100 and Au1200 */
+#else
 
 /* Don't allow any legacy ports probing */
 #define IOPORT_RESOURCE_START  0x10000000
@@ -1526,13 +1323,77 @@ enum soc_au1200_ints {
 #define IOMEM_RESOURCE_START   0x10000000
 #define IOMEM_RESOURCE_END     0xfffffffffULL
 
-#define PCI_IO_START   0
-#define PCI_IO_END     0
-#define PCI_MEM_START  0
-#define PCI_MEM_END    0
-#define PCI_FIRST_DEVFN 0
-#define PCI_LAST_DEVFN 0
-
 #endif
 
+/* PCI controller block register offsets */
+#define PCI_REG_CMEM           0x0000
+#define PCI_REG_CONFIG         0x0004
+#define PCI_REG_B2BMASK_CCH    0x0008
+#define PCI_REG_B2BBASE0_VID   0x000C
+#define PCI_REG_B2BBASE1_SID   0x0010
+#define PCI_REG_MWMASK_DEV     0x0014
+#define PCI_REG_MWBASE_REV_CCL 0x0018
+#define PCI_REG_ERR_ADDR       0x001C
+#define PCI_REG_SPEC_INTACK    0x0020
+#define PCI_REG_ID             0x0100
+#define PCI_REG_STATCMD                0x0104
+#define PCI_REG_CLASSREV       0x0108
+#define PCI_REG_PARAM          0x010C
+#define PCI_REG_MBAR           0x0110
+#define PCI_REG_TIMEOUT                0x0140
+
+/* PCI controller block register bits */
+#define PCI_CMEM_E             (1 << 28)       /* enable cacheable memory */
+#define PCI_CMEM_CMBASE(x)     (((x) & 0x3fff) << 14)
+#define PCI_CMEM_CMMASK(x)     ((x) & 0x3fff)
+#define PCI_CONFIG_ERD         (1 << 27) /* pci error during R/W */
+#define PCI_CONFIG_ET          (1 << 26) /* error in target mode */
+#define PCI_CONFIG_EF          (1 << 25) /* fatal error */
+#define PCI_CONFIG_EP          (1 << 24) /* parity error */
+#define PCI_CONFIG_EM          (1 << 23) /* multiple errors */
+#define PCI_CONFIG_BM          (1 << 22) /* bad master error */
+#define PCI_CONFIG_PD          (1 << 20) /* PCI Disable */
+#define PCI_CONFIG_BME         (1 << 19) /* Byte Mask Enable for reads */
+#define PCI_CONFIG_NC          (1 << 16) /* mark mem access non-coherent */
+#define PCI_CONFIG_IA          (1 << 15) /* INTA# enabled (target mode) */
+#define PCI_CONFIG_IP          (1 << 13) /* int on PCI_PERR# */
+#define PCI_CONFIG_IS          (1 << 12) /* int on PCI_SERR# */
+#define PCI_CONFIG_IMM         (1 << 11) /* int on master abort */
+#define PCI_CONFIG_ITM         (1 << 10) /* int on target abort (as master) */
+#define PCI_CONFIG_ITT         (1 << 9)  /* int on target abort (as target) */
+#define PCI_CONFIG_IPB         (1 << 8)  /* int on PERR# in bus master acc */
+#define PCI_CONFIG_SIC_NO      (0 << 6)  /* no byte mask changes */
+#define PCI_CONFIG_SIC_BA_ADR  (1 << 6)  /* on byte/hw acc, invert adr bits */
+#define PCI_CONFIG_SIC_HWA_DAT (2 << 6)  /* on halfword acc, swap data */
+#define PCI_CONFIG_SIC_ALL     (3 << 6)  /* swap data bytes on all accesses */
+#define PCI_CONFIG_ST          (1 << 5)  /* swap data by target transactions */
+#define PCI_CONFIG_SM          (1 << 4)  /* swap data from PCI ctl */
+#define PCI_CONFIG_AEN         (1 << 3)  /* enable internal arbiter */
+#define PCI_CONFIG_R2H         (1 << 2)  /* REQ2# to hi-prio arbiter */
+#define PCI_CONFIG_R1H         (1 << 1)  /* REQ1# to hi-prio arbiter */
+#define PCI_CONFIG_CH          (1 << 0)  /* PCI ctl to hi-prio arbiter */
+#define PCI_B2BMASK_B2BMASK(x) (((x) & 0xffff) << 16)
+#define PCI_B2BMASK_CCH(x)     ((x) & 0xffff) /* 16 upper bits of class code */
+#define PCI_B2BBASE0_VID_B0(x) (((x) & 0xffff) << 16)
+#define PCI_B2BBASE0_VID_SV(x) ((x) & 0xffff)
+#define PCI_B2BBASE1_SID_B1(x) (((x) & 0xffff) << 16)
+#define PCI_B2BBASE1_SID_SI(x) ((x) & 0xffff)
+#define PCI_MWMASKDEV_MWMASK(x) (((x) & 0xffff) << 16)
+#define PCI_MWMASKDEV_DEVID(x) ((x) & 0xffff)
+#define PCI_MWBASEREVCCL_BASE(x) (((x) & 0xffff) << 16)
+#define PCI_MWBASEREVCCL_REV(x)  (((x) & 0xff) << 8)
+#define PCI_MWBASEREVCCL_CCL(x)  ((x) & 0xff)
+#define PCI_ID_DID(x)          (((x) & 0xffff) << 16)
+#define PCI_ID_VID(x)          ((x) & 0xffff)
+#define PCI_STATCMD_STATUS(x)  (((x) & 0xffff) << 16)
+#define PCI_STATCMD_CMD(x)     ((x) & 0xffff)
+#define PCI_CLASSREV_CLASS(x)  (((x) & 0x00ffffff) << 8)
+#define PCI_CLASSREV_REV(x)    ((x) & 0xff)
+#define PCI_PARAM_BIST(x)      (((x) & 0xff) << 24)
+#define PCI_PARAM_HT(x)                (((x) & 0xff) << 16)
+#define PCI_PARAM_LT(x)                (((x) & 0xff) << 8)
+#define PCI_PARAM_CLS(x)       ((x) & 0xff)
+#define PCI_TIMEOUT_RETRIES(x) (((x) & 0xff) << 8)     /* max retries */
+#define PCI_TIMEOUT_TO(x)      ((x) & 0xff)    /* target ready timeout */
+
 #endif
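The MEMWIN defines above replace the old hardcoded Au1500_PCI_MEM_START/END pair: the reserved 32-bit window is simply the 36-bit PCI memory base shifted down. A sketch of the address fixup the comment describes, with the function name and hook point assumed (the kernel's real hook in this area is fixup_bigphys_addr, which also takes a size argument):

#include <linux/types.h>
#include <asm/mach-au1x00/au1000.h>

/* Hedged sketch: rewrite window addresses to the real 36-bit bus address
 * before they reach the real ioremap. Only the arithmetic follows from
 * the defines; the surrounding plumbing is an assumption. */
static inline u64 example_fixup_bigphys_addr(u32 phys)
{
	if (phys >= ALCHEMY_PCI_MEMWIN_START && phys <= ALCHEMY_PCI_MEMWIN_END)
		return AU1500_PCI_MEM_PHYS_ADDR +
		       (u64)(phys - ALCHEMY_PCI_MEMWIN_START);
	return phys;
}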
diff --git a/arch/mips/include/asm/mach-au1x00/au1xxx.h b/arch/mips/include/asm/mach-au1x00/au1xxx.h
deleted file mode 100644 (file)
index 1b36550..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _AU1XXX_H_
-#define _AU1XXX_H_
-
-#include <asm/mach-au1x00/au1000.h>
-
-#if defined(CONFIG_MIPS_DB1000) || defined(CONFIG_MIPS_DB1100) || \
-    defined(CONFIG_MIPS_DB1500) || defined(CONFIG_MIPS_DB1550)
-#include <asm/mach-db1x00/db1x00.h>
-
-#elif defined(CONFIG_MIPS_PB1550)
-#include <asm/mach-pb1x00/pb1550.h>
-
-#elif defined(CONFIG_MIPS_PB1200)
-#include <asm/mach-pb1x00/pb1200.h>
-
-#elif defined(CONFIG_MIPS_DB1200)
-#include <asm/mach-db1x00/db1200.h>
-
-#endif
-
-#endif /* _AU1XXX_H_ */
index 2fdacfe85e2357d1c53e295355ce5804c21efbb0..323ce2d145f29e064c3daa87e936214778421c25 100644 (file)
@@ -126,66 +126,62 @@ typedef volatile struct au1xxx_ddma_desc {
 #define SW_STATUS_INUSE        (1 << 0)
 
 /* Command 0 device IDs. */
-#ifdef CONFIG_SOC_AU1550
-#define DSCR_CMD0_UART0_TX     0
-#define DSCR_CMD0_UART0_RX     1
-#define DSCR_CMD0_UART3_TX     2
-#define DSCR_CMD0_UART3_RX     3
-#define DSCR_CMD0_DMA_REQ0     4
-#define DSCR_CMD0_DMA_REQ1     5
-#define DSCR_CMD0_DMA_REQ2     6
-#define DSCR_CMD0_DMA_REQ3     7
-#define DSCR_CMD0_USBDEV_RX0   8
-#define DSCR_CMD0_USBDEV_TX0   9
-#define DSCR_CMD0_USBDEV_TX1   10
-#define DSCR_CMD0_USBDEV_TX2   11
-#define DSCR_CMD0_USBDEV_RX3   12
-#define DSCR_CMD0_USBDEV_RX4   13
-#define DSCR_CMD0_PSC0_TX      14
-#define DSCR_CMD0_PSC0_RX      15
-#define DSCR_CMD0_PSC1_TX      16
-#define DSCR_CMD0_PSC1_RX      17
-#define DSCR_CMD0_PSC2_TX      18
-#define DSCR_CMD0_PSC2_RX      19
-#define DSCR_CMD0_PSC3_TX      20
-#define DSCR_CMD0_PSC3_RX      21
-#define DSCR_CMD0_PCI_WRITE    22
-#define DSCR_CMD0_NAND_FLASH   23
-#define DSCR_CMD0_MAC0_RX      24
-#define DSCR_CMD0_MAC0_TX      25
-#define DSCR_CMD0_MAC1_RX      26
-#define DSCR_CMD0_MAC1_TX      27
-#endif /* CONFIG_SOC_AU1550 */
-
-#ifdef CONFIG_SOC_AU1200
-#define DSCR_CMD0_UART0_TX     0
-#define DSCR_CMD0_UART0_RX     1
-#define DSCR_CMD0_UART1_TX     2
-#define DSCR_CMD0_UART1_RX     3
-#define DSCR_CMD0_DMA_REQ0     4
-#define DSCR_CMD0_DMA_REQ1     5
-#define DSCR_CMD0_MAE_BE       6
-#define DSCR_CMD0_MAE_FE       7
-#define DSCR_CMD0_SDMS_TX0     8
-#define DSCR_CMD0_SDMS_RX0     9
-#define DSCR_CMD0_SDMS_TX1     10
-#define DSCR_CMD0_SDMS_RX1     11
-#define DSCR_CMD0_AES_TX       13
-#define DSCR_CMD0_AES_RX       12
-#define DSCR_CMD0_PSC0_TX      14
-#define DSCR_CMD0_PSC0_RX      15
-#define DSCR_CMD0_PSC1_TX      16
-#define DSCR_CMD0_PSC1_RX      17
-#define DSCR_CMD0_CIM_RXA      18
-#define DSCR_CMD0_CIM_RXB      19
-#define DSCR_CMD0_CIM_RXC      20
-#define DSCR_CMD0_MAE_BOTH     21
-#define DSCR_CMD0_LCD          22
-#define DSCR_CMD0_NAND_FLASH   23
-#define DSCR_CMD0_PSC0_SYNC    24
-#define DSCR_CMD0_PSC1_SYNC    25
-#define DSCR_CMD0_CIM_SYNC     26
-#endif /* CONFIG_SOC_AU1200 */
+#define AU1550_DSCR_CMD0_UART0_TX      0
+#define AU1550_DSCR_CMD0_UART0_RX      1
+#define AU1550_DSCR_CMD0_UART3_TX      2
+#define AU1550_DSCR_CMD0_UART3_RX      3
+#define AU1550_DSCR_CMD0_DMA_REQ0      4
+#define AU1550_DSCR_CMD0_DMA_REQ1      5
+#define AU1550_DSCR_CMD0_DMA_REQ2      6
+#define AU1550_DSCR_CMD0_DMA_REQ3      7
+#define AU1550_DSCR_CMD0_USBDEV_RX0    8
+#define AU1550_DSCR_CMD0_USBDEV_TX0    9
+#define AU1550_DSCR_CMD0_USBDEV_TX1    10
+#define AU1550_DSCR_CMD0_USBDEV_TX2    11
+#define AU1550_DSCR_CMD0_USBDEV_RX3    12
+#define AU1550_DSCR_CMD0_USBDEV_RX4    13
+#define AU1550_DSCR_CMD0_PSC0_TX       14
+#define AU1550_DSCR_CMD0_PSC0_RX       15
+#define AU1550_DSCR_CMD0_PSC1_TX       16
+#define AU1550_DSCR_CMD0_PSC1_RX       17
+#define AU1550_DSCR_CMD0_PSC2_TX       18
+#define AU1550_DSCR_CMD0_PSC2_RX       19
+#define AU1550_DSCR_CMD0_PSC3_TX       20
+#define AU1550_DSCR_CMD0_PSC3_RX       21
+#define AU1550_DSCR_CMD0_PCI_WRITE     22
+#define AU1550_DSCR_CMD0_NAND_FLASH    23
+#define AU1550_DSCR_CMD0_MAC0_RX       24
+#define AU1550_DSCR_CMD0_MAC0_TX       25
+#define AU1550_DSCR_CMD0_MAC1_RX       26
+#define AU1550_DSCR_CMD0_MAC1_TX       27
+
+#define AU1200_DSCR_CMD0_UART0_TX      0
+#define AU1200_DSCR_CMD0_UART0_RX      1
+#define AU1200_DSCR_CMD0_UART1_TX      2
+#define AU1200_DSCR_CMD0_UART1_RX      3
+#define AU1200_DSCR_CMD0_DMA_REQ0      4
+#define AU1200_DSCR_CMD0_DMA_REQ1      5
+#define AU1200_DSCR_CMD0_MAE_BE                6
+#define AU1200_DSCR_CMD0_MAE_FE                7
+#define AU1200_DSCR_CMD0_SDMS_TX0      8
+#define AU1200_DSCR_CMD0_SDMS_RX0      9
+#define AU1200_DSCR_CMD0_SDMS_TX1      10
+#define AU1200_DSCR_CMD0_SDMS_RX1      11
+#define AU1200_DSCR_CMD0_AES_TX                13
+#define AU1200_DSCR_CMD0_AES_RX                12
+#define AU1200_DSCR_CMD0_PSC0_TX       14
+#define AU1200_DSCR_CMD0_PSC0_RX       15
+#define AU1200_DSCR_CMD0_PSC1_TX       16
+#define AU1200_DSCR_CMD0_PSC1_RX       17
+#define AU1200_DSCR_CMD0_CIM_RXA       18
+#define AU1200_DSCR_CMD0_CIM_RXB       19
+#define AU1200_DSCR_CMD0_CIM_RXC       20
+#define AU1200_DSCR_CMD0_MAE_BOTH      21
+#define AU1200_DSCR_CMD0_LCD           22
+#define AU1200_DSCR_CMD0_NAND_FLASH    23
+#define AU1200_DSCR_CMD0_PSC0_SYNC     24
+#define AU1200_DSCR_CMD0_PSC1_SYNC     25
+#define AU1200_DSCR_CMD0_CIM_SYNC      26
 
 #define DSCR_CMD0_THROTTLE     30
 #define DSCR_CMD0_ALWAYS       31
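Drivers now name DBDMA endpoints with the per-SoC prefixes instead of relying on a CONFIG_SOC_* selected set. A hedged sketch of a typical allocation, with the au1xxx_dbdma_chan_alloc signature assumed from this header:

/* Hedged sketch: allocate a memory-to-PSC1-TX channel on Au1550.
 * DSCR_CMD0_ALWAYS is the "memory" endpoint; signature assumed. */
static u32 example_alloc_psc1_tx_chan(void (*cb)(int irq, void *arg),
				      void *arg)
{
	return au1xxx_dbdma_chan_alloc(DSCR_CMD0_ALWAYS,
				       AU1550_DSCR_CMD0_PSC1_TX, cb, arg);
}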
index 5656c72de6d39ef481c22e9fa6ba1b8ad0802db2..e306384b1414f2cd35301e81f9705e3d052d0f6c 100644 (file)
@@ -58,6 +58,7 @@ typedef struct {
 #endif
        int                     irq;
        u32                     regbase;
+       int                     ddma_id;
 } _auide_hwif;
 
 /******************************************************************************/
index 5a5cb7386427d0710c26fb2bd83a6ec32aa5341d..4e3f3bc26c6071af89a74f4dd75c77ea05969ca4 100644 (file)
 #ifndef _AU1000_PSC_H_
 #define _AU1000_PSC_H_
 
-/* The PSC base addresses.  */
-#ifdef CONFIG_SOC_AU1550
-#define PSC0_BASE_ADDR         0xb1a00000
-#define PSC1_BASE_ADDR         0xb1b00000
-#define PSC2_BASE_ADDR         0xb0a00000
-#define PSC3_BASE_ADDR         0xb0b00000
-#endif
-
-#ifdef CONFIG_SOC_AU1200
-#define PSC0_BASE_ADDR         0xb1a00000
-#define PSC1_BASE_ADDR         0xb1b00000
-#endif
-
 /*
  * The PSC select and control registers are common to all protocols.
  */
 #define PSC_AC97GPO_OFFSET     0x00000028
 #define PSC_AC97GPI_OFFSET     0x0000002c
 
-#define AC97_PSC_SEL           (AC97_PSC_BASE + PSC_SEL_OFFSET)
-#define AC97_PSC_CTRL          (AC97_PSC_BASE + PSC_CTRL_OFFSET)
-#define PSC_AC97CFG            (AC97_PSC_BASE + PSC_AC97CFG_OFFSET)
-#define PSC_AC97MSK            (AC97_PSC_BASE + PSC_AC97MSK_OFFSET)
-#define PSC_AC97PCR            (AC97_PSC_BASE + PSC_AC97PCR_OFFSET)
-#define PSC_AC97STAT           (AC97_PSC_BASE + PSC_AC97STAT_OFFSET)
-#define PSC_AC97EVNT           (AC97_PSC_BASE + PSC_AC97EVNT_OFFSET)
-#define PSC_AC97TXRX           (AC97_PSC_BASE + PSC_AC97TXRX_OFFSET)
-#define PSC_AC97CDC            (AC97_PSC_BASE + PSC_AC97CDC_OFFSET)
-#define PSC_AC97RST            (AC97_PSC_BASE + PSC_AC97RST_OFFSET)
-#define PSC_AC97GPO            (AC97_PSC_BASE + PSC_AC97GPO_OFFSET)
-#define PSC_AC97GPI            (AC97_PSC_BASE + PSC_AC97GPI_OFFSET)
-
 /* AC97 Config Register. */
 #define PSC_AC97CFG_RT_MASK    (3 << 30)
 #define PSC_AC97CFG_RT_FIFO1   (0 << 30)
index 1f41a522906d072b24ee8e78aeb5dfdd3af668a8..73853b5a2a31d1ace4cf4cf4e7bc150825d9273e 100644 (file)
@@ -347,17 +347,6 @@ static inline int alchemy_gpio2_to_irq(int gpio)
 
 /**********************************************************************/
 
-/* On Au1000, Au1500 and Au1100 GPIOs won't work as inputs before
- * SYS_PININPUTEN is written to at least once.  On Au1550/Au1200 this
- * register enables use of GPIOs as wake source.
- */
-static inline void alchemy_gpio1_input_enable(void)
-{
-       void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
-       __raw_writel(0, base + SYS_PININPUTEN); /* the write op is key */
-       wmb();
-}
-
 /* GPIO2 shared interrupts and control */
 
 static inline void __alchemy_gpio2_mod_int(int gpio2, int en)
@@ -561,6 +550,7 @@ static inline int alchemy_irq_to_gpio(int irq)
 
 #ifndef CONFIG_GPIOLIB
 
+#ifdef CONFIG_ALCHEMY_GPIOINT_AU1000
 
 #ifndef CONFIG_ALCHEMY_GPIO_INDIRECT   /* case (4) */
 
@@ -665,24 +655,7 @@ static inline void gpio_unexport(unsigned gpio)
 
 #endif /* !CONFIG_ALCHEMY_GPIO_INDIRECT */
 
-
-#else  /* CONFIG GPIOLIB */
-
-
- /* using gpiolib to provide up to 2 gpio_chips for on-chip gpios */
-#ifndef CONFIG_ALCHEMY_GPIO_INDIRECT   /* case (2) */
-
-/* get everything through gpiolib */
-#define gpio_to_irq    __gpio_to_irq
-#define gpio_get_value __gpio_get_value
-#define gpio_set_value __gpio_set_value
-#define gpio_cansleep  __gpio_cansleep
-#define irq_to_gpio    alchemy_irq_to_gpio
-
-#include <asm-generic/gpio.h>
-
-#endif /* !CONFIG_ALCHEMY_GPIO_INDIRECT */
-
+#endif /* CONFIG_ALCHEMY_GPIOINT_AU1000 */
 
 #endif /* !CONFIG_GPIOLIB */
 
index c3f60cdc320371e49d58461f6a20ce157ddfadf1..fcdc8c4809db523617cefd3ef46c93cf26ee5e4c 100644 (file)
@@ -1,10 +1,83 @@
+/*
+ * Alchemy GPIO support.
+ *
+ * With CONFIG_GPIOLIB=y different types of on-chip GPIO can be supported within
+ *  the same kernel image.
+ * With CONFIG_GPIOLIB=n, your board must select ALCHEMY_GPIOINT_AU1XXX for the
+ *  appropriate CPU type (AU1000 currently).
+ */
+
 #ifndef _ALCHEMY_GPIO_H_
 #define _ALCHEMY_GPIO_H_
 
-#if defined(CONFIG_ALCHEMY_GPIOINT_AU1000)
-
+#include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/gpio-au1000.h>
 
-#endif
+/* On Au1000, Au1500 and Au1100 GPIOs won't work as inputs before
+ * SYS_PININPUTEN is written to at least once.  On Au1550/Au1200/Au1300 this
+ * register enables use of GPIOs as wake source.
+ */
+static inline void alchemy_gpio1_input_enable(void)
+{
+       void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
+       __raw_writel(0, base + 0x110);          /* the write op is key */
+       wmb();
+}
+
+
+/* Linux gpio framework integration.
+ *
+ * 4 use cases of Alchemy GPIOs:
+ * (1) GPIOLIB=y, ALCHEMY_GPIO_INDIRECT=y:
+ *     Board must register gpiochips.
+ * (2) GPIOLIB=y, ALCHEMY_GPIO_INDIRECT=n:
+ *     A gpiochip for the 75 GPIOs is registered.
+ *
+ * (3) GPIOLIB=n, ALCHEMY_GPIO_INDIRECT=y:
+ *     The boards' gpio.h must provide the Linux gpio wrapper functions.
+ *
+ * (4) GPIOLIB=n, ALCHEMY_GPIO_INDIRECT=n:
+ *     Inlinable gpio functions are provided which enable access to the
+ *     Au1300 gpios only by using the numbers straight out of the
+ *     datasheets.
+ *
+ * Cases 1 and 3 are intended for boards which want to provide their own
+ * GPIO namespace and operations (e.g. you have 8 GPIOs which are in part
+ * provided by spare Au1300 GPIO pins and in part by an external FPGA, but
+ * you still want them to be accessible in Linux as gpio0-7.  The board
+ * can of course use the alchemy_gpioX_* functions as required).
+ */
+
+#ifdef CONFIG_GPIOLIB
+
+/* wraps the cpu-dependent irq_to_gpio functions */
+/* FIXME: gpiolib needs an irq_to_gpio hook */
+static inline int __au_irq_to_gpio(unsigned int irq)
+{
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1000...ALCHEMY_CPU_AU1200:
+               return alchemy_irq_to_gpio(irq);
+       }
+       return -EINVAL;
+}
+
+
+/* using gpiolib to provide up to 2 gpio_chips for on-chip gpios */
+#ifndef CONFIG_ALCHEMY_GPIO_INDIRECT   /* case (2) */
+
+/* get everything through gpiolib */
+#define gpio_to_irq    __gpio_to_irq
+#define gpio_get_value __gpio_get_value
+#define gpio_set_value __gpio_set_value
+#define gpio_cansleep  __gpio_cansleep
+#define irq_to_gpio    __au_irq_to_gpio
+
+#include <asm-generic/gpio.h>
+
+#endif /* !CONFIG_ALCHEMY_GPIO_INDIRECT */
+
+
+#endif /* CONFIG_GPIOLIB */
 
 #endif /* _ALCHEMY_GPIO_H_ */
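Case (1) of the comment above is the interesting one: the board supplies its own gpiochip and is free to back it with the alchemy_gpio* accessors. A sketch under those assumptions (all myboard_* names are hypothetical):

#include <linux/gpio.h>
#include <asm/mach-au1x00/gpio-au1000.h>

/* Hedged sketch: wrap 8 spare on-chip pins in a board gpiochip. */
static int myboard_gpio_get(struct gpio_chip *chip, unsigned off)
{
	return alchemy_gpio_get_value(off);	/* assumed on-chip accessor */
}

static void myboard_gpio_set(struct gpio_chip *chip, unsigned off, int v)
{
	alchemy_gpio_set_value(off, v);
}

static struct gpio_chip myboard_chip = {
	.label	= "myboard-gpio",
	.get	= myboard_gpio_get,
	.set	= myboard_gpio_set,
	.base	= 0,
	.ngpio	= 8,
};

/* board init would then call gpiochip_add(&myboard_chip); */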
index 3404248f50946e377e37964b6042765432b34896..7a39657108c4a5d7f5ee2d8dba99d6a1b9713f1c 100644 (file)
@@ -46,8 +46,6 @@
 
 #define IDE_PHYS_ADDR          0x18800000
 #define IDE_REG_SHIFT          5
-#define IDE_DDMA_REQ           DSCR_CMD0_DMA_REQ1
-#define IDE_RQSIZE             128
 
 #define DB1200_IDE_PHYS_ADDR   IDE_PHYS_ADDR
 #define DB1200_IDE_PHYS_LEN    (16 << IDE_REG_SHIFT)
index a919dac525a177410f4c67bdad9e18be044b2f24..a5affb0568ef5b9799dc233794db8820966b3f5a 100644 (file)
 
 #ifdef CONFIG_MIPS_DB1550
 
-#define DBDMA_AC97_TX_CHAN     DSCR_CMD0_PSC1_TX
-#define DBDMA_AC97_RX_CHAN     DSCR_CMD0_PSC1_RX
-#define DBDMA_I2S_TX_CHAN      DSCR_CMD0_PSC3_TX
-#define DBDMA_I2S_RX_CHAN      DSCR_CMD0_PSC3_RX
+#define DBDMA_AC97_TX_CHAN     AU1550_DSCR_CMD0_PSC1_TX
+#define DBDMA_AC97_RX_CHAN     AU1550_DSCR_CMD0_PSC1_RX
+#define DBDMA_I2S_TX_CHAN      AU1550_DSCR_CMD0_PSC3_TX
+#define DBDMA_I2S_RX_CHAN      AU1550_DSCR_CMD0_PSC3_RX
 
-#define SPI_PSC_BASE           PSC0_BASE_ADDR
-#define AC97_PSC_BASE          PSC1_BASE_ADDR
-#define SMBUS_PSC_BASE         PSC2_BASE_ADDR
-#define I2S_PSC_BASE           PSC3_BASE_ADDR
+#define SPI_PSC_BASE           AU1550_PSC0_PHYS_ADDR
+#define AC97_PSC_BASE          AU1550_PSC1_PHYS_ADDR
+#define SMBUS_PSC_BASE         AU1550_PSC2_PHYS_ADDR
+#define I2S_PSC_BASE           AU1550_PSC3_PHYS_ADDR
 
 #define NAND_PHYS_ADDR         0x20000000
 
index fce4332ebb7fc2ff1ea3380b059774c941bd5b72..374416adb65bde0519d15553fe5a02f1da54703c 100644 (file)
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1xxx_psc.h>
 
-#define DBDMA_AC97_TX_CHAN     DSCR_CMD0_PSC1_TX
-#define DBDMA_AC97_RX_CHAN     DSCR_CMD0_PSC1_RX
-#define DBDMA_I2S_TX_CHAN      DSCR_CMD0_PSC1_TX
-#define DBDMA_I2S_RX_CHAN      DSCR_CMD0_PSC1_RX
+#define DBDMA_AC97_TX_CHAN     AU1200_DSCR_CMD0_PSC1_TX
+#define DBDMA_AC97_RX_CHAN     AU1200_DSCR_CMD0_PSC1_RX
+#define DBDMA_I2S_TX_CHAN      AU1200_DSCR_CMD0_PSC1_TX
+#define DBDMA_I2S_RX_CHAN      AU1200_DSCR_CMD0_PSC1_RX
 
 /*
  * SPI and SMB are muxed on the Pb1200 board.
  * Refer to board documentation.
  */
-#define SPI_PSC_BASE           PSC0_BASE_ADDR
-#define SMBUS_PSC_BASE         PSC0_BASE_ADDR
+#define SPI_PSC_BASE           AU1550_PSC0_PHYS_ADDR
+#define SMBUS_PSC_BASE         AU1550_PSC0_PHYS_ADDR
 /*
  * AC97 and I2S are muxed on the Pb1200 board.
  * Refer to board documentation.
  */
-#define AC97_PSC_BASE       PSC1_BASE_ADDR
-#define I2S_PSC_BASE   PSC1_BASE_ADDR
+#define AC97_PSC_BASE       AU1550_PSC1_PHYS_ADDR
+#define I2S_PSC_BASE   AU1550_PSC1_PHYS_ADDR
 
 
 #define BCSR_SYSTEM_VDDI       0x001F
@@ -76,8 +76,6 @@
 #define IDE_REG_SHIFT          5
 #define IDE_PHYS_LEN           (16 << IDE_REG_SHIFT)
 #define IDE_INT                PB1200_IDE_INT
-#define IDE_DDMA_REQ           DSCR_CMD0_DMA_REQ1
-#define IDE_RQSIZE             128
 
 #define NAND_PHYS_ADDR         0x1C000000
 
index f835c88e95930616e7b02fb9e979ad944306106a..443b88adebf1649d75d8ba8c8bee7baec5e69e1a 100644 (file)
 #include <linux/types.h>
 #include <asm/mach-au1x00/au1xxx_psc.h>
 
-#define DBDMA_AC97_TX_CHAN     DSCR_CMD0_PSC1_TX
-#define DBDMA_AC97_RX_CHAN     DSCR_CMD0_PSC1_RX
-#define DBDMA_I2S_TX_CHAN      DSCR_CMD0_PSC3_TX
-#define DBDMA_I2S_RX_CHAN      DSCR_CMD0_PSC3_RX
+#define DBDMA_AC97_TX_CHAN     AU1550_DSCR_CMD0_PSC1_TX
+#define DBDMA_AC97_RX_CHAN     AU1550_DSCR_CMD0_PSC1_RX
+#define DBDMA_I2S_TX_CHAN      AU1550_DSCR_CMD0_PSC3_TX
+#define DBDMA_I2S_RX_CHAN      AU1550_DSCR_CMD0_PSC3_RX
 
-#define SPI_PSC_BASE           PSC0_BASE_ADDR
-#define AC97_PSC_BASE          PSC1_BASE_ADDR
-#define SMBUS_PSC_BASE         PSC2_BASE_ADDR
-#define I2S_PSC_BASE           PSC3_BASE_ADDR
+#define SPI_PSC_BASE           AU1550_PSC0_PHYS_ADDR
+#define AC97_PSC_BASE          AU1550_PSC1_PHYS_ADDR
+#define SMBUS_PSC_BASE         AU1550_PSC2_PHYS_ADDR
+#define I2S_PSC_BASE           AU1550_PSC3_PHYS_ADDR
 
 /*
  * Timing values as described in databook, * ns value stripped of
index 146d41b67adc301462407abbc80dce0c11475d99..e93943fabeac7ba4877b2f79a8c7ab262f674dc0 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef __ASM_MIPS_PROM_H
-#define __ASM_MIPS_PROM_H
+#ifndef __ASM_MIPSPROM_H
+#define __ASM_MIPSPROM_H
 
 #define PROM_RESET             0
 #define PROM_EXEC              1
@@ -73,4 +73,4 @@
 
 extern char *prom_getenv(char *);
 
-#endif /* __ASM_MIPS_PROM_H */
+#endif /* __ASM_MIPSPROM_H */
index 6a6f8a8f542dfb2516f4f5e3de8f13d9d810327f..2ea7b817feb87988c09711d63f5c02f570ea0ee1 100644 (file)
@@ -1006,18 +1006,26 @@ do {                                                                    \
 #define write_c0_perfctrl0(val)        __write_32bit_c0_register($25, 0, val)
 #define read_c0_perfcntr0()    __read_32bit_c0_register($25, 1)
 #define write_c0_perfcntr0(val)        __write_32bit_c0_register($25, 1, val)
+#define read_c0_perfcntr0_64() __read_64bit_c0_register($25, 1)
+#define write_c0_perfcntr0_64(val) __write_64bit_c0_register($25, 1, val)
 #define read_c0_perfctrl1()    __read_32bit_c0_register($25, 2)
 #define write_c0_perfctrl1(val)        __write_32bit_c0_register($25, 2, val)
 #define read_c0_perfcntr1()    __read_32bit_c0_register($25, 3)
 #define write_c0_perfcntr1(val)        __write_32bit_c0_register($25, 3, val)
+#define read_c0_perfcntr1_64() __read_64bit_c0_register($25, 3)
+#define write_c0_perfcntr1_64(val) __write_64bit_c0_register($25, 3, val)
 #define read_c0_perfctrl2()    __read_32bit_c0_register($25, 4)
 #define write_c0_perfctrl2(val)        __write_32bit_c0_register($25, 4, val)
 #define read_c0_perfcntr2()    __read_32bit_c0_register($25, 5)
 #define write_c0_perfcntr2(val)        __write_32bit_c0_register($25, 5, val)
+#define read_c0_perfcntr2_64() __read_64bit_c0_register($25, 5)
+#define write_c0_perfcntr2_64(val) __write_64bit_c0_register($25, 5, val)
 #define read_c0_perfctrl3()    __read_32bit_c0_register($25, 6)
 #define write_c0_perfctrl3(val)        __write_32bit_c0_register($25, 6, val)
 #define read_c0_perfcntr3()    __read_32bit_c0_register($25, 7)
 #define write_c0_perfcntr3(val)        __write_32bit_c0_register($25, 7, val)
+#define read_c0_perfcntr3_64() __read_64bit_c0_register($25, 7)
+#define write_c0_perfcntr3_64(val) __write_64bit_c0_register($25, 7, val)
 
 /* RM9000 PerfCount performance counter register */
 #define read_c0_perfcount()    __read_64bit_c0_register($25, 0)
index 857d9b7858ad6265a6406d355c71eb68e7a45a3e..7a6e82ef449b3bf4e8747974dd3ed81c2a231401 100644 (file)
@@ -8,8 +8,8 @@
  * published by the Free Software Foundation.
  *
  */
-#ifndef __ASM_MIPS_PROM_H
-#define __ASM_MIPS_PROM_H
+#ifndef __ASM_PROM_H
+#define __ASM_PROM_H
 
 #ifdef CONFIG_OF
 #include <asm/bootinfo.h>
@@ -25,4 +25,4 @@ extern void device_tree_init(void);
 static inline void device_tree_init(void) { }
 #endif /* CONFIG_OF */
 
-#endif /* _ASM_MIPS_PROM_H */
+#endif /* __ASM_PROM_H */
index 7c8ecb6b9c40ebadfeaa72e2db5f332baf27e2b3..785a5189b374df50babf8b719a72e29c2c633823 100644 (file)
@@ -6,6 +6,8 @@
  * Copyright (C) 1985 MIPS Computer Systems, Inc.
  * Copyright (C) 1994, 95, 99, 2003 by Ralf Baechle
  * Copyright (C) 1990 - 1992, 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2011 Wind River Systems,
+ *   written by Ralf Baechle <ralf@linux-mips.org>
  */
 #ifndef _ASM_REGDEF_H
 #define _ASM_REGDEF_H
 #define t2      $10
 #define t3      $11
 #define t4      $12
+#define ta0    $12
 #define t5      $13
+#define ta1    $13
 #define t6      $14
+#define ta2    $14
 #define t7      $15
+#define ta3    $15
 #define s0      $16     /* callee saved */
 #define s1      $17
 #define s2      $18
index 4397972949fa315e6796b828d65778394fd21451..e1ddb95c05e3459569105a34933a50906be65097 100644 (file)
@@ -17,8 +17,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include <linux/spinlock.h>
-#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
@@ -30,6 +28,8 @@
 
 #include <asm/mach-jz4740/base.h>
 
+#include "irq.h"
+
 #define JZ4740_GPIO_BASE_A (32*0)
 #define JZ4740_GPIO_BASE_B (32*1)
 #define JZ4740_GPIO_BASE_C (32*2)
 struct jz_gpio_chip {
        unsigned int irq;
        unsigned int irq_base;
-       uint32_t wakeup;
-       uint32_t suspend_mask;
        uint32_t edge_trigger_both;
 
        void __iomem *base;
 
-       spinlock_t lock;
-
        struct gpio_chip gpio_chip;
 };
 
@@ -102,7 +98,8 @@ static inline struct jz_gpio_chip *gpio_chip_to_jz_gpio_chip(struct gpio_chip *g
 
 static inline struct jz_gpio_chip *irq_to_jz_gpio_chip(struct irq_data *data)
 {
-       return irq_data_get_irq_chip_data(data);
+       struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+       return gc->private;
 }
 
 static inline void jz_gpio_write_bit(unsigned int gpio, unsigned int reg)
@@ -304,21 +301,15 @@ static void jz_gpio_irq_demux_handler(unsigned int irq, struct irq_desc *desc)
 {
        uint32_t flag;
        unsigned int gpio_irq;
-       unsigned int gpio_bank;
        struct jz_gpio_chip *chip = irq_desc_get_handler_data(desc);
 
-       gpio_bank = JZ4740_IRQ_GPIO0 - irq;
-
        flag = readl(chip->base + JZ_REG_GPIO_FLAG);
-
        if (!flag)
                return;
 
-       gpio_irq = __fls(flag);
+       gpio_irq = chip->irq_base + __fls(flag);
 
-       jz_gpio_check_trigger_both(chip, irq);
-
-       gpio_irq += (gpio_bank << 5) + JZ4740_IRQ_GPIO(0);
+       jz_gpio_check_trigger_both(chip, gpio_irq);
 
        generic_handle_irq(gpio_irq);
 };
@@ -329,18 +320,12 @@ static inline void jz_gpio_set_irq_bit(struct irq_data *data, unsigned int reg)
        writel(IRQ_TO_BIT(data->irq), chip->base + reg);
 }
 
-static void jz_gpio_irq_mask(struct irq_data *data)
-{
-       jz_gpio_set_irq_bit(data, JZ_REG_GPIO_MASK_SET);
-};
-
 static void jz_gpio_irq_unmask(struct irq_data *data)
 {
        struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
 
        jz_gpio_check_trigger_both(chip, data->irq);
-
-       jz_gpio_set_irq_bit(data, JZ_REG_GPIO_MASK_CLEAR);
+       irq_gc_unmask_enable_reg(data);
 };
 
 /* TODO: Check if function is gpio */
@@ -353,18 +338,13 @@ static unsigned int jz_gpio_irq_startup(struct irq_data *data)
 
 static void jz_gpio_irq_shutdown(struct irq_data *data)
 {
-       jz_gpio_irq_mask(data);
+       irq_gc_mask_disable_reg(data);
 
        /* Set direction to input */
        jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR);
        jz_gpio_set_irq_bit(data, JZ_REG_GPIO_SELECT_CLEAR);
 }
 
-static void jz_gpio_irq_ack(struct irq_data *data)
-{
-       jz_gpio_set_irq_bit(data, JZ_REG_GPIO_FLAG_CLEAR);
-};
-
 static int jz_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type)
 {
        struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
@@ -408,35 +388,13 @@ static int jz_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type)
 static int jz_gpio_irq_set_wake(struct irq_data *data, unsigned int on)
 {
        struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
-       spin_lock(&chip->lock);
-       if (on)
-               chip->wakeup |= IRQ_TO_BIT(data->irq);
-       else
-               chip->wakeup &= ~IRQ_TO_BIT(data->irq);
-       spin_unlock(&chip->lock);
 
+       irq_gc_set_wake(data, on);
        irq_set_irq_wake(chip->irq, on);
+
        return 0;
 }
 
-static struct irq_chip jz_gpio_irq_chip = {
-       .name = "GPIO",
-       .irq_mask = jz_gpio_irq_mask,
-       .irq_unmask = jz_gpio_irq_unmask,
-       .irq_ack = jz_gpio_irq_ack,
-       .irq_startup = jz_gpio_irq_startup,
-       .irq_shutdown = jz_gpio_irq_shutdown,
-       .irq_set_type = jz_gpio_irq_set_type,
-       .irq_set_wake = jz_gpio_irq_set_wake,
-       .flags = IRQCHIP_SET_TYPE_MASKED,
-};
-
-/*
- * This lock class tells lockdep that GPIO irqs are in a different
- * category than their parents, so it won't report false recursion.
- */
-static struct lock_class_key gpio_lock_class;
-
 #define JZ4740_GPIO_CHIP(_bank) { \
        .irq_base = JZ4740_IRQ_GPIO_BASE_ ## _bank, \
        .gpio_chip = { \
@@ -458,64 +416,44 @@ static struct jz_gpio_chip jz4740_gpio_chips[] = {
        JZ4740_GPIO_CHIP(D),
 };
 
-static void jz4740_gpio_suspend_chip(struct jz_gpio_chip *chip)
-{
-       chip->suspend_mask = readl(chip->base + JZ_REG_GPIO_MASK);
-       writel(~(chip->wakeup), chip->base + JZ_REG_GPIO_MASK_SET);
-       writel(chip->wakeup, chip->base + JZ_REG_GPIO_MASK_CLEAR);
-}
-
-static int jz4740_gpio_suspend(void)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); i++)
-               jz4740_gpio_suspend_chip(&jz4740_gpio_chips[i]);
-
-       return 0;
-}
-
-static void jz4740_gpio_resume_chip(struct jz_gpio_chip *chip)
+static void jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id)
 {
-       uint32_t mask = chip->suspend_mask;
+       struct irq_chip_generic *gc;
+       struct irq_chip_type *ct;
 
-       writel(~mask, chip->base + JZ_REG_GPIO_MASK_CLEAR);
-       writel(mask, chip->base + JZ_REG_GPIO_MASK_SET);
-}
+       chip->base = ioremap(JZ4740_GPIO_BASE_ADDR + (id * 0x100), 0x100);
 
-static void jz4740_gpio_resume(void)
-{
-       int i;
+       chip->irq = JZ4740_IRQ_INTC_GPIO(id);
+       irq_set_handler_data(chip->irq, chip);
+       irq_set_chained_handler(chip->irq, jz_gpio_irq_demux_handler);
 
-       for (i = ARRAY_SIZE(jz4740_gpio_chips) - 1; i >= 0 ; i--)
-               jz4740_gpio_resume_chip(&jz4740_gpio_chips[i]);
-}
+       gc = irq_alloc_generic_chip(chip->gpio_chip.label, 1, chip->irq_base,
+               chip->base, handle_level_irq);
 
-static struct syscore_ops jz4740_gpio_syscore_ops = {
-       .suspend = jz4740_gpio_suspend,
-       .resume = jz4740_gpio_resume,
-};
+       gc->wake_enabled = IRQ_MSK(chip->gpio_chip.ngpio);
+       gc->private = chip;
 
-static void jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id)
-{
-       int irq;
+       ct = gc->chip_types;
+       ct->regs.enable = JZ_REG_GPIO_MASK_CLEAR;
+       ct->regs.disable = JZ_REG_GPIO_MASK_SET;
+       ct->regs.ack = JZ_REG_GPIO_FLAG_CLEAR;
 
-       spin_lock_init(&chip->lock);
+       ct->chip.name = "GPIO";
+       ct->chip.irq_mask = irq_gc_mask_disable_reg;
+       ct->chip.irq_unmask = jz_gpio_irq_unmask;
+       ct->chip.irq_ack = irq_gc_ack_set_bit;
+       ct->chip.irq_suspend = jz4740_irq_suspend;
+       ct->chip.irq_resume = jz4740_irq_resume;
+       ct->chip.irq_startup = jz_gpio_irq_startup;
+       ct->chip.irq_shutdown = jz_gpio_irq_shutdown;
+       ct->chip.irq_set_type = jz_gpio_irq_set_type;
+       ct->chip.irq_set_wake = jz_gpio_irq_set_wake;
+       ct->chip.flags = IRQCHIP_SET_TYPE_MASKED;
 
-       chip->base = ioremap(JZ4740_GPIO_BASE_ADDR + (id * 0x100), 0x100);
+       irq_setup_generic_chip(gc, IRQ_MSK(chip->gpio_chip.ngpio),
+               IRQ_GC_INIT_NESTED_LOCK, 0, IRQ_NOPROBE | IRQ_LEVEL);
 
        gpiochip_add(&chip->gpio_chip);
-
-       chip->irq = JZ4740_IRQ_INTC_GPIO(id);
-       irq_set_handler_data(chip->irq, chip);
-       irq_set_chained_handler(chip->irq, jz_gpio_irq_demux_handler);
-
-       for (irq = chip->irq_base; irq < chip->irq_base + chip->gpio_chip.ngpio; ++irq) {
-               irq_set_lockdep_class(irq, &gpio_lock_class);
-               irq_set_chip_data(irq, chip);
-               irq_set_chip_and_handler(irq, &jz_gpio_irq_chip,
-                                        handle_level_irq);
-       }
 }
 
 static int __init jz4740_gpio_init(void)
@@ -525,8 +463,6 @@ static int __init jz4740_gpio_init(void)
        for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); ++i)
                jz4740_gpio_chip_init(&jz4740_gpio_chips[i], i);
 
-       register_syscore_ops(&jz4740_gpio_syscore_ops);
-
        printk(KERN_INFO "JZ4740 GPIO initialized\n");
 
        return 0;
index d82c0c430e03b27370fc12718c0077bd71f3c96a..fc57ded326d869d591f202cd90df59d8ef903869 100644 (file)
@@ -32,8 +32,6 @@
 #include <asm/mach-jz4740/base.h>
 
 static void __iomem *jz_intc_base;
-static uint32_t jz_intc_wakeup;
-static uint32_t jz_intc_saved;
 
 #define JZ_REG_INTC_STATUS     0x00
 #define JZ_REG_INTC_MASK       0x04
@@ -41,51 +39,36 @@ static uint32_t jz_intc_saved;
 #define JZ_REG_INTC_CLEAR_MASK 0x0c
 #define JZ_REG_INTC_PENDING    0x10
 
-#define IRQ_BIT(x) BIT((x) - JZ4740_IRQ_BASE)
-
-static inline unsigned long intc_irq_bit(struct irq_data *data)
+static irqreturn_t jz4740_cascade(int irq, void *data)
 {
-       return (unsigned long)irq_data_get_irq_chip_data(data);
-}
+       uint32_t irq_reg;
 
-static void intc_irq_unmask(struct irq_data *data)
-{
-       writel(intc_irq_bit(data), jz_intc_base + JZ_REG_INTC_CLEAR_MASK);
-}
+       irq_reg = readl(jz_intc_base + JZ_REG_INTC_PENDING);
 
-static void intc_irq_mask(struct irq_data *data)
-{
-       writel(intc_irq_bit(data), jz_intc_base + JZ_REG_INTC_SET_MASK);
+       if (irq_reg)
+               generic_handle_irq(__fls(irq_reg) + JZ4740_IRQ_BASE);
+
+       return IRQ_HANDLED;
 }
 
-static int intc_irq_set_wake(struct irq_data *data, unsigned int on)
+static void jz4740_irq_set_mask(struct irq_chip_generic *gc, uint32_t mask)
 {
-       if (on)
-               jz_intc_wakeup |= intc_irq_bit(data);
-       else
-               jz_intc_wakeup &= ~intc_irq_bit(data);
+       struct irq_chip_regs *regs = &gc->chip_types->regs;
 
-       return 0;
+       writel(mask, gc->reg_base + regs->enable);
+       writel(~mask, gc->reg_base + regs->disable);
 }
 
-static struct irq_chip intc_irq_type = {
-       .name =         "INTC",
-       .irq_mask =     intc_irq_mask,
-       .irq_mask_ack = intc_irq_mask,
-       .irq_unmask =   intc_irq_unmask,
-       .irq_set_wake = intc_irq_set_wake,
-};
-
-static irqreturn_t jz4740_cascade(int irq, void *data)
+void jz4740_irq_suspend(struct irq_data *data)
 {
-       uint32_t irq_reg;
-
-       irq_reg = readl(jz_intc_base + JZ_REG_INTC_PENDING);
-
-       if (irq_reg)
-               generic_handle_irq(__fls(irq_reg) + JZ4740_IRQ_BASE);
+       struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+       jz4740_irq_set_mask(gc, gc->wake_active);
+}
 
-       return IRQ_HANDLED;
+void jz4740_irq_resume(struct irq_data *data)
+{
+       struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+       jz4740_irq_set_mask(gc, gc->mask_cache);
 }
 
 static struct irqaction jz4740_cascade_action = {
@@ -95,7 +78,9 @@ static struct irqaction jz4740_cascade_action = {
 
 void __init arch_init_irq(void)
 {
-       int i;
+       struct irq_chip_generic *gc;
+       struct irq_chip_type *ct;
+
        mips_cpu_irq_init();
 
        jz_intc_base = ioremap(JZ4740_INTC_BASE_ADDR, 0x14);
@@ -103,10 +88,22 @@ void __init arch_init_irq(void)
        /* Mask all irqs */
        writel(0xffffffff, jz_intc_base + JZ_REG_INTC_SET_MASK);
 
-       for (i = JZ4740_IRQ_BASE; i < JZ4740_IRQ_BASE + 32; i++) {
-               irq_set_chip_data(i, (void *)IRQ_BIT(i));
-               irq_set_chip_and_handler(i, &intc_irq_type, handle_level_irq);
-       }
+       gc = irq_alloc_generic_chip("INTC", 1, JZ4740_IRQ_BASE, jz_intc_base,
+               handle_level_irq);
+
+       gc->wake_enabled = IRQ_MSK(32);
+
+       ct = gc->chip_types;
+       ct->regs.enable = JZ_REG_INTC_CLEAR_MASK;
+       ct->regs.disable = JZ_REG_INTC_SET_MASK;
+       ct->chip.irq_unmask = irq_gc_unmask_enable_reg;
+       ct->chip.irq_mask = irq_gc_mask_disable_reg;
+       ct->chip.irq_mask_ack = irq_gc_mask_disable_reg;
+       ct->chip.irq_set_wake = irq_gc_set_wake;
+       ct->chip.irq_suspend = jz4740_irq_suspend;
+       ct->chip.irq_resume = jz4740_irq_resume;
+
+       irq_setup_generic_chip(gc, IRQ_MSK(32), 0, 0, IRQ_NOPROBE | IRQ_LEVEL);
 
        setup_irq(2, &jz4740_cascade_action);
 }
@@ -122,19 +119,6 @@ asmlinkage void plat_irq_dispatch(void)
                spurious_interrupt();
 }
 
-void jz4740_intc_suspend(void)
-{
-       jz_intc_saved = readl(jz_intc_base + JZ_REG_INTC_MASK);
-       writel(~jz_intc_wakeup, jz_intc_base + JZ_REG_INTC_SET_MASK);
-       writel(jz_intc_wakeup, jz_intc_base + JZ_REG_INTC_CLEAR_MASK);
-}
-
-void jz4740_intc_resume(void)
-{
-       writel(~jz_intc_saved, jz_intc_base + JZ_REG_INTC_CLEAR_MASK);
-       writel(jz_intc_saved, jz_intc_base + JZ_REG_INTC_SET_MASK);
-}
-
 #ifdef CONFIG_DEBUG_FS
 
 static inline void intc_seq_reg(struct seq_file *s, const char *name,
index 56b5eadd1fa27d3e00e76370c34324d152dc26b4..f75e39d62885858d187da42e72e283e5700c6a55 100644 (file)
@@ -15,7 +15,9 @@
 #ifndef __MIPS_JZ4740_IRQ_H__
 #define __MIPS_JZ4740_IRQ_H__
 
-extern void jz4740_intc_suspend(void);
-extern void jz4740_intc_resume(void);
+#include <linux/irq.h>
+
+extern void jz4740_irq_suspend(struct irq_data *data);
+extern void jz4740_irq_resume(struct irq_data *data);
 
 #endif
index 902d5b50124ccbb7ac2f45ec9b092f343272fe4e..6744fa723f72d19bf024d1a0da16788192300ccf 100644 (file)
 #include <asm/mach-jz4740/clock.h>
 
 #include "clock.h"
-#include "irq.h"
 
 static int jz4740_pm_enter(suspend_state_t state)
 {
-       jz4740_intc_suspend();
        jz4740_clock_suspend();
 
        jz4740_clock_set_wait_mode(JZ4740_WAIT_MODE_SLEEP);
@@ -37,7 +35,6 @@ static int jz4740_pm_enter(suspend_state_t state)
        jz4740_clock_set_wait_mode(JZ4740_WAIT_MODE_IDLE);
 
        jz4740_clock_resume();
-       jz4740_intc_resume();
 
        return 0;
 }
index 83bba332bbfc17d78d372555ab2c251a9e6d3732..1a966183e3531c4c9f5dd735310364e82526cf6d 100644 (file)
@@ -11,6 +11,8 @@ obj-y         += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
+CFLAGS_REMOVE_perf_event_mipsxx.o = -pg
 endif
 
 obj-$(CONFIG_CEVT_BCM1480)     += cevt-bcm1480.o
@@ -106,7 +108,8 @@ obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT)       += 8250-platform.o
 
 obj-$(CONFIG_MIPS_CPUFREQ)     += cpufreq/
 
-obj-$(CONFIG_HW_PERF_EVENTS)   += perf_event.o
+obj-$(CONFIG_PERF_EVENTS)      += perf_event.o
+obj-$(CONFIG_HW_PERF_EVENTS)   += perf_event_mipsxx.o
 
 obj-$(CONFIG_JUMP_LABEL)       += jump_label.o
 
index ebc0cd20b35d612a1265a69cb1fb433d89bb41ae..aa327a755982e86134fd88b014d42781ff6d6b7b 100644 (file)
@@ -978,7 +978,10 @@ static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu)
 platform:
                set_elf_platform(cpu, "octeon");
                break;
+       case PRID_IMP_CAVIUM_CN61XX:
        case PRID_IMP_CAVIUM_CN63XX:
+       case PRID_IMP_CAVIUM_CN66XX:
+       case PRID_IMP_CAVIUM_CN68XX:
                c->cputype = CPU_CAVIUM_OCTEON2;
                __cpu_name[cpu] = "Cavium Octeon II";
                set_elf_platform(cpu, "octeon2");
index 0aee944ac38089eafb3001754d43c5ac0f7a3ac6..c1cf9c6c3f7705b9c50281d196633d91c8e788e5 100644 (file)
  * published by the Free Software Foundation.
  */
 
-#include <linux/cpumask.h>
-#include <linux/interrupt.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
 #include <linux/perf_event.h>
-#include <linux/uaccess.h>
 
-#include <asm/irq.h>
-#include <asm/irq_regs.h>
 #include <asm/stacktrace.h>
-#include <asm/time.h> /* For perf_irq */
-
-/* These are for 32bit counters. For 64bit ones, define them accordingly. */
-#define MAX_PERIOD     ((1ULL << 32) - 1)
-#define VALID_COUNT    0x7fffffff
-#define TOTAL_BITS     32
-#define HIGHEST_BIT    31
-
-#define MIPS_MAX_HWEVENTS 4
-
-struct cpu_hw_events {
-       /* Array of events on this cpu. */
-       struct perf_event       *events[MIPS_MAX_HWEVENTS];
-
-       /*
-        * Set the bit (indexed by the counter number) when the counter
-        * is used for an event.
-        */
-       unsigned long           used_mask[BITS_TO_LONGS(MIPS_MAX_HWEVENTS)];
-
-       /*
-        * The borrowed MSB for the performance counter. A MIPS performance
-        * counter uses its bit 31 (for 32bit counters) or bit 63 (for 64bit
-        * counters) as a factor of determining whether a counter overflow
-        * should be signaled. So here we use a separate MSB for each
-        * counter to make things easy.
-        */
-       unsigned long           msbs[BITS_TO_LONGS(MIPS_MAX_HWEVENTS)];
-
-       /*
-        * Software copy of the control register for each performance counter.
-        * MIPS CPUs vary in performance counters. They use this differently,
-        * and even may not use it.
-        */
-       unsigned int            saved_ctrl[MIPS_MAX_HWEVENTS];
-};
-DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
-       .saved_ctrl = {0},
-};
-
-/* The description of MIPS performance events. */
-struct mips_perf_event {
-       unsigned int event_id;
-       /*
-        * MIPS performance counters are indexed starting from 0.
-        * CNTR_EVEN indicates the indexes of the counters to be used are
-        * even numbers.
-        */
-       unsigned int cntr_mask;
-       #define CNTR_EVEN       0x55555555
-       #define CNTR_ODD        0xaaaaaaaa
-#ifdef CONFIG_MIPS_MT_SMP
-       enum {
-               T  = 0,
-               V  = 1,
-               P  = 2,
-       } range;
-#else
-       #define T
-       #define V
-       #define P
-#endif
-};
-
-static struct mips_perf_event raw_event;
-static DEFINE_MUTEX(raw_event_mutex);
-
-#define UNSUPPORTED_PERF_EVENT_ID 0xffffffff
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-struct mips_pmu {
-       const char      *name;
-       int             irq;
-       irqreturn_t     (*handle_irq)(int irq, void *dev);
-       int             (*handle_shared_irq)(void);
-       void            (*start)(void);
-       void            (*stop)(void);
-       int             (*alloc_counter)(struct cpu_hw_events *cpuc,
-                                       struct hw_perf_event *hwc);
-       u64             (*read_counter)(unsigned int idx);
-       void            (*write_counter)(unsigned int idx, u64 val);
-       void            (*enable_event)(struct hw_perf_event *evt, int idx);
-       void            (*disable_event)(int idx);
-       const struct mips_perf_event *(*map_raw_event)(u64 config);
-       const struct mips_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
-       const struct mips_perf_event (*cache_event_map)
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
-       unsigned int    num_counters;
-};
-
-static const struct mips_pmu *mipspmu;
-
-static int
-mipspmu_event_set_period(struct perf_event *event,
-                       struct hw_perf_event *hwc,
-                       int idx)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       s64 left = local64_read(&hwc->period_left);
-       s64 period = hwc->sample_period;
-       int ret = 0;
-       u64 uleft;
-       unsigned long flags;
-
-       if (unlikely(left <= -period)) {
-               left = period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               ret = 1;
-       }
-
-       if (unlikely(left <= 0)) {
-               left += period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               ret = 1;
-       }
-
-       if (left > (s64)MAX_PERIOD)
-               left = MAX_PERIOD;
-
-       local64_set(&hwc->prev_count, (u64)-left);
-
-       local_irq_save(flags);
-       uleft = (u64)(-left) & MAX_PERIOD;
-       uleft > VALID_COUNT ?
-               set_bit(idx, cpuc->msbs) : clear_bit(idx, cpuc->msbs);
-       mipspmu->write_counter(idx, (u64)(-left) & VALID_COUNT);
-       local_irq_restore(flags);
-
-       perf_event_update_userpage(event);
-
-       return ret;
-}
-
-static void mipspmu_event_update(struct perf_event *event,
-                       struct hw_perf_event *hwc,
-                       int idx)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       unsigned long flags;
-       int shift = 64 - TOTAL_BITS;
-       s64 prev_raw_count, new_raw_count;
-       u64 delta;
-
-again:
-       prev_raw_count = local64_read(&hwc->prev_count);
-       local_irq_save(flags);
-       /* Make the counter value be a "real" one. */
-       new_raw_count = mipspmu->read_counter(idx);
-       if (new_raw_count & (test_bit(idx, cpuc->msbs) << HIGHEST_BIT)) {
-               new_raw_count &= VALID_COUNT;
-               clear_bit(idx, cpuc->msbs);
-       } else
-               new_raw_count |= (test_bit(idx, cpuc->msbs) << HIGHEST_BIT);
-       local_irq_restore(flags);
-
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                               new_raw_count) != prev_raw_count)
-               goto again;
-
-       delta = (new_raw_count << shift) - (prev_raw_count << shift);
-       delta >>= shift;
-
-       local64_add(delta, &event->count);
-       local64_sub(delta, &hwc->period_left);
-}
-
-static void mipspmu_start(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (!mipspmu)
-               return;
-
-       if (flags & PERF_EF_RELOAD)
-               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
-       hwc->state = 0;
-
-       /* Set the period for the event. */
-       mipspmu_event_set_period(event, hwc, hwc->idx);
-
-       /* Enable the event. */
-       mipspmu->enable_event(hwc, hwc->idx);
-}
-
-static void mipspmu_stop(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (!mipspmu)
-               return;
-
-       if (!(hwc->state & PERF_HES_STOPPED)) {
-               /* We are working on a local event. */
-               mipspmu->disable_event(hwc->idx);
-               barrier();
-               mipspmu_event_update(event, hwc, hwc->idx);
-               hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
-       }
-}
-
-static int mipspmu_add(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-       int idx;
-       int err = 0;
-
-       perf_pmu_disable(event->pmu);
-
-       /* To look for a free counter for this event. */
-       idx = mipspmu->alloc_counter(cpuc, hwc);
-       if (idx < 0) {
-               err = idx;
-               goto out;
-       }
-
-       /*
-        * If there is an event in the counter we are going to use then
-        * make sure it is disabled.
-        */
-       event->hw.idx = idx;
-       mipspmu->disable_event(idx);
-       cpuc->events[idx] = event;
-
-       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-       if (flags & PERF_EF_START)
-               mipspmu_start(event, PERF_EF_RELOAD);
-
-       /* Propagate our changes to the userspace mapping. */
-       perf_event_update_userpage(event);
-
-out:
-       perf_pmu_enable(event->pmu);
-       return err;
-}
-
-static void mipspmu_del(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-       int idx = hwc->idx;
-
-       WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
-
-       mipspmu_stop(event, PERF_EF_UPDATE);
-       cpuc->events[idx] = NULL;
-       clear_bit(idx, cpuc->used_mask);
-
-       perf_event_update_userpage(event);
-}
-
-static void mipspmu_read(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       /* Don't read disabled counters! */
-       if (hwc->idx < 0)
-               return;
-
-       mipspmu_event_update(event, hwc, hwc->idx);
-}
-
-static void mipspmu_enable(struct pmu *pmu)
-{
-       if (mipspmu)
-               mipspmu->start();
-}
-
-static void mipspmu_disable(struct pmu *pmu)
-{
-       if (mipspmu)
-               mipspmu->stop();
-}
-
-static atomic_t active_events = ATOMIC_INIT(0);
-static DEFINE_MUTEX(pmu_reserve_mutex);
-static int (*save_perf_irq)(void);
-
-static int mipspmu_get_irq(void)
-{
-       int err;
-
-       if (mipspmu->irq >= 0) {
-               /* Request my own irq handler. */
-               err = request_irq(mipspmu->irq, mipspmu->handle_irq,
-                       IRQF_DISABLED | IRQF_NOBALANCING,
-                       "mips_perf_pmu", NULL);
-               if (err) {
-                       pr_warning("Unable to request IRQ%d for MIPS "
-                          "performance counters!\n", mipspmu->irq);
-               }
-       } else if (cp0_perfcount_irq < 0) {
-               /*
-                * We are sharing the irq number with the timer interrupt.
-                */
-               save_perf_irq = perf_irq;
-               perf_irq = mipspmu->handle_shared_irq;
-               err = 0;
-       } else {
-               pr_warning("The platform hasn't properly defined its "
-                       "interrupt controller.\n");
-               err = -ENOENT;
-       }
-
-       return err;
-}
-
-static void mipspmu_free_irq(void)
-{
-       if (mipspmu->irq >= 0)
-               free_irq(mipspmu->irq, NULL);
-       else if (cp0_perfcount_irq < 0)
-               perf_irq = save_perf_irq;
-}
-
-/*
- * mipsxx/rm9000/loongson2 have different performance counters, they have
- * specific low-level init routines.
- */
-static void reset_counters(void *arg);
-static int __hw_perf_event_init(struct perf_event *event);
-
-static void hw_perf_event_destroy(struct perf_event *event)
-{
-       if (atomic_dec_and_mutex_lock(&active_events,
-                               &pmu_reserve_mutex)) {
-               /*
-                * We must not call the destroy function with interrupts
-                * disabled.
-                */
-               on_each_cpu(reset_counters,
-                       (void *)(long)mipspmu->num_counters, 1);
-               mipspmu_free_irq();
-               mutex_unlock(&pmu_reserve_mutex);
-       }
-}
-
-static int mipspmu_event_init(struct perf_event *event)
-{
-       int err = 0;
-
-       switch (event->attr.type) {
-       case PERF_TYPE_RAW:
-       case PERF_TYPE_HARDWARE:
-       case PERF_TYPE_HW_CACHE:
-               break;
-
-       default:
-               return -ENOENT;
-       }
-
-       if (!mipspmu || event->cpu >= nr_cpumask_bits ||
-               (event->cpu >= 0 && !cpu_online(event->cpu)))
-               return -ENODEV;
-
-       if (!atomic_inc_not_zero(&active_events)) {
-               if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
-                       atomic_dec(&active_events);
-                       return -ENOSPC;
-               }
-
-               mutex_lock(&pmu_reserve_mutex);
-               if (atomic_read(&active_events) == 0)
-                       err = mipspmu_get_irq();
-
-               if (!err)
-                       atomic_inc(&active_events);
-               mutex_unlock(&pmu_reserve_mutex);
-       }
-
-       if (err)
-               return err;
-
-       err = __hw_perf_event_init(event);
-       if (err)
-               hw_perf_event_destroy(event);
-
-       return err;
-}
-
-static struct pmu pmu = {
-       .pmu_enable     = mipspmu_enable,
-       .pmu_disable    = mipspmu_disable,
-       .event_init     = mipspmu_event_init,
-       .add            = mipspmu_add,
-       .del            = mipspmu_del,
-       .start          = mipspmu_start,
-       .stop           = mipspmu_stop,
-       .read           = mipspmu_read,
-};
-
-static inline unsigned int
-mipspmu_perf_event_encode(const struct mips_perf_event *pev)
-{
-/*
- * Top 8 bits for range, next 16 bits for cntr_mask, lowest 8 bits for
- * event_id.
- */
-#ifdef CONFIG_MIPS_MT_SMP
-       return ((unsigned int)pev->range << 24) |
-               (pev->cntr_mask & 0xffff00) |
-               (pev->event_id & 0xff);
-#else
-       return (pev->cntr_mask & 0xffff00) |
-               (pev->event_id & 0xff);
-#endif
-}
-
-static const struct mips_perf_event *
-mipspmu_map_general_event(int idx)
-{
-       const struct mips_perf_event *pev;
-
-       pev = ((*mipspmu->general_event_map)[idx].event_id ==
-               UNSUPPORTED_PERF_EVENT_ID ? ERR_PTR(-EOPNOTSUPP) :
-               &(*mipspmu->general_event_map)[idx]);
-
-       return pev;
-}
-
-static const struct mips_perf_event *
-mipspmu_map_cache_event(u64 config)
-{
-       unsigned int cache_type, cache_op, cache_result;
-       const struct mips_perf_event *pev;
-
-       cache_type = (config >> 0) & 0xff;
-       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-               return ERR_PTR(-EINVAL);
-
-       cache_op = (config >> 8) & 0xff;
-       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-               return ERR_PTR(-EINVAL);
-
-       cache_result = (config >> 16) & 0xff;
-       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-               return ERR_PTR(-EINVAL);
-
-       pev = &((*mipspmu->cache_event_map)
-                                       [cache_type]
-                                       [cache_op]
-                                       [cache_result]);
-
-       if (pev->event_id == UNSUPPORTED_PERF_EVENT_ID)
-               return ERR_PTR(-EOPNOTSUPP);
-
-       return pev;
-
-}
-
-static int validate_event(struct cpu_hw_events *cpuc,
-              struct perf_event *event)
-{
-       struct hw_perf_event fake_hwc = event->hw;
-
-       /* Allow mixed event group. So return 1 to pass validation. */
-       if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
-               return 1;
-
-       return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0;
-}
-
-static int validate_group(struct perf_event *event)
-{
-       struct perf_event *sibling, *leader = event->group_leader;
-       struct cpu_hw_events fake_cpuc;
-
-       memset(&fake_cpuc, 0, sizeof(fake_cpuc));
-
-       if (!validate_event(&fake_cpuc, leader))
-               return -ENOSPC;
-
-       list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-               if (!validate_event(&fake_cpuc, sibling))
-                       return -ENOSPC;
-       }
-
-       if (!validate_event(&fake_cpuc, event))
-               return -ENOSPC;
-
-       return 0;
-}
-
-/* This is needed by specific irq handlers in perf_event_*.c */
-static void
-handle_associated_event(struct cpu_hw_events *cpuc,
-       int idx, struct perf_sample_data *data, struct pt_regs *regs)
-{
-       struct perf_event *event = cpuc->events[idx];
-       struct hw_perf_event *hwc = &event->hw;
-
-       mipspmu_event_update(event, hwc, idx);
-       data->period = event->hw.last_period;
-       if (!mipspmu_event_set_period(event, hwc, idx))
-               return;
-
-       if (perf_event_overflow(event, data, regs))
-               mipspmu->disable_event(idx);
-}
-
-#include "perf_event_mipsxx.c"
 
 /* Callchain handling code. */
 
 /*
  * Leave userspace callchain empty for now. When we find a way to trace
- * the user stack callchains, we add here.
+ * the user stack callchains, we will add it here.
  */
-void perf_callchain_user(struct perf_callchain_entry *entry,
-                   struct pt_regs *regs)
-{
-}
 
 static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
        unsigned long reg29)
index e5ad09a9baf7f55ab2fbf3f0ba602a6f7eb5a4c8..4f2971bcf8e5464577885b5ecc64db11c388d295 100644 (file)
-#if defined(CONFIG_CPU_MIPS32) || defined(CONFIG_CPU_MIPS64) || \
-    defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_SB1)
+/*
+ * Linux performance counter support for MIPS.
+ *
+ * Copyright (C) 2010 MIPS Technologies, Inc.
+ * Copyright (C) 2011 Cavium Networks, Inc.
+ * Author: Deng-Cheng Zhu
+ *
+ * This code is based on the implementation for ARM, which is in turn
+ * based on the sparc64 perf event code and the x86 code. Performance
+ * counter access is based on the MIPS Oprofile code. And the callchain
+ * support references the code of MIPS stacktrace.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/stacktrace.h>
+#include <asm/time.h> /* For perf_irq */
+
+#define MIPS_MAX_HWEVENTS 4
+
+struct cpu_hw_events {
+       /* Array of events on this cpu. */
+       struct perf_event       *events[MIPS_MAX_HWEVENTS];
+
+       /*
+        * Set the bit (indexed by the counter number) when the counter
+        * is used for an event.
+        */
+       unsigned long           used_mask[BITS_TO_LONGS(MIPS_MAX_HWEVENTS)];
+
+       /*
+        * Software copy of the control register for each performance counter.
+        * MIPS CPUs vary in performance counters. They use this differently,
+        * and even may not use it.
+        */
+       unsigned int            saved_ctrl[MIPS_MAX_HWEVENTS];
+};
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+       .saved_ctrl = {0},
+};
+
+/* The description of MIPS performance events. */
+struct mips_perf_event {
+       unsigned int event_id;
+       /*
+        * MIPS performance counters are indexed starting from 0.
+        * CNTR_EVEN indicates the indexes of the counters to be used are
+        * even numbers.
+        */
+       unsigned int cntr_mask;
+       #define CNTR_EVEN       0x55555555
+       #define CNTR_ODD        0xaaaaaaaa
+       #define CNTR_ALL        0xffffffff
+#ifdef CONFIG_MIPS_MT_SMP
+       enum {
+               T  = 0,
+               V  = 1,
+               P  = 2,
+       } range;
+#else
+       #define T
+       #define V
+       #define P
+#endif
+};
+
+static struct mips_perf_event raw_event;
+static DEFINE_MUTEX(raw_event_mutex);
+
+#define UNSUPPORTED_PERF_EVENT_ID 0xffffffff
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+struct mips_pmu {
+       u64             max_period;
+       u64             valid_count;
+       u64             overflow;
+       const char      *name;
+       int             irq;
+       u64             (*read_counter)(unsigned int idx);
+       void            (*write_counter)(unsigned int idx, u64 val);
+       const struct mips_perf_event *(*map_raw_event)(u64 config);
+       const struct mips_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
+       const struct mips_perf_event (*cache_event_map)
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+       unsigned int    num_counters;
+};
+
+static struct mips_pmu mipspmu;
 
 #define M_CONFIG1_PC   (1 << 4)
 
-#define M_PERFCTL_EXL                  (1UL      <<  0)
-#define M_PERFCTL_KERNEL               (1UL      <<  1)
-#define M_PERFCTL_SUPERVISOR           (1UL      <<  2)
-#define M_PERFCTL_USER                 (1UL      <<  3)
-#define M_PERFCTL_INTERRUPT_ENABLE     (1UL      <<  4)
+#define M_PERFCTL_EXL                  (1      <<  0)
+#define M_PERFCTL_KERNEL               (1      <<  1)
+#define M_PERFCTL_SUPERVISOR           (1      <<  2)
+#define M_PERFCTL_USER                 (1      <<  3)
+#define M_PERFCTL_INTERRUPT_ENABLE     (1      <<  4)
 #define M_PERFCTL_EVENT(event)         (((event) & 0x3ff)  << 5)
 #define M_PERFCTL_VPEID(vpe)           ((vpe)    << 16)
 #define M_PERFCTL_MT_EN(filter)                ((filter) << 20)
 #define    M_TC_EN_VPE                 M_PERFCTL_MT_EN(1)
 #define    M_TC_EN_TC                  M_PERFCTL_MT_EN(2)
 #define M_PERFCTL_TCID(tcid)           ((tcid)   << 22)
-#define M_PERFCTL_WIDE                 (1UL      << 30)
-#define M_PERFCTL_MORE                 (1UL      << 31)
+#define M_PERFCTL_WIDE                 (1      << 30)
+#define M_PERFCTL_MORE                 (1      << 31)
 
 #define M_PERFCTL_COUNT_EVENT_WHENEVER (M_PERFCTL_EXL |                \
                                        M_PERFCTL_KERNEL |              \
 #endif
 #define M_PERFCTL_EVENT_MASK           0xfe0
 
-#define M_COUNTER_OVERFLOW             (1UL      << 31)
 
 #ifdef CONFIG_MIPS_MT_SMP
 static int cpu_has_mipsmt_pertccounters;
 
+static DEFINE_RWLOCK(pmuint_rwlock);
+
 /*
  * FIXME: For VSMP, vpe_id() is redefined for Perf-events, because
  * cpu_data[cpuid].vpe_id reports 0 for _both_ CPUs.
@@ -49,210 +149,674 @@ static int cpu_has_mipsmt_pertccounters;
 #endif
 
 /* Copied from op_model_mipsxx.c */
-static inline unsigned int vpe_shift(void)
+static unsigned int vpe_shift(void)
 {
        if (num_possible_cpus() > 1)
                return 1;
 
        return 0;
 }
-#else /* !CONFIG_MIPS_MT_SMP */
-#define vpe_id()       0
-
-static inline unsigned int vpe_shift(void)
-{
-       return 0;
-}
-#endif /* CONFIG_MIPS_MT_SMP */
 
-static inline unsigned int
-counters_total_to_per_cpu(unsigned int counters)
+static unsigned int counters_total_to_per_cpu(unsigned int counters)
 {
        return counters >> vpe_shift();
 }
 
-static inline unsigned int
-counters_per_cpu_to_total(unsigned int counters)
+static unsigned int counters_per_cpu_to_total(unsigned int counters)
 {
        return counters << vpe_shift();
 }
 
-#define __define_perf_accessors(r, n, np)                              \
-                                                                       \
-static inline unsigned int r_c0_ ## r ## n(void)                       \
-{                                                                      \
-       unsigned int cpu = vpe_id();                                    \
-                                                                       \
-       switch (cpu) {                                                  \
-       case 0:                                                         \
-               return read_c0_ ## r ## n();                            \
-       case 1:                                                         \
-               return read_c0_ ## r ## np();                           \
-       default:                                                        \
-               BUG();                                                  \
-       }                                                               \
-       return 0;                                                       \
-}                                                                      \
-                                                                       \
-static inline void w_c0_ ## r ## n(unsigned int value)                 \
-{                                                                      \
-       unsigned int cpu = vpe_id();                                    \
-                                                                       \
-       switch (cpu) {                                                  \
-       case 0:                                                         \
-               write_c0_ ## r ## n(value);                             \
-               return;                                                 \
-       case 1:                                                         \
-               write_c0_ ## r ## np(value);                            \
-               return;                                                 \
-       default:                                                        \
-               BUG();                                                  \
-       }                                                               \
-       return;                                                         \
-}                                                                      \
-
-__define_perf_accessors(perfcntr, 0, 2)
-__define_perf_accessors(perfcntr, 1, 3)
-__define_perf_accessors(perfcntr, 2, 0)
-__define_perf_accessors(perfcntr, 3, 1)
-
-__define_perf_accessors(perfctrl, 0, 2)
-__define_perf_accessors(perfctrl, 1, 3)
-__define_perf_accessors(perfctrl, 2, 0)
-__define_perf_accessors(perfctrl, 3, 1)
-
-static inline int __n_counters(void)
-{
-       if (!(read_c0_config1() & M_CONFIG1_PC))
-               return 0;
-       if (!(read_c0_perfctrl0() & M_PERFCTL_MORE))
-               return 1;
-       if (!(read_c0_perfctrl1() & M_PERFCTL_MORE))
-               return 2;
-       if (!(read_c0_perfctrl2() & M_PERFCTL_MORE))
-               return 3;
+#else /* !CONFIG_MIPS_MT_SMP */
+#define vpe_id()       0
 
-       return 4;
+#endif /* CONFIG_MIPS_MT_SMP */
+
+static void resume_local_counters(void);
+static void pause_local_counters(void);
+static irqreturn_t mipsxx_pmu_handle_irq(int, void *);
+static int mipsxx_pmu_handle_shared_irq(void);
+
+static unsigned int mipsxx_pmu_swizzle_perf_idx(unsigned int idx)
+{
+       if (vpe_id() == 1)
+               idx = (idx + 2) & 3;
+       return idx;
 }
 
-static inline int n_counters(void)
+static u64 mipsxx_pmu_read_counter(unsigned int idx)
 {
-       int counters;
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
 
-       switch (current_cpu_type()) {
-       case CPU_R10000:
-               counters = 2;
-               break;
+       switch (idx) {
+       case 0:
+               /*
+                * The counters are unsigned, we must cast to truncate
+                * off the high bits.
+                */
+               return (u32)read_c0_perfcntr0();
+       case 1:
+               return (u32)read_c0_perfcntr1();
+       case 2:
+               return (u32)read_c0_perfcntr2();
+       case 3:
+               return (u32)read_c0_perfcntr3();
+       default:
+               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+               return 0;
+       }
+}
 
-       case CPU_R12000:
-       case CPU_R14000:
-               counters = 4;
-               break;
+static u64 mipsxx_pmu_read_counter_64(unsigned int idx)
+{
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
 
+       switch (idx) {
+       case 0:
+               return read_c0_perfcntr0_64();
+       case 1:
+               return read_c0_perfcntr1_64();
+       case 2:
+               return read_c0_perfcntr2_64();
+       case 3:
+               return read_c0_perfcntr3_64();
        default:
-               counters = __n_counters();
+               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+               return 0;
        }
-
-       return counters;
 }
 
-static void reset_counters(void *arg)
+static void mipsxx_pmu_write_counter(unsigned int idx, u64 val)
 {
-       int counters = (int)(long)arg;
-       switch (counters) {
-       case 4:
-               w_c0_perfctrl3(0);
-               w_c0_perfcntr3(0);
-       case 3:
-               w_c0_perfctrl2(0);
-               w_c0_perfcntr2(0);
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
+
+       switch (idx) {
+       case 0:
+               write_c0_perfcntr0(val);
+               return;
+       case 1:
+               write_c0_perfcntr1(val);
+               return;
        case 2:
-               w_c0_perfctrl1(0);
-               w_c0_perfcntr1(0);
+               write_c0_perfcntr2(val);
+               return;
+       case 3:
+               write_c0_perfcntr3(val);
+               return;
+       }
+}
+
+static void mipsxx_pmu_write_counter_64(unsigned int idx, u64 val)
+{
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
+
+       switch (idx) {
+       case 0:
+               write_c0_perfcntr0_64(val);
+               return;
        case 1:
-               w_c0_perfctrl0(0);
-               w_c0_perfcntr0(0);
+               write_c0_perfcntr1_64(val);
+               return;
+       case 2:
+               write_c0_perfcntr2_64(val);
+               return;
+       case 3:
+               write_c0_perfcntr3_64(val);
+               return;
        }
 }
 
-static inline u64
-mipsxx_pmu_read_counter(unsigned int idx)
+static unsigned int mipsxx_pmu_read_control(unsigned int idx)
 {
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
+
        switch (idx) {
        case 0:
-               return r_c0_perfcntr0();
+               return read_c0_perfctrl0();
        case 1:
-               return r_c0_perfcntr1();
+               return read_c0_perfctrl1();
        case 2:
-               return r_c0_perfcntr2();
+               return read_c0_perfctrl2();
        case 3:
-               return r_c0_perfcntr3();
+               return read_c0_perfctrl3();
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return 0;
        }
 }
 
-static inline void
-mipsxx_pmu_write_counter(unsigned int idx, u64 val)
+static void mipsxx_pmu_write_control(unsigned int idx, unsigned int val)
 {
+       idx = mipsxx_pmu_swizzle_perf_idx(idx);
+
        switch (idx) {
        case 0:
-               w_c0_perfcntr0(val);
+               write_c0_perfctrl0(val);
                return;
        case 1:
-               w_c0_perfcntr1(val);
+               write_c0_perfctrl1(val);
                return;
        case 2:
-               w_c0_perfcntr2(val);
+               write_c0_perfctrl2(val);
                return;
        case 3:
-               w_c0_perfcntr3(val);
+               write_c0_perfctrl3(val);
                return;
        }
 }
 
-static inline unsigned int
-mipsxx_pmu_read_control(unsigned int idx)
+static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc,
+                                   struct hw_perf_event *hwc)
+{
+       int i;
+
+       /*
+        * We only need to care the counter mask. The range has been
+        * checked definitely.
+        */
+       unsigned long cntr_mask = (hwc->event_base >> 8) & 0xffff;
+
+       for (i = mipspmu.num_counters - 1; i >= 0; i--) {
+               /*
+                * Note that some MIPS perf events can be counted by both
+                * even and odd counters, wheresas many other are only by
+                * even _or_ odd counters. This introduces an issue that
+                * when the former kind of event takes the counter the
+                * latter kind of event wants to use, then the "counter
+                * allocation" for the latter event will fail. In fact if
+                * they can be dynamically swapped, they both feel happy.
+                * But here we leave this issue alone for now.
+                */
+               if (test_bit(i, &cntr_mask) &&
+                       !test_and_set_bit(i, cpuc->used_mask))
+                       return i;
+       }
+
+       return -EAGAIN;
+}
+
+static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+       WARN_ON(idx < 0 || idx >= mipspmu.num_counters);
+
+       cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
+               (evt->config_base & M_PERFCTL_CONFIG_MASK) |
+               /* Make sure interrupt enabled. */
+               M_PERFCTL_INTERRUPT_ENABLE;
+       /*
+        * We do not actually let the counter run. Leave it until start().
+        */
+}
+
+static void mipsxx_pmu_disable_event(int idx)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       unsigned long flags;
+
+       WARN_ON(idx < 0 || idx >= mipspmu.num_counters);
+
+       local_irq_save(flags);
+       cpuc->saved_ctrl[idx] = mipsxx_pmu_read_control(idx) &
+               ~M_PERFCTL_COUNT_EVENT_WHENEVER;
+       mipsxx_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
+       local_irq_restore(flags);
+}
+
+static int mipspmu_event_set_period(struct perf_event *event,
+                                   struct hw_perf_event *hwc,
+                                   int idx)
+{
+       u64 left = local64_read(&hwc->period_left);
+       u64 period = hwc->sample_period;
+       int ret = 0;
+
+       if (unlikely((left + period) & (1ULL << 63))) {
+               /* left underflowed by more than period. */
+               left = period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       } else  if (unlikely((left + period) <= period)) {
+               /* left underflowed by less than period. */
+               left += period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+
+       if (left > mipspmu.max_period) {
+               left = mipspmu.max_period;
+               local64_set(&hwc->period_left, left);
+       }
+
+       local64_set(&hwc->prev_count, mipspmu.overflow - left);
+
+       mipspmu.write_counter(idx, mipspmu.overflow - left);
+
+       perf_event_update_userpage(event);
+
+       return ret;
+}
+
+static void mipspmu_event_update(struct perf_event *event,
+                                struct hw_perf_event *hwc,
+                                int idx)
+{
+       u64 prev_raw_count, new_raw_count;
+       u64 delta;
+
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       new_raw_count = mipspmu.read_counter(idx);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                               new_raw_count) != prev_raw_count)
+               goto again;
+
+       delta = new_raw_count - prev_raw_count;
+
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
+}
+
+static void mipspmu_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+       hwc->state = 0;
+
+       /* Set the period for the event. */
+       mipspmu_event_set_period(event, hwc, hwc->idx);
+
+       /* Enable the event. */
+       mipsxx_pmu_enable_event(hwc, hwc->idx);
+}
+
+static void mipspmu_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               /* We are working on a local event. */
+               mipsxx_pmu_disable_event(hwc->idx);
+               barrier();
+               mipspmu_event_update(event, hwc, hwc->idx);
+               hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       }
+}
+
+static int mipspmu_add(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx;
+       int err = 0;
+
+       perf_pmu_disable(event->pmu);
+
+       /* To look for a free counter for this event. */
+       idx = mipsxx_pmu_alloc_counter(cpuc, hwc);
+       if (idx < 0) {
+               err = idx;
+               goto out;
+       }
+
+       /*
+        * If there is an event in the counter we are going to use then
+        * make sure it is disabled.
+        */
+       event->hw.idx = idx;
+       mipsxx_pmu_disable_event(idx);
+       cpuc->events[idx] = event;
+
+       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       if (flags & PERF_EF_START)
+               mipspmu_start(event, PERF_EF_RELOAD);
+
+       /* Propagate our changes to the userspace mapping. */
+       perf_event_update_userpage(event);
+
+out:
+       perf_pmu_enable(event->pmu);
+       return err;
+}
+
+static void mipspmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       WARN_ON(idx < 0 || idx >= mipspmu.num_counters);
+
+       mipspmu_stop(event, PERF_EF_UPDATE);
+       cpuc->events[idx] = NULL;
+       clear_bit(idx, cpuc->used_mask);
+
+       perf_event_update_userpage(event);
+}
+
+static void mipspmu_read(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       /* Don't read disabled counters! */
+       if (hwc->idx < 0)
+               return;
+
+       mipspmu_event_update(event, hwc, hwc->idx);
+}
+
+static void mipspmu_enable(struct pmu *pmu)
+{
+#ifdef CONFIG_MIPS_MT_SMP
+       write_unlock(&pmuint_rwlock);
+#endif
+       resume_local_counters();
+}
+
+/*
+ * MIPS performance counters can be per-TC. The control registers cannot
+ * be directly accessed across CPUs. Hence if we want to do global
+ * control, we need cross-CPU calls. on_each_cpu() can help us, but we
+ * cannot make sure this function is called with interrupts enabled. So
+ * here we pause local counters and then grab a rwlock and leave the
+ * counters on other CPUs alone. If any counter interrupt is raised while
+ * we own the write lock, simply pause local counters on that CPU and
+ * spin in the handler. Also we know we won't be switched to another
+ * CPU after pausing local counters and before grabbing the lock.
+ */
+static void mipspmu_disable(struct pmu *pmu)
+{
+       pause_local_counters();
+#ifdef CONFIG_MIPS_MT_SMP
+       write_lock(&pmuint_rwlock);
+#endif
+}
+
+static atomic_t active_events = ATOMIC_INIT(0);
+static DEFINE_MUTEX(pmu_reserve_mutex);
+static int (*save_perf_irq)(void);
+
+static int mipspmu_get_irq(void)
+{
+       int err;
+
+       if (mipspmu.irq >= 0) {
+               /* Request my own irq handler. */
+               err = request_irq(mipspmu.irq, mipsxx_pmu_handle_irq,
+                       IRQF_PERCPU | IRQF_NOBALANCING,
+                       "mips_perf_pmu", NULL);
+               if (err) {
+                       pr_warning("Unable to request IRQ%d for MIPS "
+                          "performance counters!\n", mipspmu.irq);
+               }
+       } else if (cp0_perfcount_irq < 0) {
+               /*
+                * We are sharing the irq number with the timer interrupt.
+                */
+               save_perf_irq = perf_irq;
+               perf_irq = mipsxx_pmu_handle_shared_irq;
+               err = 0;
+       } else {
+               pr_warning("The platform hasn't properly defined its "
+                       "interrupt controller.\n");
+               err = -ENOENT;
+       }
+
+       return err;
+}
+
+static void mipspmu_free_irq(void)
+{
+       if (mipspmu.irq >= 0)
+               free_irq(mipspmu.irq, NULL);
+       else if (cp0_perfcount_irq < 0)
+               perf_irq = save_perf_irq;
+}
+
+/*
+ * mipsxx/rm9000/loongson2 have different performance counters; they
+ * have specific low-level init routines.
+ */
+static void reset_counters(void *arg);
+static int __hw_perf_event_init(struct perf_event *event);
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
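+       /* The last event out releases the irq and resets the counters. */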
+       if (atomic_dec_and_mutex_lock(&active_events,
+                               &pmu_reserve_mutex)) {
+               /*
+                * We must not call the destroy function with interrupts
+                * disabled.
+                */
+               on_each_cpu(reset_counters,
+                       (void *)(long)mipspmu.num_counters, 1);
+               mipspmu_free_irq();
+               mutex_unlock(&pmu_reserve_mutex);
+       }
+}
+
+static int mipspmu_event_init(struct perf_event *event)
+{
+       int err = 0;
+
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
+       if (event->cpu >= nr_cpumask_bits ||
+           (event->cpu >= 0 && !cpu_online(event->cpu)))
+               return -ENODEV;
+
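+       /*
+        * The first active event grabs the counter irq; later events just
+        * take a reference under pmu_reserve_mutex.
+        */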
+       if (!atomic_inc_not_zero(&active_events)) {
+               if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
+                       atomic_dec(&active_events);
+                       return -ENOSPC;
+               }
+
+               mutex_lock(&pmu_reserve_mutex);
+               if (atomic_read(&active_events) == 0)
+                       err = mipspmu_get_irq();
+
+               if (!err)
+                       atomic_inc(&active_events);
+               mutex_unlock(&pmu_reserve_mutex);
+       }
+
+       if (err)
+               return err;
+
+       err = __hw_perf_event_init(event);
+       if (err)
+               hw_perf_event_destroy(event);
+
+       return err;
+}
+
+static struct pmu pmu = {
+       .pmu_enable     = mipspmu_enable,
+       .pmu_disable    = mipspmu_disable,
+       .event_init     = mipspmu_event_init,
+       .add            = mipspmu_add,
+       .del            = mipspmu_del,
+       .start          = mipspmu_start,
+       .stop           = mipspmu_stop,
+       .read           = mipspmu_read,
+};
+
+static unsigned int mipspmu_perf_event_encode(const struct mips_perf_event *pev)
+{
+/*
+ * Top 8 bits for range, next 16 bits for cntr_mask, lowest 8 bits for
+ * event_id.
+ */
+#ifdef CONFIG_MIPS_MT_SMP
+       return ((unsigned int)pev->range << 24) |
+               (pev->cntr_mask & 0xffff00) |
+               (pev->event_id & 0xff);
+#else
+       return (pev->cntr_mask & 0xffff00) |
+               (pev->event_id & 0xff);
+#endif
+}
+
+static const struct mips_perf_event *mipspmu_map_general_event(int idx)
+{
+       const struct mips_perf_event *pev;
+
+       pev = ((*mipspmu.general_event_map)[idx].event_id ==
+               UNSUPPORTED_PERF_EVENT_ID ? ERR_PTR(-EOPNOTSUPP) :
+               &(*mipspmu.general_event_map)[idx]);
+
+       return pev;
+}
+
+static const struct mips_perf_event *mipspmu_map_cache_event(u64 config)
+{
+       unsigned int cache_type, cache_op, cache_result;
+       const struct mips_perf_event *pev;
+
+       cache_type = (config >> 0) & 0xff;
+       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+               return ERR_PTR(-EINVAL);
+
+       cache_op = (config >> 8) & 0xff;
+       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+               return ERR_PTR(-EINVAL);
+
+       cache_result = (config >> 16) & 0xff;
+       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+               return ERR_PTR(-EINVAL);
+
+       pev = &((*mipspmu.cache_event_map)
+                                       [cache_type]
+                                       [cache_op]
+                                       [cache_result]);
+
+       if (pev->event_id == UNSUPPORTED_PERF_EVENT_ID)
+               return ERR_PTR(-EOPNOTSUPP);
+
+       return pev;
+}
+
+static int validate_event(struct cpu_hw_events *cpuc,
+              struct perf_event *event)
+{
+       struct hw_perf_event fake_hwc = event->hw;
+
+       /* Allow mixed event groups; return 1 to pass validation. */
+       if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+               return 1;
+
+       return mipsxx_pmu_alloc_counter(cpuc, &fake_hwc) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+       struct perf_event *sibling, *leader = event->group_leader;
+       struct cpu_hw_events fake_cpuc;
+
+       memset(&fake_cpuc, 0, sizeof(fake_cpuc));
+
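+       /*
+        * Dry-run the counter allocation against an empty fake_cpuc to
+        * check that the whole group can be scheduled at once.
+        */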
+       if (!validate_event(&fake_cpuc, leader))
+               return -ENOSPC;
+
+       list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+               if (!validate_event(&fake_cpuc, sibling))
+                       return -ENOSPC;
+       }
+
+       if (!validate_event(&fake_cpuc, event))
+               return -ENOSPC;
+
+       return 0;
+}
+
+/* This is needed by specific irq handlers in perf_event_*.c */
+static void handle_associated_event(struct cpu_hw_events *cpuc,
+                                   int idx, struct perf_sample_data *data,
+                                   struct pt_regs *regs)
+{
+       struct perf_event *event = cpuc->events[idx];
+       struct hw_perf_event *hwc = &event->hw;
+
+       mipspmu_event_update(event, hwc, idx);
+       data->period = event->hw.last_period;
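+       /* Only deliver a sample once a full period has elapsed. */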
+       if (!mipspmu_event_set_period(event, hwc, idx))
+               return;
+
+       if (perf_event_overflow(event, data, regs))
+               mipsxx_pmu_disable_event(idx);
+}
+
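+/*
+ * Probe the number of counters: Config1.PC tells whether any exist at
+ * all, and the M_PERFCTL_MORE bit in each control register tells
+ * whether another counter/control pair follows.
+ */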
+static int __n_counters(void)
+{
+       if (!(read_c0_config1() & M_CONFIG1_PC))
+               return 0;
+       if (!(read_c0_perfctrl0() & M_PERFCTL_MORE))
+               return 1;
+       if (!(read_c0_perfctrl1() & M_PERFCTL_MORE))
+               return 2;
+       if (!(read_c0_perfctrl2() & M_PERFCTL_MORE))
+               return 3;
+
+       return 4;
+}
+
+static int n_counters(void)
 {
-       switch (idx) {
-       case 0:
-               return r_c0_perfctrl0();
-       case 1:
-               return r_c0_perfctrl1();
-       case 2:
-               return r_c0_perfctrl2();
-       case 3:
-               return r_c0_perfctrl3();
+       int counters;
+
+       switch (current_cpu_type()) {
+       case CPU_R10000:
+               counters = 2;
+               break;
+
+       case CPU_R12000:
+       case CPU_R14000:
+               counters = 4;
+               break;
+
        default:
-               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
-               return 0;
+               counters = __n_counters();
        }
+
+       return counters;
 }
 
-static inline void
-mipsxx_pmu_write_control(unsigned int idx, unsigned int val)
+static void reset_counters(void *arg)
 {
-       switch (idx) {
-       case 0:
-               w_c0_perfctrl0(val);
-               return;
-       case 1:
-               w_c0_perfctrl1(val);
-               return;
-       case 2:
-               w_c0_perfctrl2(val);
-               return;
+       int counters = (int)(long)arg;
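+
+       /* Each case falls through to clear all lower-numbered counters. */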
+       switch (counters) {
+       case 4:
+               mipsxx_pmu_write_control(3, 0);
+               mipspmu.write_counter(3, 0);
        case 3:
-               w_c0_perfctrl3(val);
-               return;
+               mipsxx_pmu_write_control(2, 0);
+               mipspmu.write_counter(2, 0);
+       case 2:
+               mipsxx_pmu_write_control(1, 0);
+               mipspmu.write_counter(1, 0);
+       case 1:
+               mipsxx_pmu_write_control(0, 0);
+               mipspmu.write_counter(0, 0);
        }
 }
 
-#ifdef CONFIG_MIPS_MT_SMP
-static DEFINE_RWLOCK(pmuint_rwlock);
-#endif
-
 /* 24K/34K/1004K cores can share the same event map. */
 static const struct mips_perf_event mipsxxcore_event_map
                                [PERF_COUNT_HW_MAX] = {
@@ -277,6 +841,16 @@ static const struct mips_perf_event mipsxx74Kcore_event_map
        [PERF_COUNT_HW_BUS_CYCLES] = { UNSUPPORTED_PERF_EVENT_ID },
 };
 
+static const struct mips_perf_event octeon_event_map[PERF_COUNT_HW_MAX] = {
+       [PERF_COUNT_HW_CPU_CYCLES] = { 0x01, CNTR_ALL },
+       [PERF_COUNT_HW_INSTRUCTIONS] = { 0x03, CNTR_ALL },
+       [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x2b, CNTR_ALL },
+       [PERF_COUNT_HW_CACHE_MISSES] = { 0x2e, CNTR_ALL  },
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x08, CNTR_ALL },
+       [PERF_COUNT_HW_BRANCH_MISSES] = { 0x09, CNTR_ALL },
+       [PERF_COUNT_HW_BUS_CYCLES] = { 0x25, CNTR_ALL },
+};
+
 /* 24K/34K/1004K cores can share the same cache event map. */
 static const struct mips_perf_event mipsxxcore_cache_map
                                [PERF_COUNT_HW_CACHE_MAX]
@@ -510,10 +1084,105 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map
 },
 };
 
+
+static const struct mips_perf_event octeon_cache_map
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { 0x2b, CNTR_ALL },
+               [C(RESULT_MISS)]        = { 0x2e, CNTR_ALL },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { 0x30, CNTR_ALL },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(L1I)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { 0x18, CNTR_ALL },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { 0x19, CNTR_ALL },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(LL)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(DTLB)] = {
+       /*
+        * Only general DTLB misses are counted; use the same event for
+        * both read and write.
+        */
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { 0x35, CNTR_ALL },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { 0x35, CNTR_ALL },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(ITLB)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { 0x37, CNTR_ALL },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+[C(BPU)] = {
+       /* Using the same code for *HW_BRANCH* */
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
+};
+
 #ifdef CONFIG_MIPS_MT_SMP
-static void
-check_and_calc_range(struct perf_event *event,
-                       const struct mips_perf_event *pev)
+static void check_and_calc_range(struct perf_event *event,
+                                const struct mips_perf_event *pev)
 {
        struct hw_perf_event *hwc = &event->hw;
 
@@ -536,9 +1205,8 @@ check_and_calc_range(struct perf_event *event,
                hwc->config_base |= M_TC_EN_ALL;
 }
 #else
-static void
-check_and_calc_range(struct perf_event *event,
-                       const struct mips_perf_event *pev)
+static void check_and_calc_range(struct perf_event *event,
+                                const struct mips_perf_event *pev)
 {
 }
 #endif
@@ -560,7 +1228,7 @@ static int __hw_perf_event_init(struct perf_event *event)
        } else if (PERF_TYPE_RAW == event->attr.type) {
                /* We are working on the global raw event. */
                mutex_lock(&raw_event_mutex);
-               pev = mipspmu->map_raw_event(event->attr.config);
+               pev = mipspmu.map_raw_event(event->attr.config);
        } else {
                /* The event type is not (yet) supported. */
                return -EOPNOTSUPP;
@@ -605,7 +1273,7 @@ static int __hw_perf_event_init(struct perf_event *event)
        hwc->config = 0;
 
        if (!hwc->sample_period) {
-               hwc->sample_period  = MAX_PERIOD;
+               hwc->sample_period  = mipspmu.max_period;
                hwc->last_period    = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }
@@ -618,70 +1286,47 @@ static int __hw_perf_event_init(struct perf_event *event)
        }
 
        event->destroy = hw_perf_event_destroy;
-
        return err;
 }
 
 static void pause_local_counters(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       int counters = mipspmu->num_counters;
+       int ctr = mipspmu.num_counters;
        unsigned long flags;
 
        local_irq_save(flags);
-       switch (counters) {
-       case 4:
-               cpuc->saved_ctrl[3] = r_c0_perfctrl3();
-               w_c0_perfctrl3(cpuc->saved_ctrl[3] &
-                       ~M_PERFCTL_COUNT_EVENT_WHENEVER);
-       case 3:
-               cpuc->saved_ctrl[2] = r_c0_perfctrl2();
-               w_c0_perfctrl2(cpuc->saved_ctrl[2] &
-                       ~M_PERFCTL_COUNT_EVENT_WHENEVER);
-       case 2:
-               cpuc->saved_ctrl[1] = r_c0_perfctrl1();
-               w_c0_perfctrl1(cpuc->saved_ctrl[1] &
-                       ~M_PERFCTL_COUNT_EVENT_WHENEVER);
-       case 1:
-               cpuc->saved_ctrl[0] = r_c0_perfctrl0();
-               w_c0_perfctrl0(cpuc->saved_ctrl[0] &
-                       ~M_PERFCTL_COUNT_EVENT_WHENEVER);
-       }
+       do {
+               ctr--;
+               cpuc->saved_ctrl[ctr] = mipsxx_pmu_read_control(ctr);
+               mipsxx_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] &
+                                        ~M_PERFCTL_COUNT_EVENT_WHENEVER);
+       } while (ctr > 0);
        local_irq_restore(flags);
 }
 
 static void resume_local_counters(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       int counters = mipspmu->num_counters;
-       unsigned long flags;
+       int ctr = mipspmu.num_counters;
 
-       local_irq_save(flags);
-       switch (counters) {
-       case 4:
-               w_c0_perfctrl3(cpuc->saved_ctrl[3]);
-       case 3:
-               w_c0_perfctrl2(cpuc->saved_ctrl[2]);
-       case 2:
-               w_c0_perfctrl1(cpuc->saved_ctrl[1]);
-       case 1:
-               w_c0_perfctrl0(cpuc->saved_ctrl[0]);
-       }
-       local_irq_restore(flags);
+       do {
+               ctr--;
+               mipsxx_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
+       } while (ctr > 0);
 }
 
 static int mipsxx_pmu_handle_shared_irq(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct perf_sample_data data;
-       unsigned int counters = mipspmu->num_counters;
-       unsigned int counter;
+       unsigned int counters = mipspmu.num_counters;
+       u64 counter;
        int handled = IRQ_NONE;
        struct pt_regs *regs;
 
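        /* Cause bit 26 (CAUSEF_PCI) flags a pending counter interrupt on R2. */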
        if (cpu_has_mips_r2 && !(read_c0_cause() & (1 << 26)))
                return handled;
-
        /*
         * First we pause the local counters, so that when we are locked
         * here, the counters are all paused. When it gets locked due to
@@ -702,13 +1347,9 @@ static int mipsxx_pmu_handle_shared_irq(void)
 #define HANDLE_COUNTER(n)                                              \
        case n + 1:                                                     \
                if (test_bit(n, cpuc->used_mask)) {                     \
-                       counter = r_c0_perfcntr ## n();                 \
-                       if (counter & M_COUNTER_OVERFLOW) {             \
-                               w_c0_perfcntr ## n(counter &            \
-                                               VALID_COUNT);           \
-                               if (test_and_change_bit(n, cpuc->msbs)) \
-                                       handle_associated_event(cpuc,   \
-                                               n, &data, regs);        \
+                       counter = mipspmu.read_counter(n);              \
+                       if (counter & mipspmu.overflow) {               \
+                               handle_associated_event(cpuc, n, &data, regs); \
                                handled = IRQ_HANDLED;                  \
                        }                                               \
                }
@@ -733,104 +1374,11 @@ static int mipsxx_pmu_handle_shared_irq(void)
        return handled;
 }
 
-static irqreturn_t
-mipsxx_pmu_handle_irq(int irq, void *dev)
+static irqreturn_t mipsxx_pmu_handle_irq(int irq, void *dev)
 {
        return mipsxx_pmu_handle_shared_irq();
 }
 
-static void mipsxx_pmu_start(void)
-{
-#ifdef CONFIG_MIPS_MT_SMP
-       write_unlock(&pmuint_rwlock);
-#endif
-       resume_local_counters();
-}
-
-/*
- * MIPS performance counters can be per-TC. The control registers can
- * not be directly accessed across CPUs. Hence if we want to do global
- * control, we need cross CPU calls. on_each_cpu() can help us, but we
- * can not make sure this function is called with interrupts enabled. So
- * here we pause local counters and then grab a rwlock and leave the
- * counters on other CPUs alone. If any counter interrupt raises while
- * we own the write lock, simply pause local counters on that CPU and
- * spin in the handler. Also we know we won't be switched to another
- * CPU after pausing local counters and before grabbing the lock.
- */
-static void mipsxx_pmu_stop(void)
-{
-       pause_local_counters();
-#ifdef CONFIG_MIPS_MT_SMP
-       write_lock(&pmuint_rwlock);
-#endif
-}
-
-static int
-mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc,
-                       struct hw_perf_event *hwc)
-{
-       int i;
-
-       /*
-        * We only need to care the counter mask. The range has been
-        * checked definitely.
-        */
-       unsigned long cntr_mask = (hwc->event_base >> 8) & 0xffff;
-
-       for (i = mipspmu->num_counters - 1; i >= 0; i--) {
-               /*
-                * Note that some MIPS perf events can be counted by both
-                * even and odd counters, wheresas many other are only by
-                * even _or_ odd counters. This introduces an issue that
-                * when the former kind of event takes the counter the
-                * latter kind of event wants to use, then the "counter
-                * allocation" for the latter event will fail. In fact if
-                * they can be dynamically swapped, they both feel happy.
-                * But here we leave this issue alone for now.
-                */
-               if (test_bit(i, &cntr_mask) &&
-                       !test_and_set_bit(i, cpuc->used_mask))
-                       return i;
-       }
-
-       return -EAGAIN;
-}
-
-static void
-mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       unsigned long flags;
-
-       WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
-
-       local_irq_save(flags);
-       cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
-               (evt->config_base & M_PERFCTL_CONFIG_MASK) |
-               /* Make sure interrupt enabled. */
-               M_PERFCTL_INTERRUPT_ENABLE;
-       /*
-        * We do not actually let the counter run. Leave it until start().
-        */
-       local_irq_restore(flags);
-}
-
-static void
-mipsxx_pmu_disable_event(int idx)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       unsigned long flags;
-
-       WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
-
-       local_irq_save(flags);
-       cpuc->saved_ctrl[idx] = mipsxx_pmu_read_control(idx) &
-               ~M_PERFCTL_COUNT_EVENT_WHENEVER;
-       mipsxx_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
-       local_irq_restore(flags);
-}
-
 /* 24K */
 #define IS_UNSUPPORTED_24K_EVENT(r, b)                                 \
        ((b) == 12 || (r) == 151 || (r) == 152 || (b) == 26 ||          \
@@ -892,8 +1440,7 @@ mipsxx_pmu_disable_event(int idx)
  * then 128 needs to be added to 15 as the input for the event config,
  * i.e., 143 (0x8F) to be used.
  */
-static const struct mips_perf_event *
-mipsxx_pmu_map_raw_event(u64 config)
+static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
 {
        unsigned int raw_id = config & 0xff;
        unsigned int base_id = raw_id & 0x7f;
@@ -970,40 +1517,44 @@ mipsxx_pmu_map_raw_event(u64 config)
        return &raw_event;
 }
 
-static struct mips_pmu mipsxxcore_pmu = {
-       .handle_irq = mipsxx_pmu_handle_irq,
-       .handle_shared_irq = mipsxx_pmu_handle_shared_irq,
-       .start = mipsxx_pmu_start,
-       .stop = mipsxx_pmu_stop,
-       .alloc_counter = mipsxx_pmu_alloc_counter,
-       .read_counter = mipsxx_pmu_read_counter,
-       .write_counter = mipsxx_pmu_write_counter,
-       .enable_event = mipsxx_pmu_enable_event,
-       .disable_event = mipsxx_pmu_disable_event,
-       .map_raw_event = mipsxx_pmu_map_raw_event,
-       .general_event_map = &mipsxxcore_event_map,
-       .cache_event_map = &mipsxxcore_cache_map,
-};
+static const struct mips_perf_event *octeon_pmu_map_raw_event(u64 config)
+{
+       unsigned int raw_id = config & 0xff;
+       unsigned int base_id = raw_id & 0x7f;
 
-static struct mips_pmu mipsxx74Kcore_pmu = {
-       .handle_irq = mipsxx_pmu_handle_irq,
-       .handle_shared_irq = mipsxx_pmu_handle_shared_irq,
-       .start = mipsxx_pmu_start,
-       .stop = mipsxx_pmu_stop,
-       .alloc_counter = mipsxx_pmu_alloc_counter,
-       .read_counter = mipsxx_pmu_read_counter,
-       .write_counter = mipsxx_pmu_write_counter,
-       .enable_event = mipsxx_pmu_enable_event,
-       .disable_event = mipsxx_pmu_disable_event,
-       .map_raw_event = mipsxx_pmu_map_raw_event,
-       .general_event_map = &mipsxx74Kcore_event_map,
-       .cache_event_map = &mipsxx74Kcore_cache_map,
-};
+
+       raw_event.cntr_mask = CNTR_ALL;
+       raw_event.event_id = base_id;
+
+       if (current_cpu_type() == CPU_CAVIUM_OCTEON2) {
+               if (base_id > 0x42)
+                       return ERR_PTR(-EOPNOTSUPP);
+       } else {
+               if (base_id > 0x3a)
+                       return ERR_PTR(-EOPNOTSUPP);
+       }
+
+       switch (base_id) {
+       case 0x00:
+       case 0x0f:
+       case 0x1e:
+       case 0x1f:
+       case 0x2f:
+       case 0x34:
+       case 0x3b ... 0x3f:
+               return ERR_PTR(-EOPNOTSUPP);
+       default:
+               break;
+       }
+
+       return &raw_event;
+}
 
 static int __init
 init_hw_perf_events(void)
 {
        int counters, irq;
+       int counter_bits;
 
        pr_info("Performance counters: ");
 
@@ -1035,32 +1586,36 @@ init_hw_perf_events(void)
        }
 #endif
 
-       on_each_cpu(reset_counters, (void *)(long)counters, 1);
+       mipspmu.map_raw_event = mipsxx_pmu_map_raw_event;
 
        switch (current_cpu_type()) {
        case CPU_24K:
-               mipsxxcore_pmu.name = "mips/24K";
-               mipsxxcore_pmu.num_counters = counters;
-               mipsxxcore_pmu.irq = irq;
-               mipspmu = &mipsxxcore_pmu;
+               mipspmu.name = "mips/24K";
+               mipspmu.general_event_map = &mipsxxcore_event_map;
+               mipspmu.cache_event_map = &mipsxxcore_cache_map;
                break;
        case CPU_34K:
-               mipsxxcore_pmu.name = "mips/34K";
-               mipsxxcore_pmu.num_counters = counters;
-               mipsxxcore_pmu.irq = irq;
-               mipspmu = &mipsxxcore_pmu;
+               mipspmu.name = "mips/34K";
+               mipspmu.general_event_map = &mipsxxcore_event_map;
+               mipspmu.cache_event_map = &mipsxxcore_cache_map;
                break;
        case CPU_74K:
-               mipsxx74Kcore_pmu.name = "mips/74K";
-               mipsxx74Kcore_pmu.num_counters = counters;
-               mipsxx74Kcore_pmu.irq = irq;
-               mipspmu = &mipsxx74Kcore_pmu;
+               mipspmu.name = "mips/74K";
+               mipspmu.general_event_map = &mipsxx74Kcore_event_map;
+               mipspmu.cache_event_map = &mipsxx74Kcore_cache_map;
                break;
        case CPU_1004K:
-               mipsxxcore_pmu.name = "mips/1004K";
-               mipsxxcore_pmu.num_counters = counters;
-               mipsxxcore_pmu.irq = irq;
-               mipspmu = &mipsxxcore_pmu;
+               mipspmu.name = "mips/1004K";
+               mipspmu.general_event_map = &mipsxxcore_event_map;
+               mipspmu.cache_event_map = &mipsxxcore_cache_map;
+               break;
+       case CPU_CAVIUM_OCTEON:
+       case CPU_CAVIUM_OCTEON_PLUS:
+       case CPU_CAVIUM_OCTEON2:
+               mipspmu.name = "octeon";
+               mipspmu.general_event_map = &octeon_event_map;
+               mipspmu.cache_event_map = &octeon_cache_map;
+               mipspmu.map_raw_event = octeon_pmu_map_raw_event;
                break;
        default:
                pr_cont("Either hardware does not support performance "
@@ -1068,15 +1623,33 @@ init_hw_perf_events(void)
                return -ENODEV;
        }
 
-       if (mipspmu)
-               pr_cont("%s PMU enabled, %d counters available to each "
-                       "CPU, irq %d%s\n", mipspmu->name, counters, irq,
-                       irq < 0 ? " (share with timer interrupt)" : "");
+       mipspmu.num_counters = counters;
+       mipspmu.irq = irq;
+
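+       /*
+        * A set M_PERFCTL_WIDE bit means the counters are 64 bits wide:
+        * pick the matching accessors and overflow threshold.
+        */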
+       if (read_c0_perfctrl0() & M_PERFCTL_WIDE) {
+               mipspmu.max_period = (1ULL << 63) - 1;
+               mipspmu.valid_count = (1ULL << 63) - 1;
+               mipspmu.overflow = 1ULL << 63;
+               mipspmu.read_counter = mipsxx_pmu_read_counter_64;
+               mipspmu.write_counter = mipsxx_pmu_write_counter_64;
+               counter_bits = 64;
+       } else {
+               mipspmu.max_period = (1ULL << 31) - 1;
+               mipspmu.valid_count = (1ULL << 31) - 1;
+               mipspmu.overflow = 1ULL << 31;
+               mipspmu.read_counter = mipsxx_pmu_read_counter;
+               mipspmu.write_counter = mipsxx_pmu_write_counter;
+               counter_bits = 32;
+       }
+
+       on_each_cpu(reset_counters, (void *)(long)counters, 1);
+
+       pr_cont("%s PMU enabled, %d %d-bit counters available to each "
+               "CPU, irq %d%s\n", mipspmu.name, counters, counter_bits, irq,
+               irq < 0 ? " (share with timer interrupt)" : "");
 
        perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
        return 0;
 }
 early_initcall(init_hw_perf_events);
-
-#endif /* defined(CONFIG_CPU_MIPS32)... */
index 865bc7a6f5a19ed32d150886658894b215f3617d..47920657968d2e65368edd0c2cbd39c6a5052ef2 100644 (file)
@@ -496,7 +496,7 @@ einval:     li      v0, -ENOSYS
        sys     sys_lookup_dcookie      4
        sys     sys_epoll_create        1
        sys     sys_epoll_ctl           4
-       sys     sys_epoll_wait          3       /* 4250 */
+       sys     sys_epoll_wait          4       /* 4250 */
        sys     sys_remap_file_pages    5
        sys     sys_set_tid_address     1
        sys     sys_restart_syscall     0
index 16c4d256b76f3f7179e5a77b856bd3a7a3825a52..daa81f7284ac89a5411b88d86f77ff6df1585f55 100644 (file)
@@ -169,6 +169,10 @@ static void octeon_flush_cache_page(struct vm_area_struct *vma,
                octeon_flush_icache_all_cores(vma);
 }
 
+static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size)
+{
+       BUG();
+}
 
 /**
  * Probe Octeon's caches
@@ -273,6 +277,8 @@ void __cpuinit octeon_cache_init(void)
        flush_icache_range              = octeon_flush_icache_range;
        local_flush_icache_range        = local_octeon_flush_icache_range;
 
+       __flush_kernel_vmap_range       = octeon_flush_kernel_vmap_range;
+
        build_clear_page();
        build_copy_page();
 }
index e6b0efd3f6a4ed0746141c0ecdad4a9b22c71d76..0765583d0c924f6252ccb313052c46be2d81315e 100644 (file)
@@ -299,6 +299,11 @@ static void r3k_flush_cache_sigtramp(unsigned long addr)
        write_c0_status(flags);
 }
 
+static void r3k_flush_kernel_vmap_range(unsigned long vaddr, int size)
+{
+       BUG();
+}
+
 static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size)
 {
        /* Catch bad driver code */
@@ -323,6 +328,8 @@ void __cpuinit r3k_cache_init(void)
        flush_icache_range = r3k_flush_icache_range;
        local_flush_icache_range = r3k_flush_icache_range;
 
+       __flush_kernel_vmap_range = r3k_flush_kernel_vmap_range;
+
        flush_cache_sigtramp = r3k_flush_cache_sigtramp;
        local_flush_data_cache_page = local_r3k_flush_data_cache_page;
        flush_data_cache_page = r3k_flush_data_cache_page;
index b9aabb998a32ab5856fb53e910785063eea78613..a79fe9aa7721aa56f05d8ab7995b6503cd98f618 100644 (file)
@@ -722,6 +722,39 @@ static void r4k_flush_icache_all(void)
                r4k_blast_icache();
 }
 
+struct flush_kernel_vmap_range_args {
+       unsigned long   vaddr;
+       int             size;
+};
+
+static inline void local_r4k_flush_kernel_vmap_range(void *args)
+{
+       struct flush_kernel_vmap_range_args *vmra = args;
+       unsigned long vaddr = vmra->vaddr;
+       int size = vmra->size;
+
+       /*
+        * Aliases only affect the primary caches so don't bother with
+        * S-caches or T-caches.
+        */
+       if (cpu_has_safe_index_cacheops && size >= dcache_size) {
+               r4k_blast_dcache();
+       } else {
+               R4600_HIT_CACHEOP_WAR_IMPL;
+               blast_dcache_range(vaddr, vaddr + size);
+       }
+}
+
+static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size)
+{
+       struct flush_kernel_vmap_range_args args;
+
+       args.vaddr = vaddr;
+       args.size = size;
+
+       r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args);
+}
+
 static inline void rm7k_erratum31(void)
 {
        const unsigned long ic_lsize = 32;
@@ -1403,6 +1436,8 @@ void __cpuinit r4k_cache_init(void)
        flush_cache_page        = r4k_flush_cache_page;
        flush_cache_range       = r4k_flush_cache_range;
 
+       __flush_kernel_vmap_range = r4k_flush_kernel_vmap_range;
+
        flush_cache_sigtramp    = r4k_flush_cache_sigtramp;
        flush_icache_all        = r4k_flush_icache_all;
        local_flush_data_cache_page     = local_r4k_flush_data_cache_page;
index d352fad3e45101b03191fcdd994adfdb6444f9de..a43c197ccf8c48bd02f27281cba92df44221137b 100644 (file)
@@ -253,6 +253,11 @@ static void tx39_flush_icache_range(unsigned long start, unsigned long end)
        }
 }
 
+static void tx39_flush_kernel_vmap_range(unsigned long vaddr, int size)
+{
+       BUG();
+}
+
 static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 {
        unsigned long end;
@@ -394,6 +399,8 @@ void __cpuinit tx39_cache_init(void)
                flush_icache_range = tx39_flush_icache_range;
                local_flush_icache_range = tx39_flush_icache_range;
 
+               __flush_kernel_vmap_range = tx39_flush_kernel_vmap_range;
+
                flush_cache_sigtramp = tx39_flush_cache_sigtramp;
                local_flush_data_cache_page = local_tx39_flush_data_cache_page;
                flush_data_cache_page = tx39_flush_data_cache_page;
index 12af739048fada0927f50e414f30097b8bbb96d6..829320c7b175372f3695248aee63329f18f7661f 100644 (file)
@@ -35,6 +35,11 @@ void (*local_flush_icache_range)(unsigned long start, unsigned long end);
 void (*__flush_cache_vmap)(void);
 void (*__flush_cache_vunmap)(void);
 
+void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size);
+void (*__invalidate_kernel_vmap_range)(unsigned long vaddr, int size);
+
+EXPORT_SYMBOL_GPL(__flush_kernel_vmap_range);
+
 /* MIPS specific cache operations */
 void (*flush_cache_sigtramp)(unsigned long addr);
 void (*local_flush_data_cache_page)(void * addr);
index 40424affef837ce90ddd108e9c494f356c367c53..87bb85d8d537064d7e3b14b8cc5f342c589a484f 100644 (file)
@@ -223,8 +223,8 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
        local_irq_restore(flags);
 }
 
-void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
-                           unsigned long entryhi, unsigned long pagemask)
+void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
+                    unsigned long entryhi, unsigned long pagemask)
 {
        unsigned long flags;
        unsigned long old_ctx;
index ba40325caea66c0c1e2e79337dd11844ae6a8606..0d394e0e8837f640d84fa87003d763f0dc0bd10d 100644 (file)
@@ -337,8 +337,8 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
        EXIT_CRITICAL(flags);
 }
 
-void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
-       unsigned long entryhi, unsigned long pagemask)
+void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
+                    unsigned long entryhi, unsigned long pagemask)
 {
        unsigned long flags;
        unsigned long wired;
index f87c1640abb5cd13894c8c2d953c3b4b6e73b218..b648b487fd6674b2cdcedb1420e1d5759decba92 100644 (file)
@@ -4,6 +4,11 @@
 cflags-$(CONFIG_NLM_COMMON)  += -I$(srctree)/arch/mips/include/asm/mach-netlogic
 cflags-$(CONFIG_NLM_COMMON)  += -I$(srctree)/arch/mips/include/asm/netlogic
 
+#
+# Fall back to -march=mips64 when the compiler does not support -march=xlr.
+#
+cflags-$(CONFIG_NLM_XLR)       += $(call cc-option,-march=xlr,-march=mips64)
+
 #
 # NETLOGIC XLR/XLS SoC, Simulator and boards
 #
index 482802569e74919298d35c829327c0ee7d5b0350..cee25ddd0887470ebe167b5e0cb6a91d2e5d3a1e 100644 (file)
@@ -53,7 +53,7 @@ unsigned long netlogic_io_base = (unsigned long)(DEFAULT_NETLOGIC_IO_BASE);
 unsigned long nlm_common_ebase = 0x0;
 struct psb_info nlm_prom_info;
 
-static void nlm_early_serial_setup(void)
+static void __init nlm_early_serial_setup(void)
 {
        struct uart_port s;
        nlm_reg_t *uart_base;
@@ -101,7 +101,7 @@ void __init prom_free_prom_memory(void)
        /* Nothing yet */
 }
 
-static void build_arcs_cmdline(int *argv)
+static void __init build_arcs_cmdline(int *argv)
 {
        int i, remain, len;
        char *arg;
index d842bce5c9405a2f267bb7f3de5cfda97f8e835e..080284ded508369241077d11c8d4176ea76cc825 100644 (file)
@@ -158,6 +158,10 @@ void __init nlm_smp_setup(void)
 
        num_cpus = 1;
        for (i = 0; i < NR_CPUS; i++) {
+               /*
+                * The BSP is not set in the nlm_cpu_ready array; only the
+                * ASPs are (see smpboot.S).
+                */
                if (nlm_cpu_ready[i]) {
                        cpu_set(i, phys_cpu_present_map);
                        __cpu_number_map[i] = num_cpus;
@@ -191,7 +195,7 @@ struct plat_smp_ops nlm_smp_ops = {
 
 unsigned long secondary_entry_point;
 
-int nlm_wakeup_secondary_cpus(u32 wakeup_mask)
+int __cpuinit nlm_wakeup_secondary_cpus(u32 wakeup_mask)
 {
        unsigned int tid, pid, ipi, i, boot_cpu;
        void *reset_vec;
index b8e074402c990916aebc933568e3f80b633cf52e..8cb7889ce0cca112bef5b2d9ea06ece68aae0689 100644 (file)
  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/init.h>
+
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/regdef.h>
 #include <asm/mipsregs.h>
 
-
-/* Don't jump to linux function from Bootloader stack. Change it
- * here. Kernel might allocate bootloader memory before all the CPUs are
- * brought up (eg: Inode cache region) and we better don't overwrite this
- * memory
+/*
+ * Early code for secondary CPUs. This will get them out of the bootloader
+ * code and into linux. Needed because the bootloader area will be taken
+ * and initialized by linux.
  */
+       __CPUINIT
 NESTED(prom_pre_boot_secondary_cpus, 16, sp)
        .set    mips64
        mfc0    t0, $15, 1      # read ebase
@@ -73,7 +75,11 @@ NESTED(prom_pre_boot_secondary_cpus, 16, sp)
        jr      t0
        nop
 END(prom_pre_boot_secondary_cpus)
+       __FINIT
 
+/*
+ * NMI code, used for CPU wakeup, copied to reset entry
+ */
 NESTED(nlm_boot_smp_nmi, 0, sp)
        .set push
        .set noat
index 4df879937446c16d252e9d3a9d00922f5e4c15f3..bb82cbdbc62a8329a887dd275d56043a2f043b1e 100644 (file)
@@ -18,14 +18,13 @@ obj-$(CONFIG_PCI_TX4927)    += ops-tx4927.o
 obj-$(CONFIG_BCM47XX)          += pci-bcm47xx.o
 obj-$(CONFIG_BCM63XX)          += pci-bcm63xx.o fixup-bcm63xx.o \
                                        ops-bcm63xx.o
+obj-$(CONFIG_MIPS_ALCHEMY)     += pci-alchemy.o
 
 #
 # These are still pretty much in the old state, watch, go blind.
 #
 obj-$(CONFIG_LASAT)            += pci-lasat.o
 obj-$(CONFIG_MIPS_COBALT)      += fixup-cobalt.o
-obj-$(CONFIG_SOC_AU1500)       += fixup-au1000.o ops-au1000.o
-obj-$(CONFIG_SOC_AU1550)       += fixup-au1000.o ops-au1000.o
 obj-$(CONFIG_SOC_PNX8550)      += fixup-pnx8550.o ops-pnx8550.o
 obj-$(CONFIG_LEMOTE_FULOONG2E) += fixup-fuloong2e.o ops-loongson2.o
 obj-$(CONFIG_LEMOTE_MACH2F)    += fixup-lemote2f.o ops-loongson2.o
diff --git a/arch/mips/pci/fixup-au1000.c b/arch/mips/pci/fixup-au1000.c
deleted file mode 100644 (file)
index e2ddfc4..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * BRIEF MODULE DESCRIPTION
- *     Board specific PCI fixups.
- *
- * Copyright 2001-2003, 2008 MontaVista Software Inc.
- * Author: MontaVista Software, Inc. <source@mvista.com>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/pci.h>
-#include <linux/init.h>
-
-extern char irq_tab_alchemy[][5];
-
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
-       return irq_tab_alchemy[slot][pin];
-}
-
-/* Do platform specific device initialization at pci_enable_device() time */
-int pcibios_plat_dev_init(struct pci_dev *dev)
-{
-       return 0;
-}
diff --git a/arch/mips/pci/ops-au1000.c b/arch/mips/pci/ops-au1000.c
deleted file mode 100644 (file)
index 9a57c5a..0000000
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- * BRIEF MODULE DESCRIPTION
- *     Alchemy/AMD Au1xx0 PCI support.
- *
- * Copyright 2001-2003, 2007-2008 MontaVista Software Inc.
- * Author: MontaVista Software, Inc. <source@mvista.com>
- *
- *  Support for all devices (greater than 16) added by David Gathright.
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-
-#include <asm/mach-au1x00/au1000.h>
-
-#undef DEBUG
-#ifdef DEBUG
-#define DBG(x...) printk(KERN_DEBUG x)
-#else
-#define DBG(x...)
-#endif
-
-#define PCI_ACCESS_READ  0
-#define PCI_ACCESS_WRITE 1
-
-int (*board_pci_idsel)(unsigned int devsel, int assert);
-
-void mod_wired_entry(int entry, unsigned long entrylo0,
-               unsigned long entrylo1, unsigned long entryhi,
-               unsigned long pagemask)
-{
-       unsigned long old_pagemask;
-       unsigned long old_ctx;
-
-       /* Save old context and create impossible VPN2 value */
-       old_ctx = read_c0_entryhi() & 0xff;
-       old_pagemask = read_c0_pagemask();
-       write_c0_index(entry);
-       write_c0_pagemask(pagemask);
-       write_c0_entryhi(entryhi);
-       write_c0_entrylo0(entrylo0);
-       write_c0_entrylo1(entrylo1);
-       tlb_write_indexed();
-       write_c0_entryhi(old_ctx);
-       write_c0_pagemask(old_pagemask);
-}
-
-static struct vm_struct *pci_cfg_vm;
-static int pci_cfg_wired_entry;
-static unsigned long last_entryLo0, last_entryLo1;
-
-/*
- * We can't ioremap the entire pci config space because it's too large.
- * Nor can we call ioremap dynamically because some device drivers use
- * the PCI config routines from within interrupt handlers and that
- * becomes a problem in get_vm_area().  We use one wired TLB to handle
- * all config accesses for all busses.
- */
-void __init au1x_pci_cfg_init(void)
-{
-       /* Reserve a wired entry for PCI config accesses */
-       pci_cfg_vm = get_vm_area(0x2000, VM_IOREMAP);
-       if (!pci_cfg_vm)
-               panic(KERN_ERR "PCI unable to get vm area\n");
-       pci_cfg_wired_entry = read_c0_wired();
-       add_wired_entry(0, 0, (unsigned long)pci_cfg_vm->addr, PM_4K);
-       last_entryLo0 = last_entryLo1 = 0xffffffff;
-}
-
-static int config_access(unsigned char access_type, struct pci_bus *bus,
-                        unsigned int dev_fn, unsigned char where, u32 *data)
-{
-#if defined(CONFIG_SOC_AU1500) || defined(CONFIG_SOC_AU1550)
-       unsigned int device = PCI_SLOT(dev_fn);
-       unsigned int function = PCI_FUNC(dev_fn);
-       unsigned long offset, status;
-       unsigned long cfg_base;
-       unsigned long flags;
-       int error = PCIBIOS_SUCCESSFUL;
-       unsigned long entryLo0, entryLo1;
-
-       if (device > 19) {
-               *data = 0xffffffff;
-               return -1;
-       }
-
-       local_irq_save(flags);
-       au_writel(((0x2000 << 16) | (au_readl(Au1500_PCI_STATCMD) & 0xffff)),
-                       Au1500_PCI_STATCMD);
-       au_sync_udelay(1);
-
-       /*
-        * Allow board vendors to implement their own off-chip IDSEL.
-        * If it doesn't succeed, may as well bail out at this point.
-        */
-       if (board_pci_idsel && board_pci_idsel(device, 1) == 0) {
-               *data = 0xffffffff;
-               local_irq_restore(flags);
-               return -1;
-       }
-
-       /* Setup the config window */
-       if (bus->number == 0)
-               cfg_base = (1 << device) << 11;
-       else
-               cfg_base = 0x80000000 | (bus->number << 16) | (device << 11);
-
-       /* Setup the lower bits of the 36-bit address */
-       offset = (function << 8) | (where & ~0x3);
-       /* Pick up any address that falls below the page mask */
-       offset |= cfg_base & ~PAGE_MASK;
-
-       /* Page boundary */
-       cfg_base = cfg_base & PAGE_MASK;
-
-       /*
-        * To improve performance, if the current device is the same as
-        * the last device accessed, we don't touch the TLB.
-        */
-       entryLo0 = (6 << 26) | (cfg_base >> 6) | (2 << 3) | 7;
-       entryLo1 = (6 << 26) | (cfg_base >> 6) | (0x1000 >> 6) | (2 << 3) | 7;
-       if ((entryLo0 != last_entryLo0) || (entryLo1 != last_entryLo1)) {
-               mod_wired_entry(pci_cfg_wired_entry, entryLo0, entryLo1,
-                               (unsigned long)pci_cfg_vm->addr, PM_4K);
-               last_entryLo0 = entryLo0;
-               last_entryLo1 = entryLo1;
-       }
-
-       if (access_type == PCI_ACCESS_WRITE)
-               au_writel(*data, (int)(pci_cfg_vm->addr + offset));
-       else
-               *data = au_readl((int)(pci_cfg_vm->addr + offset));
-
-       au_sync_udelay(2);
-
-       DBG("cfg_access %d bus->number %u dev %u at %x *data %x conf %lx\n",
-           access_type, bus->number, device, where, *data, offset);
-
-       /* Check master abort */
-       status = au_readl(Au1500_PCI_STATCMD);
-
-       if (status & (1 << 29)) {
-               *data = 0xffffffff;
-               error = -1;
-               DBG("Au1x Master Abort\n");
-       } else if ((status >> 28) & 0xf) {
-               DBG("PCI ERR detected: device %u, status %lx\n",
-                   device, (status >> 28) & 0xf);
-
-               /* Clear errors */
-               au_writel(status & 0xf000ffff, Au1500_PCI_STATCMD);
-
-               *data = 0xffffffff;
-               error = -1;
-       }
-
-       /* Take away the IDSEL. */
-       if (board_pci_idsel)
-               (void)board_pci_idsel(device, 0);
-
-       local_irq_restore(flags);
-       return error;
-#endif
-}
-
-static int read_config_byte(struct pci_bus *bus, unsigned int devfn,
-                           int where,  u8 *val)
-{
-       u32 data;
-       int ret;
-
-       ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data);
-       if (where & 1)
-               data >>= 8;
-       if (where & 2)
-               data >>= 16;
-       *val = data & 0xff;
-       return ret;
-}
-
-static int read_config_word(struct pci_bus *bus, unsigned int devfn,
-                           int where, u16 *val)
-{
-       u32 data;
-       int ret;
-
-       ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data);
-       if (where & 2)
-               data >>= 16;
-       *val = data & 0xffff;
-       return ret;
-}
-
-static int read_config_dword(struct pci_bus *bus, unsigned int devfn,
-                            int where, u32 *val)
-{
-       int ret;
-
-       ret = config_access(PCI_ACCESS_READ, bus, devfn, where, val);
-       return ret;
-}
-
-static int write_config_byte(struct pci_bus *bus, unsigned int devfn,
-                            int where, u8 val)
-{
-       u32 data = 0;
-
-       if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
-               return -1;
-
-       data = (data & ~(0xff << ((where & 3) << 3))) |
-              (val << ((where & 3) << 3));
-
-       if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
-               return -1;
-
-       return PCIBIOS_SUCCESSFUL;
-}
-
-static int write_config_word(struct pci_bus *bus, unsigned int devfn,
-                            int where, u16 val)
-{
-       u32 data = 0;
-
-       if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
-               return -1;
-
-       data = (data & ~(0xffff << ((where & 3) << 3))) |
-              (val << ((where & 3) << 3));
-
-       if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
-               return -1;
-
-       return PCIBIOS_SUCCESSFUL;
-}
-
-static int write_config_dword(struct pci_bus *bus, unsigned int devfn,
-                             int where, u32 val)
-{
-       if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &val))
-               return -1;
-
-       return PCIBIOS_SUCCESSFUL;
-}
-
-static int config_read(struct pci_bus *bus, unsigned int devfn,
-                      int where, int size, u32 *val)
-{
-       switch (size) {
-       case 1: {
-                       u8 _val;
-                       int rc = read_config_byte(bus, devfn, where, &_val);
-
-                       *val = _val;
-                       return rc;
-               }
-       case 2: {
-                       u16 _val;
-                       int rc = read_config_word(bus, devfn, where, &_val);
-
-                       *val = _val;
-                       return rc;
-               }
-       default:
-               return read_config_dword(bus, devfn, where, val);
-       }
-}
-
-static int config_write(struct pci_bus *bus, unsigned int devfn,
-                       int where, int size, u32 val)
-{
-       switch (size) {
-       case 1:
-               return write_config_byte(bus, devfn, where, (u8) val);
-       case 2:
-               return write_config_word(bus, devfn, where, (u16) val);
-       default:
-               return write_config_dword(bus, devfn, where, val);
-       }
-}
-
-struct pci_ops au1x_pci_ops = {
-       config_read,
-       config_write
-};
diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c
new file mode 100644 (file)
index 0000000..4ee5710
--- /dev/null
@@ -0,0 +1,516 @@
+/*
+ * Alchemy PCI host mode support.
+ *
+ * Copyright 2001-2003, 2007-2008 MontaVista Software Inc.
+ * Author: MontaVista Software, Inc. <source@mvista.com>
+ *
+ * Support for all devices (greater than 16) added by David Gathright.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+
+#include <asm/mach-au1x00/au1000.h>
+
+#ifdef CONFIG_DEBUG_PCI
+#define DBG(x...) printk(KERN_DEBUG x)
+#else
+#define DBG(x...) do {} while (0)
+#endif
+
+#define PCI_ACCESS_READ                0
+#define PCI_ACCESS_WRITE       1
+
+struct alchemy_pci_context {
+       struct pci_controller alchemy_pci_ctrl; /* leave as first member! */
+       void __iomem *regs;                     /* ctrl base */
+       /* tools for wired entry for config space access */
+       unsigned long last_elo0;
+       unsigned long last_elo1;
+       int wired_entry;
+       struct vm_struct *pci_cfg_vm;
+
+       unsigned long pm[12];
+
+       int (*board_map_irq)(const struct pci_dev *d, u8 slot, u8 pin);
+       int (*board_pci_idsel)(unsigned int devsel, int assert);
+};
+
+/* IO/MEM resources for PCI. Keep the memres in sync with __fixup_bigphys_addr
+ * in arch/mips/alchemy/common/setup.c
+ */
+static struct resource alchemy_pci_def_memres = {
+       .start  = ALCHEMY_PCI_MEMWIN_START,
+       .end    = ALCHEMY_PCI_MEMWIN_END,
+       .name   = "PCI memory space",
+       .flags  = IORESOURCE_MEM
+};
+
+static struct resource alchemy_pci_def_iores = {
+       .start  = ALCHEMY_PCI_IOWIN_START,
+       .end    = ALCHEMY_PCI_IOWIN_END,
+       .name   = "PCI IO space",
+       .flags  = IORESOURCE_IO
+};
+
+static void mod_wired_entry(int entry, unsigned long entrylo0,
+               unsigned long entrylo1, unsigned long entryhi,
+               unsigned long pagemask)
+{
+       unsigned long old_pagemask;
+       unsigned long old_ctx;
+
+       /* Save old context and create impossible VPN2 value */
+       old_ctx = read_c0_entryhi() & 0xff;
+       old_pagemask = read_c0_pagemask();
+       write_c0_index(entry);
+       write_c0_pagemask(pagemask);
+       write_c0_entryhi(entryhi);
+       write_c0_entrylo0(entrylo0);
+       write_c0_entrylo1(entrylo1);
+       tlb_write_indexed();
+       write_c0_entryhi(old_ctx);
+       write_c0_pagemask(old_pagemask);
+}
+
+static void alchemy_pci_wired_entry(struct alchemy_pci_context *ctx)
+{
+       ctx->wired_entry = read_c0_wired();
+       add_wired_entry(0, 0, (unsigned long)ctx->pci_cfg_vm->addr, PM_4K);
+       ctx->last_elo0 = ctx->last_elo1 = ~0;
+}
+
+static int config_access(unsigned char access_type, struct pci_bus *bus,
+                        unsigned int dev_fn, unsigned char where, u32 *data)
+{
+       struct alchemy_pci_context *ctx = bus->sysdata;
+       unsigned int device = PCI_SLOT(dev_fn);
+       unsigned int function = PCI_FUNC(dev_fn);
+       unsigned long offset, status, cfg_base, flags, entryLo0, entryLo1, r;
+       int error = PCIBIOS_SUCCESSFUL;
+
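+       /* The one-hot IDSEL mapping below uses address lines 11..30, which
+        * limits configuration access to device numbers 0-19.
+        */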
+       if (device > 19) {
+               *data = 0xffffffff;
+               return -1;
+       }
+
+       /* YAMON on all db1xxx boards wipes the TLB and writes zero to C0_wired
+        * on resume, clearing our wired entry.  Unfortunately the ->resume()
+        * callback is called far too late (and ->suspend() too early) to
+        * destroy and recreate the entry there.  Instead, test whether
+        * c0_wired is now lower than the index we retrieved before
+        * suspending and recreate the entry if necessary.  This is fragile
+        * and breaks as soon as someone else adds another wired entry
+        * somewhere else.  Anyone have a better way to handle this?
+        */
+       if (unlikely(read_c0_wired() < ctx->wired_entry))
+               alchemy_pci_wired_entry(ctx);
+
+       local_irq_save(flags);
+       r = __raw_readl(ctx->regs + PCI_REG_STATCMD) & 0x0000ffff;
+       r |= PCI_STATCMD_STATUS(0x2000);
+       __raw_writel(r, ctx->regs + PCI_REG_STATCMD);
+       wmb();
+
+       /* Allow board vendors to implement their own off-chip IDSEL.
+        * If it doesn't succeed, may as well bail out at this point.
+        */
+       if (ctx->board_pci_idsel(device, 1) == 0) {
+               *data = 0xffffffff;
+               local_irq_restore(flags);
+               return -1;
+       }
+
+       /* Set up the config window: one-hot IDSEL on the upper address
+        * lines for bus 0, bus/device encoding (type 1 cycles) otherwise.
+        */
+       if (bus->number == 0)
+               cfg_base = (1 << device) << 11;
+       else
+               cfg_base = 0x80000000 | (bus->number << 16) | (device << 11);
+
+       /* Setup the lower bits of the 36-bit address */
+       offset = (function << 8) | (where & ~0x3);
+       /* Pick up any address that falls below the page mask */
+       offset |= cfg_base & ~PAGE_MASK;
+
+       /* Page boundary */
+       cfg_base = cfg_base & PAGE_MASK;
+
+       /* To improve performance, if the current device is the same as
+        * the last device accessed, we don't touch the TLB.
+        */
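+       /* EntryLo pair: bits 35:32 = 6 select the Au1x PCI config aperture
+        * in the 36-bit physical space, the PFN comes from cfg_base, CCA 2
+        * marks the pages uncached, and D|V|G = 7 makes them writable,
+        * valid and global.
+        */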
+       entryLo0 = (6 << 26) | (cfg_base >> 6) | (2 << 3) | 7;
+       entryLo1 = (6 << 26) | (cfg_base >> 6) | (0x1000 >> 6) | (2 << 3) | 7;
+       if ((entryLo0 != ctx->last_elo0) || (entryLo1 != ctx->last_elo1)) {
+               mod_wired_entry(ctx->wired_entry, entryLo0, entryLo1,
+                               (unsigned long)ctx->pci_cfg_vm->addr, PM_4K);
+               ctx->last_elo0 = entryLo0;
+               ctx->last_elo1 = entryLo1;
+       }
+
+       if (access_type == PCI_ACCESS_WRITE)
+               __raw_writel(*data, ctx->pci_cfg_vm->addr + offset);
+       else
+               *data = __raw_readl(ctx->pci_cfg_vm->addr + offset);
+       wmb();
+
+       DBG("alchemy-pci: cfg access %d bus %u dev %u at %x dat %x conf %lx\n",
+           access_type, bus->number, device, where, *data, offset);
+
+       /* check for errors, master abort */
+       status = __raw_readl(ctx->regs + PCI_REG_STATCMD);
+       if (status & (1 << 29)) {
+               *data = 0xffffffff;
+               error = -1;
+               DBG("alchemy-pci: master abort on cfg access %d bus %d dev %d",
+                   access_type, bus->number, device);
+       } else if ((status >> 28) & 0xf) {
+               DBG("alchemy-pci: PCI ERR detected: dev %d, status %lx\n",
+                   device, (status >> 28) & 0xf);
+
+               /* clear errors */
+               __raw_writel(status & 0xf000ffff, ctx->regs + PCI_REG_STATCMD);
+
+               *data = 0xffffffff;
+               error = -1;
+       }
+
+       /* Take away the IDSEL. */
+       (void)ctx->board_pci_idsel(device, 0);
+
+       local_irq_restore(flags);
+       return error;
+}
+
+static int read_config_byte(struct pci_bus *bus, unsigned int devfn,
+                           int where,  u8 *val)
+{
+       u32 data;
+       int ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data);
+
+       if (where & 1)
+               data >>= 8;
+       if (where & 2)
+               data >>= 16;
+       *val = data & 0xff;
+       return ret;
+}
+
+static int read_config_word(struct pci_bus *bus, unsigned int devfn,
+                           int where, u16 *val)
+{
+       u32 data;
+       int ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data);
+
+       if (where & 2)
+               data >>= 16;
+       *val = data & 0xffff;
+       return ret;
+}
+
+static int read_config_dword(struct pci_bus *bus, unsigned int devfn,
+                            int where, u32 *val)
+{
+       return config_access(PCI_ACCESS_READ, bus, devfn, where, val);
+}
+
+static int write_config_byte(struct pci_bus *bus, unsigned int devfn,
+                            int where, u8 val)
+{
+       u32 data = 0;
+
+       if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
+               return -1;
+
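+       /* Read-modify-write: splice the byte into its lane of the 32-bit
+        * configuration word; (where & 3) selects the lane.
+        */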
+       data = (data & ~(0xff << ((where & 3) << 3))) |
+              (val << ((where & 3) << 3));
+
+       if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
+               return -1;
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int write_config_word(struct pci_bus *bus, unsigned int devfn,
+                            int where, u16 val)
+{
+       u32 data = 0;
+
+       if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
+               return -1;
+
+       data = (data & ~(0xffff << ((where & 3) << 3))) |
+              (val << ((where & 3) << 3));
+
+       if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
+               return -1;
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int write_config_dword(struct pci_bus *bus, unsigned int devfn,
+                             int where, u32 val)
+{
+       return config_access(PCI_ACCESS_WRITE, bus, devfn, where, &val);
+}
+
+static int alchemy_pci_read(struct pci_bus *bus, unsigned int devfn,
+                      int where, int size, u32 *val)
+{
+       switch (size) {
+       case 1: {
+                       u8 _val;
+                       int rc = read_config_byte(bus, devfn, where, &_val);
+
+                       *val = _val;
+                       return rc;
+               }
+       case 2: {
+                       u16 _val;
+                       int rc = read_config_word(bus, devfn, where, &_val);
+
+                       *val = _val;
+                       return rc;
+               }
+       default:
+               return read_config_dword(bus, devfn, where, val);
+       }
+}
+
+static int alchemy_pci_write(struct pci_bus *bus, unsigned int devfn,
+                            int where, int size, u32 val)
+{
+       switch (size) {
+       case 1:
+               return write_config_byte(bus, devfn, where, (u8) val);
+       case 2:
+               return write_config_word(bus, devfn, where, (u16) val);
+       default:
+               return write_config_dword(bus, devfn, where, val);
+       }
+}
+
+static struct pci_ops alchemy_pci_ops = {
+       .read   = alchemy_pci_read,
+       .write  = alchemy_pci_write,
+};
+
+static int alchemy_pci_def_idsel(unsigned int devsel, int assert)
+{
+       return 1;       /* success */
+}
+
+static int __devinit alchemy_pci_probe(struct platform_device *pdev)
+{
+       struct alchemy_pci_platdata *pd = pdev->dev.platform_data;
+       struct alchemy_pci_context *ctx;
+       void __iomem *virt_io;
+       unsigned long val;
+       struct resource *r;
+       int ret;
+
+       /* need at least PCI IRQ mapping table */
+       if (!pd) {
+               dev_err(&pdev->dev, "need platform data for PCI setup\n");
+               ret = -ENODEV;
+               goto out;
+       }
+
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (!ctx) {
+               dev_err(&pdev->dev, "no memory for pcictl context\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!r) {
+               dev_err(&pdev->dev, "no  pcictl ctrl regs resource\n");
+               ret = -ENODEV;
+               goto out1;
+       }
+
+       if (!request_mem_region(r->start, resource_size(r), pdev->name)) {
+               dev_err(&pdev->dev, "cannot claim pci regs\n");
+               ret = -ENODEV;
+               goto out1;
+       }
+
+       ctx->regs = ioremap_nocache(r->start, resource_size(r));
+       if (!ctx->regs) {
+               dev_err(&pdev->dev, "cannot map pci regs\n");
+               ret = -ENODEV;
+               goto out2;
+       }
+
+       /* map parts of the PCI IO area */
+       /* REVISIT: if this changes with a newer variant (doubt it) make this
+        * a platform resource.
+        */
+       virt_io = ioremap(AU1500_PCI_IO_PHYS_ADDR, 0x00100000);
+       if (!virt_io) {
+               dev_err(&pdev->dev, "cannot remap pci io space\n");
+               ret = -ENODEV;
+               goto out3;
+       }
+       ctx->alchemy_pci_ctrl.io_map_base = (unsigned long)virt_io;
+
+#ifdef CONFIG_DMA_NONCOHERENT
+       /* Au1500 revisions older than AD have borked coherent PCI */
+       if ((alchemy_get_cputype() == ALCHEMY_CPU_AU1500) &&
+           (read_c0_prid() < 0x01030202)) {
+               val = __raw_readl(ctx->regs + PCI_REG_CONFIG);
+               val |= PCI_CONFIG_NC;
+               __raw_writel(val, ctx->regs + PCI_REG_CONFIG);
+               wmb();
+               dev_info(&pdev->dev, "non-coherent PCI on Au1500 AA/AB/AC\n");
+       }
+#endif
+
+       if (pd->board_map_irq)
+               ctx->board_map_irq = pd->board_map_irq;
+
+       if (pd->board_pci_idsel)
+               ctx->board_pci_idsel = pd->board_pci_idsel;
+       else
+               ctx->board_pci_idsel = alchemy_pci_def_idsel;
+
+       /* fill in relevant pci_controller members */
+       ctx->alchemy_pci_ctrl.pci_ops = &alchemy_pci_ops;
+       ctx->alchemy_pci_ctrl.mem_resource = &alchemy_pci_def_memres;
+       ctx->alchemy_pci_ctrl.io_resource = &alchemy_pci_def_iores;
+
+       /* we can't ioremap the entire pci config space because it's too large,
+        * nor can we dynamically ioremap it because some drivers use the
+        * PCI config routines from within atomic context and that becomes a
+        * problem in get_vm_area().  Instead we use one wired TLB entry to
+        * handle all config accesses for all busses.
+        */
+       ctx->pci_cfg_vm = get_vm_area(0x2000, VM_IOREMAP);
+       if (!ctx->pci_cfg_vm) {
+               dev_err(&pdev->dev, "unable to get vm area\n");
+               ret = -ENOMEM;
+               goto out4;
+       }
+       ctx->wired_entry = 8192;        /* impossibly high value */
+
+       set_io_port_base((unsigned long)ctx->alchemy_pci_ctrl.io_map_base);
+
+       /* board may want to modify bits in the config register, do it now */
+       val = __raw_readl(ctx->regs + PCI_REG_CONFIG);
+       val &= ~pd->pci_cfg_clr;
+       val |= pd->pci_cfg_set;
+       val &= ~PCI_CONFIG_PD;          /* clear disable bit */
+       __raw_writel(val, ctx->regs + PCI_REG_CONFIG);
+       wmb();
+
+       platform_set_drvdata(pdev, ctx);
+       register_pci_controller(&ctx->alchemy_pci_ctrl);
+
+       return 0;
+
+out4:
+       iounmap(virt_io);
+out3:
+       iounmap(ctx->regs);
+out2:
+       release_mem_region(r->start, resource_size(r));
+out1:
+       kfree(ctx);
+out:
+       return ret;
+}
+
+
+#ifdef CONFIG_PM
+/* save PCI controller register contents. */
+static int alchemy_pci_suspend(struct device *dev)
+{
+       struct alchemy_pci_context *ctx = dev_get_drvdata(dev);
+
+       ctx->pm[0]  = __raw_readl(ctx->regs + PCI_REG_CMEM);
+       ctx->pm[1]  = __raw_readl(ctx->regs + PCI_REG_CONFIG) & 0x0009ffff;
+       ctx->pm[2]  = __raw_readl(ctx->regs + PCI_REG_B2BMASK_CCH);
+       ctx->pm[3]  = __raw_readl(ctx->regs + PCI_REG_B2BBASE0_VID);
+       ctx->pm[4]  = __raw_readl(ctx->regs + PCI_REG_B2BBASE1_SID);
+       ctx->pm[5]  = __raw_readl(ctx->regs + PCI_REG_MWMASK_DEV);
+       ctx->pm[6]  = __raw_readl(ctx->regs + PCI_REG_MWBASE_REV_CCL);
+       ctx->pm[7]  = __raw_readl(ctx->regs + PCI_REG_ID);
+       ctx->pm[8]  = __raw_readl(ctx->regs + PCI_REG_CLASSREV);
+       ctx->pm[9]  = __raw_readl(ctx->regs + PCI_REG_PARAM);
+       ctx->pm[10] = __raw_readl(ctx->regs + PCI_REG_MBAR);
+       ctx->pm[11] = __raw_readl(ctx->regs + PCI_REG_TIMEOUT);
+
+       return 0;
+}
+
+static int alchemy_pci_resume(struct device *dev)
+{
+       struct alchemy_pci_context *ctx = dev_get_drvdata(dev);
+
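+       /* Restore the config register (pm[1]) last, once the window and ID
+        * registers are back in place.
+        */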
+       __raw_writel(ctx->pm[0],  ctx->regs + PCI_REG_CMEM);
+       __raw_writel(ctx->pm[2],  ctx->regs + PCI_REG_B2BMASK_CCH);
+       __raw_writel(ctx->pm[3],  ctx->regs + PCI_REG_B2BBASE0_VID);
+       __raw_writel(ctx->pm[4],  ctx->regs + PCI_REG_B2BBASE1_SID);
+       __raw_writel(ctx->pm[5],  ctx->regs + PCI_REG_MWMASK_DEV);
+       __raw_writel(ctx->pm[6],  ctx->regs + PCI_REG_MWBASE_REV_CCL);
+       __raw_writel(ctx->pm[7],  ctx->regs + PCI_REG_ID);
+       __raw_writel(ctx->pm[8],  ctx->regs + PCI_REG_CLASSREV);
+       __raw_writel(ctx->pm[9],  ctx->regs + PCI_REG_PARAM);
+       __raw_writel(ctx->pm[10], ctx->regs + PCI_REG_MBAR);
+       __raw_writel(ctx->pm[11], ctx->regs + PCI_REG_TIMEOUT);
+       wmb();
+       __raw_writel(ctx->pm[1],  ctx->regs + PCI_REG_CONFIG);
+       wmb();
+
+       return 0;
+}
+
+static const struct dev_pm_ops alchemy_pci_pmops = {
+       .suspend        = alchemy_pci_suspend,
+       .resume         = alchemy_pci_resume,
+};
+
+#define ALCHEMY_PCICTL_PM      (&alchemy_pci_pmops)
+
+#else
+#define ALCHEMY_PCICTL_PM      NULL
+#endif
+
+static struct platform_driver alchemy_pcictl_driver = {
+       .probe          = alchemy_pci_probe,
+       .driver = {
+               .name   = "alchemy-pci",
+               .owner  = THIS_MODULE,
+               .pm     = ALCHEMY_PCICTL_PM,
+       },
+};
+
+static int __init alchemy_pci_init(void)
+{
+       /* Au1500/Au1550 have PCI */
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1500:
+       case ALCHEMY_CPU_AU1550:
+               return platform_driver_register(&alchemy_pcictl_driver);
+       }
+       return 0;
+}
+arch_initcall(alchemy_pci_init);
+
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+       struct alchemy_pci_context *ctx = dev->sysdata;
+       if (ctx && ctx->board_map_irq)
+               return ctx->board_map_irq(dev, slot, pin);
+       return -1;
+}
+
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+       return 0;
+}
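A board port hooks into this driver by registering an "alchemy-pci" platform device whose platform data supplies the callbacks read by alchemy_pci_probe() above. The sketch below is illustrative only: the alchemy_pci_platdata fields match those the probe routine dereferences (board_map_irq, board_pci_idsel, pci_cfg_set, pci_cfg_clr), but the header location, register base macro, slot number and IRQ name are assumptions, not taken from this patch.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <asm/mach-au1x00/au1000.h>    /* alchemy_pci_platdata; assumed header */

/* Illustrative routing: a single slot wired to INTA. */
static int board_pci_map_irq(const struct pci_dev *d, u8 slot, u8 pin)
{
       if (slot == 12 && pin == 1)
               return AU1500_PCI_INTA;         /* assumed IRQ name */
       return -1;
}

static struct resource board_pci_res[] = {
       {
               .start  = AU1500_PCI_PHYS_ADDR,         /* assumed ctrl base */
               .end    = AU1500_PCI_PHYS_ADDR + 0xfff,
               .flags  = IORESOURCE_MEM,
       },
};

static struct alchemy_pci_platdata board_pci_pd = {
       .board_map_irq  = board_pci_map_irq,
       /* .board_pci_idsel left NULL: the driver installs its default */
};

static struct platform_device board_pci_dev = {
       .name           = "alchemy-pci",        /* must match the driver name */
       .id             = -1,
       .dev            = {
               .platform_data = &board_pci_pd,
       },
       .resource       = board_pci_res,
       .num_resources  = ARRAY_SIZE(board_pci_res),
};

static int __init board_pci_register(void)
{
       return platform_device_register(&board_pci_dev);
}
arch_initcall(board_pci_register);

The probe routine insists on platform data and on MEM resource 0; pci_cfg_set and pci_cfg_clr may be left zero when no PCI_REG_CONFIG bits need overriding.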
index 0abfbe04ffc9795213e4109b51c8bdffae252d9d..655308a4e1cda2c02c6d63f9202531cc7ffaabd8 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/cacheflush.h>
 #include <asm/r4kcache.h>
 #include <asm/reboot.h>
+#include <asm/smp-ops.h>
 #include <asm/time.h>
 
 #include <msp_prom.h>
index 434d7b1a8c6a7abf4c492a14b975111a21b1a4ab..b7f1d9c4a8a3c7d588b270d6bd6e1bad0590f6b2 100644 (file)
@@ -65,15 +65,11 @@ static unsigned char readb_outer_space(unsigned long long phys)
 
        __asm__ __volatile__ (
        "       .set    mips3           \n"
-       "       .set    push            \n"
-       "       .set    noreorder       \n"
-       "       .set    nomacro         \n"
        "       ld      %0, %1          \n"
-       "       .set    pop             \n"
        "       lbu     %0, (%0)        \n"
        "       .set    mips0           \n"
        : "=r" (res)
-       : "R" (vaddr));
+       : "m" (vaddr));
 
        write_c0_status(sr);
        ssnop_4();
@@ -93,15 +89,11 @@ static void writeb_outer_space(unsigned long long phys, unsigned char c)
 
        __asm__ __volatile__ (
        "       .set    mips3           \n"
-       "       .set    push            \n"
-       "       .set    noreorder       \n"
-       "       .set    nomacro         \n"
        "       ld      %0, %1          \n"
-       "       .set    pop             \n"
        "       sb      %2, (%0)        \n"
        "       .set    mips0           \n"
        : "=&r" (tmp)
-       : "R" (vaddr), "r" (c));
+       : "m" (vaddr), "r" (c));
 
        write_c0_status(sr);
        ssnop_4();
index 32f70097c3c746e1584537e7fb8c451a5f80346c..49639e8120d86d527cca879cb553e2e063426ae3 100644 (file)
@@ -30,7 +30,7 @@ typedef struct
 }t_env_var;
 
 
-char * prom_getcmdline(void)
+char * __init prom_getcmdline(void)
 {
        return &(arcs_cmdline[0]);
 }
index b18b04e48577550f6b42c43d393d06c91741285b..f90dce315e0477b960cd68e01002794612bbb101 100644 (file)
@@ -337,12 +337,12 @@ static struct irq_chip bridge_irq_type = {
        .irq_unmask     = enable_bridge_irq,
 };
 
-void __devinit register_bridge_irq(unsigned int irq)
+void register_bridge_irq(unsigned int irq)
 {
        irq_set_chip_and_handler(irq, &bridge_irq_type, handle_level_irq);
 }
 
-int __devinit request_bridge_irq(struct bridge_controller *bc)
+int request_bridge_irq(struct bridge_controller *bc)
 {
        int irq = allocate_irqno();
        int swlevel, cpu;
index fec13200868f8e6ab40fd834abe7ed4bf6dc89f8..d7efdbf640c7d5e39cd8c3c55f79aa0b8ca64f10 100644 (file)
 
 #ifdef __HAVE_ARCH_PTE_SPECIAL
 
-static inline void get_huge_page_tail(struct page *page)
-{
-       /*
-        * __split_huge_page_refcount() cannot run
-        * from under us.
-        */
-       VM_BUG_ON(atomic_read(&page->_count) < 0);
-       atomic_inc(&page->_count);
-}
-
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -57,8 +47,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                        put_page(page);
                        return 0;
                }
-               if (PageTail(page))
-                       get_huge_page_tail(page);
                pages[*nr] = page;
                (*nr)++;
 
index 0b9a5c1901b9e52a89880f684e9507acb9cc9e09..da5eb388570210ca9aa70ba0de41444659c49d02 100644 (file)
@@ -390,7 +390,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
 {
        unsigned long mask;
        unsigned long pte_end;
-       struct page *head, *page;
+       struct page *head, *page, *tail;
        pte_t pte;
        int refs;
 
@@ -413,6 +413,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
        head = pte_page(pte);
 
        page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+       tail = page;
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
@@ -428,10 +429,20 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
 
        if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                /* Could be optimized better */
-               while (*nr) {
-                       put_page(page);
-                       (*nr)--;
-               }
+               *nr -= refs;
+               while (refs--)
+                       put_page(head);
+               return 0;
+       }
+
+       /*
+        * Any tail pages need their mapcount reference taken before we
+        * return.
+        */
+       while (refs--) {
+               if (PageTail(tail))
+                       get_huge_page_tail(tail);
+               tail++;
        }
 
        return 1;
index c65f75aa7ff7be40f4f16bda3fcfd39d3eb45fd5..22ffccd8bef53b69a41fcfcfdf8fb3ed46b3d760 100644 (file)
@@ -1608,6 +1608,7 @@ int fsl_rio_setup(struct platform_device *dev)
        return 0;
 err:
        iounmap(priv->regs_win);
+       release_resource(&port->iores);
 err_res:
        kfree(priv);
 err_priv:
index 6fe874fc5f8efe4456f942da01eddc6b2c9b1d50..481f4f76f6648776a8b82a7d2b1dccab479a47d9 100644 (file)
@@ -108,9 +108,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode)
                ret->i_gid = hypfs_info->gid;
                ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
                if (mode & S_IFDIR)
-                       ret->i_nlink = 2;
-               else
-                       ret->i_nlink = 1;
+                       set_nlink(ret, 2);
        }
        return ret;
 }
@@ -361,7 +359,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb,
        } else if (mode & S_IFDIR) {
                inode->i_op = &simple_dir_inode_operations;
                inode->i_fop = &simple_dir_operations;
-               parent->d_inode->i_nlink++;
+               inc_nlink(parent->d_inode);
        } else
                BUG();
        inode->i_private = data;
index 45b405ca25673a416ca33bfdd5b20a81c4876264..65cb06e2af4eaa0a7b18b2e3b0f5b4b1af8be9d8 100644 (file)
@@ -52,7 +52,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
 {
        unsigned long mask, result;
-       struct page *head, *page;
+       struct page *head, *page, *tail;
        int refs;
 
        result = write ? 0 : _SEGMENT_ENTRY_RO;
@@ -64,6 +64,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
        refs = 0;
        head = pmd_page(pmd);
        page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+       tail = page;
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
@@ -81,6 +82,17 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
                *nr -= refs;
                while (refs--)
                        put_page(head);
+               return 0;
+       }
+
+       /*
+        * Any tail pages need their mapcount reference taken before we
+        * return.
+        */
+       while (refs--) {
+               if (PageTail(tail))
+                       get_huge_page_tail(tail);
+               tail++;
        }
 
        return 1;
index ff9177c8f6439eec4be3fe13a5bbea6da833b64f..5629e2099130aeee5c24d7c08f9705d48550e995 100644 (file)
@@ -101,10 +101,6 @@ config GENERIC_LOCKBREAK
        def_bool y
        depends on SMP && PREEMPT
 
-config SYS_SUPPORTS_PM
-       bool
-       depends on !SMP
-
 config ARCH_SUSPEND_POSSIBLE
        def_bool n
 
@@ -649,7 +645,7 @@ config CRASH_DUMP
          a specially reserved region and then later executed after
          a crash by kdump/kexec. The crash dump kernel must be compiled
          to a memory address not used by the main kernel using
-         MEMORY_START.
+         PHYSICAL_START.
 
          For more details see Documentation/kdump/kdump.txt
 
@@ -660,6 +656,17 @@ config KEXEC_JUMP
          Jump between original kernel and kexeced kernel and invoke
          code via KEXEC
 
+config PHYSICAL_START
+       hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP)
+       default MEMORY_START
+       ---help---
+         This gives the physical address where the kernel is loaded
+         and is ordinarily the same as MEMORY_START.
+
+         Different values are primarily used in the case of kexec on panic,
+         where the fail-safe kernel needs to run at a different address
+         than the kernel that panicked.
+
 config SECCOMP
        bool "Enable seccomp to safely compute untrusted bytecode"
        depends on PROC_FS
index 99385d0b3f3b64bb08d894a91295f6639784c2de..3fc0f413777cc152ee540fb1807ed282d065c384 100644 (file)
@@ -80,6 +80,7 @@ defaultimage-$(CONFIG_SH_RSK)                 := uImage
 defaultimage-$(CONFIG_SH_URQUELL)              := uImage
 defaultimage-$(CONFIG_SH_MIGOR)                        := uImage
 defaultimage-$(CONFIG_SH_AP325RXA)             := uImage
+defaultimage-$(CONFIG_SH_SH7757LCR)            := uImage
 defaultimage-$(CONFIG_SH_7724_SOLUTION_ENGINE) := uImage
 defaultimage-$(CONFIG_SH_7206_SOLUTION_ENGINE) := vmlinux
 defaultimage-$(CONFIG_SH_7619_SOLUTION_ENGINE) := vmlinux
index 9da92ac36533ec894694ddc6aa13eaa5717cc141..b3ae9d38cbc0fa277f1d82177489f946627ce30e 100644 (file)
@@ -13,9 +13,9 @@
 #include <linux/interrupt.h>
 #include <linux/mtd/physmap.h>
 #include <linux/io.h>
+#include <linux/sh_eth.h>
 #include <asm/machvec.h>
 #include <asm/sizes.h>
-#include <asm/sh_eth.h>
 
 /* NOR Flash */
 static struct mtd_partition espt_nor_flash_partitions[] = {
index f968f17891a4f87980b359ef46bd6db7c40187e1..03820c3c93d4c32da3b9944c67394dc84b4606d0 100644 (file)
@@ -41,8 +41,7 @@ static int __init eraseconfig_init(void)
        printk("SnapGear: EraseConfig init\n");
 
        /* Setup "EraseConfig" switch on external IRQ 0 */
-       if (request_irq(irq, eraseconfig_interrupt, IRQF_DISABLED,
-                               "Erase Config", NULL))
+       if (request_irq(irq, eraseconfig_interrupt, 0, "Erase Config", NULL))
                printk("SnapGear: failed to register IRQ%d for Reset witch\n",
                                irq);
        else
index fa2a208ec6cb585db12f768fcfae41116b93a362..ec8c84c14b17219cd22c08a4d7e1dfdd9c3da0ec 100644 (file)
@@ -18,8 +18,8 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mmcif.h>
 #include <linux/mmc/sh_mobile_sdhi.h>
+#include <linux/sh_eth.h>
 #include <cpu/sh7757.h>
-#include <asm/sh_eth.h>
 #include <asm/heartbeat.h>
 
 static struct resource heartbeat_resource = {
index 311bcebdbd07ab2039cb50be96e6d9d369c684af..724e8b7271f4a465a3008d06de4f65452bf3eb48 100644 (file)
@@ -46,13 +46,11 @@ static irqreturn_t cayman_interrupt_pci2(int irq, void *dev_id)
 static struct irqaction cayman_action_smsc = {
        .name           = "Cayman SMSC Mux",
        .handler        = cayman_interrupt_smsc,
-       .flags          = IRQF_DISABLED,
 };
 
 static struct irqaction cayman_action_pci2 = {
        .name           = "Cayman PCI2 Mux",
        .handler        = cayman_interrupt_pci2,
-       .flags          = IRQF_DISABLED,
 };
 
 static void enable_cayman_irq(struct irq_data *data)
index b24d69d509e7d7ccfe7e78454fca4014b5c286a4..92ddce4b34563faab3c53ac7629140dd05f089f9 100644 (file)
 #include <linux/spi/mmc_spi.h>
 #include <linux/input.h>
 #include <linux/input/sh_keysc.h>
+#include <linux/sh_eth.h>
 #include <video/sh_mobile_lcdc.h>
 #include <sound/sh_fsi.h>
 #include <media/sh_mobile_ceu.h>
 #include <media/tw9910.h>
 #include <media/mt9t112.h>
 #include <asm/heartbeat.h>
-#include <asm/sh_eth.h>
 #include <asm/clock.h>
 #include <asm/suspend.h>
 #include <cpu/sh7724.h>
@@ -248,6 +248,10 @@ static struct renesas_usbhs_platform_info usbhs_info = {
        .driver_param = {
                .buswait_bwait          = 4,
                .detection_delay        = 5,
+               .d0_tx_id = SHDMA_SLAVE_USB1D0_TX,
+               .d0_rx_id = SHDMA_SLAVE_USB1D0_RX,
+               .d1_tx_id = SHDMA_SLAVE_USB1D1_TX,
+               .d1_rx_id = SHDMA_SLAVE_USB1D1_RX,
        },
 };
 
index b49535c0ddd9f528d90eacf6fdfc82209294e1f5..865d8d6e823f89036402139b896f4465c12e85f9 100644 (file)
@@ -86,7 +86,7 @@ static int __init hp6x0_apm_init(void)
        int ret;
 
        ret = request_irq(HP680_BTN_IRQ, hp6x0_apm_interrupt,
-                         IRQF_DISABLED, MODNAME, NULL);
+                         0, MODNAME, NULL);
        if (unlikely(ret < 0)) {
                printk(KERN_ERR MODNAME ": IRQ %d request failed\n",
                       HP680_BTN_IRQ);
index d00756728bd231d577d1d65342768e1cfa237c8e..b747c0ab9264ac3ed5d1894b8c67f471c7bc852d 100644 (file)
 #include <linux/input.h>
 #include <linux/input/sh_keysc.h>
 #include <linux/usb/r8a66597.h>
+#include <linux/sh_eth.h>
 #include <video/sh_mobile_lcdc.h>
 #include <media/sh_mobile_ceu.h>
 #include <sound/sh_fsi.h>
 #include <asm/io.h>
 #include <asm/heartbeat.h>
-#include <asm/sh_eth.h>
 #include <asm/clock.h>
 #include <asm/suspend.h>
 #include <cpu/sh7724.h>
index f3d828f133e5adbfb7b9d0c7d0d4484c9f4a56d8..dd036f1661dbd297133c2c770eaf831ae5866abe 100644 (file)
@@ -17,8 +17,8 @@
 #include <linux/mtd/physmap.h>
 #include <linux/fb.h>
 #include <linux/io.h>
+#include <linux/sh_eth.h>
 #include <mach/sh7763rdp.h>
-#include <asm/sh_eth.h>
 #include <asm/sh7760fb.h>
 
 /* NOR Flash */
index ba515d8002457546db2e5138bbb229dbd39cb8e2..e4ea31a62c55ef8ee7a367148e4df9fe32f568a9 100644 (file)
@@ -19,6 +19,7 @@ CONFIG_MEMORY_START   ?= 0x0c000000
 CONFIG_BOOT_LINK_OFFSET        ?= 0x00800000
 CONFIG_ZERO_PAGE_OFFSET        ?= 0x00001000
 CONFIG_ENTRY_OFFSET    ?= 0x00001000
+CONFIG_PHYSICAL_START  ?= $(CONFIG_MEMORY_START)
 
 suffix-y := bin
 suffix-$(CONFIG_KERNEL_GZIP)   := gz
@@ -48,7 +49,7 @@ $(obj)/romimage/vmlinux: $(obj)/zImage FORCE
        $(Q)$(MAKE) $(build)=$(obj)/romimage $@
 
 KERNEL_MEMORY  := $(shell /bin/bash -c 'printf "0x%08x" \
-                    $$[$(CONFIG_MEMORY_START) & 0x1fffffff]')
+                    $$[$(CONFIG_PHYSICAL_START) & 0x1fffffff]')
 
 KERNEL_LOAD    := $(shell /bin/bash -c 'printf "0x%08x" \
                     $$[$(CONFIG_PAGE_OFFSET)  + \
@@ -114,4 +115,5 @@ $(obj)/uImage: $(obj)/uImage.$(suffix-y)
        @echo '  Image $@ is ready'
 
 export CONFIG_PAGE_OFFSET CONFIG_MEMORY_START CONFIG_BOOT_LINK_OFFSET \
-       CONFIG_ZERO_PAGE_OFFSET CONFIG_ENTRY_OFFSET KERNEL_MEMORY suffix-y
+       CONFIG_PHYSICAL_START CONFIG_ZERO_PAGE_OFFSET CONFIG_ENTRY_OFFSET \
+       KERNEL_MEMORY suffix-y
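The KERNEL_MEMORY computation above masks the configured load address down to its 29-bit physical form. A worked example with an assumed crash-kernel address:

/*
 * CONFIG_PHYSICAL_START = 0x0e000000 (assumed)  =>
 *     KERNEL_MEMORY = 0x0e000000 & 0x1fffffff = 0x0e000000
 *
 * With the default CONFIG_PHYSICAL_START ?= $(CONFIG_MEMORY_START), the
 * result equals the old CONFIG_MEMORY_START-based value, so ordinary
 * builds are laid out exactly as before.
 */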
index af7bb589c2c8bcad3a1e902e456c3465095c1b6f..be9ca7ca0ce40a028793fa597903470e17e60fd3 100644 (file)
@@ -170,7 +170,7 @@ static int __init g2_dma_init(void)
 {
        int ret;
 
-       ret = request_irq(HW_EVENT_G2_DMA, g2_dma_interrupt, IRQF_DISABLED,
+       ret = request_irq(HW_EVENT_G2_DMA, g2_dma_interrupt, 0,
                          "g2 DMA handler", &g2_dma_info);
        if (unlikely(ret))
                return -EINVAL;
index 3cee58e7f1e5ff0677a1b003030d3fe19e3ffa09..706a3434af7a6f581b41d968286bc482c628b050 100644 (file)
@@ -70,7 +70,6 @@ static int pvr2_xfer_dma(struct dma_channel *chan)
 static struct irqaction pvr2_dma_irq = {
        .name           = "pvr2 DMA handler",
        .handler        = pvr2_dma_interrupt,
-       .flags          = IRQF_DISABLED,
 };
 
 static struct dma_ops pvr2_dma_ops = {
index 827208781ed5942220401895a1d75f7b7dc0e2b8..a60da6dd4d17780d4c1fb2cf8733ae8be31973c4 100644 (file)
@@ -95,7 +95,7 @@ static int sh_dmac_request_dma(struct dma_channel *chan)
 #if defined(CONFIG_SH_DMA_IRQ_MULTI)
                                IRQF_SHARED,
 #else
-                               IRQF_DISABLED,
+                               0,
 #endif
                                chan->dev_id, chan);
 }
@@ -305,7 +305,7 @@ static int __init sh_dmac_init(void)
 #if defined(CONFIG_SH_DMA_IRQ_MULTI)
                                IRQF_SHARED,
 #else
-                               IRQF_DISABLED,
+                               0,
 #endif
                                dmae_name[n], (void *)dmae_name[n]);
                if (unlikely(i < 0)) {
index 6ab9c4a1543994f3deafbd8dfccfe1e8392ad948..3d66a32ce610dd60770081cee2dd149e9b665b9c 100644 (file)
@@ -174,17 +174,17 @@ static int __init dmabrg_init(void)
        or = __raw_readl(DMAOR);
        __raw_writel(or | DMAOR_BRG | DMAOR_DMEN, DMAOR);
 
-       ret = request_irq(DMABRGI0, dmabrg_irq, IRQF_DISABLED,
+       ret = request_irq(DMABRGI0, dmabrg_irq, 0,
                        "DMABRG USB address error", NULL);
        if (ret)
                goto out0;
 
-       ret = request_irq(DMABRGI1, dmabrg_irq, IRQF_DISABLED,
+       ret = request_irq(DMABRGI1, dmabrg_irq, 0,
                        "DMABRG Transfer End", NULL);
        if (ret)
                goto out1;
 
-       ret = request_irq(DMABRGI2, dmabrg_irq, IRQF_DISABLED,
+       ret = request_irq(DMABRGI2, dmabrg_irq, 0,
                        "DMABRG Transfer Half", NULL);
        if (ret == 0)
                return ret;
index 0bf296c7879593b1d1aa9f9c020b5f54e7682a2e..16c1e721bf5434ebcafaf578422b71bb85ed36cf 100644 (file)
@@ -107,13 +107,13 @@ static int __init sh5pci_init(void)
        u32 uval;
 
         if (request_irq(IRQ_ERR, pcish5_err_irq,
-                        IRQF_DISABLED, "PCI Error",NULL) < 0) {
+                        0, "PCI Error",NULL) < 0) {
                 printk(KERN_ERR "PCISH5: Cannot hook PCI_PERR interrupt\n");
                 return -EINVAL;
         }
 
         if (request_irq(IRQ_SERR, pcish5_serr_irq,
-                        IRQF_DISABLED, "PCI SERR interrupt", NULL) < 0) {
+                        0, "PCI SERR interrupt", NULL) < 0) {
                 printk(KERN_ERR "PCISH5: Cannot hook PCI_SERR interrupt\n");
                 return -EINVAL;
         }
index edb7cca148825f7324c37ecaf215b9f56c540751..fa7b978cc7278212c18bb7059ee937a9d17c9445 100644 (file)
@@ -172,7 +172,7 @@ static int __init sh7780_pci_setup_irqs(struct pci_channel *hose)
                     PCI_STATUS_SIG_TARGET_ABORT | \
                     PCI_STATUS_PARITY, hose->reg_base + PCI_STATUS);
 
-       ret = request_irq(hose->serr_irq, sh7780_pci_serr_irq, IRQF_DISABLED,
+       ret = request_irq(hose->serr_irq, sh7780_pci_serr_irq, 0,
                          "PCI SERR interrupt", hose);
        if (unlikely(ret)) {
                printk(KERN_ERR "PCI: Failed hooking SERR IRQ\n");
index afc24556572bdc2863e6a022c1894a86ad8f92ff..637b79b096576d556e42b5d20cd31d856dd3465f 100644 (file)
@@ -63,7 +63,7 @@ static int switch_drv_probe(struct platform_device *pdev)
        BUG_ON(!psw_info);
 
        ret = request_irq(irq, psw_info->irq_handler,
-                         IRQF_DISABLED | psw_info->irq_flags,
+                         psw_info->irq_flags,
                          psw_info->name ? psw_info->name : DRV_NAME, pdev);
        if (unlikely(ret < 0))
                goto err;
index 822d6084195b7397ff7eeff08fe33ce618630303..0dca9a5c6be6f91b9a800b2f9bcca7fcf1f6e1fe 100644 (file)
@@ -112,6 +112,16 @@ typedef struct page *pgtable_t;
 #define __MEMORY_START         CONFIG_MEMORY_START
 #define __MEMORY_SIZE          CONFIG_MEMORY_SIZE
 
+/*
+ * PHYSICAL_OFFSET is the offset in physical memory where the base
+ * of the kernel is loaded.
+ */
+#ifdef CONFIG_PHYSICAL_START
+#define PHYSICAL_OFFSET (CONFIG_PHYSICAL_START - __MEMORY_START)
+#else
+#define PHYSICAL_OFFSET 0
+#endif
+
 /*
  * PAGE_OFFSET is the virtual address of the start of kernel address
  * space.
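A worked example of the new offset, using the common default memory base and an assumed relocation target:

/*
 * CONFIG_MEMORY_START   = 0x0c000000   (base of RAM)
 * CONFIG_PHYSICAL_START = 0x0e000000   (assumed crash-dump kernel address)
 *
 *     PHYSICAL_OFFSET = 0x0e000000 - 0x0c000000 = 0x02000000
 *
 * The linker script then places the kernel at
 * PAGE_OFFSET + MEMORY_OFFSET + PHYSICAL_OFFSET + CONFIG_ZERO_PAGE_OFFSET;
 * with CONFIG_PHYSICAL_START left at its MEMORY_START default the offset
 * is 0 and the link address is unchanged.
 */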
index 3b097b09a3ba3c31d0cde0165cf3ce2d2d747274..19222dae8233ab7d8c3b754eca4d48af3aff6914 100644 (file)
@@ -113,7 +113,14 @@ static struct clk_lookup lookups[] = {
 
        /* MSTP32 clocks */
        CLKDEV_CON_ID("sdhi0", &mstp_clks[MSTP004]),
-       CLKDEV_CON_ID("riic", &mstp_clks[MSTP000]),
+       CLKDEV_CON_ID("riic0", &mstp_clks[MSTP000]),
+       CLKDEV_CON_ID("riic1", &mstp_clks[MSTP000]),
+       CLKDEV_CON_ID("riic2", &mstp_clks[MSTP000]),
+       CLKDEV_CON_ID("riic3", &mstp_clks[MSTP000]),
+       CLKDEV_CON_ID("riic4", &mstp_clks[MSTP000]),
+       CLKDEV_CON_ID("riic5", &mstp_clks[MSTP000]),
+       CLKDEV_CON_ID("riic6", &mstp_clks[MSTP000]),
+       CLKDEV_CON_ID("riic7", &mstp_clks[MSTP000]),
 
        CLKDEV_ICK_ID("tmu_fck", "sh_tmu.0", &mstp_clks[MSTP113]),
        CLKDEV_ICK_ID("tmu_fck", "sh_tmu.1", &mstp_clks[MSTP114]),
@@ -121,6 +128,7 @@ static struct clk_lookup lookups[] = {
        CLKDEV_ICK_ID("sci_fck", "sh-sci.1", &mstp_clks[MSTP111]),
        CLKDEV_ICK_ID("sci_fck", "sh-sci.0", &mstp_clks[MSTP110]),
 
+       CLKDEV_CON_ID("usb_fck", &mstp_clks[MSTP103]),
        CLKDEV_CON_ID("usb0", &mstp_clks[MSTP102]),
        CLKDEV_CON_ID("mmc0", &mstp_clks[MSTP220]),
 };
index de865cac02ee03153aba1fb3d2eae32a6166c8e5..03f2b55757cfc497ed548d57f830c6ba876308fc 100644 (file)
@@ -79,7 +79,7 @@ static void shx3_prepare_cpus(unsigned int max_cpus)
 
        for (i = 0; i < SMP_MSG_NR; i++)
                request_irq(104 + i, ipi_interrupt_handler,
-                           IRQF_DISABLED | IRQF_PERCPU, "IPI", (void *)(long)i);
+                           IRQF_PERCPU, "IPI", (void *)(long)i);
 
        for (i = 0; i < max_cpus; i++)
                set_cpu_present(i, true);
index 58bff45d1156ee4d36857f28845d71e5bc5a4954..1a0e946679a4d799038f4051811a8c0390156357 100644 (file)
@@ -211,13 +211,16 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
        }
 
        /*
-        *  We don't know which RAM region contains kernel data,
-        *  so we try it repeatedly and let the resource manager
-        *  test it.
+        * We don't know which RAM region contains kernel data or
+        * the reserved crashkernel region, so try it repeatedly
+        * and let the resource manager test it.
         */
        request_resource(res, &code_resource);
        request_resource(res, &data_resource);
        request_resource(res, &bss_resource);
+#ifdef CONFIG_KEXEC
+       request_resource(res, &crashk_res);
+#endif
 
        /*
         * Also make sure that there is a PMB mapping that covers this
index 38e862852dd0ce3fbcd10e9291c09fd35e6699d4..ecc2d3d0f54a3bd142500fae47b207ad421ee699 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/cpumask.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
+#include <linux/topology.h>
 #include <linux/node.h>
 #include <linux/nodemask.h>
 
index 731c10ce67b56dd482e95c2ecdb239de023ea831..c98905f71e28ce78a1ab06e7b310653efa2156a9 100644 (file)
@@ -23,7 +23,7 @@ OUTPUT_ARCH(sh)
 ENTRY(_start)
 SECTIONS
 {
-       . = PAGE_OFFSET + MEMORY_OFFSET + CONFIG_ZERO_PAGE_OFFSET;
+       . = PAGE_OFFSET + MEMORY_OFFSET + PHYSICAL_OFFSET + CONFIG_ZERO_PAGE_OFFSET;
 
        _text = .;              /* Text and read-only data */
 
index 58a93fb3d965055739e8484f0a1045bf338e0194..c9dbace35b16014f8cafcc6d2bbf247abd26c8ea 100644 (file)
@@ -287,6 +287,8 @@ static void __init do_init_bootmem(void)
 static void __init early_reserve_mem(void)
 {
        unsigned long start_pfn;
+       u32 zero_base = (u32)__MEMORY_START + (u32)PHYSICAL_OFFSET;
+       u32 start = zero_base + (u32)CONFIG_ZERO_PAGE_OFFSET;
 
        /*
         * Partially used pages are not usable - thus
@@ -300,15 +302,13 @@ static void __init early_reserve_mem(void)
         * this catches the (definitely buggy) case of us accidentally
         * initializing the bootmem allocator with an invalid RAM area.
         */
-       memblock_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
-                   (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) -
-                   (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET));
+       memblock_reserve(start, (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - start);
 
        /*
         * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET.
         */
        if (CONFIG_ZERO_PAGE_OFFSET != 0)
-               memblock_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET);
+               memblock_reserve(zero_base, CONFIG_ZERO_PAGE_OFFSET);
 
        /*
         * Handle additional early reservations
index a986b5d0571281463876534dbe2beeeacdf2609d..42c55df3aec300b5baee79e6af5123ab99c7b126 100644 (file)
@@ -56,6 +56,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                        put_page(head);
                        return 0;
                }
+               if (head != page)
+                       get_huge_page_tail(page);
 
                pages[*nr] = page;
                (*nr)++;
index 70dabd1e0652a1a0725962b28a67abcc729010f9..b9d7c4276682f3b20434c596f4ef457e300b90af 100644 (file)
@@ -1,5 +1,4 @@
-
-menu "Character Devices"
+menu "UML Character Devices"
 
 config STDERR_CONSOLE
        bool "stderr console"
@@ -105,92 +104,6 @@ config SSL_CHAN
           this if you expect the UML that you build to be run in environments
           which don't have a set of /dev/pty* devices.
 
-config UNIX98_PTYS
-       bool "Unix98 PTY support"
-       help
-         A pseudo terminal (PTY) is a software device consisting of two
-         halves: a master and a slave. The slave device behaves identical to
-         a physical terminal; the master device is used by a process to
-         read data from and write data to the slave, thereby emulating a
-         terminal. Typical programs for the master side are telnet servers
-         and xterms.
-
-         Linux has traditionally used the BSD-like names /dev/ptyxx for
-         masters and /dev/ttyxx for slaves of pseudo terminals. This scheme
-         has a number of problems. The GNU C library glibc 2.1 and later,
-         however, supports the Unix98 naming standard: in order to acquire a
-         pseudo terminal, a process opens /dev/ptmx; the number of the pseudo
-         terminal is then made available to the process and the pseudo
-         terminal slave can be accessed as /dev/pts/<number>. What was
-         traditionally /dev/ttyp2 will then be /dev/pts/2, for example.
-
-         All modern Linux systems use the Unix98 ptys.  Say Y unless
-         you're on an embedded system and want to conserve memory.
-
-config LEGACY_PTYS
-       bool "Legacy (BSD) PTY support"
-       default y
-       help
-         A pseudo terminal (PTY) is a software device consisting of two
-         halves: a master and a slave. The slave device behaves identical to
-         a physical terminal; the master device is used by a process to
-         read data from and write data to the slave, thereby emulating a
-         terminal. Typical programs for the master side are telnet servers
-         and xterms.
-
-         Linux has traditionally used the BSD-like names /dev/ptyxx
-         for masters and /dev/ttyxx for slaves of pseudo
-         terminals. This scheme has a number of problems, including
-         security.  This option enables these legacy devices; on most
-         systems, it is safe to say N.
-
-config RAW_DRIVER
-        tristate "RAW driver (/dev/raw/rawN)"
-       depends on BLOCK
-        help
-          The raw driver permits block devices to be bound to /dev/raw/rawN.
-          Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O.
-          See the raw(8) manpage for more details.
-
-          Applications should preferably open the device (eg /dev/hda1)
-          with the O_DIRECT flag.
-
-config MAX_RAW_DEVS
-        int "Maximum number of RAW devices to support (1-8192)"
-        depends on RAW_DRIVER
-        default "256"
-        help
-          The maximum number of RAW devices that are supported.
-          Default is 256. Increase this number in case you need lots of
-          raw devices.
-
-config LEGACY_PTY_COUNT
-       int "Maximum number of legacy PTY in use"
-       depends on LEGACY_PTYS
-       default "256"
-       help
-         The maximum number of legacy PTYs that can be used at any one time.
-         The default is 256, and should be more than enough.  Embedded
-         systems may want to reduce this to save memory.
-
-         When not in use, each legacy PTY occupies 12 bytes on 32-bit
-         architectures and 24 bytes on 64-bit architectures.
-
-config WATCHDOG
-       bool "Watchdog Timer Support"
-
-config WATCHDOG_NOWAYOUT
-       bool "Disable watchdog shutdown on close"
-       depends on WATCHDOG
-
-config SOFT_WATCHDOG
-       tristate "Software Watchdog"
-       depends on WATCHDOG
-
-config UML_WATCHDOG
-       tristate "UML watchdog"
-       depends on WATCHDOG
-
 config UML_SOUND
        tristate "Sound support"
        help
@@ -211,29 +124,4 @@ config HOSTAUDIO
        tristate
        default UML_SOUND
 
-#It is selected elsewhere, so kconfig would warn without this.
-config HW_RANDOM
-       tristate
-       default n
-
-config UML_RANDOM
-       tristate "Hardware random number generator"
-       help
-         This option enables UML's "hardware" random number generator.  It
-         attaches itself to the host's /dev/random, supplying as much entropy
-         as the host has, rather than the small amount the UML gets from its
-         own drivers.  It registers itself as a standard hardware random number
-         generator, major 10, minor 183, and the canonical device name is
-         /dev/hwrng.
-         The way to make use of this is to install the rng-tools package
-         (check your distro, or download from
-         http://sourceforge.net/projects/gkernel/).  rngd periodically reads
-         /dev/hwrng and injects the entropy into /dev/random.
-
-config MMAPPER
-       tristate "iomem emulation driver"
-       help
-         This driver allows a host file to be used as emulated IO memory inside
-         UML.
-
 endmenu
index 0ccad0ff6d6ea5a155bbfcd5c33b7927263a494d..567eb5fc21dfeb9ac56893c4bc1fa2d02350a266 100644 (file)
@@ -2,20 +2,14 @@ source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
 
-source "drivers/block/Kconfig"
-
 source "arch/um/Kconfig.char"
 
-source "drivers/base/Kconfig"
+source "drivers/Kconfig"
 
 source "net/Kconfig"
 
 source "arch/um/Kconfig.net"
 
-source "drivers/net/Kconfig"
-
-source "drivers/connector/Kconfig"
-
 source "fs/Kconfig"
 
 source "security/Kconfig"
@@ -24,19 +18,4 @@ source "crypto/Kconfig"
 
 source "lib/Kconfig"
 
-source "drivers/scsi/Kconfig"
-
-source "drivers/md/Kconfig"
-
-if BROKEN
-       source "drivers/mtd/Kconfig"
-endif
-
-source "drivers/leds/Kconfig"
-
-#This is just to shut up some Kconfig warnings, so no prompt.
-config INPUT
-       tristate
-       default n
-
 source "arch/um/Kconfig.debug"
index b5e675e370c67203ef301ee0c0766d2babd5a330..70fd690964e4ef0e71b21817dcffc1827ce402dd 100644 (file)
@@ -148,5 +148,11 @@ config KERNEL_STACK_ORDER
          be 1 << order pages.  The default is OK unless you're running Valgrind
          on UML, in which case, set this to 3.
 
+config MMAPPER
+       tristate "iomem emulation driver"
+       help
+         This driver allows a host file to be used as emulated IO memory inside
+         UML.
+
 config NO_DMA
        def_bool y
diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86
deleted file mode 100644 (file)
index 21bebe6..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-mainmenu "User Mode Linux/$SUBARCH $KERNELVERSION Kernel Configuration"
-
-source "arch/um/Kconfig.common"
-
-menu "UML-specific options"
-
-menu "Host processor type and features"
-
-config CMPXCHG_LOCAL
-       bool
-       default n
-
-config CMPXCHG_DOUBLE
-       bool
-       default n
-
-source "arch/x86/Kconfig.cpu"
-
-endmenu
-
-config UML_X86
-       def_bool y
-       select GENERIC_FIND_FIRST_BIT
-
-config 64BIT
-       bool
-       default SUBARCH = "x86_64"
-
-config X86_32
-       def_bool !64BIT
-       select HAVE_AOUT
-
-config X86_64
-       def_bool 64BIT
-
-config RWSEM_XCHGADD_ALGORITHM
-       def_bool X86_XADD && 64BIT
-
-config RWSEM_GENERIC_SPINLOCK
-       def_bool !RWSEM_XCHGADD_ALGORITHM
-
-config 3_LEVEL_PGTABLES
-       bool "Three-level pagetables (EXPERIMENTAL)" if !64BIT
-       default 64BIT
-       depends on EXPERIMENTAL
-       help
-       Three-level pagetables will let UML have more than 4G of physical
-       memory.  All the memory that can't be mapped directly will be treated
-       as high memory.
-
-       However, this it experimental on 32-bit architectures, so if unsure say
-       N (on x86-64 it's automatically enabled, instead, as it's safe there).
-
-config ARCH_HAS_SC_SIGNALS
-       def_bool !64BIT
-
-config ARCH_REUSE_HOST_VSYSCALL_AREA
-       def_bool !64BIT
-
-config SMP_BROKEN
-       def_bool 64BIT
-
-config GENERIC_HWEIGHT
-       def_bool y
-
-source "arch/um/Kconfig.um"
-
-endmenu
-
-source "arch/um/Kconfig.rest"
index c0f712cc7c5fb1c0a216df717c70daeac0dab138..7730af6ec13feba4898ca3a8e85e3e416c57c33d 100644 (file)
@@ -20,15 +20,27 @@ core-y                      += $(ARCH_DIR)/kernel/          \
 
 MODE_INCLUDE   += -I$(srctree)/$(ARCH_DIR)/include/shared/skas
 
+HEADER_ARCH    := $(SUBARCH)
+
+# Additional ARCH settings for x86
+ifeq ($(SUBARCH),i386)
+        HEADER_ARCH := x86
+endif
+ifeq ($(SUBARCH),x86_64)
+        HEADER_ARCH := x86
+endif
+
+HOST_DIR := arch/$(HEADER_ARCH)
+
 include $(srctree)/$(ARCH_DIR)/Makefile-skas
+include $(srctree)/$(HOST_DIR)/Makefile.um
+
+core-y += $(HOST_DIR)/um/
 
 SHARED_HEADERS := $(ARCH_DIR)/include/shared
 ARCH_INCLUDE   := -I$(srctree)/$(SHARED_HEADERS)
-ARCH_INCLUDE   += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)/shared
-ifneq ($(KBUILD_SRC),)
-ARCH_INCLUDE   += -I$(SHARED_HEADERS)
-endif
-KBUILD_CPPFLAGS += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)
+ARCH_INCLUDE   += -I$(srctree)/$(HOST_DIR)/um/shared
+KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um
 
 # -Dvmap=kernel_vmap prevents anything from referencing the libpcap.o symbol so
 # named - it's a common symbol in libpcap, so we get a binary which crashes.
@@ -47,14 +59,12 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE)
 
 USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\
        $(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \
-       $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64
-
-include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH)
+       $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 -idirafter include
 
 #This will adjust *FLAGS accordingly to the platform.
 include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
 
-KBUILD_CPPFLAGS += -I$(srctree)/arch/$(HEADER_ARCH)/include
+KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include
 
 # -Derrno=kernel_errno - This turns all kernel references to errno into
 # kernel_errno to separate them from the libc errno.  This allows -fno-common
@@ -84,10 +94,9 @@ define archhelp
   echo '                  find in the kernel root.'
 endef
 
-KBUILD_KCONFIG := arch/um/Kconfig.$(HEADER_ARCH)
+KBUILD_KCONFIG := $(HOST_DIR)/um/Kconfig
 
-archprepare: $(SHARED_HEADERS)/user_constants.h
-archprepare: $(SHARED_HEADERS)/kern_constants.h
+archprepare: include/generated/user_constants.h
 
 LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
 LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
@@ -118,9 +127,7 @@ endef
 
 # When cleaning we don't include .config, so we don't include
 # TT or skas makefiles and don't clean skas_ptregs.h.
-CLEAN_FILES += linux x.i gmon.out \
-       $(SHARED_HEADERS)/user_constants.h \
-       $(SHARED_HEADERS)/kern_constants.h
+CLEAN_FILES += linux x.i gmon.out
 
 archclean:
        @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
@@ -128,8 +135,8 @@ archclean:
 
 # Generated files
 
-$(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s: FORCE
-       $(Q)$(MAKE) $(build)=$(ARCH_DIR)/sys-$(SUBARCH) $@
+$(HOST_DIR)/um/user-offsets.s: FORCE
+       $(Q)$(MAKE) $(build)=$(HOST_DIR)/um $@
 
 define filechk_gen-asm-offsets
         (set -e; \
@@ -144,11 +151,7 @@ define filechk_gen-asm-offsets
          echo ""; )
 endef
 
-$(SHARED_HEADERS)/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s
+include/generated/user_constants.h: $(HOST_DIR)/um/user-offsets.s
        $(call filechk,gen-asm-offsets)
 
-$(SHARED_HEADERS)/kern_constants.h:
-       $(Q)mkdir -p $(dir $@)
-       $(Q)echo '#include "../../../../include/generated/asm-offsets.h"' >$@
-
-export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS HEADER_ARCH DEV_NULL_PATH
+export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS DEV_NULL_PATH
diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386
deleted file mode 100644 (file)
index 302cbe5..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-core-y += arch/um/sys-i386/ arch/x86/crypto/
-
-TOP_ADDR := $(CONFIG_TOP_ADDR)
-
-START := 0x8048000
-
-LDFLAGS                        += -m elf_i386
-ELF_ARCH               := $(SUBARCH)
-ELF_FORMAT             := elf32-$(SUBARCH)
-OBJCOPYFLAGS           := -O binary -R .note -R .comment -S
-HEADER_ARCH            := x86
-CHECKFLAGS     += -D__i386__
-
-ifeq ("$(origin SUBARCH)", "command line")
-ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)")
-KBUILD_CFLAGS          += $(call cc-option,-m32)
-KBUILD_AFLAGS          += $(call cc-option,-m32)
-LINK-y                 += $(call cc-option,-m32)
-UML_OBJCOPYFLAGS       += -F $(ELF_FORMAT)
-
-export LDFLAGS HOSTCFLAGS HOSTLDFLAGS UML_OBJCOPYFLAGS
-endif
-endif
-
-# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
-include $(srctree)/arch/x86/Makefile_32.cpu
-
-# prevent gcc from keeping the stack 16 byte aligned. Taken from i386.
-cflags-y += $(call cc-option,-mpreferred-stack-boundary=2)
-
-# Prevent sprintf in nfsd from being converted to strcpy and resulting in
-# an unresolved reference.
-cflags-y += -ffreestanding
-
-# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
-# a lot more stack due to the lack of sharing of stacklots.  Also, gcc
-# 4.3.0 needs -funit-at-a-time for extern inline functions.
-KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
-                       echo $(call cc-option,-fno-unit-at-a-time); \
-                       else echo $(call cc-option,-funit-at-a-time); fi ;)
-
-KBUILD_CFLAGS += $(cflags-y)
diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64
deleted file mode 100644 (file)
index a9cd7e7..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright 2003 - 2004 Pathscale, Inc
-# Released under the GPL
-
-core-y += arch/um/sys-x86_64/ arch/x86/crypto/
-START := 0x60000000
-
-_extra_flags_ = -fno-builtin -m64
-
-KBUILD_CFLAGS += $(_extra_flags_)
-
-CHECKFLAGS  += -m64 -D__x86_64__
-KBUILD_AFLAGS += -m64
-LDFLAGS += -m elf_x86_64
-KBUILD_CPPFLAGS += -m64
-
-ELF_ARCH := i386:x86-64
-ELF_FORMAT := elf64-x86-64
-HEADER_ARCH := x86
-
-# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
-
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
-LINK-y += -m64
-
-# Do unit-at-a-time unconditionally on x86_64, following the host
-KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h
new file mode 100644 (file)
index 0000000..8df0fd9
--- /dev/null
@@ -0,0 +1,50 @@
+/* 
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __CHAN_KERN_H__
+#define __CHAN_KERN_H__
+
+#include <linux/tty.h>
+#include <linux/list.h>
+#include <linux/console.h>
+#include "chan_user.h"
+#include "line.h"
+
+struct chan {
+       struct list_head list;
+       struct list_head free_list;
+       struct line *line;
+       char *dev;
+       unsigned int primary:1;
+       unsigned int input:1;
+       unsigned int output:1;
+       unsigned int opened:1;
+       unsigned int enabled:1;
+       int fd;
+       const struct chan_ops *ops;
+       void *data;
+};
+
+extern void chan_interrupt(struct list_head *chans, struct delayed_work *task,
+                          struct tty_struct *tty, int irq);
+extern int parse_chan_pair(char *str, struct line *line, int device,
+                          const struct chan_opts *opts, char **error_out);
+extern int write_chan(struct list_head *chans, const char *buf, int len,
+                            int write_irq);
+extern int console_write_chan(struct list_head *chans, const char *buf, 
+                             int len);
+extern int console_open_chan(struct line *line, struct console *co);
+extern void deactivate_chan(struct list_head *chans, int irq);
+extern void reactivate_chan(struct list_head *chans, int irq);
+extern void chan_enable_winch(struct list_head *chans, struct tty_struct *tty);
+extern int enable_chan(struct line *line);
+extern void close_chan(struct list_head *chans, int delay_free_irq);
+extern int chan_window_size(struct list_head *chans, 
+                            unsigned short *rows_out, 
+                            unsigned short *cols_out);
+extern int chan_config_string(struct list_head *chans, char *str, int size,
+                             char **error_out);
+
+#endif
index d4191fe1ceded2e87b1ed08dacc748810f0a79d8..420e2c8007992cc98d1f5ed68ff4be26dad7fa8c 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/slab.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
-#include "chan_kern.h"
+#include "chan.h"
 #include "os.h"
 
 #ifdef CONFIG_NOCONFIG_CHAN
@@ -358,11 +358,11 @@ int chan_window_size(struct list_head *chans, unsigned short *rows_out,
        return 0;
 }
 
-static void free_one_chan(struct chan *chan, int delay_free_irq)
+static void free_one_chan(struct chan *chan)
 {
        list_del(&chan->list);
 
-       close_one_chan(chan, delay_free_irq);
+       close_one_chan(chan, 0);
 
        if (chan->ops->free != NULL)
                (*chan->ops->free)(chan->data);
@@ -372,14 +372,14 @@ static void free_one_chan(struct chan *chan, int delay_free_irq)
        kfree(chan);
 }
 
-static void free_chan(struct list_head *chans, int delay_free_irq)
+static void free_chan(struct list_head *chans)
 {
        struct list_head *ele, *next;
        struct chan *chan;
 
        list_for_each_safe(ele, next, chans) {
                chan = list_entry(ele, struct chan, list);
-               free_one_chan(chan, delay_free_irq);
+               free_one_chan(chan);
        }
 }
 
@@ -547,7 +547,7 @@ int parse_chan_pair(char *str, struct line *line, int device,
        char *in, *out;
 
        if (!list_empty(chans)) {
-               free_chan(chans, 0);
+               free_chan(chans);
                INIT_LIST_HEAD(chans);
        }
 
index cfeb3f4a44afc0ef9cb6a5edf5e08d0d81ec32a1..f180813ce2c7cb0caf950fa1d90852ab1f0ada55 100644 (file)
 #include <termios.h>
 #include <sys/ioctl.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 void generic_close(int fd, void *unused)
 {
@@ -283,7 +281,12 @@ void register_winch(int fd, struct tty_struct *tty)
                return;
 
        pid = tcgetpgrp(fd);
-       if (!is_skas_winch(pid, fd, tty) && (pid == -1)) {
+       if (is_skas_winch(pid, fd, tty)) {
+               register_winch_irq(-1, fd, -1, tty, 0);
+               return;
+       }
+
+       if (pid == -1) {
                thread = winch_tramp(fd, tty, &thread_fd, &stack);
                if (thread < 0)
                        return;
diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h
new file mode 100644 (file)
index 0000000..9b9ced8
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __CHAN_USER_H__
+#define __CHAN_USER_H__
+
+#include "init.h"
+
+struct chan_opts {
+       void (*const announce)(char *dev_name, int dev);
+       char *xterm_title;
+       const int raw;
+};
+
+enum chan_init_pri { INIT_STATIC, INIT_ALL, INIT_ONE };
+
+struct chan_ops {
+       char *type;
+       void *(*init)(char *, int, const struct chan_opts *);
+       int (*open)(int, int, int, void *, char **);
+       void (*close)(int, void *);
+       int (*read)(int, char *, void *);
+       int (*write)(int, const char *, int, void *);
+       int (*console_write)(int, const char *, int);
+       int (*window_size)(int, void *, unsigned short *, unsigned short *);
+       void (*free)(void *);
+       int winch;
+};
+
+extern const struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops,
+       tty_ops, xterm_ops;
+
+extern void generic_close(int fd, void *unused);
+extern int generic_read(int fd, char *c_out, void *unused);
+extern int generic_write(int fd, const char *buf, int n, void *unused);
+extern int generic_console_write(int fd, const char *buf, int n);
+extern int generic_window_size(int fd, void *unused, unsigned short *rows_out,
+                              unsigned short *cols_out);
+extern void generic_free(void *data);
+
+struct tty_struct;
+extern void register_winch(int fd, struct tty_struct *tty);
+extern void register_winch_irq(int fd, int tty_fd, int pid,
+                              struct tty_struct *tty, unsigned long stack);
+
+#define __channel_help(fn, prefix) \
+__uml_help(fn, prefix "[0-9]*=<channel description>\n" \
+"    Attach a console or serial line to a host channel.  See\n" \
+"    http://user-mode-linux.sourceforge.net/old/input.html for a complete\n" \
+"    description of this switch.\n\n" \
+);
+
+#endif
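
__channel_help wires a channel prefix into the kernel command-line help output via __uml_help (from init.h). A representative, hypothetical registration, mirroring how the console and serial-line drivers use it:

    /* Hypothetical: exposes help for a "ser<n>=<channel description>"
     * boot parameter handled by example_serial_setup(). */
    __channel_help(example_serial_setup, "ser");
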
index f5701fd2ef90b2d8d1964c3222ebb155c2594ec1..7f2ed0b8824a53ea085be063f860c8a3844b55a1 100644 (file)
@@ -3,7 +3,6 @@
 
 #include "kern_util.h"
 #include "os.h"
-#include "user.h"
 #include "um_malloc.h"
 
 static inline void *cow_malloc(int size)
index f8e85e0bdace20fddc82ca805e6101bcc9c76a8d..a4fd7bc14af7ddbf3e290ec2084cc80b40d5c60a 100644 (file)
@@ -17,7 +17,6 @@
 #include "net_user.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 enum request_type { REQ_NEW_CONTROL };
 
index f5a981a16240e4d8dd92afca3fb710d9d4d84588..5b81d2574415e8a3984bd228517111c6e1378642 100644 (file)
@@ -9,10 +9,8 @@
 #include <errno.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct fd_chan {
        int fd;
index 84dce3fc590cb90eb9238107dc463cb70efd9fc0..0345d6206d40c6a03c9fa43097a1914ee195aacc 100644 (file)
@@ -7,7 +7,6 @@
 #include <unistd.h>
 #include <errno.h>
 #include "os.h"
-#include "user.h"
 
 struct dog_data {
        int stdin;
index 364c8a15c4c33a6113a2ff36f526a88089a062e5..c1cf2206b84bcebb6326a48787c93f965326763b 100644 (file)
@@ -7,7 +7,7 @@
 #include "linux/kd.h"
 #include "linux/sched.h"
 #include "linux/slab.h"
-#include "chan_kern.h"
+#include "chan.h"
 #include "irq_kern.h"
 #include "irq_user.h"
 #include "kern_util.h"
diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h
new file mode 100644 (file)
index 0000000..63df3ca
--- /dev/null
@@ -0,0 +1,106 @@
+/* 
+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __LINE_H__
+#define __LINE_H__
+
+#include "linux/list.h"
+#include "linux/workqueue.h"
+#include "linux/tty.h"
+#include "linux/interrupt.h"
+#include "linux/spinlock.h"
+#include "linux/mutex.h"
+#include "chan_user.h"
+#include "mconsole_kern.h"
+
+/* There's only one modifiable field in this - .mc.list */
+struct line_driver {
+       const char *name;
+       const char *device_name;
+       const short major;
+       const short minor_start;
+       const short type;
+       const short subtype;
+       const int read_irq;
+       const char *read_irq_name;
+       const int write_irq;
+       const char *write_irq_name;
+       struct mc_device mc;
+};
+
+struct line {
+       struct tty_struct *tty;
+       spinlock_t count_lock;
+       unsigned long count;
+       int valid;
+
+       char *init_str;
+       int init_pri;
+       struct list_head chan_list;
+
+       /* This lock is actually, mostly, local to this driver. */
+       spinlock_t lock;
+       int throttled;
+       /* Yes, this is a real circular buffer.
+        * XXX: And this should become a struct kfifo!
+        *
+        * buffer points to a buffer allocated on demand, of length
+        * LINE_BUFSIZE, head to the start of the ring, tail to the end. */
+       char *buffer;
+       char *head;
+       char *tail;
+
+       int sigio;
+       struct delayed_work task;
+       const struct line_driver *driver;
+       int have_irq;
+};
+
+#define LINE_INIT(str, d) \
+       { .count_lock = __SPIN_LOCK_UNLOCKED((str).count_lock), \
+         .init_str =   str,    \
+         .init_pri =   INIT_STATIC, \
+         .valid =      1, \
+         .lock =       __SPIN_LOCK_UNLOCKED((str).lock), \
+         .driver =     d }
+
+extern void line_close(struct tty_struct *tty, struct file * filp);
+extern int line_open(struct line *lines, struct tty_struct *tty);
+extern int line_setup(struct line *lines, unsigned int sizeof_lines,
+                     char *init, char **error_out);
+extern int line_write(struct tty_struct *tty, const unsigned char *buf,
+                     int len);
+extern int line_put_char(struct tty_struct *tty, unsigned char ch);
+extern void line_set_termios(struct tty_struct *tty, struct ktermios * old);
+extern int line_chars_in_buffer(struct tty_struct *tty);
+extern void line_flush_buffer(struct tty_struct *tty);
+extern void line_flush_chars(struct tty_struct *tty);
+extern int line_write_room(struct tty_struct *tty);
+extern int line_ioctl(struct tty_struct *tty, unsigned int cmd,
+                               unsigned long arg);
+extern void line_throttle(struct tty_struct *tty);
+extern void line_unthrottle(struct tty_struct *tty);
+
+extern char *add_xterm_umid(char *base);
+extern int line_setup_irq(int fd, int input, int output, struct line *line,
+                         void *data);
+extern void line_close_chan(struct line *line);
+extern struct tty_driver *register_lines(struct line_driver *line_driver,
+                                        const struct tty_operations *driver,
+                                        struct line *lines, int nlines);
+extern void lines_init(struct line *lines, int nlines, struct chan_opts *opts);
+extern void close_lines(struct line *lines, int nlines);
+
+extern int line_config(struct line *lines, unsigned int sizeof_lines,
+                      char *str, const struct chan_opts *opts,
+                      char **error_out);
+extern int line_id(char **str, int *start_out, int *end_out);
+extern int line_remove(struct line *lines, unsigned int sizeof_lines, int n,
+                      char **error_out);
+extern int line_get_config(char *dev, struct line *lines,
+                          unsigned int sizeof_lines, char *str,
+                          int size, char **error_out);
+
+#endif
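
The buffer/head/tail trio above is a plain circular buffer of LINE_BUFSIZE bytes, as the XXX comment concedes. A sketch of the wrap-around store such a layout implies, assuming the caller has already checked for free space; this is an illustration, not the writer in line.c:

    /* Illustrative ring store into struct line; needs <string.h>. */
    static void ring_put(struct line *line, const char *buf, int len)
    {
            int room = line->buffer + LINE_BUFSIZE - line->tail;

            if (len <= room) {                      /* fits before the end */
                    memcpy(line->tail, buf, len);
                    line->tail += len;
            } else {                                /* wrap to the start */
                    memcpy(line->tail, buf, room);
                    memcpy(line->buffer, buf + room, len - room);
                    line->tail = line->buffer + (len - room);
            }
    }
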
diff --git a/arch/um/drivers/mconsole.h b/arch/um/drivers/mconsole.h
new file mode 100644 (file)
index 0000000..c139ae1
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __MCONSOLE_H__
+#define __MCONSOLE_H__
+
+#ifndef __KERNEL__
+#include <stdint.h>
+#define u32 uint32_t
+#endif
+
+#include "sysdep/ptrace.h"
+
+#define MCONSOLE_MAGIC (0xcafebabe)
+#define MCONSOLE_MAX_DATA (512)
+#define MCONSOLE_VERSION 2
+
+struct mconsole_request {
+       u32 magic;
+       u32 version;
+       u32 len;
+       char data[MCONSOLE_MAX_DATA];
+};
+
+struct mconsole_reply {
+       u32 err;
+       u32 more;
+       u32 len;
+       char data[MCONSOLE_MAX_DATA];
+};
+
+struct mconsole_notify {
+       u32 magic;
+       u32 version;
+       enum { MCONSOLE_SOCKET, MCONSOLE_PANIC, MCONSOLE_HANG,
+              MCONSOLE_USER_NOTIFY } type;
+       u32 len;
+       char data[MCONSOLE_MAX_DATA];
+};
+
+struct mc_request;
+
+enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC };
+
+struct mconsole_command
+{
+       char *command;
+       void (*handler)(struct mc_request *req);
+       enum mc_context context;
+};
+
+struct mc_request
+{
+       int len;
+       int as_interrupt;
+
+       int originating_fd;
+       unsigned int originlen;
+       unsigned char origin[128];                      /* sockaddr_un */
+
+       struct mconsole_request request;
+       struct mconsole_command *cmd;
+       struct uml_pt_regs regs;
+};
+
+extern char mconsole_socket_name[];
+
+extern int mconsole_unlink_socket(void);
+extern int mconsole_reply_len(struct mc_request *req, const char *reply,
+                             int len, int err, int more);
+extern int mconsole_reply(struct mc_request *req, const char *str, int err,
+                         int more);
+
+extern void mconsole_version(struct mc_request *req);
+extern void mconsole_help(struct mc_request *req);
+extern void mconsole_halt(struct mc_request *req);
+extern void mconsole_reboot(struct mc_request *req);
+extern void mconsole_config(struct mc_request *req);
+extern void mconsole_remove(struct mc_request *req);
+extern void mconsole_sysrq(struct mc_request *req);
+extern void mconsole_cad(struct mc_request *req);
+extern void mconsole_stop(struct mc_request *req);
+extern void mconsole_go(struct mc_request *req);
+extern void mconsole_log(struct mc_request *req);
+extern void mconsole_proc(struct mc_request *req);
+extern void mconsole_stack(struct mc_request *req);
+
+extern int mconsole_get_request(int fd, struct mc_request *req);
+extern int mconsole_notify(char *sock_name, int type, const void *data,
+                          int len);
+extern char *mconsole_notify_socket(void);
+extern void lock_notify(void);
+extern void unlock_notify(void);
+
+#endif
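
The wire format is symmetric: a client fills struct mconsole_request with the magic/version/len triple and sends it as one datagram over the instance's UNIX socket; replies come back as struct mconsole_reply, with .more set while output continues. A minimal client-side sketch (assumes an already-connected AF_UNIX SOCK_DGRAM socket; this mirrors, but is not, the uml_mconsole tool):

    /* Sketch: send one mconsole command.  Needs <string.h> and
     * <sys/socket.h>; "sock" is already connected to the UML socket. */
    static int send_mconsole_cmd(int sock, const char *cmd)
    {
            struct mconsole_request req = {
                    .magic   = MCONSOLE_MAGIC,
                    .version = MCONSOLE_VERSION,
                    .len     = strlen(cmd),
            };

            strncpy(req.data, cmd, MCONSOLE_MAX_DATA - 1);
            return send(sock, &req, sizeof(req), 0);
    }
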
diff --git a/arch/um/drivers/mconsole_kern.h b/arch/um/drivers/mconsole_kern.h
new file mode 100644 (file)
index 0000000..d2fe07e
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __MCONSOLE_KERN_H__
+#define __MCONSOLE_KERN_H__
+
+#include "linux/list.h"
+#include "mconsole.h"
+
+struct mconsole_entry {
+       struct list_head list;
+       struct mc_request request;
+};
+
+/* All these methods are called in process context. */
+struct mc_device {
+       struct list_head list;
+       char *name;
+       int (*config)(char *, char **);
+       int (*get_config)(char *, char *, int, char **);
+       int (*id)(char **, int *, int *);
+       int (*remove)(int, char **);
+};
+
+#define CONFIG_CHUNK(str, size, current, chunk, end) \
+do { \
+       current += strlen(chunk); \
+       if(current >= size) \
+               str = NULL; \
+       if(str != NULL){ \
+               strcpy(str, chunk); \
+               str += strlen(chunk); \
+       } \
+       if(end) \
+               current++; \
+} while(0)
+
+#ifdef CONFIG_MCONSOLE
+
+extern void mconsole_register_dev(struct mc_device *new);
+
+#else
+
+static inline void mconsole_register_dev(struct mc_device *new)
+{
+}
+
+#endif
+
+#endif
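
CONFIG_CHUNK deserves a note: it appends chunk to str while accumulating the total length into its third argument, and NULLs str once size is exceeded, so the caller still learns the length that would have been needed. A hedged sketch of an mc_device .get_config handler built on it (names hypothetical):

    /* Illustrative get_config: emits "<dev>=<value>" into str, at most
     * size bytes, returning the full length even on overflow. */
    static int example_get_config(char *dev, char *str, int size,
                                  char **error_out)
    {
            int n = 0;

            CONFIG_CHUNK(str, size, n, dev, 0);
            CONFIG_CHUNK(str, size, n, "=somevalue", 1);  /* 1: final chunk */
            return n;
    }
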
index f8cf4c8bedef3eb91f161a6da67e474ae508464d..99209826adb15daffcd8ed299e00b42ae8ea752d 100644 (file)
@@ -10,9 +10,7 @@
 #include <sys/socket.h>
 #include <sys/uio.h>
 #include <sys/un.h>
-#include "kern_constants.h"
 #include "mconsole.h"
-#include "user.h"
 
 static struct mconsole_command commands[] = {
        /*
index 520118888f163755fce86c802f55b1ac2c83c8a5..05090c37fa8439417dc2cfbd456a39c911a9f7e5 100644 (file)
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include "net_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 int tap_open_common(void *dev, char *gate_addr)
 {
index 5f903587d69e4ae322672270c0bba17d2ec41295..702a75b190ee552876e2e0a7e715e536e3cd154c 100644 (file)
@@ -9,9 +9,7 @@
 #include <asm/types.h>
 #include "net_user.h"
 #include "pcap_user.h"
-#include "kern_constants.h"
 #include "um_malloc.h"
-#include "user.h"
 
 #define PCAP_FD(p) (*(int *)(p))
 
index b49bf56a56aa4a7b0b94a2b3709c972a144302f4..7b010b76ddf092cc7cd6322fcf49dd519a4d2ecf 100644 (file)
 #include <unistd.h>
 #include <netinet/in.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "port.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct port_chan {
        int raw;
index 1113911dcb2bfb70696785ac194089705c9c8693..cff2b75d31fd7b3bf53be49c75f6a887f8159129 100644 (file)
 #include <termios.h>
 #include <sys/stat.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct pty_chan {
        void (*announce)(char *dev_name, int dev);
index cbacfc4e63e674341eded605cd508b2b61f06792..932b4d69bec20e9929f12426376aa617665be60a 100644 (file)
 #include <string.h>
 #include <sys/termios.h>
 #include <sys/wait.h>
-#include "kern_constants.h"
 #include "net_user.h"
 #include "os.h"
 #include "slip.h"
 #include "um_malloc.h"
-#include "user.h"
 
 static int slip_user_init(void *data, void *dev)
 {
index a0ada8fec72ab5cde45462e3a2ed0311a0561e50..db4adb639ff82e41c0f3025145953fab810ebb91 100644 (file)
@@ -7,11 +7,9 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/wait.h>
-#include "kern_constants.h"
 #include "net_user.h"
 #include "os.h"
 #include "slirp.h"
-#include "user.h"
 
 static int slirp_user_init(void *data, void *dev)
 {
index f1786e64607f11d785ab333bbe46e6ba3c8d7f0e..9d8c20af6f80dac061641ce61de33975c176b892 100644 (file)
 #include "linux/console.h"
 #include "asm/termbits.h"
 #include "asm/irq.h"
-#include "line.h"
 #include "ssl.h"
-#include "chan_kern.h"
-#include "kern.h"
+#include "chan.h"
 #include "init.h"
 #include "irq_user.h"
 #include "mconsole_kern.h"
index 49266f6108c480c3afb7e551c9b0d788f9c90f00..088776f01908e2eb5a27281cbac37b5927012881 100644 (file)
@@ -20,8 +20,7 @@
 #include "asm/current.h"
 #include "asm/irq.h"
 #include "stdio_console.h"
-#include "line.h"
-#include "chan_kern.h"
+#include "chan.h"
 #include "irq_user.h"
 #include "mconsole_kern.h"
 #include "init.h"
index 495858a090e4f49e50d4a79db016173ea4b3a6df..a97391f9ec54eea1c4cd290d87c9b2b243d9fa68 100644 (file)
@@ -7,10 +7,8 @@
 #include <fcntl.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct tty_chan {
        char *dev;
index 620f5b70957d55c4001a31fc9b9682a2e4feb687..944453a3ec9972f136060054893f7e7aaa3ea568 100644 (file)
@@ -46,7 +46,6 @@
 #include "asm/tlbflush.h"
 #include "mem_user.h"
 #include "kern_util.h"
-#include "kern.h"
 #include "mconsole_kern.h"
 #include "init.h"
 #include "irq_user.h"
@@ -54,7 +53,6 @@
 #include "ubd_user.h"
 #include "os.h"
 #include "mem.h"
-#include "mem_kern.h"
 #include "cow.h"
 
 enum ubd_req { UBD_READ, UBD_WRITE };
@@ -513,8 +511,37 @@ __uml_exitcall(kill_io_thread);
 static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 {
        char *file;
+       int fd;
+       int err;
+
+       __u32 version;
+       __u32 align;
+       char *backing_file;
+       time_t mtime;
+       unsigned long long size;
+       int sector_size;
+       int bitmap_offset;
+
+       if (ubd_dev->file && ubd_dev->cow.file) {
+               file = ubd_dev->cow.file;
+
+               goto out;
+       }
 
-       file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
+       fd = os_open_file(ubd_dev->file, global_openflags, 0);
+       if (fd < 0)
+               return fd;
+
+       err = read_cow_header(file_reader, &fd, &version, &backing_file,
+               &mtime, &size, &sector_size, &align, &bitmap_offset);
+       os_close_file(fd);
+
+       if (err == -EINVAL)
+               file = ubd_dev->file;
+       else
+               file = backing_file;
+
+out:
        return os_file_size(file, size_out);
 }
 
index b591bb9c41dd83f4b72152c42c8ae70c60218698..007b94d97726e7864a6ccd543038f76d7b488a88 100644 (file)
@@ -16,7 +16,6 @@
 #include <sys/mman.h>
 #include <sys/param.h>
 #include "asm/types.h"
-#include "user.h"
 #include "ubd_user.h"
 #include "os.h"
 #include "cow.h"
diff --git a/arch/um/drivers/ubd_user.h b/arch/um/drivers/ubd_user.h
new file mode 100644 (file)
index 0000000..3845051
--- /dev/null
@@ -0,0 +1,16 @@
+/* 
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_UBD_USER_H
+#define __UM_UBD_USER_H
+
+extern void ignore_sigwinch_sig(void);
+extern int start_io_thread(unsigned long sp, int *fds_out);
+extern int io_thread(void *arg);
+extern int kernel_fd;
+
+#endif
+
index 59c56fd6f52a1698493ae5d010196602d998212d..010fa2d849ecbf501c652914afe51737705261bc 100644 (file)
 #include <unistd.h>
 #include <errno.h>
 #include <netinet/in.h>
-#include "kern_constants.h"
 #include "umcast.h"
 #include "net_user.h"
 #include "um_malloc.h"
-#include "user.h"
 
 static struct sockaddr_in *new_addr(char *addr, unsigned short port)
 {
index c5c43253e6ce4994778205e1853e2026f19a0cae..b8c286748d3d3be5008a9c30e070948f56941d20 100644 (file)
@@ -6,10 +6,8 @@
 #include <stddef.h>
 #include <errno.h>
 #include <libvdeplug.h>
-#include "kern_constants.h"
 #include "net_user.h"
 #include "um_malloc.h"
-#include "user.h"
 #include "vde.h"
 
 static int vde_user_init(void *data, void *dev)
index 2e1de57286045ab9cf606250059a4d094b486ab2..969110e56487cef57ea99787cb77890a84e81876 100644 (file)
 #include <string.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 #include "xterm.h"
 
 struct xterm_chan {
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
new file mode 100644 (file)
index 0000000..451f451
--- /dev/null
@@ -0,0 +1,3 @@
+generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h
+generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
+generic-y += ftrace.h
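
Each header named in generic-y is emitted at build time as a one-line wrapper around its asm-generic counterpart, which is what lets the hand-written stubs below be deleted. The generated arch/um/include/generated/asm/bug.h, for instance, is in effect just:

    #include <asm-generic/bug.h>
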
diff --git a/arch/um/include/asm/apic.h b/arch/um/include/asm/apic.h
deleted file mode 100644 (file)
index 876dee8..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __UM_APIC_H
-#define __UM_APIC_H
-
-#endif
diff --git a/arch/um/include/asm/arch_hweight.h b/arch/um/include/asm/arch_hweight.h
deleted file mode 100644 (file)
index c656cf4..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_UM_HWEIGHT_H
-#define _ASM_UM_HWEIGHT_H
-
-#include <asm-generic/bitops/arch_hweight.h>
-
-#endif
diff --git a/arch/um/include/asm/bug.h b/arch/um/include/asm/bug.h
deleted file mode 100644 (file)
index 9e33b86..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_BUG_H
-#define __UM_BUG_H
-
-#include <asm-generic/bug.h>
-
-#endif
diff --git a/arch/um/include/asm/checksum.h b/arch/um/include/asm/checksum.h
deleted file mode 100644 (file)
index 5b50136..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_CHECKSUM_H
-#define __UM_CHECKSUM_H
-
-#include "sysdep/checksum.h"
-
-#endif
diff --git a/arch/um/include/asm/cputime.h b/arch/um/include/asm/cputime.h
deleted file mode 100644 (file)
index c84acba..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_CPUTIME_H
-#define __UM_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __UM_CPUTIME_H */
diff --git a/arch/um/include/asm/desc.h b/arch/um/include/asm/desc.h
deleted file mode 100644 (file)
index 4ec34a5..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __UM_DESC_H
-#define __UM_DESC_H
-
-/* Taken from asm-i386/desc.h, it's the only thing we need. The rest wouldn't
- * compile, and has never been used. */
-#define LDT_empty(info) (\
-       (info)->base_addr       == 0    && \
-       (info)->limit           == 0    && \
-       (info)->contents        == 0    && \
-       (info)->read_exec_only  == 1    && \
-       (info)->seg_32bit       == 0    && \
-       (info)->limit_in_pages  == 0    && \
-       (info)->seg_not_present == 1    && \
-       (info)->useable         == 0    )
-
-#endif
diff --git a/arch/um/include/asm/device.h b/arch/um/include/asm/device.h
deleted file mode 100644 (file)
index d8f9872..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/arch/um/include/asm/emergency-restart.h b/arch/um/include/asm/emergency-restart.h
deleted file mode 100644 (file)
index 108d8c4..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/um/include/asm/ftrace.h b/arch/um/include/asm/ftrace.h
deleted file mode 100644 (file)
index 40a8c17..0000000
+++ /dev/null
@@ -1 +0,0 @@
-/* empty */
diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
deleted file mode 100644 (file)
index 6a332a9..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#include <asm-generic/futex.h>
-
-#endif
diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
deleted file mode 100644 (file)
index fb3c05a..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/hardirq.h>
diff --git a/arch/um/include/asm/hw_irq.h b/arch/um/include/asm/hw_irq.h
deleted file mode 100644 (file)
index 1cf84cf..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_UM_HW_IRQ_H
-#define _ASM_UM_HW_IRQ_H
-
-#include "asm/irq.h"
-#include "asm/archparam.h"
-
-#endif
diff --git a/arch/um/include/asm/irq_regs.h b/arch/um/include/asm/irq_regs.h
deleted file mode 100644 (file)
index 3dd9c0b..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/um/include/asm/irq_vectors.h b/arch/um/include/asm/irq_vectors.h
deleted file mode 100644 (file)
index 272a81e..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_IRQ_VECTORS_H
-#define __UM_IRQ_VECTORS_H
-
-#endif
-
index 659b9abdfdba6f0f968c5f81b6dc993223e1ecdb..c780d8a1677351ab99cd4bdf643e5d363ac4e9e5 100644 (file)
@@ -1,6 +1,42 @@
 #ifndef __UM_IRQFLAGS_H
 #define __UM_IRQFLAGS_H
 
-/* Empty for now */
+extern int get_signals(void);
+extern int set_signals(int enable);
+extern void block_signals(void);
+extern void unblock_signals(void);
+
+static inline unsigned long arch_local_save_flags(void)
+{
+       return get_signals();
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+       set_signals(flags);
+}
+
+static inline void arch_local_irq_enable(void)
+{
+       unblock_signals();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+       block_signals();
+}
+
+static inline unsigned long arch_local_irq_save(void)
+{
+       unsigned long flags;
+       flags = arch_local_save_flags();
+       arch_local_irq_disable();
+       return flags;
+}
+
+static inline bool arch_irqs_disabled(void)
+{
+       return arch_local_save_flags() == 0;
+}
 
 #endif
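
With these hooks, the generic local_irq_* macros resolve to host signal blocking: UML has no hardware interrupt flag, so "interrupts off" means "SIGIO and the timer signal blocked". An illustrative trace of how generic code lands here:

    /* local_irq_save(flags)    -> arch_local_irq_save()
     *                           -> block_signals()
     * local_irq_restore(flags) -> arch_local_irq_restore(flags)
     *                           -> set_signals(flags)           */
    unsigned long flags;

    local_irq_save(flags);          /* host signals now blocked */
    /* ... critical section ... */
    local_irq_restore(flags);       /* prior signal state restored */
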
diff --git a/arch/um/include/asm/kdebug.h b/arch/um/include/asm/kdebug.h
deleted file mode 100644 (file)
index 6ece1b0..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
index cf259de5153160be38b8d2073d0fe111e05a4f4c..30509b9f37fdc3a348c175b548af9c7f19b47362 100644 (file)
@@ -1,12 +1,24 @@
 /* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#ifndef __MMU_H
-#define __MMU_H
+#ifndef __ARCH_UM_MMU_H
+#define __ARCH_UM_MMU_H
 
-#include "um_mmu.h"
+#include "mm_id.h"
+#include <asm/mm_context.h>
 
-#endif
+typedef struct mm_context {
+       struct mm_id id;
+       struct uml_arch_mm_context arch;
+       struct page **stub_pages;
+} mm_context_t;
+
+extern void __switch_mm(struct mm_id * mm_idp);
 
+/* Avoid tangled inclusion with asm/ldt.h */
+extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
+extern void free_ldt(struct mm_context *mm);
+
+#endif
index 34d813011b7a803ab3ee5ef2ae5221f918a61f90..591b3d8d7614540680042ebcb8e90f190f195b13 100644 (file)
@@ -6,15 +6,12 @@
 #ifndef __UM_MMU_CONTEXT_H
 #define __UM_MMU_CONTEXT_H
 
-#include "linux/sched.h"
-#include "um_mmu.h"
+#include <linux/sched.h>
+#include <asm/mmu.h>
 
 extern void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
 extern void arch_exit_mmap(struct mm_struct *mm);
 
-#define get_mmu_context(task) do ; while(0)
-#define activate_context(tsk) do ; while(0)
-
 #define deactivate_mm(tsk,mm)  do { } while (0)
 
 extern void force_flush_all(void);
index 4cc9b6cf480a299dc201791a189babd4362177d4..7cfc3cedce8411b392a7c96f5104a24022e58bce 100644 (file)
@@ -19,7 +19,7 @@
 struct page;
 
 #include <linux/types.h>
-#include <sysdep/vm-flags.h>
+#include <asm/vm-flags.h>
 
 /*
  * These are used to make use of C type-checking..
diff --git a/arch/um/include/asm/page_offset.h b/arch/um/include/asm/page_offset.h
deleted file mode 100644 (file)
index 1c168df..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#define PAGE_OFFSET_RAW (uml_physmem)
diff --git a/arch/um/include/asm/pda.h b/arch/um/include/asm/pda.h
deleted file mode 100644 (file)
index ddcd774..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_PDA_X86_64_H
-#define __UM_PDA_X86_64_H
-
-/* XXX */
-struct foo {
-       unsigned int __softirq_pending;
-       unsigned int __nmi_count;
-};
-
-extern struct foo me;
-
-#define read_pda(me) (&me)
-
-#endif
-
diff --git a/arch/um/include/asm/percpu.h b/arch/um/include/asm/percpu.h
deleted file mode 100644 (file)
index efe7508..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_PERCPU_H
-#define __UM_PERCPU_H
-
-#include <asm-generic/percpu.h>
-
-#endif /* __UM_PERCPU_H */
index 1a7d2757fe0524aeb7878801d0d1c49079755772..f605d3c4844cb1e2883a31daf36bc96f2b2e536e 100644 (file)
@@ -23,17 +23,10 @@ struct pt_regs {
 #define PT_REGS_IP(r) UPT_IP(&(r)->regs)
 #define PT_REGS_SP(r) UPT_SP(&(r)->regs)
 
-#define PT_REG(r, reg) UPT_REG(&(r)->regs, reg)
-#define PT_REGS_SET(r, reg, val) UPT_SET(&(r)->regs, reg, val)
-
-#define PT_REGS_SET_SYSCALL_RETURN(r, res) \
-       UPT_SET_SYSCALL_RETURN(&(r)->regs, res)
 #define PT_REGS_RESTART_SYSCALL(r) UPT_RESTART_SYSCALL(&(r)->regs)
 
 #define PT_REGS_SYSCALL_NR(r) UPT_SYSCALL_NR(&(r)->regs)
 
-#define PT_REGS_SC(r) UPT_SC(&(r)->regs)
-
 #define instruction_pointer(regs) PT_REGS_IP(regs)
 
 struct task_struct;
diff --git a/arch/um/include/asm/required-features.h b/arch/um/include/asm/required-features.h
deleted file mode 100644 (file)
index dfb967b..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __UM_REQUIRED_FEATURES_H
-#define __UM_REQUIRED_FEATURES_H
-
-/*
- * Nothing to see, just need something for the i386 and x86_64 asm
- * headers to include.
- */
-
-#endif
diff --git a/arch/um/include/asm/sections.h b/arch/um/include/asm/sections.h
deleted file mode 100644 (file)
index 6b0231e..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _UM_SECTIONS_H
-#define _UM_SECTIONS_H
-
-/* nothing to see, move along */
-#include <asm-generic/sections.h>
-
-#endif
diff --git a/arch/um/include/asm/segment.h b/arch/um/include/asm/segment.h
deleted file mode 100644 (file)
index 45183fc..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __UM_SEGMENT_H
-#define __UM_SEGMENT_H
-
-extern int host_gdt_entry_tls_min;
-
-#define GDT_ENTRY_TLS_ENTRIES 3
-#define GDT_ENTRY_TLS_MIN host_gdt_entry_tls_min
-#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
-
-#endif
diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h
deleted file mode 100644 (file)
index 68a90ec..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef __UM_SYSTEM_GENERIC_H
-#define __UM_SYSTEM_GENERIC_H
-
-#include "sysdep/system.h"
-
-extern int get_signals(void);
-extern int set_signals(int enable);
-extern void block_signals(void);
-extern void unblock_signals(void);
-
-static inline unsigned long arch_local_save_flags(void)
-{
-       return get_signals();
-}
-
-static inline void arch_local_irq_restore(unsigned long flags)
-{
-       set_signals(flags);
-}
-
-static inline void arch_local_irq_enable(void)
-{
-       unblock_signals();
-}
-
-static inline void arch_local_irq_disable(void)
-{
-       block_signals();
-}
-
-static inline unsigned long arch_local_irq_save(void)
-{
-       unsigned long flags;
-       flags = arch_local_save_flags();
-       arch_local_irq_disable();
-       return flags;
-}
-
-static inline bool arch_irqs_disabled(void)
-{
-       return arch_local_save_flags() == 0;
-}
-
-extern void *_switch_to(void *prev, void *next, void *last);
-#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
-
-#endif
diff --git a/arch/um/include/asm/topology.h b/arch/um/include/asm/topology.h
deleted file mode 100644 (file)
index 0905e4f..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_UM_TOPOLOGY_H
-#define _ASM_UM_TOPOLOGY_H
-
-#include <asm-generic/topology.h>
-
-#endif
index b9a895d6fa1df7cf0447573e58983f5b095e36d2..3f22fbf7ca1d5a7dcc4bc9e01dac40db9eb783df 100644 (file)
@@ -6,15 +6,15 @@
 #ifndef __UM_UACCESS_H
 #define __UM_UACCESS_H
 
-#include <asm/errno.h>
-#include <asm/processor.h>
-
 /* thread_info has a mm_segment_t in it, so put the definition up here */
 typedef struct {
        unsigned long seg;
 } mm_segment_t;
 
-#include "linux/thread_info.h"
+#include <linux/thread_info.h>
+#include <linux/errno.h>
+#include <asm/processor.h>
+#include <asm/elf.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
@@ -38,7 +38,86 @@ typedef struct {
 
 #define segment_eq(a, b) ((a).seg == (b).seg)
 
-#include "um_uaccess.h"
+#define __under_task_size(addr, size) \
+       (((unsigned long) (addr) < TASK_SIZE) && \
+        (((unsigned long) (addr) + (size)) < TASK_SIZE))
+
+#define __access_ok_vsyscall(type, addr, size) \
+        ((type == VERIFY_READ) && \
+         ((unsigned long) (addr) >= FIXADDR_USER_START) && \
+         ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \
+         ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))
+
+#define __addr_range_nowrap(addr, size) \
+       ((unsigned long) (addr) <= ((unsigned long) (addr) + (size)))
+
+#define access_ok(type, addr, size) \
+       (__addr_range_nowrap(addr, size) && \
+        (__under_task_size(addr, size) || \
+         __access_ok_vsyscall(type, addr, size) || \
+         segment_eq(get_fs(), KERNEL_DS)))
+
+extern int copy_from_user(void *to, const void __user *from, int n);
+extern int copy_to_user(void __user *to, const void *from, int n);
+
+/*
+ * strncpy_from_user: - Copy a NUL terminated string from userspace.
+ * @dst:   Destination address, in kernel space.  This buffer must be at
+ *         least @count bytes long.
+ * @src:   Source address, in user space.
+ * @count: Maximum number of bytes to copy, including the trailing NUL.
+ *
+ * Copies a NUL-terminated string from userspace to kernel space.
+ *
+ * On success, returns the length of the string (not including the trailing
+ * NUL).
+ *
+ * If access to userspace fails, returns -EFAULT (some data may have been
+ * copied).
+ *
+ * If @count is smaller than the length of the string, copies @count bytes
+ * and returns @count.
+ */
+
+extern int strncpy_from_user(char *dst, const char __user *src, int count);
+
+/*
+ * __clear_user: - Zero a block of memory in user space, with less checking.
+ * @mem:  Destination address, in user space.
+ * @len:  Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns the number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+extern int __clear_user(void __user *mem, int len);
+
+/*
+ * clear_user: - Zero a block of memory in user space.
+ * @mem:  Destination address, in user space.
+ * @len:  Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.
+ *
+ * Returns the number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+extern int clear_user(void __user *mem, int len);
+
+/*
+ * strnlen_user: - Get the size of a string in user space.
+ * @str: The string to measure.
+ * @len: The maximum valid length.
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ * If the string is too long, returns a value greater than @len.
+ */
+extern int strnlen_user(const void __user *str, int len);
 
 #define __copy_from_user(to, from, n) copy_from_user(to, from, n)
 
diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
deleted file mode 100644 (file)
index a19db3e..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_XOR_H
-#define __UM_XOR_H
-
-#include "asm-generic/xor.h"
-
-#endif
index a92b678503cf78b0d760a436ca9b54440f3aac5c..896e16602176026d4cbb8a0030f88b5f9ac3c23e 100644 (file)
@@ -6,7 +6,7 @@
 #ifndef __START_H__
 #define __START_H__
 
-#include "kern_constants.h"
+#include <generated/asm-offsets.h>
 
 /*
  * Stolen from linux/const.h, which can't be directly included since
diff --git a/arch/um/include/shared/chan_kern.h b/arch/um/include/shared/chan_kern.h
deleted file mode 100644 (file)
index 1e65145..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/* 
- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __CHAN_KERN_H__
-#define __CHAN_KERN_H__
-
-#include "linux/tty.h"
-#include "linux/list.h"
-#include "linux/console.h"
-#include "chan_user.h"
-#include "line.h"
-
-struct chan {
-       struct list_head list;
-       struct list_head free_list;
-       struct line *line;
-       char *dev;
-       unsigned int primary:1;
-       unsigned int input:1;
-       unsigned int output:1;
-       unsigned int opened:1;
-       unsigned int enabled:1;
-       int fd;
-       const struct chan_ops *ops;
-       void *data;
-};
-
-extern void chan_interrupt(struct list_head *chans, struct delayed_work *task,
-                          struct tty_struct *tty, int irq);
-extern int parse_chan_pair(char *str, struct line *line, int device,
-                          const struct chan_opts *opts, char **error_out);
-extern int write_chan(struct list_head *chans, const char *buf, int len,
-                            int write_irq);
-extern int console_write_chan(struct list_head *chans, const char *buf, 
-                             int len);
-extern int console_open_chan(struct line *line, struct console *co);
-extern void deactivate_chan(struct list_head *chans, int irq);
-extern void reactivate_chan(struct list_head *chans, int irq);
-extern void chan_enable_winch(struct list_head *chans, struct tty_struct *tty);
-extern int enable_chan(struct line *line);
-extern void close_chan(struct list_head *chans, int delay_free_irq);
-extern int chan_window_size(struct list_head *chans, 
-                            unsigned short *rows_out, 
-                            unsigned short *cols_out);
-extern int chan_config_string(struct list_head *chans, char *str, int size,
-                             char **error_out);
-
-#endif
diff --git a/arch/um/include/shared/chan_user.h b/arch/um/include/shared/chan_user.h
deleted file mode 100644 (file)
index 9b9ced8..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __CHAN_USER_H__
-#define __CHAN_USER_H__
-
-#include "init.h"
-
-struct chan_opts {
-       void (*const announce)(char *dev_name, int dev);
-       char *xterm_title;
-       const int raw;
-};
-
-enum chan_init_pri { INIT_STATIC, INIT_ALL, INIT_ONE };
-
-struct chan_ops {
-       char *type;
-       void *(*init)(char *, int, const struct chan_opts *);
-       int (*open)(int, int, int, void *, char **);
-       void (*close)(int, void *);
-       int (*read)(int, char *, void *);
-       int (*write)(int, const char *, int, void *);
-       int (*console_write)(int, const char *, int);
-       int (*window_size)(int, void *, unsigned short *, unsigned short *);
-       void (*free)(void *);
-       int winch;
-};
-
-extern const struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops,
-       tty_ops, xterm_ops;
-
-extern void generic_close(int fd, void *unused);
-extern int generic_read(int fd, char *c_out, void *unused);
-extern int generic_write(int fd, const char *buf, int n, void *unused);
-extern int generic_console_write(int fd, const char *buf, int n);
-extern int generic_window_size(int fd, void *unused, unsigned short *rows_out,
-                              unsigned short *cols_out);
-extern void generic_free(void *data);
-
-struct tty_struct;
-extern void register_winch(int fd,  struct tty_struct *tty);
-extern void register_winch_irq(int fd, int tty_fd, int pid,
-                              struct tty_struct *tty, unsigned long stack);
-
-#define __channel_help(fn, prefix) \
-__uml_help(fn, prefix "[0-9]*=<channel description>\n" \
-"    Attach a console or serial line to a host channel.  See\n" \
-"    http://user-mode-linux.sourceforge.net/old/input.html for a complete\n" \
-"    description of this switch.\n\n" \
-);
-
-#endif
index 72009c7e3210a036cbbef0b688bd08718eacbb59..d7fe563aa7e78429aa6a10816974ce40c4219f93 100644 (file)
@@ -2,7 +2,6 @@
 
 DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
 
-OFFSET(HOST_TASK_REGS, task_struct, thread.regs);
 OFFSET(HOST_TASK_PID, task_struct, pid);
 
 DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
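
These DEFINE/OFFSET entries are run through the asm-offsets machinery, which turns each into a plain #define in include/generated/asm-offsets.h; that is how the userspace-side objects get at kernel constants without including kernel headers, and why kern_constants.h can be replaced by <generated/asm-offsets.h> elsewhere in this series. The output is, with illustrative values:

    /* include/generated/asm-offsets.h (values vary by config): */
    #define HOST_TASK_PID 1234              /* offsetof(struct task_struct, pid) */
    #define UM_KERN_PAGE_SIZE 4096          /* PAGE_SIZE */
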
diff --git a/arch/um/include/shared/initrd.h b/arch/um/include/shared/initrd.h
deleted file mode 100644 (file)
index 22673bc..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __INITRD_USER_H__
-#define __INITRD_USER_H__
-
-extern int load_initrd(char *filename, void *buf, int size);
-
-#endif
-
index 4ce3fc650e573b823ee7e17bb0b5c21a242a8845..6cd01240bbf098a6e55536c45dc11f1714ad8392 100644 (file)
  * includes.
  */
 
-extern int errno;
-
-extern int clone(int (*proc)(void *), void *sp, int flags, void *data);
-extern int sleep(int);
 extern int printf(const char *fmt, ...);
-extern char *strerror(int errnum);
-extern char *ptsname(int __fd);
-extern int munmap(void *, int);
 extern void *sbrk(int increment);
-extern void *malloc(int size);
-extern void perror(char *err);
-extern int kill(int pid, int sig);
-extern int getuid(void);
-extern int getgid(void);
 extern int pause(void);
-extern int write(int, const void *, int);
 extern void exit(int);
-extern int close(int);
-extern int read(unsigned int, char *, int);
-extern int pipe(int *);
-extern int sched_yield(void);
-extern int ptrace(int op, int pid, long addr, long data);
 
 #endif
 
index 3c341222d2525e740a5beda96fa09c8819d97323..0f1483852460a279bf32b80e68d6d3fbbcaa9d69 100644 (file)
@@ -21,7 +21,6 @@ extern unsigned long alloc_stack(int order, int atomic);
 extern void free_stack(unsigned long stack, int order);
 
 extern int do_signal(void);
-extern void copy_sc(struct uml_pt_regs *regs, void *from);
 extern void interrupt_end(void);
 extern void relay_signal(int sig, struct uml_pt_regs *regs);
 
diff --git a/arch/um/include/shared/ldt.h b/arch/um/include/shared/ldt.h
deleted file mode 100644 (file)
index a7f999a..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
- * Licensed under the GPL
- *
- * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
- */
-
-#ifndef __ASM_LDT_H
-#define __ASM_LDT_H
-
-#include <linux/mutex.h>
-#include <sysdep/host_ldt.h>
-
-extern void ldt_host_info(void);
-
-#define LDT_PAGES_MAX \
-       ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
-#define LDT_ENTRIES_PER_PAGE \
-       (PAGE_SIZE/LDT_ENTRY_SIZE)
-#define LDT_DIRECT_ENTRIES \
-       ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
-
-struct ldt_entry {
-       __u32 a;
-       __u32 b;
-};
-
-typedef struct uml_ldt {
-       int entry_count;
-       struct mutex lock;
-       union {
-               struct ldt_entry * pages[LDT_PAGES_MAX];
-               struct ldt_entry entries[LDT_DIRECT_ENTRIES];
-       } u;
-} uml_ldt_t;
-
-#endif
diff --git a/arch/um/include/shared/line.h b/arch/um/include/shared/line.h
deleted file mode 100644 (file)
index 63df3ca..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-/* 
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __LINE_H__
-#define __LINE_H__
-
-#include "linux/list.h"
-#include "linux/workqueue.h"
-#include "linux/tty.h"
-#include "linux/interrupt.h"
-#include "linux/spinlock.h"
-#include "linux/mutex.h"
-#include "chan_user.h"
-#include "mconsole_kern.h"
-
-/* There's only one modifiable field in this - .mc.list */
-struct line_driver {
-       const char *name;
-       const char *device_name;
-       const short major;
-       const short minor_start;
-       const short type;
-       const short subtype;
-       const int read_irq;
-       const char *read_irq_name;
-       const int write_irq;
-       const char *write_irq_name;
-       struct mc_device mc;
-};
-
-struct line {
-       struct tty_struct *tty;
-       spinlock_t count_lock;
-       unsigned long count;
-       int valid;
-
-       char *init_str;
-       int init_pri;
-       struct list_head chan_list;
-
-       /*This lock is actually, mostly, local to*/
-       spinlock_t lock;
-       int throttled;
-       /* Yes, this is a real circular buffer.
-        * XXX: And this should become a struct kfifo!
-        *
-        * buffer points to a buffer allocated on demand, of length
-        * LINE_BUFSIZE, head to the start of the ring, tail to the end.*/
-       char *buffer;
-       char *head;
-       char *tail;
-
-       int sigio;
-       struct delayed_work task;
-       const struct line_driver *driver;
-       int have_irq;
-};
-
-#define LINE_INIT(str, d) \
-       { .count_lock = __SPIN_LOCK_UNLOCKED((str).count_lock), \
-         .init_str =   str,    \
-         .init_pri =   INIT_STATIC, \
-         .valid =      1, \
-         .lock =       __SPIN_LOCK_UNLOCKED((str).lock), \
-         .driver =     d }
-
-extern void line_close(struct tty_struct *tty, struct file * filp);
-extern int line_open(struct line *lines, struct tty_struct *tty);
-extern int line_setup(struct line *lines, unsigned int sizeof_lines,
-                     char *init, char **error_out);
-extern int line_write(struct tty_struct *tty, const unsigned char *buf,
-                     int len);
-extern int line_put_char(struct tty_struct *tty, unsigned char ch);
-extern void line_set_termios(struct tty_struct *tty, struct ktermios * old);
-extern int line_chars_in_buffer(struct tty_struct *tty);
-extern void line_flush_buffer(struct tty_struct *tty);
-extern void line_flush_chars(struct tty_struct *tty);
-extern int line_write_room(struct tty_struct *tty);
-extern int line_ioctl(struct tty_struct *tty, unsigned int cmd,
-                               unsigned long arg);
-extern void line_throttle(struct tty_struct *tty);
-extern void line_unthrottle(struct tty_struct *tty);
-
-extern char *add_xterm_umid(char *base);
-extern int line_setup_irq(int fd, int input, int output, struct line *line,
-                         void *data);
-extern void line_close_chan(struct line *line);
-extern struct tty_driver *register_lines(struct line_driver *line_driver,
-                                        const struct tty_operations *driver,
-                                        struct line *lines, int nlines);
-extern void lines_init(struct line *lines, int nlines, struct chan_opts *opts);
-extern void close_lines(struct line *lines, int nlines);
-
-extern int line_config(struct line *lines, unsigned int sizeof_lines,
-                      char *str, const struct chan_opts *opts,
-                      char **error_out);
-extern int line_id(char **str, int *start_out, int *end_out);
-extern int line_remove(struct line *lines, unsigned int sizeof_lines, int n,
-                      char **error_out);
-extern int line_get_config(char *dev, struct line *lines,
-                          unsigned int sizeof_lines, char *str,
-                          int size, char **error_out);
-
-#endif
diff --git a/arch/um/include/shared/mconsole.h b/arch/um/include/shared/mconsole.h
deleted file mode 100644 (file)
index c139ae1..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __MCONSOLE_H__
-#define __MCONSOLE_H__
-
-#ifndef __KERNEL__
-#include <stdint.h>
-#define u32 uint32_t
-#endif
-
-#include "sysdep/ptrace.h"
-
-#define MCONSOLE_MAGIC (0xcafebabe)
-#define MCONSOLE_MAX_DATA (512)
-#define MCONSOLE_VERSION 2
-
-struct mconsole_request {
-       u32 magic;
-       u32 version;
-       u32 len;
-       char data[MCONSOLE_MAX_DATA];
-};
-
-struct mconsole_reply {
-       u32 err;
-       u32 more;
-       u32 len;
-       char data[MCONSOLE_MAX_DATA];
-};
-
-struct mconsole_notify {
-       u32 magic;
-       u32 version;
-       enum { MCONSOLE_SOCKET, MCONSOLE_PANIC, MCONSOLE_HANG,
-              MCONSOLE_USER_NOTIFY } type;
-       u32 len;
-       char data[MCONSOLE_MAX_DATA];
-};
-
-struct mc_request;
-
-enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC };
-
-struct mconsole_command
-{
-       char *command;
-       void (*handler)(struct mc_request *req);
-       enum mc_context context;
-};
-
-struct mc_request
-{
-       int len;
-       int as_interrupt;
-
-       int originating_fd;
-       unsigned int originlen;
-       unsigned char origin[128];                      /* sockaddr_un */
-
-       struct mconsole_request request;
-       struct mconsole_command *cmd;
-       struct uml_pt_regs regs;
-};
-
-extern char mconsole_socket_name[];
-
-extern int mconsole_unlink_socket(void);
-extern int mconsole_reply_len(struct mc_request *req, const char *reply,
-                             int len, int err, int more);
-extern int mconsole_reply(struct mc_request *req, const char *str, int err,
-                         int more);
-
-extern void mconsole_version(struct mc_request *req);
-extern void mconsole_help(struct mc_request *req);
-extern void mconsole_halt(struct mc_request *req);
-extern void mconsole_reboot(struct mc_request *req);
-extern void mconsole_config(struct mc_request *req);
-extern void mconsole_remove(struct mc_request *req);
-extern void mconsole_sysrq(struct mc_request *req);
-extern void mconsole_cad(struct mc_request *req);
-extern void mconsole_stop(struct mc_request *req);
-extern void mconsole_go(struct mc_request *req);
-extern void mconsole_log(struct mc_request *req);
-extern void mconsole_proc(struct mc_request *req);
-extern void mconsole_stack(struct mc_request *req);
-
-extern int mconsole_get_request(int fd, struct mc_request *req);
-extern int mconsole_notify(char *sock_name, int type, const void *data,
-                          int len);
-extern char *mconsole_notify_socket(void);
-extern void lock_notify(void);
-extern void unlock_notify(void);
-
-#endif
diff --git a/arch/um/include/shared/mconsole_kern.h b/arch/um/include/shared/mconsole_kern.h
deleted file mode 100644 (file)
index d2fe07e..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __MCONSOLE_KERN_H__
-#define __MCONSOLE_KERN_H__
-
-#include "linux/list.h"
-#include "mconsole.h"
-
-struct mconsole_entry {
-       struct list_head list;
-       struct mc_request request;
-};
-
-/* All these methods are called in process context. */
-struct mc_device {
-       struct list_head list;
-       char *name;
-       int (*config)(char *, char **);
-       int (*get_config)(char *, char *, int, char **);
-       int (*id)(char **, int *, int *);
-       int (*remove)(int, char **);
-};
-
-#define CONFIG_CHUNK(str, size, current, chunk, end) \
-do { \
-       current += strlen(chunk); \
-       if(current >= size) \
-               str = NULL; \
-       if(str != NULL){ \
-               strcpy(str, chunk); \
-               str += strlen(chunk); \
-       } \
-       if(end) \
-               current++; \
-} while(0)
-
-#ifdef CONFIG_MCONSOLE
-
-extern void mconsole_register_dev(struct mc_device *new);
-
-#else
-
-static inline void mconsole_register_dev(struct mc_device *new)
-{
-}
-
-#endif
-
-#endif
diff --git a/arch/um/include/shared/mem_kern.h b/arch/um/include/shared/mem_kern.h
deleted file mode 100644 (file)
index 69be0fd..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#ifndef __MEM_KERN_H__
-#define __MEM_KERN_H__
-
-#include "linux/list.h"
-#include "linux/types.h"
-
-struct remapper {
-       struct list_head list;
-       int (*proc)(int, unsigned long, int, __u64);
-};
-
-extern void register_remapper(struct remapper *info);
-
-#endif
-
index 83c7c2ecd61422b7fccd054e79597bad10d4f9a7..89b686c1a3ea8f9703354c2c1cafd8bb6635cd26 100644 (file)
@@ -10,7 +10,6 @@
 #include "irq_user.h"
 #include "longjmp.h"
 #include "mm_id.h"
-#include "sysdep/tls.h"
 
 #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
 
@@ -203,12 +202,6 @@ extern int os_drop_memory(void *addr, int length);
 extern int can_drop_memory(void);
 extern void os_flush_stdout(void);
 
-/* uaccess.c */
-extern unsigned long __do_user_copy(void *to, const void *from, int n,
-                                   void **fault_addr, jmp_buf **fault_catcher,
-                                   void (*op)(void *to, const void *from,
-                                              int n), int *faulted_out);
-
 /* execvp.c */
 extern int execvp_noalloc(char *buf, const char *file, char *const argv[]);
 /* helper.c */
@@ -218,10 +211,6 @@ extern int run_helper_thread(int (*proc)(void *), void *arg,
 extern int helper_wait(int pid);
 
 
-/* tls.c */
-extern int os_set_thread_area(user_desc_t *info, int pid);
-extern int os_get_thread_area(user_desc_t *info, int pid);
-
 /* umid.c */
 extern int umid_file_name(char *name, char *buf, int len);
 extern int set_umid(char *name);
@@ -231,7 +220,7 @@ extern char *get_umid(void);
 extern void timer_init(void);
 extern void set_sigstack(void *sig_stack, int size);
 extern void remove_sigstack(void);
-extern void set_handler(int sig, void (*handler)(int), int flags, ...);
+extern void set_handler(int sig);
 extern int change_sig(int signal, int on);
 extern void block_signals(void);
 extern void unblock_signals(void);
diff --git a/arch/um/include/shared/process.h b/arch/um/include/shared/process.h
deleted file mode 100644 (file)
index bb873a5..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __PROCESS_H__
-#define __PROCESS_H__
-
-#include <signal.h>
-
-/* Copied from linux/compiler-gcc.h since we can't include it directly */
-#define barrier() __asm__ __volatile__("": : :"memory")
-
-extern void sig_handler(int sig, struct sigcontext *sc);
-extern void alarm_handler(int sig, struct sigcontext *sc);
-
-#endif
index 7fd8539bc19a1ab0be7b9f9cfe29270d577a05d6..56b2f284b108e91f1e70b8573fffe37176a1a4e1 100644 (file)
@@ -6,7 +6,8 @@
 #ifndef __PTRACE_USER_H__
 #define __PTRACE_USER_H__
 
-#include "sysdep/ptrace_user.h"
+#include <sys/ptrace.h>
+#include <sysdep/ptrace_user.h>
 
 extern int ptrace_getregs(long pid, unsigned long *regs_out);
 extern int ptrace_setregs(long pid, unsigned long *regs_in);
diff --git a/arch/um/include/shared/skas_ptregs.h b/arch/um/include/shared/skas_ptregs.h
deleted file mode 100644 (file)
index 73db19e..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __SKAS_PT_REGS_
-#define __SKAS_PT_REGS_
-
-#include <user_constants.h>
-
-#endif
diff --git a/arch/um/include/shared/syscall.h b/arch/um/include/shared/syscall.h
deleted file mode 100644 (file)
index dda1df9..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSCALL_USER_H
-#define __SYSCALL_USER_H
-
-extern int record_syscall_start(int syscall);
-extern void record_syscall_end(int index, long result);
-
-#endif
diff --git a/arch/um/include/shared/task.h b/arch/um/include/shared/task.h
deleted file mode 100644 (file)
index 3fe726b..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __TASK_H
-#define __TASK_H
-
-#include <kern_constants.h>
-
-#define TASK_REGS(task) ((struct uml_pt_regs *) &(((char *) (task))[HOST_TASK_REGS]))
-#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
-
-#endif
diff --git a/arch/um/include/shared/tlb.h b/arch/um/include/shared/tlb.h
deleted file mode 100644 (file)
index ecd2265..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __TLB_H__
-#define __TLB_H__
-
-#include "um_mmu.h"
-
-extern void force_flush_all(void);
-extern int flush_tlb_kernel_range_common(unsigned long start,
-                                        unsigned long end);
-
-#endif
diff --git a/arch/um/include/shared/ubd_user.h b/arch/um/include/shared/ubd_user.h
deleted file mode 100644 (file)
index 3845051..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_UBD_USER_H
-#define __UM_UBD_USER_H
-
-extern void ignore_sigwinch_sig(void);
-extern int start_io_thread(unsigned long sp, int *fds_out);
-extern int io_thread(void *arg);
-extern int kernel_fd;
-
-#endif
-
index c554d706d1060b9a1e9502f33525a23c64aed60b..6395fef6b69b443fcea86e5ddd64dc3a9820d7e0 100644 (file)
@@ -6,7 +6,7 @@
 #ifndef __UM_MALLOC_H__
 #define __UM_MALLOC_H__
 
-#include "kern_constants.h"
+#include <generated/asm-offsets.h>
 
 extern void *uml_kmalloc(int size, int flags);
 extern void kfree(const void *ptr);
diff --git a/arch/um/include/shared/um_mmu.h b/arch/um/include/shared/um_mmu.h
deleted file mode 100644 (file)
index b1a7e47..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/* 
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __ARCH_UM_MMU_H
-#define __ARCH_UM_MMU_H
-
-#include "mm_id.h"
-#include "ldt.h"
-
-typedef struct mm_context {
-       struct mm_id id;
-       struct uml_ldt ldt;
-       struct page **stub_pages;
-} mm_context_t;
-
-extern void __switch_mm(struct mm_id * mm_idp);
-
-/* Avoid tangled inclusion with asm/ldt.h */
-extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
-extern void free_ldt(struct mm_context *mm);
-
-#endif
diff --git a/arch/um/include/shared/um_uaccess.h b/arch/um/include/shared/um_uaccess.h
deleted file mode 100644 (file)
index 45c0499..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/* 
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __ARCH_UM_UACCESS_H
-#define __ARCH_UM_UACCESS_H
-
-#include <asm/elf.h>
-#include <asm/fixmap.h>
-#include "sysdep/archsetjmp.h"
-
-#define __under_task_size(addr, size) \
-       (((unsigned long) (addr) < TASK_SIZE) && \
-        (((unsigned long) (addr) + (size)) < TASK_SIZE))
-
-#define __access_ok_vsyscall(type, addr, size) \
-        ((type == VERIFY_READ) && \
-         ((unsigned long) (addr) >= FIXADDR_USER_START) && \
-         ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \
-         ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))
-
-#define __addr_range_nowrap(addr, size) \
-       ((unsigned long) (addr) <= ((unsigned long) (addr) + (size)))
-
-#define access_ok(type, addr, size) \
-       (__addr_range_nowrap(addr, size) && \
-        (__under_task_size(addr, size) || \
-         __access_ok_vsyscall(type, addr, size) || \
-         segment_eq(get_fs(), KERNEL_DS)))
-
-extern int copy_from_user(void *to, const void __user *from, int n);
-extern int copy_to_user(void __user *to, const void *from, int n);
-
-extern int __do_copy_to_user(void *to, const void *from, int n,
-                            void **fault_addr, jmp_buf **fault_catcher);
-
-/*
- * strncpy_from_user: - Copy a NUL terminated string from userspace.
- * @dst:   Destination address, in kernel space.  This buffer must be at
- *         least @count bytes long.
- * @src:   Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-
-extern int strncpy_from_user(char *dst, const char __user *src, int count);
-
-/*
- * __clear_user: - Zero a block of memory in user space, with less checking.
- * @to:   Destination address, in user space.
- * @n:    Number of bytes to zero.
- *
- * Zero a block of memory in user space.  Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be cleared.
- * On success, this will be zero.
- */
-extern int __clear_user(void __user *mem, int len);
-
-/*
- * clear_user: - Zero a block of memory in user space.
- * @to:   Destination address, in user space.
- * @n:    Number of bytes to zero.
- *
- * Zero a block of memory in user space.
- *
- * Returns number of bytes that could not be cleared.
- * On success, this will be zero.
- */
-extern int clear_user(void __user *mem, int len);
-
-/*
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- * @n:   The maximum valid length
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- * If the string is too long, returns a value greater than @n.
- */
-extern int strnlen_user(const void __user *str, int len);
-
-#endif
index 293f7c794faa46a38c4767125d5d37201c72c675..4fa82c055aab8d53576b06787aa1033644a16697 100644 (file)
@@ -6,7 +6,7 @@
 #ifndef __USER_H__
 #define __USER_H__
 
-#include "kern_constants.h"
+#include <generated/asm-offsets.h>
 
 /*
  * The usual definition - copied here because the kernel provides its own,
@@ -36,10 +36,11 @@ static inline int printk(const char *fmt, ...)
 }
 #endif
 
-extern void schedule(void);
 extern int in_aton(char *str);
-extern int open_gdb_chan(void);
 extern size_t strlcpy(char *, const char *, size_t);
 extern size_t strlcat(char *, const char *, size_t);
 
+/* Copied from linux/compiler-gcc.h since we can't include it directly */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
 #endif
index c4491c15afb213064995f3aa490d2a749a39f8d3..bc494741b1f35f55b96421b00c581ab554c47d32 100644 (file)
@@ -11,7 +11,7 @@ clean-files :=
 
 obj-y = config.o exec.o exitcode.o init_task.o irq.o ksyms.o mem.o \
        physmem.o process.o ptrace.o reboot.o sigio.o \
-       signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o uaccess.o \
+       signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o \
        um_arch.o umid.o skas/
 
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
index 939a4a67f0fdd6819522c3f86f5451a4063fe617..6cade9366364d03f2fd58e2cee85336ffdea56e5 100644 (file)
@@ -3,14 +3,15 @@
  * Licensed under the GPL
  */
 
-#include "linux/stddef.h"
-#include "linux/fs.h"
-#include "linux/ptrace.h"
-#include "linux/sched.h"
-#include "linux/slab.h"
-#include "asm/current.h"
-#include "asm/processor.h"
-#include "asm/uaccess.h"
+#include <linux/stddef.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
 #include "as-layout.h"
 #include "mem_user.h"
 #include "skas.h"
@@ -41,6 +42,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
        PT_REGS_IP(regs) = eip;
        PT_REGS_SP(regs) = esp;
 }
+EXPORT_SYMBOL(start_thread);
 
 static long execve1(const char *file,
                    const char __user *const __user *argv,
index 72eccd2a41132daae3d3dd746901f8535913e290..e9bcf247bcee1ff323db3fdff6ee49b4465480c6 100644 (file)
@@ -7,18 +7,3 @@
 
 extern void __bb_init_func(void *)  __attribute__((weak));
 EXPORT_SYMBOL(__bb_init_func);
-
-/*
- * This is defined (and referred to in profiling stub code) only by some GCC
- * versions in libgcov.
- *
- * Since SuSE backported the fix, we cannot handle it depending on GCC version.
- * So, unconditionally export it. But also give it a weak declaration, which
- * will be overridden by any other one.
- */
-
-extern void __gcov_init(void *) __attribute__((weak));
-EXPORT_SYMBOL(__gcov_init);
-
-extern void __gcov_merge_add(void *) __attribute__((weak));
-EXPORT_SYMBOL(__gcov_merge_add);
index d386c75c88eb2ee117c3cc1c84ff18100e0705f4..10cc18f729fdde317b4fea334f0d8604ae3c5baf 100644 (file)
@@ -7,12 +7,12 @@
 #include "linux/bootmem.h"
 #include "linux/initrd.h"
 #include "asm/types.h"
-#include "initrd.h"
 #include "init.h"
 #include "os.h"
 
 /* Changed by uml_initrd_setup, which is a setup */
 static char *initrd __initdata = NULL;
+static int load_initrd(char *filename, void *buf, int size);
 
 static int __init read_initrd(void)
 {
@@ -62,7 +62,7 @@ __uml_setup("initrd=", uml_initrd_setup,
 "    name of the file containing the image.\n\n"
 );
 
-int load_initrd(char *filename, void *buf, int size)
+static int load_initrd(char *filename, void *buf, int size)
 {
        int fd, n;
 
index 9e485c770308d21fe7148f5ed5004f8f93b5d70d..71b8c947e5efde749730ee3c8844b860dab15b4c 100644 (file)
@@ -258,6 +258,7 @@ void deactivate_fd(int fd, int irqnum)
 
        ignore_sigio_fd(fd);
 }
+EXPORT_SYMBOL(deactivate_fd);
 
 /*
  * Called just before shutdown in order to provide a clean exec
index 0ae0dfcfbffbbc76fdc49dcdf14dfd0ca246744e..e17bea0b22e179a2f1cd364dcf1dc5ffa076b216 100644 (file)
@@ -3,33 +3,11 @@
  * Licensed under the GPL
  */
 
-#include "linux/module.h"
-#include "linux/syscalls.h"
-#include "asm/tlbflush.h"
-#include "asm/uaccess.h"
-#include "as-layout.h"
-#include "kern_util.h"
-#include "mem_user.h"
+#include <linux/module.h>
 #include "os.h"
 
-EXPORT_SYMBOL(uml_physmem);
 EXPORT_SYMBOL(set_signals);
 EXPORT_SYMBOL(get_signals);
-EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(sys_waitpid);
-EXPORT_SYMBOL(flush_tlb_range);
-
-EXPORT_SYMBOL(high_physmem);
-EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(handle_page_fault);
-EXPORT_SYMBOL(find_iomem);
-
-EXPORT_SYMBOL(strnlen_user);
-EXPORT_SYMBOL(strncpy_from_user);
-EXPORT_SYMBOL(copy_to_user);
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(clear_user);
-EXPORT_SYMBOL(uml_strdup);
 
 EXPORT_SYMBOL(os_stat_fd);
 EXPORT_SYMBOL(os_stat_file);
@@ -57,24 +35,10 @@ EXPORT_SYMBOL(os_connect_socket);
 EXPORT_SYMBOL(os_accept_connection);
 EXPORT_SYMBOL(os_rcv_fd);
 EXPORT_SYMBOL(run_helper);
-EXPORT_SYMBOL(start_thread);
 EXPORT_SYMBOL(os_major);
 EXPORT_SYMBOL(os_minor);
 EXPORT_SYMBOL(os_makedev);
 
 EXPORT_SYMBOL(add_sigio_fd);
 EXPORT_SYMBOL(ignore_sigio_fd);
-EXPORT_SYMBOL(deactivate_fd);
 EXPORT_SYMBOL(sigio_broken);
-
-#ifdef CONFIG_SMP
-
-/* required for SMP */
-
-extern void __write_lock_failed(rwlock_t *rw);
-EXPORT_SYMBOL(__write_lock_failed);
-
-extern void __read_lock_failed(rwlock_t *rw);
-EXPORT_SYMBOL(__read_lock_failed);
-
-#endif
index 8137ccc9635b5c98ae902fb8f6c5884faf67b8fe..ebb86b2184456396cf723fcef4b3981a4c68703d 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/stddef.h>
+#include <linux/module.h>
 #include <linux/bootmem.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
@@ -20,6 +21,7 @@
 
 /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
 unsigned long *empty_zero_page = NULL;
+EXPORT_SYMBOL(empty_zero_page);
 /* allocated in paging_init and unchanged thereafter */
 static unsigned long *empty_bad_page = NULL;
 
index a1a9090254c29fdd07ad9f982a46d54dd29563ee..f116db15d4028217767f2a7f51dc0bba38b151a9 100644 (file)
@@ -3,20 +3,22 @@
  * Licensed under the GPL
  */
 
-#include "linux/bootmem.h"
-#include "linux/mm.h"
-#include "linux/pfn.h"
-#include "asm/page.h"
-#include "as-layout.h"
-#include "init.h"
-#include "kern.h"
-#include "mem_user.h"
-#include "os.h"
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/pfn.h>
+#include <asm/page.h>
+#include <as-layout.h>
+#include <init.h>
+#include <kern.h>
+#include <mem_user.h>
+#include <os.h>
 
 static int physmem_fd = -1;
 
 /* Changed during early boot */
 unsigned long high_physmem;
+EXPORT_SYMBOL(high_physmem);
 
 extern unsigned long long physmem_size;
 
@@ -184,6 +186,7 @@ unsigned long find_iomem(char *driver, unsigned long *len_out)
 
        return 0;
 }
+EXPORT_SYMBOL(find_iomem);
 
 static int setup_iomem(void)
 {
index 21c1ae7c3d7579fbbd17c66a0029805bebbd6916..c5338351aecd20606738198b2694a983b10a38af 100644 (file)
 #include <linux/threads.h>
 #include <asm/current.h>
 #include <asm/pgtable.h>
+#include <asm/mmu_context.h>
 #include <asm/uaccess.h>
 #include "as-layout.h"
 #include "kern_util.h"
 #include "os.h"
 #include "skas.h"
-#include "tlb.h"
 
 /*
  * This is a per-cpu array.  A processor only modifies its entry and it only
@@ -78,6 +78,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
                      &current->thread.regs, 0, NULL, NULL);
        return pid;
 }
+EXPORT_SYMBOL(kernel_thread);
 
 static inline void set_current(struct task_struct *task)
 {
@@ -286,6 +287,7 @@ char *uml_strdup(const char *string)
 {
        return kstrdup(string, GFP_KERNEL);
 }
+EXPORT_SYMBOL(uml_strdup);
 
 int copy_to_user_proc(void __user *to, void *from, int size)
 {
index b5c094c4ade4020f379856fe9117dce8124be57b..e8b889d3bce760aaf7cc1b8a5bd0297bab66a74d 100644 (file)
@@ -11,7 +11,6 @@
 #include <asm/unistd.h>
 #include "frame_kern.h"
 #include "kern_util.h"
-#include <sysdep/sigcontext.h>
 
 EXPORT_SYMBOL(block_signals);
 EXPORT_SYMBOL(unblock_signals);
index 2c8583c1a344df2003ee0f2371ca6ead0a085982..e1fd066a3525c20f4820c0227c42613984edabd4 100644 (file)
@@ -8,7 +8,6 @@
 #include <asm/unistd.h>
 #include <sys/time.h>
 #include "as-layout.h"
-#include "kern_constants.h"
 #include "ptrace_user.h"
 #include "stub-data.h"
 #include "sysdep/stub.h"
index 696634214dc65551557ab88535b40557fed4eb7c..9fefd924fb4989a5ccc79e9288caabc3a5d48309 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/err.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <asm/current.h>
 #include <asm/page.h>
@@ -149,6 +150,7 @@ int copy_from_user(void *to, const void __user *from, int n)
               buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to):
               n;
 }
+EXPORT_SYMBOL(copy_from_user);
 
 static int copy_chunk_to_user(unsigned long to, int len, void *arg)
 {
@@ -170,6 +172,7 @@ int copy_to_user(void __user *to, const void *from, int n)
               buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) :
               n;
 }
+EXPORT_SYMBOL(copy_to_user);
 
 static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
 {
@@ -204,6 +207,7 @@ int strncpy_from_user(char *dst, const char __user *src, int count)
                return -EFAULT;
        return strnlen(dst, count);
 }
+EXPORT_SYMBOL(strncpy_from_user);
 
 static int clear_chunk(unsigned long addr, int len, void *unused)
 {
@@ -226,6 +230,7 @@ int clear_user(void __user *mem, int len)
        return access_ok(VERIFY_WRITE, mem, len) ?
               buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len;
 }
+EXPORT_SYMBOL(clear_user);
 
 static int strnlen_chunk(unsigned long str, int len, void *arg)
 {
@@ -251,3 +256,4 @@ int strnlen_user(const void __user *str, int len)
                return count + 1;
        return -EFAULT;
 }
+EXPORT_SYMBOL(strnlen_user);
index d175d0566af02d35acde18d805040c61a4bd29f2..7f3d4d86431a0801ffbc37fc2273b2f373ed5444 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -11,7 +12,6 @@
 #include "mem_user.h"
 #include "os.h"
 #include "skas.h"
-#include "tlb.h"
 
 struct host_vm_change {
        struct host_vm_op {
@@ -287,7 +287,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
        }
 }
 
-int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
+static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 {
        struct mm_struct *mm;
        pgd_t *pgd;
@@ -499,6 +499,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                flush_tlb_kernel_range_common(start, end);
        else fix_range(vma->vm_mm, start, end, 0);
 }
+EXPORT_SYMBOL(flush_tlb_range);
 
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end)
index 8c7b8823d1f0e1257f3a410620048bbd3556d95b..dafc9471595021748eda5e750a80d3483a662379 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/hardirq.h>
+#include <linux/module.h>
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -14,7 +15,6 @@
 #include "kern_util.h"
 #include "os.h"
 #include "skas.h"
-#include "sysdep/sigcontext.h"
 
 /*
  * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
@@ -112,6 +112,7 @@ out_of_memory:
        pagefault_out_of_memory();
        return 0;
 }
+EXPORT_SYMBOL(handle_page_fault);
 
 static void show_segv_info(struct uml_pt_regs *regs)
 {
diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c
deleted file mode 100644 (file)
index dd33f04..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-/*
- * These are here rather than tt/uaccess.c because skas mode needs them in
- * order to do SIGBUS recovery when a tmpfs mount runs out of room.
- */
-
-#include <linux/string.h>
-#include "os.h"
-
-static void __do_copy(void *to, const void *from, int n)
-{
-       memcpy(to, from, n);
-}
-
-
-int __do_copy_to_user(void *to, const void *from, int n,
-                     void **fault_addr, jmp_buf **fault_catcher)
-{
-       unsigned long fault;
-       int faulted;
-
-       fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
-                              __do_copy, &faulted);
-       if (!faulted)
-               return 0;
-       else
-               return n - (fault - (unsigned long) to);
-}
index 8d84250324b3d2c41d2a6718e7fd0531ecd025b4..ba00eae45aad1aaf7254875a50ab455544a295cf 100644 (file)
@@ -102,6 +102,8 @@ const struct seq_operations cpuinfo_op = {
 
 /* Set in linux_main */
 unsigned long uml_physmem;
+EXPORT_SYMBOL(uml_physmem);
+
 unsigned long uml_reserved; /* Also modified in mem_init */
 unsigned long start_vm;
 unsigned long end_vm;
index b33f4dfe7ae5066fd263226e0ac3bc0d4df27b8f..dd764101e48823b23113cf5c32f09368382dca4c 100644 (file)
@@ -4,14 +4,14 @@
 #
 
 obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \
-       registers.o sigio.o signal.o start_up.o time.o tty.o uaccess.o \
-       umid.o tls.o user_syms.o util.o drivers/ sys-$(SUBARCH)/ skas/
+       registers.o sigio.o signal.o start_up.o time.o tty.o \
+       umid.o user_syms.o util.o drivers/ skas/
 
 obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
 
 USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \
        main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
-       tty.o tls.o uaccess.o umid.o util.o
+       tty.o umid.o util.o
 
 CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH)
 
index 57e3d46c989cf0ba4ec4546d6e613e969537be3e..c5d039e1ff3b779736d987c213dc3e062160d66f 100644 (file)
 #include <asm/unistd.h>
 #include "aio.h"
 #include "init.h"
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
-#include "user.h"
 
 struct aio_thread_req {
        enum aio_type type;
index cc72cb2c1af61302ff98cc88516d0889dad40829..db3d6481375a7c55cf5885771093f9a784d1db43 100644 (file)
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include "etap.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "net_user.h"
 #include "um_malloc.h"
-#include "user.h"
 
 #define MAX_PACKET ETH_MAX_PACKET
 
index 2448be03fd7a5018691d3656d963d934b604c810..a2aacffdd907d1830aab84ba1148dd72450caa98 100644 (file)
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include <sys/uio.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
 #include "tuntap.h"
-#include "user.h"
 
 static int tuntap_user_init(void *data, void *dev)
 {
index 953323799381574477272bf4b14d230672ffe1e3..d895271ad6f711485c94b6c8c17ebeb3d4314366 100644 (file)
@@ -12,7 +12,6 @@
 #include "init.h"
 #include "elf_user.h"
 #include "mem_user.h"
-#include <kern_constants.h>
 
 typedef Elf32_auxv_t elf_auxv_t;
 
index 140e587bc0adedf23debeb7183ad24c0c49d9267..b049a63bb74b060899888fdc731ca5cd5a137eee 100644 (file)
@@ -13,9 +13,7 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/un.h>
-#include "kern_constants.h"
 #include "os.h"
-#include "user.h"
 
 static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
 {
index feff22d64672d45c7652dcefdabf2278ba9e98e5..cf26c4a9a43a7c38549a2bcd76564eb554c32409 100644 (file)
 #include <linux/limits.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "user.h"
 
 struct helper_data {
        void (*pre_exec)(void*);
diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h
new file mode 100644 (file)
index 0000000..2c3c3ec
--- /dev/null
@@ -0,0 +1 @@
+void alarm_handler(int, mcontext_t *);
index 0348b975e81cb3a5a4d067a8f3b39c5d2db1fff5..9a49908b576c20c80c124854f04167081873cb7c 100644 (file)
@@ -9,11 +9,8 @@
 #include <signal.h>
 #include <string.h>
 #include "irq_user.h"
-#include "kern_constants.h"
 #include "os.h"
-#include "process.h"
 #include "um_malloc.h"
-#include "user.h"
 
 /*
  * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
index 8471b817d94f1b40bd7556dca9933123c22eef87..7a86dd516eb185476232df8581bee12b38bed196 100644 (file)
@@ -12,7 +12,6 @@
 #include <sys/resource.h>
 #include "as-layout.h"
 #include "init.h"
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
 #include "um_malloc.h"
index 62878cf1d33fb8efce33ff9572d080544207cf18..8e421e1d6d365450947771828160753d83c4c60b 100644 (file)
@@ -14,9 +14,7 @@
 #include <sys/mman.h>
 #include <sys/param.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "os.h"
-#include "user.h"
 
 /* Modified by which_tmpdir, which is called during early boot */
 static char *default_tmpdir = "/tmp";
index 0c45dc8efb055243d9259da46f702760b773ce11..307f173e7f82fec41a5bc74b28d796783bafe08e 100644 (file)
 #include <sys/wait.h>
 #include <asm/unistd.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "longjmp.h"
 #include "os.h"
-#include "process.h"
 #include "skas_ptrace.h"
-#include "user.h"
 
 #define ARBITRARY_ADDR -1
 #define FAILURE_PID    -1
@@ -237,21 +234,13 @@ out:
 
 void init_new_thread_signals(void)
 {
-       set_handler(SIGSEGV, (__sighandler_t) sig_handler, SA_ONSTACK,
-                   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-       set_handler(SIGTRAP, (__sighandler_t) sig_handler, SA_ONSTACK,
-                   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-       set_handler(SIGFPE, (__sighandler_t) sig_handler, SA_ONSTACK,
-                   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-       set_handler(SIGILL, (__sighandler_t) sig_handler, SA_ONSTACK,
-                   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-       set_handler(SIGBUS, (__sighandler_t) sig_handler, SA_ONSTACK,
-                   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
+       set_handler(SIGSEGV);
+       set_handler(SIGTRAP);
+       set_handler(SIGFPE);
+       set_handler(SIGILL);
+       set_handler(SIGBUS);
        signal(SIGHUP, SIG_IGN);
-
-       set_handler(SIGIO, (__sighandler_t) sig_handler,
-                   SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM,
-                   SIGVTALRM, -1);
+       set_handler(SIGIO);
        signal(SIGWINCH, SIG_IGN);
        signal(SIGTERM, SIG_DFL);
 }
index 63d299df152bb266f614d576b47e9039df531cf4..3c161218c671519b1443891438f09a54ca31976a 100644 (file)
 #include <sched.h>
 #include <signal.h>
 #include <string.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "init.h"
 #include "os.h"
-#include "process.h"
 #include "sigio.h"
 #include "um_malloc.h"
-#include "user.h"
 
 /*
  * Protected by sigio_lock(), also used by sigio_cleanup, which is an
index 6ae180703a6361bf9a8f0a7efd01b17e72e6da0f..2d22f1fcd8e244269ed0e55c7c25e0f09a3b7e8a 100644 (file)
 #include "as-layout.h"
 #include "kern_util.h"
 #include "os.h"
-#include "process.h"
-#include "sysdep/barrier.h"
-#include "sysdep/sigcontext.h"
-#include "user.h"
-
-/* Copied from linux/compiler-gcc.h since we can't include it directly */
-#define barrier() __asm__ __volatile__("": : :"memory")
+#include "sysdep/mcontext.h"
 
 void (*sig_info[NSIG])(int, struct uml_pt_regs *) = {
        [SIGTRAP]       = relay_signal,
@@ -30,7 +24,7 @@ void (*sig_info[NSIG])(int, struct uml_pt_regs *) = {
        [SIGIO]         = sigio_handler,
        [SIGVTALRM]     = timer_handler };
 
-static void sig_handler_common(int sig, struct sigcontext *sc)
+static void sig_handler_common(int sig, mcontext_t *mc)
 {
        struct uml_pt_regs r;
        int save_errno = errno;
@@ -38,8 +32,8 @@ static void sig_handler_common(int sig, struct sigcontext *sc)
        r.is_user = 0;
        if (sig == SIGSEGV) {
                /* For segfaults, we want the data from the sigcontext. */
-               copy_sc(&r, sc);
-               GET_FAULTINFO_FROM_SC(r.faultinfo, sc);
+               get_regs_from_mc(&r, mc);
+               GET_FAULTINFO_FROM_MC(r.faultinfo, mc);
        }
 
        /* enable signals if sig isn't IRQ signal */
@@ -66,7 +60,7 @@ static void sig_handler_common(int sig, struct sigcontext *sc)
 static int signals_enabled;
 static unsigned int signals_pending;
 
-void sig_handler(int sig, struct sigcontext *sc)
+void sig_handler(int sig, mcontext_t *mc)
 {
        int enabled;
 
@@ -78,23 +72,23 @@ void sig_handler(int sig, struct sigcontext *sc)
 
        block_signals();
 
-       sig_handler_common(sig, sc);
+       sig_handler_common(sig, mc);
 
        set_signals(enabled);
 }
 
-static void real_alarm_handler(struct sigcontext *sc)
+static void real_alarm_handler(mcontext_t *mc)
 {
        struct uml_pt_regs regs;
 
-       if (sc != NULL)
-               copy_sc(&regs, sc);
+       if (mc != NULL)
+               get_regs_from_mc(&regs, mc);
        regs.is_user = 0;
        unblock_signals();
        timer_handler(SIGVTALRM, &regs);
 }
 
-void alarm_handler(int sig, struct sigcontext *sc)
+void alarm_handler(int sig, mcontext_t *mc)
 {
        int enabled;
 
@@ -106,14 +100,13 @@ void alarm_handler(int sig, struct sigcontext *sc)
 
        block_signals();
 
-       real_alarm_handler(sc);
+       real_alarm_handler(mc);
        set_signals(enabled);
 }
 
 void timer_init(void)
 {
-       set_handler(SIGVTALRM, (__sighandler_t) alarm_handler,
-                   SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, -1);
+       set_handler(SIGVTALRM);
 }
 
 void set_sigstack(void *sig_stack, int size)
@@ -126,10 +119,23 @@ void set_sigstack(void *sig_stack, int size)
                panic("enabling signal stack failed, errno = %d\n", errno);
 }
 
-static void (*handlers[_NSIG])(int sig, struct sigcontext *sc);
+static void (*handlers[_NSIG])(int sig, mcontext_t *mc) = {
+       [SIGSEGV] = sig_handler,
+       [SIGBUS] = sig_handler,
+       [SIGILL] = sig_handler,
+       [SIGFPE] = sig_handler,
+       [SIGTRAP] = sig_handler,
+
+       [SIGIO] = sig_handler,
+       [SIGWINCH] = sig_handler,
+       [SIGVTALRM] = alarm_handler
+};
+
 
-void handle_signal(int sig, struct sigcontext *sc)
+static void hard_handler(int sig, siginfo_t *info, void *p)
 {
+       struct ucontext *uc = p;
+       mcontext_t *mc = &uc->uc_mcontext;
        unsigned long pending = 1UL << sig;
 
        do {
@@ -155,7 +161,7 @@ void handle_signal(int sig, struct sigcontext *sc)
                while ((sig = ffs(pending)) != 0){
                        sig--;
                        pending &= ~(1 << sig);
-                       (*handlers[sig])(sig, sc);
+                       (*handlers[sig])(sig, mc);
                }
 
                /*
@@ -169,28 +175,26 @@ void handle_signal(int sig, struct sigcontext *sc)
        } while (pending);
 }
 
-extern void hard_handler(int sig);
-
-void set_handler(int sig, void (*handler)(int), int flags, ...)
+void set_handler(int sig)
 {
        struct sigaction action;
-       va_list ap;
+       int flags = SA_SIGINFO | SA_ONSTACK;
        sigset_t sig_mask;
-       int mask;
 
-       handlers[sig] = (void (*)(int, struct sigcontext *)) handler;
-       action.sa_handler = hard_handler;
+       action.sa_sigaction = hard_handler;
 
+       /* block irq ones */
        sigemptyset(&action.sa_mask);
-
-       va_start(ap, flags);
-       while ((mask = va_arg(ap, int)) != -1)
-               sigaddset(&action.sa_mask, mask);
-       va_end(ap);
+       sigaddset(&action.sa_mask, SIGVTALRM);
+       sigaddset(&action.sa_mask, SIGIO);
+       sigaddset(&action.sa_mask, SIGWINCH);
 
        if (sig == SIGSEGV)
                flags |= SA_NODEFER;
 
+       if (sigismember(&action.sa_mask, sig))
+               flags |= SA_RESTART; /* if it's an irq signal */
+
        action.sa_flags = flags;
        action.sa_restorer = NULL;
        if (sigaction(sig, &action, NULL) < 0)
index e771398be5f3fe97cf54fee51e5408d63ccaeea8..c0afff7af4bd0f8151c2f2210ae8a89d9768d4bd 100644 (file)
@@ -9,7 +9,6 @@
 #include <string.h>
 #include <sys/mman.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "as-layout.h"
 #include "mm_id.h"
 #include "os.h"
@@ -17,7 +16,6 @@
 #include "ptrace_user.h"
 #include "registers.h"
 #include "skas.h"
-#include "user.h"
 #include "sysdep/ptrace.h"
 #include "sysdep/stub.h"
 
index dee0e8cf8ad0c32e2a0f6315751fd96f3a0caf5a..cd65727854eb607b54291e23f2708ccf02717b87 100644 (file)
@@ -9,31 +9,23 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/mman.h>
-#include <sys/ptrace.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
 #include "as-layout.h"
-#include "chan_user.h"
-#include "kern_constants.h"
+#include "init.h"
 #include "kern_util.h"
 #include "mem.h"
 #include "os.h"
-#include "process.h"
 #include "proc_mm.h"
 #include "ptrace_user.h"
 #include "registers.h"
 #include "skas.h"
 #include "skas_ptrace.h"
-#include "user.h"
 #include "sysdep/stub.h"
 
 int is_skas_winch(int pid, int fd, void *data)
 {
-       if (pid != getpgrp())
-               return 0;
-
-       register_winch_irq(-1, fd, -1, data, 0);
-       return 1;
+       return pid == getpgrp();
 }
 
 static int ptrace_dump_regs(int pid)
@@ -169,7 +161,7 @@ static void handle_trap(int pid, struct uml_pt_regs *regs,
 
        if (!local_using_sysemu)
        {
-               err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
+               err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
                             __NR_getpid);
                if (err < 0) {
                        printk(UM_KERN_ERR "handle_trap - nullifying syscall "
@@ -257,8 +249,8 @@ static int userspace_tramp(void *stack)
 
                set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
                sigemptyset(&sa.sa_mask);
-               sa.sa_flags = SA_ONSTACK | SA_NODEFER;
-               sa.sa_handler = (void *) v;
+               sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+               sa.sa_sigaction = (void *) v;
                sa.sa_restorer = NULL;
                if (sigaction(SIGSEGV, &sa, NULL) < 0) {
                        printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV "
@@ -661,8 +653,7 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
 {
        int n;
 
-       set_handler(SIGWINCH, (__sighandler_t) sig_handler,
-                   SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGVTALRM, -1);
+       set_handler(SIGWINCH);
 
        /*
         * Can't use UML_SETJMP or UML_LONGJMP here because they save
index 02ee9adff54ad2a8913e94aee9bf927723194e58..425162e22af5d0d877ac04a0415e0bfe9b9a6907 100644 (file)
 #include <signal.h>
 #include <string.h>
 #include <sys/mman.h>
-#include <sys/ptrace.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "os.h"
 #include "mem_user.h"
 #include "ptrace_user.h"
@@ -225,7 +223,7 @@ static void __init check_sysemu(void)
                goto fail;
        }
 
-       n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
+       n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
        if (n < 0) {
                non_fatal("check_sysemu : failed to modify system call "
                          "return");
@@ -261,7 +259,7 @@ static void __init check_sysemu(void)
                                          "doesn't singlestep");
                                goto fail;
                        }
-                       n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET,
+                       n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET,
                                   os_getpid());
                        if (n < 0)
                                fatal_perror("check_sysemu : failed to modify "
@@ -317,10 +315,10 @@ static void __init check_ptrace(void)
                        fatal("check_ptrace : expected (SIGTRAP|0x80), "
                               "got status = %d", status);
 
-               syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET,
+               syscall = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET,
                                 0);
                if (syscall == __NR_getpid) {
-                       n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
+                       n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
                                   __NR_getppid);
                        if (n < 0)
                                fatal_perror("check_ptrace : failed to modify "
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile
deleted file mode 100644 (file)
index b4bc6ac..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
-# Licensed under the GPL
-#
-
-obj-y = registers.o signal.o task_size.o tls.o
-
-USER_OBJS := $(obj-y)
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c
deleted file mode 100644 (file)
index 229f7a5..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (C) 2004 PathScale, Inc
- * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <errno.h>
-#include <sys/ptrace.h>
-#include <sys/user.h>
-#include "kern_constants.h"
-#include "longjmp.h"
-#include "user.h"
-#include "sysdep/ptrace_user.h"
-
-int save_fp_registers(int pid, unsigned long *fp_regs)
-{
-       if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
-               return -errno;
-       return 0;
-}
-
-int restore_fp_registers(int pid, unsigned long *fp_regs)
-{
-       if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
-               return -errno;
-       return 0;
-}
-
-int save_fpx_registers(int pid, unsigned long *fp_regs)
-{
-       if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0)
-               return -errno;
-       return 0;
-}
-
-int restore_fpx_registers(int pid, unsigned long *fp_regs)
-{
-       if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0)
-               return -errno;
-       return 0;
-}
-
-unsigned long get_thread_reg(int reg, jmp_buf *buf)
-{
-       switch (reg) {
-       case EIP:
-               return buf[0]->__eip;
-       case UESP:
-               return buf[0]->__esp;
-       case EBP:
-               return buf[0]->__ebp;
-       default:
-               printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
-                      reg);
-               return 0;
-       }
-}
-
-int have_fpx_regs = 1;
-
-int get_fp_registers(int pid, unsigned long *regs)
-{
-       if (have_fpx_regs)
-               return save_fpx_registers(pid, regs);
-       else
-               return save_fp_registers(pid, regs);
-}
-
-int put_fp_registers(int pid, unsigned long *regs)
-{
-       if (have_fpx_regs)
-               return restore_fpx_registers(pid, regs);
-       else
-               return restore_fp_registers(pid, regs);
-}
-
-void arch_init_registers(int pid)
-{
-       struct user_fpxregs_struct fpx_regs;
-       int err;
-
-       err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs);
-       if (!err)
-               return;
-
-       if (errno != EIO)
-               panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d",
-                     errno);
-
-       have_fpx_regs = 0;
-}
diff --git a/arch/um/os-Linux/sys-i386/signal.c b/arch/um/os-Linux/sys-i386/signal.c
deleted file mode 100644 (file)
index f311609..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Copyright (C) 2006 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <signal.h>
-
-extern void handle_signal(int sig, struct sigcontext *sc);
-
-void hard_handler(int sig)
-{
-       handle_signal(sig, (struct sigcontext *) (&sig + 1));
-}
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c
deleted file mode 100644 (file)
index be04c1e..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <sys/mman.h>
-#include "longjmp.h"
-#include "kern_constants.h"
-
-static jmp_buf buf;
-
-static void segfault(int sig)
-{
-       longjmp(buf, 1);
-}
-
-static int page_ok(unsigned long page)
-{
-       unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT);
-       unsigned long n = ~0UL;
-       void *mapped = NULL;
-       int ok = 0;
-
-       /*
-        * First see if the page is readable.  If it is, it may still
-        * be a VDSO, so we go on to see if it's writable.  If not
-        * then try mapping memory there.  If that fails, then we're
-        * still in the kernel area.  As a sanity check, we'll fail if
-        * the mmap succeeds, but gives us an address different from
-        * what we wanted.
-        */
-       if (setjmp(buf) == 0)
-               n = *address;
-       else {
-               mapped = mmap(address, UM_KERN_PAGE_SIZE,
-                             PROT_READ | PROT_WRITE,
-                             MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-               if (mapped == MAP_FAILED)
-                       return 0;
-               if (mapped != address)
-                       goto out;
-       }
-
-       /*
-        * Now, is it writeable?  If so, then we're in user address
-        * space.  If not, then try mprotecting it and try the write
-        * again.
-        */
-       if (setjmp(buf) == 0) {
-               *address = n;
-               ok = 1;
-               goto out;
-       } else if (mprotect(address, UM_KERN_PAGE_SIZE,
-                           PROT_READ | PROT_WRITE) != 0)
-               goto out;
-
-       if (setjmp(buf) == 0) {
-               *address = n;
-               ok = 1;
-       }
-
- out:
-       if (mapped != NULL)
-               munmap(mapped, UM_KERN_PAGE_SIZE);
-       return ok;
-}
-
-unsigned long os_get_top_address(void)
-{
-       struct sigaction sa, old;
-       unsigned long bottom = 0;
-       /*
-        * A 32-bit UML on a 64-bit host gets confused about the VDSO at
-        * 0xffffe000.  It is mapped, is readable, can be reprotected writeable
-        * and written.  However, exec discovers later that it can't be
-        * unmapped.  So, just set the highest address to be checked to just
-        * below it.  This might waste some address space on 4G/4G 32-bit
-        * hosts, but shouldn't hurt otherwise.
-        */
-       unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT;
-       unsigned long test, original;
-
-       printf("Locating the bottom of the address space ... ");
-       fflush(stdout);
-
-       /*
-        * We're going to be longjmping out of the signal handler, so
-        * SA_DEFER needs to be set.
-        */
-       sa.sa_handler = segfault;
-       sigemptyset(&sa.sa_mask);
-       sa.sa_flags = SA_NODEFER;
-       if (sigaction(SIGSEGV, &sa, &old)) {
-               perror("os_get_top_address");
-               exit(1);
-       }
-
-       /* Manually scan the address space, bottom-up, until we find
-        * the first valid page (or run out of them).
-        */
-       for (bottom = 0; bottom < top; bottom++) {
-               if (page_ok(bottom))
-                       break;
-       }
-
-       /* If we've got this far, we ran out of pages. */
-       if (bottom == top) {
-               fprintf(stderr, "Unable to determine bottom of address "
-                       "space.\n");
-               exit(1);
-       }
-
-       printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT);
-       printf("Locating the top of the address space ... ");
-       fflush(stdout);
-
-       original = bottom;
-
-       /* This could happen with a 4G/4G split */
-       if (page_ok(top))
-               goto out;
-
-       do {
-               test = bottom + (top - bottom) / 2;
-               if (page_ok(test))
-                       bottom = test;
-               else
-                       top = test;
-       } while (top - bottom > 1);
-
-out:
-       /* Restore the old SIGSEGV handling */
-       if (sigaction(SIGSEGV, &old, NULL)) {
-               perror("os_get_top_address");
-               exit(1);
-       }
-       top <<= UM_KERN_PAGE_SHIFT;
-       printf("0x%x\n", top);
-
-       return top;
-}
diff --git a/arch/um/os-Linux/sys-i386/tls.c b/arch/um/os-Linux/sys-i386/tls.c
deleted file mode 100644 (file)
index 32ed41e..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <errno.h>
-#include <linux/unistd.h>
-
-#include <sys/syscall.h>
-#include <unistd.h>
-
-#include "sysdep/tls.h"
-#include "user.h"
-
-/* Checks whether host supports TLS, and sets *tls_min according to the value
- * valid on the host.
- * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */
-void check_host_supports_tls(int *supports_tls, int *tls_min) {
-       /* Values for x86 and x86_64.*/
-       int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64};
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(val); i++) {
-               user_desc_t info;
-               info.entry_number = val[i];
-
-               if (syscall(__NR_get_thread_area, &info) == 0) {
-                       *tls_min = val[i];
-                       *supports_tls = 1;
-                       return;
-               } else {
-                       if (errno == EINVAL)
-                               continue;
-                       else if (errno == ENOSYS)
-                               *supports_tls = 0;
-                               return;
-               }
-       }
-
-       *supports_tls = 0;
-}
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
deleted file mode 100644 (file)
index a44a47f..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
-# Licensed under the GPL
-#
-
-obj-y = registers.o prctl.o signal.o task_size.o
-
-USER_OBJS := $(obj-y)
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/sys-x86_64/prctl.c b/arch/um/os-Linux/sys-x86_64/prctl.c
deleted file mode 100644 (file)
index 9d34edd..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit.com,linux.intel.com})
- * Licensed under the GPL
- */
-
-#include <sys/ptrace.h>
-#include <linux/ptrace.h>
-
-int os_arch_prctl(int pid, int code, unsigned long *addr)
-{
-        return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code);
-}
diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c
deleted file mode 100644 (file)
index 594d97a..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2006 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <errno.h>
-#include <sys/ptrace.h>
-#define __FRAME_OFFSETS
-#include <asm/ptrace.h>
-#include "kern_constants.h"
-#include "longjmp.h"
-#include "user.h"
-
-int save_fp_registers(int pid, unsigned long *fp_regs)
-{
-       if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
-               return -errno;
-       return 0;
-}
-
-int restore_fp_registers(int pid, unsigned long *fp_regs)
-{
-       if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
-               return -errno;
-       return 0;
-}
-
-unsigned long get_thread_reg(int reg, jmp_buf *buf)
-{
-       switch (reg) {
-       case RIP:
-               return buf[0]->__rip;
-       case RSP:
-               return buf[0]->__rsp;
-       case RBP:
-               return buf[0]->__rbp;
-       default:
-               printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
-                      reg);
-               return 0;
-       }
-}
-
-int get_fp_registers(int pid, unsigned long *regs)
-{
-       return save_fp_registers(pid, regs);
-}
-
-int put_fp_registers(int pid, unsigned long *regs)
-{
-       return restore_fp_registers(pid, regs);
-}
diff --git a/arch/um/os-Linux/sys-x86_64/signal.c b/arch/um/os-Linux/sys-x86_64/signal.c
deleted file mode 100644 (file)
index 82a3888..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright (C) 2006 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <signal.h>
-
-extern void handle_signal(int sig, struct sigcontext *sc);
-
-void hard_handler(int sig)
-{
-       struct ucontext *uc;
-       asm("movq %%rdx, %0" : "=r" (uc));
-
-       handle_signal(sig, (struct sigcontext *) &uc->uc_mcontext);
-}
diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c
deleted file mode 100644 (file)
index 26a0dd1..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-unsigned long os_get_top_address(unsigned long shift)
-{
-       /* The old value of CONFIG_TOP_ADDR */
-       return 0x7fc0000000;
-}
index 6e3359d6a8394c0d2f811a28a24f41e673e8793c..910499d76a678a1830f5ea634c86acfd67276493 100644 (file)
@@ -8,11 +8,9 @@
 #include <signal.h>
 #include <time.h>
 #include <sys/time.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
-#include "process.h"
-#include "user.h"
+#include "internal.h"
 
 int set_interval(void)
 {
diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c
deleted file mode 100644 (file)
index 7327780..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <errno.h>
-#include <sys/ptrace.h>
-#include "sysdep/tls.h"
-
-/* TLS support - we basically rely on the host's one.*/
-
-#ifndef PTRACE_GET_THREAD_AREA
-#define PTRACE_GET_THREAD_AREA 25
-#endif
-
-#ifndef PTRACE_SET_THREAD_AREA
-#define PTRACE_SET_THREAD_AREA 26
-#endif
-
-int os_set_thread_area(user_desc_t *info, int pid)
-{
-       int ret;
-
-       ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number,
-                    (unsigned long) info);
-       if (ret < 0)
-               ret = -errno;
-       return ret;
-}
-
-int os_get_thread_area(user_desc_t *info, int pid)
-{
-       int ret;
-
-       ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number,
-                    (unsigned long) info);
-       if (ret < 0)
-               ret = -errno;
-       return ret;
-}
index b09ff66a77eeaf7578d84df8839e2a2961813cd3..dd12b99dcb595e4b5f4fba75e6454833ece42ece 100644 (file)
@@ -7,10 +7,8 @@
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
-#include "kern_constants.h"
 #include "kern_util.h"
 #include "os.h"
-#include "user.h"
 
 struct grantpt_info {
        int fd;
diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c
deleted file mode 100644 (file)
index 087ed74..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <stddef.h>
-#include "longjmp.h"
-
-unsigned long __do_user_copy(void *to, const void *from, int n,
-                            void **fault_addr, jmp_buf **fault_catcher,
-                            void (*op)(void *to, const void *from,
-                                       int n), int *faulted_out)
-{
-       unsigned long *faddrp = (unsigned long *) fault_addr, ret;
-
-       jmp_buf jbuf;
-       *fault_catcher = &jbuf;
-       if (UML_SETJMP(&jbuf) == 0) {
-               (*op)(to, from, n);
-               ret = 0;
-               *faulted_out = 0;
-       }
-       else {
-               ret = *faddrp;
-               *faulted_out = 1;
-       }
-       *fault_addr = NULL;
-       *fault_catcher = NULL;
-       return ret;
-}
-
index a27defb81884e294ddab6dca2260a3b3cc867c58..4832eb519f8dfa9492803de9615e6d04b896faa3 100644 (file)
@@ -13,9 +13,7 @@
 #include <unistd.h>
 #include <sys/stat.h>
 #include "init.h"
-#include "kern_constants.h"
 #include "os.h"
-#include "user.h"
 
 #define UML_DIR "~/.uml/"
 
index 5803b188767213eb1a60399c594677a661547287..9e3b43bb84c99398407a9305feeb480b3f03462c 100644 (file)
@@ -13,9 +13,7 @@
 #include <wait.h>
 #include <sys/mman.h>
 #include <sys/utsname.h>
-#include "kern_constants.h"
 #include "os.h"
-#include "user.h"
 
 void stack_protections(unsigned long address)
 {
index 61107b68e05be1f6daac8b00cf9e963579c4f22c..2eb2843b06343c909b6138511722ea2d7a74cd52 100644 (file)
@@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m)  $(USER_SINGLE_OBJS))
 USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
 
 $(USER_OBJS:.o=.%): \
-       c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) $(CFLAGS_$(basetarget).o)
+       c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o)
 $(USER_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \
        -Dunix -D__unix__ -D__$(SUBARCH)__ $(CF)
 
@@ -25,8 +25,3 @@ $(UNPROFILE_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \
 define unprofile
        $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1)))
 endef
-
-ifdef subarch-obj-y
-obj-y += subarch.o
-subarch-y = $(addprefix ../../$(HEADER_ARCH)/,$(subarch-obj-y))
-endif
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
deleted file mode 100644 (file)
index 3923cfb..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-#
-# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
-#
-
-obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
-       ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
-       sys_call_table.o tls.o atomic64_cx8_32.o mem.o
-
-obj-$(CONFIG_BINFMT_ELF) += elfcore.o
-
-subarch-obj-y = lib/string_32.o
-subarch-obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += lib/rwsem.o
-subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module.o
-
-USER_OBJS := bugs.o ptrace_user.o fault.o
-
-USER_OBJS += user-offsets.s
-extra-y += user-offsets.s
-
-UNPROFILE_OBJS := stub_segv.o
-CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/sys-i386/asm/archparam.h b/arch/um/sys-i386/asm/archparam.h
deleted file mode 100644 (file)
index 2a18a88..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_ARCHPARAM_I386_H
-#define __UM_ARCHPARAM_I386_H
-
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-#endif
-
diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h
deleted file mode 100644 (file)
index 4230555..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-#ifndef __UM_ELF_I386_H
-#define __UM_ELF_I386_H
-
-#include <asm/user.h>
-#include "skas.h"
-
-#define R_386_NONE     0
-#define R_386_32       1
-#define R_386_PC32     2
-#define R_386_GOT32    3
-#define R_386_PLT32    4
-#define R_386_COPY     5
-#define R_386_GLOB_DAT 6
-#define R_386_JMP_SLOT 7
-#define R_386_RELATIVE 8
-#define R_386_GOTOFF   9
-#define R_386_GOTPC    10
-#define R_386_NUM      11
-
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct user_i387_struct elf_fpregset_t;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) \
-       (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
-
-#define ELF_CLASS      ELFCLASS32
-#define ELF_DATA        ELFDATA2LSB
-#define ELF_ARCH        EM_386
-
-#define ELF_PLAT_INIT(regs, load_addr) do { \
-       PT_REGS_EBX(regs) = 0; \
-       PT_REGS_ECX(regs) = 0; \
-       PT_REGS_EDX(regs) = 0; \
-       PT_REGS_ESI(regs) = 0; \
-       PT_REGS_EDI(regs) = 0; \
-       PT_REGS_EBP(regs) = 0; \
-       PT_REGS_EAX(regs) = 0; \
-} while (0)
-
-#define ELF_EXEC_PAGESIZE 4096
-
-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
-
-/* Shamelessly stolen from include/asm-i386/elf.h */
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs) do {  \
-       pr_reg[0] = PT_REGS_EBX(regs);          \
-       pr_reg[1] = PT_REGS_ECX(regs);          \
-       pr_reg[2] = PT_REGS_EDX(regs);          \
-       pr_reg[3] = PT_REGS_ESI(regs);          \
-       pr_reg[4] = PT_REGS_EDI(regs);          \
-       pr_reg[5] = PT_REGS_EBP(regs);          \
-       pr_reg[6] = PT_REGS_EAX(regs);          \
-       pr_reg[7] = PT_REGS_DS(regs);           \
-       pr_reg[8] = PT_REGS_ES(regs);           \
-       /* fake once used fs and gs selectors? */       \
-       pr_reg[9] = PT_REGS_DS(regs);           \
-       pr_reg[10] = PT_REGS_DS(regs);          \
-       pr_reg[11] = PT_REGS_SYSCALL_NR(regs);  \
-       pr_reg[12] = PT_REGS_IP(regs);          \
-       pr_reg[13] = PT_REGS_CS(regs);          \
-       pr_reg[14] = PT_REGS_EFLAGS(regs);      \
-       pr_reg[15] = PT_REGS_SP(regs);          \
-       pr_reg[16] = PT_REGS_SS(regs);          \
-} while (0);
-
-#define task_pt_regs(t) (&(t)->thread.regs)
-
-struct task_struct;
-
-extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
-
-#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
-
-extern long elf_aux_hwcap;
-#define ELF_HWCAP (elf_aux_hwcap)
-
-extern char * elf_aux_platform;
-#define ELF_PLATFORM (elf_aux_platform)
-
-#define SET_PERSONALITY(ex) do { } while (0)
-
-extern unsigned long vsyscall_ehdr;
-extern unsigned long vsyscall_end;
-extern unsigned long __kernel_vsyscall;
-
-#define VSYSCALL_BASE vsyscall_ehdr
-#define VSYSCALL_END vsyscall_end
-
-/*
- * This is the range that is readable by user mode, and things
- * acting like user mode such as get_user_pages.
- */
-#define FIXADDR_USER_START      VSYSCALL_BASE
-#define FIXADDR_USER_END        VSYSCALL_END
-
-#define __HAVE_ARCH_GATE_AREA 1
-
-/*
- * Architecture-neutral AT_ values in 0-17, leave some room
- * for more of them, start the x86-specific ones at 32.
- */
-#define AT_SYSINFO             32
-#define AT_SYSINFO_EHDR                33
-
-#define ARCH_DLINFO                                            \
-do {                                                           \
-       if ( vsyscall_ehdr ) {                                  \
-               NEW_AUX_ENT(AT_SYSINFO, __kernel_vsyscall);     \
-               NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr);    \
-       }                                                       \
-} while (0)
-
-#endif
diff --git a/arch/um/sys-i386/asm/module.h b/arch/um/sys-i386/asm/module.h
deleted file mode 100644 (file)
index 5ead4a0..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __UM_MODULE_I386_H
-#define __UM_MODULE_I386_H
-
-/* UML is simple */
-struct mod_arch_specific
-{
-};
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-#endif
diff --git a/arch/um/sys-i386/asm/processor.h b/arch/um/sys-i386/asm/processor.h
deleted file mode 100644 (file)
index 82a9061..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_PROCESSOR_I386_H
-#define __UM_PROCESSOR_I386_H
-
-#include "linux/string.h"
-#include <sysdep/host_ldt.h>
-#include "asm/segment.h"
-
-extern int host_has_cmov;
-
-/* include faultinfo structure */
-#include "sysdep/faultinfo.h"
-
-struct uml_tls_struct {
-       struct user_desc tls;
-       unsigned flushed:1;
-       unsigned present:1;
-};
-
-struct arch_thread {
-       struct uml_tls_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
-       unsigned long debugregs[8];
-       int debugregs_seq;
-       struct faultinfo faultinfo;
-};
-
-#define INIT_ARCH_THREAD { \
-       .tls_array              = { [ 0 ... GDT_ENTRY_TLS_ENTRIES - 1 ] = \
-                                   { .present = 0, .flushed = 0 } }, \
-       .debugregs              = { [ 0 ... 7 ] = 0 }, \
-       .debugregs_seq          = 0, \
-       .faultinfo              = { 0, 0, 0 } \
-}
-
-static inline void arch_flush_thread(struct arch_thread *thread)
-{
-       /* Clear any TLS still hanging */
-       memset(&thread->tls_array, 0, sizeof(thread->tls_array));
-}
-
-static inline void arch_copy_thread(struct arch_thread *from,
-                                    struct arch_thread *to)
-{
-        memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array));
-}
-
-#include <asm/user.h>
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
-       __asm__ __volatile__("rep;nop": : :"memory");
-}
-
-#define cpu_relax()    rep_nop()
-
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter"). Stolen
- * from asm-i386/processor.h
- */
-#define current_text_addr() \
-       ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
-
-#define ARCH_IS_STACKGROW(address) \
-       (address + 32 >= UPT_SP(&current->thread.regs.regs))
-
-#define KSTK_EIP(tsk) KSTK_REG(tsk, EIP)
-#define KSTK_ESP(tsk) KSTK_REG(tsk, UESP)
-#define KSTK_EBP(tsk) KSTK_REG(tsk, EBP)
-
-#include "asm/processor-generic.h"
-
-#endif
diff --git a/arch/um/sys-i386/asm/ptrace.h b/arch/um/sys-i386/asm/ptrace.h
deleted file mode 100644 (file)
index 5d2a591..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_PTRACE_I386_H
-#define __UM_PTRACE_I386_H
-
-#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
-
-#include "linux/compiler.h"
-#include "asm/ptrace-generic.h"
-
-#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs)
-#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs)
-#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs)
-#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs)
-#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs)
-#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs)
-#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs)
-
-#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
-#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
-#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
-#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
-#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
-#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
-
-#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
-
-#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r)
-#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r)
-#define PT_FIX_EXEC_STACK(sp) do ; while(0)
-
-#define profile_pc(regs) PT_REGS_IP(regs)
-
-#define user_mode(r) UPT_IS_USER(&(r)->regs)
-
-/*
- * Forward declaration to avoid including sysdep/tls.h, which causes a
- * circular include, and compilation failures.
- */
-struct user_desc;
-
-extern int ptrace_get_thread_area(struct task_struct *child, int idx,
-                                  struct user_desc __user *user_desc);
-
-extern int ptrace_set_thread_area(struct task_struct *child, int idx,
-                                  struct user_desc __user *user_desc);
-
-#endif
diff --git a/arch/um/sys-i386/atomic64_cx8_32.S b/arch/um/sys-i386/atomic64_cx8_32.S
deleted file mode 100644 (file)
index 1e901d3..0000000
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * atomic64_t for 586+
- *
- * Copied from arch/x86/lib/atomic64_cx8_32.S
- *
- * Copyright © 2010  Luca Barbieri
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/alternative-asm.h>
-#include <asm/dwarf2.h>
-
-.macro SAVE reg
-       pushl_cfi %\reg
-       CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
-       popl_cfi %\reg
-       CFI_RESTORE \reg
-.endm
-
-.macro read64 reg
-       movl %ebx, %eax
-       movl %ecx, %edx
-/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
-       LOCK_PREFIX
-       cmpxchg8b (\reg)
-.endm
-
-ENTRY(atomic64_read_cx8)
-       CFI_STARTPROC
-
-       read64 %ecx
-       ret
-       CFI_ENDPROC
-ENDPROC(atomic64_read_cx8)
-
-ENTRY(atomic64_set_cx8)
-       CFI_STARTPROC
-
-1:
-/* we don't need LOCK_PREFIX since aligned 64-bit writes
- * are atomic on 586 and newer */
-       cmpxchg8b (%esi)
-       jne 1b
-
-       ret
-       CFI_ENDPROC
-ENDPROC(atomic64_set_cx8)
-
-ENTRY(atomic64_xchg_cx8)
-       CFI_STARTPROC
-
-       movl %ebx, %eax
-       movl %ecx, %edx
-1:
-       LOCK_PREFIX
-       cmpxchg8b (%esi)
-       jne 1b
-
-       ret
-       CFI_ENDPROC
-ENDPROC(atomic64_xchg_cx8)
-
-.macro addsub_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
-       CFI_STARTPROC
-       SAVE ebp
-       SAVE ebx
-       SAVE esi
-       SAVE edi
-
-       movl %eax, %esi
-       movl %edx, %edi
-       movl %ecx, %ebp
-
-       read64 %ebp
-1:
-       movl %eax, %ebx
-       movl %edx, %ecx
-       \ins\()l %esi, %ebx
-       \insc\()l %edi, %ecx
-       LOCK_PREFIX
-       cmpxchg8b (%ebp)
-       jne 1b
-
-10:
-       movl %ebx, %eax
-       movl %ecx, %edx
-       RESTORE edi
-       RESTORE esi
-       RESTORE ebx
-       RESTORE ebp
-       ret
-       CFI_ENDPROC
-ENDPROC(atomic64_\func\()_return_cx8)
-.endm
-
-addsub_return add add adc
-addsub_return sub sub sbb
-
-.macro incdec_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
-       CFI_STARTPROC
-       SAVE ebx
-
-       read64 %esi
-1:
-       movl %eax, %ebx
-       movl %edx, %ecx
-       \ins\()l $1, %ebx
-       \insc\()l $0, %ecx
-       LOCK_PREFIX
-       cmpxchg8b (%esi)
-       jne 1b
-
-10:
-       movl %ebx, %eax
-       movl %ecx, %edx
-       RESTORE ebx
-       ret
-       CFI_ENDPROC
-ENDPROC(atomic64_\func\()_return_cx8)
-.endm
-
-incdec_return inc add adc
-incdec_return dec sub sbb
-
-ENTRY(atomic64_dec_if_positive_cx8)
-       CFI_STARTPROC
-       SAVE ebx
-
-       read64 %esi
-1:
-       movl %eax, %ebx
-       movl %edx, %ecx
-       subl $1, %ebx
-       sbb $0, %ecx
-       js 2f
-       LOCK_PREFIX
-       cmpxchg8b (%esi)
-       jne 1b
-
-2:
-       movl %ebx, %eax
-       movl %ecx, %edx
-       RESTORE ebx
-       ret
-       CFI_ENDPROC
-ENDPROC(atomic64_dec_if_positive_cx8)
-
-ENTRY(atomic64_add_unless_cx8)
-       CFI_STARTPROC
-       SAVE ebp
-       SAVE ebx
-/* these just push these two parameters on the stack */
-       SAVE edi
-       SAVE esi
-
-       movl %ecx, %ebp
-       movl %eax, %esi
-       movl %edx, %edi
-
-       read64 %ebp
-1:
-       cmpl %eax, 0(%esp)
-       je 4f
-2:
-       movl %eax, %ebx
-       movl %edx, %ecx
-       addl %esi, %ebx
-       adcl %edi, %ecx
-       LOCK_PREFIX
-       cmpxchg8b (%ebp)
-       jne 1b
-
-       movl $1, %eax
-3:
-       addl $8, %esp
-       CFI_ADJUST_CFA_OFFSET -8
-       RESTORE ebx
-       RESTORE ebp
-       ret
-4:
-       cmpl %edx, 4(%esp)
-       jne 2b
-       xorl %eax, %eax
-       jmp 3b
-       CFI_ENDPROC
-ENDPROC(atomic64_add_unless_cx8)
-
-ENTRY(atomic64_inc_not_zero_cx8)
-       CFI_STARTPROC
-       SAVE ebx
-
-       read64 %esi
-1:
-       testl %eax, %eax
-       je 4f
-2:
-       movl %eax, %ebx
-       movl %edx, %ecx
-       addl $1, %ebx
-       adcl $0, %ecx
-       LOCK_PREFIX
-       cmpxchg8b (%esi)
-       jne 1b
-
-       movl $1, %eax
-3:
-       RESTORE ebx
-       ret
-4:
-       testl %edx, %edx
-       jne 2b
-       jmp 3b
-       CFI_ENDPROC
-ENDPROC(atomic64_inc_not_zero_cx8)
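Aside: every 64-bit atomic in the file above is built from the same pattern: load the old value, compute the new one, and retry a locked cmpxchg8b until no other CPU has intervened. A hedged C sketch of that retry loop, using the GCC/Clang __atomic builtins rather than the kernel's primitives:

	#include <stdint.h>

	/* Sketch of the cmpxchg8b retry loop; not the kernel implementation. */
	static int64_t atomic64_add_return_sketch(int64_t i, int64_t *v)
	{
		int64_t old = __atomic_load_n(v, __ATOMIC_RELAXED);
		int64_t new;

		do {
			new = old + i;
			/* On failure, 'old' is reloaded with the current *v,
			 * mirroring the jne 1b loop in the assembly above. */
		} while (!__atomic_compare_exchange_n(v, &old, new, 0,
						      __ATOMIC_SEQ_CST,
						      __ATOMIC_RELAXED));
		return new;
	}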
diff --git a/arch/um/sys-i386/bug.c b/arch/um/sys-i386/bug.c
deleted file mode 100644 (file)
index 8d4f273..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL V2
- */
-
-#include <linux/uaccess.h>
-#include <asm/errno.h>
-
-/* Mostly copied from i386/x86_64 - eliminated the eip < PAGE_OFFSET check
- * because that's not relevant in skas mode.
- */
-
-int is_valid_bugaddr(unsigned long eip)
-{
-       unsigned short ud2;
-
-       if (probe_kernel_address((unsigned short __user *)eip, ud2))
-               return 0;
-
-       return ud2 == 0x0b0f;
-}
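Aside: the 0x0b0f constant is the two ud2 opcode bytes, 0x0f 0x0b, as seen through a little-endian 16-bit load. A standalone illustration (hypothetical helper, not from the patch):

	#include <stdint.h>
	#include <string.h>

	static int is_ud2(const unsigned char *insn)
	{
		uint16_t op;

		memcpy(&op, insn, sizeof(op));	/* bytes 0x0f, 0x0b in memory... */
		return op == 0x0b0f;		/* ...read back swapped on x86 */
	}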
diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c
deleted file mode 100644 (file)
index 2c6d0d7..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <signal.h>
-#include "kern_constants.h"
-#include "kern_util.h"
-#include "longjmp.h"
-#include "task.h"
-#include "user.h"
-#include "sysdep/ptrace.h"
-
-/* Set during early boot */
-static int host_has_cmov = 1;
-static jmp_buf cmov_test_return;
-
-static void cmov_sigill_test_handler(int sig)
-{
-       host_has_cmov = 0;
-       longjmp(cmov_test_return, 1);
-}
-
-void arch_check_bugs(void)
-{
-       struct sigaction old, new;
-
-       printk(UM_KERN_INFO "Checking for host processor cmov support...");
-       new.sa_handler = cmov_sigill_test_handler;
-
-       /* Make sure that SIGILL is enabled after the handler longjmps back */
-       new.sa_flags = SA_NODEFER;
-       sigemptyset(&new.sa_mask);
-       sigaction(SIGILL, &new, &old);
-
-       if (setjmp(cmov_test_return) == 0) {
-               unsigned long foo = 0;
-               __asm__ __volatile__("cmovz %0, %1" : "=r" (foo) : "0" (foo));
-               printk(UM_KERN_CONT "Yes\n");
-       } else
-               printk(UM_KERN_CONT "No\n");
-
-       sigaction(SIGILL, &old, &new);
-}
-
-void arch_examine_signal(int sig, struct uml_pt_regs *regs)
-{
-       unsigned char tmp[2];
-
-       /*
-        * This is testing for a cmov (0x0f 0x4x) instruction causing a
-        * SIGILL in init.
-        */
-       if ((sig != SIGILL) || (TASK_PID(get_current()) != 1))
-               return;
-
-       if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) {
-               printk(UM_KERN_ERR "SIGILL in init, could not read "
-                      "instructions!\n");
-               return;
-       }
-
-       if ((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40))
-               return;
-
-       if (host_has_cmov == 0)
-               printk(UM_KERN_ERR "SIGILL caused by cmov, which this "
-                      "processor doesn't implement.  Boot a filesystem "
-                      "compiled for older processors");
-       else if (host_has_cmov == 1)
-               printk(UM_KERN_ERR "SIGILL caused by cmov, which this "
-                      "processor claims to implement");
-       else
-               printk(UM_KERN_ERR "Bad value for host_has_cmov (%d)",
-                       host_has_cmov);
-}
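Aside: the probe above uses a classic trick: execute the instruction and let a SIGILL handler longjmp past it if the CPU objects. A self-contained userspace version of the same technique (32-bit x86 assumed; illustrative, not the UML code):

	#include <setjmp.h>
	#include <signal.h>
	#include <stdio.h>

	static sigjmp_buf probe_env;

	static void sigill_handler(int sig)
	{
		siglongjmp(probe_env, 1);
	}

	int main(void)
	{
		struct sigaction sa = { .sa_handler = sigill_handler,
					.sa_flags = SA_NODEFER };

		sigemptyset(&sa.sa_mask);
		sigaction(SIGILL, &sa, NULL);

		if (sigsetjmp(probe_env, 1) == 0) {
			unsigned long tmp = 0;
			__asm__ __volatile__("cmovz %0, %1" : "=r" (tmp) : "0" (tmp));
			puts("cmov: supported");
		} else {
			puts("cmov: not supported");
		}
		return 0;
	}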
diff --git a/arch/um/sys-i386/checksum.S b/arch/um/sys-i386/checksum.S
deleted file mode 100644 (file)
index f058d2f..0000000
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- * INET                An implementation of the TCP/IP protocol suite for the LINUX
- *             operating system.  INET is implemented using the  BSD Socket
- *             interface as the means of communication with the user level.
- *
- *             IP/TCP/UDP checksumming routines
- *
- * Authors:    Jorge Cwik, <jorge@laser.satlink.net>
- *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- *             Tom May, <ftom@netcom.com>
- *              Pentium Pro/II routines:
- *              Alexander Kjeldaas <astor@guardian.no>
- *              Finn Arne Gangstad <finnag@guardian.no>
- *             Lots of code moved from tcp.c and ip.c; see those files
- *             for more names.
- *
- * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
- *                          handling.
- *             Andi Kleen,  add zeroing on error
- *                   converted to pure assembler
- *
- *             This program is free software; you can redistribute it and/or
- *             modify it under the terms of the GNU General Public License
- *             as published by the Free Software Foundation; either version
- *             2 of the License, or (at your option) any later version.
- */
-
-#include <asm/errno.h>
-                               
-/*
- * computes a partial checksum, e.g. for TCP/UDP fragments
- */
-
-/*     
-unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
- */
-               
-.text
-.align 4
-.globl csum_partial
-               
-#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
-
-         /*            
-          * Experiments with Ethernet and SLIP connections show that buff
-          * is aligned on either a 2-byte or 4-byte boundary.  We get at
-          * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
-          * Fortunately, it is easy to convert 2-byte alignment to 4-byte
-          * alignment for the unrolled loop.
-          */           
-csum_partial:
-       pushl %esi
-       pushl %ebx
-       movl 20(%esp),%eax      # Function arg: unsigned int sum
-       movl 16(%esp),%ecx      # Function arg: int len
-       movl 12(%esp),%esi      # Function arg: unsigned char *buff
-       testl $2, %esi          # Check alignment.
-       jz 2f                   # Jump if alignment is ok.
-       subl $2, %ecx           # Alignment uses up two bytes.
-       jae 1f                  # Jump if we had at least two bytes.
-       addl $2, %ecx           # ecx was < 2.  Deal with it.
-       jmp 4f
-1:     movw (%esi), %bx
-       addl $2, %esi
-       addw %bx, %ax
-       adcl $0, %eax
-2:
-       movl %ecx, %edx
-       shrl $5, %ecx
-       jz 2f
-       testl %esi, %esi
-1:     movl (%esi), %ebx
-       adcl %ebx, %eax
-       movl 4(%esi), %ebx
-       adcl %ebx, %eax
-       movl 8(%esi), %ebx
-       adcl %ebx, %eax
-       movl 12(%esi), %ebx
-       adcl %ebx, %eax
-       movl 16(%esi), %ebx
-       adcl %ebx, %eax
-       movl 20(%esi), %ebx
-       adcl %ebx, %eax
-       movl 24(%esi), %ebx
-       adcl %ebx, %eax
-       movl 28(%esi), %ebx
-       adcl %ebx, %eax
-       lea 32(%esi), %esi
-       dec %ecx
-       jne 1b
-       adcl $0, %eax
-2:     movl %edx, %ecx
-       andl $0x1c, %edx
-       je 4f
-       shrl $2, %edx           # This clears CF
-3:     adcl (%esi), %eax
-       lea 4(%esi), %esi
-       dec %edx
-       jne 3b
-       adcl $0, %eax
-4:     andl $3, %ecx
-       jz 7f
-       cmpl $2, %ecx
-       jb 5f
-       movw (%esi),%cx
-       leal 2(%esi),%esi
-       je 6f
-       shll $16,%ecx
-5:     movb (%esi),%cl
-6:     addl %ecx,%eax
-       adcl $0, %eax 
-7:     
-       popl %ebx
-       popl %esi
-       ret
-
-#else
-
-/* Version for PentiumII/PPro */
-
-csum_partial:
-       pushl %esi
-       pushl %ebx
-       movl 20(%esp),%eax      # Function arg: unsigned int sum
-       movl 16(%esp),%ecx      # Function arg: int len
-       movl 12(%esp),%esi      # Function arg: const unsigned char *buf
-
-       testl $2, %esi         
-       jnz 30f                 
-10:
-       movl %ecx, %edx
-       movl %ecx, %ebx
-       andl $0x7c, %ebx
-       shrl $7, %ecx
-       addl %ebx,%esi
-       shrl $2, %ebx  
-       negl %ebx
-       lea 45f(%ebx,%ebx,2), %ebx
-       testl %esi, %esi
-       jmp *%ebx
-
-       # Handle 2-byte-aligned regions
-20:    addw (%esi), %ax
-       lea 2(%esi), %esi
-       adcl $0, %eax
-       jmp 10b
-
-30:    subl $2, %ecx          
-       ja 20b                 
-       je 32f
-       movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
-       addl %ebx, %eax
-       adcl $0, %eax
-       jmp 80f
-32:
-       addw (%esi), %ax        # csumming 2 bytes, 2-aligned
-       adcl $0, %eax
-       jmp 80f
-
-40: 
-       addl -128(%esi), %eax
-       adcl -124(%esi), %eax
-       adcl -120(%esi), %eax
-       adcl -116(%esi), %eax   
-       adcl -112(%esi), %eax   
-       adcl -108(%esi), %eax
-       adcl -104(%esi), %eax
-       adcl -100(%esi), %eax
-       adcl -96(%esi), %eax
-       adcl -92(%esi), %eax
-       adcl -88(%esi), %eax
-       adcl -84(%esi), %eax
-       adcl -80(%esi), %eax
-       adcl -76(%esi), %eax
-       adcl -72(%esi), %eax
-       adcl -68(%esi), %eax
-       adcl -64(%esi), %eax     
-       adcl -60(%esi), %eax     
-       adcl -56(%esi), %eax     
-       adcl -52(%esi), %eax   
-       adcl -48(%esi), %eax   
-       adcl -44(%esi), %eax
-       adcl -40(%esi), %eax
-       adcl -36(%esi), %eax
-       adcl -32(%esi), %eax
-       adcl -28(%esi), %eax
-       adcl -24(%esi), %eax
-       adcl -20(%esi), %eax
-       adcl -16(%esi), %eax
-       adcl -12(%esi), %eax
-       adcl -8(%esi), %eax
-       adcl -4(%esi), %eax
-45:
-       lea 128(%esi), %esi
-       adcl $0, %eax
-       dec %ecx
-       jge 40b
-       movl %edx, %ecx
-50:    andl $3, %ecx
-       jz 80f
-
-       # Handle the last 1-3 bytes without jumping
-       notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
-       movl $0xffffff,%ebx     # by the shll and shrl instructions
-       shll $3,%ecx
-       shrl %cl,%ebx
-       andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
-       addl %ebx,%eax
-       adcl $0,%eax
-80: 
-       popl %ebx
-       popl %esi
-       ret
-                               
-#endif
-
-/*
-unsigned int csum_partial_copy_generic (const char *src, char *dst,
-                                 int len, int sum, int *src_err_ptr, int *dst_err_ptr)
- */ 
-
-/*
- * Copy from ds while checksumming, otherwise like csum_partial
- *
- * The macros SRC and DST specify the type of access for the instruction,
- * thus we can call a custom exception handler for all access types.
- *
- * FIXME: could someone double-check whether I haven't mixed up some SRC and
- *       DST definitions? It's damn hard to trigger all cases.  I hope I got
- *       them all but there's no guarantee.
- */
-
-#define SRC(y...)                      \
-       9999: y;                        \
-       .section __ex_table, "a";       \
-       .long 9999b, 6001f      ;       \
-       .previous
-
-#define DST(y...)                      \
-       9999: y;                        \
-       .section __ex_table, "a";       \
-       .long 9999b, 6002f      ;       \
-       .previous
-
-.align 4
-
-#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
-
-#define ARGBASE 16             
-#define FP             12
-
-csum_partial_copy_generic_i386:
-       subl  $4,%esp   
-       pushl %edi
-       pushl %esi
-       pushl %ebx
-       movl ARGBASE+16(%esp),%eax      # sum
-       movl ARGBASE+12(%esp),%ecx      # len
-       movl ARGBASE+4(%esp),%esi       # src
-       movl ARGBASE+8(%esp),%edi       # dst
-
-       testl $2, %edi                  # Check alignment. 
-       jz 2f                           # Jump if alignment is ok.
-       subl $2, %ecx                   # Alignment uses up two bytes.
-       jae 1f                          # Jump if we had at least two bytes.
-       addl $2, %ecx                   # ecx was < 2.  Deal with it.
-       jmp 4f
-SRC(1: movw (%esi), %bx        )
-       addl $2, %esi
-DST(   movw %bx, (%edi)        )
-       addl $2, %edi
-       addw %bx, %ax   
-       adcl $0, %eax
-2:
-       movl %ecx, FP(%esp)
-       shrl $5, %ecx
-       jz 2f
-       testl %esi, %esi
-SRC(1: movl (%esi), %ebx       )
-SRC(   movl 4(%esi), %edx      )
-       adcl %ebx, %eax
-DST(   movl %ebx, (%edi)       )
-       adcl %edx, %eax
-DST(   movl %edx, 4(%edi)      )
-
-SRC(   movl 8(%esi), %ebx      )
-SRC(   movl 12(%esi), %edx     )
-       adcl %ebx, %eax
-DST(   movl %ebx, 8(%edi)      )
-       adcl %edx, %eax
-DST(   movl %edx, 12(%edi)     )
-
-SRC(   movl 16(%esi), %ebx     )
-SRC(   movl 20(%esi), %edx     )
-       adcl %ebx, %eax
-DST(   movl %ebx, 16(%edi)     )
-       adcl %edx, %eax
-DST(   movl %edx, 20(%edi)     )
-
-SRC(   movl 24(%esi), %ebx     )
-SRC(   movl 28(%esi), %edx     )
-       adcl %ebx, %eax
-DST(   movl %ebx, 24(%edi)     )
-       adcl %edx, %eax
-DST(   movl %edx, 28(%edi)     )
-
-       lea 32(%esi), %esi
-       lea 32(%edi), %edi
-       dec %ecx
-       jne 1b
-       adcl $0, %eax
-2:     movl FP(%esp), %edx
-       movl %edx, %ecx
-       andl $0x1c, %edx
-       je 4f
-       shrl $2, %edx                   # This clears CF
-SRC(3: movl (%esi), %ebx       )
-       adcl %ebx, %eax
-DST(   movl %ebx, (%edi)       )
-       lea 4(%esi), %esi
-       lea 4(%edi), %edi
-       dec %edx
-       jne 3b
-       adcl $0, %eax
-4:     andl $3, %ecx
-       jz 7f
-       cmpl $2, %ecx
-       jb 5f
-SRC(   movw (%esi), %cx        )
-       leal 2(%esi), %esi
-DST(   movw %cx, (%edi)        )
-       leal 2(%edi), %edi
-       je 6f
-       shll $16,%ecx
-SRC(5: movb (%esi), %cl        )
-DST(   movb %cl, (%edi)        )
-6:     addl %ecx, %eax
-       adcl $0, %eax
-7:
-5000:
-
-# Exception handler:
-.section .fixup, "ax"                                                  
-
-6001:
-       movl ARGBASE+20(%esp), %ebx     # src_err_ptr
-       movl $-EFAULT, (%ebx)
-
-       # zero the complete destination - computing the rest
-       # is too much work 
-       movl ARGBASE+8(%esp), %edi      # dst
-       movl ARGBASE+12(%esp), %ecx     # len
-       xorl %eax,%eax
-       rep ; stosb
-
-       jmp 5000b
-
-6002:
-       movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
-       movl $-EFAULT,(%ebx)
-       jmp 5000b
-
-.previous
-
-       popl %ebx
-       popl %esi
-       popl %edi
-       popl %ecx                       # equivalent to addl $4,%esp
-       ret     
-
-#else
-
-/* Version for PentiumII/PPro */
-
-#define ROUND1(x) \
-       SRC(movl x(%esi), %ebx  )       ;       \
-       addl %ebx, %eax                 ;       \
-       DST(movl %ebx, x(%edi)  )       ; 
-
-#define ROUND(x) \
-       SRC(movl x(%esi), %ebx  )       ;       \
-       adcl %ebx, %eax                 ;       \
-       DST(movl %ebx, x(%edi)  )       ;
-
-#define ARGBASE 12
-               
-csum_partial_copy_generic_i386:
-       pushl %ebx
-       pushl %edi
-       pushl %esi
-       movl ARGBASE+4(%esp),%esi       #src
-       movl ARGBASE+8(%esp),%edi       #dst    
-       movl ARGBASE+12(%esp),%ecx      #len
-       movl ARGBASE+16(%esp),%eax      #sum
-#      movl %ecx, %edx  
-       movl %ecx, %ebx  
-       movl %esi, %edx
-       shrl $6, %ecx     
-       andl $0x3c, %ebx  
-       negl %ebx
-       subl %ebx, %esi  
-       subl %ebx, %edi  
-       lea  -1(%esi),%edx
-       andl $-32,%edx
-       lea 3f(%ebx,%ebx), %ebx
-       testl %esi, %esi 
-       jmp *%ebx
-1:     addl $64,%esi
-       addl $64,%edi 
-       SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
-       ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)    
-       ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)    
-       ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)    
-       ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)     
-3:     adcl $0,%eax
-       addl $64, %edx
-       dec %ecx
-       jge 1b
-4:     movl ARGBASE+12(%esp),%edx      #len
-       andl $3, %edx
-       jz 7f
-       cmpl $2, %edx
-       jb 5f
-SRC(   movw (%esi), %dx         )
-       leal 2(%esi), %esi
-DST(   movw %dx, (%edi)         )
-       leal 2(%edi), %edi
-       je 6f
-       shll $16,%edx
-5:
-SRC(   movb (%esi), %dl         )
-DST(   movb %dl, (%edi)         )
-6:     addl %edx, %eax
-       adcl $0, %eax
-7:
-.section .fixup, "ax"
-6001:  movl    ARGBASE+20(%esp), %ebx  # src_err_ptr   
-       movl $-EFAULT, (%ebx)
-       # zero the complete destination (computing the rest is too much work)
-       movl ARGBASE+8(%esp),%edi       # dst
-       movl ARGBASE+12(%esp),%ecx      # len
-       xorl %eax,%eax
-       rep; stosb
-       jmp 7b
-6002:  movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
-       movl $-EFAULT, (%ebx)
-       jmp  7b                 
-.previous                              
-
-       popl %esi
-       popl %edi
-       popl %ebx
-       ret
-                               
-#undef ROUND
-#undef ROUND1          
-               
-#endif
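Aside: both assembly variants above compute the RFC 1071 ones'-complement sum; only the unrolling and alignment handling differ. A portable C sketch of the same computation (simplified: no copy, no exception handling, carries folded at the end):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	static uint32_t csum_partial_sketch(const void *buff, size_t len,
					    uint32_t sum)
	{
		const uint8_t *p = buff;

		while (len >= 2) {
			uint16_t w;

			memcpy(&w, p, 2);	/* alignment-safe 16-bit load */
			sum += w;
			p += 2;
			len -= 2;
		}
		if (len)			/* trailing odd byte */
			sum += *p;

		while (sum >> 16)		/* fold carries back into 16 bits */
			sum = (sum & 0xffff) + (sum >> 16);
		return sum;
	}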
diff --git a/arch/um/sys-i386/delay.c b/arch/um/sys-i386/delay.c
deleted file mode 100644 (file)
index f3fe1a6..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
- * Mostly copied from arch/x86/lib/delay.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <asm/param.h>
-
-void __delay(unsigned long loops)
-{
-       asm volatile(
-               "test %0,%0\n"
-               "jz 3f\n"
-               "jmp 1f\n"
-
-               ".align 16\n"
-               "1: jmp 2f\n"
-
-               ".align 16\n"
-               "2: dec %0\n"
-               " jnz 2b\n"
-               "3: dec %0\n"
-
-               : /* we don't need output */
-               : "a" (loops)
-       );
-}
-EXPORT_SYMBOL(__delay);
-
-inline void __const_udelay(unsigned long xloops)
-{
-       int d0;
-
-       xloops *= 4;
-       asm("mull %%edx"
-               : "=d" (xloops), "=&a" (d0)
-               : "1" (xloops), "0"
-               (loops_per_jiffy * (HZ/4)));
-
-       __delay(++xloops);
-}
-EXPORT_SYMBOL(__const_udelay);
-
-void __udelay(unsigned long usecs)
-{
-       __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
-}
-EXPORT_SYMBOL(__udelay);
-
-void __ndelay(unsigned long nsecs)
-{
-       __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
-}
-EXPORT_SYMBOL(__ndelay);
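Aside: the magic constants here encode divisions as fixed-point multiplies: 0x10c7 is ceil(2^32/10^6) and 0x5 is ceil(2^32/10^9), so multiplying and keeping the high 32 bits of the 64-bit product divides without a div instruction. The arithmetic, restated as a sketch (assumes loops_per_jiffy * HZ fits in 32 bits; the real code rescales by 4 to gain headroom):

	#include <stdint.h>

	static uint32_t udelay_loops(uint32_t usecs, uint32_t loops_per_jiffy,
				     uint32_t hz)
	{
		/* usecs * ceil(2^32 / 10^6): usecs/10^6 in 32.32 fixed point */
		uint32_t xloops = usecs * 0x10c7;

		/* high half of (xloops * loops-per-second), i.e. the mull above:
		 * usecs * loops_per_jiffy * HZ / 10^6 */
		return (uint32_t)(((uint64_t)xloops * (loops_per_jiffy * hz)) >> 32);
	}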
diff --git a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c
deleted file mode 100644 (file)
index 6bb49b6..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <linux/elf.h>
-#include <linux/coredump.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-
-#include <asm/elf.h>
-
-
-Elf32_Half elf_core_extra_phdrs(void)
-{
-       return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
-}
-
-int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-                              unsigned long limit)
-{
-       if ( vsyscall_ehdr ) {
-               const struct elfhdr *const ehdrp =
-                       (struct elfhdr *) vsyscall_ehdr;
-               const struct elf_phdr *const phdrp =
-                       (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
-               int i;
-               Elf32_Off ofs = 0;
-
-               for (i = 0; i < ehdrp->e_phnum; ++i) {
-                       struct elf_phdr phdr = phdrp[i];
-
-                       if (phdr.p_type == PT_LOAD) {
-                               ofs = phdr.p_offset = offset;
-                               offset += phdr.p_filesz;
-                       } else {
-                               phdr.p_offset += ofs;
-                       }
-                       phdr.p_paddr = 0; /* match other core phdrs */
-                       *size += sizeof(phdr);
-                       if (*size > limit
-                           || !dump_write(file, &phdr, sizeof(phdr)))
-                               return 0;
-               }
-       }
-       return 1;
-}
-
-int elf_core_write_extra_data(struct file *file, size_t *size,
-                             unsigned long limit)
-{
-       if ( vsyscall_ehdr ) {
-               const struct elfhdr *const ehdrp =
-                       (struct elfhdr *) vsyscall_ehdr;
-               const struct elf_phdr *const phdrp =
-                       (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
-               int i;
-
-               for (i = 0; i < ehdrp->e_phnum; ++i) {
-                       if (phdrp[i].p_type == PT_LOAD) {
-                               void *addr = (void *) phdrp[i].p_vaddr;
-                               size_t filesz = phdrp[i].p_filesz;
-
-                               *size += filesz;
-                               if (*size > limit
-                                   || !dump_write(file, addr, filesz))
-                                       return 0;
-                       }
-               }
-       }
-       return 1;
-}
-
-size_t elf_core_extra_data_size(void)
-{
-       if ( vsyscall_ehdr ) {
-               const struct elfhdr *const ehdrp =
-                       (struct elfhdr *)vsyscall_ehdr;
-               const struct elf_phdr *const phdrp =
-                       (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
-               int i;
-
-               for (i = 0; i < ehdrp->e_phnum; ++i)
-                       if (phdrp[i].p_type == PT_LOAD)
-                               return (size_t) phdrp[i].p_filesz;
-       }
-       return 0;
-}
diff --git a/arch/um/sys-i386/fault.c b/arch/um/sys-i386/fault.c
deleted file mode 100644 (file)
index d670f68..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/* 
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include "sysdep/ptrace.h"
-
-/* These two are from asm-um/uaccess.h and linux/module.h, check them. */
-struct exception_table_entry
-{
-       unsigned long insn;
-       unsigned long fixup;
-};
-
-const struct exception_table_entry *search_exception_tables(unsigned long add);
-
-/* Compare this to arch/i386/mm/extable.c:fixup_exception() */
-int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
-{
-       const struct exception_table_entry *fixup;
-
-       fixup = search_exception_tables(address);
-       if (fixup != 0) {
-               UPT_IP(regs) = fixup->fixup;
-               return 1;
-       }
-       return 0;
-}
diff --git a/arch/um/sys-i386/ksyms.c b/arch/um/sys-i386/ksyms.c
deleted file mode 100644 (file)
index bfbefd3..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-#include "linux/module.h"
-#include "asm/checksum.h"
-
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial);
diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
deleted file mode 100644 (file)
index 3f2bf20..0000000
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <asm/unistd.h>
-#include "os.h"
-#include "proc_mm.h"
-#include "skas.h"
-#include "skas_ptrace.h"
-#include "sysdep/tls.h"
-
-extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
-
-static long write_ldt_entry(struct mm_id *mm_idp, int func,
-                    struct user_desc *desc, void **addr, int done)
-{
-       long res;
-
-       if (proc_mm) {
-                * This is special handling for the case where the mm to
-                * This is a special handling for the case, that the mm to
-                * modify isn't current->active_mm.
-                * If this is called directly by modify_ldt,
-                *     (current->active_mm->context.skas.u == mm_idp)
-                * will be true. So no call to __switch_mm(mm_idp) is done.
-                * If this is called in case of init_new_ldt or PTRACE_LDT,
-                * mm_idp won't belong to current->active_mm, but child->mm.
-                * So we need to switch child's mm into our userspace, then
-                * later switch back.
-                *
-                * Note: I'm unsure: should interrupts be disabled here?
-                */
-               if (!current->active_mm || current->active_mm == &init_mm ||
-                   mm_idp != &current->active_mm->context.id)
-                       __switch_mm(mm_idp);
-       }
-
-       if (ptrace_ldt) {
-               struct ptrace_ldt ldt_op = (struct ptrace_ldt) {
-                       .func = func,
-                       .ptr = desc,
-                       .bytecount = sizeof(*desc)};
-               u32 cpu;
-               int pid;
-
-               if (!proc_mm)
-                       pid = mm_idp->u.pid;
-               else {
-                       cpu = get_cpu();
-                       pid = userspace_pid[cpu];
-               }
-
-               res = os_ptrace_ldt(pid, 0, (unsigned long) &ldt_op);
-
-               if (proc_mm)
-                       put_cpu();
-       }
-       else {
-               void *stub_addr;
-               res = syscall_stub_data(mm_idp, (unsigned long *)desc,
-                                       (sizeof(*desc) + sizeof(long) - 1) &
-                                           ~(sizeof(long) - 1),
-                                       addr, &stub_addr);
-               if (!res) {
-                       unsigned long args[] = { func,
-                                                (unsigned long)stub_addr,
-                                                sizeof(*desc),
-                                                0, 0, 0 };
-                       res = run_syscall_stub(mm_idp, __NR_modify_ldt, args,
-                                              0, addr, done);
-               }
-       }
-
-       if (proc_mm) {
-               /*
-                * This is the second part of the special handling that makes
-                * PTRACE_LDT possible to implement.
-                */
-               if (current->active_mm && current->active_mm != &init_mm &&
-                   mm_idp != &current->active_mm->context.id)
-                       __switch_mm(&current->active_mm->context.id);
-       }
-
-       return res;
-}
-
-static long read_ldt_from_host(void __user * ptr, unsigned long bytecount)
-{
-       int res, n;
-       struct ptrace_ldt ptrace_ldt = (struct ptrace_ldt) {
-                       .func = 0,
-                       .bytecount = bytecount,
-                       .ptr = kmalloc(bytecount, GFP_KERNEL)};
-       u32 cpu;
-
-       if (ptrace_ldt.ptr == NULL)
-               return -ENOMEM;
-
-       /*
-        * This is called from sys_modify_ldt only, so userspace_pid gives
-        * us the right number
-        */
-
-       cpu = get_cpu();
-       res = os_ptrace_ldt(userspace_pid[cpu], 0, (unsigned long) &ptrace_ldt);
-       put_cpu();
-       if (res < 0)
-               goto out;
-
-       n = copy_to_user(ptr, ptrace_ldt.ptr, res);
-       if (n != 0)
-               res = -EFAULT;
-
-  out:
-       kfree(ptrace_ldt.ptr);
-
-       return res;
-}
-
-/*
- * In skas mode, we hold our own ldt data in UML.
- * Thus, the code implementing sys_modify_ldt_skas
- * is very similar to (and mostly stolen from) sys_modify_ldt
- * in arch/i386/kernel/ldt.c
- * The routines copied and modified in part are:
- * - read_ldt
- * - read_default_ldt
- * - write_ldt
- * - sys_modify_ldt_skas
- */
-
-static int read_ldt(void __user * ptr, unsigned long bytecount)
-{
-       int i, err = 0;
-       unsigned long size;
-       uml_ldt_t * ldt = &current->mm->context.ldt;
-
-       if (!ldt->entry_count)
-               goto out;
-       if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
-               bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
-       err = bytecount;
-
-       if (ptrace_ldt)
-               return read_ldt_from_host(ptr, bytecount);
-
-       mutex_lock(&ldt->lock);
-       if (ldt->entry_count <= LDT_DIRECT_ENTRIES) {
-               size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
-               if (size > bytecount)
-                       size = bytecount;
-               if (copy_to_user(ptr, ldt->u.entries, size))
-                       err = -EFAULT;
-               bytecount -= size;
-               ptr += size;
-       }
-       else {
-               for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount;
-                    i++) {
-                       size = PAGE_SIZE;
-                       if (size > bytecount)
-                               size = bytecount;
-                       if (copy_to_user(ptr, ldt->u.pages[i], size)) {
-                               err = -EFAULT;
-                               break;
-                       }
-                       bytecount -= size;
-                       ptr += size;
-               }
-       }
-       mutex_unlock(&ldt->lock);
-
-       if (bytecount == 0 || err == -EFAULT)
-               goto out;
-
-       if (clear_user(ptr, bytecount))
-               err = -EFAULT;
-
-out:
-       return err;
-}
-
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
-{
-       int err;
-
-       if (bytecount > 5*LDT_ENTRY_SIZE)
-               bytecount = 5*LDT_ENTRY_SIZE;
-
-       err = bytecount;
-       /*
-        * UML doesn't support lcall7 and lcall27.
-        * So, we don't really have a default ldt, but emulate
-        * an empty ldt of common host default ldt size.
-        */
-       if (clear_user(ptr, bytecount))
-               err = -EFAULT;
-
-       return err;
-}
-
-static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
-{
-       uml_ldt_t * ldt = &current->mm->context.ldt;
-       struct mm_id * mm_idp = &current->mm->context.id;
-       int i, err;
-       struct user_desc ldt_info;
-       struct ldt_entry entry0, *ldt_p;
-       void *addr = NULL;
-
-       err = -EINVAL;
-       if (bytecount != sizeof(ldt_info))
-               goto out;
-       err = -EFAULT;
-       if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
-               goto out;
-
-       err = -EINVAL;
-       if (ldt_info.entry_number >= LDT_ENTRIES)
-               goto out;
-       if (ldt_info.contents == 3) {
-               if (func == 1)
-                       goto out;
-               if (ldt_info.seg_not_present == 0)
-                       goto out;
-       }
-
-       if (!ptrace_ldt)
-               mutex_lock(&ldt->lock);
-
-       err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
-       if (err)
-               goto out_unlock;
-       else if (ptrace_ldt) {
-               /* With PTRACE_LDT available, this is used as a flag only */
-               ldt->entry_count = 1;
-               goto out;
-       }
-
-       if (ldt_info.entry_number >= ldt->entry_count &&
-           ldt_info.entry_number >= LDT_DIRECT_ENTRIES) {
-               for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE;
-                    i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number;
-                    i++) {
-                       if (i == 0)
-                               memcpy(&entry0, ldt->u.entries,
-                                      sizeof(entry0));
-                       ldt->u.pages[i] = (struct ldt_entry *)
-                               __get_free_page(GFP_KERNEL|__GFP_ZERO);
-                       if (!ldt->u.pages[i]) {
-                               err = -ENOMEM;
-                               /* Undo the change in host */
-                               memset(&ldt_info, 0, sizeof(ldt_info));
-                               write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1);
-                               goto out_unlock;
-                       }
-                       if (i == 0) {
-                               memcpy(ldt->u.pages[0], &entry0,
-                                      sizeof(entry0));
-                               memcpy(ldt->u.pages[0]+1, ldt->u.entries+1,
-                                      sizeof(entry0)*(LDT_DIRECT_ENTRIES-1));
-                       }
-                       ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE;
-               }
-       }
-       if (ldt->entry_count <= ldt_info.entry_number)
-               ldt->entry_count = ldt_info.entry_number + 1;
-
-       if (ldt->entry_count <= LDT_DIRECT_ENTRIES)
-               ldt_p = ldt->u.entries + ldt_info.entry_number;
-       else
-               ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] +
-                       ldt_info.entry_number%LDT_ENTRIES_PER_PAGE;
-
-       if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
-          (func == 1 || LDT_empty(&ldt_info))) {
-               ldt_p->a = 0;
-               ldt_p->b = 0;
-       }
-       else{
-               if (func == 1)
-                       ldt_info.useable = 0;
-               ldt_p->a = LDT_entry_a(&ldt_info);
-               ldt_p->b = LDT_entry_b(&ldt_info);
-       }
-       err = 0;
-
-out_unlock:
-       mutex_unlock(&ldt->lock);
-out:
-       return err;
-}
-
-static long do_modify_ldt_skas(int func, void __user *ptr,
-                              unsigned long bytecount)
-{
-       int ret = -ENOSYS;
-
-       switch (func) {
-               case 0:
-                       ret = read_ldt(ptr, bytecount);
-                       break;
-               case 1:
-               case 0x11:
-                       ret = write_ldt(ptr, bytecount, func);
-                       break;
-               case 2:
-                       ret = read_default_ldt(ptr, bytecount);
-                       break;
-       }
-       return ret;
-}
-
-static DEFINE_SPINLOCK(host_ldt_lock);
-static short dummy_list[9] = {0, -1};
-static short * host_ldt_entries = NULL;
-
-static void ldt_get_host_info(void)
-{
-       long ret;
-       struct ldt_entry * ldt;
-       short *tmp;
-       int i, size, k, order;
-
-       spin_lock(&host_ldt_lock);
-
-       if (host_ldt_entries != NULL) {
-               spin_unlock(&host_ldt_lock);
-               return;
-       }
-       host_ldt_entries = dummy_list+1;
-
-       spin_unlock(&host_ldt_lock);
-
-       for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++)
-               ;
-
-       ldt = (struct ldt_entry *)
-             __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
-       if (ldt == NULL) {
-               printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer "
-                      "for host ldt\n");
-               return;
-       }
-
-       ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE);
-       if (ret < 0) {
-               printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n");
-               goto out_free;
-       }
-       if (ret == 0) {
-               /* default_ldt is active, simply write an empty entry 0 */
-               host_ldt_entries = dummy_list;
-               goto out_free;
-       }
-
-       for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) {
-               if (ldt[i].a != 0 || ldt[i].b != 0)
-                       size++;
-       }
-
-       if (size < ARRAY_SIZE(dummy_list))
-               host_ldt_entries = dummy_list;
-       else {
-               size = (size + 1) * sizeof(dummy_list[0]);
-               tmp = kmalloc(size, GFP_KERNEL);
-               if (tmp == NULL) {
-                       printk(KERN_ERR "ldt_get_host_info: couldn't allocate "
-                              "host ldt list\n");
-                       goto out_free;
-               }
-               host_ldt_entries = tmp;
-       }
-
-       for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) {
-               if (ldt[i].a != 0 || ldt[i].b != 0)
-                       host_ldt_entries[k++] = i;
-       }
-       host_ldt_entries[k] = -1;
-
-out_free:
-       free_pages((unsigned long)ldt, order);
-}
-
-long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
-{
-       struct user_desc desc;
-       short * num_p;
-       int i;
-       long page, err=0;
-       void *addr = NULL;
-       struct proc_mm_op copy;
-
-
-       if (!ptrace_ldt)
-               mutex_init(&new_mm->ldt.lock);
-
-       if (!from_mm) {
-               memset(&desc, 0, sizeof(desc));
-               /*
-                * We have to initialize a clean ldt.
-                */
-               if (proc_mm) {
-                       /*
-                        * If the new mm was created using proc_mm, the host's
-                        * default ldt is currently assigned, which normally
-                        * contains the call-gates for lcall7 and lcall27.
-                        * To remove these gates, we simply write an empty
-                        * entry as number 0 to the host.
-                        */
-                       err = write_ldt_entry(&new_mm->id, 1, &desc, &addr, 1);
-               }
-               else{
-                       /*
-                        * Now we try to retrieve info about the ldt we
-                        * inherited from the host. All ldt-entries found
-                        * will be reset in the following loop
-                        */
-                       ldt_get_host_info();
-                       for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
-                               desc.entry_number = *num_p;
-                               err = write_ldt_entry(&new_mm->id, 1, &desc,
-                                                     &addr, *(num_p + 1) == -1);
-                               if (err)
-                                       break;
-                       }
-               }
-               new_mm->ldt.entry_count = 0;
-
-               goto out;
-       }
-
-       if (proc_mm) {
-               /*
-                * from_mm to new_mm, because with proc_mm a new mm with
-                * from_mm to new_mm, because using proc_mm an new mm with
-                * an empty/default LDT was created in new_mm()
-                */
-               copy = ((struct proc_mm_op) { .op       = MM_COPY_SEGMENTS,
-                                             .u        =
-                                             { .copy_segments =
-                                                       from_mm->id.u.mm_fd } } );
-               i = os_write_file(new_mm->id.u.mm_fd, &copy, sizeof(copy));
-               if (i != sizeof(copy))
-                       printk(KERN_ERR "new_mm : /proc/mm copy_segments "
-                              "failed, err = %d\n", -i);
-       }
-
-       if (!ptrace_ldt) {
-               /*
-                * Our local LDT is used to supply the data for
-                * modify_ldt(READLDT), if PTRACE_LDT isn't available,
-                * i.e., we have to use the stub for modify_ldt, which
-                * can't handle the big read buffer of up to 64kB.
-                */
-               mutex_lock(&from_mm->ldt.lock);
-               if (from_mm->ldt.entry_count <= LDT_DIRECT_ENTRIES)
-                       memcpy(new_mm->ldt.u.entries, from_mm->ldt.u.entries,
-                              sizeof(new_mm->ldt.u.entries));
-               else {
-                       i = from_mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE;
-                       while (i-->0) {
-                               page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
-                               if (!page) {
-                                       err = -ENOMEM;
-                                       break;
-                               }
-                               new_mm->ldt.u.pages[i] =
-                                       (struct ldt_entry *) page;
-                               memcpy(new_mm->ldt.u.pages[i],
-                                      from_mm->ldt.u.pages[i], PAGE_SIZE);
-                       }
-               }
-               new_mm->ldt.entry_count = from_mm->ldt.entry_count;
-               mutex_unlock(&from_mm->ldt.lock);
-       }
-
-    out:
-       return err;
-}
-
-
-void free_ldt(struct mm_context *mm)
-{
-       int i;
-
-       if (!ptrace_ldt && mm->ldt.entry_count > LDT_DIRECT_ENTRIES) {
-               i = mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE;
-               while (i-- > 0)
-                       free_page((long) mm->ldt.u.pages[i]);
-       }
-       mm->ldt.entry_count = 0;
-}
-
-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
-{
-       return do_modify_ldt_skas(func, ptr, bytecount);
-}
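Aside: the func codes dispatched in do_modify_ldt_skas() above mirror the host interface: 0 reads the LDT, 1 and 0x11 write an entry, 2 reads the default LDT. A hypothetical userspace call into that interface (raw syscall; glibc exposes no wrapper):

	#include <asm/ldt.h>		/* struct user_desc */
	#include <stdio.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		struct user_desc desc;

		memset(&desc, 0, sizeof(desc));
		desc.entry_number = 0;
		desc.read_exec_only = 1;
		desc.seg_not_present = 1;	/* LDT_empty()-style entry */

		/* func 1: write one entry, the path write_ldt() handles above */
		if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0)
			perror("modify_ldt");
		return 0;
	}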
diff --git a/arch/um/sys-i386/mem.c b/arch/um/sys-i386/mem.c
deleted file mode 100644 (file)
index 639900a..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/mm.h>
-#include <asm/page.h>
-#include <asm/mman.h>
-
-static struct vm_area_struct gate_vma;
-
-static int __init gate_vma_init(void)
-{
-       if (!FIXADDR_USER_START)
-               return 0;
-
-       gate_vma.vm_mm = NULL;
-       gate_vma.vm_start = FIXADDR_USER_START;
-       gate_vma.vm_end = FIXADDR_USER_END;
-       gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
-       gate_vma.vm_page_prot = __P101;
-
-       /*
-        * Make sure the vDSO gets into every core dump.
-        * Dumping its contents makes post-mortem fully interpretable later
-        * without matching up the same kernel and hardware config to see
-        * what PC values meant.
-        */
-       gate_vma.vm_flags |= VM_ALWAYSDUMP;
-
-       return 0;
-}
-__initcall(gate_vma_init);
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-       return FIXADDR_USER_START ? &gate_vma : NULL;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-       if (!FIXADDR_USER_START)
-               return 0;
-
-       if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
-               return 1;
-
-       return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-       struct vm_area_struct *vma = get_gate_vma(mm);
-
-       if (!vma)
-               return 0;
-
-       return (addr >= vma->vm_start) && (addr < vma->vm_end);
-}
diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c
deleted file mode 100644 (file)
index 3375c27..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include "linux/mm.h"
-#include "linux/sched.h"
-#include "asm/uaccess.h"
-#include "skas.h"
-
-extern int arch_switch_tls(struct task_struct *to);
-
-void arch_switch_to(struct task_struct *to)
-{
-       int err = arch_switch_tls(to);
-       if (!err)
-               return;
-
-       if (err != -EINVAL)
-               printk(KERN_WARNING "arch_switch_tls failed, errno %d, "
-                      "not EINVAL\n", -err);
-       else
-               printk(KERN_WARNING "arch_switch_tls failed, errno = EINVAL\n");
-}
-
-int is_syscall(unsigned long addr)
-{
-       unsigned short instr;
-       int n;
-
-       n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
-       if (n) {
-               /* access_process_vm() grants access to vsyscall and stub,
-                * while copy_from_user doesn't. Maybe access_process_vm is
-                * slow, but that doesn't matter, since it will be called only
-                * in case of singlestepping, if copy_from_user failed.
-                */
-               n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
-               if (n != sizeof(instr)) {
-                       printk(KERN_ERR "is_syscall : failed to read "
-                              "instruction from 0x%lx\n", addr);
-                       return 1;
-               }
-       }
-       /* int 0x80 or sysenter */
-       return (instr == 0x80cd) || (instr == 0x340f);
-}
-
-/* determines which flags the user has access to. */
-/* 1 = access 0 = no access */
-#define FLAG_MASK 0x00044dd5
-
-int putreg(struct task_struct *child, int regno, unsigned long value)
-{
-       regno >>= 2;
-       switch (regno) {
-       case FS:
-               if (value && (value & 3) != 3)
-                       return -EIO;
-               PT_REGS_FS(&child->thread.regs) = value;
-               return 0;
-       case GS:
-               if (value && (value & 3) != 3)
-                       return -EIO;
-               PT_REGS_GS(&child->thread.regs) = value;
-               return 0;
-       case DS:
-       case ES:
-               if (value && (value & 3) != 3)
-                       return -EIO;
-               value &= 0xffff;
-               break;
-       case SS:
-       case CS:
-               if ((value & 3) != 3)
-                       return -EIO;
-               value &= 0xffff;
-               break;
-       case EFL:
-               value &= FLAG_MASK;
-               value |= PT_REGS_EFLAGS(&child->thread.regs);
-               break;
-       }
-       PT_REGS_SET(&child->thread.regs, regno, value);
-       return 0;
-}
-
-int poke_user(struct task_struct *child, long addr, long data)
-{
-       if ((addr & 3) || addr < 0)
-               return -EIO;
-
-       if (addr < MAX_REG_OFFSET)
-               return putreg(child, addr, data);
-       else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
-                (addr <= offsetof(struct user, u_debugreg[7]))) {
-               addr -= offsetof(struct user, u_debugreg[0]);
-               addr = addr >> 2;
-               if ((addr == 4) || (addr == 5))
-                       return -EIO;
-               child->thread.arch.debugregs[addr] = data;
-               return 0;
-       }
-       return -EIO;
-}
-
-unsigned long getreg(struct task_struct *child, int regno)
-{
-       unsigned long retval = ~0UL;
-
-       regno >>= 2;
-       switch (regno) {
-       case FS:
-       case GS:
-       case DS:
-       case ES:
-       case SS:
-       case CS:
-               retval = 0xffff;
-               /* fall through */
-       default:
-               retval &= PT_REG(&child->thread.regs, regno);
-       }
-       return retval;
-}
-
-/* read the word at location addr in the USER area. */
-int peek_user(struct task_struct *child, long addr, long data)
-{
-       unsigned long tmp;
-
-       if ((addr & 3) || addr < 0)
-               return -EIO;
-
-       tmp = 0;  /* Default return condition */
-       if (addr < MAX_REG_OFFSET) {
-               tmp = getreg(child, addr);
-       }
-       else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
-                (addr <= offsetof(struct user, u_debugreg[7]))) {
-               addr -= offsetof(struct user, u_debugreg[0]);
-               addr = addr >> 2;
-               tmp = child->thread.arch.debugregs[addr];
-       }
-       return put_user(tmp, (unsigned long __user *) data);
-}
-
-static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
-{
-       int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
-       struct user_i387_struct fpregs;
-
-       err = save_fp_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
-       if (err)
-               return err;
-
-       n = copy_to_user(buf, &fpregs, sizeof(fpregs));
-       if(n > 0)
-               return -EFAULT;
-
-       return n;
-}
-
-static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
-{
-       int n, cpu = ((struct thread_info *) child->stack)->cpu;
-       struct user_i387_struct fpregs;
-
-       n = copy_from_user(&fpregs, buf, sizeof(fpregs));
-       if (n > 0)
-               return -EFAULT;
-
-       return restore_fp_registers(userspace_pid[cpu],
-                                   (unsigned long *) &fpregs);
-}
-
-static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
-{
-       int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
-       struct user_fxsr_struct fpregs;
-
-       err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
-       if (err)
-               return err;
-
-       n = copy_to_user(buf, &fpregs, sizeof(fpregs));
-       if(n > 0)
-               return -EFAULT;
-
-       return n;
-}
-
-static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
-{
-       int n, cpu = ((struct thread_info *) child->stack)->cpu;
-       struct user_fxsr_struct fpregs;
-
-       n = copy_from_user(&fpregs, buf, sizeof(fpregs));
-       if (n > 0)
-               return -EFAULT;
-
-       return restore_fpx_registers(userspace_pid[cpu],
-                                    (unsigned long *) &fpregs);
-}
-
-long subarch_ptrace(struct task_struct *child, long request,
-                   unsigned long addr, unsigned long data)
-{
-       int ret = -EIO;
-       void __user *datap = (void __user *) data;
-       switch (request) {
-       case PTRACE_GETFPREGS: /* Get the child FPU state. */
-               ret = get_fpregs(datap, child);
-               break;
-       case PTRACE_SETFPREGS: /* Set the child FPU state. */
-               ret = set_fpregs(datap, child);
-               break;
-       case PTRACE_GETFPXREGS: /* Get the child FPU state. */
-               ret = get_fpxregs(datap, child);
-               break;
-       case PTRACE_SETFPXREGS: /* Set the child FPU state. */
-               ret = set_fpxregs(datap, child);
-               break;
-       default:
-               ret = -EIO;
-       }
-       return ret;
-}
diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c
deleted file mode 100644 (file)
index 0b10c3e..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <errno.h>
-#include <sys/ptrace.h>
-
-int ptrace_getregs(long pid, unsigned long *regs_out)
-{
-       if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0)
-               return -errno;
-       return 0;
-}
-
-int ptrace_setregs(long pid, unsigned long *regs)
-{
-       if (ptrace(PTRACE_SETREGS, pid, 0, regs) < 0)
-               return -errno;
-       return 0;
-}
diff --git a/arch/um/sys-i386/setjmp.S b/arch/um/sys-i386/setjmp.S
deleted file mode 100644 (file)
index b766792..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-#
-# arch/i386/setjmp.S
-#
-# setjmp/longjmp for the i386 architecture
-#
-
-#
-# The jmp_buf is assumed to contain the following, in order:
-#      %ebx
-#      %esp
-#      %ebp
-#      %esi
-#      %edi
-#      <return address>
-#
-
-       .text
-       .align 4
-       .globl setjmp
-       .type setjmp, @function
-setjmp:
-#ifdef _REGPARM
-       movl %eax,%edx
-#else
-       movl 4(%esp),%edx
-#endif
-       popl %ecx                       # Return address, and adjust the stack
-       xorl %eax,%eax                  # Return value
-       movl %ebx,(%edx)
-       movl %esp,4(%edx)               # Post-return %esp!
-       pushl %ecx                      # Make the call/return stack happy
-       movl %ebp,8(%edx)
-       movl %esi,12(%edx)
-       movl %edi,16(%edx)
-       movl %ecx,20(%edx)              # Return address
-       ret
-
-       .size setjmp,.-setjmp
-
-       .text
-       .align 4
-       .globl longjmp
-       .type longjmp, @function
-longjmp:
-#ifdef _REGPARM
-       xchgl %eax,%edx
-#else
-       movl 4(%esp),%edx               # jmp_ptr address
-       movl 8(%esp),%eax               # Return value
-#endif
-       movl (%edx),%ebx
-       movl 4(%edx),%esp
-       movl 8(%edx),%ebp
-       movl 12(%edx),%esi
-       movl 16(%edx),%edi
-       jmp *20(%edx)
-
-       .size longjmp,.-longjmp
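
Note: a sketch of the contract the assembly above implements, written against the standard <setjmp.h> interface for illustration (the UML build links the private assembly version instead): setjmp() returns 0 on the direct call, and the non-zero value passed to longjmp() when control resumes at the save point.

	#include <setjmp.h>
	#include <stdio.h>

	static jmp_buf env;

	static void bail_out(void)
	{
		longjmp(env, 42);	/* unwinds to the matching setjmp() */
	}

	int main(void)
	{
		int ret = setjmp(env);	/* 0 first time, 42 after longjmp() */

		if (ret == 0) {
			puts("direct return");
			bail_out();
		}
		printf("resumed with %d\n", ret);
		return 0;
	}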
diff --git a/arch/um/sys-i386/shared/sysdep/archsetjmp.h b/arch/um/sys-i386/shared/sysdep/archsetjmp.h
deleted file mode 100644 (file)
index 0f31208..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * arch/um/include/sysdep-i386/archsetjmp.h
- */
-
-#ifndef _KLIBC_ARCHSETJMP_H
-#define _KLIBC_ARCHSETJMP_H
-
-struct __jmp_buf {
-       unsigned int __ebx;
-       unsigned int __esp;
-       unsigned int __ebp;
-       unsigned int __esi;
-       unsigned int __edi;
-       unsigned int __eip;
-};
-
-typedef struct __jmp_buf jmp_buf[1];
-
-#define JB_IP __eip
-#define JB_SP __esp
-
-#endif                         /* _KLIBC_ARCHSETJMP_H */
diff --git a/arch/um/sys-i386/shared/sysdep/barrier.h b/arch/um/sys-i386/shared/sysdep/barrier.h
deleted file mode 100644 (file)
index b58d52c..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __SYSDEP_I386_BARRIER_H
-#define __SYSDEP_I386_BARRIER_H
-
-/* Copied from include/asm-i386 for use by userspace.  i386 has the option
- * of using mfence, but I'm just using this, which works everywhere, for now.
- */
-#define mb() asm volatile("lock; addl $0,0(%esp)")
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/checksum.h b/arch/um/sys-i386/shared/sysdep/checksum.h
deleted file mode 100644 (file)
index ed47445..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-/* 
- * Licensed under the GPL
- */
-
-#ifndef __UM_SYSDEP_CHECKSUM_H
-#define __UM_SYSDEP_CHECKSUM_H
-
-#include "linux/in6.h"
-#include "linux/string.h"
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- *     Note: when you get a NULL pointer exception here this means someone
- *     passed in an incorrect kernel address to one of these functions.
- *
- *     If you use these functions directly please don't forget the
- *     access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-                                      int len, __wsum sum)
-{
-       memcpy(dst, src, len);
-       return csum_partial(dst, len, sum);
-}
-
-/*
- * the same as csum_partial, but copies from src while it
- * checksums, and handles user-space pointer exceptions correctly, when needed.
- *
- * here it is even more important to align src and dst on a 32-bit (or,
- * even better, a 64-bit) boundary
- */
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
-                                        int len, __wsum sum, int *err_ptr)
-{
-       if (copy_from_user(dst, src, len)) {
-               *err_ptr = -EFAULT;
-               return (__force __wsum)-1;
-       }
-
-       return csum_partial(dst, len, sum);
-}
-
-/*
- *     This is a version of ip_compute_csum() optimized for IP headers,
- *     which are always checksummed on 4-octet boundaries.
- *
- *     By Jorge Cwik <jorge@laser.satlink.net>, adapted for Linux by
- *     Arnt Gulbrandsen.
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-       unsigned int sum;
-
-       __asm__ __volatile__(
-           "movl (%1), %0      ;\n"
-           "subl $4, %2        ;\n"
-           "jbe 2f             ;\n"
-           "addl 4(%1), %0     ;\n"
-           "adcl 8(%1), %0     ;\n"
-           "adcl 12(%1), %0    ;\n"
-"1:        adcl 16(%1), %0     ;\n"
-           "lea 4(%1), %1      ;\n"
-           "decl %2            ;\n"
-           "jne 1b             ;\n"
-           "adcl $0, %0        ;\n"
-           "movl %0, %2        ;\n"
-           "shrl $16, %0       ;\n"
-           "addw %w2, %w0      ;\n"
-           "adcl $0, %0        ;\n"
-           "notl %0            ;\n"
-"2:                            ;\n"
-       /* Since the input registers which are loaded with iph and ihl
-          are modified, we must also specify them as outputs, or gcc
-          will assume they contain their original values. */
-       : "=r" (sum), "=r" (iph), "=r" (ihl)
-       : "1" (iph), "2" (ihl)
-       : "memory");
-       return (__force __sum16)sum;
-}
-
-/*
- *     Fold a partial checksum
- */
-
-static inline __sum16 csum_fold(__wsum sum)
-{
-       __asm__(
-               "addl %1, %0            ;\n"
-               "adcl $0xffff, %0       ;\n"
-               : "=r" (sum)
-               : "r" ((__force u32)sum << 16),
-                 "0" ((__force u32)sum & 0xffff0000)
-       );
-       return (__force __sum16)(~(__force u32)sum >> 16);
-}
-
-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-                                                  unsigned short len,
-                                                  unsigned short proto,
-                                                  __wsum sum)
-{
-    __asm__(
-       "addl %1, %0    ;\n"
-       "adcl %2, %0    ;\n"
-       "adcl %3, %0    ;\n"
-       "adcl $0, %0    ;\n"
-       : "=r" (sum)
-       : "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum));
-    return sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-                                                  unsigned short len,
-                                                  unsigned short proto,
-                                                  __wsum sum)
-{
-       return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-
-static inline __sum16 ip_compute_csum(const void *buff, int len)
-{
-    return csum_fold (csum_partial(buff, len, 0));
-}
-
-#define _HAVE_ARCH_IPV6_CSUM
-static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
-                                         const struct in6_addr *daddr,
-                                         __u32 len, unsigned short proto,
-                                         __wsum sum)
-{
-       __asm__(
-               "addl 0(%1), %0         ;\n"
-               "adcl 4(%1), %0         ;\n"
-               "adcl 8(%1), %0         ;\n"
-               "adcl 12(%1), %0        ;\n"
-               "adcl 0(%2), %0         ;\n"
-               "adcl 4(%2), %0         ;\n"
-               "adcl 8(%2), %0         ;\n"
-               "adcl 12(%2), %0        ;\n"
-               "adcl %3, %0            ;\n"
-               "adcl %4, %0            ;\n"
-               "adcl $0, %0            ;\n"
-               : "=&r" (sum)
-               : "r" (saddr), "r" (daddr),
-                 "r"(htonl(len)), "r"(htonl(proto)), "0"(sum));
-
-       return csum_fold(sum);
-}
-
-/*
- *     Copy and checksum to user
- */
-#define HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user(const void *src,
-                                                    void __user *dst,
-                                                    int len, __wsum sum, int *err_ptr)
-{
-       if (access_ok(VERIFY_WRITE, dst, len)) {
-               if (copy_to_user(dst, src, len)) {
-                       *err_ptr = -EFAULT;
-                       return (__force __wsum)-1;
-               }
-
-               return csum_partial(src, len, sum);
-       }
-
-       if (len)
-               *err_ptr = -EFAULT;
-
-       return (__force __wsum)-1; /* invalid checksum */
-}
-
-#endif
-
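Note: for reference, the effect of the csum_fold() asm above can be written in portable C: fold the 32-bit one's-complement accumulator into 16 bits (twice, to absorb the carry), then complement. A hedged sketch, not part of the kernel API:

	#include <stdint.h>

	static uint16_t fold_csum(uint32_t sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);	/* fold high half into low */
		sum = (sum & 0xffff) + (sum >> 16);	/* absorb the carry, if any */
		return (uint16_t)~sum;			/* one's-complement result */
	}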
diff --git a/arch/um/sys-i386/shared/sysdep/faultinfo.h b/arch/um/sys-i386/shared/sysdep/faultinfo.h
deleted file mode 100644 (file)
index db437cc..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
- * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
- * Licensed under the GPL
- */
-
-#ifndef __FAULTINFO_I386_H
-#define __FAULTINFO_I386_H
-
-/* this structure contains the full arch-specific faultinfo
- * from the traps.
- * On i386, ptrace_faultinfo unfortunately doesn't provide
- * all the info, since trap_no is missing.
- * All common elements are defined at the same position in
- * both structures, thus making it easy to copy the
- * contents without knowledge about the structure elements.
- */
-struct faultinfo {
-        int error_code; /* in ptrace_faultinfo misleadingly called is_write */
-        unsigned long cr2; /* in ptrace_faultinfo called addr */
-        int trap_no; /* missing in ptrace_faultinfo */
-};
-
-#define FAULT_WRITE(fi) ((fi).error_code & 2)
-#define FAULT_ADDRESS(fi) ((fi).cr2)
-
-#define PTRACE_FULL_FAULTINFO 0
-
-#endif
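
Note: FAULT_WRITE() above tests bit 1 of the i386 page-fault error code. For illustration, the low bits decode as: bit 0 = protection violation (vs. not-present), bit 1 = write access, bit 2 = fault taken in user mode. A small sketch (the helper name is hypothetical):

	/* Assumes the struct faultinfo definition from the header above. */
	static inline int fault_is_user_write(const struct faultinfo *fi)
	{
		return (fi->error_code & 2) &&	/* bit 1: write access */
		       (fi->error_code & 4);	/* bit 2: fault in user mode */
	}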
diff --git a/arch/um/sys-i386/shared/sysdep/host_ldt.h b/arch/um/sys-i386/shared/sysdep/host_ldt.h
deleted file mode 100644 (file)
index 0953cc4..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __ASM_HOST_LDT_I386_H
-#define __ASM_HOST_LDT_I386_H
-
-#include <asm/ldt.h>
-
-/*
- * macros stolen from include/asm-i386/desc.h
- */
-#define LDT_entry_a(info) \
-       ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-
-#define LDT_entry_b(info) \
-       (((info)->base_addr & 0xff000000) | \
-       (((info)->base_addr & 0x00ff0000) >> 16) | \
-       ((info)->limit & 0xf0000) | \
-       (((info)->read_exec_only ^ 1) << 9) | \
-       ((info)->contents << 10) | \
-       (((info)->seg_not_present ^ 1) << 15) | \
-       ((info)->seg_32bit << 22) | \
-       ((info)->limit_in_pages << 23) | \
-       ((info)->useable << 20) | \
-       0x7000)
-
-#define LDT_empty(info) (\
-       (info)->base_addr       == 0    && \
-       (info)->limit           == 0    && \
-       (info)->contents        == 0    && \
-       (info)->read_exec_only  == 1    && \
-       (info)->seg_32bit       == 0    && \
-       (info)->limit_in_pages  == 0    && \
-       (info)->seg_not_present == 1    && \
-       (info)->useable         == 0    )
-
-#endif
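
Note: a worked example of the packing these macros perform, under hypothetical field values (this mirrors the i386 descriptor layout; the struct stands in for the user_desc fields the macros read):

	#include <stdio.h>

	struct desc_info {		/* illustrative stand-in only */
		unsigned int base_addr, limit;
		unsigned int seg_32bit, contents, read_exec_only;
		unsigned int limit_in_pages, seg_not_present, useable;
	};

	int main(void)
	{
		struct desc_info d = {
			.base_addr = 0x00400000, .limit = 0xfffff,
			.seg_32bit = 1, .limit_in_pages = 1,
		};
		/* low word: low 16 bits of base over low 16 bits of limit */
		unsigned int a = ((d.base_addr & 0x0000ffff) << 16) |
				 (d.limit & 0x0ffff);
		/* high word: remaining base/limit bits plus access flags */
		unsigned int b = (d.base_addr & 0xff000000) |
				 ((d.base_addr & 0x00ff0000) >> 16) |
				 (d.limit & 0xf0000) |
				 ((d.read_exec_only ^ 1) << 9) |
				 (d.contents << 10) |
				 ((d.seg_not_present ^ 1) << 15) |
				 (d.seg_32bit << 22) |
				 (d.limit_in_pages << 23) |
				 (d.useable << 20) |
				 0x7000;
		printf("descriptor words: %#010x %#010x\n", a, b);
		return 0;
	}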
diff --git a/arch/um/sys-i386/shared/sysdep/kernel-offsets.h b/arch/um/sys-i386/shared/sysdep/kernel-offsets.h
deleted file mode 100644 (file)
index 5868526..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/elf.h>
-#include <linux/crypto.h>
-#include <asm/mman.h>
-
-#define DEFINE(sym, val) \
-       asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define STR(x) #x
-#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : )
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
-       DEFINE(sym, offsetof(struct str, mem));
-
-void foo(void)
-{
-#include <common-offsets.h>
-}
diff --git a/arch/um/sys-i386/shared/sysdep/ptrace.h b/arch/um/sys-i386/shared/sysdep/ptrace.h
deleted file mode 100644 (file)
index c398a50..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_I386_PTRACE_H
-#define __SYSDEP_I386_PTRACE_H
-
-#include "user_constants.h"
-#include "sysdep/faultinfo.h"
-
-#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long))
-#define MAX_REG_OFFSET (UM_FRAME_SIZE)
-
-static inline void update_debugregs(int seq) {}
-
-/* syscall emulation path in ptrace */
-
-#ifndef PTRACE_SYSEMU
-#define PTRACE_SYSEMU 31
-#endif
-
-void set_using_sysemu(int value);
-int get_using_sysemu(void);
-extern int sysemu_supported;
-
-#include "skas_ptregs.h"
-
-#define REGS_IP(r) ((r)[HOST_IP])
-#define REGS_SP(r) ((r)[HOST_SP])
-#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
-#define REGS_EAX(r) ((r)[HOST_EAX])
-#define REGS_EBX(r) ((r)[HOST_EBX])
-#define REGS_ECX(r) ((r)[HOST_ECX])
-#define REGS_EDX(r) ((r)[HOST_EDX])
-#define REGS_ESI(r) ((r)[HOST_ESI])
-#define REGS_EDI(r) ((r)[HOST_EDI])
-#define REGS_EBP(r) ((r)[HOST_EBP])
-#define REGS_CS(r) ((r)[HOST_CS])
-#define REGS_SS(r) ((r)[HOST_SS])
-#define REGS_DS(r) ((r)[HOST_DS])
-#define REGS_ES(r) ((r)[HOST_ES])
-#define REGS_FS(r) ((r)[HOST_FS])
-#define REGS_GS(r) ((r)[HOST_GS])
-
-#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res)
-
-#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
-
-#ifndef PTRACE_SYSEMU_SINGLESTEP
-#define PTRACE_SYSEMU_SINGLESTEP 32
-#endif
-
-struct uml_pt_regs {
-       unsigned long gp[MAX_REG_NR];
-       unsigned long fp[HOST_FPX_SIZE];
-       struct faultinfo faultinfo;
-       long syscall;
-       int is_user;
-};
-
-#define EMPTY_UML_PT_REGS { }
-
-#define UPT_IP(r) REGS_IP((r)->gp)
-#define UPT_SP(r) REGS_SP((r)->gp)
-#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
-#define UPT_EAX(r) REGS_EAX((r)->gp)
-#define UPT_EBX(r) REGS_EBX((r)->gp)
-#define UPT_ECX(r) REGS_ECX((r)->gp)
-#define UPT_EDX(r) REGS_EDX((r)->gp)
-#define UPT_ESI(r) REGS_ESI((r)->gp)
-#define UPT_EDI(r) REGS_EDI((r)->gp)
-#define UPT_EBP(r) REGS_EBP((r)->gp)
-#define UPT_ORIG_EAX(r) ((r)->syscall)
-#define UPT_CS(r) REGS_CS((r)->gp)
-#define UPT_SS(r) REGS_SS((r)->gp)
-#define UPT_DS(r) REGS_DS((r)->gp)
-#define UPT_ES(r) REGS_ES((r)->gp)
-#define UPT_FS(r) REGS_FS((r)->gp)
-#define UPT_GS(r) REGS_GS((r)->gp)
-
-#define UPT_SYSCALL_ARG1(r) UPT_EBX(r)
-#define UPT_SYSCALL_ARG2(r) UPT_ECX(r)
-#define UPT_SYSCALL_ARG3(r) UPT_EDX(r)
-#define UPT_SYSCALL_ARG4(r) UPT_ESI(r)
-#define UPT_SYSCALL_ARG5(r) UPT_EDI(r)
-#define UPT_SYSCALL_ARG6(r) UPT_EBP(r)
-
-extern int user_context(unsigned long sp);
-
-#define UPT_IS_USER(r) ((r)->is_user)
-
-struct syscall_args {
-       unsigned long args[6];
-};
-
-#define SYSCALL_ARGS(r) ((struct syscall_args) \
-                        { .args = { UPT_SYSCALL_ARG1(r),       \
-                                    UPT_SYSCALL_ARG2(r),       \
-                                    UPT_SYSCALL_ARG3(r),       \
-                                    UPT_SYSCALL_ARG4(r),       \
-                                    UPT_SYSCALL_ARG5(r),       \
-                                    UPT_SYSCALL_ARG6(r) } } )
-
-#define UPT_REG(regs, reg) \
-       ({      unsigned long val; \
-               switch(reg){ \
-               case EIP: val = UPT_IP(regs); break; \
-               case UESP: val = UPT_SP(regs); break; \
-               case EAX: val = UPT_EAX(regs); break; \
-               case EBX: val = UPT_EBX(regs); break; \
-               case ECX: val = UPT_ECX(regs); break; \
-               case EDX: val = UPT_EDX(regs); break; \
-               case ESI: val = UPT_ESI(regs); break; \
-               case EDI: val = UPT_EDI(regs); break; \
-               case EBP: val = UPT_EBP(regs); break; \
-               case ORIG_EAX: val = UPT_ORIG_EAX(regs); break; \
-               case CS: val = UPT_CS(regs); break; \
-               case SS: val = UPT_SS(regs); break; \
-               case DS: val = UPT_DS(regs); break; \
-               case ES: val = UPT_ES(regs); break; \
-               case FS: val = UPT_FS(regs); break; \
-               case GS: val = UPT_GS(regs); break; \
-               case EFL: val = UPT_EFLAGS(regs); break; \
-               default :  \
-                       panic("Bad register in UPT_REG : %d\n", reg);  \
-                       val = -1; \
-               } \
-               val; \
-       })
-
-#define UPT_SET(regs, reg, val) \
-       do { \
-               switch(reg){ \
-               case EIP: UPT_IP(regs) = val; break; \
-               case UESP: UPT_SP(regs) = val; break; \
-               case EAX: UPT_EAX(regs) = val; break; \
-               case EBX: UPT_EBX(regs) = val; break; \
-               case ECX: UPT_ECX(regs) = val; break; \
-               case EDX: UPT_EDX(regs) = val; break; \
-               case ESI: UPT_ESI(regs) = val; break; \
-               case EDI: UPT_EDI(regs) = val; break; \
-               case EBP: UPT_EBP(regs) = val; break; \
-               case ORIG_EAX: UPT_ORIG_EAX(regs) = val; break; \
-               case CS: UPT_CS(regs) = val; break; \
-               case SS: UPT_SS(regs) = val; break; \
-               case DS: UPT_DS(regs) = val; break; \
-               case ES: UPT_ES(regs) = val; break; \
-               case FS: UPT_FS(regs) = val; break; \
-               case GS: UPT_GS(regs) = val; break; \
-               case EFL: UPT_EFLAGS(regs) = val; break; \
-               default :  \
-                       panic("Bad register in UPT_SET : %d\n", reg);  \
-                       break; \
-               } \
-       } while (0)
-
-#define UPT_SET_SYSCALL_RETURN(r, res) \
-       REGS_SET_SYSCALL_RETURN((r)->regs, (res))
-
-#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
-
-#define UPT_ORIG_SYSCALL(r) UPT_EAX(r)
-#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r)
-#define UPT_SYSCALL_RET(r) UPT_EAX(r)
-
-#define UPT_FAULTINFO(r) (&(r)->faultinfo)
-
-extern void arch_init_registers(int pid);
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/ptrace_user.h b/arch/um/sys-i386/shared/sysdep/ptrace_user.h
deleted file mode 100644 (file)
index ef56247..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_I386_PTRACE_USER_H__
-#define __SYSDEP_I386_PTRACE_USER_H__
-
-#include <sys/ptrace.h>
-#include <linux/ptrace.h>
-#include <asm/ptrace.h>
-#include "user_constants.h"
-
-#define PT_OFFSET(r) ((r) * sizeof(long))
-
-#define PT_SYSCALL_NR(regs) ((regs)[ORIG_EAX])
-#define PT_SYSCALL_NR_OFFSET PT_OFFSET(ORIG_EAX)
-
-#define PT_SYSCALL_ARG1_OFFSET PT_OFFSET(EBX)
-#define PT_SYSCALL_ARG2_OFFSET PT_OFFSET(ECX)
-#define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX)
-#define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI)
-#define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI)
-#define PT_SYSCALL_ARG6_OFFSET PT_OFFSET(EBP)
-
-#define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX)
-
-#define REGS_SYSCALL_NR EAX /* This is used before a system call */
-#define REGS_SYSCALL_ARG1 EBX
-#define REGS_SYSCALL_ARG2 ECX
-#define REGS_SYSCALL_ARG3 EDX
-#define REGS_SYSCALL_ARG4 ESI
-#define REGS_SYSCALL_ARG5 EDI
-#define REGS_SYSCALL_ARG6 EBP
-
-#define REGS_IP_INDEX EIP
-#define REGS_SP_INDEX UESP
-
-#define PT_IP_OFFSET PT_OFFSET(EIP)
-#define PT_IP(regs) ((regs)[EIP])
-#define PT_SP_OFFSET PT_OFFSET(UESP)
-#define PT_SP(regs) ((regs)[UESP])
-
-#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE)
-
-#ifndef FRAME_SIZE
-#define FRAME_SIZE (17)
-#endif
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/sc.h b/arch/um/sys-i386/shared/sysdep/sc.h
deleted file mode 100644 (file)
index c57d178..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef __SYSDEP_I386_SC_H
-#define __SYSDEP_I386_SC_H
-
-#include <user_constants.h>
-
-#define SC_OFFSET(sc, field) \
-       *((unsigned long *) &(((char *) (sc))[HOST_##field]))
-#define SC_FP_OFFSET(sc, field) \
-       *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[HOST_##field]))
-#define SC_FP_OFFSET_PTR(sc, field, type) \
-       ((type *) &(((char *) (SC_FPSTATE(sc)))[HOST_##field]))
-
-#define SC_IP(sc) SC_OFFSET(sc, SC_IP)
-#define SC_SP(sc) SC_OFFSET(sc, SC_SP)
-#define SC_FS(sc) SC_OFFSET(sc, SC_FS)
-#define SC_GS(sc) SC_OFFSET(sc, SC_GS)
-#define SC_DS(sc) SC_OFFSET(sc, SC_DS)
-#define SC_ES(sc) SC_OFFSET(sc, SC_ES)
-#define SC_SS(sc) SC_OFFSET(sc, SC_SS)
-#define SC_CS(sc) SC_OFFSET(sc, SC_CS)
-#define SC_EFLAGS(sc) SC_OFFSET(sc, SC_EFLAGS)
-#define SC_EAX(sc) SC_OFFSET(sc, SC_EAX)
-#define SC_EBX(sc) SC_OFFSET(sc, SC_EBX)
-#define SC_ECX(sc) SC_OFFSET(sc, SC_ECX)
-#define SC_EDX(sc) SC_OFFSET(sc, SC_EDX)
-#define SC_EDI(sc) SC_OFFSET(sc, SC_EDI)
-#define SC_ESI(sc) SC_OFFSET(sc, SC_ESI)
-#define SC_EBP(sc) SC_OFFSET(sc, SC_EBP)
-#define SC_TRAPNO(sc) SC_OFFSET(sc, SC_TRAPNO)
-#define SC_ERR(sc) SC_OFFSET(sc, SC_ERR)
-#define SC_CR2(sc) SC_OFFSET(sc, SC_CR2)
-#define SC_FPSTATE(sc) SC_OFFSET(sc, SC_FPSTATE)
-#define SC_SIGMASK(sc) SC_OFFSET(sc, SC_SIGMASK)
-#define SC_FP_CW(sc) SC_FP_OFFSET(sc, SC_FP_CW)
-#define SC_FP_SW(sc) SC_FP_OFFSET(sc, SC_FP_SW)
-#define SC_FP_TAG(sc) SC_FP_OFFSET(sc, SC_FP_TAG)
-#define SC_FP_IPOFF(sc) SC_FP_OFFSET(sc, SC_FP_IPOFF)
-#define SC_FP_CSSEL(sc) SC_FP_OFFSET(sc, SC_FP_CSSEL)
-#define SC_FP_DATAOFF(sc) SC_FP_OFFSET(sc, SC_FP_DATAOFF)
-#define SC_FP_DATASEL(sc) SC_FP_OFFSET(sc, SC_FP_DATASEL)
-#define SC_FP_ST(sc) SC_FP_OFFSET_PTR(sc, SC_FP_ST, struct _fpstate)
-#define SC_FXSR_ENV(sc) SC_FP_OFFSET_PTR(sc, SC_FXSR_ENV, void)
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/sigcontext.h b/arch/um/sys-i386/shared/sysdep/sigcontext.h
deleted file mode 100644 (file)
index f583c87..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYS_SIGCONTEXT_I386_H
-#define __SYS_SIGCONTEXT_I386_H
-
-#include "sysdep/sc.h"
-
-#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
-
-#define GET_FAULTINFO_FROM_SC(fi, sc) \
-       { \
-               (fi).cr2 = SC_CR2(sc); \
-               (fi).error_code = SC_ERR(sc); \
-               (fi).trap_no = SC_TRAPNO(sc); \
-       }
-
-/* This is Page Fault */
-#define SEGV_IS_FIXABLE(fi)    ((fi)->trap_no == 14)
-
-/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */
-#define SEGV_MAYBE_FIXABLE(fi) ((fi)->trap_no == 0 && ptrace_faultinfo)
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/skas_ptrace.h b/arch/um/sys-i386/shared/sysdep/skas_ptrace.h
deleted file mode 100644 (file)
index e27b8a7..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_I386_SKAS_PTRACE_H
-#define __SYSDEP_I386_SKAS_PTRACE_H
-
-struct ptrace_faultinfo {
-        int is_write;
-        unsigned long addr;
-};
-
-struct ptrace_ldt {
-        int func;
-        void *ptr;
-        unsigned long bytecount;
-};
-
-#define PTRACE_LDT 54
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/stub.h b/arch/um/sys-i386/shared/sysdep/stub.h
deleted file mode 100644 (file)
index 977dedd..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_STUB_H
-#define __SYSDEP_STUB_H
-
-#include <sys/mman.h>
-#include <asm/ptrace.h>
-#include <asm/unistd.h>
-#include "as-layout.h"
-#include "stub-data.h"
-#include "kern_constants.h"
-
-extern void stub_segv_handler(int sig);
-extern void stub_clone_handler(void);
-
-#define STUB_SYSCALL_RET EAX
-#define STUB_MMAP_NR __NR_mmap2
-#define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT)
-
-static inline long stub_syscall0(long syscall)
-{
-       long ret;
-
-       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall));
-
-       return ret;
-}
-
-static inline long stub_syscall1(long syscall, long arg1)
-{
-       long ret;
-
-       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1));
-
-       return ret;
-}
-
-static inline long stub_syscall2(long syscall, long arg1, long arg2)
-{
-       long ret;
-
-       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
-                       "c" (arg2));
-
-       return ret;
-}
-
-static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
-{
-       long ret;
-
-       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
-                       "c" (arg2), "d" (arg3));
-
-       return ret;
-}
-
-static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
-                                long arg4)
-{
-       long ret;
-
-       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
-                       "c" (arg2), "d" (arg3), "S" (arg4));
-
-       return ret;
-}
-
-static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
-                                long arg4, long arg5)
-{
-       long ret;
-
-       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
-                       "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5));
-
-       return ret;
-}
-
-static inline void trap_myself(void)
-{
-       __asm("int3");
-}
-
-static inline void remap_stack(int fd, unsigned long offset)
-{
-       __asm__ volatile ("movl %%eax,%%ebp ; movl %0,%%eax ; int $0x80 ;"
-                         "movl %7, %%ebx ; movl %%eax, (%%ebx)"
-                         : : "g" (STUB_MMAP_NR), "b" (STUB_DATA),
-                           "c" (UM_KERN_PAGE_SIZE),
-                           "d" (PROT_READ | PROT_WRITE),
-                           "S" (MAP_FIXED | MAP_SHARED), "D" (fd),
-                           "a" (offset),
-                           "i" (&((struct stub_data *) STUB_DATA)->err)
-                         : "memory");
-}
-
-#endif
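
Note: a hypothetical usage sketch for the helpers above, issuing write(2) through the raw int $0x80 convention (%eax = syscall number, %ebx/%ecx/%edx = the first three arguments). __NR_write comes from <asm/unistd.h>, already included here; the return value is the byte count or a negative errno.

	static long stub_print(const char *msg, long len)
	{
		/* write(1, msg, len) via stub_syscall3(); illustrative helper */
		return stub_syscall3(__NR_write, 1, (long) msg, len);
	}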
diff --git a/arch/um/sys-i386/shared/sysdep/syscalls.h b/arch/um/sys-i386/shared/sysdep/syscalls.h
deleted file mode 100644 (file)
index 05cb796..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include "asm/unistd.h"
-#include "sysdep/ptrace.h"
-
-typedef long syscall_handler_t(struct pt_regs);
-
-/* Not declared on x86, incompatible declarations on x86_64, so these have
- * to go here rather than in sys_call_table.c
- */
-extern syscall_handler_t sys_rt_sigaction;
-
-extern syscall_handler_t *sys_call_table[];
-
-#define EXECUTE_SYSCALL(syscall, regs) \
-       ((long (*)(struct syscall_args)) \
-        (*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
diff --git a/arch/um/sys-i386/shared/sysdep/system.h b/arch/um/sys-i386/shared/sysdep/system.h
deleted file mode 100644 (file)
index d1b93c4..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-#ifndef _ASM_X86_SYSTEM_H_
-#define _ASM_X86_SYSTEM_H_
-
-#include <asm/asm.h>
-#include <asm/segment.h>
-#include <asm/cpufeature.h>
-#include <asm/cmpxchg.h>
-#include <asm/nops.h>
-
-#include <linux/kernel.h>
-#include <linux/irqflags.h>
-
-/* entries in ARCH_DLINFO: */
-#ifdef CONFIG_IA32_EMULATION
-# define AT_VECTOR_SIZE_ARCH 2
-#else
-# define AT_VECTOR_SIZE_ARCH 1
-#endif
-
-extern unsigned long arch_align_stack(unsigned long sp);
-
-void default_idle(void);
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- */
-#ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out-of-order stores. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
-#else
-#define mb()   asm volatile("mfence":::"memory")
-#define rmb()  asm volatile("lfence":::"memory")
-#define wmb()  asm volatile("sfence" ::: "memory")
-#endif
-
-/**
- * read_barrier_depends - Flush all pending reads that subsequent reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *     CPU 0                           CPU 1
- *
- *     b = 2;
- *     memory_barrier();
- *     p = &b;                         q = p;
- *                                     read_barrier_depends();
- *                                     d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *     CPU 0                           CPU 1
- *
- *     a = 2;
- *     memory_barrier();
- *     b = 3;                          y = b;
- *                                     read_barrier_depends();
- *                                     x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends() do { } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb()       mb()
-#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb()     rmb()
-#else
-# define smp_rmb()     barrier()
-#endif
-#ifdef CONFIG_X86_OOSTORE
-# define smp_wmb()     wmb()
-#else
-# define smp_wmb()     barrier()
-#endif
-#define smp_read_barrier_depends()     read_barrier_depends()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#else
-#define smp_mb()       barrier()
-#define smp_rmb()      barrier()
-#define smp_wmb()      barrier()
-#define smp_read_barrier_depends()     do { } while (0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
-
-/*
- * Stop RDTSC speculation. This is needed when you need to use RDTSC
- * (or get_cycles or vread that possibly accesses the TSC) in a defined
- * code region.
- *
- * (Could use a three-way alternative() for this if there were one.)
- */
-static inline void rdtsc_barrier(void)
-{
-       alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-       alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
-}
-
-#endif
diff --git a/arch/um/sys-i386/shared/sysdep/tls.h b/arch/um/sys-i386/shared/sysdep/tls.h
deleted file mode 100644 (file)
index 3455075..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _SYSDEP_TLS_H
-#define _SYSDEP_TLS_H
-
-# ifndef __KERNEL__
-
-/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which
- * may be named user_desc (in 2.4, and in headers matching its API, it was
- * named modify_ldt_ldt_s). */
-
-typedef struct um_dup_user_desc {
-       unsigned int  entry_number;
-       unsigned int  base_addr;
-       unsigned int  limit;
-       unsigned int  seg_32bit:1;
-       unsigned int  contents:2;
-       unsigned int  read_exec_only:1;
-       unsigned int  limit_in_pages:1;
-       unsigned int  seg_not_present:1;
-       unsigned int  useable:1;
-} user_desc_t;
-
-# else /* __KERNEL__ */
-
-#  include <ldt.h>
-typedef struct user_desc user_desc_t;
-
-# endif /* __KERNEL__ */
-
-#define GDT_ENTRY_TLS_MIN_I386 6
-#define GDT_ENTRY_TLS_MIN_X86_64 12
-
-#endif /* _SYSDEP_TLS_H */
diff --git a/arch/um/sys-i386/shared/sysdep/vm-flags.h b/arch/um/sys-i386/shared/sysdep/vm-flags.h
deleted file mode 100644 (file)
index e0d24c5..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#ifndef __VM_FLAGS_I386_H
-#define __VM_FLAGS_I386_H
-
-#define VM_DATA_DEFAULT_FLAGS \
-       (VM_READ | VM_WRITE | \
-       ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
-                VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#endif
diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
deleted file mode 100644 (file)
index 89a4662..0000000
+++ /dev/null
@@ -1,508 +0,0 @@
-/*
- * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/ptrace.h>
-#include <asm/unistd.h>
-#include <asm/uaccess.h>
-#include <asm/ucontext.h>
-#include "frame_kern.h"
-#include "skas.h"
-
-void copy_sc(struct uml_pt_regs *regs, void *from)
-{
-       struct sigcontext *sc = from;
-
-       REGS_GS(regs->gp) = sc->gs;
-       REGS_FS(regs->gp) = sc->fs;
-       REGS_ES(regs->gp) = sc->es;
-       REGS_DS(regs->gp) = sc->ds;
-       REGS_EDI(regs->gp) = sc->di;
-       REGS_ESI(regs->gp) = sc->si;
-       REGS_EBP(regs->gp) = sc->bp;
-       REGS_SP(regs->gp) = sc->sp;
-       REGS_EBX(regs->gp) = sc->bx;
-       REGS_EDX(regs->gp) = sc->dx;
-       REGS_ECX(regs->gp) = sc->cx;
-       REGS_EAX(regs->gp) = sc->ax;
-       REGS_IP(regs->gp) = sc->ip;
-       REGS_CS(regs->gp) = sc->cs;
-       REGS_EFLAGS(regs->gp) = sc->flags;
-       REGS_SS(regs->gp) = sc->ss;
-}
-
-/*
- * FPU tag word conversions.
- */
-
-static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
-{
-       unsigned int tmp; /* to avoid 16 bit prefixes in the code */
-
-       /* Transform each pair of bits into 01 (valid) or 00 (empty) */
-       tmp = ~twd;
-       tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
-       /* and move the valid bits to the lower byte. */
-       tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
-       tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
-       tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
-       return tmp;
-}
-
-static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
-{
-       struct _fpxreg *st = NULL;
-       unsigned long twd = (unsigned long) fxsave->twd;
-       unsigned long tag;
-       unsigned long ret = 0xffff0000;
-       int i;
-
-#define FPREG_ADDR(f, n)       ((char *)&(f)->st_space + (n) * 16)
-
-       for (i = 0; i < 8; i++) {
-               if (twd & 0x1) {
-                       st = (struct _fpxreg *) FPREG_ADDR(fxsave, i);
-
-                       switch (st->exponent & 0x7fff) {
-                       case 0x7fff:
-                               tag = 2;                /* Special */
-                               break;
-                       case 0x0000:
-                               if ( !st->significand[0] &&
-                                    !st->significand[1] &&
-                                    !st->significand[2] &&
-                                    !st->significand[3] ) {
-                                       tag = 1;        /* Zero */
-                               } else {
-                                       tag = 2;        /* Special */
-                               }
-                               break;
-                       default:
-                               if (st->significand[3] & 0x8000) {
-                                       tag = 0;        /* Valid */
-                               } else {
-                                       tag = 2;        /* Special */
-                               }
-                               break;
-                       }
-               } else {
-                       tag = 3;                        /* Empty */
-               }
-               ret |= (tag << (2 * i));
-               twd = twd >> 1;
-       }
-       return ret;
-}
-
-static int convert_fxsr_to_user(struct _fpstate __user *buf,
-                               struct user_fxsr_struct *fxsave)
-{
-       unsigned long env[7];
-       struct _fpreg __user *to;
-       struct _fpxreg *from;
-       int i;
-
-       env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul;
-       env[1] = (unsigned long)fxsave->swd | 0xffff0000ul;
-       env[2] = twd_fxsr_to_i387(fxsave);
-       env[3] = fxsave->fip;
-       env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
-       env[5] = fxsave->foo;
-       env[6] = fxsave->fos;
-
-       if (__copy_to_user(buf, env, 7 * sizeof(unsigned long)))
-               return 1;
-
-       to = &buf->_st[0];
-       from = (struct _fpxreg *) &fxsave->st_space[0];
-       for (i = 0; i < 8; i++, to++, from++) {
-               unsigned long __user *t = (unsigned long __user *)to;
-               unsigned long *f = (unsigned long *)from;
-
-               if (__put_user(*f, t) ||
-                               __put_user(*(f + 1), t + 1) ||
-                               __put_user(from->exponent, &to->exponent))
-                       return 1;
-       }
-       return 0;
-}
-
-static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave,
-                                 struct _fpstate __user *buf)
-{
-       unsigned long env[7];
-       struct _fpxreg *to;
-       struct _fpreg __user *from;
-       int i;
-
-       if (copy_from_user(env, buf, 7 * sizeof(long)))
-               return 1;
-
-       fxsave->cwd = (unsigned short)(env[0] & 0xffff);
-       fxsave->swd = (unsigned short)(env[1] & 0xffff);
-       fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
-       fxsave->fip = env[3];
-       fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16);
-       fxsave->fcs = (env[4] & 0xffff);
-       fxsave->foo = env[5];
-       fxsave->fos = env[6];
-
-       to = (struct _fpxreg *) &fxsave->st_space[0];
-       from = &buf->_st[0];
-       for (i = 0; i < 8; i++, to++, from++) {
-               unsigned long *t = (unsigned long *)to;
-               unsigned long __user *f = (unsigned long __user *)from;
-
-               if (__get_user(*t, f) ||
-                   __get_user(*(t + 1), f + 1) ||
-                   __get_user(to->exponent, &from->exponent))
-                       return 1;
-       }
-       return 0;
-}
-
-extern int have_fpx_regs;
-
-static int copy_sc_from_user(struct pt_regs *regs,
-                            struct sigcontext __user *from)
-{
-       struct sigcontext sc;
-       int err, pid;
-
-       err = copy_from_user(&sc, from, sizeof(sc));
-       if (err)
-               return err;
-
-       pid = userspace_pid[current_thread_info()->cpu];
-       copy_sc(&regs->regs, &sc);
-       if (have_fpx_regs) {
-               struct user_fxsr_struct fpx;
-
-               err = copy_from_user(&fpx,
-                       &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0],
-                                    sizeof(struct user_fxsr_struct));
-               if (err)
-                       return 1;
-
-               err = convert_fxsr_from_user(&fpx, sc.fpstate);
-               if (err)
-                       return 1;
-
-               err = restore_fpx_registers(pid, (unsigned long *) &fpx);
-               if (err < 0) {
-                       printk(KERN_ERR "copy_sc_from_user - "
-                              "restore_fpx_registers failed, errno = %d\n",
-                              -err);
-                       return 1;
-               }
-       }
-       else {
-               struct user_i387_struct fp;
-
-               err = copy_from_user(&fp, sc.fpstate,
-                                    sizeof(struct user_i387_struct));
-               if (err)
-                       return 1;
-
-               err = restore_fp_registers(pid, (unsigned long *) &fp);
-               if (err < 0) {
-                       printk(KERN_ERR "copy_sc_from_user - "
-                              "restore_fp_registers failed, errno = %d\n",
-                              -err);
-                       return 1;
-               }
-       }
-
-       return 0;
-}
-
-static int copy_sc_to_user(struct sigcontext __user *to,
-                          struct _fpstate __user *to_fp, struct pt_regs *regs,
-                          unsigned long sp)
-{
-       struct sigcontext sc;
-       struct faultinfo * fi = &current->thread.arch.faultinfo;
-       int err, pid;
-
-       sc.gs = REGS_GS(regs->regs.gp);
-       sc.fs = REGS_FS(regs->regs.gp);
-       sc.es = REGS_ES(regs->regs.gp);
-       sc.ds = REGS_DS(regs->regs.gp);
-       sc.di = REGS_EDI(regs->regs.gp);
-       sc.si = REGS_ESI(regs->regs.gp);
-       sc.bp = REGS_EBP(regs->regs.gp);
-       sc.sp = sp;
-       sc.bx = REGS_EBX(regs->regs.gp);
-       sc.dx = REGS_EDX(regs->regs.gp);
-       sc.cx = REGS_ECX(regs->regs.gp);
-       sc.ax = REGS_EAX(regs->regs.gp);
-       sc.ip = REGS_IP(regs->regs.gp);
-       sc.cs = REGS_CS(regs->regs.gp);
-       sc.flags = REGS_EFLAGS(regs->regs.gp);
-       sc.sp_at_signal = regs->regs.gp[UESP];
-       sc.ss = regs->regs.gp[SS];
-       sc.cr2 = fi->cr2;
-       sc.err = fi->error_code;
-       sc.trapno = fi->trap_no;
-
-       to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1));
-       sc.fpstate = to_fp;
-
-       pid = userspace_pid[current_thread_info()->cpu];
-       if (have_fpx_regs) {
-               struct user_fxsr_struct fpx;
-
-               err = save_fpx_registers(pid, (unsigned long *) &fpx);
-               if (err < 0){
-                       printk(KERN_ERR "copy_sc_to_user - save_fpx_registers "
-                              "failed, errno = %d\n", err);
-                       return 1;
-               }
-
-               err = convert_fxsr_to_user(to_fp, &fpx);
-               if (err)
-                       return 1;
-
-               err |= __put_user(fpx.swd, &to_fp->status);
-               err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic);
-               if (err)
-                       return 1;
-
-               if (copy_to_user(&to_fp->_fxsr_env[0], &fpx,
-                                sizeof(struct user_fxsr_struct)))
-                       return 1;
-       }
-       else {
-               struct user_i387_struct fp;
-
-               err = save_fp_registers(pid, (unsigned long *) &fp);
-               if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct)))
-                       return 1;
-       }
-
-       return copy_to_user(to, &sc, sizeof(sc));
-}
-
-static int copy_ucontext_to_user(struct ucontext __user *uc,
-                                struct _fpstate __user *fp, sigset_t *set,
-                                unsigned long sp)
-{
-       int err = 0;
-
-       err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp);
-       err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags);
-       err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size);
-       err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, sp);
-       err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set));
-       return err;
-}
-
-struct sigframe
-{
-       char __user *pretcode;
-       int sig;
-       struct sigcontext sc;
-       struct _fpstate fpstate;
-       unsigned long extramask[_NSIG_WORDS-1];
-       char retcode[8];
-};
-
-struct rt_sigframe
-{
-       char __user *pretcode;
-       int sig;
-       struct siginfo __user *pinfo;
-       void __user *puc;
-       struct siginfo info;
-       struct ucontext uc;
-       struct _fpstate fpstate;
-       char retcode[8];
-};
-
-int setup_signal_stack_sc(unsigned long stack_top, int sig,
-                         struct k_sigaction *ka, struct pt_regs *regs,
-                         sigset_t *mask)
-{
-       struct sigframe __user *frame;
-       void __user *restorer;
-       unsigned long save_sp = PT_REGS_SP(regs);
-       int err = 0;
-
-       /* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */
-       stack_top = ((stack_top + 4) & -16UL) - 4;
-       frame = (struct sigframe __user *) stack_top - 1;
-       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-               return 1;
-
-       restorer = frame->retcode;
-       if (ka->sa.sa_flags & SA_RESTORER)
-               restorer = ka->sa.sa_restorer;
-
-       /* Update SP now because the page fault handler refuses to extend
-        * the stack if the faulting address is too far below the current
-        * SP, which the new frame now certainly is.  If there's an error, the original
-        * value is restored on the way out.
-        * When writing the sigcontext to the stack, we have to write the
-        * original value, so that's passed to copy_sc_to_user, which does
-        * the right thing with it.
-        */
-       PT_REGS_SP(regs) = (unsigned long) frame;
-
-       err |= __put_user(restorer, &frame->pretcode);
-       err |= __put_user(sig, &frame->sig);
-       err |= copy_sc_to_user(&frame->sc, NULL, regs, save_sp);
-       err |= __put_user(mask->sig[0], &frame->sc.oldmask);
-       if (_NSIG_WORDS > 1)
-               err |= __copy_to_user(&frame->extramask, &mask->sig[1],
-                                     sizeof(frame->extramask));
-
-       /*
-        * This is popl %eax ; movl $,%eax ; int $0x80
-        *
-        * WE DO NOT USE IT ANY MORE! It's only left here for historical
-        * reasons and because gdb uses it as a signature to notice
-        * signal handler stack frames.
-        */
-       err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
-       err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
-       err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
-
-       if (err)
-               goto err;
-
-       PT_REGS_SP(regs) = (unsigned long) frame;
-       PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
-       PT_REGS_EAX(regs) = (unsigned long) sig;
-       PT_REGS_EDX(regs) = (unsigned long) 0;
-       PT_REGS_ECX(regs) = (unsigned long) 0;
-
-       if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-               ptrace_notify(SIGTRAP);
-       return 0;
-
-err:
-       PT_REGS_SP(regs) = save_sp;
-       return err;
-}
-
-int setup_signal_stack_si(unsigned long stack_top, int sig,
-                         struct k_sigaction *ka, struct pt_regs *regs,
-                         siginfo_t *info, sigset_t *mask)
-{
-       struct rt_sigframe __user *frame;
-       void __user *restorer;
-       unsigned long save_sp = PT_REGS_SP(regs);
-       int err = 0;
-
-       stack_top &= -8UL;
-       frame = (struct rt_sigframe __user *) stack_top - 1;
-       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-               return 1;
-
-       restorer = frame->retcode;
-       if (ka->sa.sa_flags & SA_RESTORER)
-               restorer = ka->sa.sa_restorer;
-
-       /* See comment above about why this is here */
-       PT_REGS_SP(regs) = (unsigned long) frame;
-
-       err |= __put_user(restorer, &frame->pretcode);
-       err |= __put_user(sig, &frame->sig);
-       err |= __put_user(&frame->info, &frame->pinfo);
-       err |= __put_user(&frame->uc, &frame->puc);
-       err |= copy_siginfo_to_user(&frame->info, info);
-       err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask,
-                                    save_sp);
-
-       /*
-        * This is movl $,%eax ; int $0x80
-        *
-        * WE DO NOT USE IT ANY MORE! It's only left here for historical
-        * reasons and because gdb uses it as a signature to notice
-        * signal handler stack frames.
-        */
-       err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
-       err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
-       err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
-
-       if (err)
-               goto err;
-
-       PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
-       PT_REGS_EAX(regs) = (unsigned long) sig;
-       PT_REGS_EDX(regs) = (unsigned long) &frame->info;
-       PT_REGS_ECX(regs) = (unsigned long) &frame->uc;
-
-       if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-               ptrace_notify(SIGTRAP);
-       return 0;
-
-err:
-       PT_REGS_SP(regs) = save_sp;
-       return err;
-}
-
-long sys_sigreturn(struct pt_regs regs)
-{
-       unsigned long sp = PT_REGS_SP(&current->thread.regs);
-       struct sigframe __user *frame = (struct sigframe __user *)(sp - 8);
-       sigset_t set;
-       struct sigcontext __user *sc = &frame->sc;
-       unsigned long __user *oldmask = &sc->oldmask;
-       unsigned long __user *extramask = frame->extramask;
-       int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long);
-
-       if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) ||
-           copy_from_user(&set.sig[1], extramask, sig_size))
-               goto segfault;
-
-       sigdelsetmask(&set, ~_BLOCKABLE);
-
-       spin_lock_irq(&current->sighand->siglock);
-       current->blocked = set;
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
-
-       if (copy_sc_from_user(&current->thread.regs, sc))
-               goto segfault;
-
-       /* Avoid ERESTART handling */
-       PT_REGS_SYSCALL_NR(&current->thread.regs) = -1;
-       return PT_REGS_SYSCALL_RET(&current->thread.regs);
-
- segfault:
-       force_sig(SIGSEGV, current);
-       return 0;
-}
-
-long sys_rt_sigreturn(struct pt_regs regs)
-{
-       unsigned long sp = PT_REGS_SP(&current->thread.regs);
-       struct rt_sigframe __user *frame =
-               (struct rt_sigframe __user *) (sp - 4);
-       sigset_t set;
-       struct ucontext __user *uc = &frame->uc;
-       int sig_size = _NSIG_WORDS * sizeof(unsigned long);
-
-       if (copy_from_user(&set, &uc->uc_sigmask, sig_size))
-               goto segfault;
-
-       sigdelsetmask(&set, ~_BLOCKABLE);
-
-       spin_lock_irq(&current->sighand->siglock);
-       current->blocked = set;
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
-
-       if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
-               goto segfault;
-
-       /* Avoid ERESTART handling */
-       PT_REGS_SYSCALL_NR(&current->thread.regs) = -1;
-       return PT_REGS_SYSCALL_RET(&current->thread.regs);
-
- segfault:
-       force_sig(SIGSEGV, current);
-       return 0;
-}
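
Note: the tag-word conversion in the file above is dense, so a standalone copy with a worked input may help. The classic i387 tag word spends two bits per register (00 valid, 01 zero, 10 special, 11 empty), while FXSR keeps a single "occupied" bit per register; the shift-and-mask cascade compresses each pair into one bit.

	#include <stdio.h>

	/* Standalone copy of the conversion above, for illustration only. */
	static unsigned short twd_i387_to_fxsr(unsigned short twd)
	{
		unsigned int tmp;

		tmp = ~twd;
		tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0V0V0V0V0V0V0V0V */
		tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
		tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
		tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */
		return tmp;
	}

	int main(void)
	{
		/* st0 valid (00), st1 zero (01), st2..st7 empty (11) => 0xfff4 */
		printf("fxsr tag byte: %#04x\n", twd_i387_to_fxsr(0xfff4));
		/* prints 0x03: bits 0 and 1 set, i.e. st0 and st1 occupied */
		return 0;
	}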
diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S
deleted file mode 100644 (file)
index 54a36ec..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-#include "as-layout.h"
-
-       .globl syscall_stub
-.section .__syscall_stub, "ax"
-
-       .globl batch_syscall_stub
-batch_syscall_stub:
-       /* load pointer to first operation */
-       mov     $(STUB_DATA+8), %esp
-
-again:
-       /* load length of additional data */
-       mov     0x0(%esp), %eax
-
-       /* if(length == 0) : end of list */
-       /* write possible 0 to header */
-       mov     %eax, STUB_DATA+4
-       cmpl    $0, %eax
-       jz      done
-
-       /* save current pointer */
-       mov     %esp, STUB_DATA+4
-
-       /* skip additional data */
-       add     %eax, %esp
-
-       /* load syscall-# */
-       pop     %eax
-
-       /* load syscall params */
-       pop     %ebx
-       pop     %ecx
-       pop     %edx
-       pop     %esi
-       pop     %edi
-       pop     %ebp
-
-       /* execute syscall */
-       int     $0x80
-
-       /* check return value */
-       pop     %ebx
-       cmp     %ebx, %eax
-       je      again
-
-done:
-       /* save return value */
-       mov     %eax, STUB_DATA
-
-       /* stop */
-       int3
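
Note: one plausible C rendering of the record format batch_syscall_stub walks; the field names are illustrative, only the layout is implied by the assembly. The list starts at STUB_DATA + 8 and is terminated by a zero length word; on exit, STUB_DATA holds the last return value and STUB_DATA + 4 holds 0 on success, or a pointer to the record whose syscall returned an unexpected value.

	struct stub_batch_record {		/* hypothetical names */
		unsigned long data_len;		/* 0 terminates the list */
		/* unsigned char data[data_len];   inline payload, skipped over */
		unsigned long nr;		/* syscall number -> %eax */
		unsigned long args[6];		/* -> %ebx, %ecx, %edx, %esi, %edi, %ebp */
		unsigned long expected;		/* batch stops if the return differs */
	};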
diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c
deleted file mode 100644 (file)
index 28ccf73..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include "sysdep/stub.h"
-#include "sysdep/sigcontext.h"
-
-void __attribute__ ((__section__ (".__syscall_stub")))
-stub_segv_handler(int sig)
-{
-       struct sigcontext *sc = (struct sigcontext *) (&sig + 1);
-
-       GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA), sc);
-
-       trap_myself();
-}
diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/um/sys-i386/sys_call_table.S
deleted file mode 100644 (file)
index de27407..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <linux/linkage.h>
-/* Steal the i386 syscall table for our purposes, but with some slight changes. */
-
-#define sys_iopl sys_ni_syscall
-#define sys_ioperm sys_ni_syscall
-
-#define sys_vm86old sys_ni_syscall
-#define sys_vm86 sys_ni_syscall
-
-#define old_mmap sys_old_mmap
-
-#define ptregs_fork sys_fork
-#define ptregs_execve sys_execve
-#define ptregs_iopl sys_iopl
-#define ptregs_vm86old sys_vm86old
-#define ptregs_sigreturn sys_sigreturn
-#define ptregs_clone sys_clone
-#define ptregs_vm86 sys_vm86
-#define ptregs_rt_sigreturn sys_rt_sigreturn
-#define ptregs_sigaltstack sys_sigaltstack
-#define ptregs_vfork sys_vfork
-
-.section .rodata,"a"
-
-#include "../../x86/kernel/syscall_table_32.S"
-
-ENTRY(syscall_table_size)
-.long .-sys_call_table
diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c
deleted file mode 100644 (file)
index 70ca357..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#include "linux/sched.h"
-#include "linux/shm.h"
-#include "linux/ipc.h"
-#include "linux/syscalls.h"
-#include "asm/mman.h"
-#include "asm/uaccess.h"
-#include "asm/unistd.h"
-
-/*
- * The prototype on i386 is:
- *
- *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr)
- *
- * and the "newtls" arg. on i386 is read by copy_thread directly from the
- * register saved on the stack.
- */
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-              int __user *parent_tid, void *newtls, int __user *child_tid)
-{
-       long ret;
-
-       if (!newsp)
-               newsp = UPT_SP(&current->thread.regs.regs);
-
-       current->thread.forking = 1;
-       ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-                     child_tid);
-       current->thread.forking = 0;
-       return ret;
-}
-
-long sys_sigaction(int sig, const struct old_sigaction __user *act,
-                        struct old_sigaction __user *oact)
-{
-       struct k_sigaction new_ka, old_ka;
-       int ret;
-
-       if (act) {
-               old_sigset_t mask;
-               if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
-                   __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
-                   __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
-                       return -EFAULT;
-               __get_user(new_ka.sa.sa_flags, &act->sa_flags);
-               __get_user(mask, &act->sa_mask);
-               siginitset(&new_ka.sa.sa_mask, mask);
-       }
-
-       ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
-       if (!ret && oact) {
-               if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
-                   __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
-                   __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
-                       return -EFAULT;
-               __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-               __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
-       }
-
-       return ret;
-}
diff --git a/arch/um/sys-i386/sysrq.c b/arch/um/sys-i386/sysrq.c
deleted file mode 100644 (file)
index 171b3e9..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#include "linux/kernel.h"
-#include "linux/smp.h"
-#include "linux/sched.h"
-#include "linux/kallsyms.h"
-#include "asm/ptrace.h"
-#include "sysrq.h"
-
-/* This is declared by <linux/sched.h> */
-void show_regs(struct pt_regs *regs)
-{
-        printk("\n");
-        printk("EIP: %04lx:[<%08lx>] CPU: %d %s", 
-              0xffff & PT_REGS_CS(regs), PT_REGS_IP(regs),
-              smp_processor_id(), print_tainted());
-        if (PT_REGS_CS(regs) & 3)
-                printk(" ESP: %04lx:%08lx", 0xffff & PT_REGS_SS(regs),
-                      PT_REGS_SP(regs));
-        printk(" EFLAGS: %08lx\n    %s\n", PT_REGS_EFLAGS(regs),
-              print_tainted());
-        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-                PT_REGS_EAX(regs), PT_REGS_EBX(regs), 
-              PT_REGS_ECX(regs), 
-              PT_REGS_EDX(regs));
-        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
-              PT_REGS_ESI(regs), PT_REGS_EDI(regs), 
-              PT_REGS_EBP(regs));
-        printk(" DS: %04lx ES: %04lx\n",
-              0xffff & PT_REGS_DS(regs), 
-              0xffff & PT_REGS_ES(regs));
-
-        show_trace(NULL, (unsigned long *) &regs);
-}
-
-/* Copied from i386. */
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
-{
-       return  p > (void *)tinfo &&
-               p < (void *)tinfo + THREAD_SIZE - 3;
-}
-
-/* Adapted from i386 (we also print the address we read from). */
-static inline unsigned long print_context_stack(struct thread_info *tinfo,
-                               unsigned long *stack, unsigned long ebp)
-{
-       unsigned long addr;
-
-#ifdef CONFIG_FRAME_POINTER
-       while (valid_stack_ptr(tinfo, (void *)ebp)) {
-               addr = *(unsigned long *)(ebp + 4);
-               printk("%08lx:  [<%08lx>]", ebp + 4, addr);
-               print_symbol(" %s", addr);
-               printk("\n");
-               ebp = *(unsigned long *)ebp;
-       }
-#else
-       while (valid_stack_ptr(tinfo, stack)) {
-               addr = *stack;
-               if (__kernel_text_address(addr)) {
-                       printk("%08lx:  [<%08lx>]", (unsigned long) stack, addr);
-                       print_symbol(" %s", addr);
-                       printk("\n");
-               }
-               stack++;
-       }
-#endif
-       return ebp;
-}
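
A sketch of the i386 frame layout the CONFIG_FRAME_POINTER walk above depends on (the struct name is illustrative, not from the original file):

struct i386_frame {
	struct i386_frame *next;	/* saved %ebp of the caller */
	unsigned long ret;		/* return address, at ebp + 4 */
};

/* Following next walks caller frames until the pointer leaves the
 * THREAD_SIZE-aligned stack, which is what valid_stack_ptr() tests. */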
-
-void show_trace(struct task_struct* task, unsigned long * stack)
-{
-       unsigned long ebp;
-       struct thread_info *context;
-
-       /* Turn this into BUG_ON if possible. */
-       if (!stack) {
-               stack = (unsigned long*) &stack;
-               printk("show_trace: got NULL stack, implicit assumption task == current");
-               WARN_ON(1);
-       }
-
-       if (!task)
-               task = current;
-
-       if (task != current) {
-               ebp = (unsigned long) KSTK_EBP(task);
-       } else {
-               asm ("movl %%ebp, %0" : "=r" (ebp) : );
-       }
-
-       context = (struct thread_info *)
-               ((unsigned long)stack & (~(THREAD_SIZE - 1)));
-       print_context_stack(context, stack, ebp);
-
-       printk("\n");
-}
-
diff --git a/arch/um/sys-i386/tls.c b/arch/um/sys-i386/tls.c
deleted file mode 100644 (file)
index c6c7131..0000000
+++ /dev/null
@@ -1,396 +0,0 @@
-/*
- * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
- * Licensed under the GPL
- */
-
-#include "linux/percpu.h"
-#include "linux/sched.h"
-#include "asm/uaccess.h"
-#include "os.h"
-#include "skas.h"
-#include "sysdep/tls.h"
-
-/*
- * If needed we can detect when it's uninitialized.
- *
- * These are initialized in an initcall and unchanged thereafter.
- */
-static int host_supports_tls = -1;
-int host_gdt_entry_tls_min;
-
-int do_set_thread_area(struct user_desc *info)
-{
-       int ret;
-       u32 cpu;
-
-       cpu = get_cpu();
-       ret = os_set_thread_area(info, userspace_pid[cpu]);
-       put_cpu();
-
-       if (ret)
-               printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, "
-                      "index = %d\n", ret, info->entry_number);
-
-       return ret;
-}
-
-int do_get_thread_area(struct user_desc *info)
-{
-       int ret;
-       u32 cpu;
-
-       cpu = get_cpu();
-       ret = os_get_thread_area(info, userspace_pid[cpu]);
-       put_cpu();
-
-       if (ret)
-               printk(KERN_ERR "PTRACE_GET_THREAD_AREA failed, err = %d, "
-                      "index = %d\n", ret, info->entry_number);
-
-       return ret;
-}
-
-/*
- * sys_get_thread_area: get a yet unused TLS descriptor index.
- * XXX: Consider leaving one free slot for glibc usage in the first place. This must
- * be done here (and by changing GDT_ENTRY_TLS_* macros) and nowhere else.
- *
- * Also, this must be tested when compiling in SKAS mode with dynamic linking
- * and running against NPTL.
- */
-static int get_free_idx(struct task_struct* task)
-{
-       struct thread_struct *t = &task->thread;
-       int idx;
-
-       if (!t->arch.tls_array)
-               return GDT_ENTRY_TLS_MIN;
-
-       for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
-               if (!t->arch.tls_array[idx].present)
-                       return idx + GDT_ENTRY_TLS_MIN;
-       return -ESRCH;
-}
-
-static inline void clear_user_desc(struct user_desc* info)
-{
-       /* Postcondition: LDT_empty(info) returns true. */
-       memset(info, 0, sizeof(*info));
-
-       /*
-        * Check the LDT_empty or the i386 sys_get_thread_area code - we indeed
-        * obtain an empty user_desc.
-        */
-       info->read_exec_only = 1;
-       info->seg_not_present = 1;
-}
-
-#define O_FORCE 1
-
-static int load_TLS(int flags, struct task_struct *to)
-{
-       int ret = 0;
-       int idx;
-
-       for (idx = GDT_ENTRY_TLS_MIN; idx < GDT_ENTRY_TLS_MAX; idx++) {
-               struct uml_tls_struct* curr =
-                       &to->thread.arch.tls_array[idx - GDT_ENTRY_TLS_MIN];
-
-               /*
-                * If the entry is not present and hasn't been flushed yet, it is
-                * cleared here and the cleared state is flushed to the host.
-                */
-               if (!curr->present) {
-                       if (!curr->flushed) {
-                               clear_user_desc(&curr->tls);
-                               curr->tls.entry_number = idx;
-                       } else {
-                               WARN_ON(!LDT_empty(&curr->tls));
-                               continue;
-                       }
-               }
-
-               if (!(flags & O_FORCE) && curr->flushed)
-                       continue;
-
-               ret = do_set_thread_area(&curr->tls);
-               if (ret)
-                       goto out;
-
-               curr->flushed = 1;
-       }
-out:
-       return ret;
-}
-
-/*
- * Check whether we need to flush TLS descriptors for the new process, i.e.
- * whether any of them have not yet been flushed to the host.
- */
-static inline int needs_TLS_update(struct task_struct *task)
-{
-       int i;
-       int ret = 0;
-
-       for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) {
-               struct uml_tls_struct* curr =
-                       &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN];
-
-               /*
-                * Can't test curr->present, we may need to clear a descriptor
-                * which had a value.
-                */
-               if (curr->flushed)
-                       continue;
-               ret = 1;
-               break;
-       }
-       return ret;
-}
-
-/*
- * On a newly forked process, the TLS descriptors haven't yet been flushed. So
- * we mark them as such and the first switch_to will do the job.
- */
-void clear_flushed_tls(struct task_struct *task)
-{
-       int i;
-
-       for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) {
-               struct uml_tls_struct* curr =
-                       &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN];
-
-               /*
-                * Still correct to do this; if the entry wasn't present on the
-                * host it will remain as flushed as it was.
-                */
-               if (!curr->present)
-                       continue;
-
-               curr->flushed = 0;
-       }
-}
-
-/*
- * In SKAS0 mode, currently, multiple guest threads sharing the same ->mm have a
- * common host process. So this is needed in SKAS0 too.
- *
- * However, if each thread had a different host process (and this was discussed
- * for SMP support) this wouldn't be needed.
- *
- * And this will not need to be used when (and if) we add support to the host
- * SKAS patch.
- */
-
-int arch_switch_tls(struct task_struct *to)
-{
-       if (!host_supports_tls)
-               return 0;
-
-       /*
-        * We have no need whatsoever to switch TLS for kernel threads; beyond
-        * that, that would also result in us calling os_set_thread_area with
-        * userspace_pid[cpu] == 0, which gives an error.
-        */
-       if (likely(to->mm))
-               return load_TLS(O_FORCE, to);
-
-       return 0;
-}
-
-static int set_tls_entry(struct task_struct* task, struct user_desc *info,
-                        int idx, int flushed)
-{
-       struct thread_struct *t = &task->thread;
-
-       if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-               return -EINVAL;
-
-       t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls = *info;
-       t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present = 1;
-       t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed = flushed;
-
-       return 0;
-}
-
-int arch_copy_tls(struct task_struct *new)
-{
-       struct user_desc info;
-       int idx, ret = -EFAULT;
-
-       if (copy_from_user(&info,
-                          (void __user *) UPT_ESI(&new->thread.regs.regs),
-                          sizeof(info)))
-               goto out;
-
-       ret = -EINVAL;
-       if (LDT_empty(&info))
-               goto out;
-
-       idx = info.entry_number;
-
-       ret = set_tls_entry(new, &info, idx, 0);
-out:
-       return ret;
-}
-
-/* XXX: use do_get_thread_area to read the host value? I'm not at all sure! */
-static int get_tls_entry(struct task_struct *task, struct user_desc *info,
-                        int idx)
-{
-       struct thread_struct *t = &task->thread;
-
-       if (!t->arch.tls_array)
-               goto clear;
-
-       if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-               return -EINVAL;
-
-       if (!t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present)
-               goto clear;
-
-       *info = t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls;
-
-out:
-       /*
-        * Temporary debugging check, to make sure that things have been
-        * flushed. This could be triggered if load_TLS() failed.
-        */
-       if (unlikely(task == current &&
-                    !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) {
-               printk(KERN_ERR "get_tls_entry: task with pid %d got here "
-                               "without flushed TLS.", current->pid);
-       }
-
-       return 0;
-clear:
-       /*
-        * When the TLS entry has not been set, the values returned to userspace
-        * from the tls_array are 0 (because it is cleared at boot, see
-        * arch/i386/kernel/head.S:cpu_gdt_table). Emulate that.
-        */
-       clear_user_desc(info);
-       info->entry_number = idx;
-       goto out;
-}
-
-int sys_set_thread_area(struct user_desc __user *user_desc)
-{
-       struct user_desc info;
-       int idx, ret;
-
-       if (!host_supports_tls)
-               return -ENOSYS;
-
-       if (copy_from_user(&info, user_desc, sizeof(info)))
-               return -EFAULT;
-
-       idx = info.entry_number;
-
-       if (idx == -1) {
-               idx = get_free_idx(current);
-               if (idx < 0)
-                       return idx;
-               info.entry_number = idx;
-               /* Tell the user which slot we chose for him. */
-               if (put_user(idx, &user_desc->entry_number))
-                       return -EFAULT;
-       }
-
-       ret = do_set_thread_area(&info);
-       if (ret)
-               return ret;
-       return set_tls_entry(current, &info, idx, 1);
-}
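
For reference, the userspace side of the entry_number == -1 convention handled above: a sketch assuming an i386 host where <asm/ldt.h> and SYS_set_thread_area are available (the helper name is illustrative).

#include <asm/ldt.h>		/* struct user_desc */
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int alloc_tls_slot(unsigned long base)
{
	struct user_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.entry_number = -1;		/* ask the kernel to pick a free slot */
	desc.base_addr = base;
	desc.limit = 0xfffff;
	desc.seg_32bit = 1;
	desc.limit_in_pages = 1;
	desc.useable = 1;

	if (syscall(SYS_set_thread_area, &desc) != 0)
		return -1;

	/* sys_set_thread_area() wrote the chosen GDT index back */
	return desc.entry_number;
}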
-
-/*
- * Perform set_thread_area on behalf of the traced child.
- * Note: error handling is not done on the deferred load, and this differs from
- * i386. However, the only possible errors are caused by bugs.
- */
-int ptrace_set_thread_area(struct task_struct *child, int idx,
-                          struct user_desc __user *user_desc)
-{
-       struct user_desc info;
-
-       if (!host_supports_tls)
-               return -EIO;
-
-       if (copy_from_user(&info, user_desc, sizeof(info)))
-               return -EFAULT;
-
-       return set_tls_entry(child, &info, idx, 0);
-}
-
-int sys_get_thread_area(struct user_desc __user *user_desc)
-{
-       struct user_desc info;
-       int idx, ret;
-
-       if (!host_supports_tls)
-               return -ENOSYS;
-
-       if (get_user(idx, &user_desc->entry_number))
-               return -EFAULT;
-
-       ret = get_tls_entry(current, &info, idx);
-       if (ret < 0)
-               goto out;
-
-       if (copy_to_user(user_desc, &info, sizeof(info)))
-               ret = -EFAULT;
-
-out:
-       return ret;
-}
-
-/*
- * Perform get_thread_area on behalf of the traced child.
- */
-int ptrace_get_thread_area(struct task_struct *child, int idx,
-               struct user_desc __user *user_desc)
-{
-       struct user_desc info;
-       int ret;
-
-       if (!host_supports_tls)
-               return -EIO;
-
-       ret = get_tls_entry(child, &info, idx);
-       if (ret < 0)
-               goto out;
-
-       if (copy_to_user(user_desc, &info, sizeof(info)))
-               ret = -EFAULT;
-out:
-       return ret;
-}
-
-/*
- * This code is really i386-only, but it detects and logs x86_64 GDT indexes
- * if a 32-bit UML is running on a 64-bit host.
- */
-static int __init __setup_host_supports_tls(void)
-{
-       check_host_supports_tls(&host_supports_tls, &host_gdt_entry_tls_min);
-       if (host_supports_tls) {
-               printk(KERN_INFO "Host TLS support detected\n");
-               printk(KERN_INFO "Detected host type: ");
-               switch (host_gdt_entry_tls_min) {
-               case GDT_ENTRY_TLS_MIN_I386:
-                       printk(KERN_CONT "i386");
-                       break;
-               case GDT_ENTRY_TLS_MIN_X86_64:
-                       printk(KERN_CONT "x86_64");
-                       break;
-               }
-               printk(KERN_CONT " (GDT indexes %d to %d)\n",
-                      host_gdt_entry_tls_min,
-                      host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES - 1);
-       } else
-               printk(KERN_ERR "  Host TLS support NOT detected! "
-                               "TLS support inside UML will not work\n");
-       return 0;
-}
-
-__initcall(__setup_host_supports_tls);
diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c
deleted file mode 100644 (file)
index 5f883bf..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-#include <stdio.h>
-#include <stddef.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/user.h>
-#include <sys/mman.h>
-#include <asm/ptrace.h>
-
-#define DEFINE(sym, val) \
-       asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define DEFINE_LONGS(sym, val) \
-       asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
-
-#define OFFSET(sym, str, mem) \
-       DEFINE(sym, offsetof(struct str, mem));
-
-void foo(void)
-{
-       OFFSET(HOST_SC_TRAPNO, sigcontext, trapno);
-       OFFSET(HOST_SC_ERR, sigcontext, err);
-       OFFSET(HOST_SC_CR2, sigcontext, cr2);
-
-       DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
-       DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
-
-       DEFINE(HOST_IP, EIP);
-       DEFINE(HOST_SP, UESP);
-       DEFINE(HOST_EFLAGS, EFL);
-       DEFINE(HOST_EAX, EAX);
-       DEFINE(HOST_EBX, EBX);
-       DEFINE(HOST_ECX, ECX);
-       DEFINE(HOST_EDX, EDX);
-       DEFINE(HOST_ESI, ESI);
-       DEFINE(HOST_EDI, EDI);
-       DEFINE(HOST_EBP, EBP);
-       DEFINE(HOST_CS, CS);
-       DEFINE(HOST_SS, SS);
-       DEFINE(HOST_DS, DS);
-       DEFINE(HOST_FS, FS);
-       DEFINE(HOST_ES, ES);
-       DEFINE(HOST_GS, GS);
-       DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
-
-       /* XXX Duplicated between i386 and x86_64 */
-       DEFINE(UM_POLLIN, POLLIN);
-       DEFINE(UM_POLLPRI, POLLPRI);
-       DEFINE(UM_POLLOUT, POLLOUT);
-
-       DEFINE(UM_PROT_READ, PROT_READ);
-       DEFINE(UM_PROT_WRITE, PROT_WRITE);
-       DEFINE(UM_PROT_EXEC, PROT_EXEC);
-}
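
The DEFINE() trick above never executes: it plants "->" marker lines in the compiler's assembly output, which the build machinery parses into user_constants.h (the header the sysdep files later in this series include). A standalone sketch of the mechanism, with illustrative names:

/* define-demo.c: "gcc -S define-demo.c" leaves a line like
 *	->DEMO_NR $42 42
 * in define-demo.s. The .s is never assembled ("->" is not valid
 * assembly); it is only scanned for the markers. */
#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

void foo(void)
{
	DEFINE(DEMO_NR, 42);
}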
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
deleted file mode 100644 (file)
index bd4d1d3..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Copyright 2003 PathScale, Inc.
-#
-# Licensed under the GPL
-#
-
-obj-y = bug.o bugs.o delay.o fault.o ldt.o ptrace.o ptrace_user.o mem.o \
-       setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \
-       sysrq.o ksyms.o tls.o
-
-obj-y += vdso/
-
-subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o \
-               lib/rwsem.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module.o
-
-ldt-y = ../sys-i386/ldt.o
-
-USER_OBJS := ptrace_user.o
-
-USER_OBJS += user-offsets.s
-extra-y += user-offsets.s
-
-UNPROFILE_OBJS := stub_segv.o
-CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/sys-x86_64/asm/archparam.h b/arch/um/sys-x86_64/asm/archparam.h
deleted file mode 100644 (file)
index 6c08366..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_ARCHPARAM_X86_64_H
-#define __UM_ARCHPARAM_X86_64_H
-
-
-/* No user-accessible fixmap addresses, i.e. vsyscall */
-#define FIXADDR_USER_START     0
-#define FIXADDR_USER_END       0
-
-#endif
-
diff --git a/arch/um/sys-x86_64/asm/elf.h b/arch/um/sys-x86_64/asm/elf.h
deleted file mode 100644 (file)
index 11a2bfb..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- *
- * Licensed under the GPL
- */
-#ifndef __UM_ELF_X86_64_H
-#define __UM_ELF_X86_64_H
-
-#include <asm/user.h>
-#include "skas.h"
-
-/* x86-64 relocation types, taken from asm-x86_64/elf.h */
-#define R_X86_64_NONE          0       /* No reloc */
-#define R_X86_64_64            1       /* Direct 64 bit  */
-#define R_X86_64_PC32          2       /* PC relative 32 bit signed */
-#define R_X86_64_GOT32         3       /* 32 bit GOT entry */
-#define R_X86_64_PLT32         4       /* 32 bit PLT address */
-#define R_X86_64_COPY          5       /* Copy symbol at runtime */
-#define R_X86_64_GLOB_DAT      6       /* Create GOT entry */
-#define R_X86_64_JUMP_SLOT     7       /* Create PLT entry */
-#define R_X86_64_RELATIVE      8       /* Adjust by program base */
-#define R_X86_64_GOTPCREL      9       /* 32 bit signed pc relative
-                                          offset to GOT */
-#define R_X86_64_32            10      /* Direct 32 bit zero extended */
-#define R_X86_64_32S           11      /* Direct 32 bit sign extended */
-#define R_X86_64_16            12      /* Direct 16 bit zero extended */
-#define R_X86_64_PC16          13      /* 16 bit sign extended pc relative */
-#define R_X86_64_8             14      /* Direct 8 bit sign extended  */
-#define R_X86_64_PC8           15      /* 8 bit sign extended pc relative */
-
-#define R_X86_64_NUM           16
-
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct user_i387_struct elf_fpregset_t;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) \
-       ((x)->e_machine == EM_X86_64)
-
-#define ELF_CLASS      ELFCLASS64
-#define ELF_DATA        ELFDATA2LSB
-#define ELF_ARCH        EM_X86_64
-
-#define ELF_PLAT_INIT(regs, load_addr)    do { \
-       PT_REGS_RBX(regs) = 0; \
-       PT_REGS_RCX(regs) = 0; \
-       PT_REGS_RDX(regs) = 0; \
-       PT_REGS_RSI(regs) = 0; \
-       PT_REGS_RDI(regs) = 0; \
-       PT_REGS_RBP(regs) = 0; \
-       PT_REGS_RAX(regs) = 0; \
-       PT_REGS_R8(regs) = 0; \
-       PT_REGS_R9(regs) = 0; \
-       PT_REGS_R10(regs) = 0; \
-       PT_REGS_R11(regs) = 0; \
-       PT_REGS_R12(regs) = 0; \
-       PT_REGS_R13(regs) = 0; \
-       PT_REGS_R14(regs) = 0; \
-       PT_REGS_R15(regs) = 0; \
-} while (0)
-
-#define ELF_CORE_COPY_REGS(pr_reg, _regs)              \
-       (pr_reg)[0] = (_regs)->regs.gp[0];                      \
-       (pr_reg)[1] = (_regs)->regs.gp[1];                      \
-       (pr_reg)[2] = (_regs)->regs.gp[2];                      \
-       (pr_reg)[3] = (_regs)->regs.gp[3];                      \
-       (pr_reg)[4] = (_regs)->regs.gp[4];                      \
-       (pr_reg)[5] = (_regs)->regs.gp[5];                      \
-       (pr_reg)[6] = (_regs)->regs.gp[6];                      \
-       (pr_reg)[7] = (_regs)->regs.gp[7];                      \
-       (pr_reg)[8] = (_regs)->regs.gp[8];                      \
-       (pr_reg)[9] = (_regs)->regs.gp[9];                      \
-       (pr_reg)[10] = (_regs)->regs.gp[10];                    \
-       (pr_reg)[11] = (_regs)->regs.gp[11];                    \
-       (pr_reg)[12] = (_regs)->regs.gp[12];                    \
-       (pr_reg)[13] = (_regs)->regs.gp[13];                    \
-       (pr_reg)[14] = (_regs)->regs.gp[14];                    \
-       (pr_reg)[15] = (_regs)->regs.gp[15];                    \
-       (pr_reg)[16] = (_regs)->regs.gp[16];                    \
-       (pr_reg)[17] = (_regs)->regs.gp[17];                    \
-       (pr_reg)[18] = (_regs)->regs.gp[18];                    \
-       (pr_reg)[19] = (_regs)->regs.gp[19];                    \
-       (pr_reg)[20] = (_regs)->regs.gp[20];                    \
-       (pr_reg)[21] = current->thread.arch.fs;                 \
-       (pr_reg)[22] = 0;                                       \
-       (pr_reg)[23] = 0;                                       \
-       (pr_reg)[24] = 0;                                       \
-       (pr_reg)[25] = 0;                                       \
-       (pr_reg)[26] = 0;
-
-#define task_pt_regs(t) (&(t)->thread.regs)
-
-struct task_struct;
-
-extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
-
-#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
-
-#ifdef TIF_IA32 /* XXX */
-#error XXX, indeed
-        clear_thread_flag(TIF_IA32);
-#endif
-
-#define ELF_EXEC_PAGESIZE 4096
-
-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
-
-extern long elf_aux_hwcap;
-#define ELF_HWCAP (elf_aux_hwcap)
-
-#define ELF_PLATFORM "x86_64"
-
-#define SET_PERSONALITY(ex) do ; while(0)
-
-#define __HAVE_ARCH_GATE_AREA 1
-#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
-struct linux_binprm;
-extern int arch_setup_additional_pages(struct linux_binprm *bprm,
-       int uses_interp);
-
-extern unsigned long um_vdso_addr;
-#define AT_SYSINFO_EHDR 33
-#define ARCH_DLINFO    NEW_AUX_ENT(AT_SYSINFO_EHDR, um_vdso_addr)
-
-#endif
diff --git a/arch/um/sys-x86_64/asm/module.h b/arch/um/sys-x86_64/asm/module.h
deleted file mode 100644 (file)
index 8eb79c2..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_MODULE_X86_64_H
-#define __UM_MODULE_X86_64_H
-
-/* UML is simple */
-struct mod_arch_specific
-{
-};
-
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-
-#endif
-
diff --git a/arch/um/sys-x86_64/asm/processor.h b/arch/um/sys-x86_64/asm/processor.h
deleted file mode 100644 (file)
index 875a26a..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_PROCESSOR_X86_64_H
-#define __UM_PROCESSOR_X86_64_H
-
-/* include faultinfo structure */
-#include "sysdep/faultinfo.h"
-
-struct arch_thread {
-        unsigned long debugregs[8];
-        int debugregs_seq;
-        unsigned long fs;
-        struct faultinfo faultinfo;
-};
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
-       __asm__ __volatile__("rep;nop": : :"memory");
-}
-
-#define cpu_relax()   rep_nop()
-
-#define INIT_ARCH_THREAD { .debugregs                  = { [ 0 ... 7 ] = 0 }, \
-                          .debugregs_seq       = 0, \
-                          .fs                  = 0, \
-                          .faultinfo           = { 0, 0, 0 } }
-
-static inline void arch_flush_thread(struct arch_thread *thread)
-{
-}
-
-static inline void arch_copy_thread(struct arch_thread *from,
-                                    struct arch_thread *to)
-{
-       to->fs = from->fs;
-}
-
-#include <asm/user.h>
-
-#define current_text_addr() \
-       ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; })
-
-#define ARCH_IS_STACKGROW(address) \
-        (address + 128 >= UPT_SP(&current->thread.regs.regs))
-
-#define KSTK_EIP(tsk) KSTK_REG(tsk, RIP)
-#define KSTK_ESP(tsk) KSTK_REG(tsk, RSP)
-
-#include "asm/processor-generic.h"
-
-#endif
diff --git a/arch/um/sys-x86_64/asm/ptrace.h b/arch/um/sys-x86_64/asm/ptrace.h
deleted file mode 100644 (file)
index 83d8c47..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_PTRACE_X86_64_H
-#define __UM_PTRACE_X86_64_H
-
-#include "linux/compiler.h"
-#include "asm/errno.h"
-
-#define __FRAME_OFFSETS /* Needed to get the R* macros */
-#include "asm/ptrace-generic.h"
-
-#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
-
-#define PT_REGS_RBX(r) UPT_RBX(&(r)->regs)
-#define PT_REGS_RCX(r) UPT_RCX(&(r)->regs)
-#define PT_REGS_RDX(r) UPT_RDX(&(r)->regs)
-#define PT_REGS_RSI(r) UPT_RSI(&(r)->regs)
-#define PT_REGS_RDI(r) UPT_RDI(&(r)->regs)
-#define PT_REGS_RBP(r) UPT_RBP(&(r)->regs)
-#define PT_REGS_RAX(r) UPT_RAX(&(r)->regs)
-#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
-#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
-#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
-#define PT_REGS_R11(r) UPT_R11(&(r)->regs)
-#define PT_REGS_R12(r) UPT_R12(&(r)->regs)
-#define PT_REGS_R13(r) UPT_R13(&(r)->regs)
-#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
-#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
-
-#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
-#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
-#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
-#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
-#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
-#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
-
-#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs)
-#define PT_REGS_RIP(r) UPT_IP(&(r)->regs)
-#define PT_REGS_RSP(r) UPT_SP(&(r)->regs)
-
-#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
-
-/* XXX */
-#define user_mode(r) UPT_IS_USER(&(r)->regs)
-#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_RAX(r)
-#define PT_REGS_SYSCALL_RET(r) PT_REGS_RAX(r)
-
-#define PT_FIX_EXEC_STACK(sp) do ; while(0)
-
-#define profile_pc(regs) PT_REGS_IP(regs)
-
-struct user_desc;
-
-static inline int ptrace_get_thread_area(struct task_struct *child, int idx,
-                                         struct user_desc __user *user_desc)
-{
-        return -ENOSYS;
-}
-
-static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
-                                         struct user_desc __user *user_desc)
-{
-        return -ENOSYS;
-}
-
-extern long arch_prctl(struct task_struct *task, int code,
-                      unsigned long __user *addr);
-#endif
diff --git a/arch/um/sys-x86_64/bug.c b/arch/um/sys-x86_64/bug.c
deleted file mode 100644 (file)
index e8034e3..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL V2
- */
-
-#include <linux/uaccess.h>
-
-/*
- * Mostly copied from i386/x86_64 - eliminated the eip < PAGE_OFFSET check because
- * that's not relevant in skas mode.
- */
-
-int is_valid_bugaddr(unsigned long eip)
-{
-       unsigned short ud2;
-
-       if (probe_kernel_address((unsigned short __user *)eip, ud2))
-               return 0;
-
-       return ud2 == 0x0b0f;
-}
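
The compare above tests for the two-byte "ud2" opcode (0x0f 0x0b) as it lands in a little-endian u16; a minimal self-contained check of that byte-order assumption:

#include <assert.h>
#include <string.h>

int main(void)
{
	const unsigned char ud2[2] = { 0x0f, 0x0b };	/* ud2 opcode bytes */
	unsigned short v;

	memcpy(&v, ud2, sizeof(v));
	assert(v == 0x0b0f);	/* holds on little-endian x86 hosts */
	return 0;
}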
diff --git a/arch/um/sys-x86_64/bugs.c b/arch/um/sys-x86_64/bugs.c
deleted file mode 100644 (file)
index 44e02ba..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#include "sysdep/ptrace.h"
-
-void arch_check_bugs(void)
-{
-}
-
-void arch_examine_signal(int sig, struct uml_pt_regs *regs)
-{
-}
diff --git a/arch/um/sys-x86_64/delay.c b/arch/um/sys-x86_64/delay.c
deleted file mode 100644 (file)
index f3fe1a6..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2011 Richard Weinberger <richard@nod.at>
- * Mostly copied from arch/x86/lib/delay.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <asm/param.h>
-
-void __delay(unsigned long loops)
-{
-       asm volatile(
-               "test %0,%0\n"
-               "jz 3f\n"
-               "jmp 1f\n"
-
-               ".align 16\n"
-               "1: jmp 2f\n"
-
-               ".align 16\n"
-               "2: dec %0\n"
-               " jnz 2b\n"
-               "3: dec %0\n"
-
-               : /* we don't need output */
-               : "a" (loops)
-       );
-}
-EXPORT_SYMBOL(__delay);
-
-inline void __const_udelay(unsigned long xloops)
-{
-       int d0;
-
-       xloops *= 4;
-       asm("mull %%edx"
-               : "=d" (xloops), "=&a" (d0)
-               : "1" (xloops), "0"
-               (loops_per_jiffy * (HZ/4)));
-
-       __delay(++xloops);
-}
-EXPORT_SYMBOL(__const_udelay);
-
-void __udelay(unsigned long usecs)
-{
-       __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
-}
-EXPORT_SYMBOL(__udelay);
-
-void __ndelay(unsigned long nsecs)
-{
-       __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
-}
-EXPORT_SYMBOL(__ndelay);
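
The magic constants above are 2**32-scaled fixed point (0x10c7 is 2**32 / 10**6 rounded up), and the mull keeps only the high 32 bits of the product. The same arithmetic in plain C, a sketch with an illustrative helper name:

#include <stdint.h>

static unsigned long loops_for_usecs(unsigned long usecs,
				     unsigned long loops_per_jiffy,
				     unsigned long hz)
{
	uint32_t xloops = usecs * 0x10c7u;	/* 2**32 / 10**6, rounded up */

	xloops *= 4;
	/* high half of the 32x32->64 multiply, like "mull %%edx" above;
	 * overall: usecs * loops_per_jiffy * HZ / 10**6 */
	xloops = (uint32_t)(((uint64_t)xloops *
			     (loops_per_jiffy * (hz / 4))) >> 32);
	return xloops + 1;			/* the "++xloops" above */
}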
diff --git a/arch/um/sys-x86_64/fault.c b/arch/um/sys-x86_64/fault.c
deleted file mode 100644 (file)
index ce85117..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#include "sysdep/ptrace.h"
-
-/* These two are from asm-um/uaccess.h and linux/module.h, check them. */
-struct exception_table_entry
-{
-       unsigned long insn;
-       unsigned long fixup;
-};
-
-const struct exception_table_entry *search_exception_tables(unsigned long add);
-
-int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
-{
-       const struct exception_table_entry *fixup;
-
-       fixup = search_exception_tables(address);
-       if (fixup != 0) {
-               UPT_IP(regs) = fixup->fixup;
-               return 1;
-       }
-       return 0;
-}
diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/um/sys-x86_64/ksyms.c
deleted file mode 100644 (file)
index 1db2fce..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#include <linux/module.h>
-#include <asm/string.h>
-#include <asm/checksum.h>
-
-/* XXX: we need them because they would be exported by native x86_64 */
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
-EXPORT_SYMBOL(memcpy);
-#else
-EXPORT_SYMBOL(__memcpy);
-#endif
-EXPORT_SYMBOL(csum_partial);
diff --git a/arch/um/sys-x86_64/mem.c b/arch/um/sys-x86_64/mem.c
deleted file mode 100644 (file)
index 5465187..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-#include "linux/mm.h"
-#include "asm/page.h"
-#include "asm/mman.h"
-
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
-       if (vma->vm_mm && vma->vm_start == um_vdso_addr)
-               return "[vdso]";
-
-       return NULL;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-       return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-       return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-       return 0;
-}
diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c
deleted file mode 100644 (file)
index 4005506..0000000
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- *
- * Licensed under the GPL
- */
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-#define __FRAME_OFFSETS
-#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-
-/*
- * determines which flags the user has access to.
- * 1 = access 0 = no access
- */
-#define FLAG_MASK 0x44dd5UL
-
-int putreg(struct task_struct *child, int regno, unsigned long value)
-{
-       unsigned long tmp;
-
-#ifdef TIF_IA32
-       /*
-        * Some code in the 64bit emulation may not be 64bit clean.
-        * Don't take any chances.
-        */
-       if (test_tsk_thread_flag(child, TIF_IA32))
-               value &= 0xffffffff;
-#endif
-       switch (regno) {
-       case FS:
-       case GS:
-       case DS:
-       case ES:
-       case SS:
-       case CS:
-               if (value && (value & 3) != 3)
-                       return -EIO;
-               value &= 0xffff;
-               break;
-
-       case FS_BASE:
-       case GS_BASE:
-               if (!((value >> 48) == 0 || (value >> 48) == 0xffff))
-                       return -EIO;
-               break;
-
-       case EFLAGS:
-               value &= FLAG_MASK;
-               tmp = PT_REGS_EFLAGS(&child->thread.regs) & ~FLAG_MASK;
-               value |= tmp;
-               break;
-       }
-
-       PT_REGS_SET(&child->thread.regs, regno, value);
-       return 0;
-}
-
-int poke_user(struct task_struct *child, long addr, long data)
-{
-       if ((addr & 3) || addr < 0)
-               return -EIO;
-
-       if (addr < MAX_REG_OFFSET)
-               return putreg(child, addr, data);
-       else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
-               (addr <= offsetof(struct user, u_debugreg[7]))) {
-               addr -= offsetof(struct user, u_debugreg[0]);
-               addr = addr >> 2;
-               if ((addr == 4) || (addr == 5))
-                       return -EIO;
-               child->thread.arch.debugregs[addr] = data;
-               return 0;
-       }
-       return -EIO;
-}
-
-unsigned long getreg(struct task_struct *child, int regno)
-{
-       unsigned long retval = ~0UL;
-       switch (regno) {
-       case FS:
-       case GS:
-       case DS:
-       case ES:
-       case SS:
-       case CS:
-               retval = 0xffff;
-               /* fall through */
-       default:
-               retval &= PT_REG(&child->thread.regs, regno);
-#ifdef TIF_IA32
-               if (test_tsk_thread_flag(child, TIF_IA32))
-                       retval &= 0xffffffff;
-#endif
-       }
-       return retval;
-}
-
-int peek_user(struct task_struct *child, long addr, long data)
-{
-       /* read the word at location addr in the USER area. */
-       unsigned long tmp;
-
-       if ((addr & 3) || addr < 0)
-               return -EIO;
-
-       tmp = 0;  /* Default return condition */
-       if (addr < MAX_REG_OFFSET)
-               tmp = getreg(child, addr);
-       else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
-               (addr <= offsetof(struct user, u_debugreg[7]))) {
-               addr -= offsetof(struct user, u_debugreg[0]);
-               addr = addr >> 2;
-               tmp = child->thread.arch.debugregs[addr];
-       }
-       return put_user(tmp, (unsigned long *) data);
-}
-
-/* XXX Mostly copied from sys-i386 */
-int is_syscall(unsigned long addr)
-{
-       unsigned short instr;
-       int n;
-
-       n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
-       if (n) {
-               /*
-                * access_process_vm() grants access to vsyscall and stub,
-                * while copy_from_user doesn't. Maybe access_process_vm is
-                * slow, but that doesn't matter, since it will be called only
-                * in case of singlestepping, if copy_from_user failed.
-                */
-               n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
-               if (n != sizeof(instr)) {
-                       printk("is_syscall : failed to read instruction from "
-                              "0x%lx\n", addr);
-                       return 1;
-               }
-       }
-       /* 0x0f 0x05 is the x86_64 "syscall" opcode, read here as a little-endian u16 */
-       return instr == 0x050f;
-}
-
-static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
-{
-       int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
-       long fpregs[HOST_FP_SIZE];
-
-       BUG_ON(sizeof(*buf) != sizeof(fpregs));
-       err = save_fp_registers(userspace_pid[cpu], fpregs);
-       if (err)
-               return err;
-
-       n = copy_to_user(buf, fpregs, sizeof(fpregs));
-       if (n > 0)
-               return -EFAULT;
-
-       return n;
-}
-
-static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
-{
-       int n, cpu = ((struct thread_info *) child->stack)->cpu;
-       long fpregs[HOST_FP_SIZE];
-
-       BUG_ON(sizeof(*buf) != sizeof(fpregs));
-       n = copy_from_user(fpregs, buf, sizeof(fpregs));
-       if (n > 0)
-               return -EFAULT;
-
-       return restore_fp_registers(userspace_pid[cpu], fpregs);
-}
-
-long subarch_ptrace(struct task_struct *child, long request,
-                   unsigned long addr, unsigned long data)
-{
-       int ret = -EIO;
-       void __user *datap = (void __user *) data;
-
-       switch (request) {
-       case PTRACE_GETFPREGS: /* Get the child FPU state. */
-               ret = get_fpregs(datap, child);
-               break;
-       case PTRACE_SETFPREGS: /* Set the child FPU state. */
-               ret = set_fpregs(datap, child);
-               break;
-       case PTRACE_ARCH_PRCTL:
-               /* XXX Calls ptrace on the host - needs some SMP thinking */
-               ret = arch_prctl(child, data, (void __user *) addr);
-               break;
-       }
-
-       return ret;
-}
diff --git a/arch/um/sys-x86_64/ptrace_user.c b/arch/um/sys-x86_64/ptrace_user.c
deleted file mode 100644 (file)
index c57a496..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#include <errno.h>
-#include "ptrace_user.h"
-
-int ptrace_getregs(long pid, unsigned long *regs_out)
-{
-       if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0)
-               return -errno;
-       return(0);
-}
-
-int ptrace_setregs(long pid, unsigned long *regs_out)
-{
-       if (ptrace(PTRACE_SETREGS, pid, 0, regs_out) < 0)
-               return -errno;
-       return(0);
-}
diff --git a/arch/um/sys-x86_64/setjmp.S b/arch/um/sys-x86_64/setjmp.S
deleted file mode 100644 (file)
index 45f547b..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-#
-# arch/x86_64/setjmp.S
-#
-# setjmp/longjmp for the x86-64 architecture
-#
-
-#
-# The jmp_buf is assumed to contain the following, in order:
-#      %rbx
-#      %rsp (post-return)
-#      %rbp
-#      %r12
-#      %r13
-#      %r14
-#      %r15
-#      <return address>
-#
-
-       .text
-       .align 4
-       .globl setjmp
-       .type setjmp, @function
-setjmp:
-       pop  %rsi                       # Return address, and adjust the stack
-       xorl %eax,%eax                  # Return value
-       movq %rbx,(%rdi)
-       movq %rsp,8(%rdi)               # Post-return %rsp!
-       push %rsi                       # Make the call/return stack happy
-       movq %rbp,16(%rdi)
-       movq %r12,24(%rdi)
-       movq %r13,32(%rdi)
-       movq %r14,40(%rdi)
-       movq %r15,48(%rdi)
-       movq %rsi,56(%rdi)              # Return address
-       ret
-
-       .size setjmp,.-setjmp
-
-       .text
-       .align 4
-       .globl longjmp
-       .type longjmp, @function
-longjmp:
-       movl %esi,%eax                  # Return value (int)
-       movq (%rdi),%rbx
-       movq 8(%rdi),%rsp
-       movq 16(%rdi),%rbp
-       movq 24(%rdi),%r12
-       movq 32(%rdi),%r13
-       movq 40(%rdi),%r14
-       movq 48(%rdi),%r15
-       jmp *56(%rdi)
-
-       .size longjmp,.-longjmp
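
The slot order documented in the header comment is mirrored by struct __jmp_buf in sysdep/archsetjmp.h (deleted next). A small C sketch pinning the offset the final movq in setjmp assumes (the struct name is illustrative):

#include <stddef.h>

struct jmp_regs {	/* same slot order as the comment above */
	unsigned long rbx, rsp, rbp, r12, r13, r14, r15, rip;
};

/* setjmp stores the return address with "movq %rsi,56(%rdi)" */
_Static_assert(offsetof(struct jmp_regs, rip) == 56,
	       "return address lands in the eighth jmp_buf slot");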
diff --git a/arch/um/sys-x86_64/shared/sysdep/archsetjmp.h b/arch/um/sys-x86_64/shared/sysdep/archsetjmp.h
deleted file mode 100644 (file)
index 2af8f12..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * arch/um/include/sysdep-x86_64/archsetjmp.h
- */
-
-#ifndef _KLIBC_ARCHSETJMP_H
-#define _KLIBC_ARCHSETJMP_H
-
-struct __jmp_buf {
-       unsigned long __rbx;
-       unsigned long __rsp;
-       unsigned long __rbp;
-       unsigned long __r12;
-       unsigned long __r13;
-       unsigned long __r14;
-       unsigned long __r15;
-       unsigned long __rip;
-};
-
-typedef struct __jmp_buf jmp_buf[1];
-
-#define JB_IP __rip
-#define JB_SP __rsp
-
-#endif                         /* _KLIBC_ARCHSETJMP_H */
diff --git a/arch/um/sys-x86_64/shared/sysdep/barrier.h b/arch/um/sys-x86_64/shared/sysdep/barrier.h
deleted file mode 100644 (file)
index 7b610be..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __SYSDEP_X86_64_BARRIER_H
-#define __SYSDEP_X86_64_BARRIER_H
-
-/* Copied from include/asm-x86_64 for use by userspace. */
-#define mb()   asm volatile("mfence":::"memory")
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/checksum.h b/arch/um/sys-x86_64/shared/sysdep/checksum.h
deleted file mode 100644 (file)
index a5be903..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed under the GPL
- */
-
-#ifndef __UM_SYSDEP_CHECKSUM_H
-#define __UM_SYSDEP_CHECKSUM_H
-
-#include "linux/string.h"
-#include "linux/in6.h"
-#include "asm/uaccess.h"
-
-extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- *     Note: when you get a NULL pointer exception here this means someone
- *     passed in an incorrect kernel address to one of these functions.
- *
- *     If you use these functions directly please don't forget the
- *     access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-                                      int len, __wsum sum)
-{
-       memcpy(dst, src, len);
-       return(csum_partial(dst, len, sum));
-}
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src,
-                                         void *dst, int len, __wsum sum,
-                                         int *err_ptr)
-{
-        if (copy_from_user(dst, src, len)) {
-                *err_ptr = -EFAULT;
-                return (__force __wsum)-1;
-        }
-        return csum_partial(dst, len, sum);
-}
-
-/**
- * csum_fold - Fold and invert a 32bit checksum.
- * @sum: 32bit unfolded sum
- *
- * Fold a 32bit running checksum to 16bit and invert it. This is usually
- * the last step before putting a checksum into a packet.
- * Make sure not to mix with 64bit checksums.
- */
-static inline __sum16 csum_fold(__wsum sum)
-{
-       __asm__(
-               "  addl %1,%0\n"
-               "  adcl $0xffff,%0"
-               : "=r" (sum)
-               : "r" ((__force u32)sum << 16),
-                 "0" ((__force u32)sum & 0xffff0000)
-       );
-       return (__force __sum16)(~(__force u32)sum >> 16);
-}
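
For reference, the same fold without inline asm, as a sketch using plain integer types:

static inline unsigned short csum_fold_portable(unsigned int sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* fold high half into low */
	sum = (sum & 0xffff) + (sum >> 16);	/* absorb the final carry */
	return (unsigned short)~sum;
}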
-
-/**
- * csum_tcpudp_nofold - Compute an IPv4 pseudo header checksum.
- * @saddr: source address
- * @daddr: destination address
- * @len: length of packet
- * @proto: ip protocol of packet
- * @sum: initial sum to be added in (32bit unfolded)
- *
- * Returns the pseudo header checksum for the input data. The result is
- * 32bit unfolded.
- */
-static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
-                  unsigned short proto, __wsum sum)
-{
-       asm("  addl %1, %0\n"
-           "  adcl %2, %0\n"
-           "  adcl %3, %0\n"
-           "  adcl $0, %0\n"
-               : "=r" (sum)
-           : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum));
-       return sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-                                          unsigned short len,
-                                          unsigned short proto,
-                                          __wsum sum)
-{
-       return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/**
- * ip_fast_csum - Compute the IPv4 header checksum efficiently.
- * @iph: ipv4 header
- * @ihl: length of header / 4
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-       unsigned int sum;
-
-       asm(    "  movl (%1), %0\n"
-               "  subl $4, %2\n"
-               "  jbe 2f\n"
-               "  addl 4(%1), %0\n"
-               "  adcl 8(%1), %0\n"
-               "  adcl 12(%1), %0\n"
-               "1: adcl 16(%1), %0\n"
-               "  lea 4(%1), %1\n"
-               "  decl %2\n"
-               "  jne  1b\n"
-               "  adcl $0, %0\n"
-               "  movl %0, %2\n"
-               "  shrl $16, %0\n"
-               "  addw %w2, %w0\n"
-               "  adcl $0, %0\n"
-               "  notl %0\n"
-               "2:"
-       /* Since the input registers which are loaded with iph and ihl
-          are modified, we must also specify them as outputs, or gcc
-          will assume they contain their original values. */
-       : "=r" (sum), "=r" (iph), "=r" (ihl)
-       : "1" (iph), "2" (ihl)
-       : "memory");
-       return (__force __sum16)sum;
-}
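
What the unrolled asm above computes, restated as a plain reference loop (a sketch; ihl counts 32-bit words, so 2 * ihl 16-bit words are summed):

static inline unsigned short ip_csum_ref(const void *iph, unsigned int ihl)
{
	const unsigned short *p = iph;
	unsigned int i, sum = 0;

	for (i = 0; i < 2 * ihl; i++)	/* one's complement sum */
		sum += p[i];
	sum = (sum & 0xffff) + (sum >> 16);	/* fold to 16 bits */
	sum = (sum & 0xffff) + (sum >> 16);
	return (unsigned short)~sum;
}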
-
-static inline unsigned add32_with_carry(unsigned a, unsigned b)
-{
-        asm("addl %2,%0\n\t"
-            "adcl $0,%0"
-            : "=r" (a)
-            : "0" (a), "r" (b));
-        return a;
-}
-
-extern __sum16 ip_compute_csum(const void *buff, int len);
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/faultinfo.h b/arch/um/sys-x86_64/shared/sysdep/faultinfo.h
deleted file mode 100644 (file)
index cb917b0..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
- * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
- * Licensed under the GPL
- */
-
-#ifndef __FAULTINFO_X86_64_H
-#define __FAULTINFO_X86_64_H
-
-/* this structure contains the full arch-specific faultinfo
- * from the traps.
- * On i386, ptrace_faultinfo unfortunately doesn't provide
- * all the info, since trap_no is missing.
- * All common elements are defined at the same position in
- * both structures, thus making it easy to copy the
- * contents without knowledge about the structure elements.
- */
-struct faultinfo {
-        int error_code; /* in ptrace_faultinfo misleadingly called is_write */
-        unsigned long cr2; /* in ptrace_faultinfo called addr */
-        int trap_no; /* missing in ptrace_faultinfo */
-};
-
-#define FAULT_WRITE(fi) ((fi).error_code & 2)
-#define FAULT_ADDRESS(fi) ((fi).cr2)
-
-#define PTRACE_FULL_FAULTINFO 1
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/host_ldt.h b/arch/um/sys-x86_64/shared/sysdep/host_ldt.h
deleted file mode 100644 (file)
index e8b1be1..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef __ASM_HOST_LDT_X86_64_H
-#define __ASM_HOST_LDT_X86_64_H
-
-#include <asm/ldt.h>
-
-/*
- * macros stolen from include/asm-x86_64/desc.h
- */
-#define LDT_entry_a(info) \
-       ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-
-/* Don't allow setting of the lm bit. It is useless anyway because
- * 64bit system calls require __USER_CS. */
-#define LDT_entry_b(info) \
-       (((info)->base_addr & 0xff000000) | \
-       (((info)->base_addr & 0x00ff0000) >> 16) | \
-       ((info)->limit & 0xf0000) | \
-       (((info)->read_exec_only ^ 1) << 9) | \
-       ((info)->contents << 10) | \
-       (((info)->seg_not_present ^ 1) << 15) | \
-       ((info)->seg_32bit << 22) | \
-       ((info)->limit_in_pages << 23) | \
-       ((info)->useable << 20) | \
-       /* ((info)->lm << 21) | */ \
-       0x7000)
-
-#define LDT_empty(info) (\
-       (info)->base_addr       == 0    && \
-       (info)->limit           == 0    && \
-       (info)->contents        == 0    && \
-       (info)->read_exec_only  == 1    && \
-       (info)->seg_32bit       == 0    && \
-       (info)->limit_in_pages  == 0    && \
-       (info)->seg_not_present == 1    && \
-       (info)->useable         == 0    && \
-       (info)->lm              == 0)
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h b/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h
deleted file mode 100644 (file)
index a307237..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/time.h>
-#include <linux/elf.h>
-#include <linux/crypto.h>
-#include <asm/page.h>
-#include <asm/mman.h>
-
-#define DEFINE(sym, val) \
-       asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define DEFINE_STR1(x) #x
-#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " DEFINE_STR1(val) " " #val: : )
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
-       DEFINE(sym, offsetof(struct str, mem));
-
-void foo(void)
-{
-#include <common-offsets.h>
-}
diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace.h b/arch/um/sys-x86_64/shared/sysdep/ptrace.h
deleted file mode 100644 (file)
index 8ee8f8e..0000000
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- *
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_64_PTRACE_H
-#define __SYSDEP_X86_64_PTRACE_H
-
-#include "user_constants.h"
-#include "sysdep/faultinfo.h"
-
-#define MAX_REG_OFFSET (UM_FRAME_SIZE)
-#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
-
-#include "skas_ptregs.h"
-
-#define REGS_IP(r) ((r)[HOST_IP])
-#define REGS_SP(r) ((r)[HOST_SP])
-
-#define REGS_RBX(r) ((r)[HOST_RBX])
-#define REGS_RCX(r) ((r)[HOST_RCX])
-#define REGS_RDX(r) ((r)[HOST_RDX])
-#define REGS_RSI(r) ((r)[HOST_RSI])
-#define REGS_RDI(r) ((r)[HOST_RDI])
-#define REGS_RBP(r) ((r)[HOST_RBP])
-#define REGS_RAX(r) ((r)[HOST_RAX])
-#define REGS_R8(r) ((r)[HOST_R8])
-#define REGS_R9(r) ((r)[HOST_R9])
-#define REGS_R10(r) ((r)[HOST_R10])
-#define REGS_R11(r) ((r)[HOST_R11])
-#define REGS_R12(r) ((r)[HOST_R12])
-#define REGS_R13(r) ((r)[HOST_R13])
-#define REGS_R14(r) ((r)[HOST_R14])
-#define REGS_R15(r) ((r)[HOST_R15])
-#define REGS_CS(r) ((r)[HOST_CS])
-#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
-#define REGS_SS(r) ((r)[HOST_SS])
-
-#define HOST_FS_BASE 21
-#define HOST_GS_BASE 22
-#define HOST_DS 23
-#define HOST_ES 24
-#define HOST_FS 25
-#define HOST_GS 26
-
-/* Also defined in asm/ptrace-x86_64.h, but not in libc headers.  So, these
- * are already defined for kernel code, but not for userspace code.
- */
-#ifndef FS_BASE
-/* These aren't defined in ptrace.h, but exist in struct user_regs_struct,
- * which is what x86_64 ptrace actually uses.
- */
-#define FS_BASE (HOST_FS_BASE * sizeof(long))
-#define GS_BASE (HOST_GS_BASE * sizeof(long))
-#define DS (HOST_DS * sizeof(long))
-#define ES (HOST_ES * sizeof(long))
-#define FS (HOST_FS * sizeof(long))
-#define GS (HOST_GS * sizeof(long))
-#endif
-
-#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE])
-#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE])
-#define REGS_DS(r) ((r)[HOST_DS])
-#define REGS_ES(r) ((r)[HOST_ES])
-#define REGS_FS(r) ((r)[HOST_FS])
-#define REGS_GS(r) ((r)[HOST_GS])
-
-#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_RAX])
-
-#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res)
-
-#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
-
-#define REGS_SEGV_IS_FIXABLE(r) SEGV_IS_FIXABLE((r)->trap_type)
-
-#define REGS_FAULT_ADDR(r) ((r)->fault_addr)
-
-#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type)
-
-#define REGS_TRAP(r) ((r)->trap_type)
-
-#define REGS_ERR(r) ((r)->fault_type)
-
-struct uml_pt_regs {
-       unsigned long gp[MAX_REG_NR];
-       unsigned long fp[HOST_FP_SIZE];
-       struct faultinfo faultinfo;
-       long syscall;
-       int is_user;
-};
-
-#define EMPTY_UML_PT_REGS { }
-
-#define UPT_RBX(r) REGS_RBX((r)->gp)
-#define UPT_RCX(r) REGS_RCX((r)->gp)
-#define UPT_RDX(r) REGS_RDX((r)->gp)
-#define UPT_RSI(r) REGS_RSI((r)->gp)
-#define UPT_RDI(r) REGS_RDI((r)->gp)
-#define UPT_RBP(r) REGS_RBP((r)->gp)
-#define UPT_RAX(r) REGS_RAX((r)->gp)
-#define UPT_R8(r) REGS_R8((r)->gp)
-#define UPT_R9(r) REGS_R9((r)->gp)
-#define UPT_R10(r) REGS_R10((r)->gp)
-#define UPT_R11(r) REGS_R11((r)->gp)
-#define UPT_R12(r) REGS_R12((r)->gp)
-#define UPT_R13(r) REGS_R13((r)->gp)
-#define UPT_R14(r) REGS_R14((r)->gp)
-#define UPT_R15(r) REGS_R15((r)->gp)
-#define UPT_CS(r) REGS_CS((r)->gp)
-#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp)
-#define UPT_FS(r) REGS_FS((r)->gp)
-#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp)
-#define UPT_GS(r) REGS_GS((r)->gp)
-#define UPT_DS(r) REGS_DS((r)->gp)
-#define UPT_ES(r) REGS_ES((r)->gp)
-#define UPT_CS(r) REGS_CS((r)->gp)
-#define UPT_SS(r) REGS_SS((r)->gp)
-#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp)
-
-#define UPT_IP(r) REGS_IP((r)->gp)
-#define UPT_SP(r) REGS_SP((r)->gp)
-
-#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
-#define UPT_SYSCALL_NR(r) ((r)->syscall)
-#define UPT_SYSCALL_RET(r) UPT_RAX(r)
-
-extern int user_context(unsigned long sp);
-
-#define UPT_IS_USER(r) ((r)->is_user)
-
-#define UPT_SYSCALL_ARG1(r) UPT_RDI(r)
-#define UPT_SYSCALL_ARG2(r) UPT_RSI(r)
-#define UPT_SYSCALL_ARG3(r) UPT_RDX(r)
-#define UPT_SYSCALL_ARG4(r) UPT_R10(r)
-#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
-#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
-
-struct syscall_args {
-       unsigned long args[6];
-};
-
-#define SYSCALL_ARGS(r) ((struct syscall_args) \
-                        { .args = { UPT_SYSCALL_ARG1(r),        \
-                                    UPT_SYSCALL_ARG2(r),        \
-                                    UPT_SYSCALL_ARG3(r),        \
-                                    UPT_SYSCALL_ARG4(r),        \
-                                    UPT_SYSCALL_ARG5(r),        \
-                                    UPT_SYSCALL_ARG6(r) } } )
-
-#define UPT_REG(regs, reg) \
-       ({      unsigned long val;              \
-               switch(reg){                                            \
-               case R8: val = UPT_R8(regs); break;                     \
-               case R9: val = UPT_R9(regs); break;                     \
-               case R10: val = UPT_R10(regs); break;                   \
-               case R11: val = UPT_R11(regs); break;                   \
-               case R12: val = UPT_R12(regs); break;                   \
-               case R13: val = UPT_R13(regs); break;                   \
-               case R14: val = UPT_R14(regs); break;                   \
-               case R15: val = UPT_R15(regs); break;                   \
-               case RIP: val = UPT_IP(regs); break;                    \
-               case RSP: val = UPT_SP(regs); break;                    \
-               case RAX: val = UPT_RAX(regs); break;                   \
-               case RBX: val = UPT_RBX(regs); break;                   \
-               case RCX: val = UPT_RCX(regs); break;                   \
-               case RDX: val = UPT_RDX(regs); break;                   \
-               case RSI: val = UPT_RSI(regs); break;                   \
-               case RDI: val = UPT_RDI(regs); break;                   \
-               case RBP: val = UPT_RBP(regs); break;                   \
-               case ORIG_RAX: val = UPT_ORIG_RAX(regs); break;         \
-               case CS: val = UPT_CS(regs); break;                     \
-               case SS: val = UPT_SS(regs); break;                     \
-               case FS_BASE: val = UPT_FS_BASE(regs); break;           \
-               case GS_BASE: val = UPT_GS_BASE(regs); break;           \
-               case DS: val = UPT_DS(regs); break;                     \
-               case ES: val = UPT_ES(regs); break;                     \
-               case FS : val = UPT_FS (regs); break;                   \
-               case GS: val = UPT_GS(regs); break;                     \
-               case EFLAGS: val = UPT_EFLAGS(regs); break;             \
-               default :                                               \
-                       panic("Bad register in UPT_REG : %d\n", reg);   \
-                       val = -1;                                       \
-               }                                                       \
-               val;                                                    \
-       })
-
-
-#define UPT_SET(regs, reg, val) \
-       ({      unsigned long __upt_val = val;  \
-               switch(reg){                                            \
-               case R8: UPT_R8(regs) = __upt_val; break;               \
-               case R9: UPT_R9(regs) = __upt_val; break;               \
-               case R10: UPT_R10(regs) = __upt_val; break;             \
-               case R11: UPT_R11(regs) = __upt_val; break;             \
-               case R12: UPT_R12(regs) = __upt_val; break;             \
-               case R13: UPT_R13(regs) = __upt_val; break;             \
-               case R14: UPT_R14(regs) = __upt_val; break;             \
-               case R15: UPT_R15(regs) = __upt_val; break;             \
-               case RIP: UPT_IP(regs) = __upt_val; break;              \
-               case RSP: UPT_SP(regs) = __upt_val; break;              \
-               case RAX: UPT_RAX(regs) = __upt_val; break;             \
-               case RBX: UPT_RBX(regs) = __upt_val; break;             \
-               case RCX: UPT_RCX(regs) = __upt_val; break;             \
-               case RDX: UPT_RDX(regs) = __upt_val; break;             \
-               case RSI: UPT_RSI(regs) = __upt_val; break;             \
-               case RDI: UPT_RDI(regs) = __upt_val; break;             \
-               case RBP: UPT_RBP(regs) = __upt_val; break;             \
-               case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break;   \
-               case CS: UPT_CS(regs) = __upt_val; break;               \
-               case SS: UPT_SS(regs) = __upt_val; break;               \
-               case FS_BASE: UPT_FS_BASE(regs) = __upt_val; break;     \
-               case GS_BASE: UPT_GS_BASE(regs) = __upt_val; break;     \
-               case DS: UPT_DS(regs) = __upt_val; break;               \
-               case ES: UPT_ES(regs) = __upt_val; break;               \
-               case FS: UPT_FS(regs) = __upt_val; break;               \
-               case GS: UPT_GS(regs) = __upt_val; break;               \
-               case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break;       \
-               default:                                                \
-                       panic("Bad register in UPT_SET : %d\n", reg);   \
-                       break;                                          \
-               }                                                       \
-               __upt_val;                                              \
-       })
-
-#define UPT_SET_SYSCALL_RETURN(r, res) \
-       REGS_SET_SYSCALL_RETURN((r)->regs, (res))
-
-#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
-
-#define UPT_SEGV_IS_FIXABLE(r) REGS_SEGV_IS_FIXABLE(&r->skas)
-
-#define UPT_FAULTINFO(r) (&(r)->faultinfo)
-
-static inline void arch_init_registers(int pid)
-{
-}
-
-#endif
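The UPT_REG()/UPT_SET() macros above rely on GNU C statement expressions: a ({ ... }) block whose final expression supplies the value, which lets a multi-way switch be used where an expression is expected. A minimal self-contained sketch of the idiom, with hypothetical names (reg_file, REG_A, REG_B):

#include <stdio.h>

enum demo_reg { REG_A, REG_B };
struct reg_file { unsigned long a, b; };

/* Statement expression: the switch dispatches on the register number
 * and __val, the last expression in the block, becomes the result. */
#define REG_GET(rf, reg)                                        \
	({      unsigned long __val;                            \
		switch (reg) {                                  \
		case REG_A: __val = (rf)->a; break;             \
		case REG_B: __val = (rf)->b; break;             \
		default:    __val = -1;      break;             \
		}                                               \
		__val;                                          \
	})

int main(void)
{
	struct reg_file rf = { .a = 1, .b = 2 };

	printf("%lu\n", REG_GET(&rf, REG_B));	/* prints 2 */
	return 0;
}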
diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h b/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h
deleted file mode 100644 (file)
index 4dbccdb..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_64_PTRACE_USER_H__
-#define __SYSDEP_X86_64_PTRACE_USER_H__
-
-#define __FRAME_OFFSETS
-#include <sys/ptrace.h>
-#include <linux/ptrace.h>
-#include <asm/ptrace.h>
-#undef __FRAME_OFFSETS
-#include "user_constants.h"
-
-#define PT_INDEX(off) ((off) / sizeof(unsigned long))
-
-#define PT_SYSCALL_NR(regs) ((regs)[PT_INDEX(ORIG_RAX)])
-#define PT_SYSCALL_NR_OFFSET (ORIG_RAX)
-
-#define PT_SYSCALL_ARG1(regs) (((unsigned long *) (regs))[PT_INDEX(RDI)])
-#define PT_SYSCALL_ARG1_OFFSET (RDI)
-
-#define PT_SYSCALL_ARG2(regs) (((unsigned long *) (regs))[PT_INDEX(RSI)])
-#define PT_SYSCALL_ARG2_OFFSET (RSI)
-
-#define PT_SYSCALL_ARG3(regs) (((unsigned long *) (regs))[PT_INDEX(RDX)])
-#define PT_SYSCALL_ARG3_OFFSET (RDX)
-
-#define PT_SYSCALL_ARG4(regs) (((unsigned long *) (regs))[PT_INDEX(RCX)])
-#define PT_SYSCALL_ARG4_OFFSET (RCX)
-
-#define PT_SYSCALL_ARG5(regs) (((unsigned long *) (regs))[PT_INDEX(R8)])
-#define PT_SYSCALL_ARG5_OFFSET (R8)
-
-#define PT_SYSCALL_ARG6(regs) (((unsigned long *) (regs))[PT_INDEX(R9)])
-#define PT_SYSCALL_ARG6_OFFSET (R9)
-
-#define PT_SYSCALL_RET_OFFSET (RAX)
-
-#define PT_IP_OFFSET (RIP)
-#define PT_IP(regs) ((regs)[PT_INDEX(RIP)])
-
-#define PT_SP_OFFSET (RSP)
-#define PT_SP(regs) ((regs)[PT_INDEX(RSP)])
-
-#define PT_ORIG_RAX_OFFSET (ORIG_RAX)
-#define PT_ORIG_RAX(regs) ((regs)[PT_INDEX(ORIG_RAX)])
-
-/*
- * x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though
- * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the
- * 2.4 name and value for 2.4 host compatibility.
- */
-#ifndef PTRACE_OLDSETOPTIONS
-#define PTRACE_OLDSETOPTIONS 21
-#endif
-
-/*
- * These are before the system call, so the system call number is RAX
- * rather than ORIG_RAX, and arg4 is R10 rather than RCX
- */
-#define REGS_SYSCALL_NR PT_INDEX(RAX)
-#define REGS_SYSCALL_ARG1 PT_INDEX(RDI)
-#define REGS_SYSCALL_ARG2 PT_INDEX(RSI)
-#define REGS_SYSCALL_ARG3 PT_INDEX(RDX)
-#define REGS_SYSCALL_ARG4 PT_INDEX(R10)
-#define REGS_SYSCALL_ARG5 PT_INDEX(R8)
-#define REGS_SYSCALL_ARG6 PT_INDEX(R9)
-
-#define REGS_IP_INDEX PT_INDEX(RIP)
-#define REGS_SP_INDEX PT_INDEX(RSP)
-
-#define FP_SIZE (HOST_FP_SIZE)
-
-#endif
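The PT_INDEX() macro above converts the byte offsets supplied by the host's frame-offset constants into indices of an unsigned long register array. A small sketch of the idiom with a made-up offset constant:

#include <assert.h>

#define PT_INDEX(off) ((off) / sizeof(unsigned long))

#define DEMO_RDI_OFFSET (14 * sizeof(unsigned long))	/* hypothetical */

int main(void)
{
	unsigned long regs[27] = { 0 };

	regs[PT_INDEX(DEMO_RDI_OFFSET)] = 42;	/* same slot as regs[14] */
	assert(regs[14] == 42);
	return 0;
}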
diff --git a/arch/um/sys-x86_64/shared/sysdep/sc.h b/arch/um/sys-x86_64/shared/sysdep/sc.h
deleted file mode 100644 (file)
index 8aee45b..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef __SYSDEP_X86_64_SC_H
-#define __SYSDEP_X86_64_SC_H
-
-/* Copyright (C) 2003 - 2004 PathScale, Inc
- * Released under the GPL
- */
-
-#include <user_constants.h>
-
-#define SC_OFFSET(sc, field) \
-        *((unsigned long *) &(((char *) (sc))[HOST_##field]))
-
-#define SC_RBX(sc) SC_OFFSET(sc, SC_RBX)
-#define SC_RCX(sc) SC_OFFSET(sc, SC_RCX)
-#define SC_RDX(sc) SC_OFFSET(sc, SC_RDX)
-#define SC_RSI(sc) SC_OFFSET(sc, SC_RSI)
-#define SC_RDI(sc) SC_OFFSET(sc, SC_RDI)
-#define SC_RBP(sc) SC_OFFSET(sc, SC_RBP)
-#define SC_RAX(sc) SC_OFFSET(sc, SC_RAX)
-#define SC_R8(sc) SC_OFFSET(sc, SC_R8)
-#define SC_R9(sc) SC_OFFSET(sc, SC_R9)
-#define SC_R10(sc) SC_OFFSET(sc, SC_R10)
-#define SC_R11(sc) SC_OFFSET(sc, SC_R11)
-#define SC_R12(sc) SC_OFFSET(sc, SC_R12)
-#define SC_R13(sc) SC_OFFSET(sc, SC_R13)
-#define SC_R14(sc) SC_OFFSET(sc, SC_R14)
-#define SC_R15(sc) SC_OFFSET(sc, SC_R15)
-#define SC_IP(sc) SC_OFFSET(sc, SC_IP)
-#define SC_SP(sc) SC_OFFSET(sc, SC_SP)
-#define SC_CR2(sc) SC_OFFSET(sc, SC_CR2)
-#define SC_ERR(sc) SC_OFFSET(sc, SC_ERR)
-#define SC_TRAPNO(sc) SC_OFFSET(sc, SC_TRAPNO)
-#define SC_CS(sc) SC_OFFSET(sc, SC_CS)
-#define SC_FS(sc) SC_OFFSET(sc, SC_FS)
-#define SC_GS(sc) SC_OFFSET(sc, SC_GS)
-#define SC_EFLAGS(sc) SC_OFFSET(sc, SC_EFLAGS)
-#define SC_SIGMASK(sc) SC_OFFSET(sc, SC_SIGMASK)
-#define SC_SS(sc) SC_OFFSET(sc, SC_SS)
-#if 0
-#define SC_ORIG_RAX(sc) SC_OFFSET(sc, SC_ORIG_RAX)
-#define SC_DS(sc) SC_OFFSET(sc, SC_DS)
-#define SC_ES(sc) SC_OFFSET(sc, SC_ES)
-#endif
-
-#endif
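SC_OFFSET() above reads a register slot out of the host sigcontext by byte offset, using constants generated into user_constants.h at build time. A simplified, self-contained sketch (the real macro pastes HOST_##field; here the offset is passed directly and HOST_DEMO_IP is invented):

#include <stdio.h>

#define HOST_DEMO_IP 16		/* hypothetical byte offset of the IP slot */

#define SC_OFFSET(sc, off) \
	(*((unsigned long *) &(((char *) (sc))[off])))

int main(void)
{
	unsigned long sc[8] = { 0 };	/* stands in for a struct sigcontext */

	sc[2] = 0xdeadbeefUL;		/* the slot at byte offset 16 */
	printf("0x%lx\n", SC_OFFSET(sc, HOST_DEMO_IP));	/* 0xdeadbeef */
	return 0;
}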
diff --git a/arch/um/sys-x86_64/shared/sysdep/sigcontext.h b/arch/um/sys-x86_64/shared/sysdep/sigcontext.h
deleted file mode 100644 (file)
index 0155133..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_64_SIGCONTEXT_H
-#define __SYSDEP_X86_64_SIGCONTEXT_H
-
-#include <sysdep/sc.h>
-
-#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
-
-#define GET_FAULTINFO_FROM_SC(fi, sc) \
-       { \
-               (fi).cr2 = SC_CR2(sc); \
-               (fi).error_code = SC_ERR(sc); \
-               (fi).trap_no = SC_TRAPNO(sc); \
-       }
-
-/* This is a page fault */
-#define SEGV_IS_FIXABLE(fi)    ((fi)->trap_no == 14)
-
-/* There is no broken SKAS API here that fails to pass trap_no. */
-#define SEGV_MAYBE_FIXABLE(fi) 0
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h b/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h
deleted file mode 100644 (file)
index 95db4be..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_64_SKAS_PTRACE_H
-#define __SYSDEP_X86_64_SKAS_PTRACE_H
-
-struct ptrace_faultinfo {
-        int is_write;
-        unsigned long addr;
-};
-
-struct ptrace_ldt {
-        int func;
-        void *ptr;
-        unsigned long bytecount;
-};
-
-#define PTRACE_LDT 54
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/stub.h b/arch/um/sys-x86_64/shared/sysdep/stub.h
deleted file mode 100644 (file)
index 3432aa2..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_STUB_H
-#define __SYSDEP_STUB_H
-
-#include <sys/mman.h>
-#include <asm/unistd.h>
-#include <sysdep/ptrace_user.h>
-#include "as-layout.h"
-#include "stub-data.h"
-#include "kern_constants.h"
-
-extern void stub_segv_handler(int sig);
-extern void stub_clone_handler(void);
-
-#define STUB_SYSCALL_RET PT_INDEX(RAX)
-#define STUB_MMAP_NR __NR_mmap
-#define MMAP_OFFSET(o) (o)
-
-#define __syscall_clobber "r11","rcx","memory"
-#define __syscall "syscall"
-
-static inline long stub_syscall0(long syscall)
-{
-       long ret;
-
-       __asm__ volatile (__syscall
-               : "=a" (ret)
-               : "0" (syscall) : __syscall_clobber );
-
-       return ret;
-}
-
-static inline long stub_syscall2(long syscall, long arg1, long arg2)
-{
-       long ret;
-
-       __asm__ volatile (__syscall
-               : "=a" (ret)
-               : "0" (syscall), "D" (arg1), "S" (arg2) : __syscall_clobber );
-
-       return ret;
-}
-
-static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
-{
-       long ret;
-
-       __asm__ volatile (__syscall
-               : "=a" (ret)
-               : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3)
-               : __syscall_clobber );
-
-       return ret;
-}
-
-static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
-                                long arg4)
-{
-       long ret;
-
-       __asm__ volatile ("movq %5,%%r10 ; " __syscall
-               : "=a" (ret)
-               : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3),
-                 "g" (arg4)
-               : __syscall_clobber, "r10" );
-
-       return ret;
-}
-
-static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
-                                long arg4, long arg5)
-{
-       long ret;
-
-       __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall
-               : "=a" (ret)
-               : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3),
-                 "g" (arg4), "g" (arg5)
-               : __syscall_clobber, "r10", "r8" );
-
-       return ret;
-}
-
-static inline void trap_myself(void)
-{
-       __asm("int3");
-}
-
-static inline void remap_stack(long fd, unsigned long offset)
-{
-       __asm__ volatile ("movq %4,%%r10 ; movq %5,%%r8 ; "
-                         "movq %6, %%r9; " __syscall "; movq %7, %%rbx ; "
-                         "movq %%rax, (%%rbx)":
-                         : "a" (STUB_MMAP_NR), "D" (STUB_DATA),
-                           "S" (UM_KERN_PAGE_SIZE),
-                           "d" (PROT_READ | PROT_WRITE),
-                            "g" (MAP_FIXED | MAP_SHARED), "g" (fd),
-                           "g" (offset),
-                           "i" (&((struct stub_data *) STUB_DATA)->err)
-                         : __syscall_clobber, "r10", "r8", "r9" );
-}
-
-#endif
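The header defines zero-, two-, three-, four- and five-argument wrappers but no single-argument one; if it were needed it would follow the same pattern, with the lone argument in %rdi. A sketch, not part of the original file:

static inline long stub_syscall1(long syscall, long arg1)
{
	long ret;

	/* x86-64 syscall ABI: number in rax, first argument in rdi;
	 * the syscall instruction clobbers rcx and r11. */
	__asm__ volatile ("syscall"
		: "=a" (ret)
		: "0" (syscall), "D" (arg1)
		: "r11", "rcx", "memory");

	return ret;
}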
diff --git a/arch/um/sys-x86_64/shared/sysdep/syscalls.h b/arch/um/sys-x86_64/shared/sysdep/syscalls.h
deleted file mode 100644 (file)
index 7cfb0b0..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_X86_64_SYSCALLS_H__
-#define __SYSDEP_X86_64_SYSCALLS_H__
-
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <kern_constants.h>
-
-typedef long syscall_handler_t(void);
-
-extern syscall_handler_t *sys_call_table[];
-
-#define EXECUTE_SYSCALL(syscall, regs) \
-       (((long (*)(long, long, long, long, long, long)) \
-         (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(&regs->regs), \
-                                     UPT_SYSCALL_ARG2(&regs->regs), \
-                                     UPT_SYSCALL_ARG3(&regs->regs), \
-                                     UPT_SYSCALL_ARG4(&regs->regs), \
-                                     UPT_SYSCALL_ARG5(&regs->regs), \
-                                     UPT_SYSCALL_ARG6(&regs->regs)))
-
-extern long old_mmap(unsigned long addr, unsigned long len,
-                    unsigned long prot, unsigned long flags,
-                    unsigned long fd, unsigned long pgoff);
-extern syscall_handler_t sys_modify_ldt;
-extern syscall_handler_t sys_arch_prctl;
-
-#endif
diff --git a/arch/um/sys-x86_64/shared/sysdep/system.h b/arch/um/sys-x86_64/shared/sysdep/system.h
deleted file mode 100644 (file)
index d1b93c4..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-#ifndef _ASM_X86_SYSTEM_H_
-#define _ASM_X86_SYSTEM_H_
-
-#include <asm/asm.h>
-#include <asm/segment.h>
-#include <asm/cpufeature.h>
-#include <asm/cmpxchg.h>
-#include <asm/nops.h>
-
-#include <linux/kernel.h>
-#include <linux/irqflags.h>
-
-/* entries in ARCH_DLINFO: */
-#ifdef CONFIG_IA32_EMULATION
-# define AT_VECTOR_SIZE_ARCH 2
-#else
-# define AT_VECTOR_SIZE_ARCH 1
-#endif
-
-extern unsigned long arch_align_stack(unsigned long sp);
-
-void default_idle(void);
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- */
-#ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
-#else
-#define mb()   asm volatile("mfence":::"memory")
-#define rmb()  asm volatile("lfence":::"memory")
-#define wmb()  asm volatile("sfence" ::: "memory")
-#endif
-
-/**
- * read_barrier_depends - Flush all pending reads that subsequent reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data returned by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *     CPU 0                           CPU 1
- *
- *     b = 2;
- *     memory_barrier();
- *     p = &b;                         q = p;
- *                                     read_barrier_depends();
- *                                     d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *     CPU 0                           CPU 1
- *
- *     a = 2;
- *     memory_barrier();
- *     b = 3;                          y = b;
- *                                     read_barrier_depends();
- *                                     x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends() do { } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb()       mb()
-#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb()     rmb()
-#else
-# define smp_rmb()     barrier()
-#endif
-#ifdef CONFIG_X86_OOSTORE
-# define smp_wmb()     wmb()
-#else
-# define smp_wmb()     barrier()
-#endif
-#define smp_read_barrier_depends()     read_barrier_depends()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#else
-#define smp_mb()       barrier()
-#define smp_rmb()      barrier()
-#define smp_wmb()      barrier()
-#define smp_read_barrier_depends()     do { } while (0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
-
-/*
- * Stop RDTSC speculation. This is needed when you need to use RDTSC
- * (or get_cycles or vread that possibly accesses the TSC) in a defined
- * code region.
- *
- * (Could use a three-way alternative for this if there were one.)
- */
-static inline void rdtsc_barrier(void)
-{
-       alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-       alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
-}
-
-#endif
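The pairing the read_barrier_depends() comment describes looks like this in practice: the writer publishes a pointer only after its data stores are ordered with smp_wmb(), and the reader orders the dereference after the pointer load. A kernel-style sketch with illustrative names (payload, published):

static int payload;
static int *published;

static void writer(void)
{
	payload = 42;
	smp_wmb();		/* order the data store before the publish */
	published = &payload;
}

static int reader(void)
{
	int *p = published;

	if (!p)
		return -1;
	smp_rmb();		/* pairs with the writer's smp_wmb(); on Alpha,
				 * read_barrier_depends() would suffice here
				 * because the second read depends on p */
	return *p;		/* observes 42 once the pointer is visible */
}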
diff --git a/arch/um/sys-x86_64/shared/sysdep/tls.h b/arch/um/sys-x86_64/shared/sysdep/tls.h
deleted file mode 100644 (file)
index 18c000d..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _SYSDEP_TLS_H
-#define _SYSDEP_TLS_H
-
-# ifndef __KERNEL__
-
-/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which
- * may be named user_desc (but in 2.4 and in header matching its API was named
- * modify_ldt_ldt_s). */
-
-typedef struct um_dup_user_desc {
-       unsigned int  entry_number;
-       unsigned int  base_addr;
-       unsigned int  limit;
-       unsigned int  seg_32bit:1;
-       unsigned int  contents:2;
-       unsigned int  read_exec_only:1;
-       unsigned int  limit_in_pages:1;
-       unsigned int  seg_not_present:1;
-       unsigned int  useable:1;
-       unsigned int  lm:1;
-} user_desc_t;
-
-# else /* __KERNEL__ */
-
-#  include <ldt.h>
-typedef struct user_desc user_desc_t;
-
-# endif /* __KERNEL__ */
-#endif /* _SYSDEP_TLS_H */
diff --git a/arch/um/sys-x86_64/shared/sysdep/vm-flags.h b/arch/um/sys-x86_64/shared/sysdep/vm-flags.h
deleted file mode 100644 (file)
index 3978e55..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
- * Copyright 2003 PathScale, Inc.
- * Licensed under the GPL
- */
-
-#ifndef __VM_FLAGS_X86_64_H
-#define __VM_FLAGS_X86_64_H
-
-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
-       VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \
-       VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#endif
diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
deleted file mode 100644 (file)
index b6b65c7..0000000
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * Copyright (C) 2003 PathScale, Inc.
- * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/personality.h>
-#include <linux/ptrace.h>
-#include <linux/kernel.h>
-#include <asm/unistd.h>
-#include <asm/uaccess.h>
-#include <asm/ucontext.h>
-#include "frame_kern.h"
-#include "skas.h"
-
-void copy_sc(struct uml_pt_regs *regs, void *from)
-{
-       struct sigcontext *sc = from;
-
-#define GETREG(regs, regno, sc, regname)                               \
-       (regs)->gp[(regno) / sizeof(unsigned long)] = (sc)->regname
-
-       GETREG(regs, R8, sc, r8);
-       GETREG(regs, R9, sc, r9);
-       GETREG(regs, R10, sc, r10);
-       GETREG(regs, R11, sc, r11);
-       GETREG(regs, R12, sc, r12);
-       GETREG(regs, R13, sc, r13);
-       GETREG(regs, R14, sc, r14);
-       GETREG(regs, R15, sc, r15);
-       GETREG(regs, RDI, sc, di);
-       GETREG(regs, RSI, sc, si);
-       GETREG(regs, RBP, sc, bp);
-       GETREG(regs, RBX, sc, bx);
-       GETREG(regs, RDX, sc, dx);
-       GETREG(regs, RAX, sc, ax);
-       GETREG(regs, RCX, sc, cx);
-       GETREG(regs, RSP, sc, sp);
-       GETREG(regs, RIP, sc, ip);
-       GETREG(regs, EFLAGS, sc, flags);
-       GETREG(regs, CS, sc, cs);
-
-#undef GETREG
-}
-
-static int copy_sc_from_user(struct pt_regs *regs,
-                            struct sigcontext __user *from,
-                            struct _fpstate __user *fpp)
-{
-       struct user_i387_struct fp;
-       int err = 0;
-
-#define GETREG(regs, regno, sc, regname)                               \
-       __get_user((regs)->regs.gp[(regno) / sizeof(unsigned long)],    \
-                  &(sc)->regname)
-
-       err |= GETREG(regs, R8, from, r8);
-       err |= GETREG(regs, R9, from, r9);
-       err |= GETREG(regs, R10, from, r10);
-       err |= GETREG(regs, R11, from, r11);
-       err |= GETREG(regs, R12, from, r12);
-       err |= GETREG(regs, R13, from, r13);
-       err |= GETREG(regs, R14, from, r14);
-       err |= GETREG(regs, R15, from, r15);
-       err |= GETREG(regs, RDI, from, di);
-       err |= GETREG(regs, RSI, from, si);
-       err |= GETREG(regs, RBP, from, bp);
-       err |= GETREG(regs, RBX, from, bx);
-       err |= GETREG(regs, RDX, from, dx);
-       err |= GETREG(regs, RAX, from, ax);
-       err |= GETREG(regs, RCX, from, cx);
-       err |= GETREG(regs, RSP, from, sp);
-       err |= GETREG(regs, RIP, from, ip);
-       err |= GETREG(regs, EFLAGS, from, flags);
-       err |= GETREG(regs, CS, from, cs);
-       if (err)
-               return 1;
-
-#undef GETREG
-
-       err = copy_from_user(&fp, fpp, sizeof(struct user_i387_struct));
-       if (err)
-               return 1;
-
-       err = restore_fp_registers(userspace_pid[current_thread_info()->cpu],
-                                  (unsigned long *) &fp);
-       if (err < 0) {
-               printk(KERN_ERR "copy_sc_from_user - "
-                      "restore_fp_registers failed, errno = %d\n",
-                      -err);
-               return 1;
-       }
-
-       return 0;
-}
-
-static int copy_sc_to_user(struct sigcontext __user *to,
-                          struct _fpstate __user *to_fp, struct pt_regs *regs,
-                          unsigned long mask, unsigned long sp)
-{
-       struct faultinfo * fi = &current->thread.arch.faultinfo;
-       struct user_i387_struct fp;
-       int err = 0;
-
-       err |= __put_user(0, &to->gs);
-       err |= __put_user(0, &to->fs);
-
-#define PUTREG(regs, regno, sc, regname)                               \
-       __put_user((regs)->regs.gp[(regno) / sizeof(unsigned long)],    \
-                  &(sc)->regname)
-
-       err |= PUTREG(regs, RDI, to, di);
-       err |= PUTREG(regs, RSI, to, si);
-       err |= PUTREG(regs, RBP, to, bp);
-       /*
-        * Must use original RSP, which is passed in, rather than what's in
-        * the pt_regs, because that's already been updated to point at the
-        * signal frame.
-        */
-       err |= __put_user(sp, &to->sp);
-       err |= PUTREG(regs, RBX, to, bx);
-       err |= PUTREG(regs, RDX, to, dx);
-       err |= PUTREG(regs, RCX, to, cx);
-       err |= PUTREG(regs, RAX, to, ax);
-       err |= PUTREG(regs, R8, to, r8);
-       err |= PUTREG(regs, R9, to, r9);
-       err |= PUTREG(regs, R10, to, r10);
-       err |= PUTREG(regs, R11, to, r11);
-       err |= PUTREG(regs, R12, to, r12);
-       err |= PUTREG(regs, R13, to, r13);
-       err |= PUTREG(regs, R14, to, r14);
-       err |= PUTREG(regs, R15, to, r15);
-       err |= PUTREG(regs, CS, to, cs); /* XXX x86_64 doesn't do this */
-
-       err |= __put_user(fi->cr2, &to->cr2);
-       err |= __put_user(fi->error_code, &to->err);
-       err |= __put_user(fi->trap_no, &to->trapno);
-
-       err |= PUTREG(regs, RIP, to, ip);
-       err |= PUTREG(regs, EFLAGS, to, flags);
-#undef PUTREG
-
-       err |= __put_user(mask, &to->oldmask);
-       if (err)
-               return 1;
-
-       err = save_fp_registers(userspace_pid[current_thread_info()->cpu],
-                               (unsigned long *) &fp);
-       if (err < 0) {
-               printk(KERN_ERR "copy_sc_to_user - save_fp_registers "
-                      "failed, errno = %d\n", -err);
-               return 1;
-       }
-
-       if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct)))
-               return 1;
-
-       return err;
-}
-
-struct rt_sigframe
-{
-       char __user *pretcode;
-       struct ucontext uc;
-       struct siginfo info;
-       struct _fpstate fpstate;
-};
-
-int setup_signal_stack_si(unsigned long stack_top, int sig,
-                         struct k_sigaction *ka, struct pt_regs * regs,
-                         siginfo_t *info, sigset_t *set)
-{
-       struct rt_sigframe __user *frame;
-       unsigned long save_sp = PT_REGS_RSP(regs);
-       int err = 0;
-       struct task_struct *me = current;
-
-       frame = (struct rt_sigframe __user *)
-               round_down(stack_top - sizeof(struct rt_sigframe), 16);
-       /* Subtract 128 for a red zone and 8 for proper alignment */
-       frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
-
-       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-               goto out;
-
-       if (ka->sa.sa_flags & SA_SIGINFO) {
-               err |= copy_siginfo_to_user(&frame->info, info);
-               if (err)
-                       goto out;
-       }
-
-       /*
-        * Update SP now because the page fault handler refuses to extend
-        * the stack if the faulting address is too far below the current
-        * SP, which the frame now certainly is.  If there's an error, the original
-        * value is restored on the way out.
-        * When writing the sigcontext to the stack, we have to write the
-        * original value, so that's passed to copy_sc_to_user, which does
-        * the right thing with it.
-        */
-       PT_REGS_RSP(regs) = (unsigned long) frame;
-
-       /* Create the ucontext.  */
-       err |= __put_user(0, &frame->uc.uc_flags);
-       err |= __put_user(0, &frame->uc.uc_link);
-       err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-       err |= __put_user(sas_ss_flags(save_sp),
-                         &frame->uc.uc_stack.ss_flags);
-       err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
-       err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
-                              set->sig[0], save_sp);
-       err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
-       if (sizeof(*set) == 16) {
-               __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
-               __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
-       }
-       else
-               err |= __copy_to_user(&frame->uc.uc_sigmask, set,
-                                     sizeof(*set));
-
-       /*
-        * Set up to return from userspace.  If provided, use a stub
-        * already in userspace.
-        */
-       /* x86-64 should always use SA_RESTORER. */
-       if (ka->sa.sa_flags & SA_RESTORER)
-               err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
-       else
-               /* could use a vstub here */
-               goto restore_sp;
-
-       if (err)
-               goto restore_sp;
-
-       /* Set up registers for signal handler */
-       {
-               struct exec_domain *ed = current_thread_info()->exec_domain;
-               if (unlikely(ed && ed->signal_invmap && sig < 32))
-                       sig = ed->signal_invmap[sig];
-       }
-
-       PT_REGS_RDI(regs) = sig;
-       /* In case the signal handler was declared without prototypes */
-       PT_REGS_RAX(regs) = 0;
-
-       /*
-        * This also works for non SA_SIGINFO handlers because they expect the
-        * next argument after the signal number on the stack.
-        */
-       PT_REGS_RSI(regs) = (unsigned long) &frame->info;
-       PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
-       PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
- out:
-       return err;
-
-restore_sp:
-       PT_REGS_RSP(regs) = save_sp;
-       return err;
-}
-
-long sys_rt_sigreturn(struct pt_regs *regs)
-{
-       unsigned long sp = PT_REGS_SP(&current->thread.regs);
-       struct rt_sigframe __user *frame =
-               (struct rt_sigframe __user *)(sp - 8);
-       struct ucontext __user *uc = &frame->uc;
-       sigset_t set;
-
-       if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
-               goto segfault;
-
-       sigdelsetmask(&set, ~_BLOCKABLE);
-
-       spin_lock_irq(&current->sighand->siglock);
-       current->blocked = set;
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
-
-       if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext,
-                             &frame->fpstate))
-               goto segfault;
-
-       /* Avoid ERESTART handling */
-       PT_REGS_SYSCALL_NR(&current->thread.regs) = -1;
-       return PT_REGS_SYSCALL_RET(&current->thread.regs);
-
- segfault:
-       force_sig(SIGSEGV, current);
-       return 0;
-}
diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S
deleted file mode 100644 (file)
index 20e4a96..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-#include "as-layout.h"
-
-       .globl syscall_stub
-.section .__syscall_stub, "ax"
-syscall_stub:
-       syscall
-       /* We don't have 64-bit constants, so this constructs the address
-        * we need.
-        */
-       movq    $(STUB_DATA >> 32), %rbx
-       salq    $32, %rbx
-       movq    $(STUB_DATA & 0xffffffff), %rcx
-       or      %rcx, %rbx
-       movq    %rax, (%rbx)
-       int3
-
-       .globl batch_syscall_stub
-batch_syscall_stub:
-       mov     $(STUB_DATA >> 32), %rbx
-       sal     $32, %rbx
-       mov     $(STUB_DATA & 0xffffffff), %rax
-       or      %rax, %rbx
-       /* load pointer to first operation */
-       mov     %rbx, %rsp
-       add     $0x10, %rsp
-again:
-       /* load length of additional data */
-       mov     0x0(%rsp), %rax
-
-       /* if(length == 0) : end of list */
-       /* write possible 0 to header */
-       mov     %rax, 8(%rbx)
-       cmp     $0, %rax
-       jz      done
-
-       /* save current pointer */
-       mov     %rsp, 8(%rbx)
-
-       /* skip additional data */
-       add     %rax, %rsp
-
-       /* load syscall-# */
-       pop     %rax
-
-       /* load syscall params */
-       pop     %rdi
-       pop     %rsi
-       pop     %rdx
-       pop     %r10
-       pop     %r8
-       pop     %r9
-
-       /* execute syscall */
-       syscall
-
-       /* check return value */
-       pop     %rcx
-       cmp     %rcx, %rax
-       je      again
-
-done:
-       /* save return value */
-       mov     %rax, (%rbx)
-
-       /* stop */
-       int3
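Reconstructed from the assembly above, the record format batch_syscall_stub walks looks roughly like the following. The struct is illustrative only (the variable-length data between the first two fields cannot be expressed as a literal C member) and does not appear in the original sources:

struct stub_batch_entry {		/* hypothetical C view */
	unsigned long data_len;		/* 0 terminates the batch */
	/* data_len bytes of inline data sit here, skipped by the stub */
	unsigned long nr;		/* syscall number, popped into rax */
	unsigned long args[6];		/* rdi, rsi, rdx, r10, r8, r9 */
	unsigned long expected;		/* batch stops if rax != expected */
};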
diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c
deleted file mode 100644 (file)
index ced051a..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <signal.h>
-#include "as-layout.h"
-#include "sysdep/stub.h"
-#include "sysdep/faultinfo.h"
-#include "sysdep/sigcontext.h"
-
-void __attribute__ ((__section__ (".__syscall_stub")))
-stub_segv_handler(int sig)
-{
-       struct ucontext *uc;
-
-       __asm__ __volatile__("movq %%rdx, %0" : "=g" (uc) :);
-       GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA),
-                             &uc->uc_mcontext);
-       trap_myself();
-}
-
diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c
deleted file mode 100644 (file)
index 47d469e..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c
- * with some changes for UML.
- */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <kern_constants.h>
-
-#define __NO_STUBS
-
-/*
- * Below you can see, in terms of #define's, the differences between the x86-64
- * and the UML syscall table.
- */
-
-/* Not going to be implemented by UML, since we have no hardware. */
-#define stub_iopl sys_ni_syscall
-#define sys_ioperm sys_ni_syscall
-
-/*
- * The UML TLS problem. Note that x86_64 does not implement this, so the below
- * is needed only for ia32 compatibility.
- */
-
-/* On UML we call it this way ("old" means it's not mmap2) */
-#define sys_mmap old_mmap
-
-#define stub_clone sys_clone
-#define stub_fork sys_fork
-#define stub_vfork sys_vfork
-#define stub_execve sys_execve
-#define stub_rt_sigsuspend sys_rt_sigsuspend
-#define stub_sigaltstack sys_sigaltstack
-#define stub_rt_sigreturn sys_rt_sigreturn
-
-#define __SYSCALL(nr, sym) extern asmlinkage void sym(void);
-#undef _ASM_X86_UNISTD_64_H
-#include "../../x86/include/asm/unistd_64.h"
-
-#undef __SYSCALL
-#define __SYSCALL(nr, sym) [ nr ] = sym,
-#undef _ASM_X86_UNISTD_64_H
-
-typedef void (*sys_call_ptr_t)(void);
-
-extern void sys_ni_syscall(void);
-
-/*
- * We used to have a trick here which made sure that holes in the
- * x86_64 table were filled in with sys_ni_syscall, but a comment in
- * unistd_64.h says that holes aren't allowed, so the trick was
- * removed.
- * The trick looked like this
- *     [0 ... UM_NR_syscall_max] = &sys_ni_syscall
- * before including unistd_64.h - the later initializations overwrote
- * the sys_ni_syscall filler.
- */
-
-sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
-#include "../../x86/include/asm/unistd_64.h"
-};
-
-int syscall_table_size = sizeof(sys_call_table);
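The double inclusion of unistd_64.h above is the classic X-macro trick: the same list expands once to declare every handler and once to build the table. A self-contained sketch with invented names (DEMO_SYSCALLS, demo_read, demo_write):

#include <stdio.h>

#define DEMO_SYSCALLS \
	__SYSCALL(0, demo_read) \
	__SYSCALL(1, demo_write)

/* First expansion: define one function per entry. */
#define __SYSCALL(nr, sym) static void sym(void) { puts(#sym); }
DEMO_SYSCALLS
#undef __SYSCALL

/* Second expansion: build the dispatch table. */
#define __SYSCALL(nr, sym) [nr] = sym,
static void (*demo_table[])(void) = {
	DEMO_SYSCALLS
};
#undef __SYSCALL

int main(void)
{
	demo_table[1]();	/* prints "demo_write" */
	return 0;
}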
diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c
deleted file mode 100644 (file)
index f3d82bb..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#include "linux/linkage.h"
-#include "linux/personality.h"
-#include "linux/utsname.h"
-#include "asm/prctl.h" /* XXX This should get the constants from libc */
-#include "asm/uaccess.h"
-#include "os.h"
-
-long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
-{
-       unsigned long *ptr = addr, tmp;
-       long ret;
-       int pid = task->mm->context.id.u.pid;
-
-       /*
-        * With ARCH_SET_FS (and ARCH_SET_GS, which is treated similarly
-        * to be safe), we need to call arch_prctl on the host because
-        * setting %fs may result in something else happening (like a
-        * GDT or thread.fs being set instead).  So, we let the host
-        * fiddle the registers and thread struct and restore the
-        * registers afterwards.
-        *
-        * So, the saved registers are stored to the process (this is
-        * needed because a stub may have been the last thing to run),
-        * arch_prctl is run on the host, then the registers are read
-        * back.
-        */
-       switch (code) {
-       case ARCH_SET_FS:
-       case ARCH_SET_GS:
-               ret = restore_registers(pid, &current->thread.regs.regs);
-               if (ret)
-                       return ret;
-               break;
-       case ARCH_GET_FS:
-       case ARCH_GET_GS:
-               /*
-                * With these two, we read to a local pointer and
-                * put_user it to the userspace pointer that we were
-                * given.  If addr isn't valid (because it hasn't been
-                * faulted in or is just bogus), we want put_user to
-                * fault it in (or return -EFAULT) instead of having
-                * the host return -EFAULT.
-                */
-               ptr = &tmp;
-       }
-
-       ret = os_arch_prctl(pid, code, ptr);
-       if (ret)
-               return ret;
-
-       switch (code) {
-       case ARCH_SET_FS:
-               current->thread.arch.fs = (unsigned long) ptr;
-               ret = save_registers(pid, &current->thread.regs.regs);
-               break;
-       case ARCH_SET_GS:
-               ret = save_registers(pid, &current->thread.regs.regs);
-               break;
-       case ARCH_GET_FS:
-               ret = put_user(tmp, addr);
-               break;
-       case ARCH_GET_GS:
-               ret = put_user(tmp, addr);
-               break;
-       }
-
-       return ret;
-}
-
-long sys_arch_prctl(int code, unsigned long addr)
-{
-       return arch_prctl(current, code, (unsigned long __user *) addr);
-}
-
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-              void __user *parent_tid, void __user *child_tid)
-{
-       long ret;
-
-       if (!newsp)
-               newsp = UPT_SP(&current->thread.regs.regs);
-       current->thread.forking = 1;
-       ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-                     child_tid);
-       current->thread.forking = 0;
-       return ret;
-}
-
-void arch_switch_to(struct task_struct *to)
-{
-       if ((to->thread.arch.fs == 0) || (to->mm == NULL))
-               return;
-
-       arch_prctl(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs);
-}
diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/um/sys-x86_64/sysrq.c
deleted file mode 100644 (file)
index f4f82be..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/utsname.h>
-#include <asm/current.h>
-#include <asm/ptrace.h>
-#include "sysrq.h"
-
-void __show_regs(struct pt_regs *regs)
-{
-       printk("\n");
-       print_modules();
-       printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
-               current->comm, print_tainted(), init_utsname()->release);
-       printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff,
-              PT_REGS_RIP(regs));
-       printk(KERN_INFO "RSP: %016lx  EFLAGS: %08lx\n", PT_REGS_RSP(regs),
-              PT_REGS_EFLAGS(regs));
-       printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
-              PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs));
-       printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
-              PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs));
-       printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
-              PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs));
-       printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
-              PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs));
-       printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
-              PT_REGS_R13(regs), PT_REGS_R14(regs), PT_REGS_R15(regs));
-}
-
-void show_regs(struct pt_regs *regs)
-{
-       __show_regs(regs);
-       show_trace(current, (unsigned long *) &regs);
-}
diff --git a/arch/um/sys-x86_64/tls.c b/arch/um/sys-x86_64/tls.c
deleted file mode 100644 (file)
index f7ba462..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-#include "linux/sched.h"
-
-void clear_flushed_tls(struct task_struct *task)
-{
-}
-
-int arch_copy_tls(struct task_struct *t)
-{
-       /*
-        * If CLONE_SETTLS is set, we need to save the thread id
-        * (which is argument 5, child_tid, of clone) so it can be set
-        * during context switches.
-        */
-       t->thread.arch.fs = t->thread.regs.regs.gp[R8 / sizeof(long)];
-
-       return 0;
-}
diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/um/sys-x86_64/user-offsets.c
deleted file mode 100644 (file)
index 9735854..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <stdio.h>
-#include <stddef.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/mman.h>
-#include <sys/user.h>
-#define __FRAME_OFFSETS
-#include <asm/ptrace.h>
-#include <asm/types.h>
-
-#define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define DEFINE_LONGS(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
-
-#define OFFSET(sym, str, mem) \
-       DEFINE(sym, offsetof(struct str, mem));
-
-void foo(void)
-{
-       OFFSET(HOST_SC_CR2, sigcontext, cr2);
-       OFFSET(HOST_SC_ERR, sigcontext, err);
-       OFFSET(HOST_SC_TRAPNO, sigcontext, trapno);
-
-       DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
-       DEFINE_LONGS(HOST_RBX, RBX);
-       DEFINE_LONGS(HOST_RCX, RCX);
-       DEFINE_LONGS(HOST_RDI, RDI);
-       DEFINE_LONGS(HOST_RSI, RSI);
-       DEFINE_LONGS(HOST_RDX, RDX);
-       DEFINE_LONGS(HOST_RBP, RBP);
-       DEFINE_LONGS(HOST_RAX, RAX);
-       DEFINE_LONGS(HOST_R8, R8);
-       DEFINE_LONGS(HOST_R9, R9);
-       DEFINE_LONGS(HOST_R10, R10);
-       DEFINE_LONGS(HOST_R11, R11);
-       DEFINE_LONGS(HOST_R12, R12);
-       DEFINE_LONGS(HOST_R13, R13);
-       DEFINE_LONGS(HOST_R14, R14);
-       DEFINE_LONGS(HOST_R15, R15);
-       DEFINE_LONGS(HOST_ORIG_RAX, ORIG_RAX);
-       DEFINE_LONGS(HOST_CS, CS);
-       DEFINE_LONGS(HOST_SS, SS);
-       DEFINE_LONGS(HOST_EFLAGS, EFLAGS);
-#if 0
-       DEFINE_LONGS(HOST_FS, FS);
-       DEFINE_LONGS(HOST_GS, GS);
-       DEFINE_LONGS(HOST_DS, DS);
-       DEFINE_LONGS(HOST_ES, ES);
-#endif
-
-       DEFINE_LONGS(HOST_IP, RIP);
-       DEFINE_LONGS(HOST_SP, RSP);
-       DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
-
-       /* XXX Duplicated between i386 and x86_64 */
-       DEFINE(UM_POLLIN, POLLIN);
-       DEFINE(UM_POLLPRI, POLLPRI);
-       DEFINE(UM_POLLOUT, POLLOUT);
-
-       DEFINE(UM_PROT_READ, PROT_READ);
-       DEFINE(UM_PROT_WRITE, PROT_WRITE);
-       DEFINE(UM_PROT_EXEC, PROT_EXEC);
-}
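The DEFINE()/DEFINE_LONGS() asm markers above never execute; the file is compiled to assembly, and each marker leaves a "->SYM value" line that the build's post-processing turns into a #define in user_constants.h (a rough description of the mechanism). A toy marker with an invented symbol:

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

void demo(void)
{
	/* gcc -S emits a line like "->DEMO_ANSWER $42 42" into the .s
	 * file, which post-processing rewrites to
	 * "#define DEMO_ANSWER 42". */
	DEFINE(DEMO_ANSWER, 42);
}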
diff --git a/arch/um/sys-x86_64/vdso/Makefile b/arch/um/sys-x86_64/vdso/Makefile
deleted file mode 100644 (file)
index 5dffe6d..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-#
-# Building vDSO images for x86.
-#
-
-VDSO64-y               := y
-
-vdso-install-$(VDSO64-y)       += vdso.so
-
-
-# files to link into the vdso
-vobjs-y := vdso-note.o um_vdso.o
-
-# files to link into kernel
-obj-$(VDSO64-y)                        += vdso.o vma.o
-
-vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
-
-$(obj)/vdso.o: $(obj)/vdso.so
-
-targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
-
-export CPPFLAGS_vdso.lds += -P -C
-
-VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-       -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
-
-$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
-
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
-       $(call if_changed,vdso)
-
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
-       $(call if_changed,objcopy)
-
-#
-# Don't omit frame pointers for ease of userspace debugging, but do
-# optimize sibling calls.
-#
-CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
-       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
-       -fno-omit-frame-pointer -foptimize-sibling-calls
-
-$(vobjs): KBUILD_CFLAGS += $(CFL)
-
-#
-# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
-#
-CFLAGS_REMOVE_vdso-note.o = -pg
-CFLAGS_REMOVE_um_vdso.o = -pg
-
-targets += vdso-syms.lds
-obj-$(VDSO64-y)                        += vdso-syms.lds
-
-#
-# Match symbols in the DSO that look like VDSO*; produce a file of constants.
-#
-sed-vdsosym := -e 's/^00*/0/' \
-       -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
-quiet_cmd_vdsosym = VDSOSYM $@
-define cmd_vdsosym
-       $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
-endef
-
-$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
-       $(call if_changed,vdsosym)
-
-#
-# The DSO images are built using a special linker script.
-#
-quiet_cmd_vdso = VDSO    $@
-      cmd_vdso = $(CC) -nostdlib -o $@ \
-                      $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-                      -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
-                sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-
-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
-GCOV_PROFILE := n
-
-#
-# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
-#
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
-       @mkdir -p $(MODLIB)/vdso
-       $(call cmd,vdso_install)
-
-PHONY += vdso_install $(vdso-install-y)
-vdso_install: $(vdso-install-y)
diff --git a/arch/um/sys-x86_64/vdso/checkundef.sh b/arch/um/sys-x86_64/vdso/checkundef.sh
deleted file mode 100644 (file)
index 7ee90a9..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-nm="$1"
-file="$2"
-$nm "$file" | grep '^ *U' > /dev/null 2>&1
-if [ $? -eq 1 ]; then
-    exit 0
-else
-    echo "$file: undefined symbols found" >&2
-    exit 1
-fi
diff --git a/arch/um/sys-x86_64/vdso/um_vdso.c b/arch/um/sys-x86_64/vdso/um_vdso.c
deleted file mode 100644 (file)
index 7c441b5..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This vDSO turns all calls into a syscall so that UML can trap them.
- */
-
-
-/* Disable profiling for userspace code */
-#define DISABLE_BRANCH_PROFILING
-
-#include <linux/time.h>
-#include <linux/getcpu.h>
-#include <asm/unistd.h>
-
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
-{
-       long ret;
-
-       asm("syscall" : "=a" (ret) :
-               "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
-
-       return ret;
-}
-int clock_gettime(clockid_t, struct timespec *)
-       __attribute__((weak, alias("__vdso_clock_gettime")));
-
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-       long ret;
-
-       asm("syscall" : "=a" (ret) :
-               "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
-
-       return ret;
-}
-int gettimeofday(struct timeval *, struct timezone *)
-       __attribute__((weak, alias("__vdso_gettimeofday")));
-
-time_t __vdso_time(time_t *t)
-{
-       long secs;
-
-       asm volatile("syscall"
-               : "=a" (secs)
-               : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory");
-
-       return secs;
-}
-int time(time_t *t) __attribute__((weak, alias("__vdso_time")));
-
-long
-__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
-{
-       /*
-        * UML does not support SMP, so we can cheat here. :)
-        */
-
-       if (cpu)
-               *cpu = 0;
-       if (node)
-               *node = 0;
-
-       return 0;
-}
-
-long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
-       __attribute__((weak, alias("__vdso_getcpu")));
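Because the file exports clock_gettime and friends as weak aliases, ordinary userspace code can end up in the __vdso_* wrappers without any special plumbing. A hypothetical caller:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* Resolves through the vDSO's weak clock_gettime alias, which
	 * UML traps as a regular syscall. */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		printf("%ld.%09ld\n", (long) ts.tv_sec, ts.tv_nsec);
	return 0;
}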
diff --git a/arch/um/sys-x86_64/vdso/vdso-layout.lds.S b/arch/um/sys-x86_64/vdso/vdso-layout.lds.S
deleted file mode 100644 (file)
index 634a2cf..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Linker script for vDSO.  This is an ELF shared object prelinked to
- * its virtual address, and with only one read-only segment.
- * This script controls its layout.
- */
-
-SECTIONS
-{
-       . = VDSO_PRELINK + SIZEOF_HEADERS;
-
-       .hash           : { *(.hash) }                  :text
-       .gnu.hash       : { *(.gnu.hash) }
-       .dynsym         : { *(.dynsym) }
-       .dynstr         : { *(.dynstr) }
-       .gnu.version    : { *(.gnu.version) }
-       .gnu.version_d  : { *(.gnu.version_d) }
-       .gnu.version_r  : { *(.gnu.version_r) }
-
-       .note           : { *(.note.*) }                :text   :note
-
-       .eh_frame_hdr   : { *(.eh_frame_hdr) }          :text   :eh_frame_hdr
-       .eh_frame       : { KEEP (*(.eh_frame)) }       :text
-
-       .dynamic        : { *(.dynamic) }               :text   :dynamic
-
-       .rodata         : { *(.rodata*) }               :text
-       .data           : {
-             *(.data*)
-             *(.sdata*)
-             *(.got.plt) *(.got)
-             *(.gnu.linkonce.d.*)
-             *(.bss*)
-             *(.dynbss*)
-             *(.gnu.linkonce.b.*)
-       }
-
-       .altinstructions        : { *(.altinstructions) }
-       .altinstr_replacement   : { *(.altinstr_replacement) }
-
-       /*
-        * Align the actual code well away from the non-instruction data.
-        * This is the best thing for the I-cache.
-        */
-       . = ALIGN(0x100);
-
-       .text           : { *(.text*) }                 :text   =0x90909090
-}
-
-/*
- * Very old versions of ld do not recognize this name token; use the constant.
- */
-#define PT_GNU_EH_FRAME        0x6474e550
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-       text            PT_LOAD         FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
-       dynamic         PT_DYNAMIC      FLAGS(4);               /* PF_R */
-       note            PT_NOTE         FLAGS(4);               /* PF_R */
-       eh_frame_hdr    PT_GNU_EH_FRAME;
-}
diff --git a/arch/um/sys-x86_64/vdso/vdso-note.S b/arch/um/sys-x86_64/vdso/vdso-note.S
deleted file mode 100644 (file)
index 79a071e..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
-       .long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/um/sys-x86_64/vdso/vdso.S b/arch/um/sys-x86_64/vdso/vdso.S
deleted file mode 100644 (file)
index ec82c16..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#include <linux/init.h>
-
-__INITDATA
-
-       .globl vdso_start, vdso_end
-vdso_start:
-       .incbin "arch/um/sys-x86_64/vdso/vdso.so"
-vdso_end:
-
-__FINIT
diff --git a/arch/um/sys-x86_64/vdso/vdso.lds.S b/arch/um/sys-x86_64/vdso/vdso.lds.S
deleted file mode 100644 (file)
index b96b267..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Linker script for 64-bit vDSO.
- * We #include the file to define the layout details.
- * Here we only choose the prelinked virtual address.
- *
- * This file defines the version script giving the user-exported symbols in
- * the DSO.  We can define local symbols here called VDSO* to make their
- * values visible using the asm-x86/vdso.h macros from the kernel proper.
- */
-
-#define VDSO_PRELINK 0xffffffffff700000
-#include "vdso-layout.lds.S"
-
-/*
- * This controls what userland symbols we export from the vDSO.
- */
-VERSION {
-       LINUX_2.6 {
-       global:
-               clock_gettime;
-               __vdso_clock_gettime;
-               gettimeofday;
-               __vdso_gettimeofday;
-               getcpu;
-               __vdso_getcpu;
-               time;
-               __vdso_time;
-       local: *;
-       };
-}
-
-VDSO64_PRELINK = VDSO_PRELINK;
diff --git a/arch/um/sys-x86_64/vdso/vma.c b/arch/um/sys-x86_64/vdso/vma.c
deleted file mode 100644 (file)
index 9495c8d..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <asm/page.h>
-#include <linux/init.h>
-
-unsigned int __read_mostly vdso_enabled = 1;
-unsigned long um_vdso_addr;
-
-extern unsigned long task_size;
-extern char vdso_start[], vdso_end[];
-
-static struct page **vdsop;
-
-static int __init init_vdso(void)
-{
-       struct page *um_vdso;
-
-       BUG_ON(vdso_end - vdso_start > PAGE_SIZE);
-
-       um_vdso_addr = task_size - PAGE_SIZE;
-
-       vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL);
-       if (!vdsop)
-               goto oom;
-
-       um_vdso = alloc_page(GFP_KERNEL);
-       if (!um_vdso) {
-               kfree(vdsop);
-
-               goto oom;
-       }
-
-       copy_page(page_address(um_vdso), vdso_start);
-       *vdsop = um_vdso;
-
-       return 0;
-
-oom:
-       printk(KERN_ERR "Cannot allocate vdso\n");
-       vdso_enabled = 0;
-
-       return -ENOMEM;
-}
-subsys_initcall(init_vdso);
-
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
-{
-       int err;
-       struct mm_struct *mm = current->mm;
-
-       if (!vdso_enabled)
-               return 0;
-
-       down_write(&mm->mmap_sem);
-
-       err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
-               VM_READ|VM_EXEC|
-               VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
-               VM_ALWAYSDUMP,
-               vdsop);
-
-       up_write(&mm->mmap_sem);
-
-       return err;
-}
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
new file mode 100644 (file)
index 0000000..36ddec6
--- /dev/null
@@ -0,0 +1,61 @@
+core-y += arch/x86/crypto/
+
+ifeq ($(CONFIG_X86_32),y)
+START := 0x8048000
+
+LDFLAGS                        += -m elf_i386
+ELF_ARCH               := i386
+ELF_FORMAT             := elf32-i386
+CHECKFLAGS     += -D__i386__
+
+ifeq ("$(origin SUBARCH)", "command line")
+ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)")
+KBUILD_CFLAGS          += $(call cc-option,-m32)
+KBUILD_AFLAGS          += $(call cc-option,-m32)
+LINK-y                 += $(call cc-option,-m32)
+
+export LDFLAGS
+endif
+endif
+
+# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
+include $(srctree)/arch/x86/Makefile_32.cpu
+
+# Prevent gcc from keeping the stack 16-byte aligned. Taken from i386.
+cflags-y += $(call cc-option,-mpreferred-stack-boundary=2)
+
+# Prevent sprintf in nfsd from being converted to strcpy and resulting in
+# an unresolved reference.
+cflags-y += -ffreestanding
+
+# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
+# a lot more stack due to the lack of sharing of stacklots.  Also, gcc
+# 4.3.0 needs -funit-at-a-time for extern inline functions.
+KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
+                       echo $(call cc-option,-fno-unit-at-a-time); \
+                       else echo $(call cc-option,-funit-at-a-time); fi ;)
+
+KBUILD_CFLAGS += $(cflags-y)
+
+else
+
+START := 0x60000000
+
+KBUILD_CFLAGS += -fno-builtin -m64
+
+CHECKFLAGS  += -m64 -D__x86_64__
+KBUILD_AFLAGS += -m64
+LDFLAGS += -m elf_x86_64
+KBUILD_CPPFLAGS += -m64
+
+ELF_ARCH := i386:x86-64
+ELF_FORMAT := elf64-x86-64
+
+# Not all 64-bit distros have /lib as a symlink to /lib64; PLD is an example.
+
+LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
+LINK-y += -m64
+
+# Do unit-at-a-time unconditionally on x86_64, following the host
+KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
+endif
index 29f66793cc55894e0edd4b9c8692f9503cd53fe9..4420993acc4734c962922d58ee63e0b700cf330e 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_INTEL_SCU_IPC_H_
 #define  _ASM_X86_INTEL_SCU_IPC_H_
 
+#include <linux/notifier.h>
+
 #define IPCMSG_VRTC    0xFA     /* Set vRTC device */
 
 /* Command id associated with message IPCMSG_VRTC */
@@ -44,4 +46,24 @@ int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data);
 /* Update FW version */
 int intel_scu_ipc_fw_update(u8 *buffer, u32 length);
 
+extern struct blocking_notifier_head intel_scu_notifier;
+
+static inline void intel_scu_notifier_add(struct notifier_block *nb)
+{
+       blocking_notifier_chain_register(&intel_scu_notifier, nb);
+}
+
+static inline void intel_scu_notifier_remove(struct notifier_block *nb)
+{
+       blocking_notifier_chain_unregister(&intel_scu_notifier, nb);
+}
+
+static inline int intel_scu_notifier_post(unsigned long v, void *p)
+{
+       return blocking_notifier_call_chain(&intel_scu_notifier, v, p);
+}
+
+#define                SCU_AVAILABLE           1
+#define                SCU_DOWN                2
+
 #endif
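A minimal sketch of a consumer of the notifier interface added above; the handler and init names are hypothetical, and the mrst.c hunk later in this patch contains a real in-tree user:

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <asm/intel_scu_ipc.h>

static int demo_scu_event(struct notifier_block *nb,
			  unsigned long event, void *data)
{
	if (event == SCU_AVAILABLE)
		pr_info("SCU came up\n");
	else if (event == SCU_DOWN)
		pr_info("SCU went down\n");
	return NOTIFY_OK;
}

static struct notifier_block demo_scu_nb = {
	.notifier_call = demo_scu_event,
};

static int __init demo_scu_init(void)
{
	intel_scu_notifier_add(&demo_scu_nb);
	return 0;
}
arch_initcall(demo_scu_init);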
index 7b5063a6ad422c72aae62565103f4263f5c2ae65..864830e1dd655839dec78d5b45ed2e27442305c8 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/debugfs.h>
-#include <linux/edac_mce.h>
 #include <linux/irq_work.h>
 
 #include <asm/processor.h>
@@ -144,23 +143,20 @@ static struct mce_log mcelog = {
 void mce_log(struct mce *mce)
 {
        unsigned next, entry;
+       int ret = 0;
 
        /* Emit the trace record: */
        trace_mce_record(mce);
 
+       ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+       if (ret == NOTIFY_STOP)
+               return;
+
        mce->finished = 0;
        wmb();
        for (;;) {
                entry = rcu_dereference_check_mce(mcelog.next);
                for (;;) {
-                       /*
-                        * If edac_mce is enabled, it will check the error type
-                        * and will process it, if it is a known error.
-                        * Otherwise, the error will be sent through mcelog
-                        * interface
-                        */
-                       if (edac_mce_parse(mce))
-                               return;
 
                        /*
                         * When the buffer fills up discard new entries.
@@ -556,10 +552,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                 * Don't get the IP here because it's unlikely to
                 * have anything to do with the actual error location.
                 */
-               if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
+               if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
                        mce_log(&m);
-                       atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
-               }
 
                /*
                 * Clear state for this bank.
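With this change a decoder registered on x86_mce_decoder_chain sees every logged MCE first and can return NOTIFY_STOP to keep it out of the mcelog buffer. A sketch of such a decoder, with hypothetical names and assuming the chain is still registered to directly, as EDAC did at the time:

#include <linux/notifier.h>
#include <asm/mce.h>

static int demo_mce_decode(struct notifier_block *nb,
			   unsigned long val, void *data)
{
	struct mce *m = data;

	if (!m)
		return NOTIFY_DONE;
	/* ... decode and report *m here ... */
	return NOTIFY_STOP;	/* claim the event; mce_log() returns early */
}

static struct notifier_block demo_mce_nb = {
	.notifier_call = demo_mce_decode,
};

static void demo_register(void)
{
	atomic_notifier_chain_register(&x86_mce_decoder_chain, &demo_mce_nb);
}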
index dbe34b9313743f1cae72ac28aee5c99e5d2c9369..ea305856151cefc62fccd7f216519bc9d5f2945c 100644 (file)
@@ -108,16 +108,6 @@ static inline void get_head_page_multiple(struct page *page, int nr)
        SetPageReferenced(page);
 }
 
-static inline void get_huge_page_tail(struct page *page)
-{
-       /*
-        * __split_huge_page_refcount() cannot run
-        * from under us.
-        */
-       VM_BUG_ON(atomic_read(&page->_count) < 0);
-       atomic_inc(&page->_count);
-}
-
 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
 {
index e6379526675ba1efd4d686d99d56358b68304564..6ed7afdaf4afa5e194ea75a43c08e84d20c5d915 100644 (file)
@@ -26,6 +26,8 @@
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/mfd/intel_msic.h>
 
 #include <asm/setup.h>
 #include <asm/mpspec_def.h>
@@ -483,6 +485,128 @@ static void __init *no_platform_data(void *info)
        return NULL;
 }
 
+static struct resource msic_resources[] = {
+       {
+               .start  = INTEL_MSIC_IRQ_PHYS_BASE,
+               .end    = INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+};
+
+static struct intel_msic_platform_data msic_pdata;
+
+static struct platform_device msic_device = {
+       .name           = "intel_msic",
+       .id             = -1,
+       .dev            = {
+               .platform_data  = &msic_pdata,
+       },
+       .num_resources  = ARRAY_SIZE(msic_resources),
+       .resource       = msic_resources,
+};
+
+static inline bool mrst_has_msic(void)
+{
+       return mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL;
+}
+
+static int msic_scu_status_change(struct notifier_block *nb,
+                                 unsigned long code, void *data)
+{
+       if (code == SCU_DOWN) {
+               platform_device_unregister(&msic_device);
+               return 0;
+       }
+
+       return platform_device_register(&msic_device);
+}
+
+static int __init msic_init(void)
+{
+       static struct notifier_block msic_scu_notifier = {
+               .notifier_call  = msic_scu_status_change,
+       };
+
+       /*
+        * We need to be sure that the SCU IPC is ready before the MSIC
+        * device can be registered.
+        */
+       if (mrst_has_msic())
+               intel_scu_notifier_add(&msic_scu_notifier);
+
+       return 0;
+}
+arch_initcall(msic_init);
+
+/*
+ * msic_generic_platform_data - sets generic platform data for the block
+ * @info: pointer to the SFI device table entry for this block
+ * @block: MSIC block
+ *
+ * Sets the IRQ number from the SFI table entry for the given device in
+ * the MSIC platform data.
+ */
+static void *msic_generic_platform_data(void *info, enum intel_msic_block block)
+{
+       struct sfi_device_table_entry *entry = info;
+
+       BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST);
+       msic_pdata.irq[block] = entry->irq;
+
+       return no_platform_data(info);
+}
+
+static void *msic_battery_platform_data(void *info)
+{
+       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY);
+}
+
+static void *msic_gpio_platform_data(void *info)
+{
+       static struct intel_msic_gpio_pdata pdata;
+       int gpio = get_gpio_by_name("msic_gpio_base");
+
+       if (gpio < 0)
+               return NULL;
+
+       pdata.gpio_base = gpio;
+       msic_pdata.gpio = &pdata;
+
+       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO);
+}
+
+static void *msic_audio_platform_data(void *info)
+{
+       struct platform_device *pdev;
+
+       pdev = platform_device_register_simple("sst-platform", -1, NULL, 0);
+       if (IS_ERR(pdev)) {
+               pr_err("failed to create audio platform device\n");
+               return NULL;
+       }
+
+       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO);
+}
+
+static void *msic_power_btn_platform_data(void *info)
+{
+       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN);
+}
+
+static void *msic_ocd_platform_data(void *info)
+{
+       static struct intel_msic_ocd_pdata pdata;
+       int gpio = get_gpio_by_name("ocd_gpio");
+
+       if (gpio < 0)
+               return NULL;
+
+       pdata.gpio = gpio;
+       msic_pdata.ocd = &pdata;
+
+       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
+}
+
 static const struct devs_id __initconst device_ids[] = {
        {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
        {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
@@ -491,7 +615,14 @@ static const struct devs_id __initconst device_ids[] = {
        {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
        {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
        {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
-       {"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
+
+       /* MSIC subdevices */
+       {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data},
+       {"msic_gpio", SFI_DEV_TYPE_IPC, 1, &msic_gpio_platform_data},
+       {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data},
+       {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data},
+       {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data},
+
        {},
 };
 
@@ -558,6 +689,9 @@ static void __init intel_scu_i2c_device_register(int bus,
        i2c_devs[i2c_next_dev++] = new_dev;
 }
 
+BLOCKING_NOTIFIER_HEAD(intel_scu_notifier);
+EXPORT_SYMBOL_GPL(intel_scu_notifier);
+
 /* Called by IPC driver */
 void intel_scu_devices_create(void)
 {
@@ -582,6 +716,7 @@ void intel_scu_devices_create(void)
                } else
                        i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
        }
+       intel_scu_notifier_post(SCU_AVAILABLE, 0L);
 }
 EXPORT_SYMBOL_GPL(intel_scu_devices_create);
 
@@ -590,6 +725,8 @@ void intel_scu_devices_destroy(void)
 {
        int i;
 
+       intel_scu_notifier_post(SCU_DOWN, 0L);
+
        for (i = 0; i < ipc_next_dev; i++)
                platform_device_del(ipc_devs[i]);
 }
@@ -606,19 +743,37 @@ static void __init install_irq_resource(struct platform_device *pdev, int irq)
        platform_device_add_resources(pdev, &res, 1);
 }
 
-static void __init sfi_handle_ipc_dev(struct platform_device *pdev)
+static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry)
 {
        const struct devs_id *dev = device_ids;
+       struct platform_device *pdev;
        void *pdata = NULL;
 
        while (dev->name[0]) {
                if (dev->type == SFI_DEV_TYPE_IPC &&
-                       !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) {
-                       pdata = dev->get_platform_data(pdev);
+                       !strncmp(dev->name, entry->name, SFI_NAME_LEN)) {
+                       pdata = dev->get_platform_data(entry);
                        break;
                }
                dev++;
        }
+
+       /*
+        * On Medfield the platform device creation is handled by the MSIC
+        * MFD driver, so we don't need to do it here.
+        */
+       if (mrst_has_msic())
+               return;
+
+       /* ID as IRQ is a hack that will go away */
+       pdev = platform_device_alloc(entry->name, entry->irq);
+       if (pdev == NULL) {
+               pr_err("out of memory for SFI platform device '%s'.\n",
+                       entry->name);
+               return;
+       }
+       install_irq_resource(pdev, entry->irq);
+
        pdev->dev.platform_data = pdata;
        intel_scu_device_register(pdev);
 }
@@ -671,7 +826,6 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
        struct sfi_device_table_entry *pentry;
        struct spi_board_info spi_info;
        struct i2c_board_info i2c_info;
-       struct platform_device *pdev;
        int num, i, bus;
        int ioapic;
        struct io_apic_irq_attr irq_attr;
@@ -699,17 +853,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
 
                switch (pentry->type) {
                case SFI_DEV_TYPE_IPC:
-                       /* ID as IRQ is a hack that will go away */
-                       pdev = platform_device_alloc(pentry->name, irq);
-                       if (pdev == NULL) {
-                               pr_err("out of memory for SFI platform device '%s'.\n",
-                                                       pentry->name);
-                               continue;
-                       }
-                       install_irq_resource(pdev, irq);
                        pr_debug("info[%2d]: IPC bus, name = %16.16s, "
-                               "irq = 0x%2x\n", i, pentry->name, irq);
-                       sfi_handle_ipc_dev(pdev);
+                               "irq = 0x%2x\n", i, pentry->name, pentry->irq);
+                       sfi_handle_ipc_dev(pentry);
                        break;
                case SFI_DEV_TYPE_SPI:
                        memset(&spi_info, 0, sizeof(spi_info));
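
The pattern for the MSIC subdevices above generalizes: an SFI IPC entry is matched by name in device_ids[], and its get_platform_data callback now receives the struct sfi_device_table_entry itself. A sketch of wiring up one more block, with hypothetical names (INTEL_MSIC_BLOCK_FOO and "msic_foo" are illustrative, not real identifiers):

        /* Hypothetical additional MSIC block. */
        static void *msic_foo_platform_data(void *info)
        {
                return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_FOO);
        }

        /* and the matching device_ids[] entry:
         *      {"msic_foo", SFI_DEV_TYPE_IPC, 1, &msic_foo_platform_data},
         */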
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
new file mode 100644 (file)
index 0000000..21bebe6
--- /dev/null
@@ -0,0 +1,70 @@
+mainmenu "User Mode Linux/$SUBARCH $KERNELVERSION Kernel Configuration"
+
+source "arch/um/Kconfig.common"
+
+menu "UML-specific options"
+
+menu "Host processor type and features"
+
+config CMPXCHG_LOCAL
+       bool
+       default n
+
+config CMPXCHG_DOUBLE
+       bool
+       default n
+
+source "arch/x86/Kconfig.cpu"
+
+endmenu
+
+config UML_X86
+       def_bool y
+       select GENERIC_FIND_FIRST_BIT
+
+config 64BIT
+       bool
+       default SUBARCH = "x86_64"
+
+config X86_32
+       def_bool !64BIT
+       select HAVE_AOUT
+
+config X86_64
+       def_bool 64BIT
+
+config RWSEM_XCHGADD_ALGORITHM
+       def_bool X86_XADD && 64BIT
+
+config RWSEM_GENERIC_SPINLOCK
+       def_bool !RWSEM_XCHGADD_ALGORITHM
+
+config 3_LEVEL_PGTABLES
+       bool "Three-level pagetables (EXPERIMENTAL)" if !64BIT
+       default 64BIT
+       depends on EXPERIMENTAL
+       help
+       Three-level pagetables will let UML have more than 4G of physical
+       memory.  All the memory that can't be mapped directly will be treated
+       as high memory.
+
+       However, this is experimental on 32-bit architectures, so if unsure say
+       N (on x86-64 it's automatically enabled instead, as it's safe there).
+
+config ARCH_HAS_SC_SIGNALS
+       def_bool !64BIT
+
+config ARCH_REUSE_HOST_VSYSCALL_AREA
+       def_bool !64BIT
+
+config SMP_BROKEN
+       def_bool 64BIT
+
+config GENERIC_HWEIGHT
+       def_bool y
+
+source "arch/um/Kconfig.um"
+
+endmenu
+
+source "arch/um/Kconfig.rest"
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
new file mode 100644 (file)
index 0000000..8fb5840
--- /dev/null
@@ -0,0 +1,45 @@
+#
+# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+#
+
+ifeq ($(CONFIG_X86_32),y)
+       BITS := 32
+else
+       BITS := 64
+endif
+
+obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
+       ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
+       stub_$(BITS).o stub_segv.o syscalls_$(BITS).o \
+       sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
+       mem_$(BITS).o subarch.o os-$(OS)/
+
+ifeq ($(CONFIG_X86_32),y)
+
+obj-y += checksum_32.o
+obj-$(CONFIG_BINFMT_ELF) += elfcore.o
+
+subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
+subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
+subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o
+
+else
+
+obj-y += vdso/
+
+subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \
+               ../lib/rwsem.o
+
+endif
+
+subarch-$(CONFIG_MODULES) += ../kernel/module.o
+
+USER_OBJS := bugs_$(BITS).o ptrace_user.o fault.o
+
+extra-y += user-offsets.s
+$(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS)
+
+UNPROFILE_OBJS := stub_segv.o
+CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
+
+include arch/um/scripts/Makefile.rules
diff --git a/arch/x86/um/asm/apic.h b/arch/x86/um/asm/apic.h
new file mode 100644 (file)
index 0000000..876dee8
--- /dev/null
@@ -0,0 +1,4 @@
+#ifndef __UM_APIC_H
+#define __UM_APIC_H
+
+#endif
diff --git a/arch/x86/um/asm/arch_hweight.h b/arch/x86/um/asm/arch_hweight.h
new file mode 100644 (file)
index 0000000..c656cf4
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _ASM_UM_HWEIGHT_H
+#define _ASM_UM_HWEIGHT_H
+
+#include <asm-generic/bitops/arch_hweight.h>
+
+#endif
diff --git a/arch/x86/um/asm/archparam.h b/arch/x86/um/asm/archparam.h
new file mode 100644 (file)
index 0000000..c17cf68
--- /dev/null
@@ -0,0 +1,20 @@
+/* 
+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright 2003 PathScale, Inc.
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_ARCHPARAM_H
+#define __UM_ARCHPARAM_H
+
+#ifdef CONFIG_X86_32
+
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#endif
+
+#endif
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
new file mode 100644 (file)
index 0000000..b6efe23
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef __UM_CHECKSUM_H
+#define __UM_CHECKSUM_H
+
+#ifdef CONFIG_X86_32
+# include "checksum_32.h"
+#else
+# include "checksum_64.h"
+#endif
+
+#endif
diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h
new file mode 100644 (file)
index 0000000..caab742
--- /dev/null
@@ -0,0 +1,201 @@
+/*
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_SYSDEP_CHECKSUM_H
+#define __UM_SYSDEP_CHECKSUM_H
+
+#include "linux/in6.h"
+#include "linux/string.h"
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ *     Note: when you get a NULL pointer exception here this means someone
+ *     passed in an incorrect kernel address to one of these functions.
+ *
+ *     If you use these functions directly please don't forget the
+ *     access_ok().
+ */
+
+static __inline__
+__wsum csum_partial_copy_nocheck(const void *src, void *dst,
+                                      int len, __wsum sum)
+{
+       memcpy(dst, src, len);
+       return csum_partial(dst, len, sum);
+}
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums, and handles user-space pointer exceptions correctly, when needed.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+
+static __inline__
+__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+                                        int len, __wsum sum, int *err_ptr)
+{
+       if (copy_from_user(dst, src, len)) {
+               *err_ptr = -EFAULT;
+               return (__force __wsum)-1;
+       }
+
+       return csum_partial(dst, len, sum);
+}
+
+/*
+ *     This is a version of ip_compute_csum() optimized for IP headers,
+ *     which always checksum on 4 octet boundaries.
+ *
+ *     By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
+ *     Arnt Gulbrandsen.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+       unsigned int sum;
+
+       __asm__ __volatile__(
+           "movl (%1), %0      ;\n"
+           "subl $4, %2        ;\n"
+           "jbe 2f             ;\n"
+           "addl 4(%1), %0     ;\n"
+           "adcl 8(%1), %0     ;\n"
+           "adcl 12(%1), %0    ;\n"
+"1:        adcl 16(%1), %0     ;\n"
+           "lea 4(%1), %1      ;\n"
+           "decl %2            ;\n"
+           "jne 1b             ;\n"
+           "adcl $0, %0        ;\n"
+           "movl %0, %2        ;\n"
+           "shrl $16, %0       ;\n"
+           "addw %w2, %w0      ;\n"
+           "adcl $0, %0        ;\n"
+           "notl %0            ;\n"
+"2:                            ;\n"
+       /* Since the input registers which are loaded with iph and ihl
+          are modified, we must also specify them as outputs, or gcc
+          will assume they contain their original values. */
+       : "=r" (sum), "=r" (iph), "=r" (ihl)
+       : "1" (iph), "2" (ihl)
+       : "memory");
+       return (__force __sum16)sum;
+}
+
+/*
+ *     Fold a partial checksum
+ */
+
+static inline __sum16 csum_fold(__wsum sum)
+{
+       __asm__(
+               "addl %1, %0            ;\n"
+               "adcl $0xffff, %0       ;\n"
+               : "=r" (sum)
+               : "r" ((__force u32)sum << 16),
+                 "0" ((__force u32)sum & 0xffff0000)
+       );
+       return (__force __sum16)(~(__force u32)sum >> 16);
+}
+
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+                                                  unsigned short len,
+                                                  unsigned short proto,
+                                                  __wsum sum)
+{
+    __asm__(
+       "addl %1, %0    ;\n"
+       "adcl %2, %0    ;\n"
+       "adcl %3, %0    ;\n"
+       "adcl $0, %0    ;\n"
+       : "=r" (sum)
+       : "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum));
+    return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+                                                  unsigned short len,
+                                                  unsigned short proto,
+                                                  __wsum sum)
+{
+       return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+
+static inline __sum16 ip_compute_csum(const void *buff, int len)
+{
+    return csum_fold (csum_partial(buff, len, 0));
+}
+
+#define _HAVE_ARCH_IPV6_CSUM
+static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+                                         const struct in6_addr *daddr,
+                                         __u32 len, unsigned short proto,
+                                         __wsum sum)
+{
+       __asm__(
+               "addl 0(%1), %0         ;\n"
+               "adcl 4(%1), %0         ;\n"
+               "adcl 8(%1), %0         ;\n"
+               "adcl 12(%1), %0        ;\n"
+               "adcl 0(%2), %0         ;\n"
+               "adcl 4(%2), %0         ;\n"
+               "adcl 8(%2), %0         ;\n"
+               "adcl 12(%2), %0        ;\n"
+               "adcl %3, %0            ;\n"
+               "adcl %4, %0            ;\n"
+               "adcl $0, %0            ;\n"
+               : "=&r" (sum)
+               : "r" (saddr), "r" (daddr),
+                 "r"(htonl(len)), "r"(htonl(proto)), "0"(sum));
+
+       return csum_fold(sum);
+}
+
+/*
+ *     Copy and checksum to user
+ */
+#define HAVE_CSUM_COPY_USER
+static __inline__ __wsum csum_and_copy_to_user(const void *src,
+                                                    void __user *dst,
+                                                    int len, __wsum sum, int *err_ptr)
+{
+       if (access_ok(VERIFY_WRITE, dst, len)) {
+               if (copy_to_user(dst, src, len)) {
+                       *err_ptr = -EFAULT;
+                       return (__force __wsum)-1;
+               }
+
+               return csum_partial(src, len, sum);
+       }
+
+       if (len)
+               *err_ptr = -EFAULT;
+
+       return (__force __wsum)-1; /* invalid checksum */
+}
+
+#endif
+
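
For reference, everything in this header computes variants of the 16-bit one's-complement Internet checksum; the asm only buys speed. A deliberately unoptimized C sketch of the same arithmetic, useful as a mental model for csum_partial() plus csum_fold() (not part of the patch; assumes an even-length buffer):

        /* Reference one's-complement sum over big-endian 16-bit words,
         * carries folded back in, result inverted. */
        static unsigned short ref_inet_csum(const unsigned char *buf, int len)
        {
                unsigned int sum = 0;

                while (len > 1) {
                        sum += (buf[0] << 8) | buf[1];
                        buf += 2;
                        len -= 2;
                }
                while (sum >> 16)               /* fold the carries */
                        sum = (sum & 0xffff) + (sum >> 16);
                return (unsigned short)~sum;
        }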
diff --git a/arch/x86/um/asm/checksum_64.h b/arch/x86/um/asm/checksum_64.h
new file mode 100644 (file)
index 0000000..a5be903
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_SYSDEP_CHECKSUM_H
+#define __UM_SYSDEP_CHECKSUM_H
+
+#include "linux/string.h"
+#include "linux/in6.h"
+#include "asm/uaccess.h"
+
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ *     Note: when you get a NULL pointer exception here this means someone
+ *     passed in an incorrect kernel address to one of these functions.
+ *
+ *     If you use these functions directly please don't forget the
+ *     access_ok().
+ */
+
+static __inline__
+__wsum csum_partial_copy_nocheck(const void *src, void *dst,
+                                      int len, __wsum sum)
+{
+       memcpy(dst, src, len);
+       return(csum_partial(dst, len, sum));
+}
+
+static __inline__
+__wsum csum_partial_copy_from_user(const void __user *src,
+                                         void *dst, int len, __wsum sum,
+                                         int *err_ptr)
+{
+        if (copy_from_user(dst, src, len)) {
+                *err_ptr = -EFAULT;
+                return (__force __wsum)-1;
+        }
+        return csum_partial(dst, len, sum);
+}
+
+/**
+ * csum_fold - Fold and invert a 32bit checksum.
+ * @sum: 32bit unfolded sum
+ *
+ * Fold a 32bit running checksum to 16bit and invert it. This is usually
+ * the last step before putting a checksum into a packet.
+ * Make sure not to mix with 64bit checksums.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+       __asm__(
+               "  addl %1,%0\n"
+               "  adcl $0xffff,%0"
+               : "=r" (sum)
+               : "r" ((__force u32)sum << 16),
+                 "0" ((__force u32)sum & 0xffff0000)
+       );
+       return (__force __sum16)(~(__force u32)sum >> 16);
+}
+
+/**
+ * csum_tcpudp_nofold - Compute an IPv4 pseudo header checksum.
+ * @saddr: source address
+ * @daddr: destination address
+ * @len: length of packet
+ * @proto: ip protocol of packet
+ * @sum: initial sum to be added in (32bit unfolded)
+ *
+ * Returns the pseudo header checksum of the input data. Result is
+ * 32bit unfolded.
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+                  unsigned short proto, __wsum sum)
+{
+       asm("  addl %1, %0\n"
+           "  adcl %2, %0\n"
+           "  adcl %3, %0\n"
+           "  adcl $0, %0\n"
+               : "=r" (sum)
+           : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum));
+       return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+                                          unsigned short len,
+                                          unsigned short proto,
+                                          __wsum sum)
+{
+       return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/**
+ * ip_fast_csum - Compute the IPv4 header checksum efficiently.
+ * @iph: ipv4 header
+ * @ihl: length of header / 4
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+       unsigned int sum;
+
+       asm(    "  movl (%1), %0\n"
+               "  subl $4, %2\n"
+               "  jbe 2f\n"
+               "  addl 4(%1), %0\n"
+               "  adcl 8(%1), %0\n"
+               "  adcl 12(%1), %0\n"
+               "1: adcl 16(%1), %0\n"
+               "  lea 4(%1), %1\n"
+               "  decl %2\n"
+               "  jne  1b\n"
+               "  adcl $0, %0\n"
+               "  movl %0, %2\n"
+               "  shrl $16, %0\n"
+               "  addw %w2, %w0\n"
+               "  adcl $0, %0\n"
+               "  notl %0\n"
+               "2:"
+       /* Since the input registers which are loaded with iph and ihl
+          are modified, we must also specify them as outputs, or gcc
+          will assume they contain their original values. */
+       : "=r" (sum), "=r" (iph), "=r" (ihl)
+       : "1" (iph), "2" (ihl)
+       : "memory");
+       return (__force __sum16)sum;
+}
+
+static inline unsigned add32_with_carry(unsigned a, unsigned b)
+{
+        asm("addl %2,%0\n\t"
+            "adcl $0,%0"
+            : "=r" (a)
+            : "0" (a), "r" (b));
+        return a;
+}
+
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+#endif
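
In practice the pieces above compose as: csum_partial() over the transport header plus payload, then csum_tcpudp_magic() to mix in the IPv4 pseudo header and fold. A hedged usage sketch (the helper name is illustrative; IPPROTO_UDP comes from <linux/in.h>):

        /* Illustrative UDP checksum built from the helpers declared above. */
        static __sum16 example_udp_csum(__be32 saddr, __be32 daddr,
                                        const void *udp, unsigned short len)
        {
                __wsum sum = csum_partial(udp, len, 0);

                return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, sum);
        }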
diff --git a/arch/x86/um/asm/desc.h b/arch/x86/um/asm/desc.h
new file mode 100644 (file)
index 0000000..4ec34a5
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef __UM_DESC_H
+#define __UM_DESC_H
+
+/* Taken from asm-i386/desc.h, it's the only thing we need. The rest wouldn't
+ * compile, and has never been used. */
+#define LDT_empty(info) (\
+       (info)->base_addr       == 0    && \
+       (info)->limit           == 0    && \
+       (info)->contents        == 0    && \
+       (info)->read_exec_only  == 1    && \
+       (info)->seg_32bit       == 0    && \
+       (info)->limit_in_pages  == 0    && \
+       (info)->seg_not_present == 1    && \
+       (info)->useable         == 0    )
+
+#endif
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
new file mode 100644 (file)
index 0000000..f3b0633
--- /dev/null
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+#ifndef __UM_ELF_X86_H
+#define __UM_ELF_X86_H
+
+#include <asm/user.h>
+#include "skas.h"
+
+#ifdef CONFIG_X86_32
+
+#define R_386_NONE     0
+#define R_386_32       1
+#define R_386_PC32     2
+#define R_386_GOT32    3
+#define R_386_PLT32    4
+#define R_386_COPY     5
+#define R_386_GLOB_DAT 6
+#define R_386_JMP_SLOT 7
+#define R_386_RELATIVE 8
+#define R_386_GOTOFF   9
+#define R_386_GOTPC    10
+#define R_386_NUM      11
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+       (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
+
+#define ELF_CLASS      ELFCLASS32
+#define ELF_DATA        ELFDATA2LSB
+#define ELF_ARCH        EM_386
+
+#define ELF_PLAT_INIT(regs, load_addr) do { \
+       PT_REGS_EBX(regs) = 0; \
+       PT_REGS_ECX(regs) = 0; \
+       PT_REGS_EDX(regs) = 0; \
+       PT_REGS_ESI(regs) = 0; \
+       PT_REGS_EDI(regs) = 0; \
+       PT_REGS_EBP(regs) = 0; \
+       PT_REGS_EAX(regs) = 0; \
+} while (0)
+
+/* Shamelessly stolen from include/asm-i386/elf.h */
+
+#define ELF_CORE_COPY_REGS(pr_reg, regs) do {  \
+       pr_reg[0] = PT_REGS_EBX(regs);          \
+       pr_reg[1] = PT_REGS_ECX(regs);          \
+       pr_reg[2] = PT_REGS_EDX(regs);          \
+       pr_reg[3] = PT_REGS_ESI(regs);          \
+       pr_reg[4] = PT_REGS_EDI(regs);          \
+       pr_reg[5] = PT_REGS_EBP(regs);          \
+       pr_reg[6] = PT_REGS_EAX(regs);          \
+       pr_reg[7] = PT_REGS_DS(regs);           \
+       pr_reg[8] = PT_REGS_ES(regs);           \
+       /* fake once used fs and gs selectors? */       \
+       pr_reg[9] = PT_REGS_DS(regs);           \
+       pr_reg[10] = PT_REGS_DS(regs);          \
+       pr_reg[11] = PT_REGS_SYSCALL_NR(regs);  \
+       pr_reg[12] = PT_REGS_IP(regs);          \
+       pr_reg[13] = PT_REGS_CS(regs);          \
+       pr_reg[14] = PT_REGS_EFLAGS(regs);      \
+       pr_reg[15] = PT_REGS_SP(regs);          \
+       pr_reg[16] = PT_REGS_SS(regs);          \
+} while (0);
+
+extern char * elf_aux_platform;
+#define ELF_PLATFORM (elf_aux_platform)
+
+extern unsigned long vsyscall_ehdr;
+extern unsigned long vsyscall_end;
+extern unsigned long __kernel_vsyscall;
+
+/*
+ * This is the range that is readable by user mode, and things
+ * acting like user mode such as get_user_pages.
+ */
+#define FIXADDR_USER_START      vsyscall_ehdr
+#define FIXADDR_USER_END        vsyscall_end
+
+
+/*
+ * Architecture-neutral AT_ values in 0-17, leave some room
+ * for more of them, start the x86-specific ones at 32.
+ */
+#define AT_SYSINFO             32
+#define AT_SYSINFO_EHDR                33
+
+#define ARCH_DLINFO                                            \
+do {                                                           \
+       if ( vsyscall_ehdr ) {                                  \
+               NEW_AUX_ENT(AT_SYSINFO, __kernel_vsyscall);     \
+               NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr);    \
+       }                                                       \
+} while (0)
+
+#else
+
+/* x86-64 relocation types, taken from asm-x86_64/elf.h */
+#define R_X86_64_NONE          0       /* No reloc */
+#define R_X86_64_64            1       /* Direct 64 bit  */
+#define R_X86_64_PC32          2       /* PC relative 32 bit signed */
+#define R_X86_64_GOT32         3       /* 32 bit GOT entry */
+#define R_X86_64_PLT32         4       /* 32 bit PLT address */
+#define R_X86_64_COPY          5       /* Copy symbol at runtime */
+#define R_X86_64_GLOB_DAT      6       /* Create GOT entry */
+#define R_X86_64_JUMP_SLOT     7       /* Create PLT entry */
+#define R_X86_64_RELATIVE      8       /* Adjust by program base */
+#define R_X86_64_GOTPCREL      9       /* 32 bit signed pc relative
+                                          offset to GOT */
+#define R_X86_64_32            10      /* Direct 32 bit zero extended */
+#define R_X86_64_32S           11      /* Direct 32 bit sign extended */
+#define R_X86_64_16            12      /* Direct 16 bit zero extended */
+#define R_X86_64_PC16          13      /* 16 bit sign extended pc relative */
+#define R_X86_64_8             14      /* Direct 8 bit sign extended  */
+#define R_X86_64_PC8           15      /* 8 bit sign extended pc relative */
+
+#define R_X86_64_NUM           16
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+       ((x)->e_machine == EM_X86_64)
+
+#define ELF_CLASS      ELFCLASS64
+#define ELF_DATA        ELFDATA2LSB
+#define ELF_ARCH        EM_X86_64
+
+#define ELF_PLAT_INIT(regs, load_addr)    do { \
+       PT_REGS_RBX(regs) = 0; \
+       PT_REGS_RCX(regs) = 0; \
+       PT_REGS_RDX(regs) = 0; \
+       PT_REGS_RSI(regs) = 0; \
+       PT_REGS_RDI(regs) = 0; \
+       PT_REGS_RBP(regs) = 0; \
+       PT_REGS_RAX(regs) = 0; \
+       PT_REGS_R8(regs) = 0; \
+       PT_REGS_R9(regs) = 0; \
+       PT_REGS_R10(regs) = 0; \
+       PT_REGS_R11(regs) = 0; \
+       PT_REGS_R12(regs) = 0; \
+       PT_REGS_R13(regs) = 0; \
+       PT_REGS_R14(regs) = 0; \
+       PT_REGS_R15(regs) = 0; \
+} while (0)
+
+#define ELF_CORE_COPY_REGS(pr_reg, _regs)              \
+       (pr_reg)[0] = (_regs)->regs.gp[0];                      \
+       (pr_reg)[1] = (_regs)->regs.gp[1];                      \
+       (pr_reg)[2] = (_regs)->regs.gp[2];                      \
+       (pr_reg)[3] = (_regs)->regs.gp[3];                      \
+       (pr_reg)[4] = (_regs)->regs.gp[4];                      \
+       (pr_reg)[5] = (_regs)->regs.gp[5];                      \
+       (pr_reg)[6] = (_regs)->regs.gp[6];                      \
+       (pr_reg)[7] = (_regs)->regs.gp[7];                      \
+       (pr_reg)[8] = (_regs)->regs.gp[8];                      \
+       (pr_reg)[9] = (_regs)->regs.gp[9];                      \
+       (pr_reg)[10] = (_regs)->regs.gp[10];                    \
+       (pr_reg)[11] = (_regs)->regs.gp[11];                    \
+       (pr_reg)[12] = (_regs)->regs.gp[12];                    \
+       (pr_reg)[13] = (_regs)->regs.gp[13];                    \
+       (pr_reg)[14] = (_regs)->regs.gp[14];                    \
+       (pr_reg)[15] = (_regs)->regs.gp[15];                    \
+       (pr_reg)[16] = (_regs)->regs.gp[16];                    \
+       (pr_reg)[17] = (_regs)->regs.gp[17];                    \
+       (pr_reg)[18] = (_regs)->regs.gp[18];                    \
+       (pr_reg)[19] = (_regs)->regs.gp[19];                    \
+       (pr_reg)[20] = (_regs)->regs.gp[20];                    \
+       (pr_reg)[21] = current->thread.arch.fs;                 \
+       (pr_reg)[22] = 0;                                       \
+       (pr_reg)[23] = 0;                                       \
+       (pr_reg)[24] = 0;                                       \
+       (pr_reg)[25] = 0;                                       \
+       (pr_reg)[26] = 0;
+
+#define ELF_PLATFORM "x86_64"
+
+/* No user-accessible fixmap addresses, i.e. vsyscall */
+#define FIXADDR_USER_START      0
+#define FIXADDR_USER_END        0
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+       int uses_interp);
+
+extern unsigned long um_vdso_addr;
+#define AT_SYSINFO_EHDR 33
+#define ARCH_DLINFO    NEW_AUX_ENT(AT_SYSINFO_EHDR, um_vdso_addr)
+
+#endif
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct user_i387_struct elf_fpregset_t;
+
+#define task_pt_regs(t) (&(t)->thread.regs)
+
+struct task_struct;
+
+extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
+
+#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
+
+#define ELF_EXEC_PAGESIZE 4096
+
+#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
+
+extern long elf_aux_hwcap;
+#define ELF_HWCAP (elf_aux_hwcap)
+
+#define SET_PERSONALITY(ex) do ; while(0)
+#define __HAVE_ARCH_GATE_AREA 1
+
+#endif
diff --git a/arch/x86/um/asm/irq_vectors.h b/arch/x86/um/asm/irq_vectors.h
new file mode 100644 (file)
index 0000000..272a81e
--- /dev/null
@@ -0,0 +1,10 @@
+/* 
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_IRQ_VECTORS_H
+#define __UM_IRQ_VECTORS_H
+
+#endif
+
diff --git a/arch/x86/um/asm/mm_context.h b/arch/x86/um/asm/mm_context.h
new file mode 100644 (file)
index 0000000..4a73d63
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
+ * Licensed under the GPL
+ *
+ * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
+ */
+
+#ifndef __ASM_LDT_H
+#define __ASM_LDT_H
+
+#include <linux/mutex.h>
+#include <asm/ldt.h>
+
+extern void ldt_host_info(void);
+
+#define LDT_PAGES_MAX \
+       ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
+#define LDT_ENTRIES_PER_PAGE \
+       (PAGE_SIZE/LDT_ENTRY_SIZE)
+#define LDT_DIRECT_ENTRIES \
+       ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
+
+struct ldt_entry {
+       __u32 a;
+       __u32 b;
+};
+
+typedef struct uml_ldt {
+       int entry_count;
+       struct mutex lock;
+       union {
+               struct ldt_entry * pages[LDT_PAGES_MAX];
+               struct ldt_entry entries[LDT_DIRECT_ENTRIES];
+       } u;
+} uml_ldt_t;
+
+#define LDT_entry_a(info) \
+       ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
+
+#define LDT_entry_b(info) \
+       (((info)->base_addr & 0xff000000) | \
+       (((info)->base_addr & 0x00ff0000) >> 16) | \
+       ((info)->limit & 0xf0000) | \
+       (((info)->read_exec_only ^ 1) << 9) | \
+       ((info)->contents << 10) | \
+       (((info)->seg_not_present ^ 1) << 15) | \
+       ((info)->seg_32bit << 22) | \
+       ((info)->limit_in_pages << 23) | \
+       ((info)->useable << 20) | \
+       0x7000)
+
+#define _LDT_empty(info) (\
+       (info)->base_addr       == 0    && \
+       (info)->limit           == 0    && \
+       (info)->contents        == 0    && \
+       (info)->read_exec_only  == 1    && \
+       (info)->seg_32bit       == 0    && \
+       (info)->limit_in_pages  == 0    && \
+       (info)->seg_not_present == 1    && \
+       (info)->useable         == 0    )
+
+#ifdef CONFIG_X86_64
+#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
+#else
+#define LDT_empty(info) (_LDT_empty(info))
+#endif
+
+struct uml_arch_mm_context {
+       uml_ldt_t ldt;
+};
+
+#endif
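
LDT_entry_a()/LDT_entry_b() pack a struct user_desc into the low and high words of an x86 segment descriptor, and LDT_empty() spots the all-clear pattern. A small sketch of the intended use (the helper name is hypothetical; user_desc comes from <asm/ldt.h>, included above):

        /* Hypothetical helper: fill one LDT slot from a user_desc using the
         * packing macros above. */
        static void example_pack_ldt(struct ldt_entry *out,
                                     const struct user_desc *info)
        {
                if (LDT_empty(info)) {
                        out->a = 0;
                        out->b = 0;
                        return;
                }
                out->a = LDT_entry_a(info);
                out->b = LDT_entry_b(info);
        }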
diff --git a/arch/x86/um/asm/module.h b/arch/x86/um/asm/module.h
new file mode 100644 (file)
index 0000000..61af80e
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef __UM_MODULE_H
+#define __UM_MODULE_H
+
+/* UML is simple */
+struct mod_arch_specific
+{
+};
+
+#ifdef CONFIG_X86_32
+
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Ehdr Elf32_Ehdr
+
+#else
+
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Ehdr Elf64_Ehdr
+
+#endif
+
+#endif
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
new file mode 100644 (file)
index 0000000..118c143
--- /dev/null
@@ -0,0 +1,22 @@
+#ifndef __UM_PROCESSOR_H
+#define __UM_PROCESSOR_H
+
+/* include faultinfo structure */
+#include <sysdep/faultinfo.h>
+
+#ifdef CONFIG_X86_32
+# include "processor_32.h"
+#else
+# include "processor_64.h"
+#endif
+
+#define KSTK_EIP(tsk) KSTK_REG(tsk, HOST_IP)
+#define KSTK_ESP(tsk) KSTK_REG(tsk, HOST_IP)
+#define KSTK_EBP(tsk) KSTK_REG(tsk, HOST_BP)
+
+#define ARCH_IS_STACKGROW(address) \
+       (address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(&current->thread.regs.regs))
+
+#include <asm/processor-generic.h>
+
+#endif
diff --git a/arch/x86/um/asm/processor_32.h b/arch/x86/um/asm/processor_32.h
new file mode 100644 (file)
index 0000000..018f732
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_PROCESSOR_I386_H
+#define __UM_PROCESSOR_I386_H
+
+#include <linux/string.h>
+#include <asm/segment.h>
+#include <asm/ldt.h>
+
+extern int host_has_cmov;
+
+struct uml_tls_struct {
+       struct user_desc tls;
+       unsigned flushed:1;
+       unsigned present:1;
+};
+
+struct arch_thread {
+       struct uml_tls_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+       unsigned long debugregs[8];
+       int debugregs_seq;
+       struct faultinfo faultinfo;
+};
+
+#define INIT_ARCH_THREAD { \
+       .tls_array              = { [ 0 ... GDT_ENTRY_TLS_ENTRIES - 1 ] = \
+                                   { .present = 0, .flushed = 0 } }, \
+       .debugregs              = { [ 0 ... 7 ] = 0 }, \
+       .debugregs_seq          = 0, \
+       .faultinfo              = { 0, 0, 0 } \
+}
+
+static inline void arch_flush_thread(struct arch_thread *thread)
+{
+       /* Clear any TLS still hanging */
+       memset(&thread->tls_array, 0, sizeof(thread->tls_array));
+}
+
+static inline void arch_copy_thread(struct arch_thread *from,
+                                    struct arch_thread *to)
+{
+        memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array));
+}
+
+#include <asm/user.h>
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+       __asm__ __volatile__("rep;nop": : :"memory");
+}
+
+#define cpu_relax()    rep_nop()
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter"). Stolen
+ * from asm-i386/processor.h
+ */
+#define current_text_addr() \
+       ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
+
+#endif
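
cpu_relax() is the idiomatic body of a spin-wait: the PAUSE hint keeps the pipeline and any hyperthreaded sibling happy while polling. A minimal sketch (the flag is hypothetical):

        /* Hypothetical poll loop built on cpu_relax() from above. */
        static void example_spin_until_ready(volatile int *ready)
        {
                while (!*ready)
                        cpu_relax();    /* REP NOP / PAUSE between polls */
        }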
diff --git a/arch/x86/um/asm/processor_64.h b/arch/x86/um/asm/processor_64.h
new file mode 100644 (file)
index 0000000..61de92d
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2003 PathScale, Inc.
+ *
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_PROCESSOR_X86_64_H
+#define __UM_PROCESSOR_X86_64_H
+
+struct arch_thread {
+        unsigned long debugregs[8];
+        int debugregs_seq;
+        unsigned long fs;
+        struct faultinfo faultinfo;
+};
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+       __asm__ __volatile__("rep;nop": : :"memory");
+}
+
+#define cpu_relax()   rep_nop()
+
+#define INIT_ARCH_THREAD { .debugregs                  = { [ 0 ... 7 ] = 0 }, \
+                          .debugregs_seq       = 0, \
+                          .fs                  = 0, \
+                          .faultinfo           = { 0, 0, 0 } }
+
+static inline void arch_flush_thread(struct arch_thread *thread)
+{
+}
+
+static inline void arch_copy_thread(struct arch_thread *from,
+                                    struct arch_thread *to)
+{
+       to->fs = from->fs;
+}
+
+#include <asm/user.h>
+
+#define current_text_addr() \
+       ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; })
+
+#endif
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
new file mode 100644 (file)
index 0000000..c8aca8c
--- /dev/null
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "ptrace_32.h"
+#else
+# include "ptrace_64.h"
+#endif
diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h
new file mode 100644 (file)
index 0000000..5d2a591
--- /dev/null
@@ -0,0 +1,51 @@
+/* 
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_PTRACE_I386_H
+#define __UM_PTRACE_I386_H
+
+#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
+
+#include "linux/compiler.h"
+#include "asm/ptrace-generic.h"
+
+#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs)
+#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs)
+#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs)
+#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs)
+#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs)
+#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs)
+#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs)
+
+#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
+#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
+#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
+#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
+#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
+#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
+
+#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
+
+#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r)
+#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r)
+#define PT_FIX_EXEC_STACK(sp) do ; while(0)
+
+#define profile_pc(regs) PT_REGS_IP(regs)
+
+#define user_mode(r) UPT_IS_USER(&(r)->regs)
+
+/*
+ * Forward declaration to avoid including sysdep/tls.h, which causes a
+ * circular include, and compilation failures.
+ */
+struct user_desc;
+
+extern int ptrace_get_thread_area(struct task_struct *child, int idx,
+                                  struct user_desc __user *user_desc);
+
+extern int ptrace_set_thread_area(struct task_struct *child, int idx,
+                                  struct user_desc __user *user_desc);
+
+#endif
diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h
new file mode 100644 (file)
index 0000000..706a0d8
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2003 PathScale, Inc.
+ *
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_PTRACE_X86_64_H
+#define __UM_PTRACE_X86_64_H
+
+#include "linux/compiler.h"
+#include "asm/errno.h"
+
+#define __FRAME_OFFSETS /* Needed to get the R* macros */
+#include "asm/ptrace-generic.h"
+
+#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
+
+#define PT_REGS_RBX(r) UPT_RBX(&(r)->regs)
+#define PT_REGS_RCX(r) UPT_RCX(&(r)->regs)
+#define PT_REGS_RDX(r) UPT_RDX(&(r)->regs)
+#define PT_REGS_RSI(r) UPT_RSI(&(r)->regs)
+#define PT_REGS_RDI(r) UPT_RDI(&(r)->regs)
+#define PT_REGS_RBP(r) UPT_RBP(&(r)->regs)
+#define PT_REGS_RAX(r) UPT_RAX(&(r)->regs)
+#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
+#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
+#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
+#define PT_REGS_R11(r) UPT_R11(&(r)->regs)
+#define PT_REGS_R12(r) UPT_R12(&(r)->regs)
+#define PT_REGS_R13(r) UPT_R13(&(r)->regs)
+#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
+#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
+
+#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
+#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
+#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
+#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
+#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
+#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
+
+#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs)
+#define PT_REGS_RIP(r) UPT_IP(&(r)->regs)
+#define PT_REGS_SP(r) UPT_SP(&(r)->regs)
+
+#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
+
+/* XXX */
+#define user_mode(r) UPT_IS_USER(&(r)->regs)
+#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_RAX(r)
+#define PT_REGS_SYSCALL_RET(r) PT_REGS_RAX(r)
+
+#define PT_FIX_EXEC_STACK(sp) do ; while(0)
+
+#define profile_pc(regs) PT_REGS_IP(regs)
+
+struct user_desc;
+
+static inline int ptrace_get_thread_area(struct task_struct *child, int idx,
+                                         struct user_desc __user *user_desc)
+{
+        return -ENOSYS;
+}
+
+static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
+                                         struct user_desc __user *user_desc)
+{
+        return -ENOSYS;
+}
+
+extern long arch_prctl(struct task_struct *task, int code,
+                      unsigned long __user *addr);
+#endif
diff --git a/arch/x86/um/asm/required-features.h b/arch/x86/um/asm/required-features.h
new file mode 100644 (file)
index 0000000..dfb967b
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef __UM_REQUIRED_FEATURES_H
+#define __UM_REQUIRED_FEATURES_H
+
+/*
+ * Nothing to see, just need something for the i386 and x86_64 asm
+ * headers to include.
+ */
+
+#endif
diff --git a/arch/x86/um/asm/segment.h b/arch/x86/um/asm/segment.h
new file mode 100644 (file)
index 0000000..45183fc
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef __UM_SEGMENT_H
+#define __UM_SEGMENT_H
+
+extern int host_gdt_entry_tls_min;
+
+#define GDT_ENTRY_TLS_ENTRIES 3
+#define GDT_ENTRY_TLS_MIN host_gdt_entry_tls_min
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+
+#endif
diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h
new file mode 100644 (file)
index 0000000..a459fd9
--- /dev/null
@@ -0,0 +1,135 @@
+#ifndef _ASM_X86_SYSTEM_H_
+#define _ASM_X86_SYSTEM_H_
+
+#include <asm/asm.h>
+#include <asm/segment.h>
+#include <asm/cpufeature.h>
+#include <asm/cmpxchg.h>
+#include <asm/nops.h>
+
+#include <linux/kernel.h>
+#include <linux/irqflags.h>
+
+/* entries in ARCH_DLINFO: */
+#ifdef CONFIG_IA32_EMULATION
+# define AT_VECTOR_SIZE_ARCH 2
+#else
+# define AT_VECTOR_SIZE_ARCH 1
+#endif
+
+extern unsigned long arch_align_stack(unsigned long sp);
+
+void default_idle(void);
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+#ifdef CONFIG_X86_32
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
+#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
+#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#else
+#define mb()   asm volatile("mfence":::"memory")
+#define rmb()  asm volatile("lfence":::"memory")
+#define wmb()  asm volatile("sfence" ::: "memory")
+#endif
+
+/**
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier.  All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads.  This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies.  See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ *     CPU 0                           CPU 1
+ *
+ *     b = 2;
+ *     memory_barrier();
+ *     p = &b;                         q = p;
+ *                                     read_barrier_depends();
+ *                                     d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends().  However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ *     CPU 0                           CPU 1
+ *
+ *     a = 2;
+ *     memory_barrier();
+ *     b = 3;                          y = b;
+ *                                     read_barrier_depends();
+ *                                     x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b".  Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
+ * in cases like this where there are no data dependencies.
+ **/
+
+#define read_barrier_depends() do { } while (0)
+
+#ifdef CONFIG_SMP
+#define smp_mb()       mb()
+#ifdef CONFIG_X86_PPRO_FENCE
+# define smp_rmb()     rmb()
+#else
+# define smp_rmb()     barrier()
+#endif
+#ifdef CONFIG_X86_OOSTORE
+# define smp_wmb()     wmb()
+#else
+# define smp_wmb()     barrier()
+#endif
+#define smp_read_barrier_depends()     read_barrier_depends()
+#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#else
+#define smp_mb()       barrier()
+#define smp_rmb()      barrier()
+#define smp_wmb()      barrier()
+#define smp_read_barrier_depends()     do { } while (0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
+
+/*
+ * Stop RDTSC speculation. This is needed when you need to use RDTSC
+ * (or get_cycles or vread that possibly accesses the TSC) in a defined
+ * code region.
+ *
+ * (Could use a three-way alternative() for this if there was one.)
+ */
+static inline void rdtsc_barrier(void)
+{
+       alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
+       alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+}
+
+extern void *_switch_to(void *prev, void *next, void *last);
+#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
+
+#endif
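
rdtsc_barrier() exists so a TSC read cannot be speculated out of the region being timed. A hedged sketch of a fenced read (the helper name is illustrative; the RDTSC asm itself is standard):

        /* Illustrative: order RDTSC after earlier instructions using
         * rdtsc_barrier() from above. */
        static inline unsigned long long example_rdtsc_ordered(void)
        {
                unsigned int lo, hi;

                rdtsc_barrier();
                asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
                return ((unsigned long long)hi << 32) | lo;
        }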
diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h
new file mode 100644 (file)
index 0000000..7c297e9
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright 2003 PathScale, Inc.
+ * Licensed under the GPL
+ */
+
+#ifndef __VM_FLAGS_X86_H
+#define __VM_FLAGS_X86_H
+
+#ifdef CONFIG_X86_32
+
+#define VM_DATA_DEFAULT_FLAGS \
+       (VM_READ | VM_WRITE | \
+       ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
+                VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#else
+
+#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+       VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \
+       VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#endif
+#endif
diff --git a/arch/x86/um/bug.c b/arch/x86/um/bug.c
new file mode 100644 (file)
index 0000000..e8034e3
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL V2
+ */
+
+#include <linux/uaccess.h>
+
+/*
+ * Mostly copied from i386/x86_64 - eliminated the eip < PAGE_OFFSET because
+ * that's not relevant in skas mode.
+ */
+
+int is_valid_bugaddr(unsigned long eip)
+{
+       unsigned short ud2;
+
+       if (probe_kernel_address((unsigned short __user *)eip, ud2))
+               return 0;
+
+       return ud2 == 0x0b0f;
+}
diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c
new file mode 100644 (file)
index 0000000..a1fba5f
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <signal.h>
+#include "kern_util.h"
+#include "longjmp.h"
+#include "sysdep/ptrace.h"
+#include <generated/asm-offsets.h>
+
+/* Set during early boot */
+static int host_has_cmov = 1;
+static jmp_buf cmov_test_return;
+
+#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
+
+static void cmov_sigill_test_handler(int sig)
+{
+       host_has_cmov = 0;
+       longjmp(cmov_test_return, 1);
+}
+
+void arch_check_bugs(void)
+{
+       struct sigaction old, new;
+
+       printk(UM_KERN_INFO "Checking for host processor cmov support...");
+       new.sa_handler = cmov_sigill_test_handler;
+
+       /* Make sure that SIGILL is enabled after the handler longjmps back */
+       new.sa_flags = SA_NODEFER;
+       sigemptyset(&new.sa_mask);
+       sigaction(SIGILL, &new, &old);
+
+       if (setjmp(cmov_test_return) == 0) {
+               unsigned long foo = 0;
+               __asm__ __volatile__("cmovz %0, %1" : "=r" (foo) : "0" (foo));
+               printk(UM_KERN_CONT "Yes\n");
+       } else
+               printk(UM_KERN_CONT "No\n");
+
+       sigaction(SIGILL, &old, &new);
+}
+
+void arch_examine_signal(int sig, struct uml_pt_regs *regs)
+{
+       unsigned char tmp[2];
+
+       /*
+        * This is testing for a cmov (0x0f 0x4x) instruction causing a
+        * SIGILL in init.
+        */
+       if ((sig != SIGILL) || (TASK_PID(get_current()) != 1))
+               return;
+
+       if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) {
+               printk(UM_KERN_ERR "SIGILL in init, could not read "
+                      "instructions!\n");
+               return;
+       }
+
+       if ((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40))
+               return;
+
+       if (host_has_cmov == 0)
+               printk(UM_KERN_ERR "SIGILL caused by cmov, which this "
+                      "processor doesn't implement.  Boot a filesystem "
+                      "compiled for older processors");
+       else if (host_has_cmov == 1)
+               printk(UM_KERN_ERR "SIGILL caused by cmov, which this "
+                      "processor claims to implement");
+       else
+               printk(UM_KERN_ERR "Bad value for host_has_cmov (%d)",
+                       host_has_cmov);
+}
diff --git a/arch/x86/um/bugs_64.c b/arch/x86/um/bugs_64.c
new file mode 100644 (file)
index 0000000..44e02ba
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * Copyright 2003 PathScale, Inc.
+ *
+ * Licensed under the GPL
+ */
+
+#include "sysdep/ptrace.h"
+
+void arch_check_bugs(void)
+{
+}
+
+void arch_examine_signal(int sig, struct uml_pt_regs *regs)
+{
+}
diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S
new file mode 100644 (file)
index 0000000..f058d2f
--- /dev/null
@@ -0,0 +1,458 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             IP/TCP/UDP checksumming routines
+ *
+ * Authors:    Jorge Cwik, <jorge@laser.satlink.net>
+ *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *             Tom May, <ftom@netcom.com>
+ *              Pentium Pro/II routines:
+ *              Alexander Kjeldaas <astor@guardian.no>
+ *              Finn Arne Gangstad <finnag@guardian.no>
+ *             Lots of code moved from tcp.c and ip.c; see those files
+ *             for more names.
+ *
+ * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
+ *                          handling.
+ *             Andi Kleen,  add zeroing on error
+ *                   converted to pure assembler
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/errno.h>
+                               
+/*
+ * computes a partial checksum, e.g. for TCP/UDP fragments
+ */
+
+/*     
+unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
+ */
+               
+.text
+.align 4
+.globl csum_partial
+               
+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
+
+         /*            
+          * Experiments with Ethernet and SLIP connections show that buff
+          * is aligned on either a 2-byte or 4-byte boundary.  We get at
+          * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
+          * Fortunately, it is easy to convert 2-byte alignment to 4-byte
+          * alignment for the unrolled loop.
+          */           
+csum_partial:
+       pushl %esi
+       pushl %ebx
+       movl 20(%esp),%eax      # Function arg: unsigned int sum
+       movl 16(%esp),%ecx      # Function arg: int len
+       movl 12(%esp),%esi      # Function arg: unsigned char *buff
+       testl $2, %esi          # Check alignment.
+       jz 2f                   # Jump if alignment is ok.
+       subl $2, %ecx           # Alignment uses up two bytes.
+       jae 1f                  # Jump if we had at least two bytes.
+       addl $2, %ecx           # ecx was < 2.  Deal with it.
+       jmp 4f
+1:     movw (%esi), %bx
+       addl $2, %esi
+       addw %bx, %ax
+       adcl $0, %eax
+2:
+       movl %ecx, %edx
+       shrl $5, %ecx
+       jz 2f
+       testl %esi, %esi
+1:     movl (%esi), %ebx
+       adcl %ebx, %eax
+       movl 4(%esi), %ebx
+       adcl %ebx, %eax
+       movl 8(%esi), %ebx
+       adcl %ebx, %eax
+       movl 12(%esi), %ebx
+       adcl %ebx, %eax
+       movl 16(%esi), %ebx
+       adcl %ebx, %eax
+       movl 20(%esi), %ebx
+       adcl %ebx, %eax
+       movl 24(%esi), %ebx
+       adcl %ebx, %eax
+       movl 28(%esi), %ebx
+       adcl %ebx, %eax
+       lea 32(%esi), %esi
+       dec %ecx
+       jne 1b
+       adcl $0, %eax
+2:     movl %edx, %ecx
+       andl $0x1c, %edx
+       je 4f
+       shrl $2, %edx           # This clears CF
+3:     adcl (%esi), %eax
+       lea 4(%esi), %esi
+       dec %edx
+       jne 3b
+       adcl $0, %eax
+4:     andl $3, %ecx
+       jz 7f
+       cmpl $2, %ecx
+       jb 5f
+       movw (%esi),%cx
+       leal 2(%esi),%esi
+       je 6f
+       shll $16,%ecx
+5:     movb (%esi),%cl
+6:     addl %ecx,%eax
+       adcl $0, %eax 
+7:     
+       popl %ebx
+       popl %esi
+       ret
+
+#else
+
+/* Version for PentiumII/PPro */
+
+csum_partial:
+       pushl %esi
+       pushl %ebx
+       movl 20(%esp),%eax      # Function arg: unsigned int sum
+       movl 16(%esp),%ecx      # Function arg: int len
+       movl 12(%esp),%esi      # Function arg: const unsigned char *buf
+
+       testl $2, %esi         
+       jnz 30f                 
+10:
+       movl %ecx, %edx
+       movl %ecx, %ebx
+       andl $0x7c, %ebx
+       shrl $7, %ecx
+       addl %ebx,%esi
+       shrl $2, %ebx  
+       negl %ebx
+       lea 45f(%ebx,%ebx,2), %ebx
+       testl %esi, %esi
+       jmp *%ebx
+
+       # Handle 2-byte-aligned regions
+20:    addw (%esi), %ax
+       lea 2(%esi), %esi
+       adcl $0, %eax
+       jmp 10b
+
+30:    subl $2, %ecx          
+       ja 20b                 
+       je 32f
+       movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
+       addl %ebx, %eax
+       adcl $0, %eax
+       jmp 80f
+32:
+       addw (%esi), %ax        # csumming 2 bytes, 2-aligned
+       adcl $0, %eax
+       jmp 80f
+
+40: 
+       addl -128(%esi), %eax
+       adcl -124(%esi), %eax
+       adcl -120(%esi), %eax
+       adcl -116(%esi), %eax   
+       adcl -112(%esi), %eax   
+       adcl -108(%esi), %eax
+       adcl -104(%esi), %eax
+       adcl -100(%esi), %eax
+       adcl -96(%esi), %eax
+       adcl -92(%esi), %eax
+       adcl -88(%esi), %eax
+       adcl -84(%esi), %eax
+       adcl -80(%esi), %eax
+       adcl -76(%esi), %eax
+       adcl -72(%esi), %eax
+       adcl -68(%esi), %eax
+       adcl -64(%esi), %eax     
+       adcl -60(%esi), %eax     
+       adcl -56(%esi), %eax     
+       adcl -52(%esi), %eax   
+       adcl -48(%esi), %eax   
+       adcl -44(%esi), %eax
+       adcl -40(%esi), %eax
+       adcl -36(%esi), %eax
+       adcl -32(%esi), %eax
+       adcl -28(%esi), %eax
+       adcl -24(%esi), %eax
+       adcl -20(%esi), %eax
+       adcl -16(%esi), %eax
+       adcl -12(%esi), %eax
+       adcl -8(%esi), %eax
+       adcl -4(%esi), %eax
+45:
+       lea 128(%esi), %esi
+       adcl $0, %eax
+       dec %ecx
+       jge 40b
+       movl %edx, %ecx
+50:    andl $3, %ecx
+       jz 80f
+
+       # Handle the last 1-3 bytes without jumping
+       notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
+       movl $0xffffff,%ebx     # by the shll and shrl instructions
+       shll $3,%ecx
+       shrl %cl,%ebx
+       andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
+       addl %ebx,%eax
+       adcl $0,%eax
+80: 
+       popl %ebx
+       popl %esi
+       ret
+                               
+#endif
+
+/*
+unsigned int csum_partial_copy_generic (const char *src, char *dst,
+                                 int len, int sum, int *src_err_ptr, int *dst_err_ptr)
+ */ 
+
+/*
+ * Copy from ds while checksumming, otherwise like csum_partial
+ *
+ * The macros SRC and DST specify the type of access for the instruction.
+ * thus we can call a custom exception handler for all access types.
+ *
+ * FIXME: could someone double-check whether I haven't mixed up some SRC and
+ *       DST definitions? It's damn hard to trigger all cases.  I hope I got
+ *       them all but there's no guarantee.
+ */
+
+#define SRC(y...)                      \
+       9999: y;                        \
+       .section __ex_table, "a";       \
+       .long 9999b, 6001f      ;       \
+       .previous
+
+#define DST(y...)                      \
+       9999: y;                        \
+       .section __ex_table, "a";       \
+       .long 9999b, 6002f      ;       \
+       .previous
+
+.align 4
+
+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
+
+#define ARGBASE 16             
+#define FP             12
+
+csum_partial_copy_generic_i386:
+       subl  $4,%esp   
+       pushl %edi
+       pushl %esi
+       pushl %ebx
+       movl ARGBASE+16(%esp),%eax      # sum
+       movl ARGBASE+12(%esp),%ecx      # len
+       movl ARGBASE+4(%esp),%esi       # src
+       movl ARGBASE+8(%esp),%edi       # dst
+
+       testl $2, %edi                  # Check alignment. 
+       jz 2f                           # Jump if alignment is ok.
+       subl $2, %ecx                   # Alignment uses up two bytes.
+       jae 1f                          # Jump if we had at least two bytes.
+       addl $2, %ecx                   # ecx was < 2.  Deal with it.
+       jmp 4f
+SRC(1: movw (%esi), %bx        )
+       addl $2, %esi
+DST(   movw %bx, (%edi)        )
+       addl $2, %edi
+       addw %bx, %ax   
+       adcl $0, %eax
+2:
+       movl %ecx, FP(%esp)
+       shrl $5, %ecx
+       jz 2f
+       testl %esi, %esi
+SRC(1: movl (%esi), %ebx       )
+SRC(   movl 4(%esi), %edx      )
+       adcl %ebx, %eax
+DST(   movl %ebx, (%edi)       )
+       adcl %edx, %eax
+DST(   movl %edx, 4(%edi)      )
+
+SRC(   movl 8(%esi), %ebx      )
+SRC(   movl 12(%esi), %edx     )
+       adcl %ebx, %eax
+DST(   movl %ebx, 8(%edi)      )
+       adcl %edx, %eax
+DST(   movl %edx, 12(%edi)     )
+
+SRC(   movl 16(%esi), %ebx     )
+SRC(   movl 20(%esi), %edx     )
+       adcl %ebx, %eax
+DST(   movl %ebx, 16(%edi)     )
+       adcl %edx, %eax
+DST(   movl %edx, 20(%edi)     )
+
+SRC(   movl 24(%esi), %ebx     )
+SRC(   movl 28(%esi), %edx     )
+       adcl %ebx, %eax
+DST(   movl %ebx, 24(%edi)     )
+       adcl %edx, %eax
+DST(   movl %edx, 28(%edi)     )
+
+       lea 32(%esi), %esi
+       lea 32(%edi), %edi
+       dec %ecx
+       jne 1b
+       adcl $0, %eax
+2:     movl FP(%esp), %edx
+       movl %edx, %ecx
+       andl $0x1c, %edx
+       je 4f
+       shrl $2, %edx                   # This clears CF
+SRC(3: movl (%esi), %ebx       )
+       adcl %ebx, %eax
+DST(   movl %ebx, (%edi)       )
+       lea 4(%esi), %esi
+       lea 4(%edi), %edi
+       dec %edx
+       jne 3b
+       adcl $0, %eax
+4:     andl $3, %ecx
+       jz 7f
+       cmpl $2, %ecx
+       jb 5f
+SRC(   movw (%esi), %cx        )
+       leal 2(%esi), %esi
+DST(   movw %cx, (%edi)        )
+       leal 2(%edi), %edi
+       je 6f
+       shll $16,%ecx
+SRC(5: movb (%esi), %cl        )
+DST(   movb %cl, (%edi)        )
+6:     addl %ecx, %eax
+       adcl $0, %eax
+7:
+5000:
+
+# Exception handler:
+.section .fixup, "ax"                                                  
+
+6001:
+       movl ARGBASE+20(%esp), %ebx     # src_err_ptr
+       movl $-EFAULT, (%ebx)
+
+       # zero the complete destination - computing the rest
+       # is too much work 
+       movl ARGBASE+8(%esp), %edi      # dst
+       movl ARGBASE+12(%esp), %ecx     # len
+       xorl %eax,%eax
+       rep ; stosb
+
+       jmp 5000b
+
+6002:
+       movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
+       movl $-EFAULT,(%ebx)
+       jmp 5000b
+
+.previous
+
+       popl %ebx
+       popl %esi
+       popl %edi
+       popl %ecx                       # equivalent to addl $4,%esp
+       ret     
+
+#else
+
+/* Version for PentiumII/PPro */
+
+#define ROUND1(x) \
+       SRC(movl x(%esi), %ebx  )       ;       \
+       addl %ebx, %eax                 ;       \
+       DST(movl %ebx, x(%edi)  )       ; 
+
+#define ROUND(x) \
+       SRC(movl x(%esi), %ebx  )       ;       \
+       adcl %ebx, %eax                 ;       \
+       DST(movl %ebx, x(%edi)  )       ;
+
+#define ARGBASE 12
+               
+csum_partial_copy_generic_i386:
+       pushl %ebx
+       pushl %edi
+       pushl %esi
+       movl ARGBASE+4(%esp),%esi       #src
+       movl ARGBASE+8(%esp),%edi       #dst    
+       movl ARGBASE+12(%esp),%ecx      #len
+       movl ARGBASE+16(%esp),%eax      #sum
+#      movl %ecx, %edx  
+       movl %ecx, %ebx  
+       movl %esi, %edx
+       shrl $6, %ecx     
+       andl $0x3c, %ebx  
+       negl %ebx
+       subl %ebx, %esi  
+       subl %ebx, %edi  
+       lea  -1(%esi),%edx
+       andl $-32,%edx
+       lea 3f(%ebx,%ebx), %ebx
+       testl %esi, %esi 
+       jmp *%ebx
+1:     addl $64,%esi
+       addl $64,%edi 
+       SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
+       ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)    
+       ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)    
+       ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)    
+       ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)     
+3:     adcl $0,%eax
+       addl $64, %edx
+       dec %ecx
+       jge 1b
+4:     movl ARGBASE+12(%esp),%edx      #len
+       andl $3, %edx
+       jz 7f
+       cmpl $2, %edx
+       jb 5f
+SRC(   movw (%esi), %dx         )
+       leal 2(%esi), %esi
+DST(   movw %dx, (%edi)         )
+       leal 2(%edi), %edi
+       je 6f
+       shll $16,%edx
+5:
+SRC(   movb (%esi), %dl         )
+DST(   movb %dl, (%edi)         )
+6:     addl %edx, %eax
+       adcl $0, %eax
+7:
+.section .fixup, "ax"
+6001:  movl    ARGBASE+20(%esp), %ebx  # src_err_ptr   
+       movl $-EFAULT, (%ebx)
+       # zero the complete destination (computing the rest is too much work)
+       movl ARGBASE+8(%esp),%edi       # dst
+       movl ARGBASE+12(%esp),%ecx      # len
+       xorl %eax,%eax
+       rep; stosb
+       jmp 7b
+6002:  movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
+       movl $-EFAULT, (%ebx)
+       jmp  7b                 
+.previous                              
+
+       popl %esi
+       popl %edi
+       popl %ebx
+       ret
+                               
+#undef ROUND
+#undef ROUND1          
+               
+#endif
diff --git a/arch/x86/um/delay.c b/arch/x86/um/delay.c
new file mode 100644 (file)
index 0000000..f3fe1a6
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
+ * Mostly copied from arch/x86/lib/delay.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <asm/param.h>
+
+void __delay(unsigned long loops)
+{
+       asm volatile(
+               "test %0,%0\n"
+               "jz 3f\n"
+               "jmp 1f\n"
+
+               ".align 16\n"
+               "1: jmp 2f\n"
+
+               ".align 16\n"
+               "2: dec %0\n"
+               " jnz 2b\n"
+               "3: dec %0\n"
+
+               : /* we don't need output */
+               : "a" (loops)
+       );
+}
+EXPORT_SYMBOL(__delay);
+
+inline void __const_udelay(unsigned long xloops)
+{
+       int d0;
+
+       xloops *= 4;
+       asm("mull %%edx"
+               : "=d" (xloops), "=&a" (d0)
+               : "1" (xloops), "0"
+               (loops_per_jiffy * (HZ/4)));
+
+       __delay(++xloops);
+}
+EXPORT_SYMBOL(__const_udelay);
+
+void __udelay(unsigned long usecs)
+{
+       __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+       __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
new file mode 100644 (file)
index 0000000..6bb49b6
--- /dev/null
@@ -0,0 +1,83 @@
+#include <linux/elf.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/elf.h>
+
+
+Elf32_Half elf_core_extra_phdrs(void)
+{
+       return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
+}
+
+int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
+                              unsigned long limit)
+{
+       if ( vsyscall_ehdr ) {
+               const struct elfhdr *const ehdrp =
+                       (struct elfhdr *) vsyscall_ehdr;
+               const struct elf_phdr *const phdrp =
+                       (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
+               int i;
+               Elf32_Off ofs = 0;
+
+               for (i = 0; i < ehdrp->e_phnum; ++i) {
+                       struct elf_phdr phdr = phdrp[i];
+
+                       if (phdr.p_type == PT_LOAD) {
+                               ofs = phdr.p_offset = offset;
+                               offset += phdr.p_filesz;
+                       } else {
+                               phdr.p_offset += ofs;
+                       }
+                       phdr.p_paddr = 0; /* match other core phdrs */
+                       *size += sizeof(phdr);
+                       if (*size > limit
+                           || !dump_write(file, &phdr, sizeof(phdr)))
+                               return 0;
+               }
+       }
+       return 1;
+}
+
+int elf_core_write_extra_data(struct file *file, size_t *size,
+                             unsigned long limit)
+{
+       if ( vsyscall_ehdr ) {
+               const struct elfhdr *const ehdrp =
+                       (struct elfhdr *) vsyscall_ehdr;
+               const struct elf_phdr *const phdrp =
+                       (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
+               int i;
+
+               for (i = 0; i < ehdrp->e_phnum; ++i) {
+                       if (phdrp[i].p_type == PT_LOAD) {
+                               void *addr = (void *) phdrp[i].p_vaddr;
+                               size_t filesz = phdrp[i].p_filesz;
+
+                               *size += filesz;
+                               if (*size > limit
+                                   || !dump_write(file, addr, filesz))
+                                       return 0;
+                       }
+               }
+       }
+       return 1;
+}
+
+size_t elf_core_extra_data_size(void)
+{
+       if ( vsyscall_ehdr ) {
+               const struct elfhdr *const ehdrp =
+                       (struct elfhdr *)vsyscall_ehdr;
+               const struct elf_phdr *const phdrp =
+                       (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
+               int i;
+
+               for (i = 0; i < ehdrp->e_phnum; ++i)
+                       if (phdrp[i].p_type == PT_LOAD)
+                               return (size_t) phdrp[i].p_filesz;
+       }
+       return 0;
+}
diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c
new file mode 100644 (file)
index 0000000..d670f68
--- /dev/null
@@ -0,0 +1,28 @@
+/* 
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "sysdep/ptrace.h"
+
+/* These two are from asm-um/uaccess.h and linux/module.h, check them. */
+struct exception_table_entry
+{
+       unsigned long insn;
+       unsigned long fixup;
+};
+
+const struct exception_table_entry *search_exception_tables(unsigned long add);
+
+/* Compare this to arch/i386/mm/extable.c:fixup_exception() */
+int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
+{
+       const struct exception_table_entry *fixup;
+
+       fixup = search_exception_tables(address);
+       if (fixup != 0) {
+               UPT_IP(regs) = fixup->fixup;
+               return 1;
+       }
+       return 0;
+}
diff --git a/arch/x86/um/ksyms.c b/arch/x86/um/ksyms.c
new file mode 100644 (file)
index 0000000..2e8f43e
--- /dev/null
@@ -0,0 +1,13 @@
+#include <linux/module.h>
+#include <asm/string.h>
+#include <asm/checksum.h>
+
+#ifndef CONFIG_X86_32
+/*XXX: we need them because they would be exported by x86_64 */
+#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
+EXPORT_SYMBOL(memcpy);
+#else
+EXPORT_SYMBOL(__memcpy);
+#endif
+#endif
+EXPORT_SYMBOL(csum_partial);
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
new file mode 100644 (file)
index 0000000..26b0e39
--- /dev/null
@@ -0,0 +1,502 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <asm/unistd.h>
+#include "os.h"
+#include "proc_mm.h"
+#include "skas.h"
+#include "skas_ptrace.h"
+#include "sysdep/tls.h"
+
+extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
+
+static long write_ldt_entry(struct mm_id *mm_idp, int func,
+                    struct user_desc *desc, void **addr, int done)
+{
+       long res;
+
+       if (proc_mm) {
+               /*
+                * This is a special handling for the case, that the mm to
+                * modify isn't current->active_mm.
+                * If this is called directly by modify_ldt,
+                *     (current->active_mm->context.skas.u == mm_idp)
+                * will be true. So no call to __switch_mm(mm_idp) is done.
+                * If this is called in case of init_new_ldt or PTRACE_LDT,
+                * mm_idp won't belong to current->active_mm, but child->mm.
+                * So we need to switch child's mm into our userspace, then
+                * later switch back.
+                *
+                * Note: I'm unsure: should interrupts be disabled here?
+                */
+               if (!current->active_mm || current->active_mm == &init_mm ||
+                   mm_idp != &current->active_mm->context.id)
+                       __switch_mm(mm_idp);
+       }
+
+       if (ptrace_ldt) {
+               struct ptrace_ldt ldt_op = (struct ptrace_ldt) {
+                       .func = func,
+                       .ptr = desc,
+                       .bytecount = sizeof(*desc)};
+               u32 cpu;
+               int pid;
+
+               if (!proc_mm)
+                       pid = mm_idp->u.pid;
+               else {
+                       cpu = get_cpu();
+                       pid = userspace_pid[cpu];
+               }
+
+               res = os_ptrace_ldt(pid, 0, (unsigned long) &ldt_op);
+
+               if (proc_mm)
+                       put_cpu();
+       }
+       else {
+               void *stub_addr;
+               res = syscall_stub_data(mm_idp, (unsigned long *)desc,
+                                       (sizeof(*desc) + sizeof(long) - 1) &
+                                           ~(sizeof(long) - 1),
+                                       addr, &stub_addr);
+               if (!res) {
+                       unsigned long args[] = { func,
+                                                (unsigned long)stub_addr,
+                                                sizeof(*desc),
+                                                0, 0, 0 };
+                       res = run_syscall_stub(mm_idp, __NR_modify_ldt, args,
+                                              0, addr, done);
+               }
+       }
+
+       if (proc_mm) {
+               /*
+                * This is the second part of special handling, that makes
+                * PTRACE_LDT possible to implement.
+                */
+               if (current->active_mm && current->active_mm != &init_mm &&
+                   mm_idp != &current->active_mm->context.id)
+                       __switch_mm(&current->active_mm->context.id);
+       }
+
+       return res;
+}
+
+static long read_ldt_from_host(void __user * ptr, unsigned long bytecount)
+{
+       int res, n;
+       struct ptrace_ldt ptrace_ldt = (struct ptrace_ldt) {
+                       .func = 0,
+                       .bytecount = bytecount,
+                       .ptr = kmalloc(bytecount, GFP_KERNEL)};
+       u32 cpu;
+
+       if (ptrace_ldt.ptr == NULL)
+               return -ENOMEM;
+
+       /*
+        * This is called from sys_modify_ldt only, so userspace_pid gives
+        * us the right number
+        */
+
+       cpu = get_cpu();
+       res = os_ptrace_ldt(userspace_pid[cpu], 0, (unsigned long) &ptrace_ldt);
+       put_cpu();
+       if (res < 0)
+               goto out;
+
+       n = copy_to_user(ptr, ptrace_ldt.ptr, res);
+       if (n != 0)
+               res = -EFAULT;
+
+  out:
+       kfree(ptrace_ldt.ptr);
+
+       return res;
+}
+
+/*
+ * In skas mode, we hold our own ldt data in UML.
+ * Thus, the code implementing sys_modify_ldt_skas
+ * is very similar to (and mostly stolen from) sys_modify_ldt
+ * for arch/i386/kernel/ldt.c
+ * The routines copied and modified in part are:
+ * - read_ldt
+ * - read_default_ldt
+ * - write_ldt
+ * - sys_modify_ldt_skas
+ */
+
+static int read_ldt(void __user * ptr, unsigned long bytecount)
+{
+       int i, err = 0;
+       unsigned long size;
+       uml_ldt_t *ldt = &current->mm->context.arch.ldt;
+
+       if (!ldt->entry_count)
+               goto out;
+       if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+               bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+       err = bytecount;
+
+       if (ptrace_ldt)
+               return read_ldt_from_host(ptr, bytecount);
+
+       mutex_lock(&ldt->lock);
+       if (ldt->entry_count <= LDT_DIRECT_ENTRIES) {
+               size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
+               if (size > bytecount)
+                       size = bytecount;
+               if (copy_to_user(ptr, ldt->u.entries, size))
+                       err = -EFAULT;
+               bytecount -= size;
+               ptr += size;
+       }
+       else {
+               for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount;
+                    i++) {
+                       size = PAGE_SIZE;
+                       if (size > bytecount)
+                               size = bytecount;
+                       if (copy_to_user(ptr, ldt->u.pages[i], size)) {
+                               err = -EFAULT;
+                               break;
+                       }
+                       bytecount -= size;
+                       ptr += size;
+               }
+       }
+       mutex_unlock(&ldt->lock);
+
+       if (bytecount == 0 || err == -EFAULT)
+               goto out;
+
+       if (clear_user(ptr, bytecount))
+               err = -EFAULT;
+
+out:
+       return err;
+}
+
+static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+{
+       int err;
+
+       if (bytecount > 5*LDT_ENTRY_SIZE)
+               bytecount = 5*LDT_ENTRY_SIZE;
+
+       err = bytecount;
+       /*
+        * UML doesn't support lcall7 and lcall27.
+        * So, we don't really have a default ldt, but emulate
+        * an empty ldt of common host default ldt size.
+        */
+       if (clear_user(ptr, bytecount))
+               err = -EFAULT;
+
+       return err;
+}
+
+static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
+{
+       uml_ldt_t *ldt = &current->mm->context.arch.ldt;
+       struct mm_id * mm_idp = &current->mm->context.id;
+       int i, err;
+       struct user_desc ldt_info;
+       struct ldt_entry entry0, *ldt_p;
+       void *addr = NULL;
+
+       err = -EINVAL;
+       if (bytecount != sizeof(ldt_info))
+               goto out;
+       err = -EFAULT;
+       if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+               goto out;
+
+       err = -EINVAL;
+       if (ldt_info.entry_number >= LDT_ENTRIES)
+               goto out;
+       if (ldt_info.contents == 3) {
+               if (func == 1)
+                       goto out;
+               if (ldt_info.seg_not_present == 0)
+                       goto out;
+       }
+
+       if (!ptrace_ldt)
+               mutex_lock(&ldt->lock);
+
+       err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
+       if (err)
+               goto out_unlock;
+       else if (ptrace_ldt) {
+               /* With PTRACE_LDT available, this is used as a flag only */
+               ldt->entry_count = 1;
+               goto out;
+       }
+
+       if (ldt_info.entry_number >= ldt->entry_count &&
+           ldt_info.entry_number >= LDT_DIRECT_ENTRIES) {
+               for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE;
+                    i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number;
+                    i++) {
+                       if (i == 0)
+                               memcpy(&entry0, ldt->u.entries,
+                                      sizeof(entry0));
+                       ldt->u.pages[i] = (struct ldt_entry *)
+                               __get_free_page(GFP_KERNEL|__GFP_ZERO);
+                       if (!ldt->u.pages[i]) {
+                               err = -ENOMEM;
+                               /* Undo the change in host */
+                               memset(&ldt_info, 0, sizeof(ldt_info));
+                               write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1);
+                               goto out_unlock;
+                       }
+                       if (i == 0) {
+                               memcpy(ldt->u.pages[0], &entry0,
+                                      sizeof(entry0));
+                               memcpy(ldt->u.pages[0]+1, ldt->u.entries+1,
+                                      sizeof(entry0)*(LDT_DIRECT_ENTRIES-1));
+                       }
+                       ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE;
+               }
+       }
+       if (ldt->entry_count <= ldt_info.entry_number)
+               ldt->entry_count = ldt_info.entry_number + 1;
+
+       if (ldt->entry_count <= LDT_DIRECT_ENTRIES)
+               ldt_p = ldt->u.entries + ldt_info.entry_number;
+       else
+               ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] +
+                       ldt_info.entry_number%LDT_ENTRIES_PER_PAGE;
+
+       if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
+          (func == 1 || LDT_empty(&ldt_info))) {
+               ldt_p->a = 0;
+               ldt_p->b = 0;
+       }
+       else{
+               if (func == 1)
+                       ldt_info.useable = 0;
+               ldt_p->a = LDT_entry_a(&ldt_info);
+               ldt_p->b = LDT_entry_b(&ldt_info);
+       }
+       err = 0;
+
+out_unlock:
+       mutex_unlock(&ldt->lock);
+out:
+       return err;
+}
+
+static long do_modify_ldt_skas(int func, void __user *ptr,
+                              unsigned long bytecount)
+{
+       int ret = -ENOSYS;
+
+       switch (func) {
+               case 0:
+                       ret = read_ldt(ptr, bytecount);
+                       break;
+               case 1:
+               case 0x11:
+                       ret = write_ldt(ptr, bytecount, func);
+                       break;
+               case 2:
+                       ret = read_default_ldt(ptr, bytecount);
+                       break;
+       }
+       return ret;
+}
+
+static DEFINE_SPINLOCK(host_ldt_lock);
+static short dummy_list[9] = {0, -1};
+static short * host_ldt_entries = NULL;
+
+static void ldt_get_host_info(void)
+{
+       long ret;
+       struct ldt_entry * ldt;
+       short *tmp;
+       int i, size, k, order;
+
+       spin_lock(&host_ldt_lock);
+
+       if (host_ldt_entries != NULL) {
+               spin_unlock(&host_ldt_lock);
+               return;
+       }
+       host_ldt_entries = dummy_list+1;
+
+       spin_unlock(&host_ldt_lock);
+
+       for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++)
+               ;
+
+       ldt = (struct ldt_entry *)
+             __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+       if (ldt == NULL) {
+               printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer "
+                      "for host ldt\n");
+               return;
+       }
+
+       ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE);
+       if (ret < 0) {
+               printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n");
+               goto out_free;
+       }
+       if (ret == 0) {
+               /* default_ldt is active, simply write an empty entry 0 */
+               host_ldt_entries = dummy_list;
+               goto out_free;
+       }
+
+       for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) {
+               if (ldt[i].a != 0 || ldt[i].b != 0)
+                       size++;
+       }
+
+       if (size < ARRAY_SIZE(dummy_list))
+               host_ldt_entries = dummy_list;
+       else {
+               size = (size + 1) * sizeof(dummy_list[0]);
+               tmp = kmalloc(size, GFP_KERNEL);
+               if (tmp == NULL) {
+                       printk(KERN_ERR "ldt_get_host_info: couldn't allocate "
+                              "host ldt list\n");
+                       goto out_free;
+               }
+               host_ldt_entries = tmp;
+       }
+
+       for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) {
+               if (ldt[i].a != 0 || ldt[i].b != 0)
+                       host_ldt_entries[k++] = i;
+       }
+       host_ldt_entries[k] = -1;
+
+out_free:
+       free_pages((unsigned long)ldt, order);
+}
+
+long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
+{
+       struct user_desc desc;
+       short * num_p;
+       int i;
+       long page, err=0;
+       void *addr = NULL;
+       struct proc_mm_op copy;
+
+
+       if (!ptrace_ldt)
+               mutex_init(&new_mm->arch.ldt.lock);
+
+       if (!from_mm) {
+               memset(&desc, 0, sizeof(desc));
+               /*
+                * We have to initialize a clean ldt.
+                */
+               if (proc_mm) {
+                       /*
+                        * If the new mm was created using proc_mm, host's
+                        * default-ldt currently is assigned, which normally
+                        * contains the call-gates for lcall7 and lcall27.
+                        * To remove these gates, we simply write an empty
+                        * entry as number 0 to the host.
+                        */
+                       err = write_ldt_entry(&new_mm->id, 1, &desc, &addr, 1);
+               }
+               else{
+                       /*
+                        * Now we try to retrieve info about the ldt, we
+                        * inherited from the host. All ldt-entries found
+                        * will be reset in the following loop
+                        */
+                       ldt_get_host_info();
+                       for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
+                               desc.entry_number = *num_p;
+                               err = write_ldt_entry(&new_mm->id, 1, &desc,
+                                                     &addr, *(num_p + 1) == -1);
+                               if (err)
+                                       break;
+                       }
+               }
+               new_mm->arch.ldt.entry_count = 0;
+
+               goto out;
+       }
+
+       if (proc_mm) {
+               /*
+                * We have a valid from_mm, so we now have to copy the LDT of
+                * from_mm to new_mm, because using proc_mm an new mm with
+                * an empty/default LDT was created in new_mm()
+                */
+               copy = ((struct proc_mm_op) { .op       = MM_COPY_SEGMENTS,
+                                             .u        =
+                                             { .copy_segments =
+                                                       from_mm->id.u.mm_fd } } );
+               i = os_write_file(new_mm->id.u.mm_fd, &copy, sizeof(copy));
+               if (i != sizeof(copy))
+                       printk(KERN_ERR "new_mm : /proc/mm copy_segments "
+                              "failed, err = %d\n", -i);
+       }
+
+       if (!ptrace_ldt) {
+               /*
+                * Our local LDT is used to supply the data for
+                * modify_ldt(READLDT), if PTRACE_LDT isn't available,
+                * i.e., we have to use the stub for modify_ldt, which
+                * can't handle the big read buffer of up to 64kB.
+                */
+               mutex_lock(&from_mm->arch.ldt.lock);
+               if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES)
+                       memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
+                              sizeof(new_mm->arch.ldt.u.entries));
+               else {
+                       i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
+                       while (i-->0) {
+                               page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
+                               if (!page) {
+                                       err = -ENOMEM;
+                                       break;
+                               }
+                               new_mm->arch.ldt.u.pages[i] =
+                                       (struct ldt_entry *) page;
+                               memcpy(new_mm->arch.ldt.u.pages[i],
+                                      from_mm->arch.ldt.u.pages[i], PAGE_SIZE);
+                       }
+               }
+               new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count;
+               mutex_unlock(&from_mm->arch.ldt.lock);
+       }
+
+    out:
+       return err;
+}
+
+
+void free_ldt(struct mm_context *mm)
+{
+       int i;
+
+       if (!ptrace_ldt && mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) {
+               i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
+               while (i-- > 0)
+                       free_page((long) mm->arch.ldt.u.pages[i]);
+       }
+       mm->arch.ldt.entry_count = 0;
+}
+
+int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+{
+       return do_modify_ldt_skas(func, ptr, bytecount);
+}
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c
new file mode 100644 (file)
index 0000000..639900a
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/mman.h>
+
+static struct vm_area_struct gate_vma;
+
+static int __init gate_vma_init(void)
+{
+       if (!FIXADDR_USER_START)
+               return 0;
+
+       gate_vma.vm_mm = NULL;
+       gate_vma.vm_start = FIXADDR_USER_START;
+       gate_vma.vm_end = FIXADDR_USER_END;
+       gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+       gate_vma.vm_page_prot = __P101;
+
+       /*
+        * Make sure the vDSO gets into every core dump.
+        * Dumping its contents makes post-mortem fully interpretable later
+        * without matching up the same kernel and hardware config to see
+        * what PC values meant.
+        */
+       gate_vma.vm_flags |= VM_ALWAYSDUMP;
+
+       return 0;
+}
+__initcall(gate_vma_init);
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+       return FIXADDR_USER_START ? &gate_vma : NULL;
+}
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+       if (!FIXADDR_USER_START)
+               return 0;
+
+       if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
+               return 1;
+
+       return 0;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+       struct vm_area_struct *vma = get_gate_vma(mm);
+
+       if (!vma)
+               return 0;
+
+       return (addr >= vma->vm_start) && (addr < vma->vm_end);
+}
diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c
new file mode 100644 (file)
index 0000000..5465187
--- /dev/null
@@ -0,0 +1,26 @@
+#include "linux/mm.h"
+#include "asm/page.h"
+#include "asm/mman.h"
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+       if (vma->vm_mm && vma->vm_start == um_vdso_addr)
+               return "[vdso]";
+
+       return NULL;
+}
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+       return NULL;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+       return 0;
+}
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+       return 0;
+}
diff --git a/arch/x86/um/os-Linux/Makefile b/arch/x86/um/os-Linux/Makefile
new file mode 100644 (file)
index 0000000..253bfb8
--- /dev/null
@@ -0,0 +1,13 @@
+#
+# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+# Licensed under the GPL
+#
+
+obj-y = registers.o task_size.o mcontext.o
+
+obj-$(CONFIG_X86_32) += tls.o
+obj-$(CONFIG_64BIT) += prctl.o
+
+USER_OBJS := $(obj-y)
+
+include arch/um/scripts/Makefile.rules
diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c
new file mode 100644 (file)
index 0000000..1d33d72
--- /dev/null
@@ -0,0 +1,31 @@
+#include <sys/ucontext.h>
+#define __FRAME_OFFSETS
+#include <asm/ptrace.h>
+#include <sysdep/ptrace.h>
+
+void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
+{
+#ifdef __i386__
+#define COPY2(X,Y) regs->gp[X] = mc->gregs[REG_##Y]
+#define COPY(X) regs->gp[X] = mc->gregs[REG_##X]
+#define COPY_SEG(X) regs->gp[X] = mc->gregs[REG_##X] & 0xffff;
+#define COPY_SEG_CPL3(X) regs->gp[X] = (mc->gregs[REG_##X] & 0xffff) | 3;
+       COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS);
+       COPY(EDI); COPY(ESI); COPY(EBP);
+       COPY2(UESP, ESP); /* sic */
+       COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
+       COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
+#else
+#define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y]
+#define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X]
+       COPY(R8); COPY(R9); COPY(R10); COPY(R11);
+       COPY(R12); COPY(R13); COPY(R14); COPY(R15);
+       COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX);
+       COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP);
+       COPY(RIP);
+       COPY2(EFLAGS, EFL);
+       COPY2(CS, CSGSFS);
+       regs->gp[CS / sizeof(unsigned long)] &= 0xffff;
+       regs->gp[CS / sizeof(unsigned long)] |= 3;
+#endif
+}
diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c
new file mode 100644 (file)
index 0000000..9d34edd
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2007 Jeff Dike (jdike@{addtoit.com,linux.intel.com})
+ * Licensed under the GPL
+ */
+
+#include <sys/ptrace.h>
+#include <linux/ptrace.h>
+
+int os_arch_prctl(int pid, int code, unsigned long *addr)
+{
+        return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code);
+}
diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
new file mode 100644 (file)
index 0000000..0cdbb86
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2004 PathScale, Inc
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <errno.h>
+#include <sys/ptrace.h>
+#ifdef __i386__
+#include <sys/user.h>
+#endif
+#include "longjmp.h"
+#include "sysdep/ptrace_user.h"
+
+int save_fp_registers(int pid, unsigned long *fp_regs)
+{
+       if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
+               return -errno;
+       return 0;
+}
+
+int restore_fp_registers(int pid, unsigned long *fp_regs)
+{
+       if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
+               return -errno;
+       return 0;
+}
+
+#ifdef __i386__
+int have_fpx_regs = 1;
+int save_fpx_registers(int pid, unsigned long *fp_regs)
+{
+       if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0)
+               return -errno;
+       return 0;
+}
+
+int restore_fpx_registers(int pid, unsigned long *fp_regs)
+{
+       if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0)
+               return -errno;
+       return 0;
+}
+
+int get_fp_registers(int pid, unsigned long *regs)
+{
+       if (have_fpx_regs)
+               return save_fpx_registers(pid, regs);
+       else
+               return save_fp_registers(pid, regs);
+}
+
+int put_fp_registers(int pid, unsigned long *regs)
+{
+       if (have_fpx_regs)
+               return restore_fpx_registers(pid, regs);
+       else
+               return restore_fp_registers(pid, regs);
+}
+
+void arch_init_registers(int pid)
+{
+       struct user_fpxregs_struct fpx_regs;
+       int err;
+
+       err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs);
+       if (!err)
+               return;
+
+       if (errno != EIO)
+               panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d",
+                     errno);
+
+       have_fpx_regs = 0;
+}
+#else
+
+int get_fp_registers(int pid, unsigned long *regs)
+{
+       return save_fp_registers(pid, regs);
+}
+
+int put_fp_registers(int pid, unsigned long *regs)
+{
+       return restore_fp_registers(pid, regs);
+}
+
+#endif
+
+unsigned long get_thread_reg(int reg, jmp_buf *buf)
+{
+       switch (reg) {
+#ifdef __i386__
+       case HOST_IP:
+               return buf[0]->__eip;
+       case HOST_SP:
+               return buf[0]->__esp;
+       case HOST_BP:
+               return buf[0]->__ebp;
+#else
+       case HOST_IP:
+               return buf[0]->__rip;
+       case HOST_SP:
+               return buf[0]->__rsp;
+       case HOST_BP:
+               return buf[0]->__rbp;
+#endif
+       default:
+               printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
+                      reg);
+               return 0;
+       }
+}
diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c
new file mode 100644 (file)
index 0000000..efb16c5
--- /dev/null
@@ -0,0 +1,150 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include "longjmp.h"
+
+#ifdef __i386__
+
+static jmp_buf buf;
+
+static void segfault(int sig)
+{
+       longjmp(buf, 1);
+}
+
+static int page_ok(unsigned long page)
+{
+       unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT);
+       unsigned long n = ~0UL;
+       void *mapped = NULL;
+       int ok = 0;
+
+       /*
+        * First see if the page is readable.  If it is, it may still
+        * be a VDSO, so we go on to see if it's writable.  If not
+        * then try mapping memory there.  If that fails, then we're
+        * still in the kernel area.  As a sanity check, we'll fail if
+        * the mmap succeeds, but gives us an address different from
+        * what we wanted.
+        */
+       if (setjmp(buf) == 0)
+               n = *address;
+       else {
+               mapped = mmap(address, UM_KERN_PAGE_SIZE,
+                             PROT_READ | PROT_WRITE,
+                             MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+               if (mapped == MAP_FAILED)
+                       return 0;
+               if (mapped != address)
+                       goto out;
+       }
+
+       /*
+        * Now, is it writeable?  If so, then we're in user address
+        * space.  If not, then try mprotecting it and try the write
+        * again.
+        */
+       if (setjmp(buf) == 0) {
+               *address = n;
+               ok = 1;
+               goto out;
+       } else if (mprotect(address, UM_KERN_PAGE_SIZE,
+                           PROT_READ | PROT_WRITE) != 0)
+               goto out;
+
+       if (setjmp(buf) == 0) {
+               *address = n;
+               ok = 1;
+       }
+
+ out:
+       if (mapped != NULL)
+               munmap(mapped, UM_KERN_PAGE_SIZE);
+       return ok;
+}
+
+unsigned long os_get_top_address(void)
+{
+       struct sigaction sa, old;
+       unsigned long bottom = 0;
+       /*
+        * A 32-bit UML on a 64-bit host gets confused about the VDSO at
+        * 0xffffe000.  It is mapped, is readable, can be reprotected writeable
+        * and written.  However, exec discovers later that it can't be
+        * unmapped.  So, just set the highest address to be checked to just
+        * below it.  This might waste some address space on 4G/4G 32-bit
+        * hosts, but shouldn't hurt otherwise.
+        */
+       unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT;
+       unsigned long test, original;
+
+       printf("Locating the bottom of the address space ... ");
+       fflush(stdout);
+
+       /*
+        * We're going to be longjmping out of the signal handler, so
+        * SA_DEFER needs to be set.
+        */
+       sa.sa_handler = segfault;
+       sigemptyset(&sa.sa_mask);
+       sa.sa_flags = SA_NODEFER;
+       if (sigaction(SIGSEGV, &sa, &old)) {
+               perror("os_get_top_address");
+               exit(1);
+       }
+
+       /* Manually scan the address space, bottom-up, until we find
+        * the first valid page (or run out of them).
+        */
+       for (bottom = 0; bottom < top; bottom++) {
+               if (page_ok(bottom))
+                       break;
+       }
+
+       /* If we've got this far, we ran out of pages. */
+       if (bottom == top) {
+               fprintf(stderr, "Unable to determine bottom of address "
+                       "space.\n");
+               exit(1);
+       }
+
+       printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT);
+       printf("Locating the top of the address space ... ");
+       fflush(stdout);
+
+       original = bottom;
+
+       /* This could happen with a 4G/4G split */
+       if (page_ok(top))
+               goto out;
+
+       do {
+               test = bottom + (top - bottom) / 2;
+               if (page_ok(test))
+                       bottom = test;
+               else
+                       top = test;
+       } while (top - bottom > 1);
+
+out:
+       /* Restore the old SIGSEGV handling */
+       if (sigaction(SIGSEGV, &old, NULL)) {
+               perror("os_get_top_address");
+               exit(1);
+       }
+       top <<= UM_KERN_PAGE_SHIFT;
+       printf("0x%x\n", top);
+
+       return top;
+}
+
+#else
+
+unsigned long os_get_top_address(void)
+{
+       /* The old value of CONFIG_TOP_ADDR */
+       return 0x7fc0000000;
+}
+
+#endif
diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c
new file mode 100644 (file)
index 0000000..82276b6
--- /dev/null
@@ -0,0 +1,67 @@
+#include <errno.h>
+#include <linux/unistd.h>
+
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "sysdep/tls.h"
+
+#ifndef PTRACE_GET_THREAD_AREA
+#define PTRACE_GET_THREAD_AREA 25
+#endif
+
+#ifndef PTRACE_SET_THREAD_AREA
+#define PTRACE_SET_THREAD_AREA 26
+#endif
+
+/* Checks whether host supports TLS, and sets *tls_min according to the value
+ * valid on the host.
+ * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */
+void check_host_supports_tls(int *supports_tls, int *tls_min)
+{
+       /* Values for x86 and x86_64.*/
+       int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64};
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(val); i++) {
+               user_desc_t info;
+               info.entry_number = val[i];
+
+               if (syscall(__NR_get_thread_area, &info) == 0) {
+                       *tls_min = val[i];
+                       *supports_tls = 1;
+                       return;
+               } else {
+                       if (errno == EINVAL)
+                               continue;
+                       else if (errno == ENOSYS)
+                               *supports_tls = 0;
+                               return;
+               }
+       }
+
+       *supports_tls = 0;
+}
+
+int os_set_thread_area(user_desc_t *info, int pid)
+{
+       int ret;
+
+       ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number,
+                    (unsigned long) info);
+       if (ret < 0)
+               ret = -errno;
+       return ret;
+}
+
+int os_get_thread_area(user_desc_t *info, int pid)
+{
+       int ret;
+
+       ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number,
+                    (unsigned long) info);
+       if (ret < 0)
+               ret = -errno;
+       return ret;
+}
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
new file mode 100644 (file)
index 0000000..3b949da
--- /dev/null
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/mm.h"
+#include "linux/sched.h"
+#include "asm/uaccess.h"
+#include "skas.h"
+
+extern int arch_switch_tls(struct task_struct *to);
+
+void arch_switch_to(struct task_struct *to)
+{
+       int err = arch_switch_tls(to);
+       if (!err)
+               return;
+
+       if (err != -EINVAL)
+               printk(KERN_WARNING "arch_switch_tls failed, errno %d, "
+                      "not EINVAL\n", -err);
+       else
+               printk(KERN_WARNING "arch_switch_tls failed, errno = EINVAL\n");
+}
+
+int is_syscall(unsigned long addr)
+{
+       unsigned short instr;
+       int n;
+
+       n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
+       if (n) {
+               /* access_process_vm() grants access to vsyscall and stub,
+                * while copy_from_user doesn't. Maybe access_process_vm is
+                * slow, but that doesn't matter, since it will be called only
+                * in case of singlestepping, if copy_from_user failed.
+                */
+               n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+               if (n != sizeof(instr)) {
+                       printk(KERN_ERR "is_syscall : failed to read "
+                              "instruction from 0x%lx\n", addr);
+                       return 1;
+               }
+       }
+       /* int 0x80 or sysenter */
+       return (instr == 0x80cd) || (instr == 0x340f);
+}
+
+/* determines which flags the user has access to. */
+/* 1 = access 0 = no access */
+#define FLAG_MASK 0x00044dd5
+
+static const int reg_offsets[] = {
+       [EBX] = HOST_BX,
+       [ECX] = HOST_CX,
+       [EDX] = HOST_DX,
+       [ESI] = HOST_SI,
+       [EDI] = HOST_DI,
+       [EBP] = HOST_BP,
+       [EAX] = HOST_AX,
+       [DS] = HOST_DS,
+       [ES] = HOST_ES,
+       [FS] = HOST_FS,
+       [GS] = HOST_GS,
+       [EIP] = HOST_IP,
+       [CS] = HOST_CS,
+       [EFL] = HOST_EFLAGS,
+       [UESP] = HOST_SP,
+       [SS] = HOST_SS,
+};
+
+int putreg(struct task_struct *child, int regno, unsigned long value)
+{
+       regno >>= 2;
+       switch (regno) {
+       case EBX:
+       case ECX:
+       case EDX:
+       case ESI:
+       case EDI:
+       case EBP:
+       case EAX:
+       case EIP:
+       case UESP:
+               break;
+       case FS:
+               if (value && (value & 3) != 3)
+                       return -EIO;
+               break;
+       case GS:
+               if (value && (value & 3) != 3)
+                       return -EIO;
+               break;
+       case DS:
+       case ES:
+               if (value && (value & 3) != 3)
+                       return -EIO;
+               value &= 0xffff;
+               break;
+       case SS:
+       case CS:
+               if ((value & 3) != 3)
+                       return -EIO;
+               value &= 0xffff;
+               break;
+       case EFL:
+               value &= FLAG_MASK;
+               child->thread.regs.regs.gp[HOST_EFLAGS] |= value;
+               return 0;
+       case ORIG_EAX:
+               child->thread.regs.regs.syscall = value;
+               return 0;
+       default :
+               panic("Bad register in putreg() : %d\n", regno);
+       }
+       child->thread.regs.regs.gp[reg_offsets[regno]] = value;
+       return 0;
+}
+
+int poke_user(struct task_struct *child, long addr, long data)
+{
+       if ((addr & 3) || addr < 0)
+               return -EIO;
+
+       if (addr < MAX_REG_OFFSET)
+               return putreg(child, addr, data);
+       else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
+                (addr <= offsetof(struct user, u_debugreg[7]))) {
+               addr -= offsetof(struct user, u_debugreg[0]);
+               addr = addr >> 2;
+               if ((addr == 4) || (addr == 5))
+                       return -EIO;
+               child->thread.arch.debugregs[addr] = data;
+               return 0;
+       }
+       return -EIO;
+}
+
+unsigned long getreg(struct task_struct *child, int regno)
+{
+       unsigned long mask = ~0UL;
+
+       regno >>= 2;
+       switch (regno) {
+       case ORIG_EAX:
+               return child->thread.regs.regs.syscall;
+       case FS:
+       case GS:
+       case DS:
+       case ES:
+       case SS:
+       case CS:
+               mask = 0xffff;
+               break;
+       case EIP:
+       case UESP:
+       case EAX:
+       case EBX:
+       case ECX:
+       case EDX:
+       case ESI:
+       case EDI:
+       case EBP:
+       case EFL:
+               break;
+       default:
+               panic("Bad register in getreg() : %d\n", regno);
+       }
+       return mask & child->thread.regs.regs.gp[reg_offsets[regno]];
+}
+
+/* read the word at location addr in the USER area. */
+int peek_user(struct task_struct *child, long addr, long data)
+{
+       unsigned long tmp;
+
+       if ((addr & 3) || addr < 0)
+               return -EIO;
+
+       tmp = 0;  /* Default return condition */
+       if (addr < MAX_REG_OFFSET) {
+               tmp = getreg(child, addr);
+       }
+       else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
+                (addr <= offsetof(struct user, u_debugreg[7]))) {
+               addr -= offsetof(struct user, u_debugreg[0]);
+               addr = addr >> 2;
+               tmp = child->thread.arch.debugregs[addr];
+       }
+       return put_user(tmp, (unsigned long __user *) data);
+}
+
+static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+{
+       int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+       struct user_i387_struct fpregs;
+
+       err = save_fp_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
+       if (err)
+               return err;
+
+       n = copy_to_user(buf, &fpregs, sizeof(fpregs));
+       if(n > 0)
+               return -EFAULT;
+
+       return n;
+}
+
+static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+{
+       int n, cpu = ((struct thread_info *) child->stack)->cpu;
+       struct user_i387_struct fpregs;
+
+       n = copy_from_user(&fpregs, buf, sizeof(fpregs));
+       if (n > 0)
+               return -EFAULT;
+
+       return restore_fp_registers(userspace_pid[cpu],
+                                   (unsigned long *) &fpregs);
+}
+
+static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
+{
+       int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+       struct user_fxsr_struct fpregs;
+
+       err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
+       if (err)
+               return err;
+
+       n = copy_to_user(buf, &fpregs, sizeof(fpregs));
+       if(n > 0)
+               return -EFAULT;
+
+       return n;
+}
+
+static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
+{
+       int n, cpu = ((struct thread_info *) child->stack)->cpu;
+       struct user_fxsr_struct fpregs;
+
+       n = copy_from_user(&fpregs, buf, sizeof(fpregs));
+       if (n > 0)
+               return -EFAULT;
+
+       return restore_fpx_registers(userspace_pid[cpu],
+                                    (unsigned long *) &fpregs);
+}
+
+long subarch_ptrace(struct task_struct *child, long request,
+                   unsigned long addr, unsigned long data)
+{
+       int ret = -EIO;
+       void __user *datap = (void __user *) data;
+       switch (request) {
+       case PTRACE_GETFPREGS: /* Get the child FPU state. */
+               ret = get_fpregs(datap, child);
+               break;
+       case PTRACE_SETFPREGS: /* Set the child FPU state. */
+               ret = set_fpregs(datap, child);
+               break;
+       case PTRACE_GETFPXREGS: /* Get the child FPU state. */
+               ret = get_fpxregs(datap, child);
+               break;
+       case PTRACE_SETFPXREGS: /* Set the child FPU state. */
+               ret = set_fpxregs(datap, child);
+               break;
+       default:
+               ret = -EIO;
+       }
+       return ret;
+}
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
new file mode 100644 (file)
index 0000000..3b52bf0
--- /dev/null
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2003 PathScale, Inc.
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ *
+ * Licensed under the GPL
+ */
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#define __FRAME_OFFSETS
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+/*
+ * determines which flags the user has access to.
+ * 1 = access 0 = no access
+ */
+#define FLAG_MASK 0x44dd5UL
+
+static const int reg_offsets[] =
+{
+       [R8 >> 3] = HOST_R8,
+       [R9 >> 3] = HOST_R9,
+       [R10 >> 3] = HOST_R10,
+       [R11 >> 3] = HOST_R11,
+       [R12 >> 3] = HOST_R12,
+       [R13 >> 3] = HOST_R13,
+       [R14 >> 3] = HOST_R14,
+       [R15 >> 3] = HOST_R15,
+       [RIP >> 3] = HOST_IP,
+       [RSP >> 3] = HOST_SP,
+       [RAX >> 3] = HOST_AX,
+       [RBX >> 3] = HOST_BX,
+       [RCX >> 3] = HOST_CX,
+       [RDX >> 3] = HOST_DX,
+       [RSI >> 3] = HOST_SI,
+       [RDI >> 3] = HOST_DI,
+       [RBP >> 3] = HOST_BP,
+       [CS >> 3] = HOST_CS,
+       [SS >> 3] = HOST_SS,
+       [FS_BASE >> 3] = HOST_FS_BASE,
+       [GS_BASE >> 3] = HOST_GS_BASE,
+       [DS >> 3] = HOST_DS,
+       [ES >> 3] = HOST_ES,
+       [FS >> 3] = HOST_FS,
+       [GS >> 3] = HOST_GS,
+       [EFLAGS >> 3] = HOST_EFLAGS,
+       [ORIG_RAX >> 3] = HOST_ORIG_AX,
+};
+
+int putreg(struct task_struct *child, int regno, unsigned long value)
+{
+#ifdef TIF_IA32
+       /*
+        * Some code in the 64bit emulation may not be 64bit clean.
+        * Don't take any chances.
+        */
+       if (test_tsk_thread_flag(child, TIF_IA32))
+               value &= 0xffffffff;
+#endif
+       switch (regno) {
+       case R8:
+       case R9:
+       case R10:
+       case R11:
+       case R12:
+       case R13:
+       case R14:
+       case R15:
+       case RIP:
+       case RSP:
+       case RAX:
+       case RBX:
+       case RCX:
+       case RDX:
+       case RSI:
+       case RDI:
+       case RBP:
+       case ORIG_RAX:
+               break;
+
+       case FS:
+       case GS:
+       case DS:
+       case ES:
+       case SS:
+       case CS:
+               if (value && (value & 3) != 3)
+                       return -EIO;
+               value &= 0xffff;
+               break;
+
+       case FS_BASE:
+       case GS_BASE:
+               if (!((value >> 48) == 0 || (value >> 48) == 0xffff))
+                       return -EIO;
+               break;
+
+       case EFLAGS:
+               value &= FLAG_MASK;
+               child->thread.regs.regs.gp[HOST_EFLAGS] |= value;
+               return 0;
+
+       default:
+               panic("Bad register in putreg(): %d\n", regno);
+       }
+
+       child->thread.regs.regs.gp[reg_offsets[regno >> 3]] = value;
+       return 0;
+}
+
+int poke_user(struct task_struct *child, long addr, long data)
+{
+       if ((addr & 3) || addr < 0)
+               return -EIO;
+
+       if (addr < MAX_REG_OFFSET)
+               return putreg(child, addr, data);
+       else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
+               (addr <= offsetof(struct user, u_debugreg[7]))) {
+               addr -= offsetof(struct user, u_debugreg[0]);
+               addr = addr >> 2;
+               if ((addr == 4) || (addr == 5))
+                       return -EIO;
+               child->thread.arch.debugregs[addr] = data;
+               return 0;
+       }
+       return -EIO;
+}
+
+unsigned long getreg(struct task_struct *child, int regno)
+{
+       unsigned long mask = ~0UL;
+#ifdef TIF_IA32
+       if (test_tsk_thread_flag(child, TIF_IA32))
+               mask = 0xffffffff;
+#endif
+       switch (regno) {
+       case R8:
+       case R9:
+       case R10:
+       case R11:
+       case R12:
+       case R13:
+       case R14:
+       case R15:
+       case RIP:
+       case RSP:
+       case RAX:
+       case RBX:
+       case RCX:
+       case RDX:
+       case RSI:
+       case RDI:
+       case RBP:
+       case ORIG_RAX:
+       case EFLAGS:
+       case FS_BASE:
+       case GS_BASE:
+               break;
+       case FS:
+       case GS:
+       case DS:
+       case ES:
+       case SS:
+       case CS:
+               mask = 0xffff;
+               break;
+       default:
+               panic("Bad register in getreg: %d\n", regno);
+       }
+       return mask & child->thread.regs.regs.gp[reg_offsets[regno >> 3]];
+}
+
+int peek_user(struct task_struct *child, long addr, long data)
+{
+       /* read the word at location addr in the USER area. */
+       unsigned long tmp;
+
+       if ((addr & 3) || addr < 0)
+               return -EIO;
+
+       tmp = 0;  /* Default return condition */
+       if (addr < MAX_REG_OFFSET)
+               tmp = getreg(child, addr);
+       else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
+               (addr <= offsetof(struct user, u_debugreg[7]))) {
+               addr -= offsetof(struct user, u_debugreg[0]);
+               addr = addr >> 2;
+               tmp = child->thread.arch.debugregs[addr];
+       }
+       return put_user(tmp, (unsigned long __user *) data);
+}
+
+/* XXX Mostly copied from sys-i386 */
+int is_syscall(unsigned long addr)
+{
+       unsigned short instr;
+       int n;
+
+       n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
+       if (n) {
+               /*
+                * access_process_vm() grants access to vsyscall and stub,
+                * while copy_from_user doesn't. Maybe access_process_vm is
+                * slow, but that doesn't matter, since it will be called only
+                * in case of singlestepping, if copy_from_user failed.
+                */
+               n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+               if (n != sizeof(instr)) {
+                       printk(KERN_ERR "is_syscall : failed to read "
+                              "instruction from 0x%lx\n", addr);
+                       return 1;
+               }
+       }
+       /* "syscall" (0x0f 0x05), read as a little-endian 16-bit value */
+       return instr == 0x050f;
+}
+
+static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+{
+       int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+       long fpregs[HOST_FP_SIZE];
+
+       BUG_ON(sizeof(*buf) != sizeof(fpregs));
+       err = save_fp_registers(userspace_pid[cpu], fpregs);
+       if (err)
+               return err;
+
+       n = copy_to_user(buf, fpregs, sizeof(fpregs));
+       if (n > 0)
+               return -EFAULT;
+
+       return n;
+}
+
+static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+{
+       int n, cpu = ((struct thread_info *) child->stack)->cpu;
+       long fpregs[HOST_FP_SIZE];
+
+       BUG_ON(sizeof(*buf) != sizeof(fpregs));
+       n = copy_from_user(fpregs, buf, sizeof(fpregs));
+       if (n > 0)
+               return -EFAULT;
+
+       return restore_fp_registers(userspace_pid[cpu], fpregs);
+}
+
+long subarch_ptrace(struct task_struct *child, long request,
+                   unsigned long addr, unsigned long data)
+{
+       int ret = -EIO;
+       void __user *datap = (void __user *) data;
+
+       switch (request) {
+       case PTRACE_GETFPREGS: /* Get the child FPU state. */
+               ret = get_fpregs(datap, child);
+               break;
+       case PTRACE_SETFPREGS: /* Set the child FPU state. */
+               ret = set_fpregs(datap, child);
+               break;
+       case PTRACE_ARCH_PRCTL:
+               /* XXX Calls ptrace on the host - needs some SMP thinking */
+               ret = arch_prctl(child, data, (void __user *) addr);
+               break;
+       }
+
+       return ret;
+}
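
The 64-bit peek_user()/poke_user() above expose the debug registers through the classic struct user window, so a tracer reaches them with PTRACE_PEEKUSER. A hedged sketch of the tracer side (pid hypothetical, tracee assumed stopped):

        #include <errno.h>
        #include <stddef.h>
        #include <sys/ptrace.h>
        #include <sys/types.h>
        #include <sys/user.h>

        /* Sketch: read debug register 7 of a stopped tracee. */
        static long read_dr7(pid_t pid)
        {
                long off = offsetof(struct user, u_debugreg[7]);

                errno = 0;
                return ptrace(PTRACE_PEEKUSER, pid, off, 0); /* check errno */
        }
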
diff --git a/arch/x86/um/ptrace_user.c b/arch/x86/um/ptrace_user.c
new file mode 100644 (file)
index 0000000..3960ca1
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <errno.h>
+#include "ptrace_user.h"
+
+int ptrace_getregs(long pid, unsigned long *regs_out)
+{
+       if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0)
+               return -errno;
+       return 0;
+}
+
+int ptrace_setregs(long pid, unsigned long *regs)
+{
+       if (ptrace(PTRACE_SETREGS, pid, 0, regs) < 0)
+               return -errno;
+       return 0;
+}
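
These two wrappers give UML's userspace side errno-style returns instead of ptrace's errno convention. A sketch of a typical caller, assuming REGS_IP_INDEX from sysdep/ptrace_user.h and MAX_REG_NR from sysdep/ptrace.h (the pid and the purpose are hypothetical):

        #include "sysdep/ptrace.h"
        #include "sysdep/ptrace_user.h"

        /* Sketch: nudge a stopped child's instruction pointer forward. */
        static int skip_one_byte(long pid)
        {
                unsigned long regs[MAX_REG_NR];
                int err;

                err = ptrace_getregs(pid, regs);
                if (err)
                        return err;
                regs[REGS_IP_INDEX] += 1;
                return ptrace_setregs(pid, regs);
        }
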
diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S
new file mode 100644 (file)
index 0000000..b766792
--- /dev/null
@@ -0,0 +1,58 @@
+#
+# arch/i386/setjmp.S
+#
+# setjmp/longjmp for the i386 architecture
+#
+
+#
+# The jmp_buf is assumed to contain the following, in order:
+#      %ebx
+#      %esp
+#      %ebp
+#      %esi
+#      %edi
+#      <return address>
+#
+
+       .text
+       .align 4
+       .globl setjmp
+       .type setjmp, @function
+setjmp:
+#ifdef _REGPARM
+       movl %eax,%edx
+#else
+       movl 4(%esp),%edx
+#endif
+       popl %ecx                       # Return address, and adjust the stack
+       xorl %eax,%eax                  # Return value
+       movl %ebx,(%edx)
+       movl %esp,4(%edx)               # Post-return %esp!
+       pushl %ecx                      # Make the call/return stack happy
+       movl %ebp,8(%edx)
+       movl %esi,12(%edx)
+       movl %edi,16(%edx)
+       movl %ecx,20(%edx)              # Return address
+       ret
+
+       .size setjmp,.-setjmp
+
+       .text
+       .align 4
+       .globl longjmp
+       .type longjmp, @function
+longjmp:
+#ifdef _REGPARM
+       xchgl %eax,%edx
+#else
+       movl 4(%esp),%edx               # jmp_ptr address
+       movl 8(%esp),%eax               # Return value
+#endif
+       movl (%edx),%ebx
+       movl 4(%edx),%esp
+       movl 8(%edx),%ebp
+       movl 12(%edx),%esi
+       movl 16(%edx),%edi
+       jmp *20(%edx)
+
+       .size longjmp,.-longjmp
diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S
new file mode 100644 (file)
index 0000000..45f547b
--- /dev/null
@@ -0,0 +1,54 @@
+#
+# arch/x86_64/setjmp.S
+#
+# setjmp/longjmp for the x86-64 architecture
+#
+
+#
+# The jmp_buf is assumed to contain the following, in order:
+#      %rbx
+#      %rsp (post-return)
+#      %rbp
+#      %r12
+#      %r13
+#      %r14
+#      %r15
+#      <return address>
+#
+
+       .text
+       .align 4
+       .globl setjmp
+       .type setjmp, @function
+setjmp:
+       pop  %rsi                       # Return address, and adjust the stack
+       xorl %eax,%eax                  # Return value
+       movq %rbx,(%rdi)
+       movq %rsp,8(%rdi)               # Post-return %rsp!
+       push %rsi                       # Make the call/return stack happy
+       movq %rbp,16(%rdi)
+       movq %r12,24(%rdi)
+       movq %r13,32(%rdi)
+       movq %r14,40(%rdi)
+       movq %r15,48(%rdi)
+       movq %rsi,56(%rdi)              # Return address
+       ret
+
+       .size setjmp,.-setjmp
+
+       .text
+       .align 4
+       .globl longjmp
+       .type longjmp, @function
+longjmp:
+       movl %esi,%eax                  # Return value (int)
+       movq (%rdi),%rbx
+       movq 8(%rdi),%rsp
+       movq 16(%rdi),%rbp
+       movq 24(%rdi),%r12
+       movq 32(%rdi),%r13
+       movq 40(%rdi),%r14
+       movq 48(%rdi),%r15
+       jmp *56(%rdi)
+
+       .size longjmp,.-longjmp
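
Both setjmp implementations above follow the usual contract: setjmp() returns 0 on the direct call and the value passed to longjmp() when control comes back through the saved context; neither saves the signal mask. A standalone sketch of that contract, written against libc's <setjmp.h> so it compiles anywhere:

        #include <setjmp.h>
        #include <stdio.h>

        int main(void)
        {
                jmp_buf env;
                int val = setjmp(env);          /* 0 on the direct call */

                if (val == 0) {
                        puts("first pass");
                        longjmp(env, 42);       /* resumes inside setjmp() */
                }
                printf("longjmp delivered %d\n", val);  /* prints 42 */
                return 0;
        }
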
diff --git a/arch/x86/um/shared/sysdep/archsetjmp.h b/arch/x86/um/shared/sysdep/archsetjmp.h
new file mode 100644 (file)
index 0000000..ff7766d
--- /dev/null
@@ -0,0 +1,5 @@
+#ifdef __i386__
+#include "archsetjmp_32.h"
+#else
+#include "archsetjmp_64.h"
+#endif
diff --git a/arch/x86/um/shared/sysdep/archsetjmp_32.h b/arch/x86/um/shared/sysdep/archsetjmp_32.h
new file mode 100644 (file)
index 0000000..0f31208
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * arch/um/include/sysdep-i386/archsetjmp.h
+ */
+
+#ifndef _KLIBC_ARCHSETJMP_H
+#define _KLIBC_ARCHSETJMP_H
+
+struct __jmp_buf {
+       unsigned int __ebx;
+       unsigned int __esp;
+       unsigned int __ebp;
+       unsigned int __esi;
+       unsigned int __edi;
+       unsigned int __eip;
+};
+
+typedef struct __jmp_buf jmp_buf[1];
+
+#define JB_IP __eip
+#define JB_SP __esp
+
+#endif                         /* _KLIBC_ARCHSETJMP_H */
diff --git a/arch/x86/um/shared/sysdep/archsetjmp_64.h b/arch/x86/um/shared/sysdep/archsetjmp_64.h
new file mode 100644 (file)
index 0000000..2af8f12
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * arch/um/include/sysdep-x86_64/archsetjmp.h
+ */
+
+#ifndef _KLIBC_ARCHSETJMP_H
+#define _KLIBC_ARCHSETJMP_H
+
+struct __jmp_buf {
+       unsigned long __rbx;
+       unsigned long __rsp;
+       unsigned long __rbp;
+       unsigned long __r12;
+       unsigned long __r13;
+       unsigned long __r14;
+       unsigned long __r15;
+       unsigned long __rip;
+};
+
+typedef struct __jmp_buf jmp_buf[1];
+
+#define JB_IP __rip
+#define JB_SP __rsp
+
+#endif                         /* _KLIBC_ARCHSETJMP_H */
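
UML does not only jump through these buffers; it also edits them, which is what the JB_IP and JB_SP aliases are for: pointing a saved context at a new entry point and stack before longjmp-ing into it. A hedged sketch of the idea (the helper name is hypothetical; the buffer is assumed to have been filled by setjmp() first):

        #include "sysdep/archsetjmp.h"

        /* Sketch: make the next longjmp(buf, 1) "return" into entry()
         * on a fresh stack. */
        static void retarget(jmp_buf buf, void (*entry)(void),
                             unsigned long stack_top)
        {
                buf[0].JB_IP = (unsigned long) entry;
                buf[0].JB_SP = stack_top;
        }
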
diff --git a/arch/x86/um/shared/sysdep/faultinfo.h b/arch/x86/um/shared/sysdep/faultinfo.h
new file mode 100644 (file)
index 0000000..862ecb1
--- /dev/null
@@ -0,0 +1,5 @@
+#ifdef __i386__
+#include "faultinfo_32.h"
+#else
+#include "faultinfo_64.h"
+#endif
diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h
new file mode 100644 (file)
index 0000000..a26086b
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
+ * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
+ * Licensed under the GPL
+ */
+
+#ifndef __FAULTINFO_I386_H
+#define __FAULTINFO_I386_H
+
+/* this structure contains the full arch-specific faultinfo
+ * from the traps.
+ * On i386, ptrace_faultinfo unfortunately doesn't provide
+ * all the info, since trap_no is missing.
+ * All common elements are defined at the same position in
+ * both structures, thus making it easy to copy the
+ * contents without knowledge about the structure elements.
+ */
+struct faultinfo {
+        int error_code; /* in ptrace_faultinfo misleadingly called is_write */
+        unsigned long cr2; /* in ptrace_faultinfo called addr */
+        int trap_no; /* missing in ptrace_faultinfo */
+};
+
+#define FAULT_WRITE(fi) ((fi).error_code & 2)
+#define FAULT_ADDRESS(fi) ((fi).cr2)
+
+/* This is Page Fault */
+#define SEGV_IS_FIXABLE(fi)    ((fi)->trap_no == 14)
+
+/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */
+#define SEGV_MAYBE_FIXABLE(fi) ((fi)->trap_no == 0 && ptrace_faultinfo)
+
+#define PTRACE_FULL_FAULTINFO 0
+
+#endif
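
A sketch of how a fault path might consume this structure through the accessors above (handle_fault() and fix_mapping() are hypothetical names; the real consumers live in UML's trap code):

        #include "sysdep/faultinfo.h"

        /* Sketch: classify a fault with the accessor macros. */
        static int handle_fault(struct faultinfo *fi)
        {
                unsigned long addr = FAULT_ADDRESS(*fi);
                int is_write = FAULT_WRITE(*fi);

                if (!SEGV_IS_FIXABLE(fi))       /* not a page fault */
                        return -1;
                return fix_mapping(addr, is_write); /* hypothetical helper */
        }
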
diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h
new file mode 100644 (file)
index 0000000..f811cbe
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
+ * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
+ * Licensed under the GPL
+ */
+
+#ifndef __FAULTINFO_X86_64_H
+#define __FAULTINFO_X86_64_H
+
+/* this structure contains the full arch-specific faultinfo
+ * from the traps.
+ * On i386, ptrace_faultinfo unfortunately doesn't provide
+ * all the info, since trap_no is missing.
+ * All common elements are defined at the same position in
+ * both structures, thus making it easy to copy the
+ * contents without knowledge about the structure elements.
+ */
+struct faultinfo {
+        int error_code; /* in ptrace_faultinfo misleadingly called is_write */
+        unsigned long cr2; /* in ptrace_faultinfo called addr */
+        int trap_no; /* missing in ptrace_faultinfo */
+};
+
+#define FAULT_WRITE(fi) ((fi).error_code & 2)
+#define FAULT_ADDRESS(fi) ((fi).cr2)
+
+/* This is Page Fault */
+#define SEGV_IS_FIXABLE(fi)    ((fi)->trap_no == 14)
+
+/* No broken SKAS API, which doesn't pass trap_no, here. */
+#define SEGV_MAYBE_FIXABLE(fi) 0
+
+#define PTRACE_FULL_FAULTINFO 1
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
new file mode 100644 (file)
index 0000000..5868526
--- /dev/null
@@ -0,0 +1,21 @@
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/elf.h>
+#include <linux/crypto.h>
+#include <asm/mman.h>
+
+#define DEFINE(sym, val) \
+       asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define STR(x) #x
+#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : )
+
+#define BLANK() asm volatile("\n->" : : )
+
+#define OFFSET(sym, str, mem) \
+       DEFINE(sym, offsetof(struct str, mem));
+
+void foo(void)
+{
+#include <common-offsets.h>
+}
diff --git a/arch/x86/um/shared/sysdep/mcontext.h b/arch/x86/um/shared/sysdep/mcontext.h
new file mode 100644 (file)
index 0000000..b724c54
--- /dev/null
@@ -0,0 +1,31 @@
+/* 
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYS_SIGCONTEXT_X86_H
+#define __SYS_SIGCONTEXT_X86_H
+
+extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *);
+
+#ifdef __i386__
+
+#define GET_FAULTINFO_FROM_MC(fi, mc) \
+       { \
+               (fi).cr2 = (mc)->cr2; \
+               (fi).error_code = (mc)->gregs[REG_ERR]; \
+               (fi).trap_no = (mc)->gregs[REG_TRAPNO]; \
+       }
+
+#else
+
+#define GET_FAULTINFO_FROM_MC(fi, mc) \
+       { \
+               (fi).cr2 = (mc)->gregs[REG_CR2]; \
+               (fi).error_code = (mc)->gregs[REG_ERR]; \
+               (fi).trap_no = (mc)->gregs[REG_TRAPNO]; \
+       }
+
+#endif
+
+#endif
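
GET_FAULTINFO_FROM_MC is meant to run inside a host signal handler installed with SA_SIGINFO, where the third argument is really a ucontext. A sketch of that pattern (the handler name is hypothetical):

        #include <signal.h>
        #include <ucontext.h>
        #include "sysdep/faultinfo.h"
        #include "sysdep/mcontext.h"

        /* Sketch: pull the fault description out of the host mcontext. */
        static void segv_handler(int sig, siginfo_t *si, void *p)
        {
                ucontext_t *uc = p;
                struct faultinfo fi;

                GET_FAULTINFO_FROM_MC(fi, &uc->uc_mcontext);
                /* fi.cr2 is the faulting address, fi.error_code the
                 * hardware error code, fi.trap_no the trap number. */
        }
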
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
new file mode 100644 (file)
index 0000000..711b162
--- /dev/null
@@ -0,0 +1,5 @@
+#ifdef __i386__
+#include "ptrace_32.h"
+#else
+#include "ptrace_64.h"
+#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h
new file mode 100644 (file)
index 0000000..befd1df
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_I386_PTRACE_H
+#define __SYSDEP_I386_PTRACE_H
+
+#include <generated/user_constants.h>
+#include "sysdep/faultinfo.h"
+
+#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long))
+#define MAX_REG_OFFSET (UM_FRAME_SIZE)
+
+static inline void update_debugregs(int seq) {}
+
+/* syscall emulation path in ptrace */
+
+#ifndef PTRACE_SYSEMU
+#define PTRACE_SYSEMU 31
+#endif
+
+void set_using_sysemu(int value);
+int get_using_sysemu(void);
+extern int sysemu_supported;
+
+#define REGS_IP(r) ((r)[HOST_IP])
+#define REGS_SP(r) ((r)[HOST_SP])
+#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
+#define REGS_EAX(r) ((r)[HOST_AX])
+#define REGS_EBX(r) ((r)[HOST_BX])
+#define REGS_ECX(r) ((r)[HOST_CX])
+#define REGS_EDX(r) ((r)[HOST_DX])
+#define REGS_ESI(r) ((r)[HOST_SI])
+#define REGS_EDI(r) ((r)[HOST_DI])
+#define REGS_EBP(r) ((r)[HOST_BP])
+#define REGS_CS(r) ((r)[HOST_CS])
+#define REGS_SS(r) ((r)[HOST_SS])
+#define REGS_DS(r) ((r)[HOST_DS])
+#define REGS_ES(r) ((r)[HOST_ES])
+#define REGS_FS(r) ((r)[HOST_FS])
+#define REGS_GS(r) ((r)[HOST_GS])
+
+#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res)
+
+#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
+#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
+
+#ifndef PTRACE_SYSEMU_SINGLESTEP
+#define PTRACE_SYSEMU_SINGLESTEP 32
+#endif
+
+struct uml_pt_regs {
+       unsigned long gp[MAX_REG_NR];
+       unsigned long fp[HOST_FPX_SIZE];
+       struct faultinfo faultinfo;
+       long syscall;
+       int is_user;
+};
+
+#define EMPTY_UML_PT_REGS { }
+
+#define UPT_IP(r) REGS_IP((r)->gp)
+#define UPT_SP(r) REGS_SP((r)->gp)
+#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
+#define UPT_EAX(r) REGS_EAX((r)->gp)
+#define UPT_EBX(r) REGS_EBX((r)->gp)
+#define UPT_ECX(r) REGS_ECX((r)->gp)
+#define UPT_EDX(r) REGS_EDX((r)->gp)
+#define UPT_ESI(r) REGS_ESI((r)->gp)
+#define UPT_EDI(r) REGS_EDI((r)->gp)
+#define UPT_EBP(r) REGS_EBP((r)->gp)
+#define UPT_ORIG_EAX(r) ((r)->syscall)
+#define UPT_CS(r) REGS_CS((r)->gp)
+#define UPT_SS(r) REGS_SS((r)->gp)
+#define UPT_DS(r) REGS_DS((r)->gp)
+#define UPT_ES(r) REGS_ES((r)->gp)
+#define UPT_FS(r) REGS_FS((r)->gp)
+#define UPT_GS(r) REGS_GS((r)->gp)
+
+#define UPT_SYSCALL_ARG1(r) UPT_EBX(r)
+#define UPT_SYSCALL_ARG2(r) UPT_ECX(r)
+#define UPT_SYSCALL_ARG3(r) UPT_EDX(r)
+#define UPT_SYSCALL_ARG4(r) UPT_ESI(r)
+#define UPT_SYSCALL_ARG5(r) UPT_EDI(r)
+#define UPT_SYSCALL_ARG6(r) UPT_EBP(r)
+
+extern int user_context(unsigned long sp);
+
+#define UPT_IS_USER(r) ((r)->is_user)
+
+struct syscall_args {
+       unsigned long args[6];
+};
+
+#define SYSCALL_ARGS(r) ((struct syscall_args) \
+                        { .args = { UPT_SYSCALL_ARG1(r),       \
+                                    UPT_SYSCALL_ARG2(r),       \
+                                    UPT_SYSCALL_ARG3(r),       \
+                                    UPT_SYSCALL_ARG4(r),       \
+                                    UPT_SYSCALL_ARG5(r),       \
+                                    UPT_SYSCALL_ARG6(r) } } )
+
+#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
+
+#define UPT_ORIG_SYSCALL(r) UPT_EAX(r)
+#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r)
+#define UPT_SYSCALL_RET(r) UPT_EAX(r)
+
+#define UPT_FAULTINFO(r) (&(r)->faultinfo)
+
+extern void arch_init_registers(int pid);
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h
new file mode 100644 (file)
index 0000000..031edc5
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2003 PathScale, Inc.
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ *
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_X86_64_PTRACE_H
+#define __SYSDEP_X86_64_PTRACE_H
+
+#include <generated/user_constants.h>
+#include "sysdep/faultinfo.h"
+
+#define MAX_REG_OFFSET (UM_FRAME_SIZE)
+#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
+
+#define REGS_IP(r) ((r)[HOST_IP])
+#define REGS_SP(r) ((r)[HOST_SP])
+
+#define REGS_RBX(r) ((r)[HOST_BX])
+#define REGS_RCX(r) ((r)[HOST_CX])
+#define REGS_RDX(r) ((r)[HOST_DX])
+#define REGS_RSI(r) ((r)[HOST_SI])
+#define REGS_RDI(r) ((r)[HOST_DI])
+#define REGS_RBP(r) ((r)[HOST_BP])
+#define REGS_RAX(r) ((r)[HOST_AX])
+#define REGS_R8(r) ((r)[HOST_R8])
+#define REGS_R9(r) ((r)[HOST_R9])
+#define REGS_R10(r) ((r)[HOST_R10])
+#define REGS_R11(r) ((r)[HOST_R11])
+#define REGS_R12(r) ((r)[HOST_R12])
+#define REGS_R13(r) ((r)[HOST_R13])
+#define REGS_R14(r) ((r)[HOST_R14])
+#define REGS_R15(r) ((r)[HOST_R15])
+#define REGS_CS(r) ((r)[HOST_CS])
+#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
+#define REGS_SS(r) ((r)[HOST_SS])
+
+#define HOST_FS_BASE 21
+#define HOST_GS_BASE 22
+#define HOST_DS 23
+#define HOST_ES 24
+#define HOST_FS 25
+#define HOST_GS 26
+
+/* Also defined in asm/ptrace-x86_64.h, but not in libc headers.  So, these
+ * are already defined for kernel code, but not for userspace code.
+ */
+#ifndef FS_BASE
+/* These aren't defined in ptrace.h, but exist in struct user_regs_struct,
+ * which is what x86_64 ptrace actually uses.
+ */
+#define FS_BASE (HOST_FS_BASE * sizeof(long))
+#define GS_BASE (HOST_GS_BASE * sizeof(long))
+#define DS (HOST_DS * sizeof(long))
+#define ES (HOST_ES * sizeof(long))
+#define FS (HOST_FS * sizeof(long))
+#define GS (HOST_GS * sizeof(long))
+#endif
+
+#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE])
+#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE])
+#define REGS_DS(r) ((r)[HOST_DS])
+#define REGS_ES(r) ((r)[HOST_ES])
+#define REGS_FS(r) ((r)[HOST_FS])
+#define REGS_GS(r) ((r)[HOST_GS])
+
+#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_AX])
+
+#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res)
+
+#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
+#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
+
+#define REGS_FAULT_ADDR(r) ((r)->fault_addr)
+
+#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type)
+
+#define REGS_TRAP(r) ((r)->trap_type)
+
+#define REGS_ERR(r) ((r)->fault_type)
+
+struct uml_pt_regs {
+       unsigned long gp[MAX_REG_NR];
+       unsigned long fp[HOST_FP_SIZE];
+       struct faultinfo faultinfo;
+       long syscall;
+       int is_user;
+};
+
+#define EMPTY_UML_PT_REGS { }
+
+#define UPT_RBX(r) REGS_RBX((r)->gp)
+#define UPT_RCX(r) REGS_RCX((r)->gp)
+#define UPT_RDX(r) REGS_RDX((r)->gp)
+#define UPT_RSI(r) REGS_RSI((r)->gp)
+#define UPT_RDI(r) REGS_RDI((r)->gp)
+#define UPT_RBP(r) REGS_RBP((r)->gp)
+#define UPT_RAX(r) REGS_RAX((r)->gp)
+#define UPT_R8(r) REGS_R8((r)->gp)
+#define UPT_R9(r) REGS_R9((r)->gp)
+#define UPT_R10(r) REGS_R10((r)->gp)
+#define UPT_R11(r) REGS_R11((r)->gp)
+#define UPT_R12(r) REGS_R12((r)->gp)
+#define UPT_R13(r) REGS_R13((r)->gp)
+#define UPT_R14(r) REGS_R14((r)->gp)
+#define UPT_R15(r) REGS_R15((r)->gp)
+#define UPT_CS(r) REGS_CS((r)->gp)
+#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp)
+#define UPT_FS(r) REGS_FS((r)->gp)
+#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp)
+#define UPT_GS(r) REGS_GS((r)->gp)
+#define UPT_DS(r) REGS_DS((r)->gp)
+#define UPT_ES(r) REGS_ES((r)->gp)
+#define UPT_SS(r) REGS_SS((r)->gp)
+#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp)
+
+#define UPT_IP(r) REGS_IP((r)->gp)
+#define UPT_SP(r) REGS_SP((r)->gp)
+
+#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
+#define UPT_SYSCALL_NR(r) ((r)->syscall)
+#define UPT_SYSCALL_RET(r) UPT_RAX(r)
+
+extern int user_context(unsigned long sp);
+
+#define UPT_IS_USER(r) ((r)->is_user)
+
+#define UPT_SYSCALL_ARG1(r) UPT_RDI(r)
+#define UPT_SYSCALL_ARG2(r) UPT_RSI(r)
+#define UPT_SYSCALL_ARG3(r) UPT_RDX(r)
+#define UPT_SYSCALL_ARG4(r) UPT_R10(r)
+#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
+#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
+
+struct syscall_args {
+       unsigned long args[6];
+};
+
+#define SYSCALL_ARGS(r) ((struct syscall_args) \
+                        { .args = { UPT_SYSCALL_ARG1(r),        \
+                                    UPT_SYSCALL_ARG2(r),        \
+                                    UPT_SYSCALL_ARG3(r),        \
+                                    UPT_SYSCALL_ARG4(r),        \
+                                    UPT_SYSCALL_ARG5(r),        \
+                                    UPT_SYSCALL_ARG6(r) } } )
+
+#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
+
+#define UPT_FAULTINFO(r) (&(r)->faultinfo)
+
+static inline void arch_init_registers(int pid)
+{
+}
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h
new file mode 100644 (file)
index 0000000..16cd6b5
--- /dev/null
@@ -0,0 +1,27 @@
+#include <generated/user_constants.h>
+
+#define PT_OFFSET(r) ((r) * sizeof(long))
+
+#define PT_SYSCALL_NR(regs) ((regs)[HOST_ORIG_AX])
+#define PT_SYSCALL_NR_OFFSET PT_OFFSET(HOST_ORIG_AX)
+
+#define PT_SYSCALL_RET_OFFSET PT_OFFSET(HOST_AX)
+
+#define REGS_IP_INDEX HOST_IP
+#define REGS_SP_INDEX HOST_SP
+
+#ifdef __i386__
+#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE)
+#else
+#define FP_SIZE HOST_FP_SIZE
+
+/*
+ * x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though
+ * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the
+ * 2.4 name and value for 2.4 host compatibility.
+ */
+#ifndef PTRACE_OLDSETOPTIONS
+#define PTRACE_OLDSETOPTIONS 21
+#endif
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/skas_ptrace.h b/arch/x86/um/shared/sysdep/skas_ptrace.h
new file mode 100644 (file)
index 0000000..453febe
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_X86_SKAS_PTRACE_H
+#define __SYSDEP_X86_SKAS_PTRACE_H
+
+struct ptrace_faultinfo {
+        int is_write;
+        unsigned long addr;
+};
+
+struct ptrace_ldt {
+        int func;
+        void *ptr;
+        unsigned long bytecount;
+};
+
+#define PTRACE_LDT 54
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
new file mode 100644 (file)
index 0000000..bd161e3
--- /dev/null
@@ -0,0 +1,14 @@
+#include <asm/unistd.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include "as-layout.h"
+#include "stub-data.h"
+
+#ifdef __i386__
+#include "stub_32.h"
+#else
+#include "stub_64.h"
+#endif
+
+extern void stub_segv_handler(int, siginfo_t *, void *);
+extern void stub_clone_handler(void);
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
new file mode 100644 (file)
index 0000000..51fd256
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_STUB_H
+#define __SYSDEP_STUB_H
+
+#include <asm/ptrace.h>
+
+#define STUB_SYSCALL_RET EAX
+#define STUB_MMAP_NR __NR_mmap2
+#define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT)
+
+static inline long stub_syscall0(long syscall)
+{
+       long ret;
+
+       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall));
+
+       return ret;
+}
+
+static inline long stub_syscall1(long syscall, long arg1)
+{
+       long ret;
+
+       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1));
+
+       return ret;
+}
+
+static inline long stub_syscall2(long syscall, long arg1, long arg2)
+{
+       long ret;
+
+       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
+                       "c" (arg2));
+
+       return ret;
+}
+
+static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
+{
+       long ret;
+
+       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
+                       "c" (arg2), "d" (arg3));
+
+       return ret;
+}
+
+static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
+                                long arg4)
+{
+       long ret;
+
+       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
+                       "c" (arg2), "d" (arg3), "S" (arg4));
+
+       return ret;
+}
+
+static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
+                                long arg4, long arg5)
+{
+       long ret;
+
+       __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
+                       "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5));
+
+       return ret;
+}
+
+static inline void trap_myself(void)
+{
+       __asm("int3");
+}
+
+static inline void remap_stack(int fd, unsigned long offset)
+{
+       __asm__ volatile ("movl %%eax,%%ebp ; movl %0,%%eax ; int $0x80 ;"
+                         "movl %7, %%ebx ; movl %%eax, (%%ebx)"
+                         : : "g" (STUB_MMAP_NR), "b" (STUB_DATA),
+                           "c" (UM_KERN_PAGE_SIZE),
+                           "d" (PROT_READ | PROT_WRITE),
+                           "S" (MAP_FIXED | MAP_SHARED), "D" (fd),
+                           "a" (offset),
+                           "i" (&((struct stub_data *) STUB_DATA)->err)
+                         : "memory");
+}
+
+#endif
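
These inlines let the tiny guest-side stub make raw syscalls with no libc in sight. A hedged usage sketch (the page address and helper name are hypothetical):

        #include "sysdep/stub.h"

        /* Sketch: unmap one page from inside the stub, libc-free. */
        static inline long drop_page(unsigned long addr)
        {
                return stub_syscall2(__NR_munmap, addr, UM_KERN_PAGE_SIZE);
        }
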
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
new file mode 100644 (file)
index 0000000..994df93
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_STUB_H
+#define __SYSDEP_STUB_H
+
+#include <sysdep/ptrace_user.h>
+
+#define STUB_SYSCALL_RET PT_INDEX(RAX)
+#define STUB_MMAP_NR __NR_mmap
+#define MMAP_OFFSET(o) (o)
+
+#define __syscall_clobber "r11","rcx","memory"
+#define __syscall "syscall"
+
+static inline long stub_syscall0(long syscall)
+{
+       long ret;
+
+       __asm__ volatile (__syscall
+               : "=a" (ret)
+               : "0" (syscall) : __syscall_clobber );
+
+       return ret;
+}
+
+static inline long stub_syscall2(long syscall, long arg1, long arg2)
+{
+       long ret;
+
+       __asm__ volatile (__syscall
+               : "=a" (ret)
+               : "0" (syscall), "D" (arg1), "S" (arg2) : __syscall_clobber );
+
+       return ret;
+}
+
+static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
+{
+       long ret;
+
+       __asm__ volatile (__syscall
+               : "=a" (ret)
+               : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3)
+               : __syscall_clobber );
+
+       return ret;
+}
+
+static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
+                                long arg4)
+{
+       long ret;
+
+       __asm__ volatile ("movq %5,%%r10 ; " __syscall
+               : "=a" (ret)
+               : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3),
+                 "g" (arg4)
+               : __syscall_clobber, "r10" );
+
+       return ret;
+}
+
+static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
+                                long arg4, long arg5)
+{
+       long ret;
+
+       __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall
+               : "=a" (ret)
+               : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3),
+                 "g" (arg4), "g" (arg5)
+               : __syscall_clobber, "r10", "r8" );
+
+       return ret;
+}
+
+static inline void trap_myself(void)
+{
+       __asm("int3");
+}
+
+static inline void remap_stack(long fd, unsigned long offset)
+{
+       __asm__ volatile ("movq %4,%%r10 ; movq %5,%%r8 ; "
+                         "movq %6, %%r9; " __syscall "; movq %7, %%rbx ; "
+                         "movq %%rax, (%%rbx)":
+                         : "a" (STUB_MMAP_NR), "D" (STUB_DATA),
+                           "S" (UM_KERN_PAGE_SIZE),
+                           "d" (PROT_READ | PROT_WRITE),
+                            "g" (MAP_FIXED | MAP_SHARED), "g" (fd),
+                           "g" (offset),
+                           "i" (&((struct stub_data *) STUB_DATA)->err)
+                         : __syscall_clobber, "r10", "r8", "r9" );
+}
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
new file mode 100644 (file)
index 0000000..bd9a89b
--- /dev/null
@@ -0,0 +1,5 @@
+#ifdef __i386__
+#include "syscalls_32.h"
+#else
+#include "syscalls_64.h"
+#endif
diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
new file mode 100644 (file)
index 0000000..05cb796
--- /dev/null
@@ -0,0 +1,20 @@
+/* 
+ * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "asm/unistd.h"
+#include "sysdep/ptrace.h"
+
+typedef long syscall_handler_t(struct pt_regs);
+
+/* Not declared on x86, incompatible declarations on x86_64, so these have
+ * to go here rather than in sys_call_table.c
+ */
+extern syscall_handler_t sys_rt_sigaction;
+
+extern syscall_handler_t *sys_call_table[];
+
+#define EXECUTE_SYSCALL(syscall, regs) \
+       ((long (*)(struct syscall_args)) \
+        (*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
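
EXECUTE_SYSCALL re-casts the table entry so each handler receives all six arguments as one struct passed by value. Expanded by hand for a single call, the macro amounts to this sketch (dispatch() is a hypothetical name):

        /* Sketch: EXECUTE_SYSCALL(n, regs), expanded by hand. */
        static long dispatch(int n, struct pt_regs *regs)
        {
                long (*handler)(struct syscall_args) =
                        (long (*)(struct syscall_args)) *sys_call_table[n];

                return handler(SYSCALL_ARGS(&regs->regs));
        }
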
diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h
new file mode 100644 (file)
index 0000000..8a7d5e1
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2003 PathScale, Inc.
+ *
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_X86_64_SYSCALLS_H__
+#define __SYSDEP_X86_64_SYSCALLS_H__
+
+#include <linux/msg.h>
+#include <linux/shm.h>
+
+typedef long syscall_handler_t(void);
+
+extern syscall_handler_t *sys_call_table[];
+
+#define EXECUTE_SYSCALL(syscall, regs) \
+       (((long (*)(long, long, long, long, long, long)) \
+         (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(&regs->regs), \
+                                     UPT_SYSCALL_ARG2(&regs->regs), \
+                                     UPT_SYSCALL_ARG3(&regs->regs), \
+                                     UPT_SYSCALL_ARG4(&regs->regs), \
+                                     UPT_SYSCALL_ARG5(&regs->regs), \
+                                     UPT_SYSCALL_ARG6(&regs->regs)))
+
+extern long old_mmap(unsigned long addr, unsigned long len,
+                    unsigned long prot, unsigned long flags,
+                    unsigned long fd, unsigned long pgoff);
+extern syscall_handler_t sys_modify_ldt;
+extern syscall_handler_t sys_arch_prctl;
+
+#endif
diff --git a/arch/x86/um/shared/sysdep/tls.h b/arch/x86/um/shared/sysdep/tls.h
new file mode 100644 (file)
index 0000000..27cce00
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef _SYSDEP_TLS_H
+#define _SYSDEP_TLS_H
+
+# ifndef __KERNEL__
+
+/* Change the name to avoid conflicts with the original from <asm/ldt.h>,
+ * which may be called user_desc (in 2.4, and in headers matching its API,
+ * it was called modify_ldt_ldt_s). */
+
+typedef struct um_dup_user_desc {
+       unsigned int  entry_number;
+       unsigned int  base_addr;
+       unsigned int  limit;
+       unsigned int  seg_32bit:1;
+       unsigned int  contents:2;
+       unsigned int  read_exec_only:1;
+       unsigned int  limit_in_pages:1;
+       unsigned int  seg_not_present:1;
+       unsigned int  useable:1;
+#ifdef __x86_64__
+       unsigned int  lm:1;
+#endif
+} user_desc_t;
+
+# else /* __KERNEL__ */
+
+typedef struct user_desc user_desc_t;
+
+# endif /* __KERNEL__ */
+
+extern int os_set_thread_area(user_desc_t *info, int pid);
+extern int os_get_thread_area(user_desc_t *info, int pid);
+
+#ifdef __i386__
+#define GDT_ENTRY_TLS_MIN_I386 6
+#define GDT_ENTRY_TLS_MIN_X86_64 12
+#endif
+
+#endif /* _SYSDEP_TLS_H */
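
A sketch of filling the descriptor for the host-side helpers declared above (the base and pid are hypothetical; GDT_ENTRY_TLS_MIN_I386 assumes an i386 host):

        #include "sysdep/tls.h"

        /* Sketch: install a flat, 32-bit, page-granular segment for pid. */
        static int install_tls(int pid, unsigned long base)
        {
                user_desc_t d = {
                        .entry_number   = GDT_ENTRY_TLS_MIN_I386,
                        .base_addr      = base,
                        .limit          = 0xfffff,
                        .seg_32bit      = 1,
                        .limit_in_pages = 1,
                        .useable        = 1,
                };

                return os_set_thread_area(&d, pid);
        }
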
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
new file mode 100644 (file)
index 0000000..4883b95
--- /dev/null
@@ -0,0 +1,624 @@
+/*
+ * Copyright (C) 2003 PathScale, Inc.
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+
+#include <linux/personality.h>
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <asm/ucontext.h>
+#include "frame_kern.h"
+#include "skas.h"
+
+#ifdef CONFIG_X86_32
+
+/*
+ * FPU tag word conversions.
+ */
+
+static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
+{
+       unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+
+       /* Transform each pair of bits into 01 (valid) or 00 (empty) */
+       tmp = ~twd;
+       tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+       /* and move the valid bits to the lower byte. */
+       tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+       tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+       tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+       return tmp;
+}
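
A worked example of the conversion above: take an i387 tag word with st0 tagged "zero" (01) and st1-st7 tagged "empty" (11), i.e. twd = 0xfffd. Following the code, 16 bits at a time:

        ~twd                        = 0x0002
        (tmp | tmp >> 1) & 0x5555   = 0x0001   /* 0V0V...: st0 non-empty */
        (tmp | tmp >> 1) & 0x3333   = 0x0001
        (tmp | tmp >> 2) & 0x0f0f   = 0x0001
        (tmp | tmp >> 4) & 0x00ff   = 0x0001

The FXSR tag byte comes out as 0x01: only st0 is marked in use. "Zero" and "special" both collapse to "valid" here, since FXSR keeps a single occupied/empty bit per register.
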
+
+static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
+{
+       struct _fpxreg *st = NULL;
+       unsigned long twd = (unsigned long) fxsave->twd;
+       unsigned long tag;
+       unsigned long ret = 0xffff0000;
+       int i;
+
+#define FPREG_ADDR(f, n)       ((char *)&(f)->st_space + (n) * 16)
+
+       for (i = 0; i < 8; i++) {
+               if (twd & 0x1) {
+                       st = (struct _fpxreg *) FPREG_ADDR(fxsave, i);
+
+                       switch (st->exponent & 0x7fff) {
+                       case 0x7fff:
+                               tag = 2;                /* Special */
+                               break;
+                       case 0x0000:
+                               if (!st->significand[0] &&
+                                   !st->significand[1] &&
+                                   !st->significand[2] &&
+                                   !st->significand[3]) {
+                                       tag = 1;        /* Zero */
+                               } else {
+                                       tag = 2;        /* Special */
+                               }
+                               break;
+                       default:
+                               if (st->significand[3] & 0x8000) {
+                                       tag = 0;        /* Valid */
+                               } else {
+                                       tag = 2;        /* Special */
+                               }
+                               break;
+                       }
+               } else {
+                       tag = 3;                        /* Empty */
+               }
+               ret |= (tag << (2 * i));
+               twd = twd >> 1;
+       }
+       return ret;
+}
+
+static int convert_fxsr_to_user(struct _fpstate __user *buf,
+                               struct user_fxsr_struct *fxsave)
+{
+       unsigned long env[7];
+       struct _fpreg __user *to;
+       struct _fpxreg *from;
+       int i;
+
+       env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul;
+       env[1] = (unsigned long)fxsave->swd | 0xffff0000ul;
+       env[2] = twd_fxsr_to_i387(fxsave);
+       env[3] = fxsave->fip;
+       env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
+       env[5] = fxsave->foo;
+       env[6] = fxsave->fos;
+
+       if (__copy_to_user(buf, env, 7 * sizeof(unsigned long)))
+               return 1;
+
+       to = &buf->_st[0];
+       from = (struct _fpxreg *) &fxsave->st_space[0];
+       for (i = 0; i < 8; i++, to++, from++) {
+               unsigned long __user *t = (unsigned long __user *)to;
+               unsigned long *f = (unsigned long *)from;
+
+               if (__put_user(*f, t) ||
+                               __put_user(*(f + 1), t + 1) ||
+                               __put_user(from->exponent, &to->exponent))
+                       return 1;
+       }
+       return 0;
+}
+
+static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave,
+                                 struct _fpstate __user *buf)
+{
+       unsigned long env[7];
+       struct _fpxreg *to;
+       struct _fpreg __user *from;
+       int i;
+
+       if (copy_from_user(env, buf, 7 * sizeof(long)))
+               return 1;
+
+       fxsave->cwd = (unsigned short)(env[0] & 0xffff);
+       fxsave->swd = (unsigned short)(env[1] & 0xffff);
+       fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
+       fxsave->fip = env[3];
+       fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16);
+       fxsave->fcs = (env[4] & 0xffff);
+       fxsave->foo = env[5];
+       fxsave->fos = env[6];
+
+       to = (struct _fpxreg *) &fxsave->st_space[0];
+       from = &buf->_st[0];
+       for (i = 0; i < 8; i++, to++, from++) {
+               unsigned long *t = (unsigned long *)to;
+               unsigned long __user *f = (unsigned long __user *)from;
+
+               if (__get_user(*t, f) ||
+                   __get_user(*(t + 1), f + 1) ||
+                   __get_user(to->exponent, &from->exponent))
+                       return 1;
+       }
+       return 0;
+}
+
+extern int have_fpx_regs;
+
+#endif
+
+static int copy_sc_from_user(struct pt_regs *regs,
+                            struct sigcontext __user *from)
+{
+       struct sigcontext sc;
+       int err, pid;
+
+       err = copy_from_user(&sc, from, sizeof(sc));
+       if (err)
+               return err;
+
+#define GETREG(regno, regname) regs->regs.gp[HOST_##regno] = sc.regname
+
+#ifdef CONFIG_X86_32
+       GETREG(GS, gs);
+       GETREG(FS, fs);
+       GETREG(ES, es);
+       GETREG(DS, ds);
+#endif
+       GETREG(DI, di);
+       GETREG(SI, si);
+       GETREG(BP, bp);
+       GETREG(SP, sp);
+       GETREG(BX, bx);
+       GETREG(DX, dx);
+       GETREG(CX, cx);
+       GETREG(AX, ax);
+       GETREG(IP, ip);
+
+#ifdef CONFIG_X86_64
+       GETREG(R8, r8);
+       GETREG(R9, r9);
+       GETREG(R10, r10);
+       GETREG(R11, r11);
+       GETREG(R12, r12);
+       GETREG(R13, r13);
+       GETREG(R14, r14);
+       GETREG(R15, r15);
+#endif
+
+       GETREG(CS, cs);
+       GETREG(EFLAGS, flags);
+#ifdef CONFIG_X86_32
+       GETREG(SS, ss);
+#endif
+
+#undef GETREG
+
+       pid = userspace_pid[current_thread_info()->cpu];
+#ifdef CONFIG_X86_32
+       if (have_fpx_regs) {
+               struct user_fxsr_struct fpx;
+
+               err = copy_from_user(&fpx,
+                       &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0],
+                                    sizeof(struct user_fxsr_struct));
+               if (err)
+                       return 1;
+
+               err = convert_fxsr_from_user(&fpx, sc.fpstate);
+               if (err)
+                       return 1;
+
+               err = restore_fpx_registers(pid, (unsigned long *) &fpx);
+               if (err < 0) {
+                       printk(KERN_ERR "copy_sc_from_user - "
+                              "restore_fpx_registers failed, errno = %d\n",
+                              -err);
+                       return 1;
+               }
+       } else
+#endif
+       {
+               struct user_i387_struct fp;
+
+               err = copy_from_user(&fp, sc.fpstate,
+                                    sizeof(struct user_i387_struct));
+               if (err)
+                       return 1;
+
+               err = restore_fp_registers(pid, (unsigned long *) &fp);
+               if (err < 0) {
+                       printk(KERN_ERR "copy_sc_from_user - "
+                              "restore_fp_registers failed, errno = %d\n",
+                              -err);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+static int copy_sc_to_user(struct sigcontext __user *to,
+                          struct _fpstate __user *to_fp, struct pt_regs *regs,
+                          unsigned long mask)
+{
+       struct sigcontext sc;
+       struct faultinfo *fi = &current->thread.arch.faultinfo;
+       int err, pid;
+
+       memset(&sc, 0, sizeof(struct sigcontext));
+
+#define PUTREG(regno, regname) sc.regname = regs->regs.gp[HOST_##regno]
+
+#ifdef CONFIG_X86_32
+       PUTREG(GS, gs);
+       PUTREG(FS, fs);
+       PUTREG(ES, es);
+       PUTREG(DS, ds);
+#endif
+       PUTREG(DI, di);
+       PUTREG(SI, si);
+       PUTREG(BP, bp);
+       PUTREG(SP, sp);
+       PUTREG(BX, bx);
+       PUTREG(DX, dx);
+       PUTREG(CX, cx);
+       PUTREG(AX, ax);
+#ifdef CONFIG_X86_64
+       PUTREG(R8, r8);
+       PUTREG(R9, r9);
+       PUTREG(R10, r10);
+       PUTREG(R11, r11);
+       PUTREG(R12, r12);
+       PUTREG(R13, r13);
+       PUTREG(R14, r14);
+       PUTREG(R15, r15);
+#endif
+
+       sc.cr2 = fi->cr2;
+       sc.err = fi->error_code;
+       sc.trapno = fi->trap_no;
+       PUTREG(IP, ip);
+       PUTREG(CS, cs);
+       PUTREG(EFLAGS, flags);
+#ifdef CONFIG_X86_32
+       PUTREG(SP, sp_at_signal);
+       PUTREG(SS, ss);
+#endif
+#undef PUTREG
+       sc.oldmask = mask;
+       sc.fpstate = to_fp;
+
+       err = copy_to_user(to, &sc, sizeof(struct sigcontext));
+       if (err)
+               return 1;
+
+       pid = userspace_pid[current_thread_info()->cpu];
+
+#ifdef CONFIG_X86_32
+       if (have_fpx_regs) {
+               struct user_fxsr_struct fpx;
+
+               err = save_fpx_registers(pid, (unsigned long *) &fpx);
+               if (err < 0) {
+                       printk(KERN_ERR "copy_sc_to_user - save_fpx_registers "
+                              "failed, errno = %d\n", err);
+                       return 1;
+               }
+
+               err = convert_fxsr_to_user(to_fp, &fpx);
+               if (err)
+                       return 1;
+
+               err |= __put_user(fpx.swd, &to_fp->status);
+               err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic);
+               if (err)
+                       return 1;
+
+               if (copy_to_user(&to_fp->_fxsr_env[0], &fpx,
+                                sizeof(struct user_fxsr_struct)))
+                       return 1;
+       } else
+#endif
+       {
+               struct user_i387_struct fp;
+
+               err = save_fp_registers(pid, (unsigned long *) &fp);
+               if (err < 0)
+                       return 1;
+               if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct)))
+                       return 1;
+       }
+
+       return 0;
+}
+
+#ifdef CONFIG_X86_32
+static int copy_ucontext_to_user(struct ucontext __user *uc,
+                                struct _fpstate __user *fp, sigset_t *set,
+                                unsigned long sp)
+{
+       int err = 0;
+
+       err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp);
+       err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags);
+       err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size);
+       err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, 0);
+       err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set));
+       return err;
+}
+
+struct sigframe
+{
+       char __user *pretcode;
+       int sig;
+       struct sigcontext sc;
+       struct _fpstate fpstate;
+       unsigned long extramask[_NSIG_WORDS-1];
+       char retcode[8];
+};
+
+struct rt_sigframe
+{
+       char __user *pretcode;
+       int sig;
+       struct siginfo __user *pinfo;
+       void __user *puc;
+       struct siginfo info;
+       struct ucontext uc;
+       struct _fpstate fpstate;
+       char retcode[8];
+};
+
+int setup_signal_stack_sc(unsigned long stack_top, int sig,
+                         struct k_sigaction *ka, struct pt_regs *regs,
+                         sigset_t *mask)
+{
+       struct sigframe __user *frame;
+       void __user *restorer;
+       int err = 0;
+
+       /* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */
+       stack_top = ((stack_top + 4) & -16UL) - 4;
+       frame = (struct sigframe __user *) stack_top - 1;
+       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+               return 1;
+
+       restorer = frame->retcode;
+       if (ka->sa.sa_flags & SA_RESTORER)
+               restorer = ka->sa.sa_restorer;
+
+       err |= __put_user(restorer, &frame->pretcode);
+       err |= __put_user(sig, &frame->sig);
+       err |= copy_sc_to_user(&frame->sc, &frame->fpstate, regs, mask->sig[0]);
+       if (_NSIG_WORDS > 1)
+               err |= __copy_to_user(&frame->extramask, &mask->sig[1],
+                                     sizeof(frame->extramask));
+
+       /*
+        * This is popl %eax ; movl $,%eax ; int $0x80
+        *
+        * WE DO NOT USE IT ANY MORE! It's only left here for historical
+        * reasons and because gdb uses it as a signature to notice
+        * signal handler stack frames.
+        */
+       err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
+       err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
+       err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
+
+       if (err)
+               return err;
+
+       PT_REGS_SP(regs) = (unsigned long) frame;
+       PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
+       PT_REGS_EAX(regs) = (unsigned long) sig;
+       PT_REGS_EDX(regs) = (unsigned long) 0;
+       PT_REGS_ECX(regs) = (unsigned long) 0;
+
+       if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
+               ptrace_notify(SIGTRAP);
+       return 0;
+}
+
+int setup_signal_stack_si(unsigned long stack_top, int sig,
+                         struct k_sigaction *ka, struct pt_regs *regs,
+                         siginfo_t *info, sigset_t *mask)
+{
+       struct rt_sigframe __user *frame;
+       void __user *restorer;
+       int err = 0;
+
+       stack_top &= -8UL;
+       frame = (struct rt_sigframe __user *) stack_top - 1;
+       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+               return 1;
+
+       restorer = frame->retcode;
+       if (ka->sa.sa_flags & SA_RESTORER)
+               restorer = ka->sa.sa_restorer;
+
+       err |= __put_user(restorer, &frame->pretcode);
+       err |= __put_user(sig, &frame->sig);
+       err |= __put_user(&frame->info, &frame->pinfo);
+       err |= __put_user(&frame->uc, &frame->puc);
+       err |= copy_siginfo_to_user(&frame->info, info);
+       err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask,
+                                       PT_REGS_SP(regs));
+
+       /*
+        * This is movl $,%eax ; int $0x80
+        *
+        * WE DO NOT USE IT ANY MORE! It's only left here for historical
+        * reasons and because gdb uses it as a signature to notice
+        * signal handler stack frames.
+        */
+       err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
+       err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
+       err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
+
+       if (err)
+               return err;
+
+       PT_REGS_SP(regs) = (unsigned long) frame;
+       PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
+       PT_REGS_EAX(regs) = (unsigned long) sig;
+       PT_REGS_EDX(regs) = (unsigned long) &frame->info;
+       PT_REGS_ECX(regs) = (unsigned long) &frame->uc;
+
+       if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
+               ptrace_notify(SIGTRAP);
+       return 0;
+}
+
+long sys_sigreturn(struct pt_regs *regs)
+{
+       unsigned long sp = PT_REGS_SP(&current->thread.regs);
+       struct sigframe __user *frame = (struct sigframe __user *)(sp - 8);
+       sigset_t set;
+       struct sigcontext __user *sc = &frame->sc;
+       unsigned long __user *oldmask = &sc->oldmask;
+       unsigned long __user *extramask = frame->extramask;
+       int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long);
+
+       if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) ||
+           copy_from_user(&set.sig[1], extramask, sig_size))
+               goto segfault;
+
+       sigdelsetmask(&set, ~_BLOCKABLE);
+       set_current_blocked(&set);
+
+       if (copy_sc_from_user(&current->thread.regs, sc))
+               goto segfault;
+
+       /* Avoid ERESTART handling */
+       PT_REGS_SYSCALL_NR(&current->thread.regs) = -1;
+       return PT_REGS_SYSCALL_RET(&current->thread.regs);
+
+ segfault:
+       force_sig(SIGSEGV, current);
+       return 0;
+}
+
+#else
+
+struct rt_sigframe
+{
+       char __user *pretcode;
+       struct ucontext uc;
+       struct siginfo info;
+       struct _fpstate fpstate;
+};
+
+int setup_signal_stack_si(unsigned long stack_top, int sig,
+                         struct k_sigaction *ka, struct pt_regs *regs,
+                         siginfo_t *info, sigset_t *set)
+{
+       struct rt_sigframe __user *frame;
+       int err = 0;
+       struct task_struct *me = current;
+
+       frame = (struct rt_sigframe __user *)
+               round_down(stack_top - sizeof(struct rt_sigframe), 16);
+       /* Subtract 128 for a red zone and 8 for proper alignment */
+       frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
+
+       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+               goto out;
+
+       if (ka->sa.sa_flags & SA_SIGINFO) {
+               err |= copy_siginfo_to_user(&frame->info, info);
+               if (err)
+                       goto out;
+       }
+
+       /* Create the ucontext.  */
+       err |= __put_user(0, &frame->uc.uc_flags);
+       err |= __put_user(0, &frame->uc.uc_link);
+       err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+       err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)),
+                         &frame->uc.uc_stack.ss_flags);
+       err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+       err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
+                              set->sig[0]);
+       err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
+       if (sizeof(*set) == 16) {
+               err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
+               err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
+       } else
+               err |= __copy_to_user(&frame->uc.uc_sigmask, set,
+                                     sizeof(*set));
+
+       /*
+        * Set up to return from userspace.  If provided, use a stub
+        * already in userspace.
+        */
+       /* x86-64 should always use SA_RESTORER. */
+       if (ka->sa.sa_flags & SA_RESTORER)
+               err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+       else
+               /* could use a vstub here */
+               return err;
+
+       if (err)
+               return err;
+
+       /* Set up registers for signal handler */
+       {
+               struct exec_domain *ed = current_thread_info()->exec_domain;
+               if (unlikely(ed && ed->signal_invmap && sig < 32))
+                       sig = ed->signal_invmap[sig];
+       }
+
+       PT_REGS_SP(regs) = (unsigned long) frame;
+       PT_REGS_RDI(regs) = sig;
+       /* In case the signal handler was declared without prototypes */
+       PT_REGS_RAX(regs) = 0;
+
+       /*
+        * This also works for non SA_SIGINFO handlers because they expect the
+        * next argument after the signal number on the stack.
+        */
+       PT_REGS_RSI(regs) = (unsigned long) &frame->info;
+       PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
+       PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
+ out:
+       return err;
+}
+#endif
+
+long sys_rt_sigreturn(struct pt_regs *regs)
+{
+       unsigned long sp = PT_REGS_SP(&current->thread.regs);
+       struct rt_sigframe __user *frame =
+               (struct rt_sigframe __user *)(sp - sizeof(long));
+       struct ucontext __user *uc = &frame->uc;
+       sigset_t set;
+
+       if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
+               goto segfault;
+
+       sigdelsetmask(&set, ~_BLOCKABLE);
+       set_current_blocked(&set);
+
+       if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
+               goto segfault;
+
+       /* Avoid ERESTART handling */
+       PT_REGS_SYSCALL_NR(&current->thread.regs) = -1;
+       return PT_REGS_SYSCALL_RET(&current->thread.regs);
+
+ segfault:
+       force_sig(SIGSEGV, current);
+       return 0;
+}
+
+#ifdef CONFIG_X86_32
+long ptregs_sigreturn(void)
+{
+       return sys_sigreturn(NULL);
+}
+long ptregs_rt_sigreturn(void)
+{
+       return sys_rt_sigreturn(NULL);
+}
+#endif
diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S
new file mode 100644 (file)
index 0000000..54a36ec
--- /dev/null
@@ -0,0 +1,51 @@
+#include "as-layout.h"
+
+       .globl syscall_stub
+.section .__syscall_stub, "ax"
+
+       .globl batch_syscall_stub
+batch_syscall_stub:
+       /* load pointer to first operation */
+       mov     $(STUB_DATA+8), %esp
+
+again:
+       /* load length of additional data */
+       mov     0x0(%esp), %eax
+
+       /* if(length == 0) : end of list */
+       /* write possible 0 to header */
+       mov     %eax, STUB_DATA+4
+       cmpl    $0, %eax
+       jz      done
+
+       /* save current pointer */
+       mov     %esp, STUB_DATA+4
+
+       /* skip additional data */
+       add     %eax, %esp
+
+       /* load syscall-# */
+       pop     %eax
+
+       /* load syscall params */
+       pop     %ebx
+       pop     %ecx
+       pop     %edx
+       pop     %esi
+       pop     %edi
+       pop     %ebp
+
+       /* execute syscall */
+       int     $0x80
+
+       /* check return value */
+       pop     %ebx
+       cmp     %ebx, %eax
+       je      again
+
+done:
+       /* save return value */
+       mov     %eax, STUB_DATA
+
+       /* stop */
+       int3
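+
+/*
+ * As the loop above implies, the memory at STUB_DATA is laid out as a
+ * header followed by operation records:
+ *   STUB_DATA + 0: return value of the last syscall executed
+ *   STUB_DATA + 4: pointer to the failing record, or 0 when all
+ *                  operations succeeded
+ *   STUB_DATA + 8: first record: a length word (counting itself and any
+ *                  payload to skip), then the syscall number, six
+ *                  arguments and the expected return value
+ */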
diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S
new file mode 100644 (file)
index 0000000..20e4a96
--- /dev/null
@@ -0,0 +1,66 @@
+#include "as-layout.h"
+
+       .globl syscall_stub
+.section .__syscall_stub, "ax"
+syscall_stub:
+       syscall
+       /* We don't have 64-bit constants, so this constructs the address
+        * we need.
+        */
+       movq    $(STUB_DATA >> 32), %rbx
+       salq    $32, %rbx
+       movq    $(STUB_DATA & 0xffffffff), %rcx
+       or      %rcx, %rbx
+       movq    %rax, (%rbx)
+       int3
+
+       .globl batch_syscall_stub
+batch_syscall_stub:
+       mov     $(STUB_DATA >> 32), %rbx
+       sal     $32, %rbx
+       mov     $(STUB_DATA & 0xffffffff), %rax
+       or      %rax, %rbx
+       /* load pointer to first operation */
+       mov     %rbx, %rsp
+       add     $0x10, %rsp
+again:
+       /* load length of additional data */
+       mov     0x0(%rsp), %rax
+
+       /* if(length == 0) : end of list */
+       /* write possible 0 to header */
+       mov     %rax, 8(%rbx)
+       cmp     $0, %rax
+       jz      done
+
+       /* save current pointer */
+       mov     %rsp, 8(%rbx)
+
+       /* skip additional data */
+       add     %rax, %rsp
+
+       /* load syscall-# */
+       pop     %rax
+
+       /* load syscall params */
+       pop     %rdi
+       pop     %rsi
+       pop     %rdx
+       pop     %r10
+       pop     %r8
+       pop     %r9
+
+       /* execute syscall */
+       syscall
+
+       /* check return value */
+       pop     %rcx
+       cmp     %rcx, %rax
+       je      again
+
+done:
+       /* save return value */
+       mov     %rax, (%rbx)
+
+       /* stop */
+       int3
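+
+/*
+ * %rbx holds the STUB_DATA pointer across the syscall because, unlike
+ * %rcx and %r11, it is not clobbered by the syscall instruction.
+ */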
diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c
new file mode 100644 (file)
index 0000000..b7450bd
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include "sysdep/stub.h"
+#include "sysdep/faultinfo.h"
+#include "sysdep/mcontext.h"
+
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_segv_handler(int sig, siginfo_t *info, void *p)
+{
+       struct ucontext *uc = p;
+
+       GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA),
+                             &uc->uc_mcontext);
+       trap_myself();
+}
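+
+/*
+ * This handler runs inside the traced child: the fault information is
+ * copied into the shared STUB_DATA page, and trap_myself() stops the
+ * child so the tracing UML kernel can collect it.
+ */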
+
diff --git a/arch/x86/um/sys_call_table_32.S b/arch/x86/um/sys_call_table_32.S
new file mode 100644 (file)
index 0000000..a7ca80d
--- /dev/null
@@ -0,0 +1,26 @@
+#include <linux/linkage.h>
+/* Steal the i386 syscall table for our purposes, but with some slight changes. */
+
+#define sys_iopl sys_ni_syscall
+#define sys_ioperm sys_ni_syscall
+
+#define sys_vm86old sys_ni_syscall
+#define sys_vm86 sys_ni_syscall
+
+#define old_mmap sys_old_mmap
+
+#define ptregs_fork sys_fork
+#define ptregs_execve sys_execve
+#define ptregs_iopl sys_iopl
+#define ptregs_vm86old sys_vm86old
+#define ptregs_clone sys_clone
+#define ptregs_vm86 sys_vm86
+#define ptregs_sigaltstack sys_sigaltstack
+#define ptregs_vfork sys_vfork
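+
+/*
+ * The ptregs_* entries are i386 assembly stubs that hand a struct
+ * pt_regs to the handler; UML reaches the saved registers through
+ * current->thread.regs instead, so they alias the plain sys_* calls.
+ */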
+
+.section .rodata,"a"
+
+#include "../kernel/syscall_table_32.S"
+
+ENTRY(syscall_table_size)
+.long .-sys_call_table
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
new file mode 100644 (file)
index 0000000..99522f7
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c
+ * with some changes for UML.
+ */
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <linux/cache.h>
+
+#define __NO_STUBS
+
+/*
+ * Below you can see, in terms of #define's, the differences between the x86-64
+ * and the UML syscall table.
+ */
+
+/* Not going to be implemented by UML, since we have no hardware. */
+#define stub_iopl sys_ni_syscall
+#define sys_ioperm sys_ni_syscall
+
+/*
+ * The UML TLS problem. Note that x86_64 does not implement this, so the
+ * following is needed only for ia32 compatibility.
+ */
+
+/* On UML we call it this way ("old" means it's not mmap2) */
+#define sys_mmap old_mmap
+
+#define stub_clone sys_clone
+#define stub_fork sys_fork
+#define stub_vfork sys_vfork
+#define stub_execve sys_execve
+#define stub_rt_sigsuspend sys_rt_sigsuspend
+#define stub_sigaltstack sys_sigaltstack
+#define stub_rt_sigreturn sys_rt_sigreturn
+
+#define __SYSCALL(nr, sym) extern asmlinkage void sym(void);
+#undef _ASM_X86_UNISTD_64_H
+#include "../../x86/include/asm/unistd_64.h"
+
+#undef __SYSCALL
+#define __SYSCALL(nr, sym) [ nr ] = sym,
+#undef _ASM_X86_UNISTD_64_H
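+
+/*
+ * unistd_64.h is included twice: the first pass above expands __SYSCALL
+ * into extern declarations, while the second pass, inside the table
+ * initializer below, expands it into designated initializers of the
+ * form [nr] = sym.  Undefining the include guard permits re-inclusion.
+ */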
+
+typedef void (*sys_call_ptr_t)(void);
+
+extern void sys_ni_syscall(void);
+
+/*
+ * We used to have a trick here which made sure that holes in the
+ * x86_64 table were filled in with sys_ni_syscall, but a comment in
+ * unistd_64.h says that holes aren't allowed, so the trick was
+ * removed.
+ * The trick looked like this
+ *     [0 ... UM_NR_syscall_max] = &sys_ni_syscall
+ * before including unistd_64.h - the later initializations overwrote
+ * the sys_ni_syscall filler.
+ */
+
+sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
+#include <asm/unistd_64.h>
+};
+
+int syscall_table_size = sizeof(sys_call_table);
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644 (file)
index 0000000..70ca357
--- /dev/null
@@ -0,0 +1,66 @@
+/* 
+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "linux/shm.h"
+#include "linux/ipc.h"
+#include "linux/syscalls.h"
+#include "asm/mman.h"
+#include "asm/uaccess.h"
+#include "asm/unistd.h"
+
+/*
+ * The prototype on i386 is:
+ *
+ *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr)
+ *
+ * and the "newtls" arg. on i386 is read by copy_thread directly from the
+ * register saved on the stack.
+ */
+long sys_clone(unsigned long clone_flags, unsigned long newsp,
+              int __user *parent_tid, void *newtls, int __user *child_tid)
+{
+       long ret;
+
+       if (!newsp)
+               newsp = UPT_SP(&current->thread.regs.regs);
+
+       current->thread.forking = 1;
+       ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
+                     child_tid);
+       current->thread.forking = 0;
+       return ret;
+}
+
+long sys_sigaction(int sig, const struct old_sigaction __user *act,
+                        struct old_sigaction __user *oact)
+{
+       struct k_sigaction new_ka, old_ka;
+       int ret;
+
+       if (act) {
+               old_sigset_t mask;
+               if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+                   __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+                   __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+                       return -EFAULT;
+               __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+               __get_user(mask, &act->sa_mask);
+               siginitset(&new_ka.sa.sa_mask, mask);
+       }
+
+       ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+       if (!ret && oact) {
+               if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+                   __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+                   __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+                       return -EFAULT;
+               __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+               __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+       }
+
+       return ret;
+}
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
new file mode 100644 (file)
index 0000000..f3d82bb
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright 2003 PathScale, Inc.
+ *
+ * Licensed under the GPL
+ */
+
+#include "linux/linkage.h"
+#include "linux/personality.h"
+#include "linux/utsname.h"
+#include "asm/prctl.h" /* XXX This should get the constants from libc */
+#include "asm/uaccess.h"
+#include "os.h"
+
+long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
+{
+       unsigned long *ptr = addr, tmp;
+       long ret;
+       int pid = task->mm->context.id.u.pid;
+
+       /*
+        * With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to
+        * be safe), we need to call arch_prctl on the host because
+        * setting %fs may result in something else happening (like a
+        * GDT or thread.fs being set instead).  So, we let the host
+        * fiddle the registers and thread struct and restore the
+        * registers afterwards.
+        *
+        * So, the saved registers are stored to the process (this is
+        * needed because a stub may have been the last thing to run),
+        * arch_prctl is run on the host, then the registers are read
+        * back.
+        */
+       switch (code) {
+       case ARCH_SET_FS:
+       case ARCH_SET_GS:
+               ret = restore_registers(pid, &current->thread.regs.regs);
+               if (ret)
+                       return ret;
+               break;
+       case ARCH_GET_FS:
+       case ARCH_GET_GS:
+               /*
+                * With these two, we read to a local pointer and
+                * put_user it to the userspace pointer that we were
+                * given.  If addr isn't valid (because it hasn't been
+                * faulted in or is just bogus), we want put_user to
+                * fault it in (or return -EFAULT) instead of having
+                * the host return -EFAULT.
+                */
+               ptr = &tmp;
+       }
+
+       ret = os_arch_prctl(pid, code, ptr);
+       if (ret)
+               return ret;
+
+       switch (code) {
+       case ARCH_SET_FS:
+               current->thread.arch.fs = (unsigned long) ptr;
+               ret = save_registers(pid, &current->thread.regs.regs);
+               break;
+       case ARCH_SET_GS:
+               ret = save_registers(pid, &current->thread.regs.regs);
+               break;
+       case ARCH_GET_FS:
+               ret = put_user(tmp, addr);
+               break;
+       case ARCH_GET_GS:
+               ret = put_user(tmp, addr);
+               break;
+       }
+
+       return ret;
+}
+
+long sys_arch_prctl(int code, unsigned long addr)
+{
+       return arch_prctl(current, code, (unsigned long __user *) addr);
+}
+
+long sys_clone(unsigned long clone_flags, unsigned long newsp,
+              void __user *parent_tid, void __user *child_tid)
+{
+       long ret;
+
+       if (!newsp)
+               newsp = UPT_SP(&current->thread.regs.regs);
+       current->thread.forking = 1;
+       ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
+                     child_tid);
+       current->thread.forking = 0;
+       return ret;
+}
+
+void arch_switch_to(struct task_struct *to)
+{
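+       /* Kernel threads have no mm, and fs == 0 means no TLS base was set. */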
+       if ((to->thread.arch.fs == 0) || (to->mm == NULL))
+               return;
+
+       arch_prctl(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs);
+}
diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c
new file mode 100644 (file)
index 0000000..171b3e9
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/kernel.h"
+#include "linux/smp.h"
+#include "linux/sched.h"
+#include "linux/kallsyms.h"
+#include "asm/ptrace.h"
+#include "sysrq.h"
+
+/* This is declared by <linux/sched.h> */
+void show_regs(struct pt_regs *regs)
+{
+        printk("\n");
+        printk("EIP: %04lx:[<%08lx>] CPU: %d %s", 
+              0xffff & PT_REGS_CS(regs), PT_REGS_IP(regs),
+              smp_processor_id(), print_tainted());
+        if (PT_REGS_CS(regs) & 3)
+                printk(" ESP: %04lx:%08lx", 0xffff & PT_REGS_SS(regs),
+                      PT_REGS_SP(regs));
+        printk(" EFLAGS: %08lx\n    %s\n", PT_REGS_EFLAGS(regs),
+              print_tainted());
+        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+                PT_REGS_EAX(regs), PT_REGS_EBX(regs), 
+              PT_REGS_ECX(regs), 
+              PT_REGS_EDX(regs));
+        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+              PT_REGS_ESI(regs), PT_REGS_EDI(regs), 
+              PT_REGS_EBP(regs));
+        printk(" DS: %04lx ES: %04lx\n",
+              0xffff & PT_REGS_DS(regs), 
+              0xffff & PT_REGS_ES(regs));
+
+        show_trace(NULL, (unsigned long *) &regs);
+}
+
+/* Copied from i386. */
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+{
+       return  p > (void *)tinfo &&
+               p < (void *)tinfo + THREAD_SIZE - 3;
+}
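+
+/*
+ * The "- 3" bound keeps a full 4-byte read at p inside the thread
+ * stack: p < end - 3 guarantees bytes p through p + 3 are in range.
+ */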
+
+/* Adapted from i386 (we also print the address we read from). */
+static inline unsigned long print_context_stack(struct thread_info *tinfo,
+                               unsigned long *stack, unsigned long ebp)
+{
+       unsigned long addr;
+
+#ifdef CONFIG_FRAME_POINTER
+       while (valid_stack_ptr(tinfo, (void *)ebp)) {
+               addr = *(unsigned long *)(ebp + 4);
+               printk("%08lx:  [<%08lx>]", ebp + 4, addr);
+               print_symbol(" %s", addr);
+               printk("\n");
+               ebp = *(unsigned long *)ebp;
+       }
+#else
+       while (valid_stack_ptr(tinfo, stack)) {
+               addr = *stack;
+               if (__kernel_text_address(addr)) {
+                       printk("%08lx:  [<%08lx>]", (unsigned long) stack, addr);
+                       print_symbol(" %s", addr);
+                       printk("\n");
+               }
+               stack++;
+       }
+#endif
+       return ebp;
+}
+
+void show_trace(struct task_struct *task, unsigned long *stack)
+{
+       unsigned long ebp;
+       struct thread_info *context;
+
+       /* Turn this into BUG_ON if possible. */
+       if (!stack) {
+               stack = (unsigned long*) &stack;
+               printk("show_trace: got NULL stack, implicit assumption task == current");
+               WARN_ON(1);
+       }
+
+       if (!task)
+               task = current;
+
+       if (task != current) {
+               ebp = (unsigned long) KSTK_EBP(task);
+       } else {
+               asm ("movl %%ebp, %0" : "=r" (ebp) : );
+       }
+
+       context = (struct thread_info *)
+               ((unsigned long)stack & (~(THREAD_SIZE - 1)));
+       print_context_stack(context, stack, ebp);
+
+       printk("\n");
+}
+
diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c
new file mode 100644 (file)
index 0000000..e891343
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2003 PathScale, Inc.
+ *
+ * Licensed under the GPL
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <asm/current.h>
+#include <asm/ptrace.h>
+#include "sysrq.h"
+
+void __show_regs(struct pt_regs *regs)
+{
+       printk("\n");
+       print_modules();
+       printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
+               current->comm, print_tainted(), init_utsname()->release);
+       printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff,
+              PT_REGS_RIP(regs));
+       printk(KERN_INFO "RSP: %016lx  EFLAGS: %08lx\n", PT_REGS_SP(regs),
+              PT_REGS_EFLAGS(regs));
+       printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
+              PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs));
+       printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
+              PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs));
+       printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
+              PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs));
+       printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+              PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs));
+       printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+              PT_REGS_R13(regs), PT_REGS_R14(regs), PT_REGS_R15(regs));
+}
+
+void show_regs(struct pt_regs *regs)
+{
+       __show_regs(regs);
+       show_trace(current, (unsigned long *) &regs);
+}
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
new file mode 100644 (file)
index 0000000..c6c7131
--- /dev/null
@@ -0,0 +1,396 @@
+/*
+ * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
+ * Licensed under the GPL
+ */
+
+#include "linux/percpu.h"
+#include "linux/sched.h"
+#include "asm/uaccess.h"
+#include "os.h"
+#include "skas.h"
+#include "sysdep/tls.h"
+
+/*
+ * If needed we can detect when it's uninitialized.
+ *
+ * These are initialized in an initcall and unchanged thereafter.
+ */
+static int host_supports_tls = -1;
+int host_gdt_entry_tls_min;
+
+int do_set_thread_area(struct user_desc *info)
+{
+       int ret;
+       u32 cpu;
+
+       cpu = get_cpu();
+       ret = os_set_thread_area(info, userspace_pid[cpu]);
+       put_cpu();
+
+       if (ret)
+               printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, "
+                      "index = %d\n", ret, info->entry_number);
+
+       return ret;
+}
+
+int do_get_thread_area(struct user_desc *info)
+{
+       int ret;
+       u32 cpu;
+
+       cpu = get_cpu();
+       ret = os_get_thread_area(info, userspace_pid[cpu]);
+       put_cpu();
+
+       if (ret)
+               printk(KERN_ERR "PTRACE_GET_THREAD_AREA failed, err = %d, "
+                      "index = %d\n", ret, info->entry_number);
+
+       return ret;
+}
+
+/*
+ * get_free_idx: get a yet-unused TLS descriptor index.
+ * XXX: Consider leaving one free slot for glibc usage in the first place. This must
+ * be done here (and by changing GDT_ENTRY_TLS_* macros) and nowhere else.
+ *
+ * Also, this must be tested when compiling in SKAS mode with dynamic linking
+ * and running against NPTL.
+ */
+static int get_free_idx(struct task_struct* task)
+{
+       struct thread_struct *t = &task->thread;
+       int idx;
+
+       if (!t->arch.tls_array)
+               return GDT_ENTRY_TLS_MIN;
+
+       for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+               if (!t->arch.tls_array[idx].present)
+                       return idx + GDT_ENTRY_TLS_MIN;
+       return -ESRCH;
+}
+
+static inline void clear_user_desc(struct user_desc* info)
+{
+       /* Postcondition: LDT_empty(info) returns true. */
+       memset(info, 0, sizeof(*info));
+
+       /*
+        * Check LDT_empty or the i386 sys_get_thread_area code - this does
+        * indeed yield an empty user_desc.
+        */
+       info->read_exec_only = 1;
+       info->seg_not_present = 1;
+}
+
+#define O_FORCE 1
+
+static int load_TLS(int flags, struct task_struct *to)
+{
+       int ret = 0;
+       int idx;
+
+       for (idx = GDT_ENTRY_TLS_MIN; idx < GDT_ENTRY_TLS_MAX; idx++) {
+               struct uml_tls_struct* curr =
+                       &to->thread.arch.tls_array[idx - GDT_ENTRY_TLS_MIN];
+
+               /*
+                * If the entry is not present and was never flushed, it is
+                * cleared here and then flushed to the host, which clears it.
+                */
+               if (!curr->present) {
+                       if (!curr->flushed) {
+                               clear_user_desc(&curr->tls);
+                               curr->tls.entry_number = idx;
+                       } else {
+                               WARN_ON(!LDT_empty(&curr->tls));
+                               continue;
+                       }
+               }
+
+               if (!(flags & O_FORCE) && curr->flushed)
+                       continue;
+
+               ret = do_set_thread_area(&curr->tls);
+               if (ret)
+                       goto out;
+
+               curr->flushed = 1;
+       }
+out:
+       return ret;
+}
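+
+/*
+ * O_FORCE makes load_TLS() rewrite every entry on the host even when it
+ * is already marked flushed; arch_switch_tls() passes it because the
+ * host process may still carry the previous task's TLS entries.
+ */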
+
+/*
+ * Check whether the new process needs a flush, i.e. whether any
+ * descriptors haven't been flushed yet.
+ */
+static inline int needs_TLS_update(struct task_struct *task)
+{
+       int i;
+       int ret = 0;
+
+       for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) {
+               struct uml_tls_struct* curr =
+                       &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN];
+
+               /*
+                * Can't test curr->present, we may need to clear a descriptor
+                * which had a value.
+                */
+               if (curr->flushed)
+                       continue;
+               ret = 1;
+               break;
+       }
+       return ret;
+}
+
+/*
+ * On a newly forked process, the TLS descriptors haven't yet been flushed. So
+ * we mark them as such and the first switch_to will do the job.
+ */
+void clear_flushed_tls(struct task_struct *task)
+{
+       int i;
+
+       for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) {
+               struct uml_tls_struct* curr =
+                       &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN];
+
+               /*
+                * Still correct to do this, if it wasn't present on the host it
+                * will remain as flushed as it was.
+                */
+               if (!curr->present)
+                       continue;
+
+               curr->flushed = 0;
+       }
+}
+
+/*
+ * In SKAS0 mode, currently, multiple guest threads sharing the same ->mm have a
+ * common host process. So this is needed in SKAS0 too.
+ *
+ * However, if each thread had a different host process (and this was discussed
+ * for SMP support) this wouldn't be needed.
+ *
+ * It will also not be needed if and when we add support for the host SKAS
+ * patch.
+ */
+
+int arch_switch_tls(struct task_struct *to)
+{
+       if (!host_supports_tls)
+               return 0;
+
+       /*
+        * We have no need whatsoever to switch TLS for kernel threads; beyond
+        * that, that would also result in us calling os_set_thread_area with
+        * userspace_pid[cpu] == 0, which gives an error.
+        */
+       if (likely(to->mm))
+               return load_TLS(O_FORCE, to);
+
+       return 0;
+}
+
+static int set_tls_entry(struct task_struct* task, struct user_desc *info,
+                        int idx, int flushed)
+{
+       struct thread_struct *t = &task->thread;
+
+       if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+               return -EINVAL;
+
+       t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls = *info;
+       t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present = 1;
+       t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed = flushed;
+
+       return 0;
+}
+
+int arch_copy_tls(struct task_struct *new)
+{
+       struct user_desc info;
+       int idx, ret = -EFAULT;
+
+       if (copy_from_user(&info,
+                          (void __user *) UPT_ESI(&new->thread.regs.regs),
+                          sizeof(info)))
+               goto out;
+
+       ret = -EINVAL;
+       if (LDT_empty(&info))
+               goto out;
+
+       idx = info.entry_number;
+
+       ret = set_tls_entry(new, &info, idx, 0);
+out:
+       return ret;
+}
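+
+/*
+ * UPT_ESI is used above because on i386 the struct user_desc pointer is
+ * clone()'s fourth argument, which the syscall ABI passes in %esi.
+ */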
+
+/* XXX: use do_get_thread_area to read the host value? I'm not at all sure! */
+static int get_tls_entry(struct task_struct *task, struct user_desc *info,
+                        int idx)
+{
+       struct thread_struct *t = &task->thread;
+
+       if (!t->arch.tls_array)
+               goto clear;
+
+       if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+               return -EINVAL;
+
+       if (!t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present)
+               goto clear;
+
+       *info = t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls;
+
+out:
+       /*
+        * Temporary debugging check, to make sure that things have been
+        * flushed. This could be triggered if load_TLS() failed.
+        */
+       if (unlikely(task == current &&
+                    !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) {
+               printk(KERN_ERR "get_tls_entry: task with pid %d got here "
+                               "without flushed TLS.", current->pid);
+       }
+
+       return 0;
+clear:
+       /*
+        * When the TLS entry has not been set, the values returned to
+        * userspace from the tls_array are 0 (because it's cleared at boot, see
+        * arch/i386/kernel/head.S:cpu_gdt_table). Emulate that.
+        */
+       clear_user_desc(info);
+       info->entry_number = idx;
+       goto out;
+}
+
+int sys_set_thread_area(struct user_desc __user *user_desc)
+{
+       struct user_desc info;
+       int idx, ret;
+
+       if (!host_supports_tls)
+               return -ENOSYS;
+
+       if (copy_from_user(&info, user_desc, sizeof(info)))
+               return -EFAULT;
+
+       idx = info.entry_number;
+
+       if (idx == -1) {
+               idx = get_free_idx(current);
+               if (idx < 0)
+                       return idx;
+               info.entry_number = idx;
+               /* Tell the user which slot we chose for him. */
+               if (put_user(idx, &user_desc->entry_number))
+                       return -EFAULT;
+       }
+
+       ret = do_set_thread_area(&info);
+       if (ret)
+               return ret;
+       return set_tls_entry(current, &info, idx, 1);
+}
+
+/*
+ * Perform set_thread_area on behalf of the traced child.
+ * Note: error handling is not done on the deferred load, and this differs
+ * from i386. However, the only possible errors are caused by bugs.
+ */
+int ptrace_set_thread_area(struct task_struct *child, int idx,
+                          struct user_desc __user *user_desc)
+{
+       struct user_desc info;
+
+       if (!host_supports_tls)
+               return -EIO;
+
+       if (copy_from_user(&info, user_desc, sizeof(info)))
+               return -EFAULT;
+
+       return set_tls_entry(child, &info, idx, 0);
+}
+
+int sys_get_thread_area(struct user_desc __user *user_desc)
+{
+       struct user_desc info;
+       int idx, ret;
+
+       if (!host_supports_tls)
+               return -ENOSYS;
+
+       if (get_user(idx, &user_desc->entry_number))
+               return -EFAULT;
+
+       ret = get_tls_entry(current, &info, idx);
+       if (ret < 0)
+               goto out;
+
+       if (copy_to_user(user_desc, &info, sizeof(info)))
+               ret = -EFAULT;
+
+out:
+       return ret;
+}
+
+/*
+ * Perform get_thread_area on behalf of the traced child.
+ */
+int ptrace_get_thread_area(struct task_struct *child, int idx,
+               struct user_desc __user *user_desc)
+{
+       struct user_desc info;
+       int ret;
+
+       if (!host_supports_tls)
+               return -EIO;
+
+       ret = get_tls_entry(child, &info, idx);
+       if (ret < 0)
+               goto out;
+
+       if (copy_to_user(user_desc, &info, sizeof(info)))
+               ret = -EFAULT;
+out:
+       return ret;
+}
+
+/*
+ * This code is really i386-only, but it detects and logs x86_64 GDT indexes
+ * if a 32-bit UML is running on a 64-bit host.
+ */
+static int __init __setup_host_supports_tls(void)
+{
+       check_host_supports_tls(&host_supports_tls, &host_gdt_entry_tls_min);
+       if (host_supports_tls) {
+               printk(KERN_INFO "Host TLS support detected\n");
+               printk(KERN_INFO "Detected host type: ");
+               switch (host_gdt_entry_tls_min) {
+               case GDT_ENTRY_TLS_MIN_I386:
+                       printk(KERN_CONT "i386");
+                       break;
+               case GDT_ENTRY_TLS_MIN_X86_64:
+                       printk(KERN_CONT "x86_64");
+                       break;
+               }
+               printk(KERN_CONT " (GDT indexes %d to %d)\n",
+                      host_gdt_entry_tls_min,
+                      host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES);
+       } else
+               printk(KERN_ERR "  Host TLS support NOT detected! "
+                               "TLS support inside UML will not work\n");
+       return 0;
+}
+
+__initcall(__setup_host_supports_tls);
diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c
new file mode 100644 (file)
index 0000000..f7ba462
--- /dev/null
@@ -0,0 +1,17 @@
+#include "linux/sched.h"
+
+void clear_flushed_tls(struct task_struct *task)
+{
+}
+
+int arch_copy_tls(struct task_struct *t)
+{
+       /*
+        * If CLONE_SETTLS is set, we need to save the TLS base
+        * (which is argument 5 of clone, passed in %r8) so it can be
+        * restored during context switches.
+        */
+       t->thread.arch.fs = t->thread.regs.regs.gp[R8 / sizeof(long)];
+
+       return 0;
+}
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
new file mode 100644 (file)
index 0000000..ca49be8
--- /dev/null
@@ -0,0 +1,80 @@
+#include <stdio.h>
+#include <stddef.h>
+#include <signal.h>
+#include <sys/poll.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+#define __FRAME_OFFSETS
+#include <asm/ptrace.h>
+#include <asm/types.h>
+
+#define DEFINE(sym, val) \
+       asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define DEFINE_LONGS(sym, val) \
+       asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
+
+void foo(void)
+{
+#ifdef __i386__
+       DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
+       DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
+
+       DEFINE(HOST_IP, EIP);
+       DEFINE(HOST_SP, UESP);
+       DEFINE(HOST_EFLAGS, EFL);
+       DEFINE(HOST_AX, EAX);
+       DEFINE(HOST_BX, EBX);
+       DEFINE(HOST_CX, ECX);
+       DEFINE(HOST_DX, EDX);
+       DEFINE(HOST_SI, ESI);
+       DEFINE(HOST_DI, EDI);
+       DEFINE(HOST_BP, EBP);
+       DEFINE(HOST_CS, CS);
+       DEFINE(HOST_SS, SS);
+       DEFINE(HOST_DS, DS);
+       DEFINE(HOST_FS, FS);
+       DEFINE(HOST_ES, ES);
+       DEFINE(HOST_GS, GS);
+       DEFINE(HOST_ORIG_AX, ORIG_EAX);
+#else
+       DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
+       DEFINE_LONGS(HOST_BX, RBX);
+       DEFINE_LONGS(HOST_CX, RCX);
+       DEFINE_LONGS(HOST_DI, RDI);
+       DEFINE_LONGS(HOST_SI, RSI);
+       DEFINE_LONGS(HOST_DX, RDX);
+       DEFINE_LONGS(HOST_BP, RBP);
+       DEFINE_LONGS(HOST_AX, RAX);
+       DEFINE_LONGS(HOST_R8, R8);
+       DEFINE_LONGS(HOST_R9, R9);
+       DEFINE_LONGS(HOST_R10, R10);
+       DEFINE_LONGS(HOST_R11, R11);
+       DEFINE_LONGS(HOST_R12, R12);
+       DEFINE_LONGS(HOST_R13, R13);
+       DEFINE_LONGS(HOST_R14, R14);
+       DEFINE_LONGS(HOST_R15, R15);
+       DEFINE_LONGS(HOST_ORIG_AX, ORIG_RAX);
+       DEFINE_LONGS(HOST_CS, CS);
+       DEFINE_LONGS(HOST_SS, SS);
+       DEFINE_LONGS(HOST_EFLAGS, EFLAGS);
+#if 0
+       DEFINE_LONGS(HOST_FS, FS);
+       DEFINE_LONGS(HOST_GS, GS);
+       DEFINE_LONGS(HOST_DS, DS);
+       DEFINE_LONGS(HOST_ES, ES);
+#endif
+
+       DEFINE_LONGS(HOST_IP, RIP);
+       DEFINE_LONGS(HOST_SP, RSP);
+#endif
+
+       DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct));
+       DEFINE(UM_POLLIN, POLLIN);
+       DEFINE(UM_POLLPRI, POLLPRI);
+       DEFINE(UM_POLLOUT, POLLOUT);
+
+       DEFINE(UM_PROT_READ, PROT_READ);
+       DEFINE(UM_PROT_WRITE, PROT_WRITE);
+       DEFINE(UM_PROT_EXEC, PROT_EXEC);
+}
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
new file mode 100644 (file)
index 0000000..6c803ca
--- /dev/null
@@ -0,0 +1,90 @@
+#
+# Building vDSO images for x86.
+#
+
+VDSO64-y               := y
+
+vdso-install-$(VDSO64-y)       += vdso.so
+
+
+# files to link into the vdso
+vobjs-y := vdso-note.o um_vdso.o
+
+# files to link into kernel
+obj-$(VDSO64-y)                        += vdso.o vma.o
+
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+
+$(obj)/vdso.o: $(obj)/vdso.so
+
+targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
+
+export CPPFLAGS_vdso.lds += -P -C
+
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
+       -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+
+$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
+
+$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+       $(call if_changed,vdso)
+
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+       $(call if_changed,objcopy)
+
+#
+# Don't omit frame pointers for ease of userspace debugging, but do
+# optimize sibling calls.
+#
+CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
+       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
+       -fno-omit-frame-pointer -foptimize-sibling-calls
+
+$(vobjs): KBUILD_CFLAGS += $(CFL)
+
+#
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+#
+CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage
+CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
+
+targets += vdso-syms.lds
+obj-$(VDSO64-y)                        += vdso-syms.lds
+
+#
+# Match symbols in the DSO that look like VDSO*; produce a file of constants.
+#
+sed-vdsosym := -e 's/^00*/0/' \
+       -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
+quiet_cmd_vdsosym = VDSOSYM $@
+define cmd_vdsosym
+       $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
+endef
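+
+# The sed script rewrites nm output lines such as, e.g.,
+#   "ffffffffff700300 T VDSO64_foo"
+# into linker-script assignments like
+#   "VDSO64_foo = 0xffffffffff700300;"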
+
+$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
+       $(call if_changed,vdsosym)
+
+#
+# The DSO images are built using a special linker script.
+#
+quiet_cmd_vdso = VDSO    $@
+      cmd_vdso = $(CC) -nostdlib -o $@ \
+                      $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
+                      -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
+                sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
+
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+GCOV_PROFILE := n
+
+#
+# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
+#
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
+       @mkdir -p $(MODLIB)/vdso
+       $(call cmd,vdso_install)
+
+PHONY += vdso_install $(vdso-install-y)
+vdso_install: $(vdso-install-y)
diff --git a/arch/x86/um/vdso/checkundef.sh b/arch/x86/um/vdso/checkundef.sh
new file mode 100644 (file)
index 0000000..7ee90a9
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/sh
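+# Fail when the DSO contains undefined symbols: grep exits 1 when no
+# "U" lines are found in the nm output, which is the success case here.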
+nm="$1"
+file="$2"
+$nm "$file" | grep '^ *U' > /dev/null 2>&1
+if [ $? -eq 1 ]; then
+    exit 0
+else
+    echo "$file: undefined symbols found" >&2
+    exit 1
+fi
diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
new file mode 100644 (file)
index 0000000..7c441b5
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This vDSO turns all calls into a syscall so that UML can trap them.
+ */
+
+
+/* Disable profiling for userspace code */
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/time.h>
+#include <linux/getcpu.h>
+#include <asm/unistd.h>
+
+int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+{
+       long ret;
+
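+       /*
+        * The "0" constraint loads the syscall number into %rax (the
+        * output register), while "D" and "S" place the arguments in
+        * %rdi and %rsi, matching the x86-64 syscall convention.
+        */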
+       asm("syscall" : "=a" (ret) :
+               "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+
+       return ret;
+}
+int clock_gettime(clockid_t, struct timespec *)
+       __attribute__((weak, alias("__vdso_clock_gettime")));
+
+int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+       long ret;
+
+       asm("syscall" : "=a" (ret) :
+               "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+
+       return ret;
+}
+int gettimeofday(struct timeval *, struct timezone *)
+       __attribute__((weak, alias("__vdso_gettimeofday")));
+
+time_t __vdso_time(time_t *t)
+{
+       long secs;
+
+       asm volatile("syscall"
+               : "=a" (secs)
+               : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory");
+
+       return secs;
+}
+int time(time_t *t) __attribute__((weak, alias("__vdso_time")));
+
+long
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+{
+       /*
+        * UML does not support SMP, we can cheat here. :)
+        */
+
+       if (cpu)
+               *cpu = 0;
+       if (node)
+               *node = 0;
+
+       return 0;
+}
+
+long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+       __attribute__((weak, alias("__vdso_getcpu")));
diff --git a/arch/x86/um/vdso/vdso-layout.lds.S b/arch/x86/um/vdso/vdso-layout.lds.S
new file mode 100644 (file)
index 0000000..634a2cf
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Linker script for vDSO.  This is an ELF shared object prelinked to
+ * its virtual address, and with only one read-only segment.
+ * This script controls its layout.
+ */
+
+SECTIONS
+{
+       . = VDSO_PRELINK + SIZEOF_HEADERS;
+
+       .hash           : { *(.hash) }                  :text
+       .gnu.hash       : { *(.gnu.hash) }
+       .dynsym         : { *(.dynsym) }
+       .dynstr         : { *(.dynstr) }
+       .gnu.version    : { *(.gnu.version) }
+       .gnu.version_d  : { *(.gnu.version_d) }
+       .gnu.version_r  : { *(.gnu.version_r) }
+
+       .note           : { *(.note.*) }                :text   :note
+
+       .eh_frame_hdr   : { *(.eh_frame_hdr) }          :text   :eh_frame_hdr
+       .eh_frame       : { KEEP (*(.eh_frame)) }       :text
+
+       .dynamic        : { *(.dynamic) }               :text   :dynamic
+
+       .rodata         : { *(.rodata*) }               :text
+       .data           : {
+             *(.data*)
+             *(.sdata*)
+             *(.got.plt) *(.got)
+             *(.gnu.linkonce.d.*)
+             *(.bss*)
+             *(.dynbss*)
+             *(.gnu.linkonce.b.*)
+       }
+
+       .altinstructions        : { *(.altinstructions) }
+       .altinstr_replacement   : { *(.altinstr_replacement) }
+
+       /*
+        * Align the actual code well away from the non-instruction data.
+        * This is the best thing for the I-cache.
+        */
+       . = ALIGN(0x100);
+
+       .text           : { *(.text*) }                 :text   =0x90909090
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME        0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+       text            PT_LOAD         FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+       dynamic         PT_DYNAMIC      FLAGS(4);               /* PF_R */
+       note            PT_NOTE         FLAGS(4);               /* PF_R */
+       eh_frame_hdr    PT_GNU_EH_FRAME;
+}
diff --git a/arch/x86/um/vdso/vdso-note.S b/arch/x86/um/vdso/vdso-note.S
new file mode 100644 (file)
index 0000000..79a071e
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+       .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S
new file mode 100644 (file)
index 0000000..1cb468a
--- /dev/null
@@ -0,0 +1,10 @@
+#include <linux/init.h>
+
+__INITDATA
+
+       .globl vdso_start, vdso_end
+vdso_start:
+       .incbin "arch/x86/um/vdso/vdso.so"
+vdso_end:
+
+__FINIT
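+
+/*
+ * .incbin embeds the finished vdso.so image between vdso_start and
+ * vdso_end; vma.c copies that image into the page it maps for tasks.
+ */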
diff --git a/arch/x86/um/vdso/vdso.lds.S b/arch/x86/um/vdso/vdso.lds.S
new file mode 100644 (file)
index 0000000..b96b267
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Linker script for 64-bit vDSO.
+ * We #include the file to define the layout details.
+ * Here we only choose the prelinked virtual address.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.  We can define local symbols here called VDSO* to make their
+ * values visible using the asm-x86/vdso.h macros from the kernel proper.
+ */
+
+#define VDSO_PRELINK 0xffffffffff700000
+#include "vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+       LINUX_2.6 {
+       global:
+               clock_gettime;
+               __vdso_clock_gettime;
+               gettimeofday;
+               __vdso_gettimeofday;
+               getcpu;
+               __vdso_getcpu;
+               time;
+               __vdso_time;
+       local: *;
+       };
+}
+
+VDSO64_PRELINK = VDSO_PRELINK;
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
new file mode 100644 (file)
index 0000000..91f4ec9
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <linux/init.h>
+
+unsigned int __read_mostly vdso_enabled = 1;
+unsigned long um_vdso_addr;
+
+extern unsigned long task_size;
+extern char vdso_start[], vdso_end[];
+
+static struct page **vdsop;
+
+static int __init init_vdso(void)
+{
+       struct page *um_vdso;
+
+       BUG_ON(vdso_end - vdso_start > PAGE_SIZE);
+
+       um_vdso_addr = task_size - PAGE_SIZE;
+
+       vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL);
+       if (!vdsop)
+               goto oom;
+
+       um_vdso = alloc_page(GFP_KERNEL);
+       if (!um_vdso) {
+               kfree(vdsop);
+
+               goto oom;
+       }
+
+       copy_page(page_address(um_vdso), vdso_start);
+       *vdsop = um_vdso;
+
+       return 0;
+
+oom:
+       printk(KERN_ERR "Cannot allocate vdso\n");
+       vdso_enabled = 0;
+
+       return -ENOMEM;
+}
+subsys_initcall(init_vdso);
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+       int err;
+       struct mm_struct *mm = current->mm;
+
+       if (!vdso_enabled)
+               return 0;
+
+       down_write(&mm->mmap_sem);
+
+       err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
+               VM_READ|VM_EXEC|
+               VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+               VM_ALWAYSDUMP,
+               vdsop);
+
+       up_write(&mm->mmap_sem);
+
+       return err;
+}
index 079c08808d8a41b7fe5d27b00d389579ffae0930..e7a5750a93d945657f0aa1e63175ad66369aeaa6 100644 (file)
@@ -8,10 +8,13 @@
 #include <linux/scatterlist.h>
 #include <linux/string_helpers.h>
 #include <scsi/scsi_cmnd.h>
+#include <linux/idr.h>
 
 #define PART_BITS 4
 
-static int major, index;
+static int major;
+static DEFINE_IDA(vd_index_ida);
+
 struct workqueue_struct *virtblk_wq;
 
 struct virtio_blk
@@ -35,6 +38,9 @@ struct virtio_blk
        /* What host tells us, plus 2 for header & tailer. */
        unsigned int sg_elems;
 
+       /* Ida index - used to track minor number allocations. */
+       int index;
+
        /* Scatterlist: can be too big for stack. */
        struct scatterlist sg[/*sg_elems*/];
 };
@@ -276,6 +282,11 @@ static int index_to_minor(int index)
        return index << PART_BITS;
 }
 
+static int minor_to_index(int minor)
+{
+       return minor >> PART_BITS;
+}
+
 static ssize_t virtblk_serial_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
@@ -341,14 +352,17 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
        struct virtio_blk *vblk;
        struct request_queue *q;
-       int err;
+       int err, index;
        u64 cap;
        u32 v, blk_size, sg_elems, opt_io_size;
        u16 min_io_size;
        u8 physical_block_exp, alignment_offset;
 
-       if (index_to_minor(index) >= 1 << MINORBITS)
-               return -ENOSPC;
+       err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
+                            GFP_KERNEL);
+       if (err < 0)
+               goto out;
+       index = err;
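+       /*
+        * ida_simple_get() hands back the allocated index on success or a
+        * negative errno, so a non-negative return value is the index.
+        */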
 
        /* We need to know how many segments before we allocate. */
        err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
@@ -365,7 +379,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
                                    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
        if (!vblk) {
                err = -ENOMEM;
-               goto out;
+               goto out_free_index;
        }
 
        INIT_LIST_HEAD(&vblk->reqs);
@@ -421,7 +435,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
        vblk->disk->private_data = vblk;
        vblk->disk->fops = &virtblk_fops;
        vblk->disk->driverfs_dev = &vdev->dev;
-       index++;
+       vblk->index = index;
 
        /* configure queue flush support */
        if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
@@ -516,6 +530,8 @@ out_free_vq:
        vdev->config->del_vqs(vdev);
 out_free_vblk:
        kfree(vblk);
+out_free_index:
+       ida_simple_remove(&vd_index_ida, index);
 out:
        return err;
 }
@@ -523,6 +539,7 @@ out:
 static void __devexit virtblk_remove(struct virtio_device *vdev)
 {
        struct virtio_blk *vblk = vdev->priv;
+       int index = vblk->index;
 
        flush_work(&vblk->config_work);
 
@@ -538,6 +555,7 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
        mempool_destroy(vblk->pool);
        vdev->config->del_vqs(vdev);
        kfree(vblk);
+       ida_simple_remove(&vd_index_ida, index);
 }
 
 static const struct virtio_device_id id_table[] = {
index 423fd56bf6128edc9e49f72b49929cdcc14b2915..43643033a3ae2973daa6c30bc94d7cb47223a509 100644 (file)
@@ -298,7 +298,7 @@ if RTC_LIB=n
 config RTC
        tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)"
        depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV \
-                       && !ARM && !SUPERH && !S390 && !AVR32 && !BLACKFIN
+                       && !ARM && !SUPERH && !S390 && !AVR32 && !BLACKFIN && !UML
        ---help---
          If you say Y here and create a character special file /dev/rtc with
          major number 10 and minor number 135 using mknod ("man mknod"), you
@@ -346,7 +346,7 @@ config JS_RTC
 
 config GEN_RTC
        tristate "Generic /dev/rtc emulation"
-       depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 && !BLACKFIN
+       depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 && !BLACKFIN && !UML
        ---help---
          If you say Y here and create a character special file /dev/rtc with
          major number 10 and minor number 135 using mknod ("man mknod"), you
@@ -490,7 +490,7 @@ config SCx200_GPIO
 
 config PC8736x_GPIO
        tristate "NatSemi PC8736x GPIO Support"
-       depends on X86_32
+       depends on X86_32 && !UML
        default SCx200_GPIO     # mostly N
        select NSC_GPIO         # needed for support routines
        help
index 056b289a1e89e2452613bbf875e49b28f6db9d16..3695773ce7c3565791b7c64b30c2e2806e02c444 100644 (file)
@@ -336,7 +336,8 @@ hp_zx1_insert_memory (struct agp_memory *mem, off_t pg_start, int type)
        off_t j, io_pg_start;
        int io_pg_count;
 
-       if (type != 0 || mem->type != 0) {
+       if (type != mem->type ||
+               agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) {
                return -EINVAL;
        }
 
@@ -380,7 +381,8 @@ hp_zx1_remove_memory (struct agp_memory *mem, off_t pg_start, int type)
        struct _hp_private *hp = &hp_private;
        int i, io_pg_start, io_pg_count;
 
-       if (type != 0 || mem->type != 0) {
+       if (type != mem->type ||
+               agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) {
                return -EINVAL;
        }
 
index e0135873ba9da2fa5ba1c748806944ca9893d5aa..0689bf6b01833495967faf00e68c3b1136a44b9c 100644 (file)
@@ -235,3 +235,18 @@ config HW_RANDOM_PPC4XX
         module will be called ppc4xx-rng.
 
         If unsure, say N.
+
+config UML_RANDOM
+       depends on UML
+       tristate "Hardware random number generator"
+       help
+         This option enables UML's "hardware" random number generator.  It
+         attaches itself to the host's /dev/random, supplying as much entropy
+         as the host has, rather than the small amount the UML gets from its
+         own drivers.  It registers itself as a standard hardware random number
+         generator, major 10, minor 183, and the canonical device name is
+         /dev/hwrng.
+         The way to make use of this is to install the rng-tools package
+         (check your distro, or download from
+         http://sourceforge.net/projects/gkernel/).  rngd periodically reads
+         /dev/hwrng and injects the entropy into /dev/random.
index a1f68af4ccf476b5e58dd3942c08337e55303d81..f22861511909e9f66c366642415368931704974c 100644 (file)
@@ -170,7 +170,7 @@ static const struct tty_operations ttyprintk_ops = {
        .ioctl = tpk_ioctl,
 };
 
-struct tty_port_operations null_ops = { };
+static struct tty_port_operations null_ops = { };
 
 static struct tty_driver *ttyprintk_driver;
 
index fb68b1295373e7287aa92a577b6aa937ba7ffe09..4ca181f1378b5b7c3a13de0efb6f47b31d28966d 100644 (file)
  */
 #include <linux/cdev.h>
 #include <linux/debugfs.h>
+#include <linux/completion.h>
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/freezer.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -73,6 +75,7 @@ struct ports_driver_data {
 static struct ports_driver_data pdrvdata;
 
 DEFINE_SPINLOCK(pdrvdata_lock);
+DECLARE_COMPLETION(early_console_added);
 
 /* This struct holds information that's relevant only for console ports */
 struct console {
@@ -151,6 +154,10 @@ struct ports_device {
        int chr_major;
 };
 
+struct port_stats {
+       unsigned long bytes_sent, bytes_received, bytes_discarded;
+};
+
 /* This struct holds the per-port data */
 struct port {
        /* Next port in the list, head is in the ports_device */
@@ -178,6 +185,13 @@ struct port {
        /* File in the debugfs directory that exposes this port's information */
        struct dentry *debugfs_file;
 
+       /*
+        * Keep count of the bytes sent, received and discarded for
+        * this port for accounting and debugging purposes.  These
+        * counts are not reset across port open / close events.
+        */
+       struct port_stats stats;
+
        /*
         * The entries in this struct will be valid if this port is
         * hooked up to an hvc console
@@ -347,17 +361,19 @@ fail:
 }
 
 /* Callers should take appropriate locks */
-static void *get_inbuf(struct port *port)
+static struct port_buffer *get_inbuf(struct port *port)
 {
        struct port_buffer *buf;
-       struct virtqueue *vq;
        unsigned int len;
 
-       vq = port->in_vq;
-       buf = virtqueue_get_buf(vq, &len);
+       if (port->inbuf)
+               return port->inbuf;
+
+       buf = virtqueue_get_buf(port->in_vq, &len);
        if (buf) {
                buf->len = len;
                buf->offset = 0;
+               port->stats.bytes_received += len;
        }
        return buf;
 }
@@ -384,32 +400,27 @@ static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf)
 static void discard_port_data(struct port *port)
 {
        struct port_buffer *buf;
-       struct virtqueue *vq;
-       unsigned int len;
-       int ret;
+       unsigned int err;
 
        if (!port->portdev) {
                /* Device has been unplugged.  vqs are already gone. */
                return;
        }
-       vq = port->in_vq;
-       if (port->inbuf)
-               buf = port->inbuf;
-       else
-               buf = virtqueue_get_buf(vq, &len);
+       buf = get_inbuf(port);
 
-       ret = 0;
+       err = 0;
        while (buf) {
-               if (add_inbuf(vq, buf) < 0) {
-                       ret++;
+               port->stats.bytes_discarded += buf->len - buf->offset;
+               if (add_inbuf(port->in_vq, buf) < 0) {
+                       err++;
                        free_buf(buf);
                }
-               buf = virtqueue_get_buf(vq, &len);
+               port->inbuf = NULL;
+               buf = get_inbuf(port);
        }
-       port->inbuf = NULL;
-       if (ret)
+       if (err)
                dev_warn(port->dev, "Errors adding %d buffers back to vq\n",
-                        ret);
+                        err);
 }
 
 static bool port_has_data(struct port *port)
@@ -417,18 +428,12 @@ static bool port_has_data(struct port *port)
        unsigned long flags;
        bool ret;
 
+       ret = false;
        spin_lock_irqsave(&port->inbuf_lock, flags);
-       if (port->inbuf) {
-               ret = true;
-               goto out;
-       }
        port->inbuf = get_inbuf(port);
-       if (port->inbuf) {
+       if (port->inbuf)
                ret = true;
-               goto out;
-       }
-       ret = false;
-out:
+
        spin_unlock_irqrestore(&port->inbuf_lock, flags);
        return ret;
 }
@@ -529,6 +534,8 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
                cpu_relax();
 done:
        spin_unlock_irqrestore(&port->outvq_lock, flags);
+
+       port->stats.bytes_sent += in_count;
        /*
         * We're expected to return the amount of data we wrote -- all
         * of it
@@ -633,8 +640,8 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
                if (filp->f_flags & O_NONBLOCK)
                        return -EAGAIN;
 
-               ret = wait_event_interruptible(port->waitqueue,
-                                              !will_read_block(port));
+               ret = wait_event_freezable(port->waitqueue,
+                                          !will_read_block(port));
                if (ret < 0)
                        return ret;
        }
@@ -677,8 +684,8 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
                if (nonblock)
                        return -EAGAIN;
 
-               ret = wait_event_interruptible(port->waitqueue,
-                                              !will_write_block(port));
+               ret = wait_event_freezable(port->waitqueue,
+                                          !will_write_block(port));
                if (ret < 0)
                        return ret;
        }
@@ -1058,6 +1065,14 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf,
                               "host_connected: %d\n", port->host_connected);
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "outvq_full: %d\n", port->outvq_full);
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "bytes_sent: %lu\n", port->stats.bytes_sent);
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "bytes_received: %lu\n",
+                              port->stats.bytes_received);
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "bytes_discarded: %lu\n",
+                              port->stats.bytes_discarded);
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "is_console: %s\n",
                               is_console_port(port) ? "yes" : "no");
@@ -1143,6 +1158,7 @@ static int add_port(struct ports_device *portdev, u32 id)
        port->cons.ws.ws_row = port->cons.ws.ws_col = 0;
 
        port->host_connected = port->guest_connected = false;
+       port->stats = (struct port_stats) { 0 };
 
        port->outvq_full = false;
 
@@ -1352,6 +1368,7 @@ static void handle_control_message(struct ports_device *portdev,
                        break;
 
                init_port_console(port);
+               complete(&early_console_added);
                /*
                 * Could remove the port here in case init fails - but
                 * have to notify the host first.
@@ -1393,6 +1410,13 @@ static void handle_control_message(struct ports_device *portdev,
                send_sigio_to_port(port);
                break;
        case VIRTIO_CONSOLE_PORT_NAME:
+               /*
+                * If we woke up after hibernation, we can get this
+                * again.  Skip it in that case.
+                */
+               if (port->name)
+                       break;
+
                /*
                 * Skip the size of the header and the cpkt to get the size
                 * of the name that was sent
@@ -1481,8 +1505,7 @@ static void in_intr(struct virtqueue *vq)
                return;
 
        spin_lock_irqsave(&port->inbuf_lock, flags);
-       if (!port->inbuf)
-               port->inbuf = get_inbuf(port);
+       port->inbuf = get_inbuf(port);
 
        /*
         * Don't queue up data when port is closed.  This condition
@@ -1563,7 +1586,7 @@ static int init_vqs(struct ports_device *portdev)
        portdev->out_vqs = kmalloc(nr_ports * sizeof(struct virtqueue *),
                                   GFP_KERNEL);
        if (!vqs || !io_callbacks || !io_names || !portdev->in_vqs ||
-                       !portdev->out_vqs) {
+           !portdev->out_vqs) {
                err = -ENOMEM;
                goto free;
        }
@@ -1648,6 +1671,10 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
        struct ports_device *portdev;
        int err;
        bool multiport;
+       bool early = early_put_chars != NULL;
+
+       /* Make sure early_put_chars is read right now, not reordered later */
+       barrier();
 
        portdev = kmalloc(sizeof(*portdev), GFP_KERNEL);
        if (!portdev) {
@@ -1675,13 +1702,11 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
 
        multiport = false;
        portdev->config.max_nr_ports = 1;
-       if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) {
+       if (virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT,
+                             offsetof(struct virtio_console_config,
+                                      max_nr_ports),
+                             &portdev->config.max_nr_ports) == 0)
                multiport = true;
-               vdev->config->get(vdev, offsetof(struct virtio_console_config,
-                                                max_nr_ports),
-                                 &portdev->config.max_nr_ports,
-                                 sizeof(portdev->config.max_nr_ports));
-       }
 
        err = init_vqs(portdev);
        if (err < 0) {
@@ -1719,6 +1744,19 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
 
        __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
                           VIRTIO_CONSOLE_DEVICE_READY, 1);
+
+       /*
+        * If there was an early virtio console, assume that there are no
+        * other consoles. We need to wait until the hvc_alloc matches the
+        * hvc_instantiate, otherwise tty_open will complain, resulting in
+        * a "Warning: unable to open an initial console" boot failure.
+        * Without multiport this is done in add_port above. With multiport
+        * this might take some host<->guest communication - thus we have to
+        * wait.
+        */
+       if (multiport && early)
+               wait_for_completion(&early_console_added);
+
        return 0;
 
 free_vqs:
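
The completion handshake above is worth seeing in isolation: probe() must not
return before the control-message handler has matched hvc_instantiate with
hvc_alloc. A minimal sketch of the pattern, built around the driver's real
early_console_added completion; the helper names here are illustrative:

#include <linux/completion.h>

static DECLARE_COMPLETION(early_console_added);

/* control path: runs once the host announces the console port */
static void console_port_added(void)
{
	/* ... the init_port_console() equivalent would run here ... */
	complete(&early_console_added);
}

/* probe path: only wait when an early console was registered */
static int probe_tail(bool multiport, bool early)
{
	if (multiport && early)
		wait_for_completion(&early_console_added);
	return 0;
}
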
index d90456a809f9cb8cf92d7daf11e3efbf25f18917..edaa987621ea31a5add7a48736dc1822fca572c0 100644 (file)
 #include <linux/cpufreq.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <linux/mfd/db8500-prcmu.h>
+#include <linux/mfd/dbx500-prcmu.h>
 #include <mach/id.h>
 
 static struct cpufreq_frequency_table freq_table[] = {
        [0] = {
                .index = 0,
-               .frequency = 300000,
+               .frequency = 200000,
        },
        [1] = {
                .index = 1,
-               .frequency = 600000,
+               .frequency = 300000,
        },
        [2] = {
-               /* Used for MAX_OPP, if available */
                .index = 2,
-               .frequency = CPUFREQ_TABLE_END,
+               .frequency = 600000,
        },
        [3] = {
+               /* Used for MAX_OPP, if available */
                .index = 3,
                .frequency = CPUFREQ_TABLE_END,
        },
+       [4] = {
+               .index = 4,
+               .frequency = CPUFREQ_TABLE_END,
+       },
 };
 
 static enum arm_opp idx2opp[] = {
+       ARM_EXTCLK,
        ARM_50_OPP,
        ARM_100_OPP,
        ARM_MAX_OPP
@@ -72,13 +77,13 @@ static int db8500_cpufreq_target(struct cpufreq_policy *policy,
 
        freqs.old = policy->cur;
        freqs.new = freq_table[idx].frequency;
-       freqs.cpu = policy->cpu;
 
        if (freqs.old == freqs.new)
                return 0;
 
        /* pre-change notification */
-       cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+       for_each_cpu(freqs.cpu, policy->cpus)
+               cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 
        /* request the PRCM unit for opp change */
        if (prcmu_set_arm_opp(idx2opp[idx])) {
@@ -87,7 +92,8 @@ static int db8500_cpufreq_target(struct cpufreq_policy *policy,
        }
 
        /* post change notification */
-       cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+       for_each_cpu(freqs.cpu, policy->cpus)
+               cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 
        return 0;
 }
@@ -104,16 +110,18 @@ static unsigned int db8500_cpufreq_getspeed(unsigned int cpu)
 static int __cpuinit db8500_cpufreq_init(struct cpufreq_policy *policy)
 {
        int res;
-       int i;
+       int i = 0;
 
        BUILD_BUG_ON(ARRAY_SIZE(idx2opp) + 1 != ARRAY_SIZE(freq_table));
 
-       if (cpu_is_u8500v2() && !prcmu_is_u8400()) {
-               freq_table[0].frequency = 400000;
-               freq_table[1].frequency = 800000;
+       if (!prcmu_is_u8400()) {
+               freq_table[1].frequency = 400000;
+               freq_table[2].frequency = 800000;
                if (prcmu_has_arm_maxopp())
-                       freq_table[2].frequency = 1000000;
+                       freq_table[3].frequency = 1000000;
        }
+       pr_info("db8500-cpufreq : Available frequencies:\n");
+       while (freq_table[i].frequency != CPUFREQ_TABLE_END)
+               pr_info("  %d Mhz\n", freq_table[i++].frequency/1000);
 
        /* get policy fields based on the table */
        res = cpufreq_frequency_table_cpuinfo(policy, freq_table);
@@ -127,10 +135,6 @@ static int __cpuinit db8500_cpufreq_init(struct cpufreq_policy *policy)
        policy->min = policy->cpuinfo.min_freq;
        policy->max = policy->cpuinfo.max_freq;
        policy->cur = db8500_cpufreq_getspeed(policy->cpu);
-
-       for (i = 0; freq_table[i].frequency != policy->cur; i++)
-               ;
-
        policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
 
        /*
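
Two idioms in the hunk above are easy to get wrong: the frequency-table walk
must stop at the CPUFREQ_TABLE_END sentinel (hence keeping the index
variable), and transition notifications must be issued once per CPU covered
by the policy. A hedged sketch of both, with illustrative function names:

#include <linux/cpufreq.h>

/* print each supported rate until the table sentinel (sketch) */
static void announce_freqs(struct cpufreq_frequency_table *table)
{
	int i;

	for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++)
		pr_info("  %d MHz\n", table[i].frequency / 1000);
}

/* one PRECHANGE notification per CPU in the policy (sketch) */
static void notify_pre(struct cpufreq_policy *policy,
		       struct cpufreq_freqs *freqs)
{
	for_each_cpu(freqs->cpu, policy->cpus)
		cpufreq_notify_transition(freqs, CPUFREQ_PRECHANGE);
}
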
index 35a257dd4bb76f848762bccf5fe9e10db3248a77..4bd6815d317bb86631dc3713a7e2e3eebc7332ac 100644 (file)
 #include <asm/msr.h>
 #include <asm/tsc.h>
 
+#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+#endif
+
 #define EPS_BRAND_C7M  0
 #define EPS_BRAND_C7   1
 #define EPS_BRAND_EDEN 2
 
 struct eps_cpu_data {
        u32 fsb;
+#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+       u32 bios_limit;
+#endif
        struct cpufreq_frequency_table freq_table[];
 };
 
 static struct eps_cpu_data *eps_cpu[NR_CPUS];
 
+/* Module parameters */
+static int freq_failsafe_off;
+static int voltage_failsafe_off;
+static int set_max_voltage;
+
+#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+static int ignore_acpi_limit;
+
+static struct acpi_processor_performance *eps_acpi_cpu_perf;
+
+/* Minimum necessary to get acpi_processor_get_bios_limit() working */
+static int eps_acpi_init(void)
+{
+       eps_acpi_cpu_perf = kzalloc(sizeof(struct acpi_processor_performance),
+                                     GFP_KERNEL);
+       if (!eps_acpi_cpu_perf)
+               return -ENOMEM;
+
+       if (!zalloc_cpumask_var(&eps_acpi_cpu_perf->shared_cpu_map,
+                                                               GFP_KERNEL)) {
+               kfree(eps_acpi_cpu_perf);
+               eps_acpi_cpu_perf = NULL;
+               return -ENOMEM;
+       }
+
+       if (acpi_processor_register_performance(eps_acpi_cpu_perf, 0)) {
+               free_cpumask_var(eps_acpi_cpu_perf->shared_cpu_map);
+               kfree(eps_acpi_cpu_perf);
+               eps_acpi_cpu_perf = NULL;
+               return -EIO;
+       }
+       return 0;
+}
+
+static int eps_acpi_exit(struct cpufreq_policy *policy)
+{
+       if (eps_acpi_cpu_perf) {
+               acpi_processor_unregister_performance(eps_acpi_cpu_perf, 0);
+               free_cpumask_var(eps_acpi_cpu_perf->shared_cpu_map);
+               kfree(eps_acpi_cpu_perf);
+               eps_acpi_cpu_perf = NULL;
+       }
+       return 0;
+}
+#endif
 
 static unsigned int eps_get(unsigned int cpu)
 {
@@ -164,6 +217,9 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
        int k, step, voltage;
        int ret;
        int states;
+#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+       unsigned int limit = 0;
+#endif
 
        if (policy->cpu != 0)
                return -ENODEV;
@@ -244,11 +300,62 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
                return -EINVAL;
        if (current_voltage > 0x1f || max_voltage > 0x1f)
                return -EINVAL;
-       if (max_voltage < min_voltage)
+       if (max_voltage < min_voltage
+           || current_voltage < min_voltage
+           || current_voltage > max_voltage)
                return -EINVAL;
 
+       /* Check for systems using underclocked CPU */
+       if (!freq_failsafe_off && max_multiplier != current_multiplier) {
+               printk(KERN_INFO "eps: Your processor is running at different "
+                       "frequency then its maximum. Aborting.\n");
+               printk(KERN_INFO "eps: You can use freq_failsafe_off option "
+                       "to disable this check.\n");
+               return -EINVAL;
+       }
+       if (!voltage_failsafe_off && max_voltage != current_voltage) {
+               printk(KERN_INFO "eps: Your processor is running at different "
+                       "voltage then its maximum. Aborting.\n");
+               printk(KERN_INFO "eps: You can use voltage_failsafe_off "
+                       "option to disable this check.\n");
+               return -EINVAL;
+       }
+
        /* Calc FSB speed */
        fsb = cpu_khz / current_multiplier;
+
+#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+       /* Check for ACPI processor speed limit */
+       if (!ignore_acpi_limit && !eps_acpi_init()) {
+               if (!acpi_processor_get_bios_limit(policy->cpu, &limit)) {
+                       printk(KERN_INFO "eps: ACPI limit %u.%uGHz\n",
+                               limit/1000000,
+                               (limit%1000000)/10000);
+                       eps_acpi_exit(policy);
+                       /* Check if max_multiplier is in BIOS limits */
+                       if (limit && max_multiplier * fsb > limit) {
+                               printk(KERN_INFO "eps: Aborting.\n");
+                               return -EINVAL;
+                       }
+               }
+       }
+#endif
+
+       /* Allow the user to set a lower maximum voltage than that
+        * reported by the processor */
+       if (brand == EPS_BRAND_C7M && set_max_voltage) {
+               u32 v;
+
+               /* Change mV to something hardware can use */
+               v = (set_max_voltage - 700) / 16;
+               /* Check if voltage is within limits */
+               if (v >= min_voltage && v <= max_voltage) {
+                       printk(KERN_INFO "eps: Setting %dmV as maximum.\n",
+                               v * 16 + 700);
+                       max_voltage = v;
+               }
+       }
+
        /* Calc number of p-states supported */
        if (brand == EPS_BRAND_C7M)
                states = max_multiplier - min_multiplier + 1;
@@ -265,6 +372,9 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 
        /* Copy basic values */
        centaur->fsb = fsb;
+#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+       centaur->bios_limit = limit;
+#endif
 
        /* Fill frequency and MSR value table */
        f_table = &centaur->freq_table[0];
@@ -303,17 +413,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 static int eps_cpu_exit(struct cpufreq_policy *policy)
 {
        unsigned int cpu = policy->cpu;
-       struct eps_cpu_data *centaur;
-       u32 lo, hi;
 
-       if (eps_cpu[cpu] == NULL)
-               return -ENODEV;
-       centaur = eps_cpu[cpu];
-
-       /* Get max frequency */
-       rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-       /* Set max frequency */
-       eps_set_state(centaur, cpu, hi & 0xffff);
        /* Bye */
        cpufreq_frequency_table_put_attr(policy->cpu);
        kfree(eps_cpu[cpu]);
@@ -359,6 +459,19 @@ static void __exit eps_exit(void)
        cpufreq_unregister_driver(&eps_driver);
 }
 
+/* Allow the user to overclock the machine or to raise the frequency
+ * after unloading the module */
+module_param(freq_failsafe_off, int, 0644);
+MODULE_PARM_DESC(freq_failsafe_off, "Disable current vs max frequency check");
+module_param(voltage_failsafe_off, int, 0644);
+MODULE_PARM_DESC(voltage_failsafe_off, "Disable current vs max voltage check");
+#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+module_param(ignore_acpi_limit, int, 0644);
+MODULE_PARM_DESC(ignore_acpi_limit, "Don't check ACPI's processor speed limit");
+#endif
+module_param(set_max_voltage, int, 0644);
+MODULE_PARM_DESC(set_max_voltage, "Set maximum CPU voltage (mV), C7-M only");
+
 MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
 MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
 MODULE_LICENSE("GPL");
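
The set_max_voltage handling above relies on the C7-M VID encoding: 16 mV
steps starting at 700 mV, exactly the arithmetic in the hunk. As a small
sketch of the conversion (helper names are illustrative, not driver code):

static inline unsigned int mv_to_vid(unsigned int mv)
{
	return (mv - 700) / 16;		/* e.g. 1004 mV -> VID 19 */
}

static inline unsigned int vid_to_mv(unsigned int vid)
{
	return vid * 16 + 700;		/* inverse mapping */
}
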
index b7c3a84c4cfa86cf6fe5b4fa14f2acf319d1b380..ab9741fab92e2341af91ca7a30d664851894666e 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/slab.h>
 #include <linux/regulator/consumer.h>
 #include <linux/cpufreq.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
 
 #include <mach/map.h>
 #include <mach/regs-clock.h>
@@ -36,6 +38,10 @@ static struct regulator *int_regulator;
 static struct cpufreq_freqs freqs;
 static unsigned int memtype;
 
+static unsigned int locking_frequency;
+static bool frequency_locked;
+static DEFINE_MUTEX(cpufreq_lock);
+
 enum exynos4_memory_type {
        DDR2 = 4,
        LPDDR2,
@@ -405,22 +411,32 @@ static int exynos4_target(struct cpufreq_policy *policy,
 {
        unsigned int index, old_index;
        unsigned int arm_volt, int_volt;
+       int err = -EINVAL;
 
        freqs.old = exynos4_getspeed(policy->cpu);
 
+       mutex_lock(&cpufreq_lock);
+
+       if (frequency_locked && target_freq != locking_frequency) {
+               err = -EAGAIN;
+               goto out;
+       }
+
        if (cpufreq_frequency_table_target(policy, exynos4_freq_table,
                                           freqs.old, relation, &old_index))
-               return -EINVAL;
+               goto out;
 
        if (cpufreq_frequency_table_target(policy, exynos4_freq_table,
                                           target_freq, relation, &index))
-               return -EINVAL;
+               goto out;
+
+       err = 0;
 
        freqs.new = exynos4_freq_table[index].frequency;
        freqs.cpu = policy->cpu;
 
        if (freqs.new == freqs.old)
-               return 0;
+               goto out;
 
        /* get the voltage value */
        arm_volt = exynos4_volt_table[index].arm_volt;
@@ -447,10 +463,16 @@ static int exynos4_target(struct cpufreq_policy *policy,
 
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 
-       return 0;
+out:
+       mutex_unlock(&cpufreq_lock);
+       return err;
 }
 
 #ifdef CONFIG_PM
+/*
+ * These suspend/resume are used as syscore_ops; it is already too
+ * late to set regulator voltages at this stage.
+ */
 static int exynos4_cpufreq_suspend(struct cpufreq_policy *policy)
 {
        return 0;
@@ -462,8 +484,82 @@ static int exynos4_cpufreq_resume(struct cpufreq_policy *policy)
 }
 #endif
 
+/**
+ * exynos4_cpufreq_pm_notifier - block CPUFREQ's activities in suspend-resume
+ *                     context
+ * @notifier: the registered notifier block
+ * @pm_event: PM_SUSPEND_PREPARE or PM_POST_SUSPEND
+ * @v: unused
+ *
+ * While frequency_locked == true, target() ignores every frequency but
+ * locking_frequency. The locking_frequency value is the initial frequency,
+ * which is set by the bootloader. In order to eliminate possible
+ * inconsistency in clock values, we save and restore frequencies during
+ * suspend and resume and block CPUFREQ activities. Note that the standard
+ * suspend/resume cannot be used as they are too deep (syscore_ops) for
+ * regulator actions.
+ */
+static int exynos4_cpufreq_pm_notifier(struct notifier_block *notifier,
+                                      unsigned long pm_event, void *v)
+{
+       struct cpufreq_policy *policy = cpufreq_cpu_get(0); /* boot CPU */
+       static unsigned int saved_frequency;
+       unsigned int temp;
+
+       mutex_lock(&cpufreq_lock);
+       switch (pm_event) {
+       case PM_SUSPEND_PREPARE:
+               if (frequency_locked)
+                       goto out;
+               frequency_locked = true;
+
+               if (locking_frequency) {
+                       saved_frequency = exynos4_getspeed(0);
+
+                       mutex_unlock(&cpufreq_lock);
+                       exynos4_target(policy, locking_frequency,
+                                      CPUFREQ_RELATION_H);
+                       mutex_lock(&cpufreq_lock);
+               }
+
+               break;
+       case PM_POST_SUSPEND:
+
+               if (saved_frequency) {
+                       /*
+                        * While frequency_locked, only locking_frequency
+                        * is valid for target(). In order to use
+                        * saved_frequency while keeping frequency_locked,
+                        * we temporarily overwrite locking_frequency.
+                        */
+                       temp = locking_frequency;
+                       locking_frequency = saved_frequency;
+
+                       mutex_unlock(&cpufreq_lock);
+                       exynos4_target(policy, locking_frequency,
+                                      CPUFREQ_RELATION_H);
+                       mutex_lock(&cpufreq_lock);
+
+                       locking_frequency = temp;
+               }
+
+               frequency_locked = false;
+               break;
+       }
+out:
+       mutex_unlock(&cpufreq_lock);
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block exynos4_cpufreq_nb = {
+       .notifier_call = exynos4_cpufreq_pm_notifier,
+};
+
 static int exynos4_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
+       int ret;
+
        policy->cur = policy->min = policy->max = exynos4_getspeed(policy->cpu);
 
        cpufreq_frequency_table_get_attr(exynos4_freq_table, policy->cpu);
@@ -479,16 +575,35 @@ static int exynos4_cpufreq_cpu_init(struct cpufreq_policy *policy)
         */
        cpumask_setall(policy->cpus);
 
-       return cpufreq_frequency_table_cpuinfo(policy, exynos4_freq_table);
+       ret = cpufreq_frequency_table_cpuinfo(policy, exynos4_freq_table);
+       if (ret)
+               return ret;
+
+       cpufreq_frequency_table_get_attr(exynos4_freq_table, policy->cpu);
+
+       return 0;
+}
+
+static int exynos4_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+       cpufreq_frequency_table_put_attr(policy->cpu);
+       return 0;
 }
 
+static struct freq_attr *exynos4_cpufreq_attr[] = {
+       &cpufreq_freq_attr_scaling_available_freqs,
+       NULL,
+};
+
 static struct cpufreq_driver exynos4_driver = {
        .flags          = CPUFREQ_STICKY,
        .verify         = exynos4_verify_speed,
        .target         = exynos4_target,
        .get            = exynos4_getspeed,
        .init           = exynos4_cpufreq_cpu_init,
+       .exit           = exynos4_cpufreq_cpu_exit,
        .name           = "exynos4_cpufreq",
+       .attr           = exynos4_cpufreq_attr,
 #ifdef CONFIG_PM
        .suspend        = exynos4_cpufreq_suspend,
        .resume         = exynos4_cpufreq_resume,
@@ -501,6 +616,8 @@ static int __init exynos4_cpufreq_init(void)
        if (IS_ERR(cpu_clk))
                return PTR_ERR(cpu_clk);
 
+       locking_frequency = exynos4_getspeed(0);
+
        moutcore = clk_get(NULL, "moutcore");
        if (IS_ERR(moutcore))
                goto out;
@@ -540,6 +657,8 @@ static int __init exynos4_cpufreq_init(void)
                printk(KERN_DEBUG "%s: memtype= 0x%x\n", __func__, memtype);
        }
 
+       register_pm_notifier(&exynos4_cpufreq_nb);
+
        return cpufreq_register_driver(&exynos4_driver);
 
 out:
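
register_pm_notifier() above is what lets the driver act before and after
suspend without abusing syscore_ops, whose callbacks run too late for
regulator work. A minimal sketch of the same hook-up; the callback body is
illustrative:

#include <linux/notifier.h>
#include <linux/suspend.h>

static int pm_cb(struct notifier_block *nb, unsigned long event, void *v)
{
	switch (event) {
	case PM_SUSPEND_PREPARE:
		/* lock the frequency before devices suspend */
		break;
	case PM_POST_SUSPEND:
		/* restore the saved frequency and unlock */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block pm_nb = { .notifier_call = pm_cb };

static int __init hook_pm(void)
{
	return register_pm_notifier(&pm_nb);
}
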
index af1a17d42bd76ad09309e9cba305e2d5e2bc8fbd..5948a2194f503e80e19f2ab55e8a22cc096c9e75 100644 (file)
@@ -41,7 +41,7 @@ config EDAC_DEBUG
 
 config EDAC_DECODE_MCE
        tristate "Decode MCEs in human-readable form (only on AMD for now)"
-       depends on CPU_SUP_AMD && X86_MCE
+       depends on CPU_SUP_AMD && X86_MCE_AMD
        default y
        ---help---
          Enable this option if you want to decode Machine Check Exceptions
@@ -71,9 +71,6 @@ config EDAC_MM_EDAC
          occurred so that a particular failing memory module can be
          replaced.  If unsure, select 'Y'.
 
-config EDAC_MCE
-       bool
-
 config EDAC_AMD64
        tristate "AMD64 (Opteron, Athlon64) K8, F10h"
        depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
@@ -173,8 +170,7 @@ config EDAC_I5400
 
 config EDAC_I7CORE
        tristate "Intel i7 Core (Nehalem) processors"
-       depends on EDAC_MM_EDAC && PCI && X86
-       select EDAC_MCE
+       depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
        help
           Support for error detection and correction on the Intel
          i7 Core (Nehalem) Integrated Memory Controller that exists on
@@ -216,6 +212,14 @@ config EDAC_I7300
           Support for error detection and correction on the Intel
          Clarksboro MCH (Intel 7300 chipset).
 
+config EDAC_SBRIDGE
+       tristate "Intel Sandy-Bridge Integrated MC"
+       depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+       depends on EXPERIMENTAL
+       help
+         Support for error detection and correction on the Intel
+         Sandy Bridge Integrated Memory Controller.
+
 config EDAC_MPC85XX
        tristate "Freescale MPC83xx / MPC85xx"
        depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)
index 3e239133e29e792a54c9c2a4432806ff56535a96..196a63dd37c5c41e0914502620fb3a8424c7d2a4 100644 (file)
@@ -8,7 +8,6 @@
 
 obj-$(CONFIG_EDAC)                     := edac_stub.o
 obj-$(CONFIG_EDAC_MM_EDAC)             += edac_core.o
-obj-$(CONFIG_EDAC_MCE)                 += edac_mce.o
 
 edac_core-y    := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
 edac_core-y    += edac_module.o edac_device_sysfs.o
@@ -29,6 +28,7 @@ obj-$(CONFIG_EDAC_I5100)              += i5100_edac.o
 obj-$(CONFIG_EDAC_I5400)               += i5400_edac.o
 obj-$(CONFIG_EDAC_I7300)               += i7300_edac.o
 obj-$(CONFIG_EDAC_I7CORE)              += i7core_edac.o
+obj-$(CONFIG_EDAC_SBRIDGE)             += sb_edac.o
 obj-$(CONFIG_EDAC_E7XXX)               += e7xxx_edac.o
 obj-$(CONFIG_EDAC_E752X)               += e752x_edac.o
 obj-$(CONFIG_EDAC_I82443BXGX)          += i82443bxgx_edac.o
index 55b8278bb172f6feda52e225581b258b3404402a..fe90cd4a7ebc4192da2d1e354cc72e3436d6ab45 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/sysdev.h>
 #include <linux/workqueue.h>
+#include <linux/edac.h>
 
-#define EDAC_MC_LABEL_LEN      31
 #define EDAC_DEVICE_NAME_LEN   31
 #define EDAC_ATTRIB_VALUE_LEN  15
-#define MC_PROC_NAME_MAX_LEN   7
 
 #if PAGE_SHIFT < 20
 #define PAGES_TO_MiB(pages)    ((pages) >> (20 - PAGE_SHIFT))
@@ -101,353 +100,6 @@ extern int edac_debug_level;
 
 #define edac_dev_name(dev) (dev)->dev_name
 
-/* memory devices */
-enum dev_type {
-       DEV_UNKNOWN = 0,
-       DEV_X1,
-       DEV_X2,
-       DEV_X4,
-       DEV_X8,
-       DEV_X16,
-       DEV_X32,                /* Do these parts exist? */
-       DEV_X64                 /* Do these parts exist? */
-};
-
-#define DEV_FLAG_UNKNOWN       BIT(DEV_UNKNOWN)
-#define DEV_FLAG_X1            BIT(DEV_X1)
-#define DEV_FLAG_X2            BIT(DEV_X2)
-#define DEV_FLAG_X4            BIT(DEV_X4)
-#define DEV_FLAG_X8            BIT(DEV_X8)
-#define DEV_FLAG_X16           BIT(DEV_X16)
-#define DEV_FLAG_X32           BIT(DEV_X32)
-#define DEV_FLAG_X64           BIT(DEV_X64)
-
-/* memory types */
-enum mem_type {
-       MEM_EMPTY = 0,          /* Empty csrow */
-       MEM_RESERVED,           /* Reserved csrow type */
-       MEM_UNKNOWN,            /* Unknown csrow type */
-       MEM_FPM,                /* Fast page mode */
-       MEM_EDO,                /* Extended data out */
-       MEM_BEDO,               /* Burst Extended data out */
-       MEM_SDR,                /* Single data rate SDRAM */
-       MEM_RDR,                /* Registered single data rate SDRAM */
-       MEM_DDR,                /* Double data rate SDRAM */
-       MEM_RDDR,               /* Registered Double data rate SDRAM */
-       MEM_RMBS,               /* Rambus DRAM */
-       MEM_DDR2,               /* DDR2 RAM */
-       MEM_FB_DDR2,            /* fully buffered DDR2 */
-       MEM_RDDR2,              /* Registered DDR2 RAM */
-       MEM_XDR,                /* Rambus XDR */
-       MEM_DDR3,               /* DDR3 RAM */
-       MEM_RDDR3,              /* Registered DDR3 RAM */
-};
-
-#define MEM_FLAG_EMPTY         BIT(MEM_EMPTY)
-#define MEM_FLAG_RESERVED      BIT(MEM_RESERVED)
-#define MEM_FLAG_UNKNOWN       BIT(MEM_UNKNOWN)
-#define MEM_FLAG_FPM           BIT(MEM_FPM)
-#define MEM_FLAG_EDO           BIT(MEM_EDO)
-#define MEM_FLAG_BEDO          BIT(MEM_BEDO)
-#define MEM_FLAG_SDR           BIT(MEM_SDR)
-#define MEM_FLAG_RDR           BIT(MEM_RDR)
-#define MEM_FLAG_DDR           BIT(MEM_DDR)
-#define MEM_FLAG_RDDR          BIT(MEM_RDDR)
-#define MEM_FLAG_RMBS          BIT(MEM_RMBS)
-#define MEM_FLAG_DDR2           BIT(MEM_DDR2)
-#define MEM_FLAG_FB_DDR2        BIT(MEM_FB_DDR2)
-#define MEM_FLAG_RDDR2          BIT(MEM_RDDR2)
-#define MEM_FLAG_XDR            BIT(MEM_XDR)
-#define MEM_FLAG_DDR3           BIT(MEM_DDR3)
-#define MEM_FLAG_RDDR3          BIT(MEM_RDDR3)
-
-/* chipset Error Detection and Correction capabilities and mode */
-enum edac_type {
-       EDAC_UNKNOWN = 0,       /* Unknown if ECC is available */
-       EDAC_NONE,              /* Doesn't support ECC */
-       EDAC_RESERVED,          /* Reserved ECC type */
-       EDAC_PARITY,            /* Detects parity errors */
-       EDAC_EC,                /* Error Checking - no correction */
-       EDAC_SECDED,            /* Single bit error correction, Double detection */
-       EDAC_S2ECD2ED,          /* Chipkill x2 devices - do these exist? */
-       EDAC_S4ECD4ED,          /* Chipkill x4 devices */
-       EDAC_S8ECD8ED,          /* Chipkill x8 devices */
-       EDAC_S16ECD16ED,        /* Chipkill x16 devices */
-};
-
-#define EDAC_FLAG_UNKNOWN      BIT(EDAC_UNKNOWN)
-#define EDAC_FLAG_NONE         BIT(EDAC_NONE)
-#define EDAC_FLAG_PARITY       BIT(EDAC_PARITY)
-#define EDAC_FLAG_EC           BIT(EDAC_EC)
-#define EDAC_FLAG_SECDED       BIT(EDAC_SECDED)
-#define EDAC_FLAG_S2ECD2ED     BIT(EDAC_S2ECD2ED)
-#define EDAC_FLAG_S4ECD4ED     BIT(EDAC_S4ECD4ED)
-#define EDAC_FLAG_S8ECD8ED     BIT(EDAC_S8ECD8ED)
-#define EDAC_FLAG_S16ECD16ED   BIT(EDAC_S16ECD16ED)
-
-/* scrubbing capabilities */
-enum scrub_type {
-       SCRUB_UNKNOWN = 0,      /* Unknown if scrubber is available */
-       SCRUB_NONE,             /* No scrubber */
-       SCRUB_SW_PROG,          /* SW progressive (sequential) scrubbing */
-       SCRUB_SW_SRC,           /* Software scrub only errors */
-       SCRUB_SW_PROG_SRC,      /* Progressive software scrub from an error */
-       SCRUB_SW_TUNABLE,       /* Software scrub frequency is tunable */
-       SCRUB_HW_PROG,          /* HW progressive (sequential) scrubbing */
-       SCRUB_HW_SRC,           /* Hardware scrub only errors */
-       SCRUB_HW_PROG_SRC,      /* Progressive hardware scrub from an error */
-       SCRUB_HW_TUNABLE        /* Hardware scrub frequency is tunable */
-};
-
-#define SCRUB_FLAG_SW_PROG     BIT(SCRUB_SW_PROG)
-#define SCRUB_FLAG_SW_SRC      BIT(SCRUB_SW_SRC)
-#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC)
-#define SCRUB_FLAG_SW_TUN      BIT(SCRUB_SW_SCRUB_TUNABLE)
-#define SCRUB_FLAG_HW_PROG     BIT(SCRUB_HW_PROG)
-#define SCRUB_FLAG_HW_SRC      BIT(SCRUB_HW_SRC)
-#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC)
-#define SCRUB_FLAG_HW_TUN      BIT(SCRUB_HW_TUNABLE)
-
-/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
-
-/* EDAC internal operation states */
-#define        OP_ALLOC                0x100
-#define OP_RUNNING_POLL                0x201
-#define OP_RUNNING_INTERRUPT   0x202
-#define OP_RUNNING_POLL_INTR   0x203
-#define OP_OFFLINE             0x300
-
-/*
- * There are several things to be aware of that aren't at all obvious:
- *
- *
- * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
- *
- * These are some of the many terms that are thrown about that don't always
- * mean what people think they mean (Inconceivable!).  In the interest of
- * creating a common ground for discussion, terms and their definitions
- * will be established.
- *
- * Memory devices:     The individual chip on a memory stick.  These devices
- *                     commonly output 4 and 8 bits each.  Grouping several
- *                     of these in parallel provides 64 bits which is common
- *                     for a memory stick.
- *
- * Memory Stick:       A printed circuit board that aggregates multiple
- *                     memory devices in parallel.  This is the atomic
- *                     memory component that is purchaseable by Joe consumer
- *                     and loaded into a memory socket.
- *
- * Socket:             A physical connector on the motherboard that accepts
- *                     a single memory stick.
- *
- * Channel:            Set of memory devices on a memory stick that must be
- *                     grouped in parallel with one or more additional
- *                     channels from other memory sticks.  This parallel
- *                     grouping of the output from multiple channels are
- *                     necessary for the smallest granularity of memory access.
- *                     Some memory controllers are capable of single channel -
- *                     which means that memory sticks can be loaded
- *                     individually.  Other memory controllers are only
- *                     capable of dual channel - which means that memory
- *                     sticks must be loaded as pairs (see "socket set").
- *
- * Chip-select row:    All of the memory devices that are selected together.
- *                     for a single, minimum grain of memory access.
- *                     This selects all of the parallel memory devices across
- *                     all of the parallel channels.  Common chip-select rows
- *                     for single channel are 64 bits, for dual channel 128
- *                     bits.
- *
- * Single-Ranked stick:        A Single-ranked stick has 1 chip-select row of memory.
- *                     Motherboards commonly drive two chip-select pins to
- *                     a memory stick. A single-ranked stick, will occupy
- *                     only one of those rows. The other will be unused.
- *
- * Double-Ranked stick:        A double-ranked stick has two chip-select rows which
- *                     access different sets of memory devices.  The two
- *                     rows cannot be accessed concurrently.
- *
- * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
- *                     A double-sided stick has two chip-select rows which
- *                     access different sets of memory devices.  The two
- *                     rows cannot be accessed concurrently.  "Double-sided"
- *                     is irrespective of the memory devices being mounted
- *                     on both sides of the memory stick.
- *
- * Socket set:         All of the memory sticks that are required for
- *                     a single memory access or all of the memory sticks
- *                     spanned by a chip-select row.  A single socket set
- *                     has two chip-select rows and if double-sided sticks
- *                     are used these will occupy those chip-select rows.
- *
- * Bank:               This term is avoided because it is unclear when
- *                     needing to distinguish between chip-select rows and
- *                     socket sets.
- *
- * Controller pages:
- *
- * Physical pages:
- *
- * Virtual pages:
- *
- *
- * STRUCTURE ORGANIZATION AND CHOICES
- *
- *
- *
- * PS - I enjoyed writing all that about as much as you enjoyed reading it.
- */
-
-struct channel_info {
-       int chan_idx;           /* channel index */
-       u32 ce_count;           /* Correctable Errors for this CHANNEL */
-       char label[EDAC_MC_LABEL_LEN + 1];      /* DIMM label on motherboard */
-       struct csrow_info *csrow;       /* the parent */
-};
-
-struct csrow_info {
-       unsigned long first_page;       /* first page number in dimm */
-       unsigned long last_page;        /* last page number in dimm */
-       unsigned long page_mask;        /* used for interleaving -
-                                        * 0UL for non intlv
-                                        */
-       u32 nr_pages;           /* number of pages in csrow */
-       u32 grain;              /* granularity of reported error in bytes */
-       int csrow_idx;          /* the chip-select row */
-       enum dev_type dtype;    /* memory device type */
-       u32 ue_count;           /* Uncorrectable Errors for this csrow */
-       u32 ce_count;           /* Correctable Errors for this csrow */
-       enum mem_type mtype;    /* memory csrow type */
-       enum edac_type edac_mode;       /* EDAC mode for this csrow */
-       struct mem_ctl_info *mci;       /* the parent */
-
-       struct kobject kobj;    /* sysfs kobject for this csrow */
-
-       /* channel information for this csrow */
-       u32 nr_channels;
-       struct channel_info *channels;
-};
-
-struct mcidev_sysfs_group {
-       const char *name;                               /* group name */
-       const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
-};
-
-struct mcidev_sysfs_group_kobj {
-       struct list_head list;          /* list for all instances within a mc */
-
-       struct kobject kobj;            /* kobj for the group */
-
-       const struct mcidev_sysfs_group *grp;   /* group description table */
-       struct mem_ctl_info *mci;       /* the parent */
-};
-
-/* mcidev_sysfs_attribute structure
- *     used for driver sysfs attributes and in mem_ctl_info
- *     sysfs top level entries
- */
-struct mcidev_sysfs_attribute {
-       /* It should use either attr or grp */
-       struct attribute attr;
-       const struct mcidev_sysfs_group *grp;   /* Points to a group of attributes */
-
-       /* Ops for show/store values at the attribute - not used on group */
-        ssize_t (*show)(struct mem_ctl_info *,char *);
-        ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
-};
-
-/* MEMORY controller information structure
- */
-struct mem_ctl_info {
-       struct list_head link;  /* for global list of mem_ctl_info structs */
-
-       struct module *owner;   /* Module owner of this control struct */
-
-       unsigned long mtype_cap;        /* memory types supported by mc */
-       unsigned long edac_ctl_cap;     /* Mem controller EDAC capabilities */
-       unsigned long edac_cap; /* configuration capabilities - this is
-                                * closely related to edac_ctl_cap.  The
-                                * difference is that the controller may be
-                                * capable of s4ecd4ed which would be listed
-                                * in edac_ctl_cap, but if channels aren't
-                                * capable of s4ecd4ed then the edac_cap would
-                                * not have that capability.
-                                */
-       unsigned long scrub_cap;        /* chipset scrub capabilities */
-       enum scrub_type scrub_mode;     /* current scrub mode */
-
-       /* Translates sdram memory scrub rate given in bytes/sec to the
-          internal representation and configures whatever else needs
-          to be configured.
-        */
-       int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
-
-       /* Get the current sdram memory scrub rate from the internal
-          representation and converts it to the closest matching
-          bandwidth in bytes/sec.
-        */
-       int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
-
-
-       /* pointer to edac checking routine */
-       void (*edac_check) (struct mem_ctl_info * mci);
-
-       /*
-        * Remaps memory pages: controller pages to physical pages.
-        * For most MC's, this will be NULL.
-        */
-       /* FIXME - why not send the phys page to begin with? */
-       unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
-                                          unsigned long page);
-       int mc_idx;
-       int nr_csrows;
-       struct csrow_info *csrows;
-       /*
-        * FIXME - what about controllers on other busses? - IDs must be
-        * unique.  dev pointer should be sufficiently unique, but
-        * BUS:SLOT.FUNC numbers may not be unique.
-        */
-       struct device *dev;
-       const char *mod_name;
-       const char *mod_ver;
-       const char *ctl_name;
-       const char *dev_name;
-       char proc_name[MC_PROC_NAME_MAX_LEN + 1];
-       void *pvt_info;
-       u32 ue_noinfo_count;    /* Uncorrectable Errors w/o info */
-       u32 ce_noinfo_count;    /* Correctable Errors w/o info */
-       u32 ue_count;           /* Total Uncorrectable Errors for this MC */
-       u32 ce_count;           /* Total Correctable Errors for this MC */
-       unsigned long start_time;       /* mci load start time (in jiffies) */
-
-       struct completion complete;
-
-       /* edac sysfs device control */
-       struct kobject edac_mci_kobj;
-
-       /* list for all grp instances within a mc */
-       struct list_head grp_kobj_list;
-
-       /* Additional top controller level attributes, but specified
-        * by the low level driver.
-        *
-        * Set by the low level driver to provide attributes at the
-        * controller level, same level as 'ue_count' and 'ce_count' above.
-        * An array of structures, NULL terminated
-        *
-        * If attributes are desired, then set to array of attributes
-        * If no attributes are desired, leave NULL
-        */
-       const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
-
-       /* work struct for this MC */
-       struct delayed_work work;
-
-       /* the internal state of this controller instance */
-       int op_state;
-};
-
 /*
  * The following are the structures to provide for a generic
  * or abstract 'edac_device'. This set of structures and the
diff --git a/drivers/edac/edac_mce.c b/drivers/edac/edac_mce.c
deleted file mode 100644 (file)
index 9ccdc5b..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Provides edac interface to mcelog events
- *
- * This file may be distributed under the terms of the
- * GNU General Public License version 2.
- *
- * Copyright (c) 2009 by:
- *      Mauro Carvalho Chehab <mchehab@redhat.com>
- *
- * Red Hat Inc. http://www.redhat.com
- */
-
-#include <linux/module.h>
-#include <linux/edac_mce.h>
-#include <asm/mce.h>
-
-int edac_mce_enabled;
-EXPORT_SYMBOL_GPL(edac_mce_enabled);
-
-
-/*
- * Extension interface
- */
-
-static LIST_HEAD(edac_mce_list);
-static DEFINE_MUTEX(edac_mce_lock);
-
-int edac_mce_register(struct edac_mce *edac_mce)
-{
-       mutex_lock(&edac_mce_lock);
-       list_add_tail(&edac_mce->list, &edac_mce_list);
-       mutex_unlock(&edac_mce_lock);
-       return 0;
-}
-EXPORT_SYMBOL(edac_mce_register);
-
-void edac_mce_unregister(struct edac_mce *edac_mce)
-{
-       mutex_lock(&edac_mce_lock);
-       list_del(&edac_mce->list);
-       mutex_unlock(&edac_mce_lock);
-}
-EXPORT_SYMBOL(edac_mce_unregister);
-
-int edac_mce_parse(struct mce *mce)
-{
-       struct edac_mce *edac_mce;
-
-       list_for_each_entry(edac_mce, &edac_mce_list, list) {
-               if (edac_mce->check_error(edac_mce->priv, mce))
-                       return 1;
-       }
-
-       /* Nobody queued the error */
-       return 0;
-}
-EXPORT_SYMBOL_GPL(edac_mce_parse);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
-MODULE_DESCRIPTION("EDAC Driver for mcelog captured errors");
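
With edac_mce.c gone, the i7core driver (further below) consumes MCEs through
the x86 decoder notifier chain instead of this registration API. A hedged
sketch of the replacement pattern; the chain symbol matches the era's
arch/x86 code, while the callback body and names are illustrative:

#include <linux/notifier.h>
#include <asm/mce.h>

static int mem_mce_cb(struct notifier_block *nb, unsigned long val,
		      void *data)
{
	struct mce *mce = data;

	if (mce->bank != 8)	/* not the memory-controller bank */
		return NOTIFY_DONE;
	/* ... queue the event for the EDAC core here ... */
	return NOTIFY_STOP;	/* handled; mcelog need not log it */
}

static struct notifier_block mem_mce_nb = {
	.notifier_call = mem_mce_cb,
};

static int __init mem_mce_hook(void)
{
	atomic_notifier_chain_register(&x86_mce_decoder_chain, &mem_mce_nb);
	return 0;
}
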
index a76fe8366b681d14cc2e6158fba49133525aeaa3..6104dba380b626443512a889fd6c66629f376092 100644 (file)
@@ -372,7 +372,7 @@ static const char *get_err_from_table(const char *table[], int size, int pos)
 static void i7300_process_error_global(struct mem_ctl_info *mci)
 {
        struct i7300_pvt *pvt;
-       u32 errnum, value;
+       u32 errnum, error_reg;
        unsigned long errors;
        const char *specific;
        bool is_fatal;
@@ -381,9 +381,9 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
 
        /* read in the 1st FATAL error register */
        pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
-                             FERR_GLOBAL_HI, &value);
-       if (unlikely(value)) {
-               errors = value;
+                             FERR_GLOBAL_HI, &error_reg);
+       if (unlikely(error_reg)) {
+               errors = error_reg;
                errnum = find_first_bit(&errors,
                                        ARRAY_SIZE(ferr_global_hi_name));
                specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum);
@@ -391,15 +391,15 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
 
                /* Clear the error bit */
                pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
-                                      FERR_GLOBAL_HI, value);
+                                      FERR_GLOBAL_HI, error_reg);
 
                goto error_global;
        }
 
        pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
-                             FERR_GLOBAL_LO, &value);
-       if (unlikely(value)) {
-               errors = value;
+                             FERR_GLOBAL_LO, &error_reg);
+       if (unlikely(error_reg)) {
+               errors = error_reg;
                errnum = find_first_bit(&errors,
                                        ARRAY_SIZE(ferr_global_lo_name));
                specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum);
@@ -407,7 +407,7 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
 
                /* Clear the error bit */
                pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
-                                      FERR_GLOBAL_LO, value);
+                                      FERR_GLOBAL_LO, error_reg);
 
                goto error_global;
        }
@@ -427,7 +427,7 @@ error_global:
 static void i7300_process_fbd_error(struct mem_ctl_info *mci)
 {
        struct i7300_pvt *pvt;
-       u32 errnum, value;
+       u32 errnum, value, error_reg;
        u16 val16;
        unsigned branch, channel, bank, rank, cas, ras;
        u32 syndrome;
@@ -440,14 +440,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
 
        /* read in the 1st FATAL error register */
        pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
-                             FERR_FAT_FBD, &value);
-       if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) {
-               errors = value & FERR_FAT_FBD_ERR_MASK ;
+                             FERR_FAT_FBD, &error_reg);
+       if (unlikely(error_reg & FERR_FAT_FBD_ERR_MASK)) {
+               errors = error_reg & FERR_FAT_FBD_ERR_MASK;
                errnum = find_first_bit(&errors,
                                        ARRAY_SIZE(ferr_fat_fbd_name));
                specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum);
+               branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
 
-               branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
                pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
                                     NRECMEMA, &val16);
                bank = NRECMEMA_BANK(val16);
@@ -455,11 +455,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
 
                pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
                                NRECMEMB, &value);
-
                is_wr = NRECMEMB_IS_WR(value);
                cas = NRECMEMB_CAS(value);
                ras = NRECMEMB_RAS(value);
 
+               /* Clear the error register */
+               pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
+                               FERR_FAT_FBD, error_reg);
+
                snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
                        "FATAL (Branch=%d DRAM-Bank=%d %s "
                        "RAS=%d CAS=%d Err=0x%lx (%s))",
@@ -476,21 +479,17 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
 
        /* read in the 1st NON-FATAL error register */
        pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
-                             FERR_NF_FBD, &value);
-       if (unlikely(value & FERR_NF_FBD_ERR_MASK)) {
-               errors = value & FERR_NF_FBD_ERR_MASK;
+                             FERR_NF_FBD, &error_reg);
+       if (unlikely(error_reg & FERR_NF_FBD_ERR_MASK)) {
+               errors = error_reg & FERR_NF_FBD_ERR_MASK;
                errnum = find_first_bit(&errors,
                                        ARRAY_SIZE(ferr_nf_fbd_name));
                specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
-
-               /* Clear the error bit */
-               pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
-                                      FERR_GLOBAL_LO, value);
+               branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
 
                pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
                        REDMEMA, &syndrome);
 
-               branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
                pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
                                     RECMEMA, &val16);
                bank = RECMEMA_BANK(val16);
@@ -498,18 +497,20 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
 
                pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
                                RECMEMB, &value);
-
                is_wr = RECMEMB_IS_WR(value);
                cas = RECMEMB_CAS(value);
                ras = RECMEMB_RAS(value);
 
                pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
                                     REDMEMB, &value);
-
                channel = (branch << 1);
                if (IS_SECOND_CH(value))
                        channel++;
 
+               /* Clear the error bit */
+               pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
+                               FERR_NF_FBD, error_reg);
+
                /* Form out message */
                snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
                        "Corrected error (Branch=%d, Channel %d), "
index f6cf448d69b4f468160e19fc29fab0ea473f04b8..70ad8923f1d75d79a26c479e17e725499091a340 100644 (file)
 #include <linux/pci_ids.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/dmi.h>
 #include <linux/edac.h>
 #include <linux/mmzone.h>
-#include <linux/edac_mce.h>
 #include <linux/smp.h>
+#include <asm/mce.h>
 #include <asm/processor.h>
+#include <asm/div64.h>
 
 #include "edac_core.h"
 
@@ -78,6 +80,8 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
        /* OFFSETS for Device 0 Function 0 */
 
 #define MC_CFG_CONTROL 0x90
+  #define MC_CFG_UNLOCK                0x02
+  #define MC_CFG_LOCK          0x00
 
        /* OFFSETS for Device 3 Function 0 */
 
@@ -98,6 +102,15 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
   #define DIMM0_COR_ERR(r)                     ((r) & 0x7fff)
 
 /* OFFSETS for Device 3 Function 2, as indicated in the Xeon 5500 datasheet */
+#define MC_SSRCONTROL          0x48
+  #define SSR_MODE_DISABLE     0x00
+  #define SSR_MODE_ENABLE      0x01
+  #define SSR_MODE_MASK                0x03
+
+#define MC_SCRUB_CONTROL       0x4c
+  #define STARTSCRUB           (1 << 24)
+  #define SCRUBINTERVAL_MASK    0xffffff
+
 #define MC_COR_ECC_CNT_0       0x80
 #define MC_COR_ECC_CNT_1       0x84
 #define MC_COR_ECC_CNT_2       0x88
@@ -253,10 +266,7 @@ struct i7core_pvt {
        unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
        int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
 
-       unsigned int    is_registered;
-
-       /* mcelog glue */
-       struct edac_mce         edac_mce;
+       bool            is_registered, enable_scrub;
 
        /* Fifo double buffers */
        struct mce              mce_entry[MCE_LOG_LEN];
@@ -268,6 +278,9 @@ struct i7core_pvt {
        /* Count indicator to show errors not got */
        unsigned                mce_overrun;
 
+       /* DCLK Frequency used for computing scrub rate */
+       int                     dclk_freq;
+
        /* Struct to control EDAC polling */
        struct edac_pci_ctl_info *i7core_pci;
 };
@@ -281,8 +294,7 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
-
-               /* Exists only for RDIMM */
+                       /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
 
@@ -303,6 +315,16 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
+
+               /* Generic Non-core registers */
+       /*
+        * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
+        * On Xeon 55xx, however, it has a different id (8086:2c40). So,
+        * the probing code needs to test for the other address in case of
+        * failure of this one
+        */
+       { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
+
 };
 
 static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
@@ -319,6 +341,12 @@ static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
        { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
        { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
        { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
+
+       /*
+        * This PCI device has an alternate address on some
+        * processors, like the Core i7 860
+        */
+       { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
 };
 
 static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
@@ -346,6 +374,10 @@ static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
+
+               /* Generic Non-core registers */
+       { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
+
 };
 
 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
@@ -714,6 +746,10 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
 
                        csr->edac_mode = mode;
                        csr->mtype = mtype;
+                       snprintf(csr->channels[0].label,
+                                       sizeof(csr->channels[0].label),
+                                       "CPU#%uChannel#%u_DIMM#%u",
+                                       pvt->i7core_dev->socket, i, j);
 
                        csrow++;
                }
@@ -731,7 +767,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
                        debugf1("\t\t%#x\t%#x\t%#x\n",
                                (value[j] >> 27) & 0x1,
                                (value[j] >> 24) & 0x7,
-                               (value[j] && ((1 << 24) - 1)));
+                               (value[j] & ((1 << 24) - 1)));
        }
 
        return 0;
@@ -1324,6 +1360,20 @@ static int i7core_get_onedevice(struct pci_dev **prev,
        pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                              dev_descr->dev_id, *prev);
 
+       /*
+        * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
+        * are at addr 8086:2c40, instead of 8086:2c41. So, we need
+        * to probe for the alternate address in case of failure
+        */
+       if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
+               pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+                                     PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
+
+       if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
+               pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+                                     PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
+                                     *prev);
+
        if (!pdev) {
                if (*prev) {
                        *prev = pdev;
@@ -1444,8 +1494,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, func, slot;
+       char *family;
 
-       pvt->is_registered = 0;
+       pvt->is_registered = false;
+       pvt->enable_scrub  = false;
        for (i = 0; i < i7core_dev->n_devs; i++) {
                pdev = i7core_dev->pdev[i];
                if (!pdev)
@@ -1461,9 +1513,37 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
                        if (unlikely(func > MAX_CHAN_FUNC))
                                goto error;
                        pvt->pci_ch[slot - 4][func] = pdev;
-               } else if (!slot && !func)
+               } else if (!slot && !func) {
                        pvt->pci_noncore = pdev;
-               else
+
+                       /* Detect the processor family */
+                       switch (pdev->device) {
+                       case PCI_DEVICE_ID_INTEL_I7_NONCORE:
+                               family = "Xeon 35xx/ i7core";
+                               pvt->enable_scrub = false;
+                               break;
+                       case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
+                               family = "i7-800/i5-700";
+                               pvt->enable_scrub = false;
+                               break;
+                       case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
+                               family = "Xeon 34xx";
+                               pvt->enable_scrub = false;
+                               break;
+                       case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
+                               family = "Xeon 55xx";
+                               pvt->enable_scrub = true;
+                               break;
+                       case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
+                               family = "Xeon 56xx / i7-900";
+                               pvt->enable_scrub = true;
+                               break;
+                       default:
+                               family = "unknown";
+                               pvt->enable_scrub = false;
+                       }
+                       debugf0("Detected a processor type %s\n", family);
+               } else
                        goto error;
 
                debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
@@ -1472,7 +1552,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
 
                if (PCI_SLOT(pdev->devfn) == 3 &&
                        PCI_FUNC(pdev->devfn) == 2)
-                       pvt->is_registered = 1;
+                       pvt->is_registered = true;
        }
 
        return 0;
@@ -1826,33 +1906,43 @@ check_ce_error:
  * WARNING: As this routine should be called at NMI time, extra care should
  * be taken to avoid deadlocks, and to be as fast as possible.
  */
-static int i7core_mce_check_error(void *priv, struct mce *mce)
+static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
+                                 void *data)
 {
-       struct mem_ctl_info *mci = priv;
-       struct i7core_pvt *pvt = mci->pvt_info;
+       struct mce *mce = (struct mce *)data;
+       struct i7core_dev *i7_dev;
+       struct mem_ctl_info *mci;
+       struct i7core_pvt *pvt;
+
+       i7_dev = get_i7core_dev(mce->socketid);
+       if (!i7_dev)
+               return NOTIFY_BAD;
+
+       mci = i7_dev->mci;
+       pvt = mci->pvt_info;
 
        /*
         * Just let mcelog handle it if the error is
         * outside the memory controller
         */
        if (((mce->status & 0xffff) >> 7) != 1)
-               return 0;
+               return NOTIFY_DONE;
 
        /* Bank 8 registers are the only ones that we know how to handle */
        if (mce->bank != 8)
-               return 0;
+               return NOTIFY_DONE;
 
 #ifdef CONFIG_SMP
        /* Only handle if it is the right mc controller */
-       if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
-               return 0;
+       if (mce->socketid != pvt->i7core_dev->socket)
+               return NOTIFY_DONE;
 #endif
 
        smp_rmb();
        if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
                smp_wmb();
                pvt->mce_overrun++;
-               return 0;
+               return NOTIFY_DONE;
        }
 
        /* Copy memory error at the ringbuffer */
@@ -1865,7 +1955,240 @@ static int i7core_mce_check_error(void *priv, struct mce *mce)
                i7core_check_error(mci);
 
        /* Advise mcelog that the errors were handled */
-       return 1;
+       return NOTIFY_STOP;
+}
+
+static struct notifier_block i7_mce_dec = {
+       .notifier_call  = i7core_mce_check_error,
+};
+
+struct memdev_dmi_entry {
+       u8 type;
+       u8 length;
+       u16 handle;
+       u16 phys_mem_array_handle;
+       u16 mem_err_info_handle;
+       u16 total_width;
+       u16 data_width;
+       u16 size;
+       u8 form;
+       u8 device_set;
+       u8 device_locator;
+       u8 bank_locator;
+       u8 memory_type;
+       u16 type_detail;
+       u16 speed;
+       u8 manufacturer;
+       u8 serial_number;
+       u8 asset_tag;
+       u8 part_number;
+       u8 attributes;
+       u32 extended_size;
+       u16 conf_mem_clk_speed;
+} __attribute__((__packed__));
+
+
+/*
+ * Decode the DRAM Clock Frequency, be paranoid, make sure that all
+ * memory devices show the same speed, and if they don't then consider
+ * all speeds to be invalid.
+ */
+static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
+{
+       int *dclk_freq = _dclk_freq;
+       u16 dmi_mem_clk_speed;
+
+       if (*dclk_freq == -1)
+               return;
+
+       if (dh->type == DMI_ENTRY_MEM_DEVICE) {
+               struct memdev_dmi_entry *memdev_dmi_entry =
+                       (struct memdev_dmi_entry *)dh;
+               unsigned long conf_mem_clk_speed_offset =
+                       (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
+                       (unsigned long)&memdev_dmi_entry->type;
+               unsigned long speed_offset =
+                       (unsigned long)&memdev_dmi_entry->speed -
+                       (unsigned long)&memdev_dmi_entry->type;
+
+               /* Check that a DIMM is present */
+               if (memdev_dmi_entry->size == 0)
+                       return;
+
+               /*
+                * Pick the configured speed if it's available, otherwise
+                * pick the DIMM speed, or we don't have a speed.
+                */
+               if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
+                       dmi_mem_clk_speed =
+                               memdev_dmi_entry->conf_mem_clk_speed;
+               } else if (memdev_dmi_entry->length > speed_offset) {
+                       dmi_mem_clk_speed = memdev_dmi_entry->speed;
+               } else {
+                       *dclk_freq = -1;
+                       return;
+               }
+
+               if (*dclk_freq == 0) {
+                       /* First pass, speed was 0 */
+                       if (dmi_mem_clk_speed > 0) {
+                               /* Set speed if a valid speed is read */
+                               *dclk_freq = dmi_mem_clk_speed;
+                       } else {
+                               /* Otherwise we don't have a valid speed */
+                               *dclk_freq = -1;
+                       }
+               } else if (*dclk_freq > 0 &&
+                          *dclk_freq != dmi_mem_clk_speed) {
+                       /*
+                        * If we have a speed, check that all DIMMS are the same
+                        * speed, otherwise set the speed as invalid.
+                        */
+                       *dclk_freq = -1;
+               }
+       }
+}
+
+/*
+ * The default DCLK frequency is used as a fallback if we
+ * fail to find anything reliable in the DMI. The value
+ * is taken straight from the datasheet.
+ */
+#define DEFAULT_DCLK_FREQ 800
+
+static int get_dclk_freq(void)
+{
+       int dclk_freq = 0;
+
+       dmi_walk(decode_dclk, (void *)&dclk_freq);
+
+       if (dclk_freq < 1)
+               return DEFAULT_DCLK_FREQ;
+
+       return dclk_freq;
+}
+
+/*
+ * set_sdram_scrub_rate                This routine sets byte/sec bandwidth scrub rate
+ *                             to hardware according to SCRUBINTERVAL formula
+ *                             found in datasheet.
+ */
+static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
+{
+       struct i7core_pvt *pvt = mci->pvt_info;
+       struct pci_dev *pdev;
+       u32 dw_scrub;
+       u32 dw_ssr;
+
+       /* Get data from the MC register, function 2 */
+       pdev = pvt->pci_mcr[2];
+       if (!pdev)
+               return -ENODEV;
+
+       pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
+
+       if (new_bw == 0) {
+               /* Prepare to disable petrol scrub */
+               dw_scrub &= ~STARTSCRUB;
+               /* Stop the patrol scrub engine */
+               write_and_test(pdev, MC_SCRUB_CONTROL,
+                              dw_scrub & ~SCRUBINTERVAL_MASK);
+
+               /* Get current status of scrub rate and set bit to disable */
+               pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
+               dw_ssr &= ~SSR_MODE_MASK;
+               dw_ssr |= SSR_MODE_DISABLE;
+       } else {
+               const int cache_line_size = 64;
+               const u32 freq_dclk_mhz = pvt->dclk_freq;
+               unsigned long long scrub_interval;
+               /*
+                * Translate the desired scrub rate to a register value and
+                * program the corresponding register value.
+                */
+               scrub_interval = (unsigned long long)freq_dclk_mhz *
+                       cache_line_size * 1000000;
+               do_div(scrub_interval, new_bw);
+
+               if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
+                       return -EINVAL;
+
+               dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
+
+               /* Start the patrol scrub engine */
+               pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
+                                      STARTSCRUB | dw_scrub);
+
+               /* Get current status of scrub rate and set bit to enable */
+               pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
+               dw_ssr &= ~SSR_MODE_MASK;
+               dw_ssr |= SSR_MODE_ENABLE;
+       }
+       /* Disable or enable scrubbing */
+       pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
+
+       return new_bw;
+}
+
+/*
+ * get_sdram_scrub_rate                This routine convert current scrub rate value
+ *                             into byte/sec bandwidth accourding to
+ *                             SCRUBINTERVAL formula found in datasheet.
+ */
+static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
+{
+       struct i7core_pvt *pvt = mci->pvt_info;
+       struct pci_dev *pdev;
+       const u32 cache_line_size = 64;
+       const u32 freq_dclk_mhz = pvt->dclk_freq;
+       unsigned long long scrub_rate;
+       u32 scrubval;
+
+       /* Get data from the MC register, function 2 */
+       pdev = pvt->pci_mcr[2];
+       if (!pdev)
+               return -ENODEV;
+
+       /* Get current scrub control data */
+       pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
+
+       /* Mask highest 8-bits to 0 */
+       scrubval &=  SCRUBINTERVAL_MASK;
+       if (!scrubval)
+               return 0;
+
+       /* Calculate scrub rate value into byte/sec bandwidth */
+       scrub_rate =  (unsigned long long)freq_dclk_mhz *
+               1000000 * cache_line_size;
+       do_div(scrub_rate, scrubval);
+       return (int)scrub_rate;
+}
+
+static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
+{
+       struct i7core_pvt *pvt = mci->pvt_info;
+       u32 pci_lock;
+
+       /* Unlock writes to pci registers */
+       pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
+       pci_lock &= ~0x3;
+       pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
+                              pci_lock | MC_CFG_UNLOCK);
+
+       mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
+       mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
+}
+
+static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
+{
+       struct i7core_pvt *pvt = mci->pvt_info;
+       u32 pci_lock;
+
+       /* Lock writes to pci registers */
+       pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
+       pci_lock &= ~0x3;
+       pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
+                              pci_lock | MC_CFG_LOCK);
 }
 
 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
@@ -1874,7 +2197,8 @@ static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
                                                &pvt->i7core_dev->pdev[0]->dev,
                                                EDAC_MOD_STR);
        if (unlikely(!pvt->i7core_pci))
-               pr_warn("Unable to setup PCI error report via EDAC\n");
+               i7core_printk(KERN_WARNING,
+                             "Unable to setup PCI error report via EDAC\n");
 }
 
 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
@@ -1906,8 +2230,11 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
        debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
                __func__, mci, &i7core_dev->pdev[0]->dev);
 
-       /* Disable MCE NMI handler */
-       edac_mce_unregister(&pvt->edac_mce);
+       /* Disable scrubrate setting */
+       if (pvt->enable_scrub)
+               disable_sdram_scrub_setting(mci);
+
+       atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec);
 
        /* Disable EDAC polling */
        i7core_pci_ctl_release(pvt);
@@ -1979,6 +2306,10 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
        /* Set the function pointer to an actual operation function */
        mci->edac_check = i7core_check_error;
 
+       /* Enable scrubrate setting */
+       if (pvt->enable_scrub)
+               enable_sdram_scrub_setting(mci);
+
        /* add this new MC control structure to EDAC's list of MCs */
        if (unlikely(edac_mc_add_mc(mci))) {
                debugf0("MC: " __FILE__
@@ -2002,21 +2333,13 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
        /* allocating generic PCI control info */
        i7core_pci_ctl_create(pvt);
 
-       /* Registers on edac_mce in order to receive memory errors */
-       pvt->edac_mce.priv = mci;
-       pvt->edac_mce.check_error = i7core_mce_check_error;
-       rc = edac_mce_register(&pvt->edac_mce);
-       if (unlikely(rc < 0)) {
-               debugf0("MC: " __FILE__
-                       ": %s(): failed edac_mce_register()\n", __func__);
-               goto fail1;
-       }
+       /* DCLK for scrub rate setting */
+       pvt->dclk_freq = get_dclk_freq();
+
+       atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec);
 
        return 0;
 
-fail1:
-       i7core_pci_ctl_release(pvt);
-       edac_mc_del_mc(mci->dev);
 fail0:
        kfree(mci->ctl_name);
        edac_mc_free(mci);
@@ -2035,7 +2358,7 @@ fail0:
 static int __devinit i7core_probe(struct pci_dev *pdev,
                                  const struct pci_device_id *id)
 {
-       int rc;
+       int rc, count = 0;
        struct i7core_dev *i7core_dev;
 
        /* get the pci devices we want to reserve for our use */
@@ -2055,12 +2378,28 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
                goto fail0;
 
        list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
+               count++;
                rc = i7core_register_mci(i7core_dev);
                if (unlikely(rc < 0))
                        goto fail1;
        }
 
-       i7core_printk(KERN_INFO, "Driver loaded.\n");
+       /*
+        * Nehalem-EX uses a different memory controller. However, as the
+        * memory controller is not visible on some Nehalem/Nehalem-EP, we
+        * need to indirectly probe via a X58 PCI device. The same devices
+        * are found on (some) Nehalem-EX. So, on those machines, the
+        * probe routine needs to return -ENODEV, as the actual Memory
+        * Controller registers won't be detected.
+        */
+       if (!count) {
+               rc = -ENODEV;
+               goto fail1;
+       }
+
+       i7core_printk(KERN_INFO,
+                     "Driver loaded, %d memory controller(s) found.\n",
+                     count);
 
        mutex_unlock(&i7core_edac_lock);
        return 0;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
new file mode 100644 (file)
index 0000000..7a402bf
--- /dev/null
@@ -0,0 +1,1893 @@
+/* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module
+ *
+ * This driver supports the memory controllers found on the Intel
+ * processor family Sandy Bridge.
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License version 2 only.
+ *
+ * Copyright (c) 2011 by:
+ *      Mauro Carvalho Chehab <mchehab@redhat.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/edac.h>
+#include <linux/mmzone.h>
+#include <linux/smp.h>
+#include <linux/bitmap.h>
+#include <asm/processor.h>
+#include <asm/mce.h>
+
+#include "edac_core.h"
+
+/* Static vars */
+static LIST_HEAD(sbridge_edac_list);
+static DEFINE_MUTEX(sbridge_edac_lock);
+static int probed;
+
+/*
+ * Alter this version for the module when modifications are made
+ */
+#define SBRIDGE_REVISION    " Ver: 1.0.0 "
+#define EDAC_MOD_STR      "sbridge_edac"
+
+/*
+ * Debug macros
+ */
+#define sbridge_printk(level, fmt, arg...)                     \
+       edac_printk(level, "sbridge", fmt, ##arg)
+
+#define sbridge_mc_printk(mci, level, fmt, arg...)             \
+       edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg)
+
+/*
+ * Get a bit field at register value <v>, from bit <lo> to bit <hi>
+ */
+#define GET_BITFIELD(v, lo, hi)        \
+       (((v) & ((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) >> (lo))
+
+/*
+ * sbridge Memory Controller Registers
+ */
+
+/*
+ * FIXME: For now, let's order by device function, as it makes
+ * easier for driver's development proccess. This table should be
+ * moved to pci_id.h when submitted upstream
+ */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0       0x3cf4  /* 12.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1       0x3cf6  /* 12.7 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR         0x3cf5  /* 13.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0    0x3ca0  /* 14.0 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA     0x3ca8  /* 15.0 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS    0x3c71  /* 15.1 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0   0x3caa  /* 15.2 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1   0x3cab  /* 15.3 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2   0x3cac  /* 15.4 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3   0x3cad  /* 15.5 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO  0x3cb8  /* 17.0 */
+
+       /*
+        * Currently, unused, but will be needed in the future
+        * implementations, as they hold the error counters
+        */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0   0x3c72  /* 16.2 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1   0x3c73  /* 16.3 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2   0x3c76  /* 16.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3   0x3c77  /* 16.7 */
+
+/* Devices 12 Function 6, Offsets 0x80 to 0xcc */
+static const u32 dram_rule[] = {
+       0x80, 0x88, 0x90, 0x98, 0xa0,
+       0xa8, 0xb0, 0xb8, 0xc0, 0xc8,
+};
+#define MAX_SAD                ARRAY_SIZE(dram_rule)
+
+#define SAD_LIMIT(reg)         ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff)
+#define DRAM_ATTR(reg)         GET_BITFIELD(reg, 2,  3)
+#define INTERLEAVE_MODE(reg)   GET_BITFIELD(reg, 1,  1)
+#define DRAM_RULE_ENABLE(reg)  GET_BITFIELD(reg, 0,  0)
+
+static char *get_dram_attr(u32 reg)
+{
+       switch(DRAM_ATTR(reg)) {
+               case 0:
+                       return "DRAM";
+               case 1:
+                       return "MMCFG";
+               case 2:
+                       return "NXM";
+               default:
+                       return "unknown";
+       }
+}
+
+static const u32 interleave_list[] = {
+       0x84, 0x8c, 0x94, 0x9c, 0xa4,
+       0xac, 0xb4, 0xbc, 0xc4, 0xcc,
+};
+#define MAX_INTERLEAVE ARRAY_SIZE(interleave_list)
+
+#define SAD_PKG0(reg)          GET_BITFIELD(reg, 0, 2)
+#define SAD_PKG1(reg)          GET_BITFIELD(reg, 3, 5)
+#define SAD_PKG2(reg)          GET_BITFIELD(reg, 8, 10)
+#define SAD_PKG3(reg)          GET_BITFIELD(reg, 11, 13)
+#define SAD_PKG4(reg)          GET_BITFIELD(reg, 16, 18)
+#define SAD_PKG5(reg)          GET_BITFIELD(reg, 19, 21)
+#define SAD_PKG6(reg)          GET_BITFIELD(reg, 24, 26)
+#define SAD_PKG7(reg)          GET_BITFIELD(reg, 27, 29)
+
+static inline int sad_pkg(u32 reg, int interleave)
+{
+       switch (interleave) {
+       case 0:
+               return SAD_PKG0(reg);
+       case 1:
+               return SAD_PKG1(reg);
+       case 2:
+               return SAD_PKG2(reg);
+       case 3:
+               return SAD_PKG3(reg);
+       case 4:
+               return SAD_PKG4(reg);
+       case 5:
+               return SAD_PKG5(reg);
+       case 6:
+               return SAD_PKG6(reg);
+       case 7:
+               return SAD_PKG7(reg);
+       default:
+               return -EINVAL;
+       }
+}
+
+/* Devices 12 Function 7 */
+
+#define TOLM           0x80
+#define        TOHM            0x84
+
+#define GET_TOLM(reg)          ((GET_BITFIELD(reg, 0,  3) << 28) | 0x3ffffff)
+#define GET_TOHM(reg)          ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff)
+
+/* Device 13 Function 6 */
+
+#define SAD_TARGET     0xf0
+
+#define SOURCE_ID(reg)         GET_BITFIELD(reg, 9, 11)
+
+#define SAD_CONTROL    0xf4
+
+#define NODE_ID(reg)           GET_BITFIELD(reg, 0, 2)
+
+/* Device 14 function 0 */
+
+static const u32 tad_dram_rule[] = {
+       0x40, 0x44, 0x48, 0x4c,
+       0x50, 0x54, 0x58, 0x5c,
+       0x60, 0x64, 0x68, 0x6c,
+};
+#define MAX_TAD        ARRAY_SIZE(tad_dram_rule)
+
+#define TAD_LIMIT(reg)         ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff)
+#define TAD_SOCK(reg)          GET_BITFIELD(reg, 10, 11)
+#define TAD_CH(reg)            GET_BITFIELD(reg,  8,  9)
+#define TAD_TGT3(reg)          GET_BITFIELD(reg,  6,  7)
+#define TAD_TGT2(reg)          GET_BITFIELD(reg,  4,  5)
+#define TAD_TGT1(reg)          GET_BITFIELD(reg,  2,  3)
+#define TAD_TGT0(reg)          GET_BITFIELD(reg,  0,  1)
+
+/* Device 15, function 0 */
+
+#define MCMTR                  0x7c
+
+#define IS_ECC_ENABLED(mcmtr)          GET_BITFIELD(mcmtr, 2, 2)
+#define IS_LOCKSTEP_ENABLED(mcmtr)     GET_BITFIELD(mcmtr, 1, 1)
+#define IS_CLOSE_PG(mcmtr)             GET_BITFIELD(mcmtr, 0, 0)
+
+/* Device 15, function 1 */
+
+#define RASENABLES             0xac
+#define IS_MIRROR_ENABLED(reg)         GET_BITFIELD(reg, 0, 0)
+
+/* Device 15, functions 2-5 */
+
+static const int mtr_regs[] = {
+       0x80, 0x84, 0x88,
+};
+
+#define RANK_DISABLE(mtr)              GET_BITFIELD(mtr, 16, 19)
+#define IS_DIMM_PRESENT(mtr)           GET_BITFIELD(mtr, 14, 14)
+#define RANK_CNT_BITS(mtr)             GET_BITFIELD(mtr, 12, 13)
+#define RANK_WIDTH_BITS(mtr)           GET_BITFIELD(mtr, 2, 4)
+#define COL_WIDTH_BITS(mtr)            GET_BITFIELD(mtr, 0, 1)
+
+static const u32 tad_ch_nilv_offset[] = {
+       0x90, 0x94, 0x98, 0x9c,
+       0xa0, 0xa4, 0xa8, 0xac,
+       0xb0, 0xb4, 0xb8, 0xbc,
+};
+#define CHN_IDX_OFFSET(reg)            GET_BITFIELD(reg, 28, 29)
+#define TAD_OFFSET(reg)                        (GET_BITFIELD(reg,  6, 25) << 26)
+
+static const u32 rir_way_limit[] = {
+       0x108, 0x10c, 0x110, 0x114, 0x118,
+};
+#define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit)
+
+#define IS_RIR_VALID(reg)      GET_BITFIELD(reg, 31, 31)
+#define RIR_WAY(reg)           GET_BITFIELD(reg, 28, 29)
+#define RIR_LIMIT(reg)         ((GET_BITFIELD(reg,  1, 10) << 29)| 0x1fffffff)
+
+#define MAX_RIR_WAY    8
+
+static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
+       { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c },
+       { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c },
+       { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c },
+       { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c },
+       { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
+};
+
+#define RIR_RNK_TGT(reg)               GET_BITFIELD(reg, 16, 19)
+#define RIR_OFFSET(reg)                GET_BITFIELD(reg,  2, 14)
+
+/* Device 16, functions 2-7 */
+
+/*
+ * FIXME: Implement the error count reads directly
+ */
+
+static const u32 correrrcnt[] = {
+       0x104, 0x108, 0x10c, 0x110,
+};
+
+#define RANK_ODD_OV(reg)               GET_BITFIELD(reg, 31, 31)
+#define RANK_ODD_ERR_CNT(reg)          GET_BITFIELD(reg, 16, 30)
+#define RANK_EVEN_OV(reg)              GET_BITFIELD(reg, 15, 15)
+#define RANK_EVEN_ERR_CNT(reg)         GET_BITFIELD(reg,  0, 14)
+
+static const u32 correrrthrsld[] = {
+       0x11c, 0x120, 0x124, 0x128,
+};
+
+#define RANK_ODD_ERR_THRSLD(reg)       GET_BITFIELD(reg, 16, 30)
+#define RANK_EVEN_ERR_THRSLD(reg)      GET_BITFIELD(reg,  0, 14)
+
+
+/* Device 17, function 0 */
+
+#define RANK_CFG_A             0x0328
+
+#define IS_RDIMM_ENABLED(reg)          GET_BITFIELD(reg, 11, 11)
+
+/*
+ * sbridge structs
+ */
+
+#define NUM_CHANNELS   4
+#define MAX_DIMMS      3               /* Max DIMMS per channel */
+
+struct sbridge_info {
+       u32     mcmtr;
+};
+
+struct sbridge_channel {
+       u32             ranks;
+       u32             dimms;
+};
+
+struct pci_id_descr {
+       int                     dev;
+       int                     func;
+       int                     dev_id;
+       int                     optional;
+};
+
+struct pci_id_table {
+       const struct pci_id_descr       *descr;
+       int                             n_devs;
+};
+
+struct sbridge_dev {
+       struct list_head        list;
+       u8                      bus, mc;
+       u8                      node_id, source_id;
+       struct pci_dev          **pdev;
+       int                     n_devs;
+       struct mem_ctl_info     *mci;
+};
+
+struct sbridge_pvt {
+       struct pci_dev          *pci_ta, *pci_ddrio, *pci_ras;
+       struct pci_dev          *pci_sad0, *pci_sad1, *pci_ha0;
+       struct pci_dev          *pci_br;
+       struct pci_dev          *pci_tad[NUM_CHANNELS];
+
+       struct sbridge_dev      *sbridge_dev;
+
+       struct sbridge_info     info;
+       struct sbridge_channel  channel[NUM_CHANNELS];
+
+       int                     csrow_map[NUM_CHANNELS][MAX_DIMMS];
+
+       /* Memory type detection */
+       bool                    is_mirrored, is_lockstep, is_close_pg;
+
+       /* Fifo double buffers */
+       struct mce              mce_entry[MCE_LOG_LEN];
+       struct mce              mce_outentry[MCE_LOG_LEN];
+
+       /* Fifo in/out counters */
+       unsigned                mce_in, mce_out;
+
+       /* Count indicator to show errors not got */
+       unsigned                mce_overrun;
+
+       /* Memory description */
+       u64                     tolm, tohm;
+};
+
+#define PCI_DESCR(device, function, device_id) \
+       .dev = (device),                        \
+       .func = (function),                     \
+       .dev_id = (device_id)
+
+static const struct pci_id_descr pci_dev_descr_sbridge[] = {
+               /* Processor Home Agent */
+       { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0)         },
+
+               /* Memory controller */
+       { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)          },
+       { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS)         },
+       { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0)        },
+       { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1)        },
+       { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2)        },
+       { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3)        },
+       { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO)       },
+
+               /* System Address Decoder */
+       { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0)            },
+       { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1)            },
+
+               /* Broadcast Registers */
+       { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR)              },
+};
+
+#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
+static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
+       PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
+       {0,}                    /* 0 terminated list. */
+};
+
+/*
+ *     pci_device_id   table for which devices we are looking for
+ */
+static const struct pci_device_id sbridge_pci_tbl[] __devinitdata = {
+       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)},
+       {0,}                    /* 0 terminated list. */
+};
+
+
+/****************************************************************************
+                       Anciliary status routines
+ ****************************************************************************/
+
+static inline int numrank(u32 mtr)
+{
+       int ranks = (1 << RANK_CNT_BITS(mtr));
+
+       if (ranks > 4) {
+               debugf0("Invalid number of ranks: %d (max = 4) raw value = %x (%04x)",
+                       ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr);
+               return -EINVAL;
+       }
+
+       return ranks;
+}
+
+static inline int numrow(u32 mtr)
+{
+       int rows = (RANK_WIDTH_BITS(mtr) + 12);
+
+       if (rows < 13 || rows > 18) {
+               debugf0("Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)",
+                       rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
+               return -EINVAL;
+       }
+
+       return 1 << rows;
+}
+
+static inline int numcol(u32 mtr)
+{
+       int cols = (COL_WIDTH_BITS(mtr) + 10);
+
+       if (cols > 12) {
+               debugf0("Invalid number of cols: %d (max = 4) raw value = %x (%04x)",
+                       cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
+               return -EINVAL;
+       }
+
+       return 1 << cols;
+}
+
+static struct sbridge_dev *get_sbridge_dev(u8 bus)
+{
+       struct sbridge_dev *sbridge_dev;
+
+       list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
+               if (sbridge_dev->bus == bus)
+                       return sbridge_dev;
+       }
+
+       return NULL;
+}
+
+static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
+                                          const struct pci_id_table *table)
+{
+       struct sbridge_dev *sbridge_dev;
+
+       sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL);
+       if (!sbridge_dev)
+               return NULL;
+
+       sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
+                                  GFP_KERNEL);
+       if (!sbridge_dev->pdev) {
+               kfree(sbridge_dev);
+               return NULL;
+       }
+
+       sbridge_dev->bus = bus;
+       sbridge_dev->n_devs = table->n_devs;
+       list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
+
+       return sbridge_dev;
+}
+
+static void free_sbridge_dev(struct sbridge_dev *sbridge_dev)
+{
+       list_del(&sbridge_dev->list);
+       kfree(sbridge_dev->pdev);
+       kfree(sbridge_dev);
+}
+
+/****************************************************************************
+                       Memory check routines
+ ****************************************************************************/
+static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
+                                         unsigned func)
+{
+       struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus);
+       int i;
+
+       if (!sbridge_dev)
+               return NULL;
+
+       for (i = 0; i < sbridge_dev->n_devs; i++) {
+               if (!sbridge_dev->pdev[i])
+                       continue;
+
+               if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot &&
+                   PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) {
+                       debugf1("Associated %02x.%02x.%d with %p\n",
+                               bus, slot, func, sbridge_dev->pdev[i]);
+                       return sbridge_dev->pdev[i];
+               }
+       }
+
+       return NULL;
+}
+
+/**
+ * sbridge_get_active_channels() - gets the number of channels and csrows
+ * bus:                Device bus
+ * @channels:  Number of channels that will be returned
+ * @csrows:    Number of csrows found
+ *
+ * Since EDAC core needs to know in advance the number of available channels
+ * and csrows, in order to allocate memory for csrows/channels, it is needed
+ * to run two similar steps. At the first step, implemented on this function,
+ * it checks the number of csrows/channels present at one socket, identified
+ * by the associated PCI bus.
+ * this is used in order to properly allocate the size of mci components.
+ * Note: one csrow is one dimm.
+ */
+static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
+                                     unsigned *csrows)
+{
+       struct pci_dev *pdev = NULL;
+       int i, j;
+       u32 mcmtr;
+
+       *channels = 0;
+       *csrows = 0;
+
+       pdev = get_pdev_slot_func(bus, 15, 0);
+       if (!pdev) {
+               sbridge_printk(KERN_ERR, "Couldn't find PCI device "
+                                       "%2x.%02d.%d!!!\n",
+                                       bus, 15, 0);
+               return -ENODEV;
+       }
+
+       pci_read_config_dword(pdev, MCMTR, &mcmtr);
+       if (!IS_ECC_ENABLED(mcmtr)) {
+               sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
+               return -ENODEV;
+       }
+
+       for (i = 0; i < NUM_CHANNELS; i++) {
+               u32 mtr;
+
+               /* Device 15 functions 2 - 5  */
+               pdev = get_pdev_slot_func(bus, 15, 2 + i);
+               if (!pdev) {
+                       sbridge_printk(KERN_ERR, "Couldn't find PCI device "
+                                                "%2x.%02d.%d!!!\n",
+                                                bus, 15, 2 + i);
+                       return -ENODEV;
+               }
+               (*channels)++;
+
+               for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
+                       pci_read_config_dword(pdev, mtr_regs[j], &mtr);
+                       debugf1("Bus#%02x channel #%d  MTR%d = %x\n", bus, i, j, mtr);
+                       if (IS_DIMM_PRESENT(mtr))
+                               (*csrows)++;
+               }
+       }
+
+       debugf0("Number of active channels: %d, number of active dimms: %d\n",
+               *channels, *csrows);
+
+       return 0;
+}
+
+static int get_dimm_config(const struct mem_ctl_info *mci)
+{
+       struct sbridge_pvt *pvt = mci->pvt_info;
+       struct csrow_info *csr;
+       int i, j, banks, ranks, rows, cols, size, npages;
+       int csrow = 0;
+       unsigned long last_page = 0;
+       u32 reg;
+       enum edac_type mode;
+       enum mem_type mtype;
+
+       pci_read_config_dword(pvt->pci_br, SAD_TARGET, &reg);
+       pvt->sbridge_dev->source_id = SOURCE_ID(reg);
+
+       pci_read_config_dword(pvt->pci_br, SAD_CONTROL, &reg);
+       pvt->sbridge_dev->node_id = NODE_ID(reg);
+       debugf0("mc#%d: Node ID: %d, source ID: %d\n",
+               pvt->sbridge_dev->mc,
+               pvt->sbridge_dev->node_id,
+               pvt->sbridge_dev->source_id);
+
+       pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
+       if (IS_MIRROR_ENABLED(reg)) {
+               debugf0("Memory mirror is enabled\n");
+               pvt->is_mirrored = true;
+       } else {
+               debugf0("Memory mirror is disabled\n");
+               pvt->is_mirrored = false;
+       }
+
+       pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
+       if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
+               debugf0("Lockstep is enabled\n");
+               mode = EDAC_S8ECD8ED;
+               pvt->is_lockstep = true;
+       } else {
+               debugf0("Lockstep is disabled\n");
+               mode = EDAC_S4ECD4ED;
+               pvt->is_lockstep = false;
+       }
+       if (IS_CLOSE_PG(pvt->info.mcmtr)) {
+               debugf0("address map is on closed page mode\n");
+               pvt->is_close_pg = true;
+       } else {
+               debugf0("address map is on open page mode\n");
+               pvt->is_close_pg = false;
+       }
+
+       pci_read_config_dword(pvt->pci_ta, RANK_CFG_A, &reg);
+       if (IS_RDIMM_ENABLED(reg)) {
+               /* FIXME: Can also be LRDIMM */
+               debugf0("Memory is registered\n");
+               mtype = MEM_RDDR3;
+       } else {
+               debugf0("Memory is unregistered\n");
+               mtype = MEM_DDR3;
+       }
+
+       /* On all supported DDR3 DIMM types, there are 8 banks available */
+       banks = 8;
+
+       for (i = 0; i < NUM_CHANNELS; i++) {
+               u32 mtr;
+
+               for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
+                       pci_read_config_dword(pvt->pci_tad[i],
+                                             mtr_regs[j], &mtr);
+                       debugf4("Channel #%d  MTR%d = %x\n", i, j, mtr);
+                       if (IS_DIMM_PRESENT(mtr)) {
+                               pvt->channel[i].dimms++;
+
+                               ranks = numrank(mtr);
+                               rows = numrow(mtr);
+                               cols = numcol(mtr);
+
+                               /* DDR3 has 8 I/O banks */
+                               size = (rows * cols * banks * ranks) >> (20 - 3);
+                               npages = MiB_TO_PAGES(size);
+
+                               debugf0("mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+                                       pvt->sbridge_dev->mc, i, j,
+                                       size, npages,
+                                       banks, ranks, rows, cols);
+                               csr = &mci->csrows[csrow];
+
+                               csr->first_page = last_page;
+                               csr->last_page = last_page + npages - 1;
+                               csr->page_mask = 0UL;   /* Unused */
+                               csr->nr_pages = npages;
+                               csr->grain = 32;
+                               csr->csrow_idx = csrow;
+                               csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
+                               csr->ce_count = 0;
+                               csr->ue_count = 0;
+                               csr->mtype = mtype;
+                               csr->edac_mode = mode;
+                               csr->nr_channels = 1;
+                               csr->channels[0].chan_idx = i;
+                               csr->channels[0].ce_count = 0;
+                               pvt->csrow_map[i][j] = csrow;
+                               snprintf(csr->channels[0].label,
+                                        sizeof(csr->channels[0].label),
+                                        "CPU_SrcID#%u_Channel#%u_DIMM#%u",
+                                        pvt->sbridge_dev->source_id, i, j);
+                               last_page += npages;
+                               csrow++;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static void get_memory_layout(const struct mem_ctl_info *mci)
+{
+       struct sbridge_pvt *pvt = mci->pvt_info;
+       int i, j, k, n_sads, n_tads, sad_interl;
+       u32 reg;
+       u64 limit, prv = 0;
+       u64 tmp_mb;
+       u32 rir_way;
+
+       /*
+        * Step 1) Get TOLM/TOHM ranges
+        */
+
+       /* Address range is 32:28 */
+       pci_read_config_dword(pvt->pci_sad1, TOLM,
+                             &reg);
+       pvt->tolm = GET_TOLM(reg);
+       tmp_mb = (1 + pvt->tolm) >> 20;
+
+       debugf0("TOLM: %Lu.%03Lu GB (0x%016Lx)\n",
+               tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tolm);
+
+       /* Address range is already 45:25 */
+       pci_read_config_dword(pvt->pci_sad1, TOHM,
+                             &reg);
+       pvt->tohm = GET_TOHM(reg);
+       tmp_mb = (1 + pvt->tohm) >> 20;
+
+       debugf0("TOHM: %Lu.%03Lu GB (0x%016Lx)",
+               tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tohm);
+
+       /*
+        * Step 2) Get SAD range and SAD Interleave list
+        * TAD registers contain the interleave wayness. However, it
+        * seems simpler to just discover it indirectly, with the
+        * algorithm bellow.
+        */
+       prv = 0;
+       for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
+               /* SAD_LIMIT Address range is 45:26 */
+               pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
+                                     &reg);
+               limit = SAD_LIMIT(reg);
+
+               if (!DRAM_RULE_ENABLE(reg))
+                       continue;
+
+               if (limit <= prv)
+                       break;
+
+               tmp_mb = (limit + 1) >> 20;
+               debugf0("SAD#%d %s up to %Lu.%03Lu GB (0x%016Lx) %s reg=0x%08x\n",
+                       n_sads,
+                       get_dram_attr(reg),
+                       tmp_mb / 1000, tmp_mb % 1000,
+                       ((u64)tmp_mb) << 20L,
+                       INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]",
+                       reg);
+               prv = limit;
+
+               pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
+                                     &reg);
+               sad_interl = sad_pkg(reg, 0);
+               for (j = 0; j < 8; j++) {
+                       if (j > 0 && sad_interl == sad_pkg(reg, j))
+                               break;
+
+                       debugf0("SAD#%d, interleave #%d: %d\n",
+                       n_sads, j, sad_pkg(reg, j));
+               }
+       }
+
+       /*
+        * Step 3) Get TAD range
+        */
+       prv = 0;
+       for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
+               pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
+                                     &reg);
+               limit = TAD_LIMIT(reg);
+               if (limit <= prv)
+                       break;
+               tmp_mb = (limit + 1) >> 20;
+
+               debugf0("TAD#%d: up to %Lu.%03Lu GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
+                       n_tads, tmp_mb / 1000, tmp_mb % 1000,
+                       ((u64)tmp_mb) << 20L,
+                       (u32)TAD_SOCK(reg),
+                       (u32)TAD_CH(reg),
+                       (u32)TAD_TGT0(reg),
+                       (u32)TAD_TGT1(reg),
+                       (u32)TAD_TGT2(reg),
+                       (u32)TAD_TGT3(reg),
+                       reg);
+               prv = tmp_mb;
+       }
+
+       /*
+        * Step 4) Get TAD offsets, per each channel
+        */
+       for (i = 0; i < NUM_CHANNELS; i++) {
+               if (!pvt->channel[i].dimms)
+                       continue;
+               for (j = 0; j < n_tads; j++) {
+                       pci_read_config_dword(pvt->pci_tad[i],
+                                             tad_ch_nilv_offset[j],
+                                             &reg);
+                       tmp_mb = TAD_OFFSET(reg) >> 20;
+                       debugf0("TAD CH#%d, offset #%d: %Lu.%03Lu GB (0x%016Lx), reg=0x%08x\n",
+                               i, j,
+                               tmp_mb / 1000, tmp_mb % 1000,
+                               ((u64)tmp_mb) << 20L,
+                               reg);
+               }
+       }
+
+       /*
+        * Step 6) Get RIR Wayness/Limit, per each channel
+        */
+       for (i = 0; i < NUM_CHANNELS; i++) {
+               if (!pvt->channel[i].dimms)
+                       continue;
+               for (j = 0; j < MAX_RIR_RANGES; j++) {
+                       pci_read_config_dword(pvt->pci_tad[i],
+                                             rir_way_limit[j],
+                                             &reg);
+
+                       if (!IS_RIR_VALID(reg))
+                               continue;
+
+                       tmp_mb = RIR_LIMIT(reg) >> 20;
+                       rir_way = 1 << RIR_WAY(reg);
+                       debugf0("CH#%d RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d, reg=0x%08x\n",
+                               i, j,
+                               tmp_mb / 1000, tmp_mb % 1000,
+                               ((u64)tmp_mb) << 20L,
+                               rir_way,
+                               reg);
+
+                       for (k = 0; k < rir_way; k++) {
+                               pci_read_config_dword(pvt->pci_tad[i],
+                                                     rir_offset[j][k],
+                                                     &reg);
+                               tmp_mb = RIR_OFFSET(reg) << 6;
+
+                               debugf0("CH#%d RIR#%d INTL#%d, offset %Lu.%03Lu GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
+                                       i, j, k,
+                                       tmp_mb / 1000, tmp_mb % 1000,
+                                       ((u64)tmp_mb) << 20L,
+                                       (u32)RIR_RNK_TGT(reg),
+                                       reg);
+                       }
+               }
+       }
+}
+
+struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
+{
+       struct sbridge_dev *sbridge_dev;
+
+       list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
+               if (sbridge_dev->node_id == node_id)
+                       return sbridge_dev->mci;
+       }
+       return NULL;
+}
+
+static int get_memory_error_data(struct mem_ctl_info *mci,
+                                u64 addr,
+                                u8 *socket,
+                                long *channel_mask,
+                                u8 *rank,
+                                char *area_type)
+{
+       struct mem_ctl_info     *new_mci;
+       struct sbridge_pvt *pvt = mci->pvt_info;
+       char                    msg[256];
+       int                     n_rir, n_sads, n_tads, sad_way, sck_xch;
+       int                     sad_interl, idx, base_ch;
+       int                     interleave_mode;
+       unsigned                sad_interleave[MAX_INTERLEAVE];
+       u32                     reg;
+       u8                      ch_way,sck_way;
+       u32                     tad_offset;
+       u32                     rir_way;
+       u64                     ch_addr, offset, limit, prv = 0;
+
+
+       /*
+        * Step 0) Check if the address is at special memory ranges
+        * The check bellow is probably enough to fill all cases where
+        * the error is not inside a memory, except for the legacy
+        * range (e. g. VGA addresses). It is unlikely, however, that the
+        * memory controller would generate an error on that range.
+        */
+       if ((addr > (u64) pvt->tolm) && (addr < (1L << 32))) {
+               sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
+               edac_mc_handle_ce_no_info(mci, msg);
+               return -EINVAL;
+       }
+       if (addr >= (u64)pvt->tohm) {
+               sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
+               edac_mc_handle_ce_no_info(mci, msg);
+               return -EINVAL;
+       }
+
+       /*
+        * Step 1) Get socket
+        */
+       for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
+               pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
+                                     &reg);
+
+               if (!DRAM_RULE_ENABLE(reg))
+                       continue;
+
+               limit = SAD_LIMIT(reg);
+               if (limit <= prv) {
+                       sprintf(msg, "Can't discover the memory socket");
+                       edac_mc_handle_ce_no_info(mci, msg);
+                       return -EINVAL;
+               }
+               if  (addr <= limit)
+                       break;
+               prv = limit;
+       }
+       if (n_sads == MAX_SAD) {
+               sprintf(msg, "Can't discover the memory socket");
+               edac_mc_handle_ce_no_info(mci, msg);
+               return -EINVAL;
+       }
+       area_type = get_dram_attr(reg);
+       interleave_mode = INTERLEAVE_MODE(reg);
+
+       pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
+                             &reg);
+       sad_interl = sad_pkg(reg, 0);
+       for (sad_way = 0; sad_way < 8; sad_way++) {
+               if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way))
+                       break;
+               sad_interleave[sad_way] = sad_pkg(reg, sad_way);
+               debugf0("SAD interleave #%d: %d\n",
+                       sad_way, sad_interleave[sad_way]);
+       }
+       debugf0("mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
+               pvt->sbridge_dev->mc,
+               n_sads,
+               addr,
+               limit,
+               sad_way + 7,
+               INTERLEAVE_MODE(reg) ? "" : "XOR[18:16]");
+       if (interleave_mode)
+               idx = ((addr >> 6) ^ (addr >> 16)) & 7;
+       else
+               idx = (addr >> 6) & 7;
+       switch (sad_way) {
+       case 1:
+               idx = 0;
+               break;
+       case 2:
+               idx = idx & 1;
+               break;
+       case 4:
+               idx = idx & 3;
+               break;
+       case 8:
+               break;
+       default:
+               sprintf(msg, "Can't discover socket interleave");
+               edac_mc_handle_ce_no_info(mci, msg);
+               return -EINVAL;
+       }
+       *socket = sad_interleave[idx];
+       debugf0("SAD interleave index: %d (wayness %d) = CPU socket %d\n",
+               idx, sad_way, *socket);
+
+       /*
+        * Move to the proper node structure, in order to access the
+        * right PCI registers
+        */
+       new_mci = get_mci_for_node_id(*socket);
+       if (!new_mci) {
+               sprintf(msg, "Struct for socket #%u wasn't initialized",
+                       *socket);
+               edac_mc_handle_ce_no_info(mci, msg);
+               return -EINVAL;
+       }
+       mci = new_mci;
+       pvt = mci->pvt_info;
+
+       /*
+        * Step 2) Get memory channel
+        */
+       prv = 0;
+       for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
+               pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
+                                     &reg);
+               limit = TAD_LIMIT(reg);
+               if (limit <= prv) {
+                       sprintf(msg, "Can't discover the memory channel");
+                       edac_mc_handle_ce_no_info(mci, msg);
+                       return -EINVAL;
+               }
+               if  (addr <= limit)
+                       break;
+               prv = limit;
+       }
+       ch_way = TAD_CH(reg) + 1;
+       sck_way = TAD_SOCK(reg) + 1;
+       /*
+        * FIXME: Is it right to always use channel 0 for offsets?
+        */
+       pci_read_config_dword(pvt->pci_tad[0],
+                               tad_ch_nilv_offset[n_tads],
+                               &tad_offset);
+
+       if (ch_way == 3)
+               idx = addr >> 6;
+       else
+               idx = addr >> (6 + sck_way);
+       idx = idx % ch_way;
+
+       /*
+        * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ???
+        */
+       switch (idx) {
+       case 0:
+               base_ch = TAD_TGT0(reg);
+               break;
+       case 1:
+               base_ch = TAD_TGT1(reg);
+               break;
+       case 2:
+               base_ch = TAD_TGT2(reg);
+               break;
+       case 3:
+               base_ch = TAD_TGT3(reg);
+               break;
+       default:
+               sprintf(msg, "Can't discover the TAD target");
+               edac_mc_handle_ce_no_info(mci, msg);
+               return -EINVAL;
+       }
+       *channel_mask = 1 << base_ch;
+
+       if (pvt->is_mirrored) {
+               *channel_mask |= 1 << ((base_ch + 2) % 4);
+               switch(ch_way) {
+               case 2:
+               case 4:
+                       sck_xch = 1 << sck_way * (ch_way >> 1);
+                       break;
+               default:
+                       sprintf(msg, "Invalid mirror set. Can't decode addr");
+                       edac_mc_handle_ce_no_info(mci, msg);
+                       return -EINVAL;
+               }
+       } else
+               sck_xch = (1 << sck_way) * ch_way;
+
+       if (pvt->is_lockstep)
+               *channel_mask |= 1 << ((base_ch + 1) % 4);
+
+       offset = TAD_OFFSET(tad_offset);
+
+       debugf0("TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
+               n_tads,
+               addr,
+               limit,
+               (u32)TAD_SOCK(reg),
+               ch_way,
+               offset,
+               idx,
+               base_ch,
+               *channel_mask);
+
+       /* Calculate channel address */
+       /* Remove the TAD offset */
+
+       if (offset > addr) {
+               sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
+                       offset, addr);
+               edac_mc_handle_ce_no_info(mci, msg);
+               return -EINVAL;
+       }
+       addr -= offset;
+       /* Store the low bits [0:6] of the addr */
+       ch_addr = addr & 0x7f;
+       /* Remove socket wayness and remove 6 bits */
+       addr >>= 6;
+       addr /= sck_xch;
+#if 0
+       /* Divide by channel way */
+       addr = addr / ch_way;
+#endif
+       /* Recover the last 6 bits */
+       ch_addr |= addr << 6;
+
+       /*
+        * Step 3) Decode rank
+        */
+       for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
+               pci_read_config_dword(pvt->pci_tad[base_ch],
+                                     rir_way_limit[n_rir],
+                                     &reg);
+
+               if (!IS_RIR_VALID(reg))
+                       continue;
+
+               limit = RIR_LIMIT(reg);
+
+               debugf0("RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d\n",
+                       n_rir,
+                       (limit >> 20) / 1000, (limit >> 20) % 1000,
+                       limit,
+                       1 << RIR_WAY(reg));
+               if  (ch_addr <= limit)
+                       break;
+       }
+       if (n_rir == MAX_RIR_RANGES) {
+               sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
+                       ch_addr);
+               edac_mc_handle_ce_no_info(mci, msg);
+               return -EINVAL;
+       }
+       rir_way = RIR_WAY(reg);
+       if (pvt->is_close_pg)
+               idx = (ch_addr >> 6);
+       else
+               idx = (ch_addr >> 13);  /* FIXME: Datasheet says to shift by 15 */
+       idx %= 1 << rir_way;
+
+       pci_read_config_dword(pvt->pci_tad[base_ch],
+                             rir_offset[n_rir][idx],
+                             &reg);
+       *rank = RIR_RNK_TGT(reg);
+
+       debugf0("RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
+               n_rir,
+               ch_addr,
+               limit,
+               rir_way,
+               idx);
+
+       return 0;
+}
+
+/****************************************************************************
+       Device initialization routines: put/get, init/exit
+ ****************************************************************************/
+
+/*
+ *     sbridge_put_all_devices 'put' all the devices that we have
+ *                             reserved via 'get'
+ */
+static void sbridge_put_devices(struct sbridge_dev *sbridge_dev)
+{
+       int i;
+
+       debugf0(__FILE__ ": %s()\n", __func__);
+       for (i = 0; i < sbridge_dev->n_devs; i++) {
+               struct pci_dev *pdev = sbridge_dev->pdev[i];
+               if (!pdev)
+                       continue;
+               debugf0("Removing dev %02x:%02x.%d\n",
+                       pdev->bus->number,
+                       PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+               pci_dev_put(pdev);
+       }
+}
+
+static void sbridge_put_all_devices(void)
+{
+       struct sbridge_dev *sbridge_dev, *tmp;
+
+       list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) {
+               sbridge_put_devices(sbridge_dev);
+               free_sbridge_dev(sbridge_dev);
+       }
+}
+
+/*
+ *     sbridge_get_all_devices Find and perform 'get' operation on the MCH's
+ *                     device/functions we want to reference for this driver
+ *
+ *                     Need to 'get' device 16 func 1 and func 2
+ */
+static int sbridge_get_onedevice(struct pci_dev **prev,
+                                u8 *num_mc,
+                                const struct pci_id_table *table,
+                                const unsigned devno)
+{
+       struct sbridge_dev *sbridge_dev;
+       const struct pci_id_descr *dev_descr = &table->descr[devno];
+
+       struct pci_dev *pdev = NULL;
+       u8 bus = 0;
+
+       sbridge_printk(KERN_INFO,
+               "Seeking for: dev %02x.%d PCI ID %04x:%04x\n",
+               dev_descr->dev, dev_descr->func,
+               PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+
+       pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+                             dev_descr->dev_id, *prev);
+
+       if (!pdev) {
+               if (*prev) {
+                       *prev = pdev;
+                       return 0;
+               }
+
+               if (dev_descr->optional)
+                       return 0;
+
+               if (devno == 0)
+                       return -ENODEV;
+
+               sbridge_printk(KERN_INFO,
+                       "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
+                       dev_descr->dev, dev_descr->func,
+                       PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+
+               /* End of list, leave */
+               return -ENODEV;
+       }
+       bus = pdev->bus->number;
+
+       sbridge_dev = get_sbridge_dev(bus);
+       if (!sbridge_dev) {
+               sbridge_dev = alloc_sbridge_dev(bus, table);
+               if (!sbridge_dev) {
+                       pci_dev_put(pdev);
+                       return -ENOMEM;
+               }
+               (*num_mc)++;
+       }
+
+       if (sbridge_dev->pdev[devno]) {
+               sbridge_printk(KERN_ERR,
+                       "Duplicate device found for "
+                       "dev %02x:%d.%d PCI ID %04x:%04x\n",
+                       bus, dev_descr->dev, dev_descr->func,
+                       PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+               pci_dev_put(pdev);
+               return -ENODEV;
+       }
+
+       sbridge_dev->pdev[devno] = pdev;
+
+       /* Sanity check */
+       if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
+                       PCI_FUNC(pdev->devfn) != dev_descr->func)) {
+               sbridge_printk(KERN_ERR,
+                       "Device PCI ID %04x:%04x "
+                       "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n",
+                       PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
+                       bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+                       bus, dev_descr->dev, dev_descr->func);
+               return -ENODEV;
+       }
+
+       /* Be sure that the device is enabled */
+       if (unlikely(pci_enable_device(pdev) < 0)) {
+               sbridge_printk(KERN_ERR,
+                       "Couldn't enable "
+                       "dev %02x:%d.%d PCI ID %04x:%04x\n",
+                       bus, dev_descr->dev, dev_descr->func,
+                       PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+               return -ENODEV;
+       }
+
+       debugf0("Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
+               bus, dev_descr->dev,
+               dev_descr->func,
+               PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+
+       /*
+        * As stated in drivers/pci/search.c, the reference count for
+        * @from is always decremented if it is not %NULL. So, since we
+        * keep looping until pci_get_device() returns NULL, we must take
+        * an extra reference on each device we hold on to
+        */
+       pci_dev_get(pdev);
+
+       *prev = pdev;
+
+       return 0;
+}
+
+static int sbridge_get_all_devices(u8 *num_mc)
+{
+       int i, rc;
+       struct pci_dev *pdev = NULL;
+       const struct pci_id_table *table = pci_dev_descr_sbridge_table;
+
+       while (table && table->descr) {
+               for (i = 0; i < table->n_devs; i++) {
+                       pdev = NULL;
+                       do {
+                               rc = sbridge_get_onedevice(&pdev, num_mc,
+                                                          table, i);
+                               if (rc < 0) {
+                                       if (i == 0) {
+                                               i = table->n_devs;
+                                               break;
+                                       }
+                                       sbridge_put_all_devices();
+                                       return -ENODEV;
+                               }
+                       } while (pdev);
+               }
+               table++;
+       }
+
+       return 0;
+}
+
+static int mci_bind_devs(struct mem_ctl_info *mci,
+                        struct sbridge_dev *sbridge_dev)
+{
+       struct sbridge_pvt *pvt = mci->pvt_info;
+       struct pci_dev *pdev;
+       int i, func, slot;
+
+       for (i = 0; i < sbridge_dev->n_devs; i++) {
+               pdev = sbridge_dev->pdev[i];
+               if (!pdev)
+                       continue;
+               slot = PCI_SLOT(pdev->devfn);
+               func = PCI_FUNC(pdev->devfn);
+               switch (slot) {
+               case 12:
+                       switch (func) {
+                       case 6:
+                               pvt->pci_sad0 = pdev;
+                               break;
+                       case 7:
+                               pvt->pci_sad1 = pdev;
+                               break;
+                       default:
+                               goto error;
+                       }
+                       break;
+               case 13:
+                       switch (func) {
+                       case 6:
+                               pvt->pci_br = pdev;
+                               break;
+                       default:
+                               goto error;
+                       }
+                       break;
+               case 14:
+                       switch (func) {
+                       case 0:
+                               pvt->pci_ha0 = pdev;
+                               break;
+                       default:
+                               goto error;
+                       }
+                       break;
+               case 15:
+                       switch (func) {
+                       case 0:
+                               pvt->pci_ta = pdev;
+                               break;
+                       case 1:
+                               pvt->pci_ras = pdev;
+                               break;
+                       case 2:
+                       case 3:
+                       case 4:
+                       case 5:
+                               pvt->pci_tad[func - 2] = pdev;
+                               break;
+                       default:
+                               goto error;
+                       }
+                       break;
+               case 17:
+                       switch (func) {
+                       case 0:
+                               pvt->pci_ddrio = pdev;
+                               break;
+                       default:
+                               goto error;
+                       }
+                       break;
+               default:
+                       goto error;
+               }
+
+               debugf0("Associated PCI %02x.%02d.%d with dev = %p\n",
+                       sbridge_dev->bus,
+                       PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+                       pdev);
+       }
+
+       /* Check if everything was registered */
+       if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
+           !pvt->pci_ras || !pvt->pci_ta || !pvt->pci_ddrio)
+               goto enodev;
+
+       for (i = 0; i < NUM_CHANNELS; i++) {
+               if (!pvt->pci_tad[i])
+                       goto enodev;
+       }
+       return 0;
+
+enodev:
+       sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
+       return -ENODEV;
+
+error:
+       sbridge_printk(KERN_ERR, "Device %d, function %d "
+                     "is out of the expected range\n",
+                     slot, func);
+       return -EINVAL;
+}
+
+/****************************************************************************
+                       Error check routines
+ ****************************************************************************/
+
+/*
+ * While Sandy Bridge has error count registers, the SMI BIOS reads values
+ * from them and resets the counters, so they are not reliable for the OS
+ * to read. We therefore have no option but to trust whatever MCE is
+ * telling us about the errors.
+ */
+static void sbridge_mce_output_error(struct mem_ctl_info *mci,
+                                   const struct mce *m)
+{
+       struct mem_ctl_info *new_mci;
+       struct sbridge_pvt *pvt = mci->pvt_info;
+       char *type, *optype, *msg, *recoverable_msg;
+       bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
+       bool overflow = GET_BITFIELD(m->status, 62, 62);
+       bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+       bool recoverable = GET_BITFIELD(m->status, 56, 56);
+       u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
+       u32 mscod = GET_BITFIELD(m->status, 16, 31);
+       u32 errcode = GET_BITFIELD(m->status, 0, 15);
+       u32 channel = GET_BITFIELD(m->status, 0, 3);
+       u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+       long channel_mask, first_channel;
+       u8  rank, socket;
+       int csrow, rc, dimm;
+       char *area_type = "Unknown";
+
+       if (ripv)
+               type = "NON_FATAL";
+       else
+               type = "FATAL";
+
+       /*
+        * According to Table 15-9 of the Intel Architecture spec vol 3A,
+        * memory errors should fit in this mask:
+        *      000f 0000 1mmm cccc (binary)
+        * where:
+        *      f = Correction Report Filtering Bit. If 1, subsequent errors
+        *          won't be shown
+        *      mmm = error type
+        *      cccc = channel
+        * If the mask doesn't match, report an error to the parsing logic
+        */
+       if ((errcode & 0xef80) != 0x80) {
+               optype = "Can't parse: it is not a memory error";
+       } else {
+               switch (optypenum) {
+               case 0:
+                       optype = "generic undef request";
+                       break;
+               case 1:
+                       optype = "memory read";
+                       break;
+               case 2:
+                       optype = "memory write";
+                       break;
+               case 3:
+                       optype = "addr/cmd";
+                       break;
+               case 4:
+                       optype = "memory scrubbing";
+                       break;
+               default:
+                       optype = "reserved";
+                       break;
+               }
+       }
+
+       rc = get_memory_error_data(mci, m->addr, &socket,
+                                  &channel_mask, &rank, area_type);
+       if (rc < 0)
+               return;
+       new_mci = get_mci_for_node_id(socket);
+       if (!new_mci) {
+               edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!");
+               return;
+       }
+       mci = new_mci;
+       pvt = mci->pvt_info;
+
+       first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
+
+       if (rank < 4)
+               dimm = 0;
+       else if (rank < 8)
+               dimm = 1;
+       else
+               dimm = 2;
+
+       csrow = pvt->csrow_map[first_channel][dimm];
+
+       if (uncorrected_error && recoverable)
+               recoverable_msg = " recoverable";
+       else
+               recoverable_msg = "";
+
+       /*
+        * FIXME: What should we do with the "channel" information from
+        * mcelog? We can probably just discard it, as the channel info
+        * comes from the get_memory_error_data() address decoding
+        */
+       msg = kasprintf(GFP_ATOMIC,
+                       "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), "
+                       "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n",
+                       core_err_cnt,
+                       area_type,
+                       optype,
+                       type,
+                       recoverable_msg,
+                       overflow ? "OVERFLOW" : "",
+                       m->cpu,
+                       mscod, errcode,
+                       channel,                /* 1111b means not specified */
+                       (long long) m->addr,
+                       socket,
+                       first_channel,          /* This is the real channel on SB */
+                       channel_mask,
+                       rank);
+
+       debugf0("%s", msg);
+
+       /* Call the helper to output message */
+       if (uncorrected_error)
+               edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg);
+       else
+               edac_mc_handle_fbd_ce(mci, csrow, 0, msg);
+
+       kfree(msg);
+}
+
+/*
+ *     sbridge_check_error     Retrieve and process errors reported by the
+ *                             hardware. Called by the Core module.
+ */
+static void sbridge_check_error(struct mem_ctl_info *mci)
+{
+       struct sbridge_pvt *pvt = mci->pvt_info;
+       int i;
+       unsigned count = 0;
+       struct mce *m;
+
+       /*
+        * MCE first step: Copy all mce errors into a temporary buffer.
+        * We use double buffering here, to reduce the risk of
+        * losing an error.
+        */
+       smp_rmb();
+       count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
+               % MCE_LOG_LEN;
+       if (!count)
+               return;
+
+       m = pvt->mce_outentry;
+       if (pvt->mce_in + count > MCE_LOG_LEN) {
+               unsigned l = MCE_LOG_LEN - pvt->mce_in;
+
+               memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
+               smp_wmb();
+               pvt->mce_in = 0;
+               count -= l;
+               m += l;
+       }
+       memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
+       smp_wmb();
+       pvt->mce_in += count;
+
+       smp_rmb();
+       if (pvt->mce_overrun) {
+               sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
+                             pvt->mce_overrun);
+               smp_wmb();
+               pvt->mce_overrun = 0;
+       }
+
+       /*
+        * MCE second step: parse errors and display
+        */
+       for (i = 0; i < count; i++)
+               sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
+}
+
+/*
+ * sbridge_mce_check_error     Replicates mcelog routine to get errors
+ *                             This routine simply queues mcelog errors and
+ *                             returns. The errors themselves are handled
+ *                             later by sbridge_check_error.
+ * WARNING: As this routine may be called at NMI time, extra care should
+ * be taken to avoid deadlocks, and to be as fast as possible.
+ */
+static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
+                                  void *data)
+{
+       struct mce *mce = (struct mce *)data;
+       struct mem_ctl_info *mci;
+       struct sbridge_pvt *pvt;
+
+       mci = get_mci_for_node_id(mce->socketid);
+       if (!mci)
+               return NOTIFY_BAD;
+       pvt = mci->pvt_info;
+
+       /*
+        * Just let mcelog handle it if the error is
+        * outside the memory controller. A memory error
+        * is indicated by bit 7 = 1 and bits 8-11,13-15 = 0
+        * (bit 12 has a special meaning).
+        */
+       if ((mce->status & 0xefff) >> 7 != 1)
+               return NOTIFY_DONE;
+
+       printk("sbridge: HANDLING MCE MEMORY ERROR\n");
+
+       printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
+              mce->extcpu, mce->mcgstatus, mce->bank, mce->status);
+       printk("TSC %llx ", mce->tsc);
+       printk("ADDR %llx ", mce->addr);
+       printk("MISC %llx ", mce->misc);
+
+       printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+               mce->cpuvendor, mce->cpuid, mce->time,
+               mce->socketid, mce->apicid);
+
+#ifdef CONFIG_SMP
+       /* Only handle if it is the right mc controller */
+       if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
+               return NOTIFY_DONE;
+#endif
+
+       smp_rmb();
+       if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
+               smp_wmb();
+               pvt->mce_overrun++;
+               return NOTIFY_DONE;
+       }
+
+       /* Copy the memory error into the ring buffer */
+       memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
+       smp_wmb();
+       pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
+
+       /* Handle fatal errors immediately */
+       if (mce->mcgstatus & 1)
+               sbridge_check_error(mci);
+
+       /* Advise mcelog that the error was handled */
+       return NOTIFY_STOP;
+}
+
+static struct notifier_block sbridge_mce_dec = {
+       .notifier_call      = sbridge_mce_check_error,
+};
+
+/****************************************************************************
+                       EDAC register/unregister logic
+ ****************************************************************************/
+
+static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
+{
+       struct mem_ctl_info *mci = sbridge_dev->mci;
+       struct sbridge_pvt *pvt;
+
+       if (unlikely(!mci || !mci->pvt_info)) {
+               debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
+                       __func__, &sbridge_dev->pdev[0]->dev);
+
+               sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
+               return;
+       }
+
+       pvt = mci->pvt_info;
+
+       debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
+               __func__, mci, &sbridge_dev->pdev[0]->dev);
+
+       atomic_notifier_chain_unregister(&x86_mce_decoder_chain,
+                                        &sbridge_mce_dec);
+
+       /* Remove MC sysfs nodes */
+       edac_mc_del_mc(mci->dev);
+
+       debugf1("%s: free mci struct\n", mci->ctl_name);
+       kfree(mci->ctl_name);
+       edac_mc_free(mci);
+       sbridge_dev->mci = NULL;
+}
+
+static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
+{
+       struct mem_ctl_info *mci;
+       struct sbridge_pvt *pvt;
+       int rc, channels, csrows;
+
+       /* Check the number of active and not disabled channels */
+       rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows);
+       if (unlikely(rc < 0))
+               return rc;
+
+       /* allocate a new MC control structure */
+       mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc);
+       if (unlikely(!mci))
+               return -ENOMEM;
+
+       debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
+               __func__, mci, &sbridge_dev->pdev[0]->dev);
+
+       pvt = mci->pvt_info;
+       memset(pvt, 0, sizeof(*pvt));
+
+       /* Associate sbridge_dev and mci for future usage */
+       pvt->sbridge_dev = sbridge_dev;
+       sbridge_dev->mci = mci;
+
+       mci->mtype_cap = MEM_FLAG_DDR3;
+       mci->edac_ctl_cap = EDAC_FLAG_NONE;
+       mci->edac_cap = EDAC_FLAG_NONE;
+       mci->mod_name = "sbridge_edac.c";
+       mci->mod_ver = SBRIDGE_REVISION;
+       mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
+       mci->dev_name = pci_name(sbridge_dev->pdev[0]);
+       mci->ctl_page_to_phys = NULL;
+
+       /* Set the function pointer to an actual operation function */
+       mci->edac_check = sbridge_check_error;
+
+       /* Store pci devices at mci for faster access */
+       rc = mci_bind_devs(mci, sbridge_dev);
+       if (unlikely(rc < 0))
+               goto fail0;
+
+       /* Get dimm basic config and the memory layout */
+       get_dimm_config(mci);
+       get_memory_layout(mci);
+
+       /* record ptr to the generic device */
+       mci->dev = &sbridge_dev->pdev[0]->dev;
+
+       /* add this new MC control structure to EDAC's list of MCs */
+       if (unlikely(edac_mc_add_mc(mci))) {
+               debugf0("MC: " __FILE__
+                       ": %s(): failed edac_mc_add_mc()\n", __func__);
+               rc = -EINVAL;
+               goto fail0;
+       }
+
+       atomic_notifier_chain_register(&x86_mce_decoder_chain,
+                                      &sbridge_mce_dec);
+       return 0;
+
+fail0:
+       kfree(mci->ctl_name);
+       edac_mc_free(mci);
+       sbridge_dev->mci = NULL;
+       return rc;
+}
+
+/*
+ *     sbridge_probe   Probe for ONE instance of the device to see if it
+ *                     is present.
+ *     return:
+ *             0 if a device was found
+ *             < 0 for an error code
+ */
+
+static int __devinit sbridge_probe(struct pci_dev *pdev,
+                                 const struct pci_device_id *id)
+{
+       int rc;
+       u8 mc, num_mc = 0;
+       struct sbridge_dev *sbridge_dev;
+
+       /* get the pci devices we want to reserve for our use */
+       mutex_lock(&sbridge_edac_lock);
+
+       /*
+        * All memory controllers are allocated at the first pass.
+        */
+       if (unlikely(probed >= 1)) {
+               mutex_unlock(&sbridge_edac_lock);
+               return -ENODEV;
+       }
+       probed++;
+
+       rc = sbridge_get_all_devices(&num_mc);
+       if (unlikely(rc < 0))
+               goto fail0;
+       mc = 0;
+
+       list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
+               debugf0("Registering MC#%d (%d of %d)\n", mc, mc + 1, num_mc);
+               sbridge_dev->mc = mc++;
+               rc = sbridge_register_mci(sbridge_dev);
+               if (unlikely(rc < 0))
+                       goto fail1;
+       }
+
+       sbridge_printk(KERN_INFO, "Driver loaded.\n");
+
+       mutex_unlock(&sbridge_edac_lock);
+       return 0;
+
+fail1:
+       list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
+               sbridge_unregister_mci(sbridge_dev);
+
+       sbridge_put_all_devices();
+fail0:
+       mutex_unlock(&sbridge_edac_lock);
+       return rc;
+}
+
+/*
+ *     sbridge_remove  destructor for one instance of device
+ *
+ */
+static void __devexit sbridge_remove(struct pci_dev *pdev)
+{
+       struct sbridge_dev *sbridge_dev;
+
+       debugf0(__FILE__ ": %s()\n", __func__);
+
+       /*
+        * There is a problem here: the pdev value used for removal will be
+        * wrong, since it will point to the X58 register used to detect that
+        * the machine is a Nehalem or newer design. However, due to the way
+        * several PCI devices are grouped together to provide MC
+        * functionality, we need to use a different method for releasing
+        * the devices
+        */
+
+       mutex_lock(&sbridge_edac_lock);
+
+       if (unlikely(!probed)) {
+               mutex_unlock(&sbridge_edac_lock);
+               return;
+       }
+
+       list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
+               sbridge_unregister_mci(sbridge_dev);
+
+       /* Release PCI resources */
+       sbridge_put_all_devices();
+
+       probed--;
+
+       mutex_unlock(&sbridge_edac_lock);
+}
+
+MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
+
+/*
+ *     sbridge_driver  pci_driver structure for this module
+ *
+ */
+static struct pci_driver sbridge_driver = {
+       .name     = "sbridge_edac",
+       .probe    = sbridge_probe,
+       .remove   = __devexit_p(sbridge_remove),
+       .id_table = sbridge_pci_tbl,
+};
+
+/*
+ *     sbridge_init            Module entry function
+ *                     Try to initialize this module for its devices
+ */
+static int __init sbridge_init(void)
+{
+       int pci_rc;
+
+       debugf2("MC: " __FILE__ ": %s()\n", __func__);
+
+       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+       opstate_init();
+
+       pci_rc = pci_register_driver(&sbridge_driver);
+
+       if (pci_rc >= 0)
+               return 0;
+
+       sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
+                     pci_rc);
+
+       return pci_rc;
+}
+
+/*
+ *     sbridge_exit()  Module exit function
+ *                     Unregister the driver
+ */
+static void __exit sbridge_exit(void)
+{
+       debugf2("MC: " __FILE__ ": %s()\n", __func__);
+       pci_unregister_driver(&sbridge_driver);
+}
+
+module_init(sbridge_init);
+module_exit(sbridge_exit);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
+MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
+MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - "
+                  SBRIDGE_REVISION);
index cb0bd078efc030887f31548b4c6f8d97abfa3eaf..8b3c745b1b051f7924fc3a95c113dd840a60ec4a 100644 (file)
@@ -189,7 +189,7 @@ config GPIO_U300
 
 config GPIO_VX855
        tristate "VIA VX855/VX875 GPIO"
-       depends on MFD_SUPPORT && PCI
+       depends on PCI
        select MFD_CORE
        select MFD_VX855
        help
@@ -428,7 +428,6 @@ config GPIO_TIMBERDALE
 config GPIO_RDC321X
        tristate "RDC R-321x GPIO support"
        depends on PCI
-       select MFD_SUPPORT
        select MFD_CORE
        select MFD_RDC321X
        help
index 9bc7b03269dfbb130a7a2f8831c0884ce461a3ce..8cdb4b45b30a26f3dcf44be3c662299e6e92d001 100644 (file)
@@ -458,6 +458,9 @@ static const struct hid_device_id apple_devices[] = {
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO),
                .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
                        APPLE_ISO_KEYBOARD },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO),
+               .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+                       APPLE_ISO_KEYBOARD },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS),
                .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI),
@@ -508,6 +511,12 @@ static const struct hid_device_id apple_devices[] = {
                .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS),
                .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI),
+               .driver_data = APPLE_HAS_FN },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO),
+               .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS),
+               .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI),
                .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO),
index 91adcc5bad284ea6cd28e301a237b554de959bb8..848a56c0279c8ac61687340c732521094428e541 100644 (file)
@@ -1362,6 +1362,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS) },
@@ -1374,6 +1377,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_T91MT) },
@@ -1942,6 +1946,9 @@ static const struct hid_device_id hid_mouse_ignore_list[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ISO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_JIS) },
index 1680e99b4816a8558291293d38dcc43625640e9c..06ce996b8b6504f65c9216c3173a38cf5a51cb34 100644 (file)
 #define USB_DEVICE_ID_APPLE_ALU_REVB_ANSI      0x024f
 #define USB_DEVICE_ID_APPLE_ALU_REVB_ISO       0x0250
 #define USB_DEVICE_ID_APPLE_ALU_REVB_JIS       0x0251
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI  0x0252
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO   0x0253
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS   0x0254
 #define USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI  0x0249
 #define USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO   0x024a
 #define USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS   0x024b
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI  0x0239
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO   0x023a
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS   0x023b
+#define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO   0x0256
 #define USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY   0x030a
 #define USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY    0x030b
 #define USB_DEVICE_ID_APPLE_ATV_IRCONTROL      0x8241
index fa5d7a1ffa9e97e81a8785a5d96c4f1af59cd47d..f1c909f1b23998f91d9b7b76d8cf31e53f65aadd 100644 (file)
@@ -291,7 +291,6 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi,
                        td->last_slot_field = usage->hid;
                        td->last_field_index = field->index;
                        td->last_mt_collection = usage->collection_index;
-                       hdev->quirks &= ~HID_QUIRK_MULTITOUCH;
                        return 1;
                case HID_DG_WIDTH:
                        hid_map_usage(hi, usage, bit, max,
@@ -530,44 +529,12 @@ static void mt_set_input_mode(struct hid_device *hdev)
        }
 }
 
-/* a list of devices for which there is a specialized multitouch driver */
-static const struct hid_device_id mt_have_special_driver[] = {
-       { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, 0x0001) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, 0x0006) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA,
-                       USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA,
-                       USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH) },
-       { }
-};
-
-static bool mt_match_one_id(struct hid_device *hdev,
-               const struct hid_device_id *id)
-{
-       return id->bus == hdev->bus &&
-               (id->vendor == HID_ANY_ID || id->vendor == hdev->vendor) &&
-               (id->product == HID_ANY_ID || id->product == hdev->product);
-}
-
-static const struct hid_device_id *mt_match_id(struct hid_device *hdev,
-               const struct hid_device_id *id)
-{
-       for (; id->bus; id++)
-               if (mt_match_one_id(hdev, id))
-                       return id;
-
-       return NULL;
-}
-
 static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
        int ret, i;
        struct mt_device *td;
        struct mt_class *mtclass = mt_classes; /* MT_CLS_DEFAULT */
 
-       if (mt_match_id(hdev, mt_have_special_driver))
-               return -ENODEV;
-
        for (i = 0; mt_classes[i].name ; i++) {
                if (id->driver_data == mt_classes[i].name) {
                        mtclass = &(mt_classes[i]);
@@ -575,6 +542,10 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
                }
        }
 
+       /* This allows the driver to correctly support devices
+        * that emit events over several HID messages.
+        */
+       hdev->quirks |= HID_QUIRK_NO_INPUT_SYNC;
 
        td = kzalloc(sizeof(struct mt_device), GFP_KERNEL);
        if (!td) {
@@ -590,16 +561,10 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
        if (ret != 0)
                goto fail;
 
-       hdev->quirks |= HID_QUIRK_MULTITOUCH;
        ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
        if (ret)
                goto fail;
 
-       /* This allows the driver to correctly support devices
-        * that emit events over several HID messages.
-        */
-       hdev->quirks |= HID_QUIRK_NO_INPUT_SYNC;
-
        td->slots = kzalloc(td->maxcontacts * sizeof(struct mt_slot),
                                GFP_KERNEL);
        if (!td->slots) {
@@ -793,10 +758,6 @@ static const struct hid_device_id mt_devices[] = {
                HID_USB_DEVICE(USB_VENDOR_ID_XAT,
                        USB_DEVICE_ID_XAT_CSR) },
 
-       /* Rest of the world */
-       { .driver_data = MT_CLS_DEFAULT,
-               HID_USB_DEVICE(HID_ANY_ID, HID_ANY_ID) },
-
        { }
 };
 MODULE_DEVICE_TABLE(hid, mt_devices);
index 5666e7587b185300c113ff4d4878df25ce027a85..56ce12c23b02b523ef095615a5e8f6e0f828553a 100644 (file)
@@ -162,27 +162,27 @@ static int roccat_open(struct inode *inode, struct file *file)
 
        device = devices[minor];
 
-       mutex_lock(&device->readers_lock);
-
        if (!device) {
                pr_emerg("roccat device with minor %d doesn't exist\n", minor);
                error = -ENODEV;
-               goto exit_err;
+               goto exit_err_devices;
        }
 
+       mutex_lock(&device->readers_lock);
+
        if (!device->open++) {
                /* power on device on adding first reader */
                error = hid_hw_power(device->hid, PM_HINT_FULLON);
                if (error < 0) {
                        --device->open;
-                       goto exit_err;
+                       goto exit_err_readers;
                }
 
                error = hid_hw_open(device->hid);
                if (error < 0) {
                        hid_hw_power(device->hid, PM_HINT_NORMAL);
                        --device->open;
-                       goto exit_err;
+                       goto exit_err_readers;
                }
        }
 
@@ -193,13 +193,13 @@ static int roccat_open(struct inode *inode, struct file *file)
        list_add_tail(&reader->node, &device->readers);
        file->private_data = reader;
 
-exit_unlock:
+exit_err_readers:
        mutex_unlock(&device->readers_lock);
+exit_err_devices:
        mutex_unlock(&devices_lock);
+       if (error)
+               kfree(reader);
        return error;
-exit_err:
-       kfree(reader);
-       goto exit_unlock;
 }
 
 static int roccat_release(struct inode *inode, struct file *file)
index d5226c9e1201521046458a403d83fba245032db3..ef65ab56b0948a23da0d797c01c321fee45fba4f 100644 (file)
@@ -31,7 +31,7 @@
 #define MC13783_ADC_NAME       "mc13783-adc"
 
 struct mc13783_adc_priv {
-       struct mc13783 *mc13783;
+       struct mc13xxx *mc13xxx;
        struct device *hwmon_dev;
 };
 
@@ -51,8 +51,8 @@ static int mc13783_adc_read(struct device *dev,
        unsigned int sample[4];
        int ret;
 
-       ret = mc13783_adc_do_conversion(priv->mc13783,
-                       MC13783_ADC_MODE_MULT_CHAN,
+       ret = mc13xxx_adc_do_conversion(priv->mc13xxx,
+                       MC13XXX_ADC_MODE_MULT_CHAN,
                        channel, sample);
        if (ret)
                return ret;
@@ -147,9 +147,9 @@ static const struct attribute_group mc13783_group_ts = {
 static int mc13783_adc_use_touchscreen(struct platform_device *pdev)
 {
        struct mc13783_adc_priv *priv = platform_get_drvdata(pdev);
-       unsigned flags = mc13783_get_flags(priv->mc13783);
+       unsigned flags = mc13xxx_get_flags(priv->mc13xxx);
 
-       return flags & MC13783_USE_TOUCHSCREEN;
+       return flags & MC13XXX_USE_TOUCHSCREEN;
 }
 
 static int __init mc13783_adc_probe(struct platform_device *pdev)
@@ -161,7 +161,7 @@ static int __init mc13783_adc_probe(struct platform_device *pdev)
        if (!priv)
                return -ENOMEM;
 
-       priv->mc13783 = dev_get_drvdata(pdev->dev.parent);
+       priv->mc13xxx = dev_get_drvdata(pdev->dev.parent);
 
        platform_set_drvdata(pdev, priv);
 
index 1f29bab6b3e5a274d8cf02e0d3e4225ba8cc4a74..c7c3128393d1dcdce0589d5a268893479ae61804 100644 (file)
@@ -2,22 +2,31 @@
 # Generic HWSPINLOCK framework
 #
 
+# HWSPINLOCK always gets selected by whoever wants it.
 config HWSPINLOCK
-       tristate "Generic Hardware Spinlock framework"
-       depends on ARCH_OMAP4
-       help
-         Say y here to support the generic hardware spinlock framework.
-         You only need to enable this if you have hardware spinlock module
-         on your system (usually only relevant if your system has remote slave
-         coprocessors).
+       tristate
 
-         If unsure, say N.
+menu "Hardware Spinlock drivers"
 
 config HWSPINLOCK_OMAP
        tristate "OMAP Hardware Spinlock device"
-       depends on HWSPINLOCK && ARCH_OMAP4
+       depends on ARCH_OMAP4
+       select HWSPINLOCK
        help
          Say y here to support the OMAP Hardware Spinlock device (first
          introduced in OMAP4).
 
          If unsure, say N.
+
+config HSEM_U8500
+       tristate "STE Hardware Semaphore functionality"
+       depends on ARCH_U8500
+       select HWSPINLOCK
+       help
+         Say y here to support the STE Hardware Semaphore functionality, which
+         provides a synchronisation mechanism for the various processors on the
+         SoC.
+
+         If unsure, say N.
+
+endmenu
index 5729a3f7ed3dd33db9e77757a7c15f1aea4fb0e9..93eb64b664863b58b3cb1fb0c32c5c04ead0dad1 100644 (file)
@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_HWSPINLOCK)               += hwspinlock_core.o
 obj-$(CONFIG_HWSPINLOCK_OMAP)          += omap_hwspinlock.o
+obj-$(CONFIG_HSEM_U8500)               += u8500_hsem.o
index 43a62714b4fba1d4799df6166caf27031082cf00..61c9cf15fa52ecd50cb1a1b5421c8fef3666f24c 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/radix-tree.h>
 #include <linux/hwspinlock.h>
 #include <linux/pm_runtime.h>
+#include <linux/mutex.h>
 
 #include "hwspinlock_internal.h"
 
 static RADIX_TREE(hwspinlock_tree, GFP_KERNEL);
 
 /*
- * Synchronization of access to the tree is achieved using this spinlock,
+ * Synchronization of access to the tree is achieved using this mutex,
  * as the radix-tree API requires that users provide all synchronisation.
+ * A mutex is needed because we're using non-atomic radix tree allocations.
  */
-static DEFINE_SPINLOCK(hwspinlock_tree_lock);
+static DEFINE_MUTEX(hwspinlock_tree_lock);
+
 
 /**
  * __hwspin_trylock() - attempt to lock a specific hwspinlock
@@ -114,7 +117,7 @@ int __hwspin_trylock(struct hwspinlock *hwlock, int mode, unsigned long *flags)
                return -EBUSY;
 
        /* try to take the hwspinlock device */
-       ret = hwlock->ops->trylock(hwlock);
+       ret = hwlock->bank->ops->trylock(hwlock);
 
        /* if hwlock is already taken, undo spin_trylock_* and exit */
        if (!ret) {
@@ -196,8 +199,8 @@ int __hwspin_lock_timeout(struct hwspinlock *hwlock, unsigned int to,
                 * Allow platform-specific relax handlers to prevent
                 * hogging the interconnect (no sleeping, though)
                 */
-               if (hwlock->ops->relax)
-                       hwlock->ops->relax(hwlock);
+               if (hwlock->bank->ops->relax)
+                       hwlock->bank->ops->relax(hwlock);
        }
 
        return ret;
@@ -242,7 +245,7 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags)
         */
        mb();
 
-       hwlock->ops->unlock(hwlock);
+       hwlock->bank->ops->unlock(hwlock);
 
        /* Undo the spin_trylock{_irq, _irqsave} called while locking */
        if (mode == HWLOCK_IRQSTATE)
@@ -254,68 +257,37 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags)
 }
 EXPORT_SYMBOL_GPL(__hwspin_unlock);
 
-/**
- * hwspin_lock_register() - register a new hw spinlock
- * @hwlock: hwspinlock to register.
- *
- * This function should be called from the underlying platform-specific
- * implementation, to register a new hwspinlock instance.
- *
- * Can be called from an atomic context (will not sleep) but not from
- * within interrupt context.
- *
- * Returns 0 on success, or an appropriate error code on failure
- */
-int hwspin_lock_register(struct hwspinlock *hwlock)
+static int hwspin_lock_register_single(struct hwspinlock *hwlock, int id)
 {
        struct hwspinlock *tmp;
        int ret;
 
-       if (!hwlock || !hwlock->ops ||
-               !hwlock->ops->trylock || !hwlock->ops->unlock) {
-               pr_err("invalid parameters\n");
-               return -EINVAL;
-       }
-
-       spin_lock_init(&hwlock->lock);
-
-       spin_lock(&hwspinlock_tree_lock);
+       mutex_lock(&hwspinlock_tree_lock);
 
-       ret = radix_tree_insert(&hwspinlock_tree, hwlock->id, hwlock);
-       if (ret)
+       ret = radix_tree_insert(&hwspinlock_tree, id, hwlock);
+       if (ret) {
+               if (ret == -EEXIST)
+                       pr_err("hwspinlock id %d already exists!\n", id);
                goto out;
+       }
 
        /* mark this hwspinlock as available */
-       tmp = radix_tree_tag_set(&hwspinlock_tree, hwlock->id,
-                                                       HWSPINLOCK_UNUSED);
+       tmp = radix_tree_tag_set(&hwspinlock_tree, id, HWSPINLOCK_UNUSED);
 
        /* self-sanity check which should never fail */
        WARN_ON(tmp != hwlock);
 
 out:
-       spin_unlock(&hwspinlock_tree_lock);
-       return ret;
+       mutex_unlock(&hwspinlock_tree_lock);
+       return ret;
 }
-EXPORT_SYMBOL_GPL(hwspin_lock_register);
 
-/**
- * hwspin_lock_unregister() - unregister an hw spinlock
- * @id: index of the specific hwspinlock to unregister
- *
- * This function should be called from the underlying platform-specific
- * implementation, to unregister an existing (and unused) hwspinlock.
- *
- * Can be called from an atomic context (will not sleep) but not from
- * within interrupt context.
- *
- * Returns the address of hwspinlock @id on success, or NULL on failure
- */
-struct hwspinlock *hwspin_lock_unregister(unsigned int id)
+static struct hwspinlock *hwspin_lock_unregister_single(unsigned int id)
 {
        struct hwspinlock *hwlock = NULL;
        int ret;
 
-       spin_lock(&hwspinlock_tree_lock);
+       mutex_lock(&hwspinlock_tree_lock);
 
        /* make sure the hwspinlock is not in use (tag is set) */
        ret = radix_tree_tag_get(&hwspinlock_tree, id, HWSPINLOCK_UNUSED);
@@ -331,9 +303,91 @@ struct hwspinlock *hwspin_lock_unregister(unsigned int id)
        }
 
 out:
-       spin_unlock(&hwspinlock_tree_lock);
+       mutex_unlock(&hwspinlock_tree_lock);
        return hwlock;
 }
+
+/**
+ * hwspin_lock_register() - register a new hw spinlock device
+ * @bank: the hwspinlock device, which usually provides numerous hw locks
+ * @dev: the backing device
+ * @ops: hwspinlock handlers for this device
+ * @base_id: id of the first hardware spinlock in this bank
+ * @num_locks: number of hwspinlocks provided by this device
+ *
+ * This function should be called from the underlying platform-specific
+ * implementation, to register a new hwspinlock device instance.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns 0 on success, or an appropriate error code on failure
+ */
+int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
+               const struct hwspinlock_ops *ops, int base_id, int num_locks)
+{
+       struct hwspinlock *hwlock;
+       int ret = 0, i;
+
+       if (!bank || !ops || !dev || !num_locks || !ops->trylock ||
+                                                       !ops->unlock) {
+               pr_err("invalid parameters\n");
+               return -EINVAL;
+       }
+
+       bank->dev = dev;
+       bank->ops = ops;
+       bank->base_id = base_id;
+       bank->num_locks = num_locks;
+
+       for (i = 0; i < num_locks; i++) {
+               hwlock = &bank->lock[i];
+
+               spin_lock_init(&hwlock->lock);
+               hwlock->bank = bank;
+
+               ret = hwspin_lock_register_single(hwlock, base_id + i);
+               if (ret)
+                       goto reg_failed;
+       }
+
+       return 0;
+
+reg_failed:
+       while (--i >= 0)
+               hwspin_lock_unregister_single(base_id + i);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(hwspin_lock_register);
+
+/**
+ * hwspin_lock_unregister() - unregister an hw spinlock device
+ * @bank: the hwspinlock device, which usually provides numerous hw locks
+ *
+ * This function should be called from the underlying platform-specific
+ * implementation, to unregister an existing (and unused) hwspinlock device.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns 0 on success, or an appropriate error code on failure
+ */
+int hwspin_lock_unregister(struct hwspinlock_device *bank)
+{
+       struct hwspinlock *hwlock, *tmp;
+       int i;
+
+       for (i = 0; i < bank->num_locks; i++) {
+               hwlock = &bank->lock[i];
+
+               tmp = hwspin_lock_unregister_single(bank->base_id + i);
+               if (!tmp)
+                       return -EBUSY;
+
+               /* self-sanity check that should never fail */
+               WARN_ON(tmp != hwlock);
+       }
+
+       return 0;
+}
 EXPORT_SYMBOL_GPL(hwspin_lock_unregister);
 
 /**
@@ -348,24 +402,25 @@ EXPORT_SYMBOL_GPL(hwspin_lock_unregister);
  */
 static int __hwspin_lock_request(struct hwspinlock *hwlock)
 {
+       struct device *dev = hwlock->bank->dev;
        struct hwspinlock *tmp;
        int ret;
 
        /* prevent underlying implementation from being removed */
-       if (!try_module_get(hwlock->owner)) {
-               dev_err(hwlock->dev, "%s: can't get owner\n", __func__);
+       if (!try_module_get(dev->driver->owner)) {
+               dev_err(dev, "%s: can't get owner\n", __func__);
                return -EINVAL;
        }
 
        /* notify PM core that power is now needed */
-       ret = pm_runtime_get_sync(hwlock->dev);
+       ret = pm_runtime_get_sync(dev);
        if (ret < 0) {
-               dev_err(hwlock->dev, "%s: can't power on device\n", __func__);
+               dev_err(dev, "%s: can't power on device\n", __func__);
                return ret;
        }
 
        /* mark hwspinlock as used, should not fail */
-       tmp = radix_tree_tag_clear(&hwspinlock_tree, hwlock->id,
+       tmp = radix_tree_tag_clear(&hwspinlock_tree, hwlock_to_id(hwlock),
                                                        HWSPINLOCK_UNUSED);
 
        /* self-sanity check that should never fail */
@@ -387,7 +442,7 @@ int hwspin_lock_get_id(struct hwspinlock *hwlock)
                return -EINVAL;
        }
 
-       return hwlock->id;
+       return hwlock_to_id(hwlock);
 }
 EXPORT_SYMBOL_GPL(hwspin_lock_get_id);
 
@@ -400,9 +455,7 @@ EXPORT_SYMBOL_GPL(hwspin_lock_get_id);
  * to the remote core before it can be used for synchronization (to get the
  * id of a given hwlock, use hwspin_lock_get_id()).
  *
- * Can be called from an atomic context (will not sleep) but not from
- * within interrupt context (simply because there is no use case for
- * that yet).
+ * Should be called from a process context (might sleep)
  *
  * Returns the address of the assigned hwspinlock, or NULL on error
  */
@@ -411,7 +464,7 @@ struct hwspinlock *hwspin_lock_request(void)
        struct hwspinlock *hwlock;
        int ret;
 
-       spin_lock(&hwspinlock_tree_lock);
+       mutex_lock(&hwspinlock_tree_lock);
 
        /* look for an unused lock */
        ret = radix_tree_gang_lookup_tag(&hwspinlock_tree, (void **)&hwlock,
@@ -431,7 +484,7 @@ struct hwspinlock *hwspin_lock_request(void)
                hwlock = NULL;
 
 out:
-       spin_unlock(&hwspinlock_tree_lock);
+       mutex_unlock(&hwspinlock_tree_lock);
        return hwlock;
 }
 EXPORT_SYMBOL_GPL(hwspin_lock_request);
@@ -445,9 +498,7 @@ EXPORT_SYMBOL_GPL(hwspin_lock_request);
  * Usually early board code will be calling this function in order to
  * reserve specific hwspinlock ids for predefined purposes.
  *
- * Can be called from an atomic context (will not sleep) but not from
- * within interrupt context (simply because there is no use case for
- * that yet).
+ * Should be called from a process context (might sleep)
  *
  * Returns the address of the assigned hwspinlock, or NULL on error
  */
@@ -456,7 +507,7 @@ struct hwspinlock *hwspin_lock_request_specific(unsigned int id)
        struct hwspinlock *hwlock;
        int ret;
 
-       spin_lock(&hwspinlock_tree_lock);
+       mutex_lock(&hwspinlock_tree_lock);
 
        /* make sure this hwspinlock exists */
        hwlock = radix_tree_lookup(&hwspinlock_tree, id);
@@ -466,7 +517,7 @@ struct hwspinlock *hwspin_lock_request_specific(unsigned int id)
        }
 
        /* sanity check (this shouldn't happen) */
-       WARN_ON(hwlock->id != id);
+       WARN_ON(hwlock_to_id(hwlock) != id);
 
        /* make sure this hwspinlock is unused */
        ret = radix_tree_tag_get(&hwspinlock_tree, id, HWSPINLOCK_UNUSED);
@@ -482,7 +533,7 @@ struct hwspinlock *hwspin_lock_request_specific(unsigned int id)
                hwlock = NULL;
 
 out:
-       spin_unlock(&hwspinlock_tree_lock);
+       mutex_unlock(&hwspinlock_tree_lock);
        return hwlock;
 }
 EXPORT_SYMBOL_GPL(hwspin_lock_request_specific);
@@ -495,14 +546,13 @@ EXPORT_SYMBOL_GPL(hwspin_lock_request_specific);
  * Should only be called with an @hwlock that was retrieved from
  * an earlier call to hwspin_lock_request{_specific}.
  *
- * Can be called from an atomic context (will not sleep) but not from
- * within interrupt context (simply because there is no use case for
- * that yet).
+ * Should be called from a process context (might sleep)
  *
  * Returns 0 on success, or an appropriate error code on failure
  */
 int hwspin_lock_free(struct hwspinlock *hwlock)
 {
+       struct device *dev = hwlock->bank->dev;
        struct hwspinlock *tmp;
        int ret;
 
@@ -511,34 +561,34 @@ int hwspin_lock_free(struct hwspinlock *hwlock)
                return -EINVAL;
        }
 
-       spin_lock(&hwspinlock_tree_lock);
+       mutex_lock(&hwspinlock_tree_lock);
 
        /* make sure the hwspinlock is used */
-       ret = radix_tree_tag_get(&hwspinlock_tree, hwlock->id,
+       ret = radix_tree_tag_get(&hwspinlock_tree, hwlock_to_id(hwlock),
                                                        HWSPINLOCK_UNUSED);
        if (ret == 1) {
-               dev_err(hwlock->dev, "%s: hwlock is already free\n", __func__);
+               dev_err(dev, "%s: hwlock is already free\n", __func__);
                dump_stack();
                ret = -EINVAL;
                goto out;
        }
 
        /* notify the underlying device that power is not needed */
-       ret = pm_runtime_put(hwlock->dev);
+       ret = pm_runtime_put(dev);
        if (ret < 0)
                goto out;
 
        /* mark this hwspinlock as available */
-       tmp = radix_tree_tag_set(&hwspinlock_tree, hwlock->id,
+       tmp = radix_tree_tag_set(&hwspinlock_tree, hwlock_to_id(hwlock),
                                                        HWSPINLOCK_UNUSED);
 
        /* sanity check (this shouldn't happen) */
        WARN_ON(tmp != hwlock);
 
-       module_put(hwlock->owner);
+       module_put(dev->driver->owner);
 
 out:
-       spin_unlock(&hwspinlock_tree_lock);
+       mutex_unlock(&hwspinlock_tree_lock);
        return ret;
 }
 EXPORT_SYMBOL_GPL(hwspin_lock_free);
index 69935e6b93e526ab4c8b2d94fa9655dda152b7e5..d26f78b8f214d226de7bf8c1b523257f2956a7e0 100644 (file)
@@ -21,6 +21,8 @@
 #include <linux/spinlock.h>
 #include <linux/device.h>
 
+struct hwspinlock_device;
+
 /**
  * struct hwspinlock_ops - platform-specific hwspinlock handlers
  *
@@ -39,23 +41,37 @@ struct hwspinlock_ops {
 
 /**
  * struct hwspinlock - this struct represents a single hwspinlock instance
- *
- * @dev: underlying device, will be used to invoke runtime PM api
- * @ops: platform-specific hwspinlock handlers
- * @id: a global, unique, system-wide, index of the lock.
+ * @bank: the hwspinlock_device structure which owns this lock
  * @lock: initialized and used by hwspinlock core
- * @owner: underlying implementation module, used to maintain module ref count
- *
- * Note: currently simplicity was opted for, but later we can squeeze some
- * memory bytes by grouping the dev, ops and owner members in a single
- * per-platform struct, and have all hwspinlocks point at it.
+ * @priv: private data, owned by the underlying platform-specific hwspinlock driver
  */
 struct hwspinlock {
+       struct hwspinlock_device *bank;
+       spinlock_t lock;
+       void *priv;
+};
+
+/**
+ * struct hwspinlock_device - a device which usually spans numerous hwspinlocks
+ * @dev: underlying device, will be used to invoke runtime PM api
+ * @ops: platform-specific hwspinlock handlers
+ * @base_id: id index of the first lock in this device
+ * @num_locks: number of locks in this device
+ * @lock: dynamically allocated array of 'struct hwspinlock'
+ */
+struct hwspinlock_device {
        struct device *dev;
        const struct hwspinlock_ops *ops;
-       int id;
-       spinlock_t lock;
-       struct module *owner;
+       int base_id;
+       int num_locks;
+       struct hwspinlock lock[0];
 };
 
+static inline int hwlock_to_id(struct hwspinlock *hwlock)
+{
+       int local_id = hwlock - &hwlock->bank->lock[0];
+
+       return hwlock->bank->base_id + local_id;
+}
+
 #endif /* __HWSPINLOCK_HWSPINLOCK_H */
index a8f02734c026f0691a0e7b3847af07429bb937bd..887d34effb3a3a40d4636a03e1783dc1e9f56584 100644 (file)
 #define SPINLOCK_NOTTAKEN              (0)     /* free */
 #define SPINLOCK_TAKEN                 (1)     /* locked */
 
-#define to_omap_hwspinlock(lock)       \
-       container_of(lock, struct omap_hwspinlock, lock)
-
-struct omap_hwspinlock {
-       struct hwspinlock lock;
-       void __iomem *addr;
-};
-
-struct omap_hwspinlock_state {
-       int num_locks;                  /* Total number of locks in system */
-       void __iomem *io_base;          /* Mapped base address */
-};
-
 static int omap_hwspinlock_trylock(struct hwspinlock *lock)
 {
-       struct omap_hwspinlock *omap_lock = to_omap_hwspinlock(lock);
+       void __iomem *lock_addr = lock->priv;
 
        /* attempt to acquire the lock by reading its value */
-       return (SPINLOCK_NOTTAKEN == readl(omap_lock->addr));
+       return (SPINLOCK_NOTTAKEN == readl(lock_addr));
 }
 
 static void omap_hwspinlock_unlock(struct hwspinlock *lock)
 {
-       struct omap_hwspinlock *omap_lock = to_omap_hwspinlock(lock);
+       void __iomem *lock_addr = lock->priv;
 
        /* release the lock by writing 0 to it */
-       writel(SPINLOCK_NOTTAKEN, omap_lock->addr);
+       writel(SPINLOCK_NOTTAKEN, lock_addr);
 }
 
 /*
@@ -93,26 +80,23 @@ static const struct hwspinlock_ops omap_hwspinlock_ops = {
 
 static int __devinit omap_hwspinlock_probe(struct platform_device *pdev)
 {
-       struct omap_hwspinlock *omap_lock;
-       struct omap_hwspinlock_state *state;
-       struct hwspinlock *lock;
+       struct hwspinlock_pdata *pdata = pdev->dev.platform_data;
+       struct hwspinlock_device *bank;
+       struct hwspinlock *hwlock;
        struct resource *res;
        void __iomem *io_base;
-       int i, ret;
+       int num_locks, i, ret;
+
+       if (!pdata)
+               return -ENODEV;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!res)
                return -ENODEV;
 
-       state = kzalloc(sizeof(*state), GFP_KERNEL);
-       if (!state)
-               return -ENOMEM;
-
        io_base = ioremap(res->start, resource_size(res));
-       if (!io_base) {
-               ret = -ENOMEM;
-               goto free_state;
-       }
+       if (!io_base)
+               return -ENOMEM;
 
        /* Determine number of locks */
        i = readl(io_base + SYSSTATUS_OFFSET);
@@ -124,10 +108,18 @@ static int __devinit omap_hwspinlock_probe(struct platform_device *pdev)
                goto iounmap_base;
        }
 
-       state->num_locks = i * 32;
-       state->io_base = io_base;
+       num_locks = i * 32; /* actual number of locks in this device */
+
+       bank = kzalloc(sizeof(*bank) + num_locks * sizeof(*hwlock), GFP_KERNEL);
+       if (!bank) {
+               ret = -ENOMEM;
+               goto iounmap_base;
+       }
+
+       platform_set_drvdata(pdev, bank);
 
-       platform_set_drvdata(pdev, state);
+       for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++)
+               hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i;
 
        /*
         * runtime PM will make sure the clock of this module is
@@ -135,79 +127,46 @@ static int __devinit omap_hwspinlock_probe(struct platform_device *pdev)
         */
        pm_runtime_enable(&pdev->dev);
 
-       for (i = 0; i < state->num_locks; i++) {
-               omap_lock = kzalloc(sizeof(*omap_lock), GFP_KERNEL);
-               if (!omap_lock) {
-                       ret = -ENOMEM;
-                       goto free_locks;
-               }
-
-               omap_lock->lock.dev = &pdev->dev;
-               omap_lock->lock.owner = THIS_MODULE;
-               omap_lock->lock.id = i;
-               omap_lock->lock.ops = &omap_hwspinlock_ops;
-               omap_lock->addr = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i;
-
-               ret = hwspin_lock_register(&omap_lock->lock);
-               if (ret) {
-                       kfree(omap_lock);
-                       goto free_locks;
-               }
-       }
+       ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops,
+                                               pdata->base_id, num_locks);
+       if (ret)
+               goto reg_fail;
 
        return 0;
 
-free_locks:
-       while (--i >= 0) {
-               lock = hwspin_lock_unregister(i);
-               /* this should't happen, but let's give our best effort */
-               if (!lock) {
-                       dev_err(&pdev->dev, "%s: cleanups failed\n", __func__);
-                       continue;
-               }
-               omap_lock = to_omap_hwspinlock(lock);
-               kfree(omap_lock);
-       }
+reg_fail:
        pm_runtime_disable(&pdev->dev);
+       kfree(bank);
 iounmap_base:
        iounmap(io_base);
-free_state:
-       kfree(state);
        return ret;
 }
 
-static int omap_hwspinlock_remove(struct platform_device *pdev)
+static int __devexit omap_hwspinlock_remove(struct platform_device *pdev)
 {
-       struct omap_hwspinlock_state *state = platform_get_drvdata(pdev);
-       struct hwspinlock *lock;
-       struct omap_hwspinlock *omap_lock;
-       int i;
-
-       for (i = 0; i < state->num_locks; i++) {
-               lock = hwspin_lock_unregister(i);
-               /* this shouldn't happen at this point. if it does, at least
-                * don't continue with the remove */
-               if (!lock) {
-                       dev_err(&pdev->dev, "%s: failed on %d\n", __func__, i);
-                       return -EBUSY;
-               }
-
-               omap_lock = to_omap_hwspinlock(lock);
-               kfree(omap_lock);
+       struct hwspinlock_device *bank = platform_get_drvdata(pdev);
+       void __iomem *io_base = bank->lock[0].priv - LOCK_BASE_OFFSET;
+       int ret;
+
+       ret = hwspin_lock_unregister(bank);
+       if (ret) {
+               dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret);
+               return ret;
        }
 
        pm_runtime_disable(&pdev->dev);
-       iounmap(state->io_base);
-       kfree(state);
+       iounmap(io_base);
+       kfree(bank);
 
        return 0;
 }
 
 static struct platform_driver omap_hwspinlock_driver = {
        .probe          = omap_hwspinlock_probe,
-       .remove         = omap_hwspinlock_remove,
+       .remove         = __devexit_p(omap_hwspinlock_remove),
        .driver         = {
                .name   = "omap_hwspinlock",
+               .owner  = THIS_MODULE,
        },
 };
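
The conversion above illustrates the general recipe for moving a driver to
the banked API: allocate a single hwspinlock_device with a trailing array of
locks, point each lock's priv at its register, then register the whole bank
in one call. A condensed sketch of that pattern (my_ops and the u32 register
stride are assumptions for illustration):

	static int my_bank_probe(struct platform_device *pdev,
				 void __iomem *io_base, int num_locks)
	{
		struct hwspinlock_pdata *pdata = pdev->dev.platform_data;
		struct hwspinlock_device *bank;
		int i;

		bank = kzalloc(sizeof(*bank) + num_locks * sizeof(bank->lock[0]),
			       GFP_KERNEL);
		if (!bank)
			return -ENOMEM;

		for (i = 0; i < num_locks; i++)
			bank->lock[i].priv = io_base + sizeof(u32) * i;

		return hwspin_lock_register(bank, &pdev->dev, &my_ops,
					    pdata->base_id, num_locks);
	}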
 
diff --git a/drivers/hwspinlock/u8500_hsem.c b/drivers/hwspinlock/u8500_hsem.c
new file mode 100644 (file)
index 0000000..143461a
--- /dev/null
@@ -0,0 +1,198 @@
+/*
+ * u8500 HWSEM driver
+ *
+ * Copyright (C) 2010-2011 ST-Ericsson
+ *
+ * Implements u8500 semaphore handling for protocol 1, no interrupts.
+ *
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ * Heavily borrowed from the work of:
+ *   Simon Que <sque@ti.com>
+ *   Hari Kanigeri <h-kanigeri2@ti.com>
+ *   Ohad Ben-Cohen <ohad@wizery.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/hwspinlock.h>
+#include <linux/platform_device.h>
+
+#include "hwspinlock_internal.h"
+
+/*
+ * Implementation of STE's HSem protocol 1 without interrupts.
+ * The only masterID we allow is '0x01' to force people to use
+ * HSems for synchronisation between processors rather than processes
+ * on the ARM core.
+ */
+
+#define U8500_MAX_SEMAPHORE            32      /* a total of 32 semaphores */
+#define RESET_SEMAPHORE                        (0)     /* free */
+
+/*
+ * CPU ID for master running u8500 kernel.
+ * Hwspinlocks should only be used to synchronise operations
+ * between the Cortex A9 core and the other CPUs.  Hence
+ * forcing the masterID to a preset value.
+ */
+#define HSEM_MASTER_ID                 0x01
+
+#define HSEM_REGISTER_OFFSET           0x08
+
+#define HSEM_CTRL_REG                  0x00
+#define HSEM_ICRALL                    0x90
+#define HSEM_PROTOCOL_1                        0x01
+
+static int u8500_hsem_trylock(struct hwspinlock *lock)
+{
+       void __iomem *lock_addr = lock->priv;
+
+       writel(HSEM_MASTER_ID, lock_addr);
+
+       /* read back only the first 4 bits and compare them to the
+        * masterID; if equal we own the semaphore, otherwise
+        * someone else holds it.
+        */
+       return (HSEM_MASTER_ID == (0x0F & readl(lock_addr)));
+}
+
+static void u8500_hsem_unlock(struct hwspinlock *lock)
+{
+       void __iomem *lock_addr = lock->priv;
+
+       /* release the lock by writing 0 to it */
+       writel(RESET_SEMAPHORE, lock_addr);
+}
+
+/*
+ * u8500: what value is recommended here?
+ */
+static void u8500_hsem_relax(struct hwspinlock *lock)
+{
+       ndelay(50);
+}
+
+static const struct hwspinlock_ops u8500_hwspinlock_ops = {
+       .trylock        = u8500_hsem_trylock,
+       .unlock         = u8500_hsem_unlock,
+       .relax          = u8500_hsem_relax,
+};
+
+static int __devinit u8500_hsem_probe(struct platform_device *pdev)
+{
+       struct hwspinlock_pdata *pdata = pdev->dev.platform_data;
+       struct hwspinlock_device *bank;
+       struct hwspinlock *hwlock;
+       struct resource *res;
+       void __iomem *io_base;
+       int i, ret, num_locks = U8500_MAX_SEMAPHORE;
+       ulong val;
+
+       if (!pdata)
+               return -ENODEV;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
+
+       io_base = ioremap(res->start, resource_size(res));
+       if (!io_base)
+               return -ENOMEM;
+
+       /* make sure protocol 1 is selected */
+       val = readl(io_base + HSEM_CTRL_REG);
+       writel((val & ~HSEM_PROTOCOL_1), io_base + HSEM_CTRL_REG);
+
+       /* clear all interrupts */
+       writel(0xFFFF, io_base + HSEM_ICRALL);
+
+       bank = kzalloc(sizeof(*bank) + num_locks * sizeof(*hwlock), GFP_KERNEL);
+       if (!bank) {
+               ret = -ENOMEM;
+               goto iounmap_base;
+       }
+
+       platform_set_drvdata(pdev, bank);
+
+       for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++)
+               hwlock->priv = io_base + HSEM_REGISTER_OFFSET + sizeof(u32) * i;
+
+       /* no pm needed for HSem but required to comply with hwspinlock core */
+       pm_runtime_enable(&pdev->dev);
+
+       ret = hwspin_lock_register(bank, &pdev->dev, &u8500_hwspinlock_ops,
+                                               pdata->base_id, num_locks);
+       if (ret)
+               goto reg_fail;
+
+       return 0;
+
+reg_fail:
+       pm_runtime_disable(&pdev->dev);
+       kfree(bank);
+iounmap_base:
+       iounmap(io_base);
+       return ret;
+}
+
+static int __devexit u8500_hsem_remove(struct platform_device *pdev)
+{
+       struct hwspinlock_device *bank = platform_get_drvdata(pdev);
+       void __iomem *io_base = bank->lock[0].priv - HSEM_REGISTER_OFFSET;
+       int ret;
+
+       /* clear all interrupts */
+       writel(0xFFFF, io_base + HSEM_ICRALL);
+
+       ret = hwspin_lock_unregister(bank);
+       if (ret) {
+               dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret);
+               return ret;
+       }
+
+       pm_runtime_disable(&pdev->dev);
+       iounmap(io_base);
+       kfree(bank);
+
+       return 0;
+}
+
+static struct platform_driver u8500_hsem_driver = {
+       .probe          = u8500_hsem_probe,
+       .remove         = __devexit_p(u8500_hsem_remove),
+       .driver         = {
+               .name   = "u8500_hsem",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init u8500_hsem_init(void)
+{
+       return platform_driver_register(&u8500_hsem_driver);
+}
+/* board init code might need to reserve hwspinlocks for predefined purposes */
+postcore_initcall(u8500_hsem_init);
+
+static void __exit u8500_hsem_exit(void)
+{
+       platform_driver_unregister(&u8500_hsem_driver);
+}
+module_exit(u8500_hsem_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Hardware Spinlock driver for u8500");
+MODULE_AUTHOR("Mathieu Poirier <mathieu.poirier@linaro.org>");
index b2b85629d074a9343c153225dba2c6aa4b0bc424..a3afac4be73463190c0d687efe480caa27db8d90 100644 (file)
@@ -110,7 +110,6 @@ config I2C_I801
 config I2C_ISCH
        tristate "Intel SCH SMBus 1.0"
        depends on PCI
-       select MFD_CORE
        select LPC_SCH
        help
          Say Y here if you want to use SMBus controller on the Intel SCH
@@ -301,7 +300,7 @@ config I2C_AT91
 
 config I2C_AU1550
        tristate "Au1550/Au1200 SMBus interface"
-       depends on SOC_AU1550 || SOC_AU1200
+       depends on MIPS_ALCHEMY
        help
          If you say yes to this option, support will be included for the
          Au1550 and Au1200 SMBus interface.
index 4f757a2da8cc938f19f95eeb5d9d7859570a506e..f314d7f433d321f4abd236cca1723443db778aa7 100644 (file)
@@ -36,7 +36,7 @@
 #include <linux/i2c.h>
 #include <linux/slab.h>
 
-#include <asm/mach-au1x00/au1xxx.h>
+#include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1xxx_psc.h>
 
 #define PSC_SEL                0x00
index 76b6d98bd29532ca0ce006ac675542dc13a3a102..5a26584934caa4c542f4a53582b5a03fcc28f634 100644 (file)
@@ -677,19 +677,19 @@ config BLK_DEV_IDE_PMAC_ATA100FIRST
 
 config BLK_DEV_IDE_AU1XXX
        bool "IDE for AMD Alchemy Au1200"
-       depends on SOC_AU1200
+       depends on MIPS_ALCHEMY
        select IDE_XFER_MODE
 choice
        prompt "IDE Mode for AMD Alchemy Au1200"
        default BLK_DEV_IDE_AU1XXX_PIO_DBDMA
-       depends on SOC_AU1200 && BLK_DEV_IDE_AU1XXX
+       depends on BLK_DEV_IDE_AU1XXX
 
 config BLK_DEV_IDE_AU1XXX_PIO_DBDMA
        bool "PIO+DbDMA IDE for AMD Alchemy Au1200"
 
 config BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
        bool "MDMA2+DbDMA IDE for AMD Alchemy Au1200"
-       depends on SOC_AU1200 && BLK_DEV_IDE_AU1XXX
+       depends on BLK_DEV_IDE_AU1XXX
 endchoice
 
 config BLK_DEV_IDE_TX4938
index b26c23416fa775c715eaede8f5cca4b551440b4f..259786ca8b7551c8e0afd03d843e6eef5b4ea9bb 100644 (file)
 #include <linux/ide.h>
 #include <linux/scatterlist.h>
 
-#include <asm/mach-au1x00/au1xxx.h>
+#include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
 #include <asm/mach-au1x00/au1xxx_ide.h>
 
 #define DRV_NAME       "au1200-ide"
 #define DRV_AUTHOR     "Enrico Walther <enrico.walther@amd.com> / Pete Popov <ppopov@embeddedalley.com>"
 
+#ifndef IDE_REG_SHIFT
+#define IDE_REG_SHIFT 5
+#endif
+
 /* enable the burstmode in the dbdma */
 #define IDE_AU1XXX_BURSTMODE   1
 
@@ -317,10 +321,11 @@ static void auide_ddma_rx_callback(int irq, void *param)
 }
 #endif /* end CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA */
 
-static void auide_init_dbdma_dev(dbdev_tab_t *dev, u32 dev_id, u32 tsize, u32 devwidth, u32 flags)
+static void auide_init_dbdma_dev(dbdev_tab_t *dev, u32 dev_id, u32 tsize,
+                                u32 devwidth, u32 flags, u32 regbase)
 {
        dev->dev_id          = dev_id;
-       dev->dev_physaddr    = (u32)IDE_PHYS_ADDR;
+       dev->dev_physaddr    = CPHYSADDR(regbase);
        dev->dev_intlevel    = 0;
        dev->dev_intpolarity = 0;
        dev->dev_tsize       = tsize;
@@ -344,7 +349,7 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
        dbdev_tab_t source_dev_tab, target_dev_tab;
        u32 dev_id, tsize, devwidth, flags;
 
-       dev_id   = IDE_DDMA_REQ;
+       dev_id   = hwif->ddma_id;
 
        tsize    =  8; /*  1 */
        devwidth = 32; /* 16 */
@@ -356,20 +361,17 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
 #endif
 
        /* setup dev_tab for tx channel */
-       auide_init_dbdma_dev( &source_dev_tab,
-                             dev_id,
-                             tsize, devwidth, DEV_FLAGS_OUT | flags);
+       auide_init_dbdma_dev(&source_dev_tab, dev_id, tsize, devwidth,
+                            DEV_FLAGS_OUT | flags, auide->regbase);
        auide->tx_dev_id = au1xxx_ddma_add_device( &source_dev_tab );
 
-       auide_init_dbdma_dev( &source_dev_tab,
-                             dev_id,
-                             tsize, devwidth, DEV_FLAGS_IN | flags);
+       auide_init_dbdma_dev(&source_dev_tab, dev_id, tsize, devwidth,
+                            DEV_FLAGS_IN | flags, auide->regbase);
        auide->rx_dev_id = au1xxx_ddma_add_device( &source_dev_tab );
        
        /* We also need to add a target device for the DMA */
-       auide_init_dbdma_dev( &target_dev_tab,
-                             (u32)DSCR_CMD0_ALWAYS,
-                             tsize, devwidth, DEV_FLAGS_ANYUSE);
+       auide_init_dbdma_dev(&target_dev_tab, (u32)DSCR_CMD0_ALWAYS, tsize,
+                            devwidth, DEV_FLAGS_ANYUSE, auide->regbase);
        auide->target_dev_id = au1xxx_ddma_add_device(&target_dev_tab); 
  
        /* Get a channel for TX */
@@ -411,14 +413,12 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
 #endif
 
        /* setup dev_tab for tx channel */
-       auide_init_dbdma_dev( &source_dev_tab,
-                             (u32)DSCR_CMD0_ALWAYS,
-                             8, 32, DEV_FLAGS_OUT | flags);
+       auide_init_dbdma_dev(&source_dev_tab, (u32)DSCR_CMD0_ALWAYS, 8, 32,
+                            DEV_FLAGS_OUT | flags, auide->regbase);
        auide->tx_dev_id = au1xxx_ddma_add_device( &source_dev_tab );
 
-       auide_init_dbdma_dev( &source_dev_tab,
-                             (u32)DSCR_CMD0_ALWAYS,
-                             8, 32, DEV_FLAGS_IN | flags);
+       auide_init_dbdma_dev(&source_dev_tab, (u32)DSCR_CMD0_ALWAYS, 8, 32,
+                            DEV_FLAGS_IN | flags, auide->regbase);
        auide->rx_dev_id = au1xxx_ddma_add_device( &source_dev_tab );
        
        /* Get a channel for TX */
@@ -540,6 +540,14 @@ static int au_ide_probe(struct platform_device *dev)
                goto out;
        }
 
+       res = platform_get_resource(dev, IORESOURCE_DMA, 0);
+       if (!res) {
+               pr_debug("%s: no DDMA ID resource\n", DRV_NAME);
+               ret = -ENODEV;
+               goto out;
+       }
+       ahwif->ddma_id = res->start;
+
        memset(&hw, 0, sizeof(hw));
        auide_setup_ports(&hw, ahwif);
        hw.irq = ahwif->irq;
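
With this change, board code must publish the DBDMA request ID as an
IORESOURCE_DMA entry on the IDE platform device. An illustrative fragment
(the AU1200_DSCR_CMD0_DMA_REQ1 request ID is an assumption; use whichever
matches the board):

	static struct resource au1200_ide_res[] = {
		/* [0] IORESOURCE_MEM window, [1] IORESOURCE_IRQ as before */
		[2] = {
			.start	= AU1200_DSCR_CMD0_DMA_REQ1,
			.end	= AU1200_DSCR_CMD0_DMA_REQ1,
			.flags	= IORESOURCE_DMA,
		},
	};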
index 23e82e46656dbe08236577dd0d5fdf04ca209f61..001b147c7f9576d8ac9d2212685e160bb491eabb 100644 (file)
@@ -3,7 +3,7 @@
 #
 
 menu "Input device support"
-       depends on !S390
+       depends on !S390 && !UML
 
 config INPUT
        tristate "Generic input layer (needed for keyboard, mouse, ...)" if EXPERT
index 56aa465d1b9975d663504490740922183db64449..22d875fde53a229f9ff8278f396a632534564849 100644 (file)
@@ -134,6 +134,16 @@ config INPUT_MAX8925_ONKEY
          To compile this driver as a module, choose M here: the module
          will be called max8925_onkey.
 
+config INPUT_MC13783_PWRBUTTON
+       tristate "MC13783 ON buttons"
+       depends on MFD_MC13783
+       help
+         Support the ON buttons of MC13783 PMIC as an input device
+         reporting power button status.
+
+         To compile this driver as a module, choose M here: the module
+         will be called mc13783-pwrbutton.
+
 config INPUT_MMA8450
        tristate "MMA8450 - Freescale's 3-Axis, 8/12-bit Digital Accelerometer"
        depends on I2C
index 62dcd79d548f8bf0acab7e88f517a61047357ddc..a244fc6a781cbafebb5043359f5ba1033a1518d2 100644 (file)
@@ -28,6 +28,7 @@ obj-$(CONFIG_INPUT_KEYSPAN_REMOTE)    += keyspan_remote.o
 obj-$(CONFIG_INPUT_KXTJ9)              += kxtj9.o
 obj-$(CONFIG_INPUT_M68K_BEEP)          += m68kspkr.o
 obj-$(CONFIG_INPUT_MAX8925_ONKEY)      += max8925_onkey.o
+obj-$(CONFIG_INPUT_MC13783_PWRBUTTON)  += mc13783-pwrbutton.o
 obj-$(CONFIG_INPUT_MMA8450)            += mma8450.o
 obj-$(CONFIG_INPUT_MPU3050)            += mpu3050.o
 obj-$(CONFIG_INPUT_PCAP)               += pcap_keys.o
diff --git a/drivers/input/misc/mc13783-pwrbutton.c b/drivers/input/misc/mc13783-pwrbutton.c
new file mode 100644 (file)
index 0000000..09b0522
--- /dev/null
@@ -0,0 +1,282 @@
+/**
+ * Copyright (C) 2011 Philippe Rétornaz
+ *
+ * Based on twl4030-pwrbutton driver by:
+ *     Peter De Schrijver <peter.de-schrijver@nokia.com>
+ *     Felipe Balbi <felipe.balbi@nokia.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335  USA
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/mc13783.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+struct mc13783_pwrb {
+       struct input_dev *pwr;
+       struct mc13xxx *mc13783;
+#define MC13783_PWRB_B1_POL_INVERT     (1 << 0)
+#define MC13783_PWRB_B2_POL_INVERT     (1 << 1)
+#define MC13783_PWRB_B3_POL_INVERT     (1 << 2)
+       int flags;
+       unsigned short keymap[3];
+};
+
+#define MC13783_REG_INTERRUPT_SENSE_1          5
+#define MC13783_IRQSENSE1_ONOFD1S              (1 << 3)
+#define MC13783_IRQSENSE1_ONOFD2S              (1 << 4)
+#define MC13783_IRQSENSE1_ONOFD3S              (1 << 5)
+
+#define MC13783_REG_POWER_CONTROL_2            15
+#define MC13783_POWER_CONTROL_2_ON1BDBNC       4
+#define MC13783_POWER_CONTROL_2_ON2BDBNC       6
+#define MC13783_POWER_CONTROL_2_ON3BDBNC       8
+#define MC13783_POWER_CONTROL_2_ON1BRSTEN      (1 << 1)
+#define MC13783_POWER_CONTROL_2_ON2BRSTEN      (1 << 2)
+#define MC13783_POWER_CONTROL_2_ON3BRSTEN      (1 << 3)
+
+static irqreturn_t button_irq(int irq, void *_priv)
+{
+       struct mc13783_pwrb *priv = _priv;
+       int val;
+
+       mc13xxx_irq_ack(priv->mc13783, irq);
+       mc13xxx_reg_read(priv->mc13783, MC13783_REG_INTERRUPT_SENSE_1, &val);
+
+       switch (irq) {
+       case MC13783_IRQ_ONOFD1:
+               val = val & MC13783_IRQSENSE1_ONOFD1S ? 1 : 0;
+               if (priv->flags & MC13783_PWRB_B1_POL_INVERT)
+                       val ^= 1;
+               input_report_key(priv->pwr, priv->keymap[0], val);
+               break;
+
+       case MC13783_IRQ_ONOFD2:
+               val = val & MC13783_IRQSENSE1_ONOFD2S ? 1 : 0;
+               if (priv->flags & MC13783_PWRB_B2_POL_INVERT)
+                       val ^= 1;
+               input_report_key(priv->pwr, priv->keymap[1], val);
+               break;
+
+       case MC13783_IRQ_ONOFD3:
+               val = val & MC13783_IRQSENSE1_ONOFD3S ? 1 : 0;
+               if (priv->flags & MC13783_PWRB_B3_POL_INVERT)
+                       val ^= 1;
+               input_report_key(priv->pwr, priv->keymap[2], val);
+               break;
+       }
+
+       input_sync(priv->pwr);
+
+       return IRQ_HANDLED;
+}
+
+static int __devinit mc13783_pwrbutton_probe(struct platform_device *pdev)
+{
+       const struct mc13xxx_buttons_platform_data *pdata;
+       struct mc13xxx *mc13783 = dev_get_drvdata(pdev->dev.parent);
+       struct input_dev *pwr;
+       struct mc13783_pwrb *priv;
+       int err = 0;
+       int reg = 0;
+
+       pdata = dev_get_platdata(&pdev->dev);
+       if (!pdata) {
+               dev_err(&pdev->dev, "missing platform data\n");
+               return -ENODEV;
+       }
+
+       pwr = input_allocate_device();
+       if (!pwr) {
+               dev_dbg(&pdev->dev, "Can't allocate power button\n");
+               return -ENOMEM;
+       }
+
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv) {
+               err = -ENOMEM;
+               dev_dbg(&pdev->dev, "Can't allocate power button\n");
+               goto free_input_dev;
+       }
+
+       reg |= (pdata->b1on_flags & 0x3) << MC13783_POWER_CONTROL_2_ON1BDBNC;
+       reg |= (pdata->b2on_flags & 0x3) << MC13783_POWER_CONTROL_2_ON2BDBNC;
+       reg |= (pdata->b3on_flags & 0x3) << MC13783_POWER_CONTROL_2_ON3BDBNC;
+
+       priv->pwr = pwr;
+       priv->mc13783 = mc13783;
+
+       mc13xxx_lock(mc13783);
+
+       if (pdata->b1on_flags & MC13783_BUTTON_ENABLE) {
+               priv->keymap[0] = pdata->b1on_key;
+               if (pdata->b1on_key != KEY_RESERVED)
+                       __set_bit(pdata->b1on_key, pwr->keybit);
+
+               if (pdata->b1on_flags & MC13783_BUTTON_POL_INVERT)
+                       priv->flags |= MC13783_PWRB_B1_POL_INVERT;
+
+               if (pdata->b1on_flags & MC13783_BUTTON_RESET_EN)
+                       reg |= MC13783_POWER_CONTROL_2_ON1BRSTEN;
+
+               err = mc13xxx_irq_request(mc13783, MC13783_IRQ_ONOFD1,
+                                         button_irq, "b1on", priv);
+               if (err) {
+                       dev_dbg(&pdev->dev, "Can't request irq\n");
+                       goto free_priv;
+               }
+       }
+
+       if (pdata->b2on_flags & MC13783_BUTTON_ENABLE) {
+               priv->keymap[1] = pdata->b2on_key;
+               if (pdata->b2on_key != KEY_RESERVED)
+                       __set_bit(pdata->b2on_key, pwr->keybit);
+
+               if (pdata->b2on_flags & MC13783_BUTTON_POL_INVERT)
+                       priv->flags |= MC13783_PWRB_B2_POL_INVERT;
+
+               if (pdata->b2on_flags & MC13783_BUTTON_RESET_EN)
+                       reg |= MC13783_POWER_CONTROL_2_ON2BRSTEN;
+
+               err = mc13xxx_irq_request(mc13783, MC13783_IRQ_ONOFD2,
+                                         button_irq, "b2on", priv);
+               if (err) {
+                       dev_dbg(&pdev->dev, "Can't request irq\n");
+                       goto free_irq_b1;
+               }
+       }
+
+       if (pdata->b3on_flags & MC13783_BUTTON_ENABLE) {
+               priv->keymap[2] = pdata->b3on_key;
+               if (pdata->b3on_key != KEY_RESERVED)
+                       __set_bit(pdata->b3on_key, pwr->keybit);
+
+               if (pdata->b3on_flags & MC13783_BUTTON_POL_INVERT)
+                       priv->flags |= MC13783_PWRB_B3_POL_INVERT;
+
+               if (pdata->b3on_flags & MC13783_BUTTON_RESET_EN)
+                       reg |= MC13783_POWER_CONTROL_2_ON3BRSTEN;
+
+               err = mc13xxx_irq_request(mc13783, MC13783_IRQ_ONOFD3,
+                                         button_irq, "b3on", priv);
+               if (err) {
+                       dev_dbg(&pdev->dev, "Can't request irq: %d\n", err);
+                       goto free_irq_b2;
+               }
+       }
+
+       mc13xxx_reg_rmw(mc13783, MC13783_REG_POWER_CONTROL_2, 0x3FE, reg);
+
+       mc13xxx_unlock(mc13783);
+
+       pwr->name = "mc13783_pwrbutton";
+       pwr->phys = "mc13783_pwrbutton/input0";
+       pwr->dev.parent = &pdev->dev;
+
+       pwr->keycode = priv->keymap;
+       pwr->keycodemax = ARRAY_SIZE(priv->keymap);
+       pwr->keycodesize = sizeof(priv->keymap[0]);
+       __set_bit(EV_KEY, pwr->evbit);
+
+       err = input_register_device(pwr);
+       if (err) {
+               dev_dbg(&pdev->dev, "Can't register power button: %d\n", err);
+               goto free_irq;
+       }
+
+       platform_set_drvdata(pdev, priv);
+
+       return 0;
+
+free_irq:
+       mc13xxx_lock(mc13783);
+
+       if (pdata->b3on_flags & MC13783_BUTTON_ENABLE)
+               mc13xxx_irq_free(mc13783, MC13783_IRQ_ONOFD3, priv);
+
+free_irq_b2:
+       if (pdata->b2on_flags & MC13783_BUTTON_ENABLE)
+               mc13xxx_irq_free(mc13783, MC13783_IRQ_ONOFD2, priv);
+
+free_irq_b1:
+       if (pdata->b1on_flags & MC13783_BUTTON_ENABLE)
+               mc13xxx_irq_free(mc13783, MC13783_IRQ_ONOFD1, priv);
+
+free_priv:
+       mc13xxx_unlock(mc13783);
+       kfree(priv);
+
+free_input_dev:
+       input_free_device(pwr);
+
+       return err;
+}
+
+static int __devexit mc13783_pwrbutton_remove(struct platform_device *pdev)
+{
+       struct mc13783_pwrb *priv = platform_get_drvdata(pdev);
+       const struct mc13xxx_buttons_platform_data *pdata;
+
+       pdata = dev_get_platdata(&pdev->dev);
+
+       mc13xxx_lock(priv->mc13783);
+
+       if (pdata->b3on_flags & MC13783_BUTTON_ENABLE)
+               mc13xxx_irq_free(priv->mc13783, MC13783_IRQ_ONOFD3, priv);
+       if (pdata->b2on_flags & MC13783_BUTTON_ENABLE)
+               mc13xxx_irq_free(priv->mc13783, MC13783_IRQ_ONOFD2, priv);
+       if (pdata->b1on_flags & MC13783_BUTTON_ENABLE)
+               mc13xxx_irq_free(priv->mc13783, MC13783_IRQ_ONOFD1, priv);
+
+       mc13xxx_unlock(priv->mc13783);
+
+       input_unregister_device(priv->pwr);
+       kfree(priv);
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+struct platform_driver mc13783_pwrbutton_driver = {
+       .probe          = mc13783_pwrbutton_probe,
+       .remove         = __devexit_p(mc13783_pwrbutton_remove),
+       .driver         = {
+               .name   = "mc13783-pwrbutton",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init mc13783_pwrbutton_init(void)
+{
+       return platform_driver_register(&mc13783_pwrbutton_driver);
+}
+module_init(mc13783_pwrbutton_init);
+
+static void __exit mc13783_pwrbutton_exit(void)
+{
+       platform_driver_unregister(&mc13783_pwrbutton_driver);
+}
+module_exit(mc13783_pwrbutton_exit);
+
+MODULE_ALIAS("platform:mc13783-pwrbutton");
+MODULE_DESCRIPTION("MC13783 Power Button");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Philippe Retornaz");
index c5bc62d85bb6e1987ebd1377888de3f1ddfa6a79..ede02743eac1997b426a7970f96429a486ea4059 100644 (file)
@@ -35,7 +35,7 @@ MODULE_PARM_DESC(sample_tolerance,
 
 struct mc13783_ts_priv {
        struct input_dev *idev;
-       struct mc13783 *mc13783;
+       struct mc13xxx *mc13xxx;
        struct delayed_work work;
        struct workqueue_struct *workq;
        unsigned int sample[4];
@@ -45,7 +45,7 @@ static irqreturn_t mc13783_ts_handler(int irq, void *data)
 {
        struct mc13783_ts_priv *priv = data;
 
-       mc13783_irq_ack(priv->mc13783, irq);
+       mc13xxx_irq_ack(priv->mc13xxx, irq);
 
        /*
         * Kick off reading coordinates. Note that if work happens already
@@ -121,10 +121,10 @@ static void mc13783_ts_work(struct work_struct *work)
 {
        struct mc13783_ts_priv *priv =
                container_of(work, struct mc13783_ts_priv, work.work);
-       unsigned int mode = MC13783_ADC_MODE_TS;
+       unsigned int mode = MC13XXX_ADC_MODE_TS;
        unsigned int channel = 12;
 
-       if (mc13783_adc_do_conversion(priv->mc13783,
+       if (mc13xxx_adc_do_conversion(priv->mc13xxx,
                                mode, channel, priv->sample) == 0)
                mc13783_ts_report_sample(priv);
 }
@@ -134,21 +134,21 @@ static int mc13783_ts_open(struct input_dev *dev)
        struct mc13783_ts_priv *priv = input_get_drvdata(dev);
        int ret;
 
-       mc13783_lock(priv->mc13783);
+       mc13xxx_lock(priv->mc13xxx);
 
-       mc13783_irq_ack(priv->mc13783, MC13783_IRQ_TS);
+       mc13xxx_irq_ack(priv->mc13xxx, MC13XXX_IRQ_TS);
 
-       ret = mc13783_irq_request(priv->mc13783, MC13783_IRQ_TS,
+       ret = mc13xxx_irq_request(priv->mc13xxx, MC13XXX_IRQ_TS,
                mc13783_ts_handler, MC13783_TS_NAME, priv);
        if (ret)
                goto out;
 
-       ret = mc13783_reg_rmw(priv->mc13783, MC13783_ADC0,
-                       MC13783_ADC0_TSMOD_MASK, MC13783_ADC0_TSMOD0);
+       ret = mc13xxx_reg_rmw(priv->mc13xxx, MC13XXX_ADC0,
+                       MC13XXX_ADC0_TSMOD_MASK, MC13XXX_ADC0_TSMOD0);
        if (ret)
-               mc13783_irq_free(priv->mc13783, MC13783_IRQ_TS, priv);
+               mc13xxx_irq_free(priv->mc13xxx, MC13XXX_IRQ_TS, priv);
 out:
-       mc13783_unlock(priv->mc13783);
+       mc13xxx_unlock(priv->mc13xxx);
        return ret;
 }
 
@@ -156,11 +156,11 @@ static void mc13783_ts_close(struct input_dev *dev)
 {
        struct mc13783_ts_priv *priv = input_get_drvdata(dev);
 
-       mc13783_lock(priv->mc13783);
-       mc13783_reg_rmw(priv->mc13783, MC13783_ADC0,
-                       MC13783_ADC0_TSMOD_MASK, 0);
-       mc13783_irq_free(priv->mc13783, MC13783_IRQ_TS, priv);
-       mc13783_unlock(priv->mc13783);
+       mc13xxx_lock(priv->mc13xxx);
+       mc13xxx_reg_rmw(priv->mc13xxx, MC13XXX_ADC0,
+                       MC13XXX_ADC0_TSMOD_MASK, 0);
+       mc13xxx_irq_free(priv->mc13xxx, MC13XXX_IRQ_TS, priv);
+       mc13xxx_unlock(priv->mc13xxx);
 
        cancel_delayed_work_sync(&priv->work);
 }
@@ -177,7 +177,7 @@ static int __init mc13783_ts_probe(struct platform_device *pdev)
                goto err_free_mem;
 
        INIT_DELAYED_WORK(&priv->work, mc13783_ts_work);
-       priv->mc13783 = dev_get_drvdata(pdev->dev.parent);
+       priv->mc13xxx = dev_get_drvdata(pdev->dev.parent);
        priv->idev = idev;
 
        /*
index 4fb601670de386679862c4bc268ae9646b873527..a233ed53913a67bb67150ce9d35abf6f8aa42297 100644 (file)
@@ -5,7 +5,7 @@
 menuconfig ISDN
        bool "ISDN support"
        depends on NET
-       depends on !S390
+       depends on !S390 && !UML
        ---help---
          ISDN ("Integrated Services Digital Network", called RNIS in France)
          is a fully digital telephone service that can be used for voice and
index b0d9ab1f21c07b22b1a770ebaab18a87b43202e2..6a8acf65777da9447aa4388b2893fe43328769a3 100644 (file)
@@ -353,7 +353,7 @@ l3dss1_parse_facility(struct PStack *st, struct l3_process *pc,
                                 { l3dss1_dummy_invoke(st, cr, id, ident, p, nlen);
                                    return;
                                  } 
-#ifdef HISAX_DE_AOC
+#ifdef CONFIG_DE_AOC
                        {
 
 #define FOO1(s,a,b) \
@@ -422,9 +422,9 @@ l3dss1_parse_facility(struct PStack *st, struct l3_process *pc,
 #undef FOO1
 
                        }
-#else  /* not HISAX_DE_AOC */
+#else  /* not CONFIG_DE_AOC */
                         l3_debug(st, "invoke break");
-#endif /* not HISAX_DE_AOC */
+#endif /* not CONFIG_DE_AOC */
                        break;
                case 2: /* return result */
                         /* if no process available handle separately */ 
index 22f847c890c9bfc23fb5a2f5d7b7658d5551773e..fbd5d88ccd8f3fc07ef18777f05bbf8fede75994 100644 (file)
@@ -107,9 +107,10 @@ static int __devinit asic3_led_probe(struct platform_device *pdev)
        }
 
        led->cdev->name = led->name;
-       led->cdev->default_trigger = led->default_trigger;
+       led->cdev->flags = LED_CORE_SUSPENDRESUME;
        led->cdev->brightness_set = brightness_set;
        led->cdev->blink_set = blink_set;
+       led->cdev->default_trigger = led->default_trigger;
 
        ret = led_classdev_register(&pdev->dev, led->cdev);
        if (ret < 0)
@@ -136,12 +137,44 @@ static int __devexit asic3_led_remove(struct platform_device *pdev)
        return mfd_cell_disable(pdev);
 }
 
+static int asic3_led_suspend(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       const struct mfd_cell *cell = mfd_get_cell(pdev);
+       int ret;
+
+       ret = 0;
+       if (cell->suspend)
+               ret = (*cell->suspend)(pdev);
+
+       return ret;
+}
+
+static int asic3_led_resume(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       const struct mfd_cell *cell = mfd_get_cell(pdev);
+       int ret;
+
+       ret = 0;
+       if (cell->resume)
+               ret = (*cell->resume)(pdev);
+
+       return ret;
+}
+
+static const struct dev_pm_ops asic3_led_pm_ops = {
+       .suspend        = asic3_led_suspend,
+       .resume         = asic3_led_resume,
+};
+
 static struct platform_driver asic3_led_driver = {
        .probe          = asic3_led_probe,
        .remove         = __devexit_p(asic3_led_remove),
        .driver         = {
                .name   = "leds-asic3",
                .owner  = THIS_MODULE,
+               .pm     = &asic3_led_pm_ops,
        },
 };
 
index f369e56d6547ced1e92444c8f22cad3e3e09060d..b3393a9f21398c119ad0440b68cf58953bc002a5 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/leds.h>
 #include <linux/workqueue.h>
-#include <linux/mfd/mc13783.h>
+#include <linux/mfd/mc13xxx.h>
 #include <linux/slab.h>
 
 struct mc13783_led {
        struct led_classdev     cdev;
        struct work_struct      work;
-       struct mc13783          *master;
+       struct mc13xxx          *master;
        enum led_brightness     new_brightness;
        int                     id;
 };
@@ -111,11 +111,11 @@ static void mc13783_led_work(struct work_struct *work)
                break;
        }
 
-       mc13783_lock(led->master);
+       mc13xxx_lock(led->master);
 
-       mc13783_reg_rmw(led->master, reg, mask, value);
+       mc13xxx_reg_rmw(led->master, reg, mask, value);
 
-       mc13783_unlock(led->master);
+       mc13xxx_unlock(led->master);
 }
 
 static void mc13783_led_set(struct led_classdev *led_cdev,
@@ -172,23 +172,23 @@ static int __devinit mc13783_led_setup(struct mc13783_led *led, int max_current)
                break;
        }
 
-       mc13783_lock(led->master);
+       mc13xxx_lock(led->master);
 
-       ret = mc13783_reg_rmw(led->master, reg, mask << shift,
+       ret = mc13xxx_reg_rmw(led->master, reg, mask << shift,
                                                value << shift);
 
-       mc13783_unlock(led->master);
+       mc13xxx_unlock(led->master);
        return ret;
 }
 
 static int __devinit mc13783_leds_prepare(struct platform_device *pdev)
 {
-       struct mc13783_leds_platform_data *pdata = dev_get_platdata(&pdev->dev);
-       struct mc13783 *dev = dev_get_drvdata(pdev->dev.parent);
+       struct mc13xxx_leds_platform_data *pdata = dev_get_platdata(&pdev->dev);
+       struct mc13xxx *dev = dev_get_drvdata(pdev->dev.parent);
        int ret = 0;
        int reg = 0;
 
-       mc13783_lock(dev);
+       mc13xxx_lock(dev);
 
        if (pdata->flags & MC13783_LED_TC1HALF)
                reg |= MC13783_LED_C1_TC1HALF_BIT;
@@ -196,7 +196,7 @@ static int __devinit mc13783_leds_prepare(struct platform_device *pdev)
        if (pdata->flags & MC13783_LED_SLEWLIMTC)
                reg |= MC13783_LED_Cx_SLEWLIM_BIT;
 
-       ret = mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_1, reg);
+       ret = mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_1, reg);
        if (ret)
                goto out;
 
@@ -206,7 +206,7 @@ static int __devinit mc13783_leds_prepare(struct platform_device *pdev)
        if (pdata->flags & MC13783_LED_SLEWLIMBL)
                reg |= MC13783_LED_Cx_SLEWLIM_BIT;
 
-       ret = mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_2, reg);
+       ret = mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_2, reg);
        if (ret)
                goto out;
 
@@ -216,7 +216,7 @@ static int __devinit mc13783_leds_prepare(struct platform_device *pdev)
        if (pdata->flags & MC13783_LED_TRIODE_TC1)
                reg |= MC13783_LED_Cx_TRIODE_TC_BIT;
 
-       ret = mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_3, reg);
+       ret = mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_3, reg);
        if (ret)
                goto out;
 
@@ -226,7 +226,7 @@ static int __devinit mc13783_leds_prepare(struct platform_device *pdev)
        if (pdata->flags & MC13783_LED_TRIODE_TC2)
                reg |= MC13783_LED_Cx_TRIODE_TC_BIT;
 
-       ret = mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_4, reg);
+       ret = mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_4, reg);
        if (ret)
                goto out;
 
@@ -236,7 +236,7 @@ static int __devinit mc13783_leds_prepare(struct platform_device *pdev)
        if (pdata->flags & MC13783_LED_TRIODE_TC3)
                reg |= MC13783_LED_Cx_TRIODE_TC_BIT;
 
-       ret = mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_5, reg);
+       ret = mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_5, reg);
        if (ret)
                goto out;
 
@@ -255,17 +255,17 @@ static int __devinit mc13783_leds_prepare(struct platform_device *pdev)
        reg |= (pdata->abref & MC13783_LED_C0_ABREF_MASK) <<
                                                        MC13783_LED_C0_ABREF;
 
-       ret = mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_0, reg);
+       ret = mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_0, reg);
 
 out:
-       mc13783_unlock(dev);
+       mc13xxx_unlock(dev);
        return ret;
 }
 
 static int __devinit mc13783_led_probe(struct platform_device *pdev)
 {
-       struct mc13783_leds_platform_data *pdata = dev_get_platdata(&pdev->dev);
-       struct mc13783_led_platform_data *led_cur;
+       struct mc13xxx_leds_platform_data *pdata = dev_get_platdata(&pdev->dev);
+       struct mc13xxx_led_platform_data *led_cur;
        struct mc13783_led *led, *led_dat;
        int ret, i;
        int init_led = 0;
@@ -351,9 +351,9 @@ err_free:
 
 static int __devexit mc13783_led_remove(struct platform_device *pdev)
 {
-       struct mc13783_leds_platform_data *pdata = dev_get_platdata(&pdev->dev);
+       struct mc13xxx_leds_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct mc13783_led *led = platform_get_drvdata(pdev);
-       struct mc13783 *dev = dev_get_drvdata(pdev->dev.parent);
+       struct mc13xxx *dev = dev_get_drvdata(pdev->dev.parent);
        int i;
 
        for (i = 0; i < pdata->num_leds; i++) {
@@ -361,16 +361,16 @@ static int __devexit mc13783_led_remove(struct platform_device *pdev)
                cancel_work_sync(&led[i].work);
        }
 
-       mc13783_lock(dev);
+       mc13xxx_lock(dev);
 
-       mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_0, 0);
-       mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_1, 0);
-       mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_2, 0);
-       mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_3, 0);
-       mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_4, 0);
-       mc13783_reg_write(dev, MC13783_REG_LED_CONTROL_5, 0);
+       mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_0, 0);
+       mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_1, 0);
+       mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_2, 0);
+       mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_3, 0);
+       mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_4, 0);
+       mc13xxx_reg_write(dev, MC13783_REG_LED_CONTROL_5, 0);
 
-       mc13783_unlock(dev);
+       mc13xxx_unlock(dev);
 
        kfree(led);
        return 0;
index f75a66e7d312a8e1efed3d2db37ba1c554785e94..faa4741df6d3f7fca87e79b8a5f788b5d1b87218 100644 (file)
@@ -208,6 +208,16 @@ config DM_DEBUG
 
          If unsure, say N.
 
+config DM_BUFIO
+       tristate
+       depends on BLK_DEV_DM && EXPERIMENTAL
+       ---help---
+         This interface allows you to do buffered I/O on a device and acts
+         as a cache, holding recently-read blocks in memory and performing
+         delayed writes.
+
+source "drivers/md/persistent-data/Kconfig"
+
 config DM_CRYPT
        tristate "Crypt target support"
        depends on BLK_DEV_DM
@@ -233,6 +243,32 @@ config DM_SNAPSHOT
        ---help---
          Allow volume managers to take writable snapshots of a device.
 
+config DM_THIN_PROVISIONING
+       tristate "Thin provisioning target (EXPERIMENTAL)"
+       depends on BLK_DEV_DM && EXPERIMENTAL
+       select DM_PERSISTENT_DATA
+       ---help---
+         Provides thin provisioning and snapshots that share a data store.
+
+config DM_DEBUG_BLOCK_STACK_TRACING
+       boolean "Keep stack trace of thin provisioning block lock holders"
+       depends on STACKTRACE_SUPPORT && DM_THIN_PROVISIONING
+       select STACKTRACE
+       ---help---
+         Enable this for messages that may help debug problems with the
+         block manager locking used by thin provisioning.
+
+         If unsure, say N.
+
+config DM_DEBUG_SPACE_MAPS
+       boolean "Extra validation for thin provisioning space maps"
+       depends on DM_THIN_PROVISIONING
+       ---help---
+         Enable this for messages that may help debug problems with the
+         space maps used by thin provisioning.
+
+         If unsure, say N.
+
 config DM_MIRROR
        tristate "Mirror target"
        depends on BLK_DEV_DM
index 448838b1f92a492e30c0c8b86f2ffb014a5e27cf..046860c7a1666e54f5cdd33a40901fb2b84dd090 100644 (file)
@@ -10,6 +10,7 @@ dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
 dm-mirror-y    += dm-raid1.o
 dm-log-userspace-y \
                += dm-log-userspace-base.o dm-log-userspace-transfer.o
+dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
 md-mod-y       += md.o bitmap.o
 raid456-y      += raid5.o
 
@@ -27,6 +28,7 @@ obj-$(CONFIG_MD_MULTIPATH)    += multipath.o
 obj-$(CONFIG_MD_FAULTY)                += faulty.o
 obj-$(CONFIG_BLK_DEV_MD)       += md-mod.o
 obj-$(CONFIG_BLK_DEV_DM)       += dm-mod.o
+obj-$(CONFIG_DM_BUFIO)         += dm-bufio.o
 obj-$(CONFIG_DM_CRYPT)         += dm-crypt.o
 obj-$(CONFIG_DM_DELAY)         += dm-delay.o
 obj-$(CONFIG_DM_FLAKEY)                += dm-flakey.o
@@ -34,10 +36,12 @@ obj-$(CONFIG_DM_MULTIPATH)  += dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_MULTIPATH_QL)  += dm-queue-length.o
 obj-$(CONFIG_DM_MULTIPATH_ST)  += dm-service-time.o
 obj-$(CONFIG_DM_SNAPSHOT)      += dm-snapshot.o
+obj-$(CONFIG_DM_PERSISTENT_DATA)       += persistent-data/
 obj-$(CONFIG_DM_MIRROR)                += dm-mirror.o dm-log.o dm-region-hash.o
 obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
 obj-$(CONFIG_DM_ZERO)          += dm-zero.o
 obj-$(CONFIG_DM_RAID)  += dm-raid.o
+obj-$(CONFIG_DM_THIN_PROVISIONING)     += dm-thin-pool.o
 
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs                    += dm-uevent.o
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
new file mode 100644 (file)
index 0000000..cb24666
--- /dev/null
@@ -0,0 +1,1699 @@
+/*
+ * Copyright (C) 2009-2011 Red Hat, Inc.
+ *
+ * Author: Mikulas Patocka <mpatocka@redhat.com>
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-bufio.h"
+
+#include <linux/device-mapper.h>
+#include <linux/dm-io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/shrinker.h>
+
+#define DM_MSG_PREFIX "bufio"
+
+/*
+ * Memory management policy:
+ *     Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory
+ *     or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower).
+ *     Always allocate at least DM_BUFIO_MIN_BUFFERS buffers.
+ *     Start background writeback when there are DM_BUFIO_WRITEBACK_PERCENT
+ *     dirty buffers.
+ */
+#define DM_BUFIO_MIN_BUFFERS           8
+
+#define DM_BUFIO_MEMORY_PERCENT                2
+#define DM_BUFIO_VMALLOC_PERCENT       25
+#define DM_BUFIO_WRITEBACK_PERCENT     75
+
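+/*
+ * Worked example (illustrative figures): with 2 GiB of main memory and
+ * 128 MiB of vmalloc space, the default limit is
+ * min(2 GiB * 2%, 128 MiB * 25%) = min(~41 MiB, 32 MiB) = 32 MiB;
+ * background writeback starts once 75% of that (24 MiB) is dirty.
+ */
+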
+/*
+ * Check buffer ages in this interval (seconds)
+ */
+#define DM_BUFIO_WORK_TIMER_SECS       10
+
+/*
+ * Free buffers when they are older than this (seconds)
+ */
+#define DM_BUFIO_DEFAULT_AGE_SECS      60
+
+/*
+ * The number of bvec entries that are embedded directly in the buffer.
+ * If the chunk size is larger, dm-io is used to do the io.
+ */
+#define DM_BUFIO_INLINE_VECS           16
+
+/*
+ * Buffer hash
+ */
+#define DM_BUFIO_HASH_BITS     20
+#define DM_BUFIO_HASH(block) \
+       ((((block) >> DM_BUFIO_HASH_BITS) ^ (block)) & \
+        ((1 << DM_BUFIO_HASH_BITS) - 1))
+
+/*
+ * Don't try to use kmem_cache_alloc for blocks larger than this.
+ * For explanation, see alloc_buffer_data below.
+ */
+#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1)
+#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT  (PAGE_SIZE << (MAX_ORDER - 1))
+
+/*
+ * dm_buffer->list_mode
+ */
+#define LIST_CLEAN     0
+#define LIST_DIRTY     1
+#define LIST_SIZE      2
+
+/*
+ * Linking of buffers:
+ *     All buffers are linked to cache_hash with their hash_list field.
+ *
+ *     Clean buffers that are not being written (B_WRITING not set)
+ *     are linked to lru[LIST_CLEAN] with their lru_list field.
+ *
+ *     Dirty and clean buffers that are being written are linked to
+ *     lru[LIST_DIRTY] with their lru_list field. When the write
+ *     finishes, the buffer cannot be relinked immediately (because we
+ *     are in an interrupt context and relinking requires process
+ *     context), so some clean-not-writing buffers can be held on
+ *     dirty_lru too.  They are later added to lru in the process
+ *     context.
+ */
+struct dm_bufio_client {
+       struct mutex lock;
+
+       struct list_head lru[LIST_SIZE];
+       unsigned long n_buffers[LIST_SIZE];
+
+       struct block_device *bdev;
+       unsigned block_size;
+       unsigned char sectors_per_block_bits;
+       unsigned char pages_per_block_bits;
+       unsigned char blocks_per_page_bits;
+       unsigned aux_size;
+       void (*alloc_callback)(struct dm_buffer *);
+       void (*write_callback)(struct dm_buffer *);
+
+       struct dm_io_client *dm_io;
+
+       struct list_head reserved_buffers;
+       unsigned need_reserved_buffers;
+
+       struct hlist_head *cache_hash;
+       wait_queue_head_t free_buffer_wait;
+
+       int async_write_error;
+
+       struct list_head client_list;
+       struct shrinker shrinker;
+};
+
+/*
+ * Buffer state bits.
+ */
+#define B_READING      0
+#define B_WRITING      1
+#define B_DIRTY                2
+
+/*
+ * Describes how the block was allocated:
+ * kmem_cache_alloc(), __get_free_pages() or vmalloc().
+ * See the comment at alloc_buffer_data.
+ */
+enum data_mode {
+       DATA_MODE_SLAB = 0,
+       DATA_MODE_GET_FREE_PAGES = 1,
+       DATA_MODE_VMALLOC = 2,
+       DATA_MODE_LIMIT = 3
+};
+
+struct dm_buffer {
+       struct hlist_node hash_list;
+       struct list_head lru_list;
+       sector_t block;
+       void *data;
+       enum data_mode data_mode;
+       unsigned char list_mode;                /* LIST_* */
+       unsigned hold_count;
+       int read_error;
+       int write_error;
+       unsigned long state;
+       unsigned long last_accessed;
+       struct dm_bufio_client *c;
+       struct bio bio;
+       struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
+};
+
+/*----------------------------------------------------------------*/
+
+static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT];
+static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT];
+
+static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
+{
+       unsigned ret = c->blocks_per_page_bits - 1;
+
+       BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches));
+
+       return ret;
+}
+
+#define DM_BUFIO_CACHE(c)      (dm_bufio_caches[dm_bufio_cache_index(c)])
+#define DM_BUFIO_CACHE_NAME(c) (dm_bufio_cache_names[dm_bufio_cache_index(c)])
+
+#define dm_bufio_in_request()  (!!current->bio_list)
+
+static void dm_bufio_lock(struct dm_bufio_client *c)
+{
+       mutex_lock_nested(&c->lock, dm_bufio_in_request());
+}
+
+static int dm_bufio_trylock(struct dm_bufio_client *c)
+{
+       return mutex_trylock(&c->lock);
+}
+
+static void dm_bufio_unlock(struct dm_bufio_client *c)
+{
+       mutex_unlock(&c->lock);
+}
+
+/*
+ * FIXME Move to sched.h?
+ */
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+#  define dm_bufio_cond_resched()              \
+do {                                           \
+       if (unlikely(need_resched()))           \
+               _cond_resched();                \
+} while (0)
+#else
+#  define dm_bufio_cond_resched()                do { } while (0)
+#endif
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Default cache size: available memory divided by the ratio.
+ */
+static unsigned long dm_bufio_default_cache_size;
+
+/*
+ * Total cache size set by the user.
+ */
+static unsigned long dm_bufio_cache_size;
+
+/*
+ * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change
+ * at any time.  If it disagrees, the user has changed cache size.
+ */
+static unsigned long dm_bufio_cache_size_latch;
+
+static DEFINE_SPINLOCK(param_spinlock);
+
+/*
+ * Buffers are freed after this timeout
+ */
+static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
+
+static unsigned long dm_bufio_peak_allocated;
+static unsigned long dm_bufio_allocated_kmem_cache;
+static unsigned long dm_bufio_allocated_get_free_pages;
+static unsigned long dm_bufio_allocated_vmalloc;
+static unsigned long dm_bufio_current_allocated;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Per-client cache: dm_bufio_cache_size / dm_bufio_client_count
+ */
+static unsigned long dm_bufio_cache_size_per_client;
+
+/*
+ * The current number of clients.
+ */
+static int dm_bufio_client_count;
+
+/*
+ * The list of all clients.
+ */
+static LIST_HEAD(dm_bufio_all_clients);
+
+/*
+ * This mutex protects dm_bufio_cache_size_latch,
+ * dm_bufio_cache_size_per_client and dm_bufio_client_count
+ */
+static DEFINE_MUTEX(dm_bufio_clients_lock);
+
+/*----------------------------------------------------------------*/
+
+static void adjust_total_allocated(enum data_mode data_mode, long diff)
+{
+       static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
+               &dm_bufio_allocated_kmem_cache,
+               &dm_bufio_allocated_get_free_pages,
+               &dm_bufio_allocated_vmalloc,
+       };
+
+       spin_lock(&param_spinlock);
+
+       *class_ptr[data_mode] += diff;
+
+       dm_bufio_current_allocated += diff;
+
+       if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
+               dm_bufio_peak_allocated = dm_bufio_current_allocated;
+
+       spin_unlock(&param_spinlock);
+}
+
+/*
+ * Change the number of clients and recalculate per-client limit.
+ */
+static void __cache_size_refresh(void)
+{
+       BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock));
+       BUG_ON(dm_bufio_client_count < 0);
+
+       dm_bufio_cache_size_latch = dm_bufio_cache_size;
+
+       barrier();
+
+       /*
+        * Use default if set to 0 and report the actual cache size used.
+        */
+       if (!dm_bufio_cache_size_latch) {
+               (void)cmpxchg(&dm_bufio_cache_size, 0,
+                             dm_bufio_default_cache_size);
+               dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
+       }
+
+       dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch /
+                                        (dm_bufio_client_count ? : 1);
+}
+
+/*
+ * Allocating buffer data.
+ *
+ * Small buffers are allocated with kmem_cache, to use space optimally.
+ *
+ * For large buffers, we choose between get_free_pages and vmalloc.
+ * Each has advantages and disadvantages.
+ *
+ * __get_free_pages can randomly fail if the memory is fragmented.
+ * __vmalloc won't randomly fail, but vmalloc space is limited (it may be
+ * as low as 128M) so using it for caching is not appropriate.
+ *
+ * If the allocation may fail we use __get_free_pages. Memory fragmentation
+ * won't have a fatal effect here, but it just causes flushes of some other
+ * buffers and more I/O will be performed. Don't use __get_free_pages if it
+ * always fails (i.e. order >= MAX_ORDER).
+ *
+ * If the allocation shouldn't fail we use __vmalloc. This is only for the
+ * initial reserve allocation, so there's no risk of wasting all vmalloc
+ * space.
+ */
+static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
+                              enum data_mode *data_mode)
+{
+       if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
+               *data_mode = DATA_MODE_SLAB;
+               return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
+       }
+
+       if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT &&
+           gfp_mask & __GFP_NORETRY) {
+               *data_mode = DATA_MODE_GET_FREE_PAGES;
+               return (void *)__get_free_pages(gfp_mask,
+                                               c->pages_per_block_bits);
+       }
+
+       *data_mode = DATA_MODE_VMALLOC;
+       return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+}
+
+/*
+ * Free buffer's data.
+ */
+static void free_buffer_data(struct dm_bufio_client *c,
+                            void *data, enum data_mode data_mode)
+{
+       switch (data_mode) {
+       case DATA_MODE_SLAB:
+               kmem_cache_free(DM_BUFIO_CACHE(c), data);
+               break;
+
+       case DATA_MODE_GET_FREE_PAGES:
+               free_pages((unsigned long)data, c->pages_per_block_bits);
+               break;
+
+       case DATA_MODE_VMALLOC:
+               vfree(data);
+               break;
+
+       default:
+               DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d",
+                      data_mode);
+               BUG();
+       }
+}
+
+/*
+ * Allocate buffer and its data.
+ */
+static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
+{
+       struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size,
+                                     gfp_mask);
+
+       if (!b)
+               return NULL;
+
+       b->c = c;
+
+       b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
+       if (!b->data) {
+               kfree(b);
+               return NULL;
+       }
+
+       adjust_total_allocated(b->data_mode, (long)c->block_size);
+
+       return b;
+}
+
+/*
+ * Free buffer and its data.
+ */
+static void free_buffer(struct dm_buffer *b)
+{
+       struct dm_bufio_client *c = b->c;
+
+       adjust_total_allocated(b->data_mode, -(long)c->block_size);
+
+       free_buffer_data(c, b->data, b->data_mode);
+       kfree(b);
+}
+
+/*
+ * Link buffer to the hash list and clean or dirty queue.
+ */
+static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
+{
+       struct dm_bufio_client *c = b->c;
+
+       c->n_buffers[dirty]++;
+       b->block = block;
+       b->list_mode = dirty;
+       list_add(&b->lru_list, &c->lru[dirty]);
+       hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]);
+       b->last_accessed = jiffies;
+}
+
+/*
+ * Unlink buffer from the hash list and dirty or clean queue.
+ */
+static void __unlink_buffer(struct dm_buffer *b)
+{
+       struct dm_bufio_client *c = b->c;
+
+       BUG_ON(!c->n_buffers[b->list_mode]);
+
+       c->n_buffers[b->list_mode]--;
+       hlist_del(&b->hash_list);
+       list_del(&b->lru_list);
+}
+
+/*
+ * Place the buffer to the head of dirty or clean LRU queue.
+ */
+static void __relink_lru(struct dm_buffer *b, int dirty)
+{
+       struct dm_bufio_client *c = b->c;
+
+       BUG_ON(!c->n_buffers[b->list_mode]);
+
+       c->n_buffers[b->list_mode]--;
+       c->n_buffers[dirty]++;
+       b->list_mode = dirty;
+       list_del(&b->lru_list);
+       list_add(&b->lru_list, &c->lru[dirty]);
+}
+
+/*----------------------------------------------------------------
+ * Submit I/O on the buffer.
+ *
+ * The bio interface is faster, but it has some problems:
+ *     the vector list is limited (increasing this limit increases
+ *     memory consumption per buffer, so it is not viable);
+ *
+ *     the memory must be direct-mapped, not vmalloced;
+ *
+ *     the I/O driver can reject requests spuriously if it thinks that
+ *     the requests are too big for the device or if they cross a
+ *     controller-defined memory boundary.
+ *
+ * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and
+ * it is not vmalloced, try using the bio interface.
+ *
+ * If the buffer is big, if it is vmalloced, or if the underlying device
+ * rejects the bio because it is too large, use the dm-io layer to do the
+ * I/O.  The dm-io layer splits the I/O into multiple requests, avoiding
+ * the above shortcomings.
+ *--------------------------------------------------------------*/
+
+/*
+ * dm-io completion routine. It just calls b->bio.bi_end_io, pretending
+ * that the request was handled directly through the bio interface.
+ */
+static void dmio_complete(unsigned long error, void *context)
+{
+       struct dm_buffer *b = context;
+
+       b->bio.bi_end_io(&b->bio, error ? -EIO : 0);
+}
+
+static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
+                    bio_end_io_t *end_io)
+{
+       int r;
+       struct dm_io_request io_req = {
+               .bi_rw = rw,
+               .notify.fn = dmio_complete,
+               .notify.context = b,
+               .client = b->c->dm_io,
+       };
+       struct dm_io_region region = {
+               .bdev = b->c->bdev,
+               .sector = block << b->c->sectors_per_block_bits,
+               .count = b->c->block_size >> SECTOR_SHIFT,
+       };
+
+       if (b->data_mode != DATA_MODE_VMALLOC) {
+               io_req.mem.type = DM_IO_KMEM;
+               io_req.mem.ptr.addr = b->data;
+       } else {
+               io_req.mem.type = DM_IO_VMA;
+               io_req.mem.ptr.vma = b->data;
+       }
+
+       b->bio.bi_end_io = end_io;
+
+       r = dm_io(&io_req, 1, &region, NULL);
+       if (r)
+               end_io(&b->bio, r);
+}
+
+static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
+                          bio_end_io_t *end_io)
+{
+       char *ptr;
+       int len;
+
+       bio_init(&b->bio);
+       b->bio.bi_io_vec = b->bio_vec;
+       b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
+       b->bio.bi_sector = block << b->c->sectors_per_block_bits;
+       b->bio.bi_bdev = b->c->bdev;
+       b->bio.bi_end_io = end_io;
+
+       /*
+        * We assume that if len >= PAGE_SIZE, ptr is page-aligned.
+        * If len < PAGE_SIZE, the buffer doesn't cross a page boundary.
+        */
+       ptr = b->data;
+       len = b->c->block_size;
+
+       if (len >= PAGE_SIZE)
+               BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
+       else
+               BUG_ON((unsigned long)ptr & (len - 1));
+
+       do {
+               if (!bio_add_page(&b->bio, virt_to_page(ptr),
+                                 len < PAGE_SIZE ? len : PAGE_SIZE,
+                                 virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
+                       BUG_ON(b->c->block_size <= PAGE_SIZE);
+                       use_dmio(b, rw, block, end_io);
+                       return;
+               }
+
+               len -= PAGE_SIZE;
+               ptr += PAGE_SIZE;
+       } while (len > 0);
+
+       submit_bio(rw, &b->bio);
+}
+
+static void submit_io(struct dm_buffer *b, int rw, sector_t block,
+                     bio_end_io_t *end_io)
+{
+       if (rw == WRITE && b->c->write_callback)
+               b->c->write_callback(b);
+
+       if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
+           b->data_mode != DATA_MODE_VMALLOC)
+               use_inline_bio(b, rw, block, end_io);
+       else
+               use_dmio(b, rw, block, end_io);
+}
+
+/*----------------------------------------------------------------
+ * Writing dirty buffers
+ *--------------------------------------------------------------*/
+
+/*
+ * The endio routine for write.
+ *
+ * Set the error, clear the B_WRITING bit and wake anyone who was waiting
+ * on it.
+ */
+static void write_endio(struct bio *bio, int error)
+{
+       struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
+
+       b->write_error = error;
+       if (error) {
+               struct dm_bufio_client *c = b->c;
+               (void)cmpxchg(&c->async_write_error, 0, error);
+       }
+
+       BUG_ON(!test_bit(B_WRITING, &b->state));
+
+       smp_mb__before_clear_bit();
+       clear_bit(B_WRITING, &b->state);
+       smp_mb__after_clear_bit();
+
+       wake_up_bit(&b->state, B_WRITING);
+}
+
+/*
+ * This function is called when wait_on_bit is actually waiting.
+ */
+static int do_io_schedule(void *word)
+{
+       io_schedule();
+
+       return 0;
+}
+
+/*
+ * Initiate a write on a dirty buffer, but don't wait for it.
+ *
+ * - If the buffer is not dirty, exit.
+ * - If there is some previous write going on, wait for it to finish (we
+ *   can't have two writes on the same buffer simultaneously).
+ * - Submit our write and don't wait on it. We set B_WRITING indicating
+ *   that there is a write in progress.
+ */
+static void __write_dirty_buffer(struct dm_buffer *b)
+{
+       if (!test_bit(B_DIRTY, &b->state))
+               return;
+
+       clear_bit(B_DIRTY, &b->state);
+       wait_on_bit_lock(&b->state, B_WRITING,
+                        do_io_schedule, TASK_UNINTERRUPTIBLE);
+
+       submit_io(b, WRITE, b->block, write_endio);
+}
+
+/*
+ * Wait until any activity on the buffer finishes.  Possibly write the
+ * buffer if it is dirty.  When this function finishes, there is no I/O
+ * running on the buffer and the buffer is not dirty.
+ */
+static void __make_buffer_clean(struct dm_buffer *b)
+{
+       BUG_ON(b->hold_count);
+
+       if (!b->state)  /* fast case */
+               return;
+
+       wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+       __write_dirty_buffer(b);
+       wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * Find some buffer that is not held by anybody, clean it, unlink it and
+ * return it.
+ */
+static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
+{
+       struct dm_buffer *b;
+
+       list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) {
+               BUG_ON(test_bit(B_WRITING, &b->state));
+               BUG_ON(test_bit(B_DIRTY, &b->state));
+
+               if (!b->hold_count) {
+                       __make_buffer_clean(b);
+                       __unlink_buffer(b);
+                       return b;
+               }
+               dm_bufio_cond_resched();
+       }
+
+       list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
+               BUG_ON(test_bit(B_READING, &b->state));
+
+               if (!b->hold_count) {
+                       __make_buffer_clean(b);
+                       __unlink_buffer(b);
+                       return b;
+               }
+               dm_bufio_cond_resched();
+       }
+
+       return NULL;
+}
+
+/*
+ * Wait until some other threads free some buffer or release hold count on
+ * some buffer.
+ *
+ * This function is entered with c->lock held, drops it and regains it
+ * before exiting.
+ */
+static void __wait_for_free_buffer(struct dm_bufio_client *c)
+{
+       DECLARE_WAITQUEUE(wait, current);
+
+       add_wait_queue(&c->free_buffer_wait, &wait);
+       set_task_state(current, TASK_UNINTERRUPTIBLE);
+       dm_bufio_unlock(c);
+
+       io_schedule();
+
+       set_task_state(current, TASK_RUNNING);
+       remove_wait_queue(&c->free_buffer_wait, &wait);
+
+       dm_bufio_lock(c);
+}
+
+/*
+ * Allocate a new buffer. If the allocation is not possible, wait until
+ * some other thread frees a buffer.
+ *
+ * May drop the lock and regain it.
+ */
+static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c)
+{
+       struct dm_buffer *b;
+
+       /*
+        * dm-bufio is resistant to allocation failures (it just keeps
+        * one buffer reserved in case all the allocations fail).
+        * So set flags to not try too hard:
+        *      GFP_NOIO: don't recurse into the I/O layer
+        *      __GFP_NORETRY: don't retry and rather return failure
+        *      __GFP_NOMEMALLOC: don't use emergency reserves
+        *      __GFP_NOWARN: don't print a warning in case of failure
+        *
+        * For debugging, if we set the cache size to 1, no new buffers will
+        * be allocated.
+        */
+       while (1) {
+               if (dm_bufio_cache_size_latch != 1) {
+                       b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+                       if (b)
+                               return b;
+               }
+
+               if (!list_empty(&c->reserved_buffers)) {
+                       b = list_entry(c->reserved_buffers.next,
+                                      struct dm_buffer, lru_list);
+                       list_del(&b->lru_list);
+                       c->need_reserved_buffers++;
+
+                       return b;
+               }
+
+               b = __get_unclaimed_buffer(c);
+               if (b)
+                       return b;
+
+               __wait_for_free_buffer(c);
+       }
+}
+
+static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
+{
+       struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
+
+       if (c->alloc_callback)
+               c->alloc_callback(b);
+
+       return b;
+}
+
+/*
+ * Free a buffer and wake other threads waiting for free buffers.
+ */
+static void __free_buffer_wake(struct dm_buffer *b)
+{
+       struct dm_bufio_client *c = b->c;
+
+       if (!c->need_reserved_buffers)
+               free_buffer(b);
+       else {
+               list_add(&b->lru_list, &c->reserved_buffers);
+               c->need_reserved_buffers--;
+       }
+
+       wake_up(&c->free_buffer_wait);
+}
+
+static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait)
+{
+       struct dm_buffer *b, *tmp;
+
+       list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
+               BUG_ON(test_bit(B_READING, &b->state));
+
+               if (!test_bit(B_DIRTY, &b->state) &&
+                   !test_bit(B_WRITING, &b->state)) {
+                       __relink_lru(b, LIST_CLEAN);
+                       continue;
+               }
+
+               if (no_wait && test_bit(B_WRITING, &b->state))
+                       return;
+
+               __write_dirty_buffer(b);
+               dm_bufio_cond_resched();
+       }
+}
+
+/*
+ * Get writeback threshold and buffer limit for a given client.
+ */
+static void __get_memory_limit(struct dm_bufio_client *c,
+                              unsigned long *threshold_buffers,
+                              unsigned long *limit_buffers)
+{
+       unsigned long buffers;
+
+       if (dm_bufio_cache_size != dm_bufio_cache_size_latch) {
+               mutex_lock(&dm_bufio_clients_lock);
+               __cache_size_refresh();
+               mutex_unlock(&dm_bufio_clients_lock);
+       }
+
+       buffers = dm_bufio_cache_size_per_client >>
+                 (c->sectors_per_block_bits + SECTOR_SHIFT);
+
+       if (buffers < DM_BUFIO_MIN_BUFFERS)
+               buffers = DM_BUFIO_MIN_BUFFERS;
+
+       *limit_buffers = buffers;
+       *threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
+}
+
+/*
+ * Check if we're over the watermark.
+ * If we're over "limit_buffers", free unclaimed buffers until we get under
+ * the limit.  If we're over "threshold_buffers", start writing dirty
+ * buffers asynchronously.
+ */
+static void __check_watermark(struct dm_bufio_client *c)
+{
+       unsigned long threshold_buffers, limit_buffers;
+
+       __get_memory_limit(c, &threshold_buffers, &limit_buffers);
+
+       while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] >
+              limit_buffers) {
+
+               struct dm_buffer *b = __get_unclaimed_buffer(c);
+
+               if (!b)
+                       return;
+
+               __free_buffer_wake(b);
+               dm_bufio_cond_resched();
+       }
+
+       if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
+               __write_dirty_buffers_async(c, 1);
+}
+
+/*
+ * Find a buffer in the hash.
+ */
+static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
+{
+       struct dm_buffer *b;
+       struct hlist_node *hn;
+
+       hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)],
+                            hash_list) {
+               dm_bufio_cond_resched();
+               if (b->block == block)
+                       return b;
+       }
+
+       return NULL;
+}
+
+/*----------------------------------------------------------------
+ * Getting a buffer
+ *--------------------------------------------------------------*/
+
+enum new_flag {
+       NF_FRESH = 0,   /* don't read from disk; the caller overwrites the data */
+       NF_READ = 1,    /* read the block from disk if it isn't cached */
+       NF_GET = 2      /* only return the buffer if it is already cached */
+};
+
+static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
+                                    enum new_flag nf, struct dm_buffer **bp,
+                                    int *need_submit)
+{
+       struct dm_buffer *b, *new_b = NULL;
+
+       *need_submit = 0;
+
+       b = __find(c, block);
+       if (b) {
+               b->hold_count++;
+               __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
+                            test_bit(B_WRITING, &b->state));
+               return b;
+       }
+
+       if (nf == NF_GET)
+               return NULL;
+
+       new_b = __alloc_buffer_wait(c);
+
+       /*
+        * We've had a period where the mutex was unlocked, so we need to
+        * recheck the hash table.
+        */
+       b = __find(c, block);
+       if (b) {
+               __free_buffer_wake(new_b);
+               b->hold_count++;
+               __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
+                            test_bit(B_WRITING, &b->state));
+               return b;
+       }
+
+       __check_watermark(c);
+
+       b = new_b;
+       b->hold_count = 1;
+       b->read_error = 0;
+       b->write_error = 0;
+       __link_buffer(b, block, LIST_CLEAN);
+
+       if (nf == NF_FRESH) {
+               b->state = 0;
+               return b;
+       }
+
+       b->state = 1 << B_READING;
+       *need_submit = 1;
+
+       return b;
+}
+
+/*
+ * The endio routine for reading: set the error, clear the bit and wake up
+ * anyone waiting on the buffer.
+ */
+static void read_endio(struct bio *bio, int error)
+{
+       struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
+
+       b->read_error = error;
+
+       BUG_ON(!test_bit(B_READING, &b->state));
+
+       smp_mb__before_clear_bit();
+       clear_bit(B_READING, &b->state);
+       smp_mb__after_clear_bit();
+
+       wake_up_bit(&b->state, B_READING);
+}
+
+/*
+ * A common routine for dm_bufio_new and dm_bufio_read.  Operation of these
+ * functions is similar except that dm_bufio_new doesn't read the
+ * buffer from the disk (assuming that the caller overwrites all the data
+ * and uses dm_bufio_mark_buffer_dirty to write new data back).
+ */
+static void *new_read(struct dm_bufio_client *c, sector_t block,
+                     enum new_flag nf, struct dm_buffer **bp)
+{
+       int need_submit;
+       struct dm_buffer *b;
+
+       dm_bufio_lock(c);
+       b = __bufio_new(c, block, nf, bp, &need_submit);
+       dm_bufio_unlock(c);
+
+       if (!b || IS_ERR(b))
+               return b;
+
+       if (need_submit)
+               submit_io(b, READ, b->block, read_endio);
+
+       wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+
+       if (b->read_error) {
+               int error = b->read_error;
+
+               dm_bufio_release(b);
+
+               return ERR_PTR(error);
+       }
+
+       *bp = b;
+
+       return b->data;
+}
+
+void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
+                  struct dm_buffer **bp)
+{
+       return new_read(c, block, NF_GET, bp);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get);
+
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+                   struct dm_buffer **bp)
+{
+       BUG_ON(dm_bufio_in_request());
+
+       return new_read(c, block, NF_READ, bp);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_read);
+
+void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
+                  struct dm_buffer **bp)
+{
+       BUG_ON(dm_bufio_in_request());
+
+       return new_read(c, block, NF_FRESH, bp);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_new);
+
+void dm_bufio_release(struct dm_buffer *b)
+{
+       struct dm_bufio_client *c = b->c;
+
+       dm_bufio_lock(c);
+
+       BUG_ON(test_bit(B_READING, &b->state));
+       BUG_ON(!b->hold_count);
+
+       b->hold_count--;
+       if (!b->hold_count) {
+               wake_up(&c->free_buffer_wait);
+
+               /*
+                * If there were errors on the buffer, and the buffer is not
+                * to be written, free the buffer. There is no point in
+                * caching an invalid buffer.
+                */
+               if ((b->read_error || b->write_error) &&
+                   !test_bit(B_WRITING, &b->state) &&
+                   !test_bit(B_DIRTY, &b->state)) {
+                       __unlink_buffer(b);
+                       __free_buffer_wake(b);
+               }
+       }
+
+       dm_bufio_unlock(c);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_release);
+
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+{
+       struct dm_bufio_client *c = b->c;
+
+       dm_bufio_lock(c);
+
+       if (!test_and_set_bit(B_DIRTY, &b->state))
+               __relink_lru(b, LIST_DIRTY);
+
+       dm_bufio_unlock(c);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
+
+void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
+{
+       BUG_ON(dm_bufio_in_request());
+
+       dm_bufio_lock(c);
+       __write_dirty_buffers_async(c, 0);
+       dm_bufio_unlock(c);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
+
+/*
+ * For performance, it is essential that the buffers are written
+ * asynchronously and simultaneously (so that the block layer can merge
+ * the writes), and only then waited upon.
+ *
+ * Finally, we flush the hardware disk cache.
+ */
+int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
+{
+       int a, f;
+       unsigned long buffers_processed = 0;
+       struct dm_buffer *b, *tmp;
+
+       dm_bufio_lock(c);
+       __write_dirty_buffers_async(c, 0);
+
+again:
+       list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
+               int dropped_lock = 0;
+
+               if (buffers_processed < c->n_buffers[LIST_DIRTY])
+                       buffers_processed++;
+
+               BUG_ON(test_bit(B_READING, &b->state));
+
+               if (test_bit(B_WRITING, &b->state)) {
+                       if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
+                               dropped_lock = 1;
+                               b->hold_count++;
+                               dm_bufio_unlock(c);
+                               wait_on_bit(&b->state, B_WRITING,
+                                           do_io_schedule,
+                                           TASK_UNINTERRUPTIBLE);
+                               dm_bufio_lock(c);
+                               b->hold_count--;
+                       } else
+                               wait_on_bit(&b->state, B_WRITING,
+                                           do_io_schedule,
+                                           TASK_UNINTERRUPTIBLE);
+               }
+
+               if (!test_bit(B_DIRTY, &b->state) &&
+                   !test_bit(B_WRITING, &b->state))
+                       __relink_lru(b, LIST_CLEAN);
+
+               dm_bufio_cond_resched();
+
+               /*
+                * If we dropped the lock, the list is no longer consistent,
+                * so we must restart the search.
+                *
+                * In the most common case, the buffer just processed is
+                * relinked to the clean list, so we won't loop scanning the
+                * same buffer again and again.
+                *
+                * This may livelock if there is another thread simultaneously
+                * dirtying buffers, so we count the number of buffers walked
+                * and if it exceeds the total number of buffers, it means that
+                * someone is doing some writes simultaneously with us.  In
+                * this case, stop dropping the lock.
+                */
+               if (dropped_lock)
+                       goto again;
+       }
+       wake_up(&c->free_buffer_wait);
+       dm_bufio_unlock(c);
+
+       a = xchg(&c->async_write_error, 0);
+       f = dm_bufio_issue_flush(c);
+       if (a)
+               return a;
+
+       return f;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
+
+/*
+ * Use dm-io to send an empty barrier to flush the device.
+ */
+int dm_bufio_issue_flush(struct dm_bufio_client *c)
+{
+       struct dm_io_request io_req = {
+               .bi_rw = REQ_FLUSH,
+               .mem.type = DM_IO_KMEM,
+               .mem.ptr.addr = NULL,
+               .client = c->dm_io,
+       };
+       struct dm_io_region io_reg = {
+               .bdev = c->bdev,
+               .sector = 0,
+               .count = 0,
+       };
+
+       BUG_ON(dm_bufio_in_request());
+
+       return dm_io(&io_req, 1, &io_reg, NULL);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
+
+/*
+ * We first delete any other buffer that may be at that new location.
+ *
+ * Then, we write the buffer to the original location if it was dirty.
+ *
+ * Then, if we are the only one who is holding the buffer, relink the buffer
+ * in the hash queue for the new location.
+ *
+ * If there was someone else holding the buffer, we write it to the new
+ * location but not relink it, because that other user needs to have the buffer
+ * at the same place.
+ */
+void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
+{
+       struct dm_bufio_client *c = b->c;
+       struct dm_buffer *new;
+
+       BUG_ON(dm_bufio_in_request());
+
+       dm_bufio_lock(c);
+
+retry:
+       new = __find(c, new_block);
+       if (new) {
+               if (new->hold_count) {
+                       __wait_for_free_buffer(c);
+                       goto retry;
+               }
+
+               /*
+                * FIXME: Is there any point waiting for a write that's going
+                * to be overwritten in a bit?
+                */
+               __make_buffer_clean(new);
+               __unlink_buffer(new);
+               __free_buffer_wake(new);
+       }
+
+       BUG_ON(!b->hold_count);
+       BUG_ON(test_bit(B_READING, &b->state));
+
+       __write_dirty_buffer(b);
+       if (b->hold_count == 1) {
+               wait_on_bit(&b->state, B_WRITING,
+                           do_io_schedule, TASK_UNINTERRUPTIBLE);
+               set_bit(B_DIRTY, &b->state);
+               __unlink_buffer(b);
+               __link_buffer(b, new_block, LIST_DIRTY);
+       } else {
+               sector_t old_block;
+               wait_on_bit_lock(&b->state, B_WRITING,
+                                do_io_schedule, TASK_UNINTERRUPTIBLE);
+               /*
+                * Relink the buffer to "new_block" so that write_callback
+                * sees "new_block" as the block number.
+                * After the write, link the buffer back to old_block.
+                * All this must be done under the bufio lock, so that the
+                * block number change isn't visible to other threads.
+                */
+               old_block = b->block;
+               __unlink_buffer(b);
+               __link_buffer(b, new_block, b->list_mode);
+               submit_io(b, WRITE, new_block, write_endio);
+               wait_on_bit(&b->state, B_WRITING,
+                           do_io_schedule, TASK_UNINTERRUPTIBLE);
+               __unlink_buffer(b);
+               __link_buffer(b, old_block, b->list_mode);
+       }
+
+       dm_bufio_unlock(c);
+       dm_bufio_release(b);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_release_move);
+
+unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
+{
+       return c->block_size;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
+
+sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
+{
+       return i_size_read(c->bdev->bd_inode) >>
+                          (SECTOR_SHIFT + c->sectors_per_block_bits);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
+
+sector_t dm_bufio_get_block_number(struct dm_buffer *b)
+{
+       return b->block;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_block_number);
+
+void *dm_bufio_get_block_data(struct dm_buffer *b)
+{
+       return b->data;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_block_data);
+
+void *dm_bufio_get_aux_data(struct dm_buffer *b)
+{
+       return b + 1;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data);
+
+struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b)
+{
+       return b->c;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_client);
+
+static void drop_buffers(struct dm_bufio_client *c)
+{
+       struct dm_buffer *b;
+       int i;
+
+       BUG_ON(dm_bufio_in_request());
+
+       /*
+        * An optimization so that the buffers are not written one-by-one.
+        */
+       dm_bufio_write_dirty_buffers_async(c);
+
+       dm_bufio_lock(c);
+
+       while ((b = __get_unclaimed_buffer(c)))
+               __free_buffer_wake(b);
+
+       for (i = 0; i < LIST_SIZE; i++)
+               list_for_each_entry(b, &c->lru[i], lru_list)
+                       DMERR("leaked buffer %llx, hold count %u, list %d",
+                             (unsigned long long)b->block, b->hold_count, i);
+
+       for (i = 0; i < LIST_SIZE; i++)
+               BUG_ON(!list_empty(&c->lru[i]));
+
+       dm_bufio_unlock(c);
+}
+
+/*
+ * Test if the buffer is unused and too old, and commit it.
+ * If __GFP_IO is not set, we must not do any I/O because we hold
+ * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
+ * rerouted to a different bufio client.
+ */
+static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp,
+                               unsigned long max_jiffies)
+{
+       if (jiffies - b->last_accessed < max_jiffies)
+               return 1;
+
+       if (!(gfp & __GFP_IO)) {
+               if (test_bit(B_READING, &b->state) ||
+                   test_bit(B_WRITING, &b->state) ||
+                   test_bit(B_DIRTY, &b->state))
+                       return 1;
+       }
+
+       if (b->hold_count)
+               return 1;
+
+       __make_buffer_clean(b);
+       __unlink_buffer(b);
+       __free_buffer_wake(b);
+
+       return 0;
+}
+
+static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
+                  struct shrink_control *sc)
+{
+       int l;
+       struct dm_buffer *b, *tmp;
+
+       for (l = 0; l < LIST_SIZE; l++) {
+               list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list)
+                       if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) &&
+                           !--nr_to_scan)
+                               return;
+               dm_bufio_cond_resched();
+       }
+}
+
+static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
+{
+       struct dm_bufio_client *c =
+           container_of(shrinker, struct dm_bufio_client, shrinker);
+       unsigned long r;
+       unsigned long nr_to_scan = sc->nr_to_scan;
+
+       if (sc->gfp_mask & __GFP_IO)
+               dm_bufio_lock(c);
+       else if (!dm_bufio_trylock(c))
+               return !nr_to_scan ? 0 : -1;
+
+       if (nr_to_scan)
+               __scan(c, nr_to_scan, sc);
+
+       r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
+       if (r > INT_MAX)
+               r = INT_MAX;
+
+       dm_bufio_unlock(c);
+
+       return r;
+}
+
+/*
+ * Create the buffering interface
+ */
+struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
+                                              unsigned reserved_buffers, unsigned aux_size,
+                                              void (*alloc_callback)(struct dm_buffer *),
+                                              void (*write_callback)(struct dm_buffer *))
+{
+       int r;
+       struct dm_bufio_client *c;
+       unsigned i;
+
+       BUG_ON(block_size < 1 << SECTOR_SHIFT ||
+              (block_size & (block_size - 1)));
+
+       c = kmalloc(sizeof(*c), GFP_KERNEL);
+       if (!c) {
+               r = -ENOMEM;
+               goto bad_client;
+       }
+       c->cache_hash = vmalloc(sizeof(struct hlist_head) << DM_BUFIO_HASH_BITS);
+       if (!c->cache_hash) {
+               r = -ENOMEM;
+               goto bad_hash;
+       }
+
+       c->bdev = bdev;
+       c->block_size = block_size;
+       c->sectors_per_block_bits = ffs(block_size) - 1 - SECTOR_SHIFT;
+       c->pages_per_block_bits = (ffs(block_size) - 1 >= PAGE_SHIFT) ?
+                                 ffs(block_size) - 1 - PAGE_SHIFT : 0;
+       c->blocks_per_page_bits = (ffs(block_size) - 1 < PAGE_SHIFT ?
+                                 PAGE_SHIFT - (ffs(block_size) - 1) : 0);
+
+       c->aux_size = aux_size;
+       c->alloc_callback = alloc_callback;
+       c->write_callback = write_callback;
+
+       for (i = 0; i < LIST_SIZE; i++) {
+               INIT_LIST_HEAD(&c->lru[i]);
+               c->n_buffers[i] = 0;
+       }
+
+       for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
+               INIT_HLIST_HEAD(&c->cache_hash[i]);
+
+       mutex_init(&c->lock);
+       INIT_LIST_HEAD(&c->reserved_buffers);
+       c->need_reserved_buffers = reserved_buffers;
+
+       init_waitqueue_head(&c->free_buffer_wait);
+       c->async_write_error = 0;
+
+       c->dm_io = dm_io_client_create();
+       if (IS_ERR(c->dm_io)) {
+               r = PTR_ERR(c->dm_io);
+               goto bad_dm_io;
+       }
+
+       mutex_lock(&dm_bufio_clients_lock);
+       if (c->blocks_per_page_bits) {
+               if (!DM_BUFIO_CACHE_NAME(c)) {
+                       DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size);
+                       if (!DM_BUFIO_CACHE_NAME(c)) {
+                               r = -ENOMEM;
+                               mutex_unlock(&dm_bufio_clients_lock);
+                               goto bad_cache;
+                       }
+               }
+
+               if (!DM_BUFIO_CACHE(c)) {
+                       DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c),
+                                                             c->block_size,
+                                                             c->block_size, 0, NULL);
+                       if (!DM_BUFIO_CACHE(c)) {
+                               r = -ENOMEM;
+                               mutex_unlock(&dm_bufio_clients_lock);
+                               goto bad_cache;
+                       }
+               }
+       }
+       mutex_unlock(&dm_bufio_clients_lock);
+
+       while (c->need_reserved_buffers) {
+               struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
+
+               if (!b) {
+                       r = -ENOMEM;
+                       goto bad_buffer;
+               }
+               __free_buffer_wake(b);
+       }
+
+       mutex_lock(&dm_bufio_clients_lock);
+       dm_bufio_client_count++;
+       list_add(&c->client_list, &dm_bufio_all_clients);
+       __cache_size_refresh();
+       mutex_unlock(&dm_bufio_clients_lock);
+
+       c->shrinker.shrink = shrink;
+       c->shrinker.seeks = 1;
+       c->shrinker.batch = 0;
+       register_shrinker(&c->shrinker);
+
+       return c;
+
+bad_buffer:
+bad_cache:
+       while (!list_empty(&c->reserved_buffers)) {
+               struct dm_buffer *b = list_entry(c->reserved_buffers.next,
+                                                struct dm_buffer, lru_list);
+               list_del(&b->lru_list);
+               free_buffer(b);
+       }
+       dm_io_client_destroy(c->dm_io);
+bad_dm_io:
+       vfree(c->cache_hash);
+bad_hash:
+       kfree(c);
+bad_client:
+       return ERR_PTR(r);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_client_create);
+
+/*
+ * Free the buffering interface.
+ * It is required that there are no outstanding references to any buffers.
+ */
+void dm_bufio_client_destroy(struct dm_bufio_client *c)
+{
+       unsigned i;
+
+       drop_buffers(c);
+
+       unregister_shrinker(&c->shrinker);
+
+       mutex_lock(&dm_bufio_clients_lock);
+
+       list_del(&c->client_list);
+       dm_bufio_client_count--;
+       __cache_size_refresh();
+
+       mutex_unlock(&dm_bufio_clients_lock);
+
+       for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
+               BUG_ON(!hlist_empty(&c->cache_hash[i]));
+
+       BUG_ON(c->need_reserved_buffers);
+
+       while (!list_empty(&c->reserved_buffers)) {
+               struct dm_buffer *b = list_entry(c->reserved_buffers.next,
+                                                struct dm_buffer, lru_list);
+               list_del(&b->lru_list);
+               free_buffer(b);
+       }
+
+       for (i = 0; i < LIST_SIZE; i++)
+               if (c->n_buffers[i])
+                       DMERR("leaked buffer count %d: %ld", i, c->n_buffers[i]);
+
+       for (i = 0; i < LIST_SIZE; i++)
+               BUG_ON(c->n_buffers[i]);
+
+       dm_io_client_destroy(c->dm_io);
+       vfree(c->cache_hash);
+       kfree(c);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
+
+static void cleanup_old_buffers(void)
+{
+       unsigned long max_age = dm_bufio_max_age;
+       struct dm_bufio_client *c;
+
+       barrier();
+
+       if (max_age > ULONG_MAX / HZ)
+               max_age = ULONG_MAX / HZ;
+
+       mutex_lock(&dm_bufio_clients_lock);
+       list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
+               if (!dm_bufio_trylock(c))
+                       continue;
+
+               while (!list_empty(&c->lru[LIST_CLEAN])) {
+                       struct dm_buffer *b;
+                       b = list_entry(c->lru[LIST_CLEAN].prev,
+                                      struct dm_buffer, lru_list);
+                       if (__cleanup_old_buffer(b, 0, max_age * HZ))
+                               break;
+                       dm_bufio_cond_resched();
+               }
+
+               dm_bufio_unlock(c);
+               dm_bufio_cond_resched();
+       }
+       mutex_unlock(&dm_bufio_clients_lock);
+}
+
+static struct workqueue_struct *dm_bufio_wq;
+static struct delayed_work dm_bufio_work;
+
+static void work_fn(struct work_struct *w)
+{
+       cleanup_old_buffers();
+
+       queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
+                          DM_BUFIO_WORK_TIMER_SECS * HZ);
+}
+
+/*----------------------------------------------------------------
+ * Module setup
+ *--------------------------------------------------------------*/
+
+/*
+ * This is called only once for the whole dm_bufio module.
+ * It initializes the memory limit.
+ */
+static int __init dm_bufio_init(void)
+{
+       __u64 mem;
+
+       memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
+       memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
+
+       mem = (__u64)((totalram_pages - totalhigh_pages) *
+                     DM_BUFIO_MEMORY_PERCENT / 100) << PAGE_SHIFT;
+
+       if (mem > ULONG_MAX)
+               mem = ULONG_MAX;
+
+#ifdef CONFIG_MMU
+       /*
+        * Get the size of vmalloc space the same way as VMALLOC_TOTAL
+        * in fs/proc/internal.h
+        */
+       if (mem > (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100)
+               mem = (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100;
+#endif
+
+       dm_bufio_default_cache_size = mem;
+
+       mutex_lock(&dm_bufio_clients_lock);
+       __cache_size_refresh();
+       mutex_unlock(&dm_bufio_clients_lock);
+
+       dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache");
+       if (!dm_bufio_wq)
+               return -ENOMEM;
+
+       INIT_DELAYED_WORK(&dm_bufio_work, work_fn);
+       queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
+                          DM_BUFIO_WORK_TIMER_SECS * HZ);
+
+       return 0;
+}
+
+/*
+ * This is called once when unloading the dm_bufio module.
+ */
+static void __exit dm_bufio_exit(void)
+{
+       int bug = 0;
+       int i;
+
+       cancel_delayed_work_sync(&dm_bufio_work);
+       destroy_workqueue(dm_bufio_wq);
+
+       for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++) {
+               struct kmem_cache *kc = dm_bufio_caches[i];
+
+               if (kc)
+                       kmem_cache_destroy(kc);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++)
+               kfree(dm_bufio_cache_names[i]);
+
+       if (dm_bufio_client_count) {
+               DMCRIT("%s: dm_bufio_client_count leaked: %d",
+                       __func__, dm_bufio_client_count);
+               bug = 1;
+       }
+
+       if (dm_bufio_current_allocated) {
+               DMCRIT("%s: dm_bufio_current_allocated leaked: %lu",
+                       __func__, dm_bufio_current_allocated);
+               bug = 1;
+       }
+
+       if (dm_bufio_allocated_get_free_pages) {
+               DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu",
+                      __func__, dm_bufio_allocated_get_free_pages);
+               bug = 1;
+       }
+
+       if (dm_bufio_allocated_vmalloc) {
+               DMCRIT("%s: dm_bufio_vmalloc leaked: %lu",
+                      __func__, dm_bufio_allocated_vmalloc);
+               bug = 1;
+       }
+
+       if (bug)
+               BUG();
+}
+
+module_init(dm_bufio_init)
+module_exit(dm_bufio_exit)
+
+module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
+
+module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
+
+module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");
+
+module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO);
+MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");
+
+module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO);
+MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");
+
+module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO);
+MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");
+
+module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO);
+MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");
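+
+/*
+ * A minimal tuning sketch: the writable parameters above are exposed at
+ * runtime through sysfs (the path assumes the standard module parameter
+ * layout and the module name dm_bufio; the value is arbitrary):
+ *
+ *	echo 16777216 > /sys/module/dm_bufio/parameters/max_cache_size_bytes
+ */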
+
+MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
+MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-bufio.h b/drivers/md/dm-bufio.h
new file mode 100644 (file)
index 0000000..5c4c3a0
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2009-2011 Red Hat, Inc.
+ *
+ * Author: Mikulas Patocka <mpatocka@redhat.com>
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_BUFIO_H
+#define DM_BUFIO_H
+
+#include <linux/blkdev.h>
+#include <linux/types.h>
+
+/*----------------------------------------------------------------*/
+
+struct dm_bufio_client;
+struct dm_buffer;
+
+/*
+ * Create a buffered IO cache on a given device
+ */
+struct dm_bufio_client *
+dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
+                      unsigned reserved_buffers, unsigned aux_size,
+                      void (*alloc_callback)(struct dm_buffer *),
+                      void (*write_callback)(struct dm_buffer *));
+
+/*
+ * Release a buffered IO cache.
+ */
+void dm_bufio_client_destroy(struct dm_bufio_client *c);
+
+/*
+ * WARNING: to avoid deadlocks, the following conditions must be observed:
+ *
+ * - At most one thread can hold at most "reserved_buffers" buffers
+ *   simultaneously.
+ * - Each of the other threads can hold at most one buffer.
+ * - Threads which call only dm_bufio_get can hold an unlimited number of
+ *   buffers.
+ */
+
+/*
+ * Read a given block from disk. Returns a pointer to the data and, via
+ * *bp, a pointer to the dm_buffer that can be used to release the buffer
+ * or to make it dirty.
+ */
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+                   struct dm_buffer **bp);
+
+/*
+ * Like dm_bufio_read, but return the buffer only from the cache; don't
+ * read from disk. If the buffer is not in the cache, return NULL.
+ */
+void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
+                  struct dm_buffer **bp);
+
+/*
+ * Like dm_bufio_read, but don't read anything from the disk.  It is
+ * expected that the caller initializes the buffer and marks it dirty.
+ */
+void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
+                  struct dm_buffer **bp);
+
+/*
+ * Release a reference obtained with dm_bufio_{read,get,new}. The data
+ * pointer and the dm_buffer pointer are no longer valid after this call.
+ */
+void dm_bufio_release(struct dm_buffer *b);
+
+/*
+ * Mark a buffer dirty. It should be called after the buffer is modified.
+ *
+ * Under memory pressure, the buffer may be written after
+ * dm_bufio_mark_buffer_dirty but before dm_bufio_write_dirty_buffers.
+ * Thus dm_bufio_write_dirty_buffers guarantees that the buffer is on disk,
+ * but the actual writing may occur earlier.
+ */
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
+
+/*
+ * Initiate writing of dirty buffers, without waiting for completion.
+ */
+void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
+
+/*
+ * Write all dirty buffers. Guarantees that all dirty buffers created prior
+ * to this call are on disk when this call exits.
+ */
+int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
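+
+/*
+ * A minimal usage sketch, assuming an existing client "c" (the block
+ * number 17 is arbitrary): read a block, overwrite it, mark it dirty,
+ * release it, then commit the dirty buffers.
+ *
+ *	struct dm_buffer *bp;
+ *	void *data = dm_bufio_read(c, 17, &bp);
+ *
+ *	if (!IS_ERR(data)) {
+ *		memset(data, 0, dm_bufio_get_block_size(c));
+ *		dm_bufio_mark_buffer_dirty(bp);
+ *		dm_bufio_release(bp);
+ *		dm_bufio_write_dirty_buffers(c);
+ *	}
+ */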
+
+/*
+ * Send an empty write barrier to the device to flush the hardware disk cache.
+ */
+int dm_bufio_issue_flush(struct dm_bufio_client *c);
+
+/*
+ * Like dm_bufio_release but also move the buffer to the new
+ * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
+ */
+void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
+
+unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
+sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
+sector_t dm_bufio_get_block_number(struct dm_buffer *b);
+void *dm_bufio_get_block_data(struct dm_buffer *b);
+void *dm_bufio_get_aux_data(struct dm_buffer *b);
+struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
+
+/*----------------------------------------------------------------*/
+
+#endif
index 2e9a3ca37bdd39c6cbaf36f800b77aa3a04a638f..31c2dc25886d985467e7f4084c96d35a98858f67 100644 (file)
@@ -1215,6 +1215,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
        struct hash_cell *hc;
        struct dm_table *t;
        struct mapped_device *md;
+       struct target_type *immutable_target_type;
 
        md = find_device(param);
        if (!md)
@@ -1230,6 +1231,16 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
                goto out;
        }
 
+       immutable_target_type = dm_get_immutable_target_type(md);
+       if (immutable_target_type &&
+           (immutable_target_type != dm_table_get_immutable_target_type(t))) {
+               DMWARN("can't replace immutable target type %s",
+                      immutable_target_type->name);
+               dm_table_destroy(t);
+               r = -EINVAL;
+               goto out;
+       }
+
        /* Protect md->type and md->queue against concurrent table loads. */
        dm_lock_md_type(md);
        if (dm_get_md_type(md) == DM_TYPE_NONE)
index 32ac70861d66c9827518f723b2b1d8d4138493f9..bed444c93d8d49e03757fdf88c8facb7d98f07dc 100644 (file)
@@ -66,6 +66,8 @@ struct dm_kcopyd_client {
        struct list_head pages_jobs;
 };
 
+static struct page_list zero_page_list;
+
 static void wake(struct dm_kcopyd_client *kc)
 {
        queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
@@ -254,6 +256,9 @@ int __init dm_kcopyd_init(void)
        if (!_job_cache)
                return -ENOMEM;
 
+       zero_page_list.next = &zero_page_list;
+       zero_page_list.page = ZERO_PAGE(0);
+
        return 0;
 }
 
@@ -322,7 +327,7 @@ static int run_complete_job(struct kcopyd_job *job)
        dm_kcopyd_notify_fn fn = job->fn;
        struct dm_kcopyd_client *kc = job->kc;
 
-       if (job->pages)
+       if (job->pages && job->pages != &zero_page_list)
                kcopyd_put_pages(kc, job->pages);
        /*
         * If this is the master job, the sub jobs have already
@@ -484,6 +489,8 @@ static void dispatch_job(struct kcopyd_job *job)
        atomic_inc(&kc->nr_jobs);
        if (unlikely(!job->source.count))
                push(&kc->complete_jobs, job);
+       else if (job->pages == &zero_page_list)
+               push(&kc->io_jobs, job);
        else
                push(&kc->pages_jobs, job);
        wake(kc);
@@ -592,14 +599,20 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
        job->flags = flags;
        job->read_err = 0;
        job->write_err = 0;
-       job->rw = READ;
-
-       job->source = *from;
 
        job->num_dests = num_dests;
        memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
 
-       job->pages = NULL;
+       if (from) {
+               job->source = *from;
+               job->pages = NULL;
+               job->rw = READ;
+       } else {
+               memset(&job->source, 0, sizeof job->source);
+               job->source.count = job->dests[0].count;
+               job->pages = &zero_page_list;
+               job->rw = WRITE;
+       }
 
        job->fn = fn;
        job->context = context;
@@ -617,6 +630,14 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 }
 EXPORT_SYMBOL(dm_kcopyd_copy);
 
+int dm_kcopyd_zero(struct dm_kcopyd_client *kc,
+                  unsigned num_dests, struct dm_io_region *dests,
+                  unsigned flags, dm_kcopyd_notify_fn fn, void *context)
+{
+       return dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
+}
+EXPORT_SYMBOL(dm_kcopyd_zero);
+
 void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
                                 dm_kcopyd_notify_fn fn, void *context)
 {
index 1021c89860116a5bb6e3a2faea1362c0ea805f52..8db3862dade5346afa18de58640db51391b7d0fe 100644 (file)
@@ -30,6 +30,7 @@ struct flush_entry {
 
 struct log_c {
        struct dm_target *ti;
+       struct dm_dev *log_dev;
        uint32_t region_size;
        region_t region_count;
        uint64_t luid;
@@ -146,7 +147,7 @@ static int build_constructor_string(struct dm_target *ti,
  *     <UUID> <other args>
  * Where 'other args' is the userspace implementation specific log
  * arguments.  An example might be:
- *     <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
+ *     <UUID> clustered-disk <arg count> <log dev> <region_size> [[no]sync]
  *
  * So, this module will strip off the <UUID> for identification purposes
  * when communicating with userspace about a log; but will pass on everything
@@ -161,13 +162,15 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
        struct log_c *lc = NULL;
        uint64_t rdata;
        size_t rdata_size = sizeof(rdata);
+       char *devices_rdata = NULL;
+       size_t devices_rdata_size = DM_NAME_LEN;
 
        if (argc < 3) {
                DMWARN("Too few arguments to userspace dirty log");
                return -EINVAL;
        }
 
-       lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+       lc = kzalloc(sizeof(*lc), GFP_KERNEL);
        if (!lc) {
                DMWARN("Unable to allocate userspace log context.");
                return -ENOMEM;
@@ -195,9 +198,19 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
                return str_size;
        }
 
-       /* Send table string */
+       devices_rdata = kzalloc(devices_rdata_size, GFP_KERNEL);
+       if (!devices_rdata) {
+               DMERR("Failed to allocate memory for device information");
+               r = -ENOMEM;
+               goto out;
+       }
+
+       /*
+        * Send table string and get back any opened device.
+        */
        r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
-                                ctr_str, str_size, NULL, NULL);
+                                ctr_str, str_size,
+                                devices_rdata, &devices_rdata_size);
 
        if (r < 0) {
                if (r == -ESRCH)
@@ -220,7 +233,20 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
        lc->region_size = (uint32_t)rdata;
        lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
 
+       if (devices_rdata_size) {
+               if (devices_rdata[devices_rdata_size - 1] != '\0') {
+                       DMERR("DM_ULOG_CTR device return string not properly terminated");
+                       r = -EINVAL;
+                       goto out;
+               }
+               r = dm_get_device(ti, devices_rdata,
+                                 dm_table_get_mode(ti->table), &lc->log_dev);
+               if (r)
+                       DMERR("Failed to register %s with device-mapper",
+                             devices_rdata);
+       }
 out:
+       kfree(devices_rdata);
        if (r) {
                kfree(lc);
                kfree(ctr_str);
@@ -241,6 +267,9 @@ static void userspace_dtr(struct dm_dirty_log *log)
                                 NULL, 0,
                                 NULL, NULL);
 
+       if (lc->log_dev)
+               dm_put_device(lc->ti, lc->log_dev);
+
        kfree(lc->usr_argv_str);
        kfree(lc);
 
index 37a37266a1e38cf6e44cd23d3e33d0d58142f29c..11fa96df4b0614fefa3c00231b5ce98b5c091233 100644 (file)
@@ -1017,30 +1017,56 @@ static int raid_status(struct dm_target *ti, status_type_t type,
        struct raid_set *rs = ti->private;
        unsigned raid_param_cnt = 1; /* at least 1 for chunksize */
        unsigned sz = 0;
-       int i;
+       int i, array_in_sync = 0;
        sector_t sync;
 
        switch (type) {
        case STATUSTYPE_INFO:
                DMEMIT("%s %d ", rs->raid_type->name, rs->md.raid_disks);
 
-               for (i = 0; i < rs->md.raid_disks; i++) {
-                       if (test_bit(Faulty, &rs->dev[i].rdev.flags))
-                               DMEMIT("D");
-                       else if (test_bit(In_sync, &rs->dev[i].rdev.flags))
-                               DMEMIT("A");
-                       else
-                               DMEMIT("a");
-               }
-
                if (test_bit(MD_RECOVERY_RUNNING, &rs->md.recovery))
                        sync = rs->md.curr_resync_completed;
                else
                        sync = rs->md.recovery_cp;
 
-               if (sync > rs->md.resync_max_sectors)
+               if (sync >= rs->md.resync_max_sectors) {
+                       array_in_sync = 1;
                        sync = rs->md.resync_max_sectors;
+               } else {
+                       /*
+                        * The array may be doing an initial sync, or it may
+                        * be rebuilding individual components.  If all the
+                        * devices are In_sync, then it is the array that is
+                        * being initialized.
+                        */
+                       for (i = 0; i < rs->md.raid_disks; i++)
+                               if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
+                                       array_in_sync = 1;
+               }
+               /*
+                * Status characters:
+                *  'D' = Dead/Failed device
+                *  'a' = Alive but not in-sync
+                *  'A' = Alive and in-sync
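+                *  e.g. a fully synced two-device set reports "AA"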
+                */
+               for (i = 0; i < rs->md.raid_disks; i++) {
+                       if (test_bit(Faulty, &rs->dev[i].rdev.flags))
+                               DMEMIT("D");
+                       else if (!array_in_sync ||
+                                !test_bit(In_sync, &rs->dev[i].rdev.flags))
+                               DMEMIT("a");
+                       else
+                               DMEMIT("A");
+               }
 
+               /*
+                * In-sync ratio:
+                *  The in-sync ratio shows the progress of:
+                *   - Initializing the array
+                *   - Rebuilding a subset of devices of the array
+                *  The user can distinguish between the two by referring
+                *  to the status characters.
+                */
                DMEMIT(" %llu/%llu",
                       (unsigned long long) sync,
                       (unsigned long long) rs->md.resync_max_sectors);
index bc04518e9d8b588edc5682442c05e425eb15c451..8e91321301424afda07c456a8b6ae53aec524d04 100644 (file)
@@ -54,7 +54,9 @@ struct dm_table {
        sector_t *highs;
        struct dm_target *targets;
 
+       struct target_type *immutable_target_type;
        unsigned integrity_supported:1;
+       unsigned singleton:1;
 
        /*
         * Indicates the rw permissions for the new logical
@@ -740,6 +742,12 @@ int dm_table_add_target(struct dm_table *t, const char *type,
        char **argv;
        struct dm_target *tgt;
 
+       if (t->singleton) {
+               DMERR("%s: target type %s must appear alone in table",
+                     dm_device_name(t->md), t->targets->type->name);
+               return -EINVAL;
+       }
+
        if ((r = check_space(t)))
                return r;
 
@@ -758,6 +766,36 @@ int dm_table_add_target(struct dm_table *t, const char *type,
                return -EINVAL;
        }
 
+       if (dm_target_needs_singleton(tgt->type)) {
+               if (t->num_targets) {
+                       DMERR("%s: target type %s must appear alone in table",
+                             dm_device_name(t->md), type);
+                       return -EINVAL;
+               }
+               t->singleton = 1;
+       }
+
+       if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) {
+               DMERR("%s: target type %s may not be included in read-only tables",
+                     dm_device_name(t->md), type);
+               return -EINVAL;
+       }
+
+       if (t->immutable_target_type) {
+               if (t->immutable_target_type != tgt->type) {
+                       DMERR("%s: immutable target type %s cannot be mixed with other target types",
+                             dm_device_name(t->md), t->immutable_target_type->name);
+                       return -EINVAL;
+               }
+       } else if (dm_target_is_immutable(tgt->type)) {
+               if (t->num_targets) {
+                       DMERR("%s: immutable target type %s cannot be mixed with other target types",
+                             dm_device_name(t->md), tgt->type->name);
+                       return -EINVAL;
+               }
+               t->immutable_target_type = tgt->type;
+       }
+
        tgt->table = t;
        tgt->begin = start;
        tgt->len = len;
@@ -915,6 +953,11 @@ unsigned dm_table_get_type(struct dm_table *t)
        return t->type;
 }
 
+struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
+{
+       return t->immutable_target_type;
+}
+
 bool dm_table_request_based(struct dm_table *t)
 {
        return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
@@ -1299,6 +1342,31 @@ static bool dm_table_discard_zeroes_data(struct dm_table *t)
        return 1;
 }
 
+static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
+                           sector_t start, sector_t len, void *data)
+{
+       struct request_queue *q = bdev_get_queue(dev->bdev);
+
+       return q && blk_queue_nonrot(q);
+}
+
+static bool dm_table_is_nonrot(struct dm_table *t)
+{
+       struct dm_target *ti;
+       unsigned i = 0;
+
+       /* Ensure that all underlying device are non-rotational. */
+       while (i < dm_table_get_num_targets(t)) {
+               ti = dm_table_get_target(t, i++);
+
+               if (!ti->type->iterate_devices ||
+                   !ti->type->iterate_devices(ti, device_is_nonrot, NULL))
+                       return 0;
+       }
+
+       return 1;
+}
+
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
                               struct queue_limits *limits)
 {
@@ -1324,6 +1392,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
        if (!dm_table_discard_zeroes_data(t))
                q->limits.discard_zeroes_data = 0;
 
+       if (dm_table_is_nonrot(t))
+               queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+       else
+               queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
+
        dm_table_set_integrity(t);
 
        /*
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
new file mode 100644 (file)
index 0000000..59c4f04
--- /dev/null
@@ -0,0 +1,1391 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-thin-metadata.h"
+#include "persistent-data/dm-btree.h"
+#include "persistent-data/dm-space-map.h"
+#include "persistent-data/dm-space-map-disk.h"
+#include "persistent-data/dm-transaction-manager.h"
+
+#include <linux/list.h>
+#include <linux/device-mapper.h>
+#include <linux/workqueue.h>
+
+/*--------------------------------------------------------------------------
+ * As far as the metadata goes, there is:
+ *
+ * - A superblock in block zero, taking up fewer than 512 bytes for
+ *   atomic writes.
+ *
+ * - A space map managing the metadata blocks.
+ *
+ * - A space map managing the data blocks.
+ *
+ * - A btree mapping our internal thin dev ids onto struct disk_device_details.
+ *
+ * - A hierarchical btree, with 2 levels which effectively maps (thin
+ *   dev id, virtual block) -> block_time.  Block time is a 64-bit
+ *   field holding the time in the low 24 bits, and block in the top 48
+ *   bits.
+ *
+ * BTrees consist solely of btree_nodes, each of which fills a block.
+ * Some are internal nodes; their values are __le64s pointing to other
+ * nodes.  Leaf nodes can store values of any reasonable size (i.e. much
+ * smaller than the block size).  Each node consists of a header,
+ * followed by an array of keys, followed by an array of values.  We
+ * binary search on the keys, so they are held together to help the
+ * cpu cache.
+ *
+ * Space maps have 2 btrees:
+ *
+ * - One maps a uint64_t onto a struct index_entry, which points to a
+ *   bitmap block and holds some details, such as how many free entries
+ *   remain.
+ *
+ * - The bitmap blocks have a header (for the checksum); the rest of
+ *   each block is pairs of bits with the following meaning:
+ *
+ *   0 - ref count is 0
+ *   1 - ref count is 1
+ *   2 - ref count is 2
+ *   3 - ref count is higher than 2
+ *
+ * - If the count is higher than 2 then the ref count is entered in a
+ *   second btree that directly maps the block_address to a uint32_t ref
+ *   count.
+ *
+ * The metadata variant of the space map doesn't have a bitmaps btree.
+ * Instead it has a single block's worth of index_entries.  This avoids
+ * the recursive problem of the bitmap btree needing to allocate space
+ * in order to insert.  Even with a small data block size such as 64k,
+ * the metadata can support data devices that are hundreds of terabytes
+ * in size.
+ *
+ * The space maps allocate space linearly from front to back.  Space that
+ * is freed in a transaction is never recycled within that transaction.
+ * To try and avoid fragmenting _free_ space the allocator always goes
+ * back and fills in gaps.
+ *
+ * All metadata io is in THIN_METADATA_BLOCK_SIZE sized/aligned chunks
+ * from the block manager.
+ *--------------------------------------------------------------------------*/
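
To make the two-bit encoding above concrete, here is a minimal sketch (hypothetical helper, not part of this patch; the in-byte bit order is assumed) of reading one entry from a bitmap block's payload:

/*
 * Illustrative only: each payload byte packs four 2-bit reference
 * counts; a value of 3 means "consult the overflow btree".
 */
static unsigned example_bitmap_ref_count(const unsigned char *payload,
					 unsigned index)
{
	unsigned byte = index / 4;
	unsigned shift = (index % 4) * 2;

	return (payload[byte] >> shift) & 3;
}
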
+
+#define DM_MSG_PREFIX   "thin metadata"
+
+#define THIN_SUPERBLOCK_MAGIC 27022010
+#define THIN_SUPERBLOCK_LOCATION 0
+#define THIN_VERSION 1
+#define THIN_METADATA_CACHE_SIZE 64
+#define SECTOR_TO_BLOCK_SHIFT 3
+
+/* This should be plenty */
+#define SPACE_MAP_ROOT_SIZE 128
+
+/*
+ * Little endian on-disk superblock and device details.
+ */
+struct thin_disk_superblock {
+       __le32 csum;    /* Checksum of superblock except for this field. */
+       __le32 flags;
+       __le64 blocknr; /* This block number, dm_block_t. */
+
+       __u8 uuid[16];
+       __le64 magic;
+       __le32 version;
+       __le32 time;
+
+       __le64 trans_id;
+
+       /*
+        * Root held by userspace transactions.
+        */
+       __le64 held_root;
+
+       __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE];
+       __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
+
+       /*
+        * 2-level btree mapping (dev_id, (dev block, time)) -> data block
+        */
+       __le64 data_mapping_root;
+
+       /*
+        * Device detail root mapping dev_id -> device_details
+        */
+       __le64 device_details_root;
+
+       __le32 data_block_size;         /* In 512-byte sectors. */
+
+       __le32 metadata_block_size;     /* In 512-byte sectors. */
+       __le64 metadata_nr_blocks;
+
+       __le32 compat_flags;
+       __le32 compat_ro_flags;
+       __le32 incompat_flags;
+} __packed;
+
+struct disk_device_details {
+       __le64 mapped_blocks;
+       __le64 transaction_id;          /* When created. */
+       __le32 creation_time;
+       __le32 snapshotted_time;
+} __packed;
+
+struct dm_pool_metadata {
+       struct hlist_node hash;
+
+       struct block_device *bdev;
+       struct dm_block_manager *bm;
+       struct dm_space_map *metadata_sm;
+       struct dm_space_map *data_sm;
+       struct dm_transaction_manager *tm;
+       struct dm_transaction_manager *nb_tm;
+
+       /*
+        * Two-level btree.
+        * First level holds thin_dev_t.
+        * Second level holds mappings.
+        */
+       struct dm_btree_info info;
+
+       /*
+        * Non-blocking version of the above.
+        */
+       struct dm_btree_info nb_info;
+
+       /*
+        * Just the top level for deleting whole devices.
+        */
+       struct dm_btree_info tl_info;
+
+       /*
+        * Just the bottom level for creating new devices.
+        */
+       struct dm_btree_info bl_info;
+
+       /*
+        * Describes the device details btree.
+        */
+       struct dm_btree_info details_info;
+
+       struct rw_semaphore root_lock;
+       uint32_t time;
+       int need_commit;
+       dm_block_t root;
+       dm_block_t details_root;
+       struct list_head thin_devices;
+       uint64_t trans_id;
+       unsigned long flags;
+       sector_t data_block_size;
+};
+
+struct dm_thin_device {
+       struct list_head list;
+       struct dm_pool_metadata *pmd;
+       dm_thin_id id;
+
+       int open_count;
+       int changed;
+       uint64_t mapped_blocks;
+       uint64_t transaction_id;
+       uint32_t creation_time;
+       uint32_t snapshotted_time;
+};
+
+/*----------------------------------------------------------------
+ * superblock validator
+ *--------------------------------------------------------------*/
+
+#define SUPERBLOCK_CSUM_XOR 160774
+
+static void sb_prepare_for_write(struct dm_block_validator *v,
+                                struct dm_block *b,
+                                size_t block_size)
+{
+       struct thin_disk_superblock *disk_super = dm_block_data(b);
+
+       disk_super->blocknr = cpu_to_le64(dm_block_location(b));
+       disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
+                                                     block_size - sizeof(__le32),
+                                                     SUPERBLOCK_CSUM_XOR));
+}
+
+static int sb_check(struct dm_block_validator *v,
+                   struct dm_block *b,
+                   size_t block_size)
+{
+       struct thin_disk_superblock *disk_super = dm_block_data(b);
+       __le32 csum_le;
+
+       if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
+               DMERR("sb_check failed: blocknr %llu: "
+                     "wanted %llu", le64_to_cpu(disk_super->blocknr),
+                     (unsigned long long)dm_block_location(b));
+               return -ENOTBLK;
+       }
+
+       if (le64_to_cpu(disk_super->magic) != THIN_SUPERBLOCK_MAGIC) {
+               DMERR("sb_check failed: magic %llu: "
+                     "wanted %llu", le64_to_cpu(disk_super->magic),
+                     (unsigned long long)THIN_SUPERBLOCK_MAGIC);
+               return -EILSEQ;
+       }
+
+       csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
+                                            block_size - sizeof(__le32),
+                                            SUPERBLOCK_CSUM_XOR));
+       if (csum_le != disk_super->csum) {
+               DMERR("sb_check failed: csum %u: wanted %u",
+                     le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
+               return -EILSEQ;
+       }
+
+       return 0;
+}
+
+static struct dm_block_validator sb_validator = {
+       .name = "superblock",
+       .prepare_for_write = sb_prepare_for_write,
+       .check = sb_check
+};
+
+/*----------------------------------------------------------------
+ * Methods for the btree value types
+ *--------------------------------------------------------------*/
+
+static uint64_t pack_block_time(dm_block_t b, uint32_t t)
+{
+       return (b << 24) | t;
+}
+
+static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
+{
+       *b = v >> 24;
+       *t = v & ((1 << 24) - 1);
+}
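
As a worked example of this packing (values hypothetical): block 0x1234 at time 7 packs to (0x1234ULL << 24) | 7 = 0x1234000007, and unpack_block_time() recovers block 0x1234 and time 7 by shifting down 24 bits and masking with (1 << 24) - 1.
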
+
+static void data_block_inc(void *context, void *value_le)
+{
+       struct dm_space_map *sm = context;
+       __le64 v_le;
+       uint64_t b;
+       uint32_t t;
+
+       memcpy(&v_le, value_le, sizeof(v_le));
+       unpack_block_time(le64_to_cpu(v_le), &b, &t);
+       dm_sm_inc_block(sm, b);
+}
+
+static void data_block_dec(void *context, void *value_le)
+{
+       struct dm_space_map *sm = context;
+       __le64 v_le;
+       uint64_t b;
+       uint32_t t;
+
+       memcpy(&v_le, value_le, sizeof(v_le));
+       unpack_block_time(le64_to_cpu(v_le), &b, &t);
+       dm_sm_dec_block(sm, b);
+}
+
+static int data_block_equal(void *context, void *value1_le, void *value2_le)
+{
+       __le64 v1_le, v2_le;
+       uint64_t b1, b2;
+       uint32_t t;
+
+       memcpy(&v1_le, value1_le, sizeof(v1_le));
+       memcpy(&v2_le, value2_le, sizeof(v2_le));
+       unpack_block_time(le64_to_cpu(v1_le), &b1, &t);
+       unpack_block_time(le64_to_cpu(v2_le), &b2, &t);
+
+       return b1 == b2;
+}
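
Note that equality here compares only the block halves of the two packed values: the 24-bit timestamps are ignored, so two mappings to the same data block created at different times compare equal.
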
+
+static void subtree_inc(void *context, void *value)
+{
+       struct dm_btree_info *info = context;
+       __le64 root_le;
+       uint64_t root;
+
+       memcpy(&root_le, value, sizeof(root_le));
+       root = le64_to_cpu(root_le);
+       dm_tm_inc(info->tm, root);
+}
+
+static void subtree_dec(void *context, void *value)
+{
+       struct dm_btree_info *info = context;
+       __le64 root_le;
+       uint64_t root;
+
+       memcpy(&root_le, value, sizeof(root_le));
+       root = le64_to_cpu(root_le);
+       if (dm_btree_del(info, root))
+               DMERR("btree delete failed\n");
+}
+
+static int subtree_equal(void *context, void *value1_le, void *value2_le)
+{
+       __le64 v1_le, v2_le;
+       memcpy(&v1_le, value1_le, sizeof(v1_le));
+       memcpy(&v2_le, value2_le, sizeof(v2_le));
+
+       return v1_le == v2_le;
+}
+
+/*----------------------------------------------------------------*/
+
+static int superblock_all_zeroes(struct dm_block_manager *bm, int *result)
+{
+       int r;
+       unsigned i;
+       struct dm_block *b;
+       __le64 *data_le, zero = cpu_to_le64(0);
+       unsigned block_size = dm_bm_block_size(bm) / sizeof(__le64);
+
+       /*
+        * We can't use a validator here - it may be all zeroes.
+        */
+       r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, NULL, &b);
+       if (r)
+               return r;
+
+       data_le = dm_block_data(b);
+       *result = 1;
+       for (i = 0; i < block_size; i++) {
+               if (data_le[i] != zero) {
+                       *result = 0;
+                       break;
+               }
+       }
+
+       return dm_bm_unlock(b);
+}
+
+static int init_pmd(struct dm_pool_metadata *pmd,
+                   struct dm_block_manager *bm,
+                   dm_block_t nr_blocks, int create)
+{
+       int r;
+       struct dm_space_map *sm, *data_sm;
+       struct dm_transaction_manager *tm;
+       struct dm_block *sblock;
+
+       if (create) {
+               r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION,
+                                        &sb_validator, &tm, &sm, &sblock);
+               if (r < 0) {
+                       DMERR("tm_create_with_sm failed");
+                       return r;
+               }
+
+               data_sm = dm_sm_disk_create(tm, nr_blocks);
+               if (IS_ERR(data_sm)) {
+                       DMERR("sm_disk_create failed");
+                       r = PTR_ERR(data_sm);
+                       goto bad;
+               }
+       } else {
+               struct thin_disk_superblock *disk_super = NULL;
+               size_t space_map_root_offset =
+                       offsetof(struct thin_disk_superblock, metadata_space_map_root);
+
+               r = dm_tm_open_with_sm(bm, THIN_SUPERBLOCK_LOCATION,
+                                      &sb_validator, space_map_root_offset,
+                                      SPACE_MAP_ROOT_SIZE, &tm, &sm, &sblock);
+               if (r < 0) {
+                       DMERR("tm_open_with_sm failed");
+                       return r;
+               }
+
+               disk_super = dm_block_data(sblock);
+               data_sm = dm_sm_disk_open(tm, disk_super->data_space_map_root,
+                                         sizeof(disk_super->data_space_map_root));
+               if (IS_ERR(data_sm)) {
+                       DMERR("sm_disk_open failed");
+                       r = PTR_ERR(data_sm);
+                       goto bad;
+               }
+       }
+
+       r = dm_tm_unlock(tm, sblock);
+       if (r < 0) {
+               DMERR("couldn't unlock superblock");
+               goto bad_data_sm;
+       }
+
+       pmd->bm = bm;
+       pmd->metadata_sm = sm;
+       pmd->data_sm = data_sm;
+       pmd->tm = tm;
+       pmd->nb_tm = dm_tm_create_non_blocking_clone(tm);
+       if (!pmd->nb_tm) {
+               DMERR("could not create clone tm");
+               r = -ENOMEM;
+               goto bad_data_sm;
+       }
+
+       pmd->info.tm = tm;
+       pmd->info.levels = 2;
+       pmd->info.value_type.context = pmd->data_sm;
+       pmd->info.value_type.size = sizeof(__le64);
+       pmd->info.value_type.inc = data_block_inc;
+       pmd->info.value_type.dec = data_block_dec;
+       pmd->info.value_type.equal = data_block_equal;
+
+       memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info));
+       pmd->nb_info.tm = pmd->nb_tm;
+
+       pmd->tl_info.tm = tm;
+       pmd->tl_info.levels = 1;
+       pmd->tl_info.value_type.context = &pmd->info;
+       pmd->tl_info.value_type.size = sizeof(__le64);
+       pmd->tl_info.value_type.inc = subtree_inc;
+       pmd->tl_info.value_type.dec = subtree_dec;
+       pmd->tl_info.value_type.equal = subtree_equal;
+
+       pmd->bl_info.tm = tm;
+       pmd->bl_info.levels = 1;
+       pmd->bl_info.value_type.context = pmd->data_sm;
+       pmd->bl_info.value_type.size = sizeof(__le64);
+       pmd->bl_info.value_type.inc = data_block_inc;
+       pmd->bl_info.value_type.dec = data_block_dec;
+       pmd->bl_info.value_type.equal = data_block_equal;
+
+       pmd->details_info.tm = tm;
+       pmd->details_info.levels = 1;
+       pmd->details_info.value_type.context = NULL;
+       pmd->details_info.value_type.size = sizeof(struct disk_device_details);
+       pmd->details_info.value_type.inc = NULL;
+       pmd->details_info.value_type.dec = NULL;
+       pmd->details_info.value_type.equal = NULL;
+
+       pmd->root = 0;
+
+       init_rwsem(&pmd->root_lock);
+       pmd->time = 0;
+       pmd->need_commit = 0;
+       pmd->details_root = 0;
+       pmd->trans_id = 0;
+       pmd->flags = 0;
+       INIT_LIST_HEAD(&pmd->thin_devices);
+
+       return 0;
+
+bad_data_sm:
+       dm_sm_destroy(data_sm);
+bad:
+       dm_tm_destroy(tm);
+       dm_sm_destroy(sm);
+
+       return r;
+}
+
+static int __begin_transaction(struct dm_pool_metadata *pmd)
+{
+       int r;
+       u32 features;
+       struct thin_disk_superblock *disk_super;
+       struct dm_block *sblock;
+
+       /*
+        * __commit_transaction() resets these
+        */
+       WARN_ON(pmd->need_commit);
+
+       /*
+        * We re-read the superblock every time.  Shouldn't need to do this
+        * really.
+        */
+       r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
+                           &sb_validator, &sblock);
+       if (r)
+               return r;
+
+       disk_super = dm_block_data(sblock);
+       pmd->time = le32_to_cpu(disk_super->time);
+       pmd->root = le64_to_cpu(disk_super->data_mapping_root);
+       pmd->details_root = le64_to_cpu(disk_super->device_details_root);
+       pmd->trans_id = le64_to_cpu(disk_super->trans_id);
+       pmd->flags = le32_to_cpu(disk_super->flags);
+       pmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
+
+       features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP;
+       if (features) {
+               DMERR("could not access metadata due to "
+                     "unsupported optional features (%lx).",
+                     (unsigned long)features);
+               r = -EINVAL;
+               goto out;
+       }
+
+       /*
+        * Check for read-only metadata to skip the following RDWR checks.
+        */
+       if (get_disk_ro(pmd->bdev->bd_disk))
+               goto out;
+
+       features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP;
+       if (features) {
+               DMERR("could not access metadata RDWR due to "
+                     "unsupported optional features (%lx).",
+                     (unsigned long)features);
+               r = -EINVAL;
+       }
+
+out:
+       dm_bm_unlock(sblock);
+       return r;
+}
+
+static int __write_changed_details(struct dm_pool_metadata *pmd)
+{
+       int r;
+       struct dm_thin_device *td, *tmp;
+       struct disk_device_details details;
+       uint64_t key;
+
+       list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
+               if (!td->changed)
+                       continue;
+
+               key = td->id;
+
+               details.mapped_blocks = cpu_to_le64(td->mapped_blocks);
+               details.transaction_id = cpu_to_le64(td->transaction_id);
+               details.creation_time = cpu_to_le32(td->creation_time);
+               details.snapshotted_time = cpu_to_le32(td->snapshotted_time);
+               __dm_bless_for_disk(&details);
+
+               r = dm_btree_insert(&pmd->details_info, pmd->details_root,
+                                   &key, &details, &pmd->details_root);
+               if (r)
+                       return r;
+
+               if (td->open_count)
+                       td->changed = 0;
+               else {
+                       list_del(&td->list);
+                       kfree(td);
+               }
+
+               pmd->need_commit = 1;
+       }
+
+       return 0;
+}
+
+static int __commit_transaction(struct dm_pool_metadata *pmd)
+{
+       /*
+        * FIXME: Associated pool should be made read-only on failure.
+        */
+       int r;
+       size_t metadata_len, data_len;
+       struct thin_disk_superblock *disk_super;
+       struct dm_block *sblock;
+
+       /*
+        * The superblock must fit in a single 512-byte sector so that
+        * writes to it are atomic.
+        */
+       BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512);
+
+       r = __write_changed_details(pmd);
+       if (r < 0)
+               goto out;
+
+       if (!pmd->need_commit)
+               goto out;
+
+       r = dm_sm_commit(pmd->data_sm);
+       if (r < 0)
+               goto out;
+
+       r = dm_tm_pre_commit(pmd->tm);
+       if (r < 0)
+               goto out;
+
+       r = dm_sm_root_size(pmd->metadata_sm, &metadata_len);
+       if (r < 0)
+               goto out;
+
+       r = dm_sm_root_size(pmd->data_sm, &data_len);
+       if (r < 0)
+               goto out;
+
+       r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
+                            &sb_validator, &sblock);
+       if (r)
+               goto out;
+
+       disk_super = dm_block_data(sblock);
+       disk_super->time = cpu_to_le32(pmd->time);
+       disk_super->data_mapping_root = cpu_to_le64(pmd->root);
+       disk_super->device_details_root = cpu_to_le64(pmd->details_root);
+       disk_super->trans_id = cpu_to_le64(pmd->trans_id);
+       disk_super->flags = cpu_to_le32(pmd->flags);
+
+       r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root,
+                           metadata_len);
+       if (r < 0)
+               goto out_locked;
+
+       r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root,
+                           data_len);
+       if (r < 0)
+               goto out_locked;
+
+       r = dm_tm_commit(pmd->tm, sblock);
+       if (!r)
+               pmd->need_commit = 0;
+
+out:
+       return r;
+
+out_locked:
+       dm_bm_unlock(sblock);
+       return r;
+}
+
+struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
+                                              sector_t data_block_size)
+{
+       int r;
+       struct thin_disk_superblock *disk_super;
+       struct dm_pool_metadata *pmd;
+       sector_t bdev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
+       struct dm_block_manager *bm;
+       int create;
+       struct dm_block *sblock;
+
+       pmd = kmalloc(sizeof(*pmd), GFP_KERNEL);
+       if (!pmd) {
+               DMERR("could not allocate metadata struct");
+               return ERR_PTR(-ENOMEM);
+       }
+
+       /*
+        * Max held locks:
+        *  3 for btree insert +
+        *  2 for btree lookup used within space map
+        */
+       bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE,
+                                    THIN_METADATA_CACHE_SIZE, 5);
+       if (!bm) {
+               DMERR("could not create block manager");
+               kfree(pmd);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       r = superblock_all_zeroes(bm, &create);
+       if (r) {
+               dm_block_manager_destroy(bm);
+               kfree(pmd);
+               return ERR_PTR(r);
+       }
+
+       r = init_pmd(pmd, bm, 0, create);
+       if (r) {
+               dm_block_manager_destroy(bm);
+               kfree(pmd);
+               return ERR_PTR(r);
+       }
+       pmd->bdev = bdev;
+
+       if (!create) {
+               r = __begin_transaction(pmd);
+               if (r < 0)
+                       goto bad;
+               return pmd;
+       }
+
+       /*
+        * Create.
+        */
+       r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
+                            &sb_validator, &sblock);
+       if (r)
+               goto bad;
+
+       disk_super = dm_block_data(sblock);
+       disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC);
+       disk_super->version = cpu_to_le32(THIN_VERSION);
+       disk_super->time = 0;
+       disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
+       disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT);
+       disk_super->data_block_size = cpu_to_le32(data_block_size);
+
+       r = dm_bm_unlock(sblock);
+       if (r < 0)
+               goto bad;
+
+       r = dm_btree_empty(&pmd->info, &pmd->root);
+       if (r < 0)
+               goto bad;
+
+       r = dm_btree_empty(&pmd->details_info, &pmd->details_root);
+       if (r < 0) {
+               DMERR("couldn't create devices root");
+               goto bad;
+       }
+
+       pmd->flags = 0;
+       pmd->need_commit = 1;
+       r = dm_pool_commit_metadata(pmd);
+       if (r < 0) {
+               DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
+                     __func__, r);
+               goto bad;
+       }
+
+       return pmd;
+
+bad:
+       if (dm_pool_metadata_close(pmd) < 0)
+               DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
+       return ERR_PTR(r);
+}
+
+int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
+{
+       int r;
+       unsigned open_devices = 0;
+       struct dm_thin_device *td, *tmp;
+
+       down_read(&pmd->root_lock);
+       list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
+               if (td->open_count)
+                       open_devices++;
+               else {
+                       list_del(&td->list);
+                       kfree(td);
+               }
+       }
+       up_read(&pmd->root_lock);
+
+       if (open_devices) {
+               DMERR("attempt to close pmd when %u device(s) are still open",
+                      open_devices);
+               return -EBUSY;
+       }
+
+       r = __commit_transaction(pmd);
+       if (r < 0)
+               DMWARN("%s: __commit_transaction() failed, error = %d",
+                      __func__, r);
+
+       dm_tm_destroy(pmd->tm);
+       dm_tm_destroy(pmd->nb_tm);
+       dm_block_manager_destroy(pmd->bm);
+       dm_sm_destroy(pmd->metadata_sm);
+       dm_sm_destroy(pmd->data_sm);
+       kfree(pmd);
+
+       return 0;
+}
+
+static int __open_device(struct dm_pool_metadata *pmd,
+                        dm_thin_id dev, int create,
+                        struct dm_thin_device **td)
+{
+       int r, changed = 0;
+       struct dm_thin_device *td2;
+       uint64_t key = dev;
+       struct disk_device_details details_le;
+
+       /*
+        * Check the device isn't already open.
+        */
+       list_for_each_entry(td2, &pmd->thin_devices, list)
+               if (td2->id == dev) {
+                       td2->open_count++;
+                       *td = td2;
+                       return 0;
+               }
+
+       /*
+        * Check the device exists.
+        */
+       r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
+                           &key, &details_le);
+       if (r) {
+               if (r != -ENODATA || !create)
+                       return r;
+
+               changed = 1;
+               details_le.mapped_blocks = 0;
+               details_le.transaction_id = cpu_to_le64(pmd->trans_id);
+               details_le.creation_time = cpu_to_le32(pmd->time);
+               details_le.snapshotted_time = cpu_to_le32(pmd->time);
+       }
+
+       *td = kmalloc(sizeof(**td), GFP_NOIO);
+       if (!*td)
+               return -ENOMEM;
+
+       (*td)->pmd = pmd;
+       (*td)->id = dev;
+       (*td)->open_count = 1;
+       (*td)->changed = changed;
+       (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks);
+       (*td)->transaction_id = le64_to_cpu(details_le.transaction_id);
+       (*td)->creation_time = le32_to_cpu(details_le.creation_time);
+       (*td)->snapshotted_time = le32_to_cpu(details_le.snapshotted_time);
+
+       list_add(&(*td)->list, &pmd->thin_devices);
+
+       return 0;
+}
+
+static void __close_device(struct dm_thin_device *td)
+{
+       --td->open_count;
+}
+
+static int __create_thin(struct dm_pool_metadata *pmd,
+                        dm_thin_id dev)
+{
+       int r;
+       dm_block_t dev_root;
+       uint64_t key = dev;
+       struct disk_device_details details_le;
+       struct dm_thin_device *td;
+       __le64 value;
+
+       r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
+                           &key, &details_le);
+       if (!r)
+               return -EEXIST;
+
+       /*
+        * Create an empty btree for the mappings.
+        */
+       r = dm_btree_empty(&pmd->bl_info, &dev_root);
+       if (r)
+               return r;
+
+       /*
+        * Insert it into the main mapping tree.
+        */
+       value = cpu_to_le64(dev_root);
+       __dm_bless_for_disk(&value);
+       r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root);
+       if (r) {
+               dm_btree_del(&pmd->bl_info, dev_root);
+               return r;
+       }
+
+       r = __open_device(pmd, dev, 1, &td);
+       if (r) {
+               /* td is not valid after a failed open, so don't close it */
+               dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
+               dm_btree_del(&pmd->bl_info, dev_root);
+               return r;
+       }
+       td->changed = 1;
+       __close_device(td);
+
+       return r;
+}
+
+int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev)
+{
+       int r;
+
+       down_write(&pmd->root_lock);
+       r = __create_thin(pmd, dev);
+       up_write(&pmd->root_lock);
+
+       return r;
+}
+
+static int __set_snapshot_details(struct dm_pool_metadata *pmd,
+                                 struct dm_thin_device *snap,
+                                 dm_thin_id origin, uint32_t time)
+{
+       int r;
+       struct dm_thin_device *td;
+
+       r = __open_device(pmd, origin, 0, &td);
+       if (r)
+               return r;
+
+       td->changed = 1;
+       td->snapshotted_time = time;
+
+       snap->mapped_blocks = td->mapped_blocks;
+       snap->snapshotted_time = time;
+       __close_device(td);
+
+       return 0;
+}
+
+static int __create_snap(struct dm_pool_metadata *pmd,
+                        dm_thin_id dev, dm_thin_id origin)
+{
+       int r;
+       dm_block_t origin_root;
+       uint64_t key = origin, dev_key = dev;
+       struct dm_thin_device *td;
+       struct disk_device_details details_le;
+       __le64 value;
+
+       /* check this device is unused */
+       r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
+                           &dev_key, &details_le);
+       if (!r)
+               return -EEXIST;
+
+       /* find the mapping tree for the origin */
+       r = dm_btree_lookup(&pmd->tl_info, pmd->root, &key, &value);
+       if (r)
+               return r;
+       origin_root = le64_to_cpu(value);
+
+       /* clone the origin, an inc will do */
+       dm_tm_inc(pmd->tm, origin_root);
+
+       /* insert into the main mapping tree */
+       value = cpu_to_le64(origin_root);
+       __dm_bless_for_disk(&value);
+       key = dev;
+       r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root);
+       if (r) {
+               dm_tm_dec(pmd->tm, origin_root);
+               return r;
+       }
+
+       pmd->time++;
+
+       r = __open_device(pmd, dev, 1, &td);
+       if (r)
+               goto bad;
+
+       r = __set_snapshot_details(pmd, td, origin, pmd->time);
+       if (r)
+               goto bad_close;
+
+       __close_device(td);
+       return 0;
+
+bad_close:
+       /* td was opened successfully, so balance it with a close */
+       __close_device(td);
+bad:
+       dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
+       dm_btree_remove(&pmd->details_info, pmd->details_root,
+                       &key, &pmd->details_root);
+       return r;
+}
+
+int dm_pool_create_snap(struct dm_pool_metadata *pmd,
+                                dm_thin_id dev,
+                                dm_thin_id origin)
+{
+       int r;
+
+       down_write(&pmd->root_lock);
+       r = __create_snap(pmd, dev, origin);
+       up_write(&pmd->root_lock);
+
+       return r;
+}
+
+static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev)
+{
+       int r;
+       uint64_t key = dev;
+       struct dm_thin_device *td;
+
+       /* TODO: failure should mark the transaction invalid */
+       r = __open_device(pmd, dev, 0, &td);
+       if (r)
+               return r;
+
+       if (td->open_count > 1) {
+               __close_device(td);
+               return -EBUSY;
+       }
+
+       list_del(&td->list);
+       kfree(td);
+       r = dm_btree_remove(&pmd->details_info, pmd->details_root,
+                           &key, &pmd->details_root);
+       if (r)
+               return r;
+
+       r = dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
+       if (r)
+               return r;
+
+       pmd->need_commit = 1;
+
+       return 0;
+}
+
+int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd,
+                              dm_thin_id dev)
+{
+       int r;
+
+       down_write(&pmd->root_lock);
+       r = __delete_device(pmd, dev);
+       up_write(&pmd->root_lock);
+
+       return r;
+}
+
+int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd,
+                                       uint64_t current_id,
+                                       uint64_t new_id)
+{
+       down_write(&pmd->root_lock);
+       if (pmd->trans_id != current_id) {
+               up_write(&pmd->root_lock);
+               DMERR("mismatched transaction id");
+               return -EINVAL;
+       }
+
+       pmd->trans_id = new_id;
+       pmd->need_commit = 1;
+       up_write(&pmd->root_lock);
+
+       return 0;
+}
+
+int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
+                                       uint64_t *result)
+{
+       down_read(&pmd->root_lock);
+       *result = pmd->trans_id;
+       up_read(&pmd->root_lock);
+
+       return 0;
+}
+
+static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
+                                   dm_block_t *result)
+{
+       int r;
+       struct thin_disk_superblock *disk_super;
+       struct dm_block *sblock;
+
+       r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
+                            &sb_validator, &sblock);
+       if (r)
+               return r;
+
+       disk_super = dm_block_data(sblock);
+       *result = le64_to_cpu(disk_super->held_root);
+
+       return dm_bm_unlock(sblock);
+}
+
+int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
+                                  dm_block_t *result)
+{
+       int r;
+
+       down_read(&pmd->root_lock);
+       r = __get_held_metadata_root(pmd, result);
+       up_read(&pmd->root_lock);
+
+       return r;
+}
+
+int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev,
+                            struct dm_thin_device **td)
+{
+       int r;
+
+       down_write(&pmd->root_lock);
+       r = __open_device(pmd, dev, 0, td);
+       up_write(&pmd->root_lock);
+
+       return r;
+}
+
+int dm_pool_close_thin_device(struct dm_thin_device *td)
+{
+       down_write(&td->pmd->root_lock);
+       __close_device(td);
+       up_write(&td->pmd->root_lock);
+
+       return 0;
+}
+
+dm_thin_id dm_thin_dev_id(struct dm_thin_device *td)
+{
+       return td->id;
+}
+
+static int __snapshotted_since(struct dm_thin_device *td, uint32_t time)
+{
+       return td->snapshotted_time > time;
+}
+
+int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
+                      int can_block, struct dm_thin_lookup_result *result)
+{
+       int r;
+       uint64_t block_time = 0;
+       __le64 value;
+       struct dm_pool_metadata *pmd = td->pmd;
+       dm_block_t keys[2] = { td->id, block };
+
+       if (can_block) {
+               down_read(&pmd->root_lock);
+               r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value);
+               if (!r)
+                       block_time = le64_to_cpu(value);
+               up_read(&pmd->root_lock);
+
+       } else if (down_read_trylock(&pmd->root_lock)) {
+               r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value);
+               if (!r)
+                       block_time = le64_to_cpu(value);
+               up_read(&pmd->root_lock);
+
+       } else
+               return -EWOULDBLOCK;
+
+       if (!r) {
+               dm_block_t exception_block;
+               uint32_t exception_time;
+               unpack_block_time(block_time, &exception_block,
+                                 &exception_time);
+               result->block = exception_block;
+               result->shared = __snapshotted_since(td, exception_time);
+       }
+
+       return r;
+}
+
+static int __insert(struct dm_thin_device *td, dm_block_t block,
+                   dm_block_t data_block)
+{
+       int r, inserted;
+       __le64 value;
+       struct dm_pool_metadata *pmd = td->pmd;
+       dm_block_t keys[2] = { td->id, block };
+
+       pmd->need_commit = 1;
+       value = cpu_to_le64(pack_block_time(data_block, pmd->time));
+       __dm_bless_for_disk(&value);
+
+       r = dm_btree_insert_notify(&pmd->info, pmd->root, keys, &value,
+                                  &pmd->root, &inserted);
+       if (r)
+               return r;
+
+       if (inserted) {
+               td->mapped_blocks++;
+               td->changed = 1;
+       }
+
+       return 0;
+}
+
+int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block,
+                        dm_block_t data_block)
+{
+       int r;
+
+       down_write(&td->pmd->root_lock);
+       r = __insert(td, block, data_block);
+       up_write(&td->pmd->root_lock);
+
+       return r;
+}
+
+static int __remove(struct dm_thin_device *td, dm_block_t block)
+{
+       int r;
+       struct dm_pool_metadata *pmd = td->pmd;
+       dm_block_t keys[2] = { td->id, block };
+
+       r = dm_btree_remove(&pmd->info, pmd->root, keys, &pmd->root);
+       if (r)
+               return r;
+
+       pmd->need_commit = 1;
+
+       return 0;
+}
+
+int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block)
+{
+       int r;
+
+       down_write(&td->pmd->root_lock);
+       r = __remove(td, block);
+       up_write(&td->pmd->root_lock);
+
+       return r;
+}
+
+int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result)
+{
+       int r;
+
+       down_write(&pmd->root_lock);
+
+       r = dm_sm_new_block(pmd->data_sm, result);
+       pmd->need_commit = 1;
+
+       up_write(&pmd->root_lock);
+
+       return r;
+}
+
+int dm_pool_commit_metadata(struct dm_pool_metadata *pmd)
+{
+       int r;
+
+       down_write(&pmd->root_lock);
+
+       r = __commit_transaction(pmd);
+       if (r <= 0)
+               goto out;
+
+       /*
+        * Open the next transaction.
+        */
+       r = __begin_transaction(pmd);
+out:
+       up_write(&pmd->root_lock);
+       return r;
+}
+
+int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result)
+{
+       int r;
+
+       down_read(&pmd->root_lock);
+       r = dm_sm_get_nr_free(pmd->data_sm, result);
+       up_read(&pmd->root_lock);
+
+       return r;
+}
+
+int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
+                                         dm_block_t *result)
+{
+       int r;
+
+       down_read(&pmd->root_lock);
+       r = dm_sm_get_nr_free(pmd->metadata_sm, result);
+       up_read(&pmd->root_lock);
+
+       return r;
+}
+
+int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
+                                 dm_block_t *result)
+{
+       int r;
+
+       down_read(&pmd->root_lock);
+       r = dm_sm_get_nr_blocks(pmd->metadata_sm, result);
+       up_read(&pmd->root_lock);
+
+       return r;
+}
+
+int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result)
+{
+       down_read(&pmd->root_lock);
+       *result = pmd->data_block_size;
+       up_read(&pmd->root_lock);
+
+       return 0;
+}
+
+int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result)
+{
+       int r;
+
+       down_read(&pmd->root_lock);
+       r = dm_sm_get_nr_blocks(pmd->data_sm, result);
+       up_read(&pmd->root_lock);
+
+       return r;
+}
+
+int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result)
+{
+       struct dm_pool_metadata *pmd = td->pmd;
+
+       down_read(&pmd->root_lock);
+       *result = td->mapped_blocks;
+       up_read(&pmd->root_lock);
+
+       return 0;
+}
+
+static int __highest_block(struct dm_thin_device *td, dm_block_t *result)
+{
+       int r;
+       __le64 value_le;
+       dm_block_t thin_root;
+       struct dm_pool_metadata *pmd = td->pmd;
+
+       r = dm_btree_lookup(&pmd->tl_info, pmd->root, &td->id, &value_le);
+       if (r)
+               return r;
+
+       thin_root = le64_to_cpu(value_le);
+
+       return dm_btree_find_highest_key(&pmd->bl_info, thin_root, result);
+}
+
+int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
+                                    dm_block_t *result)
+{
+       int r;
+       struct dm_pool_metadata *pmd = td->pmd;
+
+       down_read(&pmd->root_lock);
+       r = __highest_block(td, result);
+       up_read(&pmd->root_lock);
+
+       return r;
+}
+
+static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
+{
+       int r;
+       dm_block_t old_count;
+
+       r = dm_sm_get_nr_blocks(pmd->data_sm, &old_count);
+       if (r)
+               return r;
+
+       if (new_count == old_count)
+               return 0;
+
+       if (new_count < old_count) {
+               DMERR("cannot reduce size of data device");
+               return -EINVAL;
+       }
+
+       r = dm_sm_extend(pmd->data_sm, new_count - old_count);
+       if (!r)
+               pmd->need_commit = 1;
+
+       return r;
+}
+
+int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
+{
+       int r;
+
+       down_write(&pmd->root_lock);
+       r = __resize_data_dev(pmd, new_count);
+       up_write(&pmd->root_lock);
+
+       return r;
+}
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
new file mode 100644 (file)
index 0000000..859c168
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2010-2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_THIN_METADATA_H
+#define DM_THIN_METADATA_H
+
+#include "persistent-data/dm-block-manager.h"
+
+#define THIN_METADATA_BLOCK_SIZE 4096
+
+/*----------------------------------------------------------------*/
+
+struct dm_pool_metadata;
+struct dm_thin_device;
+
+/*
+ * Device identifier
+ */
+typedef uint64_t dm_thin_id;
+
+/*
+ * Reopens an existing metadata volume, or creates a new, empty one.
+ */
+struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
+                                              sector_t data_block_size);
+
+int dm_pool_metadata_close(struct dm_pool_metadata *pmd);
+
+/*
+ * Compat feature flags.  Any incompat flags beyond the ones
+ * specified below will prevent use of the thin metadata.
+ */
+#define THIN_FEATURE_COMPAT_SUPP         0UL
+#define THIN_FEATURE_COMPAT_RO_SUPP      0UL
+#define THIN_FEATURE_INCOMPAT_SUPP       0UL
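
Mirroring the checks in __begin_transaction(): an unknown incompat flag prevents any use of the metadata, an unknown compat_ro flag only forbids read-write access, and unknown compat flags are ignored.
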
+
+/*
+ * Device creation/deletion.
+ */
+int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev);
+
+/*
+ * An internal snapshot.
+ *
+ * You can only snapshot a quiesced origin, i.e. one that is either
+ * suspended or not instanced at all.
+ */
+int dm_pool_create_snap(struct dm_pool_metadata *pmd, dm_thin_id dev,
+                       dm_thin_id origin);
+
+/*
+ * Deletes a virtual device from the metadata.  It _is_ safe to call this
+ * when that device is open.  Operations on that device will just start
+ * failing.  You still need to call close() on the device.
+ */
+int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd,
+                              dm_thin_id dev);
+
+/*
+ * Commits _all_ metadata changes: device creation, deletion, mapping
+ * updates.
+ */
+int dm_pool_commit_metadata(struct dm_pool_metadata *pmd);
+
+/*
+ * Set/get userspace transaction id.
+ */
+int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd,
+                                       uint64_t current_id,
+                                       uint64_t new_id);
+
+int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
+                                       uint64_t *result);
+
+/*
+ * Hold/get root for userspace transaction.
+ */
+int dm_pool_hold_metadata_root(struct dm_pool_metadata *pmd);
+
+int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
+                                  dm_block_t *result);
+
+/*
+ * Actions on a single virtual device.
+ */
+
+/*
+ * Opening the same device more than once simply increments its
+ * reference count; each open must be balanced with a close.
+ */
+int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev,
+                            struct dm_thin_device **td);
+
+int dm_pool_close_thin_device(struct dm_thin_device *td);
+
+dm_thin_id dm_thin_dev_id(struct dm_thin_device *td);
+
+struct dm_thin_lookup_result {
+       dm_block_t block;
+       int shared;
+};
+
+/*
+ * Returns:
+ *   -EWOULDBLOCK iff @can_block is unset and the lookup would block.
+ *   -ENODATA iff that mapping is not present.
+ *   0 success
+ */
+int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
+                      int can_block, struct dm_thin_lookup_result *result);
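
A minimal caller sketch under this contract (hypothetical function; error handling elided):

static int example_lookup(struct dm_thin_device *td, dm_block_t block,
			  dm_block_t *data_block, int *needs_cow)
{
	struct dm_thin_lookup_result lookup;
	int r = dm_thin_find_block(td, block, 1, &lookup);

	if (r)
		return r;	/* -ENODATA: block is unprovisioned */

	*data_block = lookup.block;
	*needs_cow = lookup.shared;	/* break sharing before writing */
	return 0;
}
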
+
+/*
+ * Obtain an unused block.
+ */
+int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result);
+
+/*
+ * Insert or remove block.
+ */
+int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block,
+                        dm_block_t data_block);
+
+int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block);
+
+/*
+ * Queries.
+ */
+int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
+                                    dm_block_t *highest_mapped);
+
+int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result);
+
+int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd,
+                                dm_block_t *result);
+
+int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
+                                         dm_block_t *result);
+
+int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
+                                 dm_block_t *result);
+
+int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result);
+
+int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result);
+
+/*
+ * Returns -EINVAL if the new size is smaller than the current size;
+ * the data device may not be shrunk.
+ */
+int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
+
+/*----------------------------------------------------------------*/
+
+#endif
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
new file mode 100644 (file)
index 0000000..c308757
--- /dev/null
@@ -0,0 +1,2428 @@
+/*
+ * Copyright (C) 2011 Red Hat UK.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-thin-metadata.h"
+
+#include <linux/device-mapper.h>
+#include <linux/dm-io.h>
+#include <linux/dm-kcopyd.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#define        DM_MSG_PREFIX   "thin"
+
+/*
+ * Tunable constants
+ */
+#define ENDIO_HOOK_POOL_SIZE 10240
+#define DEFERRED_SET_SIZE 64
+#define MAPPING_POOL_SIZE 1024
+#define PRISON_CELLS 1024
+
+/*
+ * The block size of the device holding pool data must be
+ * between 64KB and 1GB.
+ */
+#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
+#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
+
+/*
+ * The metadata device is currently limited in size.  The limitation is
+ * checked lower down in dm-space-map-metadata, but we also check it here
+ * so we can fail early.
+ *
+ * We have one block of index, which can hold 255 index entries.  Each
+ * index entry contains allocation info about 16k metadata blocks.
+ */
+#define METADATA_DEV_MAX_SECTORS (255 * (1 << 14) * (THIN_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT)))
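
Concretely: 255 index entries * 16384 blocks per entry = 4,177,920 metadata blocks of 4KB each, i.e. 33,423,360 sectors, or just under 16GiB of metadata device.
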
+
+/*
+ * Device id is restricted to 24 bits.
+ */
+#define MAX_DEV_ID ((1 << 24) - 1)
+
+/*
+ * How do we handle breaking sharing of data blocks?
+ * =================================================
+ *
+ * We use a standard copy-on-write btree to store the mappings for the
+ * devices (note I'm talking about copy-on-write of the metadata here, not
+ * the data).  When you take an internal snapshot you clone the root node
+ * of the origin btree.  After this there is no concept of an origin or a
+ * snapshot.  They are just two device trees that happen to point to the
+ * same data blocks.
+ *
+ * When we get a write in we decide if it's to a shared data block using
+ * some timestamp magic.  If it is, we have to break sharing.
+ *
+ * Let's say we write to a shared block in what was the origin.  The
+ * steps are:
+ *
+ * i) plug further io to this physical block (see bio_prison code).
+ *
+ * ii) quiesce any read io to that shared data block, obviously
+ * including all devices that share this block (see deferred_set code).
+ *
+ * iii) copy the data block to a newly allocated block.  This step can
+ * be skipped if the io covers the whole block (schedule_copy).
+ *
+ * iv) insert the new mapping into the origin's btree
+ * (process_prepared_mappings).  This act of inserting breaks some
+ * sharing of btree nodes between the two devices.  Breaking sharing
+ * only affects the btree of that specific device.  Btrees for the other
+ * devices that share the block never change.  The btree for the origin
+ * device as it was after the last commit is untouched, i.e. we're using
+ * persistent data structures in the functional programming sense.
+ *
+ * v) unplug io to this physical block, including the io that triggered
+ * the breaking of sharing.
+ *
+ * Steps (ii) and (iii) occur in parallel.
+ *
+ * The metadata _doesn't_ need to be committed before the io continues.  We
+ * get away with this because the io is always written to a _new_ block.
+ * If there's a crash, then:
+ *
+ * - The origin mapping will point to the old origin block (the shared
+ * one).  This will contain the data as it was before the io that triggered
+ * the breaking of sharing came in.
+ *
+ * - The snap mapping still points to the old block, as it would after
+ * the commit.
+ *
+ * The downside of this scheme is that the timestamp magic isn't
+ * perfect: it will continue to think the data block in the snapshot
+ * device is shared even after the write to the origin has broken
+ * sharing.  I suspect data blocks will typically be shared by many
+ * different devices, so we're breaking sharing n + 1 times, rather
+ * than n, where n is the number of devices that reference this data
+ * block.  At the moment I think the benefits far, far outweigh the
+ * disadvantages.
+ */
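
The "timestamp magic" is a single comparison; a minimal sketch mirroring __snapshotted_since() in dm-thin-metadata.c:

/*
 * A mapping stamped at 'mapping_time' may still be referenced by a
 * snapshot if the device has been snapshotted since that time, so a
 * write to it must first break sharing.
 */
static int example_block_is_shared(uint32_t snapshotted_time,
				   uint32_t mapping_time)
{
	return snapshotted_time > mapping_time;
}
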
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Sometimes we can't deal with a bio straight away, so we put it in
+ * prison where it can't cause any mischief.  Bios are put in a cell
+ * identified by a key; multiple bios can be in the same cell.  When the
+ * cell is subsequently unlocked the bios become available.
+ */
+struct bio_prison;
+
+struct cell_key {
+       int virtual;
+       dm_thin_id dev;
+       dm_block_t block;
+};
+
+struct cell {
+       struct hlist_node list;
+       struct bio_prison *prison;
+       struct cell_key key;
+       unsigned count;
+       struct bio_list bios;
+};
+
+struct bio_prison {
+       spinlock_t lock;
+       mempool_t *cell_pool;
+
+       unsigned nr_buckets;
+       unsigned hash_mask;
+       struct hlist_head *cells;
+};
+
+static uint32_t calc_nr_buckets(unsigned nr_cells)
+{
+       uint32_t n = 128;
+
+       nr_cells /= 4;
+       nr_cells = min(nr_cells, 8192u);
+
+       while (n < nr_cells)
+               n <<= 1;
+
+       return n;
+}
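
For example, the default PRISON_CELLS = 1024 gives calc_nr_buckets(1024) = 256: 1024 / 4 = 256 is already a power of two and under the 8192 cap, so the hash table targets roughly four concurrent cells per bucket.
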
+
+/*
+ * @nr_cells should be the number of cells you want in use _concurrently_.
+ * Don't confuse it with the number of distinct keys.
+ */
+static struct bio_prison *prison_create(unsigned nr_cells)
+{
+       unsigned i;
+       uint32_t nr_buckets = calc_nr_buckets(nr_cells);
+       size_t len = sizeof(struct bio_prison) +
+               (sizeof(struct hlist_head) * nr_buckets);
+       struct bio_prison *prison = kmalloc(len, GFP_KERNEL);
+
+       if (!prison)
+               return NULL;
+
+       spin_lock_init(&prison->lock);
+       prison->cell_pool = mempool_create_kmalloc_pool(nr_cells,
+                                                       sizeof(struct cell));
+       if (!prison->cell_pool) {
+               kfree(prison);
+               return NULL;
+       }
+
+       prison->nr_buckets = nr_buckets;
+       prison->hash_mask = nr_buckets - 1;
+       prison->cells = (struct hlist_head *) (prison + 1);
+       for (i = 0; i < nr_buckets; i++)
+               INIT_HLIST_HEAD(prison->cells + i);
+
+       return prison;
+}
+
+static void prison_destroy(struct bio_prison *prison)
+{
+       mempool_destroy(prison->cell_pool);
+       kfree(prison);
+}
+
+static uint32_t hash_key(struct bio_prison *prison, struct cell_key *key)
+{
+       const unsigned long BIG_PRIME = 4294967291UL;
+       uint64_t hash = key->block * BIG_PRIME;
+
+       return (uint32_t) (hash & prison->hash_mask);
+}
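
BIG_PRIME is 2^32 - 5, the largest 32-bit prime, giving a cheap multiplicative hash before the result is masked down to the bucket range.
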
+
+static int keys_equal(struct cell_key *lhs, struct cell_key *rhs)
+{
+       return (lhs->virtual == rhs->virtual) &&
+              (lhs->dev == rhs->dev) &&
+              (lhs->block == rhs->block);
+}
+
+static struct cell *__search_bucket(struct hlist_head *bucket,
+                                   struct cell_key *key)
+{
+       struct cell *cell;
+       struct hlist_node *tmp;
+
+       hlist_for_each_entry(cell, tmp, bucket, list)
+               if (keys_equal(&cell->key, key))
+                       return cell;
+
+       return NULL;
+}
+
+/*
+ * This may block if a new cell needs allocating.  You must ensure that
+ * cells will be unlocked even if the calling thread is blocked.
+ *
+ * Returns the number of entries in the cell prior to the new addition
+ * or < 0 on failure.
+ */
+static int bio_detain(struct bio_prison *prison, struct cell_key *key,
+                     struct bio *inmate, struct cell **ref)
+{
+       int r;
+       unsigned long flags;
+       uint32_t hash = hash_key(prison, key);
+       struct cell *uninitialized_var(cell), *cell2 = NULL;
+
+       BUG_ON(hash > prison->nr_buckets);
+
+       spin_lock_irqsave(&prison->lock, flags);
+       cell = __search_bucket(prison->cells + hash, key);
+
+       if (!cell) {
+               /*
+                * Allocate a new cell
+                */
+               spin_unlock_irqrestore(&prison->lock, flags);
+               cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
+               spin_lock_irqsave(&prison->lock, flags);
+
+               /*
+                * We've been unlocked, so we have to double check that
+                * nobody else has inserted this cell in the meantime.
+                */
+               cell = __search_bucket(prison->cells + hash, key);
+
+               if (!cell) {
+                       cell = cell2;
+                       cell2 = NULL;
+
+                       cell->prison = prison;
+                       memcpy(&cell->key, key, sizeof(cell->key));
+                       cell->count = 0;
+                       bio_list_init(&cell->bios);
+                       hlist_add_head(&cell->list, prison->cells + hash);
+               }
+       }
+
+       r = cell->count++;
+       bio_list_add(&cell->bios, inmate);
+       spin_unlock_irqrestore(&prison->lock, flags);
+
+       if (cell2)
+               mempool_free(cell2, prison->cell_pool);
+
+       *ref = cell;
+
+       return r;
+}
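
A sketch of the detain/release protocol this enables (hypothetical caller; build_data_key() is defined further down in this file):

/*
 * Illustrative only: the first bio to detain a data block (r == 0)
 * performs the copy or zeroing; later bios wait in the cell and are
 * all handed back for remapping when the holder releases it.
 */
static void example_detain(struct bio_prison *prison,
			   struct dm_thin_device *td,
			   dm_block_t data_block, struct bio *bio,
			   struct bio_list *ready)
{
	struct cell_key key;
	struct cell *cell;

	build_data_key(td, data_block, &key);
	if (bio_detain(prison, &key, bio, &cell) > 0)
		return;	/* already locked; bio is parked in the cell */

	/* ... first holder does the work, then frees every inmate: */
	cell_release(cell, ready);
}
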
+
+/*
+ * @inmates must have been initialised prior to this call
+ */
+static void __cell_release(struct cell *cell, struct bio_list *inmates)
+{
+       struct bio_prison *prison = cell->prison;
+
+       hlist_del(&cell->list);
+
+       if (inmates)
+               bio_list_merge(inmates, &cell->bios);
+
+       mempool_free(cell, prison->cell_pool);
+}
+
+static void cell_release(struct cell *cell, struct bio_list *bios)
+{
+       unsigned long flags;
+       struct bio_prison *prison = cell->prison;
+
+       spin_lock_irqsave(&prison->lock, flags);
+       __cell_release(cell, bios);
+       spin_unlock_irqrestore(&prison->lock, flags);
+}
+
+/*
+ * There are a couple of places where we put a bio into a cell briefly
+ * before taking it out again.  In these situations we know that no other
+ * bio may be in the cell.  This function releases the cell, and also does
+ * a sanity check.
+ */
+static void cell_release_singleton(struct cell *cell, struct bio *bio)
+{
+       struct bio_prison *prison = cell->prison;
+       struct bio_list bios;
+       struct bio *b;
+       unsigned long flags;
+
+       bio_list_init(&bios);
+
+       spin_lock_irqsave(&prison->lock, flags);
+       __cell_release(cell, &bios);
+       spin_unlock_irqrestore(&prison->lock, flags);
+
+       b = bio_list_pop(&bios);
+       BUG_ON(b != bio);
+       BUG_ON(!bio_list_empty(&bios));
+}
+
+static void cell_error(struct cell *cell)
+{
+       struct bio_prison *prison = cell->prison;
+       struct bio_list bios;
+       struct bio *bio;
+       unsigned long flags;
+
+       bio_list_init(&bios);
+
+       spin_lock_irqsave(&prison->lock, flags);
+       __cell_release(cell, &bios);
+       spin_unlock_irqrestore(&prison->lock, flags);
+
+       while ((bio = bio_list_pop(&bios)))
+               bio_io_error(bio);
+}
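+
+/*
+ * A minimal usage sketch of the prison interface above (illustration
+ * only, mirroring process_bio() below).  The first bio detained against
+ * a key does the work; later bios are parked in the cell until release:
+ *
+ *      build_virtual_key(tc->td, block, &key);
+ *      if (bio_detain(pool->prison, &key, bio, &cell))
+ *              return;                 <- block already being handled
+ *      ... provision or remap the block ...
+ *      cell_defer(tc, cell, block);    <- hand inmates back to the worker
+ */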
+
+/*----------------------------------------------------------------*/
+
+/*
+ * We use the deferred set to keep track of pending reads to shared blocks.
+ * We do this to ensure the new mapping caused by a write isn't performed
+ * until these prior reads have completed.  Otherwise the insertion of the
+ * new mapping could free the old block that the read bios are mapped to.
+ */
+
+struct deferred_set;
+struct deferred_entry {
+       struct deferred_set *ds;
+       unsigned count;
+       struct list_head work_items;
+};
+
+struct deferred_set {
+       spinlock_t lock;
+       unsigned current_entry;
+       unsigned sweeper;
+       struct deferred_entry entries[DEFERRED_SET_SIZE];
+};
+
+static void ds_init(struct deferred_set *ds)
+{
+       int i;
+
+       spin_lock_init(&ds->lock);
+       ds->current_entry = 0;
+       ds->sweeper = 0;
+       for (i = 0; i < DEFERRED_SET_SIZE; i++) {
+               ds->entries[i].ds = ds;
+               ds->entries[i].count = 0;
+               INIT_LIST_HEAD(&ds->entries[i].work_items);
+       }
+}
+
+static struct deferred_entry *ds_inc(struct deferred_set *ds)
+{
+       unsigned long flags;
+       struct deferred_entry *entry;
+
+       spin_lock_irqsave(&ds->lock, flags);
+       entry = ds->entries + ds->current_entry;
+       entry->count++;
+       spin_unlock_irqrestore(&ds->lock, flags);
+
+       return entry;
+}
+
+static unsigned ds_next(unsigned index)
+{
+       return (index + 1) % DEFERRED_SET_SIZE;
+}
+
+static void __sweep(struct deferred_set *ds, struct list_head *head)
+{
+       while ((ds->sweeper != ds->current_entry) &&
+              !ds->entries[ds->sweeper].count) {
+               list_splice_init(&ds->entries[ds->sweeper].work_items, head);
+               ds->sweeper = ds_next(ds->sweeper);
+       }
+
+       if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
+               list_splice_init(&ds->entries[ds->sweeper].work_items, head);
+}
+
+static void ds_dec(struct deferred_entry *entry, struct list_head *head)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&entry->ds->lock, flags);
+       BUG_ON(!entry->count);
+       --entry->count;
+       __sweep(entry->ds, head);
+       spin_unlock_irqrestore(&entry->ds->lock, flags);
+}
+
+/*
+ * Returns 1 if the work was deferred, or 0 if there was nothing pending
+ * and the job can be processed immediately.
+ */
+static int ds_add_work(struct deferred_set *ds, struct list_head *work)
+{
+       int r = 1;
+       unsigned long flags;
+       unsigned next_entry;
+
+       spin_lock_irqsave(&ds->lock, flags);
+       if ((ds->sweeper == ds->current_entry) &&
+           !ds->entries[ds->current_entry].count)
+               r = 0;
+       else {
+               list_add(work, &ds->entries[ds->current_entry].work_items);
+               next_entry = ds_next(ds->current_entry);
+               if (!ds->entries[next_entry].count)
+                       ds->current_entry = next_entry;
+       }
+       spin_unlock_irqrestore(&ds->lock, flags);
+
+       return r;
+}
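+
+/*
+ * A worked example of the deferred set (illustration only): a read of a
+ * shared block calls ds_inc() and holds the returned entry until its
+ * endio runs.  A write that then breaks sharing of the block passes its
+ * new_mapping's list to ds_add_work(); because the current entry has a
+ * non-zero count the work is parked and ds_add_work() returns 1.  When
+ * the last in-flight read calls ds_dec(), the count reaches zero,
+ * __sweep() splices the parked work onto the caller's list, and the
+ * mapping can finally be processed (see shared_read_endio() below).
+ */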
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Key building.
+ */
+static void build_data_key(struct dm_thin_device *td,
+                          dm_block_t b, struct cell_key *key)
+{
+       key->virtual = 0;
+       key->dev = dm_thin_dev_id(td);
+       key->block = b;
+}
+
+static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
+                             struct cell_key *key)
+{
+       key->virtual = 1;
+       key->dev = dm_thin_dev_id(td);
+       key->block = b;
+}
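+
+/*
+ * Two key namespaces are in use: virtual keys detain bios against a thin
+ * device's logical block while it is being provisioned (see
+ * process_bio() below), and data keys detain bios against a shared
+ * physical block while sharing is being broken (see
+ * process_shared_bio()).  The virtual flag keeps the two namespaces from
+ * colliding in the prison.
+ */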
+
+/*----------------------------------------------------------------*/
+
+/*
+ * A pool device ties together a metadata device and a data device.  It
+ * also provides the interface for creating and destroying internal
+ * devices.
+ */
+struct new_mapping;
+struct pool {
+       struct list_head list;
+       struct dm_target *ti;   /* Only set if a pool target is bound */
+
+       struct mapped_device *pool_md;
+       struct block_device *md_dev;
+       struct dm_pool_metadata *pmd;
+
+       uint32_t sectors_per_block;
+       unsigned block_shift;
+       dm_block_t offset_mask;
+       dm_block_t low_water_blocks;
+
+       unsigned zero_new_blocks:1;
+       unsigned low_water_triggered:1; /* A dm event has been sent */
+       unsigned no_free_space:1;       /* A -ENOSPC warning has been issued */
+
+       struct bio_prison *prison;
+       struct dm_kcopyd_client *copier;
+
+       struct workqueue_struct *wq;
+       struct work_struct worker;
+
+       unsigned ref_count;
+
+       spinlock_t lock;
+       struct bio_list deferred_bios;
+       struct bio_list deferred_flush_bios;
+       struct list_head prepared_mappings;
+
+       struct bio_list retry_on_resume_list;
+
+       struct deferred_set ds; /* FIXME: move to thin_c */
+
+       struct new_mapping *next_mapping;
+       mempool_t *mapping_pool;
+       mempool_t *endio_hook_pool;
+};
+
+/*
+ * Target context for a pool.
+ */
+struct pool_c {
+       struct dm_target *ti;
+       struct pool *pool;
+       struct dm_dev *data_dev;
+       struct dm_dev *metadata_dev;
+       struct dm_target_callbacks callbacks;
+
+       dm_block_t low_water_blocks;
+       unsigned zero_new_blocks:1;
+};
+
+/*
+ * Target context for a thin.
+ */
+struct thin_c {
+       struct dm_dev *pool_dev;
+       dm_thin_id dev_id;
+
+       struct pool *pool;
+       struct dm_thin_device *td;
+};
+
+/*----------------------------------------------------------------*/
+
+/*
+ * A global list of pools that uses a struct mapped_device as a key.
+ */
+static struct dm_thin_pool_table {
+       struct mutex mutex;
+       struct list_head pools;
+} dm_thin_pool_table;
+
+static void pool_table_init(void)
+{
+       mutex_init(&dm_thin_pool_table.mutex);
+       INIT_LIST_HEAD(&dm_thin_pool_table.pools);
+}
+
+static void __pool_table_insert(struct pool *pool)
+{
+       BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
+       list_add(&pool->list, &dm_thin_pool_table.pools);
+}
+
+static void __pool_table_remove(struct pool *pool)
+{
+       BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
+       list_del(&pool->list);
+}
+
+static struct pool *__pool_table_lookup(struct mapped_device *md)
+{
+       struct pool *pool = NULL, *tmp;
+
+       BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
+
+       list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
+               if (tmp->pool_md == md) {
+                       pool = tmp;
+                       break;
+               }
+       }
+
+       return pool;
+}
+
+static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev)
+{
+       struct pool *pool = NULL, *tmp;
+
+       BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
+
+       list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
+               if (tmp->md_dev == md_dev) {
+                       pool = tmp;
+                       break;
+               }
+       }
+
+       return pool;
+}
+
+/*----------------------------------------------------------------*/
+
+static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
+{
+       struct bio *bio;
+       struct bio_list bios;
+
+       bio_list_init(&bios);
+       bio_list_merge(&bios, master);
+       bio_list_init(master);
+
+       while ((bio = bio_list_pop(&bios))) {
+               if (dm_get_mapinfo(bio)->ptr == tc)
+                       bio_endio(bio, DM_ENDIO_REQUEUE);
+               else
+                       bio_list_add(master, bio);
+       }
+}
+
+static void requeue_io(struct thin_c *tc)
+{
+       struct pool *pool = tc->pool;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       __requeue_bio_list(tc, &pool->deferred_bios);
+       __requeue_bio_list(tc, &pool->retry_on_resume_list);
+       spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+/*
+ * This section of code contains the logic for processing a thin device's IO.
+ * Much of the code depends on pool object resources (lists, workqueues, etc.)
+ * but most is exclusively called from the thin target rather than the thin-pool
+ * target.
+ */
+
+static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
+{
+       return bio->bi_sector >> tc->pool->block_shift;
+}
+
+static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
+{
+       struct pool *pool = tc->pool;
+
+       bio->bi_bdev = tc->pool_dev->bdev;
+       bio->bi_sector = (block << pool->block_shift) +
+               (bio->bi_sector & pool->offset_mask);
+}
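+
+/*
+ * A worked example (illustrative numbers only): with sectors_per_block =
+ * 128, block_shift is 7 and offset_mask is 127.  A bio at sector 300
+ * lands in virtual block 2 (300 >> 7); if that block maps to data block
+ * 9, remap() rewrites bi_sector to (9 << 7) + (300 & 127) = 1196.
+ */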
+
+static void remap_and_issue(struct thin_c *tc, struct bio *bio,
+                           dm_block_t block)
+{
+       struct pool *pool = tc->pool;
+       unsigned long flags;
+
+       remap(tc, bio, block);
+
+       /*
+        * Batch together any FUA/FLUSH bios we find and then issue
+        * a single commit for them in process_deferred_bios().
+        */
+       if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+               spin_lock_irqsave(&pool->lock, flags);
+               bio_list_add(&pool->deferred_flush_bios, bio);
+               spin_unlock_irqrestore(&pool->lock, flags);
+       } else
+               generic_make_request(bio);
+}
+
+/*
+ * wake_worker() is used when new work is queued and when pool_resume is
+ * ready to continue deferred IO processing.
+ */
+static void wake_worker(struct pool *pool)
+{
+       queue_work(pool->wq, &pool->worker);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Bio endio functions.
+ */
+struct endio_hook {
+       struct thin_c *tc;
+       bio_end_io_t *saved_bi_end_io;
+       struct deferred_entry *entry;
+};
+
+struct new_mapping {
+       struct list_head list;
+
+       int prepared;
+
+       struct thin_c *tc;
+       dm_block_t virt_block;
+       dm_block_t data_block;
+       struct cell *cell;
+       int err;
+
+       /*
+        * If the bio covers the whole area of a block then we can avoid
+        * zeroing or copying.  Instead this bio is hooked.  The bio will
+        * still be in the cell, so care has to be taken to avoid issuing
+        * the bio twice.
+        */
+       struct bio *bio;
+       bio_end_io_t *saved_bi_end_io;
+};
+
+static void __maybe_add_mapping(struct new_mapping *m)
+{
+       struct pool *pool = m->tc->pool;
+
+       if (list_empty(&m->list) && m->prepared) {
+               list_add(&m->list, &pool->prepared_mappings);
+               wake_worker(pool);
+       }
+}
+
+static void copy_complete(int read_err, unsigned long write_err, void *context)
+{
+       unsigned long flags;
+       struct new_mapping *m = context;
+       struct pool *pool = m->tc->pool;
+
+       m->err = read_err || write_err ? -EIO : 0;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       m->prepared = 1;
+       __maybe_add_mapping(m);
+       spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+static void overwrite_endio(struct bio *bio, int err)
+{
+       unsigned long flags;
+       struct new_mapping *m = dm_get_mapinfo(bio)->ptr;
+       struct pool *pool = m->tc->pool;
+
+       m->err = err;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       m->prepared = 1;
+       __maybe_add_mapping(m);
+       spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+static void shared_read_endio(struct bio *bio, int err)
+{
+       struct list_head mappings;
+       struct new_mapping *m, *tmp;
+       struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
+       unsigned long flags;
+       struct pool *pool = h->tc->pool;
+
+       bio->bi_end_io = h->saved_bi_end_io;
+       bio_endio(bio, err);
+
+       INIT_LIST_HEAD(&mappings);
+       ds_dec(h->entry, &mappings);
+
+       spin_lock_irqsave(&pool->lock, flags);
+       list_for_each_entry_safe(m, tmp, &mappings, list) {
+               list_del(&m->list);
+               INIT_LIST_HEAD(&m->list);
+               __maybe_add_mapping(m);
+       }
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       mempool_free(h, pool->endio_hook_pool);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Workqueue.
+ */
+
+/*
+ * Prepared mapping jobs.
+ */
+
+/*
+ * This sends the bios in the cell back to the deferred_bios list.
+ */
+static void cell_defer(struct thin_c *tc, struct cell *cell,
+                      dm_block_t data_block)
+{
+       struct pool *pool = tc->pool;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       cell_release(cell, &pool->deferred_bios);
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       wake_worker(pool);
+}
+
+/*
+ * Same as cell_defer above, except it omits one particular detainee,
+ * a write bio that covers the block and has already been processed.
+ */
+static void cell_defer_except(struct thin_c *tc, struct cell *cell,
+                             struct bio *exception)
+{
+       struct bio_list bios;
+       struct bio *bio;
+       struct pool *pool = tc->pool;
+       unsigned long flags;
+
+       bio_list_init(&bios);
+       cell_release(cell, &bios);
+
+       spin_lock_irqsave(&pool->lock, flags);
+       while ((bio = bio_list_pop(&bios)))
+               if (bio != exception)
+                       bio_list_add(&pool->deferred_bios, bio);
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       wake_worker(pool);
+}
+
+static void process_prepared_mapping(struct new_mapping *m)
+{
+       struct thin_c *tc = m->tc;
+       struct bio *bio;
+       int r;
+
+       bio = m->bio;
+       if (bio)
+               bio->bi_end_io = m->saved_bi_end_io;
+
+       if (m->err) {
+               cell_error(m->cell);
+               return;
+       }
+
+       /*
+        * Commit the prepared block into the mapping btree.
+        * Any I/O for this block arriving after this point will get
+        * remapped to it directly.
+        */
+       r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
+       if (r) {
+               DMERR("dm_thin_insert_block() failed");
+               cell_error(m->cell);
+               return;
+       }
+
+       /*
+        * Release any bios held while the block was being provisioned.
+        * If we are processing a write bio that completely covers the block,
+        * we already processed it so can ignore it now when processing
+        * the bios in the cell.
+        */
+       if (bio) {
+               cell_defer_except(tc, m->cell, bio);
+               bio_endio(bio, 0);
+       } else
+               cell_defer(tc, m->cell, m->data_block);
+
+       list_del(&m->list);
+       mempool_free(m, tc->pool->mapping_pool);
+}
+
+static void process_prepared_mappings(struct pool *pool)
+{
+       unsigned long flags;
+       struct list_head maps;
+       struct new_mapping *m, *tmp;
+
+       INIT_LIST_HEAD(&maps);
+       spin_lock_irqsave(&pool->lock, flags);
+       list_splice_init(&pool->prepared_mappings, &maps);
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       list_for_each_entry_safe(m, tmp, &maps, list)
+               process_prepared_mapping(m);
+}
+
+/*
+ * Deferred bio jobs.
+ */
+static int io_overwrites_block(struct pool *pool, struct bio *bio)
+{
+       return ((bio_data_dir(bio) == WRITE) &&
+               !(bio->bi_sector & pool->offset_mask)) &&
+               (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
+}
+
+static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
+                              bio_end_io_t *fn)
+{
+       *save = bio->bi_end_io;
+       bio->bi_end_io = fn;
+}
+
+static int ensure_next_mapping(struct pool *pool)
+{
+       if (pool->next_mapping)
+               return 0;
+
+       pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC);
+
+       return pool->next_mapping ? 0 : -ENOMEM;
+}
+
+static struct new_mapping *get_next_mapping(struct pool *pool)
+{
+       struct new_mapping *r = pool->next_mapping;
+
+       BUG_ON(!pool->next_mapping);
+
+       pool->next_mapping = NULL;
+
+       return r;
+}
+
+static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
+                         dm_block_t data_origin, dm_block_t data_dest,
+                         struct cell *cell, struct bio *bio)
+{
+       int r;
+       struct pool *pool = tc->pool;
+       struct new_mapping *m = get_next_mapping(pool);
+
+       INIT_LIST_HEAD(&m->list);
+       m->prepared = 0;
+       m->tc = tc;
+       m->virt_block = virt_block;
+       m->data_block = data_dest;
+       m->cell = cell;
+       m->err = 0;
+       m->bio = NULL;
+
+       ds_add_work(&pool->ds, &m->list);
+
+       /*
+        * IO to pool_dev remaps to the pool target's data_dev.
+        *
+        * If the whole block of data is being overwritten, we can issue the
+        * bio immediately. Otherwise we use kcopyd to clone the data first.
+        */
+       if (io_overwrites_block(pool, bio)) {
+               m->bio = bio;
+               save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
+               dm_get_mapinfo(bio)->ptr = m;
+               remap_and_issue(tc, bio, data_dest);
+       } else {
+               struct dm_io_region from, to;
+
+               from.bdev = tc->pool_dev->bdev;
+               from.sector = data_origin * pool->sectors_per_block;
+               from.count = pool->sectors_per_block;
+
+               to.bdev = tc->pool_dev->bdev;
+               to.sector = data_dest * pool->sectors_per_block;
+               to.count = pool->sectors_per_block;
+
+               r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
+                                  0, copy_complete, m);
+               if (r < 0) {
+                       mempool_free(m, pool->mapping_pool);
+                       DMERR("dm_kcopyd_copy() failed");
+                       cell_error(cell);
+               }
+       }
+}
+
+static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
+                         dm_block_t data_block, struct cell *cell,
+                         struct bio *bio)
+{
+       struct pool *pool = tc->pool;
+       struct new_mapping *m = get_next_mapping(pool);
+
+       INIT_LIST_HEAD(&m->list);
+       m->prepared = 0;
+       m->tc = tc;
+       m->virt_block = virt_block;
+       m->data_block = data_block;
+       m->cell = cell;
+       m->err = 0;
+       m->bio = NULL;
+
+       /*
+        * If the whole block of data is being overwritten or we are not
+        * zeroing pre-existing data, we can issue the bio immediately.
+        * Otherwise we use kcopyd to zero the data first.
+        */
+       if (!pool->zero_new_blocks)
+               process_prepared_mapping(m);
+
+       else if (io_overwrites_block(pool, bio)) {
+               m->bio = bio;
+               save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
+               dm_get_mapinfo(bio)->ptr = m;
+               remap_and_issue(tc, bio, data_block);
+
+       } else {
+               int r;
+               struct dm_io_region to;
+
+               to.bdev = tc->pool_dev->bdev;
+               to.sector = data_block * pool->sectors_per_block;
+               to.count = pool->sectors_per_block;
+
+               r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
+               if (r < 0) {
+                       mempool_free(m, pool->mapping_pool);
+                       DMERR("dm_kcopyd_zero() failed");
+                       cell_error(cell);
+               }
+       }
+}
+
+static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
+{
+       int r;
+       dm_block_t free_blocks;
+       unsigned long flags;
+       struct pool *pool = tc->pool;
+
+       r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
+       if (r)
+               return r;
+
+       if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
+               DMWARN("%s: reached low water mark, sending event.",
+                      dm_device_name(pool->pool_md));
+               spin_lock_irqsave(&pool->lock, flags);
+               pool->low_water_triggered = 1;
+               spin_unlock_irqrestore(&pool->lock, flags);
+               dm_table_event(pool->ti->table);
+       }
+
+       if (!free_blocks) {
+               if (pool->no_free_space)
+                       return -ENOSPC;
+               else {
+                       /*
+                        * Try to commit to see if that will free up some
+                        * more space.
+                        */
+                       r = dm_pool_commit_metadata(pool->pmd);
+                       if (r) {
+                               DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
+                                     __func__, r);
+                               return r;
+                       }
+
+                       r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
+                       if (r)
+                               return r;
+
+                       /*
+                        * If we still have no space we set a flag to avoid
+                        * doing all this checking and return -ENOSPC.
+                        */
+                       if (!free_blocks) {
+                               DMWARN("%s: no free space available.",
+                                      dm_device_name(pool->pool_md));
+                               spin_lock_irqsave(&pool->lock, flags);
+                               pool->no_free_space = 1;
+                               spin_unlock_irqrestore(&pool->lock, flags);
+                               return -ENOSPC;
+                       }
+               }
+       }
+
+       r = dm_pool_alloc_data_block(pool->pmd, result);
+       if (r)
+               return r;
+
+       return 0;
+}
+
+/*
+ * If we have run out of space, queue bios until the device is
+ * resumed, presumably after having been reloaded with more space.
+ */
+static void retry_on_resume(struct bio *bio)
+{
+       struct thin_c *tc = dm_get_mapinfo(bio)->ptr;
+       struct pool *pool = tc->pool;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       bio_list_add(&pool->retry_on_resume_list, bio);
+       spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+static void no_space(struct cell *cell)
+{
+       struct bio *bio;
+       struct bio_list bios;
+
+       bio_list_init(&bios);
+       cell_release(cell, &bios);
+
+       while ((bio = bio_list_pop(&bios)))
+               retry_on_resume(bio);
+}
+
+static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
+                         struct cell_key *key,
+                         struct dm_thin_lookup_result *lookup_result,
+                         struct cell *cell)
+{
+       int r;
+       dm_block_t data_block;
+
+       r = alloc_data_block(tc, &data_block);
+       switch (r) {
+       case 0:
+               schedule_copy(tc, block, lookup_result->block,
+                             data_block, cell, bio);
+               break;
+
+       case -ENOSPC:
+               no_space(cell);
+               break;
+
+       default:
+               DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
+               cell_error(cell);
+               break;
+       }
+}
+
+static void process_shared_bio(struct thin_c *tc, struct bio *bio,
+                              dm_block_t block,
+                              struct dm_thin_lookup_result *lookup_result)
+{
+       struct cell *cell;
+       struct pool *pool = tc->pool;
+       struct cell_key key;
+
+       /*
+        * If cell is already occupied, then sharing is already in the process
+        * of being broken so we have nothing further to do here.
+        */
+       build_data_key(tc->td, lookup_result->block, &key);
+       if (bio_detain(pool->prison, &key, bio, &cell))
+               return;
+
+       if (bio_data_dir(bio) == WRITE)
+               break_sharing(tc, bio, block, &key, lookup_result, cell);
+       else {
+               struct endio_hook *h;
+               h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
+
+               h->tc = tc;
+               h->entry = ds_inc(&pool->ds);
+               save_and_set_endio(bio, &h->saved_bi_end_io, shared_read_endio);
+               dm_get_mapinfo(bio)->ptr = h;
+
+               cell_release_singleton(cell, bio);
+               remap_and_issue(tc, bio, lookup_result->block);
+       }
+}
+
+static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block,
+                           struct cell *cell)
+{
+       int r;
+       dm_block_t data_block;
+
+       /*
+        * Remap empty bios (flushes) immediately, without provisioning.
+        */
+       if (!bio->bi_size) {
+               cell_release_singleton(cell, bio);
+               remap_and_issue(tc, bio, 0);
+               return;
+       }
+
+       /*
+        * Fill read bios with zeroes and complete them immediately.
+        */
+       if (bio_data_dir(bio) == READ) {
+               zero_fill_bio(bio);
+               cell_release_singleton(cell, bio);
+               bio_endio(bio, 0);
+               return;
+       }
+
+       r = alloc_data_block(tc, &data_block);
+       switch (r) {
+       case 0:
+               schedule_zero(tc, block, data_block, cell, bio);
+               break;
+
+       case -ENOSPC:
+               no_space(cell);
+               break;
+
+       default:
+               DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
+               cell_error(cell);
+               break;
+       }
+}
+
+static void process_bio(struct thin_c *tc, struct bio *bio)
+{
+       int r;
+       dm_block_t block = get_bio_block(tc, bio);
+       struct cell *cell;
+       struct cell_key key;
+       struct dm_thin_lookup_result lookup_result;
+
+       /*
+        * If cell is already occupied, then the block is already
+        * being provisioned so we have nothing further to do here.
+        */
+       build_virtual_key(tc->td, block, &key);
+       if (bio_detain(tc->pool->prison, &key, bio, &cell))
+               return;
+
+       r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
+       switch (r) {
+       case 0:
+               /*
+                * We can release this cell now.  This thread is the only
+                * one that puts bios into a cell, and we know there were
+                * no preceding bios.
+                */
+               /*
+                * TODO: this will probably have to change when discard goes
+                * back in.
+                */
+               cell_release_singleton(cell, bio);
+
+               if (lookup_result.shared)
+                       process_shared_bio(tc, bio, block, &lookup_result);
+               else
+                       remap_and_issue(tc, bio, lookup_result.block);
+               break;
+
+       case -ENODATA:
+               provision_block(tc, bio, block, cell);
+               break;
+
+       default:
+               DMERR("dm_thin_find_block() failed, error = %d", r);
+               bio_io_error(bio);
+               break;
+       }
+}
+
+static void process_deferred_bios(struct pool *pool)
+{
+       unsigned long flags;
+       struct bio *bio;
+       struct bio_list bios;
+       int r;
+
+       bio_list_init(&bios);
+
+       spin_lock_irqsave(&pool->lock, flags);
+       bio_list_merge(&bios, &pool->deferred_bios);
+       bio_list_init(&pool->deferred_bios);
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       while ((bio = bio_list_pop(&bios))) {
+               struct thin_c *tc = dm_get_mapinfo(bio)->ptr;
+               /*
+                * If we've got no free new_mapping structs, and processing
+                * this bio might require one, we pause until there are some
+                * prepared mappings to process.
+                */
+               if (ensure_next_mapping(pool)) {
+                       spin_lock_irqsave(&pool->lock, flags);
+                       bio_list_merge(&pool->deferred_bios, &bios);
+                       spin_unlock_irqrestore(&pool->lock, flags);
+
+                       break;
+               }
+               process_bio(tc, bio);
+       }
+
+       /*
+        * If there are any deferred flush bios, we must commit
+        * the metadata before issuing them.
+        */
+       bio_list_init(&bios);
+       spin_lock_irqsave(&pool->lock, flags);
+       bio_list_merge(&bios, &pool->deferred_flush_bios);
+       bio_list_init(&pool->deferred_flush_bios);
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       if (bio_list_empty(&bios))
+               return;
+
+       r = dm_pool_commit_metadata(pool->pmd);
+       if (r) {
+               DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
+                     __func__, r);
+               while ((bio = bio_list_pop(&bios)))
+                       bio_io_error(bio);
+               return;
+       }
+
+       while ((bio = bio_list_pop(&bios)))
+               generic_make_request(bio);
+}
+
+static void do_worker(struct work_struct *ws)
+{
+       struct pool *pool = container_of(ws, struct pool, worker);
+
+       process_prepared_mappings(pool);
+       process_deferred_bios(pool);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Mapping functions.
+ */
+
+/*
+ * Called only while mapping a thin bio to hand it over to the workqueue.
+ */
+static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
+{
+       unsigned long flags;
+       struct pool *pool = tc->pool;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       bio_list_add(&pool->deferred_bios, bio);
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       wake_worker(pool);
+}
+
+/*
+ * Non-blocking function called from the thin target's map function.
+ */
+static int thin_bio_map(struct dm_target *ti, struct bio *bio,
+                       union map_info *map_context)
+{
+       int r;
+       struct thin_c *tc = ti->private;
+       dm_block_t block = get_bio_block(tc, bio);
+       struct dm_thin_device *td = tc->td;
+       struct dm_thin_lookup_result result;
+
+       /*
+        * Save the thin context for easy access from the deferred bio later.
+        */
+       map_context->ptr = tc;
+
+       if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+               thin_defer_bio(tc, bio);
+               return DM_MAPIO_SUBMITTED;
+       }
+
+       r = dm_thin_find_block(td, block, 0, &result);
+
+       /*
+        * Note that we defer readahead too.
+        */
+       switch (r) {
+       case 0:
+               if (unlikely(result.shared)) {
+                       /*
+                        * We have a race condition here between the
+                        * result.shared value returned by the lookup and
+                        * snapshot creation, which may cause new
+                        * sharing.
+                        *
+                        * To avoid this always quiesce the origin before
+                        * taking the snap.  You want to do this anyway to
+                        * ensure a consistent application view
+                        * (i.e. lockfs).
+                        *
+                        * More distant ancestors are irrelevant. The
+                        * shared flag will be set in their case.
+                        */
+                       thin_defer_bio(tc, bio);
+                       r = DM_MAPIO_SUBMITTED;
+               } else {
+                       remap(tc, bio, result.block);
+                       r = DM_MAPIO_REMAPPED;
+               }
+               break;
+
+       case -ENODATA:
+               /*
+                * In future, the failed dm_thin_find_block above could
+                * provide the hint to load the metadata into cache.
+                */
+       case -EWOULDBLOCK:
+               thin_defer_bio(tc, bio);
+               r = DM_MAPIO_SUBMITTED;
+               break;
+       }
+
+       return r;
+}
+
+static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
+{
+       int r;
+       unsigned long flags;
+       struct pool_c *pt = container_of(cb, struct pool_c, callbacks);
+
+       spin_lock_irqsave(&pt->pool->lock, flags);
+       r = !bio_list_empty(&pt->pool->retry_on_resume_list);
+       spin_unlock_irqrestore(&pt->pool->lock, flags);
+
+       if (!r) {
+               struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
+               r = bdi_congested(&q->backing_dev_info, bdi_bits);
+       }
+
+       return r;
+}
+
+static void __requeue_bios(struct pool *pool)
+{
+       bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list);
+       bio_list_init(&pool->retry_on_resume_list);
+}
+
+/*----------------------------------------------------------------
+ * Binding of control targets to a pool object
+ *--------------------------------------------------------------*/
+static int bind_control_target(struct pool *pool, struct dm_target *ti)
+{
+       struct pool_c *pt = ti->private;
+
+       pool->ti = ti;
+       pool->low_water_blocks = pt->low_water_blocks;
+       pool->zero_new_blocks = pt->zero_new_blocks;
+
+       return 0;
+}
+
+static void unbind_control_target(struct pool *pool, struct dm_target *ti)
+{
+       if (pool->ti == ti)
+               pool->ti = NULL;
+}
+
+/*----------------------------------------------------------------
+ * Pool creation
+ *--------------------------------------------------------------*/
+static void __pool_destroy(struct pool *pool)
+{
+       __pool_table_remove(pool);
+
+       if (dm_pool_metadata_close(pool->pmd) < 0)
+               DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
+
+       prison_destroy(pool->prison);
+       dm_kcopyd_client_destroy(pool->copier);
+
+       if (pool->wq)
+               destroy_workqueue(pool->wq);
+
+       if (pool->next_mapping)
+               mempool_free(pool->next_mapping, pool->mapping_pool);
+       mempool_destroy(pool->mapping_pool);
+       mempool_destroy(pool->endio_hook_pool);
+       kfree(pool);
+}
+
+static struct pool *pool_create(struct mapped_device *pool_md,
+                               struct block_device *metadata_dev,
+                               unsigned long block_size, char **error)
+{
+       int r;
+       void *err_p;
+       struct pool *pool;
+       struct dm_pool_metadata *pmd;
+
+       pmd = dm_pool_metadata_open(metadata_dev, block_size);
+       if (IS_ERR(pmd)) {
+               *error = "Error creating metadata object";
+               return ERR_CAST(pmd);
+       }
+
+       pool = kmalloc(sizeof(*pool), GFP_KERNEL);
+       if (!pool) {
+               *error = "Error allocating memory for pool";
+               err_p = ERR_PTR(-ENOMEM);
+               goto bad_pool;
+       }
+
+       pool->pmd = pmd;
+       pool->sectors_per_block = block_size;
+       pool->block_shift = ffs(block_size) - 1;
+       pool->offset_mask = block_size - 1;
+       pool->low_water_blocks = 0;
+       pool->zero_new_blocks = 1;
+       pool->prison = prison_create(PRISON_CELLS);
+       if (!pool->prison) {
+               *error = "Error creating pool's bio prison";
+               err_p = ERR_PTR(-ENOMEM);
+               goto bad_prison;
+       }
+
+       pool->copier = dm_kcopyd_client_create();
+       if (IS_ERR(pool->copier)) {
+               r = PTR_ERR(pool->copier);
+               *error = "Error creating pool's kcopyd client";
+               err_p = ERR_PTR(r);
+               goto bad_kcopyd_client;
+       }
+
+       /*
+        * Create a single-threaded workqueue that will service all devices
+        * that use this metadata.
+        */
+       pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
+       if (!pool->wq) {
+               *error = "Error creating pool's workqueue";
+               err_p = ERR_PTR(-ENOMEM);
+               goto bad_wq;
+       }
+
+       INIT_WORK(&pool->worker, do_worker);
+       spin_lock_init(&pool->lock);
+       bio_list_init(&pool->deferred_bios);
+       bio_list_init(&pool->deferred_flush_bios);
+       INIT_LIST_HEAD(&pool->prepared_mappings);
+       pool->low_water_triggered = 0;
+       pool->no_free_space = 0;
+       bio_list_init(&pool->retry_on_resume_list);
+       ds_init(&pool->ds);
+
+       pool->next_mapping = NULL;
+       pool->mapping_pool =
+               mempool_create_kmalloc_pool(MAPPING_POOL_SIZE, sizeof(struct new_mapping));
+       if (!pool->mapping_pool) {
+               *error = "Error creating pool's mapping mempool";
+               err_p = ERR_PTR(-ENOMEM);
+               goto bad_mapping_pool;
+       }
+
+       pool->endio_hook_pool =
+               mempool_create_kmalloc_pool(ENDIO_HOOK_POOL_SIZE, sizeof(struct endio_hook));
+       if (!pool->endio_hook_pool) {
+               *error = "Error creating pool's endio_hook mempool";
+               err_p = ERR_PTR(-ENOMEM);
+               goto bad_endio_hook_pool;
+       }
+       pool->ref_count = 1;
+       pool->pool_md = pool_md;
+       pool->md_dev = metadata_dev;
+       __pool_table_insert(pool);
+
+       return pool;
+
+bad_endio_hook_pool:
+       mempool_destroy(pool->mapping_pool);
+bad_mapping_pool:
+       destroy_workqueue(pool->wq);
+bad_wq:
+       dm_kcopyd_client_destroy(pool->copier);
+bad_kcopyd_client:
+       prison_destroy(pool->prison);
+bad_prison:
+       kfree(pool);
+bad_pool:
+       if (dm_pool_metadata_close(pmd))
+               DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
+
+       return err_p;
+}
+
+static void __pool_inc(struct pool *pool)
+{
+       BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
+       pool->ref_count++;
+}
+
+static void __pool_dec(struct pool *pool)
+{
+       BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
+       BUG_ON(!pool->ref_count);
+       if (!--pool->ref_count)
+               __pool_destroy(pool);
+}
+
+static struct pool *__pool_find(struct mapped_device *pool_md,
+                               struct block_device *metadata_dev,
+                               unsigned long block_size, char **error)
+{
+       struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
+
+       if (pool) {
+               if (pool->pool_md != pool_md)
+                       return ERR_PTR(-EBUSY);
+               __pool_inc(pool);
+
+       } else {
+               pool = __pool_table_lookup(pool_md);
+               if (pool) {
+                       if (pool->md_dev != metadata_dev)
+                               return ERR_PTR(-EINVAL);
+                       __pool_inc(pool);
+
+               } else
+                       pool = pool_create(pool_md, metadata_dev, block_size, error);
+       }
+
+       return pool;
+}
+
+/*----------------------------------------------------------------
+ * Pool target methods
+ *--------------------------------------------------------------*/
+static void pool_dtr(struct dm_target *ti)
+{
+       struct pool_c *pt = ti->private;
+
+       mutex_lock(&dm_thin_pool_table.mutex);
+
+       unbind_control_target(pt->pool, ti);
+       __pool_dec(pt->pool);
+       dm_put_device(ti, pt->metadata_dev);
+       dm_put_device(ti, pt->data_dev);
+       kfree(pt);
+
+       mutex_unlock(&dm_thin_pool_table.mutex);
+}
+
+struct pool_features {
+       unsigned zero_new_blocks:1;
+};
+
+static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
+                              struct dm_target *ti)
+{
+       int r;
+       unsigned argc;
+       const char *arg_name;
+
+       static struct dm_arg _args[] = {
+               {0, 1, "Invalid number of pool feature arguments"},
+       };
+
+       /*
+        * No feature arguments supplied.
+        */
+       if (!as->argc)
+               return 0;
+
+       r = dm_read_arg_group(_args, as, &argc, &ti->error);
+       if (r)
+               return -EINVAL;
+
+       while (argc && !r) {
+               arg_name = dm_shift_arg(as);
+               argc--;
+
+               if (!strcasecmp(arg_name, "skip_block_zeroing")) {
+                       pf->zero_new_blocks = 0;
+                       continue;
+               }
+
+               ti->error = "Unrecognised pool feature requested";
+               r = -EINVAL;
+       }
+
+       return r;
+}
+
+/*
+ * thin-pool <metadata dev> <data dev>
+ *          <data block size (sectors)>
+ *          <low water mark (blocks)>
+ *          [<#feature args> [<arg>]*]
+ *
+ * Optional feature arguments are:
+ *          skip_block_zeroing: skips the zeroing of newly-provisioned blocks.
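+ *
+ * An illustrative invocation via dmsetup (hypothetical device names,
+ * sizes and low water mark, not taken from this patch):
+ *
+ *   dmsetup create pool --table \
+ *     "0 20971520 thin-pool /dev/sdb /dev/sdc 128 16384 1 skip_block_zeroing"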
+ */
+static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+       int r;
+       struct pool_c *pt;
+       struct pool *pool;
+       struct pool_features pf;
+       struct dm_arg_set as;
+       struct dm_dev *data_dev;
+       unsigned long block_size;
+       dm_block_t low_water_blocks;
+       struct dm_dev *metadata_dev;
+       sector_t metadata_dev_size;
+
+       /*
+        * FIXME Remove validation from scope of lock.
+        */
+       mutex_lock(&dm_thin_pool_table.mutex);
+
+       if (argc < 4) {
+               ti->error = "Invalid argument count";
+               r = -EINVAL;
+               goto out_unlock;
+       }
+       as.argc = argc;
+       as.argv = argv;
+
+       r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &metadata_dev);
+       if (r) {
+               ti->error = "Error opening metadata block device";
+               goto out_unlock;
+       }
+
+       metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT;
+       if (metadata_dev_size > METADATA_DEV_MAX_SECTORS) {
+               ti->error = "Metadata device is too large";
+               r = -EINVAL;
+               goto out_metadata;
+       }
+
+       r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev);
+       if (r) {
+               ti->error = "Error getting data device";
+               goto out_metadata;
+       }
+
+       if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
+           block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
+           block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
+           !is_power_of_2(block_size)) {
+               ti->error = "Invalid block size";
+               r = -EINVAL;
+               goto out;
+       }
+
+       if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) {
+               ti->error = "Invalid low water mark";
+               r = -EINVAL;
+               goto out;
+       }
+
+       /*
+        * Set default pool features.
+        */
+       memset(&pf, 0, sizeof(pf));
+       pf.zero_new_blocks = 1;
+
+       dm_consume_args(&as, 4);
+       r = parse_pool_features(&as, &pf, ti);
+       if (r)
+               goto out;
+
+       pt = kzalloc(sizeof(*pt), GFP_KERNEL);
+       if (!pt) {
+               r = -ENOMEM;
+               goto out;
+       }
+
+       pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev,
+                          block_size, &ti->error);
+       if (IS_ERR(pool)) {
+               r = PTR_ERR(pool);
+               goto out_free_pt;
+       }
+
+       pt->pool = pool;
+       pt->ti = ti;
+       pt->metadata_dev = metadata_dev;
+       pt->data_dev = data_dev;
+       pt->low_water_blocks = low_water_blocks;
+       pt->zero_new_blocks = pf.zero_new_blocks;
+       ti->num_flush_requests = 1;
+       ti->num_discard_requests = 0;
+       ti->private = pt;
+
+       pt->callbacks.congested_fn = pool_is_congested;
+       dm_table_add_target_callbacks(ti->table, &pt->callbacks);
+
+       mutex_unlock(&dm_thin_pool_table.mutex);
+
+       return 0;
+
+out_free_pt:
+       kfree(pt);
+out:
+       dm_put_device(ti, data_dev);
+out_metadata:
+       dm_put_device(ti, metadata_dev);
+out_unlock:
+       mutex_unlock(&dm_thin_pool_table.mutex);
+
+       return r;
+}
+
+static int pool_map(struct dm_target *ti, struct bio *bio,
+                   union map_info *map_context)
+{
+       int r;
+       struct pool_c *pt = ti->private;
+       struct pool *pool = pt->pool;
+       unsigned long flags;
+
+       /*
+        * As this is a singleton target, ti->begin is always zero.
+        */
+       spin_lock_irqsave(&pool->lock, flags);
+       bio->bi_bdev = pt->data_dev->bdev;
+       r = DM_MAPIO_REMAPPED;
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       return r;
+}
+
+/*
+ * Retrieves the number of blocks of the data device from the superblock
+ * and compares it to the actual device size, resizing the data device if
+ * it has grown.
+ *
+ * This copes both with opening a preallocated data device in the ctr
+ * followed by a resume, and with calling the resume method individually
+ * after userspace has grown the data device in reaction to a table event.
+ */
+static int pool_preresume(struct dm_target *ti)
+{
+       int r;
+       struct pool_c *pt = ti->private;
+       struct pool *pool = pt->pool;
+       dm_block_t data_size, sb_data_size;
+
+       /*
+        * Take control of the pool object.
+        */
+       r = bind_control_target(pool, ti);
+       if (r)
+               return r;
+
+       data_size = ti->len >> pool->block_shift;
+       r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
+       if (r) {
+               DMERR("failed to retrieve data device size");
+               return r;
+       }
+
+       if (data_size < sb_data_size) {
+               DMERR("pool target too small, is %llu blocks (expected %llu)",
+                     (unsigned long long)data_size,
+                     (unsigned long long)sb_data_size);
+               return -EINVAL;
+
+       } else if (data_size > sb_data_size) {
+               r = dm_pool_resize_data_dev(pool->pmd, data_size);
+               if (r) {
+                       DMERR("failed to resize data device");
+                       return r;
+               }
+
+               r = dm_pool_commit_metadata(pool->pmd);
+               if (r) {
+                       DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
+                             __func__, r);
+                       return r;
+               }
+       }
+
+       return 0;
+}
+
+static void pool_resume(struct dm_target *ti)
+{
+       struct pool_c *pt = ti->private;
+       struct pool *pool = pt->pool;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       pool->low_water_triggered = 0;
+       pool->no_free_space = 0;
+       __requeue_bios(pool);
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       wake_worker(pool);
+}
+
+static void pool_postsuspend(struct dm_target *ti)
+{
+       int r;
+       struct pool_c *pt = ti->private;
+       struct pool *pool = pt->pool;
+
+       flush_workqueue(pool->wq);
+
+       r = dm_pool_commit_metadata(pool->pmd);
+       if (r < 0) {
+               DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
+                     __func__, r);
+               /* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/
+       }
+}
+
+static int check_arg_count(unsigned argc, unsigned args_required)
+{
+       if (argc != args_required) {
+               DMWARN("Message received with %u arguments instead of %u.",
+                      argc, args_required);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int read_dev_id(char *arg, dm_thin_id *dev_id, int warning)
+{
+       if (!kstrtoull(arg, 10, (unsigned long long *)dev_id) &&
+           *dev_id <= MAX_DEV_ID)
+               return 0;
+
+       if (warning)
+               DMWARN("Message received with invalid device id: %s", arg);
+
+       return -EINVAL;
+}
+
+static int process_create_thin_mesg(unsigned argc, char **argv, struct pool *pool)
+{
+       dm_thin_id dev_id;
+       int r;
+
+       r = check_arg_count(argc, 2);
+       if (r)
+               return r;
+
+       r = read_dev_id(argv[1], &dev_id, 1);
+       if (r)
+               return r;
+
+       r = dm_pool_create_thin(pool->pmd, dev_id);
+       if (r) {
+               DMWARN("Creation of new thinly-provisioned device with id %s failed.",
+                      argv[1]);
+               return r;
+       }
+
+       return 0;
+}
+
+static int process_create_snap_mesg(unsigned argc, char **argv, struct pool *pool)
+{
+       dm_thin_id dev_id;
+       dm_thin_id origin_dev_id;
+       int r;
+
+       r = check_arg_count(argc, 3);
+       if (r)
+               return r;
+
+       r = read_dev_id(argv[1], &dev_id, 1);
+       if (r)
+               return r;
+
+       r = read_dev_id(argv[2], &origin_dev_id, 1);
+       if (r)
+               return r;
+
+       r = dm_pool_create_snap(pool->pmd, dev_id, origin_dev_id);
+       if (r) {
+               DMWARN("Creation of new snapshot %s of device %s failed.",
+                      argv[1], argv[2]);
+               return r;
+       }
+
+       return 0;
+}
+
+static int process_delete_mesg(unsigned argc, char **argv, struct pool *pool)
+{
+       dm_thin_id dev_id;
+       int r;
+
+       r = check_arg_count(argc, 2);
+       if (r)
+               return r;
+
+       r = read_dev_id(argv[1], &dev_id, 1);
+       if (r)
+               return r;
+
+       r = dm_pool_delete_thin_device(pool->pmd, dev_id);
+       if (r)
+               DMWARN("Deletion of thin device %s failed.", argv[1]);
+
+       return r;
+}
+
+static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct pool *pool)
+{
+       dm_thin_id old_id, new_id;
+       int r;
+
+       r = check_arg_count(argc, 3);
+       if (r)
+               return r;
+
+       if (kstrtoull(argv[1], 10, (unsigned long long *)&old_id)) {
+               DMWARN("set_transaction_id message: Unrecognised id %s.", argv[1]);
+               return -EINVAL;
+       }
+
+       if (kstrtoull(argv[2], 10, (unsigned long long *)&new_id)) {
+               DMWARN("set_transaction_id message: Unrecognised new id %s.", argv[2]);
+               return -EINVAL;
+       }
+
+       r = dm_pool_set_metadata_transaction_id(pool->pmd, old_id, new_id);
+       if (r) {
+               DMWARN("Failed to change transaction id from %s to %s.",
+                      argv[1], argv[2]);
+               return r;
+       }
+
+       return 0;
+}
+
+/*
+ * Messages supported:
+ *   create_thin       <dev_id>
+ *   create_snap       <dev_id> <origin_id>
+ *   delete            <dev_id>
+ *   set_transaction_id <current_trans_id> <new_trans_id>
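+ *
+ * For example, with a hypothetical pool device at /dev/mapper/pool (the
+ * 0 selects the target at sector 0 of the pool):
+ *
+ *   dmsetup message /dev/mapper/pool 0 "create_thin 0"
+ *   dmsetup message /dev/mapper/pool 0 "create_snap 1 0"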
+ */
+static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+       int r = -EINVAL;
+       struct pool_c *pt = ti->private;
+       struct pool *pool = pt->pool;
+
+       if (!strcasecmp(argv[0], "create_thin"))
+               r = process_create_thin_mesg(argc, argv, pool);
+
+       else if (!strcasecmp(argv[0], "create_snap"))
+               r = process_create_snap_mesg(argc, argv, pool);
+
+       else if (!strcasecmp(argv[0], "delete"))
+               r = process_delete_mesg(argc, argv, pool);
+
+       else if (!strcasecmp(argv[0], "set_transaction_id"))
+               r = process_set_transaction_id_mesg(argc, argv, pool);
+
+       else
+               DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
+
+       if (!r) {
+               r = dm_pool_commit_metadata(pool->pmd);
+               if (r)
+                       DMERR("%s message: dm_pool_commit_metadata() failed, error = %d",
+                             argv[0], r);
+       }
+
+       return r;
+}
+
+/*
+ * Status line is:
+ *    <transaction id> <used metadata blocks>/<total metadata blocks>
+ *    <used data blocks>/<total data blocks> <held metadata root>
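+ *
+ * e.g. "0 89/4096 949/245760 -" (illustrative values; "-" means no
+ * metadata root is currently held)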
+ */
+static int pool_status(struct dm_target *ti, status_type_t type,
+                      char *result, unsigned maxlen)
+{
+       int r;
+       unsigned sz = 0;
+       uint64_t transaction_id;
+       dm_block_t nr_free_blocks_data;
+       dm_block_t nr_free_blocks_metadata;
+       dm_block_t nr_blocks_data;
+       dm_block_t nr_blocks_metadata;
+       dm_block_t held_root;
+       char buf[BDEVNAME_SIZE];
+       char buf2[BDEVNAME_SIZE];
+       struct pool_c *pt = ti->private;
+       struct pool *pool = pt->pool;
+
+       switch (type) {
+       case STATUSTYPE_INFO:
+               r = dm_pool_get_metadata_transaction_id(pool->pmd,
+                                                       &transaction_id);
+               if (r)
+                       return r;
+
+               r = dm_pool_get_free_metadata_block_count(pool->pmd,
+                                                         &nr_free_blocks_metadata);
+               if (r)
+                       return r;
+
+               r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
+               if (r)
+                       return r;
+
+               r = dm_pool_get_free_block_count(pool->pmd,
+                                                &nr_free_blocks_data);
+               if (r)
+                       return r;
+
+               r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
+               if (r)
+                       return r;
+
+               r = dm_pool_get_held_metadata_root(pool->pmd, &held_root);
+               if (r)
+                       return r;
+
+               DMEMIT("%llu %llu/%llu %llu/%llu ",
+                      (unsigned long long)transaction_id,
+                      (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
+                      (unsigned long long)nr_blocks_metadata,
+                      (unsigned long long)(nr_blocks_data - nr_free_blocks_data),
+                      (unsigned long long)nr_blocks_data);
+
+               if (held_root)
+                       DMEMIT("%llu", held_root);
+               else
+                       DMEMIT("-");
+
+               break;
+
+       case STATUSTYPE_TABLE:
+               DMEMIT("%s %s %lu %llu ",
+                      format_dev_t(buf, pt->metadata_dev->bdev->bd_dev),
+                      format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
+                      (unsigned long)pool->sectors_per_block,
+                      (unsigned long long)pt->low_water_blocks);
+
+               DMEMIT("%u ", !pool->zero_new_blocks);
+
+               if (!pool->zero_new_blocks)
+                       DMEMIT("skip_block_zeroing ");
+               break;
+       }
+
+       return 0;
+}
+
+static int pool_iterate_devices(struct dm_target *ti,
+                               iterate_devices_callout_fn fn, void *data)
+{
+       struct pool_c *pt = ti->private;
+
+       return fn(ti, pt->data_dev, 0, ti->len, data);
+}
+
+static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
+                     struct bio_vec *biovec, int max_size)
+{
+       struct pool_c *pt = ti->private;
+       struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
+
+       if (!q->merge_bvec_fn)
+               return max_size;
+
+       bvm->bi_bdev = pt->data_dev->bdev;
+
+       return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
+}
+
+static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+       struct pool_c *pt = ti->private;
+       struct pool *pool = pt->pool;
+
+       blk_limits_io_min(limits, 0);
+       blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+}
+
+static struct target_type pool_target = {
+       .name = "thin-pool",
+       .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
+                   DM_TARGET_IMMUTABLE,
+       .version = {1, 0, 0},
+       .module = THIS_MODULE,
+       .ctr = pool_ctr,
+       .dtr = pool_dtr,
+       .map = pool_map,
+       .postsuspend = pool_postsuspend,
+       .preresume = pool_preresume,
+       .resume = pool_resume,
+       .message = pool_message,
+       .status = pool_status,
+       .merge = pool_merge,
+       .iterate_devices = pool_iterate_devices,
+       .io_hints = pool_io_hints,
+};
+
+/*----------------------------------------------------------------
+ * Thin target methods
+ *--------------------------------------------------------------*/
+static void thin_dtr(struct dm_target *ti)
+{
+       struct thin_c *tc = ti->private;
+
+       mutex_lock(&dm_thin_pool_table.mutex);
+
+       __pool_dec(tc->pool);
+       dm_pool_close_thin_device(tc->td);
+       dm_put_device(ti, tc->pool_dev);
+       kfree(tc);
+
+       mutex_unlock(&dm_thin_pool_table.mutex);
+}
+
+/*
+ * Thin target parameters:
+ *
+ * <pool_dev> <dev_id>
+ *
+ * pool_dev: the path to the pool (e.g. /dev/mapper/my_pool)
+ * dev_id: the internal device identifier
+ */
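+/*
+ * For illustration only (hypothetical numbers): activating a 1GiB thin
+ * device (2097152 sectors) with internal id 0 might look like:
+ *
+ *   dmsetup create thin0 --table "0 2097152 thin /dev/mapper/my_pool 0"
+ */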
+static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+       int r;
+       struct thin_c *tc;
+       struct dm_dev *pool_dev;
+       struct mapped_device *pool_md;
+
+       mutex_lock(&dm_thin_pool_table.mutex);
+
+       if (argc != 2) {
+               ti->error = "Invalid argument count";
+               r = -EINVAL;
+               goto out_unlock;
+       }
+
+       tc = ti->private = kzalloc(sizeof(*tc), GFP_KERNEL);
+       if (!tc) {
+               ti->error = "Out of memory";
+               r = -ENOMEM;
+               goto out_unlock;
+       }
+
+       r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev);
+       if (r) {
+               ti->error = "Error opening pool device";
+               goto bad_pool_dev;
+       }
+       tc->pool_dev = pool_dev;
+
+       if (read_dev_id(argv[1], (unsigned long long *)&tc->dev_id, 0)) {
+               ti->error = "Invalid device id";
+               r = -EINVAL;
+               goto bad_common;
+       }
+
+       pool_md = dm_get_md(tc->pool_dev->bdev->bd_dev);
+       if (!pool_md) {
+               ti->error = "Couldn't get pool mapped device";
+               r = -EINVAL;
+               goto bad_common;
+       }
+
+       tc->pool = __pool_table_lookup(pool_md);
+       if (!tc->pool) {
+               ti->error = "Couldn't find pool object";
+               r = -EINVAL;
+               goto bad_pool_lookup;
+       }
+       __pool_inc(tc->pool);
+
+       r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
+       if (r) {
+               ti->error = "Couldn't open thin internal device";
+               goto bad_thin_open;
+       }
+
+       ti->split_io = tc->pool->sectors_per_block;
+       ti->num_flush_requests = 1;
+       ti->num_discard_requests = 0;
+       ti->discards_supported = 0;
+
+       dm_put(pool_md);
+
+       mutex_unlock(&dm_thin_pool_table.mutex);
+
+       return 0;
+
+bad_thin_open:
+       __pool_dec(tc->pool);
+bad_pool_lookup:
+       dm_put(pool_md);
+bad_common:
+       dm_put_device(ti, tc->pool_dev);
+bad_pool_dev:
+       kfree(tc);
+out_unlock:
+       mutex_unlock(&dm_thin_pool_table.mutex);
+
+       return r;
+}
+
+static int thin_map(struct dm_target *ti, struct bio *bio,
+                   union map_info *map_context)
+{
+       bio->bi_sector -= ti->begin;
+
+       return thin_bio_map(ti, bio, map_context);
+}
+
+static void thin_postsuspend(struct dm_target *ti)
+{
+       if (dm_noflush_suspending(ti))
+               requeue_io((struct thin_c *)ti->private);
+}
+
+/*
+ * <nr mapped sectors> <highest mapped sector>
+ */
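+/*
+ * Illustrative example (hypothetical numbers): with 128-sector blocks,
+ * 8 mapped blocks and a highest mapped block of 15, the line reads
+ * "1024 2047"; a device with nothing mapped reports "0 -".
+ */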
+static int thin_status(struct dm_target *ti, status_type_t type,
+                      char *result, unsigned maxlen)
+{
+       int r;
+       ssize_t sz = 0;
+       dm_block_t mapped, highest;
+       char buf[BDEVNAME_SIZE];
+       struct thin_c *tc = ti->private;
+
+       if (!tc->td)
+               DMEMIT("-");
+       else {
+               switch (type) {
+               case STATUSTYPE_INFO:
+                       r = dm_thin_get_mapped_count(tc->td, &mapped);
+                       if (r)
+                               return r;
+
+                       r = dm_thin_get_highest_mapped_block(tc->td, &highest);
+                       if (r < 0)
+                               return r;
+
+                       DMEMIT("%llu ", mapped * tc->pool->sectors_per_block);
+                       if (r)
+                               DMEMIT("%llu", ((highest + 1) *
+                                               tc->pool->sectors_per_block) - 1);
+                       else
+                               DMEMIT("-");
+                       break;
+
+               case STATUSTYPE_TABLE:
+                       DMEMIT("%s %lu",
+                              format_dev_t(buf, tc->pool_dev->bdev->bd_dev),
+                              (unsigned long) tc->dev_id);
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+static int thin_iterate_devices(struct dm_target *ti,
+                               iterate_devices_callout_fn fn, void *data)
+{
+       dm_block_t blocks;
+       struct thin_c *tc = ti->private;
+
+       /*
+        * We can't call dm_pool_get_data_dev_size() since that blocks.  So
+        * we follow a more convoluted path through to the pool's target.
+        */
+       if (!tc->pool->ti)
+               return 0;       /* nothing is bound */
+
+       blocks = tc->pool->ti->len >> tc->pool->block_shift;
+       if (blocks)
+               return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+
+       return 0;
+}
+
+static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+       struct thin_c *tc = ti->private;
+
+       blk_limits_io_min(limits, 0);
+       blk_limits_io_opt(limits, tc->pool->sectors_per_block << SECTOR_SHIFT);
+}
+
+static struct target_type thin_target = {
+       .name = "thin",
+       .version = {1, 0, 0},
+       .module = THIS_MODULE,
+       .ctr = thin_ctr,
+       .dtr = thin_dtr,
+       .map = thin_map,
+       .postsuspend = thin_postsuspend,
+       .status = thin_status,
+       .iterate_devices = thin_iterate_devices,
+       .io_hints = thin_io_hints,
+};
+
+/*----------------------------------------------------------------*/
+
+static int __init dm_thin_init(void)
+{
+       int r;
+
+       pool_table_init();
+
+       r = dm_register_target(&thin_target);
+       if (r)
+               return r;
+
+       r = dm_register_target(&pool_target);
+       if (r)
+               dm_unregister_target(&thin_target);
+
+       return r;
+}
+
+static void dm_thin_exit(void)
+{
+       dm_unregister_target(&thin_target);
+       dm_unregister_target(&pool_target);
+}
+
+module_init(dm_thin_init);
+module_exit(dm_thin_exit);
+
+MODULE_DESCRIPTION(DM_NAME "device-mapper thin provisioning target");
+MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
index 52b39f335bb38549045f46eae8cad3714c867c5d..6b6616a41baaa63c546af3ed53181d3271d09b82 100644 (file)
 
 #define DM_MSG_PREFIX "core"
 
+#ifdef CONFIG_PRINTK
+/*
+ * ratelimit state to be used in DMXXX_LIMIT().
+ */
+DEFINE_RATELIMIT_STATE(dm_ratelimit_state,
+                      DEFAULT_RATELIMIT_INTERVAL,
+                      DEFAULT_RATELIMIT_BURST);
+EXPORT_SYMBOL(dm_ratelimit_state);
+#endif
+
 /*
  * Cookies are numeric values sent with CHANGE and REMOVE
  * uevents while resuming, removing or renaming the device.
@@ -130,6 +140,8 @@ struct mapped_device {
        /* Protect queue and type against concurrent access. */
        struct mutex type_lock;
 
+       struct target_type *immutable_target_type;
+
        struct gendisk *disk;
        char name[16];
 
@@ -2086,6 +2098,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
        write_lock_irqsave(&md->map_lock, flags);
        old_map = md->map;
        md->map = t;
+       md->immutable_target_type = dm_table_get_immutable_target_type(t);
+
        dm_table_set_restrictions(t, q, limits);
        if (merge_is_optional)
                set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
@@ -2156,6 +2170,11 @@ unsigned dm_get_md_type(struct mapped_device *md)
        return md->type;
 }
 
+struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
+{
+       return md->immutable_target_type;
+}
+
 /*
  * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
  */
@@ -2231,6 +2250,7 @@ struct mapped_device *dm_get_md(dev_t dev)
 
        return md;
 }
+EXPORT_SYMBOL_GPL(dm_get_md);
 
 void *dm_get_mdptr(struct mapped_device *md)
 {
@@ -2316,7 +2336,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
        while (1) {
                set_current_state(interruptible);
 
-               smp_mb();
                if (!md_in_flight(md))
                        break;
 
index 6745dbd278a4ffc463188c72cab4f301a74adc74..b7dacd59d8d7534fbc73584609b5644a20ed4d84 100644 (file)
@@ -60,6 +60,7 @@ int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 int dm_table_any_busy_target(struct dm_table *t);
 unsigned dm_table_get_type(struct dm_table *t);
+struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
 bool dm_table_supports_discards(struct dm_table *t);
 int dm_table_alloc_md_mempools(struct dm_table *t);
@@ -72,6 +73,7 @@ void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
 void dm_set_md_type(struct mapped_device *md, unsigned type);
 unsigned dm_get_md_type(struct mapped_device *md);
+struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
 
 int dm_setup_md_queue(struct mapped_device *md);
 
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
new file mode 100644 (file)
index 0000000..ceb3590
--- /dev/null
@@ -0,0 +1,8 @@
+config DM_PERSISTENT_DATA
+       tristate
+       depends on BLK_DEV_DM && EXPERIMENTAL
+       select LIBCRC32C
+       select DM_BUFIO
+       ---help---
+        Library providing immutable on-disk data structure support for
+        device-mapper targets such as the thin provisioning target.
diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile
new file mode 100644 (file)
index 0000000..cfa95f6
--- /dev/null
@@ -0,0 +1,11 @@
+obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o
+dm-persistent-data-objs := \
+       dm-block-manager.o \
+       dm-space-map-checker.o \
+       dm-space-map-common.o \
+       dm-space-map-disk.o \
+       dm-space-map-metadata.o \
+       dm-transaction-manager.o \
+       dm-btree.o \
+       dm-btree-remove.o \
+       dm-btree-spine.o
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
new file mode 100644 (file)
index 0000000..0317ecd
--- /dev/null
@@ -0,0 +1,620 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#include "dm-block-manager.h"
+#include "dm-persistent-data-internal.h"
+#include "../dm-bufio.h"
+
+#include <linux/crc32c.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/device-mapper.h>
+#include <linux/stacktrace.h>
+
+#define DM_MSG_PREFIX "block manager"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * This is a read/write semaphore with a couple of differences.
+ *
+ * i) There is a restriction on the number of concurrent read locks that
+ * may be held at once.  This is just an implementation detail.
+ *
+ * ii) Recursive locking attempts are detected and return -EINVAL.  A stack
+ * trace is also emitted for the previous lock acquisition.
+ *
+ * iii) Priority is given to write locks.
+ */
+#define MAX_HOLDERS 4
+#define MAX_STACK 10
+
+typedef unsigned long stack_entries[MAX_STACK];
+
+struct block_lock {
+       spinlock_t lock;
+       __s32 count;
+       struct list_head waiters;
+       struct task_struct *holders[MAX_HOLDERS];
+
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+       struct stack_trace traces[MAX_HOLDERS];
+       stack_entries entries[MAX_HOLDERS];
+#endif
+};
+
+struct waiter {
+       struct list_head list;
+       struct task_struct *task;
+       int wants_write;
+};
+
+static unsigned __find_holder(struct block_lock *lock,
+                             struct task_struct *task)
+{
+       unsigned i;
+
+       for (i = 0; i < MAX_HOLDERS; i++)
+               if (lock->holders[i] == task)
+                       break;
+
+       BUG_ON(i == MAX_HOLDERS);
+       return i;
+}
+
+/* call this *after* you increment lock->count */
+static void __add_holder(struct block_lock *lock, struct task_struct *task)
+{
+       unsigned h = __find_holder(lock, NULL);
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+       struct stack_trace *t;
+#endif
+
+       get_task_struct(task);
+       lock->holders[h] = task;
+
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+       t = lock->traces + h;
+       t->nr_entries = 0;
+       t->max_entries = MAX_STACK;
+       t->entries = lock->entries[h];
+       t->skip = 2;
+       save_stack_trace(t);
+#endif
+}
+
+/* call this *before* you decrement lock->count */
+static void __del_holder(struct block_lock *lock, struct task_struct *task)
+{
+       unsigned h = __find_holder(lock, task);
+       lock->holders[h] = NULL;
+       put_task_struct(task);
+}
+
+static int __check_holder(struct block_lock *lock)
+{
+       unsigned i;
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+       static struct stack_trace t;
+       static stack_entries entries;
+#endif
+
+       for (i = 0; i < MAX_HOLDERS; i++) {
+               if (lock->holders[i] == current) {
+                       DMERR("recursive lock detected in pool metadata");
+#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
+                       DMERR("previously held here:");
+                       print_stack_trace(lock->traces + i, 4);
+
+                       DMERR("subsequent acquisition attempted here:");
+                       t.nr_entries = 0;
+                       t.max_entries = MAX_STACK;
+                       t.entries = entries;
+                       t.skip = 3;
+                       save_stack_trace(&t);
+                       print_stack_trace(&t, 4);
+#endif
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+static void __wait(struct waiter *w)
+{
+       for (;;) {
+               set_task_state(current, TASK_UNINTERRUPTIBLE);
+
+               if (!w->task)
+                       break;
+
+               schedule();
+       }
+
+       set_task_state(current, TASK_RUNNING);
+}
+
+static void __wake_waiter(struct waiter *w)
+{
+       struct task_struct *task;
+
+       list_del(&w->list);
+       task = w->task;
+       smp_mb();
+       w->task = NULL;
+       wake_up_process(task);
+}
+
+/*
+ * We either wake a few readers or a single writer.
+ */
+static void __wake_many(struct block_lock *lock)
+{
+       struct waiter *w, *tmp;
+
+       BUG_ON(lock->count < 0);
+       list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
+               if (lock->count >= MAX_HOLDERS)
+                       return;
+
+               if (w->wants_write) {
+                       if (lock->count > 0)
+                               return; /* still read locked */
+
+                       lock->count = -1;
+                       __add_holder(lock, w->task);
+                       __wake_waiter(w);
+                       return;
+               }
+
+               lock->count++;
+               __add_holder(lock, w->task);
+               __wake_waiter(w);
+       }
+}
+
+static void bl_init(struct block_lock *lock)
+{
+       int i;
+
+       spin_lock_init(&lock->lock);
+       lock->count = 0;
+       INIT_LIST_HEAD(&lock->waiters);
+       for (i = 0; i < MAX_HOLDERS; i++)
+               lock->holders[i] = NULL;
+}
+
+static int __available_for_read(struct block_lock *lock)
+{
+       return lock->count >= 0 &&
+               lock->count < MAX_HOLDERS &&
+               list_empty(&lock->waiters);
+}
+
+static int bl_down_read(struct block_lock *lock)
+{
+       int r;
+       struct waiter w;
+
+       spin_lock(&lock->lock);
+       r = __check_holder(lock);
+       if (r) {
+               spin_unlock(&lock->lock);
+               return r;
+       }
+
+       if (__available_for_read(lock)) {
+               lock->count++;
+               __add_holder(lock, current);
+               spin_unlock(&lock->lock);
+               return 0;
+       }
+
+       get_task_struct(current);
+
+       w.task = current;
+       w.wants_write = 0;
+       list_add_tail(&w.list, &lock->waiters);
+       spin_unlock(&lock->lock);
+
+       __wait(&w);
+       put_task_struct(current);
+       return 0;
+}
+
+static int bl_down_read_nonblock(struct block_lock *lock)
+{
+       int r;
+
+       spin_lock(&lock->lock);
+       r = __check_holder(lock);
+       if (r)
+               goto out;
+
+       if (__available_for_read(lock)) {
+               lock->count++;
+               __add_holder(lock, current);
+               r = 0;
+       } else
+               r = -EWOULDBLOCK;
+
+out:
+       spin_unlock(&lock->lock);
+       return r;
+}
+
+static void bl_up_read(struct block_lock *lock)
+{
+       spin_lock(&lock->lock);
+       BUG_ON(lock->count <= 0);
+       __del_holder(lock, current);
+       --lock->count;
+       if (!list_empty(&lock->waiters))
+               __wake_many(lock);
+       spin_unlock(&lock->lock);
+}
+
+static int bl_down_write(struct block_lock *lock)
+{
+       int r;
+       struct waiter w;
+
+       spin_lock(&lock->lock);
+       r = __check_holder(lock);
+       if (r) {
+               spin_unlock(&lock->lock);
+               return r;
+       }
+
+       if (lock->count == 0 && list_empty(&lock->waiters)) {
+               lock->count = -1;
+               __add_holder(lock, current);
+               spin_unlock(&lock->lock);
+               return 0;
+       }
+
+       get_task_struct(current);
+       w.task = current;
+       w.wants_write = 1;
+
+       /*
+        * Writers are given priority.  We know there's only one mutator in
+        * the system, so we ignore the ordering reversal.
+        */
+       list_add(&w.list, &lock->waiters);
+       spin_unlock(&lock->lock);
+
+       __wait(&w);
+       put_task_struct(current);
+
+       return 0;
+}
+
+static void bl_up_write(struct block_lock *lock)
+{
+       spin_lock(&lock->lock);
+       __del_holder(lock, current);
+       lock->count = 0;
+       if (!list_empty(&lock->waiters))
+               __wake_many(lock);
+       spin_unlock(&lock->lock);
+}
+
+static void report_recursive_bug(dm_block_t b, int r)
+{
+       if (r == -EINVAL)
+               DMERR("recursive acquisition of block %llu requested.",
+                     (unsigned long long) b);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Block manager is currently implemented using dm-bufio.  struct
+ * dm_block_manager and struct dm_block map directly onto a couple of
+ * structs in the bufio interface.  I want to retain the freedom to move
+ * away from bufio in the future.  So these structs are just cast within
+ * this .c file, rather than being exposed through the public interface.
+ */
+static struct dm_buffer *to_buffer(struct dm_block *b)
+{
+       return (struct dm_buffer *) b;
+}
+
+static struct dm_bufio_client *to_bufio(struct dm_block_manager *bm)
+{
+       return (struct dm_bufio_client *) bm;
+}
+
+dm_block_t dm_block_location(struct dm_block *b)
+{
+       return dm_bufio_get_block_number(to_buffer(b));
+}
+EXPORT_SYMBOL_GPL(dm_block_location);
+
+void *dm_block_data(struct dm_block *b)
+{
+       return dm_bufio_get_block_data(to_buffer(b));
+}
+EXPORT_SYMBOL_GPL(dm_block_data);
+
+struct buffer_aux {
+       struct dm_block_validator *validator;
+       struct block_lock lock;
+       int write_locked;
+};
+
+static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
+{
+       struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
+       aux->validator = NULL;
+       bl_init(&aux->lock);
+}
+
+static void dm_block_manager_write_callback(struct dm_buffer *buf)
+{
+       struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
+       if (aux->validator) {
+               aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
+                        dm_bufio_get_block_size(dm_bufio_get_client(buf)));
+       }
+}
+
+/*----------------------------------------------------------------
+ * Public interface
+ *--------------------------------------------------------------*/
+struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
+                                                unsigned block_size,
+                                                unsigned cache_size,
+                                                unsigned max_held_per_thread)
+{
+       return (struct dm_block_manager *)
+               dm_bufio_client_create(bdev, block_size, max_held_per_thread,
+                                      sizeof(struct buffer_aux),
+                                      dm_block_manager_alloc_callback,
+                                      dm_block_manager_write_callback);
+}
+EXPORT_SYMBOL_GPL(dm_block_manager_create);
+
+void dm_block_manager_destroy(struct dm_block_manager *bm)
+{
+       return dm_bufio_client_destroy(to_bufio(bm));
+}
+EXPORT_SYMBOL_GPL(dm_block_manager_destroy);
+
+unsigned dm_bm_block_size(struct dm_block_manager *bm)
+{
+       return dm_bufio_get_block_size(to_bufio(bm));
+}
+EXPORT_SYMBOL_GPL(dm_bm_block_size);
+
+dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
+{
+       return dm_bufio_get_device_size(to_bufio(bm));
+}
+
+static int dm_bm_validate_buffer(struct dm_block_manager *bm,
+                                struct dm_buffer *buf,
+                                struct buffer_aux *aux,
+                                struct dm_block_validator *v)
+{
+       if (unlikely(!aux->validator)) {
+               int r;
+               if (!v)
+                       return 0;
+               r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(to_bufio(bm)));
+               if (unlikely(r))
+                       return r;
+               aux->validator = v;
+       } else {
+               if (unlikely(aux->validator != v)) {
+                       DMERR("validator mismatch (old=%s vs new=%s) for block %llu",
+                               aux->validator->name, v ? v->name : "NULL",
+                               (unsigned long long)
+                                       dm_bufio_get_block_number(buf));
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
+                   struct dm_block_validator *v,
+                   struct dm_block **result)
+{
+       struct buffer_aux *aux;
+       void *p;
+       int r;
+
+       p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result);
+       if (unlikely(IS_ERR(p)))
+               return PTR_ERR(p);
+
+       aux = dm_bufio_get_aux_data(to_buffer(*result));
+       r = bl_down_read(&aux->lock);
+       if (unlikely(r)) {
+               dm_bufio_release(to_buffer(*result));
+               report_recursive_bug(b, r);
+               return r;
+       }
+
+       aux->write_locked = 0;
+
+       r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
+       if (unlikely(r)) {
+               bl_up_read(&aux->lock);
+               dm_bufio_release(to_buffer(*result));
+               return r;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bm_read_lock);
+
+int dm_bm_write_lock(struct dm_block_manager *bm,
+                    dm_block_t b, struct dm_block_validator *v,
+                    struct dm_block **result)
+{
+       struct buffer_aux *aux;
+       void *p;
+       int r;
+
+       p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result);
+       if (unlikely(IS_ERR(p)))
+               return PTR_ERR(p);
+
+       aux = dm_bufio_get_aux_data(to_buffer(*result));
+       r = bl_down_write(&aux->lock);
+       if (r) {
+               dm_bufio_release(to_buffer(*result));
+               report_recursive_bug(b, r);
+               return r;
+       }
+
+       aux->write_locked = 1;
+
+       r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
+       if (unlikely(r)) {
+               bl_up_write(&aux->lock);
+               dm_bufio_release(to_buffer(*result));
+               return r;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bm_write_lock);
+
+int dm_bm_read_try_lock(struct dm_block_manager *bm,
+                       dm_block_t b, struct dm_block_validator *v,
+                       struct dm_block **result)
+{
+       struct buffer_aux *aux;
+       void *p;
+       int r;
+
+       p = dm_bufio_get(to_bufio(bm), b, (struct dm_buffer **) result);
+       if (unlikely(IS_ERR(p)))
+               return PTR_ERR(p);
+       if (unlikely(!p))
+               return -EWOULDBLOCK;
+
+       aux = dm_bufio_get_aux_data(to_buffer(*result));
+       r = bl_down_read_nonblock(&aux->lock);
+       if (r < 0) {
+               dm_bufio_release(to_buffer(*result));
+               report_recursive_bug(b, r);
+               return r;
+       }
+       aux->write_locked = 0;
+
+       r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
+       if (unlikely(r)) {
+               bl_up_read(&aux->lock);
+               dm_bufio_release(to_buffer(*result));
+               return r;
+       }
+
+       return 0;
+}
+
+int dm_bm_write_lock_zero(struct dm_block_manager *bm,
+                         dm_block_t b, struct dm_block_validator *v,
+                         struct dm_block **result)
+{
+       int r;
+       struct buffer_aux *aux;
+       void *p;
+
+       p = dm_bufio_new(to_bufio(bm), b, (struct dm_buffer **) result);
+       if (unlikely(IS_ERR(p)))
+               return PTR_ERR(p);
+
+       memset(p, 0, dm_bm_block_size(bm));
+
+       aux = dm_bufio_get_aux_data(to_buffer(*result));
+       r = bl_down_write(&aux->lock);
+       if (r) {
+               dm_bufio_release(to_buffer(*result));
+               return r;
+       }
+
+       aux->write_locked = 1;
+       aux->validator = v;
+
+       return 0;
+}
+
+int dm_bm_unlock(struct dm_block *b)
+{
+       struct buffer_aux *aux;
+       aux = dm_bufio_get_aux_data(to_buffer(b));
+
+       if (aux->write_locked) {
+               dm_bufio_mark_buffer_dirty(to_buffer(b));
+               bl_up_write(&aux->lock);
+       } else
+               bl_up_read(&aux->lock);
+
+       dm_bufio_release(to_buffer(b));
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bm_unlock);
+
+int dm_bm_unlock_move(struct dm_block *b, dm_block_t n)
+{
+       struct buffer_aux *aux;
+
+       aux = dm_bufio_get_aux_data(to_buffer(b));
+
+       if (aux->write_locked) {
+               dm_bufio_mark_buffer_dirty(to_buffer(b));
+               bl_up_write(&aux->lock);
+       } else
+               bl_up_read(&aux->lock);
+
+       dm_bufio_release_move(to_buffer(b), n);
+       return 0;
+}
+
+int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
+                          struct dm_block *superblock)
+{
+       int r;
+
+       r = dm_bufio_write_dirty_buffers(to_bufio(bm));
+       if (unlikely(r))
+               return r;
+       r = dm_bufio_issue_flush(to_bufio(bm));
+       if (unlikely(r))
+               return r;
+
+       dm_bm_unlock(superblock);
+
+       r = dm_bufio_write_dirty_buffers(to_bufio(bm));
+       if (unlikely(r))
+               return r;
+       r = dm_bufio_issue_flush(to_bufio(bm));
+       if (unlikely(r))
+               return r;
+
+       return 0;
+}
+
+u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
+{
+       return crc32c(~(u32) 0, data, len) ^ init_xor;
+}
+EXPORT_SYMBOL_GPL(dm_bm_checksum);
+
+/*----------------------------------------------------------------*/
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_DESCRIPTION("Immutable metadata library for dm");
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
new file mode 100644 (file)
index 0000000..924833d
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _LINUX_DM_BLOCK_MANAGER_H
+#define _LINUX_DM_BLOCK_MANAGER_H
+
+#include <linux/types.h>
+#include <linux/blkdev.h>
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Block number.
+ */
+typedef uint64_t dm_block_t;
+struct dm_block;
+
+dm_block_t dm_block_location(struct dm_block *b);
+void *dm_block_data(struct dm_block *b);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * @cache_size is currently ignored by the dm-bufio-based
+ * implementation.
+ *
+ * @max_held_per_thread should be the maximum number of locks, read or
+ * write, that an individual thread holds at any one time.
+ */
+struct dm_block_manager;
+struct dm_block_manager *dm_block_manager_create(
+       struct block_device *bdev, unsigned block_size,
+       unsigned cache_size, unsigned max_held_per_thread);
+void dm_block_manager_destroy(struct dm_block_manager *bm);
+
+unsigned dm_bm_block_size(struct dm_block_manager *bm);
+dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm);
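+
+/*
+ * A minimal creation sketch (illustrative only; error handling elided,
+ * and the 4KiB block size is just an example):
+ *
+ *	struct dm_block_manager *bm;
+ *
+ *	bm = dm_block_manager_create(bdev, 4096, 0, 1);
+ *	...
+ *	dm_block_manager_destroy(bm);
+ */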
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The validator allows the caller to verify newly-read data and modify
+ * the data just before writing, e.g. to calculate checksums.  It's
+ * important to be consistent with your use of validators.  The only time
+ * you can change validators is if you call dm_bm_write_lock_zero.
+ */
+struct dm_block_validator {
+       const char *name;
+       void (*prepare_for_write)(struct dm_block_validator *v, struct dm_block *b, size_t block_size);
+
+       /*
+        * Return 0 if the checksum is valid or < 0 on error.
+        */
+       int (*check)(struct dm_block_validator *v, struct dm_block *b, size_t block_size);
+};
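+
+/*
+ * A minimal validator sketch (illustrative only; the "ex_" names are
+ * hypothetical).  It assumes the block begins with a __le32 csum field
+ * and checksums the remainder with dm_bm_checksum():
+ *
+ *	static void ex_prepare(struct dm_block_validator *v,
+ *			       struct dm_block *b, size_t block_size)
+ *	{
+ *		__le32 *csum = dm_block_data(b);
+ *
+ *		*csum = cpu_to_le32(dm_bm_checksum(csum + 1,
+ *					block_size - sizeof(__le32), 0));
+ *	}
+ *
+ *	static int ex_check(struct dm_block_validator *v,
+ *			    struct dm_block *b, size_t block_size)
+ *	{
+ *		__le32 *csum = dm_block_data(b);
+ *
+ *		if (*csum != cpu_to_le32(dm_bm_checksum(csum + 1,
+ *					block_size - sizeof(__le32), 0)))
+ *			return -EILSEQ;
+ *		return 0;
+ *	}
+ */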
+
+/*----------------------------------------------------------------*/
+
+/*
+ * You can have multiple concurrent readers or a single writer holding a
+ * block lock.
+ */
+
+/*
+ * dm_bm_read_lock() and dm_bm_write_lock() lock a block and return
+ * through @result a pointer to memory that holds a copy of that block.
+ * If you have write-locked the block then any changes you make to memory
+ * pointed to by @result will be written back to the disk sometime after
+ * dm_bm_unlock is called.
+ */
+int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
+                   struct dm_block_validator *v,
+                   struct dm_block **result);
+
+int dm_bm_write_lock(struct dm_block_manager *bm, dm_block_t b,
+                    struct dm_block_validator *v,
+                    struct dm_block **result);
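+
+/*
+ * Typical read-side usage (sketch only; "my_validator" is hypothetical):
+ *
+ *	struct dm_block *blk;
+ *	int r = dm_bm_read_lock(bm, b, &my_validator, &blk);
+ *
+ *	if (r)
+ *		return r;
+ *	... inspect dm_block_data(blk) ...
+ *	dm_bm_unlock(blk);
+ */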
+
+/*
+ * The *_try_lock variants return -EWOULDBLOCK if the block isn't
+ * available immediately.
+ */
+int dm_bm_read_try_lock(struct dm_block_manager *bm, dm_block_t b,
+                       struct dm_block_validator *v,
+                       struct dm_block **result);
+
+/*
+ * Use dm_bm_write_lock_zero() when you know you're going to
+ * overwrite the block completely.  It saves a disk read.
+ */
+int dm_bm_write_lock_zero(struct dm_block_manager *bm, dm_block_t b,
+                         struct dm_block_validator *v,
+                         struct dm_block **result);
+
+int dm_bm_unlock(struct dm_block *b);
+
+/*
+ * An optimisation: we often want to copy a block's contents to a new
+ * block, e.g. as part of the shadowing operation.  It's far better for
+ * bufio to do this move behind the scenes than for us to hold two locks
+ * and memcpy the data.
+ */
+int dm_bm_unlock_move(struct dm_block *b, dm_block_t n);
+
+/*
+ * It's a common idiom to have a superblock that should be committed last.
+ *
+ * @superblock should be write-locked on entry. It will be unlocked during
+ * this function.  All dirty blocks are guaranteed to be written and flushed
+ * before the superblock.
+ *
+ * This method always blocks.
+ */
+int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
+                          struct dm_block *superblock);
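+
+/*
+ * Illustrative commit sequence (SUPERBLOCK and sb_validator are
+ * hypothetical; error handling elided):
+ *
+ *	struct dm_block *sblock;
+ *
+ *	r = dm_bm_write_lock(bm, SUPERBLOCK, &sb_validator, &sblock);
+ *	... update the superblock's contents ...
+ *	r = dm_bm_flush_and_unlock(bm, sblock);
+ */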
+
+u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor);
+
+/*----------------------------------------------------------------*/
+
+#endif /* _LINUX_DM_BLOCK_MANAGER_H */
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
new file mode 100644 (file)
index 0000000..d279c76
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_BTREE_INTERNAL_H
+#define DM_BTREE_INTERNAL_H
+
+#include "dm-btree.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * We'll need 2 accessor functions for n->csum and n->blocknr
+ * to support dm-btree-spine.c.
+ */
+
+enum node_flags {
+       INTERNAL_NODE = 1,
+       LEAF_NODE = 1 << 1
+};
+
+/*
+ * Every btree node begins with this structure.  Make sure it's a multiple
+ * of 8 bytes in size, otherwise the 64-bit keys will be misaligned.
+ */
+struct node_header {
+       __le32 csum;
+       __le32 flags;
+       __le64 blocknr; /* Block this node is supposed to live in. */
+
+       __le32 nr_entries;
+       __le32 max_entries;
+       __le32 value_size;
+       __le32 padding;
+} __packed;
+
+struct node {
+       struct node_header header;
+       __le64 keys[0];
+} __packed;
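+
+/*
+ * Implied on-disk layout of a node within its block (see value_base()
+ * below):
+ *
+ *	[node_header][key 0 .. key max_entries - 1][value 0 .. value max_entries - 1]
+ *
+ * i.e. the value array starts immediately after keys[max_entries].
+ */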
+
+
+void inc_children(struct dm_transaction_manager *tm, struct node *n,
+                 struct dm_btree_value_type *vt);
+
+int new_block(struct dm_btree_info *info, struct dm_block **result);
+int unlock_block(struct dm_btree_info *info, struct dm_block *b);
+
+/*
+ * Spines keep track of the rolling locks.  There are 2 variants, read-only
+ * and one that uses shadowing.  These are separate structs to allow the
+ * type checker to spot misuse, for example accidentally calling read_lock
+ * on a shadow spine.
+ */
+struct ro_spine {
+       struct dm_btree_info *info;
+
+       int count;
+       struct dm_block *nodes[2];
+};
+
+void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
+int exit_ro_spine(struct ro_spine *s);
+int ro_step(struct ro_spine *s, dm_block_t new_child);
+struct node *ro_node(struct ro_spine *s);
+
+struct shadow_spine {
+       struct dm_btree_info *info;
+
+       int count;
+       struct dm_block *nodes[2];
+
+       dm_block_t root;
+};
+
+void init_shadow_spine(struct shadow_spine *s, struct dm_btree_info *info);
+int exit_shadow_spine(struct shadow_spine *s);
+
+int shadow_step(struct shadow_spine *s, dm_block_t b,
+               struct dm_btree_value_type *vt);
+
+/*
+ * The spine must have at least one entry before calling this.
+ */
+struct dm_block *shadow_current(struct shadow_spine *s);
+
+/*
+ * The spine must have at least two entries before calling this.
+ */
+struct dm_block *shadow_parent(struct shadow_spine *s);
+
+int shadow_has_parent(struct shadow_spine *s);
+
+int shadow_root(struct shadow_spine *s);
+
+/*
+ * Some inlines.
+ */
+static inline __le64 *key_ptr(struct node *n, uint32_t index)
+{
+       return n->keys + index;
+}
+
+static inline void *value_base(struct node *n)
+{
+       return &n->keys[le32_to_cpu(n->header.max_entries)];
+}
+
+/*
+ * FIXME: Now that value size is stored in node we don't need the third parm.
+ */
+static inline void *value_ptr(struct node *n, uint32_t index, size_t value_size)
+{
+       BUG_ON(value_size != le32_to_cpu(n->header.value_size));
+       return value_base(n) + (value_size * index);
+}
+
+/*
+ * Assumes the values are suitably-aligned and converts to core format.
+ */
+static inline uint64_t value64(struct node *n, uint32_t index)
+{
+       __le64 *values_le = value_base(n);
+
+       return le64_to_cpu(values_le[index]);
+}
+
+/*
+ * Searching for a key within a single node.
+ */
+int lower_bound(struct node *n, uint64_t key);
+
+extern struct dm_block_validator btree_node_validator;
+
+#endif /* DM_BTREE_INTERNAL_H */
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
new file mode 100644 (file)
index 0000000..65fd85e
--- /dev/null
@@ -0,0 +1,566 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-btree.h"
+#include "dm-btree-internal.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/module.h>
+
+/*
+ * Removing an entry from a btree
+ * ==============================
+ *
+ * A very important constraint for our btree is that no node, except the
+ * root, may have fewer than a certain number of entries.
+ * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
+ *
+ * Ensuring this is complicated by the way we want to only ever hold the
+ * locks on 2 nodes concurrently, and only change nodes in a top to bottom
+ * fashion.
+ *
+ * Each node may have a left or right sibling.  When descending the spine,
+ * if a node contains only MIN_ENTRIES then we try to increase this to at
+ * least MIN_ENTRIES + 1.  We do this in the following ways:
+ *
+ * [A] No siblings => this can only happen if the node is the root, in which
+ *     case we copy the child's contents over the root.
+ *
+ * [B] No left sibling
+ *     ==> rebalance(node, right sibling)
+ *
+ * [C] No right sibling
+ *     ==> rebalance(left sibling, node)
+ *
+ * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
+ *     ==> delete the node, adding its contents to left and right
+ *
+ * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
+ *     ==> rebalance(left, node, right)
+ *
+ * After these operations it's possible that our original node no
+ * longer contains the desired subtree.  For this reason this rebalancing
+ * is performed on the children of the current node.  This also avoids
+ * having a special case for the root.
+ *
+ * Once this rebalancing has occurred we can then step into the child node
+ * for internal nodes, or delete the entry for leaf nodes.
+ */
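+
+/*
+ * A worked example (hypothetical numbers): with max_entries = 9,
+ * del_threshold() below is 9 / 3 = 3, so a child holding 3 or fewer
+ * entries is rebalanced before we step into it; merge_threshold() is
+ * 2 * 3 + 1 = 7, so two siblings holding at most 7 entries between
+ * them are merged into a single node.
+ */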
+
+/*
+ * Some little utilities for moving node data around.
+ */
+static void node_shift(struct node *n, int shift)
+{
+       uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
+       uint32_t value_size = le32_to_cpu(n->header.value_size);
+
+       if (shift < 0) {
+               shift = -shift;
+               BUG_ON(shift > nr_entries);
+               BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift, value_size));
+               memmove(key_ptr(n, 0),
+                       key_ptr(n, shift),
+                       (nr_entries - shift) * sizeof(__le64));
+               memmove(value_ptr(n, 0, value_size),
+                       value_ptr(n, shift, value_size),
+                       (nr_entries - shift) * value_size);
+       } else {
+               BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries));
+               memmove(key_ptr(n, shift),
+                       key_ptr(n, 0),
+                       nr_entries * sizeof(__le64));
+               memmove(value_ptr(n, shift, value_size),
+                       value_ptr(n, 0, value_size),
+                       nr_entries * value_size);
+       }
+}
+
+static void node_copy(struct node *left, struct node *right, int shift)
+{
+       uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+       uint32_t value_size = le32_to_cpu(left->header.value_size);
+       BUG_ON(value_size != le32_to_cpu(right->header.value_size));
+
+       if (shift < 0) {
+               shift = -shift;
+               BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries));
+               memcpy(key_ptr(left, nr_left),
+                      key_ptr(right, 0),
+                      shift * sizeof(__le64));
+               memcpy(value_ptr(left, nr_left, value_size),
+                      value_ptr(right, 0, value_size),
+                      shift * value_size);
+       } else {
+               BUG_ON(shift > le32_to_cpu(right->header.max_entries));
+               memcpy(key_ptr(right, 0),
+                      key_ptr(left, nr_left - shift),
+                      shift * sizeof(__le64));
+               memcpy(value_ptr(right, 0, value_size),
+                      value_ptr(left, nr_left - shift, value_size),
+                      shift * value_size);
+       }
+}
+
+/*
+ * Delete a specific entry from a leaf node.
+ */
+static void delete_at(struct node *n, unsigned index)
+{
+       unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
+       unsigned nr_to_copy = nr_entries - (index + 1);
+       uint32_t value_size = le32_to_cpu(n->header.value_size);
+       BUG_ON(index >= nr_entries);
+
+       if (nr_to_copy) {
+               memmove(key_ptr(n, index),
+                       key_ptr(n, index + 1),
+                       nr_to_copy * sizeof(__le64));
+
+               memmove(value_ptr(n, index, value_size),
+                       value_ptr(n, index + 1, value_size),
+                       nr_to_copy * value_size);
+       }
+
+       n->header.nr_entries = cpu_to_le32(nr_entries - 1);
+}
+
+static unsigned del_threshold(struct node *n)
+{
+       return le32_to_cpu(n->header.max_entries) / 3;
+}
+
+static unsigned merge_threshold(struct node *n)
+{
+       /*
+        * The extra one is because we know we're potentially going to
+        * delete an entry.
+        */
+       return 2 * (le32_to_cpu(n->header.max_entries) / 3) + 1;
+}
+
+struct child {
+       unsigned index;
+       struct dm_block *block;
+       struct node *n;
+};
+
+static struct dm_btree_value_type le64_type = {
+       .context = NULL,
+       .size = sizeof(__le64),
+       .inc = NULL,
+       .dec = NULL,
+       .equal = NULL
+};
+
+static int init_child(struct dm_btree_info *info, struct node *parent,
+                     unsigned index, struct child *result)
+{
+       int r, inc;
+       dm_block_t root;
+
+       result->index = index;
+       root = value64(parent, index);
+
+       r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
+                              &result->block, &inc);
+       if (r)
+               return r;
+
+       result->n = dm_block_data(result->block);
+
+       if (inc)
+               inc_children(info->tm, result->n, &le64_type);
+
+       *((__le64 *) value_ptr(parent, index, sizeof(__le64))) =
+               cpu_to_le64(dm_block_location(result->block));
+
+       return 0;
+}
+
+static int exit_child(struct dm_btree_info *info, struct child *c)
+{
+       return dm_tm_unlock(info->tm, c->block);
+}
+
+static void shift(struct node *left, struct node *right, int count)
+{
+       if (!count)
+               return;
+
+       if (count > 0) {
+               node_shift(right, count);
+               node_copy(left, right, count);
+       } else {
+               node_copy(left, right, count);
+               node_shift(right, count);
+       }
+
+       left->header.nr_entries =
+               cpu_to_le32(le32_to_cpu(left->header.nr_entries) - count);
+       BUG_ON(le32_to_cpu(left->header.nr_entries) > le32_to_cpu(left->header.max_entries));
+
+       right->header.nr_entries =
+               cpu_to_le32(le32_to_cpu(right->header.nr_entries) + count);
+       BUG_ON(le32_to_cpu(right->header.nr_entries) > le32_to_cpu(right->header.max_entries));
+}
+
+static void __rebalance2(struct dm_btree_info *info, struct node *parent,
+                        struct child *l, struct child *r)
+{
+       struct node *left = l->n;
+       struct node *right = r->n;
+       uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+       uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+
+       if (nr_left + nr_right <= merge_threshold(left)) {
+               /*
+                * Merge
+                */
+               node_copy(left, right, -nr_right);
+               left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
+               delete_at(parent, r->index);
+
+               /*
+                * We need to decrement the right block, but not its
+                * children, since they're still referenced by left.
+                */
+               dm_tm_dec(info->tm, dm_block_location(r->block));
+       } else {
+               /*
+                * Rebalance.
+                */
+               unsigned target_left = (nr_left + nr_right) / 2;
+               unsigned shift_ = nr_left - target_left;
+               BUG_ON(le32_to_cpu(left->header.max_entries) <= nr_left - shift_);
+               BUG_ON(le32_to_cpu(right->header.max_entries) <= nr_right + shift_);
+               shift(left, right, nr_left - target_left);
+               *key_ptr(parent, r->index) = right->keys[0];
+       }
+}
+
+static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
+                     unsigned left_index)
+{
+       int r;
+       struct node *parent;
+       struct child left, right;
+
+       parent = dm_block_data(shadow_current(s));
+
+       r = init_child(info, parent, left_index, &left);
+       if (r)
+               return r;
+
+       r = init_child(info, parent, left_index + 1, &right);
+       if (r) {
+               exit_child(info, &left);
+               return r;
+       }
+
+       __rebalance2(info, parent, &left, &right);
+
+       r = exit_child(info, &left);
+       if (r) {
+               exit_child(info, &right);
+               return r;
+       }
+
+       return exit_child(info, &right);
+}
+
+static void __rebalance3(struct dm_btree_info *info, struct node *parent,
+                        struct child *l, struct child *c, struct child *r)
+{
+       struct node *left = l->n;
+       struct node *center = c->n;
+       struct node *right = r->n;
+
+       uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+       uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
+       uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+       uint32_t max_entries = le32_to_cpu(left->header.max_entries);
+
+       unsigned target;
+
+       BUG_ON(left->header.max_entries != center->header.max_entries);
+       BUG_ON(center->header.max_entries != right->header.max_entries);
+
+       if (((nr_left + nr_center + nr_right) / 2) < merge_threshold(center)) {
+               /*
+                * Delete center node:
+                *
+                * We dump as many entries from center as possible into
+                * left, then the rest in right, then rebalance2.  This
+                * wastes some CPU, but I want something simple for now.
+                */
+               unsigned shift = min(max_entries - nr_left, nr_center);
+
+               BUG_ON(nr_left + shift > max_entries);
+               node_copy(left, center, -shift);
+               left->header.nr_entries = cpu_to_le32(nr_left + shift);
+
+               if (shift != nr_center) {
+                       shift = nr_center - shift;
+                       BUG_ON((nr_right + shift) >= max_entries);
+                       node_shift(right, shift);
+                       node_copy(center, right, shift);
+                       right->header.nr_entries = cpu_to_le32(nr_right + shift);
+               }
+               *key_ptr(parent, r->index) = right->keys[0];
+
+               delete_at(parent, c->index);
+               r->index--;
+
+               dm_tm_dec(info->tm, dm_block_location(c->block));
+               __rebalance2(info, parent, l, r);
+
+               return;
+       }
+
+       /*
+        * Rebalance
+        */
+       target = (nr_left + nr_center + nr_right) / 3;
+       BUG_ON(target > max_entries);
+
+       /*
+        * Adjust the left node
+        */
+       shift(left, center, nr_left - target);
+
+       /*
+        * Adjust the right node
+        */
+       shift(center, right, target - nr_right);
+       *key_ptr(parent, c->index) = center->keys[0];
+       *key_ptr(parent, r->index) = right->keys[0];
+}
+
+static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
+                     unsigned left_index)
+{
+       int r;
+       struct node *parent = dm_block_data(shadow_current(s));
+       struct child left, center, right;
+
+       /*
+        * FIXME: fill out an array?
+        */
+       r = init_child(info, parent, left_index, &left);
+       if (r)
+               return r;
+
+       r = init_child(info, parent, left_index + 1, &center);
+       if (r) {
+               exit_child(info, &left);
+               return r;
+       }
+
+       r = init_child(info, parent, left_index + 2, &right);
+       if (r) {
+               exit_child(info, &left);
+               exit_child(info, &center);
+               return r;
+       }
+
+       __rebalance3(info, parent, &left, &center, &right);
+
+       r = exit_child(info, &left);
+       if (r) {
+               exit_child(info, &center);
+               exit_child(info, &right);
+               return r;
+       }
+
+       r = exit_child(info, &center);
+       if (r) {
+               exit_child(info, &right);
+               return r;
+       }
+
+       r = exit_child(info, &right);
+       if (r)
+               return r;
+
+       return 0;
+}
+
+static int get_nr_entries(struct dm_transaction_manager *tm,
+                         dm_block_t b, uint32_t *result)
+{
+       int r;
+       struct dm_block *block;
+       struct node *n;
+
+       r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
+       if (r)
+               return r;
+
+       n = dm_block_data(block);
+       *result = le32_to_cpu(n->header.nr_entries);
+
+       return dm_tm_unlock(tm, block);
+}
+
+static int rebalance_children(struct shadow_spine *s,
+                             struct dm_btree_info *info, uint64_t key)
+{
+       int i, r, has_left_sibling, has_right_sibling;
+       uint32_t child_entries;
+       struct node *n;
+
+       n = dm_block_data(shadow_current(s));
+
+       if (le32_to_cpu(n->header.nr_entries) == 1) {
+               struct dm_block *child;
+               dm_block_t b = value64(n, 0);
+
+               r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
+               if (r)
+                       return r;
+
+               memcpy(n, dm_block_data(child),
+                      dm_bm_block_size(dm_tm_get_bm(info->tm)));
+               r = dm_tm_unlock(info->tm, child);
+               if (r)
+                       return r;
+
+               dm_tm_dec(info->tm, dm_block_location(child));
+               return 0;
+       }
+
+       i = lower_bound(n, key);
+       if (i < 0)
+               return -ENODATA;
+
+       r = get_nr_entries(info->tm, value64(n, i), &child_entries);
+       if (r)
+               return r;
+
+       if (child_entries > del_threshold(n))
+               return 0;
+
+       has_left_sibling = i > 0;
+       has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
+
+       if (!has_left_sibling)
+               r = rebalance2(s, info, i);
+
+       else if (!has_right_sibling)
+               r = rebalance2(s, info, i - 1);
+
+       else
+               r = rebalance3(s, info, i - 1);
+
+       return r;
+}
+
+static int do_leaf(struct node *n, uint64_t key, unsigned *index)
+{
+       int i = lower_bound(n, key);
+
+       if ((i < 0) ||
+           (i >= le32_to_cpu(n->header.nr_entries)) ||
+           (le64_to_cpu(n->keys[i]) != key))
+               return -ENODATA;
+
+       *index = i;
+
+       return 0;
+}
+
+/*
+ * Prepares for removal from one level of the hierarchy.  The caller must
+ * call delete_at() to remove the entry at index.
+ */
+static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
+                     struct dm_btree_value_type *vt, dm_block_t root,
+                     uint64_t key, unsigned *index)
+{
+       int i = *index, r;
+       struct node *n;
+
+       for (;;) {
+               r = shadow_step(s, root, vt);
+               if (r < 0)
+                       break;
+
+               /*
+                * We have to patch up the parent node; it's ugly, but I
+                * don't see a way to do this automatically as part of the
+                * spine op.
+                */
+               if (shadow_has_parent(s)) {
+                       __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+                       memcpy(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(__le64)),
+                              &location, sizeof(__le64));
+               }
+
+               n = dm_block_data(shadow_current(s));
+
+               if (le32_to_cpu(n->header.flags) & LEAF_NODE)
+                       return do_leaf(n, key, index);
+
+               r = rebalance_children(s, info, key);
+               if (r)
+                       break;
+
+               n = dm_block_data(shadow_current(s));
+               if (le32_to_cpu(n->header.flags) & LEAF_NODE)
+                       return do_leaf(n, key, index);
+
+               i = lower_bound(n, key);
+
+               /*
+                * We know the key is present, or else
+                * rebalance_children would have returned
+                * -ENODATA
+                */
+               root = value64(n, i);
+       }
+
+       return r;
+}
+
+int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
+                   uint64_t *keys, dm_block_t *new_root)
+{
+       unsigned level, last_level = info->levels - 1;
+       int index = 0, r = 0;
+       struct shadow_spine spine;
+       struct node *n;
+
+       init_shadow_spine(&spine, info);
+       for (level = 0; level < info->levels; level++) {
+               r = remove_raw(&spine, info,
+                              (level == last_level ?
+                               &info->value_type : &le64_type),
+                              root, keys[level], (unsigned *)&index);
+               if (r < 0)
+                       break;
+
+               n = dm_block_data(shadow_current(&spine));
+               if (level != last_level) {
+                       root = value64(n, index);
+                       continue;
+               }
+
+               BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));
+
+               if (info->value_type.dec)
+                       info->value_type.dec(info->value_type.context,
+                                            value_ptr(n, index, info->value_type.size));
+
+               delete_at(n, index);
+       }
+
+       *new_root = shadow_root(&spine);
+       exit_shadow_spine(&spine);
+
+       return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_remove);
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
new file mode 100644 (file)
index 0000000..d9a7912
--- /dev/null
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-btree-internal.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "btree spine"
+
+/*----------------------------------------------------------------*/
+
+#define BTREE_CSUM_XOR 121107
+
+static int node_check(struct dm_block_validator *v,
+                     struct dm_block *b,
+                     size_t block_size);
+
+static void node_prepare_for_write(struct dm_block_validator *v,
+                                  struct dm_block *b,
+                                  size_t block_size)
+{
+       struct node *n = dm_block_data(b);
+       struct node_header *h = &n->header;
+
+       h->blocknr = cpu_to_le64(dm_block_location(b));
+       h->csum = cpu_to_le32(dm_bm_checksum(&h->flags,
+                                            block_size - sizeof(__le32),
+                                            BTREE_CSUM_XOR));
+
+       BUG_ON(node_check(v, b, block_size));
+}
+
+static int node_check(struct dm_block_validator *v,
+                     struct dm_block *b,
+                     size_t block_size)
+{
+       struct node *n = dm_block_data(b);
+       struct node_header *h = &n->header;
+       size_t value_size;
+       __le32 csum_disk;
+       uint32_t flags;
+
+       if (dm_block_location(b) != le64_to_cpu(h->blocknr)) {
+               DMERR("node_check failed blocknr %llu wanted %llu",
+                     le64_to_cpu(h->blocknr), dm_block_location(b));
+               return -ENOTBLK;
+       }
+
+       csum_disk = cpu_to_le32(dm_bm_checksum(&h->flags,
+                                              block_size - sizeof(__le32),
+                                              BTREE_CSUM_XOR));
+       if (csum_disk != h->csum) {
+               DMERR("node_check failed csum %u wanted %u",
+                     le32_to_cpu(csum_disk), le32_to_cpu(h->csum));
+               return -EILSEQ;
+       }
+
+       value_size = le32_to_cpu(h->value_size);
+
+       if (sizeof(struct node_header) +
+           (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) {
+               DMERR("node_check failed: max_entries too large");
+               return -EILSEQ;
+       }
+
+       if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) {
+               DMERR("node_check failed, too many entries");
+               return -EILSEQ;
+       }
+
+       /*
+        * The node must be either INTERNAL or LEAF.
+        */
+       flags = le32_to_cpu(h->flags);
+       if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) {
+               DMERR("node_check failed, node is neither INTERNAL or LEAF");
+               return -EILSEQ;
+       }
+
+       return 0;
+}
+
+struct dm_block_validator btree_node_validator = {
+       .name = "btree_node",
+       .prepare_for_write = node_prepare_for_write,
+       .check = node_check
+};
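+
+/*
+ * Note on checksum coverage: the csum is computed from &h->flags for
+ * (block_size - sizeof(__le32)) bytes, i.e. over everything in the block
+ * except the csum field itself.  For a 4096 byte block that is the last
+ * 4092 bytes, and it includes the blocknr written just above.
+ */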
+
+/*----------------------------------------------------------------*/
+
+static int bn_read_lock(struct dm_btree_info *info, dm_block_t b,
+                struct dm_block **result)
+{
+       return dm_tm_read_lock(info->tm, b, &btree_node_validator, result);
+}
+
+static int bn_shadow(struct dm_btree_info *info, dm_block_t orig,
+             struct dm_btree_value_type *vt,
+             struct dm_block **result)
+{
+       int r, inc;
+
+       r = dm_tm_shadow_block(info->tm, orig, &btree_node_validator,
+                              result, &inc);
+       if (!r && inc)
+               inc_children(info->tm, dm_block_data(*result), vt);
+
+       return r;
+}
+
+int new_block(struct dm_btree_info *info, struct dm_block **result)
+{
+       return dm_tm_new_block(info->tm, &btree_node_validator, result);
+}
+
+int unlock_block(struct dm_btree_info *info, struct dm_block *b)
+{
+       return dm_tm_unlock(info->tm, b);
+}
+
+/*----------------------------------------------------------------*/
+
+void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info)
+{
+       s->info = info;
+       s->count = 0;
+       s->nodes[0] = NULL;
+       s->nodes[1] = NULL;
+}
+
+int exit_ro_spine(struct ro_spine *s)
+{
+       int r = 0, i;
+
+       for (i = 0; i < s->count; i++) {
+               int r2 = unlock_block(s->info, s->nodes[i]);
+               if (r2 < 0)
+                       r = r2;
+       }
+
+       return r;
+}
+
+int ro_step(struct ro_spine *s, dm_block_t new_child)
+{
+       int r;
+
+       if (s->count == 2) {
+               r = unlock_block(s->info, s->nodes[0]);
+               if (r < 0)
+                       return r;
+               s->nodes[0] = s->nodes[1];
+               s->count--;
+       }
+
+       r = bn_read_lock(s->info, new_child, s->nodes + s->count);
+       if (!r)
+               s->count++;
+
+       return r;
+}
+
+struct node *ro_node(struct ro_spine *s)
+{
+       struct dm_block *block;
+
+       BUG_ON(!s->count);
+       block = s->nodes[s->count - 1];
+
+       return dm_block_data(block);
+}
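+
+/*
+ * Both spine flavours hold at most two blocks locked at once: the current
+ * node and its parent.  The step functions below unlock the grandparent
+ * before descending, so lock consumption stays O(1) however deep the tree.
+ */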
+
+/*----------------------------------------------------------------*/
+
+void init_shadow_spine(struct shadow_spine *s, struct dm_btree_info *info)
+{
+       s->info = info;
+       s->count = 0;
+}
+
+int exit_shadow_spine(struct shadow_spine *s)
+{
+       int r = 0, i;
+
+       for (i = 0; i < s->count; i++) {
+               int r2 = unlock_block(s->info, s->nodes[i]);
+               if (r2 < 0)
+                       r = r2;
+       }
+
+       return r;
+}
+
+int shadow_step(struct shadow_spine *s, dm_block_t b,
+               struct dm_btree_value_type *vt)
+{
+       int r;
+
+       if (s->count == 2) {
+               r = unlock_block(s->info, s->nodes[0]);
+               if (r < 0)
+                       return r;
+               s->nodes[0] = s->nodes[1];
+               s->count--;
+       }
+
+       r = bn_shadow(s->info, b, vt, s->nodes + s->count);
+       if (!r) {
+               if (!s->count)
+                       s->root = dm_block_location(s->nodes[0]);
+
+               s->count++;
+       }
+
+       return r;
+}
+
+struct dm_block *shadow_current(struct shadow_spine *s)
+{
+       BUG_ON(!s->count);
+
+       return s->nodes[s->count - 1];
+}
+
+struct dm_block *shadow_parent(struct shadow_spine *s)
+{
+       BUG_ON(s->count != 2);
+
+       return s->count == 2 ? s->nodes[0] : NULL;
+}
+
+int shadow_has_parent(struct shadow_spine *s)
+{
+       return s->count >= 2;
+}
+
+int shadow_root(struct shadow_spine *s)
+{
+       return s->root;
+}
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
new file mode 100644 (file)
index 0000000..e0638be
--- /dev/null
@@ -0,0 +1,805 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-btree-internal.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/module.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "btree"
+
+/*----------------------------------------------------------------
+ * Array manipulation
+ *--------------------------------------------------------------*/
+static void memcpy_disk(void *dest, const void *src, size_t len)
+       __dm_written_to_disk(src)
+{
+       memcpy(dest, src, len);
+       __dm_unbless_for_disk(src);
+}
+
+static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
+                        unsigned index, void *elt)
+       __dm_written_to_disk(elt)
+{
+       if (index < nr_elts)
+               memmove(base + (elt_size * (index + 1)),
+                       base + (elt_size * index),
+                       (nr_elts - index) * elt_size);
+
+       memcpy_disk(base + (elt_size * index), elt, elt_size);
+}
+
+/*----------------------------------------------------------------*/
+
+/* Makes the assumption that no two keys are the same. */
+static int bsearch(struct node *n, uint64_t key, int want_hi)
+{
+       int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
+
+       while (hi - lo > 1) {
+               int mid = lo + ((hi - lo) / 2);
+               uint64_t mid_key = le64_to_cpu(n->keys[mid]);
+
+               if (mid_key == key)
+                       return mid;
+
+               if (mid_key < key)
+                       lo = mid;
+               else
+                       hi = mid;
+       }
+
+       return want_hi ? hi : lo;
+}
+
+int lower_bound(struct node *n, uint64_t key)
+{
+       return bsearch(n, key, 0);
+}
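+
+/*
+ * Worked example of the bounds: for a node holding keys {10, 20, 30},
+ *
+ *   lower_bound(n, 20) == 1   (exact match)
+ *   lower_bound(n, 25) == 1   (index of the greatest key <= 25)
+ *   lower_bound(n, 5)  == -1  (no key <= 5)
+ *
+ * Internal nodes rely on this: the child at lower_bound(n, key) covers
+ * the key range that contains the key being searched for.
+ */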
+
+void inc_children(struct dm_transaction_manager *tm, struct node *n,
+                 struct dm_btree_value_type *vt)
+{
+       unsigned i;
+       uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
+
+       if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
+               for (i = 0; i < nr_entries; i++)
+                       dm_tm_inc(tm, value64(n, i));
+       else if (vt->inc)
+               for (i = 0; i < nr_entries; i++)
+                       vt->inc(vt->context,
+                               value_ptr(n, i, vt->size));
+}
+
+static int insert_at(size_t value_size, struct node *node, unsigned index,
+                     uint64_t key, void *value)
+                     __dm_written_to_disk(value)
+{
+       uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
+       __le64 key_le = cpu_to_le64(key);
+
+       if (index > nr_entries ||
+           index >= le32_to_cpu(node->header.max_entries)) {
+               DMERR("too many entries in btree node for insert");
+               __dm_unbless_for_disk(value);
+               return -ENOMEM;
+       }
+
+       __dm_bless_for_disk(&key_le);
+
+       array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le);
+       array_insert(value_base(node), value_size, nr_entries, index, value);
+       node->header.nr_entries = cpu_to_le32(nr_entries + 1);
+
+       return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * We want 3n entries (for some n).  This works more nicely for repeated
+ * insert/remove loops than (2n + 1).
+ */
+static uint32_t calc_max_entries(size_t value_size, size_t block_size)
+{
+       uint32_t total, n;
+       size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */
+
+       block_size -= sizeof(struct node_header);
+       total = block_size / elt_size;
+       n = total / 3;          /* rounds down */
+
+       return 3 * n;
+}
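+
+/*
+ * Worked example (assuming a 4096 byte block and a 32 byte node_header):
+ * with 8 byte values, elt_size = 8 + 8 = 16, so total = (4096 - 32) / 16
+ * = 254, n = 254 / 3 = 84, and max_entries = 3 * 84 = 252.
+ */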
+
+int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
+{
+       int r;
+       struct dm_block *b;
+       struct node *n;
+       size_t block_size;
+       uint32_t max_entries;
+
+       r = new_block(info, &b);
+       if (r < 0)
+               return r;
+
+       block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
+       max_entries = calc_max_entries(info->value_type.size, block_size);
+
+       n = dm_block_data(b);
+       memset(n, 0, block_size);
+       n->header.flags = cpu_to_le32(LEAF_NODE);
+       n->header.nr_entries = cpu_to_le32(0);
+       n->header.max_entries = cpu_to_le32(max_entries);
+       n->header.value_size = cpu_to_le32(info->value_type.size);
+
+       *root = dm_block_location(b);
+       return unlock_block(info, b);
+}
+EXPORT_SYMBOL_GPL(dm_btree_empty);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Deletion is conceptually recursive, but since kernel stack space is
+ * limited we manage our own explicit stack of frames on the heap.
+ */
+#define MAX_SPINE_DEPTH 64
+struct frame {
+       struct dm_block *b;
+       struct node *n;
+       unsigned level;
+       unsigned nr_children;
+       unsigned current_child;
+};
+
+struct del_stack {
+       struct dm_transaction_manager *tm;
+       int top;
+       struct frame spine[MAX_SPINE_DEPTH];
+};
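+
+/*
+ * MAX_SPINE_DEPTH bounds the traversal height across all nested levels.
+ * With the ~252 entry nodes from the worked example above, 64 frames
+ * cover vastly more blocks than any device can hold, so hitting the
+ * limit indicates corruption rather than a legitimately deep tree.
+ */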
+
+static int top_frame(struct del_stack *s, struct frame **f)
+{
+       if (s->top < 0) {
+               DMERR("btree deletion stack empty");
+               return -EINVAL;
+       }
+
+       *f = s->spine + s->top;
+
+       return 0;
+}
+
+static int unprocessed_frames(struct del_stack *s)
+{
+       return s->top >= 0;
+}
+
+static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
+{
+       int r;
+       uint32_t ref_count;
+
+       if (s->top >= MAX_SPINE_DEPTH - 1) {
+               DMERR("btree deletion stack out of memory");
+               return -ENOMEM;
+       }
+
+       r = dm_tm_ref(s->tm, b, &ref_count);
+       if (r)
+               return r;
+
+       if (ref_count > 1)
+               /*
+                * This is a shared node, so we can just decrement its
+                * reference counter and leave the children.
+                */
+               dm_tm_dec(s->tm, b);
+
+       else {
+               struct frame *f = s->spine + ++s->top;
+
+               r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
+               if (r) {
+                       s->top--;
+                       return r;
+               }
+
+               f->n = dm_block_data(f->b);
+               f->level = level;
+               f->nr_children = le32_to_cpu(f->n->header.nr_entries);
+               f->current_child = 0;
+       }
+
+       return 0;
+}
+
+static void pop_frame(struct del_stack *s)
+{
+       struct frame *f = s->spine + s->top--;
+
+       dm_tm_dec(s->tm, dm_block_location(f->b));
+       dm_tm_unlock(s->tm, f->b);
+}
+
+int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
+{
+       int r;
+       struct del_stack *s;
+
+       s = kmalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
+               return -ENOMEM;
+       s->tm = info->tm;
+       s->top = -1;
+
+       r = push_frame(s, root, 1);
+       if (r)
+               goto out;
+
+       while (unprocessed_frames(s)) {
+               uint32_t flags;
+               struct frame *f;
+               dm_block_t b;
+
+               r = top_frame(s, &f);
+               if (r)
+                       goto out;
+
+               if (f->current_child >= f->nr_children) {
+                       pop_frame(s);
+                       continue;
+               }
+
+               flags = le32_to_cpu(f->n->header.flags);
+               if (flags & INTERNAL_NODE) {
+                       b = value64(f->n, f->current_child);
+                       f->current_child++;
+                       r = push_frame(s, b, f->level);
+                       if (r)
+                               goto out;
+
+               } else if (f->level != (info->levels - 1)) {
+                       b = value64(f->n, f->current_child);
+                       f->current_child++;
+                       r = push_frame(s, b, f->level + 1);
+                       if (r)
+                               goto out;
+
+               } else {
+                       if (info->value_type.dec) {
+                               unsigned i;
+
+                               for (i = 0; i < f->nr_children; i++)
+                                       info->value_type.dec(info->value_type.context,
+                                                            value_ptr(f->n, i, info->value_type.size));
+                       }
+                       f->current_child = f->nr_children;
+               }
+       }
+
+out:
+       kfree(s);
+       return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_del);
+
+/*----------------------------------------------------------------*/
+
+static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
+                           int (*search_fn)(struct node *, uint64_t),
+                           uint64_t *result_key, void *v, size_t value_size)
+{
+       int i, r;
+       uint32_t flags, nr_entries;
+
+       do {
+               r = ro_step(s, block);
+               if (r < 0)
+                       return r;
+
+               i = search_fn(ro_node(s), key);
+
+               flags = le32_to_cpu(ro_node(s)->header.flags);
+               nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
+               if (i < 0 || i >= nr_entries)
+                       return -ENODATA;
+
+               if (flags & INTERNAL_NODE)
+                       block = value64(ro_node(s), i);
+
+       } while (!(flags & LEAF_NODE));
+
+       *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+       memcpy(v, value_ptr(ro_node(s), i, value_size), value_size);
+
+       return 0;
+}
+
+int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
+                   uint64_t *keys, void *value_le)
+{
+       unsigned level, last_level = info->levels - 1;
+       int r = -ENODATA;
+       uint64_t rkey;
+       __le64 internal_value_le;
+       struct ro_spine spine;
+
+       init_ro_spine(&spine, info);
+       for (level = 0; level < info->levels; level++) {
+               size_t size;
+               void *value_p;
+
+               if (level == last_level) {
+                       value_p = value_le;
+                       size = info->value_type.size;
+
+               } else {
+                       value_p = &internal_value_le;
+                       size = sizeof(uint64_t);
+               }
+
+               r = btree_lookup_raw(&spine, root, keys[level],
+                                    lower_bound, &rkey,
+                                    value_p, size);
+
+               if (!r) {
+                       if (rkey != keys[level]) {
+                               exit_ro_spine(&spine);
+                               return -ENODATA;
+                       }
+               } else {
+                       exit_ro_spine(&spine);
+                       return r;
+               }
+
+               root = le64_to_cpu(internal_value_le);
+       }
+       exit_ro_spine(&spine);
+
+       return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_lookup);
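+
+/*
+ * Usage sketch (hypothetical caller, names assumed): with a two-level
+ * tree (info->levels == 2), keys[0] selects the inner btree and keys[1]
+ * the entry within it, e.g. a (device, block) pair:
+ *
+ *   uint64_t keys[2] = { dev_id, block_nr };
+ *   __le64 value_le;
+ *   int r = dm_btree_lookup(info, root, keys, &value_le);
+ *
+ * r == -ENODATA means the key is absent; 0 means value_le holds the value.
+ */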
+
+/*
+ * Splits a node by creating a sibling node and shifting half the node's
+ * contents across.  Assumes there is a parent node, and it has room for
+ * another child.
+ *
+ * Before:
+ *       +--------+
+ *       | Parent |
+ *       +--------+
+ *          |
+ *          v
+ *     +----------+
+ *     | A ++++++ |
+ *     +----------+
+ *
+ *
+ * After:
+ *             +--------+
+ *             | Parent |
+ *             +--------+
+ *               |     |
+ *               v     +------+
+ *         +---------+        |
+ *         | A* +++  |        v
+ *         +---------+   +-------+
+ *                       | B +++ |
+ *                       +-------+
+ *
+ * Where A* is a shadow of A.
+ */
+static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
+                              unsigned parent_index, uint64_t key)
+{
+       int r;
+       size_t size;
+       unsigned nr_left, nr_right;
+       struct dm_block *left, *right, *parent;
+       struct node *ln, *rn, *pn;
+       __le64 location;
+
+       left = shadow_current(s);
+
+       r = new_block(s->info, &right);
+       if (r < 0)
+               return r;
+
+       ln = dm_block_data(left);
+       rn = dm_block_data(right);
+
+       nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
+       nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;
+
+       ln->header.nr_entries = cpu_to_le32(nr_left);
+
+       rn->header.flags = ln->header.flags;
+       rn->header.nr_entries = cpu_to_le32(nr_right);
+       rn->header.max_entries = ln->header.max_entries;
+       rn->header.value_size = ln->header.value_size;
+       memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));
+
+       size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
+               sizeof(uint64_t) : s->info->value_type.size;
+       memcpy(value_ptr(rn, 0, size), value_ptr(ln, nr_left, size),
+              size * nr_right);
+
+       /*
+        * Patch up the parent
+        */
+       parent = shadow_parent(s);
+
+       pn = dm_block_data(parent);
+       location = cpu_to_le64(dm_block_location(left));
+       __dm_bless_for_disk(&location);
+       memcpy_disk(value_ptr(pn, parent_index, sizeof(__le64)),
+                   &location, sizeof(__le64));
+
+       location = cpu_to_le64(dm_block_location(right));
+       __dm_bless_for_disk(&location);
+
+       r = insert_at(sizeof(__le64), pn, parent_index + 1,
+                     le64_to_cpu(rn->keys[0]), &location);
+       if (r)
+               return r;
+
+       if (key < le64_to_cpu(rn->keys[0])) {
+               unlock_block(s->info, right);
+               s->nodes[1] = left;
+       } else {
+               unlock_block(s->info, left);
+               s->nodes[1] = right;
+       }
+
+       return 0;
+}
+
+/*
+ * Splits a node by creating two new children beneath the given node.
+ *
+ * Before:
+ *       +----------+
+ *       | A ++++++ |
+ *       +----------+
+ *
+ *
+ * After:
+ *     +------------+
+ *     | A (shadow) |
+ *     +------------+
+ *         |   |
+ *   +------+  +----+
+ *   |              |
+ *   v              v
+ * +-------+    +-------+
+ * | B +++ |    | C +++ |
+ * +-------+    +-------+
+ */
+static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
+{
+       int r;
+       size_t size;
+       unsigned nr_left, nr_right;
+       struct dm_block *left, *right, *new_parent;
+       struct node *pn, *ln, *rn;
+       __le64 val;
+
+       new_parent = shadow_current(s);
+
+       r = new_block(s->info, &left);
+       if (r < 0)
+               return r;
+
+       r = new_block(s->info, &right);
+       if (r < 0) {
+               /* FIXME: put left */
+               return r;
+       }
+
+       pn = dm_block_data(new_parent);
+       ln = dm_block_data(left);
+       rn = dm_block_data(right);
+
+       nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
+       nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;
+
+       ln->header.flags = pn->header.flags;
+       ln->header.nr_entries = cpu_to_le32(nr_left);
+       ln->header.max_entries = pn->header.max_entries;
+       ln->header.value_size = pn->header.value_size;
+
+       rn->header.flags = pn->header.flags;
+       rn->header.nr_entries = cpu_to_le32(nr_right);
+       rn->header.max_entries = pn->header.max_entries;
+       rn->header.value_size = pn->header.value_size;
+
+       memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
+       memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
+
+       size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
+               sizeof(__le64) : s->info->value_type.size;
+       memcpy(value_ptr(ln, 0, size), value_ptr(pn, 0, size), nr_left * size);
+       memcpy(value_ptr(rn, 0, size), value_ptr(pn, nr_left, size),
+              nr_right * size);
+
+       /* new_parent should just point to left and right now */
+       pn->header.flags = cpu_to_le32(INTERNAL_NODE);
+       pn->header.nr_entries = cpu_to_le32(2);
+       pn->header.max_entries = cpu_to_le32(
+               calc_max_entries(sizeof(__le64),
+                                dm_bm_block_size(
+                                        dm_tm_get_bm(s->info->tm))));
+       pn->header.value_size = cpu_to_le32(sizeof(__le64));
+
+       val = cpu_to_le64(dm_block_location(left));
+       __dm_bless_for_disk(&val);
+       pn->keys[0] = ln->keys[0];
+       memcpy_disk(value_ptr(pn, 0, sizeof(__le64)), &val, sizeof(__le64));
+
+       val = cpu_to_le64(dm_block_location(right));
+       __dm_bless_for_disk(&val);
+       pn->keys[1] = rn->keys[0];
+       memcpy_disk(value_ptr(pn, 1, sizeof(__le64)), &val, sizeof(__le64));
+
+       /*
+        * Rejig the spine.  This is ugly, since it knows too
+        * much about the spine's internals.
+        */
+       if (s->nodes[0] != new_parent) {
+               unlock_block(s->info, s->nodes[0]);
+               s->nodes[0] = new_parent;
+       }
+       if (key < le64_to_cpu(rn->keys[0])) {
+               unlock_block(s->info, right);
+               s->nodes[1] = left;
+       } else {
+               unlock_block(s->info, left);
+               s->nodes[1] = right;
+       }
+       s->count = 2;
+
+       return 0;
+}
+
+static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
+                           struct dm_btree_value_type *vt,
+                           uint64_t key, unsigned *index)
+{
+       int r, i = *index, top = 1;
+       struct node *node;
+
+       for (;;) {
+               r = shadow_step(s, root, vt);
+               if (r < 0)
+                       return r;
+
+               node = dm_block_data(shadow_current(s));
+
+               /*
+                * We have to patch up the parent node, ugly, but I don't
+                * see a way to do this automatically as part of the spine
+                * op.
+                */
+               if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unnecessary. */
+                       __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+
+                       __dm_bless_for_disk(&location);
+                       memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)),
+                                   &location, sizeof(__le64));
+               }
+
+               node = dm_block_data(shadow_current(s));
+
+               if (node->header.nr_entries == node->header.max_entries) {
+                       if (top)
+                               r = btree_split_beneath(s, key);
+                       else
+                               r = btree_split_sibling(s, root, i, key);
+
+                       if (r < 0)
+                               return r;
+               }
+
+               node = dm_block_data(shadow_current(s));
+
+               i = lower_bound(node, key);
+
+               if (le32_to_cpu(node->header.flags) & LEAF_NODE)
+                       break;
+
+               if (i < 0) {
+                       /* change the bounds on the lowest key */
+                       node->keys[0] = cpu_to_le64(key);
+                       i = 0;
+               }
+
+               root = value64(node, i);
+               top = 0;
+       }
+
+       if (i < 0 || le64_to_cpu(node->keys[i]) != key)
+               i++;
+
+       *index = i;
+       return 0;
+}
+
+static int insert(struct dm_btree_info *info, dm_block_t root,
+                 uint64_t *keys, void *value, dm_block_t *new_root,
+                 int *inserted)
+                 __dm_written_to_disk(value)
+{
+       int r, need_insert;
+       unsigned level, index = -1, last_level = info->levels - 1;
+       dm_block_t block = root;
+       struct shadow_spine spine;
+       struct node *n;
+       struct dm_btree_value_type le64_type;
+
+       le64_type.context = NULL;
+       le64_type.size = sizeof(__le64);
+       le64_type.inc = NULL;
+       le64_type.dec = NULL;
+       le64_type.equal = NULL;
+
+       init_shadow_spine(&spine, info);
+
+       for (level = 0; level < (info->levels - 1); level++) {
+               r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index);
+               if (r < 0)
+                       goto bad;
+
+               n = dm_block_data(shadow_current(&spine));
+               need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
+                              (le64_to_cpu(n->keys[index]) != keys[level]));
+
+               if (need_insert) {
+                       dm_block_t new_tree;
+                       __le64 new_le;
+
+                       r = dm_btree_empty(info, &new_tree);
+                       if (r < 0)
+                               goto bad;
+
+                       new_le = cpu_to_le64(new_tree);
+                       __dm_bless_for_disk(&new_le);
+
+                       r = insert_at(sizeof(uint64_t), n, index,
+                                     keys[level], &new_le);
+                       if (r)
+                               goto bad;
+               }
+
+               if (level < last_level)
+                       block = value64(n, index);
+       }
+
+       r = btree_insert_raw(&spine, block, &info->value_type,
+                            keys[level], &index);
+       if (r < 0)
+               goto bad;
+
+       n = dm_block_data(shadow_current(&spine));
+       need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
+                      (le64_to_cpu(n->keys[index]) != keys[level]));
+
+       if (need_insert) {
+               if (inserted)
+                       *inserted = 1;
+
+               r = insert_at(info->value_type.size, n, index,
+                             keys[level], value);
+               if (r)
+                       goto bad_unblessed;
+       } else {
+               if (inserted)
+                       *inserted = 0;
+
+               if (info->value_type.dec &&
+                   (!info->value_type.equal ||
+                    !info->value_type.equal(
+                            info->value_type.context,
+                            value_ptr(n, index, info->value_type.size),
+                            value))) {
+                       info->value_type.dec(info->value_type.context,
+                                            value_ptr(n, index, info->value_type.size));
+               }
+               memcpy_disk(value_ptr(n, index, info->value_type.size),
+                           value, info->value_type.size);
+       }
+
+       *new_root = shadow_root(&spine);
+       exit_shadow_spine(&spine);
+
+       return 0;
+
+bad:
+       __dm_unbless_for_disk(value);
+bad_unblessed:
+       exit_shadow_spine(&spine);
+       return r;
+}
+
+int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
+                   uint64_t *keys, void *value, dm_block_t *new_root)
+                   __dm_written_to_disk(value)
+{
+       return insert(info, root, keys, value, new_root, NULL);
+}
+EXPORT_SYMBOL_GPL(dm_btree_insert);
+
+int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
+                          uint64_t *keys, void *value, dm_block_t *new_root,
+                          int *inserted)
+                          __dm_written_to_disk(value)
+{
+       return insert(info, root, keys, value, new_root, inserted);
+}
+EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
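+
+/*
+ * Usage sketch (hypothetical caller, names assumed): callers that track
+ * an entry count use the notify variant so overwrites of an existing key
+ * are not double counted:
+ *
+ *   int inserted;
+ *   r = dm_btree_insert_notify(info, root, keys, &value_le, &root, &inserted);
+ *   if (!r && inserted)
+ *           nr_entries++;
+ */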
+
+/*----------------------------------------------------------------*/
+
+static int find_highest_key(struct ro_spine *s, dm_block_t block,
+                           uint64_t *result_key, dm_block_t *next_block)
+{
+       int i, r;
+       uint32_t flags;
+
+       do {
+               r = ro_step(s, block);
+               if (r < 0)
+                       return r;
+
+               flags = le32_to_cpu(ro_node(s)->header.flags);
+               i = le32_to_cpu(ro_node(s)->header.nr_entries);
+               if (!i)
+                       return -ENODATA;
+               else
+                       i--;
+
+               *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+               if (next_block || flags & INTERNAL_NODE)
+                       block = value64(ro_node(s), i);
+
+       } while (flags & INTERNAL_NODE);
+
+       if (next_block)
+               *next_block = block;
+       return 0;
+}
+
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+                             uint64_t *result_keys)
+{
+       int r = 0, count = 0, level;
+       struct ro_spine spine;
+
+       init_ro_spine(&spine, info);
+       for (level = 0; level < info->levels; level++) {
+               r = find_highest_key(&spine, root, result_keys + level,
+                                    level == info->levels - 1 ? NULL : &root);
+               if (r == -ENODATA) {
+                       r = 0;
+                       break;
+
+               } else if (r)
+                       break;
+
+               count++;
+       }
+       exit_ro_spine(&spine);
+
+       return r ? r : count;
+}
+EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
new file mode 100644 (file)
index 0000000..ae02c84
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#ifndef _LINUX_DM_BTREE_H
+#define _LINUX_DM_BTREE_H
+
+#include "dm-block-manager.h"
+
+struct dm_transaction_manager;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Annotations used to check that on-disk metadata is handled as little-endian.
+ */
+#ifdef __CHECKER__
+#  define __dm_written_to_disk(x) __releases(x)
+#  define __dm_reads_from_disk(x) __acquires(x)
+#  define __dm_bless_for_disk(x) __acquire(x)
+#  define __dm_unbless_for_disk(x) __release(x)
+#else
+#  define __dm_written_to_disk(x)
+#  define __dm_reads_from_disk(x)
+#  define __dm_bless_for_disk(x)
+#  define __dm_unbless_for_disk(x)
+#endif
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Manipulates hierarchical B+ trees with 64-bit keys and arbitrary-sized
+ * values.
+ */
+
+/*
+ * Information about the values stored within the btree.
+ */
+struct dm_btree_value_type {
+       void *context;
+
+       /*
+        * The size in bytes of each value.
+        */
+       uint32_t size;
+
+       /*
+        * Any of these methods can be safely set to NULL if you do not
+        * need the corresponding feature.
+        */
+
+       /*
+        * The btree is making a duplicate of the value, for instance
+        * because previously-shared btree nodes have now diverged.
+        * The @value argument is the new copy, which this method may modify.
+        * (Probably it just wants to increment a reference count
+        * somewhere.) This method is _not_ called for insertion of a new
+        * value: It is assumed the ref count is already 1.
+        */
+       void (*inc)(void *context, void *value);
+
+       /*
+        * This value is being deleted.  The btree takes care of freeing
+        * the memory pointed to by @value.  Often the del function just
+        * needs to decrement a reference count somewhere.
+        */
+       void (*dec)(void *context, void *value);
+
+       /*
+        * A test for equality between two values.  When a value is
+        * overwritten with a new one, the old one has the dec method
+        * called _unless_ the new and old value are deemed equal.
+        */
+       int (*equal)(void *context, void *value1, void *value2);
+};
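+
+/*
+ * A minimal sketch of a value type for raw little-endian 64 bit values
+ * that need no reference counting (this mirrors the le64 type the insert
+ * path builds internally for its inner levels):
+ *
+ *   struct dm_btree_value_type vt = {
+ *           .context = NULL,
+ *           .size = sizeof(__le64),
+ *           .inc = NULL,
+ *           .dec = NULL,
+ *           .equal = NULL,
+ *   };
+ */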
+
+/*
+ * The shape and contents of a btree.
+ */
+struct dm_btree_info {
+       struct dm_transaction_manager *tm;
+
+       /*
+        * Number of nested btrees. (Not the depth of a single tree.)
+        */
+       unsigned levels;
+       struct dm_btree_value_type value_type;
+};
+
+/*
+ * Set up an empty tree.  O(1).
+ */
+int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root);
+
+/*
+ * Delete a tree.  O(n) - this is the slow one!  It can also block, so
+ * please don't call it on an IO path.
+ */
+int dm_btree_del(struct dm_btree_info *info, dm_block_t root);
+
+/*
+ * All the lookup functions return -ENODATA if the key cannot be found.
+ */
+
+/*
+ * Tries to find a key that matches exactly.  O(ln(n))
+ */
+int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
+                   uint64_t *keys, void *value_le);
+
+/*
+ * Insertion (or overwrite an existing value).  O(ln(n))
+ */
+int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
+                   uint64_t *keys, void *value, dm_block_t *new_root)
+                   __dm_written_to_disk(value);
+
+/*
+ * A variant of insert that indicates whether it actually inserted or just
+ * overwrote.  Useful if you're keeping track of the number of entries in a
+ * tree.
+ */
+int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
+                          uint64_t *keys, void *value, dm_block_t *new_root,
+                          int *inserted)
+                          __dm_written_to_disk(value);
+
+/*
+ * Remove a key if present.  This doesn't remove empty subtrees.  Normally
+ * subtrees represent a separate entity, like a snapshot map, so this is
+ * correct behaviour.  O(ln(n)).
+ */
+int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
+                   uint64_t *keys, dm_block_t *new_root);
+
+/*
+ * Returns < 0 on failure.  Otherwise the number of key entries that have
+ * been filled out.  Remember trees can have zero entries, and as such have
+ * no highest key.
+ */
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+                             uint64_t *result_keys);
+
+#endif /* _LINUX_DM_BTREE_H */
diff --git a/drivers/md/persistent-data/dm-persistent-data-internal.h b/drivers/md/persistent-data/dm-persistent-data-internal.h
new file mode 100644 (file)
index 0000000..c49e26f
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _DM_PERSISTENT_DATA_INTERNAL_H
+#define _DM_PERSISTENT_DATA_INTERNAL_H
+
+#include "dm-block-manager.h"
+
+static inline unsigned dm_hash_block(dm_block_t b, unsigned hash_mask)
+{
+       const unsigned BIG_PRIME = 4294967291UL;
+
+       return (((unsigned) b) * BIG_PRIME) & hash_mask;
+}
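+
+/*
+ * dm_hash_block() assumes @hash_mask is a power of two minus one, e.g.
+ * pass hash_mask = 0xff for a 256 bucket table.  Multiplying by a large
+ * prime just below 2^32 before masking spreads consecutive block numbers
+ * across the buckets.
+ */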
+
+#endif /* _DM_PERSISTENT_DATA_INTERNAL_H */
diff --git a/drivers/md/persistent-data/dm-space-map-checker.c b/drivers/md/persistent-data/dm-space-map-checker.c
new file mode 100644 (file)
index 0000000..bb44a93
--- /dev/null
@@ -0,0 +1,437 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-space-map-checker.h"
+
+#include <linux/device-mapper.h>
+
+#ifdef CONFIG_DM_DEBUG_SPACE_MAPS
+
+#define DM_MSG_PREFIX "space map checker"
+
+/*----------------------------------------------------------------*/
+
+struct count_array {
+       dm_block_t nr;
+       dm_block_t nr_free;
+
+       uint32_t *counts;
+};
+
+static int ca_get_count(struct count_array *ca, dm_block_t b, uint32_t *count)
+{
+       if (b >= ca->nr)
+               return -EINVAL;
+
+       *count = ca->counts[b];
+       return 0;
+}
+
+static int ca_count_more_than_one(struct count_array *ca, dm_block_t b, int *r)
+{
+       if (b >= ca->nr)
+               return -EINVAL;
+
+       *r = ca->counts[b] > 1;
+       return 0;
+}
+
+static int ca_set_count(struct count_array *ca, dm_block_t b, uint32_t count)
+{
+       uint32_t old_count;
+
+       if (b >= ca->nr)
+               return -EINVAL;
+
+       old_count = ca->counts[b];
+
+       if (!count && old_count)
+               ca->nr_free++;
+
+       else if (count && !old_count)
+               ca->nr_free--;
+
+       ca->counts[b] = count;
+       return 0;
+}
+
+static int ca_inc_block(struct count_array *ca, dm_block_t b)
+{
+       if (b >= ca->nr)
+               return -EINVAL;
+
+       ca_set_count(ca, b, ca->counts[b] + 1);
+       return 0;
+}
+
+static int ca_dec_block(struct count_array *ca, dm_block_t b)
+{
+       if (b >= ca->nr)
+               return -EINVAL;
+
+       BUG_ON(ca->counts[b] == 0);
+       ca_set_count(ca, b, ca->counts[b] - 1);
+       return 0;
+}
+
+static int ca_create(struct count_array *ca, struct dm_space_map *sm)
+{
+       int r;
+       dm_block_t nr_blocks;
+
+       r = dm_sm_get_nr_blocks(sm, &nr_blocks);
+       if (r)
+               return r;
+
+       ca->nr = nr_blocks;
+       ca->nr_free = nr_blocks;
+       ca->counts = kzalloc(sizeof(*ca->counts) * nr_blocks, GFP_KERNEL);
+       if (!ca->counts)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static int ca_load(struct count_array *ca, struct dm_space_map *sm)
+{
+       int r;
+       uint32_t count;
+       dm_block_t nr_blocks, i;
+
+       r = dm_sm_get_nr_blocks(sm, &nr_blocks);
+       if (r)
+               return r;
+
+       BUG_ON(ca->nr != nr_blocks);
+
+       DMWARN("Loading debug space map from disk.  This may take some time");
+       for (i = 0; i < nr_blocks; i++) {
+               r = dm_sm_get_count(sm, i, &count);
+               if (r) {
+                       DMERR("load failed");
+                       return r;
+               }
+
+               ca_set_count(ca, i, count);
+       }
+       DMWARN("Load complete");
+
+       return 0;
+}
+
+static int ca_extend(struct count_array *ca, dm_block_t extra_blocks)
+{
+       dm_block_t nr_blocks = ca->nr + extra_blocks;
+       uint32_t *counts = kzalloc(sizeof(*counts) * nr_blocks, GFP_KERNEL);
+       if (!counts)
+               return -ENOMEM;
+
+       memcpy(counts, ca->counts, sizeof(*counts) * ca->nr);
+       kfree(ca->counts);
+       ca->nr = nr_blocks;
+       ca->nr_free += extra_blocks;
+       ca->counts = counts;
+       return 0;
+}
+
+static int ca_commit(struct count_array *old, struct count_array *new)
+{
+       if (old->nr != new->nr) {
+               int r;
+
+               BUG_ON(old->nr > new->nr);
+               r = ca_extend(old, new->nr - old->nr);
+               if (r)
+                       return r;
+       }
+
+       BUG_ON(old->nr != new->nr);
+       old->nr_free = new->nr_free;
+       memcpy(old->counts, new->counts, sizeof(*old->counts) * old->nr);
+       return 0;
+}
+
+static void ca_destroy(struct count_array *ca)
+{
+       kfree(ca->counts);
+}
+
+/*----------------------------------------------------------------*/
+
+struct sm_checker {
+       struct dm_space_map sm;
+
+       struct count_array old_counts;
+       struct count_array counts;
+
+       struct dm_space_map *real_sm;
+};
+
+static void sm_checker_destroy(struct dm_space_map *sm)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+
+       dm_sm_destroy(smc->real_sm);
+       ca_destroy(&smc->old_counts);
+       ca_destroy(&smc->counts);
+       kfree(smc);
+}
+
+static int sm_checker_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       int r = dm_sm_get_nr_blocks(smc->real_sm, count);
+       if (!r)
+               BUG_ON(smc->old_counts.nr != *count);
+       return r;
+}
+
+static int sm_checker_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       int r = dm_sm_get_nr_free(smc->real_sm, count);
+       if (!r) {
+               /*
+                * Slow, but we know it's correct.
+                */
+               dm_block_t b, n = 0;
+               for (b = 0; b < smc->old_counts.nr; b++)
+                       if (smc->old_counts.counts[b] == 0 &&
+                           smc->counts.counts[b] == 0)
+                               n++;
+
+               if (n != *count)
+                       DMERR("free block counts differ, checker %u, sm-disk:%u",
+                             (unsigned) n, (unsigned) *count);
+       }
+       return r;
+}
+
+static int sm_checker_new_block(struct dm_space_map *sm, dm_block_t *b)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       int r = dm_sm_new_block(smc->real_sm, b);
+
+       if (!r) {
+               BUG_ON(*b >= smc->old_counts.nr);
+               BUG_ON(smc->old_counts.counts[*b] != 0);
+               BUG_ON(*b >= smc->counts.nr);
+               BUG_ON(smc->counts.counts[*b] != 0);
+               ca_set_count(&smc->counts, *b, 1);
+       }
+
+       return r;
+}
+
+static int sm_checker_inc_block(struct dm_space_map *sm, dm_block_t b)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       int r = dm_sm_inc_block(smc->real_sm, b);
+       int r2 = ca_inc_block(&smc->counts, b);
+       BUG_ON(r != r2);
+       return r;
+}
+
+static int sm_checker_dec_block(struct dm_space_map *sm, dm_block_t b)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       int r = dm_sm_dec_block(smc->real_sm, b);
+       int r2 = ca_dec_block(&smc->counts, b);
+       BUG_ON(r != r2);
+       return r;
+}
+
+static int sm_checker_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       uint32_t result2 = 0;
+       int r = dm_sm_get_count(smc->real_sm, b, result);
+       int r2 = ca_get_count(&smc->counts, b, &result2);
+
+       BUG_ON(r != r2);
+       if (!r)
+               BUG_ON(*result != result2);
+       return r;
+}
+
+static int sm_checker_count_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       int result2 = 0;
+       int r = dm_sm_count_is_more_than_one(smc->real_sm, b, result);
+       int r2 = ca_count_more_than_one(&smc->counts, b, &result2);
+
+       BUG_ON(r != r2);
+       if (!r)
+               BUG_ON(!(*result) && result2);
+       return r;
+}
+
+static int sm_checker_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       uint32_t old_rc;
+       int r = dm_sm_set_count(smc->real_sm, b, count);
+       int r2;
+
+       BUG_ON(b >= smc->counts.nr);
+       old_rc = smc->counts.counts[b];
+       r2 = ca_set_count(&smc->counts, b, count);
+       BUG_ON(r != r2);
+
+       return r;
+}
+
+static int sm_checker_commit(struct dm_space_map *sm)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       int r;
+
+       r = dm_sm_commit(smc->real_sm);
+       if (r)
+               return r;
+
+       r = ca_commit(&smc->old_counts, &smc->counts);
+       if (r)
+               return r;
+
+       return 0;
+}
+
+static int sm_checker_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       int r = dm_sm_extend(smc->real_sm, extra_blocks);
+       if (r)
+               return r;
+
+       return ca_extend(&smc->counts, extra_blocks);
+}
+
+static int sm_checker_root_size(struct dm_space_map *sm, size_t *result)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       return dm_sm_root_size(smc->real_sm, result);
+}
+
+static int sm_checker_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len)
+{
+       struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+       return dm_sm_copy_root(smc->real_sm, copy_to_here_le, len);
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_space_map ops_ = {
+       .destroy = sm_checker_destroy,
+       .get_nr_blocks = sm_checker_get_nr_blocks,
+       .get_nr_free = sm_checker_get_nr_free,
+       .inc_block = sm_checker_inc_block,
+       .dec_block = sm_checker_dec_block,
+       .new_block = sm_checker_new_block,
+       .get_count = sm_checker_get_count,
+       .count_is_more_than_one = sm_checker_count_more_than_one,
+       .set_count = sm_checker_set_count,
+       .commit = sm_checker_commit,
+       .extend = sm_checker_extend,
+       .root_size = sm_checker_root_size,
+       .copy_root = sm_checker_copy_root
+};
+
+struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
+{
+       int r;
+       struct sm_checker *smc;
+
+       if (!sm)
+               return NULL;
+
+       smc = kmalloc(sizeof(*smc), GFP_KERNEL);
+       if (!smc)
+               return NULL;
+
+       memcpy(&smc->sm, &ops_, sizeof(smc->sm));
+       r = ca_create(&smc->old_counts, sm);
+       if (r) {
+               kfree(smc);
+               return NULL;
+       }
+
+       r = ca_create(&smc->counts, sm);
+       if (r) {
+               ca_destroy(&smc->old_counts);
+               kfree(smc);
+               return NULL;
+       }
+
+       smc->real_sm = sm;
+
+       r = ca_load(&smc->counts, sm);
+       if (r) {
+               ca_destroy(&smc->counts);
+               ca_destroy(&smc->old_counts);
+               kfree(smc);
+               return NULL;
+       }
+
+       r = ca_commit(&smc->old_counts, &smc->counts);
+       if (r) {
+               ca_destroy(&smc->counts);
+               ca_destroy(&smc->old_counts);
+               kfree(smc);
+               return NULL;
+       }
+
+       return &smc->sm;
+}
+EXPORT_SYMBOL_GPL(dm_sm_checker_create);
+
+struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm)
+{
+       int r;
+       struct sm_checker *smc;
+
+       if (!sm)
+               return NULL;
+
+       smc = kmalloc(sizeof(*smc), GFP_KERNEL);
+       if (!smc)
+               return NULL;
+
+       memcpy(&smc->sm, &ops_, sizeof(smc->sm));
+       r = ca_create(&smc->old_counts, sm);
+       if (r) {
+               kfree(smc);
+               return NULL;
+       }
+
+       r = ca_create(&smc->counts, sm);
+       if (r) {
+               ca_destroy(&smc->old_counts);
+               kfree(smc);
+               return NULL;
+       }
+
+       smc->real_sm = sm;
+       return &smc->sm;
+}
+EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh);
+
+/*----------------------------------------------------------------*/
+
+#else
+
+struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
+{
+       return sm;
+}
+EXPORT_SYMBOL_GPL(dm_sm_checker_create);
+
+struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm)
+{
+       return sm;
+}
+EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh);
+
+/*----------------------------------------------------------------*/
+
+#endif
diff --git a/drivers/md/persistent-data/dm-space-map-checker.h b/drivers/md/persistent-data/dm-space-map-checker.h
new file mode 100644 (file)
index 0000000..444dccf
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef SNAPSHOTS_SPACE_MAP_CHECKER_H
+#define SNAPSHOTS_SPACE_MAP_CHECKER_H
+
+#include "dm-space-map.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * This space map wraps a real on-disk space map, and verifies all of its
+ * operations.  It uses a lot of memory, so only use if you have a specific
+ * problem that you're debugging.
+ *
+ * Ownership of @sm passes to the checker.
+ */
+struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm);
+struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm);
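+
+/*
+ * Usage sketch (hypothetical caller): wrap the real space map as it is
+ * created and use the returned map everywhere in its place:
+ *
+ *   struct dm_space_map *sm = dm_sm_checker_create(sm_disk);
+ *   if (!sm)
+ *           ... error ...
+ *
+ * With CONFIG_DM_DEBUG_SPACE_MAPS disabled this is a no-op that returns
+ * @sm unchanged.
+ */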
+
+/*----------------------------------------------------------------*/
+
+#endif
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
new file mode 100644 (file)
index 0000000..df2494c
--- /dev/null
@@ -0,0 +1,705 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-space-map-common.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/bitops.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "space map common"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Index validator.
+ */
+#define INDEX_CSUM_XOR 160478
+
+static void index_prepare_for_write(struct dm_block_validator *v,
+                                   struct dm_block *b,
+                                   size_t block_size)
+{
+       struct disk_metadata_index *mi_le = dm_block_data(b);
+
+       mi_le->blocknr = cpu_to_le64(dm_block_location(b));
+       mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
+                                                block_size - sizeof(__le32),
+                                                INDEX_CSUM_XOR));
+}
+
+static int index_check(struct dm_block_validator *v,
+                      struct dm_block *b,
+                      size_t block_size)
+{
+       struct disk_metadata_index *mi_le = dm_block_data(b);
+       __le32 csum_disk;
+
+       if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
+               DMERR("index_check failed blocknr %llu wanted %llu",
+                     le64_to_cpu(mi_le->blocknr), dm_block_location(b));
+               return -ENOTBLK;
+       }
+
+       csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
+                                              block_size - sizeof(__le32),
+                                              INDEX_CSUM_XOR));
+       if (csum_disk != mi_le->csum) {
+               DMERR("index_check failed csum %u wanted %u",
+                     le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
+               return -EILSEQ;
+       }
+
+       return 0;
+}
+
+static struct dm_block_validator index_validator = {
+       .name = "index",
+       .prepare_for_write = index_prepare_for_write,
+       .check = index_check
+};
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Bitmap validator
+ */
+#define BITMAP_CSUM_XOR 240779
+
+static void bitmap_prepare_for_write(struct dm_block_validator *v,
+                                    struct dm_block *b,
+                                    size_t block_size)
+{
+       struct disk_bitmap_header *disk_header = dm_block_data(b);
+
+       disk_header->blocknr = cpu_to_le64(dm_block_location(b));
+       disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
+                                                      block_size - sizeof(__le32),
+                                                      BITMAP_CSUM_XOR));
+}
+
+static int bitmap_check(struct dm_block_validator *v,
+                       struct dm_block *b,
+                       size_t block_size)
+{
+       struct disk_bitmap_header *disk_header = dm_block_data(b);
+       __le32 csum_disk;
+
+       if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
+               DMERR("bitmap check failed blocknr %llu wanted %llu",
+                     le64_to_cpu(disk_header->blocknr), dm_block_location(b));
+               return -ENOTBLK;
+       }
+
+       csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
+                                              block_size - sizeof(__le32),
+                                              BITMAP_CSUM_XOR));
+       if (csum_disk != disk_header->csum) {
+               DMERR("bitmap check failed csum %u wanted %u",
+                     le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
+               return -EILSEQ;
+       }
+
+       return 0;
+}
+
+static struct dm_block_validator dm_sm_bitmap_validator = {
+       .name = "sm_bitmap",
+       .prepare_for_write = bitmap_prepare_for_write,
+       .check = bitmap_check
+};
+
+/*----------------------------------------------------------------*/
+
+#define ENTRIES_PER_WORD 32
+#define ENTRIES_SHIFT  5
+
+static void *dm_bitmap_data(struct dm_block *b)
+{
+       return dm_block_data(b) + sizeof(struct disk_bitmap_header);
+}
+
+#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
+
+static unsigned bitmap_word_used(void *addr, unsigned b)
+{
+       __le64 *words_le = addr;
+       __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
+
+       uint64_t bits = le64_to_cpu(*w_le);
+       uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;
+
+       return !(~bits & mask);
+}
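+
+/*
+ * Worked example of the trick above: entries are 2 bits wide and an entry
+ * is "used" if non-zero.  If every entry is 01 (bits == 0x5555...5555),
+ * bits + WORD_MASK_HIGH + 1 wraps to 0, so mask == 0 and ~bits & mask == 0:
+ * the word is fully used.  A free (00) entry instead leaves its high bit
+ * set in both ~bits and mask, so the function returns 0.
+ */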
+
+static unsigned sm_lookup_bitmap(void *addr, unsigned b)
+{
+       __le64 *words_le = addr;
+       __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
+       unsigned hi, lo;
+
+       b = (b & (ENTRIES_PER_WORD - 1)) << 1;
+       hi = !!test_bit_le(b, (void *) w_le);
+       lo = !!test_bit_le(b + 1, (void *) w_le);
+       return (hi << 1) | lo;
+}
+
+static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
+{
+       __le64 *words_le = addr;
+       __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
+
+       b = (b & (ENTRIES_PER_WORD - 1)) << 1;
+
+       if (val & 2)
+               __set_bit_le(b, (void *) w_le);
+       else
+               __clear_bit_le(b, (void *) w_le);
+
+       if (val & 1)
+               __set_bit_le(b + 1, (void *) w_le);
+       else
+               __clear_bit_le(b + 1, (void *) w_le);
+}
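+
+/*
+ * Example of the encoding: each 2 bit entry stores reference counts 0-2
+ * directly; the value 3 means "3 or more, consult the ref count btree"
+ * (see sm_ll_lookup()).  So after sm_set_bitmap(addr, 5, 2),
+ * sm_lookup_bitmap(addr, 5) returns 2.
+ */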
+
+static int sm_find_free(void *addr, unsigned begin, unsigned end,
+                       unsigned *result)
+{
+       while (begin < end) {
+               if (!(begin & (ENTRIES_PER_WORD - 1)) &&
+                   bitmap_word_used(addr, begin)) {
+                       begin += ENTRIES_PER_WORD;
+                       continue;
+               }
+
+               if (!sm_lookup_bitmap(addr, begin)) {
+                       *result = begin;
+                       return 0;
+               }
+
+               begin++;
+       }
+
+       return -ENOSPC;
+}
+
+/*----------------------------------------------------------------*/
+
+static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
+{
+       ll->tm = tm;
+
+       ll->bitmap_info.tm = tm;
+       ll->bitmap_info.levels = 1;
+
+       /*
+        * Because the new bitmap blocks are created via a shadow
+        * operation, the old entry has already had its reference count
+        * decremented and we don't need the btree to do any bookkeeping.
+        */
+       ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
+       ll->bitmap_info.value_type.inc = NULL;
+       ll->bitmap_info.value_type.dec = NULL;
+       ll->bitmap_info.value_type.equal = NULL;
+
+       ll->ref_count_info.tm = tm;
+       ll->ref_count_info.levels = 1;
+       ll->ref_count_info.value_type.size = sizeof(uint32_t);
+       ll->ref_count_info.value_type.inc = NULL;
+       ll->ref_count_info.value_type.dec = NULL;
+       ll->ref_count_info.value_type.equal = NULL;
+
+       ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
+
+       if (ll->block_size > (1 << 30)) {
+               DMERR("block size too big to hold bitmaps");
+               return -EINVAL;
+       }
+
+       ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
+               ENTRIES_PER_BYTE;
+       ll->nr_blocks = 0;
+       ll->bitmap_root = 0;
+       ll->ref_count_root = 0;
+
+       return 0;
+}
+
+int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
+{
+       int r;
+       dm_block_t i, nr_blocks, nr_indexes;
+       unsigned old_blocks, blocks;
+
+       nr_blocks = ll->nr_blocks + extra_blocks;
+       old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
+       blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);
+
+       nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
+       if (nr_indexes > ll->max_entries(ll)) {
+               DMERR("space map too large");
+               return -EINVAL;
+       }
+
+       for (i = old_blocks; i < blocks; i++) {
+               struct dm_block *b;
+               struct disk_index_entry idx;
+
+               r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
+               if (r < 0)
+                       return r;
+               idx.blocknr = cpu_to_le64(dm_block_location(b));
+
+               r = dm_tm_unlock(ll->tm, b);
+               if (r < 0)
+                       return r;
+
+               idx.nr_free = cpu_to_le32(ll->entries_per_block);
+               idx.none_free_before = 0;
+
+               r = ll->save_ie(ll, i, &idx);
+               if (r < 0)
+                       return r;
+       }
+
+       ll->nr_blocks = nr_blocks;
+       return 0;
+}
+
+int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
+{
+       int r;
+       dm_block_t index = b;
+       struct disk_index_entry ie_disk;
+       struct dm_block *blk;
+
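+       /*
+        * do_div() divides in place: index becomes the quotient (which
+        * bitmap block), and the returned remainder is the entry within it.
+        */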
+       b = do_div(index, ll->entries_per_block);
+       r = ll->load_ie(ll, index, &ie_disk);
+       if (r < 0)
+               return r;
+
+       r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
+                           &dm_sm_bitmap_validator, &blk);
+       if (r < 0)
+               return r;
+
+       *result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
+
+       return dm_tm_unlock(ll->tm, blk);
+}
+
+int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
+{
+       __le32 le_rc;
+       int r = sm_ll_lookup_bitmap(ll, b, result);
+
+       if (r)
+               return r;
+
+       /*
+        * A bitmap value of 3 means "many": the actual reference count
+        * lives in the ref count btree.
+        */
+       if (*result != 3)
+               return r;
+
+       r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
+       if (r < 0)
+               return r;
+
+       *result = le32_to_cpu(le_rc);
+
+       return r;
+}
+
+int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
+                         dm_block_t end, dm_block_t *result)
+{
+       int r;
+       struct disk_index_entry ie_disk;
+       dm_block_t i, index_begin = begin;
+       dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);
+
+       /*
+        * FIXME: Use shifts
+        */
+       begin = do_div(index_begin, ll->entries_per_block);
+       end = do_div(end, ll->entries_per_block);
+
+       for (i = index_begin; i < index_end; i++, begin = 0) {
+               struct dm_block *blk;
+               unsigned position;
+               uint32_t bit_end;
+
+               r = ll->load_ie(ll, i, &ie_disk);
+               if (r < 0)
+                       return r;
+
+               if (le32_to_cpu(ie_disk.nr_free) == 0)
+                       continue;
+
+               r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
+                                   &dm_sm_bitmap_validator, &blk);
+               if (r < 0)
+                       return r;
+
+               bit_end = (i == index_end - 1) ? end : ll->entries_per_block;
+
+               r = sm_find_free(dm_bitmap_data(blk),
+                                max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
+                                bit_end, &position);
+               if (r == -ENOSPC) {
+                       /*
+                        * This might happen because we started searching
+                        * part way through the bitmap.
+                        */
+                       dm_tm_unlock(ll->tm, blk);
+                       continue;
+
+               } else if (r < 0) {
+                       dm_tm_unlock(ll->tm, blk);
+                       return r;
+               }
+
+               r = dm_tm_unlock(ll->tm, blk);
+               if (r < 0)
+                       return r;
+
+               *result = i * ll->entries_per_block + (dm_block_t) position;
+               return 0;
+       }
+
+       return -ENOSPC;
+}
+
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
+                uint32_t ref_count, enum allocation_event *ev)
+{
+       int r;
+       uint32_t bit, old;
+       struct dm_block *nb;
+       dm_block_t index = b;
+       struct disk_index_entry ie_disk;
+       void *bm_le;
+       int inc;
+
+       bit = do_div(index, ll->entries_per_block);
+       r = ll->load_ie(ll, index, &ie_disk);
+       if (r < 0)
+               return r;
+
+       r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
+                              &dm_sm_bitmap_validator, &nb, &inc);
+       if (r < 0) {
+               DMERR("dm_tm_shadow_block() failed");
+               return r;
+       }
+       ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
+
+       bm_le = dm_bitmap_data(nb);
+       old = sm_lookup_bitmap(bm_le, bit);
+
+       if (ref_count <= 2) {
+               /* Counts 0-2 fit in the bitmap directly. */
+               sm_set_bitmap(bm_le, bit, ref_count);
+
+               r = dm_tm_unlock(ll->tm, nb);
+               if (r < 0)
+                       return r;
+
+#if 0
+               /* FIXME: dm_btree_remove doesn't handle this yet */
+               if (old > 2) {
+                       r = dm_btree_remove(&ll->ref_count_info,
+                                           ll->ref_count_root,
+                                           &b, &ll->ref_count_root);
+                       if (r)
+                               return r;
+               }
+#endif
+
+       } else {
+               __le32 le_rc = cpu_to_le32(ref_count);
+
+               sm_set_bitmap(bm_le, bit, 3);   /* 3 == many */
+               r = dm_tm_unlock(ll->tm, nb);
+               if (r < 0)
+                       return r;
+
+               __dm_bless_for_disk(&le_rc);
+               r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
+                                   &b, &le_rc, &ll->ref_count_root);
+               if (r < 0) {
+                       DMERR("ref count insert failed");
+                       return r;
+               }
+       }
+
+       if (ref_count && !old) {
+               *ev = SM_ALLOC;
+               ll->nr_allocated++;
+               ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) - 1);
+               if (le32_to_cpu(ie_disk.none_free_before) == bit)
+                       ie_disk.none_free_before = cpu_to_le32(bit + 1);
+
+       } else if (old && !ref_count) {
+               *ev = SM_FREE;
+               ll->nr_allocated--;
+               ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) + 1);
+               ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
+       }
+
+       return ll->save_ie(ll, index, &ie_disk);
+}
+
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+{
+       int r;
+       uint32_t rc;
+
+       r = sm_ll_lookup(ll, b, &rc);
+       if (r)
+               return r;
+
+       return sm_ll_insert(ll, b, rc + 1, ev);
+}
+
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+{
+       int r;
+       uint32_t rc;
+
+       r = sm_ll_lookup(ll, b, &rc);
+       if (r)
+               return r;
+
+       if (!rc)
+               return -EINVAL;
+
+       return sm_ll_insert(ll, b, rc - 1, ev);
+}
+
+int sm_ll_commit(struct ll_disk *ll)
+{
+       return ll->commit(ll);
+}
+
+/*----------------------------------------------------------------*/
+
+static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
+                              struct disk_index_entry *ie)
+{
+       memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
+       return 0;
+}
+
+static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
+                              struct disk_index_entry *ie)
+{
+       memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
+       return 0;
+}
+
+static int metadata_ll_init_index(struct ll_disk *ll)
+{
+       int r;
+       struct dm_block *b;
+
+       r = dm_tm_new_block(ll->tm, &index_validator, &b);
+       if (r < 0)
+               return r;
+
+       memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
+       ll->bitmap_root = dm_block_location(b);
+
+       return dm_tm_unlock(ll->tm, b);
+}
+
+static int metadata_ll_open(struct ll_disk *ll)
+{
+       int r;
+       struct dm_block *block;
+
+       r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
+                           &index_validator, &block);
+       if (r)
+               return r;
+
+       memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
+       return dm_tm_unlock(ll->tm, block);
+}
+
+static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
+{
+       return MAX_METADATA_BITMAPS;
+}
+
+static int metadata_ll_commit(struct ll_disk *ll)
+{
+       int r, inc;
+       struct dm_block *b;
+
+       r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
+       if (r)
+               return r;
+
+       memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
+       ll->bitmap_root = dm_block_location(b);
+
+       return dm_tm_unlock(ll->tm, b);
+}
+
+int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
+{
+       int r;
+
+       r = sm_ll_init(ll, tm);
+       if (r < 0)
+               return r;
+
+       ll->load_ie = metadata_ll_load_ie;
+       ll->save_ie = metadata_ll_save_ie;
+       ll->init_index = metadata_ll_init_index;
+       ll->open_index = metadata_ll_open;
+       ll->max_entries = metadata_ll_max_entries;
+       ll->commit = metadata_ll_commit;
+
+       ll->nr_blocks = 0;
+       ll->nr_allocated = 0;
+
+       r = ll->init_index(ll);
+       if (r < 0)
+               return r;
+
+       r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
+       if (r < 0)
+               return r;
+
+       return 0;
+}
+
+int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
+                       void *root_le, size_t len)
+{
+       int r;
+       struct disk_sm_root *smr = root_le;
+
+       if (len < sizeof(struct disk_sm_root)) {
+               DMERR("sm_metadata root too small");
+               return -ENOMEM;
+       }
+
+       r = sm_ll_init(ll, tm);
+       if (r < 0)
+               return r;
+
+       ll->load_ie = metadata_ll_load_ie;
+       ll->save_ie = metadata_ll_save_ie;
+       ll->init_index = metadata_ll_init_index;
+       ll->open_index = metadata_ll_open;
+       ll->max_entries = metadata_ll_max_entries;
+       ll->commit = metadata_ll_commit;
+
+       ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
+       ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
+       ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
+       ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
+
+       return ll->open_index(ll);
+}
+
+/*----------------------------------------------------------------*/
+
+static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
+                          struct disk_index_entry *ie)
+{
+       return dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
+}
+
+static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
+                          struct disk_index_entry *ie)
+{
+       __dm_bless_for_disk(ie);
+       return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
+                              &index, ie, &ll->bitmap_root);
+}
+
+static int disk_ll_init_index(struct ll_disk *ll)
+{
+       return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
+}
+
+static int disk_ll_open(struct ll_disk *ll)
+{
+       /* nothing to do */
+       return 0;
+}
+
+static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
+{
+       return -1ULL;
+}
+
+static int disk_ll_commit(struct ll_disk *ll)
+{
+       return 0;
+}
+
+int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
+{
+       int r;
+
+       r = sm_ll_init(ll, tm);
+       if (r < 0)
+               return r;
+
+       ll->load_ie = disk_ll_load_ie;
+       ll->save_ie = disk_ll_save_ie;
+       ll->init_index = disk_ll_init_index;
+       ll->open_index = disk_ll_open;
+       ll->max_entries = disk_ll_max_entries;
+       ll->commit = disk_ll_commit;
+
+       ll->nr_blocks = 0;
+       ll->nr_allocated = 0;
+
+       r = ll->init_index(ll);
+       if (r < 0)
+               return r;
+
+       r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
+       if (r < 0)
+               return r;
+
+       return 0;
+}
+
+int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
+                   void *root_le, size_t len)
+{
+       int r;
+       struct disk_sm_root *smr = root_le;
+
+       if (len < sizeof(struct disk_sm_root)) {
+               DMERR("sm_metadata root too small");
+               return -ENOMEM;
+       }
+
+       r = sm_ll_init(ll, tm);
+       if (r < 0)
+               return r;
+
+       ll->load_ie = disk_ll_load_ie;
+       ll->save_ie = disk_ll_save_ie;
+       ll->init_index = disk_ll_init_index;
+       ll->open_index = disk_ll_open;
+       ll->max_entries = disk_ll_max_entries;
+       ll->commit = disk_ll_commit;
+
+       ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
+       ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
+       ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
+       ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
+
+       return ll->open_index(ll);
+}
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h
new file mode 100644 (file)
index 0000000..8f22082
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_SPACE_MAP_COMMON_H
+#define DM_SPACE_MAP_COMMON_H
+
+#include "dm-btree.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Low level disk format
+ *
+ * Bitmap btree
+ * ------------
+ *
+ * Each value stored in the btree is an index_entry.  This points to a
+ * block that is used as a bitmap.  Within the bitmap we hold 2 bits per
+ * entry, which represent UNUSED = 0, REF_COUNT = 1, REF_COUNT = 2 and
+ * REF_COUNT = many (3).
+ *
+ * Refcount btree
+ * --------------
+ *
+ * Any entry that has a ref count higher than 2 gets entered in the ref
+ * count tree.  The leaf values for this tree are the 32-bit ref counts.
+ */
+
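+/*
+ * For example, with 4KiB blocks and the 16-byte disk_bitmap_header
+ * defined below, entries_per_block = (4096 - 16) * ENTRIES_PER_BYTE =
+ * 16320; block 100000 is then described by index entry 100000 / 16320
+ * = 6, at bit 100000 % 16320 = 2080 within that bitmap.  The .c file
+ * computes this split with do_div().
+ */
+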
+struct disk_index_entry {
+       __le64 blocknr;            /* block holding this bitmap */
+       __le32 nr_free;            /* number of free entries in the bitmap */
+       __le32 none_free_before;   /* entries below this bit are all in use */
+} __packed;
+
+
+#define MAX_METADATA_BITMAPS 255
+struct disk_metadata_index {
+       __le32 csum;
+       __le32 padding;
+       __le64 blocknr;
+
+       struct disk_index_entry index[MAX_METADATA_BITMAPS];
+} __packed;
+
+struct ll_disk;
+
+typedef int (*load_ie_fn)(struct ll_disk *ll, dm_block_t index, struct disk_index_entry *result);
+typedef int (*save_ie_fn)(struct ll_disk *ll, dm_block_t index, struct disk_index_entry *ie);
+typedef int (*init_index_fn)(struct ll_disk *ll);
+typedef int (*open_index_fn)(struct ll_disk *ll);
+typedef dm_block_t (*max_index_entries_fn)(struct ll_disk *ll);
+typedef int (*commit_fn)(struct ll_disk *ll);
+
+struct ll_disk {
+       struct dm_transaction_manager *tm;
+       struct dm_btree_info bitmap_info;
+       struct dm_btree_info ref_count_info;
+
+       uint32_t block_size;
+       uint32_t entries_per_block;
+       dm_block_t nr_blocks;
+       dm_block_t nr_allocated;
+
+       /*
+        * bitmap_root may be a btree root or a simple index.
+        */
+       dm_block_t bitmap_root;
+
+       dm_block_t ref_count_root;
+
+       struct disk_metadata_index mi_le;
+       load_ie_fn load_ie;
+       save_ie_fn save_ie;
+       init_index_fn init_index;
+       open_index_fn open_index;
+       max_index_entries_fn max_entries;
+       commit_fn commit;
+};
+
+struct disk_sm_root {
+       __le64 nr_blocks;
+       __le64 nr_allocated;
+       __le64 bitmap_root;
+       __le64 ref_count_root;
+} __packed;
+
+#define ENTRIES_PER_BYTE 4
+
+struct disk_bitmap_header {
+       __le32 csum;
+       __le32 not_used;
+       __le64 blocknr;
+} __packed;
+
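+/*
+ * sm_ll_insert() reports SM_ALLOC when a reference count goes from 0
+ * to non-zero, and SM_FREE when it drops back to 0, so callers can
+ * maintain running allocation totals.
+ */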
+enum allocation_event {
+       SM_NONE,
+       SM_ALLOC,
+       SM_FREE,
+};
+
+/*----------------------------------------------------------------*/
+
+int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks);
+int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result);
+int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result);
+int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
+                         dm_block_t end, dm_block_t *result);
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev);
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
+int sm_ll_commit(struct ll_disk *ll);
+
+int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm);
+int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
+                       void *root_le, size_t len);
+
+int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm);
+int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
+                   void *root_le, size_t len);
+
+/*----------------------------------------------------------------*/
+
+#endif /* DM_SPACE_MAP_COMMON_H */
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
new file mode 100644 (file)
index 0000000..aeff785
--- /dev/null
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-space-map-checker.h"
+#include "dm-space-map-common.h"
+#include "dm-space-map-disk.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "space map disk"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Space map interface.
+ */
+struct sm_disk {
+       struct dm_space_map sm;
+
+       struct ll_disk ll;
+       struct ll_disk old_ll;
+
+       dm_block_t begin;
+       dm_block_t nr_allocated_this_transaction;
+};
+
+static void sm_disk_destroy(struct dm_space_map *sm)
+{
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+       kfree(smd);
+}
+
+static int sm_disk_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+       return sm_ll_extend(&smd->ll, extra_blocks);
+}
+
+static int sm_disk_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+{
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+       *count = smd->old_ll.nr_blocks;
+
+       return 0;
+}
+
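+/*
+ * Note the asymmetry below: blocks freed during this transaction stay
+ * accounted as allocated in old_ll until commit, so they are not
+ * available for reuse, whereas blocks allocated this transaction must
+ * be subtracted explicitly.
+ */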
+static int sm_disk_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+{
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+       *count = (smd->old_ll.nr_blocks - smd->old_ll.nr_allocated) - smd->nr_allocated_this_transaction;
+
+       return 0;
+}
+
+static int sm_disk_get_count(struct dm_space_map *sm, dm_block_t b,
+                            uint32_t *result)
+{
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+       return sm_ll_lookup(&smd->ll, b, result);
+}
+
+static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b,
+                                         int *result)
+{
+       int r;
+       uint32_t count;
+
+       r = sm_disk_get_count(sm, b, &count);
+       if (r)
+               return r;
+
+       *result = count > 1;
+
+       return 0;
+}
+
+static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
+                            uint32_t count)
+{
+       int r;
+       uint32_t old_count;
+       enum allocation_event ev;
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+       r = sm_ll_insert(&smd->ll, b, count, &ev);
+       if (!r) {
+               switch (ev) {
+               case SM_NONE:
+                       break;
+
+               case SM_ALLOC:
+                       /*
+                        * This _must_ be free in the prior transaction
+                        * otherwise we've lost atomicity.
+                        */
+                       smd->nr_allocated_this_transaction++;
+                       break;
+
+               case SM_FREE:
+                       /*
+                        * It's only free if it's also free in the last
+                        * transaction.
+                        */
+                       r = sm_ll_lookup(&smd->old_ll, b, &old_count);
+                       if (r)
+                               return r;
+
+                       if (!old_count)
+                               smd->nr_allocated_this_transaction--;
+                       break;
+               }
+       }
+
+       return r;
+}
+
+static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
+{
+       int r;
+       enum allocation_event ev;
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+       r = sm_ll_inc(&smd->ll, b, &ev);
+       if (!r && (ev == SM_ALLOC))
+               /*
+                * This _must_ be free in the prior transaction
+                * otherwise we've lost atomicity.
+                */
+               smd->nr_allocated_this_transaction++;
+
+       return r;
+}
+
+static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
+{
+       int r;
+       uint32_t old_count;
+       enum allocation_event ev;
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+       r = sm_ll_dec(&smd->ll, b, &ev);
+       if (!r && (ev == SM_FREE)) {
+               /*
+                * It's only free if it's also free in the last
+                * transaction.
+                */
+               r = sm_ll_lookup(&smd->old_ll, b, &old_count);
+               if (r)
+                       return r;
+
+               if (!old_count)
+                       smd->nr_allocated_this_transaction--;
+       }
+
+       return r;
+}
+
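+/*
+ * Allocation searches old_ll, the state as of the last commit, so a
+ * block freed earlier in this transaction can never be handed out
+ * again before that free has been committed; the new reference is
+ * then taken in the current ll.
+ */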
+static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
+{
+       int r;
+       enum allocation_event ev;
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+       /* FIXME: we should loop round a couple of times */
+       r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b);
+       if (r)
+               return r;
+
+       smd->begin = *b + 1;
+       r = sm_ll_inc(&smd->ll, *b, &ev);
+       if (!r) {
+               BUG_ON(ev != SM_ALLOC);
+               smd->nr_allocated_this_transaction++;
+       }
+
+       return r;
+}
+
+static int sm_disk_commit(struct dm_space_map *sm)
+{
+       int r;
+       dm_block_t nr_free;
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+       r = sm_disk_get_nr_free(sm, &nr_free);
+       if (r)
+               return r;
+
+       r = sm_ll_commit(&smd->ll);
+       if (r)
+               return r;
+
+       memcpy(&smd->old_ll, &smd->ll, sizeof(smd->old_ll));
+       smd->begin = 0;
+       smd->nr_allocated_this_transaction = 0;
+
+       r = sm_disk_get_nr_free(sm, &nr_free);
+       if (r)
+               return r;
+
+       return 0;
+}
+
+static int sm_disk_root_size(struct dm_space_map *sm, size_t *result)
+{
+       *result = sizeof(struct disk_sm_root);
+
+       return 0;
+}
+
+static int sm_disk_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
+{
+       struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+       struct disk_sm_root root_le;
+
+       root_le.nr_blocks = cpu_to_le64(smd->ll.nr_blocks);
+       root_le.nr_allocated = cpu_to_le64(smd->ll.nr_allocated);
+       root_le.bitmap_root = cpu_to_le64(smd->ll.bitmap_root);
+       root_le.ref_count_root = cpu_to_le64(smd->ll.ref_count_root);
+
+       if (max < sizeof(root_le))
+               return -ENOSPC;
+
+       memcpy(where_le, &root_le, sizeof(root_le));
+
+       return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_space_map ops = {
+       .destroy = sm_disk_destroy,
+       .extend = sm_disk_extend,
+       .get_nr_blocks = sm_disk_get_nr_blocks,
+       .get_nr_free = sm_disk_get_nr_free,
+       .get_count = sm_disk_get_count,
+       .count_is_more_than_one = sm_disk_count_is_more_than_one,
+       .set_count = sm_disk_set_count,
+       .inc_block = sm_disk_inc_block,
+       .dec_block = sm_disk_dec_block,
+       .new_block = sm_disk_new_block,
+       .commit = sm_disk_commit,
+       .root_size = sm_disk_root_size,
+       .copy_root = sm_disk_copy_root
+};
+
+static struct dm_space_map *dm_sm_disk_create_real(
+       struct dm_transaction_manager *tm,
+       dm_block_t nr_blocks)
+{
+       int r;
+       struct sm_disk *smd;
+
+       smd = kmalloc(sizeof(*smd), GFP_KERNEL);
+       if (!smd)
+               return ERR_PTR(-ENOMEM);
+
+       smd->begin = 0;
+       smd->nr_allocated_this_transaction = 0;
+       memcpy(&smd->sm, &ops, sizeof(smd->sm));
+
+       r = sm_ll_new_disk(&smd->ll, tm);
+       if (r)
+               goto bad;
+
+       r = sm_ll_extend(&smd->ll, nr_blocks);
+       if (r)
+               goto bad;
+
+       r = sm_disk_commit(&smd->sm);
+       if (r)
+               goto bad;
+
+       return &smd->sm;
+
+bad:
+       kfree(smd);
+       return ERR_PTR(r);
+}
+
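+/*
+ * Both constructors below hand the real space map to the checker
+ * wrapper (see dm-space-map-checker.h) before returning it.
+ */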
+struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
+                                      dm_block_t nr_blocks)
+{
+       struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks);
+       return dm_sm_checker_create_fresh(sm);
+}
+EXPORT_SYMBOL_GPL(dm_sm_disk_create);
+
+static struct dm_space_map *dm_sm_disk_open_real(
+       struct dm_transaction_manager *tm,
+       void *root_le, size_t len)
+{
+       int r;
+       struct sm_disk *smd;
+
+       smd = kmalloc(sizeof(*smd), GFP_KERNEL);
+       if (!smd)
+               return ERR_PTR(-ENOMEM);
+
+       smd->begin = 0;
+       smd->nr_allocated_this_transaction = 0;
+       memcpy(&smd->sm, &ops, sizeof(smd->sm));
+
+       r = sm_ll_open_disk(&smd->ll, tm, root_le, len);
+       if (r)
+               goto bad;
+
+       r = sm_disk_commit(&smd->sm);
+       if (r)
+               goto bad;
+
+       return &smd->sm;
+
+bad:
+       kfree(smd);
+       return ERR_PTR(r);
+}
+
+struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm,
+                                    void *root_le, size_t len)
+{
+       return dm_sm_checker_create(
+               dm_sm_disk_open_real(tm, root_le, len));
+}
+EXPORT_SYMBOL_GPL(dm_sm_disk_open);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-space-map-disk.h b/drivers/md/persistent-data/dm-space-map-disk.h
new file mode 100644 (file)
index 0000000..447a0a9
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _LINUX_DM_SPACE_MAP_DISK_H
+#define _LINUX_DM_SPACE_MAP_DISK_H
+
+#include "dm-block-manager.h"
+
+struct dm_space_map;
+struct dm_transaction_manager;
+
+/*
+ * Unfortunately we have to use two-phase construction due to the cycle
+ * between the tm and sm.
+ */
+struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
+                                      dm_block_t nr_blocks);
+
+struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm,
+                                    void *root, size_t len);
+
+#endif /* _LINUX_DM_SPACE_MAP_DISK_H */
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
new file mode 100644 (file)
index 0000000..e89ae5e
--- /dev/null
@@ -0,0 +1,596 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-space-map.h"
+#include "dm-space-map-common.h"
+#include "dm-space-map-metadata.h"
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "space map metadata"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Space map interface.
+ *
+ * The low level disk format is written using the standard btree and
+ * transaction manager.  This means that performing disk operations may
+ * cause us to recurse into the space map in order to allocate new blocks.
+ * For this reason we have a pool of pre-allocated blocks large enough to
+ * service any metadata ll_disk operation.
+ */
+
+/*
+ * FIXME: we should calculate this based on the size of the device.
+ * Only the metadata space map needs this functionality.
+ */
+#define MAX_RECURSIVE_ALLOCATIONS 1024
+
+enum block_op_type {
+       BOP_INC,
+       BOP_DEC
+};
+
+struct block_op {
+       enum block_op_type type;
+       dm_block_t block;
+};
+
+struct sm_metadata {
+       struct dm_space_map sm;
+
+       struct ll_disk ll;
+       struct ll_disk old_ll;
+
+       dm_block_t begin;
+
+       unsigned recursion_count;
+       unsigned allocated_this_transaction;
+       unsigned nr_uncommitted;
+       struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
+};
+
+static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
+{
+       struct block_op *op;
+
+       if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) {
+               DMERR("too many recursive allocations");
+               return -ENOMEM;
+       }
+
+       op = smm->uncommitted + smm->nr_uncommitted++;
+       op->type = type;
+       op->block = b;
+
+       return 0;
+}
+
+static int commit_bop(struct sm_metadata *smm, struct block_op *op)
+{
+       int r = 0;
+       enum allocation_event ev;
+
+       switch (op->type) {
+       case BOP_INC:
+               r = sm_ll_inc(&smm->ll, op->block, &ev);
+               break;
+
+       case BOP_DEC:
+               r = sm_ll_dec(&smm->ll, op->block, &ev);
+               break;
+       }
+
+       return r;
+}
+
+static void in(struct sm_metadata *smm)
+{
+       smm->recursion_count++;
+}
+
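+/*
+ * Leaving the outermost level of recursion replays any queued block
+ * ops, newest first, against the real space map.
+ */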
+static int out(struct sm_metadata *smm)
+{
+       int r = 0;
+
+       /*
+        * If we're not recursing then very bad things are happening.
+        */
+       if (!smm->recursion_count) {
+               DMERR("lost track of recursion depth");
+               return -ENOMEM;
+       }
+
+       if (smm->recursion_count == 1 && smm->nr_uncommitted) {
+               while (smm->nr_uncommitted && !r) {
+                       smm->nr_uncommitted--;
+                       r = commit_bop(smm, smm->uncommitted +
+                                      smm->nr_uncommitted);
+                       if (r)
+                               break;
+               }
+       }
+
+       smm->recursion_count--;
+
+       return r;
+}
+
+/*
+ * When using the out() function above, we often want to combine an error
+ * code for the operation run in the recursive context with that from
+ * out().
+ */
+static int combine_errors(int r1, int r2)
+{
+       return r1 ? r1 : r2;
+}
+
+static int recursing(struct sm_metadata *smm)
+{
+       return smm->recursion_count;
+}
+
+static void sm_metadata_destroy(struct dm_space_map *sm)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       kfree(smm);
+}
+
+static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+       DMERR("doesn't support extend");
+       return -EINVAL;
+}
+
+static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       *count = smm->ll.nr_blocks;
+
+       return 0;
+}
+
+static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
+                smm->allocated_this_transaction;
+
+       return 0;
+}
+
+static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
+                                uint32_t *result)
+{
+       int r, i;
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+       unsigned adjustment = 0;
+
+       /*
+        * We may have some uncommitted adjustments to add.  This list
+        * should always be really short.
+        */
+       for (i = 0; i < smm->nr_uncommitted; i++) {
+               struct block_op *op = smm->uncommitted + i;
+
+               if (op->block != b)
+                       continue;
+
+               switch (op->type) {
+               case BOP_INC:
+                       adjustment++;
+                       break;
+
+               case BOP_DEC:
+                       adjustment--;
+                       break;
+               }
+       }
+
+       r = sm_ll_lookup(&smm->ll, b, result);
+       if (r)
+               return r;
+
+       *result += adjustment;
+
+       return 0;
+}
+
+static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
+                                             dm_block_t b, int *result)
+{
+       int r, i, adjustment = 0;
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+       uint32_t rc;
+
+       /*
+        * We may have some uncommitted adjustments to add.  This list
+        * should always be really short.
+        */
+       for (i = 0; i < smm->nr_uncommitted; i++) {
+               struct block_op *op = smm->uncommitted + i;
+
+               if (op->block != b)
+                       continue;
+
+               switch (op->type) {
+               case BOP_INC:
+                       adjustment++;
+                       break;
+
+               case BOP_DEC:
+                       adjustment--;
+                       break;
+               }
+       }
+
+       if (adjustment > 1) {
+               *result = 1;
+               return 0;
+       }
+
+       r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
+       if (r)
+               return r;
+
+       if (rc == 3)
+               /*
+                * We err on the side of caution, and always return true.
+                */
+               *result = 1;
+       else
+               *result = rc + adjustment > 1;
+
+       return 0;
+}
+
+static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
+                                uint32_t count)
+{
+       int r, r2;
+       enum allocation_event ev;
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       if (smm->recursion_count) {
+               DMERR("cannot recurse set_count()");
+               return -EINVAL;
+       }
+
+       in(smm);
+       r = sm_ll_insert(&smm->ll, b, count, &ev);
+       r2 = out(smm);
+
+       return combine_errors(r, r2);
+}
+
+static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
+{
+       int r, r2 = 0;
+       enum allocation_event ev;
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       if (recursing(smm))
+               r = add_bop(smm, BOP_INC, b);
+       else {
+               in(smm);
+               r = sm_ll_inc(&smm->ll, b, &ev);
+               r2 = out(smm);
+       }
+
+       return combine_errors(r, r2);
+}
+
+static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
+{
+       int r, r2 = 0;
+       enum allocation_event ev;
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       if (recursing(smm))
+               r = add_bop(smm, BOP_DEC, b);
+       else {
+               in(smm);
+               r = sm_ll_dec(&smm->ll, b, &ev);
+               r2 = out(smm);
+       }
+
+       return combine_errors(r, r2);
+}
+
+static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
+{
+       int r, r2 = 0;
+       enum allocation_event ev;
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
+       if (r)
+               return r;
+
+       smm->begin = *b + 1;
+
+       if (recursing(smm))
+               r = add_bop(smm, BOP_INC, *b);
+       else {
+               in(smm);
+               r = sm_ll_inc(&smm->ll, *b, &ev);
+               r2 = out(smm);
+       }
+
+       if (!r)
+               smm->allocated_this_transaction++;
+
+       return combine_errors(r, r2);
+}
+
+static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
+{
+       int r = sm_metadata_new_block_(sm, b);
+       if (r)
+               DMERR("out of metadata space");
+       return r;
+}
+
+static int sm_metadata_commit(struct dm_space_map *sm)
+{
+       int r;
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       r = sm_ll_commit(&smm->ll);
+       if (r)
+               return r;
+
+       memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
+       smm->begin = 0;
+       smm->allocated_this_transaction = 0;
+
+       return 0;
+}
+
+static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
+{
+       *result = sizeof(struct disk_sm_root);
+
+       return 0;
+}
+
+static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+       struct disk_sm_root root_le;
+
+       root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
+       root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
+       root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
+       root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);
+
+       if (max < sizeof(root_le))
+               return -ENOSPC;
+
+       memcpy(where_le, &root_le, sizeof(root_le));
+
+       return 0;
+}
+
+static struct dm_space_map ops = {
+       .destroy = sm_metadata_destroy,
+       .extend = sm_metadata_extend,
+       .get_nr_blocks = sm_metadata_get_nr_blocks,
+       .get_nr_free = sm_metadata_get_nr_free,
+       .get_count = sm_metadata_get_count,
+       .count_is_more_than_one = sm_metadata_count_is_more_than_one,
+       .set_count = sm_metadata_set_count,
+       .inc_block = sm_metadata_inc_block,
+       .dec_block = sm_metadata_dec_block,
+       .new_block = sm_metadata_new_block,
+       .commit = sm_metadata_commit,
+       .root_size = sm_metadata_root_size,
+       .copy_root = sm_metadata_copy_root
+};
+
+/*----------------------------------------------------------------*/
+
+/*
+ * When a new space map is created that manages its own space, we use
+ * this tiny bootstrap allocator.
+ */
+static void sm_bootstrap_destroy(struct dm_space_map *sm)
+{
+}
+
+static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+       DMERR("boostrap doesn't support extend");
+
+       return -EINVAL;
+}
+
+static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       *count = smm->ll.nr_blocks;
+
+       return 0;
+}
+
+static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       *count = smm->ll.nr_blocks - smm->begin;
+
+       return 0;
+}
+
+static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
+                                 uint32_t *result)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       *result = (b < smm->begin) ? 1 : 0;
+
+       return 0;
+}
+
+static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
+                                              dm_block_t b, int *result)
+{
+       *result = 0;
+
+       return 0;
+}
+
+static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
+                                 uint32_t count)
+{
+       DMERR("boostrap doesn't support set_count");
+
+       return -EINVAL;
+}
+
+static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       /*
+        * We know the entire device is unused.
+        */
+       if (smm->begin == smm->ll.nr_blocks)
+               return -ENOSPC;
+
+       *b = smm->begin++;
+
+       return 0;
+}
+
+static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       return add_bop(smm, BOP_INC, b);
+}
+
+static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
+{
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       return add_bop(smm, BOP_DEC, b);
+}
+
+static int sm_bootstrap_commit(struct dm_space_map *sm)
+{
+       return 0;
+}
+
+static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
+{
+       DMERR("boostrap doesn't support root_size");
+
+       return -EINVAL;
+}
+
+static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
+                                 size_t max)
+{
+       DMERR("boostrap doesn't support copy_root");
+
+       return -EINVAL;
+}
+
+static struct dm_space_map bootstrap_ops = {
+       .destroy = sm_bootstrap_destroy,
+       .extend = sm_bootstrap_extend,
+       .get_nr_blocks = sm_bootstrap_get_nr_blocks,
+       .get_nr_free = sm_bootstrap_get_nr_free,
+       .get_count = sm_bootstrap_get_count,
+       .count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
+       .set_count = sm_bootstrap_set_count,
+       .inc_block = sm_bootstrap_inc_block,
+       .dec_block = sm_bootstrap_dec_block,
+       .new_block = sm_bootstrap_new_block,
+       .commit = sm_bootstrap_commit,
+       .root_size = sm_bootstrap_root_size,
+       .copy_root = sm_bootstrap_copy_root
+};
+
+/*----------------------------------------------------------------*/
+
+struct dm_space_map *dm_sm_metadata_init(void)
+{
+       struct sm_metadata *smm;
+
+       smm = kmalloc(sizeof(*smm), GFP_KERNEL);
+       if (!smm)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(&smm->sm, &ops, sizeof(smm->sm));
+
+       return &smm->sm;
+}
+
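+/*
+ * Creation happens in three phases: (1) install bootstrap_ops so the
+ * space map can hand out blocks for its own btrees without recursing,
+ * (2) build the on-disk structures, then (3) switch to the real ops
+ * and replay reference counts for the blocks consumed while
+ * bootstrapping.
+ */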
+int dm_sm_metadata_create(struct dm_space_map *sm,
+                         struct dm_transaction_manager *tm,
+                         dm_block_t nr_blocks,
+                         dm_block_t superblock)
+{
+       int r;
+       dm_block_t i;
+       enum allocation_event ev;
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       smm->begin = superblock + 1;
+       smm->recursion_count = 0;
+       smm->allocated_this_transaction = 0;
+       smm->nr_uncommitted = 0;
+
+       memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
+
+       r = sm_ll_new_metadata(&smm->ll, tm);
+       if (r)
+               return r;
+
+       r = sm_ll_extend(&smm->ll, nr_blocks);
+       if (r)
+               return r;
+
+       memcpy(&smm->sm, &ops, sizeof(smm->sm));
+
+       /*
+        * Now we need to update the newly created data structures with the
+        * allocated blocks that they were built from.
+        */
+       for (i = superblock; !r && i < smm->begin; i++)
+               r = sm_ll_inc(&smm->ll, i, &ev);
+
+       if (r)
+               return r;
+
+       return sm_metadata_commit(sm);
+}
+
+int dm_sm_metadata_open(struct dm_space_map *sm,
+                       struct dm_transaction_manager *tm,
+                       void *root_le, size_t len)
+{
+       int r;
+       struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+       r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
+       if (r)
+               return r;
+
+       smm->begin = 0;
+       smm->recursion_count = 0;
+       smm->allocated_this_transaction = 0;
+       smm->nr_uncommitted = 0;
+
+       memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
+       return 0;
+}
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.h b/drivers/md/persistent-data/dm-space-map-metadata.h
new file mode 100644 (file)
index 0000000..39bba08
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_SPACE_MAP_METADATA_H
+#define DM_SPACE_MAP_METADATA_H
+
+#include "dm-transaction-manager.h"
+
+/*
+ * Unfortunately we have to use two-phase construction due to the cycle
+ * between the tm and sm.
+ */
+struct dm_space_map *dm_sm_metadata_init(void);
+
+/*
+ * Create a fresh space map.
+ */
+int dm_sm_metadata_create(struct dm_space_map *sm,
+                         struct dm_transaction_manager *tm,
+                         dm_block_t nr_blocks,
+                         dm_block_t superblock);
+
+/*
+ * Open from a previously-recorded root.
+ */
+int dm_sm_metadata_open(struct dm_space_map *sm,
+                       struct dm_transaction_manager *tm,
+                       void *root_le, size_t len);
+
+#endif /* DM_SPACE_MAP_METADATA_H */
diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h
new file mode 100644 (file)
index 0000000..1cbfc6b
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _LINUX_DM_SPACE_MAP_H
+#define _LINUX_DM_SPACE_MAP_H
+
+#include "dm-block-manager.h"
+
+/*
+ * struct dm_space_map keeps a record of how many times each block in a device
+ * is referenced.  It must be persisted to disk as part of the transaction.
+ */
+struct dm_space_map {
+       void (*destroy)(struct dm_space_map *sm);
+
+       /*
+        * You must commit before allocating the newly added space.
+        */
+       int (*extend)(struct dm_space_map *sm, dm_block_t extra_blocks);
+
+       /*
+        * Extensions do not appear in this count until after commit has
+        * been called.
+        */
+       int (*get_nr_blocks)(struct dm_space_map *sm, dm_block_t *count);
+
+       /*
+        * Space maps must never allocate a block from the previous
+        * transaction, in case we need to rollback.  This complicates the
+        * semantics of get_nr_free(), it should return the number of blocks
+        * that are available for allocation _now_.  For instance you may
+        * have blocks with a zero reference count that will not be
+        * available for allocation until after the next commit.
+        */
+       int (*get_nr_free)(struct dm_space_map *sm, dm_block_t *count);
+
+       int (*get_count)(struct dm_space_map *sm, dm_block_t b, uint32_t *result);
+       int (*count_is_more_than_one)(struct dm_space_map *sm, dm_block_t b,
+                                     int *result);
+       int (*set_count)(struct dm_space_map *sm, dm_block_t b, uint32_t count);
+
+       int (*commit)(struct dm_space_map *sm);
+
+       int (*inc_block)(struct dm_space_map *sm, dm_block_t b);
+       int (*dec_block)(struct dm_space_map *sm, dm_block_t b);
+
+       /*
+        * new_block allocates a previously unused block and takes the
+        * first reference on it.
+        */
+       int (*new_block)(struct dm_space_map *sm, dm_block_t *b);
+
+       /*
+        * The root contains all the information needed to fix the space map.
+        * Generally this info is small, so squirrel it away in a disk block
+        * along with other info.
+        */
+       int (*root_size)(struct dm_space_map *sm, size_t *result);
+       int (*copy_root)(struct dm_space_map *sm, void *copy_to_here_le, size_t len);
+};
+
+/*----------------------------------------------------------------*/
+
+static inline void dm_sm_destroy(struct dm_space_map *sm)
+{
+       sm->destroy(sm);
+}
+
+static inline int dm_sm_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+       return sm->extend(sm, extra_blocks);
+}
+
+static inline int dm_sm_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+{
+       return sm->get_nr_blocks(sm, count);
+}
+
+static inline int dm_sm_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+{
+       return sm->get_nr_free(sm, count);
+}
+
+static inline int dm_sm_get_count(struct dm_space_map *sm, dm_block_t b,
+                                 uint32_t *result)
+{
+       return sm->get_count(sm, b, result);
+}
+
+static inline int dm_sm_count_is_more_than_one(struct dm_space_map *sm,
+                                              dm_block_t b, int *result)
+{
+       return sm->count_is_more_than_one(sm, b, result);
+}
+
+static inline int dm_sm_set_count(struct dm_space_map *sm, dm_block_t b,
+                                 uint32_t count)
+{
+       return sm->set_count(sm, b, count);
+}
+
+static inline int dm_sm_commit(struct dm_space_map *sm)
+{
+       return sm->commit(sm);
+}
+
+static inline int dm_sm_inc_block(struct dm_space_map *sm, dm_block_t b)
+{
+       return sm->inc_block(sm, b);
+}
+
+static inline int dm_sm_dec_block(struct dm_space_map *sm, dm_block_t b)
+{
+       return sm->dec_block(sm, b);
+}
+
+static inline int dm_sm_new_block(struct dm_space_map *sm, dm_block_t *b)
+{
+       return sm->new_block(sm, b);
+}
+
+static inline int dm_sm_root_size(struct dm_space_map *sm, size_t *result)
+{
+       return sm->root_size(sm, result);
+}
+
+static inline int dm_sm_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len)
+{
+       return sm->copy_root(sm, copy_to_here_le, len);
+}
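+
+/*
+ * Illustrative sketch (hypothetical, not part of this patch): a
+ * minimal caller.  new_block() already takes the first reference, and
+ * nothing is stable on disk until commit().
+ */
+#if 0
+static inline int ex_alloc_and_commit(struct dm_space_map *sm, dm_block_t *b)
+{
+       int r = dm_sm_new_block(sm, b);  /* count goes 0 -> 1 */
+
+       if (r)
+               return r;
+
+       return dm_sm_commit(sm);
+}
+#endif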
+
+#endif /* _LINUX_DM_SPACE_MAP_H */
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
new file mode 100644 (file)
index 0000000..728e89a
--- /dev/null
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#include "dm-transaction-manager.h"
+#include "dm-space-map.h"
+#include "dm-space-map-checker.h"
+#include "dm-space-map-disk.h"
+#include "dm-space-map-metadata.h"
+#include "dm-persistent-data-internal.h"
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "transaction manager"
+
+/*----------------------------------------------------------------*/
+
+struct shadow_info {
+       struct hlist_node hlist;
+       dm_block_t where;
+};
+
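+/*
+ * A "shadow" is a block that has already been copied (or freshly
+ * allocated) within the current transaction, and so may be written in
+ * place without another copy-on-write.
+ */
+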
+/*
+ * It would be nice if this scaled with the size of the transaction.
+ */
+#define HASH_SIZE 256
+#define HASH_MASK (HASH_SIZE - 1)
+
+struct dm_transaction_manager {
+       int is_clone;
+       struct dm_transaction_manager *real;
+
+       struct dm_block_manager *bm;
+       struct dm_space_map *sm;
+
+       spinlock_t lock;
+       struct hlist_head buckets[HASH_SIZE];
+};
+
+/*----------------------------------------------------------------*/
+
+static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
+{
+       int r = 0;
+       unsigned bucket = dm_hash_block(b, HASH_MASK);
+       struct shadow_info *si;
+       struct hlist_node *n;
+
+       spin_lock(&tm->lock);
+       hlist_for_each_entry(si, n, tm->buckets + bucket, hlist)
+               if (si->where == b) {
+                       r = 1;
+                       break;
+               }
+       spin_unlock(&tm->lock);
+
+       return r;
+}
+
+/*
+ * This can silently fail if there's no memory.  We're ok with this since
+ * creating redundant shadows causes no harm.
+ */
+static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
+{
+       unsigned bucket;
+       struct shadow_info *si;
+
+       si = kmalloc(sizeof(*si), GFP_NOIO);
+       if (si) {
+               si->where = b;
+               bucket = dm_hash_block(b, HASH_MASK);
+               spin_lock(&tm->lock);
+               hlist_add_head(&si->hlist, tm->buckets + bucket);
+               spin_unlock(&tm->lock);
+       }
+}
+
+static void wipe_shadow_table(struct dm_transaction_manager *tm)
+{
+       struct shadow_info *si;
+       struct hlist_node *n, *tmp;
+       struct hlist_head *bucket;
+       int i;
+
+       spin_lock(&tm->lock);
+       for (i = 0; i < HASH_SIZE; i++) {
+               bucket = tm->buckets + i;
+               hlist_for_each_entry_safe(si, n, tmp, bucket, hlist)
+                       kfree(si);
+
+               INIT_HLIST_HEAD(bucket);
+       }
+
+       spin_unlock(&tm->lock);
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
+                                                  struct dm_space_map *sm)
+{
+       int i;
+       struct dm_transaction_manager *tm;
+
+       tm = kmalloc(sizeof(*tm), GFP_KERNEL);
+       if (!tm)
+               return ERR_PTR(-ENOMEM);
+
+       tm->is_clone = 0;
+       tm->real = NULL;
+       tm->bm = bm;
+       tm->sm = sm;
+
+       spin_lock_init(&tm->lock);
+       for (i = 0; i < HASH_SIZE; i++)
+               INIT_HLIST_HEAD(tm->buckets + i);
+
+       return tm;
+}
+
+struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transaction_manager *real)
+{
+       struct dm_transaction_manager *tm;
+
+       tm = kmalloc(sizeof(*tm), GFP_KERNEL);
+       if (tm) {
+               tm->is_clone = 1;
+               tm->real = real;
+       }
+
+       return tm;
+}
+EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone);
+
+void dm_tm_destroy(struct dm_transaction_manager *tm)
+{
+       kfree(tm);
+}
+EXPORT_SYMBOL_GPL(dm_tm_destroy);
+
+int dm_tm_pre_commit(struct dm_transaction_manager *tm)
+{
+       int r;
+
+       if (tm->is_clone)
+               return -EWOULDBLOCK;
+
+       r = dm_sm_commit(tm->sm);
+       if (r < 0)
+               return r;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dm_tm_pre_commit);
+
+int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root)
+{
+       if (tm->is_clone)
+               return -EWOULDBLOCK;
+
+       wipe_shadow_table(tm);
+
+       return dm_bm_flush_and_unlock(tm->bm, root);
+}
+EXPORT_SYMBOL_GPL(dm_tm_commit);
+
+int dm_tm_new_block(struct dm_transaction_manager *tm,
+                   struct dm_block_validator *v,
+                   struct dm_block **result)
+{
+       int r;
+       dm_block_t new_block;
+
+       if (tm->is_clone)
+               return -EWOULDBLOCK;
+
+       r = dm_sm_new_block(tm->sm, &new_block);
+       if (r < 0)
+               return r;
+
+       r = dm_bm_write_lock_zero(tm->bm, new_block, v, result);
+       if (r < 0) {
+               dm_sm_dec_block(tm->sm, new_block);
+               return r;
+       }
+
+       /*
+        * New blocks count as shadows in that they don't need to be
+        * shadowed again.
+        */
+       insert_shadow(tm, new_block);
+
+       return 0;
+}
+
+static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
+                         struct dm_block_validator *v,
+                         struct dm_block **result)
+{
+       int r;
+       dm_block_t new;
+       struct dm_block *orig_block;
+
+       r = dm_sm_new_block(tm->sm, &new);
+       if (r < 0)
+               return r;
+
+       r = dm_sm_dec_block(tm->sm, orig);
+       if (r < 0)
+               return r;
+
+       r = dm_bm_read_lock(tm->bm, orig, v, &orig_block);
+       if (r < 0)
+               return r;
+
+       r = dm_bm_unlock_move(orig_block, new);
+       if (r < 0) {
+               dm_bm_unlock(orig_block);
+               return r;
+       }
+
+       return dm_bm_write_lock(tm->bm, new, v, result);
+}
+
+int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
+                      struct dm_block_validator *v, struct dm_block **result,
+                      int *inc_children)
+{
+       int r;
+
+       if (tm->is_clone)
+               return -EWOULDBLOCK;
+
+       r = dm_sm_count_is_more_than_one(tm->sm, orig, inc_children);
+       if (r < 0)
+               return r;
+
+       if (is_shadow(tm, orig) && !*inc_children)
+               return dm_bm_write_lock(tm->bm, orig, v, result);
+
+       r = __shadow_block(tm, orig, v, result);
+       if (r < 0)
+               return r;
+       insert_shadow(tm, dm_block_location(*result));
+
+       return r;
+}
+
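+/*
+ * Illustrative sketch (hypothetical, not part of this patch): the
+ * canonical copy-on-write update built from the primitives above.
+ */
+#if 0
+static int ex_cow_update(struct dm_transaction_manager *tm, dm_block_t b,
+                        struct dm_block_validator *v)
+{
+       int r, inc;
+       struct dm_block *blk;
+
+       r = dm_tm_shadow_block(tm, b, v, &blk, &inc);
+       if (r < 0)
+               return r;
+
+       /*
+        * Mutate dm_block_data(blk) here.  If inc is set, the block was
+        * copied, so the references its children hold must be bumped
+        * with dm_tm_inc().
+        */
+
+       return dm_tm_unlock(tm, blk);
+}
+#endif
+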
+int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
+                   struct dm_block_validator *v,
+                   struct dm_block **blk)
+{
+       if (tm->is_clone)
+               return dm_bm_read_try_lock(tm->real->bm, b, v, blk);
+
+       return dm_bm_read_lock(tm->bm, b, v, blk);
+}
+
+int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b)
+{
+       return dm_bm_unlock(b);
+}
+EXPORT_SYMBOL_GPL(dm_tm_unlock);
+
+void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
+{
+       /*
+        * The non-blocking clone doesn't support this.
+        */
+       BUG_ON(tm->is_clone);
+
+       dm_sm_inc_block(tm->sm, b);
+}
+EXPORT_SYMBOL_GPL(dm_tm_inc);
+
+void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
+{
+       /*
+        * The non-blocking clone doesn't support this.
+        */
+       BUG_ON(tm->is_clone);
+
+       dm_sm_dec_block(tm->sm, b);
+}
+EXPORT_SYMBOL_GPL(dm_tm_dec);
+
+int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
+             uint32_t *result)
+{
+       if (tm->is_clone)
+               return -EWOULDBLOCK;
+
+       return dm_sm_get_count(tm->sm, b, result);
+}
+
+struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm)
+{
+       return tm->bm;
+}
+
+/*----------------------------------------------------------------*/
+
+static int dm_tm_create_internal(struct dm_block_manager *bm,
+                                dm_block_t sb_location,
+                                struct dm_block_validator *sb_validator,
+                                size_t root_offset, size_t root_max_len,
+                                struct dm_transaction_manager **tm,
+                                struct dm_space_map **sm,
+                                struct dm_block **sblock,
+                                int create)
+{
+       int r;
+       struct dm_space_map *inner;
+
+       inner = dm_sm_metadata_init();
+       if (IS_ERR(inner))
+               return PTR_ERR(inner);
+
+       *tm = dm_tm_create(bm, inner);
+       if (IS_ERR(*tm)) {
+               dm_sm_destroy(inner);
+               return PTR_ERR(*tm);
+       }
+
+       if (create) {
+               r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location,
+                                         sb_validator, sblock);
+               if (r < 0) {
+                       DMERR("couldn't lock superblock");
+                       goto bad1;
+               }
+
+               r = dm_sm_metadata_create(inner, *tm, dm_bm_nr_blocks(bm),
+                                         sb_location);
+               if (r) {
+                       DMERR("couldn't create metadata space map");
+                       goto bad2;
+               }
+
+               *sm = dm_sm_checker_create(inner);
+               if (!*sm)
+                       goto bad2;
+
+       } else {
+               r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location,
+                                    sb_validator, sblock);
+               if (r < 0) {
+                       DMERR("couldn't lock superblock");
+                       goto bad1;
+               }
+
+               r = dm_sm_metadata_open(inner, *tm,
+                                       dm_block_data(*sblock) + root_offset,
+                                       root_max_len);
+               if (r) {
+                       DMERR("couldn't open metadata space map");
+                       goto bad2;
+               }
+
+               *sm = dm_sm_checker_create(inner);
+               if (!*sm)
+                       goto bad2;
+       }
+
+       return 0;
+
+bad2:
+       dm_tm_unlock(*tm, *sblock);
+bad1:
+       dm_tm_destroy(*tm);
+       dm_sm_destroy(inner);
+       return r;
+}
+
+int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
+                        struct dm_block_validator *sb_validator,
+                        struct dm_transaction_manager **tm,
+                        struct dm_space_map **sm, struct dm_block **sblock)
+{
+       return dm_tm_create_internal(bm, sb_location, sb_validator,
+                                    0, 0, tm, sm, sblock, 1);
+}
+EXPORT_SYMBOL_GPL(dm_tm_create_with_sm);
+
+int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
+                      struct dm_block_validator *sb_validator,
+                      size_t root_offset, size_t root_max_len,
+                      struct dm_transaction_manager **tm,
+                      struct dm_space_map **sm, struct dm_block **sblock)
+{
+       return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset,
+                                    root_max_len, tm, sm, sblock, 0);
+}
+EXPORT_SYMBOL_GPL(dm_tm_open_with_sm);
+
+/*----------------------------------------------------------------*/
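As a usage sketch only (the node-update logic, the validator @v and the caller
below are hypothetical placeholders, not taken from this patch), a client
might drive a copy-on-write update of one block through this interface
roughly like so:

	/* Hypothetical caller: update block @b copy-on-write. */
	static int cow_update_block(struct dm_transaction_manager *tm,
				    dm_block_t b, struct dm_block_validator *v,
				    dm_block_t *new_location)
	{
		struct dm_block *shadow;
		int inc_children;
		int r;

		/* No lock may be held on @b: shadowing drops a reference on it. */
		r = dm_tm_shadow_block(tm, b, v, &shadow, &inc_children);
		if (r < 0)
			return r;

		/*
		 * If the block was shared, the caller must bump the reference
		 * count of every child block its data points at, e.g. with
		 * dm_tm_inc() per child location.
		 */
		if (inc_children) {
			/* ... walk dm_block_data(shadow), dm_tm_inc() children ... */
		}

		/* ... modify dm_block_data(shadow) in place ... */

		*new_location = dm_block_location(shadow);
		return dm_tm_unlock(tm, shadow);
	}

Repeated updates of the same block within one transaction hit the is_shadow()
fast path above and reuse the existing shadow rather than copying again.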
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
new file mode 100644 (file)
index 0000000..6da7848
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _LINUX_DM_TRANSACTION_MANAGER_H
+#define _LINUX_DM_TRANSACTION_MANAGER_H
+
+#include "dm-block-manager.h"
+
+struct dm_transaction_manager;
+struct dm_space_map;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * This manages the scope of a transaction.  It also enforces immutability
+ * of the on-disk data structures by limiting access to writeable blocks.
+ *
+ * Clients should not fiddle with the block manager directly.
+ */
+
+void dm_tm_destroy(struct dm_transaction_manager *tm);
+
+/*
+ * The non-blocking version of a transaction manager is intended for use
+ * in fast-path code that needs to do lookups, e.g. a dm mapping function.
+ * You create the non-blocking variant from a normal tm.  The interface
+ * is the same, except that most functions will just return -EWOULDBLOCK.
+ * Methods that return void yet may block must not be called on a clone,
+ * viz. dm_tm_inc() and dm_tm_dec().  Call dm_tm_destroy() as you would
+ * with a normal tm when you've finished with it.  You may not destroy
+ * the original tm before destroying all of its clones.
+ */
+struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transaction_manager *real);
+
+/*
+ * We use a 2-phase commit here.
+ *
+ * i) In the first phase the block manager is told to start flushing, and
+ * the changes to the space map are written to disk.  You should
+ * interrogate your particular space map to get the details of its root
+ * node etc. for inclusion in your superblock.
+ *
+ * ii) @root will be committed last.  You shouldn't use more than the
+ * first 512 bytes of @root if you wish the transaction to survive a
+ * power failure.  You *must* hold a write lock on @root for both stage
+ * (i) and stage (ii).  The commit will drop the write lock.
+ */
+int dm_tm_pre_commit(struct dm_transaction_manager *tm);
+int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root);
+
+/*
+ * These methods are the only way to get hold of a writeable block.
+ */
+
+/*
+ * dm_tm_new_block() is pretty self-explanatory: it zeroes the new
+ * block and returns with the write lock held.  You should still write
+ * to the whole of @data before you unlock.  Zeroing up front means an
+ * incomplete write can't leak stale data to disk, at the cost of the
+ * zeroing being redundant whenever the caller does overwrite the
+ * entire block.
+ */
+int dm_tm_new_block(struct dm_transaction_manager *tm,
+                   struct dm_block_validator *v,
+                   struct dm_block **result);
+
+/*
+ * dm_tm_shadow_block() allocates a new block and copies the data from
+ * @orig to it.  It then decrements the reference count on the original
+ * block.  Use this to update the contents of a block within a data
+ * structure; don't confuse it with a clone - you must not access @orig
+ * after this operation.  Because the tm knows the scope of the
+ * transaction it can optimise a request for a shadow of a shadow into
+ * a no-op.  Don't forget to unlock when you've finished with the shadow.
+ *
+ * The @inc_children flag is used to tell the caller whether it needs to
+ * adjust reference counts for children.  (Data in the block may refer
+ * to other blocks.)
+ *
+ * Shadowing implicitly drops a reference on @orig, so you must not
+ * hold a lock on it when you call this.
+ */
+int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
+                      struct dm_block_validator *v,
+                      struct dm_block **result, int *inc_children);
+
+/*
+ * Read access.  You can lock any block you want; if there's an
+ * outstanding write lock on it then the call will block.
+ */
+int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
+                   struct dm_block_validator *v,
+                   struct dm_block **result);
+
+int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);
+
+/*
+ * Functions for altering the reference count of a block directly.
+ */
+void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b);
+
+void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b);
+
+int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
+             uint32_t *result);
+
+struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm);
+
+/*
+ * A little utility that ties the knot by producing a transaction manager
+ * whose space map is itself managed by that transaction manager...
+ *
+ * Returns a tm with an open transaction to write the new on-disk sm.
+ * The caller should store the new sm root and commit.
+ */
+int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
+                        struct dm_block_validator *sb_validator,
+                        struct dm_transaction_manager **tm,
+                        struct dm_space_map **sm, struct dm_block **sblock);
+
+int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
+                      struct dm_block_validator *sb_validator,
+                      size_t root_offset, size_t root_max_len,
+                      struct dm_transaction_manager **tm,
+                      struct dm_space_map **sm, struct dm_block **sblock);
+
+#endif /* _LINUX_DM_TRANSACTION_MANAGER_H */
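As a hedged illustration of the two-phase commit described above (the
SUPERBLOCK location and sb_validator below are hypothetical placeholders,
not taken from this patch, and error unwinding is elided for brevity),
a whole transaction might look like:

	static int build_and_commit(struct dm_block_manager *bm)
	{
		struct dm_transaction_manager *tm;
		struct dm_space_map *sm;
		struct dm_block *sblock;
		int r;

		/* Returns with the superblock write-locked, transaction open. */
		r = dm_tm_create_with_sm(bm, SUPERBLOCK, &sb_validator,
					 &tm, &sm, &sblock);
		if (r < 0)
			return r;

		/* ... dm_tm_new_block()/dm_tm_shadow_block() to build ... */

		/* Phase (i): flush data blocks and the space-map changes. */
		r = dm_tm_pre_commit(tm);
		if (r < 0)
			return r;

		/*
		 * Copy the space-map root into the superblock (within its
		 * first 512 bytes), e.g. with dm_sm_copy_root(), then commit.
		 */

		/* Phase (ii): @sblock is committed last; drops its write lock. */
		return dm_tm_commit(tm, sblock);
	}

A non-blocking clone made with dm_tm_create_non_blocking_clone() would only
be used for dm_tm_read_lock()/dm_tm_unlock() on a fast path such as a mapping
function, where -EWOULDBLOCK is an acceptable answer.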
index 52798a111e16cb5a55df761aa228e60ec3f9343f..ccd5f0d8a012113a08b788b9927860079289fa0c 100644 (file)
@@ -426,7 +426,6 @@ config RADIO_TIMBERDALE
 config RADIO_WL1273
        tristate "Texas Instruments WL1273 I2C FM Radio"
        depends on I2C && VIDEO_V4L2
-       select MFD_CORE
        select MFD_WL1273_CORE
        select FW_LOADER
        ---help---
index a67adcbd0fa19916d19f60f784a5e34699648fe3..f1391c21ef267d349ce28ec2217eb603c869bb9e 100644 (file)
@@ -2,23 +2,8 @@
 # Multifunction miscellaneous devices
 #
 
-menuconfig MFD_SUPPORT
-       bool "Multifunction device drivers"
-       depends on HAS_IOMEM
-       default y
-       help
-         Multifunction devices embed several functions (e.g. GPIOs,
-         touchscreens, keyboards, current regulators, power management chips,
-         etc...) in one single integrated circuit. They usually talk to the
-         main CPU through one or more IRQ lines and low speed data busses (SPI,
-         I2C, etc..). They appear as one single device to the main system
-         through the data bus and the MFD framework allows for sub devices
-         (a.k.a. functions) to appear as discrete platform devices.
-         MFDs are typically found on embedded platforms.
-
-         This option alone does not add any kernel code.
-
-if MFD_SUPPORT
+if HAS_IOMEM
+menu "Multifunction device drivers"
 
 config MFD_CORE
        tristate
@@ -390,6 +375,7 @@ config MFD_WM8400
        tristate "Support Wolfson Microelectronics WM8400"
        select MFD_CORE
        depends on I2C
+       select REGMAP_I2C
        help
          Support for the Wolfson Microelectronics WM8400 PMIC and audio
          CODEC.  This driver provides common support for accessing
@@ -503,6 +489,7 @@ config MFD_WM8994
 config MFD_PCF50633
        tristate "Support for NXP PCF50633"
        depends on I2C
+       select REGMAP_I2C
        help
          Say yes here if you have NXP PCF50633 chip on your board.
          This core driver provides register access and IRQ handling
@@ -579,6 +566,23 @@ config EZX_PCAP
          This enables the PCAP ASIC present on EZX Phones. This is
          needed for MMC, TouchScreen, Sound, USB, etc..
 
+config AB5500_CORE
+       bool "ST-Ericsson AB5500 Mixed Signal Power Management chip"
+       depends on ABX500_CORE && MFD_DB5500_PRCMU
+       select MFD_CORE
+       help
+         Select this option to enable access to the AB5500 power management
+         chip. This connects to the db5500 chip over the I2C bus via the
+         PRCMU. The chip embeds various other multimedia functionalities too.
+
+config AB5500_DEBUG
+       bool "Enable debug info via debugfs"
+       depends on AB5500_CORE && DEBUG_FS
+       default y if DEBUG_FS
+       help
+         Select this option if you want debug information from the AB5500
+         using the debug filesystem, debugfs.
+
 config AB8500_CORE
        bool "ST-Ericsson AB8500 Mixed Signal Power Management chip"
        depends on GENERIC_HARDIRQS && ABX500_CORE
@@ -615,20 +619,6 @@ config AB8500_GPADC
        help
          AB8500 GPADC driver used to convert Acc and battery/ac/usb voltage
 
-config AB3550_CORE
-        bool "ST-Ericsson AB3550 Mixed Signal Circuit core functions"
-       select MFD_CORE
-       depends on I2C=y && GENERIC_HARDIRQS && ABX500_CORE
-       help
-         Select this to enable the AB3550 Mixed Signal IC core
-         functionality. This connects to a AB3550 on the I2C bus
-         and expose a number of symbols needed for dependent devices
-         to read and write registers and subscribe to events from
-         this multi-functional IC. This is needed to use other features
-         of the AB3550 such as battery-backed RTC, charging control,
-         LEDs, vibrator, system power and temperature, power management
-         and ALSA sound.
-
 config MFD_DB8500_PRCMU
        bool "ST-Ericsson DB8500 Power Reset Control Management Unit"
        depends on UX500_SOC_DB8500
@@ -773,7 +763,17 @@ config MFD_AAT2870_CORE
          additional drivers must be enabled in order to use the
          functionality of the device.
 
-endif # MFD_SUPPORT
+config MFD_INTEL_MSIC
+       bool "Support for Intel MSIC"
+       depends on INTEL_SCU_IPC
+       select MFD_CORE
+       help
+         Select this option to enable access to the Intel MSIC (Avatele
+         Passage) chip. This chip embeds audio, battery, GPIO, etc.
+         devices used in Intel Medfield platforms.
+
+endmenu
+endif
 
 menu "Multimedia Capabilities Port drivers"
        depends on ARCH_SA1100
index c58020303d184922dce43b36845413e3d798ff8d..b2292eb752429b46503103c114c902c05f24f3d1 100644 (file)
@@ -79,7 +79,8 @@ obj-$(CONFIG_PCF50633_GPIO)   += pcf50633-gpio.o
 obj-$(CONFIG_ABX500_CORE)      += abx500-core.o
 obj-$(CONFIG_AB3100_CORE)      += ab3100-core.o
 obj-$(CONFIG_AB3100_OTP)       += ab3100-otp.o
-obj-$(CONFIG_AB3550_CORE)      += ab3550-core.o
+obj-$(CONFIG_AB5500_CORE)      += ab5500-core.o
+obj-$(CONFIG_AB5500_DEBUG)     += ab5500-debugfs.o
 obj-$(CONFIG_AB8500_CORE)      += ab8500-core.o ab8500-sysctrl.o
 obj-$(CONFIG_AB8500_DEBUG)     += ab8500-debugfs.o
 obj-$(CONFIG_AB8500_GPADC)     += ab8500-gpadc.o
@@ -102,3 +103,4 @@ obj-$(CONFIG_MFD_PM8921_CORE)       += pm8921-core.o
 obj-$(CONFIG_MFD_PM8XXX_IRQ)   += pm8xxx-irq.o
 obj-$(CONFIG_TPS65911_COMPARATOR)      += tps65911-comparator.o
 obj-$(CONFIG_MFD_AAT2870_CORE) += aat2870-core.o
+obj-$(CONFIG_MFD_INTEL_MSIC)   += intel_msic.o
index 345dc658ef066399f2ace78c9d299255c837888e..02c42015ba5108344c46cd29986d30e64e5cd304 100644 (file)
@@ -295,7 +295,7 @@ static ssize_t aat2870_reg_write_file(struct file *file,
 {
        struct aat2870_data *aat2870 = file->private_data;
        char buf[32];
-       int buf_size;
+       ssize_t buf_size;
        char *start = buf;
        unsigned long addr, val;
        int ret;
index a20e1c41bed2f67c7fcdf65958ef83e62dde98e2..4f5725508ac0eb47ed09396998343d5c41df7ad9 100644 (file)
@@ -809,7 +809,7 @@ struct ab_family_id {
        char    *name;
 };
 
-static const struct ab_family_id ids[] __devinitdata = {
+static const struct ab_family_id ids[] __devinitconst = {
        /* AB3100 */
        {
                .id = 0xc0,
diff --git a/drivers/mfd/ab3550-core.c b/drivers/mfd/ab3550-core.c
deleted file mode 100644 (file)
index 56ba194..0000000
+++ /dev/null
@@ -1,1380 +0,0 @@
-/*
- * Copyright (C) 2007-2010 ST-Ericsson
- * License terms: GNU General Public License (GPL) version 2
- * Low-level core for exclusive access to the AB3550 IC on the I2C bus
- * and some basic chip-configuration.
- * Author: Bengt Jonsson <bengt.g.jonsson@stericsson.com>
- * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
- * Author: Mattias Wallin <mattias.wallin@stericsson.com>
- * Author: Rickard Andersson <rickard.andersson@stericsson.com>
- */
-
-#include <linux/i2c.h>
-#include <linux/mutex.h>
-#include <linux/err.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/random.h>
-#include <linux/workqueue.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/uaccess.h>
-#include <linux/mfd/abx500.h>
-#include <linux/list.h>
-#include <linux/bitops.h>
-#include <linux/spinlock.h>
-#include <linux/mfd/core.h>
-
-#define AB3550_NAME_STRING "ab3550"
-#define AB3550_ID_FORMAT_STRING "AB3550 %s"
-#define AB3550_NUM_BANKS 2
-#define AB3550_NUM_EVENT_REG 5
-
-/* These are the only registers inside AB3550 used in this main file */
-
-/* Chip ID register */
-#define AB3550_CID_REG           0x20
-
-/* Interrupt event registers */
-#define AB3550_EVENT_BANK        0
-#define AB3550_EVENT_REG         0x22
-
-/* Read/write operation values. */
-#define AB3550_PERM_RD (0x01)
-#define AB3550_PERM_WR (0x02)
-
-/* Read/write permissions. */
-#define AB3550_PERM_RO (AB3550_PERM_RD)
-#define AB3550_PERM_RW (AB3550_PERM_RD | AB3550_PERM_WR)
-
-/**
- * struct ab3550
- * @access_mutex: lock out concurrent accesses to the AB registers
- * @i2c_client: I2C client for this chip
- * @chip_name: name of this chip variant
- * @chip_id: 8 bit chip ID for this chip variant
- * @mask_work: a worker for writing to mask registers
- * @event_lock: a lock to protect the event_mask
- * @event_mask: a local copy of the mask event registers
- * @startup_events: a copy of the first reading of the event registers
- * @startup_events_read: whether the first events have been read
- */
-struct ab3550 {
-       struct mutex access_mutex;
-       struct i2c_client *i2c_client[AB3550_NUM_BANKS];
-       char chip_name[32];
-       u8 chip_id;
-       struct work_struct mask_work;
-       spinlock_t event_lock;
-       u8 event_mask[AB3550_NUM_EVENT_REG];
-       u8 startup_events[AB3550_NUM_EVENT_REG];
-       bool startup_events_read;
-#ifdef CONFIG_DEBUG_FS
-       unsigned int debug_bank;
-       unsigned int debug_address;
-#endif
-};
-
-/**
- * struct ab3550_reg_range
- * @first: the first address of the range
- * @last: the last address of the range
- * @perm: access permissions for the range
- */
-struct ab3550_reg_range {
-       u8 first;
-       u8 last;
-       u8 perm;
-};
-
-/**
- * struct ab3550_reg_ranges
- * @count: the number of ranges in the list
- * @range: the list of register ranges
- */
-struct ab3550_reg_ranges {
-       u8 count;
-       const struct ab3550_reg_range *range;
-};
-
-/*
- * Permissible register ranges for reading and writing per device and bank.
- *
- * The ranges must be listed in increasing address order, and no overlaps are
- * allowed. It is assumed that write permission implies read permission
- * (i.e. only RO and RW permissions should be used).  Ranges with write
- * permission must not be split up.
- */
-
-#define NO_RANGE {.count = 0, .range = NULL,}
-
-static struct
-ab3550_reg_ranges ab3550_reg_ranges[AB3550_NUM_DEVICES][AB3550_NUM_BANKS] = {
-       [AB3550_DEVID_DAC] = {
-               NO_RANGE,
-               {
-                       .count = 2,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0xb0,
-                                       .last = 0xba,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                               {
-                                       .first = 0xbc,
-                                       .last = 0xc3,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                       },
-               },
-       },
-       [AB3550_DEVID_LEDS] = {
-               NO_RANGE,
-               {
-                       .count = 2,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x5a,
-                                       .last = 0x88,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                               {
-                                       .first = 0x8a,
-                                       .last = 0xad,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                       }
-               },
-       },
-       [AB3550_DEVID_POWER] = {
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x21,
-                                       .last = 0x21,
-                                       .perm = AB3550_PERM_RO,
-                               },
-                       }
-               },
-               NO_RANGE,
-       },
-       [AB3550_DEVID_REGULATORS] = {
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x69,
-                                       .last = 0xa3,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                       }
-               },
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x14,
-                                       .last = 0x16,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                       }
-               },
-       },
-       [AB3550_DEVID_SIM] = {
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x21,
-                                       .last = 0x21,
-                                       .perm = AB3550_PERM_RO,
-                               },
-                       }
-               },
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x14,
-                                       .last = 0x17,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                       }
-
-               },
-       },
-       [AB3550_DEVID_UART] = {
-               NO_RANGE,
-               NO_RANGE,
-       },
-       [AB3550_DEVID_RTC] = {
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x00,
-                                       .last = 0x0c,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                       }
-               },
-               NO_RANGE,
-       },
-       [AB3550_DEVID_CHARGER] = {
-               {
-                       .count = 2,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x10,
-                                       .last = 0x1a,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                               {
-                                       .first = 0x21,
-                                       .last = 0x21,
-                                       .perm = AB3550_PERM_RO,
-                               },
-                       }
-               },
-               NO_RANGE,
-       },
-       [AB3550_DEVID_ADC] = {
-               NO_RANGE,
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x20,
-                                       .last = 0x56,
-                                       .perm = AB3550_PERM_RW,
-                               },
-
-                       }
-               },
-       },
-       [AB3550_DEVID_FUELGAUGE] = {
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x21,
-                                       .last = 0x21,
-                                       .perm = AB3550_PERM_RO,
-                               },
-                       }
-               },
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x00,
-                                       .last = 0x0e,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                       }
-               },
-       },
-       [AB3550_DEVID_VIBRATOR] = {
-               NO_RANGE,
-               {
-                       .count = 1,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x10,
-                                       .last = 0x13,
-                                       .perm = AB3550_PERM_RW,
-                               },
-
-                       }
-               },
-       },
-       [AB3550_DEVID_CODEC] = {
-               {
-                       .count = 2,
-                       .range = (struct ab3550_reg_range[]) {
-                               {
-                                       .first = 0x31,
-                                       .last = 0x63,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                               {
-                                       .first = 0x65,
-                                       .last = 0x68,
-                                       .perm = AB3550_PERM_RW,
-                               },
-                       }
-               },
-               NO_RANGE,
-       },
-};
-
-static struct mfd_cell ab3550_devs[AB3550_NUM_DEVICES] = {
-       [AB3550_DEVID_DAC] = {
-               .name = "ab3550-dac",
-               .id = AB3550_DEVID_DAC,
-               .num_resources = 0,
-       },
-       [AB3550_DEVID_LEDS] = {
-               .name = "ab3550-leds",
-               .id = AB3550_DEVID_LEDS,
-       },
-       [AB3550_DEVID_POWER] = {
-               .name = "ab3550-power",
-               .id = AB3550_DEVID_POWER,
-       },
-       [AB3550_DEVID_REGULATORS] = {
-               .name = "ab3550-regulators",
-               .id = AB3550_DEVID_REGULATORS,
-       },
-       [AB3550_DEVID_SIM] = {
-               .name = "ab3550-sim",
-               .id = AB3550_DEVID_SIM,
-       },
-       [AB3550_DEVID_UART] = {
-               .name = "ab3550-uart",
-               .id = AB3550_DEVID_UART,
-       },
-       [AB3550_DEVID_RTC] = {
-               .name = "ab3550-rtc",
-               .id = AB3550_DEVID_RTC,
-       },
-       [AB3550_DEVID_CHARGER] = {
-               .name = "ab3550-charger",
-               .id = AB3550_DEVID_CHARGER,
-       },
-       [AB3550_DEVID_ADC] = {
-               .name = "ab3550-adc",
-               .id = AB3550_DEVID_ADC,
-               .num_resources = 10,
-               .resources = (struct resource[]) {
-                       {
-                               .name = "TRIGGER-0",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 16,
-                               .end = 16,
-                       },
-                       {
-                               .name = "TRIGGER-1",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 17,
-                               .end = 17,
-                       },
-                       {
-                               .name = "TRIGGER-2",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 18,
-                               .end = 18,
-                       },
-                       {
-                               .name = "TRIGGER-3",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 19,
-                               .end = 19,
-                       },
-                       {
-                               .name = "TRIGGER-4",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 20,
-                               .end = 20,
-                       },
-                       {
-                               .name = "TRIGGER-5",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 21,
-                               .end = 21,
-                       },
-                       {
-                               .name = "TRIGGER-6",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 22,
-                               .end = 22,
-                       },
-                       {
-                               .name = "TRIGGER-7",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 23,
-                               .end = 23,
-                       },
-                       {
-                               .name = "TRIGGER-VBAT-TXON",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 13,
-                               .end = 13,
-                       },
-                       {
-                               .name = "TRIGGER-VBAT",
-                               .flags = IORESOURCE_IRQ,
-                               .start = 12,
-                               .end = 12,
-                       },
-               },
-       },
-       [AB3550_DEVID_FUELGAUGE] = {
-               .name = "ab3550-fuelgauge",
-               .id = AB3550_DEVID_FUELGAUGE,
-       },
-       [AB3550_DEVID_VIBRATOR] = {
-               .name = "ab3550-vibrator",
-               .id = AB3550_DEVID_VIBRATOR,
-       },
-       [AB3550_DEVID_CODEC] = {
-               .name = "ab3550-codec",
-               .id = AB3550_DEVID_CODEC,
-       },
-};
-
-/*
- * I2C transactions with error messages.
- */
-static int ab3550_i2c_master_send(struct ab3550 *ab, u8 bank, u8 *data,
-       u8 count)
-{
-       int err;
-
-       err = i2c_master_send(ab->i2c_client[bank], data, count);
-       if (err < 0) {
-               dev_err(&ab->i2c_client[0]->dev, "send error: %d\n", err);
-               return err;
-       }
-       return 0;
-}
-
-static int ab3550_i2c_master_recv(struct ab3550 *ab, u8 bank, u8 *data,
-       u8 count)
-{
-       int err;
-
-       err = i2c_master_recv(ab->i2c_client[bank], data, count);
-       if (err < 0) {
-               dev_err(&ab->i2c_client[0]->dev, "receive error: %d\n", err);
-               return err;
-       }
-       return 0;
-}
-
-/*
- * Functionality for getting/setting register values.
- */
-static int get_register_interruptible(struct ab3550 *ab, u8 bank, u8 reg,
-       u8 *value)
-{
-       int err;
-
-       err = mutex_lock_interruptible(&ab->access_mutex);
-       if (err)
-               return err;
-
-       err = ab3550_i2c_master_send(ab, bank, &reg, 1);
-       if (!err)
-               err = ab3550_i2c_master_recv(ab, bank, value, 1);
-
-       mutex_unlock(&ab->access_mutex);
-       return err;
-}
-
-static int get_register_page_interruptible(struct ab3550 *ab, u8 bank,
-       u8 first_reg, u8 *regvals, u8 numregs)
-{
-       int err;
-
-       err = mutex_lock_interruptible(&ab->access_mutex);
-       if (err)
-               return err;
-
-       err = ab3550_i2c_master_send(ab, bank, &first_reg, 1);
-       if (!err)
-               err = ab3550_i2c_master_recv(ab, bank, regvals, numregs);
-
-       mutex_unlock(&ab->access_mutex);
-       return err;
-}
-
-static int mask_and_set_register_interruptible(struct ab3550 *ab, u8 bank,
-       u8 reg, u8 bitmask, u8 bitvalues)
-{
-       int err = 0;
-
-       if (likely(bitmask)) {
-               u8 reg_bits[2] = {reg, 0};
-
-               err = mutex_lock_interruptible(&ab->access_mutex);
-               if (err)
-                       return err;
-
-               if (bitmask == 0xFF) /* No need to read in this case. */
-                       reg_bits[1] = bitvalues;
-               else { /* Read and modify the register value. */
-                       u8 bits;
-
-                       err = ab3550_i2c_master_send(ab, bank, &reg, 1);
-                       if (err)
-                               goto unlock_and_return;
-                       err = ab3550_i2c_master_recv(ab, bank, &bits, 1);
-                       if (err)
-                               goto unlock_and_return;
-                       reg_bits[1] = ((~bitmask & bits) |
-                               (bitmask & bitvalues));
-               }
-               /* Write the new value. */
-               err = ab3550_i2c_master_send(ab, bank, reg_bits, 2);
-unlock_and_return:
-               mutex_unlock(&ab->access_mutex);
-       }
-       return err;
-}
-
-/*
- * Read/write permission checking functions.
- */
-static bool page_write_allowed(const struct ab3550_reg_ranges *ranges,
-       u8 first_reg, u8 last_reg)
-{
-       u8 i;
-
-       if (last_reg < first_reg)
-               return false;
-
-       for (i = 0; i < ranges->count; i++) {
-               if (first_reg < ranges->range[i].first)
-                       break;
-               if ((last_reg <= ranges->range[i].last) &&
-                       (ranges->range[i].perm & AB3550_PERM_WR))
-                       return true;
-       }
-       return false;
-}
-
-static bool reg_write_allowed(const struct ab3550_reg_ranges *ranges, u8 reg)
-{
-       return page_write_allowed(ranges, reg, reg);
-}
-
-static bool page_read_allowed(const struct ab3550_reg_ranges *ranges,
-       u8 first_reg, u8 last_reg)
-{
-       u8 i;
-
-       if (last_reg < first_reg)
-               return false;
-       /* Find the range (if it exists in the list) that includes first_reg. */
-       for (i = 0; i < ranges->count; i++) {
-               if (first_reg < ranges->range[i].first)
-                       return false;
-               if (first_reg <= ranges->range[i].last)
-                       break;
-       }
-       /* Make sure that the entire range up to and including last_reg is
-        * readable. This may span several of the ranges in the list.
-        */
-       while ((i < ranges->count) &&
-               (ranges->range[i].perm & AB3550_PERM_RD)) {
-               if (last_reg <= ranges->range[i].last)
-                       return true;
-               if ((++i >= ranges->count) ||
-                       (ranges->range[i].first !=
-                        (ranges->range[i - 1].last + 1))) {
-                       break;
-               }
-       }
-       return false;
-}
-
-static bool reg_read_allowed(const struct ab3550_reg_ranges *ranges, u8 reg)
-{
-       return page_read_allowed(ranges, reg, reg);
-}
-
-/*
- * The register access functionality.
- */
-static int ab3550_get_chip_id(struct device *dev)
-{
-       struct ab3550 *ab = dev_get_drvdata(dev->parent);
-       return (int)ab->chip_id;
-}
-
-static int ab3550_mask_and_set_register_interruptible(struct device *dev,
-       u8 bank, u8 reg, u8 bitmask, u8 bitvalues)
-{
-       struct ab3550 *ab;
-       struct platform_device *pdev = to_platform_device(dev);
-
-       if ((AB3550_NUM_BANKS <= bank) ||
-               !reg_write_allowed(&ab3550_reg_ranges[pdev->id][bank], reg))
-               return -EINVAL;
-
-       ab = dev_get_drvdata(dev->parent);
-       return mask_and_set_register_interruptible(ab, bank, reg,
-               bitmask, bitvalues);
-}
-
-static int ab3550_set_register_interruptible(struct device *dev, u8 bank,
-       u8 reg, u8 value)
-{
-       return ab3550_mask_and_set_register_interruptible(dev, bank, reg, 0xFF,
-               value);
-}
-
-static int ab3550_get_register_interruptible(struct device *dev, u8 bank,
-       u8 reg, u8 *value)
-{
-       struct ab3550 *ab;
-       struct platform_device *pdev = to_platform_device(dev);
-
-       if ((AB3550_NUM_BANKS <= bank) ||
-               !reg_read_allowed(&ab3550_reg_ranges[pdev->id][bank], reg))
-               return -EINVAL;
-
-       ab = dev_get_drvdata(dev->parent);
-       return get_register_interruptible(ab, bank, reg, value);
-}
-
-static int ab3550_get_register_page_interruptible(struct device *dev, u8 bank,
-       u8 first_reg, u8 *regvals, u8 numregs)
-{
-       struct ab3550 *ab;
-       struct platform_device *pdev = to_platform_device(dev);
-
-       if ((AB3550_NUM_BANKS <= bank) ||
-               !page_read_allowed(&ab3550_reg_ranges[pdev->id][bank],
-                       first_reg, (first_reg + numregs - 1)))
-               return -EINVAL;
-
-       ab = dev_get_drvdata(dev->parent);
-       return get_register_page_interruptible(ab, bank, first_reg, regvals,
-               numregs);
-}
-
-static int ab3550_event_registers_startup_state_get(struct device *dev,
-       u8 *event)
-{
-       struct ab3550 *ab;
-
-       ab = dev_get_drvdata(dev->parent);
-       if (!ab->startup_events_read)
-               return -EAGAIN; /* Try again later */
-
-       memcpy(event, ab->startup_events, AB3550_NUM_EVENT_REG);
-       return 0;
-}
-
-static int ab3550_startup_irq_enabled(struct device *dev, unsigned int irq)
-{
-       struct ab3550 *ab;
-       struct ab3550_platform_data *plf_data;
-       bool val;
-
-       ab = irq_get_chip_data(irq);
-       plf_data = ab->i2c_client[0]->dev.platform_data;
-       irq -= plf_data->irq.base;
-       val = ((ab->startup_events[irq / 8] & BIT(irq % 8)) != 0);
-
-       return val;
-}
-
-static struct abx500_ops ab3550_ops = {
-       .get_chip_id = ab3550_get_chip_id,
-       .get_register = ab3550_get_register_interruptible,
-       .set_register = ab3550_set_register_interruptible,
-       .get_register_page = ab3550_get_register_page_interruptible,
-       .set_register_page = NULL,
-       .mask_and_set_register = ab3550_mask_and_set_register_interruptible,
-       .event_registers_startup_state_get =
-               ab3550_event_registers_startup_state_get,
-       .startup_irq_enabled = ab3550_startup_irq_enabled,
-};
-
-static irqreturn_t ab3550_irq_handler(int irq, void *data)
-{
-       struct ab3550 *ab = data;
-       int err;
-       unsigned int i;
-       u8 e[AB3550_NUM_EVENT_REG];
-       u8 *events;
-       unsigned long flags;
-
-       events = (ab->startup_events_read ? e : ab->startup_events);
-
-       err = get_register_page_interruptible(ab, AB3550_EVENT_BANK,
-               AB3550_EVENT_REG, events, AB3550_NUM_EVENT_REG);
-       if (err)
-               goto err_event_rd;
-
-       if (!ab->startup_events_read) {
-               dev_info(&ab->i2c_client[0]->dev,
-                       "startup events 0x%x,0x%x,0x%x,0x%x,0x%x\n",
-                       ab->startup_events[0], ab->startup_events[1],
-                       ab->startup_events[2], ab->startup_events[3],
-                       ab->startup_events[4]);
-               ab->startup_events_read = true;
-               goto out;
-       }
-
-       /* The two highest bits in event[4] are not used. */
-       events[4] &= 0x3f;
-
-       spin_lock_irqsave(&ab->event_lock, flags);
-       for (i = 0; i < AB3550_NUM_EVENT_REG; i++)
-               events[i] &= ~ab->event_mask[i];
-       spin_unlock_irqrestore(&ab->event_lock, flags);
-
-       for (i = 0; i < AB3550_NUM_EVENT_REG; i++) {
-               u8 bit;
-               u8 event_reg;
-
-               dev_dbg(&ab->i2c_client[0]->dev, "IRQ Event[%d]: 0x%2x\n",
-                       i, events[i]);
-
-               event_reg = events[i];
-               for (bit = 0; event_reg; bit++, event_reg /= 2) {
-                       if (event_reg % 2) {
-                               unsigned int irq;
-                               struct ab3550_platform_data *plf_data;
-
-                               plf_data = ab->i2c_client[0]->dev.platform_data;
-                               irq = plf_data->irq.base + (i * 8) + bit;
-                               handle_nested_irq(irq);
-                       }
-               }
-       }
-out:
-       return IRQ_HANDLED;
-
-err_event_rd:
-       dev_dbg(&ab->i2c_client[0]->dev, "error reading event registers\n");
-       return IRQ_HANDLED;
-}
-
-#ifdef CONFIG_DEBUG_FS
-static struct ab3550_reg_ranges debug_ranges[AB3550_NUM_BANKS] = {
-       {
-               .count = 6,
-               .range = (struct ab3550_reg_range[]) {
-                       {
-                               .first = 0x00,
-                               .last = 0x0e,
-                       },
-                       {
-                               .first = 0x10,
-                               .last = 0x1a,
-                       },
-                       {
-                               .first = 0x1e,
-                               .last = 0x4f,
-                       },
-                       {
-                               .first = 0x51,
-                               .last = 0x63,
-                       },
-                       {
-                               .first = 0x65,
-                               .last = 0xa3,
-                       },
-                       {
-                               .first = 0xa5,
-                               .last = 0xa8,
-                       },
-               }
-       },
-       {
-               .count = 8,
-               .range = (struct ab3550_reg_range[]) {
-                       {
-                               .first = 0x00,
-                               .last = 0x0e,
-                       },
-                       {
-                               .first = 0x10,
-                               .last = 0x17,
-                       },
-                       {
-                               .first = 0x1a,
-                               .last = 0x1c,
-                       },
-                       {
-                               .first = 0x20,
-                               .last = 0x56,
-                       },
-                       {
-                               .first = 0x5a,
-                               .last = 0x88,
-                       },
-                       {
-                               .first = 0x8a,
-                               .last = 0xad,
-                       },
-                       {
-                               .first = 0xb0,
-                               .last = 0xba,
-                       },
-                       {
-                               .first = 0xbc,
-                               .last = 0xc3,
-                       },
-               }
-       },
-};
-
-static int ab3550_registers_print(struct seq_file *s, void *p)
-{
-       struct ab3550 *ab = s->private;
-       int bank;
-
-       seq_printf(s, AB3550_NAME_STRING " register values:\n");
-
-       for (bank = 0; bank < AB3550_NUM_BANKS; bank++) {
-               unsigned int i;
-
-               seq_printf(s, " bank %d:\n", bank);
-               for (i = 0; i < debug_ranges[bank].count; i++) {
-                       u8 reg;
-
-                       for (reg = debug_ranges[bank].range[i].first;
-                               reg <= debug_ranges[bank].range[i].last;
-                               reg++) {
-                               u8 value;
-
-                               get_register_interruptible(ab, bank, reg,
-                                       &value);
-                               seq_printf(s, "  [%d/0x%02X]: 0x%02X\n", bank,
-                                       reg, value);
-                       }
-               }
-       }
-       return 0;
-}
-
-static int ab3550_registers_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, ab3550_registers_print, inode->i_private);
-}
-
-static const struct file_operations ab3550_registers_fops = {
-       .open = ab3550_registers_open,
-       .read = seq_read,
-       .llseek = seq_lseek,
-       .release = single_release,
-       .owner = THIS_MODULE,
-};
-
-static int ab3550_bank_print(struct seq_file *s, void *p)
-{
-       struct ab3550 *ab = s->private;
-
-       seq_printf(s, "%d\n", ab->debug_bank);
-       return 0;
-}
-
-static int ab3550_bank_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, ab3550_bank_print, inode->i_private);
-}
-
-static ssize_t ab3550_bank_write(struct file *file,
-       const char __user *user_buf,
-       size_t count, loff_t *ppos)
-{
-       struct ab3550 *ab = ((struct seq_file *)(file->private_data))->private;
-       unsigned long user_bank;
-       int err;
-
-       /* Get userspace string and assure termination */
-       err = kstrtoul_from_user(user_buf, count, 0, &user_bank);
-       if (err)
-               return err;
-
-       if (user_bank >= AB3550_NUM_BANKS) {
-               dev_err(&ab->i2c_client[0]->dev,
-                       "debugfs error input > number of banks\n");
-               return -EINVAL;
-       }
-
-       ab->debug_bank = user_bank;
-
-       return count;
-}
-
-static int ab3550_address_print(struct seq_file *s, void *p)
-{
-       struct ab3550 *ab = s->private;
-
-       seq_printf(s, "0x%02X\n", ab->debug_address);
-       return 0;
-}
-
-static int ab3550_address_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, ab3550_address_print, inode->i_private);
-}
-
-static ssize_t ab3550_address_write(struct file *file,
-       const char __user *user_buf,
-       size_t count, loff_t *ppos)
-{
-       struct ab3550 *ab = ((struct seq_file *)(file->private_data))->private;
-       unsigned long user_address;
-       int err;
-
-       /* Get userspace string and assure termination */
-       err = kstrtoul_from_user(user_buf, count, 0, &user_address);
-       if (err)
-               return err;
-
-       if (user_address > 0xff) {
-               dev_err(&ab->i2c_client[0]->dev,
-                       "debugfs error input > 0xff\n");
-               return -EINVAL;
-       }
-       ab->debug_address = user_address;
-       return count;
-}
-
-static int ab3550_val_print(struct seq_file *s, void *p)
-{
-       struct ab3550 *ab = s->private;
-       int err;
-       u8 regvalue;
-
-       err = get_register_interruptible(ab, (u8)ab->debug_bank,
-               (u8)ab->debug_address, &regvalue);
-       if (err)
-               return -EINVAL;
-       seq_printf(s, "0x%02X\n", regvalue);
-
-       return 0;
-}
-
-static int ab3550_val_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, ab3550_val_print, inode->i_private);
-}
-
-static ssize_t ab3550_val_write(struct file *file,
-       const char __user *user_buf,
-       size_t count, loff_t *ppos)
-{
-       struct ab3550 *ab = ((struct seq_file *)(file->private_data))->private;
-       unsigned long user_val;
-       int err;
-       u8 regvalue;
-
-       /* Get userspace string and assure termination */
-       err = kstrtoul_from_user(user_buf, count, 0, &user_val);
-       if (err)
-               return err;
-
-       if (user_val > 0xff) {
-               dev_err(&ab->i2c_client[0]->dev,
-                       "debugfs error input > 0xff\n");
-               return -EINVAL;
-       }
-       err = mask_and_set_register_interruptible(
-               ab, (u8)ab->debug_bank,
-               (u8)ab->debug_address, 0xFF, (u8)user_val);
-       if (err)
-               return -EINVAL;
-
-       get_register_interruptible(ab, (u8)ab->debug_bank,
-               (u8)ab->debug_address, &regvalue);
-       if (err)
-               return -EINVAL;
-
-       return count;
-}
-
-static const struct file_operations ab3550_bank_fops = {
-       .open = ab3550_bank_open,
-       .write = ab3550_bank_write,
-       .read = seq_read,
-       .llseek = seq_lseek,
-       .release = single_release,
-       .owner = THIS_MODULE,
-};
-
-static const struct file_operations ab3550_address_fops = {
-       .open = ab3550_address_open,
-       .write = ab3550_address_write,
-       .read = seq_read,
-       .llseek = seq_lseek,
-       .release = single_release,
-       .owner = THIS_MODULE,
-};
-
-static const struct file_operations ab3550_val_fops = {
-       .open = ab3550_val_open,
-       .write = ab3550_val_write,
-       .read = seq_read,
-       .llseek = seq_lseek,
-       .release = single_release,
-       .owner = THIS_MODULE,
-};
-
-static struct dentry *ab3550_dir;
-static struct dentry *ab3550_reg_file;
-static struct dentry *ab3550_bank_file;
-static struct dentry *ab3550_address_file;
-static struct dentry *ab3550_val_file;
-
-static inline void ab3550_setup_debugfs(struct ab3550 *ab)
-{
-       ab->debug_bank = 0;
-       ab->debug_address = 0x00;
-
-       ab3550_dir = debugfs_create_dir(AB3550_NAME_STRING, NULL);
-       if (!ab3550_dir)
-               goto exit_no_debugfs;
-
-       ab3550_reg_file = debugfs_create_file("all-registers",
-               S_IRUGO, ab3550_dir, ab, &ab3550_registers_fops);
-       if (!ab3550_reg_file)
-               goto exit_destroy_dir;
-
-       ab3550_bank_file = debugfs_create_file("register-bank",
-               (S_IRUGO | S_IWUSR), ab3550_dir, ab, &ab3550_bank_fops);
-       if (!ab3550_bank_file)
-               goto exit_destroy_reg;
-
-       ab3550_address_file = debugfs_create_file("register-address",
-               (S_IRUGO | S_IWUSR), ab3550_dir, ab, &ab3550_address_fops);
-       if (!ab3550_address_file)
-               goto exit_destroy_bank;
-
-       ab3550_val_file = debugfs_create_file("register-value",
-               (S_IRUGO | S_IWUSR), ab3550_dir, ab, &ab3550_val_fops);
-       if (!ab3550_val_file)
-               goto exit_destroy_address;
-
-       return;
-
-exit_destroy_address:
-       debugfs_remove(ab3550_address_file);
-exit_destroy_bank:
-       debugfs_remove(ab3550_bank_file);
-exit_destroy_reg:
-       debugfs_remove(ab3550_reg_file);
-exit_destroy_dir:
-       debugfs_remove(ab3550_dir);
-exit_no_debugfs:
-       dev_err(&ab->i2c_client[0]->dev, "failed to create debugfs entries.\n");
-       return;
-}
-
-static inline void ab3550_remove_debugfs(void)
-{
-       debugfs_remove(ab3550_val_file);
-       debugfs_remove(ab3550_address_file);
-       debugfs_remove(ab3550_bank_file);
-       debugfs_remove(ab3550_reg_file);
-       debugfs_remove(ab3550_dir);
-}
-
-#else /* !CONFIG_DEBUG_FS */
-static inline void ab3550_setup_debugfs(struct ab3550 *ab)
-{
-}
-static inline void ab3550_remove_debugfs(void)
-{
-}
-#endif
-
-/*
- * Basic set-up, datastructure creation/destruction and I2C interface.
- * This sets up a default config in the AB3550 chip so that it
- * will work as expected.
- */
-static int __init ab3550_setup(struct ab3550 *ab)
-{
-       int err = 0;
-       int i;
-       struct ab3550_platform_data *plf_data;
-       struct abx500_init_settings *settings;
-
-       plf_data = ab->i2c_client[0]->dev.platform_data;
-       settings = plf_data->init_settings;
-
-       for (i = 0; i < plf_data->init_settings_sz; i++) {
-               err = mask_and_set_register_interruptible(ab,
-                       settings[i].bank,
-                       settings[i].reg,
-                       0xFF, settings[i].setting);
-               if (err)
-                       goto exit_no_setup;
-
-               /* If event mask register update the event mask in ab3550 */
-               if ((settings[i].bank == 0) &&
-                       (AB3550_IMR1 <= settings[i].reg) &&
-                       (settings[i].reg <= AB3550_IMR5)) {
-                       ab->event_mask[settings[i].reg - AB3550_IMR1] =
-                               settings[i].setting;
-               }
-       }
-exit_no_setup:
-       return err;
-}
-
-static void ab3550_mask_work(struct work_struct *work)
-{
-       struct ab3550 *ab = container_of(work, struct ab3550, mask_work);
-       int i;
-       unsigned long flags;
-       u8 mask[AB3550_NUM_EVENT_REG];
-
-       spin_lock_irqsave(&ab->event_lock, flags);
-       for (i = 0; i < AB3550_NUM_EVENT_REG; i++)
-               mask[i] = ab->event_mask[i];
-       spin_unlock_irqrestore(&ab->event_lock, flags);
-
-       for (i = 0; i < AB3550_NUM_EVENT_REG; i++) {
-               int err;
-
-               err = mask_and_set_register_interruptible(ab, 0,
-                       (AB3550_IMR1 + i), ~0, mask[i]);
-               if (err)
-                       dev_err(&ab->i2c_client[0]->dev,
-                               "ab3550_mask_work failed 0x%x,0x%x\n",
-                               (AB3550_IMR1 + i), mask[i]);
-       }
-}
-
-static void ab3550_mask(struct irq_data *data)
-{
-       unsigned long flags;
-       struct ab3550 *ab;
-       struct ab3550_platform_data *plf_data;
-       int irq;
-
-       ab = irq_data_get_irq_chip_data(data);
-       plf_data = ab->i2c_client[0]->dev.platform_data;
-       irq = data->irq - plf_data->irq.base;
-
-       spin_lock_irqsave(&ab->event_lock, flags);
-       ab->event_mask[irq / 8] |= BIT(irq % 8);
-       spin_unlock_irqrestore(&ab->event_lock, flags);
-
-       schedule_work(&ab->mask_work);
-}
-
-static void ab3550_unmask(struct irq_data *data)
-{
-       unsigned long flags;
-       struct ab3550 *ab;
-       struct ab3550_platform_data *plf_data;
-       int irq;
-
-       ab = irq_data_get_irq_chip_data(data);
-       plf_data = ab->i2c_client[0]->dev.platform_data;
-       irq = data->irq - plf_data->irq.base;
-
-       spin_lock_irqsave(&ab->event_lock, flags);
-       ab->event_mask[irq / 8] &= ~BIT(irq % 8);
-       spin_unlock_irqrestore(&ab->event_lock, flags);
-
-       schedule_work(&ab->mask_work);
-}
-
-static void noop(struct irq_data *data)
-{
-}
-
-static struct irq_chip ab3550_irq_chip = {
-       .name           = "ab3550-core", /* Keep the same name as the request */
-       .irq_disable    = ab3550_mask, /* No default to mask in chip.c */
-       .irq_ack        = noop,
-       .irq_mask       = ab3550_mask,
-       .irq_unmask     = ab3550_unmask,
-};
-
-struct ab_family_id {
-       u8      id;
-       char    *name;
-};
-
-static const struct ab_family_id ids[] __initdata = {
-       /* AB3550 */
-       {
-               .id = AB3550_P1A,
-               .name = "P1A"
-       },
-       /* Terminator */
-       {
-               .id = 0x00,
-       }
-};
-
-static int __init ab3550_probe(struct i2c_client *client,
-       const struct i2c_device_id *id)
-{
-       struct ab3550 *ab;
-       struct ab3550_platform_data *ab3550_plf_data =
-               client->dev.platform_data;
-       int err;
-       int i;
-       int num_i2c_clients = 0;
-
-       ab = kzalloc(sizeof(struct ab3550), GFP_KERNEL);
-       if (!ab) {
-               dev_err(&client->dev,
-                       "could not allocate " AB3550_NAME_STRING " device\n");
-               return -ENOMEM;
-       }
-
-       /* Initialize data structure */
-       mutex_init(&ab->access_mutex);
-       spin_lock_init(&ab->event_lock);
-       ab->i2c_client[0] = client;
-
-       i2c_set_clientdata(client, ab);
-
-       /* Read chip ID register */
-       err = get_register_interruptible(ab, 0, AB3550_CID_REG, &ab->chip_id);
-       if (err) {
-               dev_err(&client->dev, "could not communicate with the analog "
-                       "baseband chip\n");
-               goto exit_no_detect;
-       }
-
-       for (i = 0; ids[i].id != 0x0; i++) {
-               if (ids[i].id == ab->chip_id) {
-                       snprintf(&ab->chip_name[0], sizeof(ab->chip_name) - 1,
-                               AB3550_ID_FORMAT_STRING, ids[i].name);
-                       break;
-               }
-       }
-
-       if (ids[i].id == 0x0) {
-               dev_err(&client->dev, "unknown analog baseband chip id: 0x%x\n",
-                       ab->chip_id);
-               dev_err(&client->dev, "driver not started!\n");
-               goto exit_no_detect;
-       }
-
-       dev_info(&client->dev, "detected AB chip: %s\n", &ab->chip_name[0]);
-
-       /* Attach other dummy I2C clients. */
-       while (++num_i2c_clients < AB3550_NUM_BANKS) {
-               ab->i2c_client[num_i2c_clients] =
-                       i2c_new_dummy(client->adapter,
-                               (client->addr + num_i2c_clients));
-               if (!ab->i2c_client[num_i2c_clients]) {
-                       err = -ENOMEM;
-                       goto exit_no_dummy_client;
-               }
-               strlcpy(ab->i2c_client[num_i2c_clients]->name, id->name,
-                       sizeof(ab->i2c_client[num_i2c_clients]->name));
-       }
-
-       err = ab3550_setup(ab);
-       if (err)
-               goto exit_no_setup;
-
-       INIT_WORK(&ab->mask_work, ab3550_mask_work);
-
-       for (i = 0; i < ab3550_plf_data->irq.count; i++) {
-               unsigned int irq;
-
-               irq = ab3550_plf_data->irq.base + i;
-               irq_set_chip_data(irq, ab);
-               irq_set_chip_and_handler(irq, &ab3550_irq_chip,
-                                        handle_simple_irq);
-               irq_set_nested_thread(irq, 1);
-#ifdef CONFIG_ARM
-               set_irq_flags(irq, IRQF_VALID);
-#else
-               irq_set_noprobe(irq);
-#endif
-       }
-
-       err = request_threaded_irq(client->irq, NULL, ab3550_irq_handler,
-               IRQF_ONESHOT, "ab3550-core", ab);
-       /* This real unpredictable IRQ is of course sampled for entropy */
-       rand_initialize_irq(client->irq);
-
-       if (err)
-               goto exit_no_irq;
-
-       err = abx500_register_ops(&client->dev, &ab3550_ops);
-       if (err)
-               goto exit_no_ops;
-
-       /* Set up and register the platform devices. */
-       for (i = 0; i < AB3550_NUM_DEVICES; i++) {
-               ab3550_devs[i].platform_data = ab3550_plf_data->dev_data[i];
-               ab3550_devs[i].pdata_size = ab3550_plf_data->dev_data_sz[i];
-       }
-
-       err = mfd_add_devices(&client->dev, 0, ab3550_devs,
-               ARRAY_SIZE(ab3550_devs), NULL,
-               ab3550_plf_data->irq.base);
-
-       ab3550_setup_debugfs(ab);
-
-       return 0;
-
-exit_no_ops:
-exit_no_irq:
-exit_no_setup:
-exit_no_dummy_client:
-       /* Unregister the dummy i2c clients. */
-       while (--num_i2c_clients)
-               i2c_unregister_device(ab->i2c_client[num_i2c_clients]);
-exit_no_detect:
-       kfree(ab);
-       return err;
-}
-
-static int __exit ab3550_remove(struct i2c_client *client)
-{
-       struct ab3550 *ab = i2c_get_clientdata(client);
-       int num_i2c_clients = AB3550_NUM_BANKS;
-
-       mfd_remove_devices(&client->dev);
-       ab3550_remove_debugfs();
-
-       while (--num_i2c_clients)
-               i2c_unregister_device(ab->i2c_client[num_i2c_clients]);
-
-       /*
-        * At this point, all subscribers should have unregistered
-        * their notifiers so deactivate IRQ
-        */
-       free_irq(client->irq, ab);
-       kfree(ab);
-       return 0;
-}
-
-static const struct i2c_device_id ab3550_id[] = {
-       {AB3550_NAME_STRING, 0},
-       {}
-};
-MODULE_DEVICE_TABLE(i2c, ab3550_id);
-
-static struct i2c_driver ab3550_driver = {
-       .driver = {
-               .name   = AB3550_NAME_STRING,
-               .owner  = THIS_MODULE,
-       },
-       .id_table       = ab3550_id,
-       .probe          = ab3550_probe,
-       .remove         = __exit_p(ab3550_remove),
-};
-
-static int __init ab3550_i2c_init(void)
-{
-       return i2c_add_driver(&ab3550_driver);
-}
-
-static void __exit ab3550_i2c_exit(void)
-{
-       i2c_del_driver(&ab3550_driver);
-}
-
-subsys_initcall(ab3550_i2c_init);
-module_exit(ab3550_i2c_exit);
-
-MODULE_AUTHOR("Mattias Wallin <mattias.wallin@stericsson.com>");
-MODULE_DESCRIPTION("AB3550 core driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/ab5500-core.c b/drivers/mfd/ab5500-core.c
new file mode 100644 (file)
index 0000000..4175544
--- /dev/null
@@ -0,0 +1,1439 @@
+/*
+ * Copyright (C) 2007-2011 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Low-level core for exclusive access to the AB5500 IC on the I2C bus
+ * and some basic chip-configuration.
+ * Author: Bengt Jonsson <bengt.g.jonsson@stericsson.com>
+ * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
+ * Author: Mattias Wallin <mattias.wallin@stericsson.com>
+ * Author: Rickard Andersson <rickard.andersson@stericsson.com>
+ * Author: Karl Komierowski  <karl.komierowski@stericsson.com>
+ * Author: Bibek Basu <bibek.basu@stericsson.com>
+ *
+ * TODO: Event handling with irq_chip. Waiting for PRCMU fw support.
+ */
+
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/random.h>
+#include <linux/mfd/ab5500/ab5500.h>
+#include <linux/mfd/abx500.h>
+#include <linux/list.h>
+#include <linux/bitops.h>
+#include <linux/spinlock.h>
+#include <linux/mfd/core.h>
+#include <linux/version.h>
+#include <linux/mfd/db5500-prcmu.h>
+
+#include "ab5500-core.h"
+#include "ab5500-debugfs.h"
+
+#define AB5500_NUM_EVENT_REG 23
+#define AB5500_IT_LATCH0_REG 0x40
+#define AB5500_IT_MASK0_REG 0x60
+
+/*
+ * Permissible register ranges for reading and writing per device and bank.
+ *
+ * The ranges must be listed in increasing address order, and no overlaps are
+ * allowed. It is assumed that write permission implies read permission
+ * (i.e. only RO and RW permissions should be used).  Ranges with write
+ * permission must not be split up.
+ */
+
+#define NO_RANGE {.count = 0, .range = NULL,}
+static struct ab5500_i2c_banks ab5500_bank_ranges[AB5500_NUM_DEVICES] = {
+       [AB5500_DEVID_USB] =  {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_USB,
+                               .nranges = 12,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x01,
+                                               .last = 0x01,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x80,
+                                               .last = 0x83,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x87,
+                                               .last = 0x8A,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x8B,
+                                               .last = 0x8B,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0x91,
+                                               .last = 0x92,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0x93,
+                                               .last = 0x93,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x94,
+                                               .last = 0x94,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0xA8,
+                                               .last = 0xB0,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0xB2,
+                                               .last = 0xB2,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0xB4,
+                                               .last = 0xBC,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0xBF,
+                                               .last = 0xBF,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0xC1,
+                                               .last = 0xC5,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_ADC] =  {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_ADC,
+                               .nranges = 6,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x1F,
+                                               .last = 0x22,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0x23,
+                                               .last = 0x24,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x26,
+                                               .last = 0x2D,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0x2F,
+                                               .last = 0x34,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x37,
+                                               .last = 0x57,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x58,
+                                               .last = 0x58,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_LEDS] =  {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_LED,
+                               .nranges = 1,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x00,
+                                               .last = 0x0C,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_VIDEO] =   {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_VDENC,
+                               .nranges = 12,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x00,
+                                               .last = 0x08,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x09,
+                                               .last = 0x09,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0x0A,
+                                               .last = 0x12,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x15,
+                                               .last = 0x19,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x1B,
+                                               .last = 0x21,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x27,
+                                               .last = 0x2C,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x41,
+                                               .last = 0x41,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x45,
+                                               .last = 0x5B,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x5D,
+                                               .last = 0x5D,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x69,
+                                               .last = 0x69,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x6C,
+                                               .last = 0x6D,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x80,
+                                               .last = 0x81,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_REGULATORS] =   {
+               .nbanks = 2,
+               .bank =  (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_STARTUP,
+                               .nranges = 12,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x00,
+                                               .last = 0x01,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x1F,
+                                               .last = 0x1F,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x2E,
+                                               .last = 0x2E,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0x2F,
+                                               .last = 0x30,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x50,
+                                               .last = 0x51,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x60,
+                                               .last = 0x61,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x66,
+                                               .last = 0x8A,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x8C,
+                                               .last = 0x96,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0xAA,
+                                               .last = 0xB4,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0xB7,
+                                               .last = 0xBF,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0xC1,
+                                               .last = 0xCA,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0xD3,
+                                               .last = 0xE0,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+                       {
+                               .bankid = AB5500_BANK_SIM_USBSIM,
+                               .nranges = 1,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x13,
+                                               .last = 0x19,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_SIM] =   {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_SIM_USBSIM,
+                               .nranges = 1,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x13,
+                                               .last = 0x19,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_RTC] =   {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_RTC,
+                               .nranges = 2,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x00,
+                                               .last = 0x04,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0x06,
+                                               .last = 0x0C,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_CHARGER] =   {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_CHG,
+                               .nranges = 2,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x11,
+                                               .last = 0x11,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0x12,
+                                               .last = 0x1B,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_FUELGAUGE] =   {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_FG_BATTCOM_ACC,
+                               .nranges = 2,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x00,
+                                               .last = 0x0B,
+                                               .perm = AB5500_PERM_RO,
+                                       },
+                                       {
+                                               .first = 0x0C,
+                                               .last = 0x10,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_VIBRATOR] =   {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_VIBRA,
+                               .nranges = 2,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x10,
+                                               .last = 0x13,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0xFE,
+                                               .last = 0xFE,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_CODEC] =   {
+               .nbanks = 1,
+               .bank = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_AUDIO_HEADSETUSB,
+                               .nranges = 2,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x00,
+                                               .last = 0x48,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                                       {
+                                               .first = 0xEB,
+                                               .last = 0xFB,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+       [AB5500_DEVID_POWER] = {
+               .nbanks = 2,
+               .bank   = (struct ab5500_i2c_ranges []) {
+                       {
+                               .bankid = AB5500_BANK_STARTUP,
+                               .nranges = 1,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x30,
+                                               .last = 0x30,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+                       {
+                               .bankid = AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP,
+                               .nranges = 1,
+                               .range = (struct ab5500_reg_range[]) {
+                                       {
+                                               .first = 0x01,
+                                               .last = 0x01,
+                                               .perm = AB5500_PERM_RW,
+                                       },
+                               },
+                       },
+               },
+       },
+};
+
+#define AB5500_IRQ(bank, bit)  ((bank) * 8 + (bit))
+
+/* I appologize for the resource names beeing a mix of upper case
+ * and lower case but I want them to be exact as the documentation */
+static struct mfd_cell ab5500_devs[AB5500_NUM_DEVICES] = {
+       [AB5500_DEVID_LEDS] = {
+               .name = "ab5500-leds",
+               .id = AB5500_DEVID_LEDS,
+       },
+       [AB5500_DEVID_POWER] = {
+               .name = "ab5500-power",
+               .id = AB5500_DEVID_POWER,
+       },
+       [AB5500_DEVID_REGULATORS] = {
+               .name = "ab5500-regulator",
+               .id = AB5500_DEVID_REGULATORS,
+       },
+       [AB5500_DEVID_SIM] = {
+               .name = "ab5500-sim",
+               .id = AB5500_DEVID_SIM,
+               .num_resources = 1,
+               .resources = (struct resource[]) {
+                       {
+                               .name = "SIMOFF",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(2, 0), /*rising*/
+                               .end = AB5500_IRQ(2, 1), /*falling*/
+                       },
+               },
+       },
+       [AB5500_DEVID_RTC] = {
+               .name = "ab5500-rtc",
+               .id = AB5500_DEVID_RTC,
+               .num_resources = 1,
+               .resources = (struct resource[]) {
+                       {
+                               .name   = "RTC_Alarm",
+                               .flags  = IORESOURCE_IRQ,
+                               .start  = AB5500_IRQ(1, 7),
+                               .end    = AB5500_IRQ(1, 7),
+                       }
+               },
+       },
+       [AB5500_DEVID_CHARGER] = {
+               .name = "ab5500-charger",
+               .id = AB5500_DEVID_CHARGER,
+       },
+       [AB5500_DEVID_ADC] = {
+               .name = "ab5500-adc",
+               .id = AB5500_DEVID_ADC,
+               .num_resources = 10,
+               .resources = (struct resource[]) {
+                       {
+                               .name = "TRIGGER-0",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 0),
+                               .end = AB5500_IRQ(0, 0),
+                       },
+                       {
+                               .name = "TRIGGER-1",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 1),
+                               .end = AB5500_IRQ(0, 1),
+                       },
+                       {
+                               .name = "TRIGGER-2",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 2),
+                               .end = AB5500_IRQ(0, 2),
+                       },
+                       {
+                               .name = "TRIGGER-3",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 3),
+                               .end = AB5500_IRQ(0, 3),
+                       },
+                       {
+                               .name = "TRIGGER-4",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 4),
+                               .end = AB5500_IRQ(0, 4),
+                       },
+                       {
+                               .name = "TRIGGER-5",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 5),
+                               .end = AB5500_IRQ(0, 5),
+                       },
+                       {
+                               .name = "TRIGGER-6",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 6),
+                               .end = AB5500_IRQ(0, 6),
+                       },
+                       {
+                               .name = "TRIGGER-7",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 7),
+                               .end = AB5500_IRQ(0, 7),
+                       },
+                       {
+                               .name = "TRIGGER-VBAT",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 8),
+                               .end = AB5500_IRQ(0, 8),
+                       },
+                       {
+                               .name = "TRIGGER-VBAT-TXON",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(0, 9),
+                               .end = AB5500_IRQ(0, 9),
+                       },
+               },
+       },
+       [AB5500_DEVID_FUELGAUGE] = {
+               .name = "ab5500-fuelgauge",
+               .id = AB5500_DEVID_FUELGAUGE,
+               .num_resources = 6,
+               .resources = (struct resource[]) {
+                       {
+                               .name = "Batt_attach",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(7, 5),
+                               .end = AB5500_IRQ(7, 5),
+                       },
+                       {
+                               .name = "Batt_removal",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(7, 6),
+                               .end = AB5500_IRQ(7, 6),
+                       },
+                       {
+                               .name = "UART_framing",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(7, 7),
+                               .end = AB5500_IRQ(7, 7),
+                       },
+                       {
+                               .name = "UART_overrun",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(8, 0),
+                               .end = AB5500_IRQ(8, 0),
+                       },
+                       {
+                               .name = "UART_Rdy_RX",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(8, 1),
+                               .end = AB5500_IRQ(8, 1),
+                       },
+                       {
+                               .name = "UART_Rdy_TX",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(8, 2),
+                               .end = AB5500_IRQ(8, 2),
+                       },
+               },
+       },
+       [AB5500_DEVID_VIBRATOR] = {
+               .name = "ab5500-vibrator",
+               .id = AB5500_DEVID_VIBRATOR,
+       },
+       [AB5500_DEVID_CODEC] = {
+               .name = "ab5500-codec",
+               .id = AB5500_DEVID_CODEC,
+               .num_resources = 3,
+               .resources = (struct resource[]) {
+                       {
+                               .name = "audio_spkr1_ovc",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(9, 5),
+                               .end = AB5500_IRQ(9, 5),
+                       },
+                       {
+                               .name = "audio_plllocked",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(9, 6),
+                               .end = AB5500_IRQ(9, 6),
+                       },
+                       {
+                               .name = "audio_spkr2_ovc",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(17, 4),
+                               .end = AB5500_IRQ(17, 4),
+                       },
+               },
+       },
+       [AB5500_DEVID_USB] = {
+               .name = "ab5500-usb",
+               .id = AB5500_DEVID_USB,
+               .num_resources = 36,
+               .resources = (struct resource[]) {
+                       {
+                               .name = "Link_Update",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(22, 1),
+                               .end = AB5500_IRQ(22, 1),
+                       },
+                       {
+                               .name = "DCIO",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(8, 3),
+                               .end = AB5500_IRQ(8, 4),
+                       },
+                       {
+                               .name = "VBUS_R",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(8, 5),
+                               .end = AB5500_IRQ(8, 5),
+                       },
+                       {
+                               .name = "VBUS_F",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(8, 6),
+                               .end = AB5500_IRQ(8, 6),
+                       },
+                       {
+                               .name = "CHGstate_10_PCVBUSchg",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(8, 7),
+                               .end = AB5500_IRQ(8, 7),
+                       },
+                       {
+                               .name = "DCIOreverse_ovc",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(9, 0),
+                               .end = AB5500_IRQ(9, 0),
+                       },
+                       {
+                               .name = "USBCharDetDone",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(9, 1),
+                               .end = AB5500_IRQ(9, 1),
+                       },
+                       {
+                               .name = "DCIO_no_limit",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(9, 2),
+                               .end = AB5500_IRQ(9, 2),
+                       },
+                       {
+                               .name = "USB_suspend",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(9, 3),
+                               .end = AB5500_IRQ(9, 3),
+                       },
+                       {
+                               .name = "DCIOreverse_fwdcurrent",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(9, 4),
+                               .end = AB5500_IRQ(9, 4),
+                       },
+                       {
+                               .name = "Vbus_Imeasmax_change",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(9, 5),
+                               .end = AB5500_IRQ(9, 6),
+                       },
+                       {
+                               .name = "OVV",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(14, 5),
+                               .end = AB5500_IRQ(14, 5),
+                       },
+                       {
+                               .name = "USBcharging_NOTok",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(15, 3),
+                               .end = AB5500_IRQ(15, 3),
+                       },
+                       {
+                               .name = "usb_adp_sensoroff",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(15, 6),
+                               .end = AB5500_IRQ(15, 6),
+                       },
+                       {
+                               .name = "usb_adp_probeplug",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(15, 7),
+                               .end = AB5500_IRQ(15, 7),
+                       },
+                       {
+                               .name = "usb_adp_sinkerror",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(16, 0),
+                               .end = AB5500_IRQ(16, 6),
+                       },
+                       {
+                               .name = "usb_adp_sourceerror",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(16, 1),
+                               .end = AB5500_IRQ(16, 1),
+                       },
+                       {
+                               .name = "usb_idgnd_r",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(16, 2),
+                               .end = AB5500_IRQ(16, 2),
+                       },
+                       {
+                               .name = "usb_idgnd_f",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(16, 3),
+                               .end = AB5500_IRQ(16, 3),
+                       },
+                       {
+                               .name = "usb_iddetR1",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(16, 4),
+                               .end = AB5500_IRQ(16, 5),
+                       },
+                       {
+                               .name = "usb_iddetR2",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(16, 6),
+                               .end = AB5500_IRQ(16, 7),
+                       },
+                       {
+                               .name = "usb_iddetR3",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(17, 0),
+                               .end = AB5500_IRQ(17, 1),
+                       },
+                       {
+                               .name = "usb_iddetR4",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(17, 2),
+                               .end = AB5500_IRQ(17, 3),
+                       },
+                       {
+                               .name = "CharTempWindowOk",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(17, 7),
+                               .end = AB5500_IRQ(18, 0),
+                       },
+                       {
+                               .name = "USB_SprDetect",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(18, 1),
+                               .end = AB5500_IRQ(18, 1),
+                       },
+                       {
+                               .name = "usb_adp_probe_unplug",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(18, 2),
+                               .end = AB5500_IRQ(18, 2),
+                       },
+                       {
+                               .name = "VBUSChDrop",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(18, 3),
+                               .end = AB5500_IRQ(18, 4),
+                       },
+                       {
+                               .name = "dcio_char_rec_done",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(18, 5),
+                               .end = AB5500_IRQ(18, 5),
+                       },
+                       {
+                               .name = "Charging_stopped_by_temp",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(18, 6),
+                               .end = AB5500_IRQ(18, 6),
+                       },
+                       {
+                               .name = "CHGstate_11_SafeModeVBUS",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(21, 1),
+                               .end = AB5500_IRQ(21, 2),
+                       },
+                       {
+                               .name = "CHGstate_12_comletedVBUS",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(21, 2),
+                               .end = AB5500_IRQ(21, 2),
+                       },
+                       {
+                               .name = "CHGstate_13_completedVBUS",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(21, 3),
+                               .end = AB5500_IRQ(21, 3),
+                       },
+                       {
+                               .name = "CHGstate_14_FullChgDCIO",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(21, 4),
+                               .end = AB5500_IRQ(21, 4),
+                       },
+                       {
+                               .name = "CHGstate_15_SafeModeDCIO",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(21, 5),
+                               .end = AB5500_IRQ(21, 5),
+                       },
+                       {
+                               .name = "CHGstate_16_OFFsuspendDCIO",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(21, 6),
+                               .end = AB5500_IRQ(21, 6),
+                       },
+                       {
+                               .name = "CHGstate_17_completedDCIO",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(21, 7),
+                               .end = AB5500_IRQ(21, 7),
+                       },
+               },
+       },
+       [AB5500_DEVID_OTP] = {
+               .name = "ab5500-otp",
+               .id = AB5500_DEVID_OTP,
+       },
+       [AB5500_DEVID_VIDEO] = {
+               .name = "ab5500-video",
+               .id = AB5500_DEVID_VIDEO,
+               .num_resources = 1,
+               .resources = (struct resource[]) {
+                       {
+                               .name = "plugTVdet",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(22, 2),
+                               .end = AB5500_IRQ(22, 2),
+                       },
+               },
+       },
+       [AB5500_DEVID_DBIECI] = {
+               .name = "ab5500-dbieci",
+               .id = AB5500_DEVID_DBIECI,
+               .num_resources = 10,
+               .resources = (struct resource[]) {
+                       {
+                               .name = "COLL",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(14, 0),
+                               .end = AB5500_IRQ(14, 0),
+                       },
+                       {
+                               .name = "RESERR",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(14, 1),
+                               .end = AB5500_IRQ(14, 1),
+                       },
+                       {
+                               .name = "FRAERR",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(14, 2),
+                               .end = AB5500_IRQ(14, 2),
+                       },
+                       {
+                               .name = "COMERR",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(14, 3),
+                               .end = AB5500_IRQ(14, 3),
+                       },
+                       {
+                               .name = "BSI_indicator",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(14, 4),
+                               .end = AB5500_IRQ(14, 4),
+                       },
+                       {
+                               .name = "SPDSET",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(14, 6),
+                               .end = AB5500_IRQ(14, 6),
+                       },
+                       {
+                               .name = "DSENT",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(14, 7),
+                               .end = AB5500_IRQ(14, 7),
+                       },
+                       {
+                               .name = "DREC",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(15, 0),
+                               .end = AB5500_IRQ(15, 0),
+                       },
+                       {
+                               .name = "ACCINT",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(15, 1),
+                               .end = AB5500_IRQ(15, 1),
+                       },
+                       {
+                               .name = "NOPINT",
+                               .flags = IORESOURCE_IRQ,
+                               .start = AB5500_IRQ(15, 2),
+                               .end = AB5500_IRQ(15, 2),
+                       },
+               },
+       },
+       [AB5500_DEVID_ONSWA] = {
+               .name = "ab5500-onswa",
+               .id = AB5500_DEVID_ONSWA,
+               .num_resources = 2,
+               .resources = (struct resource[]) {
+                       {
+                               .name   = "ONSWAn_rising",
+                               .flags  = IORESOURCE_IRQ,
+                               .start  = AB5500_IRQ(1, 3),
+                               .end    = AB5500_IRQ(1, 3),
+                       },
+                       {
+                               .name   = "ONSWAn_falling",
+                               .flags  = IORESOURCE_IRQ,
+                               .start  = AB5500_IRQ(1, 4),
+                               .end    = AB5500_IRQ(1, 4),
+                       },
+               },
+       },
+};
+
+/*
+ * Functionality for getting/setting register values.
+ */
+int ab5500_get_register_interruptible_raw(struct ab5500 *ab,
+                                         u8 bank, u8 reg,
+                                         u8 *value)
+{
+       int err;
+
+       if (bank >= AB5500_NUM_BANKS)
+               return -EINVAL;
+
+       err = mutex_lock_interruptible(&ab->access_mutex);
+       if (err)
+               return err;
+       err = db5500_prcmu_abb_read(bankinfo[bank].slave_addr, reg, value, 1);
+
+       mutex_unlock(&ab->access_mutex);
+       return err;
+}
+
+static int get_register_page_interruptible(struct ab5500 *ab, u8 bank,
+       u8 first_reg, u8 *regvals, u8 numregs)
+{
+       int err;
+
+       if (bank >= AB5500_NUM_BANKS)
+               return -EINVAL;
+
+       err = mutex_lock_interruptible(&ab->access_mutex);
+       if (err)
+               return err;
+
+       while (numregs) {
+               /* The hardware limit for get page is 4 */
+               u8 curnum = min_t(u8, numregs, 4u);
+
+               err = db5500_prcmu_abb_read(bankinfo[bank].slave_addr,
+                                           first_reg, regvals, curnum);
+               if (err)
+                       goto out;
+
+               numregs -= curnum;
+               first_reg += curnum;
+               regvals += curnum;
+       }
+
+out:
+       mutex_unlock(&ab->access_mutex);
+       return err;
+}
+
+int ab5500_mask_and_set_register_interruptible_raw(struct ab5500 *ab, u8 bank,
+       u8 reg, u8 bitmask, u8 bitvalues)
+{
+       int err = 0;
+
+       if (bank >= AB5500_NUM_BANKS)
+               return -EINVAL;
+
+       if (bitmask) {
+               u8 buf;
+
+               err = mutex_lock_interruptible(&ab->access_mutex);
+               if (err)
+                       return err;
+
+               if (bitmask == 0xFF) /* No need to read in this case. */
+                       buf = bitvalues;
+               else { /* Read and modify the register value. */
+                       err = db5500_prcmu_abb_read(bankinfo[bank].slave_addr,
+                               reg, &buf, 1);
+                       if (err)
+                               return err;
+
+                       buf = ((~bitmask & buf) | (bitmask & bitvalues));
+               }
+               /* Write the new value. */
+               err = db5500_prcmu_abb_write(bankinfo[bank].slave_addr, reg,
+                                            &buf, 1);
+
+               mutex_unlock(&ab->access_mutex);
+       }
+       return err;
+}
+
+static int
+set_register_interruptible(struct ab5500 *ab, u8 bank, u8 reg, u8 value)
+{
+       return ab5500_mask_and_set_register_interruptible_raw(ab, bank, reg,
+                                                             0xff, value);
+}
+
+/*
+ * Read/write permission checking functions.
+ */
+static const struct ab5500_i2c_ranges *get_bankref(u8 devid, u8 bank)
+{
+       u8 i;
+
+       if (devid < AB5500_NUM_DEVICES) {
+               for (i = 0; i < ab5500_bank_ranges[devid].nbanks; i++) {
+                       if (ab5500_bank_ranges[devid].bank[i].bankid == bank)
+                               return &ab5500_bank_ranges[devid].bank[i];
+               }
+       }
+       return NULL;
+}
+
+static bool page_write_allowed(u8 devid, u8 bank, u8 first_reg, u8 last_reg)
+{
+       u8 i; /* range loop index */
+       const struct ab5500_i2c_ranges *bankref;
+
+       bankref = get_bankref(devid, bank);
+       if (bankref == NULL || last_reg < first_reg)
+               return false;
+
+       for (i = 0; i < bankref->nranges; i++) {
+               if (first_reg < bankref->range[i].first)
+                       break;
+               if ((last_reg <= bankref->range[i].last) &&
+                       (bankref->range[i].perm & AB5500_PERM_WR))
+                       return true;
+       }
+       return false;
+}
+
+static bool reg_write_allowed(u8 devid, u8 bank, u8 reg)
+{
+       return page_write_allowed(devid, bank, reg, reg);
+}
+
+static bool page_read_allowed(u8 devid, u8 bank, u8 first_reg, u8 last_reg)
+{
+       u8 i;
+       const struct ab5500_i2c_ranges *bankref;
+
+       bankref = get_bankref(devid, bank);
+       if (bankref == NULL || last_reg < first_reg)
+               return false;
+
+
+       /* Find the range (if it exists in the list) that includes first_reg. */
+       for (i = 0; i < bankref->nranges; i++) {
+               if (first_reg < bankref->range[i].first)
+                       return false;
+               if (first_reg <= bankref->range[i].last)
+                       break;
+       }
+       /* Make sure that the entire range up to and including last_reg is
+        * readable. This may span several of the ranges in the list.
+        */
+       while ((i < bankref->nranges) &&
+               (bankref->range[i].perm & AB5500_PERM_RD)) {
+               if (last_reg <= bankref->range[i].last)
+                       return true;
+               if ((++i >= bankref->nranges) ||
+                       (bankref->range[i].first !=
+                               (bankref->range[i - 1].last + 1))) {
+                       break;
+               }
+       }
+       return false;
+}
+
+static bool reg_read_allowed(u8 devid, u8 bank, u8 reg)
+{
+       return page_read_allowed(devid, bank, reg, reg);
+}
+
+
+/*
+ * The exported register access functionality.
+ */
+static int ab5500_get_chip_id(struct device *dev)
+{
+       struct ab5500 *ab = dev_get_drvdata(dev->parent);
+
+       return (int)ab->chip_id;
+}
+
+static int ab5500_mask_and_set_register_interruptible(struct device *dev,
+               u8 bank, u8 reg, u8 bitmask, u8 bitvalues)
+{
+       struct ab5500 *ab;
+       struct platform_device *pdev = to_platform_device(dev);
+
+       if ((AB5500_NUM_BANKS <= bank) ||
+               !reg_write_allowed(pdev->id, bank, reg))
+               return -EINVAL;
+
+       ab = dev_get_drvdata(dev->parent);
+       return ab5500_mask_and_set_register_interruptible_raw(ab, bank, reg,
+               bitmask, bitvalues);
+}
+
+static int ab5500_set_register_interruptible(struct device *dev, u8 bank,
+       u8 reg, u8 value)
+{
+       return ab5500_mask_and_set_register_interruptible(dev, bank, reg, 0xFF,
+               value);
+}
+
+static int ab5500_get_register_interruptible(struct device *dev, u8 bank,
+               u8 reg, u8 *value)
+{
+       struct ab5500 *ab;
+       struct platform_device *pdev = to_platform_device(dev);
+
+       if ((AB5500_NUM_BANKS <= bank) ||
+               !reg_read_allowed(pdev->id, bank, reg))
+               return -EINVAL;
+
+       ab = dev_get_drvdata(dev->parent);
+       return ab5500_get_register_interruptible_raw(ab, bank, reg, value);
+}
+
+static int ab5500_get_register_page_interruptible(struct device *dev, u8 bank,
+               u8 first_reg, u8 *regvals, u8 numregs)
+{
+       struct ab5500 *ab;
+       struct platform_device *pdev = to_platform_device(dev);
+
+       if ((AB5500_NUM_BANKS <= bank) ||
+               !page_read_allowed(pdev->id, bank,
+                       first_reg, (first_reg + numregs - 1)))
+               return -EINVAL;
+
+       ab = dev_get_drvdata(dev->parent);
+       return get_register_page_interruptible(ab, bank, first_reg, regvals,
+               numregs);
+}
+
+static int
+ab5500_event_registers_startup_state_get(struct device *dev, u8 *event)
+{
+       struct ab5500 *ab;
+
+       ab = dev_get_drvdata(dev->parent);
+       if (!ab->startup_events_read)
+               return -EAGAIN; /* Try again later */
+
+       memcpy(event, ab->startup_events, AB5500_NUM_EVENT_REG);
+       return 0;
+}
+
+static struct abx500_ops ab5500_ops = {
+       .get_chip_id = ab5500_get_chip_id,
+       .get_register = ab5500_get_register_interruptible,
+       .set_register = ab5500_set_register_interruptible,
+       .get_register_page = ab5500_get_register_page_interruptible,
+       .set_register_page = NULL,
+       .mask_and_set_register = ab5500_mask_and_set_register_interruptible,
+       .event_registers_startup_state_get =
+               ab5500_event_registers_startup_state_get,
+       .startup_irq_enabled = NULL,
+};
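
/*
 * Hedged sketch of a consumer (not part of this patch): once probe has
 * called abx500_register_ops() with the table above, a subdriver bound
 * under the ab5500 core can reach the chip through the generic abx500
 * accessors. The bank and register constants here are illustrative
 * only.
 */
static int __maybe_unused example_read_reg(struct device *child, u8 *val)
{
        return abx500_get_register_interruptible(child, 0x00 /* bank */,
                                                 0x20 /* reg */, val);
}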
+
+/**
+ * ab5500_setup() - write the platform-provided default configuration
+ * @ab:                pointer to the ab5500 structure
+ * @settings:  array of struct abx500_init_settings
+ * @size:      number of entries in @settings
+ *
+ * Basic set-up over the I2C interface: programs a default config into
+ * the AB5500 chip so that it works as expected, and mirrors writes to
+ * the event mask registers into the driver's cached mask.
+ */
+static int __init ab5500_setup(struct ab5500 *ab,
+       struct abx500_init_settings *settings, unsigned int size)
+{
+       int err = 0;
+       int i;
+
+       for (i = 0; i < size; i++) {
+               err = ab5500_mask_and_set_register_interruptible_raw(ab,
+                       settings[i].bank,
+                       settings[i].reg,
+                       0xFF, settings[i].setting);
+               if (err)
+                       goto exit_no_setup;
+
+               /* If this is an event mask register, also update the cached mask. */
+               if ((settings[i].bank == AB5500_BANK_IT) &&
+                       (AB5500_MASK_BASE <= settings[i].reg) &&
+                       (settings[i].reg <= AB5500_MASK_END)) {
+                       ab->mask[settings[i].reg - AB5500_MASK_BASE] =
+                               settings[i].setting;
+               }
+       }
+exit_no_setup:
+       return err;
+}
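
/*
 * Illustrative platform data (not from this patch): board code hands
 * ab5500_setup() an array like the one below through
 * ab5500_platform_data. The register and value are invented; note that
 * entries hitting the AB5500_BANK_IT mask window
 * (AB5500_MASK_BASE..AB5500_MASK_END) also refresh the driver's cached
 * event mask, as the loop above shows.
 */
static struct abx500_init_settings example_init_settings[] __initdata = {
        {
                .bank = AB5500_BANK_IT,
                .reg = 0x60,            /* first event mask register */
                .setting = 0xff,        /* everything masked at boot */
        },
};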
+
+struct ab_family_id {
+       u8      id;
+       char    *name;
+};
+
+static const struct ab_family_id ids[] __initdata = {
+       /* AB5500 */
+       {
+               .id = AB5500_1_0,
+               .name = "1.0"
+       },
+       {
+               .id = AB5500_1_1,
+               .name = "1.1"
+       },
+       /* Terminator */
+       {
+               .id = 0x00,
+       }
+};
+
+static int __init ab5500_probe(struct platform_device *pdev)
+{
+       struct ab5500 *ab;
+       struct ab5500_platform_data *ab5500_plf_data =
+               pdev->dev.platform_data;
+       int err;
+       int i;
+
+       ab = kzalloc(sizeof(struct ab5500), GFP_KERNEL);
+       if (!ab) {
+               dev_err(&pdev->dev,
+                       "could not allocate ab5500 device\n");
+               return -ENOMEM;
+       }
+
+       /* Initialize data structure */
+       mutex_init(&ab->access_mutex);
+       mutex_init(&ab->irq_lock);
+       ab->dev = &pdev->dev;
+
+       platform_set_drvdata(pdev, ab);
+
+       /* Read chip ID register */
+       err = ab5500_get_register_interruptible_raw(ab,
+                                       AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP,
+                                       AB5500_CHIP_ID, &ab->chip_id);
+       if (err) {
+               dev_err(&pdev->dev, "could not communicate with the analog "
+                       "baseband chip\n");
+               goto exit_no_detect;
+       }
+
+       for (i = 0; ids[i].id != 0x0; i++) {
+               if (ids[i].id == ab->chip_id) {
+                       snprintf(&ab->chip_name[0], sizeof(ab->chip_name) - 1,
+                               "AB5500 %s", ids[i].name);
+                       break;
+               }
+       }
+       if (ids[i].id == 0x0) {
+               dev_err(&pdev->dev, "unknown analog baseband chip id: 0x%x\n",
+                       ab->chip_id);
+               dev_err(&pdev->dev, "driver not started!\n");
+               goto exit_no_detect;
+       }
+
+       /* Clear and mask all interrupts */
+       for (i = 0; i < AB5500_NUM_IRQ_REGS; i++) {
+               u8 latchreg = AB5500_IT_LATCH0_REG + i;
+               u8 maskreg = AB5500_IT_MASK0_REG + i;
+               u8 val;
+
+               ab5500_get_register_interruptible_raw(ab, AB5500_BANK_IT,
+                                                     latchreg, &val);
+               set_register_interruptible(ab, AB5500_BANK_IT, maskreg, 0xff);
+               ab->mask[i] = ab->oldmask[i] = 0xff;
+       }
+
+       err = abx500_register_ops(&pdev->dev, &ab5500_ops);
+       if (err) {
+               dev_err(&pdev->dev, "abx500_register_ops error\n");
+               goto exit_no_detect;
+       }
+
+       /* Set up and register the platform devices. */
+       for (i = 0; i < AB5500_NUM_DEVICES; i++) {
+               ab5500_devs[i].platform_data = ab5500_plf_data->dev_data[i];
+               ab5500_devs[i].pdata_size =
+                       sizeof(ab5500_plf_data->dev_data[i]);
+       }
+
+       err = mfd_add_devices(&pdev->dev, 0, ab5500_devs,
+               ARRAY_SIZE(ab5500_devs), NULL,
+               ab5500_plf_data->irq.base);
+       if (err) {
+               dev_err(&pdev->dev, "mfd_add_devices error\n");
+               goto exit_no_detect;
+       }
+
+       err = ab5500_setup(ab, ab5500_plf_data->init_settings,
+               ab5500_plf_data->init_settings_sz);
+       if (err) {
+               dev_err(&pdev->dev, "ab5500_setup error\n");
+               goto exit_no_detect;
+       }
+
+       ab5500_setup_debugfs(ab);
+
+       dev_info(&pdev->dev, "detected AB chip: %s\n", &ab->chip_name[0]);
+       return 0;
+
+exit_no_detect:
+       kfree(ab);
+       return err;
+}
+
+static int __exit ab5500_remove(struct platform_device *pdev)
+{
+       struct ab5500 *ab = platform_get_drvdata(pdev);
+
+       ab5500_remove_debugfs();
+       mfd_remove_devices(&pdev->dev);
+       kfree(ab);
+       return 0;
+}
+
+static struct platform_driver ab5500_driver = {
+       .driver = {
+               .name = "ab5500-core",
+               .owner = THIS_MODULE,
+       },
+       .remove  = __exit_p(ab5500_remove),
+};
+
+static int __init ab5500_core_init(void)
+{
+       return platform_driver_probe(&ab5500_driver, ab5500_probe);
+}
+
+static void __exit ab5500_core_exit(void)
+{
+       platform_driver_unregister(&ab5500_driver);
+}
+
+subsys_initcall(ab5500_core_init);
+module_exit(ab5500_core_exit);
+
+MODULE_AUTHOR("Mattias Wallin <mattias.wallin@stericsson.com>");
+MODULE_DESCRIPTION("AB5500 core driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/ab5500-core.h b/drivers/mfd/ab5500-core.h
new file mode 100644 (file)
index 0000000..63b30b1
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2011 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Shared definitions and data structures for the AB5500 MFD driver
+ */
+
+/* Individual access permission bits. */
+#define AB5500_PERM_RD (0x01)
+#define AB5500_PERM_WR (0x02)
+
+/* Composite read-only/read-write permissions. */
+#define AB5500_PERM_RO (AB5500_PERM_RD)
+#define AB5500_PERM_RW (AB5500_PERM_RD | AB5500_PERM_WR)
+
+#define AB5500_MASK_BASE (0x60)
+#define AB5500_MASK_END (0x79)
+#define AB5500_CHIP_ID (0x20)
+
+/**
+ * struct ab5500_reg_range
+ * @first: the first address of the range
+ * @last: the last address of the range
+ * @perm: access permissions for the range
+ */
+struct ab5500_reg_range {
+       u8 first;
+       u8 last;
+       u8 perm;
+};
+
+/**
+ * struct ab5500_i2c_ranges
+ * @nranges: the number of ranges in the list
+ * @bankid: bank identifier
+ * @range: the list of register ranges
+ */
+struct ab5500_i2c_ranges {
+       u8 nranges;
+       u8 bankid;
+       const struct ab5500_reg_range *range;
+};
+
+/**
+ * struct ab5500_i2c_banks
+ * @nbanks: the number of banks in the list
+ * @bank: the register range lists, one per bank
+ */
+struct ab5500_i2c_banks {
+       u8 nbanks;
+       const struct ab5500_i2c_ranges *bank;
+};
+
+/**
+ * struct ab5500_bank
+ * @slave_addr: I2C slave address, as given in the AB5500 specification
+ * @name: name of the bank as used in the documentation, for reference
+ */
+struct ab5500_bank {
+       u8 slave_addr;
+       const char *name;
+};
+
+static const struct ab5500_bank bankinfo[AB5500_NUM_BANKS] = {
+       [AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP] = {
+               AB5500_ADDR_VIT_IO_I2C_CLK_TST_OTP, "VIT_IO_I2C_CLK_TST_OTP"},
+       [AB5500_BANK_VDDDIG_IO_I2C_CLK_TST] = {
+               AB5500_ADDR_VDDDIG_IO_I2C_CLK_TST, "VDDDIG_IO_I2C_CLK_TST"},
+       [AB5500_BANK_VDENC] = {AB5500_ADDR_VDENC, "VDENC"},
+       [AB5500_BANK_SIM_USBSIM] = {AB5500_ADDR_SIM_USBSIM, "SIM_USBSIM"},
+       [AB5500_BANK_LED] = {AB5500_ADDR_LED, "LED"},
+       [AB5500_BANK_ADC] = {AB5500_ADDR_ADC, "ADC"},
+       [AB5500_BANK_RTC] = {AB5500_ADDR_RTC, "RTC"},
+       [AB5500_BANK_STARTUP] = {AB5500_ADDR_STARTUP, "STARTUP"},
+       [AB5500_BANK_DBI_ECI] = {AB5500_ADDR_DBI_ECI, "DBI-ECI"},
+       [AB5500_BANK_CHG] = {AB5500_ADDR_CHG, "CHG"},
+       [AB5500_BANK_FG_BATTCOM_ACC] = {
+               AB5500_ADDR_FG_BATTCOM_ACC, "FG_BATTCOM_ACC"},
+       [AB5500_BANK_USB] = {AB5500_ADDR_USB, "USB"},
+       [AB5500_BANK_IT] = {AB5500_ADDR_IT, "IT"},
+       [AB5500_BANK_VIBRA] = {AB5500_ADDR_VIBRA, "VIBRA"},
+       [AB5500_BANK_AUDIO_HEADSETUSB] = {
+               AB5500_ADDR_AUDIO_HEADSETUSB, "AUDIO_HEADSETUSB"},
+};
+
+int ab5500_get_register_interruptible_raw(struct ab5500 *ab, u8 bank, u8 reg,
+       u8 *value);
+int ab5500_mask_and_set_register_interruptible_raw(struct ab5500 *ab, u8 bank,
+       u8 reg, u8 bitmask, u8 bitvalues);
diff --git a/drivers/mfd/ab5500-debugfs.c b/drivers/mfd/ab5500-debugfs.c
new file mode 100644 (file)
index 0000000..6be1fe6
--- /dev/null
@@ -0,0 +1,806 @@
+/*
+ * Copyright (C) 2011 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Debugfs support for the AB5500 MFD driver
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/mfd/ab5500/ab5500.h>
+#include <linux/mfd/abx500.h>
+#include <linux/uaccess.h>
+
+#include "ab5500-core.h"
+#include "ab5500-debugfs.h"
+
+static struct ab5500_i2c_ranges ab5500_reg_ranges[AB5500_NUM_BANKS] = {
+       [AB5500_BANK_LED] = {
+               .bankid = AB5500_BANK_LED,
+               .nranges = 1,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x00,
+                               .last = 0x0C,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_ADC] = {
+               .bankid = AB5500_BANK_ADC,
+               .nranges = 6,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x1F,
+                               .last = 0x22,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x23,
+                               .last = 0x24,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x26,
+                               .last = 0x2D,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x2F,
+                               .last = 0x34,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x37,
+                               .last = 0x57,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x58,
+                               .last = 0x58,
+                               .perm = AB5500_PERM_RO,
+                       },
+               },
+       },
+       [AB5500_BANK_RTC] = {
+               .bankid = AB5500_BANK_RTC,
+               .nranges = 2,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x00,
+                               .last = 0x04,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x06,
+                               .last = 0x0C,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_STARTUP] = {
+               .bankid = AB5500_BANK_STARTUP,
+               .nranges = 12,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x00,
+                               .last = 0x01,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x1F,
+                               .last = 0x1F,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x2E,
+                               .last = 0x2E,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x2F,
+                               .last = 0x30,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x50,
+                               .last = 0x51,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x60,
+                               .last = 0x61,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x66,
+                               .last = 0x8A,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x8C,
+                               .last = 0x96,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0xAA,
+                               .last = 0xB4,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0xB7,
+                               .last = 0xBF,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0xC1,
+                               .last = 0xCA,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0xD3,
+                               .last = 0xE0,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_DBI_ECI] = {
+               .bankid = AB5500_BANK_DBI_ECI,
+               .nranges = 3,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x00,
+                               .last = 0x07,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x10,
+                               .last = 0x10,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x13,
+                               .last = 0x13,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_CHG] = {
+               .bankid = AB5500_BANK_CHG,
+               .nranges = 2,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x11,
+                               .last = 0x11,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x12,
+                               .last = 0x1B,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_FG_BATTCOM_ACC] = {
+               .bankid = AB5500_BANK_FG_BATTCOM_ACC,
+               .nranges = 2,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x00,
+                               .last = 0x0B,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x0C,
+                               .last = 0x10,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_USB] = {
+               .bankid = AB5500_BANK_USB,
+               .nranges = 12,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x01,
+                               .last = 0x01,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x80,
+                               .last = 0x83,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x87,
+                               .last = 0x8A,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x8B,
+                               .last = 0x8B,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x91,
+                               .last = 0x92,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x93,
+                               .last = 0x93,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x94,
+                               .last = 0x94,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0xA8,
+                               .last = 0xB0,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0xB2,
+                               .last = 0xB2,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0xB4,
+                               .last = 0xBC,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0xBF,
+                               .last = 0xBF,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0xC1,
+                               .last = 0xC5,
+                               .perm = AB5500_PERM_RO,
+                       },
+               },
+       },
+       [AB5500_BANK_IT] = {
+               .bankid = AB5500_BANK_IT,
+               .nranges = 4,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x00,
+                               .last = 0x02,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x20,
+                               .last = 0x36,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x40,
+                               .last = 0x56,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x60,
+                               .last = 0x76,
+                               .perm = AB5500_PERM_RO,
+                       },
+               },
+       },
+       [AB5500_BANK_VDDDIG_IO_I2C_CLK_TST] = {
+               .bankid = AB5500_BANK_VDDDIG_IO_I2C_CLK_TST,
+               .nranges = 7,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x02,
+                               .last = 0x02,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x12,
+                               .last = 0x12,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x30,
+                               .last = 0x34,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x40,
+                               .last = 0x44,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x50,
+                               .last = 0x54,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x60,
+                               .last = 0x64,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x70,
+                               .last = 0x74,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP] = {
+               .bankid = AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP,
+               .nranges = 13,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x01,
+                               .last = 0x01,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x02,
+                               .last = 0x02,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x0D,
+                               .last = 0x0F,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x1C,
+                               .last = 0x1C,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x1E,
+                               .last = 0x1E,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x20,
+                               .last = 0x21,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x25,
+                               .last = 0x25,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x28,
+                               .last = 0x2A,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x30,
+                               .last = 0x33,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x40,
+                               .last = 0x43,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x50,
+                               .last = 0x53,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x60,
+                               .last = 0x63,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x70,
+                               .last = 0x73,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_VIBRA] = {
+               .bankid = AB5500_BANK_VIBRA,
+               .nranges = 2,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x10,
+                               .last = 0x13,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0xFE,
+                               .last = 0xFE,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_AUDIO_HEADSETUSB] = {
+               .bankid = AB5500_BANK_AUDIO_HEADSETUSB,
+               .nranges = 2,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x00,
+                               .last = 0x48,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0xEB,
+                               .last = 0xFB,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_SIM_USBSIM] = {
+               .bankid = AB5500_BANK_SIM_USBSIM,
+               .nranges = 1,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x13,
+                               .last = 0x19,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+       [AB5500_BANK_VDENC] = {
+               .bankid = AB5500_BANK_VDENC,
+               .nranges = 12,
+               .range = (struct ab5500_reg_range[]) {
+                       {
+                               .first = 0x00,
+                               .last = 0x08,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x09,
+                               .last = 0x09,
+                               .perm = AB5500_PERM_RO,
+                       },
+                       {
+                               .first = 0x0A,
+                               .last = 0x12,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x15,
+                               .last = 0x19,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x1B,
+                               .last = 0x21,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x27,
+                               .last = 0x2C,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x41,
+                               .last = 0x41,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x45,
+                               .last = 0x5B,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x5D,
+                               .last = 0x5D,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x69,
+                               .last = 0x69,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x6C,
+                               .last = 0x6D,
+                               .perm = AB5500_PERM_RW,
+                       },
+                       {
+                               .first = 0x80,
+                               .last = 0x81,
+                               .perm = AB5500_PERM_RW,
+                       },
+               },
+       },
+};
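
/*
 * Sanity-check sketch (illustrative, not part of this patch): the
 * permission-scan helpers in ab5500-core.c assume every bank's range
 * list is sorted by address and non-overlapping. A debug build could
 * verify that invariant over a table like the one above with a helper
 * such as this.
 */
static bool __maybe_unused ab5500_ranges_sorted(
        const struct ab5500_i2c_ranges *b)
{
        u8 i;

        for (i = 1; i < b->nranges; i++)
                if (b->range[i].first <= b->range[i - 1].last)
                        return false;
        return true;
}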
+
+static int ab5500_registers_print(struct seq_file *s, void *p)
+{
+       struct ab5500 *ab = s->private;
+       unsigned int i;
+       u8 bank;
+
+       seq_printf(s, "ab5500 register values:\n");
+       for (bank = 0; bank < AB5500_NUM_BANKS; bank++) {
+               seq_printf(s, " bank %u, %s (0x%x):\n", bank,
+                               bankinfo[bank].name,
+                               bankinfo[bank].slave_addr);
+               for (i = 0; i < ab5500_reg_ranges[bank].nranges; i++) {
+                       u8 reg;
+                       int err;
+
+                       for (reg = ab5500_reg_ranges[bank].range[i].first;
+                               reg <= ab5500_reg_ranges[bank].range[i].last;
+                               reg++) {
+                               u8 value;
+
+                               err = ab5500_get_register_interruptible_raw(ab,
+                                                               bank, reg,
+                                                               &value);
+                               if (err < 0) {
+                                       dev_err(ab->dev, "get_reg failed %d, "
+                                               "bank 0x%x reg 0x%x\n",
+                                               err, bank, reg);
+                                       return err;
+                               }
+
+                               err = seq_printf(s, "[%d/0x%02X]: 0x%02X\n",
+                                               bank, reg, value);
+                               if (err < 0) {
+                                       dev_err(ab->dev,
+                                               "seq_printf overflow\n");
+                                       /*
+                                        * Error is not returned here since
+                                        * the output is wanted in any case
+                                        */
+                                       return 0;
+                               }
+                       }
+               }
+       }
+       return 0;
+}
+
+static int ab5500_registers_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, ab5500_registers_print, inode->i_private);
+}
+
+static const struct file_operations ab5500_registers_fops = {
+       .open = ab5500_registers_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+       .owner = THIS_MODULE,
+};
+
+static int ab5500_bank_print(struct seq_file *s, void *p)
+{
+       struct ab5500 *ab = s->private;
+
+       seq_printf(s, "%d\n", ab->debug_bank);
+       return 0;
+}
+
+static int ab5500_bank_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, ab5500_bank_print, inode->i_private);
+}
+
+static ssize_t ab5500_bank_write(struct file *file,
+       const char __user *user_buf,
+       size_t count, loff_t *ppos)
+{
+       struct ab5500 *ab = ((struct seq_file *)(file->private_data))->private;
+       char buf[32];
+       int buf_size;
+       unsigned long user_bank;
+       int err;
+
+       /* Get userspace string and assure termination */
+       buf_size = min(count, (sizeof(buf) - 1));
+       if (copy_from_user(buf, user_buf, buf_size))
+               return -EFAULT;
+       buf[buf_size] = 0;
+
+       err = strict_strtoul(buf, 0, &user_bank);
+       if (err)
+               return -EINVAL;
+
+       if (user_bank >= AB5500_NUM_BANKS) {
+               dev_err(ab->dev,
+                       "debugfs error input > number of banks\n");
+               return -EINVAL;
+       }
+
+       ab->debug_bank = user_bank;
+
+       return buf_size;
+}
+
+static int ab5500_address_print(struct seq_file *s, void *p)
+{
+       struct ab5500 *ab = s->private;
+
+       seq_printf(s, "0x%02X\n", ab->debug_address);
+       return 0;
+}
+
+static int ab5500_address_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, ab5500_address_print, inode->i_private);
+}
+
+static ssize_t ab5500_address_write(struct file *file,
+       const char __user *user_buf,
+       size_t count, loff_t *ppos)
+{
+       struct ab5500 *ab = ((struct seq_file *)(file->private_data))->private;
+       char buf[32];
+       int buf_size;
+       unsigned long user_address;
+       int err;
+
+       /* Get userspace string and assure termination */
+       buf_size = min(count, (sizeof(buf) - 1));
+       if (copy_from_user(buf, user_buf, buf_size))
+               return -EFAULT;
+       buf[buf_size] = 0;
+
+       err = strict_strtoul(buf, 0, &user_address);
+       if (err)
+               return -EINVAL;
+       if (user_address > 0xff) {
+               dev_err(ab->dev,
+                       "debugfs error input > 0xff\n");
+               return -EINVAL;
+       }
+       ab->debug_address = user_address;
+       return buf_size;
+}
+
+static int ab5500_val_print(struct seq_file *s, void *p)
+{
+       struct ab5500 *ab = s->private;
+       int err;
+       u8 regvalue;
+
+       err = ab5500_get_register_interruptible_raw(ab, (u8)ab->debug_bank,
+               (u8)ab->debug_address, &regvalue);
+       if (err) {
+               dev_err(ab->dev, "get_reg failed %d, bank 0x%x"
+                       ", reg 0x%x\n", err, ab->debug_bank,
+                       ab->debug_address);
+               return -EINVAL;
+       }
+       seq_printf(s, "0x%02X\n", regvalue);
+
+       return 0;
+}
+
+static int ab5500_val_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, ab5500_val_print, inode->i_private);
+}
+
+static ssize_t ab5500_val_write(struct file *file,
+       const char __user *user_buf,
+       size_t count, loff_t *ppos)
+{
+       struct ab5500 *ab = ((struct seq_file *)(file->private_data))->private;
+       char buf[32];
+       int buf_size;
+       unsigned long user_val;
+       int err;
+       u8 regvalue;
+
+       /* Get userspace string and assure termination */
+       buf_size = min(count, (sizeof(buf) - 1));
+       if (copy_from_user(buf, user_buf, buf_size))
+               return -EFAULT;
+       buf[buf_size] = 0;
+
+       err = strict_strtoul(buf, 0, &user_val);
+       if (err)
+               return -EINVAL;
+       if (user_val > 0xff) {
+               dev_err(ab->dev,
+                       "debugfs error input > 0xff\n");
+               return -EINVAL;
+       }
+       err = ab5500_mask_and_set_register_interruptible_raw(
+               ab, (u8)ab->debug_bank,
+               (u8)ab->debug_address, 0xFF, (u8)user_val);
+       if (err)
+               return -EINVAL;
+
+       /* Read the register back to verify that the write went through. */
+       err = ab5500_get_register_interruptible_raw(ab, (u8)ab->debug_bank,
+               (u8)ab->debug_address, &regvalue);
+       if (err)
+               return -EINVAL;
+
+       return buf_size;
+}
+
+static const struct file_operations ab5500_bank_fops = {
+       .open = ab5500_bank_open,
+       .write = ab5500_bank_write,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+       .owner = THIS_MODULE,
+};
+
+static const struct file_operations ab5500_address_fops = {
+       .open = ab5500_address_open,
+       .write = ab5500_address_write,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+       .owner = THIS_MODULE,
+};
+
+static const struct file_operations ab5500_val_fops = {
+       .open = ab5500_val_open,
+       .write = ab5500_val_write,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+       .owner = THIS_MODULE,
+};
+
+static struct dentry *ab5500_dir;
+static struct dentry *ab5500_reg_file;
+static struct dentry *ab5500_bank_file;
+static struct dentry *ab5500_address_file;
+static struct dentry *ab5500_val_file;
+
+void __init ab5500_setup_debugfs(struct ab5500 *ab)
+{
+       ab->debug_bank = AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP;
+       ab->debug_address = AB5500_CHIP_ID;
+
+       ab5500_dir = debugfs_create_dir("ab5500", NULL);
+       if (!ab5500_dir)
+               goto exit_no_debugfs;
+
+       ab5500_reg_file = debugfs_create_file("all-bank-registers",
+               S_IRUGO, ab5500_dir, ab, &ab5500_registers_fops);
+       if (!ab5500_reg_file)
+               goto exit_destroy_dir;
+
+       ab5500_bank_file = debugfs_create_file("register-bank",
+               (S_IRUGO | S_IWUGO), ab5500_dir, ab, &ab5500_bank_fops);
+       if (!ab5500_bank_file)
+               goto exit_destroy_reg;
+
+       ab5500_address_file = debugfs_create_file("register-address",
+               (S_IRUGO | S_IWUGO), ab5500_dir, ab, &ab5500_address_fops);
+       if (!ab5500_address_file)
+               goto exit_destroy_bank;
+
+       ab5500_val_file = debugfs_create_file("register-value",
+               (S_IRUGO | S_IWUGO), ab5500_dir, ab, &ab5500_val_fops);
+       if (!ab5500_val_file)
+               goto exit_destroy_address;
+
+       return;
+
+exit_destroy_address:
+       debugfs_remove(ab5500_address_file);
+exit_destroy_bank:
+       debugfs_remove(ab5500_bank_file);
+exit_destroy_reg:
+       debugfs_remove(ab5500_reg_file);
+exit_destroy_dir:
+       debugfs_remove(ab5500_dir);
+exit_no_debugfs:
+       dev_err(ab->dev, "failed to create debugfs entries.\n");
+       return;
+}
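
/*
 * Resulting layout, for reference (usage sketch, not part of this
 * patch); paths assume debugfs is mounted at /sys/kernel/debug:
 *
 *   ab5500/all-bank-registers   dump every readable register
 *   ab5500/register-bank        select the bank to operate on
 *   ab5500/register-address     select the register address
 *   ab5500/register-value       read or write the selected register
 *
 * Example shell session (values invented):
 *   echo 6 > /sys/kernel/debug/ab5500/register-bank
 *   echo 0x00 > /sys/kernel/debug/ab5500/register-address
 *   cat /sys/kernel/debug/ab5500/register-value
 */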
+
+void __exit ab5500_remove_debugfs(void)
+{
+       debugfs_remove(ab5500_val_file);
+       debugfs_remove(ab5500_address_file);
+       debugfs_remove(ab5500_bank_file);
+       debugfs_remove(ab5500_reg_file);
+       debugfs_remove(ab5500_dir);
+}
diff --git a/drivers/mfd/ab5500-debugfs.h b/drivers/mfd/ab5500-debugfs.h
new file mode 100644 (file)
index 0000000..7330a9b
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2011 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Debugfs interface to the AB5500 core driver
+ */
+
+#ifdef CONFIG_DEBUG_FS
+
+void ab5500_setup_debugfs(struct ab5500 *ab);
+void ab5500_remove_debugfs(void);
+
+#else /* !CONFIG_DEBUG_FS */
+
+static inline void ab5500_setup_debugfs(struct ab5500 *ab)
+{
+}
+
+static inline void ab5500_remove_debugfs(void)
+{
+}
+
+#endif
index 387705e494b963a3b6fa9ded763cf2cc6d8f90a7..1e9173804ede2bacb93a7179d5d7c817c79dfcf5 100644 (file)
@@ -92,6 +92,8 @@
 #define AB8500_REV_REG                 0x80
 #define AB8500_SWITCH_OFF_STATUS       0x00
 
+#define AB8500_TURN_ON_STATUS          0x00
+
 /*
  * Map interrupt numbers to the LATCH and MASK register offsets, Interrupt
  * numbers are indexed into this array with (num / 8).
@@ -293,6 +295,7 @@ static struct irq_chip ab8500_irq_chip = {
        .irq_bus_lock           = ab8500_irq_lock,
        .irq_bus_sync_unlock    = ab8500_irq_sync_unlock,
        .irq_mask               = ab8500_irq_mask,
+       .irq_disable            = ab8500_irq_mask,
        .irq_unmask             = ab8500_irq_unmask,
 };
 
@@ -811,12 +814,40 @@ static ssize_t show_switch_off_status(struct device *dev,
        return sprintf(buf, "%#x\n", value);
 }
 
+/*
+ * Causes for the ab8500 turning on, as reported in TURN_ON_STATUS:
+ * 0x01 PORnVbat
+ * 0x02 PonKey1dbF
+ * 0x04 PonKey2dbF
+ * 0x08 RTCAlarm
+ * 0x10 MainChDet
+ * 0x20 VbusDet
+ * 0x40 UsbIDDetect
+ * 0x80 Reserved
+ */
+static ssize_t show_turn_on_status(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       int ret;
+       u8 value;
+       struct ab8500 *ab8500;
+
+       ab8500 = dev_get_drvdata(dev);
+       ret = get_register_interruptible(ab8500, AB8500_SYS_CTRL1_BLOCK,
+               AB8500_TURN_ON_STATUS, &value);
+       if (ret < 0)
+               return ret;
+       return sprintf(buf, "%#x\n", value);
+}
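
/*
 * Decode sketch (illustrative, not part of this patch): splitting the
 * TURN_ON_STATUS value read above into the documented cause bits. The
 * helper and its names are local to the example.
 */
static const char * const example_turn_on_causes[] = {
        "PORnVbat", "PonKey1dbF", "PonKey2dbF", "RTCAlarm",
        "MainChDet", "VbusDet", "UsbIDDetect",
};

static void __maybe_unused example_print_turn_on(struct device *dev, u8 status)
{
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(example_turn_on_causes); i++)
                if (status & (1 << i))
                        dev_info(dev, "turn-on cause: %s\n",
                                 example_turn_on_causes[i]);
}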
+
 static DEVICE_ATTR(chip_id, S_IRUGO, show_chip_id, NULL);
 static DEVICE_ATTR(switch_off_status, S_IRUGO, show_switch_off_status, NULL);
+static DEVICE_ATTR(turn_on_status, S_IRUGO, show_turn_on_status, NULL);
 
 static struct attribute *ab8500_sysfs_entries[] = {
        &dev_attr_chip_id.attr,
        &dev_attr_switch_off_status.attr,
+       &dev_attr_turn_on_status.attr,
        NULL,
 };
 
@@ -843,11 +874,11 @@ int __devinit ab8500_init(struct ab8500 *ab8500)
                return ret;
 
        switch (value) {
-       case AB8500_CUTEARLY:
        case AB8500_CUT1P0:
        case AB8500_CUT1P1:
        case AB8500_CUT2P0:
        case AB8500_CUT3P0:
+       case AB8500_CUT3P3:
                dev_info(ab8500->dev, "detected chip, revision: %#x\n", value);
                break;
        default:
index f16afb234ff98a250d0ded3a9aac0d1768ab679b..e985d1701a83df56a463cb47ff2a1169bc894ba6 100644 (file)
@@ -143,12 +143,15 @@ struct ab8500_gpadc *ab8500_gpadc_get(char *name)
 }
 EXPORT_SYMBOL(ab8500_gpadc_get);
 
-static int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, u8 input,
+/**
+ * ab8500_gpadc_ad_to_voltage() - Convert a raw ADC value to a voltage
+ * @gpadc:     pointer to the struct gpadc
+ * @channel:   analog channel the value was sampled from
+ * @ad_value:  the raw ADC value to convert
+ */
+int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, u8 channel,
        int ad_value)
 {
        int res;
 
-       switch (input) {
+       switch (channel) {
        case MAIN_CHARGER_V:
                /* For some reason we don't have calibrated data */
                if (!gpadc->cal_data[ADC_INPUT_VMAIN].gain) {
@@ -232,18 +235,46 @@ static int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, u8 input,
        }
        return res;
 }
+EXPORT_SYMBOL(ab8500_gpadc_ad_to_voltage);
 
 /**
  * ab8500_gpadc_convert() - gpadc conversion
- * @input:     analog input to be converted to digital data
+ * @channel:   analog channel to be converted to digital data
  *
  * This function converts the selected analog input to digital
  * data.
  */
-int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 input)
+int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel)
+{
+       int ad_value;
+       int voltage;
+
+       ad_value = ab8500_gpadc_read_raw(gpadc, channel);
+       if (ad_value < 0) {
+               dev_err(gpadc->dev, "GPADC raw value failed ch: %d\n", channel);
+               return ad_value;
+       }
+
+       voltage = ab8500_gpadc_ad_to_voltage(gpadc, channel, ad_value);
+
+       if (voltage < 0)
+               dev_err(gpadc->dev, "GPADC to voltage conversion failed ch:"
+                       " %d AD: 0x%x\n", channel, ad_value);
+
+       return voltage;
+}
+EXPORT_SYMBOL(ab8500_gpadc_convert);
+
+/**
+ * ab8500_gpadc_read_raw() - gpadc read
+ * @gpadc:     pointer to the struct gpadc
+ * @channel:   analog channel to be read
+ *
+ * This function obtains the raw ADC value; it then needs to be
+ * converted to a voltage by calling ab8500_gpadc_ad_to_voltage().
+ */
+int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel)
 {
        int ret;
-       u16 data = 0;
        int looplimit = 0;
        u8 val, low_data, high_data;
 
@@ -278,9 +309,9 @@ int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 input)
                goto out;
        }
 
-       /* Select the input source and set average samples to 16 */
+       /* Select the channel source and set average samples to 16 */
        ret = abx500_set_register_interruptible(gpadc->dev, AB8500_GPADC,
-               AB8500_GPADC_CTRL2_REG, (input | SW_AVG_16));
+               AB8500_GPADC_CTRL2_REG, (channel | SW_AVG_16));
        if (ret < 0) {
                dev_err(gpadc->dev,
                        "gpadc_conversion: set avg samples failed\n");
@@ -292,7 +323,7 @@ int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 input)
         * charging current sense if it needed, ABB 3.0 needs some special
         * treatment too.
         */
-       switch (input) {
+       switch (channel) {
        case MAIN_CHARGER_C:
        case USB_CHARGER_C:
                ret = abx500_mask_and_set_register_interruptible(gpadc->dev,
@@ -359,7 +390,6 @@ int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 input)
                goto out;
        }
 
-       data = (high_data << 8) | low_data;
        /* Disable GPADC */
        ret = abx500_set_register_interruptible(gpadc->dev, AB8500_GPADC,
                AB8500_GPADC_CTRL1_REG, DIS_GPADC);
@@ -370,8 +400,8 @@ int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 input)
        /* Disable VTVout LDO this is required for GPADC */
        regulator_disable(gpadc->regu);
        mutex_unlock(&gpadc->ab8500_gpadc_lock);
-       ret = ab8500_gpadc_ad_to_voltage(gpadc, input, data);
-       return ret;
+
+       return (high_data << 8) | low_data;
 
 out:
        /*
@@ -385,10 +415,10 @@ out:
        regulator_disable(gpadc->regu);
        mutex_unlock(&gpadc->ab8500_gpadc_lock);
        dev_err(gpadc->dev,
-               "gpadc_conversion: Failed to AD convert channel %d\n", input);
+               "gpadc_conversion: Failed to AD convert channel %d\n", channel);
        return ret;
 }
-EXPORT_SYMBOL(ab8500_gpadc_convert);
+EXPORT_SYMBOL(ab8500_gpadc_read_raw);
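
/*
 * Usage sketch for the split API (assumed caller, not from this
 * patch): a consumer that wants both the raw sample and the calibrated
 * voltage can now do the conversion in two explicit steps rather than
 * calling ab8500_gpadc_convert().
 */
static int __maybe_unused example_sample(struct ab8500_gpadc *gpadc, u8 ch)
{
        int raw = ab8500_gpadc_read_raw(gpadc, ch);

        if (raw < 0)
                return raw;
        return ab8500_gpadc_ad_to_voltage(gpadc, ch, raw);
}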
 
 /**
  * ab8500_bm_gpswadcconvend_handler() - isr for s/w gpadc conversion completion
index c71ae09430c5c580e7f0cc5bbd0d3610fd8b5048..3bd85bddf6e31e51096937abdb60ff170779e1c3 100644 (file)
@@ -584,7 +584,7 @@ static int asic3_gpio_remove(struct platform_device *pdev)
        return gpiochip_remove(&asic->gpio);
 }
 
-static int asic3_clk_enable(struct asic3 *asic, struct asic3_clk *clk)
+static void asic3_clk_enable(struct asic3 *asic, struct asic3_clk *clk)
 {
        unsigned long flags;
        u32 cdex;
@@ -596,8 +596,6 @@ static int asic3_clk_enable(struct asic3 *asic, struct asic3_clk *clk)
                asic3_write_register(asic, ASIC3_OFFSET(CLOCK, CDEX), cdex);
        }
        spin_unlock_irqrestore(&asic->lock, flags);
-
-       return 0;
 }
 
 static void asic3_clk_disable(struct asic3 *asic, struct asic3_clk *clk)
@@ -779,6 +777,8 @@ static struct mfd_cell asic3_cell_mmc = {
        .name          = "tmio-mmc",
        .enable        = asic3_mmc_enable,
        .disable       = asic3_mmc_disable,
+       .suspend       = asic3_mmc_disable,
+       .resume        = asic3_mmc_enable,
        .platform_data = &asic3_mmc_data,
        .pdata_size    = sizeof(asic3_mmc_data),
        .num_resources = ARRAY_SIZE(asic3_mmc_resources),
@@ -811,24 +811,43 @@ static int asic3_leds_disable(struct platform_device *pdev)
        return 0;
 }
 
+static int asic3_leds_suspend(struct platform_device *pdev)
+{
+       const struct mfd_cell *cell = mfd_get_cell(pdev);
+       struct asic3 *asic = dev_get_drvdata(pdev->dev.parent);
+
+       while (asic3_gpio_get(&asic->gpio, ASIC3_GPIO(C, cell->id)) != 0)
+               msleep(1);
+
+       asic3_clk_disable(asic, &asic->clocks[clock_ledn[cell->id]]);
+
+       return 0;
+}
+
 static struct mfd_cell asic3_cell_leds[ASIC3_NUM_LEDS] = {
        [0] = {
                .name          = "leds-asic3",
                .id            = 0,
                .enable        = asic3_leds_enable,
                .disable       = asic3_leds_disable,
+               .suspend       = asic3_leds_suspend,
+               .resume        = asic3_leds_enable,
        },
        [1] = {
                .name          = "leds-asic3",
                .id            = 1,
                .enable        = asic3_leds_enable,
                .disable       = asic3_leds_disable,
+               .suspend       = asic3_leds_suspend,
+               .resume        = asic3_leds_enable,
        },
        [2] = {
                .name          = "leds-asic3",
                .id            = 2,
                .enable        = asic3_leds_enable,
                .disable       = asic3_leds_disable,
+               .suspend       = asic3_leds_suspend,
+               .resume        = asic3_leds_enable,
        },
 };
 
@@ -949,6 +968,7 @@ static int __init asic3_probe(struct platform_device *pdev)
                goto out_unmap;
        }
 
+       asic->gpio.label = "asic3";
        asic->gpio.base = pdata->gpio_base;
        asic->gpio.ngpio = ASIC3_NUM_GPIOS;
        asic->gpio.get = asic3_gpio_get;
index 2fadbaeb1cb138da62a91f57f394599058c70624..1b79c37fd59901b882fc0b995e6182fe43a4eedb 100644 (file)
@@ -523,7 +523,7 @@ static int __devinit da903x_probe(struct i2c_client *client,
        chip->ops->read_events(chip, &tmp);
 
        ret = request_irq(client->irq, da903x_irq_handler,
-                       IRQF_DISABLED | IRQF_TRIGGER_FALLING,
+                       IRQF_TRIGGER_FALLING,
                        "da903x", chip);
        if (ret) {
                dev_err(&client->dev, "failed to request irq %d\n",
diff --git a/drivers/mfd/db5500-prcmu-regs.h b/drivers/mfd/db5500-prcmu-regs.h
deleted file mode 100644 (file)
index 9a8e9e4..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (C) STMicroelectronics 2009
- * Copyright (C) ST-Ericsson SA 2010
- *
- * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
- * Author: Sundar Iyer <sundar.iyer@stericsson.com>
- *
- * License Terms: GNU General Public License v2
- *
- * PRCM Unit registers
- */
-
-#ifndef __MACH_PRCMU_REGS_H
-#define __MACH_PRCMU_REGS_H
-
-#include <mach/hardware.h>
-
-#define PRCM_ARM_PLLDIVPS      (_PRCMU_BASE + 0x118)
-#define PRCM_ARM_PLLDIVPS_ARM_BRM_RATE         0x3f
-#define PRCM_ARM_PLLDIVPS_MAX_MASK             0xf
-
-#define PRCM_PLLARM_LOCKP       (_PRCMU_BASE + 0x0a8)
-#define PRCM_PLLARM_LOCKP_PRCM_PLLARM_LOCKP3   0x2
-
-#define PRCM_ARM_CHGCLKREQ     (_PRCMU_BASE + 0x114)
-#define PRCM_ARM_CHGCLKREQ_PRCM_ARM_CHGCLKREQ  0x1
-
-#define PRCM_PLLARM_ENABLE     (_PRCMU_BASE + 0x98)
-#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_ENABLE  0x1
-#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_COUNTON 0x100
-
-#define PRCM_ARMCLKFIX_MGT     (_PRCMU_BASE + 0x0)
-#define PRCM_A9_RESETN_CLR     (_PRCMU_BASE + 0x1f4)
-#define PRCM_A9_RESETN_SET     (_PRCMU_BASE + 0x1f0)
-#define PRCM_ARM_LS_CLAMP      (_PRCMU_BASE + 0x30c)
-#define PRCM_SRAM_A9           (_PRCMU_BASE + 0x308)
-
-/* ARM WFI Standby signal register */
-#define PRCM_ARM_WFI_STANDBY    (_PRCMU_BASE + 0x130)
-#define PRCM_IOCR              (_PRCMU_BASE + 0x310)
-#define PRCM_IOCR_IOFORCE                      0x1
-
-/* CPU mailbox registers */
-#define PRCM_MBOX_CPU_VAL      (_PRCMU_BASE + 0x0fc)
-#define PRCM_MBOX_CPU_SET      (_PRCMU_BASE + 0x100)
-#define PRCM_MBOX_CPU_CLR      (_PRCMU_BASE + 0x104)
-
-/* Dual A9 core interrupt management unit registers */
-#define PRCM_A9_MASK_REQ       (_PRCMU_BASE + 0x328)
-#define PRCM_A9_MASK_REQ_PRCM_A9_MASK_REQ      0x1
-
-#define PRCM_A9_MASK_ACK       (_PRCMU_BASE + 0x32c)
-#define PRCM_ARMITMSK31TO0     (_PRCMU_BASE + 0x11c)
-#define PRCM_ARMITMSK63TO32    (_PRCMU_BASE + 0x120)
-#define PRCM_ARMITMSK95TO64    (_PRCMU_BASE + 0x124)
-#define PRCM_ARMITMSK127TO96   (_PRCMU_BASE + 0x128)
-#define PRCM_POWER_STATE_VAL   (_PRCMU_BASE + 0x25C)
-#define PRCM_ARMITVAL31TO0     (_PRCMU_BASE + 0x260)
-#define PRCM_ARMITVAL63TO32    (_PRCMU_BASE + 0x264)
-#define PRCM_ARMITVAL95TO64    (_PRCMU_BASE + 0x268)
-#define PRCM_ARMITVAL127TO96   (_PRCMU_BASE + 0x26C)
-
-#define PRCM_HOSTACCESS_REQ    (_PRCMU_BASE + 0x334)
-#define ARM_WAKEUP_MODEM       0x1
-
-#define PRCM_ARM_IT1_CLEAR     (_PRCMU_BASE + 0x48C)
-#define PRCM_ARM_IT1_VAL       (_PRCMU_BASE + 0x494)
-#define PRCM_HOLD_EVT          (_PRCMU_BASE + 0x174)
-
-#define PRCM_ITSTATUS0         (_PRCMU_BASE + 0x148)
-#define PRCM_ITSTATUS1         (_PRCMU_BASE + 0x150)
-#define PRCM_ITSTATUS2         (_PRCMU_BASE + 0x158)
-#define PRCM_ITSTATUS3         (_PRCMU_BASE + 0x160)
-#define PRCM_ITSTATUS4         (_PRCMU_BASE + 0x168)
-#define PRCM_ITSTATUS5         (_PRCMU_BASE + 0x484)
-#define PRCM_ITCLEAR5          (_PRCMU_BASE + 0x488)
-#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018)
-
-/* System reset register */
-#define PRCM_APE_SOFTRST       (_PRCMU_BASE + 0x228)
-
-/* Level shifter and clamp control registers */
-#define PRCM_MMIP_LS_CLAMP_SET     (_PRCMU_BASE + 0x420)
-#define PRCM_MMIP_LS_CLAMP_CLR     (_PRCMU_BASE + 0x424)
-
-/* PRCMU clock/PLL/reset registers */
-#define PRCM_PLLDSI_FREQ           (_PRCMU_BASE + 0x500)
-#define PRCM_PLLDSI_ENABLE         (_PRCMU_BASE + 0x504)
-#define PRCM_PLLDSI_LOCKP          (_PRCMU_BASE + 0x508)
-#define PRCM_LCDCLK_MGT            (_PRCMU_BASE + 0x044)
-#define PRCM_MCDECLK_MGT           (_PRCMU_BASE + 0x064)
-#define PRCM_HDMICLK_MGT           (_PRCMU_BASE + 0x058)
-#define PRCM_TVCLK_MGT             (_PRCMU_BASE + 0x07c)
-#define PRCM_DSI_PLLOUT_SEL        (_PRCMU_BASE + 0x530)
-#define PRCM_DSITVCLK_DIV          (_PRCMU_BASE + 0x52C)
-#define PRCM_PLLDSI_LOCKP          (_PRCMU_BASE + 0x508)
-#define PRCM_APE_RESETN_SET        (_PRCMU_BASE + 0x1E4)
-#define PRCM_APE_RESETN_CLR        (_PRCMU_BASE + 0x1E8)
-#define PRCM_CLKOCR               (_PRCMU_BASE + 0x1CC)
-
-/* ePOD and memory power signal control registers */
-#define PRCM_EPOD_C_SET            (_PRCMU_BASE + 0x410)
-#define PRCM_SRAM_LS_SLEEP         (_PRCMU_BASE + 0x304)
-
-/* Debug power control unit registers */
-#define PRCM_POWER_STATE_SET       (_PRCMU_BASE + 0x254)
-
-/* Miscellaneous unit registers */
-#define PRCM_DSI_SW_RESET          (_PRCMU_BASE + 0x324)
-#define PRCM_GPIOCR                (_PRCMU_BASE + 0x138)
-#define PRCM_GPIOCR_DBG_STM_MOD_CMD1            0x800
-#define PRCM_GPIOCR_DBG_UARTMOD_CMD0            0x1
-
-
-#endif /* __MACH_PRCMU__REGS_H */
index 9dbb3cab4a6f53018f5333eda9c363edfb6b63f1..bb115b2f04e9b36949f35e53fb288899c5153a8b 100644 (file)
 #include <linux/jiffies.h>
 #include <linux/bitops.h>
 #include <linux/interrupt.h>
-#include <linux/mfd/db5500-prcmu.h>
+#include <linux/mfd/dbx500-prcmu.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/db5500-regs.h>
-#include "db5500-prcmu-regs.h"
+#include "dbx500-prcmu-regs.h"
 
 #define _PRCM_MB_HEADER (tcdm_base + 0xFE8)
 #define PRCM_REQ_MB0_HEADER (_PRCM_MB_HEADER + 0x0)
@@ -109,15 +109,18 @@ enum mb5_header {
 #define PRCMU_DSI_CLOCK_SETTING                        0x00000128
 /* TVCLK_MGT PLLSW=001 (PLLSOC0) PLLDIV=0x13, = 19.05 MHZ */
 #define PRCMU_DSI_LP_CLOCK_SETTING             0x00000135
-#define PRCMU_PLLDSI_FREQ_SETTING              0x0004013C
+#define PRCMU_PLLDSI_FREQ_SETTING              0x00020121
 #define PRCMU_DSI_PLLOUT_SEL_SETTING           0x00000002
-#define PRCMU_ENABLE_ESCAPE_CLOCK_DIV          0x03000101
+#define PRCMU_ENABLE_ESCAPE_CLOCK_DIV          0x03000201
 #define PRCMU_DISABLE_ESCAPE_CLOCK_DIV         0x00000101
 
 #define PRCMU_ENABLE_PLLDSI                    0x00000001
 #define PRCMU_DISABLE_PLLDSI                   0x00000000
 
 #define PRCMU_DSI_RESET_SW                     0x00000003
+#define PRCMU_RESOUTN0_PIN                     0x00000001
+#define PRCMU_RESOUTN1_PIN                     0x00000002
+#define PRCMU_RESOUTN2_PIN                     0x00000004
 
 #define PRCMU_PLLDSI_LOCKP_LOCKED              0x3
 
@@ -315,31 +318,31 @@ static bool read_mailbox_0(void)
                r = false;
                break;
        }
-       writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR);
+       writel(MBOX_BIT(0), PRCM_ARM_IT1_CLR);
        return r;
 }
 
 static bool read_mailbox_1(void)
 {
-       writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR);
+       writel(MBOX_BIT(1), PRCM_ARM_IT1_CLR);
        return false;
 }
 
 static bool read_mailbox_2(void)
 {
-       writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR);
+       writel(MBOX_BIT(2), PRCM_ARM_IT1_CLR);
        return false;
 }
 
 static bool read_mailbox_3(void)
 {
-       writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR);
+       writel(MBOX_BIT(3), PRCM_ARM_IT1_CLR);
        return false;
 }
 
 static bool read_mailbox_4(void)
 {
-       writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR);
+       writel(MBOX_BIT(4), PRCM_ARM_IT1_CLR);
        return false;
 }
 
@@ -360,19 +363,19 @@ static bool read_mailbox_5(void)
                print_unknown_header_warning(5, header);
                break;
        }
-       writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR);
+       writel(MBOX_BIT(5), PRCM_ARM_IT1_CLR);
        return false;
 }
 
 static bool read_mailbox_6(void)
 {
-       writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR);
+       writel(MBOX_BIT(6), PRCM_ARM_IT1_CLR);
        return false;
 }
 
 static bool read_mailbox_7(void)
 {
-       writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR);
+       writel(MBOX_BIT(7), PRCM_ARM_IT1_CLR);
        return false;
 }
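
Each of the read_mailbox_n() handlers above follows the same shape: acknowledge the mailbox by writing its bit to PRCM_ARM_IT1_CLR (the name the shared dbx500 header uses for what was PRCM_ARM_IT1_CLEAR), then report whether a wakeup needs further handling. A minimal sketch of that shared shape, assuming MBOX_BIT(n) expands to BIT(n) as elsewhere in this driver:

	/* Illustrative only: the common acknowledge-and-return pattern of
	 * the read_mailbox_n() handlers. MBOX_BIT(n) is assumed to be
	 * BIT(n); only mailboxes 0 and 5 do more than acknowledge here. */
	static bool ack_mailbox(unsigned int n)
	{
		writel(MBOX_BIT(n), PRCM_ARM_IT1_CLR);
		return false;	/* false: no wakeup event to propagate */
	}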
 
@@ -434,7 +437,7 @@ int __init db5500_prcmu_init(void)
                return -ENODEV;
 
        /* Clean up the mailbox interrupts after pre-kernel code. */
-       writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLEAR);
+       writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLR);
 
        r = request_threaded_irq(IRQ_DB5500_PRCMU1, prcmu_irq_handler,
                prcmu_irq_thread_fn, 0, "prcmu", NULL);
diff --git a/drivers/mfd/db8500-prcmu-regs.h b/drivers/mfd/db8500-prcmu-regs.h
deleted file mode 100644 (file)
index 3bbf04d..0000000
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (C) STMicroelectronics 2009
- * Copyright (C) ST-Ericsson SA 2010
- *
- * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
- * Author: Sundar Iyer <sundar.iyer@stericsson.com>
- *
- * License Terms: GNU General Public License v2
- *
- * PRCM Unit registers
- */
-#ifndef __DB8500_PRCMU_REGS_H
-#define __DB8500_PRCMU_REGS_H
-
-#include <linux/bitops.h>
-#include <mach/hardware.h>
-
-#define BITS(_start, _end) ((BIT(_end) - BIT(_start)) + BIT(_end))
-
-#define PRCM_ARM_PLLDIVPS 0x118
-#define PRCM_ARM_PLLDIVPS_ARM_BRM_RATE BITS(0, 5)
-#define PRCM_ARM_PLLDIVPS_MAX_MASK     0xF
-
-#define PRCM_PLLARM_LOCKP 0x0A8
-#define PRCM_PLLARM_LOCKP_PRCM_PLLARM_LOCKP3 BIT(1)
-
-#define PRCM_ARM_CHGCLKREQ 0x114
-#define PRCM_ARM_CHGCLKREQ_PRCM_ARM_CHGCLKREQ BIT(0)
-
-#define PRCM_PLLARM_ENABLE 0x98
-#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_ENABLE  BIT(0)
-#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_COUNTON BIT(8)
-
-#define PRCM_ARMCLKFIX_MGT     0x0
-#define PRCM_A9_RESETN_CLR     0x1f4
-#define PRCM_A9_RESETN_SET     0x1f0
-#define PRCM_ARM_LS_CLAMP      0x30C
-#define PRCM_SRAM_A9           0x308
-
-/* ARM WFI Standby signal register */
-#define PRCM_ARM_WFI_STANDBY   0x130
-#define PRCM_IOCR              0x310
-#define PRCM_IOCR_IOFORCE BIT(0)
-
-/* CPU mailbox registers */
-#define PRCM_MBOX_CPU_VAL 0x0FC
-#define PRCM_MBOX_CPU_SET 0x100
-
-/* Dual A9 core interrupt management unit registers */
-#define PRCM_A9_MASK_REQ 0x328
-#define PRCM_A9_MASK_REQ_PRCM_A9_MASK_REQ BIT(0)
-
-#define PRCM_A9_MASK_ACK       0x32C
-#define PRCM_ARMITMSK31TO0     0x11C
-#define PRCM_ARMITMSK63TO32    0x120
-#define PRCM_ARMITMSK95TO64    0x124
-#define PRCM_ARMITMSK127TO96   0x128
-#define PRCM_POWER_STATE_VAL   0x25C
-#define PRCM_ARMITVAL31TO0     0x260
-#define PRCM_ARMITVAL63TO32    0x264
-#define PRCM_ARMITVAL95TO64    0x268
-#define PRCM_ARMITVAL127TO96   0x26C
-
-#define PRCM_HOSTACCESS_REQ 0x334
-#define PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ BIT(0)
-
-#define PRCM_ARM_IT1_CLR 0x48C
-#define PRCM_ARM_IT1_VAL 0x494
-
-#define PRCM_ITSTATUS0         0x148
-#define PRCM_ITSTATUS1         0x150
-#define PRCM_ITSTATUS2         0x158
-#define PRCM_ITSTATUS3         0x160
-#define PRCM_ITSTATUS4         0x168
-#define PRCM_ITSTATUS5         0x484
-#define PRCM_ITCLEAR5          0x488
-#define PRCM_ARMIT_MASKXP70_IT 0x1018
-
-/* System reset register */
-#define PRCM_APE_SOFTRST 0x228
-
-/* Level shifter and clamp control registers */
-#define PRCM_MMIP_LS_CLAMP_SET 0x420
-#define PRCM_MMIP_LS_CLAMP_CLR 0x424
-
-/* PRCMU HW semaphore */
-#define PRCM_SEM 0x400
-#define PRCM_SEM_PRCM_SEM BIT(0)
-
-/* PRCMU clock/PLL/reset registers */
-#define PRCM_PLLDSI_FREQ       0x500
-#define PRCM_PLLDSI_ENABLE     0x504
-#define PRCM_PLLDSI_LOCKP      0x508
-#define PRCM_DSI_PLLOUT_SEL    0x530
-#define PRCM_DSITVCLK_DIV      0x52C
-#define PRCM_APE_RESETN_SET    0x1E4
-#define PRCM_APE_RESETN_CLR    0x1E8
-
-#define PRCM_TCR               0x1C8
-#define PRCM_TCR_TENSEL_MASK   BITS(0, 7)
-#define PRCM_TCR_STOP_TIMERS   BIT(16)
-#define PRCM_TCR_DOZE_MODE     BIT(17)
-
-#define PRCM_CLKOCR                    0x1CC
-#define PRCM_CLKOCR_CLKODIV0_SHIFT     0
-#define PRCM_CLKOCR_CLKODIV0_MASK      BITS(0, 5)
-#define PRCM_CLKOCR_CLKOSEL0_SHIFT     6
-#define PRCM_CLKOCR_CLKOSEL0_MASK      BITS(6, 8)
-#define PRCM_CLKOCR_CLKODIV1_SHIFT     16
-#define PRCM_CLKOCR_CLKODIV1_MASK      BITS(16, 21)
-#define PRCM_CLKOCR_CLKOSEL1_SHIFT     22
-#define PRCM_CLKOCR_CLKOSEL1_MASK      BITS(22, 24)
-#define PRCM_CLKOCR_CLK1TYPE           BIT(28)
-
-#define PRCM_SGACLK_MGT                0x014
-#define PRCM_UARTCLK_MGT       0x018
-#define PRCM_MSP02CLK_MGT      0x01C
-#define PRCM_MSP1CLK_MGT       0x288
-#define PRCM_I2CCLK_MGT                0x020
-#define PRCM_SDMMCCLK_MGT      0x024
-#define PRCM_SLIMCLK_MGT       0x028
-#define PRCM_PER1CLK_MGT       0x02C
-#define PRCM_PER2CLK_MGT       0x030
-#define PRCM_PER3CLK_MGT       0x034
-#define PRCM_PER5CLK_MGT       0x038
-#define PRCM_PER6CLK_MGT       0x03C
-#define PRCM_PER7CLK_MGT       0x040
-#define PRCM_LCDCLK_MGT                0x044
-#define PRCM_BMLCLK_MGT                0x04C
-#define PRCM_HSITXCLK_MGT      0x050
-#define PRCM_HSIRXCLK_MGT      0x054
-#define PRCM_HDMICLK_MGT       0x058
-#define PRCM_APEATCLK_MGT      0x05C
-#define PRCM_APETRACECLK_MGT   0x060
-#define PRCM_MCDECLK_MGT       0x064
-#define PRCM_IPI2CCLK_MGT      0x068
-#define PRCM_DSIALTCLK_MGT     0x06C
-#define PRCM_DMACLK_MGT                0x074
-#define PRCM_B2R2CLK_MGT       0x078
-#define PRCM_TVCLK_MGT         0x07C
-#define PRCM_UNIPROCLK_MGT     0x278
-#define PRCM_SSPCLK_MGT                0x280
-#define PRCM_RNGCLK_MGT                0x284
-#define PRCM_UICCCLK_MGT       0x27C
-
-#define PRCM_CLK_MGT_CLKPLLDIV_MASK    BITS(0, 4)
-#define PRCM_CLK_MGT_CLKPLLSW_MASK     BITS(5, 7)
-#define PRCM_CLK_MGT_CLKEN             BIT(8)
-
-/* ePOD and memory power signal control registers */
-#define PRCM_EPOD_C_SET                0x410
-#define PRCM_SRAM_LS_SLEEP     0x304
-
-/* Debug power control unit registers */
-#define PRCM_POWER_STATE_SET 0x254
-
-/* Miscellaneous unit registers */
-#define PRCM_DSI_SW_RESET 0x324
-#define PRCM_GPIOCR            0x138
-
-/* GPIOCR register */
-#define PRCM_GPIOCR_SPI2_SELECT BIT(23)
-
-#define PRCM_DDR_SUBSYS_APE_MINBW  0x438
-
-#endif /* __DB8500_PRCMU_REGS_H */
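
The BITS(_start, _end) helper defined in the deleted header above (and carried over into the new dbx500-prcmu-regs.h below) builds a contiguous mask covering bits _start through _end inclusive. Two hand-expanded examples, for illustration only:

	/* BITS(s, e) = (BIT(e) - BIT(s)) + BIT(e) */
	BITS(0, 5);	/* (0x20 - 0x01) + 0x20 = 0x3f, bits 0..5   */
	BITS(6, 8);	/* (0x100 - 0x40) + 0x100 = 0x1c0, bits 6..8 */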
index 02a15d7cb3b08c4af6a1889a54b38259fb963a52..a25ab9c6b5afe81492c6a2d411e01acaf5aaa8d7 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/uaccess.h>
 #include <linux/mfd/core.h>
-#include <linux/mfd/db8500-prcmu.h>
+#include <linux/mfd/dbx500-prcmu.h>
 #include <linux/regulator/db8500-prcmu.h>
 #include <linux/regulator/machine.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/db8500-regs.h>
 #include <mach/id.h>
-#include "db8500-prcmu-regs.h"
+#include "dbx500-prcmu-regs.h"
 
 /* Offset for the firmware version within the TCPM */
 #define PRCMU_FW_VERSION_OFFSET 0xA4
 #define MB1H_REQUEST_APE_OPP_100_VOLT 0x3
 #define MB1H_RELEASE_APE_OPP_100_VOLT 0x4
 #define MB1H_RELEASE_USB_WAKEUP 0x5
+#define MB1H_PLL_ON_OFF 0x6
 
 /* Mailbox 1 Requests */
 #define PRCM_REQ_MB1_ARM_OPP                   (PRCM_REQ_MB1 + 0x0)
 #define PRCM_REQ_MB1_APE_OPP                   (PRCM_REQ_MB1 + 0x1)
-#define PRCM_REQ_MB1_APE_OPP_100_RESTORE       (PRCM_REQ_MB1 + 0x4)
-#define PRCM_REQ_MB1_ARM_OPP_100_RESTORE       (PRCM_REQ_MB1 + 0x8)
+#define PRCM_REQ_MB1_PLL_ON_OFF                        (PRCM_REQ_MB1 + 0x4)
+#define PLL_SOC1_OFF   0x4
+#define PLL_SOC1_ON    0x8
 
 /* Mailbox 1 ACKs */
 #define PRCM_ACK_MB1_CURRENT_ARM_OPP   (PRCM_ACK_MB1 + 0x0)
 #define MB4H_HOTDOG    0x12
 #define MB4H_HOTMON    0x13
 #define MB4H_HOT_PERIOD        0x14
+#define MB4H_A9WDOG_CONF 0x16
+#define MB4H_A9WDOG_EN   0x17
+#define MB4H_A9WDOG_DIS  0x18
+#define MB4H_A9WDOG_LOAD 0x19
+#define MB4H_A9WDOG_KICK 0x20
 
 /* Mailbox 4 Requests */
 #define PRCM_REQ_MB4_DDR_ST_AP_SLEEP_IDLE      (PRCM_REQ_MB4 + 0x0)
 #define PRCM_REQ_MB4_HOT_PERIOD                        (PRCM_REQ_MB4 + 0x0)
 #define HOTMON_CONFIG_LOW                      BIT(0)
 #define HOTMON_CONFIG_HIGH                     BIT(1)
+#define PRCM_REQ_MB4_A9WDOG_0                  (PRCM_REQ_MB4 + 0x0)
+#define PRCM_REQ_MB4_A9WDOG_1                  (PRCM_REQ_MB4 + 0x1)
+#define PRCM_REQ_MB4_A9WDOG_2                  (PRCM_REQ_MB4 + 0x2)
+#define PRCM_REQ_MB4_A9WDOG_3                  (PRCM_REQ_MB4 + 0x3)
+#define A9WDOG_AUTO_OFF_EN                     BIT(7)
+#define A9WDOG_AUTO_OFF_DIS                    0
+#define A9WDOG_ID_MASK                         0xf
 
 /* Mailbox 5 Requests */
 #define PRCM_REQ_MB5_I2C_SLAVE_OP      (PRCM_REQ_MB5 + 0x0)
@@ -412,7 +426,7 @@ struct clk_mgt {
 
 static DEFINE_SPINLOCK(clk_mgt_lock);
 
-#define CLK_MGT_ENTRY(_name)[PRCMU_##_name] = { (PRCM_##_name##_MGT), 0 }
+#define CLK_MGT_ENTRY(_name)[PRCMU_##_name] = { (PRCM_##_name##_MGT_OFF), 0 }
 struct clk_mgt clk_mgt[PRCMU_NUM_REG_CLOCKS] = {
        CLK_MGT_ENTRY(SGACLK),
        CLK_MGT_ENTRY(UARTCLK),
@@ -445,6 +459,35 @@ struct clk_mgt clk_mgt[PRCMU_NUM_REG_CLOCKS] = {
        CLK_MGT_ENTRY(UICCCLK),
 };
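
With the register macros renamed to *_MGT_OFF, CLK_MGT_ENTRY() token-pastes the new suffix onto the clock name. A hand expansion of the first entry, for illustration (the second struct field is a cached value, initialized to 0 here):

	/* CLK_MGT_ENTRY(SGACLK) expands, via token pasting, to: */
	[PRCMU_SGACLK] = { (PRCM_SGACLK_MGT_OFF), 0 },	/* offset 0x014 */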
 
+static struct regulator *hwacc_regulator[NUM_HW_ACC];
+static struct regulator *hwacc_ret_regulator[NUM_HW_ACC];
+
+static bool hwacc_enabled[NUM_HW_ACC];
+static bool hwacc_ret_enabled[NUM_HW_ACC];
+
+static const char *hwacc_regulator_name[NUM_HW_ACC] = {
+       [HW_ACC_SVAMMDSP]       = "hwacc-sva-mmdsp",
+       [HW_ACC_SVAPIPE]        = "hwacc-sva-pipe",
+       [HW_ACC_SIAMMDSP]       = "hwacc-sia-mmdsp",
+       [HW_ACC_SIAPIPE]        = "hwacc-sia-pipe",
+       [HW_ACC_SGA]            = "hwacc-sga",
+       [HW_ACC_B2R2]           = "hwacc-b2r2",
+       [HW_ACC_MCDE]           = "hwacc-mcde",
+       [HW_ACC_ESRAM1]         = "hwacc-esram1",
+       [HW_ACC_ESRAM2]         = "hwacc-esram2",
+       [HW_ACC_ESRAM3]         = "hwacc-esram3",
+       [HW_ACC_ESRAM4]         = "hwacc-esram4",
+};
+
+static const char *hwacc_ret_regulator_name[NUM_HW_ACC] = {
+       [HW_ACC_SVAMMDSP]       = "hwacc-sva-mmdsp-ret",
+       [HW_ACC_SIAMMDSP]       = "hwacc-sia-mmdsp-ret",
+       [HW_ACC_ESRAM1]         = "hwacc-esram1-ret",
+       [HW_ACC_ESRAM2]         = "hwacc-esram2-ret",
+       [HW_ACC_ESRAM3]         = "hwacc-esram3-ret",
+       [HW_ACC_ESRAM4]         = "hwacc-esram4-ret",
+};
+
 /*
  * Used by MCDE to set up all necessary PRCMU registers
  */
@@ -493,55 +536,51 @@ static struct {
 } prcmu_version;
 
 
-int prcmu_enable_dsipll(void)
+int db8500_prcmu_enable_dsipll(void)
 {
        int i;
        unsigned int plldsifreq;
 
        /* Clear DSIPLL_RESETN */
-       writel(PRCMU_RESET_DSIPLL, (_PRCMU_BASE + PRCM_APE_RESETN_CLR));
+       writel(PRCMU_RESET_DSIPLL, PRCM_APE_RESETN_CLR);
        /* Unclamp DSIPLL in/out */
-       writel(PRCMU_UNCLAMP_DSIPLL, (_PRCMU_BASE + PRCM_MMIP_LS_CLAMP_CLR));
+       writel(PRCMU_UNCLAMP_DSIPLL, PRCM_MMIP_LS_CLAMP_CLR);
 
        if (prcmu_is_u8400())
                plldsifreq = PRCMU_PLLDSI_FREQ_SETTING_U8400;
        else
                plldsifreq = PRCMU_PLLDSI_FREQ_SETTING;
        /* Set DSI PLL FREQ */
-       writel(plldsifreq, (_PRCMU_BASE + PRCM_PLLDSI_FREQ));
-       writel(PRCMU_DSI_PLLOUT_SEL_SETTING,
-               (_PRCMU_BASE + PRCM_DSI_PLLOUT_SEL));
+       writel(plldsifreq, PRCM_PLLDSI_FREQ);
+       writel(PRCMU_DSI_PLLOUT_SEL_SETTING, PRCM_DSI_PLLOUT_SEL);
        /* Enable Escape clocks */
-       writel(PRCMU_ENABLE_ESCAPE_CLOCK_DIV,
-                                       (_PRCMU_BASE + PRCM_DSITVCLK_DIV));
+       writel(PRCMU_ENABLE_ESCAPE_CLOCK_DIV, PRCM_DSITVCLK_DIV);
 
        /* Start DSI PLL */
-       writel(PRCMU_ENABLE_PLLDSI, (_PRCMU_BASE + PRCM_PLLDSI_ENABLE));
+       writel(PRCMU_ENABLE_PLLDSI, PRCM_PLLDSI_ENABLE);
        /* Reset DSI PLL */
-       writel(PRCMU_DSI_RESET_SW, (_PRCMU_BASE + PRCM_DSI_SW_RESET));
+       writel(PRCMU_DSI_RESET_SW, PRCM_DSI_SW_RESET);
        for (i = 0; i < 10; i++) {
-               if ((readl(_PRCMU_BASE + PRCM_PLLDSI_LOCKP) &
-                       PRCMU_PLLDSI_LOCKP_LOCKED)
+               if ((readl(PRCM_PLLDSI_LOCKP) & PRCMU_PLLDSI_LOCKP_LOCKED)
                                        == PRCMU_PLLDSI_LOCKP_LOCKED)
                        break;
                udelay(100);
        }
        /* Set DSIPLL_RESETN */
-       writel(PRCMU_RESET_DSIPLL, (_PRCMU_BASE + PRCM_APE_RESETN_SET));
+       writel(PRCMU_RESET_DSIPLL, PRCM_APE_RESETN_SET);
        return 0;
 }
 
-int prcmu_disable_dsipll(void)
+int db8500_prcmu_disable_dsipll(void)
 {
        /* Disable dsi pll */
-       writel(PRCMU_DISABLE_PLLDSI, (_PRCMU_BASE + PRCM_PLLDSI_ENABLE));
+       writel(PRCMU_DISABLE_PLLDSI, PRCM_PLLDSI_ENABLE);
        /* Disable escape clock */
-       writel(PRCMU_DISABLE_ESCAPE_CLOCK_DIV,
-                                       (_PRCMU_BASE + PRCM_DSITVCLK_DIV));
+       writel(PRCMU_DISABLE_ESCAPE_CLOCK_DIV, PRCM_DSITVCLK_DIV);
        return 0;
 }
 
-int prcmu_set_display_clocks(void)
+int db8500_prcmu_set_display_clocks(void)
 {
        unsigned long flags;
        unsigned int dsiclk;
@@ -554,15 +593,15 @@ int prcmu_set_display_clocks(void)
        spin_lock_irqsave(&clk_mgt_lock, flags);
 
        /* Grab the HW semaphore. */
-       while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0)
+       while ((readl(PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0)
                cpu_relax();
 
-       writel(dsiclk, (_PRCMU_BASE + PRCM_HDMICLK_MGT));
-       writel(PRCMU_DSI_LP_CLOCK_SETTING, (_PRCMU_BASE + PRCM_TVCLK_MGT));
-       writel(PRCMU_DPI_CLOCK_SETTING, (_PRCMU_BASE + PRCM_LCDCLK_MGT));
+       writel(dsiclk, PRCM_HDMICLK_MGT);
+       writel(PRCMU_DSI_LP_CLOCK_SETTING, PRCM_TVCLK_MGT);
+       writel(PRCMU_DPI_CLOCK_SETTING, PRCM_LCDCLK_MGT);
 
        /* Release the HW semaphore. */
-       writel(0, (_PRCMU_BASE + PRCM_SEM));
+       writel(0, PRCM_SEM);
 
        spin_unlock_irqrestore(&clk_mgt_lock, flags);
 
@@ -578,8 +617,8 @@ void prcmu_enable_spi2(void)
        unsigned long flags;
 
        spin_lock_irqsave(&gpiocr_lock, flags);
-       reg = readl(_PRCMU_BASE + PRCM_GPIOCR);
-       writel(reg | PRCM_GPIOCR_SPI2_SELECT, _PRCMU_BASE + PRCM_GPIOCR);
+       reg = readl(PRCM_GPIOCR);
+       writel(reg | PRCM_GPIOCR_SPI2_SELECT, PRCM_GPIOCR);
        spin_unlock_irqrestore(&gpiocr_lock, flags);
 }
 
@@ -592,8 +631,8 @@ void prcmu_disable_spi2(void)
        unsigned long flags;
 
        spin_lock_irqsave(&gpiocr_lock, flags);
-       reg = readl(_PRCMU_BASE + PRCM_GPIOCR);
-       writel(reg & ~PRCM_GPIOCR_SPI2_SELECT, _PRCMU_BASE + PRCM_GPIOCR);
+       reg = readl(PRCM_GPIOCR);
+       writel(reg & ~PRCM_GPIOCR_SPI2_SELECT, PRCM_GPIOCR);
        spin_unlock_irqrestore(&gpiocr_lock, flags);
 }
 
@@ -701,7 +740,7 @@ int prcmu_config_clkout(u8 clkout, u8 source, u8 div)
 
        spin_lock_irqsave(&clkout_lock, flags);
 
-       val = readl(_PRCMU_BASE + PRCM_CLKOCR);
+       val = readl(PRCM_CLKOCR);
        if (val & div_mask) {
                if (div) {
                        if ((val & mask) != bits) {
@@ -715,7 +754,7 @@ int prcmu_config_clkout(u8 clkout, u8 source, u8 div)
                        }
                }
        }
-       writel((bits | (val & ~mask)), (_PRCMU_BASE + PRCM_CLKOCR));
+       writel((bits | (val & ~mask)), PRCM_CLKOCR);
        requests[clkout] += (div ? 1 : -1);
 
 unlock_and_return:
@@ -724,7 +763,7 @@ unlock_and_return:
        return r;
 }
 
-int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll)
+int db8500_prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll)
 {
        unsigned long flags;
 
@@ -732,7 +771,7 @@ int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll)
 
        spin_lock_irqsave(&mb0_transfer.lock, flags);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
                cpu_relax();
 
        writeb(MB0H_POWER_STATE_TRANS, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0));
@@ -741,7 +780,7 @@ int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll)
        writeb((keep_ulp_clk ? 1 : 0),
                (tcdm_base + PRCM_REQ_MB0_ULP_CLOCK_STATE));
        writeb(0, (tcdm_base + PRCM_REQ_MB0_DO_NOT_WFI));
-       writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(0), PRCM_MBOX_CPU_SET);
 
        spin_unlock_irqrestore(&mb0_transfer.lock, flags);
 
@@ -770,18 +809,18 @@ static void config_wakeups(void)
                return;
 
        for (i = 0; i < 2; i++) {
-               while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
+               while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
                        cpu_relax();
                writel(dbb_events, (tcdm_base + PRCM_REQ_MB0_WAKEUP_8500));
                writel(abb_events, (tcdm_base + PRCM_REQ_MB0_WAKEUP_4500));
                writeb(header[i], (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0));
-               writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+               writel(MBOX_BIT(0), PRCM_MBOX_CPU_SET);
        }
        last_dbb_events = dbb_events;
        last_abb_events = abb_events;
 }
 
-void prcmu_enable_wakeups(u32 wakeups)
+void db8500_prcmu_enable_wakeups(u32 wakeups)
 {
        unsigned long flags;
        u32 bits;
@@ -802,7 +841,7 @@ void prcmu_enable_wakeups(u32 wakeups)
        spin_unlock_irqrestore(&mb0_transfer.lock, flags);
 }
 
-void prcmu_config_abb_event_readout(u32 abb_events)
+void db8500_prcmu_config_abb_event_readout(u32 abb_events)
 {
        unsigned long flags;
 
@@ -814,7 +853,7 @@ void prcmu_config_abb_event_readout(u32 abb_events)
        spin_unlock_irqrestore(&mb0_transfer.lock, flags);
 }
 
-void prcmu_get_abb_event_buffer(void __iomem **buf)
+void db8500_prcmu_get_abb_event_buffer(void __iomem **buf)
 {
        if (readb(tcdm_base + PRCM_ACK_MB0_READ_POINTER) & 1)
                *buf = (tcdm_base + PRCM_ACK_MB0_WAKEUP_1_4500);
@@ -823,13 +862,13 @@ void prcmu_get_abb_event_buffer(void __iomem **buf)
 }
 
 /**
- * prcmu_set_arm_opp - set the appropriate ARM OPP
+ * db8500_prcmu_set_arm_opp - set the appropriate ARM OPP
  * @opp: The new ARM operating point to which transition is to be made
  * Returns: 0 on success, non-zero on failure
  *
  * This function sets the operating point of the ARM.
  */
-int prcmu_set_arm_opp(u8 opp)
+int db8500_prcmu_set_arm_opp(u8 opp)
 {
        int r;
 
@@ -840,14 +879,14 @@ int prcmu_set_arm_opp(u8 opp)
 
        mutex_lock(&mb1_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
                cpu_relax();
 
        writeb(MB1H_ARM_APE_OPP, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
        writeb(opp, (tcdm_base + PRCM_REQ_MB1_ARM_OPP));
        writeb(APE_NO_CHANGE, (tcdm_base + PRCM_REQ_MB1_APE_OPP));
 
-       writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET);
        wait_for_completion(&mb1_transfer.work);
 
        if ((mb1_transfer.ack.header != MB1H_ARM_APE_OPP) ||
@@ -860,11 +899,11 @@ int prcmu_set_arm_opp(u8 opp)
 }
 
 /**
- * prcmu_get_arm_opp - get the current ARM OPP
+ * db8500_prcmu_get_arm_opp - get the current ARM OPP
  *
  * Returns: the current ARM OPP
  */
-int prcmu_get_arm_opp(void)
+int db8500_prcmu_get_arm_opp(void)
 {
        return readb(tcdm_base + PRCM_ACK_MB1_CURRENT_ARM_OPP);
 }
@@ -876,7 +915,7 @@ int prcmu_get_arm_opp(void)
  */
 int prcmu_get_ddr_opp(void)
 {
-       return readb(_PRCMU_BASE + PRCM_DDR_SUBSYS_APE_MINBW);
+       return readb(PRCM_DDR_SUBSYS_APE_MINBW);
 }
 
 /**
@@ -892,7 +931,7 @@ int prcmu_set_ddr_opp(u8 opp)
                return -EINVAL;
        /* Changing the DDR OPP can hang the hardware pre-v21 */
        if (cpu_is_u8500v20_or_later() && !cpu_is_u8500v20())
-               writeb(opp, (_PRCMU_BASE + PRCM_DDR_SUBSYS_APE_MINBW));
+               writeb(opp, PRCM_DDR_SUBSYS_APE_MINBW);
 
        return 0;
 }
@@ -909,14 +948,14 @@ int prcmu_set_ape_opp(u8 opp)
 
        mutex_lock(&mb1_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
                cpu_relax();
 
        writeb(MB1H_ARM_APE_OPP, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
        writeb(ARM_NO_CHANGE, (tcdm_base + PRCM_REQ_MB1_ARM_OPP));
        writeb(opp, (tcdm_base + PRCM_REQ_MB1_APE_OPP));
 
-       writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET);
        wait_for_completion(&mb1_transfer.work);
 
        if ((mb1_transfer.ack.header != MB1H_ARM_APE_OPP) ||
@@ -966,12 +1005,12 @@ int prcmu_request_ape_opp_100_voltage(bool enable)
                header = MB1H_RELEASE_APE_OPP_100_VOLT;
        }
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
                cpu_relax();
 
        writeb(header, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
 
-       writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET);
        wait_for_completion(&mb1_transfer.work);
 
        if ((mb1_transfer.ack.header != header) ||
@@ -995,13 +1034,13 @@ int prcmu_release_usb_wakeup_state(void)
 
        mutex_lock(&mb1_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
                cpu_relax();
 
        writeb(MB1H_RELEASE_USB_WAKEUP,
                (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
 
-       writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET);
        wait_for_completion(&mb1_transfer.work);
 
        if ((mb1_transfer.ack.header != MB1H_RELEASE_USB_WAKEUP) ||
@@ -1013,15 +1052,169 @@ int prcmu_release_usb_wakeup_state(void)
        return r;
 }
 
+static int request_pll(u8 clock, bool enable)
+{
+       int r = 0;
+
+       if (clock == PRCMU_PLLSOC1)
+               clock = (enable ? PLL_SOC1_ON : PLL_SOC1_OFF);
+       else
+               return -EINVAL;
+
+       mutex_lock(&mb1_transfer.lock);
+
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+               cpu_relax();
+
+       writeb(MB1H_PLL_ON_OFF, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
+       writeb(clock, (tcdm_base + PRCM_REQ_MB1_PLL_ON_OFF));
+
+       writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET);
+       wait_for_completion(&mb1_transfer.work);
+
+       if (mb1_transfer.ack.header != MB1H_PLL_ON_OFF)
+               r = -EIO;
+
+       mutex_unlock(&mb1_transfer.lock);
+
+       return r;
+}
+
 /**
- * prcmu_set_epod - set the state of a EPOD (power domain)
+ * prcmu_set_hwacc - set the power state of a hardware accelerator
+ * @hwacc_dev: The hardware accelerator (enum hw_acc_dev).
+ * @state: The new power state (enum hw_acc_state).
+ *
+ * This function sets the power state of a hardware accelerator.
+ * This function should not be called from interrupt context.
+ *
+ * NOTE! Deprecated, to be removed when all users have switched over to
+ * the regulator framework API.
+ */
+int prcmu_set_hwacc(u16 hwacc_dev, u8 state)
+{
+       int r = 0;
+       bool ram_retention = false;
+       bool enable, enable_ret;
+
+       /* check argument */
+       BUG_ON(hwacc_dev >= NUM_HW_ACC);
+
+       /* get state of switches */
+       enable = hwacc_enabled[hwacc_dev];
+       enable_ret = hwacc_ret_enabled[hwacc_dev];
+
+       /* set flag if retention is possible */
+       switch (hwacc_dev) {
+       case HW_ACC_SVAMMDSP:
+       case HW_ACC_SIAMMDSP:
+       case HW_ACC_ESRAM1:
+       case HW_ACC_ESRAM2:
+       case HW_ACC_ESRAM3:
+       case HW_ACC_ESRAM4:
+               ram_retention = true;
+               break;
+       }
+
+       /* check argument */
+       BUG_ON(state > HW_ON);
+       BUG_ON(state == HW_OFF_RAMRET && !ram_retention);
+
+       /* modify enable flags */
+       switch (state) {
+       case HW_OFF:
+               enable_ret = false;
+               enable = false;
+               break;
+       case HW_ON:
+               enable = true;
+               break;
+       case HW_OFF_RAMRET:
+               enable_ret = true;
+               enable = false;
+               break;
+       }
+
+       /* get regulator (lazy) */
+       if (hwacc_regulator[hwacc_dev] == NULL) {
+               hwacc_regulator[hwacc_dev] = regulator_get(NULL,
+                       hwacc_regulator_name[hwacc_dev]);
+               if (IS_ERR(hwacc_regulator[hwacc_dev])) {
+                       pr_err("prcmu: failed to get supply %s\n",
+                               hwacc_regulator_name[hwacc_dev]);
+                       r = PTR_ERR(hwacc_regulator[hwacc_dev]);
+                       goto out;
+               }
+       }
+
+       if (ram_retention) {
+               if (hwacc_ret_regulator[hwacc_dev] == NULL) {
+                       hwacc_ret_regulator[hwacc_dev] = regulator_get(NULL,
+                               hwacc_ret_regulator_name[hwacc_dev]);
+                       if (IS_ERR(hwacc_ret_regulator[hwacc_dev])) {
+                               pr_err("prcmu: failed to get supply %s\n",
+                                       hwacc_ret_regulator_name[hwacc_dev]);
+                               r = PTR_ERR(hwacc_ret_regulator[hwacc_dev]);
+                               goto out;
+                       }
+               }
+       }
+
+       /* set regulators */
+       if (ram_retention) {
+               if (enable_ret && !hwacc_ret_enabled[hwacc_dev]) {
+                       r = regulator_enable(hwacc_ret_regulator[hwacc_dev]);
+                       if (r < 0) {
+                               pr_err("prcmu_set_hwacc: ret enable failed\n");
+                               goto out;
+                       }
+                       hwacc_ret_enabled[hwacc_dev] = true;
+               }
+       }
+
+       if (enable && !hwacc_enabled[hwacc_dev]) {
+               r = regulator_enable(hwacc_regulator[hwacc_dev]);
+               if (r < 0) {
+                       pr_err("prcmu_set_hwacc: enable failed\n");
+                       goto out;
+               }
+               hwacc_enabled[hwacc_dev] = true;
+       }
+
+       if (!enable && hwacc_enabled[hwacc_dev]) {
+               r = regulator_disable(hwacc_regulator[hwacc_dev]);
+               if (r < 0) {
+                       pr_err("prcmu_set_hwacc: disable failed\n");
+                       goto out;
+               }
+               hwacc_enabled[hwacc_dev] = false;
+       }
+
+       if (ram_retention) {
+               if (!enable_ret && hwacc_ret_enabled[hwacc_dev]) {
+                       r = regulator_disable(hwacc_ret_regulator[hwacc_dev]);
+                       if (r < 0) {
+                               pr_err("prcmu_set_hwacc: ret disable failed\n");
+                               goto out;
+                       }
+                       hwacc_ret_enabled[hwacc_dev] = false;
+               }
+       }
+
+out:
+       return r;
+}
+EXPORT_SYMBOL(prcmu_set_hwacc);
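
prcmu_set_hwacc() acquires the named regulators lazily on first use and then reconciles the on/retention switches against the requested state. A hedged usage sketch, using states from the enums named in the kerneldoc above; note that HW_OFF_RAMRET is only legal for the devices the switch statement marks as retention-capable:

	/* Power the graphics accelerator on, and put an ESRAM bank
	 * (retention-capable per the switch above) into RAM retention. */
	int err;

	err = prcmu_set_hwacc(HW_ACC_SGA, HW_ON);
	if (err < 0)
		pr_err("SGA power-up failed: %d\n", err);

	err = prcmu_set_hwacc(HW_ACC_ESRAM1, HW_OFF_RAMRET);
	if (err < 0)
		pr_err("ESRAM1 retention failed: %d\n", err);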
+
+/**
+ * db8500_prcmu_set_epod - set the state of an EPOD (power domain)
  * @epod_id: The EPOD to set
  * @epod_state: The new EPOD state
  *
  * This function sets the state of an EPOD (power domain). It may not be called
  * from interrupt context.
  */
-int prcmu_set_epod(u16 epod_id, u8 epod_state)
+int db8500_prcmu_set_epod(u16 epod_id, u8 epod_state)
 {
        int r = 0;
        bool ram_retention = false;
@@ -1048,7 +1241,7 @@ int prcmu_set_epod(u16 epod_id, u8 epod_state)
        mutex_lock(&mb2_transfer.lock);
 
        /* wait for mailbox */
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(2))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(2))
                cpu_relax();
 
        /* fill in mailbox */
@@ -1058,7 +1251,7 @@ int prcmu_set_epod(u16 epod_id, u8 epod_state)
 
        writeb(MB2H_DPS, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB2));
 
-       writel(MBOX_BIT(2), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(2), PRCM_MBOX_CPU_SET);
 
        /*
         * The current firmware version does not handle errors correctly,
@@ -1145,13 +1338,13 @@ static int request_sysclk(bool enable)
 
        spin_lock_irqsave(&mb3_transfer.lock, flags);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(3))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(3))
                cpu_relax();
 
        writeb((enable ? ON : OFF), (tcdm_base + PRCM_REQ_MB3_SYSCLK_MGT));
 
        writeb(MB3H_SYSCLK, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB3));
-       writel(MBOX_BIT(3), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(3), PRCM_MBOX_CPU_SET);
 
        spin_unlock_irqrestore(&mb3_transfer.lock, flags);
 
@@ -1177,7 +1370,7 @@ static int request_timclk(bool enable)
 
        if (!enable)
                val |= PRCM_TCR_STOP_TIMERS;
-       writel(val, (_PRCMU_BASE + PRCM_TCR));
+       writel(val, PRCM_TCR);
 
        return 0;
 }
@@ -1190,7 +1383,7 @@ static int request_reg_clock(u8 clock, bool enable)
        spin_lock_irqsave(&clk_mgt_lock, flags);
 
        /* Grab the HW semaphore. */
-       while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0)
+       while ((readl(PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0)
                cpu_relax();
 
        val = readl(_PRCMU_BASE + clk_mgt[clock].offset);
@@ -1203,34 +1396,61 @@ static int request_reg_clock(u8 clock, bool enable)
        writel(val, (_PRCMU_BASE + clk_mgt[clock].offset));
 
        /* Release the HW semaphore. */
-       writel(0, (_PRCMU_BASE + PRCM_SEM));
+       writel(0, PRCM_SEM);
 
        spin_unlock_irqrestore(&clk_mgt_lock, flags);
 
        return 0;
 }
 
+static int request_sga_clock(u8 clock, bool enable)
+{
+       u32 val;
+       int ret;
+
+       if (enable) {
+               val = readl(PRCM_CGATING_BYPASS);
+               writel(val | PRCM_CGATING_BYPASS_ICN2, PRCM_CGATING_BYPASS);
+       }
+
+       ret = request_reg_clock(clock, enable);
+
+       if (!ret && !enable) {
+               val = readl(PRCM_CGATING_BYPASS);
+               writel(val & ~PRCM_CGATING_BYPASS_ICN2, PRCM_CGATING_BYPASS);
+       }
+
+       return ret;
+}
+
 /**
- * prcmu_request_clock() - Request for a clock to be enabled or disabled.
+ * db8500_prcmu_request_clock() - Request a clock to be enabled or disabled.
  * @clock:      The clock for which the request is made.
  * @enable:     Whether the clock should be enabled (true) or disabled (false).
  *
  * This function should only be used by the clock implementation.
  * Do not use it from any other place!
  */
-int prcmu_request_clock(u8 clock, bool enable)
+int db8500_prcmu_request_clock(u8 clock, bool enable)
 {
-       if (clock < PRCMU_NUM_REG_CLOCKS)
-               return request_reg_clock(clock, enable);
-       else if (clock == PRCMU_TIMCLK)
+       switch (clock) {
+       case PRCMU_SGACLK:
+               return request_sga_clock(clock, enable);
+       case PRCMU_TIMCLK:
                return request_timclk(enable);
-       else if (clock == PRCMU_SYSCLK)
+       case PRCMU_SYSCLK:
                return request_sysclk(enable);
-       else
-               return -EINVAL;
+       case PRCMU_PLLSOC1:
+               return request_pll(clock, enable);
+       default:
+               break;
+       }
+       if (clock < PRCMU_NUM_REG_CLOCKS)
+               return request_reg_clock(clock, enable);
+       return -EINVAL;
 }
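
db8500_prcmu_request_clock() now dispatches the special cases (SGACLK with its interconnect gating bypass, TIMCLK, SYSCLK, and the PLLSOC1 mailbox request) before falling through to the generic register-managed clocks. Usage is unchanged for callers; as the kerneldoc says, only the clock implementation should call it:

	/* Sketch: enable the SGA clock (sets the ICN2 gating bypass
	 * first), then release PLLSOC1 via the mailbox-1 request path. */
	db8500_prcmu_request_clock(PRCMU_SGACLK, true);
	db8500_prcmu_request_clock(PRCMU_PLLSOC1, false);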
 
-int prcmu_config_esram0_deep_sleep(u8 state)
+int db8500_prcmu_config_esram0_deep_sleep(u8 state)
 {
        if ((state > ESRAM0_DEEP_SLEEP_STATE_RET) ||
            (state < ESRAM0_DEEP_SLEEP_STATE_OFF))
@@ -1238,7 +1458,7 @@ int prcmu_config_esram0_deep_sleep(u8 state)
 
        mutex_lock(&mb4_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
                cpu_relax();
 
        writeb(MB4H_MEM_ST, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4));
@@ -1248,7 +1468,7 @@ int prcmu_config_esram0_deep_sleep(u8 state)
               (tcdm_base + PRCM_REQ_MB4_DDR_ST_AP_DEEP_IDLE));
        writeb(state, (tcdm_base + PRCM_REQ_MB4_ESRAM0_ST));
 
-       writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(4), PRCM_MBOX_CPU_SET);
        wait_for_completion(&mb4_transfer.work);
 
        mutex_unlock(&mb4_transfer.lock);
@@ -1260,13 +1480,13 @@ int prcmu_config_hotdog(u8 threshold)
 {
        mutex_lock(&mb4_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
                cpu_relax();
 
        writeb(threshold, (tcdm_base + PRCM_REQ_MB4_HOTDOG_THRESHOLD));
        writeb(MB4H_HOTDOG, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4));
 
-       writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(4), PRCM_MBOX_CPU_SET);
        wait_for_completion(&mb4_transfer.work);
 
        mutex_unlock(&mb4_transfer.lock);
@@ -1278,7 +1498,7 @@ int prcmu_config_hotmon(u8 low, u8 high)
 {
        mutex_lock(&mb4_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
                cpu_relax();
 
        writeb(low, (tcdm_base + PRCM_REQ_MB4_HOTMON_LOW));
@@ -1287,7 +1507,7 @@ int prcmu_config_hotmon(u8 low, u8 high)
                (tcdm_base + PRCM_REQ_MB4_HOTMON_CONFIG));
        writeb(MB4H_HOTMON, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4));
 
-       writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(4), PRCM_MBOX_CPU_SET);
        wait_for_completion(&mb4_transfer.work);
 
        mutex_unlock(&mb4_transfer.lock);
@@ -1299,13 +1519,13 @@ static int config_hot_period(u16 val)
 {
        mutex_lock(&mb4_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
                cpu_relax();
 
        writew(val, (tcdm_base + PRCM_REQ_MB4_HOT_PERIOD));
        writeb(MB4H_HOT_PERIOD, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4));
 
-       writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(4), PRCM_MBOX_CPU_SET);
        wait_for_completion(&mb4_transfer.work);
 
        mutex_unlock(&mb4_transfer.lock);
@@ -1326,6 +1546,78 @@ int prcmu_stop_temp_sense(void)
        return config_hot_period(0xFFFF);
 }
 
+static int prcmu_a9wdog(u8 cmd, u8 d0, u8 d1, u8 d2, u8 d3)
+{
+       mutex_lock(&mb4_transfer.lock);
+
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(4))
+               cpu_relax();
+
+       writeb(d0, (tcdm_base + PRCM_REQ_MB4_A9WDOG_0));
+       writeb(d1, (tcdm_base + PRCM_REQ_MB4_A9WDOG_1));
+       writeb(d2, (tcdm_base + PRCM_REQ_MB4_A9WDOG_2));
+       writeb(d3, (tcdm_base + PRCM_REQ_MB4_A9WDOG_3));
+
+       writeb(cmd, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4));
+
+       writel(MBOX_BIT(4), PRCM_MBOX_CPU_SET);
+       wait_for_completion(&mb4_transfer.work);
+
+       mutex_unlock(&mb4_transfer.lock);
+
+       return 0;
+}
+
+int prcmu_config_a9wdog(u8 num, bool sleep_auto_off)
+{
+       BUG_ON(num == 0 || num > 0xf);
+       return prcmu_a9wdog(MB4H_A9WDOG_CONF, num, 0, 0,
+                           sleep_auto_off ? A9WDOG_AUTO_OFF_EN :
+                           A9WDOG_AUTO_OFF_DIS);
+}
+
+int prcmu_enable_a9wdog(u8 id)
+{
+       return prcmu_a9wdog(MB4H_A9WDOG_EN, id, 0, 0, 0);
+}
+
+int prcmu_disable_a9wdog(u8 id)
+{
+       return prcmu_a9wdog(MB4H_A9WDOG_DIS, id, 0, 0, 0);
+}
+
+int prcmu_kick_a9wdog(u8 id)
+{
+       return prcmu_a9wdog(MB4H_A9WDOG_KICK, id, 0, 0, 0);
+}
+
+/*
+ * The timeout is 28 bits, in ms.
+ */
+#define MAX_WATCHDOG_TIMEOUT 131000
+int prcmu_load_a9wdog(u8 id, u32 timeout)
+{
+       if (timeout > MAX_WATCHDOG_TIMEOUT)
+               /*
+                * Due to a calculation bug in the PRCMU firmware,
+                * timeouts can't be bigger than 131 seconds.
+                */
+               return -EINVAL;
+
+       return prcmu_a9wdog(MB4H_A9WDOG_LOAD,
+                           (id & A9WDOG_ID_MASK) |
+                           /*
+                            * Put the lowest 28 bits of timeout at
+                            * offset 4. The first four bits are used for id.
+                            */
+                           (u8)((timeout << 4) & 0xf0),
+                           (u8)((timeout >> 4) & 0xff),
+                           (u8)((timeout >> 12) & 0xff),
+                           (u8)((timeout >> 20) & 0xff));
+}
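
prcmu_load_a9wdog() spreads the 28-bit millisecond timeout across the four mailbox bytes, keeping the watchdog id in the low nibble of the first byte. A worked example with hand-computed values, for illustration only:

	/* prcmu_load_a9wdog(1, 60000): timeout = 60000 ms = 0xea60
	 *   d0 = (1 & 0xf) | ((0xea60 << 4) & 0xf0) = 0x01  (id, timeout[3:0])
	 *   d1 = (0xea60 >> 4)  & 0xff              = 0xa6  (timeout[11:4])
	 *   d2 = (0xea60 >> 12) & 0xff              = 0x0e  (timeout[19:12])
	 *   d3 = (0xea60 >> 20) & 0xff              = 0x00  (timeout[27:20])
	 */
	prcmu_load_a9wdog(1, 60000);	/* 60 s watchdog, id 1 */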
+
 /**
  * prcmu_set_clock_divider() - Configure the clock divider.
  * @clock:     The clock for which the request is made.
@@ -1345,7 +1637,7 @@ int prcmu_set_clock_divider(u8 clock, u8 divider)
        spin_lock_irqsave(&clk_mgt_lock, flags);
 
        /* Grab the HW semaphore. */
-       while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0)
+       while ((readl(PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0)
                cpu_relax();
 
        val = readl(_PRCMU_BASE + clk_mgt[clock].offset);
@@ -1354,7 +1646,7 @@ int prcmu_set_clock_divider(u8 clock, u8 divider)
        writel(val, (_PRCMU_BASE + clk_mgt[clock].offset));
 
        /* Release the HW semaphore. */
-       writel(0, (_PRCMU_BASE + PRCM_SEM));
+       writel(0, PRCM_SEM);
 
        spin_unlock_irqrestore(&clk_mgt_lock, flags);
 
@@ -1380,7 +1672,7 @@ int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
 
        mutex_lock(&mb5_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
                cpu_relax();
 
        writeb(PRCMU_I2C_READ(slave), (tcdm_base + PRCM_REQ_MB5_I2C_SLAVE_OP));
@@ -1388,7 +1680,7 @@ int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
        writeb(reg, (tcdm_base + PRCM_REQ_MB5_I2C_REG));
        writeb(0, (tcdm_base + PRCM_REQ_MB5_I2C_VAL));
 
-       writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
 
        if (!wait_for_completion_timeout(&mb5_transfer.work,
                                msecs_to_jiffies(20000))) {
@@ -1426,7 +1718,7 @@ int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
 
        mutex_lock(&mb5_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5))
                cpu_relax();
 
        writeb(PRCMU_I2C_WRITE(slave), (tcdm_base + PRCM_REQ_MB5_I2C_SLAVE_OP));
@@ -1434,7 +1726,7 @@ int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
        writeb(reg, (tcdm_base + PRCM_REQ_MB5_I2C_REG));
        writeb(*value, (tcdm_base + PRCM_REQ_MB5_I2C_VAL));
 
-       writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET);
 
        if (!wait_for_completion_timeout(&mb5_transfer.work,
                                msecs_to_jiffies(20000))) {
@@ -1456,21 +1748,44 @@ int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
 void prcmu_ac_wake_req(void)
 {
        u32 val;
+       u32 status;
 
        mutex_lock(&mb0_transfer.ac_wake_lock);
 
-       val = readl(_PRCMU_BASE + PRCM_HOSTACCESS_REQ);
+       val = readl(PRCM_HOSTACCESS_REQ);
        if (val & PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ)
                goto unlock_and_return;
 
        atomic_set(&ac_wake_req_state, 1);
 
-       writel((val | PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ),
-               (_PRCMU_BASE + PRCM_HOSTACCESS_REQ));
+retry:
+       writel((val | PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ), PRCM_HOSTACCESS_REQ);
 
        if (!wait_for_completion_timeout(&mb0_transfer.ac_wake_work,
-                       msecs_to_jiffies(20000))) {
-               pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n",
+                       msecs_to_jiffies(5000))) {
+               pr_crit("prcmu: %s timed out (5 s) waiting for a reply.\n",
+                       __func__);
+               goto unlock_and_return;
+       }
+
+       /*
+        * The modem can generate an AC_WAKE_ACK, and then still go to sleep.
+        * As a workaround, we wait, and then check that the modem is indeed
+        * awake (in terms of the value of the PRCM_MOD_AWAKE_STATUS
+        * register, which may not be the whole truth).
+        */
+       udelay(400);
+       status = (readl(PRCM_MOD_AWAKE_STATUS) & BITS(0, 2));
+       if (status != (PRCM_MOD_AWAKE_STATUS_PRCM_MOD_AAPD_AWAKE |
+                       PRCM_MOD_AWAKE_STATUS_PRCM_MOD_COREPD_AWAKE)) {
+               pr_err("prcmu: %s received ack, but modem not awake (0x%X).\n",
+                       __func__, status);
+               udelay(1200);
+               writel(val, PRCM_HOSTACCESS_REQ);
+               if (wait_for_completion_timeout(&mb0_transfer.ac_wake_work,
+                               msecs_to_jiffies(5000)))
+                       goto retry;
+               pr_crit("prcmu: %s timed out (5 s) waiting for AC_SLEEP_ACK.\n",
                        __func__);
        }
 
@@ -1487,16 +1802,16 @@ void prcmu_ac_sleep_req()
 
        mutex_lock(&mb0_transfer.ac_wake_lock);
 
-       val = readl(_PRCMU_BASE + PRCM_HOSTACCESS_REQ);
+       val = readl(PRCM_HOSTACCESS_REQ);
        if (!(val & PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ))
                goto unlock_and_return;
 
        writel((val & ~PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ),
-               (_PRCMU_BASE + PRCM_HOSTACCESS_REQ));
+               PRCM_HOSTACCESS_REQ);
 
        if (!wait_for_completion_timeout(&mb0_transfer.ac_wake_work,
-                       msecs_to_jiffies(20000))) {
-               pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n",
+                       msecs_to_jiffies(5000))) {
+               pr_crit("prcmu: %s timed out (5 s) waiting for a reply.\n",
                        __func__);
        }
 
@@ -1506,21 +1821,32 @@ unlock_and_return:
        mutex_unlock(&mb0_transfer.ac_wake_lock);
 }
 
-bool prcmu_is_ac_wake_requested(void)
+bool db8500_prcmu_is_ac_wake_requested(void)
 {
        return (atomic_read(&ac_wake_req_state) != 0);
 }
 
 /**
- * prcmu_system_reset - System reset
+ * db8500_prcmu_system_reset - System reset
  *
- * Saves the reset reason code and then sets the APE_SOFRST register which
+ * Saves the reset reason code and then sets the APE_SOFTRST register which
  * fires an interrupt to the firmware
  */
-void prcmu_system_reset(u16 reset_code)
+void db8500_prcmu_system_reset(u16 reset_code)
 {
        writew(reset_code, (tcdm_base + PRCM_SW_RST_REASON));
-       writel(1, (_PRCMU_BASE + PRCM_APE_SOFTRST));
+       writel(1, PRCM_APE_SOFTRST);
+}
+
+/**
+ * db8500_prcmu_get_reset_code - Retrieve SW reset reason code
+ *
+ * Retrieves the reset reason code stored by prcmu_system_reset() before
+ * the last restart.
+ */
+u16 db8500_prcmu_get_reset_code(void)
+{
+       return readw(tcdm_base + PRCM_SW_RST_REASON);
 }
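
Together, db8500_prcmu_system_reset() and db8500_prcmu_get_reset_code() form a reset-reason round trip through TCDM; the API presumes the PRCM_SW_RST_REASON location survives the APE soft reset. Illustrative only (the reason code value below is arbitrary):

	/* Before requesting the reset: */
	db8500_prcmu_system_reset(0x0042);	/* hypothetical reason code */
	/* ...and in the boot path of the next kernel: */
	u16 reason = db8500_prcmu_get_reset_code();	/* reads back 0x0042 */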
 
 /**
@@ -1530,11 +1856,11 @@ void prcmu_modem_reset(void)
 {
        mutex_lock(&mb1_transfer.lock);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1))
                cpu_relax();
 
        writeb(MB1H_RESET_MODEM, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1));
-       writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET);
        wait_for_completion(&mb1_transfer.work);
 
        /*
@@ -1551,11 +1877,11 @@ static void ack_dbb_wakeup(void)
 
        spin_lock_irqsave(&mb0_transfer.lock, flags);
 
-       while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
+       while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(0))
                cpu_relax();
 
        writeb(MB0H_READ_WAKEUP_ACK, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0));
-       writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET));
+       writel(MBOX_BIT(0), PRCM_MBOX_CPU_SET);
 
        spin_unlock_irqrestore(&mb0_transfer.lock, flags);
 }
@@ -1600,7 +1926,7 @@ static bool read_mailbox_0(void)
                r = false;
                break;
        }
-       writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+       writel(MBOX_BIT(0), PRCM_ARM_IT1_CLR);
        return r;
 }
 
@@ -1613,7 +1939,7 @@ static bool read_mailbox_1(void)
                PRCM_ACK_MB1_CURRENT_APE_OPP);
        mb1_transfer.ack.ape_voltage_status = readb(tcdm_base +
                PRCM_ACK_MB1_APE_VOLTAGE_STATUS);
-       writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+       writel(MBOX_BIT(1), PRCM_ARM_IT1_CLR);
        complete(&mb1_transfer.work);
        return false;
 }
@@ -1621,14 +1947,14 @@ static bool read_mailbox_1(void)
 static bool read_mailbox_2(void)
 {
        mb2_transfer.ack.status = readb(tcdm_base + PRCM_ACK_MB2_DPS_STATUS);
-       writel(MBOX_BIT(2), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+       writel(MBOX_BIT(2), PRCM_ARM_IT1_CLR);
        complete(&mb2_transfer.work);
        return false;
 }
 
 static bool read_mailbox_3(void)
 {
-       writel(MBOX_BIT(3), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+       writel(MBOX_BIT(3), PRCM_ARM_IT1_CLR);
        return false;
 }
 
@@ -1643,6 +1969,11 @@ static bool read_mailbox_4(void)
        case MB4H_HOTDOG:
        case MB4H_HOTMON:
        case MB4H_HOT_PERIOD:
+       case MB4H_A9WDOG_CONF:
+       case MB4H_A9WDOG_EN:
+       case MB4H_A9WDOG_DIS:
+       case MB4H_A9WDOG_LOAD:
+       case MB4H_A9WDOG_KICK:
                break;
        default:
                print_unknown_header_warning(4, header);
@@ -1650,7 +1981,7 @@ static bool read_mailbox_4(void)
                break;
        }
 
-       writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+       writel(MBOX_BIT(4), PRCM_ARM_IT1_CLR);
 
        if (do_complete)
                complete(&mb4_transfer.work);
@@ -1662,20 +1993,20 @@ static bool read_mailbox_5(void)
 {
        mb5_transfer.ack.status = readb(tcdm_base + PRCM_ACK_MB5_I2C_STATUS);
        mb5_transfer.ack.value = readb(tcdm_base + PRCM_ACK_MB5_I2C_VAL);
-       writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+       writel(MBOX_BIT(5), PRCM_ARM_IT1_CLR);
        complete(&mb5_transfer.work);
        return false;
 }
 
 static bool read_mailbox_6(void)
 {
-       writel(MBOX_BIT(6), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+       writel(MBOX_BIT(6), PRCM_ARM_IT1_CLR);
        return false;
 }
 
 static bool read_mailbox_7(void)
 {
-       writel(MBOX_BIT(7), (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+       writel(MBOX_BIT(7), PRCM_ARM_IT1_CLR);
        return false;
 }
 
@@ -1696,7 +2027,7 @@ static irqreturn_t prcmu_irq_handler(int irq, void *data)
        u8 n;
        irqreturn_t r;
 
-       bits = (readl(_PRCMU_BASE + PRCM_ARM_IT1_VAL) & ALL_MBOX_BITS);
+       bits = (readl(PRCM_ARM_IT1_VAL) & ALL_MBOX_BITS);
        if (unlikely(!bits))
                return IRQ_NONE;
 
@@ -1768,7 +2099,7 @@ static struct irq_chip prcmu_irq_chip = {
        .irq_unmask     = prcmu_irq_unmask,
 };
 
-void __init prcmu_early_init(void)
+void __init db8500_prcmu_early_init(void)
 {
        unsigned int i;
 
@@ -1826,6 +2157,16 @@ void __init prcmu_early_init(void)
        }
 }
 
+static void __init db8500_prcmu_init_clkforce(void)
+{
+       u32 val;
+
+       val = readl(PRCM_A9PL_FORCE_CLKEN);
+       val &= ~(PRCM_A9PL_FORCE_CLKEN_PRCM_A9PL_FORCE_CLKEN |
+               PRCM_A9PL_FORCE_CLKEN_PRCM_A9AXI_FORCE_CLKEN);
+       writel(val, (PRCM_A9PL_FORCE_CLKEN));
+}
+
 /*
  * Power domain switches (ePODs) modeled as regulators for the DB8500 SoC
  */
@@ -1861,7 +2202,42 @@ static struct regulator_consumer_supply db8500_vsmps2_consumers[] = {
 
 static struct regulator_consumer_supply db8500_b2r2_mcde_consumers[] = {
        REGULATOR_SUPPLY("vsupply", "b2r2.0"),
-       REGULATOR_SUPPLY("vsupply", "mcde.0"),
+       REGULATOR_SUPPLY("vsupply", "mcde"),
+};
+
+/* SVA MMDSP regulator switch */
+static struct regulator_consumer_supply db8500_svammdsp_consumers[] = {
+       REGULATOR_SUPPLY("sva-mmdsp", "cm_control"),
+};
+
+/* SVA pipe regulator switch */
+static struct regulator_consumer_supply db8500_svapipe_consumers[] = {
+       REGULATOR_SUPPLY("sva-pipe", "cm_control"),
+};
+
+/* SIA MMDSP regulator switch */
+static struct regulator_consumer_supply db8500_siammdsp_consumers[] = {
+       REGULATOR_SUPPLY("sia-mmdsp", "cm_control"),
+};
+
+/* SIA pipe regulator switch */
+static struct regulator_consumer_supply db8500_siapipe_consumers[] = {
+       REGULATOR_SUPPLY("sia-pipe", "cm_control"),
+};
+
+static struct regulator_consumer_supply db8500_sga_consumers[] = {
+       REGULATOR_SUPPLY("v-mali", NULL),
+};
+
+/* ESRAM1 and 2 regulator switch */
+static struct regulator_consumer_supply db8500_esram12_consumers[] = {
+       REGULATOR_SUPPLY("esram12", "cm_control"),
+};
+
+/* ESRAM3 and 4 regulator switch */
+static struct regulator_consumer_supply db8500_esram34_consumers[] = {
+       REGULATOR_SUPPLY("v-esram34", "mcde"),
+       REGULATOR_SUPPLY("esram34", "cm_control"),
 };
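
The consumer-supply tables above let client drivers find the ePOD switches by supply name through the regulator framework. A hypothetical consumer matching the "v-mali" entry (whose NULL dev_name lets any device claim it):

	/* Hypothetical Mali consumer code; error handling elided
	 * beyond the regulator_get() check. */
	struct regulator *mali_reg;

	mali_reg = regulator_get(NULL, "v-mali");
	if (IS_ERR(mali_reg))
		return PTR_ERR(mali_reg);
	regulator_enable(mali_reg);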
 
 static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
@@ -1923,6 +2299,8 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
                        .name = "db8500-sva-mmdsp",
                        .valid_ops_mask = REGULATOR_CHANGE_STATUS,
                },
+               .consumer_supplies = db8500_svammdsp_consumers,
+               .num_consumer_supplies = ARRAY_SIZE(db8500_svammdsp_consumers),
        },
        [DB8500_REGULATOR_SWITCH_SVAMMDSPRET] = {
                .constraints = {
@@ -1937,6 +2315,8 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
                        .name = "db8500-sva-pipe",
                        .valid_ops_mask = REGULATOR_CHANGE_STATUS,
                },
+               .consumer_supplies = db8500_svapipe_consumers,
+               .num_consumer_supplies = ARRAY_SIZE(db8500_svapipe_consumers),
        },
        [DB8500_REGULATOR_SWITCH_SIAMMDSP] = {
                .supply_regulator = "db8500-vape",
@@ -1944,6 +2324,8 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
                        .name = "db8500-sia-mmdsp",
                        .valid_ops_mask = REGULATOR_CHANGE_STATUS,
                },
+               .consumer_supplies = db8500_siammdsp_consumers,
+               .num_consumer_supplies = ARRAY_SIZE(db8500_siammdsp_consumers),
        },
        [DB8500_REGULATOR_SWITCH_SIAMMDSPRET] = {
                .constraints = {
@@ -1957,6 +2339,8 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
                        .name = "db8500-sia-pipe",
                        .valid_ops_mask = REGULATOR_CHANGE_STATUS,
                },
+               .consumer_supplies = db8500_siapipe_consumers,
+               .num_consumer_supplies = ARRAY_SIZE(db8500_siapipe_consumers),
        },
        [DB8500_REGULATOR_SWITCH_SGA] = {
                .supply_regulator = "db8500-vape",
@@ -1964,6 +2348,9 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
                        .name = "db8500-sga",
                        .valid_ops_mask = REGULATOR_CHANGE_STATUS,
                },
+               .consumer_supplies = db8500_sga_consumers,
+               .num_consumer_supplies = ARRAY_SIZE(db8500_sga_consumers),
        },
        [DB8500_REGULATOR_SWITCH_B2R2_MCDE] = {
                .supply_regulator = "db8500-vape",
@@ -1980,6 +2367,8 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
                        .name = "db8500-esram12",
                        .valid_ops_mask = REGULATOR_CHANGE_STATUS,
                },
+               .consumer_supplies = db8500_esram12_consumers,
+               .num_consumer_supplies = ARRAY_SIZE(db8500_esram12_consumers),
        },
        [DB8500_REGULATOR_SWITCH_ESRAM12RET] = {
                .constraints = {
@@ -1993,6 +2382,8 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
                        .name = "db8500-esram34",
                        .valid_ops_mask = REGULATOR_CHANGE_STATUS,
                },
+               .consumer_supplies = db8500_esram34_consumers,
+               .num_consumer_supplies = ARRAY_SIZE(db8500_esram34_consumers),
        },
        [DB8500_REGULATOR_SWITCH_ESRAM34RET] = {
                .constraints = {
@@ -2024,8 +2415,10 @@ static int __init db8500_prcmu_probe(struct platform_device *pdev)
        if (ux500_is_svp())
                return -ENODEV;
 
+       db8500_prcmu_init_clkforce();
+
        /* Clean up the mailbox interrupts after pre-kernel code. */
-       writel(ALL_MBOX_BITS, (_PRCMU_BASE + PRCM_ARM_IT1_CLR));
+       writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLR);
 
        err = request_threaded_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler,
                prcmu_irq_thread_fn, IRQF_NO_SUSPEND, "prcmu", NULL);
diff --git a/drivers/mfd/dbx500-prcmu-regs.h b/drivers/mfd/dbx500-prcmu-regs.h
new file mode 100644 (file)
index 0000000..ec22e9f
--- /dev/null
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) STMicroelectronics 2009
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * Author: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
+ * Author: Sundar Iyer <sundar.iyer@stericsson.com>
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * PRCM Unit registers
+ */
+
+#ifndef __DB8500_PRCMU_REGS_H
+#define __DB8500_PRCMU_REGS_H
+
+#include <mach/hardware.h>
+
+#define BITS(_start, _end) ((BIT(_end) - BIT(_start)) + BIT(_end))
+
+#define PRCM_SVACLK_MGT_OFF            0x008
+#define PRCM_SIACLK_MGT_OFF            0x00C
+#define PRCM_SGACLK_MGT_OFF            0x014
+#define PRCM_UARTCLK_MGT_OFF           0x018
+#define PRCM_MSP02CLK_MGT_OFF          0x01C
+#define PRCM_I2CCLK_MGT_OFF            0x020
+#define PRCM_SDMMCCLK_MGT_OFF          0x024
+#define PRCM_SLIMCLK_MGT_OFF           0x028
+#define PRCM_PER1CLK_MGT_OFF           0x02C
+#define PRCM_PER2CLK_MGT_OFF           0x030
+#define PRCM_PER3CLK_MGT_OFF           0x034
+#define PRCM_PER5CLK_MGT_OFF           0x038
+#define PRCM_PER6CLK_MGT_OFF           0x03C
+#define PRCM_PER7CLK_MGT_OFF           0x040
+#define PRCM_PWMCLK_MGT_OFF            0x044 /* for DB5500 */
+#define PRCM_IRDACLK_MGT_OFF           0x048 /* for DB5500 */
+#define PRCM_IRRCCLK_MGT_OFF           0x04C /* for DB5500 */
+#define PRCM_LCDCLK_MGT_OFF            0x044
+#define PRCM_BMLCLK_MGT_OFF            0x04C
+#define PRCM_HSITXCLK_MGT_OFF          0x050
+#define PRCM_HSIRXCLK_MGT_OFF          0x054
+#define PRCM_HDMICLK_MGT_OFF           0x058
+#define PRCM_APEATCLK_MGT_OFF          0x05C
+#define PRCM_APETRACECLK_MGT_OFF       0x060
+#define PRCM_MCDECLK_MGT_OFF           0x064
+#define PRCM_IPI2CCLK_MGT_OFF          0x068
+#define PRCM_DSIALTCLK_MGT_OFF         0x06C
+#define PRCM_DMACLK_MGT_OFF            0x074
+#define PRCM_B2R2CLK_MGT_OFF           0x078
+#define PRCM_TVCLK_MGT_OFF             0x07C
+#define PRCM_UNIPROCLK_MGT_OFF         0x278
+#define PRCM_SSPCLK_MGT_OFF            0x280
+#define PRCM_RNGCLK_MGT_OFF            0x284
+#define PRCM_UICCCLK_MGT_OFF           0x27C
+#define PRCM_MSP1CLK_MGT_OFF           0x288
+
+#define PRCM_ARM_PLLDIVPS      (_PRCMU_BASE + 0x118)
+#define PRCM_ARM_PLLDIVPS_ARM_BRM_RATE         0x3f
+#define PRCM_ARM_PLLDIVPS_MAX_MASK             0xf
+
+#define PRCM_PLLARM_LOCKP       (_PRCMU_BASE + 0x0a8)
+#define PRCM_PLLARM_LOCKP_PRCM_PLLARM_LOCKP3   0x2
+
+#define PRCM_ARM_CHGCLKREQ     (_PRCMU_BASE + 0x114)
+#define PRCM_ARM_CHGCLKREQ_PRCM_ARM_CHGCLKREQ  0x1
+
+#define PRCM_PLLARM_ENABLE     (_PRCMU_BASE + 0x98)
+#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_ENABLE  0x1
+#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_COUNTON 0x100
+
+#define PRCM_ARMCLKFIX_MGT     (_PRCMU_BASE + 0x0)
+#define PRCM_A9PL_FORCE_CLKEN  (_PRCMU_BASE + 0x19C)
+#define PRCM_A9_RESETN_CLR     (_PRCMU_BASE + 0x1f4)
+#define PRCM_A9_RESETN_SET     (_PRCMU_BASE + 0x1f0)
+#define PRCM_ARM_LS_CLAMP      (_PRCMU_BASE + 0x30c)
+#define PRCM_SRAM_A9           (_PRCMU_BASE + 0x308)
+
+#define PRCM_A9PL_FORCE_CLKEN_PRCM_A9PL_FORCE_CLKEN BIT(0)
+#define PRCM_A9PL_FORCE_CLKEN_PRCM_A9AXI_FORCE_CLKEN BIT(1)
+
+/* ARM WFI Standby signal register */
+#define PRCM_ARM_WFI_STANDBY    (_PRCMU_BASE + 0x130)
+#define PRCM_IOCR              (_PRCMU_BASE + 0x310)
+#define PRCM_IOCR_IOFORCE                      0x1
+
+/* CPU mailbox registers */
+#define PRCM_MBOX_CPU_VAL      (_PRCMU_BASE + 0x0fc)
+#define PRCM_MBOX_CPU_SET      (_PRCMU_BASE + 0x100)
+#define PRCM_MBOX_CPU_CLR      (_PRCMU_BASE + 0x104)
+
+/* Dual A9 core interrupt management unit registers */
+#define PRCM_A9_MASK_REQ       (_PRCMU_BASE + 0x328)
+#define PRCM_A9_MASK_REQ_PRCM_A9_MASK_REQ      0x1
+
+#define PRCM_A9_MASK_ACK       (_PRCMU_BASE + 0x32c)
+#define PRCM_ARMITMSK31TO0     (_PRCMU_BASE + 0x11c)
+#define PRCM_ARMITMSK63TO32    (_PRCMU_BASE + 0x120)
+#define PRCM_ARMITMSK95TO64    (_PRCMU_BASE + 0x124)
+#define PRCM_ARMITMSK127TO96   (_PRCMU_BASE + 0x128)
+#define PRCM_POWER_STATE_VAL   (_PRCMU_BASE + 0x25C)
+#define PRCM_ARMITVAL31TO0     (_PRCMU_BASE + 0x260)
+#define PRCM_ARMITVAL63TO32    (_PRCMU_BASE + 0x264)
+#define PRCM_ARMITVAL95TO64    (_PRCMU_BASE + 0x268)
+#define PRCM_ARMITVAL127TO96   (_PRCMU_BASE + 0x26C)
+
+#define PRCM_HOSTACCESS_REQ    (_PRCMU_BASE + 0x334)
+#define PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ 0x1
+#define ARM_WAKEUP_MODEM       0x1
+
+#define PRCM_ARM_IT1_CLR       (_PRCMU_BASE + 0x48C)
+#define PRCM_ARM_IT1_VAL       (_PRCMU_BASE + 0x494)
+#define PRCM_HOLD_EVT          (_PRCMU_BASE + 0x174)
+
+#define PRCM_MOD_AWAKE_STATUS  (_PRCMU_BASE + 0x4A0)
+#define PRCM_MOD_AWAKE_STATUS_PRCM_MOD_COREPD_AWAKE    BIT(0)
+#define PRCM_MOD_AWAKE_STATUS_PRCM_MOD_AAPD_AWAKE      BIT(1)
+#define PRCM_MOD_AWAKE_STATUS_PRCM_MOD_VMODEM_OFF_ISO  BIT(2)
+
+#define PRCM_ITSTATUS0         (_PRCMU_BASE + 0x148)
+#define PRCM_ITSTATUS1         (_PRCMU_BASE + 0x150)
+#define PRCM_ITSTATUS2         (_PRCMU_BASE + 0x158)
+#define PRCM_ITSTATUS3         (_PRCMU_BASE + 0x160)
+#define PRCM_ITSTATUS4         (_PRCMU_BASE + 0x168)
+#define PRCM_ITSTATUS5         (_PRCMU_BASE + 0x484)
+#define PRCM_ITCLEAR5          (_PRCMU_BASE + 0x488)
+#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018)
+
+/* System reset register */
+#define PRCM_APE_SOFTRST       (_PRCMU_BASE + 0x228)
+
+/* Level shifter and clamp control registers */
+#define PRCM_MMIP_LS_CLAMP_SET     (_PRCMU_BASE + 0x420)
+#define PRCM_MMIP_LS_CLAMP_CLR     (_PRCMU_BASE + 0x424)
+
+/* PRCMU clock/PLL/reset registers */
+#define PRCM_PLLDSI_FREQ           (_PRCMU_BASE + 0x500)
+#define PRCM_PLLDSI_ENABLE         (_PRCMU_BASE + 0x504)
+#define PRCM_PLLDSI_LOCKP          (_PRCMU_BASE + 0x508)
+#define PRCM_LCDCLK_MGT            (_PRCMU_BASE + PRCM_LCDCLK_MGT_OFF)
+#define PRCM_MCDECLK_MGT           (_PRCMU_BASE + PRCM_MCDECLK_MGT_OFF)
+#define PRCM_HDMICLK_MGT           (_PRCMU_BASE + PRCM_HDMICLK_MGT_OFF)
+#define PRCM_TVCLK_MGT             (_PRCMU_BASE + PRCM_TVCLK_MGT_OFF)
+#define PRCM_DSI_PLLOUT_SEL        (_PRCMU_BASE + 0x530)
+#define PRCM_DSITVCLK_DIV          (_PRCMU_BASE + 0x52C)
+#define PRCM_APE_RESETN_SET        (_PRCMU_BASE + 0x1E4)
+#define PRCM_APE_RESETN_CLR        (_PRCMU_BASE + 0x1E8)
+
+#define PRCM_CLKOCR               (_PRCMU_BASE + 0x1CC)
+#define PRCM_CLKOCR_CLKOUT0_REF_CLK    (1 << 0)
+#define PRCM_CLKOCR_CLKOUT0_MASK       BITS(0, 13)
+#define PRCM_CLKOCR_CLKOUT1_REF_CLK    (1 << 16)
+#define PRCM_CLKOCR_CLKOUT1_MASK       BITS(16, 29)
+
+/* ePOD and memory power signal control registers */
+#define PRCM_EPOD_C_SET            (_PRCMU_BASE + 0x410)
+#define PRCM_SRAM_LS_SLEEP         (_PRCMU_BASE + 0x304)
+
+/* Debug power control unit registers */
+#define PRCM_POWER_STATE_SET       (_PRCMU_BASE + 0x254)
+
+/* Miscellaneous unit registers */
+#define PRCM_DSI_SW_RESET          (_PRCMU_BASE + 0x324)
+#define PRCM_GPIOCR                (_PRCMU_BASE + 0x138)
+#define PRCM_GPIOCR_DBG_STM_MOD_CMD1            0x800
+#define PRCM_GPIOCR_DBG_UARTMOD_CMD0            0x1
+
+/* PRCMU HW semaphore */
+#define PRCM_SEM                   (_PRCMU_BASE + 0x400)
+#define PRCM_SEM_PRCM_SEM BIT(0)
+
+#define PRCM_TCR                   (_PRCMU_BASE + 0x1C8)
+#define PRCM_TCR_TENSEL_MASK       BITS(0, 7)
+#define PRCM_TCR_STOP_TIMERS       BIT(16)
+#define PRCM_TCR_DOZE_MODE         BIT(17)
+
+#define PRCM_CLKOCR_CLKODIV0_SHIFT     0
+#define PRCM_CLKOCR_CLKODIV0_MASK      BITS(0, 5)
+#define PRCM_CLKOCR_CLKOSEL0_SHIFT     6
+#define PRCM_CLKOCR_CLKOSEL0_MASK      BITS(6, 8)
+#define PRCM_CLKOCR_CLKODIV1_SHIFT     16
+#define PRCM_CLKOCR_CLKODIV1_MASK      BITS(16, 21)
+#define PRCM_CLKOCR_CLKOSEL1_SHIFT     22
+#define PRCM_CLKOCR_CLKOSEL1_MASK      BITS(22, 24)
+#define PRCM_CLKOCR_CLK1TYPE           BIT(28)
+
+#define PRCM_CLK_MGT_CLKPLLDIV_MASK    BITS(0, 4)
+#define PRCM_CLK_MGT_CLKPLLSW_MASK     BITS(5, 7)
+#define PRCM_CLK_MGT_CLKEN             BIT(8)
+
+/* GPIOCR register */
+#define PRCM_GPIOCR_SPI2_SELECT BIT(23)
+
+#define PRCM_DDR_SUBSYS_APE_MINBW      (_PRCMU_BASE + 0x438)
+#define PRCM_CGATING_BYPASS            (_PRCMU_BASE + 0x134)
+#define PRCM_CGATING_BYPASS_ICN2       BIT(6)
+
+/* Miscellaneous unit registers */
+#define PRCM_RESOUTN_SET               (_PRCMU_BASE + 0x214)
+#define PRCM_RESOUTN_CLR               (_PRCMU_BASE + 0x218)
+
+#endif /* __DB8500_PRCMU_REGS_H */
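
A minimal usage sketch (hypothetical helper, not part of the patch): since
the macros above already fold in _PRCMU_BASE, callers pass the symbolic name
straight to the MMIO accessors, exactly as the db8500-prcmu.c hunk above
now does:

	static inline void prcmu_clear_it1_bits(u32 bits)
	{
		/* PRCM_ARM_IT1_CLR is absolute; no base arithmetic needed */
		writel(bits, PRCM_ARM_IT1_CLR);
	}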
diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c
new file mode 100644 (file)
index 0000000..97c2776
--- /dev/null
@@ -0,0 +1,502 @@
+/*
+ * Driver for Intel MSIC
+ *
+ * Copyright (C) 2011, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/intel_msic.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/intel_scu_ipc.h>
+
+#define MSIC_VENDOR(id)                ((id >> 6) & 3)
+#define MSIC_VERSION(id)       (id & 0x3f)
+#define MSIC_MAJOR(id)         ('A' + ((id >> 3) & 7))
+#define MSIC_MINOR(id)         (id & 7)
+
+/*
+ * MSIC interrupt tree is readable from SRAM at INTEL_MSIC_IRQ_PHYS_BASE.
+ * Since the IRQ block starts at address 0x002, we need to subtract that
+ * from the actual IRQ status register address.
+ */
+#define MSIC_IRQ_STATUS(x)     (INTEL_MSIC_IRQ_PHYS_BASE + ((x) - 2))
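+/*
+ * With the IRQ block starting at 0x002, MSIC_IRQ_STATUS(0x002) maps to
+ * INTEL_MSIC_IRQ_PHYS_BASE itself.
+ */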
+#define MSIC_IRQ_STATUS_ACCDET MSIC_IRQ_STATUS(INTEL_MSIC_ACCDET)
+
+/*
+ * The SCU hardware has a limitation of 16 bytes per read/write buffer
+ * on Medfield.
+ */
+#define SCU_IPC_RWBUF_LIMIT    16
+
+/**
+ * struct intel_msic - an MSIC MFD instance
+ * @pdev: pointer to the platform device
+ * @vendor: vendor ID
+ * @version: chip version
+ * @irq_base: base address of the mapped MSIC SRAM interrupt tree
+ */
+struct intel_msic {
+       struct platform_device          *pdev;
+       unsigned                        vendor;
+       unsigned                        version;
+       void __iomem                    *irq_base;
+};
+
+static struct resource msic_touch_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+static struct resource msic_adc_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+static struct resource msic_battery_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+static struct resource msic_gpio_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+static struct resource msic_audio_resources[] = {
+       {
+               .name           = "IRQ",
+               .flags          = IORESOURCE_IRQ,
+       },
+       /*
+        * We will pass IRQ_BASE to the driver now but this can be removed
+        * when/if the driver starts to use intel_msic_irq_read().
+        */
+       {
+               .name           = "IRQ_BASE",
+               .flags          = IORESOURCE_MEM,
+               .start          = MSIC_IRQ_STATUS_ACCDET,
+               .end            = MSIC_IRQ_STATUS_ACCDET,
+       },
+};
+
+static struct resource msic_hdmi_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+static struct resource msic_thermal_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+static struct resource msic_power_btn_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+static struct resource msic_ocd_resources[] = {
+       {
+               .flags          = IORESOURCE_IRQ,
+       },
+};
+
+/*
+ * Devices that are part of the MSIC and are available via the
+ * firmware-populated SFI DEVS table.
+ */
+static struct mfd_cell msic_devs[] = {
+       [INTEL_MSIC_BLOCK_TOUCH]        = {
+               .name                   = "msic_touch",
+               .num_resources          = ARRAY_SIZE(msic_touch_resources),
+               .resources              = msic_touch_resources,
+       },
+       [INTEL_MSIC_BLOCK_ADC]          = {
+               .name                   = "msic_adc",
+               .num_resources          = ARRAY_SIZE(msic_adc_resources),
+               .resources              = msic_adc_resources,
+       },
+       [INTEL_MSIC_BLOCK_BATTERY]      = {
+               .name                   = "msic_battery",
+               .num_resources          = ARRAY_SIZE(msic_battery_resources),
+               .resources              = msic_battery_resources,
+       },
+       [INTEL_MSIC_BLOCK_GPIO]         = {
+               .name                   = "msic_gpio",
+               .num_resources          = ARRAY_SIZE(msic_gpio_resources),
+               .resources              = msic_gpio_resources,
+       },
+       [INTEL_MSIC_BLOCK_AUDIO]        = {
+               .name                   = "msic_audio",
+               .num_resources          = ARRAY_SIZE(msic_audio_resources),
+               .resources              = msic_audio_resources,
+       },
+       [INTEL_MSIC_BLOCK_HDMI]         = {
+               .name                   = "msic_hdmi",
+               .num_resources          = ARRAY_SIZE(msic_hdmi_resources),
+               .resources              = msic_hdmi_resources,
+       },
+       [INTEL_MSIC_BLOCK_THERMAL]      = {
+               .name                   = "msic_thermal",
+               .num_resources          = ARRAY_SIZE(msic_thermal_resources),
+               .resources              = msic_thermal_resources,
+       },
+       [INTEL_MSIC_BLOCK_POWER_BTN]    = {
+               .name                   = "msic_power_btn",
+               .num_resources          = ARRAY_SIZE(msic_power_btn_resources),
+               .resources              = msic_power_btn_resources,
+       },
+       [INTEL_MSIC_BLOCK_OCD]          = {
+               .name                   = "msic_ocd",
+               .num_resources          = ARRAY_SIZE(msic_ocd_resources),
+               .resources              = msic_ocd_resources,
+       },
+};
+
+/*
+ * Other MSIC-related devices that are not directly available via the SFI
+ * DEVS table. These can be pseudo devices, regulators, etc., needed for
+ * various purposes.
+ *
+ * These devices appear only after the MSIC driver itself is initialized so
+ * we can guarantee that the SCU IPC interface is ready.
+ */
+static struct mfd_cell msic_other_devs[] = {
+       /* Audio codec in the MSIC */
+       {
+               .id                     = -1,
+               .name                   = "sn95031",
+       },
+};
+
+/**
+ * intel_msic_reg_read - read a single MSIC register
+ * @reg: register to read
+ * @val: register value is placed here
+ *
+ * Read a single register from MSIC. Returns %0 on success and negative
+ * errno in case of failure.
+ *
+ * Function may sleep.
+ */
+int intel_msic_reg_read(unsigned short reg, u8 *val)
+{
+       return intel_scu_ipc_ioread8(reg, val);
+}
+EXPORT_SYMBOL_GPL(intel_msic_reg_read);
+
+/**
+ * intel_msic_reg_write - write a single MSIC register
+ * @reg: register to write
+ * @val: value to write to that register
+ *
+ * Write a single MSIC register. Returns %0 on success and negative
+ * errno in case of failure.
+ *
+ * Function may sleep.
+ */
+int intel_msic_reg_write(unsigned short reg, u8 val)
+{
+       return intel_scu_ipc_iowrite8(reg, val);
+}
+EXPORT_SYMBOL_GPL(intel_msic_reg_write);
+
+/**
+ * intel_msic_reg_update - update a single MSIC register
+ * @reg: register to update
+ * @val: value to write to the register
+ * @mask: specifies which of the bits are updated (%0 = don't update,
+ *        %1 = update)
+ *
+ * Perform an update to a register @reg. @mask is used to specify which
+ * bits are updated. Returns %0 in case of success and negative errno in
+ * case of failure.
+ *
+ * Function may sleep.
+ */
+int intel_msic_reg_update(unsigned short reg, u8 val, u8 mask)
+{
+       return intel_scu_ipc_update_register(reg, val, mask);
+}
+EXPORT_SYMBOL_GPL(intel_msic_reg_update);
+
+/**
+ * intel_msic_bulk_read - read an array of registers
+ * @reg: array of register addresses to read
+ * @buf: array where the read values are placed
+ * @count: number of registers to read
+ *
+ * Function reads @count registers from the MSIC using addresses passed in
+ * @reg. Read values are placed in @buf. Reads are performed atomically
+ * with respect to the MSIC.
+ *
+ * Returns %0 in case of success and negative errno in case of failure.
+ *
+ * Function may sleep.
+ */
+int intel_msic_bulk_read(unsigned short *reg, u8 *buf, size_t count)
+{
+       if (WARN_ON(count > SCU_IPC_RWBUF_LIMIT))
+               return -EINVAL;
+
+       return intel_scu_ipc_readv(reg, buf, count);
+}
+EXPORT_SYMBOL_GPL(intel_msic_bulk_read);
+
+/**
+ * intel_msic_bulk_write - write an array of values to the MSIC registers
+ * @reg: array of registers to write
+ * @buf: values to write to each register
+ * @count: number of registers to write
+ *
+ * Function writes @count registers in @buf to the MSIC. Writes are
+ * performed atomically with respect to the MSIC. Returns %0 in case of
+ * success and negative errno in case of failure.
+ *
+ * Function may sleep.
+ */
+int intel_msic_bulk_write(unsigned short *reg, u8 *buf, size_t count)
+{
+       if (WARN_ON(count > SCU_IPC_RWBUF_LIMIT))
+               return -EINVAL;
+
+       return intel_scu_ipc_writev(reg, buf, count);
+}
+EXPORT_SYMBOL_GPL(intel_msic_bulk_write);
+
+/**
+ * intel_msic_irq_read - read a register from an MSIC interrupt tree
+ * @msic: MSIC instance
+ * @reg: interrupt register (between %INTEL_MSIC_IRQLVL1 and
+ *      %INTEL_MSIC_RESETIRQ2)
+ * @val: value of the register is placed here
+ *
+ * This function can be used by an MSIC subdevice interrupt handler to read
+ * a register value from the MSIC interrupt tree. In this way subdevice
+ * drivers don't have to map in the interrupt tree themselves but can just
+ * call this function instead.
+ *
+ * Function doesn't sleep and is callable from interrupt context.
+ *
+ * Returns %-EINVAL if @reg is outside of the allowed register region.
+ */
+int intel_msic_irq_read(struct intel_msic *msic, unsigned short reg, u8 *val)
+{
+       if (WARN_ON(reg < INTEL_MSIC_IRQLVL1 || reg > INTEL_MSIC_RESETIRQ2))
+               return -EINVAL;
+
+       *val = readb(msic->irq_base + (reg - INTEL_MSIC_IRQLVL1));
+       return 0;
+}
+EXPORT_SYMBOL_GPL(intel_msic_irq_read);
+
+static int __devinit intel_msic_init_devices(struct intel_msic *msic)
+{
+       struct platform_device *pdev = msic->pdev;
+       struct intel_msic_platform_data *pdata = pdev->dev.platform_data;
+       int ret, i;
+
+       if (pdata->gpio) {
+               struct mfd_cell *cell = &msic_devs[INTEL_MSIC_BLOCK_GPIO];
+
+               cell->platform_data = pdata->gpio;
+               cell->pdata_size = sizeof(*pdata->gpio);
+       }
+
+       if (pdata->ocd) {
+               unsigned gpio = pdata->ocd->gpio;
+
+               ret = gpio_request_one(gpio, GPIOF_IN, "ocd_gpio");
+               if (ret) {
+                       dev_err(&pdev->dev, "failed to register OCD GPIO\n");
+                       return ret;
+               }
+
+               ret = gpio_to_irq(gpio);
+               if (ret < 0) {
+                       dev_err(&pdev->dev, "no IRQ number for OCD GPIO\n");
+                       gpio_free(gpio);
+                       return ret;
+               }
+
+               /* Update the IRQ number for the OCD */
+               pdata->irq[INTEL_MSIC_BLOCK_OCD] = ret;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(msic_devs); i++) {
+               if (!pdata->irq[i])
+                       continue;
+
+               ret = mfd_add_devices(&pdev->dev, -1, &msic_devs[i], 1, NULL,
+                                     pdata->irq[i]);
+               if (ret)
+                       goto fail;
+       }
+
+       ret = mfd_add_devices(&pdev->dev, 0, msic_other_devs,
+                             ARRAY_SIZE(msic_other_devs), NULL, 0);
+       if (ret)
+               goto fail;
+
+       return 0;
+
+fail:
+       mfd_remove_devices(&pdev->dev);
+       if (pdata->ocd)
+               gpio_free(pdata->ocd->gpio);
+
+       return ret;
+}
+
+static void __devexit intel_msic_remove_devices(struct intel_msic *msic)
+{
+       struct platform_device *pdev = msic->pdev;
+       struct intel_msic_platform_data *pdata = pdev->dev.platform_data;
+
+       mfd_remove_devices(&pdev->dev);
+
+       if (pdata->ocd)
+               gpio_free(pdata->ocd->gpio);
+}
+
+static int __devinit intel_msic_probe(struct platform_device *pdev)
+{
+       struct intel_msic_platform_data *pdata = pdev->dev.platform_data;
+       struct intel_msic *msic;
+       struct resource *res;
+       u8 id0, id1;
+       int ret;
+
+       if (!pdata) {
+               dev_err(&pdev->dev, "no platform data passed\n");
+               return -EINVAL;
+       }
+
+       /* First validate that we have an MSIC in place */
+       ret = intel_scu_ipc_ioread8(INTEL_MSIC_ID0, &id0);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to identify the MSIC chip (ID0)\n");
+               return -ENXIO;
+       }
+
+       ret = intel_scu_ipc_ioread8(INTEL_MSIC_ID1, &id1);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to identify the MSIC chip (ID1)\n");
+               return -ENXIO;
+       }
+
+       if (MSIC_VENDOR(id0) != MSIC_VENDOR(id1)) {
+               dev_err(&pdev->dev, "invalid vendor ID: %x, %x\n", id0, id1);
+               return -ENXIO;
+       }
+
+       msic = kzalloc(sizeof(*msic), GFP_KERNEL);
+       if (!msic)
+               return -ENOMEM;
+
+       msic->vendor = MSIC_VENDOR(id0);
+       msic->version = MSIC_VERSION(id0);
+       msic->pdev = pdev;
+
+       /*
+        * Map in the MSIC interrupt tree area in SRAM. This is exposed to
+        * the clients via intel_msic_irq_read().
+        */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(&pdev->dev, "failed to get SRAM iomem resource\n");
+               ret = -ENODEV;
+               goto fail_free_msic;
+       }
+
+       res = request_mem_region(res->start, resource_size(res), pdev->name);
+       if (!res) {
+               ret = -EBUSY;
+               goto fail_free_msic;
+       }
+
+       msic->irq_base = ioremap_nocache(res->start, resource_size(res));
+       if (!msic->irq_base) {
+               dev_err(&pdev->dev, "failed to map SRAM memory\n");
+               ret = -ENOMEM;
+               goto fail_release_region;
+       }
+
+       platform_set_drvdata(pdev, msic);
+
+       ret = intel_msic_init_devices(msic);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to initialize MSIC devices\n");
+               goto fail_unmap_mem;
+       }
+
+       dev_info(&pdev->dev, "Intel MSIC version %c%d (vendor %#x)\n",
+                MSIC_MAJOR(msic->version), MSIC_MINOR(msic->version),
+                msic->vendor);
+
+       return 0;
+
+fail_unmap_mem:
+       iounmap(msic->irq_base);
+fail_release_region:
+       release_mem_region(res->start, resource_size(res));
+fail_free_msic:
+       kfree(msic);
+
+       return ret;
+}
+
+static int __devexit intel_msic_remove(struct platform_device *pdev)
+{
+       struct intel_msic *msic = platform_get_drvdata(pdev);
+       struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+       intel_msic_remove_devices(msic);
+       platform_set_drvdata(pdev, NULL);
+       iounmap(msic->irq_base);
+       release_mem_region(res->start, resource_size(res));
+       kfree(msic);
+
+       return 0;
+}
+
+static struct platform_driver intel_msic_driver = {
+       .probe          = intel_msic_probe,
+       .remove         = __devexit_p(intel_msic_remove),
+       .driver         = {
+               .name   = "intel_msic",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init intel_msic_init(void)
+{
+       return platform_driver_register(&intel_msic_driver);
+}
+module_init(intel_msic_init);
+
+static void __exit intel_msic_exit(void)
+{
+       platform_driver_unregister(&intel_msic_driver);
+}
+module_exit(intel_msic_exit);
+
+MODULE_DESCRIPTION("Driver for Intel MSIC");
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
+MODULE_LICENSE("GPL");
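
A hedged usage sketch (not from the patch): an MSIC subdevice interrupt
handler can read second-level status through intel_msic_irq_read() instead
of mapping the SRAM interrupt tree itself. The handler name and the choice
of INTEL_MSIC_IRQLVL1 are illustrative only:

	static irqreturn_t msic_sub_irq(int irq, void *data)
	{
		struct intel_msic *msic = data;
		u8 status;

		/* safe in hard-IRQ context: the helper only does a readb() */
		if (intel_msic_irq_read(msic, INTEL_MSIC_IRQLVL1, &status))
			return IRQ_NONE;

		return status ? IRQ_HANDLED : IRQ_NONE;
	}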
index 563654c9b19eee17165f7b916516853ddafddeca..1e9ee533eacb8d204e860f9881d58e2675adf126 100644 (file)
@@ -328,7 +328,7 @@ static int __devexit jz4740_adc_remove(struct platform_device *pdev)
        return 0;
 }
 
-struct platform_driver jz4740_adc_driver = {
+static struct platform_driver jz4740_adc_driver = {
        .probe  = jz4740_adc_probe,
        .remove = __devexit_p(jz4740_adc_remove),
        .driver = {
index f83103b8970d09c2c8166049465a29a280067f78..dc58750bb71bbfc52c45fd8afe93be1d5d288be7 100644 (file)
@@ -23,6 +23,7 @@
 
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/mutex.h>
 #include <linux/mfd/core.h>
@@ -142,7 +143,6 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
 
        max8997->irq_base = pdata->irq_base;
        max8997->ono = pdata->ono;
-       max8997->wakeup = pdata->wakeup;
 
        mutex_init(&max8997->iolock);
 
@@ -169,6 +169,9 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
        if (ret < 0)
                goto err_mfd;
 
+       /* MAX8997 has a power button input. */
+       device_init_wakeup(max8997->dev, pdata->wakeup);
+
        return ret;
 
 err_mfd:
@@ -398,7 +401,29 @@ static int max8997_restore(struct device *dev)
        return 0;
 }
 
+static int max8997_suspend(struct device *dev)
+{
+       struct i2c_client *i2c = container_of(dev, struct i2c_client, dev);
+       struct max8997_dev *max8997 = i2c_get_clientdata(i2c);
+
+       if (device_may_wakeup(dev))
+               irq_set_irq_wake(max8997->irq, 1);
+       return 0;
+}
+
+static int max8997_resume(struct device *dev)
+{
+       struct i2c_client *i2c = container_of(dev, struct i2c_client, dev);
+       struct max8997_dev *max8997 = i2c_get_clientdata(i2c);
+
+       if (device_may_wakeup(dev))
+               irq_set_irq_wake(max8997->irq, 0);
+       return max8997_irq_resume(max8997);
+}
+
 const struct dev_pm_ops max8997_pm = {
+       .suspend = max8997_suspend,
+       .resume = max8997_resume,
        .freeze = max8997_freeze,
        .restore = max8997_restore,
 };
index 7e4d44bf92ab90b10725ac14310782b1b6143e31..e9619acc02375342276d84e5d9aee83e7a2597a8 100644 (file)
@@ -26,20 +26,10 @@ struct mc13xxx {
 
        irq_handler_t irqhandler[MC13XXX_NUM_IRQ];
        void *irqdata[MC13XXX_NUM_IRQ];
-};
-
-struct mc13783 {
-       struct mc13xxx mc13xxx;
 
        int adcflags;
 };
 
-struct mc13xxx *mc13783_to_mc13xxx(struct mc13783 *mc13783)
-{
-       return &mc13783->mc13xxx;
-}
-EXPORT_SYMBOL(mc13783_to_mc13xxx);
-
 #define MC13XXX_IRQSTAT0       0
 #define MC13XXX_IRQSTAT0_ADCDONEI      (1 << 0)
 #define MC13XXX_IRQSTAT0_ADCBISDONEI   (1 << 1)
@@ -136,14 +126,14 @@ EXPORT_SYMBOL(mc13783_to_mc13xxx);
 #define MC13XXX_REVISION_FAB           (0x03 << 11)
 #define MC13XXX_REVISION_ICIDCODE      (0x3f << 13)
 
-#define MC13783_ADC1           44
-#define MC13783_ADC1_ADEN              (1 << 0)
-#define MC13783_ADC1_RAND              (1 << 1)
-#define MC13783_ADC1_ADSEL             (1 << 3)
-#define MC13783_ADC1_ASC               (1 << 20)
-#define MC13783_ADC1_ADTRIGIGN         (1 << 21)
+#define MC13XXX_ADC1           44
+#define MC13XXX_ADC1_ADEN              (1 << 0)
+#define MC13XXX_ADC1_RAND              (1 << 1)
+#define MC13XXX_ADC1_ADSEL             (1 << 3)
+#define MC13XXX_ADC1_ASC               (1 << 20)
+#define MC13XXX_ADC1_ADTRIGIGN         (1 << 21)
 
-#define MC13783_ADC2           45
+#define MC13XXX_ADC2           45
 
 #define MC13XXX_NUMREGS 0x3f
 
@@ -487,7 +477,7 @@ enum mc13xxx_id {
        MC13XXX_ID_INVALID,
 };
 
-const char *mc13xxx_chipname[] = {
+static const char *mc13xxx_chipname[] = {
        [MC13XXX_ID_MC13783] = "mc13783",
        [MC13XXX_ID_MC13892] = "mc13892",
 };
@@ -558,8 +548,6 @@ static const char *mc13xxx_get_chipname(struct mc13xxx *mc13xxx)
        return mc13xxx_chipname[devid->driver_data];
 }
 
-#include <linux/mfd/mc13783.h>
-
 int mc13xxx_get_flags(struct mc13xxx *mc13xxx)
 {
        struct mc13xxx_platform_data *pdata =
@@ -569,15 +557,15 @@ int mc13xxx_get_flags(struct mc13xxx *mc13xxx)
 }
 EXPORT_SYMBOL(mc13xxx_get_flags);
 
-#define MC13783_ADC1_CHAN0_SHIFT       5
-#define MC13783_ADC1_CHAN1_SHIFT       8
+#define MC13XXX_ADC1_CHAN0_SHIFT       5
+#define MC13XXX_ADC1_CHAN1_SHIFT       8
 
 struct mc13xxx_adcdone_data {
        struct mc13xxx *mc13xxx;
        struct completion done;
 };
 
-static irqreturn_t mc13783_handler_adcdone(int irq, void *data)
+static irqreturn_t mc13xxx_handler_adcdone(int irq, void *data)
 {
        struct mc13xxx_adcdone_data *adcdone_data = data;
 
@@ -588,12 +576,11 @@ static irqreturn_t mc13783_handler_adcdone(int irq, void *data)
        return IRQ_HANDLED;
 }
 
-#define MC13783_ADC_WORKING (1 << 0)
+#define MC13XXX_ADC_WORKING (1 << 0)
 
-int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode,
+int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode,
                unsigned int channel, unsigned int *sample)
 {
-       struct mc13xxx *mc13xxx = &mc13783->mc13xxx;
        u32 adc0, adc1, old_adc0;
        int i, ret;
        struct mc13xxx_adcdone_data adcdone_data = {
@@ -605,51 +592,51 @@ int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode,
 
        mc13xxx_lock(mc13xxx);
 
-       if (mc13783->adcflags & MC13783_ADC_WORKING) {
+       if (mc13xxx->adcflags & MC13XXX_ADC_WORKING) {
                ret = -EBUSY;
                goto out;
        }
 
-       mc13783->adcflags |= MC13783_ADC_WORKING;
+       mc13xxx->adcflags |= MC13XXX_ADC_WORKING;
 
-       mc13xxx_reg_read(mc13xxx, MC13783_ADC0, &old_adc0);
+       mc13xxx_reg_read(mc13xxx, MC13XXX_ADC0, &old_adc0);
 
-       adc0 = MC13783_ADC0_ADINC1 | MC13783_ADC0_ADINC2;
-       adc1 = MC13783_ADC1_ADEN | MC13783_ADC1_ADTRIGIGN | MC13783_ADC1_ASC;
+       adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2;
+       adc1 = MC13XXX_ADC1_ADEN | MC13XXX_ADC1_ADTRIGIGN | MC13XXX_ADC1_ASC;
 
        if (channel > 7)
-               adc1 |= MC13783_ADC1_ADSEL;
+               adc1 |= MC13XXX_ADC1_ADSEL;
 
        switch (mode) {
-       case MC13783_ADC_MODE_TS:
-               adc0 |= MC13783_ADC0_ADREFEN | MC13783_ADC0_TSMOD0 |
-                       MC13783_ADC0_TSMOD1;
-               adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT;
+       case MC13XXX_ADC_MODE_TS:
+               adc0 |= MC13XXX_ADC0_ADREFEN | MC13XXX_ADC0_TSMOD0 |
+                       MC13XXX_ADC0_TSMOD1;
+               adc1 |= 4 << MC13XXX_ADC1_CHAN1_SHIFT;
                break;
 
-       case MC13783_ADC_MODE_SINGLE_CHAN:
-               adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK;
-               adc1 |= (channel & 0x7) << MC13783_ADC1_CHAN0_SHIFT;
-               adc1 |= MC13783_ADC1_RAND;
+       case MC13XXX_ADC_MODE_SINGLE_CHAN:
+               adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK;
+               adc1 |= (channel & 0x7) << MC13XXX_ADC1_CHAN0_SHIFT;
+               adc1 |= MC13XXX_ADC1_RAND;
                break;
 
-       case MC13783_ADC_MODE_MULT_CHAN:
-               adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK;
-               adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT;
+       case MC13XXX_ADC_MODE_MULT_CHAN:
+               adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK;
+               adc1 |= 4 << MC13XXX_ADC1_CHAN1_SHIFT;
                break;
 
        default:
-               mc13783_unlock(mc13783);
+               mc13xxx_unlock(mc13xxx);
                return -EINVAL;
        }
 
-       dev_dbg(&mc13783->mc13xxx.spidev->dev, "%s: request irq\n", __func__);
-       mc13xxx_irq_request(mc13xxx, MC13783_IRQ_ADCDONE,
-                       mc13783_handler_adcdone, __func__, &adcdone_data);
-       mc13xxx_irq_ack(mc13xxx, MC13783_IRQ_ADCDONE);
+       dev_dbg(&mc13xxx->spidev->dev, "%s: request irq\n", __func__);
+       mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE,
+                       mc13xxx_handler_adcdone, __func__, &adcdone_data);
+       mc13xxx_irq_ack(mc13xxx, MC13XXX_IRQ_ADCDONE);
 
-       mc13xxx_reg_write(mc13xxx, MC13783_ADC0, adc0);
-       mc13xxx_reg_write(mc13xxx, MC13783_ADC1, adc1);
+       mc13xxx_reg_write(mc13xxx, MC13XXX_ADC0, adc0);
+       mc13xxx_reg_write(mc13xxx, MC13XXX_ADC1, adc1);
 
        mc13xxx_unlock(mc13xxx);
 
@@ -660,27 +647,27 @@ int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode,
 
        mc13xxx_lock(mc13xxx);
 
-       mc13xxx_irq_free(mc13xxx, MC13783_IRQ_ADCDONE, &adcdone_data);
+       mc13xxx_irq_free(mc13xxx, MC13XXX_IRQ_ADCDONE, &adcdone_data);
 
        if (ret > 0)
                for (i = 0; i < 4; ++i) {
                        ret = mc13xxx_reg_read(mc13xxx,
-                                       MC13783_ADC2, &sample[i]);
+                                       MC13XXX_ADC2, &sample[i]);
                        if (ret)
                                break;
                }
 
-       if (mode == MC13783_ADC_MODE_TS)
+       if (mode == MC13XXX_ADC_MODE_TS)
                /* restore TSMOD */
-               mc13xxx_reg_write(mc13xxx, MC13783_ADC0, old_adc0);
+               mc13xxx_reg_write(mc13xxx, MC13XXX_ADC0, old_adc0);
 
-       mc13783->adcflags &= ~MC13783_ADC_WORKING;
+       mc13xxx->adcflags &= ~MC13XXX_ADC_WORKING;
 out:
        mc13xxx_unlock(mc13xxx);
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(mc13783_adc_do_conversion);
+EXPORT_SYMBOL_GPL(mc13xxx_adc_do_conversion);
 
 static int mc13xxx_add_subdevice_pdata(struct mc13xxx *mc13xxx,
                const char *format, void *pdata, size_t pdata_size)
@@ -716,6 +703,11 @@ static int mc13xxx_probe(struct spi_device *spi)
        enum mc13xxx_id id;
        int ret;
 
+       if (!pdata) {
+               dev_err(&spi->dev, "invalid platform data\n");
+               return -EINVAL;
+       }
+
        mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL);
        if (!mc13xxx)
                return -ENOMEM;
@@ -763,10 +755,8 @@ err_revision:
        if (pdata->flags & MC13XXX_USE_CODEC)
                mc13xxx_add_subdevice(mc13xxx, "%s-codec");
 
-       if (pdata->flags & MC13XXX_USE_REGULATOR) {
-               mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator",
-                               &pdata->regulators, sizeof(pdata->regulators));
-       }
+       mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator",
+               &pdata->regulators, sizeof(pdata->regulators));
 
        if (pdata->flags & MC13XXX_USE_RTC)
                mc13xxx_add_subdevice(mc13xxx, "%s-rtc");
@@ -774,10 +764,14 @@ err_revision:
        if (pdata->flags & MC13XXX_USE_TOUCHSCREEN)
                mc13xxx_add_subdevice(mc13xxx, "%s-ts");
 
-       if (pdata->flags & MC13XXX_USE_LED)
+       if (pdata->leds)
                mc13xxx_add_subdevice_pdata(mc13xxx, "%s-led",
                                pdata->leds, sizeof(*pdata->leds));
 
+       if (pdata->buttons)
+               mc13xxx_add_subdevice_pdata(mc13xxx, "%s-pwrbutton",
+                               pdata->buttons, sizeof(*pdata->buttons));
+
        return 0;
 }
 
index af5d9d0613716177bbd0dd53d6940dd5cbe7f596..cb4910ac4d12edad3ba31be42a3e2be0678fcb8a 100644 (file)
@@ -1226,7 +1226,7 @@ static int menelaus_probe(struct i2c_client *client,
        menelaus_write_reg(MENELAUS_MCT_CTRL1, 0x73);
 
        if (client->irq > 0) {
-               err = request_irq(client->irq, menelaus_irq, IRQF_DISABLED,
+               err = request_irq(client->irq, menelaus_irq, 0,
                                  DRIVER_NAME, menelaus);
                if (err) {
                        dev_dbg(&client->dev,  "can't get IRQ %d, err %d\n",
index 57868416c76069d6e4d5ded2043a2a0403098e7a..ff1a7e741ecdfc6d8eb3d27cacaa47a1b492768a 100644 (file)
 #include <linux/i2c.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
+#include <linux/regmap.h>
+#include <linux/err.h>
 
 #include <linux/mfd/pcf50633/core.h>
 
-static int __pcf50633_read(struct pcf50633 *pcf, u8 reg, int num, u8 *data)
-{
-       int ret;
-
-       ret = i2c_smbus_read_i2c_block_data(pcf->i2c_client, reg,
-                               num, data);
-       if (ret < 0)
-               dev_err(pcf->dev, "Error reading %d regs at %d\n", num, reg);
-
-       return ret;
-}
-
-static int __pcf50633_write(struct pcf50633 *pcf, u8 reg, int num, u8 *data)
-{
-       int ret;
-
-       ret = i2c_smbus_write_i2c_block_data(pcf->i2c_client, reg,
-                               num, data);
-       if (ret < 0)
-               dev_err(pcf->dev, "Error writing %d regs at %d\n", num, reg);
-
-       return ret;
-
-}
-
 /* Read a block of up to 32 regs  */
 int pcf50633_read_block(struct pcf50633 *pcf, u8 reg,
                                        int nr_regs, u8 *data)
 {
        int ret;
 
-       mutex_lock(&pcf->lock);
-       ret = __pcf50633_read(pcf, reg, nr_regs, data);
-       mutex_unlock(&pcf->lock);
+       ret = regmap_raw_read(pcf->regmap, reg, data, nr_regs);
+       if (ret != 0)
+               return ret;
 
-       return ret;
+       return nr_regs;
 }
 EXPORT_SYMBOL_GPL(pcf50633_read_block);
 
@@ -71,21 +48,22 @@ int pcf50633_write_block(struct pcf50633 *pcf , u8 reg,
 {
        int ret;
 
-       mutex_lock(&pcf->lock);
-       ret = __pcf50633_write(pcf, reg, nr_regs, data);
-       mutex_unlock(&pcf->lock);
+       ret = regmap_raw_write(pcf->regmap, reg, data, nr_regs);
+       if (ret != 0)
+               return ret;
 
-       return ret;
+       return nr_regs;
 }
 EXPORT_SYMBOL_GPL(pcf50633_write_block);
 
 u8 pcf50633_reg_read(struct pcf50633 *pcf, u8 reg)
 {
-       u8 val;
+       unsigned int val;
+       int ret;
 
-       mutex_lock(&pcf->lock);
-       __pcf50633_read(pcf, reg, 1, &val);
-       mutex_unlock(&pcf->lock);
+       ret = regmap_read(pcf->regmap, reg, &val);
+       if (ret < 0)
+               return -1;
 
        return val;
 }
@@ -93,56 +71,19 @@ EXPORT_SYMBOL_GPL(pcf50633_reg_read);
 
 int pcf50633_reg_write(struct pcf50633 *pcf, u8 reg, u8 val)
 {
-       int ret;
-
-       mutex_lock(&pcf->lock);
-       ret = __pcf50633_write(pcf, reg, 1, &val);
-       mutex_unlock(&pcf->lock);
-
-       return ret;
+       return regmap_write(pcf->regmap, reg, val);
 }
 EXPORT_SYMBOL_GPL(pcf50633_reg_write);
 
 int pcf50633_reg_set_bit_mask(struct pcf50633 *pcf, u8 reg, u8 mask, u8 val)
 {
-       int ret;
-       u8 tmp;
-
-       val &= mask;
-
-       mutex_lock(&pcf->lock);
-       ret = __pcf50633_read(pcf, reg, 1, &tmp);
-       if (ret < 0)
-               goto out;
-
-       tmp &= ~mask;
-       tmp |= val;
-       ret = __pcf50633_write(pcf, reg, 1, &tmp);
-
-out:
-       mutex_unlock(&pcf->lock);
-
-       return ret;
+       return regmap_update_bits(pcf->regmap, reg, mask, val);
 }
 EXPORT_SYMBOL_GPL(pcf50633_reg_set_bit_mask);
 
 int pcf50633_reg_clear_bits(struct pcf50633 *pcf, u8 reg, u8 val)
 {
-       int ret;
-       u8 tmp;
-
-       mutex_lock(&pcf->lock);
-       ret = __pcf50633_read(pcf, reg, 1, &tmp);
-       if (ret < 0)
-               goto out;
-
-       tmp &= ~val;
-       ret = __pcf50633_write(pcf, reg, 1, &tmp);
-
-out:
-       mutex_unlock(&pcf->lock);
-
-       return ret;
+       return regmap_update_bits(pcf->regmap, reg, val, 0);
 }
 EXPORT_SYMBOL_GPL(pcf50633_reg_clear_bits);
 
@@ -251,6 +192,11 @@ static int pcf50633_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(pcf50633_pm, pcf50633_suspend, pcf50633_resume);
 
+static struct regmap_config pcf50633_regmap_config = {
+       .reg_bits = 8,
+       .val_bits = 8,
+};
+
 static int __devinit pcf50633_probe(struct i2c_client *client,
                                const struct i2c_device_id *ids)
 {
@@ -272,16 +218,23 @@ static int __devinit pcf50633_probe(struct i2c_client *client,
 
        mutex_init(&pcf->lock);
 
+       pcf->regmap = regmap_init_i2c(client, &pcf50633_regmap_config);
+       if (IS_ERR(pcf->regmap)) {
+               ret = PTR_ERR(pcf->regmap);
+               dev_err(pcf->dev, "Failed to allocate register map: %d\n",
+                       ret);
+               goto err_free;
+       }
+
        i2c_set_clientdata(client, pcf);
        pcf->dev = &client->dev;
-       pcf->i2c_client = client;
 
        version = pcf50633_reg_read(pcf, 0);
        variant = pcf50633_reg_read(pcf, 1);
        if (version < 0 || variant < 0) {
                dev_err(pcf->dev, "Unable to probe pcf50633\n");
                ret = -ENODEV;
-               goto err_free;
+               goto err_regmap;
        }
 
        dev_info(pcf->dev, "Probed device version %d variant %d\n",
@@ -328,6 +281,8 @@ static int __devinit pcf50633_probe(struct i2c_client *client,
 
        return 0;
 
+err_regmap:
+       regmap_exit(pcf->regmap);
 err_free:
        kfree(pcf);
 
@@ -351,6 +306,7 @@ static int __devexit pcf50633_remove(struct i2c_client *client)
        for (i = 0; i < PCF50633_NUM_REGULATORS; i++)
                platform_device_unregister(pcf->regulator_pdev[i]);
 
+       regmap_exit(pcf->regmap);
        kfree(pcf);
 
        return 0;
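
The regmap conversion above collapses each hand-rolled lock/read-modify-write
sequence into one call, with regmap serializing the access internally. The
idiom in general form:

	/* update only the bits in mask; pass val = 0 to clear them */
	regmap_update_bits(pcf->regmap, reg, mask, val);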
index c27e515b0722b2dfa72263e5bd3725ddd27885fd..de979742c6fc1049cf49f858870034f3885eeb05 100644 (file)
@@ -357,6 +357,7 @@ static int __devexit tc3589x_remove(struct i2c_client *client)
        return 0;
 }
 
+#ifdef CONFIG_PM
 static int tc3589x_suspend(struct device *dev)
 {
        struct tc3589x *tc3589x = dev_get_drvdata(dev);
@@ -387,6 +388,7 @@ static int tc3589x_resume(struct device *dev)
 
 static const SIMPLE_DEV_PM_OPS(tc3589x_dev_pm_ops, tc3589x_suspend,
                                                tc3589x_resume);
+#endif
 
 static const struct i2c_device_id tc3589x_id[] = {
        { "tc3589x", 24 },
index 696879e2eef77b2429fd8b52e77375a5032e45fc..02d65692ceb415a1ae95a863dd660adda46807c9 100644 (file)
@@ -697,7 +697,7 @@ static int __devinit timb_probe(struct pci_dev *dev,
                dev_err(&dev->dev, "The driver supports an older "
                        "version of the FPGA, please update the driver to "
                        "support %d.%d\n", priv->fw.major, priv->fw.minor);
-               goto err_ioremap;
+               goto err_config;
        }
        if (priv->fw.major < TIMB_SUPPORTED_MAJOR ||
                priv->fw.minor < TIMB_REQUIRED_MINOR) {
@@ -705,13 +705,13 @@ static int __devinit timb_probe(struct pci_dev *dev,
                        "please upgrade the FPGA to at least: %d.%d\n",
                        priv->fw.major, priv->fw.minor,
                        TIMB_SUPPORTED_MAJOR, TIMB_REQUIRED_MINOR);
-               goto err_ioremap;
+               goto err_config;
        }
 
        msix_entries = kzalloc(TIMBERDALE_NR_IRQS * sizeof(*msix_entries),
                GFP_KERNEL);
        if (!msix_entries)
-               goto err_ioremap;
+               goto err_config;
 
        for (i = 0; i < TIMBERDALE_NR_IRQS; i++)
                msix_entries[i].entry = i;
@@ -825,6 +825,8 @@ err_mfd:
 err_create_file:
        pci_disable_msix(dev);
 err_msix:
+       kfree(msix_entries);
+err_config:
        iounmap(priv->ctl_membase);
 err_ioremap:
        release_mem_region(priv->ctl_mapbase, CHIPCTLSIZE);
@@ -833,7 +835,6 @@ err_request:
 err_start:
        pci_disable_device(dev);
 err_enable:
-       kfree(msix_entries);
        kfree(priv);
        pci_set_drvdata(dev, NULL);
        return -ENODEV;
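
The relabelled error path above keeps the unwind strictly last-in,
first-out: msix_entries is now freed under err_msix, right after the MSI-X
setup that allocated it, and the new err_config label covers failures
detected after the ioremap. The idiom in general shape (alloc_a()/free_a()
and alloc_b() are placeholders):

	static int sketch_probe(void)
	{
		void *a, *b;

		a = alloc_a();
		if (!a)
			return -ENOMEM;
		b = alloc_b();
		if (!b)
			goto err_free_a;
		return 0;

	err_free_a:
		free_a(a);
		return -ENOMEM;
	}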
index 955bc00e4b20d6038fe6e9e550eb873affd66daf..5fec23a9ac039f34eaf6cf902acbfb6521aa7ab4 100644 (file)
@@ -131,9 +131,6 @@ int tps65912_device_init(struct tps65912 *tps65912)
        if (init_data == NULL)
                return -ENOMEM;
 
-       init_data->irq = pmic_plat_data->irq;
-       init_data->irq_base = pmic_plat_data->irq;
-
        mutex_init(&tps65912->io_mutex);
        dev_set_drvdata(tps65912->dev, tps65912);
 
@@ -153,10 +150,13 @@ int tps65912_device_init(struct tps65912 *tps65912)
        if (ret < 0)
                goto err;
 
+       init_data->irq = pmic_plat_data->irq;
+       init_data->irq_base = pmic_plat_data->irq;
        ret = tps65912_irq_init(tps65912, init_data->irq, init_data);
        if (ret < 0)
                goto err;
 
+       kfree(init_data);
        return ret;
 
 err:
index 01ecfeee6524697d26a08634f705bc28131e41bc..b8eef462737a42222e48c80da56a576960384171 100644 (file)
 #define twl_has_watchdog()        false
 #endif
 
-#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE) ||\
+#if defined(CONFIG_MFD_TWL4030_AUDIO) || defined(CONFIG_MFD_TWL4030_AUDIO_MODULE) ||\
        defined(CONFIG_TWL6040_CORE) || defined(CONFIG_TWL6040_CORE_MODULE)
 #define twl_has_codec()        true
 #else
index 8a7ee3139b86fda3bded6748e572893d37b577fb..f062c8cc6c38f3e40337444b91111646afefde88 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/kthread.h>
 #include <linux/slab.h>
 
 #include <linux/i2c/twl.h>
@@ -278,59 +277,6 @@ static const struct sih sih_modules_twl5031[8] = {
 
 static unsigned twl4030_irq_base;
 
-static struct completion irq_event;
-
-/*
- * This thread processes interrupts reported by the Primary Interrupt Handler.
- */
-static int twl4030_irq_thread(void *data)
-{
-       long irq = (long)data;
-       static unsigned i2c_errors;
-       static const unsigned max_i2c_errors = 100;
-
-
-       current->flags |= PF_NOFREEZE;
-
-       while (!kthread_should_stop()) {
-               int ret;
-               int module_irq;
-               u8 pih_isr;
-
-               /* Wait for IRQ, then read PIH irq status (also blocking) */
-               wait_for_completion_interruptible(&irq_event);
-
-               ret = twl_i2c_read_u8(TWL4030_MODULE_PIH, &pih_isr,
-                                         REG_PIH_ISR_P1);
-               if (ret) {
-                       pr_warning("twl4030: I2C error %d reading PIH ISR\n",
-                                       ret);
-                       if (++i2c_errors >= max_i2c_errors) {
-                               printk(KERN_ERR "Maximum I2C error count"
-                                               " exceeded.  Terminating %s.\n",
-                                               __func__);
-                               break;
-                       }
-                       complete(&irq_event);
-                       continue;
-               }
-
-               /* these handlers deal with the relevant SIH irq status */
-               local_irq_disable();
-               for (module_irq = twl4030_irq_base;
-                               pih_isr;
-                               pih_isr >>= 1, module_irq++) {
-                       if (pih_isr & 0x1)
-                               generic_handle_irq(module_irq);
-               }
-               local_irq_enable();
-
-               enable_irq(irq);
-       }
-
-       return 0;
-}
-
 /*
  * handle_twl4030_pih() is the desc->handle method for the twl4030 interrupt.
  * This is a chained interrupt, so there is no desc->action method for it.
@@ -342,9 +288,25 @@ static int twl4030_irq_thread(void *data)
  */
 static irqreturn_t handle_twl4030_pih(int irq, void *devid)
 {
-       /* Acknowledge, clear *AND* mask the interrupt... */
-       disable_irq_nosync(irq);
-       complete(devid);
+       int             module_irq;
+       irqreturn_t     ret;
+       u8              pih_isr;
+
+       ret = twl_i2c_read_u8(TWL4030_MODULE_PIH, &pih_isr,
+                       REG_PIH_ISR_P1);
+       if (ret) {
+               pr_warning("twl4030: I2C error %d reading PIH ISR\n", ret);
+               return IRQ_NONE;
+       }
+
+       /* these handlers deal with the relevant SIH irq status */
+       for (module_irq = twl4030_irq_base;
+                       pih_isr;
+                       pih_isr >>= 1, module_irq++) {
+               if (pih_isr & 0x1)
+                       handle_nested_irq(module_irq);
+       }
+
        return IRQ_HANDLED;
 }
 /*----------------------------------------------------------------------*/
@@ -460,113 +422,17 @@ static inline void activate_irq(int irq)
 
 /*----------------------------------------------------------------------*/
 
-static DEFINE_SPINLOCK(sih_agent_lock);
-
-static struct workqueue_struct *wq;
-
 struct sih_agent {
        int                     irq_base;
        const struct sih        *sih;
 
        u32                     imr;
        bool                    imr_change_pending;
-       struct work_struct      mask_work;
-
-       u32                     edge_change;
-       struct work_struct      edge_work;
-};
-
-static void twl4030_sih_do_mask(struct work_struct *work)
-{
-       struct sih_agent        *agent;
-       const struct sih        *sih;
-       union {
-               u8      bytes[4];
-               u32     word;
-       }                       imr;
-       int                     status;
 
-       agent = container_of(work, struct sih_agent, mask_work);
-
-       /* see what work we have */
-       spin_lock_irq(&sih_agent_lock);
-       if (agent->imr_change_pending) {
-               sih = agent->sih;
-               /* byte[0] gets overwritten as we write ... */
-               imr.word = cpu_to_le32(agent->imr << 8);
-               agent->imr_change_pending = false;
-       } else
-               sih = NULL;
-       spin_unlock_irq(&sih_agent_lock);
-       if (!sih)
-               return;
-
-       /* write the whole mask ... simpler than subsetting it */
-       status = twl_i2c_write(sih->module, imr.bytes,
-                       sih->mask[irq_line].imr_offset, sih->bytes_ixr);
-       if (status)
-               pr_err("twl4030: %s, %s --> %d\n", __func__,
-                               "write", status);
-}
-
-static void twl4030_sih_do_edge(struct work_struct *work)
-{
-       struct sih_agent        *agent;
-       const struct sih        *sih;
-       u8                      bytes[6];
        u32                     edge_change;
-       int                     status;
-
-       agent = container_of(work, struct sih_agent, edge_work);
-
-       /* see what work we have */
-       spin_lock_irq(&sih_agent_lock);
-       edge_change = agent->edge_change;
-       agent->edge_change = 0;
-       sih = edge_change ? agent->sih : NULL;
-       spin_unlock_irq(&sih_agent_lock);
-       if (!sih)
-               return;
-
-       /* Read, reserving first byte for write scratch.  Yes, this
-        * could be cached for some speedup ... but be careful about
-        * any processor on the other IRQ line, EDR registers are
-        * shared.
-        */
-       status = twl_i2c_read(sih->module, bytes + 1,
-                       sih->edr_offset, sih->bytes_edr);
-       if (status) {
-               pr_err("twl4030: %s, %s --> %d\n", __func__,
-                               "read", status);
-               return;
-       }
-
-       /* Modify only the bits we know must change */
-       while (edge_change) {
-               int             i = fls(edge_change) - 1;
-               struct irq_data *idata = irq_get_irq_data(i + agent->irq_base);
-               int             byte = 1 + (i >> 2);
-               int             off = (i & 0x3) * 2;
-               unsigned int    type;
-
-               bytes[byte] &= ~(0x03 << off);
 
-               type = irqd_get_trigger_type(idata);
-               if (type & IRQ_TYPE_EDGE_RISING)
-                       bytes[byte] |= BIT(off + 1);
-               if (type & IRQ_TYPE_EDGE_FALLING)
-                       bytes[byte] |= BIT(off + 0);
-
-               edge_change &= ~BIT(i);
-       }
-
-       /* Write */
-       status = twl_i2c_write(sih->module, bytes,
-                       sih->edr_offset, sih->bytes_edr);
-       if (status)
-               pr_err("twl4030: %s, %s --> %d\n", __func__,
-                               "write", status);
-}
+       struct mutex            irq_lock;
+};
 
 /*----------------------------------------------------------------------*/
 
@@ -579,50 +445,125 @@ static void twl4030_sih_do_edge(struct work_struct *work)
 
 static void twl4030_sih_mask(struct irq_data *data)
 {
-       struct sih_agent *sih = irq_data_get_irq_chip_data(data);
-       unsigned long flags;
-
-       spin_lock_irqsave(&sih_agent_lock, flags);
-       sih->imr |= BIT(data->irq - sih->irq_base);
-       sih->imr_change_pending = true;
-       queue_work(wq, &sih->mask_work);
-       spin_unlock_irqrestore(&sih_agent_lock, flags);
+       struct sih_agent *agent = irq_data_get_irq_chip_data(data);
+
+       agent->imr |= BIT(data->irq - agent->irq_base);
+       agent->imr_change_pending = true;
 }
 
 static void twl4030_sih_unmask(struct irq_data *data)
 {
-       struct sih_agent *sih = irq_data_get_irq_chip_data(data);
-       unsigned long flags;
-
-       spin_lock_irqsave(&sih_agent_lock, flags);
-       sih->imr &= ~BIT(data->irq - sih->irq_base);
-       sih->imr_change_pending = true;
-       queue_work(wq, &sih->mask_work);
-       spin_unlock_irqrestore(&sih_agent_lock, flags);
+       struct sih_agent *agent = irq_data_get_irq_chip_data(data);
+
+       agent->imr &= ~BIT(data->irq - agent->irq_base);
+       agent->imr_change_pending = true;
 }
 
 static int twl4030_sih_set_type(struct irq_data *data, unsigned trigger)
 {
-       struct sih_agent *sih = irq_data_get_irq_chip_data(data);
-       unsigned long flags;
+       struct sih_agent *agent = irq_data_get_irq_chip_data(data);
 
        if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
                return -EINVAL;
 
-       spin_lock_irqsave(&sih_agent_lock, flags);
-       if (irqd_get_trigger_type(data) != trigger) {
-               sih->edge_change |= BIT(data->irq - sih->irq_base);
-               queue_work(wq, &sih->edge_work);
-       }
-       spin_unlock_irqrestore(&sih_agent_lock, flags);
+       if (irqd_get_trigger_type(data) != trigger)
+               agent->edge_change |= BIT(data->irq - agent->irq_base);
+
        return 0;
 }
 
+static void twl4030_sih_bus_lock(struct irq_data *data)
+{
+       struct sih_agent        *agent = irq_data_get_irq_chip_data(data);
+
+       mutex_lock(&agent->irq_lock);
+}
+
+static void twl4030_sih_bus_sync_unlock(struct irq_data *data)
+{
+       struct sih_agent        *agent = irq_data_get_irq_chip_data(data);
+       const struct sih        *sih = agent->sih;
+       int                     status;
+
+       if (agent->imr_change_pending) {
+               union {
+                       u32     word;
+                       u8      bytes[4];
+               } imr;
+
+               /* byte[0] gets overwritten as we write ... */
+               imr.word = cpu_to_le32(agent->imr << 8);
+               agent->imr_change_pending = false;
+
+               /* write the whole mask ... simpler than subsetting it */
+               status = twl_i2c_write(sih->module, imr.bytes,
+                               sih->mask[irq_line].imr_offset,
+                               sih->bytes_ixr);
+               if (status)
+                       pr_err("twl4030: %s, %s --> %d\n", __func__,
+                                       "write", status);
+       }
+
+       if (agent->edge_change) {
+               u32             edge_change;
+               u8              bytes[6];
+
+               edge_change = agent->edge_change;
+               agent->edge_change = 0;
+
+               /*
+                * Read, reserving first byte for write scratch.  Yes, this
+                * could be cached for some speedup ... but be careful about
+                * any processor on the other IRQ line, EDR registers are
+                * shared.
+                */
+               status = twl_i2c_read(sih->module, bytes + 1,
+                               sih->edr_offset, sih->bytes_edr);
+               if (status) {
+                       pr_err("twl4030: %s, %s --> %d\n", __func__,
+                                       "read", status);
+                       return;
+               }
+
+               /* Modify only the bits we know must change */
+               while (edge_change) {
+                       int             i = fls(edge_change) - 1;
+                       struct irq_data *idata;
+                       int             byte = 1 + (i >> 2);
+                       int             off = (i & 0x3) * 2;
+                       unsigned int    type;
+
+                       idata = irq_get_irq_data(i + agent->irq_base);
+
+                       bytes[byte] &= ~(0x03 << off);
+
+                       type = irqd_get_trigger_type(idata);
+                       if (type & IRQ_TYPE_EDGE_RISING)
+                               bytes[byte] |= BIT(off + 1);
+                       if (type & IRQ_TYPE_EDGE_FALLING)
+                               bytes[byte] |= BIT(off + 0);
+
+                       edge_change &= ~BIT(i);
+               }
+
+               /* Write */
+               status = twl_i2c_write(sih->module, bytes,
+                               sih->edr_offset, sih->bytes_edr);
+               if (status)
+                       pr_err("twl4030: %s, %s --> %d\n", __func__,
+                                       "write", status);
+       }
+
+       mutex_unlock(&agent->irq_lock);
+}
+
 static struct irq_chip twl4030_sih_irq_chip = {
        .name           = "twl4030",
-       .irq_mask       = twl4030_sih_mask,
+       .irq_mask       = twl4030_sih_mask,
        .irq_unmask     = twl4030_sih_unmask,
        .irq_set_type   = twl4030_sih_set_type,
+       .irq_bus_lock   = twl4030_sih_bus_lock,
+       .irq_bus_sync_unlock = twl4030_sih_bus_sync_unlock,
 };
 
 /*----------------------------------------------------------------------*/
@@ -655,9 +596,7 @@ static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc)
        int isr;
 
        /* reading ISR acks the IRQs, using clear-on-read mode */
-       local_irq_enable();
        isr = sih_read_isr(sih);
-       local_irq_disable();
 
        if (isr < 0) {
                pr_err("twl4030: %s SIH, read ISR error %d\n",
@@ -672,7 +611,7 @@ static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc)
                isr &= ~BIT(irq);
 
                if (irq < sih->bits)
-                       generic_handle_irq(agent->irq_base + irq);
+                       handle_nested_irq(agent->irq_base + irq);
                else
                        pr_err("twl4030: %s SIH, invalid ISR bit %d\n",
                                sih->name, irq);
@@ -718,15 +657,14 @@ int twl4030_sih_setup(int module)
        agent->irq_base = irq_base;
        agent->sih = sih;
        agent->imr = ~0;
-       INIT_WORK(&agent->mask_work, twl4030_sih_do_mask);
-       INIT_WORK(&agent->edge_work, twl4030_sih_do_edge);
+       mutex_init(&agent->irq_lock);
 
        for (i = 0; i < sih->bits; i++) {
                irq = irq_base + i;
 
+               irq_set_chip_data(irq, agent);
                irq_set_chip_and_handler(irq, &twl4030_sih_irq_chip,
                                         handle_edge_irq);
-               irq_set_chip_data(irq, agent);
                activate_irq(irq);
        }
 
@@ -758,7 +696,6 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
 
        int                     status;
        int                     i;
-       struct task_struct      *task;
 
        /*
         * Mask and clear all TWL4030 interrupts since initially we do
@@ -768,12 +705,6 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
        if (status < 0)
                return status;
 
-       wq = create_singlethread_workqueue("twl4030-irqchip");
-       if (!wq) {
-               pr_err("twl4030: workqueue FAIL\n");
-               return -ESRCH;
-       }
-
        twl4030_irq_base = irq_base;
 
        /* install an irq handler for each of the SIH modules;
@@ -787,6 +718,7 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
        for (i = irq_base; i < irq_end; i++) {
                irq_set_chip_and_handler(i, &twl4030_irq_chip,
                                         handle_simple_irq);
+               irq_set_nested_thread(i, 1);
                activate_irq(i);
        }
        twl4030_irq_next = i;
@@ -801,34 +733,22 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
        }
 
        /* install an irq handler to demultiplex the TWL4030 interrupt */
-
-
-       init_completion(&irq_event);
-
-       status = request_irq(irq_num, handle_twl4030_pih, IRQF_DISABLED,
-                               "TWL4030-PIH", &irq_event);
+       status = request_threaded_irq(irq_num, NULL, handle_twl4030_pih, 0,
+                                       "TWL4030-PIH", NULL);
        if (status < 0) {
                pr_err("twl4030: could not claim irq%d: %d\n", irq_num, status);
                goto fail_rqirq;
        }
 
-       task = kthread_run(twl4030_irq_thread, (void *)(long)irq_num,
-                                                               "twl4030-irq");
-       if (IS_ERR(task)) {
-               pr_err("twl4030: could not create irq %d thread!\n", irq_num);
-               status = PTR_ERR(task);
-               goto fail_kthread;
-       }
        return status;
-fail_kthread:
-       free_irq(irq_num, &irq_event);
 fail_rqirq:
        /* clean up twl4030_sih_setup */
 fail:
-       for (i = irq_base; i < irq_end; i++)
+       for (i = irq_base; i < irq_end; i++) {
+               irq_set_nested_thread(i, 0);
                irq_set_chip_and_handler(i, NULL, NULL);
-       destroy_workqueue(wq);
-       wq = NULL;
+       }
+
        return status;
 }
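
The twl4030 hunks above replace workqueue-deferred register writes with genirq's slow-bus pattern: irq_mask/irq_unmask/irq_set_type only update cached state under agent->irq_lock, and the new irq_bus_lock/irq_bus_sync_unlock callbacks bracket them so the accumulated changes hit the I2C bus exactly once, from thread context where sleeping is legal. A minimal sketch of that pattern, with hypothetical my_* names standing in for the twl4030 agent:

    /* Sketch of the irq_bus_lock/irq_bus_sync_unlock pattern; all my_*
     * names are hypothetical. */
    static void my_bus_lock(struct irq_data *data)
    {
            struct my_agent *agent = irq_data_get_irq_chip_data(data);

            mutex_lock(&agent->irq_lock);   /* may sleep: thread context */
    }

    static void my_bus_sync_unlock(struct irq_data *data)
    {
            struct my_agent *agent = irq_data_get_irq_chip_data(data);

            if (agent->imr_dirty) {         /* set by .irq_mask/.irq_unmask */
                    my_write_imr_over_i2c(agent);  /* one sleeping bus write */
                    agent->imr_dirty = false;
            }
            mutex_unlock(&agent->irq_lock);
    }
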
 
index 7cbf2aa9e64f2fb744dbf717ce90057e08a8eaf2..834f824d3c11075b18098a5c0b64a1abf066cd69 100644 (file)
@@ -740,6 +740,28 @@ static int __devinit twl4030_madc_probe(struct platform_device *pdev)
                        TWL4030_BCI_BCICTL1);
                goto err_i2c;
        }
+
+       /* Check that MADC clock is on */
+       ret = twl_i2c_read_u8(TWL4030_MODULE_INTBR, &regval, TWL4030_REG_GPBR1);
+       if (ret) {
+               dev_err(&pdev->dev, "unable to read reg GPBR1 0x%X\n",
+                               TWL4030_REG_GPBR1);
+               goto err_i2c;
+       }
+
+       /* If MADC clk is not on, turn it on */
+       if (!(regval & TWL4030_GPBR1_MADC_HFCLK_EN)) {
+               dev_info(&pdev->dev, "clk disabled, enabling\n");
+               regval |= TWL4030_GPBR1_MADC_HFCLK_EN;
+               ret = twl_i2c_write_u8(TWL4030_MODULE_INTBR, regval,
+                                      TWL4030_REG_GPBR1);
+               if (ret) {
+                       dev_err(&pdev->dev, "unable to write reg GPBR1 0x%X\n",
+                                       TWL4030_REG_GPBR1);
+                       goto err_i2c;
+               }
+       }
+
        platform_set_drvdata(pdev, madc);
        mutex_init(&madc->lock);
        ret = request_threaded_irq(platform_get_irq(pdev, 0), NULL,
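
The probe addition above is a guarded read-modify-write over I2C: read GPBR1 once, and only if the MADC HFCLK enable bit is clear, write the register back with the bit set. The same idiom as a hypothetical helper; twl_i2c_read_u8()/twl_i2c_write_u8() and their argument order are exactly as used above:

    /* Hypothetical helper for the read-modify-write done in probe above. */
    static int twl_reg_set_bits(u8 module, u8 reg, u8 bits)
    {
            u8 val;
            int ret;

            ret = twl_i2c_read_u8(module, &val, reg);
            if (ret)
                    return ret;
            if ((val & bits) == bits)
                    return 0;       /* already set: avoid a bus write */
            return twl_i2c_write_u8(module, val | bits, reg);
    }
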
index eb3b5f88e566c73e6599705649c83ec9b2758f2d..deec3ec858bf8125af6e76e282157df5ff5e3826 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/kthread.h>
 #include <linux/i2c/twl.h>
 #include <linux/platform_device.h>
+#include <linux/suspend.h>
 
 #include "twl-core.h"
 
@@ -83,8 +84,48 @@ static int twl6030_interrupt_mapping[24] = {
 /*----------------------------------------------------------------------*/
 
 static unsigned twl6030_irq_base;
+static int twl_irq;
+static bool twl_irq_wake_enabled;
 
 static struct completion irq_event;
+static atomic_t twl6030_wakeirqs = ATOMIC_INIT(0);
+
+static int twl6030_irq_pm_notifier(struct notifier_block *notifier,
+                                  unsigned long pm_event, void *unused)
+{
+       int chained_wakeups;
+
+       switch (pm_event) {
+       case PM_SUSPEND_PREPARE:
+               chained_wakeups = atomic_read(&twl6030_wakeirqs);
+
+               if (chained_wakeups && !twl_irq_wake_enabled) {
+                       if (enable_irq_wake(twl_irq))
+                               pr_err("twl6030 IRQ wake enable failed\n");
+                       else
+                               twl_irq_wake_enabled = true;
+               } else if (!chained_wakeups && twl_irq_wake_enabled) {
+                       disable_irq_wake(twl_irq);
+                       twl_irq_wake_enabled = false;
+               }
+
+               disable_irq(twl_irq);
+               break;
+
+       case PM_POST_SUSPEND:
+               enable_irq(twl_irq);
+               break;
+
+       default:
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block twl6030_irq_pm_notifier_block = {
+       .notifier_call = twl6030_irq_pm_notifier,
+};
 
 /*
  * This thread processes interrupts reported by the Primary Interrupt Handler.
@@ -187,6 +228,16 @@ static inline void activate_irq(int irq)
 #endif
 }
 
+int twl6030_irq_set_wake(struct irq_data *d, unsigned int on)
+{
+       if (on)
+               atomic_inc(&twl6030_wakeirqs);
+       else
+               atomic_dec(&twl6030_wakeirqs);
+
+       return 0;
+}
+
 /*----------------------------------------------------------------------*/
 
 static unsigned twl6030_irq_next;
@@ -318,10 +369,12 @@ int twl6030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
        twl6030_irq_chip = dummy_irq_chip;
        twl6030_irq_chip.name = "twl6030";
        twl6030_irq_chip.irq_set_type = NULL;
+       twl6030_irq_chip.irq_set_wake = twl6030_irq_set_wake;
 
        for (i = irq_base; i < irq_end; i++) {
                irq_set_chip_and_handler(i, &twl6030_irq_chip,
                                         handle_simple_irq);
+               irq_set_chip_data(i, (void *)irq_num);
                activate_irq(i);
        }
 
@@ -331,6 +384,14 @@ int twl6030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
 
        /* install an irq handler to demultiplex the TWL6030 interrupt */
        init_completion(&irq_event);
+
+       status = request_irq(irq_num, handle_twl6030_pih, 0,
+                               "TWL6030-PIH", &irq_event);
+       if (status < 0) {
+               pr_err("twl6030: could not claim irq%d: %d\n", irq_num, status);
+               goto fail_irq;
+       }
+
        task = kthread_run(twl6030_irq_thread, (void *)irq_num, "twl6030-irq");
        if (IS_ERR(task)) {
                pr_err("twl6030: could not create irq %d thread!\n", irq_num);
@@ -338,17 +399,14 @@ int twl6030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
                goto fail_kthread;
        }
 
-       status = request_irq(irq_num, handle_twl6030_pih, IRQF_DISABLED,
-                               "TWL6030-PIH", &irq_event);
-       if (status < 0) {
-               pr_err("twl6030: could not claim irq%d: %d\n", irq_num, status);
-               goto fail_irq;
-       }
+       twl_irq = irq_num;
+       register_pm_notifier(&twl6030_irq_pm_notifier_block);
        return status;
-fail_irq:
-       free_irq(irq_num, &irq_event);
 
 fail_kthread:
+       free_irq(irq_num, &irq_event);
+
+fail_irq:
        for (i = irq_base; i < irq_end; i++)
                irq_set_chip_and_handler(i, NULL, NULL);
        return status;
@@ -356,6 +414,7 @@ fail_kthread:
 
 int twl6030_exit_irq(void)
 {
+       unregister_pm_notifier(&twl6030_irq_pm_notifier_block);
 
        if (twl6030_irq_base) {
                pr_err("twl6030: can't yet clean up IRQs?\n");
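
The twl6030 side couples three pieces: the new .irq_set_wake callback counts armed child IRQs in an atomic, a PM notifier arms the single parent line (and gates it off) around suspend, and twl6030_exit_irq() unregisters the notifier again. Stripped to a skeleton with hypothetical my_* names (the real code also disarms wake when the count drops to zero):

    static atomic_t my_wakeirqs = ATOMIC_INIT(0); /* bumped by .irq_set_wake */
    static int my_irq;                            /* the chained parent line */

    static int my_pm_cb(struct notifier_block *nb, unsigned long event, void *p)
    {
            switch (event) {
            case PM_SUSPEND_PREPARE:
                    if (atomic_read(&my_wakeirqs))
                            enable_irq_wake(my_irq); /* parent may wake us */
                    disable_irq(my_irq);             /* quiesce until resume */
                    break;
            case PM_POST_SUSPEND:
                    enable_irq(my_irq);
                    break;
            }
            return NOTIFY_DONE;
    }

    static struct notifier_block my_pm_nb = { .notifier_call = my_pm_cb };
    /* register_pm_notifier(&my_pm_nb) at init,
     * unregister_pm_notifier(&my_pm_nb) at teardown. */
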
index ada1835a54557b31424dc1f93297ffdb45b71a9a..f4747a4a9a93fb8d3c906d5060442117860ddaf7 100644 (file)
@@ -420,12 +420,19 @@ static int wm831x_irq_set_type(struct irq_data *data, unsigned int type)
        switch (type) {
        case IRQ_TYPE_EDGE_BOTH:
                wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_INT_MODE;
+               wm831x->gpio_level[irq] = false;
                break;
        case IRQ_TYPE_EDGE_RISING:
                wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_POL;
+               wm831x->gpio_level[irq] = false;
                break;
        case IRQ_TYPE_EDGE_FALLING:
                wm831x->gpio_update[irq] = 0x10000;
+               wm831x->gpio_level[irq] = false;
+               break;
+       case IRQ_TYPE_LEVEL_HIGH:
+               wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_POL;
+               wm831x->gpio_level[irq] = true;
                break;
        default:
                return -EINVAL;
@@ -449,7 +456,7 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data)
 {
        struct wm831x *wm831x = data;
        unsigned int i;
-       int primary, status_addr;
+       int primary, status_addr, ret;
        int status_regs[WM831X_NUM_IRQ_REGS] = { 0 };
        int read[WM831X_NUM_IRQ_REGS] = { 0 };
        int *status;
@@ -507,6 +514,19 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data)
 
                if (*status & wm831x_irqs[i].mask)
                        handle_nested_irq(wm831x->irq_base + i);
+
+               /* Simulate an edge triggered IRQ by polling the input
+                * status.  This is sucky but improves interoperability.
+                */
+               if (primary == WM831X_GP_INT &&
+                   wm831x->gpio_level[i - WM831X_IRQ_GPIO_1]) {
+                       ret = wm831x_reg_read(wm831x, WM831X_GPIO_LEVEL);
+                       while (ret & 1 << (i - WM831X_IRQ_GPIO_1)) {
+                               handle_nested_irq(wm831x->irq_base + i);
+                               ret = wm831x_reg_read(wm831x,
+                                                     WM831X_GPIO_LEVEL);
+                       }
+               }
        }
 
 out:
@@ -596,8 +616,6 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq)
                         "No interrupt specified - functionality limited\n");
        }
 
-
-
        /* Enable top level interrupts, we mask at secondary level */
        wm831x_reg_write(wm831x, WM831X_SYSTEM_INTERRUPTS_MASK, 0);
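
The loop added to wm831x_irq_thread() emulates a level-triggered source behind the chip's edge-style reporting: after the first nested dispatch it re-reads WM831X_GPIO_LEVEL and keeps dispatching until the line deasserts, so a still-asserted level cannot be lost. Generalized, with hypothetical my_* names:

    /* Sketch: re-dispatch the nested handler while the level-status
     * register still reads asserted, so no event is lost behind an
     * edge-style parent report. */
    static void dispatch_level_as_edge(struct my_chip *chip, int hwirq)
    {
            while (my_read_level(chip) & BIT(hwirq))
                    handle_nested_irq(chip->irq_base + hwirq);
    }
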
 
index b03be1d4e0caf62741beaa1c2e65dec3e9718b6e..5d6ba132837e8efb5f470d48784d5bb5f9809509 100644 (file)
@@ -217,6 +217,47 @@ static int wm8994_suspend(struct device *dev)
                return 0;
        }
 
+       ret = wm8994_reg_read(wm8994, WM8994_POWER_MANAGEMENT_4);
+       if (ret < 0) {
+               dev_err(dev, "Failed to read power status: %d\n", ret);
+       } else if (ret & (WM8994_AIF2ADCL_ENA | WM8994_AIF2ADCR_ENA |
+                         WM8994_AIF1ADC2L_ENA | WM8994_AIF1ADC2R_ENA |
+                         WM8994_AIF1ADC1L_ENA | WM8994_AIF1ADC1R_ENA)) {
+               dev_dbg(dev, "CODEC still active, ignoring suspend\n");
+               return 0;
+       }
+
+       ret = wm8994_reg_read(wm8994, WM8994_POWER_MANAGEMENT_5);
+       if (ret < 0) {
+               dev_err(dev, "Failed to read power status: %d\n", ret);
+       } else if (ret & (WM8994_AIF2DACL_ENA | WM8994_AIF2DACR_ENA |
+                         WM8994_AIF1DAC2L_ENA | WM8994_AIF1DAC2R_ENA |
+                         WM8994_AIF1DAC1L_ENA | WM8994_AIF1DAC1R_ENA)) {
+               dev_dbg(dev, "CODEC still active, ignoring suspend\n");
+               return 0;
+       }
+
+       switch (wm8994->type) {
+       case WM8958:
+               ret = wm8994_reg_read(wm8994, WM8958_MIC_DETECT_1);
+               if (ret < 0) {
+                       dev_err(dev, "Failed to read power status: %d\n", ret);
+               } else if (ret & WM8958_MICD_ENA) {
+                       dev_dbg(dev, "CODEC still active, ignoring suspend\n");
+                       return 0;
+               }
+               break;
+       default:
+               break;
+       }
+
+       /* Disable LDO pulldowns while the device is suspended if we
+        * don't know that something will be driving them. */
+       if (!wm8994->ldo_ena_always_driven)
+               wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
+                               WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
+                               WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD);
+
        /* GPIO configuration state is saved here since we may be configuring
         * the GPIO alternate functions even if we're not using the gpiolib
         * driver for them.
@@ -286,6 +327,11 @@ static int wm8994_resume(struct device *dev)
        if (ret < 0)
                dev_err(dev, "Failed to restore GPIO registers: %d\n", ret);
 
+       /* Disable LDO pulldowns while the device is active */
+       wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
+                       WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
+                       0);
+
        wm8994->suspended = false;
 
        return 0;
@@ -467,8 +513,15 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
                                                pdata->gpio_defaults[i]);
                        }
                }
+
+               wm8994->ldo_ena_always_driven = pdata->ldo_ena_always_driven;
        }
 
+       /* Disable LDO pulldowns while the device is active */
+       wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
+                       WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
+                       0);
+
        /* In some system designs where the regulators are not in use,
         * we can achieve a small reduction in leakage currents by
         * floating LDO outputs.  This bit makes no difference if the
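
The wm8994 suspend path now returns early whenever any AIF ADC/DAC path, or on WM8958 the mic detection block, is still powered, and the LDO enable pulldowns are asserted only for the duration of suspend. The pulldown toggle is the same wm8994_set_bits() call with the value flipped; a hypothetical wrapper shows the symmetry:

    /* Hypothetical wrapper around the wm8994_set_bits() calls above. */
    static void wm8994_ldo_pulldown(struct wm8994 *wm8994, bool pull)
    {
            u16 mask = WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD;

            wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2, mask,
                            pull ? mask : 0);
    }
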
index 053d36caf955b698d0c1739c2a2b48ca53107bad..cd41d403c9dfa1ffbc8f46563ba8017d35e2c327 100644 (file)
@@ -151,7 +151,7 @@ MODULE_LICENSE("GPL");
 struct vmballoon_stats {
        unsigned int timer;
 
-       /* allocation statustics */
+       /* allocation statistics */
        unsigned int alloc;
        unsigned int alloc_fail;
        unsigned int sleep_alloc;
@@ -412,6 +412,7 @@ static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep)
        gfp_t flags;
        unsigned int hv_status;
        bool locked = false;
+       flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP;
 
        do {
                if (!can_sleep)
@@ -419,7 +420,6 @@ static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep)
                else
                        STATS_INC(b->stats.sleep_alloc);
 
-               flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP;
                page = alloc_page(flags);
                if (!page) {
                        if (!can_sleep)
index 87d5067ba629a13b8ce96e602ac88f5734a45973..cf444b0ca2cc8144367c8232a3075cd006e7d6ae 100644 (file)
@@ -263,7 +263,7 @@ config MMC_WBSD
 
 config MMC_AU1X
        tristate "Alchemy AU1XX0 MMC Card Interface support"
-       depends on SOC_AU1200
+       depends on MIPS_ALCHEMY
        help
          This selects the AMD Alchemy(R) Multimedia card interface.
          If you have an Alchemy platform with an MMC slot, say Y or M here.
index 707bc7dddd226a1d29405efefa2bc2e853f13d1a..5d3b9ae645236dd8b7857a383825694286705b8a 100644 (file)
 #define AU1XMMC_DESCRIPTOR_COUNT 1
 
 /* max DMA seg size: 64KB on Au1100, 4MB on Au1200 */
-#ifdef CONFIG_SOC_AU1100
-#define AU1XMMC_DESCRIPTOR_SIZE 0x0000ffff
-#else  /* Au1200 */
-#define AU1XMMC_DESCRIPTOR_SIZE 0x003fffff
-#endif
+#define AU1100_MMC_DESCRIPTOR_SIZE 0x0000ffff
+#define AU1200_MMC_DESCRIPTOR_SIZE 0x003fffff
 
 #define AU1XMMC_OCR (MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30 | \
                     MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33 | \
@@ -127,6 +124,7 @@ struct au1xmmc_host {
 #define HOST_F_XMIT    0x0001
 #define HOST_F_RECV    0x0002
 #define HOST_F_DMA     0x0010
+#define HOST_F_DBDMA   0x0020
 #define HOST_F_ACTIVE  0x0100
 #define HOST_F_STOP    0x1000
 
@@ -151,6 +149,16 @@ struct au1xmmc_host {
 #define DMA_CHANNEL(h) \
        (((h)->flags & HOST_F_XMIT) ? (h)->tx_chan : (h)->rx_chan)
 
+static inline int has_dbdma(void)
+{
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1200:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
 static inline void IRQ_ON(struct au1xmmc_host *host, u32 mask)
 {
        u32 val = au_readl(HOST_CONFIG(host));
@@ -353,14 +361,12 @@ static void au1xmmc_data_complete(struct au1xmmc_host *host, u32 status)
        data->bytes_xfered = 0;
 
        if (!data->error) {
-               if (host->flags & HOST_F_DMA) {
-#ifdef CONFIG_SOC_AU1200       /* DBDMA */
+               if (host->flags & (HOST_F_DMA | HOST_F_DBDMA)) {
                        u32 chan = DMA_CHANNEL(host);
 
                        chan_tab_t *c = *((chan_tab_t **)chan);
                        au1x_dma_chan_t *cp = c->chan_ptr;
                        data->bytes_xfered = cp->ddma_bytecnt;
-#endif
                } else
                        data->bytes_xfered =
                                (data->blocks * data->blksz) - host->pio.len;
@@ -570,11 +576,10 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status)
 
        host->status = HOST_S_DATA;
 
-       if (host->flags & HOST_F_DMA) {
-#ifdef CONFIG_SOC_AU1200       /* DBDMA */
+       if ((host->flags & (HOST_F_DMA | HOST_F_DBDMA))) {
                u32 channel = DMA_CHANNEL(host);
 
-               /* Start the DMA as soon as the buffer gets something in it */
+               /* Start the DBDMA as soon as the buffer gets something in it */
 
                if (host->flags & HOST_F_RECV) {
                        u32 mask = SD_STATUS_DB | SD_STATUS_NE;
@@ -584,7 +589,6 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status)
                }
 
                au1xxx_dbdma_start(channel);
-#endif
        }
 }
 
@@ -633,8 +637,7 @@ static int au1xmmc_prepare_data(struct au1xmmc_host *host,
 
        au_writel(data->blksz - 1, HOST_BLKSIZE(host));
 
-       if (host->flags & HOST_F_DMA) {
-#ifdef CONFIG_SOC_AU1200       /* DBDMA */
+       if (host->flags & (HOST_F_DMA | HOST_F_DBDMA)) {
                int i;
                u32 channel = DMA_CHANNEL(host);
 
@@ -663,7 +666,6 @@ static int au1xmmc_prepare_data(struct au1xmmc_host *host,
 
                        datalen -= len;
                }
-#endif
        } else {
                host->pio.index = 0;
                host->pio.offset = 0;
@@ -838,7 +840,6 @@ static irqreturn_t au1xmmc_irq(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_SOC_AU1200
 /* 8bit memory DMA device */
 static dbdev_tab_t au1xmmc_mem_dbdev = {
        .dev_id         = DSCR_CMD0_ALWAYS,
@@ -905,7 +906,7 @@ static int au1xmmc_dbdma_init(struct au1xmmc_host *host)
        au1xxx_dbdma_ring_alloc(host->rx_chan, AU1XMMC_DESCRIPTOR_COUNT);
 
        /* DBDMA is good to go */
-       host->flags |= HOST_F_DMA;
+       host->flags |= HOST_F_DMA | HOST_F_DBDMA;
 
        return 0;
 }
@@ -918,7 +919,6 @@ static void au1xmmc_dbdma_shutdown(struct au1xmmc_host *host)
                au1xxx_dbdma_chan_free(host->rx_chan);
        }
 }
-#endif
 
 static void au1xmmc_enable_sdio_irq(struct mmc_host *mmc, int en)
 {
@@ -997,8 +997,16 @@ static int __devinit au1xmmc_probe(struct platform_device *pdev)
        mmc->f_min =   450000;
        mmc->f_max = 24000000;
 
-       mmc->max_seg_size = AU1XMMC_DESCRIPTOR_SIZE;
-       mmc->max_segs = AU1XMMC_DESCRIPTOR_COUNT;
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1100:
+               mmc->max_seg_size = AU1100_MMC_DESCRIPTOR_SIZE;
+               mmc->max_segs = AU1XMMC_DESCRIPTOR_COUNT;
+               break;
+       case ALCHEMY_CPU_AU1200:
+               mmc->max_seg_size = AU1200_MMC_DESCRIPTOR_SIZE;
+               mmc->max_segs = AU1XMMC_DESCRIPTOR_COUNT;
+               break;
+       }
 
        mmc->max_blk_size = 2048;
        mmc->max_blk_count = 512;
@@ -1028,11 +1036,11 @@ static int __devinit au1xmmc_probe(struct platform_device *pdev)
        tasklet_init(&host->finish_task, au1xmmc_tasklet_finish,
                        (unsigned long)host);
 
-#ifdef CONFIG_SOC_AU1200
-       ret = au1xmmc_dbdma_init(host);
-       if (ret)
-               pr_info(DRIVER_NAME ": DBDMA init failed; using PIO\n");
-#endif
+       if (has_dbdma()) {
+               ret = au1xmmc_dbdma_init(host);
+               if (ret)
+                       pr_info(DRIVER_NAME ": DBDMA init failed; using PIO\n");
+       }
 
 #ifdef CONFIG_LEDS_CLASS
        if (host->platdata && host->platdata->led) {
@@ -1073,9 +1081,8 @@ out5:
        au_writel(0, HOST_CONFIG2(host));
        au_sync();
 
-#ifdef CONFIG_SOC_AU1200
-       au1xmmc_dbdma_shutdown(host);
-#endif
+       if (host->flags & HOST_F_DBDMA)
+               au1xmmc_dbdma_shutdown(host);
 
        tasklet_kill(&host->data_task);
        tasklet_kill(&host->finish_task);
@@ -1120,9 +1127,9 @@ static int __devexit au1xmmc_remove(struct platform_device *pdev)
                tasklet_kill(&host->data_task);
                tasklet_kill(&host->finish_task);
 
-#ifdef CONFIG_SOC_AU1200
-               au1xmmc_dbdma_shutdown(host);
-#endif
+               if (host->flags & HOST_F_DBDMA)
+                       au1xmmc_dbdma_shutdown(host);
+
                au1xmmc_set_power(host, 0);
 
                free_irq(host->irq, host);
@@ -1181,24 +1188,23 @@ static struct platform_driver au1xmmc_driver = {
 
 static int __init au1xmmc_init(void)
 {
-#ifdef CONFIG_SOC_AU1200
-       /* DSCR_CMD0_ALWAYS has a stride of 32 bits, we need a stride
-        * of 8 bits.  And since devices are shared, we need to create
-        * our own to avoid freaking out other devices.
-        */
-       memid = au1xxx_ddma_add_device(&au1xmmc_mem_dbdev);
-       if (!memid)
-               pr_err("au1xmmc: cannot add memory dbdma dev\n");
-#endif
+       if (has_dbdma()) {
+               /* DSCR_CMD0_ALWAYS has a stride of 32 bits, we need a stride
+               * of 8 bits.  And since devices are shared, we need to create
+                * of 8 bits.  And since devices are shared, we need to create
+                * our own to avoid freaking out other devices.
+                */
+               if (!memid)
+                       pr_err("au1xmmc: cannot add memory dbdma\n");
+       }
        return platform_driver_register(&au1xmmc_driver);
 }
 
 static void __exit au1xmmc_exit(void)
 {
-#ifdef CONFIG_SOC_AU1200
-       if (memid)
+       if (has_dbdma() && memid)
                au1xxx_ddma_del_device(memid);
-#endif
+
        platform_driver_unregister(&au1xmmc_driver);
 }
 
index e8ff12396680cab635cb1ee370bd5c656cae9640..101cd31c82207982cf5986390a80f91c1053c535 100644 (file)
@@ -1270,7 +1270,7 @@ static void omap_hsmmc_protect_card(struct omap_hsmmc_host *host)
                }
        } else {
                if (!host->protect_card) {
-                       pr_info"%s: cover is open, "
+                       pr_info("%s: cover is open, "
                                         "card is now inaccessible\n",
                                         mmc_hostname(host->mmc));
                        host->protect_card = 1;
index a90cabd7b84d92eb75c061d02805c0db5d15f614..7e508969239e92060e3e3818840756cf0f9c2a4a 100644 (file)
@@ -182,7 +182,7 @@ ltq_mtd_probe(struct platform_device *pdev)
                parts = ltq_mtd_data->parts;
        }
 
-       err = add_mtd_partitions(ltq_mtd->mtd, parts, nr_parts);
+       err = mtd_device_register(ltq_mtd->mtd, parts, nr_parts);
        if (err) {
                dev_err(&pdev->dev, "failed to add partitions\n");
                goto err_destroy;
@@ -208,7 +208,7 @@ ltq_mtd_remove(struct platform_device *pdev)
 
        if (ltq_mtd) {
                if (ltq_mtd->mtd) {
-                       del_mtd_partitions(ltq_mtd->mtd);
+                       mtd_device_unregister(ltq_mtd->mtd);
                        map_destroy(ltq_mtd->mtd);
                }
                if (ltq_mtd->map->virt)
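
The lantiq map driver moves from the add_mtd_partitions()/del_mtd_partitions() pair to the consolidated mtd_device_register()/mtd_device_unregister() API, which also covers the unpartitioned case. In brief:

    /* nr_parts may be 0, in which case the whole device is registered. */
    err = mtd_device_register(ltq_mtd->mtd, parts, nr_parts);
    if (err)
            goto err_destroy;
    /* ...and the symmetric teardown at remove time: */
    mtd_device_unregister(ltq_mtd->mtd);
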
index f1af2228a1b1ec2e90ee2cb19958a68f0d7bb0da..61086ea3cc6bf7856b138042ea308d8b12af554a 100644 (file)
@@ -1144,7 +1144,7 @@ static void mtdchar_notify_remove(struct mtd_info *mtd)
 
        if (mtd_ino) {
                /* Destroy the inode if it exists */
-               mtd_ino->i_nlink = 0;
+               clear_nlink(mtd_ino);
                iput(mtd_ino);
        }
 }
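
mtdchar switches from storing to i_nlink directly to the clear_nlink() helper; around this release the VFS moved link-count updates behind set_nlink()/clear_nlink() so the core can keep its accounting in one place. The substitution is one line:

    clear_nlink(inode);     /* preferred over writing inode->i_nlink = 0 */
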
index 4c3425235adc573edbbd2a77ee6efc661c81f75b..dbfa0f7fb4643bb62d9405c9871170429d064012 100644 (file)
@@ -138,7 +138,7 @@ config MTD_NAND_RICOH
 
 config MTD_NAND_AU1550
        tristate "Au1550/1200 NAND support"
-       depends on SOC_AU1200 || SOC_AU1550
+       depends on MIPS_ALCHEMY
        help
          This enables the driver for the NAND flash controller on the
          AMD/Alchemy 1550 SOC.
index e7767eef4505665279142ba57e13a43e6ac4b35b..fa5736b9286c2060bd6af631f704dd146b192566 100644 (file)
 #include <linux/mtd/partitions.h>
 #include <asm/io.h>
 
-#include <asm/mach-au1x00/au1xxx.h>
+#ifdef CONFIG_MIPS_PB1550
+#include <asm/mach-pb1x00/pb1550.h>
+#elif defined(CONFIG_MIPS_DB1550)
+#include <asm/mach-db1x00/db1x00.h>
+#endif
 #include <asm/mach-db1x00/bcsr.h>
 
 /*
index c34cc1e7c6f6d9b413b899cc66a6356066155d32..b2b9109b6712ef63212600811d3b879687dbc215 100644 (file)
@@ -550,7 +550,7 @@ down:
 /*
  * Get link speed and duplex from the slave's base driver
  * using ethtool. If for some reason the call fails or the
- * values are invalid, fake speed and duplex to 100/Full
+ * values are invalid, set speed and duplex to -1,
  * and return error.
  */
 static int bond_update_speed_duplex(struct slave *slave)
@@ -560,9 +560,8 @@ static int bond_update_speed_duplex(struct slave *slave)
        u32 slave_speed;
        int res;
 
-       /* Fake speed and duplex */
-       slave->speed = SPEED_100;
-       slave->duplex = DUPLEX_FULL;
+       slave->speed = -1;
+       slave->duplex = -1;
 
        res = __ethtool_get_settings(slave_dev, &ecmd);
        if (res < 0)
@@ -1751,16 +1750,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                new_slave->link  = BOND_LINK_DOWN;
        }
 
-       if (bond_update_speed_duplex(new_slave) &&
-           (new_slave->link != BOND_LINK_DOWN)) {
-               pr_warning("%s: Warning: failed to get speed and duplex from %s, assumed to be 100Mb/sec and Full.\n",
-                          bond_dev->name, new_slave->dev->name);
-
-               if (bond->params.mode == BOND_MODE_8023AD) {
-                       pr_warning("%s: Warning: Operation of 802.3ad mode requires ETHTOOL support in base driver for proper aggregator selection.\n",
-                                  bond_dev->name);
-               }
-       }
+       bond_update_speed_duplex(new_slave);
 
        if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
                /* if there is a primary slave, remember it */
@@ -3220,6 +3210,7 @@ static int bond_slave_netdev_event(unsigned long event,
 {
        struct net_device *bond_dev = slave_dev->master;
        struct bonding *bond = netdev_priv(bond_dev);
+       struct slave *slave = NULL;
 
        switch (event) {
        case NETDEV_UNREGISTER:
@@ -3230,20 +3221,16 @@ static int bond_slave_netdev_event(unsigned long event,
                                bond_release(bond_dev, slave_dev);
                }
                break;
+       case NETDEV_UP:
        case NETDEV_CHANGE:
-               if (bond->params.mode == BOND_MODE_8023AD || bond_is_lb(bond)) {
-                       struct slave *slave;
+               slave = bond_get_slave_by_dev(bond, slave_dev);
+               if (slave) {
+                       u32 old_speed = slave->speed;
+                       u8  old_duplex = slave->duplex;
 
-                       slave = bond_get_slave_by_dev(bond, slave_dev);
-                       if (slave) {
-                               u32 old_speed = slave->speed;
-                               u8  old_duplex = slave->duplex;
-
-                               bond_update_speed_duplex(slave);
-
-                               if (bond_is_lb(bond))
-                                       break;
+                       bond_update_speed_duplex(slave);
 
+                       if (bond->params.mode == BOND_MODE_8023AD) {
                                if (old_speed != slave->speed)
                                        bond_3ad_adapter_speed_changed(slave);
                                if (old_duplex != slave->duplex)
index 95de93b9038675994e66622eff5423ae082583c0..d2ff52e63cbb614ea01c817a1342a6195a4de91d 100644 (file)
@@ -157,8 +157,16 @@ static void bond_info_show_slave(struct seq_file *seq,
        seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
        seq_printf(seq, "MII Status: %s\n",
                   (slave->link == BOND_LINK_UP) ?  "up" : "down");
-       seq_printf(seq, "Speed: %d Mbps\n", slave->speed);
-       seq_printf(seq, "Duplex: %s\n", slave->duplex ? "full" : "half");
+       if (slave->speed == -1)
+               seq_printf(seq, "Speed: %s\n", "Unknown");
+       else
+               seq_printf(seq, "Speed: %d Mbps\n", slave->speed);
+
+       if (slave->duplex == -1)
+               seq_printf(seq, "Duplex: %s\n", "Unknown");
+       else
+               seq_printf(seq, "Duplex: %s\n", slave->duplex ? "full" : "half");
+
        seq_printf(seq, "Link Failure Count: %u\n",
                   slave->link_failure_count);
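
With the bonding changes, a failed ethtool query no longer pretends 100 Mb/s full duplex: speed and duplex are parked at -1 and the proc file prints Unknown. The query behind bond_update_speed_duplex(), whose hunk is truncated above, reduces to roughly this sketch (ethtool_cmd_speed() is the standard accessor for the split speed fields):

    struct ethtool_cmd ecmd;
    int res = __ethtool_get_settings(slave->dev, &ecmd);

    if (res < 0)
            return -1;      /* speed/duplex stay -1: shown as "Unknown" */
    slave->speed = ethtool_cmd_speed(&ecmd);
    slave->duplex = ecmd.duplex;
    return 0;
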
 
index 6dff5a0e733f6f56246569f41094f1ceca88a4f5..597f4d45c632944840c0c5949b1f76d8600131e9 100644 (file)
@@ -159,6 +159,7 @@ config S6GMAC
          will be called s6gmac.
 
 source "drivers/net/ethernet/seeq/Kconfig"
+source "drivers/net/ethernet/silan/Kconfig"
 source "drivers/net/ethernet/sis/Kconfig"
 source "drivers/net/ethernet/sfc/Kconfig"
 source "drivers/net/ethernet/sgi/Kconfig"
index c53ad3afc9917e19ceba34d3a99599ae056d34f1..be5dde040261b748e75535e4521cc5974f946b75 100644 (file)
@@ -58,6 +58,7 @@ obj-$(CONFIG_SH_ETH) += renesas/
 obj-$(CONFIG_NET_VENDOR_RDC) += rdc/
 obj-$(CONFIG_S6GMAC) += s6gmac.o
 obj-$(CONFIG_NET_VENDOR_SEEQ) += seeq/
+obj-$(CONFIG_NET_VENDOR_SILAN) += silan/
 obj-$(CONFIG_NET_VENDOR_SIS) += sis/
 obj-$(CONFIG_SFC) += sfc/
 obj-$(CONFIG_NET_VENDOR_SGI) += sgi/
index 82386677bb8c48f5549d9328fd8e835dc383325e..4865ff14bebf230a9c5a460d2817bd029029493d 100644 (file)
@@ -541,19 +541,17 @@ static void au1000_reset_mac(struct net_device *dev)
  * these are not descriptors sitting in memory.
  */
 static void
-au1000_setup_hw_rings(struct au1000_private *aup, u32 rx_base, u32 tx_base)
+au1000_setup_hw_rings(struct au1000_private *aup, void __iomem *tx_base)
 {
        int i;
 
        for (i = 0; i < NUM_RX_DMA; i++) {
-               aup->rx_dma_ring[i] =
-                       (struct rx_dma *)
-                                       (rx_base + sizeof(struct rx_dma)*i);
+               aup->rx_dma_ring[i] = (struct rx_dma *)
+                       (tx_base + 0x100 + sizeof(struct rx_dma) * i);
        }
        for (i = 0; i < NUM_TX_DMA; i++) {
-               aup->tx_dma_ring[i] =
-                       (struct tx_dma *)
-                                       (tx_base + sizeof(struct tx_dma)*i);
+               aup->tx_dma_ring[i] = (struct tx_dma *)
+                       (tx_base + sizeof(struct tx_dma) * i);
        }
 }
 
@@ -1026,7 +1024,7 @@ static int __devinit au1000_probe(struct platform_device *pdev)
        struct net_device *dev = NULL;
        struct db_dest *pDB, *pDBfree;
        int irq, i, err = 0;
-       struct resource *base, *macen;
+       struct resource *base, *macen, *macdma;
 
        base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!base) {
@@ -1049,6 +1047,13 @@ static int __devinit au1000_probe(struct platform_device *pdev)
                goto out;
        }
 
+       macdma = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+       if (!macdma) {
+               dev_err(&pdev->dev, "failed to retrieve MACDMA registers\n");
+               err = -ENODEV;
+               goto out;
+       }
+
        if (!request_mem_region(base->start, resource_size(base),
                                                        pdev->name)) {
                dev_err(&pdev->dev, "failed to request memory region for base registers\n");
@@ -1063,6 +1068,13 @@ static int __devinit au1000_probe(struct platform_device *pdev)
                goto err_request;
        }
 
+       if (!request_mem_region(macdma->start, resource_size(macdma),
+                                                       pdev->name)) {
+               dev_err(&pdev->dev, "failed to request MACDMA memory region\n");
+               err = -ENXIO;
+               goto err_macdma;
+       }
+
        dev = alloc_etherdev(sizeof(struct au1000_private));
        if (!dev) {
                dev_err(&pdev->dev, "alloc_etherdev failed\n");
@@ -1109,10 +1121,14 @@ static int __devinit au1000_probe(struct platform_device *pdev)
        }
        aup->mac_id = pdev->id;
 
-       if (pdev->id == 0)
-               au1000_setup_hw_rings(aup, MAC0_RX_DMA_ADDR, MAC0_TX_DMA_ADDR);
-       else if (pdev->id == 1)
-               au1000_setup_hw_rings(aup, MAC1_RX_DMA_ADDR, MAC1_TX_DMA_ADDR);
+       aup->macdma = ioremap_nocache(macdma->start, resource_size(macdma));
+       if (!aup->macdma) {
+               dev_err(&pdev->dev, "failed to ioremap MACDMA registers\n");
+               err = -ENXIO;
+               goto err_remap3;
+       }
+
+       au1000_setup_hw_rings(aup, aup->macdma);
 
        /* set a random MAC now in case platform_data doesn't provide one */
        random_ether_addr(dev->dev_addr);
@@ -1252,6 +1268,8 @@ err_out:
 err_mdiobus_reg:
        mdiobus_free(aup->mii_bus);
 err_mdiobus_alloc:
+       iounmap(aup->macdma);
+err_remap3:
        iounmap(aup->enable);
 err_remap2:
        iounmap(aup->mac);
@@ -1261,6 +1279,8 @@ err_remap1:
 err_vaddr:
        free_netdev(dev);
 err_alloc:
+       release_mem_region(macdma->start, resource_size(macdma));
+err_macdma:
        release_mem_region(macen->start, resource_size(macen));
 err_request:
        release_mem_region(base->start, resource_size(base));
@@ -1293,9 +1313,13 @@ static int __devexit au1000_remove(struct platform_device *pdev)
                        (NUM_TX_BUFFS + NUM_RX_BUFFS),
                        (void *)aup->vaddr, aup->dma_addr);
 
+       iounmap(aup->macdma);
        iounmap(aup->mac);
        iounmap(aup->enable);
 
+       base = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+       release_mem_region(base->start, resource_size(base));
+
        base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        release_mem_region(base->start, resource_size(base));
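
The au1000_eth changes stop hard-coding the MAC0/MAC1 DMA addresses: a third IORESOURCE_MEM entry is fetched, reserved, and ioremapped, and the ring pointers are derived from that mapping (rx rings at tx_base + 0x100). The standard three-step claim, in isolation (error unwinding via goto labels omitted):

    /* The get/request/ioremap idiom used for the new MACDMA resource. */
    struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 2);
    if (!r)
            return -ENODEV;
    if (!request_mem_region(r->start, resource_size(r), pdev->name))
            return -ENXIO;
    aup->macdma = ioremap_nocache(r->start, resource_size(r));
    if (!aup->macdma)
            return -ENXIO;  /* the real probe unwinds earlier claims */
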
 
index 6229c774552cbe507c05866a8602a9a3e175ee72..4b7f7ad62bb883dd08aae97187eafb25f9a432a8 100644 (file)
@@ -124,7 +124,7 @@ struct au1000_private {
         */
        struct mac_reg *mac;  /* mac registers                      */
        u32 *enable;     /* address of MAC Enable Register     */
-
+       void __iomem *macdma;   /* base of MAC DMA port */
        u32 vaddr;                /* virtual address of rx/tx buffers   */
        dma_addr_t dma_addr;      /* dma address of rx/tx buffers       */
 
index a759d5483ab9372d33630c3c1a756f8fe7b9411a..1375e2dc94687d56bac084ad9901e53037bef31a 100644 (file)
@@ -52,18 +52,6 @@ config BMAC
          To compile this driver as a module, choose M here: the module
          will be called bmac.
 
-config MAC89x0
-       tristate "Macintosh CS89x0 based ethernet cards"
-       depends on MAC
-       ---help---
-         Support for CS89x0 chipset based Ethernet cards.  If you have a
-         Nubus or LC-PDS network (Ethernet) card of this type, say Y and
-         read the Ethernet-HOWTO, available from
-         <http://www.tldp.org/docs.html#howto>.
-
-         To compile this driver as a module, choose M here. This module will
-         be called mac89x0.
-
 config MACMACE
        bool "Macintosh (AV) onboard MACE ethernet"
        depends on MAC
index 0d3a5919c95b494ed7b8c7b70848ac7b3584600e..86eaa17af0f4f5ca8ebed6ce3c492daff57f0d9c 100644 (file)
@@ -4,5 +4,4 @@
 
 obj-$(CONFIG_MACE) += mace.o
 obj-$(CONFIG_BMAC) += bmac.o
-obj-$(CONFIG_MAC89x0) += mac89x0.o
 obj-$(CONFIG_MACMACE) += macmace.o
diff --git a/drivers/net/ethernet/apple/mac89x0.c b/drivers/net/ethernet/apple/mac89x0.c
deleted file mode 100644 (file)
index 83781f3..0000000
+++ /dev/null
@@ -1,634 +0,0 @@
-/* mac89x0.c: A Crystal Semiconductor CS89[02]0 driver for linux. */
-/*
-       Written 1996 by Russell Nelson, with reference to skeleton.c
-       written 1993-1994 by Donald Becker.
-
-       This software may be used and distributed according to the terms
-       of the GNU General Public License, incorporated herein by reference.
-
-       The author may be reached at nelson@crynwr.com, Crynwr
-       Software, 11 Grant St., Potsdam, NY 13676
-
-  Changelog:
-
-  Mike Cruse        : mcruse@cti-ltd.com
-                    : Changes for Linux 2.0 compatibility.
-                    : Added dev_id parameter in net_interrupt(),
-                    : request_irq() and free_irq(). Just NULL for now.
-
-  Mike Cruse        : Added MOD_INC_USE_COUNT and MOD_DEC_USE_COUNT macros
-                    : in net_open() and net_close() so kerneld would know
-                    : that the module is in use and wouldn't eject the
-                    : driver prematurely.
-
-  Mike Cruse        : Rewrote init_module() and cleanup_module using 8390.c
-                    : as an example. Disabled autoprobing in init_module(),
-                    : not a good thing to do to other devices while Linux
-                    : is running from all accounts.
-
-  Alan Cox          : Removed 1.2 support, added 2.1 extra counters.
-
-  David Huggins-Daines <dhd@debian.org>
-
-  Split this off into mac89x0.c, and gutted it of all parts which are
-  not relevant to the existing CS8900 cards on the Macintosh
-  (i.e. basically the Daynaport CS and LC cards).  To be precise:
-
-    * Removed all the media-detection stuff, because these cards are
-    TP-only.
-
-    * Lobotomized the ISA interrupt bogosity, because these cards use
-    a hardwired NuBus interrupt and a magic ISAIRQ value in the card.
-
-    * Basically eliminated everything not relevant to getting the
-    cards minimally functioning on the Macintosh.
-
-  I might add that these cards are badly designed even from the Mac
-  standpoint, in that Dayna, in their infinite wisdom, used NuBus slot
-  I/O space and NuBus interrupts for these cards, but neglected to
-  provide anything even remotely resembling a NuBus ROM.  Therefore we
-  have to probe for them in a brain-damaged ISA-like fashion.
-
-  Arnaldo Carvalho de Melo <acme@conectiva.com.br> - 11/01/2001
-  check kmalloc and release the allocated memory on failure in
-  mac89x0_probe and in init_module
-  use local_irq_{save,restore}(flags) in net_get_stat, not just
-  local_irq_{dis,en}able()
-*/
-
-static char *version =
-"cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
-
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
-   or override something. */
-#include <linux/module.h>
-
-/*
-  Sources:
-
-       Crynwr packet driver epktisa.
-
-       Crystal Semiconductor data sheets.
-
-*/
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/fcntl.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/in.h>
-#include <linux/string.h>
-#include <linux/nubus.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-#include <linux/gfp.h>
-
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/hwtest.h>
-#include <asm/macints.h>
-
-#include "cs89x0.h"
-
-static unsigned int net_debug = NET_DEBUG;
-
-/* Information that need to be kept for each board. */
-struct net_local {
-       int chip_type;          /* one of: CS8900, CS8920, CS8920M */
-       char chip_revision;     /* revision letter of the chip ('A'...) */
-       int send_cmd;           /* the proper command used to send a packet. */
-       int rx_mode;
-       int curr_rx_cfg;
-        int send_underrun;      /* keep track of how many underruns in a row we get */
-       struct sk_buff *skb;
-};
-
-/* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
-static int net_open(struct net_device *dev);
-static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
-static irqreturn_t net_interrupt(int irq, void *dev_id);
-static void set_multicast_list(struct net_device *dev);
-static void net_rx(struct net_device *dev);
-static int net_close(struct net_device *dev);
-static struct net_device_stats *net_get_stats(struct net_device *dev);
-static int set_mac_address(struct net_device *dev, void *addr);
-
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
-/* For reading/writing registers ISA-style */
-static inline int
-readreg_io(struct net_device *dev, int portno)
-{
-       nubus_writew(swab16(portno), dev->base_addr + ADD_PORT);
-       return swab16(nubus_readw(dev->base_addr + DATA_PORT));
-}
-
-static inline void
-writereg_io(struct net_device *dev, int portno, int value)
-{
-       nubus_writew(swab16(portno), dev->base_addr + ADD_PORT);
-       nubus_writew(swab16(value), dev->base_addr + DATA_PORT);
-}
-
-/* These are for reading/writing registers in shared memory */
-static inline int
-readreg(struct net_device *dev, int portno)
-{
-       return swab16(nubus_readw(dev->mem_start + portno));
-}
-
-static inline void
-writereg(struct net_device *dev, int portno, int value)
-{
-       nubus_writew(swab16(value), dev->mem_start + portno);
-}
-
-static const struct net_device_ops mac89x0_netdev_ops = {
-       .ndo_open               = net_open,
-       .ndo_stop               = net_close,
-       .ndo_start_xmit         = net_send_packet,
-       .ndo_get_stats          = net_get_stats,
-       .ndo_set_rx_mode        = set_multicast_list,
-       .ndo_set_mac_address    = set_mac_address,
-       .ndo_validate_addr      = eth_validate_addr,
-       .ndo_change_mtu         = eth_change_mtu,
-};
-
-/* Probe for the CS8900 card in slot E.  We won't bother looking
-   anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
-{
-       struct net_device *dev;
-       static int once_is_enough;
-       struct net_local *lp;
-       static unsigned version_printed;
-       int i, slot;
-       unsigned rev_type = 0;
-       unsigned long ioaddr;
-       unsigned short sig;
-       int err = -ENODEV;
-
-       if (!MACH_IS_MAC)
-               return ERR_PTR(-ENODEV);
-
-       dev = alloc_etherdev(sizeof(struct net_local));
-       if (!dev)
-               return ERR_PTR(-ENOMEM);
-
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
-       if (once_is_enough)
-               goto out;
-       once_is_enough = 1;
-
-       /* We might have to parameterize this later */
-       slot = 0xE;
-       /* Get out now if there's a real NuBus card in slot E */
-       if (nubus_find_slot(slot, NULL) != NULL)
-               goto out;
-
-       /* The pseudo-ISA bits always live at offset 0x300 (gee,
-           wonder why...) */
-       ioaddr = (unsigned long)
-               nubus_slot_addr(slot) | (((slot&0xf) << 20) + DEFAULTIOBASE);
-       {
-               unsigned long flags;
-               int card_present;
-
-               local_irq_save(flags);
-               card_present = (hwreg_present((void*) ioaddr+4) &&
-                               hwreg_present((void*) ioaddr + DATA_PORT));
-               local_irq_restore(flags);
-
-               if (!card_present)
-                       goto out;
-       }
-
-       nubus_writew(0, ioaddr + ADD_PORT);
-       sig = nubus_readw(ioaddr + DATA_PORT);
-       if (sig != swab16(CHIP_EISA_ID_SIG))
-               goto out;
-
-       /* Initialize the net_device structure. */
-       lp = netdev_priv(dev);
-
-       /* Fill in the 'dev' fields. */
-       dev->base_addr = ioaddr;
-       dev->mem_start = (unsigned long)
-               nubus_slot_addr(slot) | (((slot&0xf) << 20) + MMIOBASE);
-       dev->mem_end = dev->mem_start + 0x1000;
-
-       /* Turn on shared memory */
-       writereg_io(dev, PP_BusCTL, MEMORY_ON);
-
-       /* get the chip type */
-       rev_type = readreg(dev, PRODUCT_ID_ADD);
-       lp->chip_type = rev_type &~ REVISON_BITS;
-       lp->chip_revision = ((rev_type & REVISON_BITS) >> 8) + 'A';
-
-       /* Check the chip type and revision in order to set the correct send command
-       CS8920 revision C and CS8900 revision F can use the faster send. */
-       lp->send_cmd = TX_AFTER_381;
-       if (lp->chip_type == CS8900 && lp->chip_revision >= 'F')
-               lp->send_cmd = TX_NOW;
-       if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
-               lp->send_cmd = TX_NOW;
-
-       if (net_debug && version_printed++ == 0)
-               printk(version);
-
-       printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
-              dev->name,
-              lp->chip_type==CS8900?'0':'2',
-              lp->chip_type==CS8920M?"M":"",
-              lp->chip_revision,
-              dev->base_addr);
-
-       /* Try to read the MAC address */
-       if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
-               printk("\nmac89x0: No EEPROM, giving up now.\n");
-               goto out1;
-        } else {
-                for (i = 0; i < ETH_ALEN; i += 2) {
-                       /* Big-endian (why??!) */
-                       unsigned short s = readreg(dev, PP_IA + i);
-                        dev->dev_addr[i] = s >> 8;
-                        dev->dev_addr[i+1] = s & 0xff;
-                }
-        }
-
-       dev->irq = SLOT2IRQ(slot);
-
-       /* print the IRQ and ethernet address. */
-
-       printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
-
-       dev->netdev_ops         = &mac89x0_netdev_ops;
-
-       err = register_netdev(dev);
-       if (err)
-               goto out1;
-       return NULL;
-out1:
-       nubus_writew(0, dev->base_addr + ADD_PORT);
-out:
-       free_netdev(dev);
-       return ERR_PTR(err);
-}
-
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
-       int reset_start_time;
-
-       writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
-       /* wait 30 ms */
-       msleep_interruptible(30);
-
-       /* Wait until the chip is reset */
-       reset_start_time = jiffies;
-       while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
-               ;
-}
-#endif
-
-/* Open/initialize the board.  This is called (in the current kernel)
-   sometime after booting when the 'ifconfig' program is run.
-
-   This routine should set everything up anew at each open, even
-   registers that "should" only need to be set once at boot, so that
-   there is non-reboot way to recover if something goes wrong.
-   */
-static int
-net_open(struct net_device *dev)
-{
-       struct net_local *lp = netdev_priv(dev);
-       int i;
-
-       /* Disable the interrupt for now */
-       writereg(dev, PP_BusCTL, readreg(dev, PP_BusCTL) & ~ENABLE_IRQ);
-
-       /* Grab the interrupt */
-       if (request_irq(dev->irq, net_interrupt, 0, "cs89x0", dev))
-               return -EAGAIN;
-
-       /* Set up the IRQ - Apparently magic */
-       if (lp->chip_type == CS8900)
-               writereg(dev, PP_CS8900_ISAINT, 0);
-       else
-               writereg(dev, PP_CS8920_ISAINT, 0);
-
-       /* set the Ethernet address */
-       for (i=0; i < ETH_ALEN/2; i++)
-               writereg(dev, PP_IA+i*2, dev->dev_addr[i*2] | (dev->dev_addr[i*2+1] << 8));
-
-       /* Turn on both receive and transmit operations */
-       writereg(dev, PP_LineCTL, readreg(dev, PP_LineCTL) | SERIAL_RX_ON | SERIAL_TX_ON);
-
-       /* Receive only error free packets addressed to this card */
-       lp->rx_mode = 0;
-       writereg(dev, PP_RxCTL, DEF_RX_ACCEPT);
-
-       lp->curr_rx_cfg = RX_OK_ENBL | RX_CRC_ERROR_ENBL;
-
-       writereg(dev, PP_RxCFG, lp->curr_rx_cfg);
-
-       writereg(dev, PP_TxCFG, TX_LOST_CRS_ENBL | TX_SQE_ERROR_ENBL | TX_OK_ENBL |
-              TX_LATE_COL_ENBL | TX_JBR_ENBL | TX_ANY_COL_ENBL | TX_16_COL_ENBL);
-
-       writereg(dev, PP_BufCFG, READY_FOR_TX_ENBL | RX_MISS_COUNT_OVRFLOW_ENBL |
-                TX_COL_COUNT_OVRFLOW_ENBL | TX_UNDERRUN_ENBL);
-
-       /* now that we've got our act together, enable everything */
-       writereg(dev, PP_BusCTL, readreg(dev, PP_BusCTL) | ENABLE_IRQ);
-       netif_start_queue(dev);
-       return 0;
-}
-
-static int
-net_send_packet(struct sk_buff *skb, struct net_device *dev)
-{
-       struct net_local *lp = netdev_priv(dev);
-       unsigned long flags;
-
-       if (net_debug > 3)
-               printk("%s: sent %d byte packet of type %x\n",
-                      dev->name, skb->len,
-                      (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-                      | skb->data[ETH_ALEN+ETH_ALEN+1]);
-
-       /* keep the upload from being interrupted, since we
-          ask the chip to start transmitting before the
-          whole packet has been completely uploaded. */
-       local_irq_save(flags);
-       netif_stop_queue(dev);
-
-       /* initiate a transmit sequence */
-       writereg(dev, PP_TxCMD, lp->send_cmd);
-       writereg(dev, PP_TxLength, skb->len);
-
-       /* Test to see if the chip has allocated memory for the packet */
-       if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) {
-               /* Gasp!  It hasn't.  But that shouldn't happen since
-                  we're waiting for TxOk, so return 1 and requeue this packet. */
-               local_irq_restore(flags);
-               return NETDEV_TX_BUSY;
-       }
-
-       /* Write the contents of the packet */
-       skb_copy_from_linear_data(skb, (void *)(dev->mem_start + PP_TxFrame),
-                                 skb->len+1);
-
-       local_irq_restore(flags);
-       dev_kfree_skb (skb);
-
-       return NETDEV_TX_OK;
-}
-
-/* The typical workload of the driver:
-   Handle the network interface interrupts. */
-static irqreturn_t net_interrupt(int irq, void *dev_id)
-{
-       struct net_device *dev = dev_id;
-       struct net_local *lp;
-       int ioaddr, status;
-
-       if (dev == NULL) {
-               printk ("net_interrupt(): irq %d for unknown device.\n", irq);
-               return IRQ_NONE;
-       }
-
-       ioaddr = dev->base_addr;
-       lp = netdev_priv(dev);
-
-       /* we MUST read all the events out of the ISQ, otherwise we'll never
-           get interrupted again.  As a consequence, we can't have any limit
-           on the number of times we loop in the interrupt handler.  The
-           hardware guarantees that eventually we'll run out of events.  Of
-           course, if you're on a slow machine, and packets are arriving
-           faster than you can read them off, you're screwed.  Hasta la
-           vista, baby!  */
-       while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
-               if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
-               switch(status & ISQ_EVENT_MASK) {
-               case ISQ_RECEIVER_EVENT:
-                       /* Got a packet(s). */
-                       net_rx(dev);
-                       break;
-               case ISQ_TRANSMITTER_EVENT:
-                       dev->stats.tx_packets++;
-                       netif_wake_queue(dev);
-                       if ((status & TX_OK) == 0)
-                               dev->stats.tx_errors++;
-                       if (status & TX_LOST_CRS)
-                               dev->stats.tx_carrier_errors++;
-                       if (status & TX_SQE_ERROR)
-                               dev->stats.tx_heartbeat_errors++;
-                       if (status & TX_LATE_COL)
-                               dev->stats.tx_window_errors++;
-                       if (status & TX_16_COL)
-                               dev->stats.tx_aborted_errors++;
-                       break;
-               case ISQ_BUFFER_EVENT:
-                       if (status & READY_FOR_TX) {
-                               /* we tried to transmit a packet earlier,
-                                   but inexplicably ran out of buffers.
-                                   That shouldn't happen since we only ever
-                                   load one packet.  Shrug.  Do the right
-                                   thing anyway. */
-                               netif_wake_queue(dev);
-                       }
-                       if (status & TX_UNDERRUN) {
-                               if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
-                                lp->send_underrun++;
-                                if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
-                                else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
-                        }
-                       break;
-               case ISQ_RX_MISS_EVENT:
-                       dev->stats.rx_missed_errors += (status >> 6);
-                       break;
-               case ISQ_TX_COL_EVENT:
-                       dev->stats.collisions += (status >> 6);
-                       break;
-               }
-       }
-       return IRQ_HANDLED;
-}
-
-/* We have a good packet(s), get it/them out of the buffers. */
-static void
-net_rx(struct net_device *dev)
-{
-       struct sk_buff *skb;
-       int status, length;
-
-       status = readreg(dev, PP_RxStatus);
-       if ((status & RX_OK) == 0) {
-               dev->stats.rx_errors++;
-               if (status & RX_RUNT)
-                               dev->stats.rx_length_errors++;
-               if (status & RX_EXTRA_DATA)
-                               dev->stats.rx_length_errors++;
-               if ((status & RX_CRC_ERROR) &&
-                   !(status & (RX_EXTRA_DATA|RX_RUNT)))
-                       /* per str 172 */
-                       dev->stats.rx_crc_errors++;
-               if (status & RX_DRIBBLE)
-                               dev->stats.rx_frame_errors++;
-               return;
-       }
-
-       length = readreg(dev, PP_RxLength);
-       /* Malloc up new buffer. */
-       skb = alloc_skb(length, GFP_ATOMIC);
-       if (skb == NULL) {
-               printk("%s: Memory squeeze, dropping packet.\n", dev->name);
-               dev->stats.rx_dropped++;
-               return;
-       }
-       skb_put(skb, length);
-
-       skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
-                               length);
-
-       if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
-                                 dev->name, length,
-                                 (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-                                | skb->data[ETH_ALEN+ETH_ALEN+1]);
-
-        skb->protocol=eth_type_trans(skb,dev);
-       netif_rx(skb);
-       dev->stats.rx_packets++;
-       dev->stats.rx_bytes += length;
-}
-
-/* The inverse routine to net_open(). */
-static int
-net_close(struct net_device *dev)
-{
-
-       writereg(dev, PP_RxCFG, 0);
-       writereg(dev, PP_TxCFG, 0);
-       writereg(dev, PP_BufCFG, 0);
-       writereg(dev, PP_BusCTL, 0);
-
-       netif_stop_queue(dev);
-
-       free_irq(dev->irq, dev);
-
-       /* Update the statistics here. */
-
-       return 0;
-
-}
-
-/* Get the current statistics. This may be called with the card open or
-   closed. */
-static struct net_device_stats *
-net_get_stats(struct net_device *dev)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       /* Update the statistics from the device registers. */
-       dev->stats.rx_missed_errors += (readreg(dev, PP_RxMiss) >> 6);
-       dev->stats.collisions += (readreg(dev, PP_TxCol) >> 6);
-       local_irq_restore(flags);
-
-       return &dev->stats;
-}
-
-static void set_multicast_list(struct net_device *dev)
-{
-       struct net_local *lp = netdev_priv(dev);
-
-       if(dev->flags&IFF_PROMISC)
-       {
-               lp->rx_mode = RX_ALL_ACCEPT;
-       } else if ((dev->flags & IFF_ALLMULTI) || !netdev_mc_empty(dev)) {
-               /* The multicast-accept list is initialized to accept-all, and we
-                  rely on higher-level filtering for now. */
-               lp->rx_mode = RX_MULTCAST_ACCEPT;
-       }
-       else
-               lp->rx_mode = 0;
-
-       writereg(dev, PP_RxCTL, DEF_RX_ACCEPT | lp->rx_mode);
-
-       /* in promiscuous mode, we accept errored packets, so we have to enable interrupts on them also */
-       writereg(dev, PP_RxCFG, lp->curr_rx_cfg |
-            (lp->rx_mode == RX_ALL_ACCEPT? (RX_CRC_ERROR_ENBL|RX_RUNT_ENBL|RX_EXTRA_DATA_ENBL) : 0));
-}
-
-
-static int set_mac_address(struct net_device *dev, void *addr)
-{
-       int i;
-       printk("%s: Setting MAC address to ", dev->name);
-       for (i = 0; i < 6; i++)
-               printk(" %2.2x", dev->dev_addr[i] = ((unsigned char *)addr)[i]);
-       printk(".\n");
-       /* set the Ethernet address */
-       for (i=0; i < ETH_ALEN/2; i++)
-               writereg(dev, PP_IA+i*2, dev->dev_addr[i*2] | (dev->dev_addr[i*2+1] << 8));
-
-       return 0;
-}
-
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
-MODULE_LICENSE("GPL");
-
-int __init
-init_module(void)
-{
-       net_debug = debug;
-        dev_cs89x0 = mac89x0_probe(-1);
-       if (IS_ERR(dev_cs89x0)) {
-                printk(KERN_WARNING "mac89x0.c: No card found\n");
-               return PTR_ERR(dev_cs89x0);
-       }
-       return 0;
-}
-
-void
-cleanup_module(void)
-{
-       unregister_netdev(dev_cs89x0);
-       nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
-       free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
index 6cbb81ccc02ef6bd0a85ab13bbc5ca2cb91b6ec4..1f8648f099c7328d3c8334375d324b413ddb3543 100644 (file)
@@ -6,7 +6,7 @@ config NET_VENDOR_CIRRUS
        bool "Cirrus devices"
        default y
        depends on ISA || EISA || MACH_IXDP2351 || ARCH_IXDP2X01 \
-               || MACH_MX31ADS || MACH_QQ2440 || (ARM && ARCH_EP93XX)
+               || MACH_MX31ADS || MACH_QQ2440 || (ARM && ARCH_EP93XX) || MAC
        ---help---
          If you have a network (Ethernet) card belonging to this class, say Y
          and read the Ethernet-HOWTO, available from
@@ -47,4 +47,16 @@ config EP93XX_ETH
          This is a driver for the ethernet hardware included in EP93xx CPUs.
          Say Y if you are building a kernel for EP93xx based devices.
 
+config MAC89x0
+       tristate "Macintosh CS89x0 based ethernet cards"
+       depends on MAC
+       ---help---
+         Support for CS89x0 chipset based Ethernet cards.  If you have a
+         Nubus or LC-PDS network (Ethernet) card of this type, say Y and
+         read the Ethernet-HOWTO, available from
+         <http://www.tldp.org/docs.html#howto>.
+
+         To compile this driver as a module, choose M here. This module will
+         be called mac89x0.
+
 endif # NET_VENDOR_CIRRUS
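For readers wiring this into a build, a minimal sketch of enabling the new driver, assuming an m68k Mac configuration (the symbol names are taken from the Kconfig and Makefile hunks here):

    CONFIG_MAC=y
    CONFIG_NET_VENDOR_CIRRUS=y
    CONFIG_MAC89x0=m

With CONFIG_MAC89x0=m, the Makefile hunk below picks up mac89x0.o as a module, matching the help text's "this module will be called mac89x0".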
index 14bd77e0cb57a464094d04856fa50a5fb5a279fc..ca245e2b5d985fbe8fccc95086891e8f61ec1068 100644 (file)
@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_CS89x0) += cs89x0.o
 obj-$(CONFIG_EP93XX_ETH) += ep93xx_eth.o
+obj-$(CONFIG_MAC89x0) += mac89x0.o
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
new file mode 100644 (file)
index 0000000..83781f3
--- /dev/null
@@ -0,0 +1,634 @@
+/* mac89x0.c: A Crystal Semiconductor CS89[02]0 driver for linux. */
+/*
+       Written 1996 by Russell Nelson, with reference to skeleton.c
+       written 1993-1994 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       The author may be reached at nelson@crynwr.com, Crynwr
+       Software, 11 Grant St., Potsdam, NY 13676
+
+  Changelog:
+
+  Mike Cruse        : mcruse@cti-ltd.com
+                    : Changes for Linux 2.0 compatibility.
+                    : Added dev_id parameter in net_interrupt(),
+                    : request_irq() and free_irq(). Just NULL for now.
+
+  Mike Cruse        : Added MOD_INC_USE_COUNT and MOD_DEC_USE_COUNT macros
+                    : in net_open() and net_close() so kerneld would know
+                    : that the module is in use and wouldn't eject the
+                    : driver prematurely.
+
+  Mike Cruse        : Rewrote init_module() and cleanup_module using 8390.c
+                    : as an example. Disabled autoprobing in init_module(),
+                    : not a good thing to do to other devices while Linux
+                    : is running from all accounts.
+
+  Alan Cox          : Removed 1.2 support, added 2.1 extra counters.
+
+  David Huggins-Daines <dhd@debian.org>
+
+  Split this off into mac89x0.c, and gutted it of all parts which are
+  not relevant to the existing CS8900 cards on the Macintosh
+  (i.e. basically the Daynaport CS and LC cards).  To be precise:
+
+    * Removed all the media-detection stuff, because these cards are
+    TP-only.
+
+    * Lobotomized the ISA interrupt bogosity, because these cards use
+    a hardwired NuBus interrupt and a magic ISAIRQ value in the card.
+
+    * Basically eliminated everything not relevant to getting the
+    cards minimally functioning on the Macintosh.
+
+  I might add that these cards are badly designed even from the Mac
+  standpoint, in that Dayna, in their infinite wisdom, used NuBus slot
+  I/O space and NuBus interrupts for these cards, but neglected to
+  provide anything even remotely resembling a NuBus ROM.  Therefore we
+  have to probe for them in a brain-damaged ISA-like fashion.
+
+  Arnaldo Carvalho de Melo <acme@conectiva.com.br> - 11/01/2001
+  check kmalloc and release the allocated memory on failure in
+  mac89x0_probe and in init_module
+  use local_irq_{save,restore}(flags) in net_get_stat, not just
+  local_irq_{dis,en}able()
+*/
+
+static char *version =
+"cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
+
+/* ======================= configure the driver here ======================= */
+
+/* use 0 for production, 1 for verification, >2 for debug */
+#ifndef NET_DEBUG
+#define NET_DEBUG 0
+#endif
+
+/* ======================= end of configuration ======================= */
+
+
+/* Always include 'config.h' first in case the user wants to turn on
+   or override something. */
+#include <linux/module.h>
+
+/*
+  Sources:
+
+       Crynwr packet driver epktisa.
+
+       Crystal Semiconductor data sheets.
+
+*/
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/in.h>
+#include <linux/string.h>
+#include <linux/nubus.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <linux/gfp.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/hwtest.h>
+#include <asm/macints.h>
+
+#include "cs89x0.h"
+
+static unsigned int net_debug = NET_DEBUG;
+
+/* Information that needs to be kept for each board. */
+struct net_local {
+       int chip_type;          /* one of: CS8900, CS8920, CS8920M */
+       char chip_revision;     /* revision letter of the chip ('A'...) */
+       int send_cmd;           /* the proper command used to send a packet. */
+       int rx_mode;
+       int curr_rx_cfg;
+       int send_underrun;      /* keep track of how many underruns in a row we get */
+       struct sk_buff *skb;
+};
+
+/* Index to functions, as function prototypes. */
+
+#if 0
+extern void reset_chip(struct net_device *dev);
+#endif
+static int net_open(struct net_device *dev);
+static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
+static irqreturn_t net_interrupt(int irq, void *dev_id);
+static void set_multicast_list(struct net_device *dev);
+static void net_rx(struct net_device *dev);
+static int net_close(struct net_device *dev);
+static struct net_device_stats *net_get_stats(struct net_device *dev);
+static int set_mac_address(struct net_device *dev, void *addr);
+
+
+/* Example routines you must write ;->. */
+#define tx_done(dev) 1
+
+/* For reading/writing registers ISA-style */
+static inline int
+readreg_io(struct net_device *dev, int portno)
+{
+       nubus_writew(swab16(portno), dev->base_addr + ADD_PORT);
+       return swab16(nubus_readw(dev->base_addr + DATA_PORT));
+}
+
+static inline void
+writereg_io(struct net_device *dev, int portno, int value)
+{
+       nubus_writew(swab16(portno), dev->base_addr + ADD_PORT);
+       nubus_writew(swab16(value), dev->base_addr + DATA_PORT);
+}
+
+/* These are for reading/writing registers in shared memory */
+static inline int
+readreg(struct net_device *dev, int portno)
+{
+       return swab16(nubus_readw(dev->mem_start + portno));
+}
+
+static inline void
+writereg(struct net_device *dev, int portno, int value)
+{
+       nubus_writew(swab16(value), dev->mem_start + portno);
+}
+
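The swab16() on every access above is the heart of this driver's register interface: the CS8900 is a little-endian chip behind the big-endian 68k NuBus, so each 16-bit word is byte-swapped in both directions. A minimal userspace sketch of that swap (swap16() here stands in for the kernel's swab16()):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the kernel's swab16(): exchange the two bytes. */
    static uint16_t swap16(uint16_t v)
    {
            return (uint16_t)((v << 8) | (v >> 8));
    }

    int main(void)
    {
            /* A register value of 0x630E stored little-endian arrives on the
             * big-endian bus as 0x0E63; swapping restores it, and the same
             * swap prepares values written back to the chip. */
            uint16_t raw = 0x0E63;

            printf("raw=0x%04x swapped=0x%04x\n", raw, swap16(raw));
            return 0;
    }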
+static const struct net_device_ops mac89x0_netdev_ops = {
+       .ndo_open               = net_open,
+       .ndo_stop               = net_close,
+       .ndo_start_xmit         = net_send_packet,
+       .ndo_get_stats          = net_get_stats,
+       .ndo_set_rx_mode        = set_multicast_list,
+       .ndo_set_mac_address    = set_mac_address,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_change_mtu         = eth_change_mtu,
+};
+
+/* Probe for the CS8900 card in slot E.  We won't bother looking
+   anywhere else until we have a really good reason to do so. */
+struct net_device * __init mac89x0_probe(int unit)
+{
+       struct net_device *dev;
+       static int once_is_enough;
+       struct net_local *lp;
+       static unsigned version_printed;
+       int i, slot;
+       unsigned rev_type = 0;
+       unsigned long ioaddr;
+       unsigned short sig;
+       int err = -ENODEV;
+
+       if (!MACH_IS_MAC)
+               return ERR_PTR(-ENODEV);
+
+       dev = alloc_etherdev(sizeof(struct net_local));
+       if (!dev)
+               return ERR_PTR(-ENOMEM);
+
+       if (unit >= 0) {
+               sprintf(dev->name, "eth%d", unit);
+               netdev_boot_setup_check(dev);
+       }
+
+       if (once_is_enough)
+               goto out;
+       once_is_enough = 1;
+
+       /* We might have to parameterize this later */
+       slot = 0xE;
+       /* Get out now if there's a real NuBus card in slot E */
+       if (nubus_find_slot(slot, NULL) != NULL)
+               goto out;
+
+       /* The pseudo-ISA bits always live at offset 0x300 (gee,
+           wonder why...) */
+       ioaddr = (unsigned long)
+               nubus_slot_addr(slot) | (((slot&0xf) << 20) + DEFAULTIOBASE);
+       {
+               unsigned long flags;
+               int card_present;
+
+               local_irq_save(flags);
+               card_present = (hwreg_present((void*) ioaddr+4) &&
+                               hwreg_present((void*) ioaddr + DATA_PORT));
+               local_irq_restore(flags);
+
+               if (!card_present)
+                       goto out;
+       }
+
+       nubus_writew(0, ioaddr + ADD_PORT);
+       sig = nubus_readw(ioaddr + DATA_PORT);
+       if (sig != swab16(CHIP_EISA_ID_SIG))
+               goto out;
+
+       /* Initialize the net_device structure. */
+       lp = netdev_priv(dev);
+
+       /* Fill in the 'dev' fields. */
+       dev->base_addr = ioaddr;
+       dev->mem_start = (unsigned long)
+               nubus_slot_addr(slot) | (((slot&0xf) << 20) + MMIOBASE);
+       dev->mem_end = dev->mem_start + 0x1000;
+
+       /* Turn on shared memory */
+       writereg_io(dev, PP_BusCTL, MEMORY_ON);
+
+       /* get the chip type */
+       rev_type = readreg(dev, PRODUCT_ID_ADD);
+       lp->chip_type = rev_type &~ REVISON_BITS;
+       lp->chip_revision = ((rev_type & REVISON_BITS) >> 8) + 'A';
+
+       /* Check the chip type and revision in order to set the correct send
+          command: CS8920 revision C and CS8900 revision F can use the faster
+          send. */
+       lp->send_cmd = TX_AFTER_381;
+       if (lp->chip_type == CS8900 && lp->chip_revision >= 'F')
+               lp->send_cmd = TX_NOW;
+       if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
+               lp->send_cmd = TX_NOW;
+
+       if (net_debug && version_printed++ == 0)
+               printk(version);
+
+       printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
+              dev->name,
+              lp->chip_type==CS8900?'0':'2',
+              lp->chip_type==CS8920M?"M":"",
+              lp->chip_revision,
+              dev->base_addr);
+
+       /* Try to read the MAC address */
+       if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
+               printk("\nmac89x0: No EEPROM, giving up now.\n");
+               goto out1;
+        } else {
+                for (i = 0; i < ETH_ALEN; i += 2) {
+                       /* Big-endian (why??!) */
+                       unsigned short s = readreg(dev, PP_IA + i);
+                        dev->dev_addr[i] = s >> 8;
+                        dev->dev_addr[i+1] = s & 0xff;
+                }
+        }
+
+       dev->irq = SLOT2IRQ(slot);
+
+       /* print the IRQ and ethernet address. */
+
+       printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+
+       dev->netdev_ops         = &mac89x0_netdev_ops;
+
+       err = register_netdev(dev);
+       if (err)
+               goto out1;
+       return NULL;
+out1:
+       nubus_writew(0, dev->base_addr + ADD_PORT);
+out:
+       free_netdev(dev);
+       return ERR_PTR(err);
+}
+
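To make the probe's address arithmetic concrete, here is a hedged sketch. slot_addr() is a hypothetical stand-in for nubus_slot_addr() (assuming the conventional Mac NuBus layout of 0xF0000000 | slot << 24), and the MMIOBASE value is invented for illustration; DEFAULTIOBASE follows the driver's own comment that the pseudo-ISA registers sit at offset 0x300:

    #include <inttypes.h>
    #include <stdio.h>

    #define DEFAULTIOBASE 0x300   /* per the driver comment above */
    #define MMIOBASE      0x4000  /* hypothetical value, illustration only */

    /* Hypothetical stand-in for nubus_slot_addr(): assume slot s maps to
     * 0xF0000000 | (s << 24), the conventional Mac NuBus slot space. */
    static uint32_t slot_addr(unsigned slot)
    {
            return 0xF0000000u | ((uint32_t)slot << 24);
    }

    int main(void)
    {
            unsigned slot = 0xE;
            uint32_t io  = slot_addr(slot) | (((slot & 0xF) << 20) + DEFAULTIOBASE);
            uint32_t mem = slot_addr(slot) | (((slot & 0xF) << 20) + MMIOBASE);

            printf("ioaddr=0x%08" PRIX32 " mem_start=0x%08" PRIX32 "\n", io, mem);
            return 0;
    }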
+#if 0
+/* This is useful for something, but I don't know what yet. */
+void __init reset_chip(struct net_device *dev)
+{
+       int reset_start_time;
+
+       writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
+
+       /* wait 30 ms */
+       msleep_interruptible(30);
+
+       /* Wait until the chip is reset */
+       reset_start_time = jiffies;
+       while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
+               ;
+}
+#endif
+
+/* Open/initialize the board.  This is called (in the current kernel)
+   sometime after booting when the 'ifconfig' program is run.
+
+   This routine should set everything up anew at each open, even
+   registers that "should" only need to be set once at boot, so that
+   there is a non-reboot way to recover if something goes wrong.
+   */
+static int
+net_open(struct net_device *dev)
+{
+       struct net_local *lp = netdev_priv(dev);
+       int i;
+
+       /* Disable the interrupt for now */
+       writereg(dev, PP_BusCTL, readreg(dev, PP_BusCTL) & ~ENABLE_IRQ);
+
+       /* Grab the interrupt */
+       if (request_irq(dev->irq, net_interrupt, 0, "cs89x0", dev))
+               return -EAGAIN;
+
+       /* Set up the IRQ - Apparently magic */
+       if (lp->chip_type == CS8900)
+               writereg(dev, PP_CS8900_ISAINT, 0);
+       else
+               writereg(dev, PP_CS8920_ISAINT, 0);
+
+       /* set the Ethernet address */
+       for (i=0; i < ETH_ALEN/2; i++)
+               writereg(dev, PP_IA+i*2, dev->dev_addr[i*2] | (dev->dev_addr[i*2+1] << 8));
+
+       /* Turn on both receive and transmit operations */
+       writereg(dev, PP_LineCTL, readreg(dev, PP_LineCTL) | SERIAL_RX_ON | SERIAL_TX_ON);
+
+       /* Receive only error free packets addressed to this card */
+       lp->rx_mode = 0;
+       writereg(dev, PP_RxCTL, DEF_RX_ACCEPT);
+
+       lp->curr_rx_cfg = RX_OK_ENBL | RX_CRC_ERROR_ENBL;
+
+       writereg(dev, PP_RxCFG, lp->curr_rx_cfg);
+
+       writereg(dev, PP_TxCFG, TX_LOST_CRS_ENBL | TX_SQE_ERROR_ENBL | TX_OK_ENBL |
+              TX_LATE_COL_ENBL | TX_JBR_ENBL | TX_ANY_COL_ENBL | TX_16_COL_ENBL);
+
+       writereg(dev, PP_BufCFG, READY_FOR_TX_ENBL | RX_MISS_COUNT_OVRFLOW_ENBL |
+                TX_COL_COUNT_OVRFLOW_ENBL | TX_UNDERRUN_ENBL);
+
+       /* now that we've got our act together, enable everything */
+       writereg(dev, PP_BusCTL, readreg(dev, PP_BusCTL) | ENABLE_IRQ);
+       netif_start_queue(dev);
+       return 0;
+}
+
+static int
+net_send_packet(struct sk_buff *skb, struct net_device *dev)
+{
+       struct net_local *lp = netdev_priv(dev);
+       unsigned long flags;
+
+       if (net_debug > 3)
+               printk("%s: sent %d byte packet of type %x\n",
+                      dev->name, skb->len,
+                      (skb->data[ETH_ALEN+ETH_ALEN] << 8)
+                      | skb->data[ETH_ALEN+ETH_ALEN+1]);
+
+       /* keep the upload from being interrupted, since we
+          ask the chip to start transmitting before the
+          whole packet has been completely uploaded. */
+       local_irq_save(flags);
+       netif_stop_queue(dev);
+
+       /* initiate a transmit sequence */
+       writereg(dev, PP_TxCMD, lp->send_cmd);
+       writereg(dev, PP_TxLength, skb->len);
+
+       /* Test to see if the chip has allocated memory for the packet */
+       if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) {
+               /* Gasp!  It hasn't.  But that shouldn't happen since
+                  we're waiting for TxOk, so return 1 and requeue this packet. */
+               local_irq_restore(flags);
+               return NETDEV_TX_BUSY;
+       }
+
+       /* Write the contents of the packet */
+       skb_copy_from_linear_data(skb, (void *)(dev->mem_start + PP_TxFrame),
+                                 skb->len+1);
+
+       local_irq_restore(flags);
+       dev_kfree_skb (skb);
+
+       return NETDEV_TX_OK;
+}
+
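The ordering above is the interesting part: the chip is told to start transmitting (TxCMD/TxLength) before the frame is uploaded, which is why the copy runs with interrupts off and why a refusal is reported as NETDEV_TX_BUSY so the networking core requeues the same frame instead of dropping it. A toy sketch of that kick-then-verify contract (all names and state invented):

    #include <stdbool.h>
    #include <stdio.h>

    static bool ready_for_tx = false;   /* hypothetical chip buffer state */

    /* Kick-then-verify: a nonzero return means "busy, requeue this frame",
     * mirroring the NETDEV_TX_BUSY path above. */
    static int send_packet(int len)
    {
            if (!ready_for_tx)
                    return 1;
            printf("uploaded %d bytes\n", len);
            return 0;
    }

    int main(void)
    {
            printf("first try: %s\n", send_packet(64) ? "busy" : "ok");
            ready_for_tx = true;
            printf("retry: %s\n", send_packet(64) ? "busy" : "ok");
            return 0;
    }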
+/* The typical workload of the driver:
+   Handle the network interface interrupts. */
+static irqreturn_t net_interrupt(int irq, void *dev_id)
+{
+       struct net_device *dev = dev_id;
+       struct net_local *lp;
+       int ioaddr, status;
+
+       if (dev == NULL) {
+               printk ("net_interrupt(): irq %d for unknown device.\n", irq);
+               return IRQ_NONE;
+       }
+
+       ioaddr = dev->base_addr;
+       lp = netdev_priv(dev);
+
+       /* we MUST read all the events out of the ISQ, otherwise we'll never
+           get interrupted again.  As a consequence, we can't have any limit
+           on the number of times we loop in the interrupt handler.  The
+           hardware guarantees that eventually we'll run out of events.  Of
+           course, if you're on a slow machine, and packets are arriving
+           faster than you can read them off, you're screwed.  Hasta la
+           vista, baby!  */
+       while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
+               if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+               switch(status & ISQ_EVENT_MASK) {
+               case ISQ_RECEIVER_EVENT:
+                       /* Got a packet(s). */
+                       net_rx(dev);
+                       break;
+               case ISQ_TRANSMITTER_EVENT:
+                       dev->stats.tx_packets++;
+                       netif_wake_queue(dev);
+                       if ((status & TX_OK) == 0)
+                               dev->stats.tx_errors++;
+                       if (status & TX_LOST_CRS)
+                               dev->stats.tx_carrier_errors++;
+                       if (status & TX_SQE_ERROR)
+                               dev->stats.tx_heartbeat_errors++;
+                       if (status & TX_LATE_COL)
+                               dev->stats.tx_window_errors++;
+                       if (status & TX_16_COL)
+                               dev->stats.tx_aborted_errors++;
+                       break;
+               case ISQ_BUFFER_EVENT:
+                       if (status & READY_FOR_TX) {
+                               /* we tried to transmit a packet earlier,
+                                   but inexplicably ran out of buffers.
+                                   That shouldn't happen since we only ever
+                                   load one packet.  Shrug.  Do the right
+                                   thing anyway. */
+                               netif_wake_queue(dev);
+                       }
+                       if (status & TX_UNDERRUN) {
+                               if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+                                lp->send_underrun++;
+                                if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
+                                else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
+                        }
+                       break;
+               case ISQ_RX_MISS_EVENT:
+                       dev->stats.rx_missed_errors += (status >> 6);
+                       break;
+               case ISQ_TX_COL_EVENT:
+                       dev->stats.collisions += (status >> 6);
+                       break;
+               }
+       }
+       return IRQ_HANDLED;
+}
+
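The shifts in the handler above reflect the ISQ word layout: the event code lives in the low six bits and, for the RxMiss/TxCol counter events, the upper bits carry a counter, hence the status >> 6. A small sketch under that assumption (the ISQ_EVENT_MASK value is assumed here, and the sample words are invented):

    #include <stdint.h>
    #include <stdio.h>

    #define ISQ_EVENT_MASK 0x003F   /* assumed: event code in bits 5..0 */

    int main(void)
    {
            /* Hypothetical ISQ words; a zero word means the queue is empty,
             * mirroring the while-loop termination in the handler. */
            uint16_t isq[] = { 0x0204, 0x0148, 0x0000 };

            for (int i = 0; isq[i] != 0; i++) {
                    unsigned event = isq[i] & ISQ_EVENT_MASK;
                    unsigned count = isq[i] >> 6;

                    printf("event=0x%02x counter=%u\n", event, count);
            }
            return 0;
    }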
+/* We have a good packet(s), get it/them out of the buffers. */
+static void
+net_rx(struct net_device *dev)
+{
+       struct sk_buff *skb;
+       int status, length;
+
+       status = readreg(dev, PP_RxStatus);
+       if ((status & RX_OK) == 0) {
+               dev->stats.rx_errors++;
+               if (status & RX_RUNT)
+                               dev->stats.rx_length_errors++;
+               if (status & RX_EXTRA_DATA)
+                               dev->stats.rx_length_errors++;
+               if ((status & RX_CRC_ERROR) &&
+                   !(status & (RX_EXTRA_DATA|RX_RUNT)))
+                       /* per str 172 */
+                       dev->stats.rx_crc_errors++;
+               if (status & RX_DRIBBLE)
+                               dev->stats.rx_frame_errors++;
+               return;
+       }
+
+       length = readreg(dev, PP_RxLength);
+       /* Malloc up new buffer. */
+       skb = alloc_skb(length, GFP_ATOMIC);
+       if (skb == NULL) {
+               printk("%s: Memory squeeze, dropping packet.\n", dev->name);
+               dev->stats.rx_dropped++;
+               return;
+       }
+       skb_put(skb, length);
+
+       skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
+                               length);
+
+       if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
+                                 dev->name, length,
+                                 (skb->data[ETH_ALEN+ETH_ALEN] << 8)
+                                | skb->data[ETH_ALEN+ETH_ALEN+1]);
+
+        skb->protocol=eth_type_trans(skb,dev);
+       netif_rx(skb);
+       dev->stats.rx_packets++;
+       dev->stats.rx_bytes += length;
+}
+
+/* The inverse routine to net_open(). */
+static int
+net_close(struct net_device *dev)
+{
+
+       writereg(dev, PP_RxCFG, 0);
+       writereg(dev, PP_TxCFG, 0);
+       writereg(dev, PP_BufCFG, 0);
+       writereg(dev, PP_BusCTL, 0);
+
+       netif_stop_queue(dev);
+
+       free_irq(dev->irq, dev);
+
+       /* Update the statistics here. */
+
+       return 0;
+
+}
+
+/* Get the current statistics. This may be called with the card open or
+   closed. */
+static struct net_device_stats *
+net_get_stats(struct net_device *dev)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       /* Update the statistics from the device registers. */
+       dev->stats.rx_missed_errors += (readreg(dev, PP_RxMiss) >> 6);
+       dev->stats.collisions += (readreg(dev, PP_TxCol) >> 6);
+       local_irq_restore(flags);
+
+       return &dev->stats;
+}
+
+static void set_multicast_list(struct net_device *dev)
+{
+       struct net_local *lp = netdev_priv(dev);
+
+       if(dev->flags&IFF_PROMISC)
+       {
+               lp->rx_mode = RX_ALL_ACCEPT;
+       } else if ((dev->flags & IFF_ALLMULTI) || !netdev_mc_empty(dev)) {
+               /* The multicast-accept list is initialized to accept-all, and we
+                  rely on higher-level filtering for now. */
+               lp->rx_mode = RX_MULTCAST_ACCEPT;
+       }
+       else
+               lp->rx_mode = 0;
+
+       writereg(dev, PP_RxCTL, DEF_RX_ACCEPT | lp->rx_mode);
+
+       /* in promiscuous mode, we accept errored packets, so we have to enable interrupts on them also */
+       writereg(dev, PP_RxCFG, lp->curr_rx_cfg |
+            (lp->rx_mode == RX_ALL_ACCEPT? (RX_CRC_ERROR_ENBL|RX_RUNT_ENBL|RX_EXTRA_DATA_ENBL) : 0));
+}
+
+
+static int set_mac_address(struct net_device *dev, void *addr)
+{
+       int i;
+       printk("%s: Setting MAC address to ", dev->name);
+       for (i = 0; i < 6; i++)
+               printk(" %2.2x", dev->dev_addr[i] = ((unsigned char *)addr)[i]);
+       printk(".\n");
+       /* set the Ethernet address */
+       for (i=0; i < ETH_ALEN/2; i++)
+               writereg(dev, PP_IA+i*2, dev->dev_addr[i*2] | (dev->dev_addr[i*2+1] << 8));
+
+       return 0;
+}
+
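The packing loop above (used identically in net_open()) loads the six address bytes into three 16-bit individual-address registers, low byte in the low half. A standalone sketch with an example address:

    #include <stdint.h>
    #include <stdio.h>

    #define ETH_ALEN 6

    int main(void)
    {
            /* Pack a MAC address the way the loops above load PP_IA: two
             * consecutive bytes per 16-bit register, low byte in bits 7..0. */
            uint8_t mac[ETH_ALEN] = { 0x00, 0x80, 0x19, 0x12, 0x34, 0x56 };

            for (int i = 0; i < ETH_ALEN / 2; i++) {
                    unsigned reg = mac[i * 2] | (mac[i * 2 + 1] << 8);

                    printf("PP_IA+%d = 0x%04x\n", i * 2, reg);
            }
            return 0;
    }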
+#ifdef MODULE
+
+static struct net_device *dev_cs89x0;
+static int debug;
+
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
+MODULE_LICENSE("GPL");
+
+int __init
+init_module(void)
+{
+       net_debug = debug;
+        dev_cs89x0 = mac89x0_probe(-1);
+       if (IS_ERR(dev_cs89x0)) {
+                printk(KERN_WARNING "mac89x0.c: No card found\n");
+               return PTR_ERR(dev_cs89x0);
+       }
+       return 0;
+}
+
+void
+cleanup_module(void)
+{
+       unregister_netdev(dev_cs89x0);
+       nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
+       free_netdev(dev_cs89x0);
+}
+#endif /* MODULE */
index 824b8e6021f6eff17299350a1e6e9af05380b448..2c7b36673dfc27bbae1c94a033ff702a2a928be4 100644 (file)
@@ -318,8 +318,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db)
 
                if (msecs > 4000) {
                        dev_err(&adapter->pdev->dev, "mbox poll timed out\n");
-                       if (!lancer_chip(adapter))
-                               be_detect_dump_ue(adapter);
+                       be_detect_dump_ue(adapter);
                        return -1;
                }
 
@@ -1540,7 +1539,14 @@ int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value)
 
                req->if_flags_mask = req->if_flags =
                                cpu_to_le32(BE_IF_FLAGS_MULTICAST);
-               req->mcast_num = cpu_to_le16(netdev_mc_count(adapter->netdev));
+
+               /* Reset multicast promiscuous mode, if it was set earlier,
+                * by setting its mask bit while leaving the flags bit clear
+                */
+               req->if_flags_mask |=
+                               cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS);
+
+               req->mcast_num = cpu_to_le32(netdev_mc_count(adapter->netdev));
                netdev_for_each_mc_addr(ha, adapter->netdev)
                        memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN);
        }
index fbc8a915519e57aa36bb2f01f781475a5530ada1..f2c89e3ccabde3bdbae75296f882eca41df18322 100644 (file)
@@ -48,6 +48,8 @@
 /* Lancer SLIPORT_CONTROL SLIPORT_STATUS registers */
 #define SLIPORT_STATUS_OFFSET          0x404
 #define SLIPORT_CONTROL_OFFSET         0x408
+#define SLIPORT_ERROR1_OFFSET          0x40C
+#define SLIPORT_ERROR2_OFFSET          0x410
 
 #define SLIPORT_STATUS_ERR_MASK                0x80000000
 #define SLIPORT_STATUS_RN_MASK         0x01000000
index 21804972fa2f91cc2278823c096f379403a51218..e0aed188c57fd09b714766f0c96aa9e2c3601f43 100644 (file)
@@ -1905,6 +1905,8 @@ loop_continue:
                be_rx_stats_update(rxo, rxcp);
        }
 
+       be_cq_notify(adapter, rx_cq->id, false, work_done);
+
        /* Refill the queue */
        if (work_done && atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM)
                be_post_rx_frags(rxo, GFP_ATOMIC);
@@ -1912,10 +1914,8 @@ loop_continue:
        /* All consumed */
        if (work_done < budget) {
                napi_complete(napi);
-               be_cq_notify(adapter, rx_cq->id, true, work_done);
-       } else {
-               /* More to be consumed; continue with interrupts disabled */
-               be_cq_notify(adapter, rx_cq->id, false, work_done);
+               /* Arm CQ */
+               be_cq_notify(adapter, rx_cq->id, true, 0);
        }
        return work_done;
 }
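The reshuffled poll routine above keeps one invariant: completions consumed in a round are acknowledged without re-arming, and the interrupt source is only re-armed once a round comes in under budget and napi_complete() runs. A userspace toy of that budget contract (the backlog and helper are invented, not be2net code):

    #include <stdbool.h>
    #include <stdio.h>

    static int pending = 10;   /* hypothetical backlog of RX completions */

    /* Drain up to `budget` completions; report whether the source was
     * re-armed (the napi_complete() + arm-CQ step in the driver). */
    static int poll_once(int budget, bool *armed)
    {
            int work_done = pending < budget ? pending : budget;

            pending -= work_done;
            if (work_done < budget)
                    *armed = true;
            return work_done;
    }

    int main(void)
    {
            bool armed = false;

            while (!armed) {
                    int work_done = poll_once(4, &armed);

                    printf("work_done=%d armed=%d\n", work_done, armed);
            }
            return 0;
    }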
@@ -1977,42 +1977,62 @@ static int be_poll_tx_mcc(struct napi_struct *napi, int budget)
 
 void be_detect_dump_ue(struct be_adapter *adapter)
 {
-       u32 ue_status_lo, ue_status_hi, ue_status_lo_mask, ue_status_hi_mask;
+       u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
+       u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
        u32 i;
 
-       pci_read_config_dword(adapter->pdev,
-                               PCICFG_UE_STATUS_LOW, &ue_status_lo);
-       pci_read_config_dword(adapter->pdev,
-                               PCICFG_UE_STATUS_HIGH, &ue_status_hi);
-       pci_read_config_dword(adapter->pdev,
-                               PCICFG_UE_STATUS_LOW_MASK, &ue_status_lo_mask);
-       pci_read_config_dword(adapter->pdev,
-                               PCICFG_UE_STATUS_HI_MASK, &ue_status_hi_mask);
+       if (lancer_chip(adapter)) {
+               sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
+               if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
+                       sliport_err1 = ioread32(adapter->db +
+                                       SLIPORT_ERROR1_OFFSET);
+                       sliport_err2 = ioread32(adapter->db +
+                                       SLIPORT_ERROR2_OFFSET);
+               }
+       } else {
+               pci_read_config_dword(adapter->pdev,
+                               PCICFG_UE_STATUS_LOW, &ue_lo);
+               pci_read_config_dword(adapter->pdev,
+                               PCICFG_UE_STATUS_HIGH, &ue_hi);
+               pci_read_config_dword(adapter->pdev,
+                               PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask);
+               pci_read_config_dword(adapter->pdev,
+                               PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask);
 
-       ue_status_lo = (ue_status_lo & (~ue_status_lo_mask));
-       ue_status_hi = (ue_status_hi & (~ue_status_hi_mask));
+               ue_lo = (ue_lo & (~ue_lo_mask));
+               ue_hi = (ue_hi & (~ue_hi_mask));
+       }
 
-       if (ue_status_lo || ue_status_hi) {
+       if (ue_lo || ue_hi ||
+               sliport_status & SLIPORT_STATUS_ERR_MASK) {
                adapter->ue_detected = true;
                adapter->eeh_err = true;
                dev_err(&adapter->pdev->dev, "UE Detected!!\n");
        }
 
-       if (ue_status_lo) {
-               for (i = 0; ue_status_lo; ue_status_lo >>= 1, i++) {
-                       if (ue_status_lo & 1)
+       if (ue_lo) {
+               for (i = 0; ue_lo; ue_lo >>= 1, i++) {
+                       if (ue_lo & 1)
                                dev_err(&adapter->pdev->dev,
                                "UE: %s bit set\n", ue_status_low_desc[i]);
                }
        }
-       if (ue_status_hi) {
-               for (i = 0; ue_status_hi; ue_status_hi >>= 1, i++) {
-                       if (ue_status_hi & 1)
+       if (ue_hi) {
+               for (i = 0; ue_hi; ue_hi >>= 1, i++) {
+                       if (ue_hi & 1)
                                dev_err(&adapter->pdev->dev,
                                "UE: %s bit set\n", ue_status_hi_desc[i]);
                }
        }
 
+       if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
+               dev_err(&adapter->pdev->dev,
+                       "sliport status 0x%x\n", sliport_status);
+               dev_err(&adapter->pdev->dev,
+                       "sliport error1 0x%x\n", sliport_err1);
+               dev_err(&adapter->pdev->dev,
+                       "sliport error2 0x%x\n", sliport_err2);
+       }
 }
 
 static void be_worker(struct work_struct *work)
@@ -2022,7 +2042,7 @@ static void be_worker(struct work_struct *work)
        struct be_rx_obj *rxo;
        int i;
 
-       if (!adapter->ue_detected && !lancer_chip(adapter))
+       if (!adapter->ue_detected)
                be_detect_dump_ue(adapter);
 
        /* when interrupts are not yet enabled, just reap any pending
index 40e1a175fcebcc5668cb9e137336dfd1cdc21f42..ba82a266051ddfa6a22a84412f79b1f64455de16 100644 (file)
  *
  *********************************************************/
 
-#define filename __FILE__
-
 #define timeout_msg "*** timeout at %s:%s (line %d) ***\n"
 #define TIMEOUT_MSG(lineno) \
-       pr_notice(timeout_msg, filename, __func__, (lineno))
+       pr_notice(timeout_msg, __FILE__, __func__, (lineno))
 
 #define invalid_pcb_msg "*** invalid pcb length %d at %s:%s (line %d) ***\n"
 #define INVALID_PCB_MSG(len) \
-       pr_notice(invalid_pcb_msg, (len), filename, __func__, __LINE__)
+       pr_notice(invalid_pcb_msg, (len), __FILE__, __func__, __LINE__)
 
 #define search_msg "%s: Looking for 3c505 adapter at address %#x..."
 
index ae17cd1a907fb80749227cb79f6c6c407a5fc3be..5a2fdf7a00c8bbbe9a8f9f7a3a7abf8fd1be180f 100644 (file)
@@ -2810,6 +2810,10 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 
        e100_get_defaults(nic);
 
+       /* D100 MAC doesn't allow rx of vlan packets with normal MTU */
+       if (nic->mac < mac_82558_D101_A4)
+               netdev->features |= NETIF_F_VLAN_CHALLENGED;
+
        /* locks must be initialized before calling hw_reset */
        spin_lock_init(&nic->cb_lock);
        spin_lock_init(&nic->cmd_lock);
index 6a17c62cb86f5ad5210d9b5a1452b00995e7da7d..e2a80a283fd313e0e76fa87a8470c09b37415f27 100644 (file)
@@ -866,8 +866,7 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw)
 
        if (test_and_set_bit(__E1000_ACCESS_SHARED_RESOURCE,
                             &hw->adapter->state)) {
-               WARN(1, "e1000e: %s: contention for Phy access\n",
-                    hw->adapter->netdev->name);
+               e_dbg("contention for Phy access\n");
                return -E1000_ERR_PHY;
        }
 
index 7edf31efe756a45bc56844eb672bd025a1dd7fb6..b17d7c20f8177816434f914f15635b3624361d5e 100644 (file)
@@ -1687,7 +1687,7 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw)
                if (ret_val)
                        goto out;
 
-               is_cm = !(phy_data & I347AT4_PCDC_CABLE_LENGTH_UNIT);
+               is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
 
                /* Populate the phy structure with cable length in meters */
                phy->min_cable_length = phy_data / (is_cm ? 100 : 1);
index 834f044be4c3d7fe15febecca9fcd8d8d44c0a59..f1365fef4ed2b7b4e9e3050dce1715ac41c63edc 100644 (file)
@@ -3344,7 +3344,7 @@ static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
 static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
                                        u32 length)
 {
-       u32 hicr, i;
+       u32 hicr, i, bi;
        u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
        u8 buf_len, dword_len;
 
@@ -3398,9 +3398,9 @@ static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
        dword_len = hdr_size >> 2;
 
        /* first pull in the header so we know the buffer length */
-       for (i = 0; i < dword_len; i++) {
-               buffer[i] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
-               le32_to_cpus(&buffer[i]);
+       for (bi = 0; bi < dword_len; bi++) {
+               buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+               le32_to_cpus(&buffer[bi]);
        }
 
        /* If there is any thing in data position pull it in */
@@ -3414,12 +3414,14 @@ static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
                goto out;
        }
 
-       /* Calculate length in DWORDs, add one for odd lengths */
-       dword_len = (buf_len + 1) >> 2;
+       /* Calculate length in DWORDs, add 3 for odd lengths */
+       dword_len = (buf_len + 3) >> 2;
 
-       /* Pull in the rest of the buffer (i is where we left off)*/
-       for (; i < buf_len; i++)
-               buffer[i] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
+       /* Pull in the rest of the buffer (bi is where we left off)*/
+       for (; bi <= dword_len; bi++) {
+               buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+               le32_to_cpus(&buffer[bi]);
+       }
 
 out:
        return ret_val;
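The corrected rounding above is the standard round-up-to-N idiom: (len + 3) >> 2 yields the number of whole 32-bit words needed, where the old (len + 1) >> 2 undercounted for most lengths. A quick table:

    #include <stdio.h>

    int main(void)
    {
            /* Compare the old and new DWORD counts for small byte lengths;
             * e.g. len = 5 needs 2 words, but the old form computed 1. */
            for (unsigned len = 1; len <= 8; len++)
                    printf("len=%u old=%u new=%u\n",
                           len, (len + 1) >> 2, (len + 3) >> 2);
            return 0;
    }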
index 3631d639d86a596cf023f3f7f9474508c10ebc93..33b93ffb87cb1e348a16115913b6d5d1ef0623a5 100644 (file)
@@ -561,11 +561,12 @@ static int ixgbe_dcbnl_ieee_getets(struct net_device *dev,
        struct ixgbe_adapter *adapter = netdev_priv(dev);
        struct ieee_ets *my_ets = adapter->ixgbe_ieee_ets;
 
+       ets->ets_cap = adapter->dcb_cfg.num_tcs.pg_tcs;
+
        /* No IEEE PFC settings available */
        if (!my_ets)
-               return -EINVAL;
+               return 0;
 
-       ets->ets_cap = adapter->dcb_cfg.num_tcs.pg_tcs;
        ets->cbs = my_ets->cbs;
        memcpy(ets->tc_tx_bw, my_ets->tc_tx_bw, sizeof(ets->tc_tx_bw));
        memcpy(ets->tc_rx_bw, my_ets->tc_rx_bw, sizeof(ets->tc_rx_bw));
@@ -621,11 +622,12 @@ static int ixgbe_dcbnl_ieee_getpfc(struct net_device *dev,
        struct ieee_pfc *my_pfc = adapter->ixgbe_ieee_pfc;
        int i;
 
+       pfc->pfc_cap = adapter->dcb_cfg.num_tcs.pfc_tcs;
+
        /* No IEEE PFC settings available */
        if (!my_pfc)
-               return -EINVAL;
+               return 0;
 
-       pfc->pfc_cap = adapter->dcb_cfg.num_tcs.pfc_tcs;
        pfc->pfc_en = my_pfc->pfc_en;
        pfc->mbc = my_pfc->mbc;
        pfc->delay = my_pfc->delay;
index 09b8e88b299940071d49ae3e349133c0178adb62..8ef92d1a6aa126037c3ce9d1049a7fde3f766790 100644 (file)
@@ -3345,34 +3345,25 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
 
        hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true);
 
-       /* reconfigure the hardware */
-       if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE) {
 #ifdef IXGBE_FCOE
-               if (adapter->netdev->features & NETIF_F_FCOE_MTU)
-                       max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE);
+       if (adapter->netdev->features & NETIF_F_FCOE_MTU)
+               max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE);
 #endif
+
+       /* reconfigure the hardware */
+       if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE) {
                ixgbe_dcb_calculate_tc_credits(hw, &adapter->dcb_cfg, max_frame,
                                                DCB_TX_CONFIG);
                ixgbe_dcb_calculate_tc_credits(hw, &adapter->dcb_cfg, max_frame,
                                                DCB_RX_CONFIG);
                ixgbe_dcb_hw_config(hw, &adapter->dcb_cfg);
-       } else {
-               struct net_device *dev = adapter->netdev;
-
-               if (adapter->ixgbe_ieee_ets) {
-                       struct ieee_ets *ets = adapter->ixgbe_ieee_ets;
-                       int max_frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
-
-                       ixgbe_dcb_hw_ets(&adapter->hw, ets, max_frame);
-               }
-
-               if (adapter->ixgbe_ieee_pfc) {
-                       struct ieee_pfc *pfc = adapter->ixgbe_ieee_pfc;
-                       u8 *prio_tc = adapter->ixgbe_ieee_ets->prio_tc;
-
-                       ixgbe_dcb_hw_pfc_config(&adapter->hw, pfc->pfc_en,
-                                               prio_tc);
-               }
+       } else if (adapter->ixgbe_ieee_ets && adapter->ixgbe_ieee_pfc) {
+               ixgbe_dcb_hw_ets(&adapter->hw,
+                                adapter->ixgbe_ieee_ets,
+                                max_frame);
+               ixgbe_dcb_hw_pfc_config(&adapter->hw,
+                                       adapter->ixgbe_ieee_pfc->pfc_en,
+                                       adapter->ixgbe_ieee_ets->prio_tc);
        }
 
        /* Enable RSS Hash per TC */
@@ -6125,7 +6116,6 @@ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter)
        autoneg = hw->phy.autoneg_advertised;
        if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
                hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiation);
-       hw->mac.autotry_restart = false;
        if (hw->mac.ops.setup_link)
                hw->mac.ops.setup_link(hw, autoneg, negotiation, true);
 
@@ -7589,13 +7579,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
                goto err_eeprom;
        }
 
-       /* power down the optics for multispeed fiber and 82599 SFP+ fiber */
-       if (hw->mac.ops.disable_tx_laser &&
-           ((hw->phy.multispeed_fiber) ||
-            ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
-             (hw->mac.type == ixgbe_mac_82599EB))))
-               hw->mac.ops.disable_tx_laser(hw);
-
        setup_timer(&adapter->service_timer, &ixgbe_service_timer,
                    (unsigned long) adapter);
 
@@ -7693,6 +7676,13 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
        if (err)
                goto err_register;
 
+       /* power down the optics for multispeed fiber and 82599 SFP+ fiber */
+       if (hw->mac.ops.disable_tx_laser &&
+           ((hw->phy.multispeed_fiber) ||
+            ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
+             (hw->mac.type == ixgbe_mac_82599EB))))
+               hw->mac.ops.disable_tx_laser(hw);
+
        /* carrier off reporting is important to ethtool even BEFORE open */
        netif_carrier_off(netdev);
 
index 5a7e1eb3359996b5930b0e4cd5808e911b34ed21..4a5d8897faab47ec93358c7a7d16c4b44d983b83 100644 (file)
@@ -42,10 +42,12 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
 int ixgbe_ndo_get_vf_config(struct net_device *netdev,
                            int vf, struct ifla_vf_info *ivi);
 void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter);
+#ifdef CONFIG_PCI_IOV
 void ixgbe_disable_sriov(struct ixgbe_adapter *adapter);
 void ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
                        const struct ixgbe_info *ii);
 int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter);
+#endif
 
 
 #endif /* _IXGBE_SRIOV_H_ */
index 5e92cc2079bd2f64de50a3f361042f4f521db6f3..4c8e19951d57e58d53dedd3249c72d976413d020 100644 (file)
@@ -54,7 +54,7 @@ char ixgbevf_driver_name[] = "ixgbevf";
 static const char ixgbevf_driver_string[] =
        "Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver";
 
-#define DRV_VERSION "2.1.0-k"
+#define DRV_VERSION "2.2.0-k"
 const char ixgbevf_driver_version[] = DRV_VERSION;
 static char ixgbevf_copyright[] =
        "Copyright (c) 2009 - 2010 Intel Corporation.";
index 84083ec6e612774c1c026c48295e216db85a3bf6..0578859a3c73e6ab4952a4b925c64ffee07fab7b 100644 (file)
@@ -115,16 +115,4 @@ config R8169
          To compile this driver as a module, choose M here: the module
          will be called r8169.  This is recommended.
 
-config SC92031
-       tristate "Silan SC92031 PCI Fast Ethernet Adapter driver (EXPERIMENTAL)"
-       depends on PCI && EXPERIMENTAL
-       select CRC32
-       ---help---
-         This is a driver for the Fast Ethernet PCI network cards based on
-         the Silan SC92031 chip (sometimes also called Rsltek 8139D). If you
-         have one of these, say Y here.
-
-         To compile this driver as a module, choose M here: the module
-         will be called sc92031.  This is recommended.
-
 endif # NET_VENDOR_REALTEK
index e48cfb6ac42defd17a2a9a69cc9b52094bc1cc07..71b1da30ecb5b380cab2cfb0baf6b372b4a75fc0 100644 (file)
@@ -6,4 +6,3 @@ obj-$(CONFIG_8139CP) += 8139cp.o
 obj-$(CONFIG_8139TOO) += 8139too.o
 obj-$(CONFIG_ATP) += atp.o
 obj-$(CONFIG_R8169) += r8169.o
-obj-$(CONFIG_SC92031) += sc92031.o
diff --git a/drivers/net/ethernet/realtek/sc92031.c b/drivers/net/ethernet/realtek/sc92031.c
deleted file mode 100644 (file)
index a284d64..0000000
+++ /dev/null
@@ -1,1609 +0,0 @@
-/*  Silan SC92031 PCI Fast Ethernet Adapter driver
- *
- *  Based on vendor drivers:
- *  Silan Fast Ethernet Netcard Driver:
- *    MODULE_AUTHOR ("gaoyonghong");
- *    MODULE_DESCRIPTION ("SILAN Fast Ethernet driver");
- *    MODULE_LICENSE("GPL");
- *  8139D Fast Ethernet driver:
- *    (C) 2002 by gaoyonghong
- *    MODULE_AUTHOR ("gaoyonghong");
- *    MODULE_DESCRIPTION ("Rsltek 8139D PCI Fast Ethernet Adapter driver");
- *    MODULE_LICENSE("GPL");
- *  Both are almost identical and seem to be based on pci-skeleton.c
- *
- *  Rewritten for 2.6 by Cesar Eduardo Barros
- *
- *  A datasheet for this chip can be found at
- *  http://www.silan.com.cn/english/product/pdf/SC92031AY.pdf 
- */
-
-/* Note about set_mac_address: I don't know how to change the hardware
- * matching, so you need to enable IFF_PROMISC when using it.
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/crc32.h>
-
-#include <asm/irq.h>
-
-#define SC92031_NAME "sc92031"
-
-/* BAR 0 is MMIO, BAR 1 is PIO */
-#ifndef SC92031_USE_BAR
-#define SC92031_USE_BAR 0
-#endif
-
-/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). */
-static int multicast_filter_limit = 64;
-module_param(multicast_filter_limit, int, 0);
-MODULE_PARM_DESC(multicast_filter_limit,
-       "Maximum number of filtered multicast addresses");
-
-static int media;
-module_param(media, int, 0);
-MODULE_PARM_DESC(media, "Media type (0x00 = autodetect,"
-       " 0x01 = 10M half, 0x02 = 10M full,"
-       " 0x04 = 100M half, 0x08 = 100M full)");
-
-/* Size of the in-memory receive ring. */
-#define  RX_BUF_LEN_IDX  3 /* 0==8K, 1==16K, 2==32K, 3==64K, 4==128K */
-#define  RX_BUF_LEN    (8192 << RX_BUF_LEN_IDX)
-
-/* Number of Tx descriptor registers. */
-#define  NUM_TX_DESC      4
-
-/* max supported ethernet frame size -- must be at least (dev->mtu+14+4).*/
-#define  MAX_ETH_FRAME_SIZE      1536
-
-/* Size of the Tx bounce buffers -- must be at least (dev->mtu+14+4). */
-#define  TX_BUF_SIZE       MAX_ETH_FRAME_SIZE
-#define  TX_BUF_TOT_LEN    (TX_BUF_SIZE * NUM_TX_DESC)
-
-/* The following settings are log_2(bytes)-4:  0 == 16 bytes .. 6==1024, 7==end of packet. */
-#define  RX_FIFO_THRESH    7     /* Rx buffer level before first PCI xfer.  */
-
-/* Time in jiffies before concluding the transmitter is hung. */
-#define  TX_TIMEOUT     (4*HZ)
-
-#define  SILAN_STATS_NUM    2    /* number of ETHTOOL_GSTATS */
-
-/* media options */
-#define  AUTOSELECT    0x00
-#define  M10_HALF      0x01
-#define  M10_FULL      0x02
-#define  M100_HALF     0x04
-#define  M100_FULL     0x08
-
- /* Symbolic offsets to registers. */
-enum  silan_registers {
-   Config0    = 0x00,         // Config0
-   Config1    = 0x04,         // Config1
-   RxBufWPtr  = 0x08,         // Rx buffer write pointer
-   IntrStatus = 0x0C,         // Interrupt status
-   IntrMask   = 0x10,         // Interrupt mask
-   RxbufAddr  = 0x14,         // Rx buffer start address
-   RxBufRPtr  = 0x18,         // Rx buffer read pointer
-   Txstatusall = 0x1C,        // Transmit status of all descriptors
-   TxStatus0  = 0x20,        // Transmit status (Four 32bit registers).
-   TxAddr0    = 0x30,         // Tx descriptors (also four 32bit).
-   RxConfig   = 0x40,         // Rx configuration
-   MAC0              = 0x44,         // Ethernet hardware address.
-   MAR0              = 0x4C,         // Multicast filter.
-   RxStatus0  = 0x54,         // Rx status
-   TxConfig   = 0x5C,         // Tx configuration
-   PhyCtrl    = 0x60,         // physical control
-   FlowCtrlConfig = 0x64,     // flow control
-   Miicmd0    = 0x68,         // Mii command0 register
-   Miicmd1    = 0x6C,         // Mii command1 register
-   Miistatus  = 0x70,         // Mii status register
-   Timercnt   = 0x74,         // Timer counter register
-   TimerIntr  = 0x78,         // Timer interrupt register
-   PMConfig   = 0x7C,         // Power Manager configuration
-   CRC0       = 0x80,         // Power Manager CRC (two 32-bit registers)
-   Wakeup0    = 0x88,         // Power Manager wakeup (eight 64-bit registers)
-   LSBCRC0    = 0xC8,         // Power Manager LSBCRC (two 32-bit registers)
-   TestD0     = 0xD0,
-   TestD4     = 0xD4,
-   TestD8     = 0xD8,
-};
-
-#define MII_JAB             16
-#define MII_OutputStatus    24
-
-#define PHY_16_JAB_ENB      0x1000
-#define PHY_16_PORT_ENB     0x1
-
-enum IntrStatusBits {
-   LinkFail       = 0x80000000,
-   LinkOK         = 0x40000000,
-   TimeOut        = 0x20000000,
-   RxOverflow     = 0x0040,
-   RxOK           = 0x0020,
-   TxOK           = 0x0001,
-   IntrBits = LinkFail|LinkOK|TimeOut|RxOverflow|RxOK|TxOK,
-};
-
-enum TxStatusBits {
-   TxCarrierLost = 0x20000000,
-   TxAborted     = 0x10000000,
-   TxOutOfWindow = 0x08000000,
-   TxNccShift    = 22,
-   EarlyTxThresShift = 16,
-   TxStatOK      = 0x8000,
-   TxUnderrun    = 0x4000,
-   TxOwn         = 0x2000,
-};
-
-enum RxStatusBits {
-   RxStatesOK   = 0x80000,
-   RxBadAlign   = 0x40000,
-   RxHugeFrame  = 0x20000,
-   RxSmallFrame = 0x10000,
-   RxCRCOK      = 0x8000,
-   RxCrlFrame   = 0x4000,
-   Rx_Broadcast = 0x2000,
-   Rx_Multicast = 0x1000,
-   RxAddrMatch  = 0x0800,
-   MiiErr       = 0x0400,
-};
-
-enum RxConfigBits {
-   RxFullDx    = 0x80000000,
-   RxEnb       = 0x40000000,
-   RxSmall     = 0x20000000,
-   RxHuge      = 0x10000000,
-   RxErr       = 0x08000000,
-   RxAllphys   = 0x04000000,
-   RxMulticast = 0x02000000,
-   RxBroadcast = 0x01000000,
-   RxLoopBack  = (1 << 23) | (1 << 22),
-   LowThresholdShift  = 12,
-   HighThresholdShift = 2,
-};
-
-enum TxConfigBits {
-   TxFullDx       = 0x80000000,
-   TxEnb          = 0x40000000,
-   TxEnbPad       = 0x20000000,
-   TxEnbHuge      = 0x10000000,
-   TxEnbFCS       = 0x08000000,
-   TxNoBackOff    = 0x04000000,
-   TxEnbPrem      = 0x02000000,
-   TxCareLostCrs  = 0x1000000,
-   TxExdCollNum   = 0xf00000,
-   TxDataRate     = 0x80000,
-};
-
-enum PhyCtrlconfigbits {
-   PhyCtrlAne         = 0x80000000,
-   PhyCtrlSpd100      = 0x40000000,
-   PhyCtrlSpd10       = 0x20000000,
-   PhyCtrlPhyBaseAddr = 0x1f000000,
-   PhyCtrlDux         = 0x800000,
-   PhyCtrlReset       = 0x400000,
-};
-
-enum FlowCtrlConfigBits {
-   FlowCtrlFullDX = 0x80000000,
-   FlowCtrlEnb    = 0x40000000,
-};
-
-enum Config0Bits {
-   Cfg0_Reset  = 0x80000000,
-   Cfg0_Anaoff = 0x40000000,
-   Cfg0_LDPS   = 0x20000000,
-};
-
-enum Config1Bits {
-   Cfg1_EarlyRx = 1 << 31,
-   Cfg1_EarlyTx = 1 << 30,
-
-   //rx buffer size
-   Cfg1_Rcv8K   = 0x0,
-   Cfg1_Rcv16K  = 0x1,
-   Cfg1_Rcv32K  = 0x3,
-   Cfg1_Rcv64K  = 0x7,
-   Cfg1_Rcv128K = 0xf,
-};
-
-enum MiiCmd0Bits {
-   Mii_Divider = 0x20000000,
-   Mii_WRITE   = 0x400000,
-   Mii_READ    = 0x200000,
-   Mii_SCAN    = 0x100000,
-   Mii_Tamod   = 0x80000,
-   Mii_Drvmod  = 0x40000,
-   Mii_mdc     = 0x20000,
-   Mii_mdoen   = 0x10000,
-   Mii_mdo     = 0x8000,
-   Mii_mdi     = 0x4000,
-};
-
-enum MiiStatusBits {
-    Mii_StatusBusy = 0x80000000,
-};
-
-enum PMConfigBits {
-   PM_Enable  = 1 << 31,
-   PM_LongWF  = 1 << 30,
-   PM_Magic   = 1 << 29,
-   PM_LANWake = 1 << 28,
-   PM_LWPTN   = (1 << 27 | 1<< 26),
-   PM_LinkUp  = 1 << 25,
-   PM_WakeUp  = 1 << 24,
-};
-
-/* Locking rules:
- * priv->lock protects most of the fields of priv and most of the
- * hardware registers. It does not have to protect against softirqs
- * between sc92031_disable_interrupts and sc92031_enable_interrupts;
- * it also does not need to be used in ->open and ->stop while the
- * device interrupts are off.
- * Not having to protect against softirqs is very useful due to heavy
- * use of mdelay() at _sc92031_reset.
- * Functions prefixed with _sc92031_ must be called with the lock held;
- * functions prefixed with sc92031_ must be called without the lock held.
- * Use mmiowb() before unlocking if the hardware was written to.
- */
-
-/* Locking rules for the interrupt:
- * - the interrupt and the tasklet never run at the same time
- * - neither run between sc92031_disable_interrupts and
- *   sc92031_enable_interrupts
- */
-
-struct sc92031_priv {
-       spinlock_t              lock;
-       /* iomap.h cookie */
-       void __iomem            *port_base;
-       /* pci device structure */
-       struct pci_dev          *pdev;
-       /* tasklet */
-       struct tasklet_struct   tasklet;
-
-       /* CPU address of rx ring */
-       void                    *rx_ring;
-       /* PCI address of rx ring */
-       dma_addr_t              rx_ring_dma_addr;
-       /* PCI address of rx ring read pointer */
-       dma_addr_t              rx_ring_tail;
-
-       /* tx ring write index */
-       unsigned                tx_head;
-       /* tx ring read index */
-       unsigned                tx_tail;
-       /* CPU address of tx bounce buffer */
-       void                    *tx_bufs;
-       /* PCI address of tx bounce buffer */
-       dma_addr_t              tx_bufs_dma_addr;
-
-       /* copies of some hardware registers */
-       u32                     intr_status;
-       atomic_t                intr_mask;
-       u32                     rx_config;
-       u32                     tx_config;
-       u32                     pm_config;
-
-       /* copy of some flags from dev->flags */
-       unsigned int            mc_flags;
-
-       /* for ETHTOOL_GSTATS */
-       u64                     tx_timeouts;
-       u64                     rx_loss;
-
-       /* for dev->get_stats */
-       long                    rx_value;
-};
-
-/* I don't know which registers can be safely read; however, I can guess
- * MAC0 is one of them. */
-static inline void _sc92031_dummy_read(void __iomem *port_base)
-{
-       ioread32(port_base + MAC0);
-}
-
-static u32 _sc92031_mii_wait(void __iomem *port_base)
-{
-       u32 mii_status;
-
-       do {
-               udelay(10);
-               mii_status = ioread32(port_base + Miistatus);
-       } while (mii_status & Mii_StatusBusy);
-
-       return mii_status;
-}
-
-static u32 _sc92031_mii_cmd(void __iomem *port_base, u32 cmd0, u32 cmd1)
-{
-       iowrite32(Mii_Divider, port_base + Miicmd0);
-
-       _sc92031_mii_wait(port_base);
-
-       iowrite32(cmd1, port_base + Miicmd1);
-       iowrite32(Mii_Divider | cmd0, port_base + Miicmd0);
-
-       return _sc92031_mii_wait(port_base);
-}
-
-static void _sc92031_mii_scan(void __iomem *port_base)
-{
-       _sc92031_mii_cmd(port_base, Mii_SCAN, 0x1 << 6);
-}
-
-static u16 _sc92031_mii_read(void __iomem *port_base, unsigned reg)
-{
-       return _sc92031_mii_cmd(port_base, Mii_READ, reg << 6) >> 13;
-}
-
-static void _sc92031_mii_write(void __iomem *port_base, unsigned reg, u16 val)
-{
-       _sc92031_mii_cmd(port_base, Mii_WRITE, (reg << 6) | ((u32)val << 11));
-}
-
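The field packing in the three MII helpers above (part of the driver being deleted here) reads straight off the shifts: the PHY register number sits at bit 6 of the command word, write data at bit 11, and read data comes back starting at bit 13 of the status word. A sketch of just that bit arithmetic (the sample value is invented):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned reg = 0x01;       /* a PHY register number, e.g. BMSR */
            uint16_t val = 0x782d;     /* hypothetical write value */

            /* Command word for a write: register at bit 6, data at bit 11. */
            uint32_t wr_cmd1 = ((uint32_t)reg << 6) | ((uint32_t)val << 11);

            /* A read returns the data at bit 13 of the status word. */
            uint32_t rd_status = (uint32_t)val << 13;   /* pretend readback */

            printf("cmd1=0x%08x readback=0x%04x\n",
                   wr_cmd1, (unsigned)(uint16_t)(rd_status >> 13));
            return 0;
    }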
-static void sc92031_disable_interrupts(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-
-       /* tell the tasklet/interrupt not to enable interrupts */
-       atomic_set(&priv->intr_mask, 0);
-       wmb();
-
-       /* stop interrupts */
-       iowrite32(0, port_base + IntrMask);
-       _sc92031_dummy_read(port_base);
-       mmiowb();
-
-       /* wait for any concurrent interrupt/tasklet to finish */
-       synchronize_irq(dev->irq);
-       tasklet_disable(&priv->tasklet);
-}
-
-static void sc92031_enable_interrupts(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-
-       tasklet_enable(&priv->tasklet);
-
-       atomic_set(&priv->intr_mask, IntrBits);
-       wmb();
-
-       iowrite32(IntrBits, port_base + IntrMask);
-       mmiowb();
-}
-
-static void _sc92031_disable_tx_rx(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-
-       priv->rx_config &= ~RxEnb;
-       priv->tx_config &= ~TxEnb;
-       iowrite32(priv->rx_config, port_base + RxConfig);
-       iowrite32(priv->tx_config, port_base + TxConfig);
-}
-
-static void _sc92031_enable_tx_rx(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-
-       priv->rx_config |= RxEnb;
-       priv->tx_config |= TxEnb;
-       iowrite32(priv->rx_config, port_base + RxConfig);
-       iowrite32(priv->tx_config, port_base + TxConfig);
-}
-
-static void _sc92031_tx_clear(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-
-       while (priv->tx_head - priv->tx_tail > 0) {
-               priv->tx_tail++;
-               dev->stats.tx_dropped++;
-       }
-       priv->tx_head = priv->tx_tail = 0;
-}
-
-static void _sc92031_set_mar(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u32 mar0 = 0, mar1 = 0;
-
-       if ((dev->flags & IFF_PROMISC) ||
-           netdev_mc_count(dev) > multicast_filter_limit ||
-           (dev->flags & IFF_ALLMULTI))
-               mar0 = mar1 = 0xffffffff;
-       else if (dev->flags & IFF_MULTICAST) {
-               struct netdev_hw_addr *ha;
-
-               netdev_for_each_mc_addr(ha, dev) {
-                       u32 crc;
-                       unsigned bit = 0;
-
-                       crc = ~ether_crc(ETH_ALEN, ha->addr);
-                       crc >>= 24;
-
-                       if (crc & 0x01) bit |= 0x02;
-                       if (crc & 0x02) bit |= 0x01;
-                       if (crc & 0x10) bit |= 0x20;
-                       if (crc & 0x20) bit |= 0x10;
-                       if (crc & 0x40) bit |= 0x08;
-                       if (crc & 0x80) bit |= 0x04;
-
-                       if (bit > 31)
-                               mar0 |= 0x1 << (bit - 32);
-                       else
-                               mar1 |= 0x1 << bit;
-               }
-       }
-
-       iowrite32(mar0, port_base + MAR0);
-       iowrite32(mar1, port_base + MAR0 + 4);
-}
-
-static void _sc92031_set_rx_config(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       unsigned int old_mc_flags;
-       u32 rx_config_bits = 0;
-
-       old_mc_flags = priv->mc_flags;
-
-       if (dev->flags & IFF_PROMISC)
-               rx_config_bits |= RxSmall | RxHuge | RxErr | RxBroadcast
-                               | RxMulticast | RxAllphys;
-
-       if (dev->flags & (IFF_ALLMULTI | IFF_MULTICAST))
-               rx_config_bits |= RxMulticast;
-
-       if (dev->flags & IFF_BROADCAST)
-               rx_config_bits |= RxBroadcast;
-
-       priv->rx_config &= ~(RxSmall | RxHuge | RxErr | RxBroadcast
-                       | RxMulticast | RxAllphys);
-       priv->rx_config |= rx_config_bits;
-
-       priv->mc_flags = dev->flags & (IFF_PROMISC | IFF_ALLMULTI
-                       | IFF_MULTICAST | IFF_BROADCAST);
-
-       if (netif_carrier_ok(dev) && priv->mc_flags != old_mc_flags)
-               iowrite32(priv->rx_config, port_base + RxConfig);
-}
-
-static bool _sc92031_check_media(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u16 bmsr;
-
-       bmsr = _sc92031_mii_read(port_base, MII_BMSR);
-       rmb();
-       if (bmsr & BMSR_LSTATUS) {
-               bool speed_100, duplex_full;
-               u32 flow_ctrl_config = 0;
-               u16 output_status = _sc92031_mii_read(port_base,
-                               MII_OutputStatus);
-               _sc92031_mii_scan(port_base);
-
-               speed_100 = output_status & 0x2;
-               duplex_full = output_status & 0x4;
-
-               /* Initial Tx/Rx configuration */
-               priv->rx_config = (0x40 << LowThresholdShift) | (0x1c0 << HighThresholdShift);
-               priv->tx_config = 0x48800000;
-
-               /* NOTE: vendor driver had dead code here to enable tx padding */
-
-               if (!speed_100)
-                       priv->tx_config |= 0x80000;
-
-               // configure rx mode
-               _sc92031_set_rx_config(dev);
-
-               if (duplex_full) {
-                       priv->rx_config |= RxFullDx;
-                       priv->tx_config |= TxFullDx;
-                       flow_ctrl_config = FlowCtrlFullDX | FlowCtrlEnb;
-               } else {
-                       priv->rx_config &= ~RxFullDx;
-                       priv->tx_config &= ~TxFullDx;
-               }
-
-               _sc92031_set_mar(dev);
-               _sc92031_set_rx_config(dev);
-               _sc92031_enable_tx_rx(dev);
-               iowrite32(flow_ctrl_config, port_base + FlowCtrlConfig);
-
-               netif_carrier_on(dev);
-
-               if (printk_ratelimit())
-                       printk(KERN_INFO "%s: link up, %sMbps, %s-duplex\n",
-                               dev->name,
-                               speed_100 ? "100" : "10",
-                               duplex_full ? "full" : "half");
-               return true;
-       } else {
-               _sc92031_mii_scan(port_base);
-
-               netif_carrier_off(dev);
-
-               _sc92031_disable_tx_rx(dev);
-
-               if (printk_ratelimit())
-                       printk(KERN_INFO "%s: link down\n", dev->name);
-               return false;
-       }
-}
-
-static void _sc92031_phy_reset(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u32 phy_ctrl;
-
-       phy_ctrl = ioread32(port_base + PhyCtrl);
-       phy_ctrl &= ~(PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10);
-       phy_ctrl |= PhyCtrlAne | PhyCtrlReset;
-
-       switch (media) {
-       default:
-       case AUTOSELECT:
-               phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10;
-               break;
-       case M10_HALF:
-               phy_ctrl |= PhyCtrlSpd10;
-               break;
-       case M10_FULL:
-               phy_ctrl |= PhyCtrlDux | PhyCtrlSpd10;
-               break;
-       case M100_HALF:
-               phy_ctrl |= PhyCtrlSpd100;
-               break;
-       case M100_FULL:
-               phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100;
-               break;
-       }
-
-       iowrite32(phy_ctrl, port_base + PhyCtrl);
-       mdelay(10);
-
-       phy_ctrl &= ~PhyCtrlReset;
-       iowrite32(phy_ctrl, port_base + PhyCtrl);
-       mdelay(1);
-
-       _sc92031_mii_write(port_base, MII_JAB,
-                       PHY_16_JAB_ENB | PHY_16_PORT_ENB);
-       _sc92031_mii_scan(port_base);
-
-       netif_carrier_off(dev);
-       netif_stop_queue(dev);
-}
-
-static void _sc92031_reset(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-
-       /* disable PM */
-       iowrite32(0, port_base + PMConfig);
-
-       /* soft reset the chip */
-       iowrite32(Cfg0_Reset, port_base + Config0);
-       mdelay(200);
-
-       iowrite32(0, port_base + Config0);
-       mdelay(10);
-
-       /* disable interrupts */
-       iowrite32(0, port_base + IntrMask);
-
-       /* clear multicast address */
-       iowrite32(0, port_base + MAR0);
-       iowrite32(0, port_base + MAR0 + 4);
-
-       /* init rx ring */
-       iowrite32(priv->rx_ring_dma_addr, port_base + RxbufAddr);
-       priv->rx_ring_tail = priv->rx_ring_dma_addr;
-
-       /* init tx ring */
-       _sc92031_tx_clear(dev);
-
-       /* clear old register values */
-       priv->intr_status = 0;
-       atomic_set(&priv->intr_mask, 0);
-       priv->rx_config = 0;
-       priv->tx_config = 0;
-       priv->mc_flags = 0;
-
-       /* configure rx buffer size */
-       /* NOTE: vendor driver had dead code here to enable early tx/rx */
-       iowrite32(Cfg1_Rcv64K, port_base + Config1);
-
-       _sc92031_phy_reset(dev);
-       _sc92031_check_media(dev);
-
-       /* calculate rx fifo overflow */
-       priv->rx_value = 0;
-
-       /* enable PM */
-       iowrite32(priv->pm_config, port_base + PMConfig);
-
-       /* clear intr register */
-       ioread32(port_base + IntrStatus);
-}
-
-static void _sc92031_tx_tasklet(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-
-       unsigned old_tx_tail;
-       unsigned entry;
-       u32 tx_status;
-
-       old_tx_tail = priv->tx_tail;
-       while (priv->tx_head - priv->tx_tail > 0) {
-               entry = priv->tx_tail % NUM_TX_DESC;
-               tx_status = ioread32(port_base + TxStatus0 + entry * 4);
-
-               if (!(tx_status & (TxStatOK | TxUnderrun | TxAborted)))
-                       break;
-
-               priv->tx_tail++;
-
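-               /* tx_status layout, as used below: bits 0-12 hold the
-                * transmitted byte count, bits 22-25 (TxNccShift) the
-                * collision count. */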
-               if (tx_status & TxStatOK) {
-                       dev->stats.tx_bytes += tx_status & 0x1fff;
-                       dev->stats.tx_packets++;
-                       /* Note: TxCarrierLost is always asserted at 100 Mbps. */
-                       dev->stats.collisions += (tx_status >> 22) & 0xf;
-               }
-
-               if (tx_status & (TxOutOfWindow | TxAborted)) {
-                       dev->stats.tx_errors++;
-
-                       if (tx_status & TxAborted)
-                               dev->stats.tx_aborted_errors++;
-
-                       if (tx_status & TxCarrierLost)
-                               dev->stats.tx_carrier_errors++;
-
-                       if (tx_status & TxOutOfWindow)
-                               dev->stats.tx_window_errors++;
-               }
-
-               if (tx_status & TxUnderrun)
-                       dev->stats.tx_fifo_errors++;
-       }
-
-       if (priv->tx_tail != old_tx_tail)
-               if (netif_queue_stopped(dev))
-                       netif_wake_queue(dev);
-}
-
-static void _sc92031_rx_tasklet_error(struct net_device *dev,
-                                     u32 rx_status, unsigned rx_size)
-{
-       if(rx_size > (MAX_ETH_FRAME_SIZE + 4) || rx_size < 16) {
-               dev->stats.rx_errors++;
-               dev->stats.rx_length_errors++;
-       }
-
-       if (!(rx_status & RxStatesOK)) {
-               dev->stats.rx_errors++;
-
-               if (rx_status & (RxHugeFrame | RxSmallFrame))
-                       dev->stats.rx_length_errors++;
-
-               if (rx_status & RxBadAlign)
-                       dev->stats.rx_frame_errors++;
-
-               if (!(rx_status & RxCRCOK))
-                       dev->stats.rx_crc_errors++;
-       } else {
-               struct sc92031_priv *priv = netdev_priv(dev);
-               priv->rx_loss++;
-       }
-}
-
-static void _sc92031_rx_tasklet(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-
-       dma_addr_t rx_ring_head;
-       unsigned rx_len;
-       unsigned rx_ring_offset;
-       void *rx_ring = priv->rx_ring;
-
-       rx_ring_head = ioread32(port_base + RxBufWPtr);
-       rmb();
-
-       /* rx_ring_head is only 17 bits in the RxBufWPtr register.
-        * we need to change it to 32 bits physical address
-        */
-       rx_ring_head &= (dma_addr_t)(RX_BUF_LEN - 1);
-       rx_ring_head |= priv->rx_ring_dma_addr & ~(dma_addr_t)(RX_BUF_LEN - 1);
-       if (rx_ring_head < priv->rx_ring_dma_addr)
-               rx_ring_head += RX_BUF_LEN;
-
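-       /* rx_ring_head now holds the full bus address of the hardware
-        * write pointer; rx_len below is the number of new bytes between
-        * the software tail and that head, modulo the ring size. */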
-       if (rx_ring_head >= priv->rx_ring_tail)
-               rx_len = rx_ring_head - priv->rx_ring_tail;
-       else
-               rx_len = RX_BUF_LEN - (priv->rx_ring_tail - rx_ring_head);
-
-       if (!rx_len)
-               return;
-
-       if (unlikely(rx_len > RX_BUF_LEN)) {
-               if (printk_ratelimit())
-                       printk(KERN_ERR "%s: rx packet length exceeds rx buffer\n",
-                                       dev->name);
-               return;
-       }
-
-       rx_ring_offset = (priv->rx_ring_tail - priv->rx_ring_dma_addr) % RX_BUF_LEN;
-
-       while (rx_len) {
-               u32 rx_status;
-               unsigned rx_size, rx_size_align, pkt_size;
-               struct sk_buff *skb;
-
-               rx_status = le32_to_cpup((__le32 *)(rx_ring + rx_ring_offset));
-               rmb();
-
-               rx_size = rx_status >> 20;
-               rx_size_align = (rx_size + 3) & ~3;     // round up to 4-byte alignment
-               pkt_size = rx_size - 4; // omit the four-octet CRC from the length
-
-               rx_ring_offset = (rx_ring_offset + 4) % RX_BUF_LEN;
-
-               if (unlikely(rx_status == 0 ||
-                            rx_size > (MAX_ETH_FRAME_SIZE + 4) ||
-                            rx_size < 16 ||
-                            !(rx_status & RxStatesOK))) {
-                       _sc92031_rx_tasklet_error(dev, rx_status, rx_size);
-                       break;
-               }
-
-               if (unlikely(rx_size_align + 4 > rx_len)) {
-                       if (printk_ratelimit())
-                               printk(KERN_ERR "%s: rx_len is too small\n", dev->name);
-                       break;
-               }
-
-               rx_len -= rx_size_align + 4;
-
-               skb = netdev_alloc_skb_ip_align(dev, pkt_size);
-               if (unlikely(!skb)) {
-                       if (printk_ratelimit())
-                               printk(KERN_ERR "%s: Couldn't allocate an sk_buff for a packet of size %u\n",
-                                               dev->name, pkt_size);
-                       goto next;
-               }
-
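-               /* The frame may wrap past the end of the rx ring; copy it
-                * out in two pieces when it does. */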
-               if ((rx_ring_offset + pkt_size) > RX_BUF_LEN) {
-                       memcpy(skb_put(skb, RX_BUF_LEN - rx_ring_offset),
-                               rx_ring + rx_ring_offset, RX_BUF_LEN - rx_ring_offset);
-                       memcpy(skb_put(skb, pkt_size - (RX_BUF_LEN - rx_ring_offset)),
-                               rx_ring, pkt_size - (RX_BUF_LEN - rx_ring_offset));
-               } else {
-                       memcpy(skb_put(skb, pkt_size), rx_ring + rx_ring_offset, pkt_size);
-               }
-
-               skb->protocol = eth_type_trans(skb, dev);
-               netif_rx(skb);
-
-               dev->stats.rx_bytes += pkt_size;
-               dev->stats.rx_packets++;
-
-               if (rx_status & Rx_Multicast)
-                       dev->stats.multicast++;
-
-       next:
-               rx_ring_offset = (rx_ring_offset + rx_size_align) % RX_BUF_LEN;
-       }
-       mb();
-
-       priv->rx_ring_tail = rx_ring_head;
-       iowrite32(priv->rx_ring_tail, port_base + RxBufRPtr);
-}
-
-static void _sc92031_link_tasklet(struct net_device *dev)
-{
-       if (_sc92031_check_media(dev))
-               netif_wake_queue(dev);
-       else {
-               netif_stop_queue(dev);
-               dev->stats.tx_carrier_errors++;
-       }
-}
-
-static void sc92031_tasklet(unsigned long data)
-{
-       struct net_device *dev = (struct net_device *)data;
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u32 intr_status, intr_mask;
-
-       intr_status = priv->intr_status;
-
-       spin_lock(&priv->lock);
-
-       if (unlikely(!netif_running(dev)))
-               goto out;
-
-       if (intr_status & TxOK)
-               _sc92031_tx_tasklet(dev);
-
-       if (intr_status & RxOK)
-               _sc92031_rx_tasklet(dev);
-
-       if (intr_status & RxOverflow)
-               dev->stats.rx_errors++;
-
-       if (intr_status & TimeOut) {
-               dev->stats.rx_errors++;
-               dev->stats.rx_length_errors++;
-       }
-
-       if (intr_status & (LinkFail | LinkOK))
-               _sc92031_link_tasklet(dev);
-
-out:
-       intr_mask = atomic_read(&priv->intr_mask);
-       rmb();
-
-       iowrite32(intr_mask, port_base + IntrMask);
-       mmiowb();
-
-       spin_unlock(&priv->lock);
-}
-
-static irqreturn_t sc92031_interrupt(int irq, void *dev_id)
-{
-       struct net_device *dev = dev_id;
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u32 intr_status, intr_mask;
-
-       /* mask interrupts before clearing IntrStatus */
-       iowrite32(0, port_base + IntrMask);
-       _sc92031_dummy_read(port_base);
-
-       intr_status = ioread32(port_base + IntrStatus);
-       if (unlikely(intr_status == 0xffffffff))
-               return IRQ_NONE;        // hardware has gone missing
-
-       intr_status &= IntrBits;
-       if (!intr_status)
-               goto out_none;
-
-       priv->intr_status = intr_status;
-       tasklet_schedule(&priv->tasklet);
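-       /* IntrMask is left at 0 here; sc92031_tasklet() rewrites it from
-        * priv->intr_mask once the pending work has been handled. */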
-
-       return IRQ_HANDLED;
-
-out_none:
-       intr_mask = atomic_read(&priv->intr_mask);
-       rmb();
-
-       iowrite32(intr_mask, port_base + IntrMask);
-       mmiowb();
-
-       return IRQ_NONE;
-}
-
-static struct net_device_stats *sc92031_get_stats(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-
-       // FIXME: I do not understand what this is trying to do.
-       if (netif_running(dev)) {
-               int temp;
-
-               spin_lock_bh(&priv->lock);
-
-               /* Update the error count. */
-               temp = (ioread32(port_base + RxStatus0) >> 16) & 0xffff;
-
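-               /* Best guess: the 16-bit counter saturates at 0xffff, so
-                * fold it into rx_value when saturated to keep the reported
-                * total monotonic. */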
-               if (temp == 0xffff) {
-                       priv->rx_value += temp;
-                       dev->stats.rx_fifo_errors = priv->rx_value;
-               } else
-                       dev->stats.rx_fifo_errors = temp + priv->rx_value;
-
-               spin_unlock_bh(&priv->lock);
-       }
-
-       return &dev->stats;
-}
-
-static netdev_tx_t sc92031_start_xmit(struct sk_buff *skb,
-                                     struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       unsigned len;
-       unsigned entry;
-       u32 tx_status;
-
-       if (unlikely(skb->len > TX_BUF_SIZE)) {
-               dev->stats.tx_dropped++;
-               goto out;
-       }
-
-       spin_lock(&priv->lock);
-
-       if (unlikely(!netif_carrier_ok(dev))) {
-               dev->stats.tx_dropped++;
-               goto out_unlock;
-       }
-
-       BUG_ON(priv->tx_head - priv->tx_tail >= NUM_TX_DESC);
-
-       entry = priv->tx_head++ % NUM_TX_DESC;
-
-       skb_copy_and_csum_dev(skb, priv->tx_bufs + entry * TX_BUF_SIZE);
-
-       len = skb->len;
-       if (len < ETH_ZLEN) {
-               memset(priv->tx_bufs + entry * TX_BUF_SIZE + len,
-                               0, ETH_ZLEN - len);
-               len = ETH_ZLEN;
-       }
-
-       wmb();
-
-       if (len < 100)
-               tx_status = len;
-       else if (len < 300)
-               tx_status = 0x30000 | len;
-       else
-               tx_status = 0x50000 | len;
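-
-       /* 0x30000 and 0x50000 apparently select the early-tx threshold
-        * (see EarlyTxThresShift); larger frames wait for more data in the
-        * FIFO before transmission starts. Values follow the vendor driver. */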
-
-       iowrite32(priv->tx_bufs_dma_addr + entry * TX_BUF_SIZE,
-                       port_base + TxAddr0 + entry * 4);
-       iowrite32(tx_status, port_base + TxStatus0 + entry * 4);
-       mmiowb();
-
-       if (priv->tx_head - priv->tx_tail >= NUM_TX_DESC)
-               netif_stop_queue(dev);
-
-out_unlock:
-       spin_unlock(&priv->lock);
-
-out:
-       dev_kfree_skb(skb);
-
-       return NETDEV_TX_OK;
-}
-
-static int sc92031_open(struct net_device *dev)
-{
-       int err;
-       struct sc92031_priv *priv = netdev_priv(dev);
-       struct pci_dev *pdev = priv->pdev;
-
-       priv->rx_ring = pci_alloc_consistent(pdev, RX_BUF_LEN,
-                       &priv->rx_ring_dma_addr);
-       if (unlikely(!priv->rx_ring)) {
-               err = -ENOMEM;
-               goto out_alloc_rx_ring;
-       }
-
-       priv->tx_bufs = pci_alloc_consistent(pdev, TX_BUF_TOT_LEN,
-                       &priv->tx_bufs_dma_addr);
-       if (unlikely(!priv->tx_bufs)) {
-               err = -ENOMEM;
-               goto out_alloc_tx_bufs;
-       }
-       priv->tx_head = priv->tx_tail = 0;
-
-       err = request_irq(pdev->irq, sc92031_interrupt,
-                       IRQF_SHARED, dev->name, dev);
-       if (unlikely(err < 0))
-               goto out_request_irq;
-
-       priv->pm_config = 0;
-
-       /* Interrupts already disabled by sc92031_stop or sc92031_probe */
-       spin_lock_bh(&priv->lock);
-
-       _sc92031_reset(dev);
-       mmiowb();
-
-       spin_unlock_bh(&priv->lock);
-       sc92031_enable_interrupts(dev);
-
-       if (netif_carrier_ok(dev))
-               netif_start_queue(dev);
-       else
-               netif_tx_disable(dev);
-
-       return 0;
-
-out_request_irq:
-       pci_free_consistent(pdev, TX_BUF_TOT_LEN, priv->tx_bufs,
-                       priv->tx_bufs_dma_addr);
-out_alloc_tx_bufs:
-       pci_free_consistent(pdev, RX_BUF_LEN, priv->rx_ring,
-                       priv->rx_ring_dma_addr);
-out_alloc_rx_ring:
-       return err;
-}
-
-static int sc92031_stop(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       struct pci_dev *pdev = priv->pdev;
-
-       netif_tx_disable(dev);
-
-       /* Disable interrupts, stop Tx and Rx. */
-       sc92031_disable_interrupts(dev);
-
-       spin_lock_bh(&priv->lock);
-
-       _sc92031_disable_tx_rx(dev);
-       _sc92031_tx_clear(dev);
-       mmiowb();
-
-       spin_unlock_bh(&priv->lock);
-
-       free_irq(pdev->irq, dev);
-       pci_free_consistent(pdev, TX_BUF_TOT_LEN, priv->tx_bufs,
-                       priv->tx_bufs_dma_addr);
-       pci_free_consistent(pdev, RX_BUF_LEN, priv->rx_ring,
-                       priv->rx_ring_dma_addr);
-
-       return 0;
-}
-
-static void sc92031_set_multicast_list(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-
-       spin_lock_bh(&priv->lock);
-
-       _sc92031_set_mar(dev);
-       _sc92031_set_rx_config(dev);
-       mmiowb();
-
-       spin_unlock_bh(&priv->lock);
-}
-
-static void sc92031_tx_timeout(struct net_device *dev)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-
-       /* Disable interrupts by clearing the interrupt mask.*/
-       sc92031_disable_interrupts(dev);
-
-       spin_lock(&priv->lock);
-
-       priv->tx_timeouts++;
-
-       _sc92031_reset(dev);
-       mmiowb();
-
-       spin_unlock(&priv->lock);
-
-       /* enable interrupts */
-       sc92031_enable_interrupts(dev);
-
-       if (netif_carrier_ok(dev))
-               netif_wake_queue(dev);
-}
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void sc92031_poll_controller(struct net_device *dev)
-{
-       disable_irq(dev->irq);
-       if (sc92031_interrupt(dev->irq, dev) != IRQ_NONE)
-               sc92031_tasklet((unsigned long)dev);
-       enable_irq(dev->irq);
-}
-#endif
-
-static int sc92031_ethtool_get_settings(struct net_device *dev,
-               struct ethtool_cmd *cmd)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u8 phy_address;
-       u32 phy_ctrl;
-       u16 output_status;
-
-       spin_lock_bh(&priv->lock);
-
-       phy_address = ioread32(port_base + Miicmd1) >> 27;
-       phy_ctrl = ioread32(port_base + PhyCtrl);
-
-       output_status = _sc92031_mii_read(port_base, MII_OutputStatus);
-       _sc92031_mii_scan(port_base);
-       mmiowb();
-
-       spin_unlock_bh(&priv->lock);
-
-       cmd->supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full
-                       | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full
-                       | SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII;
-
-       cmd->advertising = ADVERTISED_TP | ADVERTISED_MII;
-
-       if ((phy_ctrl & (PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10))
-                       == (PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10))
-               cmd->advertising |= ADVERTISED_Autoneg;
-
-       if ((phy_ctrl & PhyCtrlSpd10) == PhyCtrlSpd10)
-               cmd->advertising |= ADVERTISED_10baseT_Half;
-
-       if ((phy_ctrl & (PhyCtrlSpd10 | PhyCtrlDux))
-                       == (PhyCtrlSpd10 | PhyCtrlDux))
-               cmd->advertising |= ADVERTISED_10baseT_Full;
-
-       if ((phy_ctrl & PhyCtrlSpd100) == PhyCtrlSpd100)
-               cmd->advertising |= ADVERTISED_100baseT_Half;
-
-       if ((phy_ctrl & (PhyCtrlSpd100 | PhyCtrlDux))
-                       == (PhyCtrlSpd100 | PhyCtrlDux))
-               cmd->advertising |= ADVERTISED_100baseT_Full;
-
-       if (phy_ctrl & PhyCtrlAne)
-               cmd->advertising |= ADVERTISED_Autoneg;
-
-       ethtool_cmd_speed_set(cmd,
-                             (output_status & 0x2) ? SPEED_100 : SPEED_10);
-       cmd->duplex = (output_status & 0x4) ? DUPLEX_FULL : DUPLEX_HALF;
-       cmd->port = PORT_MII;
-       cmd->phy_address = phy_address;
-       cmd->transceiver = XCVR_INTERNAL;
-       cmd->autoneg = (phy_ctrl & PhyCtrlAne) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
-
-       return 0;
-}
-
-static int sc92031_ethtool_set_settings(struct net_device *dev,
-               struct ethtool_cmd *cmd)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u32 speed = ethtool_cmd_speed(cmd);
-       u32 phy_ctrl;
-       u32 old_phy_ctrl;
-
-       if (!(speed == SPEED_10 || speed == SPEED_100))
-               return -EINVAL;
-       if (!(cmd->duplex == DUPLEX_HALF || cmd->duplex == DUPLEX_FULL))
-               return -EINVAL;
-       if (!(cmd->port == PORT_MII))
-               return -EINVAL;
-       if (!(cmd->phy_address == 0x1f))
-               return -EINVAL;
-       if (!(cmd->transceiver == XCVR_INTERNAL))
-               return -EINVAL;
-       if (!(cmd->autoneg == AUTONEG_DISABLE || cmd->autoneg == AUTONEG_ENABLE))
-               return -EINVAL;
-
-       if (cmd->autoneg == AUTONEG_ENABLE) {
-               if (!(cmd->advertising & (ADVERTISED_Autoneg
-                               | ADVERTISED_100baseT_Full
-                               | ADVERTISED_100baseT_Half
-                               | ADVERTISED_10baseT_Full
-                               | ADVERTISED_10baseT_Half)))
-                       return -EINVAL;
-
-               phy_ctrl = PhyCtrlAne;
-
-               // FIXME: I'm not sure what the original code was trying to do
-               if (cmd->advertising & ADVERTISED_Autoneg)
-                       phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10;
-               if (cmd->advertising & ADVERTISED_100baseT_Full)
-                       phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100;
-               if (cmd->advertising & ADVERTISED_100baseT_Half)
-                       phy_ctrl |= PhyCtrlSpd100;
-               if (cmd->advertising & ADVERTISED_10baseT_Full)
-                       phy_ctrl |= PhyCtrlSpd10 | PhyCtrlDux;
-               if (cmd->advertising & ADVERTISED_10baseT_Half)
-                       phy_ctrl |= PhyCtrlSpd10;
-       } else {
-               // FIXME: Whole branch guessed
-               phy_ctrl = 0;
-
-               if (speed == SPEED_10)
-                       phy_ctrl |= PhyCtrlSpd10;
-               else /* cmd->speed == SPEED_100 */
-                       phy_ctrl |= PhyCtrlSpd100;
-
-               if (cmd->duplex == DUPLEX_FULL)
-                       phy_ctrl |= PhyCtrlDux;
-       }
-
-       spin_lock_bh(&priv->lock);
-
-       old_phy_ctrl = ioread32(port_base + PhyCtrl);
-       phy_ctrl |= old_phy_ctrl & ~(PhyCtrlAne | PhyCtrlDux
-                       | PhyCtrlSpd100 | PhyCtrlSpd10);
-       if (phy_ctrl != old_phy_ctrl)
-               iowrite32(phy_ctrl, port_base + PhyCtrl);
-
-       spin_unlock_bh(&priv->lock);
-
-       return 0;
-}
-
-static void sc92031_ethtool_get_wol(struct net_device *dev,
-               struct ethtool_wolinfo *wolinfo)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u32 pm_config;
-
-       spin_lock_bh(&priv->lock);
-       pm_config = ioread32(port_base + PMConfig);
-       spin_unlock_bh(&priv->lock);
-
-       // FIXME: Guessed
-       wolinfo->supported = WAKE_PHY | WAKE_MAGIC
-                       | WAKE_UCAST | WAKE_MCAST | WAKE_BCAST;
-       wolinfo->wolopts = 0;
-
-       if (pm_config & PM_LinkUp)
-               wolinfo->wolopts |= WAKE_PHY;
-
-       if (pm_config & PM_Magic)
-               wolinfo->wolopts |= WAKE_MAGIC;
-
-       if (pm_config & PM_WakeUp)
-               // FIXME: Guessed
-               wolinfo->wolopts |= WAKE_UCAST | WAKE_MCAST | WAKE_BCAST;
-}
-
-static int sc92031_ethtool_set_wol(struct net_device *dev,
-               struct ethtool_wolinfo *wolinfo)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u32 pm_config;
-
-       spin_lock_bh(&priv->lock);
-
-       pm_config = ioread32(port_base + PMConfig)
-                       & ~(PM_LinkUp | PM_Magic | PM_WakeUp);
-
-       if (wolinfo->wolopts & WAKE_PHY)
-               pm_config |= PM_LinkUp;
-
-       if (wolinfo->wolopts & WAKE_MAGIC)
-               pm_config |= PM_Magic;
-
-       // FIXME: Guessed
-       if (wolinfo->wolopts & (WAKE_UCAST | WAKE_MCAST | WAKE_BCAST))
-               pm_config |= PM_WakeUp;
-
-       priv->pm_config = pm_config;
-       iowrite32(pm_config, port_base + PMConfig);
-       mmiowb();
-
-       spin_unlock_bh(&priv->lock);
-
-       return 0;
-}
-
-static int sc92031_ethtool_nway_reset(struct net_device *dev)
-{
-       int err = 0;
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem *port_base = priv->port_base;
-       u16 bmcr;
-
-       spin_lock_bh(&priv->lock);
-
-       bmcr = _sc92031_mii_read(port_base, MII_BMCR);
-       if (!(bmcr & BMCR_ANENABLE)) {
-               err = -EINVAL;
-               goto out;
-       }
-
-       _sc92031_mii_write(port_base, MII_BMCR, bmcr | BMCR_ANRESTART);
-
-out:
-       _sc92031_mii_scan(port_base);
-       mmiowb();
-
-       spin_unlock_bh(&priv->lock);
-
-       return err;
-}
-
-static const char sc92031_ethtool_stats_strings[SILAN_STATS_NUM][ETH_GSTRING_LEN] = {
-       "tx_timeout",
-       "rx_loss",
-};
-
-static void sc92031_ethtool_get_strings(struct net_device *dev,
-               u32 stringset, u8 *data)
-{
-       if (stringset == ETH_SS_STATS)
-               memcpy(data, sc92031_ethtool_stats_strings,
-                               SILAN_STATS_NUM * ETH_GSTRING_LEN);
-}
-
-static int sc92031_ethtool_get_sset_count(struct net_device *dev, int sset)
-{
-       switch (sset) {
-       case ETH_SS_STATS:
-               return SILAN_STATS_NUM;
-       default:
-               return -EOPNOTSUPP;
-       }
-}
-
-static void sc92031_ethtool_get_ethtool_stats(struct net_device *dev,
-               struct ethtool_stats *stats, u64 *data)
-{
-       struct sc92031_priv *priv = netdev_priv(dev);
-
-       spin_lock_bh(&priv->lock);
-       data[0] = priv->tx_timeouts;
-       data[1] = priv->rx_loss;
-       spin_unlock_bh(&priv->lock);
-}
-
-static const struct ethtool_ops sc92031_ethtool_ops = {
-       .get_settings           = sc92031_ethtool_get_settings,
-       .set_settings           = sc92031_ethtool_set_settings,
-       .get_wol                = sc92031_ethtool_get_wol,
-       .set_wol                = sc92031_ethtool_set_wol,
-       .nway_reset             = sc92031_ethtool_nway_reset,
-       .get_link               = ethtool_op_get_link,
-       .get_strings            = sc92031_ethtool_get_strings,
-       .get_sset_count         = sc92031_ethtool_get_sset_count,
-       .get_ethtool_stats      = sc92031_ethtool_get_ethtool_stats,
-};
-
-
-static const struct net_device_ops sc92031_netdev_ops = {
-       .ndo_get_stats          = sc92031_get_stats,
-       .ndo_start_xmit         = sc92031_start_xmit,
-       .ndo_open               = sc92031_open,
-       .ndo_stop               = sc92031_stop,
-       .ndo_set_rx_mode        = sc92031_set_multicast_list,
-       .ndo_change_mtu         = eth_change_mtu,
-       .ndo_validate_addr      = eth_validate_addr,
-       .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_tx_timeout         = sc92031_tx_timeout,
-#ifdef CONFIG_NET_POLL_CONTROLLER
-       .ndo_poll_controller    = sc92031_poll_controller,
-#endif
-};
-
-static int __devinit sc92031_probe(struct pci_dev *pdev,
-               const struct pci_device_id *id)
-{
-       int err;
-       void __iomem* port_base;
-       struct net_device *dev;
-       struct sc92031_priv *priv;
-       u32 mac0, mac1;
-       unsigned long base_addr;
-
-       err = pci_enable_device(pdev);
-       if (unlikely(err < 0))
-               goto out_enable_device;
-
-       pci_set_master(pdev);
-
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (unlikely(err < 0))
-               goto out_set_dma_mask;
-
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (unlikely(err < 0))
-               goto out_set_dma_mask;
-
-       err = pci_request_regions(pdev, SC92031_NAME);
-       if (unlikely(err < 0))
-               goto out_request_regions;
-
-       port_base = pci_iomap(pdev, SC92031_USE_BAR, 0);
-       if (unlikely(!port_base)) {
-               err = -EIO;
-               goto out_iomap;
-       }
-
-       dev = alloc_etherdev(sizeof(struct sc92031_priv));
-       if (unlikely(!dev)) {
-               err = -ENOMEM;
-               goto out_alloc_etherdev;
-       }
-
-       pci_set_drvdata(pdev, dev);
-       SET_NETDEV_DEV(dev, &pdev->dev);
-
-#if SC92031_USE_BAR == 0
-       dev->mem_start = pci_resource_start(pdev, SC92031_USE_BAR);
-       dev->mem_end = pci_resource_end(pdev, SC92031_USE_BAR);
-#elif SC92031_USE_BAR == 1
-       dev->base_addr = pci_resource_start(pdev, SC92031_USE_BAR);
-#endif
-       dev->irq = pdev->irq;
-
-       /* faked with skb_copy_and_csum_dev */
-       dev->features = NETIF_F_SG | NETIF_F_HIGHDMA |
-               NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-
-       dev->netdev_ops         = &sc92031_netdev_ops;
-       dev->watchdog_timeo     = TX_TIMEOUT;
-       dev->ethtool_ops        = &sc92031_ethtool_ops;
-
-       priv = netdev_priv(dev);
-       spin_lock_init(&priv->lock);
-       priv->port_base = port_base;
-       priv->pdev = pdev;
-       tasklet_init(&priv->tasklet, sc92031_tasklet, (unsigned long)dev);
-       /* Fudge tasklet count so the call to sc92031_enable_interrupts at
-        * sc92031_open will work correctly */
-       tasklet_disable_nosync(&priv->tasklet);
-
-       /* PCI PM Wakeup */
-       iowrite32((~PM_LongWF & ~PM_LWPTN) | PM_Enable, port_base + PMConfig);
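-       /* Note that (~PM_LongWF & ~PM_LWPTN) sets every bit except
-        * PM_LongWF and PM_LWPTN; odd-looking, but presumably inherited
-        * from the vendor driver. */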
-
-       mac0 = ioread32(port_base + MAC0);
-       mac1 = ioread32(port_base + MAC0 + 4);
-       dev->dev_addr[0] = dev->perm_addr[0] = mac0 >> 24;
-       dev->dev_addr[1] = dev->perm_addr[1] = mac0 >> 16;
-       dev->dev_addr[2] = dev->perm_addr[2] = mac0 >> 8;
-       dev->dev_addr[3] = dev->perm_addr[3] = mac0;
-       dev->dev_addr[4] = dev->perm_addr[4] = mac1 >> 8;
-       dev->dev_addr[5] = dev->perm_addr[5] = mac1;
-
-       err = register_netdev(dev);
-       if (err < 0)
-               goto out_register_netdev;
-
-#if SC92031_USE_BAR == 0
-       base_addr = dev->mem_start;
-#elif SC92031_USE_BAR == 1
-       base_addr = dev->base_addr;
-#endif
-       printk(KERN_INFO "%s: SC92031 at 0x%lx, %pM, IRQ %d\n", dev->name,
-                       base_addr, dev->dev_addr, dev->irq);
-
-       return 0;
-
-out_register_netdev:
-       free_netdev(dev);
-out_alloc_etherdev:
-       pci_iounmap(pdev, port_base);
-out_iomap:
-       pci_release_regions(pdev);
-out_request_regions:
-out_set_dma_mask:
-       pci_disable_device(pdev);
-out_enable_device:
-       return err;
-}
-
-static void __devexit sc92031_remove(struct pci_dev *pdev)
-{
-       struct net_device *dev = pci_get_drvdata(pdev);
-       struct sc92031_priv *priv = netdev_priv(dev);
-       void __iomem* port_base = priv->port_base;
-
-       unregister_netdev(dev);
-       free_netdev(dev);
-       pci_iounmap(pdev, port_base);
-       pci_release_regions(pdev);
-       pci_disable_device(pdev);
-}
-
-static int sc92031_suspend(struct pci_dev *pdev, pm_message_t state)
-{
-       struct net_device *dev = pci_get_drvdata(pdev);
-       struct sc92031_priv *priv = netdev_priv(dev);
-
-       pci_save_state(pdev);
-
-       if (!netif_running(dev))
-               goto out;
-
-       netif_device_detach(dev);
-
-       /* Disable interrupts, stop Tx and Rx. */
-       sc92031_disable_interrupts(dev);
-
-       spin_lock_bh(&priv->lock);
-
-       _sc92031_disable_tx_rx(dev);
-       _sc92031_tx_clear(dev);
-       mmiowb();
-
-       spin_unlock_bh(&priv->lock);
-
-out:
-       pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
-       return 0;
-}
-
-static int sc92031_resume(struct pci_dev *pdev)
-{
-       struct net_device *dev = pci_get_drvdata(pdev);
-       struct sc92031_priv *priv = netdev_priv(dev);
-
-       pci_restore_state(pdev);
-       pci_set_power_state(pdev, PCI_D0);
-
-       if (!netif_running(dev))
-               goto out;
-
-       /* Interrupts already disabled by sc92031_suspend */
-       spin_lock_bh(&priv->lock);
-
-       _sc92031_reset(dev);
-       mmiowb();
-
-       spin_unlock_bh(&priv->lock);
-       sc92031_enable_interrupts(dev);
-
-       netif_device_attach(dev);
-
-       if (netif_carrier_ok(dev))
-               netif_wake_queue(dev);
-       else
-               netif_tx_disable(dev);
-
-out:
-       return 0;
-}
-
-static DEFINE_PCI_DEVICE_TABLE(sc92031_pci_device_id_table) = {
-       { PCI_DEVICE(PCI_VENDOR_ID_SILAN, 0x2031) },
-       { PCI_DEVICE(PCI_VENDOR_ID_SILAN, 0x8139) },
-       { PCI_DEVICE(0x1088, 0x2031) },
-       { 0, }
-};
-MODULE_DEVICE_TABLE(pci, sc92031_pci_device_id_table);
-
-static struct pci_driver sc92031_pci_driver = {
-       .name           = SC92031_NAME,
-       .id_table       = sc92031_pci_device_id_table,
-       .probe          = sc92031_probe,
-       .remove         = __devexit_p(sc92031_remove),
-       .suspend        = sc92031_suspend,
-       .resume         = sc92031_resume,
-};
-
-static int __init sc92031_init(void)
-{
-       return pci_register_driver(&sc92031_pci_driver);
-}
-
-static void __exit sc92031_exit(void)
-{
-       pci_unregister_driver(&sc92031_pci_driver);
-}
-
-module_init(sc92031_init);
-module_exit(sc92031_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Cesar Eduardo Barros <cesarb@cesarb.net>");
-MODULE_DESCRIPTION("Silan SC92031 PCI Fast Ethernet Adapter driver");
diff --git a/drivers/net/ethernet/silan/Kconfig b/drivers/net/ethernet/silan/Kconfig
new file mode 100644 (file)
index 0000000..ae1ce17
--- /dev/null
@@ -0,0 +1,33 @@
+#
+# Silan device configuration
+#
+
+config NET_VENDOR_SILAN
+       bool "Silan devices"
+       default y
+       depends on PCI && EXPERIMENTAL
+       ---help---
+         If you have a network (Ethernet) card belonging to this class, say Y
+         and read the Ethernet-HOWTO, available from
+         <http://www.tldp.org/docs.html#howto>.
+
+         Note that the answer to this question doesn't directly affect the
+         kernel: saying N will just cause the configurator to skip all
+         the questions about Silan devices. If you say Y, you will be asked for
+         your specific card in the following questions.
+
+if NET_VENDOR_SILAN
+
+config SC92031
+       tristate "Silan SC92031 PCI Fast Ethernet Adapter driver (EXPERIMENTAL)"
+       depends on PCI && EXPERIMENTAL
+       select CRC32
+       ---help---
+         This is a driver for the Fast Ethernet PCI network cards based on
+         the Silan SC92031 chip (sometimes also called Rsltek 8139D). If you
+         have one of these, say Y here.
+
+         To compile this driver as a module, choose M here: the module
+         will be called sc92031.  This is recommended.
+
+endif # NET_VENDOR_SILAN
diff --git a/drivers/net/ethernet/silan/Makefile b/drivers/net/ethernet/silan/Makefile
new file mode 100644 (file)
index 0000000..4ad3523
--- /dev/null
@@ -0,0 +1,5 @@
+#
+# Makefile for the Silan network device drivers.
+#
+
+obj-$(CONFIG_SC92031) += sc92031.o
diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c
new file mode 100644 (file)
index 0000000..a284d64
--- /dev/null
@@ -0,0 +1,1609 @@
+/*  Silan SC92031 PCI Fast Ethernet Adapter driver
+ *
+ *  Based on vendor drivers:
+ *  Silan Fast Ethernet Netcard Driver:
+ *    MODULE_AUTHOR ("gaoyonghong");
+ *    MODULE_DESCRIPTION ("SILAN Fast Ethernet driver");
+ *    MODULE_LICENSE("GPL");
+ *  8139D Fast Ethernet driver:
+ *    (C) 2002 by gaoyonghong
+ *    MODULE_AUTHOR ("gaoyonghong");
+ *    MODULE_DESCRIPTION ("Rsltek 8139D PCI Fast Ethernet Adapter driver");
+ *    MODULE_LICENSE("GPL");
+ *  Both are almost identical and seem to be based on pci-skeleton.c
+ *
+ *  Rewritten for 2.6 by Cesar Eduardo Barros
+ *
+ *  A datasheet for this chip can be found at
+ *  http://www.silan.com.cn/english/product/pdf/SC92031AY.pdf 
+ */
+
+/* Note about set_mac_address: I don't know how to change the hardware
+ * matching, so you need to enable IFF_PROMISC when using it.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/crc32.h>
+
+#include <asm/irq.h>
+
+#define SC92031_NAME "sc92031"
+
+/* BAR 0 is MMIO, BAR 1 is PIO */
+#ifndef SC92031_USE_BAR
+#define SC92031_USE_BAR 0
+#endif
+
+/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). */
+static int multicast_filter_limit = 64;
+module_param(multicast_filter_limit, int, 0);
+MODULE_PARM_DESC(multicast_filter_limit,
+       "Maximum number of filtered multicast addresses");
+
+static int media;
+module_param(media, int, 0);
+MODULE_PARM_DESC(media, "Media type (0x00 = autodetect,"
+       " 0x01 = 10M half, 0x02 = 10M full,"
+       " 0x04 = 100M half, 0x08 = 100M full)");
+
+/* Size of the in-memory receive ring. */
+#define  RX_BUF_LEN_IDX  3 /* 0==8K, 1==16K, 2==32K, 3==64K, 4==128K */
+#define  RX_BUF_LEN    (8192 << RX_BUF_LEN_IDX)
+
+/* Number of Tx descriptor registers. */
+#define  NUM_TX_DESC      4
+
+/* max supported ethernet frame size -- must be at least (dev->mtu+14+4).*/
+#define  MAX_ETH_FRAME_SIZE      1536
+
+/* Size of the Tx bounce buffers -- must be at least (dev->mtu+14+4). */
+#define  TX_BUF_SIZE       MAX_ETH_FRAME_SIZE
+#define  TX_BUF_TOT_LEN    (TX_BUF_SIZE * NUM_TX_DESC)
+
+/* The following settings are log_2(bytes)-4:  0 == 16 bytes .. 6==1024, 7==end of packet. */
+#define  RX_FIFO_THRESH    7     /* Rx buffer level before first PCI xfer.  */
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define  TX_TIMEOUT     (4*HZ)
+
+#define  SILAN_STATS_NUM    2    /* number of ETHTOOL_GSTATS */
+
+/* media options */
+#define  AUTOSELECT    0x00
+#define  M10_HALF      0x01
+#define  M10_FULL      0x02
+#define  M100_HALF     0x04
+#define  M100_FULL     0x08
+
+/* Symbolic offsets to registers. */
+enum  silan_registers {
+   Config0    = 0x00,         // Config0
+   Config1    = 0x04,         // Config1
+   RxBufWPtr  = 0x08,         // Rx buffer write pointer
+   IntrStatus = 0x0C,         // Interrupt status
+   IntrMask   = 0x10,         // Interrupt mask
+   RxbufAddr  = 0x14,         // Rx buffer start address
+   RxBufRPtr  = 0x18,         // Rx buffer read pointer
+   Txstatusall = 0x1C,        // Transmit status of all descriptors
+   TxStatus0  = 0x20,        // Transmit status (Four 32bit registers).
+   TxAddr0    = 0x30,         // Tx descriptors (also four 32bit).
+   RxConfig   = 0x40,         // Rx configuration
+   MAC0       = 0x44,         // Ethernet hardware address.
+   MAR0       = 0x4C,         // Multicast filter.
+   RxStatus0  = 0x54,         // Rx status
+   TxConfig   = 0x5C,         // Tx configuration
+   PhyCtrl    = 0x60,         // physical control
+   FlowCtrlConfig = 0x64,     // flow control
+   Miicmd0    = 0x68,         // Mii command0 register
+   Miicmd1    = 0x6C,         // Mii command1 register
+   Miistatus  = 0x70,         // Mii status register
+   Timercnt   = 0x74,         // Timer counter register
+   TimerIntr  = 0x78,         // Timer interrupt register
+   PMConfig   = 0x7C,         // Power Manager configuration
+   CRC0       = 0x80,         // Power Manager CRC (two 32-bit registers)
+   Wakeup0    = 0x88,         // Power Manager wakeup (eight 64-bit registers)
+   LSBCRC0    = 0xC8,         // Power Manager LSBCRC (two 32-bit registers)
+   TestD0     = 0xD0,
+   TestD4     = 0xD4,
+   TestD8     = 0xD8,
+};
+
+#define MII_JAB             16
+#define MII_OutputStatus    24
+
+#define PHY_16_JAB_ENB      0x1000
+#define PHY_16_PORT_ENB     0x1
+
+enum IntrStatusBits {
+   LinkFail       = 0x80000000,
+   LinkOK         = 0x40000000,
+   TimeOut        = 0x20000000,
+   RxOverflow     = 0x0040,
+   RxOK           = 0x0020,
+   TxOK           = 0x0001,
+   IntrBits = LinkFail|LinkOK|TimeOut|RxOverflow|RxOK|TxOK,
+};
+
+enum TxStatusBits {
+   TxCarrierLost = 0x20000000,
+   TxAborted     = 0x10000000,
+   TxOutOfWindow = 0x08000000,
+   TxNccShift    = 22,
+   EarlyTxThresShift = 16,
+   TxStatOK      = 0x8000,
+   TxUnderrun    = 0x4000,
+   TxOwn         = 0x2000,
+};
+
+enum RxStatusBits {
+   RxStatesOK   = 0x80000,
+   RxBadAlign   = 0x40000,
+   RxHugeFrame  = 0x20000,
+   RxSmallFrame = 0x10000,
+   RxCRCOK      = 0x8000,
+   RxCrlFrame   = 0x4000,
+   Rx_Broadcast = 0x2000,
+   Rx_Multicast = 0x1000,
+   RxAddrMatch  = 0x0800,
+   MiiErr       = 0x0400,
+};
+
+enum RxConfigBits {
+   RxFullDx    = 0x80000000,
+   RxEnb       = 0x40000000,
+   RxSmall     = 0x20000000,
+   RxHuge      = 0x10000000,
+   RxErr       = 0x08000000,
+   RxAllphys   = 0x04000000,
+   RxMulticast = 0x02000000,
+   RxBroadcast = 0x01000000,
+   RxLoopBack  = (1 << 23) | (1 << 22),
+   LowThresholdShift  = 12,
+   HighThresholdShift = 2,
+};
+
+enum TxConfigBits {
+   TxFullDx       = 0x80000000,
+   TxEnb          = 0x40000000,
+   TxEnbPad       = 0x20000000,
+   TxEnbHuge      = 0x10000000,
+   TxEnbFCS       = 0x08000000,
+   TxNoBackOff    = 0x04000000,
+   TxEnbPrem      = 0x02000000,
+   TxCareLostCrs  = 0x1000000,
+   TxExdCollNum   = 0xf00000,
+   TxDataRate     = 0x80000,
+};
+
+enum PhyCtrlconfigbits {
+   PhyCtrlAne         = 0x80000000,
+   PhyCtrlSpd100      = 0x40000000,
+   PhyCtrlSpd10       = 0x20000000,
+   PhyCtrlPhyBaseAddr = 0x1f000000,
+   PhyCtrlDux         = 0x800000,
+   PhyCtrlReset       = 0x400000,
+};
+
+enum FlowCtrlConfigBits {
+   FlowCtrlFullDX = 0x80000000,
+   FlowCtrlEnb    = 0x40000000,
+};
+
+enum Config0Bits {
+   Cfg0_Reset  = 0x80000000,
+   Cfg0_Anaoff = 0x40000000,
+   Cfg0_LDPS   = 0x20000000,
+};
+
+enum Config1Bits {
+   Cfg1_EarlyRx = 1 << 31,
+   Cfg1_EarlyTx = 1 << 30,
+
+   //rx buffer size
+   Cfg1_Rcv8K   = 0x0,
+   Cfg1_Rcv16K  = 0x1,
+   Cfg1_Rcv32K  = 0x3,
+   Cfg1_Rcv64K  = 0x7,
+   Cfg1_Rcv128K = 0xf,
+};
+
+enum MiiCmd0Bits {
+   Mii_Divider = 0x20000000,
+   Mii_WRITE   = 0x400000,
+   Mii_READ    = 0x200000,
+   Mii_SCAN    = 0x100000,
+   Mii_Tamod   = 0x80000,
+   Mii_Drvmod  = 0x40000,
+   Mii_mdc     = 0x20000,
+   Mii_mdoen   = 0x10000,
+   Mii_mdo     = 0x8000,
+   Mii_mdi     = 0x4000,
+};
+
+enum MiiStatusBits {
+    Mii_StatusBusy = 0x80000000,
+};
+
+enum PMConfigBits {
+   PM_Enable  = 1 << 31,
+   PM_LongWF  = 1 << 30,
+   PM_Magic   = 1 << 29,
+   PM_LANWake = 1 << 28,
+   PM_LWPTN   = (1 << 27 | 1<< 26),
+   PM_LinkUp  = 1 << 25,
+   PM_WakeUp  = 1 << 24,
+};
+
+/* Locking rules:
+ * priv->lock protects most of the fields of priv and most of the
+ * hardware registers. It does not have to protect against softirqs
+ * between sc92031_disable_interrupts and sc92031_enable_interrupts;
+ * it also does not need to be used in ->open and ->stop while the
+ * device interrupts are off.
+ * Not having to protect against softirqs is very useful due to heavy
+ * use of mdelay() at _sc92031_reset.
+ * Functions prefixed with _sc92031_ must be called with the lock held;
+ * functions prefixed with sc92031_ must be called without the lock held.
+ * Use mmiowb() before unlocking if the hardware was written to.
+ */
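+
+/* Illustrative pattern (as in sc92031_set_multicast_list below; an
+ * example of the rules above, not the only valid call sequence):
+ *
+ *     spin_lock_bh(&priv->lock);
+ *     _sc92031_set_mar(dev);
+ *     _sc92031_set_rx_config(dev);
+ *     mmiowb();
+ *     spin_unlock_bh(&priv->lock);
+ */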
+
+/* Locking rules for the interrupt:
+ * - the interrupt and the tasklet never run at the same time
+ * - neither run between sc92031_disable_interrupts and
+ *   sc92031_enable_interrupts
+ */
+
+struct sc92031_priv {
+       spinlock_t              lock;
+       /* iomap.h cookie */
+       void __iomem            *port_base;
+       /* pci device structure */
+       struct pci_dev          *pdev;
+       /* tasklet */
+       struct tasklet_struct   tasklet;
+
+       /* CPU address of rx ring */
+       void                    *rx_ring;
+       /* PCI address of rx ring */
+       dma_addr_t              rx_ring_dma_addr;
+       /* PCI address of rx ring read pointer */
+       dma_addr_t              rx_ring_tail;
+
+       /* tx ring write index */
+       unsigned                tx_head;
+       /* tx ring read index */
+       unsigned                tx_tail;
+       /* CPU address of tx bounce buffer */
+       void                    *tx_bufs;
+       /* PCI address of tx bounce buffer */
+       dma_addr_t              tx_bufs_dma_addr;
+
+       /* copies of some hardware registers */
+       u32                     intr_status;
+       atomic_t                intr_mask;
+       u32                     rx_config;
+       u32                     tx_config;
+       u32                     pm_config;
+
+       /* copy of some flags from dev->flags */
+       unsigned int            mc_flags;
+
+       /* for ETHTOOL_GSTATS */
+       u64                     tx_timeouts;
+       u64                     rx_loss;
+
+       /* for dev->get_stats */
+       long                    rx_value;
+};
+
+/* I don't know which registers can be safely read; however, I can guess
+ * MAC0 is one of them. */
+static inline void _sc92031_dummy_read(void __iomem *port_base)
+{
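+       /* A read from a harmless register flushes posted PCI writes so a
+        * preceding iowrite32() is guaranteed to have reached the chip. */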
+       ioread32(port_base + MAC0);
+}
+
+static u32 _sc92031_mii_wait(void __iomem *port_base)
+{
+       u32 mii_status;
+
+       do {
+               udelay(10);
+               mii_status = ioread32(port_base + Miistatus);
+       } while (mii_status & Mii_StatusBusy);
+
+       return mii_status;
+}
+
+static u32 _sc92031_mii_cmd(void __iomem *port_base, u32 cmd0, u32 cmd1)
+{
+       iowrite32(Mii_Divider, port_base + Miicmd0);
+
+       _sc92031_mii_wait(port_base);
+
+       iowrite32(cmd1, port_base + Miicmd1);
+       iowrite32(Mii_Divider | cmd0, port_base + Miicmd0);
+
+       return _sc92031_mii_wait(port_base);
+}
+
+static void _sc92031_mii_scan(void __iomem *port_base)
+{
+       _sc92031_mii_cmd(port_base, Mii_SCAN, 0x1 << 6);
+}
+
+static u16 _sc92031_mii_read(void __iomem *port_base, unsigned reg)
+{
+       return _sc92031_mii_cmd(port_base, Mii_READ, reg << 6) >> 13;
+}
+
+static void _sc92031_mii_write(void __iomem *port_base, unsigned reg, u16 val)
+{
+       _sc92031_mii_cmd(port_base, Mii_WRITE, (reg << 6) | ((u32)val << 11));
+}
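+
+/* Field layout inferred from the helpers above (not from a datasheet):
+ * Miicmd1 carries the PHY register number from bit 6 and write data from
+ * bit 11; read results come back in Miistatus from bit 13, hence the
+ * >> 13 in _sc92031_mii_read(). */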
+
+static void sc92031_disable_interrupts(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+
+       /* tell the tasklet/interrupt not to enable interrupts */
+       atomic_set(&priv->intr_mask, 0);
+       wmb();
+
+       /* stop interrupts */
+       iowrite32(0, port_base + IntrMask);
+       _sc92031_dummy_read(port_base);
+       mmiowb();
+
+       /* wait for any concurrent interrupt/tasklet to finish */
+       synchronize_irq(dev->irq);
+       tasklet_disable(&priv->tasklet);
+}
+
+static void sc92031_enable_interrupts(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+
+       tasklet_enable(&priv->tasklet);
+
+       atomic_set(&priv->intr_mask, IntrBits);
+       wmb();
+
+       iowrite32(IntrBits, port_base + IntrMask);
+       mmiowb();
+}
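+
+/* The two helpers above must be used as a pair; priv->intr_mask is the
+ * handshake that keeps the interrupt handler and the tasklet from
+ * re-enabling interrupts while they are supposed to stay masked. */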
+
+static void _sc92031_disable_tx_rx(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+
+       priv->rx_config &= ~RxEnb;
+       priv->tx_config &= ~TxEnb;
+       iowrite32(priv->rx_config, port_base + RxConfig);
+       iowrite32(priv->tx_config, port_base + TxConfig);
+}
+
+static void _sc92031_enable_tx_rx(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+
+       priv->rx_config |= RxEnb;
+       priv->tx_config |= TxEnb;
+       iowrite32(priv->rx_config, port_base + RxConfig);
+       iowrite32(priv->tx_config, port_base + TxConfig);
+}
+
+static void _sc92031_tx_clear(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+
+       while (priv->tx_head - priv->tx_tail > 0) {
+               priv->tx_tail++;
+               dev->stats.tx_dropped++;
+       }
+       priv->tx_head = priv->tx_tail = 0;
+}
+
+static void _sc92031_set_mar(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u32 mar0 = 0, mar1 = 0;
+
+       if ((dev->flags & IFF_PROMISC) ||
+           netdev_mc_count(dev) > multicast_filter_limit ||
+           (dev->flags & IFF_ALLMULTI))
+               mar0 = mar1 = 0xffffffff;
+       else if (dev->flags & IFF_MULTICAST) {
+               struct netdev_hw_addr *ha;
+
+               netdev_for_each_mc_addr(ha, dev) {
+                       u32 crc;
+                       unsigned bit = 0;
+
+                       crc = ~ether_crc(ETH_ALEN, ha->addr);
+                       crc >>= 24;
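+
+                       /* Shuffle the CRC-derived bits into the order the
+                        * hardware's multicast hash expects (bit mapping
+                        * apparently inherited from the vendor driver). */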
+
+                       if (crc & 0x01) bit |= 0x02;
+                       if (crc & 0x02) bit |= 0x01;
+                       if (crc & 0x10) bit |= 0x20;
+                       if (crc & 0x20) bit |= 0x10;
+                       if (crc & 0x40) bit |= 0x08;
+                       if (crc & 0x80) bit |= 0x04;
+
+                       if (bit > 31)
+                               mar0 |= 0x1 << (bit - 32);
+                       else
+                               mar1 |= 0x1 << bit;
+               }
+       }
+
+       iowrite32(mar0, port_base + MAR0);
+       iowrite32(mar1, port_base + MAR0 + 4);
+}
+
+static void _sc92031_set_rx_config(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       unsigned int old_mc_flags;
+       u32 rx_config_bits = 0;
+
+       old_mc_flags = priv->mc_flags;
+
+       if (dev->flags & IFF_PROMISC)
+               rx_config_bits |= RxSmall | RxHuge | RxErr | RxBroadcast
+                               | RxMulticast | RxAllphys;
+
+       if (dev->flags & (IFF_ALLMULTI | IFF_MULTICAST))
+               rx_config_bits |= RxMulticast;
+
+       if (dev->flags & IFF_BROADCAST)
+               rx_config_bits |= RxBroadcast;
+
+       priv->rx_config &= ~(RxSmall | RxHuge | RxErr | RxBroadcast
+                       | RxMulticast | RxAllphys);
+       priv->rx_config |= rx_config_bits;
+
+       priv->mc_flags = dev->flags & (IFF_PROMISC | IFF_ALLMULTI
+                       | IFF_MULTICAST | IFF_BROADCAST);
+
+       if (netif_carrier_ok(dev) && priv->mc_flags != old_mc_flags)
+               iowrite32(priv->rx_config, port_base + RxConfig);
+}
+
+static bool _sc92031_check_media(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u16 bmsr;
+
+       bmsr = _sc92031_mii_read(port_base, MII_BMSR);
+       rmb();
+       if (bmsr & BMSR_LSTATUS) {
+               bool speed_100, duplex_full;
+               u32 flow_ctrl_config = 0;
+               u16 output_status = _sc92031_mii_read(port_base,
+                               MII_OutputStatus);
+               _sc92031_mii_scan(port_base);
+
+               speed_100 = output_status & 0x2;
+               duplex_full = output_status & 0x4;
+
+               /* Initial Tx/Rx configuration */
+               priv->rx_config = (0x40 << LowThresholdShift) | (0x1c0 << HighThresholdShift);
+               priv->tx_config = 0x48800000;
+
+               /* NOTE: vendor driver had dead code here to enable tx padding */
+
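+               /* 0x80000 is TxDataRate in TxConfigBits above: set for
+                * 10M links, clear for 100M. */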
+               if (!speed_100)
+                       priv->tx_config |= 0x80000;
+
+               // configure rx mode
+               _sc92031_set_rx_config(dev);
+
+               if (duplex_full) {
+                       priv->rx_config |= RxFullDx;
+                       priv->tx_config |= TxFullDx;
+                       flow_ctrl_config = FlowCtrlFullDX | FlowCtrlEnb;
+               } else {
+                       priv->rx_config &= ~RxFullDx;
+                       priv->tx_config &= ~TxFullDx;
+               }
+
+               _sc92031_set_mar(dev);
+               _sc92031_set_rx_config(dev);
+               _sc92031_enable_tx_rx(dev);
+               iowrite32(flow_ctrl_config, port_base + FlowCtrlConfig);
+
+               netif_carrier_on(dev);
+
+               if (printk_ratelimit())
+                       printk(KERN_INFO "%s: link up, %sMbps, %s-duplex\n",
+                               dev->name,
+                               speed_100 ? "100" : "10",
+                               duplex_full ? "full" : "half");
+               return true;
+       } else {
+               _sc92031_mii_scan(port_base);
+
+               netif_carrier_off(dev);
+
+               _sc92031_disable_tx_rx(dev);
+
+               if (printk_ratelimit())
+                       printk(KERN_INFO "%s: link down\n", dev->name);
+               return false;
+       }
+}
+
+static void _sc92031_phy_reset(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u32 phy_ctrl;
+
+       phy_ctrl = ioread32(port_base + PhyCtrl);
+       phy_ctrl &= ~(PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10);
+       phy_ctrl |= PhyCtrlAne | PhyCtrlReset;
+
+       switch (media) {
+       default:
+       case AUTOSELECT:
+               phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10;
+               break;
+       case M10_HALF:
+               phy_ctrl |= PhyCtrlSpd10;
+               break;
+       case M10_FULL:
+               phy_ctrl |= PhyCtrlDux | PhyCtrlSpd10;
+               break;
+       case M100_HALF:
+               phy_ctrl |= PhyCtrlSpd100;
+               break;
+       case M100_FULL:
+               phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100;
+               break;
+       }
+
+       iowrite32(phy_ctrl, port_base + PhyCtrl);
+       mdelay(10);
+
+       phy_ctrl &= ~PhyCtrlReset;
+       iowrite32(phy_ctrl, port_base + PhyCtrl);
+       mdelay(1);
+
+       _sc92031_mii_write(port_base, MII_JAB,
+                       PHY_16_JAB_ENB | PHY_16_PORT_ENB);
+       _sc92031_mii_scan(port_base);
+
+       netif_carrier_off(dev);
+       netif_stop_queue(dev);
+}
+
+static void _sc92031_reset(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+
+       /* disable PM */
+       iowrite32(0, port_base + PMConfig);
+
+       /* soft reset the chip */
+       iowrite32(Cfg0_Reset, port_base + Config0);
+       mdelay(200);
+
+       iowrite32(0, port_base + Config0);
+       mdelay(10);
+
+       /* disable interrupts */
+       iowrite32(0, port_base + IntrMask);
+
+       /* clear multicast address */
+       iowrite32(0, port_base + MAR0);
+       iowrite32(0, port_base + MAR0 + 4);
+
+       /* init rx ring */
+       iowrite32(priv->rx_ring_dma_addr, port_base + RxbufAddr);
+       priv->rx_ring_tail = priv->rx_ring_dma_addr;
+
+       /* init tx ring */
+       _sc92031_tx_clear(dev);
+
+       /* clear old register values */
+       priv->intr_status = 0;
+       atomic_set(&priv->intr_mask, 0);
+       priv->rx_config = 0;
+       priv->tx_config = 0;
+       priv->mc_flags = 0;
+
+       /* configure rx buffer size */
+       /* NOTE: vendor driver had dead code here to enable early tx/rx */
+       iowrite32(Cfg1_Rcv64K, port_base + Config1);
+
+       _sc92031_phy_reset(dev);
+       _sc92031_check_media(dev);
+
+       /* calculate rx fifo overflow */
+       priv->rx_value = 0;
+
+       /* enable PM */
+       iowrite32(priv->pm_config, port_base + PMConfig);
+
+       /* clear intr register */
+       ioread32(port_base + IntrStatus);
+}
+
+static void _sc92031_tx_tasklet(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+
+       unsigned old_tx_tail;
+       unsigned entry;
+       u32 tx_status;
+
+       old_tx_tail = priv->tx_tail;
+       while (priv->tx_head - priv->tx_tail > 0) {
+               entry = priv->tx_tail % NUM_TX_DESC;
+               tx_status = ioread32(port_base + TxStatus0 + entry * 4);
+
+               if (!(tx_status & (TxStatOK | TxUnderrun | TxAborted)))
+                       break;
+
+               priv->tx_tail++;
+
+               if (tx_status & TxStatOK) {
+                       dev->stats.tx_bytes += tx_status & 0x1fff;
+                       dev->stats.tx_packets++;
+                       /* Note: TxCarrierLost is always asserted at 100mbps. */
+                       dev->stats.collisions += (tx_status >> 22) & 0xf;
+               }
+
+               if (tx_status & (TxOutOfWindow | TxAborted)) {
+                       dev->stats.tx_errors++;
+
+                       if (tx_status & TxAborted)
+                               dev->stats.tx_aborted_errors++;
+
+                       if (tx_status & TxCarrierLost)
+                               dev->stats.tx_carrier_errors++;
+
+                       if (tx_status & TxOutOfWindow)
+                               dev->stats.tx_window_errors++;
+               }
+
+               if (tx_status & TxUnderrun)
+                       dev->stats.tx_fifo_errors++;
+       }
+
+       if (priv->tx_tail != old_tx_tail)
+               if (netif_queue_stopped(dev))
+                       netif_wake_queue(dev);
+}
+
+static void _sc92031_rx_tasklet_error(struct net_device *dev,
+                                     u32 rx_status, unsigned rx_size)
+{
+       if(rx_size > (MAX_ETH_FRAME_SIZE + 4) || rx_size < 16) {
+               dev->stats.rx_errors++;
+               dev->stats.rx_length_errors++;
+       }
+
+       if (!(rx_status & RxStatesOK)) {
+               dev->stats.rx_errors++;
+
+               if (rx_status & (RxHugeFrame | RxSmallFrame))
+                       dev->stats.rx_length_errors++;
+
+               if (rx_status & RxBadAlign)
+                       dev->stats.rx_frame_errors++;
+
+               if (!(rx_status & RxCRCOK))
+                       dev->stats.rx_crc_errors++;
+       } else {
+               struct sc92031_priv *priv = netdev_priv(dev);
+               priv->rx_loss++;
+       }
+}
+
+static void _sc92031_rx_tasklet(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+
+       dma_addr_t rx_ring_head;
+       unsigned rx_len;
+       unsigned rx_ring_offset;
+       void *rx_ring = priv->rx_ring;
+
+       rx_ring_head = ioread32(port_base + RxBufWPtr);
+       rmb();
+
+       /* rx_ring_head is only 17 bits in the RxBufWPtr register.
+        * we need to change it to 32 bits physical address
+        */
+       rx_ring_head &= (dma_addr_t)(RX_BUF_LEN - 1);
+       rx_ring_head |= priv->rx_ring_dma_addr & ~(dma_addr_t)(RX_BUF_LEN - 1);
+       if (rx_ring_head < priv->rx_ring_dma_addr)
+               rx_ring_head += RX_BUF_LEN;
+
+       if (rx_ring_head >= priv->rx_ring_tail)
+               rx_len = rx_ring_head - priv->rx_ring_tail;
+       else
+               rx_len = RX_BUF_LEN - (priv->rx_ring_tail - rx_ring_head);
+
+       if (!rx_len)
+               return;
+
+       if (unlikely(rx_len > RX_BUF_LEN)) {
+               if (printk_ratelimit())
+                       printk(KERN_ERR "%s: rx packets length > rx buffer\n",
+                                       dev->name);
+               return;
+       }
+
+       rx_ring_offset = (priv->rx_ring_tail - priv->rx_ring_dma_addr) % RX_BUF_LEN;
+
+       while (rx_len) {
+               u32 rx_status;
+               unsigned rx_size, rx_size_align, pkt_size;
+               struct sk_buff *skb;
+
+               rx_status = le32_to_cpup((__le32 *)(rx_ring + rx_ring_offset));
+               rmb();
+
+               rx_size = rx_status >> 20;
+               rx_size_align = (rx_size + 3) & ~3;     // for 4 bytes aligned
+               pkt_size = rx_size - 4; // Omit the four octet CRC from the length.
+
+               rx_ring_offset = (rx_ring_offset + 4) % RX_BUF_LEN;
+
+               if (unlikely(rx_status == 0 ||
+                            rx_size > (MAX_ETH_FRAME_SIZE + 4) ||
+                            rx_size < 16 ||
+                            !(rx_status & RxStatesOK))) {
+                       _sc92031_rx_tasklet_error(dev, rx_status, rx_size);
+                       break;
+               }
+
+               if (unlikely(rx_size_align + 4 > rx_len)) {
+                       if (printk_ratelimit())
+                               printk(KERN_ERR "%s: rx_len is too small\n", dev->name);
+                       break;
+               }
+
+               rx_len -= rx_size_align + 4;
+
+               skb = netdev_alloc_skb_ip_align(dev, pkt_size);
+               if (unlikely(!skb)) {
+                       if (printk_ratelimit())
+                               printk(KERN_ERR "%s: Couldn't allocate a skb_buff for a packet of size %u\n",
+                                               dev->name, pkt_size);
+                       goto next;
+               }
+
+               if ((rx_ring_offset + pkt_size) > RX_BUF_LEN) {
+                       memcpy(skb_put(skb, RX_BUF_LEN - rx_ring_offset),
+                               rx_ring + rx_ring_offset, RX_BUF_LEN - rx_ring_offset);
+                       memcpy(skb_put(skb, pkt_size - (RX_BUF_LEN - rx_ring_offset)),
+                               rx_ring, pkt_size - (RX_BUF_LEN - rx_ring_offset));
+               } else {
+                       memcpy(skb_put(skb, pkt_size), rx_ring + rx_ring_offset, pkt_size);
+               }
+
+               skb->protocol = eth_type_trans(skb, dev);
+               netif_rx(skb);
+
+               dev->stats.rx_bytes += pkt_size;
+               dev->stats.rx_packets++;
+
+               if (rx_status & Rx_Multicast)
+                       dev->stats.multicast++;
+
+       next:
+               rx_ring_offset = (rx_ring_offset + rx_size_align) % RX_BUF_LEN;
+       }
+       mb();
+
+       priv->rx_ring_tail = rx_ring_head;
+       iowrite32(priv->rx_ring_tail, port_base + RxBufRPtr);
+}
+
+static void _sc92031_link_tasklet(struct net_device *dev)
+{
+       if (_sc92031_check_media(dev))
+               netif_wake_queue(dev);
+       else {
+               netif_stop_queue(dev);
+               dev->stats.tx_carrier_errors++;
+       }
+}
+
+static void sc92031_tasklet(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u32 intr_status, intr_mask;
+
+       intr_status = priv->intr_status;
+
+       spin_lock(&priv->lock);
+
+       if (unlikely(!netif_running(dev)))
+               goto out;
+
+       if (intr_status & TxOK)
+               _sc92031_tx_tasklet(dev);
+
+       if (intr_status & RxOK)
+               _sc92031_rx_tasklet(dev);
+
+       if (intr_status & RxOverflow)
+               dev->stats.rx_errors++;
+
+       if (intr_status & TimeOut) {
+               dev->stats.rx_errors++;
+               dev->stats.rx_length_errors++;
+       }
+
+       if (intr_status & (LinkFail | LinkOK))
+               _sc92031_link_tasklet(dev);
+
+out:
+       intr_mask = atomic_read(&priv->intr_mask);
+       rmb();
+
+       iowrite32(intr_mask, port_base + IntrMask);
+       mmiowb();
+
+       spin_unlock(&priv->lock);
+}
+
+static irqreturn_t sc92031_interrupt(int irq, void *dev_id)
+{
+       struct net_device *dev = dev_id;
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u32 intr_status, intr_mask;
+
+       /* mask interrupts before clearing IntrStatus */
+       iowrite32(0, port_base + IntrMask);
+       _sc92031_dummy_read(port_base);
+
+       intr_status = ioread32(port_base + IntrStatus);
+       if (unlikely(intr_status == 0xffffffff))
+               return IRQ_NONE;        // hardware has gone missing
+
+       intr_status &= IntrBits;
+       if (!intr_status)
+               goto out_none;
+
+       priv->intr_status = intr_status;
+       tasklet_schedule(&priv->tasklet);
+
+       return IRQ_HANDLED;
+
+out_none:
+       intr_mask = atomic_read(&priv->intr_mask);
+       rmb();
+
+       iowrite32(intr_mask, port_base + IntrMask);
+       mmiowb();
+
+       return IRQ_NONE;
+}
+
+static struct net_device_stats *sc92031_get_stats(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+
+       // FIXME I do not understand what is this trying to do.
+       if (netif_running(dev)) {
+               int temp;
+
+               spin_lock_bh(&priv->lock);
+
+               /* Update the error count. */
+               temp = (ioread32(port_base + RxStatus0) >> 16) & 0xffff;
+
+               if (temp == 0xffff) {
+                       priv->rx_value += temp;
+                       dev->stats.rx_fifo_errors = priv->rx_value;
+               } else
+                       dev->stats.rx_fifo_errors = temp + priv->rx_value;
+
+               spin_unlock_bh(&priv->lock);
+       }
+
+       return &dev->stats;
+}
+
+static netdev_tx_t sc92031_start_xmit(struct sk_buff *skb,
+                                     struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       unsigned len;
+       unsigned entry;
+       u32 tx_status;
+
+       if (unlikely(skb->len > TX_BUF_SIZE)) {
+               dev->stats.tx_dropped++;
+               goto out;
+       }
+
+       spin_lock(&priv->lock);
+
+       if (unlikely(!netif_carrier_ok(dev))) {
+               dev->stats.tx_dropped++;
+               goto out_unlock;
+       }
+
+       BUG_ON(priv->tx_head - priv->tx_tail >= NUM_TX_DESC);
+
+       entry = priv->tx_head++ % NUM_TX_DESC;
+
+       skb_copy_and_csum_dev(skb, priv->tx_bufs + entry * TX_BUF_SIZE);
+
+       len = skb->len;
+       if (len < ETH_ZLEN) {
+               memset(priv->tx_bufs + entry * TX_BUF_SIZE + len,
+                               0, ETH_ZLEN - len);
+               len = ETH_ZLEN;
+       }
+
+       wmb();
+
+       if (len < 100)
+               tx_status = len;
+       else if (len < 300)
+               tx_status = 0x30000 | len;
+       else
+               tx_status = 0x50000 | len;
+
+       iowrite32(priv->tx_bufs_dma_addr + entry * TX_BUF_SIZE,
+                       port_base + TxAddr0 + entry * 4);
+       iowrite32(tx_status, port_base + TxStatus0 + entry * 4);
+       mmiowb();
+
+       if (priv->tx_head - priv->tx_tail >= NUM_TX_DESC)
+               netif_stop_queue(dev);
+
+out_unlock:
+       spin_unlock(&priv->lock);
+
+out:
+       dev_kfree_skb(skb);
+
+       return NETDEV_TX_OK;
+}
+
+static int sc92031_open(struct net_device *dev)
+{
+       int err;
+       struct sc92031_priv *priv = netdev_priv(dev);
+       struct pci_dev *pdev = priv->pdev;
+
+       priv->rx_ring = pci_alloc_consistent(pdev, RX_BUF_LEN,
+                       &priv->rx_ring_dma_addr);
+       if (unlikely(!priv->rx_ring)) {
+               err = -ENOMEM;
+               goto out_alloc_rx_ring;
+       }
+
+       priv->tx_bufs = pci_alloc_consistent(pdev, TX_BUF_TOT_LEN,
+                       &priv->tx_bufs_dma_addr);
+       if (unlikely(!priv->tx_bufs)) {
+               err = -ENOMEM;
+               goto out_alloc_tx_bufs;
+       }
+       priv->tx_head = priv->tx_tail = 0;
+
+       err = request_irq(pdev->irq, sc92031_interrupt,
+                       IRQF_SHARED, dev->name, dev);
+       if (unlikely(err < 0))
+               goto out_request_irq;
+
+       priv->pm_config = 0;
+
+       /* Interrupts already disabled by sc92031_stop or sc92031_probe */
+       spin_lock_bh(&priv->lock);
+
+       _sc92031_reset(dev);
+       mmiowb();
+
+       spin_unlock_bh(&priv->lock);
+       sc92031_enable_interrupts(dev);
+
+       if (netif_carrier_ok(dev))
+               netif_start_queue(dev);
+       else
+               netif_tx_disable(dev);
+
+       return 0;
+
+out_request_irq:
+       pci_free_consistent(pdev, TX_BUF_TOT_LEN, priv->tx_bufs,
+                       priv->tx_bufs_dma_addr);
+out_alloc_tx_bufs:
+       pci_free_consistent(pdev, RX_BUF_LEN, priv->rx_ring,
+                       priv->rx_ring_dma_addr);
+out_alloc_rx_ring:
+       return err;
+}
+
+static int sc92031_stop(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       struct pci_dev *pdev = priv->pdev;
+
+       netif_tx_disable(dev);
+
+       /* Disable interrupts, stop Tx and Rx. */
+       sc92031_disable_interrupts(dev);
+
+       spin_lock_bh(&priv->lock);
+
+       _sc92031_disable_tx_rx(dev);
+       _sc92031_tx_clear(dev);
+       mmiowb();
+
+       spin_unlock_bh(&priv->lock);
+
+       free_irq(pdev->irq, dev);
+       pci_free_consistent(pdev, TX_BUF_TOT_LEN, priv->tx_bufs,
+                       priv->tx_bufs_dma_addr);
+       pci_free_consistent(pdev, RX_BUF_LEN, priv->rx_ring,
+                       priv->rx_ring_dma_addr);
+
+       return 0;
+}
+
+static void sc92031_set_multicast_list(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+
+       spin_lock_bh(&priv->lock);
+
+       _sc92031_set_mar(dev);
+       _sc92031_set_rx_config(dev);
+       mmiowb();
+
+       spin_unlock_bh(&priv->lock);
+}
+
+static void sc92031_tx_timeout(struct net_device *dev)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+
+       /* Disable interrupts by clearing the interrupt mask.*/
+       sc92031_disable_interrupts(dev);
+
+       spin_lock(&priv->lock);
+
+       priv->tx_timeouts++;
+
+       _sc92031_reset(dev);
+       mmiowb();
+
+       spin_unlock(&priv->lock);
+
+       /* enable interrupts */
+       sc92031_enable_interrupts(dev);
+
+       if (netif_carrier_ok(dev))
+               netif_wake_queue(dev);
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void sc92031_poll_controller(struct net_device *dev)
+{
+       disable_irq(dev->irq);
+       if (sc92031_interrupt(dev->irq, dev) != IRQ_NONE)
+               sc92031_tasklet((unsigned long)dev);
+       enable_irq(dev->irq);
+}
+#endif
+
+static int sc92031_ethtool_get_settings(struct net_device *dev,
+               struct ethtool_cmd *cmd)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u8 phy_address;
+       u32 phy_ctrl;
+       u16 output_status;
+
+       spin_lock_bh(&priv->lock);
+
+       phy_address = ioread32(port_base + Miicmd1) >> 27;
+       phy_ctrl = ioread32(port_base + PhyCtrl);
+
+       output_status = _sc92031_mii_read(port_base, MII_OutputStatus);
+       _sc92031_mii_scan(port_base);
+       mmiowb();
+
+       spin_unlock_bh(&priv->lock);
+
+       cmd->supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full
+                       | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full
+                       | SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII;
+
+       cmd->advertising = ADVERTISED_TP | ADVERTISED_MII;
+
+       if ((phy_ctrl & (PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10))
+                       == (PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10))
+               cmd->advertising |= ADVERTISED_Autoneg;
+
+       if ((phy_ctrl & PhyCtrlSpd10) == PhyCtrlSpd10)
+               cmd->advertising |= ADVERTISED_10baseT_Half;
+
+       if ((phy_ctrl & (PhyCtrlSpd10 | PhyCtrlDux))
+                       == (PhyCtrlSpd10 | PhyCtrlDux))
+               cmd->advertising |= ADVERTISED_10baseT_Full;
+
+       if ((phy_ctrl & PhyCtrlSpd100) == PhyCtrlSpd100)
+               cmd->advertising |= ADVERTISED_100baseT_Half;
+
+       if ((phy_ctrl & (PhyCtrlSpd100 | PhyCtrlDux))
+                       == (PhyCtrlSpd100 | PhyCtrlDux))
+               cmd->advertising |= ADVERTISED_100baseT_Full;
+
+       if (phy_ctrl & PhyCtrlAne)
+               cmd->advertising |= ADVERTISED_Autoneg;
+
+       ethtool_cmd_speed_set(cmd,
+                             (output_status & 0x2) ? SPEED_100 : SPEED_10);
+       cmd->duplex = (output_status & 0x4) ? DUPLEX_FULL : DUPLEX_HALF;
+       cmd->port = PORT_MII;
+       cmd->phy_address = phy_address;
+       cmd->transceiver = XCVR_INTERNAL;
+       cmd->autoneg = (phy_ctrl & PhyCtrlAne) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+       return 0;
+}
+
+static int sc92031_ethtool_set_settings(struct net_device *dev,
+               struct ethtool_cmd *cmd)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u32 speed = ethtool_cmd_speed(cmd);
+       u32 phy_ctrl;
+       u32 old_phy_ctrl;
+
+       if (!(speed == SPEED_10 || speed == SPEED_100))
+               return -EINVAL;
+       if (!(cmd->duplex == DUPLEX_HALF || cmd->duplex == DUPLEX_FULL))
+               return -EINVAL;
+       if (!(cmd->port == PORT_MII))
+               return -EINVAL;
+       if (!(cmd->phy_address == 0x1f))
+               return -EINVAL;
+       if (!(cmd->transceiver == XCVR_INTERNAL))
+               return -EINVAL;
+       if (!(cmd->autoneg == AUTONEG_DISABLE || cmd->autoneg == AUTONEG_ENABLE))
+               return -EINVAL;
+
+       if (cmd->autoneg == AUTONEG_ENABLE) {
+               if (!(cmd->advertising & (ADVERTISED_Autoneg
+                               | ADVERTISED_100baseT_Full
+                               | ADVERTISED_100baseT_Half
+                               | ADVERTISED_10baseT_Full
+                               | ADVERTISED_10baseT_Half)))
+                       return -EINVAL;
+
+               phy_ctrl = PhyCtrlAne;
+
+               // FIXME: I'm not sure what the original code was trying to do
+               if (cmd->advertising & ADVERTISED_Autoneg)
+                       phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100 | PhyCtrlSpd10;
+               if (cmd->advertising & ADVERTISED_100baseT_Full)
+                       phy_ctrl |= PhyCtrlDux | PhyCtrlSpd100;
+               if (cmd->advertising & ADVERTISED_100baseT_Half)
+                       phy_ctrl |= PhyCtrlSpd100;
+               if (cmd->advertising & ADVERTISED_10baseT_Full)
+                       phy_ctrl |= PhyCtrlSpd10 | PhyCtrlDux;
+               if (cmd->advertising & ADVERTISED_10baseT_Half)
+                       phy_ctrl |= PhyCtrlSpd10;
+       } else {
+               // FIXME: Whole branch guessed
+               phy_ctrl = 0;
+
+               if (speed == SPEED_10)
+                       phy_ctrl |= PhyCtrlSpd10;
+               else /* cmd->speed == SPEED_100 */
+                       phy_ctrl |= PhyCtrlSpd100;
+
+               if (cmd->duplex == DUPLEX_FULL)
+                       phy_ctrl |= PhyCtrlDux;
+       }
+
+       spin_lock_bh(&priv->lock);
+
+       old_phy_ctrl = ioread32(port_base + PhyCtrl);
+       phy_ctrl |= old_phy_ctrl & ~(PhyCtrlAne | PhyCtrlDux
+                       | PhyCtrlSpd100 | PhyCtrlSpd10);
+       if (phy_ctrl != old_phy_ctrl)
+               iowrite32(phy_ctrl, port_base + PhyCtrl);
+
+       spin_unlock_bh(&priv->lock);
+
+       return 0;
+}
+
+static void sc92031_ethtool_get_wol(struct net_device *dev,
+               struct ethtool_wolinfo *wolinfo)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u32 pm_config;
+
+       spin_lock_bh(&priv->lock);
+       pm_config = ioread32(port_base + PMConfig);
+       spin_unlock_bh(&priv->lock);
+
+       // FIXME: Guessed
+       wolinfo->supported = WAKE_PHY | WAKE_MAGIC
+                       | WAKE_UCAST | WAKE_MCAST | WAKE_BCAST;
+       wolinfo->wolopts = 0;
+
+       if (pm_config & PM_LinkUp)
+               wolinfo->wolopts |= WAKE_PHY;
+
+       if (pm_config & PM_Magic)
+               wolinfo->wolopts |= WAKE_MAGIC;
+
+       if (pm_config & PM_WakeUp)
+               // FIXME: Guessed
+               wolinfo->wolopts |= WAKE_UCAST | WAKE_MCAST | WAKE_BCAST;
+}
+
+static int sc92031_ethtool_set_wol(struct net_device *dev,
+               struct ethtool_wolinfo *wolinfo)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u32 pm_config;
+
+       spin_lock_bh(&priv->lock);
+
+       pm_config = ioread32(port_base + PMConfig)
+                       & ~(PM_LinkUp | PM_Magic | PM_WakeUp);
+
+       if (wolinfo->wolopts & WAKE_PHY)
+               pm_config |= PM_LinkUp;
+
+       if (wolinfo->wolopts & WAKE_MAGIC)
+               pm_config |= PM_Magic;
+
+       // FIXME: Guessed
+       if (wolinfo->wolopts & (WAKE_UCAST | WAKE_MCAST | WAKE_BCAST))
+               pm_config |= PM_WakeUp;
+
+       priv->pm_config = pm_config;
+       iowrite32(pm_config, port_base + PMConfig);
+       mmiowb();
+
+       spin_unlock_bh(&priv->lock);
+
+       return 0;
+}
+
+static int sc92031_ethtool_nway_reset(struct net_device *dev)
+{
+       int err = 0;
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem *port_base = priv->port_base;
+       u16 bmcr;
+
+       spin_lock_bh(&priv->lock);
+
+       bmcr = _sc92031_mii_read(port_base, MII_BMCR);
+       if (!(bmcr & BMCR_ANENABLE)) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       _sc92031_mii_write(port_base, MII_BMCR, bmcr | BMCR_ANRESTART);
+
+out:
+       _sc92031_mii_scan(port_base);
+       mmiowb();
+
+       spin_unlock_bh(&priv->lock);
+
+       return err;
+}
+
+static const char sc92031_ethtool_stats_strings[SILAN_STATS_NUM][ETH_GSTRING_LEN] = {
+       "tx_timeout",
+       "rx_loss",
+};
+
+static void sc92031_ethtool_get_strings(struct net_device *dev,
+               u32 stringset, u8 *data)
+{
+       if (stringset == ETH_SS_STATS)
+               memcpy(data, sc92031_ethtool_stats_strings,
+                               SILAN_STATS_NUM * ETH_GSTRING_LEN);
+}
+
+static int sc92031_ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+       switch (sset) {
+       case ETH_SS_STATS:
+               return SILAN_STATS_NUM;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static void sc92031_ethtool_get_ethtool_stats(struct net_device *dev,
+               struct ethtool_stats *stats, u64 *data)
+{
+       struct sc92031_priv *priv = netdev_priv(dev);
+
+       spin_lock_bh(&priv->lock);
+       data[0] = priv->tx_timeouts;
+       data[1] = priv->rx_loss;
+       spin_unlock_bh(&priv->lock);
+}
+
+static const struct ethtool_ops sc92031_ethtool_ops = {
+       .get_settings           = sc92031_ethtool_get_settings,
+       .set_settings           = sc92031_ethtool_set_settings,
+       .get_wol                = sc92031_ethtool_get_wol,
+       .set_wol                = sc92031_ethtool_set_wol,
+       .nway_reset             = sc92031_ethtool_nway_reset,
+       .get_link               = ethtool_op_get_link,
+       .get_strings            = sc92031_ethtool_get_strings,
+       .get_sset_count         = sc92031_ethtool_get_sset_count,
+       .get_ethtool_stats      = sc92031_ethtool_get_ethtool_stats,
+};
+
+
+static const struct net_device_ops sc92031_netdev_ops = {
+       .ndo_get_stats          = sc92031_get_stats,
+       .ndo_start_xmit         = sc92031_start_xmit,
+       .ndo_open               = sc92031_open,
+       .ndo_stop               = sc92031_stop,
+       .ndo_set_rx_mode        = sc92031_set_multicast_list,
+       .ndo_change_mtu         = eth_change_mtu,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_tx_timeout         = sc92031_tx_timeout,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = sc92031_poll_controller,
+#endif
+};
+
+static int __devinit sc92031_probe(struct pci_dev *pdev,
+               const struct pci_device_id *id)
+{
+       int err;
+       void __iomem* port_base;
+       struct net_device *dev;
+       struct sc92031_priv *priv;
+       u32 mac0, mac1;
+       unsigned long base_addr;
+
+       err = pci_enable_device(pdev);
+       if (unlikely(err < 0))
+               goto out_enable_device;
+
+       pci_set_master(pdev);
+
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+       if (unlikely(err < 0))
+               goto out_set_dma_mask;
+
+       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+       if (unlikely(err < 0))
+               goto out_set_dma_mask;
+
+       err = pci_request_regions(pdev, SC92031_NAME);
+       if (unlikely(err < 0))
+               goto out_request_regions;
+
+       port_base = pci_iomap(pdev, SC92031_USE_BAR, 0);
+       if (unlikely(!port_base)) {
+               err = -EIO;
+               goto out_iomap;
+       }
+
+       dev = alloc_etherdev(sizeof(struct sc92031_priv));
+       if (unlikely(!dev)) {
+               err = -ENOMEM;
+               goto out_alloc_etherdev;
+       }
+
+       pci_set_drvdata(pdev, dev);
+       SET_NETDEV_DEV(dev, &pdev->dev);
+
+#if SC92031_USE_BAR == 0
+       dev->mem_start = pci_resource_start(pdev, SC92031_USE_BAR);
+       dev->mem_end = pci_resource_end(pdev, SC92031_USE_BAR);
+#elif SC92031_USE_BAR == 1
+       dev->base_addr = pci_resource_start(pdev, SC92031_USE_BAR);
+#endif
+       dev->irq = pdev->irq;
+
+       /* faked with skb_copy_and_csum_dev */
+       dev->features = NETIF_F_SG | NETIF_F_HIGHDMA |
+               NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+
+       dev->netdev_ops         = &sc92031_netdev_ops;
+       dev->watchdog_timeo     = TX_TIMEOUT;
+       dev->ethtool_ops        = &sc92031_ethtool_ops;
+
+       priv = netdev_priv(dev);
+       spin_lock_init(&priv->lock);
+       priv->port_base = port_base;
+       priv->pdev = pdev;
+       tasklet_init(&priv->tasklet, sc92031_tasklet, (unsigned long)dev);
+       /* Fudge tasklet count so the call to sc92031_enable_interrupts at
+        * sc92031_open will work correctly */
+       tasklet_disable_nosync(&priv->tasklet);
+
+       /* PCI PM Wakeup */
+       iowrite32((~PM_LongWF & ~PM_LWPTN) | PM_Enable, port_base + PMConfig);
+
+       mac0 = ioread32(port_base + MAC0);
+       mac1 = ioread32(port_base + MAC0 + 4);
+       dev->dev_addr[0] = dev->perm_addr[0] = mac0 >> 24;
+       dev->dev_addr[1] = dev->perm_addr[1] = mac0 >> 16;
+       dev->dev_addr[2] = dev->perm_addr[2] = mac0 >> 8;
+       dev->dev_addr[3] = dev->perm_addr[3] = mac0;
+       dev->dev_addr[4] = dev->perm_addr[4] = mac1 >> 8;
+       dev->dev_addr[5] = dev->perm_addr[5] = mac1;
+
+       err = register_netdev(dev);
+       if (err < 0)
+               goto out_register_netdev;
+
+#if SC92031_USE_BAR == 0
+       base_addr = dev->mem_start;
+#elif SC92031_USE_BAR == 1
+       base_addr = dev->base_addr;
+#endif
+       printk(KERN_INFO "%s: SC92031 at 0x%lx, %pM, IRQ %d\n", dev->name,
+                       base_addr, dev->dev_addr, dev->irq);
+
+       return 0;
+
+out_register_netdev:
+       free_netdev(dev);
+out_alloc_etherdev:
+       pci_iounmap(pdev, port_base);
+out_iomap:
+       pci_release_regions(pdev);
+out_request_regions:
+out_set_dma_mask:
+       pci_disable_device(pdev);
+out_enable_device:
+       return err;
+}
+
+static void __devexit sc92031_remove(struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct sc92031_priv *priv = netdev_priv(dev);
+       void __iomem* port_base = priv->port_base;
+
+       unregister_netdev(dev);
+       free_netdev(dev);
+       pci_iounmap(pdev, port_base);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+}
+
+static int sc92031_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct sc92031_priv *priv = netdev_priv(dev);
+
+       pci_save_state(pdev);
+
+       if (!netif_running(dev))
+               goto out;
+
+       netif_device_detach(dev);
+
+       /* Disable interrupts, stop Tx and Rx. */
+       sc92031_disable_interrupts(dev);
+
+       spin_lock_bh(&priv->lock);
+
+       _sc92031_disable_tx_rx(dev);
+       _sc92031_tx_clear(dev);
+       mmiowb();
+
+       spin_unlock_bh(&priv->lock);
+
+out:
+       pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+       return 0;
+}
+
+static int sc92031_resume(struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct sc92031_priv *priv = netdev_priv(dev);
+
+       pci_restore_state(pdev);
+       pci_set_power_state(pdev, PCI_D0);
+
+       if (!netif_running(dev))
+               goto out;
+
+       /* Interrupts already disabled by sc92031_suspend */
+       spin_lock_bh(&priv->lock);
+
+       _sc92031_reset(dev);
+       mmiowb();
+
+       spin_unlock_bh(&priv->lock);
+       sc92031_enable_interrupts(dev);
+
+       netif_device_attach(dev);
+
+       if (netif_carrier_ok(dev))
+               netif_wake_queue(dev);
+       else
+               netif_tx_disable(dev);
+
+out:
+       return 0;
+}
+
+static DEFINE_PCI_DEVICE_TABLE(sc92031_pci_device_id_table) = {
+       { PCI_DEVICE(PCI_VENDOR_ID_SILAN, 0x2031) },
+       { PCI_DEVICE(PCI_VENDOR_ID_SILAN, 0x8139) },
+       { PCI_DEVICE(0x1088, 0x2031) },
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sc92031_pci_device_id_table);
+
+static struct pci_driver sc92031_pci_driver = {
+       .name           = SC92031_NAME,
+       .id_table       = sc92031_pci_device_id_table,
+       .probe          = sc92031_probe,
+       .remove         = __devexit_p(sc92031_remove),
+       .suspend        = sc92031_suspend,
+       .resume         = sc92031_resume,
+};
+
+static int __init sc92031_init(void)
+{
+       return pci_register_driver(&sc92031_pci_driver);
+}
+
+static void __exit sc92031_exit(void)
+{
+       pci_unregister_driver(&sc92031_pci_driver);
+}
+
+module_init(sc92031_init);
+module_exit(sc92031_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Cesar Eduardo Barros <cesarb@cesarb.net>");
+MODULE_DESCRIPTION("Silan SC92031 PCI Fast Ethernet Adapter driver");
index 4d1658e78dee50e8493994d4d5d421fe176362a9..caf3659e173cda3a04735cb1ecad7cfd94de27c7 100644 (file)
@@ -716,8 +716,8 @@ static int temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
                cur_p->phys = dma_map_single(ndev->dev.parent,
                                             skb_frag_address(frag),
-                                            frag_size(frag), DMA_TO_DEVICE);
-               cur_p->len = frag_size(frag);
+                                            skb_frag_size(frag), DMA_TO_DEVICE);
+               cur_p->len = skb_frag_size(frag);
                cur_p->app0 = 0;
                frag++;
        }
index a40fab44b9aed9b3fede68af733db78f061767d2..d423d18b4ad6a96574a39595632d4ea5b69179fb 100644 (file)
@@ -314,7 +314,7 @@ config TOSHIBA_FIR
 
 config AU1000_FIR
        tristate "Alchemy Au1000 SIR/FIR"
-       depends on SOC_AU1000 && IRDA
+       depends on IRDA && MIPS_ALCHEMY
 
 config SMC_IRCC_FIR
        tristate "SMSC IrCC (EXPERIMENTAL)"
index 3bb131137033cb9d9e53daaf0ad0ee89310fae65..7145714a5ec95776f187b47b022c32751cfba792 100644 (file)
@@ -88,8 +88,8 @@ static struct rio_dev **rionet_active;
 #define dev_rionet_capable(dev) \
        is_rionet_capable(dev->src_ops, dev->dst_ops)
 
-#define RIONET_MAC_MATCH(x)    (*(u32 *)x == 0x00010001)
-#define RIONET_GET_DESTID(x)   (*(u16 *)(x + 4))
+#define RIONET_MAC_MATCH(x)    (!memcmp((x), "\00\01\00\01", 4))
+#define RIONET_GET_DESTID(x)   ((*((u8 *)x + 4) << 8) | *((u8 *)x + 5))
 
 static int rionet_rx_clean(struct net_device *ndev)
 {
index 91039ab1672825b17c0e1dbbba77a3ddd49e84d0..6ee8410443c46312ca62810199dc14ffbaf53966 100644 (file)
@@ -925,12 +925,10 @@ static void virtnet_update_status(struct virtnet_info *vi)
 {
        u16 v;
 
-       if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS))
-               return;
-
-       vi->vdev->config->get(vi->vdev,
+       if (virtio_config_val(vi->vdev, VIRTIO_NET_F_STATUS,
                              offsetof(struct virtio_net_config, status),
-                             &v, sizeof(v));
+                             &v) < 0)
+               return;
 
        /* Ignore unknown (future) status bits */
        v &= VIRTIO_NET_S_LINK_UP;
@@ -1006,11 +1004,9 @@ static int virtnet_probe(struct virtio_device *vdev)
        }
 
        /* Configuration may specify what MAC to use.  Otherwise random. */
-       if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
-               vdev->config->get(vdev,
+       if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC,
                                  offsetof(struct virtio_net_config, mac),
-                                 dev->dev_addr, dev->addr_len);
-       } else
+                                 dev->dev_addr, dev->addr_len) < 0)
                random_ether_addr(dev->dev_addr);
 
        /* Set up our device-specific information */
index 073548836413453c8a646c24ab863277147d4780..09602241901be1c391ae78e1f6f0eac84c28f502 100644 (file)
@@ -1,6 +1,6 @@
 menuconfig ATH_COMMON
        tristate "Atheros Wireless Cards"
-       depends on CFG80211
+       depends on CFG80211 && (!UML || BROKEN)
        ---help---
          This will enable the support for the Atheros wireless drivers.
          ath5k, ath9k, ath9k_htc and ar9170 drivers share some common code, this option
index 45e14760c16e5390b4814ff3002abcec98309083..d6c42e69bdbd467225d4db746f0e0f82d860fd68 100644 (file)
@@ -12,7 +12,7 @@ config RTL8192CE
 
 config RTL8192SE
        tristate "Realtek RTL8192SE/RTL8191SE PCIe Wireless Network Adapter"
-       depends on MAC80211 && EXPERIMENTAL
+       depends on MAC80211 && EXPERIMENTAL && PCI
        select FW_LOADER
        select RTLWIFI
        ---help---
@@ -23,7 +23,7 @@ config RTL8192SE
 
 config RTL8192DE
        tristate "Realtek RTL8192DE/RTL8188DE PCIe Wireless Network Adapter"
-       depends on MAC80211 && EXPERIMENTAL
+       depends on MAC80211 && EXPERIMENTAL && PCI
        select FW_LOADER
        select RTLWIFI
        ---help---
index 57de051a74b340bfb11da61e787fc1944321aad1..9f88641e67f9255e0219071eb5a702a1289bb066 100644 (file)
@@ -70,6 +70,7 @@ config BATTERY_DS2760
 
 config BATTERY_DS2780
        tristate "DS2780 battery driver"
+       depends on HAS_IOMEM
        select W1
        select W1_SLAVE_DS2780
        help
index 1fefe82e12e3b94f59add3abb7af9db44047f1a8..91a783d72360b314094a134d5284a89e751b3ca9 100644 (file)
@@ -39,6 +39,7 @@ struct ds2780_device_info {
        struct device *dev;
        struct power_supply bat;
        struct device *w1_dev;
+       struct task_struct *mutex_holder;
 };
 
 enum current_types {
@@ -49,8 +50,8 @@ enum current_types {
 static const char model[] = "DS2780";
 static const char manufacturer[] = "Maxim/Dallas";
 
-static inline struct ds2780_device_info *to_ds2780_device_info(
-       struct power_supply *psy)
+static inline struct ds2780_device_info *
+to_ds2780_device_info(struct power_supply *psy)
 {
        return container_of(psy, struct ds2780_device_info, bat);
 }
@@ -60,17 +61,28 @@ static inline struct power_supply *to_power_supply(struct device *dev)
        return dev_get_drvdata(dev);
 }
 
-static inline int ds2780_read8(struct device *dev, u8 *val, int addr)
+static inline int ds2780_battery_io(struct ds2780_device_info *dev_info,
+       char *buf, int addr, size_t count, int io)
 {
-       return w1_ds2780_io(dev, val, addr, sizeof(u8), 0);
+       if (dev_info->mutex_holder == current)
+               return w1_ds2780_io_nolock(dev_info->w1_dev, buf, addr, count, io);
+       else
+               return w1_ds2780_io(dev_info->w1_dev, buf, addr, count, io);
+}
+
+static inline int ds2780_read8(struct ds2780_device_info *dev_info, u8 *val,
+       int addr)
+{
+       return ds2780_battery_io(dev_info, val, addr, sizeof(u8), 0);
 }
 
-static int ds2780_read16(struct device *dev, s16 *val, int addr)
+static int ds2780_read16(struct ds2780_device_info *dev_info, s16 *val,
+       int addr)
 {
        int ret;
        u8 raw[2];
 
-       ret = w1_ds2780_io(dev, raw, addr, sizeof(u8) * 2, 0);
+       ret = ds2780_battery_io(dev_info, raw, addr, sizeof(raw), 0);
        if (ret < 0)
                return ret;
 
@@ -79,16 +91,16 @@ static int ds2780_read16(struct device *dev, s16 *val, int addr)
        return 0;
 }
 
-static inline int ds2780_read_block(struct device *dev, u8 *val, int addr,
-       size_t count)
+static inline int ds2780_read_block(struct ds2780_device_info *dev_info,
+       u8 *val, int addr, size_t count)
 {
-       return w1_ds2780_io(dev, val, addr, count, 0);
+       return ds2780_battery_io(dev_info, val, addr, count, 0);
 }
 
-static inline int ds2780_write(struct device *dev, u8 *val, int addr,
-       size_t count)
+static inline int ds2780_write(struct ds2780_device_info *dev_info, u8 *val,
+       int addr, size_t count)
 {
-       return w1_ds2780_io(dev, val, addr, count, 1);
+       return ds2780_battery_io(dev_info, val, addr, count, 1);
 }
 
 static inline int ds2780_store_eeprom(struct device *dev, int addr)
@@ -122,7 +134,7 @@ static int ds2780_set_sense_register(struct ds2780_device_info *dev_info,
 {
        int ret;
 
-       ret = ds2780_write(dev_info->w1_dev, &conductance,
+       ret = ds2780_write(dev_info, &conductance,
                                DS2780_RSNSP_REG, sizeof(u8));
        if (ret < 0)
                return ret;
@@ -134,7 +146,7 @@ static int ds2780_set_sense_register(struct ds2780_device_info *dev_info,
 static int ds2780_get_rsgain_register(struct ds2780_device_info *dev_info,
        u16 *rsgain)
 {
-       return ds2780_read16(dev_info->w1_dev, rsgain, DS2780_RSGAIN_MSB_REG);
+       return ds2780_read16(dev_info, rsgain, DS2780_RSGAIN_MSB_REG);
 }
 
 /* Set RSGAIN value from 0 to 1.999 in steps of 0.001 */
@@ -144,8 +156,8 @@ static int ds2780_set_rsgain_register(struct ds2780_device_info *dev_info,
        int ret;
        u8 raw[] = {rsgain >> 8, rsgain & 0xFF};
 
-       ret = ds2780_write(dev_info->w1_dev, raw,
-                               DS2780_RSGAIN_MSB_REG, sizeof(u8) * 2);
+       ret = ds2780_write(dev_info, raw,
+                               DS2780_RSGAIN_MSB_REG, sizeof(raw));
        if (ret < 0)
                return ret;
 
@@ -167,7 +179,7 @@ static int ds2780_get_voltage(struct ds2780_device_info *dev_info,
         * Bits 2 - 0 of the voltage value are in bits 7 - 5 of the
         * voltage LSB register
         */
-       ret = ds2780_read16(dev_info->w1_dev, &voltage_raw,
+       ret = ds2780_read16(dev_info, &voltage_raw,
                                DS2780_VOLT_MSB_REG);
        if (ret < 0)
                return ret;
@@ -196,7 +208,7 @@ static int ds2780_get_temperature(struct ds2780_device_info *dev_info,
         * Bits 2 - 0 of the temperature value are in bits 7 - 5 of the
         * temperature LSB register
         */
-       ret = ds2780_read16(dev_info->w1_dev, &temperature_raw,
+       ret = ds2780_read16(dev_info, &temperature_raw,
                                DS2780_TEMP_MSB_REG);
        if (ret < 0)
                return ret;
@@ -222,13 +234,13 @@ static int ds2780_get_current(struct ds2780_device_info *dev_info,
         * The units of measurement for current are dependent on the value of
         * the sense resistor.
         */
-       ret = ds2780_read8(dev_info->w1_dev, &sense_res_raw, DS2780_RSNSP_REG);
+       ret = ds2780_read8(dev_info, &sense_res_raw, DS2780_RSNSP_REG);
        if (ret < 0)
                return ret;
 
        if (sense_res_raw == 0) {
                dev_err(dev_info->dev, "sense resistor value is 0\n");
-               return -ENXIO;
+               return -EINVAL;
        }
        sense_res = 1000 / sense_res_raw;
 
@@ -248,7 +260,7 @@ static int ds2780_get_current(struct ds2780_device_info *dev_info,
         * Bits 7 - 0 of the current value are in bits 7 - 0 of the current
         * LSB register
         */
-       ret = ds2780_read16(dev_info->w1_dev, &current_raw, reg_msb);
+       ret = ds2780_read16(dev_info, &current_raw, reg_msb);
        if (ret < 0)
                return ret;
 
@@ -267,7 +279,7 @@ static int ds2780_get_accumulated_current(struct ds2780_device_info *dev_info,
         * The units of measurement for accumulated current are dependent on
         * the value of the sense resistor.
         */
-       ret = ds2780_read8(dev_info->w1_dev, &sense_res_raw, DS2780_RSNSP_REG);
+       ret = ds2780_read8(dev_info, &sense_res_raw, DS2780_RSNSP_REG);
        if (ret < 0)
                return ret;
 
@@ -285,7 +297,7 @@ static int ds2780_get_accumulated_current(struct ds2780_device_info *dev_info,
         * Bits 7 - 0 of the ACR value are in bits 7 - 0 of the ACR
         * LSB register
         */
-       ret = ds2780_read16(dev_info->w1_dev, &current_raw, DS2780_ACR_MSB_REG);
+       ret = ds2780_read16(dev_info, &current_raw, DS2780_ACR_MSB_REG);
        if (ret < 0)
                return ret;
 
@@ -299,7 +311,7 @@ static int ds2780_get_capacity(struct ds2780_device_info *dev_info,
        int ret;
        u8 raw;
 
-       ret = ds2780_read8(dev_info->w1_dev, &raw, DS2780_RARC_REG);
+       ret = ds2780_read8(dev_info, &raw, DS2780_RARC_REG);
        if (ret < 0)
                return ret;
 
@@ -345,7 +357,7 @@ static int ds2780_get_charge_now(struct ds2780_device_info *dev_info,
         * Bits 7 - 0 of the RAAC value are in bits 7 - 0 of the RAAC
         * LSB register
         */
-       ret = ds2780_read16(dev_info->w1_dev, &charge_raw, DS2780_RAAC_MSB_REG);
+       ret = ds2780_read16(dev_info, &charge_raw, DS2780_RAAC_MSB_REG);
        if (ret < 0)
                return ret;
 
@@ -356,7 +368,7 @@ static int ds2780_get_charge_now(struct ds2780_device_info *dev_info,
 static int ds2780_get_control_register(struct ds2780_device_info *dev_info,
        u8 *control_reg)
 {
-       return ds2780_read8(dev_info->w1_dev, control_reg, DS2780_CONTROL_REG);
+       return ds2780_read8(dev_info, control_reg, DS2780_CONTROL_REG);
 }
 
 static int ds2780_set_control_register(struct ds2780_device_info *dev_info,
@@ -364,7 +376,7 @@ static int ds2780_set_control_register(struct ds2780_device_info *dev_info,
 {
        int ret;
 
-       ret = ds2780_write(dev_info->w1_dev, &control_reg,
+       ret = ds2780_write(dev_info, &control_reg,
                                DS2780_CONTROL_REG, sizeof(u8));
        if (ret < 0)
                return ret;
@@ -503,7 +515,7 @@ static ssize_t ds2780_get_sense_resistor_value(struct device *dev,
        struct power_supply *psy = to_power_supply(dev);
        struct ds2780_device_info *dev_info = to_ds2780_device_info(psy);
 
-       ret = ds2780_read8(dev_info->w1_dev, &sense_resistor, DS2780_RSNSP_REG);
+       ret = ds2780_read8(dev_info, &sense_resistor, DS2780_RSNSP_REG);
        if (ret < 0)
                return ret;
 
@@ -584,7 +596,7 @@ static ssize_t ds2780_get_pio_pin(struct device *dev,
        struct power_supply *psy = to_power_supply(dev);
        struct ds2780_device_info *dev_info = to_ds2780_device_info(psy);
 
-       ret = ds2780_read8(dev_info->w1_dev, &sfr, DS2780_SFR_REG);
+       ret = ds2780_read8(dev_info, &sfr, DS2780_SFR_REG);
        if (ret < 0)
                return ret;
 
@@ -611,7 +623,7 @@ static ssize_t ds2780_set_pio_pin(struct device *dev,
                return -EINVAL;
        }
 
-       ret = ds2780_write(dev_info->w1_dev, &new_setting,
+       ret = ds2780_write(dev_info, &new_setting,
                                DS2780_SFR_REG, sizeof(u8));
        if (ret < 0)
                return ret;
@@ -632,7 +644,7 @@ static ssize_t ds2780_read_param_eeprom_bin(struct file *filp,
                DS2780_EEPROM_BLOCK1_END -
                DS2780_EEPROM_BLOCK1_START + 1 - off);
 
-       return ds2780_read_block(dev_info->w1_dev, buf,
+       return ds2780_read_block(dev_info, buf,
                                DS2780_EEPROM_BLOCK1_START + off, count);
 }
 
@@ -650,7 +662,7 @@ static ssize_t ds2780_write_param_eeprom_bin(struct file *filp,
                DS2780_EEPROM_BLOCK1_END -
                DS2780_EEPROM_BLOCK1_START + 1 - off);
 
-       ret = ds2780_write(dev_info->w1_dev, buf,
+       ret = ds2780_write(dev_info, buf,
                                DS2780_EEPROM_BLOCK1_START + off, count);
        if (ret < 0)
                return ret;
@@ -685,9 +697,8 @@ static ssize_t ds2780_read_user_eeprom_bin(struct file *filp,
                DS2780_EEPROM_BLOCK0_END -
                DS2780_EEPROM_BLOCK0_START + 1 - off);
 
-       return ds2780_read_block(dev_info->w1_dev, buf,
+       return ds2780_read_block(dev_info, buf,
                                DS2780_EEPROM_BLOCK0_START + off, count);
-
 }
 
 static ssize_t ds2780_write_user_eeprom_bin(struct file *filp,
@@ -704,7 +715,7 @@ static ssize_t ds2780_write_user_eeprom_bin(struct file *filp,
                DS2780_EEPROM_BLOCK0_END -
                DS2780_EEPROM_BLOCK0_START + 1 - off);
 
-       ret = ds2780_write(dev_info->w1_dev, buf,
+       ret = ds2780_write(dev_info, buf,
                                DS2780_EEPROM_BLOCK0_START + off, count);
        if (ret < 0)
                return ret;
@@ -768,6 +779,7 @@ static int __devinit ds2780_battery_probe(struct platform_device *pdev)
        dev_info->bat.properties        = ds2780_battery_props;
        dev_info->bat.num_properties    = ARRAY_SIZE(ds2780_battery_props);
        dev_info->bat.get_property      = ds2780_battery_get_property;
+       dev_info->mutex_holder          = current;
 
        ret = power_supply_register(&pdev->dev, &dev_info->bat);
        if (ret) {
@@ -797,6 +809,8 @@ static int __devinit ds2780_battery_probe(struct platform_device *pdev)
                goto fail_remove_bin_file;
        }
 
+       dev_info->mutex_holder = NULL;
+
        return 0;
 
 fail_remove_bin_file:
@@ -816,6 +830,8 @@ static int __devexit ds2780_battery_remove(struct platform_device *pdev)
 {
        struct ds2780_device_info *dev_info = platform_get_drvdata(pdev);
 
+       dev_info->mutex_holder = current;
+
        /* remove attributes */
        sysfs_remove_group(&dev_info->bat.dev->kobj, &ds2780_attr_group);
 
index 8520a7f4dd62048eb5d9391341bf05baa26defc7..445197d4a8c44f2810d44282f09755817067146e 100644 (file)
@@ -29,4 +29,13 @@ config PPS_CLIENT_PARPORT
          If you say yes here you get support for a PPS source connected
          with the interrupt pin of your parallel port.
 
+config PPS_CLIENT_GPIO
+       tristate "PPS client using GPIO"
+       depends on PPS && GENERIC_HARDIRQS
+       help
+         If you say yes here you get support for a PPS source using
+         GPIO. To be useful you must also register a platform device
+         specifying the GPIO pin and other options, usually in your board
+         setup.
+
 endif
index 4feb7e9e71ee6c14fb8ce8585437ccd3d7dde24c..a461d15f4a2e8531a8fab7eff16ad7fa73ef4658 100644 (file)
@@ -5,5 +5,6 @@
 obj-$(CONFIG_PPS_CLIENT_KTIMER)        += pps-ktimer.o
 obj-$(CONFIG_PPS_CLIENT_LDISC) += pps-ldisc.o
 obj-$(CONFIG_PPS_CLIENT_PARPORT) += pps_parport.o
+obj-$(CONFIG_PPS_CLIENT_GPIO)  += pps-gpio.o
 
 ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG
diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c
new file mode 100644 (file)
index 0000000..6550555
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+ * pps-gpio.c -- PPS client driver using GPIO
+ *
+ *
+ * Copyright (C) 2010 Ricardo Martins <rasm@fe.up.pt>
+ * Copyright (C) 2011 James Nuss <jamesnuss@nanometrics.ca>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define PPS_GPIO_NAME "pps-gpio"
+#define pr_fmt(fmt) PPS_GPIO_NAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/pps_kernel.h>
+#include <linux/pps-gpio.h>
+#include <linux/gpio.h>
+#include <linux/list.h>
+
+/* Info for each registered platform device */
+struct pps_gpio_device_data {
+       int irq;                        /* IRQ used as PPS source */
+       struct pps_device *pps;         /* PPS source device */
+       struct pps_source_info info;    /* PPS source information */
+       const struct pps_gpio_platform_data *pdata;
+};
+
+/*
+ * Report the PPS event
+ */
+
+static irqreturn_t pps_gpio_irq_handler(int irq, void *data)
+{
+       const struct pps_gpio_device_data *info;
+       struct pps_event_time ts;
+       int rising_edge;
+
+       /* Get the time stamp first */
+       pps_get_ts(&ts);
+
+       info = data;
+
+       rising_edge = gpio_get_value(info->pdata->gpio_pin);
+       if ((rising_edge && !info->pdata->assert_falling_edge) ||
+                       (!rising_edge && info->pdata->assert_falling_edge))
+               pps_event(info->pps, &ts, PPS_CAPTUREASSERT, NULL);
+       else if (info->pdata->capture_clear &&
+                       ((rising_edge && info->pdata->assert_falling_edge) ||
+                        (!rising_edge && !info->pdata->assert_falling_edge)))
+               pps_event(info->pps, &ts, PPS_CAPTURECLEAR, NULL);
+
+       return IRQ_HANDLED;
+}
+
+static int pps_gpio_setup(struct platform_device *pdev)
+{
+       int ret;
+       const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+       ret = gpio_request(pdata->gpio_pin, pdata->gpio_label);
+       if (ret) {
+               pr_warning("failed to request GPIO %u\n", pdata->gpio_pin);
+               return -EINVAL;
+       }
+
+       ret = gpio_direction_input(pdata->gpio_pin);
+       if (ret) {
+               pr_warning("failed to set pin direction\n");
+               gpio_free(pdata->gpio_pin);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static unsigned long
+get_irqf_trigger_flags(const struct pps_gpio_platform_data *pdata)
+{
+       unsigned long flags = pdata->assert_falling_edge ?
+               IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING;
+
+       if (pdata->capture_clear) {
+               flags |= ((flags & IRQF_TRIGGER_RISING) ?
+                               IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING);
+       }
+
+       return flags;
+}
+
+static int pps_gpio_probe(struct platform_device *pdev)
+{
+       struct pps_gpio_device_data *data;
+       int irq;
+       int ret;
+       int err;
+       int pps_default_params;
+       const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+
+       /* GPIO setup */
+       ret = pps_gpio_setup(pdev);
+       if (ret)
+               return -EINVAL;
+
+       /* IRQ setup */
+       irq = gpio_to_irq(pdata->gpio_pin);
+       if (irq < 0) {
+               pr_err("failed to map GPIO to IRQ: %d\n", irq);
+               err = -EINVAL;
+               goto return_error;
+       }
+
+       /* allocate space for device info */
+       data = kzalloc(sizeof(struct pps_gpio_device_data), GFP_KERNEL);
+       if (data == NULL) {
+               err = -ENOMEM;
+               goto return_error;
+       }
+
+       /* initialize PPS specific parts of the bookkeeping data structure. */
+       data->info.mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT |
+               PPS_ECHOASSERT | PPS_CANWAIT | PPS_TSFMT_TSPEC;
+       if (pdata->capture_clear)
+               data->info.mode |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR |
+                       PPS_ECHOCLEAR;
+       data->info.owner = THIS_MODULE;
+       snprintf(data->info.name, PPS_MAX_NAME_LEN - 1, "%s.%d",
+                pdev->name, pdev->id);
+
+       /* register PPS source */
+       pps_default_params = PPS_CAPTUREASSERT | PPS_OFFSETASSERT;
+       if (pdata->capture_clear)
+               pps_default_params |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR;
+       data->pps = pps_register_source(&data->info, pps_default_params);
+       if (data->pps == NULL) {
+               kfree(data);
+               pr_err("failed to register IRQ %d as PPS source\n", irq);
+               err = -EINVAL;
+               goto return_error;
+       }
+
+       data->irq = irq;
+       data->pdata = pdata;
+
+       /* register IRQ interrupt handler */
+       ret = request_irq(irq, pps_gpio_irq_handler,
+                       get_irqf_trigger_flags(pdata), data->info.name, data);
+       if (ret) {
+               pps_unregister_source(data->pps);
+               kfree(data);
+               pr_err("failed to acquire IRQ %d\n", irq);
+               err = -EINVAL;
+               goto return_error;
+       }
+
+       platform_set_drvdata(pdev, data);
+       dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n", irq);
+
+       return 0;
+
+return_error:
+       gpio_free(pdata->gpio_pin);
+       return err;
+}
+
+static int __devexit pps_gpio_remove(struct platform_device *pdev)
+{
+       struct pps_gpio_device_data *data = platform_get_drvdata(pdev);
+       const struct pps_gpio_platform_data *pdata = data->pdata;
+
+       platform_set_drvdata(pdev, NULL);
+       free_irq(data->irq, data);
+       gpio_free(pdata->gpio_pin);
+       pps_unregister_source(data->pps);
+       pr_info("removed IRQ %d as PPS source\n", data->irq);
+       kfree(data);
+       return 0;
+}
+
+static struct platform_driver pps_gpio_driver = {
+       .probe          = pps_gpio_probe,
+       .remove         = __devexit_p(pps_gpio_remove),
+       .driver         = {
+               .name   = PPS_GPIO_NAME,
+               .owner  = THIS_MODULE
+       },
+};
+
+static int __init pps_gpio_init(void)
+{
+       int ret = platform_driver_register(&pps_gpio_driver);
+       if (ret < 0)
+               pr_err("failed to register platform driver\n");
+       return ret;
+}
+
+static void __exit pps_gpio_exit(void)
+{
+       platform_driver_unregister(&pps_gpio_driver);
+       pr_debug("unregistered platform driver\n");
+}
+
+module_init(pps_gpio_init);
+module_exit(pps_gpio_exit);
+
+MODULE_AUTHOR("Ricardo Martins <rasm@fe.up.pt>");
+MODULE_AUTHOR("James Nuss <jamesnuss@nanometrics.ca>");
+MODULE_DESCRIPTION("Use GPIO pin as PPS source");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
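+
+#if 0
+/*
+ * Usage sketch (editorial, illustrative only): a board file would bind
+ * this driver by registering a platform device that carries a
+ * struct pps_gpio_platform_data.  The GPIO number below is hypothetical.
+ */
+static struct pps_gpio_platform_data pps_gpio_info = {
+       .gpio_pin               = 42,   /* hypothetical GPIO */
+       .gpio_label             = "PPS",
+       .assert_falling_edge    = false,
+       .capture_clear          = false,
+};
+
+static struct platform_device pps_gpio_device = {
+       .name   = PPS_GPIO_NAME,
+       .id     = -1,
+       .dev    = {
+               .platform_data = &pps_gpio_info,
+       },
+};
+#endif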
index 82583b0ff82dac05efd9c1d495654ccd244419cc..436b4e4e71a149384c1246baa2a508e4dce21f72 100644 (file)
@@ -51,17 +51,6 @@ static void pps_ktimer_event(unsigned long ptr)
        mod_timer(&ktimer, jiffies + HZ);
 }
 
-/*
- * The echo function
- */
-
-static void pps_ktimer_echo(struct pps_device *pps, int event, void *data)
-{
-       dev_info(pps->dev, "echo %s %s\n",
-               event & PPS_CAPTUREASSERT ? "assert" : "",
-               event & PPS_CAPTURECLEAR ? "clear" : "");
-}
-
 /*
  * The PPS info struct
  */
@@ -72,7 +61,6 @@ static struct pps_source_info pps_ktimer_info = {
        .mode           = PPS_CAPTUREASSERT | PPS_OFFSETASSERT |
                          PPS_ECHOASSERT |
                          PPS_CANWAIT | PPS_TSFMT_TSPEC,
-       .echo           = pps_ktimer_echo,
        .owner          = THIS_MODULE,
 };
 
index c571d6dd8f61f626bdaeb6405de5a3f8aa7a30a6..e1b4705ae3ec83ec1beddfda1dc98d4259bd2a6d 100644 (file)
@@ -133,14 +133,6 @@ out_both:
        return;
 }
 
-/* the PPS echo function */
-static void pps_echo(struct pps_device *pps, int event, void *data)
-{
-       dev_info(pps->dev, "echo %s %s\n",
-               event & PPS_CAPTUREASSERT ? "assert" : "",
-               event & PPS_CAPTURECLEAR ? "clear" : "");
-}
-
 static void parport_attach(struct parport *port)
 {
        struct pps_client_pp *device;
@@ -151,7 +143,6 @@ static void parport_attach(struct parport *port)
                                  PPS_OFFSETASSERT | PPS_OFFSETCLEAR | \
                                  PPS_ECHOASSERT | PPS_ECHOCLEAR | \
                                  PPS_CANWAIT | PPS_TSFMT_TSPEC,
-               .echo           = pps_echo,
                .owner          = THIS_MODULE,
                .dev            = NULL
        };
index a4e8eb9fece6a53c7e2ade169df43dcd9a8696a0..f197e8ea185c4099abc1289cd1b6cf74947f3bed 100644 (file)
@@ -52,6 +52,14 @@ static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset)
        ts->sec += offset->sec;
 }
 
+static void pps_echo_client_default(struct pps_device *pps, int event,
+               void *data)
+{
+       dev_info(pps->dev, "echo %s %s\n",
+               event & PPS_CAPTUREASSERT ? "assert" : "",
+               event & PPS_CAPTURECLEAR ? "clear" : "");
+}
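+
+/*
+ * Example output (editorial note): for a source with PPS_CAPTUREASSERT
+ * set, the default handler above logs lines of the form "echo assert "
+ * (and "echo  clear" for PPS_CAPTURECLEAR) against the PPS device.
+ */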
+
 /*
  * Exported functions
  */
@@ -80,13 +88,6 @@ struct pps_device *pps_register_source(struct pps_source_info *info,
                err = -EINVAL;
                goto pps_register_source_exit;
        }
-       if ((info->mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) != 0 &&
-                       info->echo == NULL) {
-               pr_err("%s: echo function is not defined\n",
-                                       info->name);
-               err = -EINVAL;
-               goto pps_register_source_exit;
-       }
        if ((info->mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
                pr_err("%s: unspecified time format\n",
                                        info->name);
@@ -108,6 +109,11 @@ struct pps_device *pps_register_source(struct pps_source_info *info,
        pps->params.mode = default_params;
        pps->info = *info;
 
+       /* check for default echo function */
+       if ((pps->info.mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) &&
+                       pps->info.echo == NULL)
+               pps->info.echo = pps_echo_client_default;
+
        init_waitqueue_head(&pps->queue);
        spin_lock_init(&pps->lock);
 
index 070211a5955c7dc140799b55a2f57bbbe5ce0191..bc871923879303f5771a27c83f7c148850682ddd 100644 (file)
@@ -1,6 +1,8 @@
 #
 # RapidIO configuration
 #
+source "drivers/rapidio/devices/Kconfig"
+
 config RAPIDIO_DISC_TIMEOUT
        int "Discovery timeout duration (seconds)"
        depends on RAPIDIO
@@ -20,8 +22,6 @@ config RAPIDIO_ENABLE_RX_TX_PORTS
          ports for Input/Output direction to allow other traffic
          than Maintenance transfers.
 
-source "drivers/rapidio/switches/Kconfig"
-
 config RAPIDIO_DEBUG
        bool "RapidIO subsystem debug messages"
        depends on RAPIDIO
@@ -32,3 +32,5 @@ config RAPIDIO_DEBUG
          going on.
 
          If you are unsure about this, say N here.
+
+source "drivers/rapidio/switches/Kconfig"
index 89b8eca825b55f28dc6a29afa8eabed91730bd3e..ec3fb81210041e532206faac578cc9c897edaf82 100644 (file)
@@ -4,5 +4,6 @@
 obj-y += rio.o rio-access.o rio-driver.o rio-scan.o rio-sysfs.o
 
 obj-$(CONFIG_RAPIDIO)          += switches/
+obj-$(CONFIG_RAPIDIO)          += devices/
 
 subdir-ccflags-$(CONFIG_RAPIDIO_DEBUG) := -DDEBUG
diff --git a/drivers/rapidio/devices/Kconfig b/drivers/rapidio/devices/Kconfig
new file mode 100644 (file)
index 0000000..12a9d7f
--- /dev/null
@@ -0,0 +1,10 @@
+#
+# RapidIO master port configuration
+#
+
+config RAPIDIO_TSI721
+       bool "IDT Tsi721 PCI Express SRIO Controller support"
+       depends on RAPIDIO && PCIEPORTBUS
+       default "n"
+       ---help---
+         Include support for IDT Tsi721 PCI Express Serial RapidIO controller.
diff --git a/drivers/rapidio/devices/Makefile b/drivers/rapidio/devices/Makefile
new file mode 100644 (file)
index 0000000..3b7b4e2
--- /dev/null
@@ -0,0 +1,5 @@
+#
+# Makefile for RapidIO devices
+#
+
+obj-$(CONFIG_RAPIDIO_TSI721)   += tsi721.o
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
new file mode 100644 (file)
index 0000000..5225930
--- /dev/null
@@ -0,0 +1,2360 @@
+/*
+ * RapidIO mport driver for Tsi721 PCI Express-to-SRIO bridge
+ *
+ * Copyright 2011 Integrated Device Technology, Inc.
+ * Alexandre Bounine <alexandre.bounine@idt.com>
+ * Chul Kim <chul.kim@idt.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/rio.h>
+#include <linux/rio_drv.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/kfifo.h>
+#include <linux/delay.h>
+
+#include "tsi721.h"
+
+#define DEBUG_PW       /* Inbound Port-Write debugging */
+
+static void tsi721_omsg_handler(struct tsi721_device *priv, int ch);
+static void tsi721_imsg_handler(struct tsi721_device *priv, int ch);
+
+/**
+ * tsi721_lcread - read from local SREP config space
+ * @mport: RapidIO master port info
+ * @index: ID of RapidIO interface
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Value to be read into
+ *
+ * Generates a local SREP space read. Returns %0 on
+ * success or %-EINVAL on failure.
+ */
+static int tsi721_lcread(struct rio_mport *mport, int index, u32 offset,
+                        int len, u32 *data)
+{
+       struct tsi721_device *priv = mport->priv;
+
+       if (len != sizeof(u32))
+               return -EINVAL; /* only 32-bit access is supported */
+
+       *data = ioread32(priv->regs + offset);
+
+       return 0;
+}
+
+/**
+ * tsi721_lcwrite - write into local SREP config space
+ * @mport: RapidIO master port info
+ * @index: ID of RapidIO interface
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Value to be written
+ *
+ * Generates a local write into SREP configuration space. Returns %0 on
+ * success or %-EINVAL on failure.
+ */
+static int tsi721_lcwrite(struct rio_mport *mport, int index, u32 offset,
+                         int len, u32 data)
+{
+       struct tsi721_device *priv = mport->priv;
+
+       if (len != sizeof(u32))
+               return -EINVAL; /* only 32-bit access is supported */
+
+       iowrite32(data, priv->regs + offset);
+
+       return 0;
+}
+
+/**
+ * tsi721_maint_dma - Helper function to generate RapidIO maintenance
+ *                    transactions using designated Tsi721 DMA channel.
+ * @priv: pointer to tsi721 private data
+ * @sys_size: RapidIO transport system size
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Location to be read from or write into
+ * @do_wr: Operation flag (1 == MAINT_WR)
+ *
+ * Generates a RapidIO maintenance transaction (Read or Write).
+ * Returns %0 on success and %-EINVAL or %-EFAULT on failure.
+ */
+static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
+                       u16 destid, u8 hopcount, u32 offset, int len,
+                       u32 *data, int do_wr)
+{
+       struct tsi721_dma_desc *bd_ptr;
+       u32 rd_count, swr_ptr, ch_stat;
+       int i, err = 0;
+       u32 op = do_wr ? MAINT_WR : MAINT_RD;
+
+       if (offset > (RIO_MAINT_SPACE_SZ - len) || (len != sizeof(u32)))
+               return -EINVAL;
+
+       bd_ptr = priv->bdma[TSI721_DMACH_MAINT].bd_base;
+
+       rd_count = ioread32(
+                       priv->regs + TSI721_DMAC_DRDCNT(TSI721_DMACH_MAINT));
+
+       /* Initialize DMA descriptor */
+       bd_ptr[0].type_id = cpu_to_le32((DTYPE2 << 29) | (op << 19) | destid);
+       bd_ptr[0].bcount = cpu_to_le32((sys_size << 26) | 0x04);
+       bd_ptr[0].raddr_lo = cpu_to_le32((hopcount << 24) | offset);
+       bd_ptr[0].raddr_hi = 0;
+       if (do_wr)
+               bd_ptr[0].data[0] = cpu_to_be32p(data);
+       else
+               bd_ptr[0].data[0] = 0xffffffff;
+
+       mb();
+
+       /* Start DMA operation */
+       iowrite32(rd_count + 2,
+               priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
+       ioread32(priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
+       i = 0;
+
+       /* Wait until DMA transfer is finished */
+       while ((ch_stat = ioread32(priv->regs +
+               TSI721_DMAC_STS(TSI721_DMACH_MAINT))) & TSI721_DMAC_STS_RUN) {
+               udelay(1);
+               if (++i >= 5000000) {
+                       dev_dbg(&priv->pdev->dev,
+                               "%s : DMA[%d] read timeout ch_status=%x\n",
+                               __func__, TSI721_DMACH_MAINT, ch_stat);
+                       if (!do_wr)
+                               *data = 0xffffffff;
+                       err = -EIO;
+                       goto err_out;
+               }
+       }
+
+       if (ch_stat & TSI721_DMAC_STS_ABORT) {
+               /* If DMA operation aborted due to error,
+                * reinitialize DMA channel
+                */
+               dev_dbg(&priv->pdev->dev, "%s : DMA ABORT ch_stat=%x\n",
+                       __func__, ch_stat);
+               dev_dbg(&priv->pdev->dev, "OP=%d : destid=%x hc=%x off=%x\n",
+                       do_wr ? MAINT_WR : MAINT_RD, destid, hopcount, offset);
+               iowrite32(TSI721_DMAC_INT_ALL,
+                       priv->regs + TSI721_DMAC_INT(TSI721_DMACH_MAINT));
+               iowrite32(TSI721_DMAC_CTL_INIT,
+                       priv->regs + TSI721_DMAC_CTL(TSI721_DMACH_MAINT));
+               udelay(10);
+               iowrite32(0, priv->regs +
+                               TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT));
+               udelay(1);
+               if (!do_wr)
+                       *data = 0xffffffff;
+               err = -EIO;
+               goto err_out;
+       }
+
+       if (!do_wr)
+               *data = be32_to_cpu(bd_ptr[0].data[0]);
+
+       /*
+        * Update descriptor status FIFO RD pointer.
+        * NOTE: Skipping check and clear FIFO entries because we are waiting
+        * for transfer to be completed.
+        */
+       swr_ptr = ioread32(priv->regs + TSI721_DMAC_DSWP(TSI721_DMACH_MAINT));
+       iowrite32(swr_ptr, priv->regs + TSI721_DMAC_DSRP(TSI721_DMACH_MAINT));
+err_out:
+
+       return err;
+}
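+
+/*
+ * Worked example (editorial, illustrative): a 32-bit maintenance read of
+ * config offset 0x0 from destID 0x1 at hopcount 0 on a small (8-bit)
+ * system programs descriptor 0 above roughly as
+ *
+ *   type_id  = (DTYPE2 << 29) | (MAINT_RD << 19) | 0x1
+ *   bcount   = (0 << 26) | 0x04    (sys_size 0, 4-byte transfer)
+ *   raddr_lo = (0 << 24) | 0x0     (hopcount 0, offset 0)
+ *
+ * and then advances DWRCNT by 2 to cover the data descriptor plus the
+ * ring link descriptor.
+ */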
+
+/**
+ * tsi721_cread_dma - Generate a RapidIO maintenance read transaction
+ *                    using Tsi721 BDMA engine.
+ * @mport: RapidIO master port control structure
+ * @index: ID of RapdiIO interface
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @val: Location to be read into
+ *
+ * Generates a RapidIO maintenance read transaction.
+ * Returns %0 on success and %-EINVAL or %-EFAULT on failure.
+ */
+static int tsi721_cread_dma(struct rio_mport *mport, int index, u16 destid,
+                       u8 hopcount, u32 offset, int len, u32 *data)
+{
+       struct tsi721_device *priv = mport->priv;
+
+       return tsi721_maint_dma(priv, mport->sys_size, destid, hopcount,
+                               offset, len, data, 0);
+}
+
+/**
+ * tsi721_cwrite_dma - Generate a RapidIO maintenance write transaction
+ *                     using Tsi721 BDMA engine
+ * @mport: RapidIO master port control structure
+ * @index: ID of RapidIO interface
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @val: Value to be written
+ *
+ * Generates a RapidIO maintenance write transaction.
+ * Returns %0 on success and %-EINVAL or %-EFAULT on failure.
+ */
+static int tsi721_cwrite_dma(struct rio_mport *mport, int index, u16 destid,
+                        u8 hopcount, u32 offset, int len, u32 data)
+{
+       struct tsi721_device *priv = mport->priv;
+       u32 temp = data;
+
+       return tsi721_maint_dma(priv, mport->sys_size, destid, hopcount,
+                               offset, len, &temp, 1);
+}
+
+/**
+ * tsi721_pw_handler - Tsi721 inbound port-write interrupt handler
+ * @mport: RapidIO master port structure
+ *
+ * Handles inbound port-write interrupts. Copies PW message from an internal
+ * buffer into PW message FIFO and schedules deferred routine to process
+ * queued messages.
+ */
+static int
+tsi721_pw_handler(struct rio_mport *mport)
+{
+       struct tsi721_device *priv = mport->priv;
+       u32 pw_stat;
+       u32 pw_buf[TSI721_RIO_PW_MSG_SIZE/sizeof(u32)];
+
+       pw_stat = ioread32(priv->regs + TSI721_RIO_PW_RX_STAT);
+
+       if (pw_stat & TSI721_RIO_PW_RX_STAT_PW_VAL) {
+               pw_buf[0] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(0));
+               pw_buf[1] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(1));
+               pw_buf[2] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(2));
+               pw_buf[3] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(3));
+
+               /* Queue PW message (if there is room in FIFO),
+                * otherwise discard it.
+                */
+               spin_lock(&priv->pw_fifo_lock);
+               if (kfifo_avail(&priv->pw_fifo) >= TSI721_RIO_PW_MSG_SIZE)
+                       kfifo_in(&priv->pw_fifo, pw_buf,
+                                               TSI721_RIO_PW_MSG_SIZE);
+               else
+                       priv->pw_discard_count++;
+               spin_unlock(&priv->pw_fifo_lock);
+       }
+
+       /* Clear pending PW interrupts */
+       iowrite32(TSI721_RIO_PW_RX_STAT_PW_DISC | TSI721_RIO_PW_RX_STAT_PW_VAL,
+                 priv->regs + TSI721_RIO_PW_RX_STAT);
+
+       schedule_work(&priv->pw_work);
+
+       return 0;
+}
+
+static void tsi721_pw_dpc(struct work_struct *work)
+{
+       struct tsi721_device *priv = container_of(work, struct tsi721_device,
+                                                   pw_work);
+       /* Use full size PW message buffer for RIO layer */
+       u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)];
+
+       /*
+        * Process port-write messages
+        */
+       while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)msg_buffer,
+                        TSI721_RIO_PW_MSG_SIZE, &priv->pw_fifo_lock)) {
+               /* Process one message */
+#ifdef DEBUG_PW
+               {
+               u32 i;
+               pr_debug("%s : Port-Write Message:", __func__);
+               for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); ) {
+                       pr_debug("0x%02x: %08x %08x %08x %08x", i*4,
+                               msg_buffer[i], msg_buffer[i + 1],
+                               msg_buffer[i + 2], msg_buffer[i + 3]);
+                       i += 4;
+               }
+               pr_debug("\n");
+               }
+#endif
+               /* Pass the port-write message to RIO core for processing */
+               rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer);
+       }
+}
+
+/**
+ * tsi721_pw_enable - enable/disable port-write interface
+ * @mport: Master port implementing the port write unit
+ * @enable: 1=enable; 0=disable port-write message handling
+ */
+static int tsi721_pw_enable(struct rio_mport *mport, int enable)
+{
+       struct tsi721_device *priv = mport->priv;
+       u32 rval;
+
+       rval = ioread32(priv->regs + TSI721_RIO_EM_INT_ENABLE);
+
+       if (enable)
+               rval |= TSI721_RIO_EM_INT_ENABLE_PW_RX;
+       else
+               rval &= ~TSI721_RIO_EM_INT_ENABLE_PW_RX;
+
+       /* Clear pending PW interrupts */
+       iowrite32(TSI721_RIO_PW_RX_STAT_PW_DISC | TSI721_RIO_PW_RX_STAT_PW_VAL,
+                 priv->regs + TSI721_RIO_PW_RX_STAT);
+       /* Update enable bits */
+       iowrite32(rval, priv->regs + TSI721_RIO_EM_INT_ENABLE);
+
+       return 0;
+}
+
+/**
+ * tsi721_dsend - Send a RapidIO doorbell
+ * @mport: RapidIO master port info
+ * @index: ID of RapidIO interface
+ * @destid: Destination ID of target device
+ * @data: 16-bit info field of RapidIO doorbell
+ *
+ * Sends a RapidIO doorbell message. Always returns %0.
+ */
+static int tsi721_dsend(struct rio_mport *mport, int index,
+                       u16 destid, u16 data)
+{
+       struct tsi721_device *priv = mport->priv;
+       u32 offset;
+
+       offset = (((mport->sys_size) ? RIO_TT_CODE_16 : RIO_TT_CODE_8) << 18) |
+                (destid << 2);
+
+       dev_dbg(&priv->pdev->dev,
+               "Send Doorbell 0x%04x to destID 0x%x\n", data, destid);
+       iowrite16be(data, priv->odb_base + offset);
+
+       return 0;
+}
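+
+/*
+ * Worked example (editorial, illustrative): on a large (16-bit destID)
+ * system, sending doorbell info 0x1234 to destID 0x0005 performs a
+ * big-endian 16-bit write of 0x1234 to
+ * odb_base + ((RIO_TT_CODE_16 << 18) | (0x0005 << 2)).
+ */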
+
+/**
+ * tsi721_dbell_handler - Tsi721 doorbell interrupt handler
+ * @mport: RapidIO master port structure
+ *
+ * Handles inbound doorbell interrupts. Disables further inbound doorbell
+ * interrupts and schedules a deferred routine to process queued doorbells.
+ */
+static int
+tsi721_dbell_handler(struct rio_mport *mport)
+{
+       struct tsi721_device *priv = mport->priv;
+       u32 regval;
+
+       /* Disable IDB interrupts */
+       regval = ioread32(priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+       regval &= ~TSI721_SR_CHINT_IDBQRCV;
+       iowrite32(regval,
+               priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+
+       schedule_work(&priv->idb_work);
+
+       return 0;
+}
+
+static void tsi721_db_dpc(struct work_struct *work)
+{
+       struct tsi721_device *priv = container_of(work, struct tsi721_device,
+                                                   idb_work);
+       struct rio_mport *mport;
+       struct rio_dbell *dbell;
+       int found = 0;
+       u32 wr_ptr, rd_ptr;
+       u64 *idb_entry;
+       u32 regval;
+       union {
+               u64 msg;
+               u8  bytes[8];
+       } idb;
+
+       /*
+        * Process queued inbound doorbells
+        */
+       mport = priv->mport;
+
+       wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE));
+       rd_ptr = ioread32(priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
+
+       while (wr_ptr != rd_ptr) {
+               idb_entry = (u64 *)(priv->idb_base +
+                                       (TSI721_IDB_ENTRY_SIZE * rd_ptr));
+               rd_ptr++;
+               rd_ptr %= IDB_QSIZE;
+               idb.msg = *idb_entry;
+               *idb_entry = 0;
+
+               /* Process one doorbell; reset match flag for this entry */
+               found = 0;
+               list_for_each_entry(dbell, &mport->dbells, node) {
+                       if ((dbell->res->start <= DBELL_INF(idb.bytes)) &&
+                           (dbell->res->end >= DBELL_INF(idb.bytes))) {
+                               found = 1;
+                               break;
+                       }
+               }
+
+               if (found) {
+                       dbell->dinb(mport, dbell->dev_id, DBELL_SID(idb.bytes),
+                                   DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
+               } else {
+                       dev_dbg(&priv->pdev->dev,
+                               "spurious inb doorbell, sid %2.2x tid %2.2x"
+                               " info %4.4x\n", DBELL_SID(idb.bytes),
+                               DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
+               }
+       }
+
+       iowrite32(rd_ptr & (IDB_QSIZE - 1),
+               priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
+
+       /* Re-enable IDB interrupts */
+       regval = ioread32(priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+       regval |= TSI721_SR_CHINT_IDBQRCV;
+       iowrite32(regval,
+               priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+}
+
+/**
+ * tsi721_irqhandler - Tsi721 interrupt handler
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles Tsi721 interrupts signaled using MSI and INTA. Checks reported
+ * interrupt events and calls an event-specific handler(s).
+ */
+static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
+{
+       struct rio_mport *mport = (struct rio_mport *)ptr;
+       struct tsi721_device *priv = mport->priv;
+       u32 dev_int;
+       u32 dev_ch_int;
+       u32 intval;
+       u32 ch_inte;
+
+       dev_int = ioread32(priv->regs + TSI721_DEV_INT);
+       if (!dev_int)
+               return IRQ_NONE;
+
+       dev_ch_int = ioread32(priv->regs + TSI721_DEV_CHAN_INT);
+
+       if (dev_int & TSI721_DEV_INT_SR2PC_CH) {
+               /* Service SR2PC Channel interrupts */
+               if (dev_ch_int & TSI721_INT_SR2PC_CHAN(IDB_QUEUE)) {
+                       /* Service Inbound Doorbell interrupt */
+                       intval = ioread32(priv->regs +
+                                               TSI721_SR_CHINT(IDB_QUEUE));
+                       if (intval & TSI721_SR_CHINT_IDBQRCV)
+                               tsi721_dbell_handler(mport);
+                       else
+                               dev_info(&priv->pdev->dev,
+                                       "Unsupported SR_CH_INT %x\n", intval);
+
+                       /* Clear interrupts */
+                       iowrite32(intval,
+                               priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+                       ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+               }
+       }
+
+       if (dev_int & TSI721_DEV_INT_SMSG_CH) {
+               int ch;
+
+               /*
+                * Service channel interrupts from Messaging Engine
+                */
+
+               if (dev_ch_int & TSI721_INT_IMSG_CHAN_M) { /* Inbound Msg */
+                       /* Disable signaled IB MSG Channel interrupts */
+                       ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+                       ch_inte &= ~(dev_ch_int & TSI721_INT_IMSG_CHAN_M);
+                       iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
+
+                       /*
+                        * Process Inbound Message interrupt for each MBOX
+                        */
+                       for (ch = 4; ch < RIO_MAX_MBOX + 4; ch++) {
+                               if (!(dev_ch_int & TSI721_INT_IMSG_CHAN(ch)))
+                                       continue;
+                               tsi721_imsg_handler(priv, ch);
+                       }
+               }
+
+               if (dev_ch_int & TSI721_INT_OMSG_CHAN_M) { /* Outbound Msg */
+                       /* Disable signaled OB MSG Channel interrupts */
+                       ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+                       ch_inte &= ~(dev_ch_int & TSI721_INT_OMSG_CHAN_M);
+                       iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
+
+                       /*
+                        * Process Outbound Message interrupts for each MBOX
+                        */
+
+                       for (ch = 0; ch < RIO_MAX_MBOX; ch++) {
+                               if (!(dev_ch_int & TSI721_INT_OMSG_CHAN(ch)))
+                                       continue;
+                               tsi721_omsg_handler(priv, ch);
+                       }
+               }
+       }
+
+       if (dev_int & TSI721_DEV_INT_SRIO) {
+               /* Service SRIO MAC interrupts */
+               intval = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
+               if (intval & TSI721_RIO_EM_INT_STAT_PW_RX)
+                       tsi721_pw_handler(mport);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static void tsi721_interrupts_init(struct tsi721_device *priv)
+{
+       u32 intr;
+
+       /* Enable IDB interrupts */
+       iowrite32(TSI721_SR_CHINT_ALL,
+               priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+       iowrite32(TSI721_SR_CHINT_IDBQRCV,
+               priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+       iowrite32(TSI721_INT_SR2PC_CHAN(IDB_QUEUE),
+               priv->regs + TSI721_DEV_CHAN_INTE);
+
+       /* Enable SRIO MAC interrupts */
+       iowrite32(TSI721_RIO_EM_DEV_INT_EN_INT,
+               priv->regs + TSI721_RIO_EM_DEV_INT_EN);
+
+       if (priv->flags & TSI721_USING_MSIX)
+               intr = TSI721_DEV_INT_SRIO;
+       else
+               intr = TSI721_DEV_INT_SR2PC_CH | TSI721_DEV_INT_SRIO |
+                       TSI721_DEV_INT_SMSG_CH;
+
+       iowrite32(intr, priv->regs + TSI721_DEV_INTE);
+       ioread32(priv->regs + TSI721_DEV_INTE);
+}
+
+#ifdef CONFIG_PCI_MSI
+/**
+ * tsi721_omsg_msix - MSI-X interrupt handler for outbound messaging
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles outbound messaging interrupts signaled using MSI-X.
+ */
+static irqreturn_t tsi721_omsg_msix(int irq, void *ptr)
+{
+       struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+       int mbox;
+
+       mbox = (irq - priv->msix[TSI721_VECT_OMB0_DONE].vector) % RIO_MAX_MBOX;
+       tsi721_omsg_handler(priv, mbox);
+       return IRQ_HANDLED;
+}
+
+/**
+ * tsi721_imsg_msix - MSI-X interrupt handler for inbound messaging
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles inbound messaging interrupts signaled using MSI-X.
+ */
+static irqreturn_t tsi721_imsg_msix(int irq, void *ptr)
+{
+       struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+       int mbox;
+
+       mbox = (irq - priv->msix[TSI721_VECT_IMB0_RCV].vector) % RIO_MAX_MBOX;
+       tsi721_imsg_handler(priv, mbox + 4);
+       return IRQ_HANDLED;
+}
+
+/**
+ * tsi721_srio_msix - Tsi721 MSI-X SRIO MAC interrupt handler
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles Tsi721 interrupts from SRIO MAC.
+ */
+static irqreturn_t tsi721_srio_msix(int irq, void *ptr)
+{
+       struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+       u32 srio_int;
+
+       /* Service SRIO MAC interrupts */
+       srio_int = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
+       if (srio_int & TSI721_RIO_EM_INT_STAT_PW_RX)
+               tsi721_pw_handler((struct rio_mport *)ptr);
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * tsi721_sr2pc_ch_msix - Tsi721 MSI-X SR2PC Channel interrupt handler
+ * @irq: Linux interrupt number
+ * @ptr: Pointer to interrupt-specific data (mport structure)
+ *
+ * Handles Tsi721 interrupts from SR2PC Channel.
+ * NOTE: At this moment services only one SR2PC channel associated with inbound
+ * doorbells.
+ */
+static irqreturn_t tsi721_sr2pc_ch_msix(int irq, void *ptr)
+{
+       struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+       u32 sr_ch_int;
+
+       /* Service Inbound DB interrupt from SR2PC channel */
+       sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+       if (sr_ch_int & TSI721_SR_CHINT_IDBQRCV)
+               tsi721_dbell_handler((struct rio_mport *)ptr);
+
+       /* Clear interrupts */
+       iowrite32(sr_ch_int, priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+       /* Read back to ensure that interrupt was cleared */
+       sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * tsi721_request_msix - register interrupt service for MSI-X mode.
+ * @mport: RapidIO master port structure
+ *
+ * Registers MSI-X interrupt service routines for interrupts that are active
+ * immediately after mport initialization. Messaging interrupt service routines
+ * should be registered during corresponding open requests.
+ */
+static int tsi721_request_msix(struct rio_mport *mport)
+{
+       struct tsi721_device *priv = mport->priv;
+       int err = 0;
+
+       err = request_irq(priv->msix[TSI721_VECT_IDB].vector,
+                       tsi721_sr2pc_ch_msix, 0,
+                       priv->msix[TSI721_VECT_IDB].irq_name, (void *)mport);
+       if (err)
+               goto out;
+
+       err = request_irq(priv->msix[TSI721_VECT_PWRX].vector,
+                       tsi721_srio_msix, 0,
+                       priv->msix[TSI721_VECT_PWRX].irq_name, (void *)mport);
+       if (err)
+               free_irq(priv->msix[TSI721_VECT_IDB].vector, (void *)mport);
+out:
+       return err;
+}
+
+/**
+ * tsi721_enable_msix - Attempts to enable MSI-X support for Tsi721.
+ * @priv: pointer to tsi721 private data
+ *
+ * Configures MSI-X support for Tsi721. The driver requires the exact
+ * number of requested vectors; if fewer are available, MSI-X is not used.
+ */
+static int tsi721_enable_msix(struct tsi721_device *priv)
+{
+       struct msix_entry entries[TSI721_VECT_MAX];
+       int err;
+       int i;
+
+       entries[TSI721_VECT_IDB].entry = TSI721_MSIX_SR2PC_IDBQ_RCV(IDB_QUEUE);
+       entries[TSI721_VECT_PWRX].entry = TSI721_MSIX_SRIO_MAC_INT;
+
+       /*
+        * Initialize MSI-X entries for Messaging Engine:
+        * this driver supports four RIO mailboxes (inbound and outbound)
+        * NOTE: Inbound message MBOX 0...3 use IB channels 4...7. Therefore
+        * offset +4 is added to IB MBOX number.
+        */
+       for (i = 0; i < RIO_MAX_MBOX; i++) {
+               entries[TSI721_VECT_IMB0_RCV + i].entry =
+                                       TSI721_MSIX_IMSG_DQ_RCV(i + 4);
+               entries[TSI721_VECT_IMB0_INT + i].entry =
+                                       TSI721_MSIX_IMSG_INT(i + 4);
+               entries[TSI721_VECT_OMB0_DONE + i].entry =
+                                       TSI721_MSIX_OMSG_DONE(i);
+               entries[TSI721_VECT_OMB0_INT + i].entry =
+                                       TSI721_MSIX_OMSG_INT(i);
+       }
+
+       err = pci_enable_msix(priv->pdev, entries, ARRAY_SIZE(entries));
+       if (err) {
+               if (err > 0)
+                       dev_info(&priv->pdev->dev,
+                                "Only %d MSI-X vectors available, "
+                                "not using MSI-X\n", err);
+               return err;
+       }
+
+       /*
+        * Copy MSI-X vector information into tsi721 private structure
+        */
+       priv->msix[TSI721_VECT_IDB].vector = entries[TSI721_VECT_IDB].vector;
+       snprintf(priv->msix[TSI721_VECT_IDB].irq_name, IRQ_DEVICE_NAME_MAX,
+                DRV_NAME "-idb@pci:%s", pci_name(priv->pdev));
+       priv->msix[TSI721_VECT_PWRX].vector = entries[TSI721_VECT_PWRX].vector;
+       snprintf(priv->msix[TSI721_VECT_PWRX].irq_name, IRQ_DEVICE_NAME_MAX,
+                DRV_NAME "-pwrx@pci:%s", pci_name(priv->pdev));
+
+       for (i = 0; i < RIO_MAX_MBOX; i++) {
+               priv->msix[TSI721_VECT_IMB0_RCV + i].vector =
+                               entries[TSI721_VECT_IMB0_RCV + i].vector;
+               snprintf(priv->msix[TSI721_VECT_IMB0_RCV + i].irq_name,
+                        IRQ_DEVICE_NAME_MAX, DRV_NAME "-imbr%d@pci:%s",
+                        i, pci_name(priv->pdev));
+
+               priv->msix[TSI721_VECT_IMB0_INT + i].vector =
+                               entries[TSI721_VECT_IMB0_INT + i].vector;
+               snprintf(priv->msix[TSI721_VECT_IMB0_INT + i].irq_name,
+                        IRQ_DEVICE_NAME_MAX, DRV_NAME "-imbi%d@pci:%s",
+                        i, pci_name(priv->pdev));
+
+               priv->msix[TSI721_VECT_OMB0_DONE + i].vector =
+                               entries[TSI721_VECT_OMB0_DONE + i].vector;
+               snprintf(priv->msix[TSI721_VECT_OMB0_DONE + i].irq_name,
+                        IRQ_DEVICE_NAME_MAX, DRV_NAME "-ombd%d@pci:%s",
+                        i, pci_name(priv->pdev));
+
+               priv->msix[TSI721_VECT_OMB0_INT + i].vector =
+                               entries[TSI721_VECT_OMB0_INT + i].vector;
+               snprintf(priv->msix[TSI721_VECT_OMB0_INT + i].irq_name,
+                        IRQ_DEVICE_NAME_MAX, DRV_NAME "-ombi%d@pci:%s",
+                        i, pci_name(priv->pdev));
+       }
+
+       return 0;
+}
+#endif /* CONFIG_PCI_MSI */
+
+static int tsi721_request_irq(struct rio_mport *mport)
+{
+       struct tsi721_device *priv = mport->priv;
+       int err;
+
+#ifdef CONFIG_PCI_MSI
+       if (priv->flags & TSI721_USING_MSIX)
+               err = tsi721_request_msix(mport);
+       else
+#endif
+               err = request_irq(priv->pdev->irq, tsi721_irqhandler,
+                         (priv->flags & TSI721_USING_MSI) ? 0 : IRQF_SHARED,
+                         DRV_NAME, (void *)mport);
+
+       if (err)
+               dev_err(&priv->pdev->dev,
+                       "Unable to allocate interrupt, Error: %d\n", err);
+
+       return err;
+}
+
+/**
+ * tsi721_init_pc2sr_mapping - initializes outbound (PCIe->SRIO)
+ * translation regions.
+ * @priv: pointer to tsi721 private data
+ *
+ * Disables SREP translation regions.
+ */
+static void tsi721_init_pc2sr_mapping(struct tsi721_device *priv)
+{
+       int i;
+
+       /* Disable all PC2SR translation windows */
+       for (i = 0; i < TSI721_OBWIN_NUM; i++)
+               iowrite32(0, priv->regs + TSI721_OBWINLB(i));
+}
+
+/**
+ * tsi721_init_sr2pc_mapping - initializes inbound (SRIO->PCIe)
+ * translation regions.
+ * @priv: pointer to tsi721 private data
+ *
+ * Disables inbound windows.
+ */
+static void tsi721_init_sr2pc_mapping(struct tsi721_device *priv)
+{
+       int i;
+
+       /* Disable all SR2PC inbound windows */
+       for (i = 0; i < TSI721_IBWIN_NUM; i++)
+               iowrite32(0, priv->regs + TSI721_IBWINLB(i));
+}
+
+/**
+ * tsi721_port_write_init - Inbound port write interface init
+ * @priv: pointer to tsi721 private data
+ *
+ * Initializes inbound port write handler.
+ * Returns %0 on success or %-ENOMEM on failure.
+ */
+static int tsi721_port_write_init(struct tsi721_device *priv)
+{
+       priv->pw_discard_count = 0;
+       INIT_WORK(&priv->pw_work, tsi721_pw_dpc);
+       spin_lock_init(&priv->pw_fifo_lock);
+       if (kfifo_alloc(&priv->pw_fifo,
+                       TSI721_RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
+               dev_err(&priv->pdev->dev, "PW FIFO allocation failed\n");
+               return -ENOMEM;
+       }
+
+       /* Use reliable port-write capture mode */
+       iowrite32(TSI721_RIO_PW_CTL_PWC_REL, priv->regs + TSI721_RIO_PW_CTL);
+       return 0;
+}
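+
+/*
+ * Sizing note (editorial): the kfifo above buffers up to 32 port-write
+ * messages of TSI721_RIO_PW_MSG_SIZE bytes each; messages arriving while
+ * it is full are dropped and counted in pw_discard_count (see
+ * tsi721_pw_handler() above).
+ */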
+
+static int tsi721_doorbell_init(struct tsi721_device *priv)
+{
+       /* Outbound Doorbells do not require any setup.
+        * Tsi721 uses dedicated PCI BAR1 to generate doorbells.
+        * That BAR1 was mapped during the probe routine.
+        */
+
+       /* Initialize Inbound Doorbell processing DPC and queue */
+       priv->db_discard_count = 0;
+       INIT_WORK(&priv->idb_work, tsi721_db_dpc);
+
+       /* Allocate buffer for inbound doorbells queue */
+       priv->idb_base = dma_alloc_coherent(&priv->pdev->dev,
+                               IDB_QSIZE * TSI721_IDB_ENTRY_SIZE,
+                               &priv->idb_dma, GFP_KERNEL);
+       if (!priv->idb_base)
+               return -ENOMEM;
+
+       memset(priv->idb_base, 0, IDB_QSIZE * TSI721_IDB_ENTRY_SIZE);
+
+       dev_dbg(&priv->pdev->dev, "Allocated IDB buffer @ %p (phys = %llx)\n",
+               priv->idb_base, (unsigned long long)priv->idb_dma);
+
+       iowrite32(TSI721_IDQ_SIZE_VAL(IDB_QSIZE),
+               priv->regs + TSI721_IDQ_SIZE(IDB_QUEUE));
+       iowrite32(((u64)priv->idb_dma >> 32),
+               priv->regs + TSI721_IDQ_BASEU(IDB_QUEUE));
+       iowrite32(((u64)priv->idb_dma & TSI721_IDQ_BASEL_ADDR),
+               priv->regs + TSI721_IDQ_BASEL(IDB_QUEUE));
+       /* Enable accepting all inbound doorbells */
+       iowrite32(0, priv->regs + TSI721_IDQ_MASK(IDB_QUEUE));
+
+       iowrite32(TSI721_IDQ_INIT, priv->regs + TSI721_IDQ_CTL(IDB_QUEUE));
+
+       iowrite32(0, priv->regs + TSI721_IDQ_RP(IDB_QUEUE));
+
+       return 0;
+}
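+
+/*
+ * Worked example (editorial, illustrative): for an IDB queue buffer at
+ * idb_dma = 0x123456000, the code above programs BASEU = 0x1 (upper 32
+ * bits) and BASEL = 0x23456000 (lower 32 bits, masked by
+ * TSI721_IDQ_BASEL_ADDR).
+ */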
+
+static void tsi721_doorbell_free(struct tsi721_device *priv)
+{
+       if (priv->idb_base == NULL)
+               return;
+
+       /* Free buffer allocated for inbound doorbell queue */
+       dma_free_coherent(&priv->pdev->dev, IDB_QSIZE * TSI721_IDB_ENTRY_SIZE,
+                         priv->idb_base, priv->idb_dma);
+       priv->idb_base = NULL;
+}
+
+static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum)
+{
+       struct tsi721_dma_desc *bd_ptr;
+       u64             *sts_ptr;
+       dma_addr_t      bd_phys, sts_phys;
+       int             sts_size;
+       int             bd_num = priv->bdma[chnum].bd_num;
+
+       dev_dbg(&priv->pdev->dev, "Init Block DMA Engine, CH%d\n", chnum);
+
+       /*
+        * Initialize DMA channel for maintenance requests
+        */
+
+       /* Allocate space for DMA descriptors */
+       bd_ptr = dma_alloc_coherent(&priv->pdev->dev,
+                                       bd_num * sizeof(struct tsi721_dma_desc),
+                                       &bd_phys, GFP_KERNEL);
+       if (!bd_ptr)
+               return -ENOMEM;
+
+       priv->bdma[chnum].bd_phys = bd_phys;
+       priv->bdma[chnum].bd_base = bd_ptr;
+
+       memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc));
+
+       dev_dbg(&priv->pdev->dev, "DMA descriptors @ %p (phys = %llx)\n",
+               bd_ptr, (unsigned long long)bd_phys);
+
+       /* Allocate space for descriptor status FIFO */
+       sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
+                                       bd_num : TSI721_DMA_MINSTSSZ;
+       sts_size = roundup_pow_of_two(sts_size);
+       sts_ptr = dma_alloc_coherent(&priv->pdev->dev,
+                                    sts_size * sizeof(struct tsi721_dma_sts),
+                                    &sts_phys, GFP_KERNEL);
+       if (!sts_ptr) {
+               /* Free space allocated for DMA descriptors */
+               dma_free_coherent(&priv->pdev->dev,
+                                 bd_num * sizeof(struct tsi721_dma_desc),
+                                 bd_ptr, bd_phys);
+               priv->bdma[chnum].bd_base = NULL;
+               return -ENOMEM;
+       }
+
+       priv->bdma[chnum].sts_phys = sts_phys;
+       priv->bdma[chnum].sts_base = sts_ptr;
+       priv->bdma[chnum].sts_size = sts_size;
+
+       memset(sts_ptr, 0, sts_size * sizeof(struct tsi721_dma_sts));
+
+       dev_dbg(&priv->pdev->dev,
+               "desc status FIFO @ %p (phys = %llx) size=0x%x\n",
+               sts_ptr, (unsigned long long)sts_phys, sts_size);
+
+       /* Initialize DMA descriptors ring */
+       bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
+       bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys &
+                                                TSI721_DMAC_DPTRL_MASK);
+       bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
+
+       /* Setup DMA descriptor pointers */
+       iowrite32(((u64)bd_phys >> 32),
+               priv->regs + TSI721_DMAC_DPTRH(chnum));
+       iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK),
+               priv->regs + TSI721_DMAC_DPTRL(chnum));
+
+       /* Setup descriptor status FIFO */
+       iowrite32(((u64)sts_phys >> 32),
+               priv->regs + TSI721_DMAC_DSBH(chnum));
+       iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK),
+               priv->regs + TSI721_DMAC_DSBL(chnum));
+       iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size),
+               priv->regs + TSI721_DMAC_DSSZ(chnum));
+
+       /* Clear interrupt bits */
+       iowrite32(TSI721_DMAC_INT_ALL,
+               priv->regs + TSI721_DMAC_INT(chnum));
+
+       ioread32(priv->regs + TSI721_DMAC_INT(chnum));
+
+       /* Toggle DMA channel initialization */
+       iowrite32(TSI721_DMAC_CTL_INIT, priv->regs + TSI721_DMAC_CTL(chnum));
+       ioread32(priv->regs + TSI721_DMAC_CTL(chnum));
+       udelay(10);
+
+       return 0;
+}
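+
+/*
+ * Ring layout sketch (editorial): the last entry is a DTYPE3 link
+ * descriptor pointing back to bd_phys, so for bd_num = 2 the channel
+ * cycles bd[0] -> bd[1] (link) -> bd[0] -> ...  The maintenance path in
+ * tsi721_maint_dma() only ever fills bd[0] and counts the link hop when
+ * it advances DWRCNT by 2.
+ */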
+
+static int tsi721_bdma_ch_free(struct tsi721_device *priv, int chnum)
+{
+       u32 ch_stat;
+
+       if (priv->bdma[chnum].bd_base == NULL)
+               return 0;
+
+       /* Check if DMA channel still running */
+       ch_stat = ioread32(priv->regs + TSI721_DMAC_STS(chnum));
+       if (ch_stat & TSI721_DMAC_STS_RUN)
+               return -EFAULT;
+
+       /* Put DMA channel into init state */
+       iowrite32(TSI721_DMAC_CTL_INIT,
+               priv->regs + TSI721_DMAC_CTL(chnum));
+
+       /* Free space allocated for DMA descriptors */
+       dma_free_coherent(&priv->pdev->dev,
+               priv->bdma[chnum].bd_num * sizeof(struct tsi721_dma_desc),
+               priv->bdma[chnum].bd_base, priv->bdma[chnum].bd_phys);
+       priv->bdma[chnum].bd_base = NULL;
+
+       /* Free space allocated for status FIFO */
+       dma_free_coherent(&priv->pdev->dev,
+               priv->bdma[chnum].sts_size * sizeof(struct tsi721_dma_sts),
+               priv->bdma[chnum].sts_base, priv->bdma[chnum].sts_phys);
+       priv->bdma[chnum].sts_base = NULL;
+       return 0;
+}
+
+static int tsi721_bdma_init(struct tsi721_device *priv)
+{
+       /* Initialize BDMA channel allocated for RapidIO maintenance read/write
+        * request generation
+        */
+       priv->bdma[TSI721_DMACH_MAINT].bd_num = 2;
+       if (tsi721_bdma_ch_init(priv, TSI721_DMACH_MAINT)) {
+               dev_err(&priv->pdev->dev, "Unable to initialize maintenance DMA"
+                       " channel %d, aborting\n", TSI721_DMACH_MAINT);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void tsi721_bdma_free(struct tsi721_device *priv)
+{
+       tsi721_bdma_ch_free(priv, TSI721_DMACH_MAINT);
+}
+
+/* Enable Inbound Messaging Interrupts */
+static void
+tsi721_imsg_interrupt_enable(struct tsi721_device *priv, int ch,
+                                 u32 inte_mask)
+{
+       u32 rval;
+
+       if (!inte_mask)
+               return;
+
+       /* Clear pending Inbound Messaging interrupts */
+       iowrite32(inte_mask, priv->regs + TSI721_IBDMAC_INT(ch));
+
+       /* Enable Inbound Messaging interrupts */
+       rval = ioread32(priv->regs + TSI721_IBDMAC_INTE(ch));
+       iowrite32(rval | inte_mask, priv->regs + TSI721_IBDMAC_INTE(ch));
+
+       if (priv->flags & TSI721_USING_MSIX)
+               return; /* Finished if we are in MSI-X mode */
+
+       /*
+        * For MSI and INTA interrupt signalling we need to enable next levels
+        */
+
+       /* Enable Device Channel Interrupt */
+       rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+       iowrite32(rval | TSI721_INT_IMSG_CHAN(ch),
+                 priv->regs + TSI721_DEV_CHAN_INTE);
+}
+
+/* Disable Inbound Messaging Interrupts */
+static void
+tsi721_imsg_interrupt_disable(struct tsi721_device *priv, int ch,
+                                  u32 inte_mask)
+{
+       u32 rval;
+
+       if (!inte_mask)
+               return;
+
+       /* Clear pending Inbound Messaging interrupts */
+       iowrite32(inte_mask, priv->regs + TSI721_IBDMAC_INT(ch));
+
+       /* Disable Inbound Messaging interrupts */
+       rval = ioread32(priv->regs + TSI721_IBDMAC_INTE(ch));
+       rval &= ~inte_mask;
+       iowrite32(rval, priv->regs + TSI721_IBDMAC_INTE(ch));
+
+       if (priv->flags & TSI721_USING_MSIX)
+               return; /* Finished if we are in MSI-X mode */
+
+       /*
+        * For MSI and INTA interrupt signalling we need to disable next levels
+        */
+
+       /* Disable Device Channel Interrupt */
+       rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+       rval &= ~TSI721_INT_IMSG_CHAN(ch);
+       iowrite32(rval, priv->regs + TSI721_DEV_CHAN_INTE);
+}
+
+/* Enable Outbound Messaging interrupts */
+static void
+tsi721_omsg_interrupt_enable(struct tsi721_device *priv, int ch,
+                                 u32 inte_mask)
+{
+       u32 rval;
+
+       if (!inte_mask)
+               return;
+
+       /* Clear pending Outbound Messaging interrupts */
+       iowrite32(inte_mask, priv->regs + TSI721_OBDMAC_INT(ch));
+
+       /* Enable Outbound Messaging channel interrupts */
+       rval = ioread32(priv->regs + TSI721_OBDMAC_INTE(ch));
+       iowrite32(rval | inte_mask, priv->regs + TSI721_OBDMAC_INTE(ch));
+
+       if (priv->flags & TSI721_USING_MSIX)
+               return; /* Finished if we are in MSI-X mode */
+
+       /*
+        * For MSI and INTA interrupt signalling we need to enable next levels
+        */
+
+       /* Enable Device Channel Interrupt */
+       rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+       iowrite32(rval | TSI721_INT_OMSG_CHAN(ch),
+                 priv->regs + TSI721_DEV_CHAN_INTE);
+}
+
+/* Disable Outbound Messaging interrupts */
+static void
+tsi721_omsg_interrupt_disable(struct tsi721_device *priv, int ch,
+                                  u32 inte_mask)
+{
+       u32 rval;
+
+       if (!inte_mask)
+               return;
+
+       /* Clear pending Outbound Messaging interrupts */
+       iowrite32(inte_mask, priv->regs + TSI721_OBDMAC_INT(ch));
+
+       /* Disable Outbound Messaging interrupts */
+       rval = ioread32(priv->regs + TSI721_OBDMAC_INTE(ch));
+       rval &= ~inte_mask;
+       iowrite32(rval, priv->regs + TSI721_OBDMAC_INTE(ch));
+
+       if (priv->flags & TSI721_USING_MSIX)
+               return; /* Finished if we are in MSI-X mode */
+
+       /*
+        * For MSI and INTA interrupt signalling we need to disable next levels
+        */
+
+       /* Disable Device Channel Interrupt */
+       rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+       rval &= ~TSI721_INT_OMSG_CHAN(ch);
+       iowrite32(rval, priv->regs + TSI721_DEV_CHAN_INTE);
+}
+
+/**
+ * tsi721_add_outb_message - Add message to the Tsi721 outbound message queue
+ * @mport: Master port with outbound message queue
+ * @rdev: Target of outbound message
+ * @mbox: Outbound mailbox
+ * @buffer: Message to add to outbound queue
+ * @len: Length of message
+ */
+static int
+tsi721_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
+                       void *buffer, size_t len)
+{
+       struct tsi721_device *priv = mport->priv;
+       struct tsi721_omsg_desc *desc;
+       u32 tx_slot;
+
+       if (!priv->omsg_init[mbox] ||
+           len > TSI721_MSG_MAX_SIZE || len < 8)
+               return -EINVAL;
+
+       tx_slot = priv->omsg_ring[mbox].tx_slot;
+
+       /* Copy message into transfer buffer */
+       memcpy(priv->omsg_ring[mbox].omq_base[tx_slot], buffer, len);
+
+       if (len & 0x7)
+               len += 8;
+
+       /* Build descriptor associated with buffer */
+       desc = priv->omsg_ring[mbox].omd_base;
+       desc[tx_slot].type_id = cpu_to_le32((DTYPE4 << 29) | rdev->destid);
+       if (tx_slot % 4 == 0)
+               desc[tx_slot].type_id |= cpu_to_le32(TSI721_OMD_IOF);
+
+       desc[tx_slot].msg_info =
+               cpu_to_le32((mport->sys_size << 26) | (mbox << 22) |
+                           (0xe << 12) | (len & 0xff8));
+       desc[tx_slot].bufptr_lo =
+               cpu_to_le32((u64)priv->omsg_ring[mbox].omq_phys[tx_slot] &
+                           0xffffffff);
+       desc[tx_slot].bufptr_hi =
+               cpu_to_le32((u64)priv->omsg_ring[mbox].omq_phys[tx_slot] >> 32);
+
+       priv->omsg_ring[mbox].wr_count++;
+
+       /* Go to next descriptor */
+       if (++priv->omsg_ring[mbox].tx_slot == priv->omsg_ring[mbox].size) {
+               priv->omsg_ring[mbox].tx_slot = 0;
+               /* Move through the ring link descriptor at the end */
+               priv->omsg_ring[mbox].wr_count++;
+       }
+
+       mb();
+
+       /* Set new write count value */
+       iowrite32(priv->omsg_ring[mbox].wr_count,
+               priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
+       ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
+
+       return 0;
+}
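+
+/*
+ * Usage sketch (editorial, illustrative): once the mport is registered,
+ * clients reach the function above through the generic RIO messaging
+ * API, e.g.:
+ *
+ *     rio_request_outb_mbox(mport, dev_id, mbox, entries, tx_done_cb);
+ *     rio_add_outb_message(mport, rdev, mbox, buffer, len);
+ *
+ * (names as provided by the RIO core; exact signatures are in
+ * include/linux/rio_drv.h)
+ */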
+
+/**
+ * tsi721_omsg_handler - Outbound Message Interrupt Handler
+ * @priv: pointer to tsi721 private data
+ * @ch:   number of OB MSG channel to service
+ *
+ * Services channel interrupts from outbound messaging engine.
+ */
+static void tsi721_omsg_handler(struct tsi721_device *priv, int ch)
+{
+       u32 omsg_int;
+
+       spin_lock(&priv->omsg_ring[ch].lock);
+
+       omsg_int = ioread32(priv->regs + TSI721_OBDMAC_INT(ch));
+
+       if (omsg_int & TSI721_OBDMAC_INT_ST_FULL)
+               dev_info(&priv->pdev->dev,
+                       "OB MBOX%d: Status FIFO is full\n", ch);
+
+       if (omsg_int & (TSI721_OBDMAC_INT_DONE | TSI721_OBDMAC_INT_IOF_DONE)) {
+               u32 srd_ptr;
+               u64 *sts_ptr, last_ptr = 0, prev_ptr = 0;
+               int i, j;
+               u32 tx_slot;
+
+               /*
+                * Find last successfully processed descriptor
+                */
+
+               /* Check and clear descriptor status FIFO entries */
+               srd_ptr = priv->omsg_ring[ch].sts_rdptr;
+               sts_ptr = priv->omsg_ring[ch].sts_base;
+               j = srd_ptr * 8;
+               while (sts_ptr[j]) {
+                       for (i = 0; i < 8 && sts_ptr[j]; i++, j++) {
+                               prev_ptr = last_ptr;
+                               last_ptr = le64_to_cpu(sts_ptr[j]);
+                               sts_ptr[j] = 0;
+                       }
+
+                       ++srd_ptr;
+                       srd_ptr %= priv->omsg_ring[ch].sts_size;
+                       j = srd_ptr * 8;
+               }
+
+               if (last_ptr == 0)
+                       goto no_sts_update;
+
+               priv->omsg_ring[ch].sts_rdptr = srd_ptr;
+               iowrite32(srd_ptr, priv->regs + TSI721_OBDMAC_DSRP(ch));
+
+               if (!priv->mport->outb_msg[ch].mcback)
+                       goto no_sts_update;
+
+               /* Inform upper layer about transfer completion */
+
+               tx_slot = (last_ptr - (u64)priv->omsg_ring[ch].omd_phys)/
+                                               sizeof(struct tsi721_omsg_desc);
+
+               /*
+                * Check if this is a Link Descriptor (LD).
+                * If yes, ignore LD and use descriptor processed
+                * before LD.
+                */
+               if (tx_slot == priv->omsg_ring[ch].size) {
+                       if (prev_ptr)
+                               tx_slot = (prev_ptr -
+                                       (u64)priv->omsg_ring[ch].omd_phys)/
+                                               sizeof(struct tsi721_omsg_desc);
+                       else
+                               goto no_sts_update;
+               }
+
+               /* Move slot index to the next message to be sent */
+               ++tx_slot;
+               if (tx_slot == priv->omsg_ring[ch].size)
+                       tx_slot = 0;
+               BUG_ON(tx_slot >= priv->omsg_ring[ch].size);
+               priv->mport->outb_msg[ch].mcback(priv->mport,
+                               priv->omsg_ring[ch].dev_id, ch,
+                               tx_slot);
+       }
+
+no_sts_update:
+
+       if (omsg_int & TSI721_OBDMAC_INT_ERROR) {
+               /*
+                * Outbound message operation aborted due to error,
+                * reinitialize OB MSG channel
+                */
+
+               dev_dbg(&priv->pdev->dev, "OB MSG ABORT ch_stat=%x\n",
+                       ioread32(priv->regs + TSI721_OBDMAC_STS(ch)));
+
+               iowrite32(TSI721_OBDMAC_INT_ERROR,
+                               priv->regs + TSI721_OBDMAC_INT(ch));
+               iowrite32(TSI721_OBDMAC_CTL_INIT,
+                               priv->regs + TSI721_OBDMAC_CTL(ch));
+               ioread32(priv->regs + TSI721_OBDMAC_CTL(ch));
+
+               /* Inform upper level to clear all pending tx slots */
+               if (priv->mport->outb_msg[ch].mcback)
+                       priv->mport->outb_msg[ch].mcback(priv->mport,
+                                       priv->omsg_ring[ch].dev_id, ch,
+                                       priv->omsg_ring[ch].tx_slot);
+               /* Synch tx_slot tracking */
+               iowrite32(priv->omsg_ring[ch].tx_slot,
+                       priv->regs + TSI721_OBDMAC_DRDCNT(ch));
+               ioread32(priv->regs + TSI721_OBDMAC_DRDCNT(ch));
+               priv->omsg_ring[ch].wr_count = priv->omsg_ring[ch].tx_slot;
+               priv->omsg_ring[ch].sts_rdptr = 0;
+       }
+
+       /* Clear channel interrupts */
+       iowrite32(omsg_int, priv->regs + TSI721_OBDMAC_INT(ch));
+
+       if (!(priv->flags & TSI721_USING_MSIX)) {
+               u32 ch_inte;
+
+               /* Re-enable channel interrupts */
+               ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+               ch_inte |= TSI721_INT_OMSG_CHAN(ch);
+               iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
+       }
+
+       spin_unlock(&priv->omsg_ring[ch].lock);
+}
+
+/**
+ * tsi721_open_outb_mbox - Initialize Tsi721 outbound mailbox
+ * @mport: Master port implementing Outbound Messaging Engine
+ * @dev_id: Device specific pointer to pass on event
+ * @mbox: Mailbox to open
+ * @entries: Number of entries in the outbound mailbox ring
+ */
+static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id,
+                                int mbox, int entries)
+{
+       struct tsi721_device *priv = mport->priv;
+       struct tsi721_omsg_desc *bd_ptr;
+       int i, rc = 0;
+
+       if ((entries < TSI721_OMSGD_MIN_RING_SIZE) ||
+           (entries > (TSI721_OMSGD_RING_SIZE)) ||
+           (!is_power_of_2(entries)) || mbox >= RIO_MAX_MBOX) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       priv->omsg_ring[mbox].dev_id = dev_id;
+       priv->omsg_ring[mbox].size = entries;
+       priv->omsg_ring[mbox].sts_rdptr = 0;
+       spin_lock_init(&priv->omsg_ring[mbox].lock);
+
+       /*
+        * Outbound Msg Buffer allocation based on
+        * the number of maximum descriptor entries
+        */
+       for (i = 0; i < entries; i++) {
+               priv->omsg_ring[mbox].omq_base[i] =
+                       dma_alloc_coherent(
+                               &priv->pdev->dev, TSI721_MSG_BUFFER_SIZE,
+                               &priv->omsg_ring[mbox].omq_phys[i],
+                               GFP_KERNEL);
+               if (priv->omsg_ring[mbox].omq_base[i] == NULL) {
+                       dev_dbg(&priv->pdev->dev,
+                               "Unable to allocate OB MSG data buffer for"
+                               " MBOX%d\n", mbox);
+                       rc = -ENOMEM;
+                       goto out_buf;
+               }
+       }
+
+       /* Outbound message descriptor allocation */
+       priv->omsg_ring[mbox].omd_base = dma_alloc_coherent(
+                               &priv->pdev->dev,
+                               (entries + 1) * sizeof(struct tsi721_omsg_desc),
+                               &priv->omsg_ring[mbox].omd_phys, GFP_KERNEL);
+       if (priv->omsg_ring[mbox].omd_base == NULL) {
+               dev_dbg(&priv->pdev->dev,
+                       "Unable to allocate OB MSG descriptor memory "
+                       "for MBOX%d\n", mbox);
+               rc = -ENOMEM;
+               goto out_buf;
+       }
+
+       priv->omsg_ring[mbox].tx_slot = 0;
+
+       /* Outbound message descriptor status FIFO allocation */
+       priv->omsg_ring[mbox].sts_size = roundup_pow_of_two(entries + 1);
+       priv->omsg_ring[mbox].sts_base = dma_alloc_coherent(&priv->pdev->dev,
+                       priv->omsg_ring[mbox].sts_size *
+                                               sizeof(struct tsi721_dma_sts),
+                       &priv->omsg_ring[mbox].sts_phys, GFP_KERNEL);
+       if (priv->omsg_ring[mbox].sts_base == NULL) {
+               dev_dbg(&priv->pdev->dev,
+                       "Unable to allocate OB MSG descriptor status FIFO "
+                       "for MBOX%d\n", mbox);
+               rc = -ENOMEM;
+               goto out_desc;
+       }
+
+       memset(priv->omsg_ring[mbox].sts_base, 0,
+               priv->omsg_ring[mbox].sts_size *
+                                       sizeof(struct tsi721_dma_sts));
+
+       /*
+        * Configure Outbound Messaging Engine
+        */
+
+       /* Setup Outbound Message descriptor pointer */
+       iowrite32(((u64)priv->omsg_ring[mbox].omd_phys >> 32),
+                       priv->regs + TSI721_OBDMAC_DPTRH(mbox));
+       iowrite32(((u64)priv->omsg_ring[mbox].omd_phys &
+                                       TSI721_OBDMAC_DPTRL_MASK),
+                       priv->regs + TSI721_OBDMAC_DPTRL(mbox));
+
+       /* Setup Outbound Message descriptor status FIFO */
+       iowrite32(((u64)priv->omsg_ring[mbox].sts_phys >> 32),
+                       priv->regs + TSI721_OBDMAC_DSBH(mbox));
+       iowrite32(((u64)priv->omsg_ring[mbox].sts_phys &
+                                       TSI721_OBDMAC_DSBL_MASK),
+                       priv->regs + TSI721_OBDMAC_DSBL(mbox));
+       iowrite32(TSI721_DMAC_DSSZ_SIZE(priv->omsg_ring[mbox].sts_size),
+               priv->regs + TSI721_OBDMAC_DSSZ(mbox));
+
+       /* Enable interrupts */
+
+#ifdef CONFIG_PCI_MSI
+       if (priv->flags & TSI721_USING_MSIX) {
+               /* Request interrupt service if we are in MSI-X mode */
+               rc = request_irq(
+                       priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
+                       tsi721_omsg_msix, 0,
+                       priv->msix[TSI721_VECT_OMB0_DONE + mbox].irq_name,
+                       (void *)mport);
+
+               if (rc) {
+                       dev_dbg(&priv->pdev->dev,
+                               "Unable to allocate MSI-X interrupt for "
+                               "MBOX%d-DONE\n", mbox);
+                       goto out_stat;
+               }
+
+               rc = request_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
+                       tsi721_omsg_msix, 0,
+                       priv->msix[TSI721_VECT_OMB0_INT + mbox].irq_name,
+                       (void *)mport);
+
+               if (rc) {
+                       dev_dbg(&priv->pdev->dev,
+                               "Unable to allocate MSI-X interrupt for "
+                               "MBOX%d-INT\n", mbox);
+                       free_irq(
+                               priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
+                               (void *)mport);
+                       goto out_stat;
+               }
+       }
+#endif /* CONFIG_PCI_MSI */
+
+       tsi721_omsg_interrupt_enable(priv, mbox, TSI721_OBDMAC_INT_ALL);
+
+       /* Initialize Outbound Message descriptors ring */
+       bd_ptr = priv->omsg_ring[mbox].omd_base;
+       bd_ptr[entries].type_id = cpu_to_le32(DTYPE5 << 29);
+       bd_ptr[entries].msg_info = 0;
+       bd_ptr[entries].next_lo =
+               cpu_to_le32((u64)priv->omsg_ring[mbox].omd_phys &
+               TSI721_OBDMAC_DPTRL_MASK);
+       bd_ptr[entries].next_hi =
+               cpu_to_le32((u64)priv->omsg_ring[mbox].omd_phys >> 32);
+       priv->omsg_ring[mbox].wr_count = 0;
+       mb();
+
+       /* Initialize Outbound Message engine */
+       iowrite32(TSI721_OBDMAC_CTL_INIT, priv->regs + TSI721_OBDMAC_CTL(mbox));
+       ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
+       udelay(10);
+
+       priv->omsg_init[mbox] = 1;
+
+       return 0;
+
+#ifdef CONFIG_PCI_MSI
+out_stat:
+       dma_free_coherent(&priv->pdev->dev,
+               priv->omsg_ring[mbox].sts_size * sizeof(struct tsi721_dma_sts),
+               priv->omsg_ring[mbox].sts_base,
+               priv->omsg_ring[mbox].sts_phys);
+
+       priv->omsg_ring[mbox].sts_base = NULL;
+#endif /* CONFIG_PCI_MSI */
+
+out_desc:
+       dma_free_coherent(&priv->pdev->dev,
+               (entries + 1) * sizeof(struct tsi721_omsg_desc),
+               priv->omsg_ring[mbox].omd_base,
+               priv->omsg_ring[mbox].omd_phys);
+
+       priv->omsg_ring[mbox].omd_base = NULL;
+
+out_buf:
+       for (i = 0; i < priv->omsg_ring[mbox].size; i++) {
+               if (priv->omsg_ring[mbox].omq_base[i]) {
+                       dma_free_coherent(&priv->pdev->dev,
+                               TSI721_MSG_BUFFER_SIZE,
+                               priv->omsg_ring[mbox].omq_base[i],
+                               priv->omsg_ring[mbox].omq_phys[i]);
+
+                       priv->omsg_ring[mbox].omq_base[i] = NULL;
+               }
+       }
+
+out:
+       return rc;
+}
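+
+/*
+ * Illustrative usage sketch, assuming the generic RapidIO messaging API
+ * declared in <linux/rio_drv.h>; clients reach tsi721_open_outb_mbox()
+ * through rio_request_outb_mbox() rather than calling it directly.  The
+ * names my_dev and my_outb_event below are hypothetical; my_outb_event
+ * has the mcback signature (mport, dev_id, mbox, slot) used above:
+ *
+ *	rc = rio_request_outb_mbox(mport, my_dev, 0, 64, my_outb_event);
+ *	...
+ *	rio_release_outb_mbox(mport, 0);
+ */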
+
+/**
+ * tsi721_close_outb_mbox - Close Tsi721 outbound mailbox
+ * @mport: Master port implementing the outbound message unit
+ * @mbox: Mailbox to close
+ */
+static void tsi721_close_outb_mbox(struct rio_mport *mport, int mbox)
+{
+       struct tsi721_device *priv = mport->priv;
+       u32 i;
+
+       if (!priv->omsg_init[mbox])
+               return;
+       priv->omsg_init[mbox] = 0;
+
+       /* Disable Interrupts */
+
+       tsi721_omsg_interrupt_disable(priv, mbox, TSI721_OBDMAC_INT_ALL);
+
+#ifdef CONFIG_PCI_MSI
+       if (priv->flags & TSI721_USING_MSIX) {
+               free_irq(priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
+                        (void *)mport);
+               free_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
+                        (void *)mport);
+       }
+#endif /* CONFIG_PCI_MSI */
+
+       /* Free OMSG Descriptor Status FIFO */
+       dma_free_coherent(&priv->pdev->dev,
+               priv->omsg_ring[mbox].sts_size * sizeof(struct tsi721_dma_sts),
+               priv->omsg_ring[mbox].sts_base,
+               priv->omsg_ring[mbox].sts_phys);
+
+       priv->omsg_ring[mbox].sts_base = NULL;
+
+       /* Free OMSG descriptors */
+       dma_free_coherent(&priv->pdev->dev,
+               (priv->omsg_ring[mbox].size + 1) *
+                       sizeof(struct tsi721_omsg_desc),
+               priv->omsg_ring[mbox].omd_base,
+               priv->omsg_ring[mbox].omd_phys);
+
+       priv->omsg_ring[mbox].omd_base = NULL;
+
+       /* Free message buffers */
+       for (i = 0; i < priv->omsg_ring[mbox].size; i++) {
+               if (priv->omsg_ring[mbox].omq_base[i]) {
+                       dma_free_coherent(&priv->pdev->dev,
+                               TSI721_MSG_BUFFER_SIZE,
+                               priv->omsg_ring[mbox].omq_base[i],
+                               priv->omsg_ring[mbox].omq_phys[i]);
+
+                       priv->omsg_ring[mbox].omq_base[i] = NULL;
+               }
+       }
+}
+
+/**
+ * tsi721_imsg_handler - Inbound Message Interrupt Handler
+ * @priv: pointer to tsi721 private data
+ * @ch: inbound message channel number to service
+ *
+ * Services channel interrupts from inbound messaging engine.
+ */
+static void tsi721_imsg_handler(struct tsi721_device *priv, int ch)
+{
+       u32 mbox = ch - 4;
+       u32 imsg_int;
+
+       spin_lock(&priv->imsg_ring[mbox].lock);
+
+       imsg_int = ioread32(priv->regs + TSI721_IBDMAC_INT(ch));
+
+       if (imsg_int & TSI721_IBDMAC_INT_SRTO)
+               dev_info(&priv->pdev->dev, "IB MBOX%d SRIO timeout\n",
+                       mbox);
+
+       if (imsg_int & TSI721_IBDMAC_INT_PC_ERROR)
+               dev_info(&priv->pdev->dev, "IB MBOX%d PCIe error\n",
+                       mbox);
+
+       if (imsg_int & TSI721_IBDMAC_INT_FQ_LOW)
+               dev_info(&priv->pdev->dev,
+                       "IB MBOX%d IB free queue low\n", mbox);
+
+       /* Clear IB channel interrupts */
+       iowrite32(imsg_int, priv->regs + TSI721_IBDMAC_INT(ch));
+
+       /* If an IB Msg is received notify the upper layer */
+       if (imsg_int & TSI721_IBDMAC_INT_DQ_RCV &&
+               priv->mport->inb_msg[mbox].mcback)
+               priv->mport->inb_msg[mbox].mcback(priv->mport,
+                               priv->imsg_ring[mbox].dev_id, mbox, -1);
+
+       if (!(priv->flags & TSI721_USING_MSIX)) {
+               u32 ch_inte;
+
+               /* Re-enable channel interrupts */
+               ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE);
+               ch_inte |= TSI721_INT_IMSG_CHAN(ch);
+               iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE);
+       }
+
+       spin_unlock(&priv->imsg_ring[mbox].lock);
+}
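+
+/*
+ * Note on the notification contract (a sketch, not mandated by the RIO
+ * core): on TSI721_IBDMAC_INT_DQ_RCV the registered inbound callback is
+ * invoked once with msg slot -1; a client is expected to drain the queue
+ * itself, typically by calling rio_get_inb_message() in a loop until it
+ * returns NULL and re-posting buffers with rio_add_inb_buffer().
+ */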
+
+/**
+ * tsi721_open_inb_mbox - Initialize Tsi721 inbound mailbox
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @dev_id: Device specific pointer to pass on event
+ * @mbox: Mailbox to open
+ * @entries: Number of entries in the inbound mailbox ring
+ */
+static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id,
+                               int mbox, int entries)
+{
+       struct tsi721_device *priv = mport->priv;
+       int ch = mbox + 4;
+       int i;
+       u64 *free_ptr;
+       int rc = 0;
+
+       if ((entries < TSI721_IMSGD_MIN_RING_SIZE) ||
+           (entries > TSI721_IMSGD_RING_SIZE) ||
+           (!is_power_of_2(entries)) || mbox >= RIO_MAX_MBOX) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       /* Initialize IB Messaging Ring */
+       priv->imsg_ring[mbox].dev_id = dev_id;
+       priv->imsg_ring[mbox].size = entries;
+       priv->imsg_ring[mbox].rx_slot = 0;
+       priv->imsg_ring[mbox].desc_rdptr = 0;
+       priv->imsg_ring[mbox].fq_wrptr = 0;
+       for (i = 0; i < priv->imsg_ring[mbox].size; i++)
+               priv->imsg_ring[mbox].imq_base[i] = NULL;
+       spin_lock_init(&priv->imsg_ring[mbox].lock);
+
+       /* Allocate buffers for incoming messages */
+       priv->imsg_ring[mbox].buf_base =
+               dma_alloc_coherent(&priv->pdev->dev,
+                                  entries * TSI721_MSG_BUFFER_SIZE,
+                                  &priv->imsg_ring[mbox].buf_phys,
+                                  GFP_KERNEL);
+
+       if (priv->imsg_ring[mbox].buf_base == NULL) {
+               dev_err(&priv->pdev->dev,
+                       "Failed to allocate buffers for IB MBOX%d\n", mbox);
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       /* Allocate memory for circular free list */
+       priv->imsg_ring[mbox].imfq_base =
+               dma_alloc_coherent(&priv->pdev->dev,
+                                  entries * 8,
+                                  &priv->imsg_ring[mbox].imfq_phys,
+                                  GFP_KERNEL);
+
+       if (priv->imsg_ring[mbox].imfq_base == NULL) {
+               dev_err(&priv->pdev->dev,
+                       "Failed to allocate free queue for IB MBOX%d\n", mbox);
+               rc = -ENOMEM;
+               goto out_buf;
+       }
+
+       /* Allocate memory for Inbound message descriptors */
+       priv->imsg_ring[mbox].imd_base =
+               dma_alloc_coherent(&priv->pdev->dev,
+                                  entries * sizeof(struct tsi721_imsg_desc),
+                                  &priv->imsg_ring[mbox].imd_phys, GFP_KERNEL);
+
+       if (priv->imsg_ring[mbox].imd_base == NULL) {
+               dev_err(&priv->pdev->dev,
+                       "Failed to allocate descriptor memory for IB MBOX%d\n",
+                       mbox);
+               rc = -ENOMEM;
+               goto out_dma;
+       }
+
+       /* Fill free buffer pointer list */
+       free_ptr = priv->imsg_ring[mbox].imfq_base;
+       for (i = 0; i < entries; i++)
+               free_ptr[i] = cpu_to_le64(
+                               (u64)(priv->imsg_ring[mbox].buf_phys) +
+                               i * TSI721_MSG_BUFFER_SIZE);
+
+       mb();
+
+       /*
+        * For mapping of inbound SRIO Messages into appropriate queues we need
+        * to set Inbound Device ID register in the messaging engine. We do it
+        * once when first inbound mailbox is requested.
+        */
+       if (!(priv->flags & TSI721_IMSGID_SET)) {
+               iowrite32((u32)priv->mport->host_deviceid,
+                       priv->regs + TSI721_IB_DEVID);
+               priv->flags |= TSI721_IMSGID_SET;
+       }
+
+       /*
+        * Configure Inbound Messaging channel (ch = mbox + 4)
+        */
+
+       /* Setup Inbound Message free queue */
+       iowrite32(((u64)priv->imsg_ring[mbox].imfq_phys >> 32),
+               priv->regs + TSI721_IBDMAC_FQBH(ch));
+       iowrite32(((u64)priv->imsg_ring[mbox].imfq_phys &
+                       TSI721_IBDMAC_FQBL_MASK),
+               priv->regs + TSI721_IBDMAC_FQBL(ch));
+       iowrite32(TSI721_DMAC_DSSZ_SIZE(entries),
+               priv->regs + TSI721_IBDMAC_FQSZ(ch));
+
+       /* Setup Inbound Message descriptor queue */
+       iowrite32(((u64)priv->imsg_ring[mbox].imd_phys >> 32),
+               priv->regs + TSI721_IBDMAC_DQBH(ch));
+       iowrite32(((u64)priv->imsg_ring[mbox].imd_phys &
+                       TSI721_IBDMAC_DQBL_MASK),
+               priv->regs + TSI721_IBDMAC_DQBL(ch));
+       iowrite32(TSI721_DMAC_DSSZ_SIZE(entries),
+               priv->regs + TSI721_IBDMAC_DQSZ(ch));
+
+       /* Enable interrupts */
+
+#ifdef CONFIG_PCI_MSI
+       if (priv->flags & TSI721_USING_MSIX) {
+               /* Request interrupt service if we are in MSI-X mode */
+               rc = request_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
+                       tsi721_imsg_msix, 0,
+                       priv->msix[TSI721_VECT_IMB0_RCV + mbox].irq_name,
+                       (void *)mport);
+
+               if (rc) {
+                       dev_dbg(&priv->pdev->dev,
+                               "Unable to allocate MSI-X interrupt for "
+                               "IBOX%d-RCV\n", mbox);
+                       goto out_desc;
+               }
+
+               rc = request_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
+                       tsi721_imsg_msix, 0,
+                       priv->msix[TSI721_VECT_IMB0_INT + mbox].irq_name,
+                       (void *)mport);
+
+               if (rc) {
+                       dev_dbg(&priv->pdev->dev,
+                               "Unable to allocate MSI-X interrupt for "
+                               "IBOX%d-INT\n", mbox);
+                       free_irq(
+                               priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
+                               (void *)mport);
+                       goto out_desc;
+               }
+       }
+#endif /* CONFIG_PCI_MSI */
+
+       tsi721_imsg_interrupt_enable(priv, ch, TSI721_IBDMAC_INT_ALL);
+
+       /* Initialize Inbound Message Engine */
+       iowrite32(TSI721_IBDMAC_CTL_INIT, priv->regs + TSI721_IBDMAC_CTL(ch));
+       ioread32(priv->regs + TSI721_IBDMAC_CTL(ch));
+       udelay(10);
+       priv->imsg_ring[mbox].fq_wrptr = entries - 1;
+       iowrite32(entries - 1, priv->regs + TSI721_IBDMAC_FQWP(ch));
+
+       priv->imsg_init[mbox] = 1;
+       return 0;
+
+#ifdef CONFIG_PCI_MSI
+out_desc:
+       dma_free_coherent(&priv->pdev->dev,
+               priv->imsg_ring[mbox].size * sizeof(struct tsi721_imsg_desc),
+               priv->imsg_ring[mbox].imd_base,
+               priv->imsg_ring[mbox].imd_phys);
+
+       priv->imsg_ring[mbox].imd_base = NULL;
+#endif /* CONFIG_PCI_MSI */
+
+out_dma:
+       dma_free_coherent(&priv->pdev->dev,
+               priv->imsg_ring[mbox].size * 8,
+               priv->imsg_ring[mbox].imfq_base,
+               priv->imsg_ring[mbox].imfq_phys);
+
+       priv->imsg_ring[mbox].imfq_base = NULL;
+
+out_buf:
+       dma_free_coherent(&priv->pdev->dev,
+               priv->imsg_ring[mbox].size * TSI721_MSG_BUFFER_SIZE,
+               priv->imsg_ring[mbox].buf_base,
+               priv->imsg_ring[mbox].buf_phys);
+
+       priv->imsg_ring[mbox].buf_base = NULL;
+
+out:
+       return rc;
+}
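+
+/*
+ * Illustrative usage sketch, assuming the generic API from
+ * <linux/rio_drv.h>; my_dev, my_inb_event and my_bufs are hypothetical.
+ * In this driver, received messages are only delivered into buffers that
+ * were posted in advance:
+ *
+ *	rc = rio_request_inb_mbox(mport, my_dev, 0, 32, my_inb_event);
+ *	for (i = 0; i < 32 && !rc; i++)
+ *		rc = rio_add_inb_buffer(mport, 0, my_bufs[i]);
+ */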
+
+/**
+ * tsi721_close_inb_mbox - Shut down Tsi721 inbound mailbox
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @mbox: Mailbox to close
+ */
+static void tsi721_close_inb_mbox(struct rio_mport *mport, int mbox)
+{
+       struct tsi721_device *priv = mport->priv;
+       u32 rx_slot;
+       int ch = mbox + 4;
+
+       if (!priv->imsg_init[mbox]) /* mbox isn't initialized yet */
+               return;
+       priv->imsg_init[mbox] = 0;
+
+       /* Disable Inbound Messaging Engine */
+
+       /* Disable Interrupts */
+       tsi721_imsg_interrupt_disable(priv, ch, TSI721_IBDMAC_INT_MASK);
+
+#ifdef CONFIG_PCI_MSI
+       if (priv->flags & TSI721_USING_MSIX) {
+               free_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
+                               (void *)mport);
+               free_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
+                               (void *)mport);
+       }
+#endif /* CONFIG_PCI_MSI */
+
+       /* Clear Inbound Buffer Queue */
+       for (rx_slot = 0; rx_slot < priv->imsg_ring[mbox].size; rx_slot++)
+               priv->imsg_ring[mbox].imq_base[rx_slot] = NULL;
+
+       /* Free memory allocated for message buffers */
+       dma_free_coherent(&priv->pdev->dev,
+               priv->imsg_ring[mbox].size * TSI721_MSG_BUFFER_SIZE,
+               priv->imsg_ring[mbox].buf_base,
+               priv->imsg_ring[mbox].buf_phys);
+
+       priv->imsg_ring[mbox].buf_base = NULL;
+
+       /* Free memory allocated for free pointer list */
+       dma_free_coherent(&priv->pdev->dev,
+               priv->imsg_ring[mbox].size * 8,
+               priv->imsg_ring[mbox].imfq_base,
+               priv->imsg_ring[mbox].imfq_phys);
+
+       priv->imsg_ring[mbox].imfq_base = NULL;
+
+       /* Free memory allocated for RX descriptors */
+       dma_free_coherent(&priv->pdev->dev,
+               priv->imsg_ring[mbox].size * sizeof(struct tsi721_imsg_desc),
+               priv->imsg_ring[mbox].imd_base,
+               priv->imsg_ring[mbox].imd_phys);
+
+       priv->imsg_ring[mbox].imd_base = NULL;
+}
+
+/**
+ * tsi721_add_inb_buffer - Add buffer to the Tsi721 inbound message queue
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @mbox: Inbound mailbox number
+ * @buf: Buffer to add to inbound queue
+ */
+static int tsi721_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf)
+{
+       struct tsi721_device *priv = mport->priv;
+       u32 rx_slot;
+       int rc = 0;
+
+       rx_slot = priv->imsg_ring[mbox].rx_slot;
+       if (priv->imsg_ring[mbox].imq_base[rx_slot]) {
+               dev_err(&priv->pdev->dev,
+                       "Error adding inbound buffer %d, buffer exists\n",
+                       rx_slot);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       priv->imsg_ring[mbox].imq_base[rx_slot] = buf;
+
+       if (++priv->imsg_ring[mbox].rx_slot == priv->imsg_ring[mbox].size)
+               priv->imsg_ring[mbox].rx_slot = 0;
+
+out:
+       return rc;
+}
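+
+/*
+ * Example (illustrative): with a ring of size 32, a client may keep at
+ * most 32 buffers outstanding; a 33rd rio_add_inb_buffer() call made
+ * before any message has been fetched lands on an occupied rx_slot and
+ * fails with -EINVAL as above.
+ */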
+
+/**
+ * tsi721_get_inb_message - Fetch inbound message from the Tsi721 MSG Queue
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @mbox: Inbound mailbox number
+ *
+ * Returns pointer to the message on success or NULL on failure.
+ */
+static void *tsi721_get_inb_message(struct rio_mport *mport, int mbox)
+{
+       struct tsi721_device *priv = mport->priv;
+       struct tsi721_imsg_desc *desc;
+       u32 rx_slot;
+       void *rx_virt = NULL;
+       u64 rx_phys;
+       void *buf = NULL;
+       u64 *free_ptr;
+       int ch = mbox + 4;
+       int msg_size;
+
+       if (!priv->imsg_init[mbox])
+               return NULL;
+
+       desc = priv->imsg_ring[mbox].imd_base;
+       desc += priv->imsg_ring[mbox].desc_rdptr;
+
+       if (!(le32_to_cpu(desc->msg_info) & TSI721_IMD_HO))
+               goto out;
+
+       rx_slot = priv->imsg_ring[mbox].rx_slot;
+       while (priv->imsg_ring[mbox].imq_base[rx_slot] == NULL) {
+               if (++rx_slot == priv->imsg_ring[mbox].size)
+                       rx_slot = 0;
+       }
+
+       rx_phys = ((u64)le32_to_cpu(desc->bufptr_hi) << 32) |
+                       le32_to_cpu(desc->bufptr_lo);
+
+       rx_virt = priv->imsg_ring[mbox].buf_base +
+                 (rx_phys - (u64)priv->imsg_ring[mbox].buf_phys);
+
+       buf = priv->imsg_ring[mbox].imq_base[rx_slot];
+       msg_size = le32_to_cpu(desc->msg_info) & TSI721_IMD_BCOUNT;
+       if (msg_size == 0)
+               msg_size = RIO_MAX_MSG_SIZE;
+
+       memcpy(buf, rx_virt, msg_size);
+       priv->imsg_ring[mbox].imq_base[rx_slot] = NULL;
+
+       desc->msg_info &= cpu_to_le32(~TSI721_IMD_HO);
+       if (++priv->imsg_ring[mbox].desc_rdptr == priv->imsg_ring[mbox].size)
+               priv->imsg_ring[mbox].desc_rdptr = 0;
+
+       iowrite32(priv->imsg_ring[mbox].desc_rdptr,
+               priv->regs + TSI721_IBDMAC_DQRP(ch));
+
+       /* Return free buffer into the pointer list */
+       free_ptr = priv->imsg_ring[mbox].imfq_base;
+       free_ptr[priv->imsg_ring[mbox].fq_wrptr] = cpu_to_le64(rx_phys);
+
+       if (++priv->imsg_ring[mbox].fq_wrptr == priv->imsg_ring[mbox].size)
+               priv->imsg_ring[mbox].fq_wrptr = 0;
+
+       iowrite32(priv->imsg_ring[mbox].fq_wrptr,
+               priv->regs + TSI721_IBDMAC_FQWP(ch));
+out:
+       return buf;
+}
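+
+/*
+ * Worked example (illustrative, addresses assumed): the virtual address of
+ * the received data is recovered from the descriptor's bus address by
+ * rx_virt = buf_base + (rx_phys - buf_phys).  With buf_phys == 0x20000000
+ * and a descriptor bufptr of 0x20003000, the payload sits at offset 0x3000,
+ * i.e. the fourth TSI721_MSG_BUFFER_SIZE (0x1000) buffer filled into the
+ * free list by tsi721_open_inb_mbox().
+ */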
+
+/**
+ * tsi721_messages_init - Initialization of Messaging Engine
+ * @priv: pointer to tsi721 private data
+ *
+ * Configures Tsi721 messaging engine.
+ */
+static int tsi721_messages_init(struct tsi721_device *priv)
+{
+       int     ch;
+
+       iowrite32(0, priv->regs + TSI721_SMSG_ECC_LOG);
+       iowrite32(0, priv->regs + TSI721_RETRY_GEN_CNT);
+       iowrite32(0, priv->regs + TSI721_RETRY_RX_CNT);
+
+       /* Set SRIO Message Request/Response Timeout */
+       iowrite32(TSI721_RQRPTO_VAL, priv->regs + TSI721_RQRPTO);
+
+       /* Initialize Inbound Messaging Engine Registers */
+       for (ch = 0; ch < TSI721_IMSG_CHNUM; ch++) {
+               /* Clear interrupt bits */
+               iowrite32(TSI721_IBDMAC_INT_MASK,
+                       priv->regs + TSI721_IBDMAC_INT(ch));
+               /* Clear Status */
+               iowrite32(0, priv->regs + TSI721_IBDMAC_STS(ch));
+
+               iowrite32(TSI721_SMSG_ECC_COR_LOG_MASK,
+                               priv->regs + TSI721_SMSG_ECC_COR_LOG(ch));
+               iowrite32(TSI721_SMSG_ECC_NCOR_MASK,
+                               priv->regs + TSI721_SMSG_ECC_NCOR(ch));
+       }
+
+       return 0;
+}
+
+/**
+ * tsi721_disable_ints - disables all device interrupts
+ * @priv: pointer to tsi721 private data
+ */
+static void tsi721_disable_ints(struct tsi721_device *priv)
+{
+       int ch;
+
+       /* Disable all device level interrupts */
+       iowrite32(0, priv->regs + TSI721_DEV_INTE);
+
+       /* Disable all Device Channel interrupts */
+       iowrite32(0, priv->regs + TSI721_DEV_CHAN_INTE);
+
+       /* Disable all Inbound Msg Channel interrupts */
+       for (ch = 0; ch < TSI721_IMSG_CHNUM; ch++)
+               iowrite32(0, priv->regs + TSI721_IBDMAC_INTE(ch));
+
+       /* Disable all Outbound Msg Channel interrupts */
+       for (ch = 0; ch < TSI721_OMSG_CHNUM; ch++)
+               iowrite32(0, priv->regs + TSI721_OBDMAC_INTE(ch));
+
+       /* Disable all general messaging interrupts */
+       iowrite32(0, priv->regs + TSI721_SMSG_INTE);
+
+       /* Disable all BDMA Channel interrupts */
+       for (ch = 0; ch < TSI721_DMA_MAXCH; ch++)
+               iowrite32(0, priv->regs + TSI721_DMAC_INTE(ch));
+
+       /* Disable all general BDMA interrupts */
+       iowrite32(0, priv->regs + TSI721_BDMA_INTE);
+
+       /* Disable all SRIO Channel interrupts */
+       for (ch = 0; ch < TSI721_SRIO_MAXCH; ch++)
+               iowrite32(0, priv->regs + TSI721_SR_CHINTE(ch));
+
+       /* Disable all general SR2PC interrupts */
+       iowrite32(0, priv->regs + TSI721_SR2PC_GEN_INTE);
+
+       /* Disable all PC2SR interrupts */
+       iowrite32(0, priv->regs + TSI721_PC2SR_INTE);
+
+       /* Disable all I2C interrupts */
+       iowrite32(0, priv->regs + TSI721_I2C_INT_ENABLE);
+
+       /* Disable SRIO MAC interrupts */
+       iowrite32(0, priv->regs + TSI721_RIO_EM_INT_ENABLE);
+       iowrite32(0, priv->regs + TSI721_RIO_EM_DEV_INT_EN);
+}
+
+/**
+ * tsi721_setup_mport - Setup Tsi721 as RapidIO subsystem master port
+ * @priv: pointer to tsi721 private data
+ *
+ * Configures Tsi721 as RapidIO master port.
+ */
+static int __devinit tsi721_setup_mport(struct tsi721_device *priv)
+{
+       struct pci_dev *pdev = priv->pdev;
+       int err = 0;
+       struct rio_ops *ops;
+
+       struct rio_mport *mport;
+
+       ops = kzalloc(sizeof(struct rio_ops), GFP_KERNEL);
+       if (!ops) {
+               dev_dbg(&pdev->dev, "Unable to allocate memory for rio_ops\n");
+               return -ENOMEM;
+       }
+
+       ops->lcread = tsi721_lcread;
+       ops->lcwrite = tsi721_lcwrite;
+       ops->cread = tsi721_cread_dma;
+       ops->cwrite = tsi721_cwrite_dma;
+       ops->dsend = tsi721_dsend;
+       ops->open_inb_mbox = tsi721_open_inb_mbox;
+       ops->close_inb_mbox = tsi721_close_inb_mbox;
+       ops->open_outb_mbox = tsi721_open_outb_mbox;
+       ops->close_outb_mbox = tsi721_close_outb_mbox;
+       ops->add_outb_message = tsi721_add_outb_message;
+       ops->add_inb_buffer = tsi721_add_inb_buffer;
+       ops->get_inb_message = tsi721_get_inb_message;
+
+       mport = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
+       if (!mport) {
+               kfree(ops);
+               dev_dbg(&pdev->dev, "Unable to allocate memory for mport\n");
+               return -ENOMEM;
+       }
+
+       mport->ops = ops;
+       mport->index = 0;
+       mport->sys_size = 0; /* small system */
+       mport->phy_type = RIO_PHY_SERIAL;
+       mport->priv = (void *)priv;
+       mport->phys_efptr = 0x100;
+
+       INIT_LIST_HEAD(&mport->dbells);
+
+       rio_init_dbell_res(&mport->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff);
+       rio_init_mbox_res(&mport->riores[RIO_INB_MBOX_RESOURCE], 0, 0);
+       rio_init_mbox_res(&mport->riores[RIO_OUTB_MBOX_RESOURCE], 0, 0);
+       strcpy(mport->name, "Tsi721 mport");
+
+       /* Hook up interrupt handler */
+
+#ifdef CONFIG_PCI_MSI
+       if (!tsi721_enable_msix(priv))
+               priv->flags |= TSI721_USING_MSIX;
+       else if (!pci_enable_msi(pdev))
+               priv->flags |= TSI721_USING_MSI;
+       else
+               dev_info(&pdev->dev,
+                        "MSI/MSI-X is not available. Using legacy INTx.\n");
+#endif /* CONFIG_PCI_MSI */
+
+       err = tsi721_request_irq(mport);
+
+       if (!err) {
+               tsi721_interrupts_init(priv);
+               ops->pwenable = tsi721_pw_enable;
+       } else {
+               dev_err(&pdev->dev, "Unable to get assigned PCI IRQ "
+                       "vector %02X err=0x%x\n", pdev->irq, err);
+               kfree(mport);
+               kfree(ops);
+               return err;
+       }
+
+       /* Enable SRIO link */
+       iowrite32(ioread32(priv->regs + TSI721_DEVCTL) |
+                 TSI721_DEVCTL_SRBOOT_CMPL,
+                 priv->regs + TSI721_DEVCTL);
+
+       rio_register_mport(mport);
+       priv->mport = mport;
+
+       if (mport->host_deviceid >= 0)
+               iowrite32(RIO_PORT_GEN_HOST | RIO_PORT_GEN_MASTER |
+                         RIO_PORT_GEN_DISCOVERED,
+                         priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR));
+       else
+               iowrite32(0, priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR));
+
+       return 0;
+}
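+
+/*
+ * Once rio_register_mport() completes, the generic RapidIO core reaches
+ * this device through the rio_ops table wired above; for example
+ * (illustrative), rio_local_read_config_32(mport, RIO_DID_CSR, &did)
+ * ends up in tsi721_lcread().
+ */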
+
+static int __devinit tsi721_probe(struct pci_dev *pdev,
+                                 const struct pci_device_id *id)
+{
+       struct tsi721_device *priv;
+       int i;
+       int err;
+       u32 regval;
+
+       priv = kzalloc(sizeof(struct tsi721_device), GFP_KERNEL);
+       if (priv == NULL) {
+               dev_err(&pdev->dev, "Failed to allocate memory for device\n");
+               err = -ENOMEM;
+               goto err_exit;
+       }
+
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to enable PCI device\n");
+               goto err_clean;
+       }
+
+       priv->pdev = pdev;
+
+#ifdef DEBUG
+       for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+               dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n",
+                       i, (unsigned long long)pci_resource_start(pdev, i),
+                       (unsigned long)pci_resource_len(pdev, i),
+                       pci_resource_flags(pdev, i));
+       }
+#endif
+       /*
+        * Verify BAR configuration
+        */
+
+       /* BAR_0 (registers) must be 512KB+ in 32-bit address space */
+       if (!(pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM) ||
+           pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM_64 ||
+           pci_resource_len(pdev, BAR_0) < TSI721_REG_SPACE_SIZE) {
+               dev_err(&pdev->dev,
+                       "Missing or misconfigured CSR BAR0, aborting.\n");
+               err = -ENODEV;
+               goto err_disable_pdev;
+       }
+
+       /* BAR_1 (outbound doorbells) must be 16MB+ in 32-bit address space */
+       if (!(pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM) ||
+           pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM_64 ||
+           pci_resource_len(pdev, BAR_1) < TSI721_DB_WIN_SIZE) {
+               dev_err(&pdev->dev,
+                       "Missing or misconfigured Doorbell BAR1, aborting.\n");
+               err = -ENODEV;
+               goto err_disable_pdev;
+       }
+
+       /*
+        * BAR_2 and BAR_4 (outbound translation) must be in 64-bit PCIe address
+        * space.
+        * NOTE: BAR_2 and BAR_4 are not used by this version of the driver.
+        * It may be a good idea to keep them disabled using HW configuration
+        * to save PCI memory space.
+        */
+       if ((pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM) &&
+           (pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM_64)) {
+               dev_info(&pdev->dev, "Outbound BAR2 is enabled but not used.\n");
+       }
+
+       if ((pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM) &&
+           (pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM_64)) {
+               dev_info(&pdev->dev, "Outbound BAR4 is enabled but not used.\n");
+       }
+
+       err = pci_request_regions(pdev, DRV_NAME);
+       if (err) {
+               dev_err(&pdev->dev, "Cannot obtain PCI resources, "
+                       "aborting.\n");
+               goto err_disable_pdev;
+       }
+
+       pci_set_master(pdev);
+
+       priv->regs = pci_ioremap_bar(pdev, BAR_0);
+       if (!priv->regs) {
+               dev_err(&pdev->dev,
+                       "Unable to map device register space, aborting\n");
+               err = -ENOMEM;
+               goto err_free_res;
+       }
+
+       priv->odb_base = pci_ioremap_bar(pdev, BAR_1);
+       if (!priv->odb_base) {
+               dev_err(&pdev->dev,
+                       "Unable to map outbound doorbell space, aborting\n");
+               err = -ENOMEM;
+               goto err_unmap_bars;
+       }
+
+       /* Configure DMA attributes. */
+       if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+               if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
+                       dev_err(&pdev->dev, "Unable to set DMA mask\n");
+                       err = -EIO;
+                       goto err_unmap_bars;
+               }
+
+               if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
+                       dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
+       } else {
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+               if (err)
+                       dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
+       }
+
+       /* Clear "no snoop" and "relaxed ordering" bits. */
+       pci_read_config_dword(pdev, 0x40 + PCI_EXP_DEVCTL, &regval);
+       regval &= ~(PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN);
+       pci_write_config_dword(pdev, 0x40 + PCI_EXP_DEVCTL, regval);
+
+       /*
+        * FIXUP: correct offsets of MSI-X tables in the MSI-X Capability Block
+        */
+       pci_write_config_dword(pdev, TSI721_PCIECFG_EPCTL, 0x01);
+       pci_write_config_dword(pdev, TSI721_PCIECFG_MSIXTBL,
+                                               TSI721_MSIXTBL_OFFSET);
+       pci_write_config_dword(pdev, TSI721_PCIECFG_MSIXPBA,
+                                               TSI721_MSIXPBA_OFFSET);
+       pci_write_config_dword(pdev, TSI721_PCIECFG_EPCTL, 0);
+       /* End of FIXUP */
+
+       tsi721_disable_ints(priv);
+
+       tsi721_init_pc2sr_mapping(priv);
+       tsi721_init_sr2pc_mapping(priv);
+
+       if (tsi721_bdma_init(priv)) {
+               dev_err(&pdev->dev, "BDMA initialization failed, aborting\n");
+               err = -ENOMEM;
+               goto err_unmap_bars;
+       }
+
+       err = tsi721_doorbell_init(priv);
+       if (err)
+               goto err_free_bdma;
+
+       tsi721_port_write_init(priv);
+
+       err = tsi721_messages_init(priv);
+       if (err)
+               goto err_free_consistent;
+
+       err = tsi721_setup_mport(priv);
+       if (err)
+               goto err_free_consistent;
+
+       return 0;
+
+err_free_consistent:
+       tsi721_doorbell_free(priv);
+err_free_bdma:
+       tsi721_bdma_free(priv);
+err_unmap_bars:
+       if (priv->regs)
+               iounmap(priv->regs);
+       if (priv->odb_base)
+               iounmap(priv->odb_base);
+err_free_res:
+       pci_release_regions(pdev);
+       pci_clear_master(pdev);
+err_disable_pdev:
+       pci_disable_device(pdev);
+err_clean:
+       kfree(priv);
+err_exit:
+       return err;
+}
+
+static DEFINE_PCI_DEVICE_TABLE(tsi721_pci_tbl) = {
+       { PCI_DEVICE(PCI_VENDOR_ID_IDT, PCI_DEVICE_ID_TSI721) },
+       { 0, }  /* terminate list */
+};
+
+MODULE_DEVICE_TABLE(pci, tsi721_pci_tbl);
+
+static struct pci_driver tsi721_driver = {
+       .name           = "tsi721",
+       .id_table       = tsi721_pci_tbl,
+       .probe          = tsi721_probe,
+};
+
+static int __init tsi721_init(void)
+{
+       return pci_register_driver(&tsi721_driver);
+}
+
+static void __exit tsi721_exit(void)
+{
+       pci_unregister_driver(&tsi721_driver);
+}
+
+/*
+ * Registered via device_initcall(): the driver is built-in only and not
+ * unloadable, so the tsi721_exit() stub above is left unreferenced.
+ */
+device_initcall(tsi721_init);
diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
new file mode 100644 (file)
index 0000000..58be4de
--- /dev/null
@@ -0,0 +1,766 @@
+/*
+ * Tsi721 PCIExpress-to-SRIO bridge definitions
+ *
+ * Copyright 2011, Integrated Device Technology, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#ifndef __TSI721_H
+#define __TSI721_H
+
+#define DRV_NAME       "tsi721"
+
+#define DEFAULT_HOPCOUNT       0xff
+#define DEFAULT_DESTID         0xff
+
+/* PCI device ID */
+#define PCI_DEVICE_ID_TSI721           0x80ab
+
+#define BAR_0  0
+#define BAR_1  1
+#define BAR_2  2
+#define BAR_4  4
+
+#define TSI721_PC2SR_BARS      2
+#define TSI721_PC2SR_WINS      8
+#define TSI721_PC2SR_ZONES     8
+#define TSI721_MAINT_WIN       0 /* Window for outbound maintenance requests */
+#define IDB_QUEUE              0 /* Inbound Doorbell Queue to use */
+#define IDB_QSIZE              512 /* Inbound Doorbell Queue size */
+
+/* Memory space sizes */
+#define TSI721_REG_SPACE_SIZE          (512 * 1024) /* 512K */
+#define TSI721_DB_WIN_SIZE             (16 * 1024 * 1024) /* 16MB */
+
+#define  RIO_TT_CODE_8         0x00000000
+#define  RIO_TT_CODE_16                0x00000001
+
+#define TSI721_DMA_MAXCH       8
+#define TSI721_DMA_MINSTSSZ    32
+#define TSI721_DMA_STSBLKSZ    8
+
+#define TSI721_SRIO_MAXCH      8
+
+#define DBELL_SID(buf)         (((u8)buf[2] << 8) | (u8)buf[3])
+#define DBELL_TID(buf)         (((u8)buf[4] << 8) | (u8)buf[5])
+#define DBELL_INF(buf)         (((u8)buf[0] << 8) | (u8)buf[1])
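+
+/*
+ * Example (illustrative): the doorbell payload is big-endian, so for an
+ * inbound entry with buf[2] = 0x12 and buf[3] = 0x34, DBELL_SID(buf)
+ * reassembles the 16-bit source ID 0x1234.
+ */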
+
+#define TSI721_RIO_PW_MSG_SIZE 16  /* Tsi721 saves only 16 bytes of PW msg */
+
+/* Register definitions */
+
+/*
+ * Registers in PCIe configuration space
+ */
+
+#define TSI721_PCIECFG_MSIXTBL 0x0a4
+#define TSI721_MSIXTBL_OFFSET  0x2c000
+#define TSI721_PCIECFG_MSIXPBA 0x0a8
+#define TSI721_MSIXPBA_OFFSET  0x2a000
+#define TSI721_PCIECFG_EPCTL   0x400
+
+/*
+ * Event Management Registers
+ */
+
+#define TSI721_RIO_EM_INT_STAT         0x10910
+#define TSI721_RIO_EM_INT_STAT_PW_RX   0x00010000
+
+#define TSI721_RIO_EM_INT_ENABLE       0x10914
+#define TSI721_RIO_EM_INT_ENABLE_PW_RX 0x00010000
+
+#define TSI721_RIO_EM_DEV_INT_EN       0x10930
+#define TSI721_RIO_EM_DEV_INT_EN_INT   0x00000001
+
+/*
+ * Port-Write Block Registers
+ */
+
+#define TSI721_RIO_PW_CTL              0x10a04
+#define TSI721_RIO_PW_CTL_PW_TIMER     0xf0000000
+#define TSI721_RIO_PW_CTL_PWT_DIS      (0 << 28)
+#define TSI721_RIO_PW_CTL_PWT_103      (1 << 28)
+#define TSI721_RIO_PW_CTL_PWT_205      (1 << 29)
+#define TSI721_RIO_PW_CTL_PWT_410      (1 << 30)
+#define TSI721_RIO_PW_CTL_PWT_820      (1 << 31)
+#define TSI721_RIO_PW_CTL_PWC_MODE     0x01000000
+#define TSI721_RIO_PW_CTL_PWC_CONT     0x00000000
+#define TSI721_RIO_PW_CTL_PWC_REL      0x01000000
+
+#define TSI721_RIO_PW_RX_STAT          0x10a10
+#define TSI721_RIO_PW_RX_STAT_WR_SIZE  0x0000f000
+#define TSI_RIO_PW_RX_STAT_WDPTR       0x00000100
+#define TSI721_RIO_PW_RX_STAT_PW_SHORT 0x00000008
+#define TSI721_RIO_PW_RX_STAT_PW_TRUNC 0x00000004
+#define TSI721_RIO_PW_RX_STAT_PW_DISC  0x00000002
+#define TSI721_RIO_PW_RX_STAT_PW_VAL   0x00000001
+
+#define TSI721_RIO_PW_RX_CAPT(x)       (0x10a20 + (x)*4)
+
+/*
+ * Inbound Doorbells
+ */
+
+#define TSI721_IDB_ENTRY_SIZE  64
+
+#define TSI721_IDQ_CTL(x)      (0x20000 + (x) * 0x1000)
+#define TSI721_IDQ_SUSPEND     0x00000002
+#define TSI721_IDQ_INIT                0x00000001
+
+#define TSI721_IDQ_STS(x)      (0x20004 + (x) * 0x1000)
+#define TSI721_IDQ_RUN         0x00200000
+
+#define TSI721_IDQ_MASK(x)     (0x20008 + (x) * 0x1000)
+#define TSI721_IDQ_MASK_MASK   0xffff0000
+#define TSI721_IDQ_MASK_PATT   0x0000ffff
+
+#define TSI721_IDQ_RP(x)       (0x2000c + (x) * 0x1000)
+#define TSI721_IDQ_RP_PTR      0x0007ffff
+
+#define TSI721_IDQ_WP(x)       (0x20010 + (x) * 0x1000)
+#define TSI721_IDQ_WP_PTR      0x0007ffff
+
+#define TSI721_IDQ_BASEL(x)    (0x20014 + (x) * 0x1000)
+#define TSI721_IDQ_BASEL_ADDR  0xffffffc0
+#define TSI721_IDQ_BASEU(x)    (0x20018 + (x) * 0x1000)
+#define TSI721_IDQ_SIZE(x)     (0x2001c + (x) * 0x1000)
+#define TSI721_IDQ_SIZE_VAL(size)      (__fls(size) - 4)
+#define TSI721_IDQ_SIZE_MIN    512
+#define TSI721_IDQ_SIZE_MAX    (512 * 1024)
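+
+/*
+ * Worked example: queue sizes are encoded as a power-of-two exponent,
+ * TSI721_IDQ_SIZE_VAL(size) = __fls(size) - 4.  The minimum queue of 512
+ * entries encodes as __fls(512) - 4 = 9 - 4 = 5, and the maximum of
+ * 512 * 1024 entries as 19 - 4 = 15.
+ */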
+
+#define TSI721_SR_CHINT(x)     (0x20040 + (x) * 0x1000)
+#define TSI721_SR_CHINTE(x)    (0x20044 + (x) * 0x1000)
+#define TSI721_SR_CHINTSET(x)  (0x20048 + (x) * 0x1000)
+#define TSI721_SR_CHINT_ODBOK  0x00000020
+#define TSI721_SR_CHINT_IDBQRCV        0x00000010
+#define TSI721_SR_CHINT_SUSP   0x00000008
+#define TSI721_SR_CHINT_ODBTO  0x00000004
+#define TSI721_SR_CHINT_ODBRTRY        0x00000002
+#define TSI721_SR_CHINT_ODBERR 0x00000001
+#define TSI721_SR_CHINT_ALL    0x0000003f
+
+#define TSI721_IBWIN_NUM       8
+
+#define TSI721_IBWINLB(x)      (0x29000 + (x) * 0x20)
+#define TSI721_IBWINLB_BA      0xfffff000
+#define TSI721_IBWINLB_WEN     0x00000001
+
+#define TSI721_SR2PC_GEN_INTE  0x29800
+#define TSI721_SR2PC_PWE       0x29804
+#define TSI721_SR2PC_GEN_INT   0x29808
+
+#define TSI721_DEV_INTE                0x29840
+#define TSI721_DEV_INT         0x29844
+#define TSI721_DEV_INTSET      0x29848
+#define TSI721_DEV_INT_SMSG_CH 0x00000800
+#define TSI721_DEV_INT_SMSG_NCH        0x00000400
+#define TSI721_DEV_INT_SR2PC_CH        0x00000200
+#define TSI721_DEV_INT_SRIO    0x00000020
+
+#define TSI721_DEV_CHAN_INTE   0x2984c
+#define TSI721_DEV_CHAN_INT    0x29850
+
+#define TSI721_INT_SR2PC_CHAN_M        0xff000000
+#define TSI721_INT_SR2PC_CHAN(x) (1 << (24 + (x)))
+#define TSI721_INT_IMSG_CHAN_M 0x00ff0000
+#define TSI721_INT_IMSG_CHAN(x)        (1 << (16 + (x)))
+#define TSI721_INT_OMSG_CHAN_M 0x0000ff00
+#define TSI721_INT_OMSG_CHAN(x)        (1 << (8 + (x)))
+
+/*
+ * PC2SR block registers
+ */
+#define TSI721_OBWIN_NUM       TSI721_PC2SR_WINS
+
+#define TSI721_OBWINLB(x)      (0x40000 + (x) * 0x20)
+#define TSI721_OBWINLB_BA      0xffff8000
+#define TSI721_OBWINLB_WEN     0x00000001
+
+#define TSI721_OBWINUB(x)      (0x40004 + (x) * 0x20)
+
+#define TSI721_OBWINSZ(x)      (0x40008 + (x) * 0x20)
+#define TSI721_OBWINSZ_SIZE    0x00001f00
+#define TSI721_OBWIN_SIZE(size)        (__fls(size) - 15)
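+
+/*
+ * Worked example: window sizes are likewise encoded as __fls(size) - 15,
+ * so the smallest encodable window of 32 KB (2^15) yields 0 and a 16 MB
+ * window (2^24) yields 9.
+ */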
+
+#define TSI721_ZONE_SEL                0x41300
+#define TSI721_ZONE_SEL_RD_WRB 0x00020000
+#define TSI721_ZONE_SEL_GO     0x00010000
+#define TSI721_ZONE_SEL_WIN    0x00000038
+#define TSI721_ZONE_SEL_ZONE   0x00000007
+
+#define TSI721_LUT_DATA0       0x41304
+#define TSI721_LUT_DATA0_ADD   0xfffff000
+#define TSI721_LUT_DATA0_RDTYPE        0x00000f00
+#define TSI721_LUT_DATA0_NREAD 0x00000100
+#define TSI721_LUT_DATA0_MNTRD 0x00000200
+#define TSI721_LUT_DATA0_RDCRF 0x00000020
+#define TSI721_LUT_DATA0_WRCRF 0x00000010
+#define TSI721_LUT_DATA0_WRTYPE        0x0000000f
+#define TSI721_LUT_DATA0_NWR   0x00000001
+#define TSI721_LUT_DATA0_MNTWR 0x00000002
+#define TSI721_LUT_DATA0_NWR_R 0x00000004
+
+#define TSI721_LUT_DATA1       0x41308
+
+#define TSI721_LUT_DATA2       0x4130c
+#define TSI721_LUT_DATA2_HC    0xff000000
+#define TSI721_LUT_DATA2_ADD65 0x000c0000
+#define TSI721_LUT_DATA2_TT    0x00030000
+#define TSI721_LUT_DATA2_DSTID 0x0000ffff
+
+#define TSI721_PC2SR_INTE      0x41310
+
+#define TSI721_DEVCTL          0x48004
+#define TSI721_DEVCTL_SRBOOT_CMPL      0x00000004
+
+#define TSI721_I2C_INT_ENABLE  0x49120
+
+/*
+ * Block DMA Engine Registers
+ *   x = 0..7
+ */
+
+#define TSI721_DMAC_DWRCNT(x)  (0x51000 + (x) * 0x1000)
+#define TSI721_DMAC_DRDCNT(x)  (0x51004 + (x) * 0x1000)
+
+#define TSI721_DMAC_CTL(x)     (0x51008 + (x) * 0x1000)
+#define TSI721_DMAC_CTL_SUSP   0x00000002
+#define TSI721_DMAC_CTL_INIT   0x00000001
+
+#define TSI721_DMAC_INT(x)     (0x5100c + (x) * 0x1000)
+#define TSI721_DMAC_INT_STFULL 0x00000010
+#define TSI721_DMAC_INT_DONE   0x00000008
+#define TSI721_DMAC_INT_SUSP   0x00000004
+#define TSI721_DMAC_INT_ERR    0x00000002
+#define TSI721_DMAC_INT_IOFDONE        0x00000001
+#define TSI721_DMAC_INT_ALL    0x0000001f
+
+#define TSI721_DMAC_INTSET(x)  (0x51010 + (x) * 0x1000)
+
+#define TSI721_DMAC_STS(x)     (0x51014 + (x) * 0x1000)
+#define TSI721_DMAC_STS_ABORT  0x00400000
+#define TSI721_DMAC_STS_RUN    0x00200000
+#define TSI721_DMAC_STS_CS     0x001f0000
+
+#define TSI721_DMAC_INTE(x)    (0x51018 + (x) * 0x1000)
+
+#define TSI721_DMAC_DPTRL(x)   (0x51024 + (x) * 0x1000)
+#define TSI721_DMAC_DPTRL_MASK 0xffffffe0
+
+#define TSI721_DMAC_DPTRH(x)   (0x51028 + (x) * 0x1000)
+
+#define TSI721_DMAC_DSBL(x)    (0x5102c + (x) * 0x1000)
+#define TSI721_DMAC_DSBL_MASK  0xffffffc0
+
+#define TSI721_DMAC_DSBH(x)    (0x51030 + (x) * 0x1000)
+
+#define TSI721_DMAC_DSSZ(x)    (0x51034 + (x) * 0x1000)
+#define TSI721_DMAC_DSSZ_SIZE_M        0x0000000f
+#define TSI721_DMAC_DSSZ_SIZE(size)    (__fls(size) - 4)
+
+
+#define TSI721_DMAC_DSRP(x)    (0x51038 + (x) * 0x1000)
+#define TSI721_DMAC_DSRP_MASK  0x0007ffff
+
+#define TSI721_DMAC_DSWP(x)    (0x5103c + (x) * 0x1000)
+#define TSI721_DMAC_DSWP_MASK  0x0007ffff
+
+#define TSI721_BDMA_INTE       0x5f000
+
+/*
+ * Messaging definitions
+ */
+#define TSI721_MSG_BUFFER_SIZE         RIO_MAX_MSG_SIZE
+#define TSI721_MSG_MAX_SIZE            RIO_MAX_MSG_SIZE
+#define TSI721_IMSG_MAXCH              8
+#define TSI721_IMSG_CHNUM              TSI721_IMSG_MAXCH
+#define TSI721_IMSGD_MIN_RING_SIZE     32
+#define TSI721_IMSGD_RING_SIZE         512
+
+#define TSI721_OMSG_CHNUM              4 /* One channel per MBOX */
+#define TSI721_OMSGD_MIN_RING_SIZE     32
+#define TSI721_OMSGD_RING_SIZE         512
+
+/*
+ * Outbound Messaging Engine Registers
+ *   x = 0..7
+ */
+
+#define TSI721_OBDMAC_DWRCNT(x)                (0x61000 + (x) * 0x1000)
+
+#define TSI721_OBDMAC_DRDCNT(x)                (0x61004 + (x) * 0x1000)
+
+#define TSI721_OBDMAC_CTL(x)           (0x61008 + (x) * 0x1000)
+#define TSI721_OBDMAC_CTL_MASK         0x00000007
+#define TSI721_OBDMAC_CTL_RETRY_THR    0x00000004
+#define TSI721_OBDMAC_CTL_SUSPEND      0x00000002
+#define TSI721_OBDMAC_CTL_INIT         0x00000001
+
+#define TSI721_OBDMAC_INT(x)           (0x6100c + (x) * 0x1000)
+#define TSI721_OBDMAC_INTSET(x)                (0x61010 + (x) * 0x1000)
+#define TSI721_OBDMAC_INTE(x)          (0x61018 + (x) * 0x1000)
+#define TSI721_OBDMAC_INT_MASK         0x0000001F
+#define TSI721_OBDMAC_INT_ST_FULL      0x00000010
+#define TSI721_OBDMAC_INT_DONE         0x00000008
+#define TSI721_OBDMAC_INT_SUSPENDED    0x00000004
+#define TSI721_OBDMAC_INT_ERROR                0x00000002
+#define TSI721_OBDMAC_INT_IOF_DONE     0x00000001
+#define TSI721_OBDMAC_INT_ALL          TSI721_OBDMAC_INT_MASK
+
+#define TSI721_OBDMAC_STS(x)           (0x61014 + (x) * 0x1000)
+#define TSI721_OBDMAC_STS_MASK         0x007f0000
+#define TSI721_OBDMAC_STS_ABORT                0x00400000
+#define TSI721_OBDMAC_STS_RUN          0x00200000
+#define TSI721_OBDMAC_STS_CS           0x001f0000
+
+#define TSI721_OBDMAC_PWE(x)           (0x6101c + (x) * 0x1000)
+#define TSI721_OBDMAC_PWE_MASK         0x00000002
+#define TSI721_OBDMAC_PWE_ERROR_EN     0x00000002
+
+#define TSI721_OBDMAC_DPTRL(x)         (0x61020 + (x) * 0x1000)
+#define TSI721_OBDMAC_DPTRL_MASK       0xfffffff0
+
+#define TSI721_OBDMAC_DPTRH(x)         (0x61024 + (x) * 0x1000)
+#define TSI721_OBDMAC_DPTRH_MASK       0xffffffff
+
+#define TSI721_OBDMAC_DSBL(x)          (0x61040 + (x) * 0x1000)
+#define TSI721_OBDMAC_DSBL_MASK                0xffffffc0
+
+#define TSI721_OBDMAC_DSBH(x)          (0x61044 + (x) * 0x1000)
+#define TSI721_OBDMAC_DSBH_MASK                0xffffffff
+
+#define TSI721_OBDMAC_DSSZ(x)          (0x61048 + (x) * 0x1000)
+#define TSI721_OBDMAC_DSSZ_MASK                0x0000000f
+
+#define TSI721_OBDMAC_DSRP(x)          (0x6104c + (x) * 0x1000)
+#define TSI721_OBDMAC_DSRP_MASK                0x0007ffff
+
+#define TSI721_OBDMAC_DSWP(x)          (0x61050 + (x) * 0x1000)
+#define TSI721_OBDMAC_DSWP_MASK                0x0007ffff
+
+#define TSI721_RQRPTO                  0x60010
+#define TSI721_RQRPTO_MASK             0x00ffffff
+#define TSI721_RQRPTO_VAL              400     /* Response timeout value */
+
+/*
+ * Inbound Messaging Engine Registers
+ *   x = 0..7
+ */
+
+#define TSI721_IB_DEVID_GLOBAL         0xffff
+#define TSI721_IBDMAC_FQBL(x)          (0x61200 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQBL_MASK                0xffffffc0
+
+#define TSI721_IBDMAC_FQBH(x)          (0x61204 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQBH_MASK                0xffffffff
+
+#define TSI721_IBDMAC_FQSZ_ENTRY_INX   TSI721_IMSGD_RING_SIZE
+#define TSI721_IBDMAC_FQSZ(x)          (0x61208 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQSZ_MASK                0x0000000f
+
+#define TSI721_IBDMAC_FQRP(x)          (0x6120c + (x) * 0x1000)
+#define TSI721_IBDMAC_FQRP_MASK                0x0007ffff
+
+#define TSI721_IBDMAC_FQWP(x)          (0x61210 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQWP_MASK                0x0007ffff
+
+#define TSI721_IBDMAC_FQTH(x)          (0x61214 + (x) * 0x1000)
+#define TSI721_IBDMAC_FQTH_MASK                0x0007ffff
+
+#define TSI721_IB_DEVID                        0x60020
+#define TSI721_IB_DEVID_MASK           0x0000ffff
+
+#define TSI721_IBDMAC_CTL(x)           (0x61240 + (x) * 0x1000)
+#define TSI721_IBDMAC_CTL_MASK         0x00000003
+#define TSI721_IBDMAC_CTL_SUSPEND      0x00000002
+#define TSI721_IBDMAC_CTL_INIT         0x00000001
+
+#define TSI721_IBDMAC_STS(x)           (0x61244 + (x) * 0x1000)
+#define TSI721_IBDMAC_STS_MASK         0x007f0000
+#define TSI721_IBSMAC_STS_ABORT                0x00400000
+#define TSI721_IBSMAC_STS_RUN          0x00200000
+#define TSI721_IBSMAC_STS_CS           0x001f0000
+
+#define TSI721_IBDMAC_INT(x)           (0x61248 + (x) * 0x1000)
+#define TSI721_IBDMAC_INTSET(x)                (0x6124c + (x) * 0x1000)
+#define TSI721_IBDMAC_INTE(x)          (0x61250 + (x) * 0x1000)
+#define TSI721_IBDMAC_INT_MASK         0x0000100f
+#define TSI721_IBDMAC_INT_SRTO         0x00001000
+#define TSI721_IBDMAC_INT_SUSPENDED    0x00000008
+#define TSI721_IBDMAC_INT_PC_ERROR     0x00000004
+#define TSI721_IBDMAC_INT_FQ_LOW       0x00000002
+#define TSI721_IBDMAC_INT_DQ_RCV       0x00000001
+#define TSI721_IBDMAC_INT_ALL          TSI721_IBDMAC_INT_MASK
+
+#define TSI721_IBDMAC_PWE(x)           (0x61254 + (x) * 0x1000)
+#define TSI721_IBDMAC_PWE_MASK         0x00001700
+#define TSI721_IBDMAC_PWE_SRTO         0x00001000
+#define TSI721_IBDMAC_PWE_ILL_FMT      0x00000400
+#define TSI721_IBDMAC_PWE_ILL_DEC      0x00000200
+#define TSI721_IBDMAC_PWE_IMP_SP       0x00000100
+
+#define TSI721_IBDMAC_DQBL(x)          (0x61300 + (x) * 0x1000)
+#define TSI721_IBDMAC_DQBL_MASK                0xffffffc0
+#define TSI721_IBDMAC_DQBL_ADDR                0xffffffc0
+
+#define TSI721_IBDMAC_DQBH(x)          (0x61304 + (x) * 0x1000)
+#define TSI721_IBDMAC_DQBH_MASK                0xffffffff
+
+#define TSI721_IBDMAC_DQRP(x)          (0x61308 + (x) * 0x1000)
+#define TSI721_IBDMAC_DQRP_MASK                0x0007ffff
+
+#define TSI721_IBDMAC_DQWR(x)          (0x6130c + (x) * 0x1000)
+#define TSI721_IBDMAC_DQWR_MASK                0x0007ffff
+
+#define TSI721_IBDMAC_DQSZ(x)          (0x61314 + (x) * 0x1000)
+#define TSI721_IBDMAC_DQSZ_MASK                0x0000000f
+
+/*
+ * Messaging Engine Interrupts
+ */
+
+#define TSI721_SMSG_PWE                        0x6a004
+
+#define TSI721_SMSG_INTE               0x6a000
+#define TSI721_SMSG_INT                        0x6a008
+#define TSI721_SMSG_INTSET             0x6a010
+#define TSI721_SMSG_INT_MASK           0x0086ffff
+#define TSI721_SMSG_INT_UNS_RSP                0x00800000
+#define TSI721_SMSG_INT_ECC_NCOR       0x00040000
+#define TSI721_SMSG_INT_ECC_COR                0x00020000
+#define TSI721_SMSG_INT_ECC_NCOR_CH    0x0000ff00
+#define TSI721_SMSG_INT_ECC_COR_CH     0x000000ff
+
+#define TSI721_SMSG_ECC_LOG            0x6a014
+#define TSI721_SMSG_ECC_LOG_MASK       0x00070007
+#define TSI721_SMSG_ECC_LOG_ECC_NCOR_M 0x00070000
+#define TSI721_SMSG_ECC_LOG_ECC_COR_M  0x00000007
+
+#define TSI721_RETRY_GEN_CNT           0x6a100
+#define TSI721_RETRY_GEN_CNT_MASK      0xffffffff
+
+#define TSI721_RETRY_RX_CNT            0x6a104
+#define TSI721_RETRY_RX_CNT_MASK       0xffffffff
+
+#define TSI721_SMSG_ECC_COR_LOG(x)     (0x6a300 + (x) * 4)
+#define TSI721_SMSG_ECC_COR_LOG_MASK   0x000000ff
+
+#define TSI721_SMSG_ECC_NCOR(x)                (0x6a340 + (x) * 4)
+#define TSI721_SMSG_ECC_NCOR_MASK      0x000000ff
+
+/*
+ * Block DMA Descriptors
+ */
+
+struct tsi721_dma_desc {
+       __le32 type_id;
+
+#define TSI721_DMAD_DEVID      0x0000ffff
+#define TSI721_DMAD_CRF                0x00010000
+#define TSI721_DMAD_PRIO       0x00060000
+#define TSI721_DMAD_RTYPE      0x00780000
+#define TSI721_DMAD_IOF                0x08000000
+#define TSI721_DMAD_DTYPE      0xe0000000
+
+       __le32 bcount;
+
+#define TSI721_DMAD_BCOUNT1    0x03ffffff /* if DTYPE == 1 */
+#define TSI721_DMAD_BCOUNT2    0x0000000f /* if DTYPE == 2 */
+#define TSI721_DMAD_TT         0x0c000000
+#define TSI721_DMAD_RADDR0     0xc0000000
+
+       union {
+               __le32 raddr_lo;           /* if DTYPE == (1 || 2) */
+               __le32 next_lo;            /* if DTYPE == 3 */
+       };
+
+#define TSI721_DMAD_CFGOFF     0x00ffffff
+#define TSI721_DMAD_HOPCNT     0xff000000
+
+       union {
+               __le32 raddr_hi;           /* if DTYPE == (1 || 2) */
+               __le32 next_hi;            /* if DTYPE == 3 */
+       };
+
+       union {
+               struct {                   /* if DTYPE == 1 */
+                       __le32 bufptr_lo;
+                       __le32 bufptr_hi;
+                       __le32 s_dist;
+                       __le32 s_size;
+               } t1;
+               __le32 data[4];            /* if DTYPE == 2 */
+               u32    reserved[4];        /* if DTYPE == 3 */
+       };
+} __aligned(32);
+
+/*
+ * Inbound Messaging Descriptor
+ */
+struct tsi721_imsg_desc {
+       __le32 type_id;
+
+#define TSI721_IMD_DEVID       0x0000ffff
+#define TSI721_IMD_CRF         0x00010000
+#define TSI721_IMD_PRIO                0x00060000
+#define TSI721_IMD_TT          0x00180000
+#define TSI721_IMD_DTYPE       0xe0000000
+
+       __le32 msg_info;
+
+#define TSI721_IMD_BCOUNT      0x00000ff8
+#define TSI721_IMD_SSIZE       0x0000f000
+#define TSI721_IMD_LETER       0x00030000
+#define TSI721_IMD_XMBOX       0x003c0000
+#define TSI721_IMD_MBOX                0x00c00000
+#define TSI721_IMD_CS          0x78000000
+#define TSI721_IMD_HO          0x80000000
+
+       __le32 bufptr_lo;
+       __le32 bufptr_hi;
+       u32    reserved[12];
+
+} __aligned(64);
+
+/*
+ * Outbound Messaging Descriptor
+ */
+struct tsi721_omsg_desc {
+       __le32 type_id;
+
+#define TSI721_OMD_DEVID       0x0000ffff
+#define TSI721_OMD_CRF         0x00010000
+#define TSI721_OMD_PRIO                0x00060000
+#define TSI721_OMD_IOF         0x08000000
+#define TSI721_OMD_DTYPE       0xe0000000
+#define TSI721_OMD_RSRVD       0x17f80000
+
+       __le32 msg_info;
+
+#define TSI721_OMD_BCOUNT      0x00000ff8
+#define TSI721_OMD_SSIZE       0x0000f000
+#define TSI721_OMD_LETER       0x00030000
+#define TSI721_OMD_XMBOX       0x003c0000
+#define TSI721_OMD_MBOX                0x00c00000
+#define TSI721_OMD_TT          0x0c000000
+
+       union {
+               __le32 bufptr_lo;       /* if DTYPE == 4 */
+               __le32 next_lo;         /* if DTYPE == 5 */
+       };
+
+       union {
+               __le32 bufptr_hi;       /* if DTYPE == 4 */
+               __le32 next_hi;         /* if DTYPE == 5 */
+       };
+
+} __aligned(16);
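+
+/*
+ * Example (from tsi721_open_outb_mbox() above): the extra descriptor at
+ * ring index 'entries' is a DTYPE5 block pointer that closes the ring,
+ * i.e. type_id = cpu_to_le32(DTYPE5 << 29) = 0xa0000000, with next_lo/
+ * next_hi holding the bus address of descriptor 0 so that the engine
+ * wraps back to the start of the ring.
+ */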
+
+struct tsi721_dma_sts {
+       __le64  desc_sts[8];
+} __aligned(64);
+
+struct tsi721_desc_sts_fifo {
+       union {
+               __le64  da64;
+               struct {
+                       __le32  lo;
+                       __le32  hi;
+               } da32;
+       } stat[8];
+} __aligned(64);
+
+/* Descriptor types for BDMA and Messaging blocks */
+enum dma_dtype {
+       DTYPE1 = 1, /* Data Transfer DMA Descriptor */
+       DTYPE2 = 2, /* Immediate Data Transfer DMA Descriptor */
+       DTYPE3 = 3, /* Block Pointer DMA Descriptor */
+       DTYPE4 = 4, /* Outbound Msg DMA Descriptor */
+       DTYPE5 = 5, /* OB Messaging Block Pointer Descriptor */
+       DTYPE6 = 6  /* Inbound Messaging Descriptor */
+};
+
+enum dma_rtype {
+       NREAD = 0,
+       LAST_NWRITE_R = 1,
+       ALL_NWRITE = 2,
+       ALL_NWRITE_R = 3,
+       MAINT_RD = 4,
+       MAINT_WR = 5
+};
+
+/*
+ * mport Driver Definitions
+ */
+#define TSI721_DMA_CHNUM       TSI721_DMA_MAXCH
+
+#define TSI721_DMACH_MAINT     0       /* DMA channel for maint requests */
+#define TSI721_DMACH_MAINT_NBD 32      /* Number of BDs for maint requests */
+
+#define MSG_DMA_ENTRY_INX_TO_SIZE(x)   ((0x10 << (x)) & 0xFFFF0)
+
+enum tsi721_smsg_int_flag {
+       SMSG_INT_NONE           = 0x00000000,
+       SMSG_INT_ECC_COR_CH     = 0x000000ff,
+       SMSG_INT_ECC_NCOR_CH    = 0x0000ff00,
+       SMSG_INT_ECC_COR        = 0x00020000,
+       SMSG_INT_ECC_NCOR       = 0x00040000,
+       SMSG_INT_UNS_RSP        = 0x00800000,
+       SMSG_INT_ALL            = 0x0006ffff
+};
+
+/* Structures */
+
+struct tsi721_bdma_chan {
+       int             bd_num;         /* number of buffer descriptors */
+       void            *bd_base;       /* start of DMA descriptors */
+       dma_addr_t      bd_phys;
+       void            *sts_base;      /* start of DMA BD status FIFO */
+       dma_addr_t      sts_phys;
+       int             sts_size;
+};
+
+struct tsi721_imsg_ring {
+       u32             size;
+       /* VA/PA of data buffers for incoming messages */
+       void            *buf_base;
+       dma_addr_t      buf_phys;
+       /* VA/PA of circular free buffer list */
+       void            *imfq_base;
+       dma_addr_t      imfq_phys;
+       /* VA/PA of Inbound message descriptors */
+       void            *imd_base;
+       dma_addr_t      imd_phys;
+        /* Inbound Queue buffer pointers */
+       void            *imq_base[TSI721_IMSGD_RING_SIZE];
+
+       u32             rx_slot;
+       void            *dev_id;
+       u32             fq_wrptr;
+       u32             desc_rdptr;
+       spinlock_t      lock;
+};
+
+struct tsi721_omsg_ring {
+       u32             size;
+       /* VA/PA of OB Msg descriptors */
+       void            *omd_base;
+       dma_addr_t      omd_phys;
+       /* VA/PA of OB Msg data buffers */
+       void            *omq_base[TSI721_OMSGD_RING_SIZE];
+       dma_addr_t      omq_phys[TSI721_OMSGD_RING_SIZE];
+       /* VA/PA of OB Msg descriptor status FIFO */
+       void            *sts_base;
+       dma_addr_t      sts_phys;
+       u32             sts_size; /* # of allocated status entries */
+       u32             sts_rdptr;
+
+       u32             tx_slot;
+       void            *dev_id;
+       u32             wr_count;
+       spinlock_t      lock;
+};
+
+enum tsi721_flags {
+       TSI721_USING_MSI        = (1 << 0),
+       TSI721_USING_MSIX       = (1 << 1),
+       TSI721_IMSGID_SET       = (1 << 2),
+};
+
+#ifdef CONFIG_PCI_MSI
+/*
+ * MSI-X Table Entries (0 ... 69)
+ */
+#define TSI721_MSIX_DMACH_DONE(x)      (0 + (x))
+#define TSI721_MSIX_DMACH_INT(x)       (8 + (x))
+#define TSI721_MSIX_BDMA_INT           16
+#define TSI721_MSIX_OMSG_DONE(x)       (17 + (x))
+#define TSI721_MSIX_OMSG_INT(x)                (25 + (x))
+#define TSI721_MSIX_IMSG_DQ_RCV(x)     (33 + (x))
+#define TSI721_MSIX_IMSG_INT(x)                (41 + (x))
+#define TSI721_MSIX_MSG_INT            49
+#define TSI721_MSIX_SR2PC_IDBQ_RCV(x)  (50 + (x))
+#define TSI721_MSIX_SR2PC_CH_INT(x)    (58 + (x))
+#define TSI721_MSIX_SR2PC_INT          66
+#define TSI721_MSIX_PC2SR_INT          67
+#define TSI721_MSIX_SRIO_MAC_INT       68
+#define TSI721_MSIX_I2C_INT            69
+
+/* MSI-X vector and init table entry indexes */
+enum tsi721_msix_vect {
+       TSI721_VECT_IDB,
+       TSI721_VECT_PWRX, /* PW_RX is part of SRIO MAC Interrupt reporting */
+       TSI721_VECT_OMB0_DONE,
+       TSI721_VECT_OMB1_DONE,
+       TSI721_VECT_OMB2_DONE,
+       TSI721_VECT_OMB3_DONE,
+       TSI721_VECT_OMB0_INT,
+       TSI721_VECT_OMB1_INT,
+       TSI721_VECT_OMB2_INT,
+       TSI721_VECT_OMB3_INT,
+       TSI721_VECT_IMB0_RCV,
+       TSI721_VECT_IMB1_RCV,
+       TSI721_VECT_IMB2_RCV,
+       TSI721_VECT_IMB3_RCV,
+       TSI721_VECT_IMB0_INT,
+       TSI721_VECT_IMB1_INT,
+       TSI721_VECT_IMB2_INT,
+       TSI721_VECT_IMB3_INT,
+       TSI721_VECT_MAX
+};
+
+#define IRQ_DEVICE_NAME_MAX    64
+
+struct msix_irq {
+       u16     vector;
+       char    irq_name[IRQ_DEVICE_NAME_MAX];
+};
+#endif /* CONFIG_PCI_MSI */
+
+struct tsi721_device {
+       struct pci_dev  *pdev;
+       struct rio_mport *mport;
+       u32             flags;
+       void __iomem    *regs;
+#ifdef CONFIG_PCI_MSI
+       struct msix_irq msix[TSI721_VECT_MAX];
+#endif
+       /* Doorbells */
+       void __iomem    *odb_base;
+       void            *idb_base;
+       dma_addr_t      idb_dma;
+       struct work_struct idb_work;
+       u32             db_discard_count;
+
+       /* Inbound Port-Write */
+       struct work_struct pw_work;
+       struct kfifo    pw_fifo;
+       spinlock_t      pw_fifo_lock;
+       u32             pw_discard_count;
+
+       /* BDMA Engine */
+       struct tsi721_bdma_chan bdma[TSI721_DMA_CHNUM];
+
+       /* Inbound Messaging */
+       int             imsg_init[TSI721_IMSG_CHNUM];
+       struct tsi721_imsg_ring imsg_ring[TSI721_IMSG_CHNUM];
+
+       /* Outbound Messaging */
+       int             omsg_init[TSI721_OMSG_CHNUM];
+       struct tsi721_omsg_ring omsg_ring[TSI721_OMSG_CHNUM];
+};
+
+#endif
index ebe77dd87dafb8c2fe9253c7ca2b38304838173f..2bebd791a09243703da9d531eda1cec3ebd0941d 100644 (file)
@@ -516,7 +516,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net,
        return rdev;
 
 cleanup:
-       if (rio_is_switch(rdev))
+       if (rswitch)
                kfree(rswitch->route_table);
 
        kfree(rdev);
@@ -923,7 +923,7 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port,
  * rio_enum_complete- Tests if enumeration of a network is complete
  * @port: Master port to send transaction
  *
- * Tests the Component Tag CSR for non-zero value (enumeration
+ * Tests the PGCCSR discovered bit for non-zero value (enumeration
  * complete flag). Return %1 if enumeration is complete or %0 if
  * enumeration is incomplete.
  */
@@ -933,7 +933,7 @@ static int rio_enum_complete(struct rio_mport *port)
 
        rio_local_read_config_32(port, port->phys_efptr + RIO_PORT_GEN_CTL_CSR,
                                 &regval);
-       return (regval & RIO_PORT_GEN_MASTER) ? 1 : 0;
+       return (regval & RIO_PORT_GEN_DISCOVERED) ? 1 : 0;
 }
 
 /**
index 2bb8f451cc067302dcd4145c9de157aaf10cbd6d..2d014a144365a6bdc0d3ebe60e76a7867cfb83fb 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/err.h>
 #include <linux/spinlock.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/db8500-prcmu.h>
+#include <linux/mfd/dbx500-prcmu.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/db8500-prcmu.h>
index 730f43ad415b11fb83803f84577eb1e5d7c4c738..cb2841feeefd373416f978f89cc587f380a0552c 100644 (file)
@@ -336,9 +336,9 @@ static int __devinit mc13783_regulator_probe(struct platform_device *pdev)
 {
        struct mc13xxx_regulator_priv *priv;
        struct mc13xxx *mc13783 = dev_get_drvdata(pdev->dev.parent);
-       struct mc13783_regulator_platform_data *pdata =
+       struct mc13xxx_regulator_platform_data *pdata =
                dev_get_platdata(&pdev->dev);
-       struct mc13783_regulator_init_data *init_data;
+       struct mc13xxx_regulator_init_data *init_data;
        int i, ret;
 
        dev_dbg(&pdev->dev, "%s id %d\n", __func__, pdev->id);
@@ -381,7 +381,7 @@ err:
 static int __devexit mc13783_regulator_remove(struct platform_device *pdev)
 {
        struct mc13xxx_regulator_priv *priv = platform_get_drvdata(pdev);
-       struct mc13783_regulator_platform_data *pdata =
+       struct mc13xxx_regulator_platform_data *pdata =
                dev_get_platdata(&pdev->dev);
        int i;
 
index 5a538fc1cc8582bbe714fd6d9fbcfb8cc54e81cd..53eb4e55b289dd767944dde81bd918e8aff947e5 100644 (file)
@@ -8,7 +8,7 @@ config RTC_LIB
 menuconfig RTC_CLASS
        bool "Real Time Clock"
        default n
-       depends on !S390
+       depends on !S390 && !UML
        select RTC_LIB
        help
          Generic RTC class support. If you say yes here, you will
index 01a7df5317c1bd9af23ed246e9b452cf057fdd85..e8326f26fa2f5c5ca11d26901f5e70052a1ffea6 100644 (file)
 #include "rtc-core.h"
 
 
-static DEFINE_IDR(rtc_idr);
-static DEFINE_MUTEX(idr_lock);
+static DEFINE_IDA(rtc_ida);
 struct class *rtc_class;
 
 static void rtc_device_release(struct device *dev)
 {
        struct rtc_device *rtc = to_rtc_device(dev);
-       mutex_lock(&idr_lock);
-       idr_remove(&rtc_idr, rtc->id);
-       mutex_unlock(&idr_lock);
+       ida_simple_remove(&rtc_ida, rtc->id);
        kfree(rtc);
 }
 
@@ -146,25 +143,16 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
        struct rtc_wkalrm alrm;
        int id, err;
 
-       if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) {
-               err = -ENOMEM;
+       id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL);
+       if (id < 0) {
+               err = id;
                goto exit;
        }
 
-
-       mutex_lock(&idr_lock);
-       err = idr_get_new(&rtc_idr, NULL, &id);
-       mutex_unlock(&idr_lock);
-
-       if (err < 0)
-               goto exit;
-
-       id = id & MAX_ID_MASK;
-
        rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL);
        if (rtc == NULL) {
                err = -ENOMEM;
-               goto exit_idr;
+               goto exit_ida;
        }
 
        rtc->id = id;
@@ -222,10 +210,8 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 exit_kfree:
        kfree(rtc);
 
-exit_idr:
-       mutex_lock(&idr_lock);
-       idr_remove(&rtc_idr, id);
-       mutex_unlock(&idr_lock);
+exit_ida:
+       ida_simple_remove(&rtc_ida, id);
 
 exit:
        dev_err(dev, "rtc core: unable to register %s, err = %d\n",
@@ -276,7 +262,7 @@ static void __exit rtc_exit(void)
 {
        rtc_dev_exit();
        class_destroy(rtc_class);
-       idr_destroy(&rtc_idr);
+       ida_destroy(&rtc_ida);
 }
 
 subsys_initcall(rtc_init);
index b2005b44e4f7cc0ee053ab9ee37896e7bce66a2f..62b0763b7b9acc50cb76a38ed7cdb9bda32615e7 100644 (file)
@@ -34,6 +34,7 @@ enum ds_type {
        ds_1388,
        ds_3231,
        m41t00,
+       mcp7941x,
        rx_8025,
        // rs5c372 too?  different address...
 };
@@ -43,6 +44,7 @@ enum ds_type {
 #define DS1307_REG_SECS                0x00    /* 00-59 */
 #      define DS1307_BIT_CH            0x80
 #      define DS1340_BIT_nEOSC         0x80
+#      define MCP7941X_BIT_ST          0x80
 #define DS1307_REG_MIN         0x01    /* 00-59 */
 #define DS1307_REG_HOUR                0x02    /* 00-23, or 1-12{am,pm} */
 #      define DS1307_BIT_12HR          0x40    /* in REG_HOUR */
@@ -50,6 +52,7 @@ enum ds_type {
 #      define DS1340_BIT_CENTURY_EN    0x80    /* in REG_HOUR */
 #      define DS1340_BIT_CENTURY       0x40    /* in REG_HOUR */
 #define DS1307_REG_WDAY                0x03    /* 01-07 */
+#      define MCP7941X_BIT_VBATEN      0x08
 #define DS1307_REG_MDAY                0x04    /* 01-31 */
 #define DS1307_REG_MONTH       0x05    /* 01-12 */
 #      define DS1337_BIT_CENTURY       0x80    /* in REG_MONTH */
@@ -137,6 +140,8 @@ static const struct chip_desc chips[] = {
 },
 [m41t00] = {
 },
+[mcp7941x] = {
+},
 [rx_8025] = {
 }, };
 
@@ -149,6 +154,7 @@ static const struct i2c_device_id ds1307_id[] = {
        { "ds1340", ds_1340 },
        { "ds3231", ds_3231 },
        { "m41t00", m41t00 },
+       { "mcp7941x", mcp7941x },
        { "pt7c4338", ds_1307 },
        { "rx8025", rx_8025 },
        { }
@@ -365,6 +371,10 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
                buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN
                                | DS1340_BIT_CENTURY;
                break;
+       case mcp7941x:
+               buf[DS1307_REG_SECS] |= MCP7941X_BIT_ST;
+               buf[DS1307_REG_WDAY] |= MCP7941X_BIT_VBATEN;
+               break;
        default:
                break;
        }
@@ -808,6 +818,23 @@ read_rtc:
                        i2c_smbus_write_byte_data(client, DS1340_REG_FLAG, 0);
                        dev_warn(&client->dev, "SET TIME!\n");
                }
+               break;
+       case mcp7941x:
+               /* make sure that the backup battery is enabled */
+               if (!(ds1307->regs[DS1307_REG_WDAY] & MCP7941X_BIT_VBATEN)) {
+                       i2c_smbus_write_byte_data(client, DS1307_REG_WDAY,
+                                       ds1307->regs[DS1307_REG_WDAY]
+                                       | MCP7941X_BIT_VBATEN);
+               }
+
+               /* clock halted?  turn it on, so clock can tick. */
+               if (!(tmp & MCP7941X_BIT_ST)) {
+                       i2c_smbus_write_byte_data(client, DS1307_REG_SECS,
+                                       MCP7941X_BIT_ST);
+                       dev_warn(&client->dev, "SET TIME!\n");
+                       goto read_rtc;
+               }
+
                break;
        case rx_8025:
        case ds_1337:
index a1a278bc340dbfd0ff6814c8bcfec577f6137c85..9d0c3b478d558eb42279c6598e04f0d8e91a6695 100644 (file)
@@ -309,7 +309,7 @@ static irqreturn_t mc13xxx_rtc_reset_handler(int irq, void *dev)
        return IRQ_HANDLED;
 }
 
-static int __devinit mc13xxx_rtc_probe(struct platform_device *pdev)
+static int __init mc13xxx_rtc_probe(struct platform_device *pdev)
 {
        int ret;
        struct mc13xxx_rtc *priv;
@@ -378,7 +378,7 @@ err_reset_irq_request:
        return ret;
 }
 
-static int __devexit mc13xxx_rtc_remove(struct platform_device *pdev)
+static int __exit mc13xxx_rtc_remove(struct platform_device *pdev)
 {
        struct mc13xxx_rtc *priv = platform_get_drvdata(pdev);
 
@@ -410,7 +410,7 @@ const struct platform_device_id mc13xxx_rtc_idtable[] = {
 
 static struct platform_driver mc13xxx_rtc_driver = {
        .id_table = mc13xxx_rtc_idtable,
-       .remove = __devexit_p(mc13xxx_rtc_remove),
+       .remove = __exit_p(mc13xxx_rtc_remove),
        .driver = {
                .name = DRIVER_NAME,
                .owner = THIS_MODULE,
index 33b2ed451e095dde15bedd14ce04ded170e17fd4..e0ada37737862b703f312c3a9eef5327cfe37fb3 100644 (file)
@@ -202,11 +202,16 @@ static int intc_set_type(struct irq_data *data, unsigned int type)
        if (!value)
                return -EINVAL;
 
+       value &= ~SENSE_VALID_FLAG;
+
        ihp = intc_find_irq(d->sense, d->nr_sense, irq);
        if (ihp) {
+               /* PINT has 2-bit sense registers, should fail on EDGE_BOTH */
+               if (value >= (1 << _INTC_WIDTH(ihp->handle)))
+                       return -EINVAL;
+
                addr = INTC_REG(d, _INTC_ADDR_E(ihp->handle), 0);
-               intc_reg_fns[_INTC_FN(ihp->handle)](addr, ihp->handle,
-                                                   value & ~SENSE_VALID_FLAG);
+               intc_reg_fns[_INTC_FN(ihp->handle)](addr, ihp->handle, value);
        }
 
        return 0;
index e32304b66cf146e7375a876feae9b59335b9c413..56bf9336b92be26e5c35cf41616a29e7b2986c67 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/sysdev.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/stat.h>
 #include <asm/sizes.h>
 #include "internals.h"
 
index 75934e3ea34e848b4e3612b031d77635da1451d1..e67fe170d8d5e8bef9c7683f6490bf1a7be9dbc6 100644 (file)
@@ -217,7 +217,7 @@ static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id,
 
                if (!r_width)
                        break;
-               for (n = 0; n < (r_width / f_width) * 1 << f_width; n++) {
+               for (n = 0; n < (r_width / f_width) * (1 << f_width); n++) {
                        if (config_reg->enum_ids[n] == enum_id) {
                                *crp = config_reg;
                                *indexp = n;
@@ -577,6 +577,32 @@ static void sh_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
        sh_gpio_set_value(chip_to_pinmux(chip), offset, value);
 }
 
+static int sh_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+       struct pinmux_info *gpioc = chip_to_pinmux(chip);
+       pinmux_enum_t enum_id;
+       pinmux_enum_t *enum_ids;
+       int i, k, pos;
+
+       pos = 0;
+       enum_id = 0;
+       while (1) {
+               pos = get_gpio_enum_id(gpioc, offset, pos, &enum_id);
+               if (pos <= 0 || !enum_id)
+                       break;
+
+               for (i = 0; i < gpioc->gpio_irq_size; i++) {
+                       enum_ids = gpioc->gpio_irq[i].enum_ids;
+                       for (k = 0; enum_ids[k]; k++) {
+                               if (enum_ids[k] == enum_id)
+                                       return gpioc->gpio_irq[i].irq;
+                       }
+               }
+       }
+
+       return -ENOSYS;
+}
+
 int register_pinmux(struct pinmux_info *pip)
 {
        struct gpio_chip *chip = &pip->chip;
@@ -592,6 +618,7 @@ int register_pinmux(struct pinmux_info *pip)
        chip->get = sh_gpio_get;
        chip->direction_output = sh_gpio_direction_output;
        chip->set = sh_gpio_set;
+       chip->to_irq = sh_gpio_to_irq;
 
        WARN_ON(pip->first_gpio != 0); /* needs testing */
 
index 52e2900d9d8e42d650196e083a74eaaf4c9cc471..a1fd73df5416129c62d8889f87c1d003b0b6b47c 100644 (file)
@@ -88,7 +88,7 @@ config SPI_BFIN_SPORT
 
 config SPI_AU1550
        tristate "Au1550/Au12x0 SPI Controller"
-       depends on (SOC_AU1550 || SOC_AU1200) && EXPERIMENTAL
+       depends on MIPS_ALCHEMY && EXPERIMENTAL
        select SPI_BITBANG
        help
          If you say yes to this option, support will be included for the
index f3c6060c96b844d37ebdcc8caf59021081672057..7a1955583b7d8ce1c29837f3f4cec5fe284c6b35 100644 (file)
@@ -1197,7 +1197,7 @@ const struct inode_operations pohmelfs_file_inode_operations = {
 void pohmelfs_fill_inode(struct inode *inode, struct netfs_inode_info *info)
 {
        inode->i_mode = info->mode;
-       inode->i_nlink = info->nlink;
+       set_nlink(inode, info->nlink);
        inode->i_uid = info->uid;
        inode->i_gid = info->gid;
        inode->i_blocks = info->blocks;
index 8816f53e004d8b5058dc872f7cfabade5a4f193c..b3d17416d86a393d75bc518b9198fa7277b165d8 100644 (file)
@@ -1,6 +1,6 @@
 config VT
        bool "Virtual terminal" if EXPERT
-       depends on !S390
+       depends on !S390 && !UML
        select INPUT
        default y
        ---help---
index 4ac2750491de8bf6de1bee06a398c748b0e59ea6..791f11bed6063b6fec2ffa4ab78fc4177f70b509 100644 (file)
@@ -62,7 +62,6 @@ config USB_ARCH_HAS_EHCI
        boolean
        default y if FSL_SOC
        default y if PPC_MPC512x
-       default y if SOC_AU1200
        default y if ARCH_IXP4XX
        default y if ARCH_W90X900
        default y if ARCH_AT91SAM9G45
index ed48a5d79e16402c07bfa273583af9463d1ae5de..7ca290fcb070d07ff214c3e7d47ffbf83275a7a3 100644 (file)
@@ -36,3 +36,4 @@ obj-$(CONFIG_USB_HWA_HCD)     += hwa-hc.o
 obj-$(CONFIG_USB_IMX21_HCD)    += imx21-hcd.o
 obj-$(CONFIG_USB_FSL_MPH_DR_OF)        += fsl-mph-dr-of.o
 obj-$(CONFIG_USB_OCTEON2_COMMON) += octeon2-common.o
+obj-$(CONFIG_MIPS_ALCHEMY)     += alchemy-common.o
diff --git a/drivers/usb/host/alchemy-common.c b/drivers/usb/host/alchemy-common.c
new file mode 100644 (file)
index 0000000..b4192c9
--- /dev/null
@@ -0,0 +1,337 @@
+/*
+ * USB block power/access management abstraction.
+ *
+ * Au1000+: The OHCI block control register is at the far end of the OHCI memory
+ *         area. Au1550 has OHCI on different base address. No need to handle
+ *         UDC here.
+ * Au1200:  one register to control access and clocks to O/EHCI, UDC and OTG
+ *         as well as the PHY for EHCI and UDC.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/syscore_ops.h>
+#include <asm/mach-au1x00/au1000.h>
+
+/* control register offsets */
+#define AU1000_OHCICFG 0x7fffc
+#define AU1550_OHCICFG 0x07ffc
+#define AU1200_USBCFG  0x04
+
+/* Au1000 USB block config bits */
+#define USBHEN_RD      (1 << 4)                /* OHCI reset-done indicator */
+#define USBHEN_CE      (1 << 3)                /* OHCI block clock enable */
+#define USBHEN_E       (1 << 2)                /* OHCI block enable */
+#define USBHEN_C       (1 << 1)                /* OHCI block coherency bit */
+#define USBHEN_BE      (1 << 0)                /* OHCI Big-Endian */
+
+/* Au1200 USB config bits */
+#define USBCFG_PFEN    (1 << 31)               /* prefetch enable (undoc) */
+#define USBCFG_RDCOMB  (1 << 30)               /* read combining (undoc) */
+#define USBCFG_UNKNOWN (5 << 20)               /* unknown, leave this way */
+#define USBCFG_SSD     (1 << 23)               /* serial short detect en */
+#define USBCFG_PPE     (1 << 19)               /* HS PHY PLL */
+#define USBCFG_UCE     (1 << 18)               /* UDC clock enable */
+#define USBCFG_ECE     (1 << 17)               /* EHCI clock enable */
+#define USBCFG_OCE     (1 << 16)               /* OHCI clock enable */
+#define USBCFG_FLA(x)  (((x) & 0x3f) << 8)
+#define USBCFG_UCAM    (1 << 7)                /* coherent access (undoc) */
+#define USBCFG_GME     (1 << 6)                /* OTG mem access */
+#define USBCFG_DBE     (1 << 5)                /* UDC busmaster enable */
+#define USBCFG_DME     (1 << 4)                /* UDC mem enable */
+#define USBCFG_EBE     (1 << 3)                /* EHCI busmaster enable */
+#define USBCFG_EME     (1 << 2)                /* EHCI mem enable */
+#define USBCFG_OBE     (1 << 1)                /* OHCI busmaster enable */
+#define USBCFG_OME     (1 << 0)                /* OHCI mem enable */
+#define USBCFG_INIT_AU1200     (USBCFG_PFEN | USBCFG_RDCOMB | USBCFG_UNKNOWN |\
+                                USBCFG_SSD | USBCFG_FLA(0x20) | USBCFG_UCAM | \
+                                USBCFG_GME | USBCFG_DBE | USBCFG_DME |        \
+                                USBCFG_EBE | USBCFG_EME | USBCFG_OBE |        \
+                                USBCFG_OME)
+
+
+static DEFINE_SPINLOCK(alchemy_usb_lock);
+
+
+static inline void __au1200_ohci_control(void __iomem *base, int enable)
+{
+       unsigned long r = __raw_readl(base + AU1200_USBCFG);
+       if (enable) {
+               __raw_writel(r | USBCFG_OCE, base + AU1200_USBCFG);
+               wmb();
+               udelay(2000);
+       } else {
+               __raw_writel(r & ~USBCFG_OCE, base + AU1200_USBCFG);
+               wmb();
+               udelay(1000);
+       }
+}
+
+static inline void __au1200_ehci_control(void __iomem *base, int enable)
+{
+       unsigned long r = __raw_readl(base + AU1200_USBCFG);
+       if (enable) {
+               __raw_writel(r | USBCFG_ECE | USBCFG_PPE, base + AU1200_USBCFG);
+               wmb();
+               udelay(1000);
+       } else {
+               if (!(r & USBCFG_UCE))          /* UDC also off? */
+                       r &= ~USBCFG_PPE;       /* yes: disable HS PHY PLL */
+               __raw_writel(r & ~USBCFG_ECE, base + AU1200_USBCFG);
+               wmb();
+               udelay(1000);
+       }
+}
+
+static inline void __au1200_udc_control(void __iomem *base, int enable)
+{
+       unsigned long r = __raw_readl(base + AU1200_USBCFG);
+       if (enable) {
+               __raw_writel(r | USBCFG_UCE | USBCFG_PPE, base + AU1200_USBCFG);
+               wmb();
+       } else {
+               if (!(r & USBCFG_ECE))          /* EHCI also off? */
+                       r &= ~USBCFG_PPE;       /* yes: disable HS PHY PLL */
+               __raw_writel(r & ~USBCFG_UCE, base + AU1200_USBCFG);
+               wmb();
+       }
+}
+
+static inline int au1200_coherency_bug(void)
+{
+#if defined(CONFIG_DMA_COHERENT)
+       /* Au1200 AB USB does not support coherent memory */
+       if (!(read_c0_prid() & 0xff)) {
+               printk(KERN_INFO "Au1200 USB: this is chip revision AB !!\n");
+               printk(KERN_INFO "Au1200 USB: update your board or re-configure"
+                                " the kernel\n");
+               return -ENODEV;
+       }
+#endif
+       return 0;
+}
+
+static inline int au1200_usb_control(int block, int enable)
+{
+       void __iomem *base =
+                       (void __iomem *)KSEG1ADDR(AU1200_USB_CTL_PHYS_ADDR);
+       int ret = 0;
+
+       switch (block) {
+       case ALCHEMY_USB_OHCI0:
+               ret = au1200_coherency_bug();
+               if (ret && enable)
+                       goto out;
+               __au1200_ohci_control(base, enable);
+               break;
+       case ALCHEMY_USB_UDC0:
+               __au1200_udc_control(base, enable);
+               break;
+       case ALCHEMY_USB_EHCI0:
+               ret = au1200_coherency_bug();
+               if (ret && enable)
+                       goto out;
+               __au1200_ehci_control(base, enable);
+               break;
+       default:
+               ret = -ENODEV;
+       }
+out:
+       return ret;
+}
+
+
+/* initialize USB block(s) to a known working state */
+static inline void au1200_usb_init(void)
+{
+       void __iomem *base =
+                       (void __iomem *)KSEG1ADDR(AU1200_USB_CTL_PHYS_ADDR);
+       __raw_writel(USBCFG_INIT_AU1200, base + AU1200_USBCFG);
+       wmb();
+       udelay(1000);
+}
+
+static inline void au1000_usb_init(unsigned long rb, int reg)
+{
+       void __iomem *base = (void __iomem *)KSEG1ADDR(rb + reg);
+       unsigned long r = __raw_readl(base);
+
+#if defined(__BIG_ENDIAN)
+       r |= USBHEN_BE;
+#endif
+       r |= USBHEN_C;
+
+       __raw_writel(r, base);
+       wmb();
+       udelay(1000);
+}
+
+
+static inline void __au1xx0_ohci_control(int enable, unsigned long rb, int creg)
+{
+       void __iomem *base = (void __iomem *)KSEG1ADDR(rb);
+       unsigned long r = __raw_readl(base + creg);
+
+       if (enable) {
+               __raw_writel(r | USBHEN_CE, base + creg);
+               wmb();
+               udelay(1000);
+               __raw_writel(r | USBHEN_CE | USBHEN_E, base + creg);
+               wmb();
+               udelay(1000);
+
+               /* wait for reset complete (read reg twice: au1500 erratum) */
+               while (__raw_readl(base + creg),
+                       !(__raw_readl(base + creg) & USBHEN_RD))
+                       udelay(1000);
+       } else {
+               __raw_writel(r & ~(USBHEN_CE | USBHEN_E), base + creg);
+               wmb();
+       }
+}
+
+static inline int au1000_usb_control(int block, int enable, unsigned long rb,
+                                    int creg)
+{
+       int ret = 0;
+
+       switch (block) {
+       case ALCHEMY_USB_OHCI0:
+               __au1xx0_ohci_control(enable, rb, creg);
+               break;
+       default:
+               ret = -ENODEV;
+       }
+       return ret;
+}
+
+/*
+ * alchemy_usb_control - control Alchemy on-chip USB blocks
+ * @block:     USB block to target
+ * @enable:    set 1 to enable a block, 0 to disable
+ */
+int alchemy_usb_control(int block, int enable)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&alchemy_usb_lock, flags);
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1000:
+       case ALCHEMY_CPU_AU1500:
+       case ALCHEMY_CPU_AU1100:
+               ret = au1000_usb_control(block, enable,
+                               AU1000_USB_OHCI_PHYS_ADDR, AU1000_OHCICFG);
+               break;
+       case ALCHEMY_CPU_AU1550:
+               ret = au1000_usb_control(block, enable,
+                               AU1550_USB_OHCI_PHYS_ADDR, AU1550_OHCICFG);
+               break;
+       case ALCHEMY_CPU_AU1200:
+               ret = au1200_usb_control(block, enable);
+               break;
+       default:
+               ret = -ENODEV;
+       }
+       spin_unlock_irqrestore(&alchemy_usb_lock, flags);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(alchemy_usb_control);
+
+
+static unsigned long alchemy_usb_pmdata[2];
+
+static void au1000_usb_pm(unsigned long br, int creg, int susp)
+{
+       void __iomem *base = (void __iomem *)KSEG1ADDR(br);
+
+       if (susp) {
+               alchemy_usb_pmdata[0] = __raw_readl(base + creg);
+               /* There appears to be some undocumented reset register.... */
+               __raw_writel(0, base + 0x04);
+               wmb();
+               __raw_writel(0, base + creg);
+               wmb();
+       } else {
+               __raw_writel(alchemy_usb_pmdata[0], base + creg);
+               wmb();
+       }
+}
+
+static void au1200_usb_pm(int susp)
+{
+       void __iomem *base =
+                       (void __iomem *)KSEG1ADDR(AU1200_USB_OTG_PHYS_ADDR);
+       if (susp) {
+               /* save OTG_CAP/MUX registers which indicate port routing */
+               /* FIXME: write an OTG driver to do that */
+               alchemy_usb_pmdata[0] = __raw_readl(base + 0x00);
+               alchemy_usb_pmdata[1] = __raw_readl(base + 0x04);
+       } else {
+               /* restore access to all MMIO areas */
+               au1200_usb_init();
+
+               /* restore OTG_CAP/MUX registers */
+               __raw_writel(alchemy_usb_pmdata[0], base + 0x00);
+               __raw_writel(alchemy_usb_pmdata[1], base + 0x04);
+               wmb();
+       }
+}
+
+static void alchemy_usb_pm(int susp)
+{
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1000:
+       case ALCHEMY_CPU_AU1500:
+       case ALCHEMY_CPU_AU1100:
+               au1000_usb_pm(AU1000_USB_OHCI_PHYS_ADDR, AU1000_OHCICFG, susp);
+               break;
+       case ALCHEMY_CPU_AU1550:
+               au1000_usb_pm(AU1550_USB_OHCI_PHYS_ADDR, AU1550_OHCICFG, susp);
+               break;
+       case ALCHEMY_CPU_AU1200:
+               au1200_usb_pm(susp);
+               break;
+       }
+}
+
+static int alchemy_usb_suspend(void)
+{
+       alchemy_usb_pm(1);
+       return 0;
+}
+
+static void alchemy_usb_resume(void)
+{
+       alchemy_usb_pm(0);
+}
+
+static struct syscore_ops alchemy_usb_pm_ops = {
+       .suspend        = alchemy_usb_suspend,
+       .resume         = alchemy_usb_resume,
+};
+
+static int __init alchemy_usb_init(void)
+{
+       switch (alchemy_get_cputype()) {
+       case ALCHEMY_CPU_AU1000:
+       case ALCHEMY_CPU_AU1500:
+       case ALCHEMY_CPU_AU1100:
+               au1000_usb_init(AU1000_USB_OHCI_PHYS_ADDR, AU1000_OHCICFG);
+               break;
+       case ALCHEMY_CPU_AU1550:
+               au1000_usb_init(AU1550_USB_OHCI_PHYS_ADDR, AU1550_OHCICFG);
+               break;
+       case ALCHEMY_CPU_AU1200:
+               au1200_usb_init();
+               break;
+       }
+
+       register_syscore_ops(&alchemy_usb_pm_ops);
+
+       return 0;
+}
+arch_initcall(alchemy_usb_init);
index 65719e8d24e455e5d9689a896fe3ed7e45e213f8..18bafa99fe57e1fa156a7cf326e589f1938b096b 100644 (file)
 #include <linux/platform_device.h>
 #include <asm/mach-au1x00/au1000.h>
 
-#define USB_HOST_CONFIG   (USB_MSR_BASE + USB_MSR_MCFG)
-#define USB_MCFG_PFEN     (1<<31)
-#define USB_MCFG_RDCOMB   (1<<30)
-#define USB_MCFG_SSDEN    (1<<23)
-#define USB_MCFG_PHYPLLEN (1<<19)
-#define USB_MCFG_UCECLKEN (1<<18)
-#define USB_MCFG_EHCCLKEN (1<<17)
-#ifdef CONFIG_DMA_COHERENT
-#define USB_MCFG_UCAM     (1<<7)
-#else
-#define USB_MCFG_UCAM     (0)
-#endif
-#define USB_MCFG_EBMEN    (1<<3)
-#define USB_MCFG_EMEMEN   (1<<2)
-
-#define USBH_ENABLE_CE (USB_MCFG_PHYPLLEN | USB_MCFG_EHCCLKEN)
-#define USBH_ENABLE_INIT (USB_MCFG_PFEN  | USB_MCFG_RDCOMB |   \
-                         USBH_ENABLE_CE | USB_MCFG_SSDEN  |    \
-                         USB_MCFG_UCAM  | USB_MCFG_EBMEN  |    \
-                         USB_MCFG_EMEMEN)
-
-#define USBH_DISABLE      (USB_MCFG_EBMEN | USB_MCFG_EMEMEN)
 
 extern int usb_disabled(void);
 
-static void au1xxx_start_ehc(void)
-{
-       /* enable clock to EHCI block and HS PHY PLL*/
-       au_writel(au_readl(USB_HOST_CONFIG) | USBH_ENABLE_CE, USB_HOST_CONFIG);
-       au_sync();
-       udelay(1000);
-
-       /* enable EHCI mmio */
-       au_writel(au_readl(USB_HOST_CONFIG) | USBH_ENABLE_INIT, USB_HOST_CONFIG);
-       au_sync();
-       udelay(1000);
-}
-
-static void au1xxx_stop_ehc(void)
-{
-       unsigned long c;
-
-       /* Disable mem */
-       au_writel(au_readl(USB_HOST_CONFIG) & ~USBH_DISABLE, USB_HOST_CONFIG);
-       au_sync();
-       udelay(1000);
-
-       /* Disable EHC clock. If the HS PHY is unused disable it too. */
-       c = au_readl(USB_HOST_CONFIG) & ~USB_MCFG_EHCCLKEN;
-       if (!(c & USB_MCFG_UCECLKEN))           /* UDC disabled? */
-               c &= ~USB_MCFG_PHYPLLEN;        /* yes: disable HS PHY PLL */
-       au_writel(c, USB_HOST_CONFIG);
-       au_sync();
-}
-
 static int au1xxx_ehci_setup(struct usb_hcd *hcd)
 {
        struct ehci_hcd *ehci = hcd_to_ehci(hcd);
@@ -136,16 +84,6 @@ static int ehci_hcd_au1xxx_drv_probe(struct platform_device *pdev)
        if (usb_disabled())
                return -ENODEV;
 
-#if defined(CONFIG_SOC_AU1200) && defined(CONFIG_DMA_COHERENT)
-       /* Au1200 AB USB does not support coherent memory */
-       if (!(read_c0_prid() & 0xff)) {
-               printk(KERN_INFO "%s: this is chip revision AB!\n", pdev->name);
-               printk(KERN_INFO "%s: update your board or re-configure"
-                                " the kernel\n", pdev->name);
-               return -ENODEV;
-       }
-#endif
-
        if (pdev->resource[1].flags != IORESOURCE_IRQ) {
                pr_debug("resource[1] is not IORESOURCE_IRQ");
                return -ENOMEM;
@@ -171,7 +109,11 @@ static int ehci_hcd_au1xxx_drv_probe(struct platform_device *pdev)
                goto err2;
        }
 
-       au1xxx_start_ehc();
+       if (alchemy_usb_control(ALCHEMY_USB_EHCI0, 1)) {
+               printk(KERN_INFO "%s: controller init failed!\n", pdev->name);
+               ret = -ENODEV;
+               goto err3;
+       }
 
        ehci = hcd_to_ehci(hcd);
        ehci->caps = hcd->regs;
@@ -187,7 +129,8 @@ static int ehci_hcd_au1xxx_drv_probe(struct platform_device *pdev)
                return ret;
        }
 
-       au1xxx_stop_ehc();
+       alchemy_usb_control(ALCHEMY_USB_EHCI0, 0);
+err3:
        iounmap(hcd->regs);
 err2:
        release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
@@ -201,10 +144,10 @@ static int ehci_hcd_au1xxx_drv_remove(struct platform_device *pdev)
        struct usb_hcd *hcd = platform_get_drvdata(pdev);
 
        usb_remove_hcd(hcd);
+       alchemy_usb_control(ALCHEMY_USB_EHCI0, 0);
        iounmap(hcd->regs);
        release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
        usb_put_hcd(hcd);
-       au1xxx_stop_ehc();
        platform_set_drvdata(pdev, NULL);
 
        return 0;
@@ -236,7 +179,7 @@ static int ehci_hcd_au1xxx_drv_suspend(struct device *dev)
        // could save FLADJ in case of Vaux power loss
        // ... we'd only use it to handle clock skew
 
-       au1xxx_stop_ehc();
+       alchemy_usb_control(ALCHEMY_USB_EHCI0, 0);
 
        return rc;
 }
@@ -246,7 +189,7 @@ static int ehci_hcd_au1xxx_drv_resume(struct device *dev)
        struct usb_hcd *hcd = dev_get_drvdata(dev);
        struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 
-       au1xxx_start_ehc();
+       alchemy_usb_control(ALCHEMY_USB_EHCI0, 1);
 
        // maybe restore FLADJ
 
index 59e81615e09c163daf299b3017a5e67af80e3958..3ff9f82f7263fe8a95b7cef78d39d7f5f15782d1 100644 (file)
@@ -1224,7 +1224,7 @@ MODULE_LICENSE ("GPL");
 #define PLATFORM_DRIVER                ehci_hcd_sh_driver
 #endif
 
-#ifdef CONFIG_SOC_AU1200
+#ifdef CONFIG_MIPS_ALCHEMY
 #include "ehci-au1xxx.c"
 #define        PLATFORM_DRIVER         ehci_hcd_au1xxx_driver
 #endif
index 6b7bc50dfeaa02519ab969bbdce905fa0d109a19..9b66df8278f3816e04931c07cecb6a146e873566 100644 (file)
 
 #include <asm/mach-au1x00/au1000.h>
 
-#ifndef        CONFIG_SOC_AU1200
-
-#define USBH_ENABLE_BE (1<<0)
-#define USBH_ENABLE_C  (1<<1)
-#define USBH_ENABLE_E  (1<<2)
-#define USBH_ENABLE_CE (1<<3)
-#define USBH_ENABLE_RD (1<<4)
-
-#ifdef __LITTLE_ENDIAN
-#define USBH_ENABLE_INIT (USBH_ENABLE_CE | USBH_ENABLE_E | USBH_ENABLE_C)
-#elif defined(__BIG_ENDIAN)
-#define USBH_ENABLE_INIT (USBH_ENABLE_CE | USBH_ENABLE_E | USBH_ENABLE_C | \
-                         USBH_ENABLE_BE)
-#else
-#error not byte order defined
-#endif
-
-#else   /* Au1200 */
-
-#define USB_HOST_CONFIG    (USB_MSR_BASE + USB_MSR_MCFG)
-#define USB_MCFG_PFEN     (1<<31)
-#define USB_MCFG_RDCOMB   (1<<30)
-#define USB_MCFG_SSDEN    (1<<23)
-#define USB_MCFG_OHCCLKEN (1<<16)
-#ifdef CONFIG_DMA_COHERENT
-#define USB_MCFG_UCAM     (1<<7)
-#else
-#define USB_MCFG_UCAM     (0)
-#endif
-#define USB_MCFG_OBMEN    (1<<1)
-#define USB_MCFG_OMEMEN   (1<<0)
-
-#define USBH_ENABLE_CE    USB_MCFG_OHCCLKEN
-
-#define USBH_ENABLE_INIT  (USB_MCFG_PFEN  | USB_MCFG_RDCOMB    |       \
-                          USBH_ENABLE_CE | USB_MCFG_SSDEN      |       \
-                          USB_MCFG_UCAM  |                             \
-                          USB_MCFG_OBMEN | USB_MCFG_OMEMEN)
-
-#define USBH_DISABLE      (USB_MCFG_OBMEN | USB_MCFG_OMEMEN)
-
-#endif  /* Au1200 */
 
 extern int usb_disabled(void);
 
-static void au1xxx_start_ohc(void)
-{
-       /* enable host controller */
-#ifndef CONFIG_SOC_AU1200
-       au_writel(USBH_ENABLE_CE, USB_HOST_CONFIG);
-       au_sync();
-       udelay(1000);
-
-       au_writel(au_readl(USB_HOST_CONFIG) | USBH_ENABLE_INIT, USB_HOST_CONFIG);
-       au_sync();
-       udelay(1000);
-
-       /* wait for reset complete (read register twice; see au1500 errata) */
-       while (au_readl(USB_HOST_CONFIG),
-               !(au_readl(USB_HOST_CONFIG) & USBH_ENABLE_RD))
-               udelay(1000);
-
-#else   /* Au1200 */
-       au_writel(au_readl(USB_HOST_CONFIG) | USBH_ENABLE_CE, USB_HOST_CONFIG);
-       au_sync();
-       udelay(1000);
-
-       au_writel(au_readl(USB_HOST_CONFIG) | USBH_ENABLE_INIT, USB_HOST_CONFIG);
-       au_sync();
-       udelay(2000);
-#endif  /* Au1200 */
-}
-
-static void au1xxx_stop_ohc(void)
-{
-#ifdef CONFIG_SOC_AU1200
-       /* Disable mem */
-       au_writel(au_readl(USB_HOST_CONFIG) & ~USBH_DISABLE, USB_HOST_CONFIG);
-       au_sync();
-       udelay(1000);
-#endif
-       /* Disable clock */
-       au_writel(au_readl(USB_HOST_CONFIG) & ~USBH_ENABLE_CE, USB_HOST_CONFIG);
-       au_sync();
-}
-
 static int __devinit ohci_au1xxx_start(struct usb_hcd *hcd)
 {
        struct ohci_hcd *ohci = hcd_to_ohci(hcd);
@@ -178,17 +95,6 @@ static int ohci_hcd_au1xxx_drv_probe(struct platform_device *pdev)
        if (usb_disabled())
                return -ENODEV;
 
-#if defined(CONFIG_SOC_AU1200) && defined(CONFIG_DMA_COHERENT)
-       /* Au1200 AB USB does not support coherent memory */
-       if (!(read_c0_prid() & 0xff)) {
-               printk(KERN_INFO "%s: this is chip revision AB !!\n",
-                       pdev->name);
-               printk(KERN_INFO "%s: update your board or re-configure "
-                                "the kernel\n", pdev->name);
-               return -ENODEV;
-       }
-#endif
-
        if (pdev->resource[1].flags != IORESOURCE_IRQ) {
                pr_debug("resource[1] is not IORESOURCE_IRQ\n");
                return -ENOMEM;
@@ -214,7 +120,12 @@ static int ohci_hcd_au1xxx_drv_probe(struct platform_device *pdev)
                goto err2;
        }
 
-       au1xxx_start_ohc();
+       if (alchemy_usb_control(ALCHEMY_USB_OHCI0, 1)) {
+               printk(KERN_INFO "%s: controller init failed!\n", pdev->name);
+               ret = -ENODEV;
+               goto err3;
+       }
+
        ohci_hcd_init(hcd_to_ohci(hcd));
 
        ret = usb_add_hcd(hcd, pdev->resource[1].start,
@@ -224,7 +135,8 @@ static int ohci_hcd_au1xxx_drv_probe(struct platform_device *pdev)
                return ret;
        }
 
-       au1xxx_stop_ohc();
+       alchemy_usb_control(ALCHEMY_USB_OHCI0, 0);
+err3:
        iounmap(hcd->regs);
 err2:
        release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
@@ -238,7 +150,7 @@ static int ohci_hcd_au1xxx_drv_remove(struct platform_device *pdev)
        struct usb_hcd *hcd = platform_get_drvdata(pdev);
 
        usb_remove_hcd(hcd);
-       au1xxx_stop_ohc();
+       alchemy_usb_control(ALCHEMY_USB_OHCI0, 0);
        iounmap(hcd->regs);
        release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
        usb_put_hcd(hcd);
@@ -275,7 +187,7 @@ static int ohci_hcd_au1xxx_drv_suspend(struct device *dev)
 
        clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
 
-       au1xxx_stop_ohc();
+       alchemy_usb_control(ALCHEMY_USB_OHCI0, 0);
 bail:
        spin_unlock_irqrestore(&ohci->lock, flags);
 
@@ -286,7 +198,7 @@ static int ohci_hcd_au1xxx_drv_resume(struct device *dev)
 {
        struct usb_hcd *hcd = dev_get_drvdata(dev);
 
-       au1xxx_start_ohc();
+       alchemy_usb_control(ALCHEMY_USB_OHCI0, 1);
 
        set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
        ohci_finish_controller_resume(hcd);
index 8165c5577d715c33a04e30b9a10d34a4a21dd93e..d83e967e4e15b6380fa1dc94584aac871b6a4b1d 100644 (file)
@@ -1753,7 +1753,7 @@ endchoice
 
 config FB_AU1100
        bool "Au1100 LCD Driver"
-       depends on (FB = y) && MIPS && SOC_AU1100
+       depends on (FB = y) && MIPS_ALCHEMY
        select FB_CFB_FILLRECT
        select FB_CFB_COPYAREA
        select FB_CFB_IMAGEBLIT
@@ -1764,7 +1764,7 @@ config FB_AU1100
 
 config FB_AU1200
        bool "Au1200 LCD Driver"
-       depends on (FB = y) && MIPS && SOC_AU1200
+       depends on (FB = y) && MIPS_ALCHEMY
        select FB_SYS_FILLRECT
        select FB_SYS_COPYAREA
        select FB_SYS_IMAGEBLIT
index 57e493b1bd209d7663821b60e29ddfd0bc719684..816ed08e7cf3c504f4ce5377b327964a30daa8e9 100644 (file)
@@ -35,4 +35,15 @@ config VIRTIO_BALLOON
 
         If unsure, say M.
 
+ config VIRTIO_MMIO
+       tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)"
+       depends on EXPERIMENTAL
+       select VIRTIO
+       select VIRTIO_RING
+       ---help---
+        This drivers provides support for memory mapped virtio
+        platform device driver.
+
+        If unsure, say N.
+
 endmenu
index 6738c446c199e5af4ebfdb655166c1d7dc2fa065..5a4c63cfd3803308726ce5fb95fd73136ef5dae5 100644 (file)
@@ -1,4 +1,5 @@
 obj-$(CONFIG_VIRTIO) += virtio.o
 obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
+obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
new file mode 100644 (file)
index 0000000..acc5e43
--- /dev/null
@@ -0,0 +1,479 @@
+/*
+ * Virtio memory mapped device driver
+ *
+ * Copyright 2011, ARM Ltd.
+ *
+ * This module allows virtio devices to be used over a virtual, memory mapped
+ * platform device.
+ *
+ * Registers layout (all 32-bit wide):
+ *
+ * offset d. name             description
+ * ------ -- ---------------- -----------------
+ *
+ * 0x000  R  MagicValue       Magic value "virt"
+ * 0x004  R  Version          Device version (current max. 1)
+ * 0x008  R  DeviceID         Virtio device ID
+ * 0x00c  R  VendorID         Virtio vendor ID
+ *
+ * 0x010  R  HostFeatures     Features supported by the host
+ * 0x014  W  HostFeaturesSel  Set of host features to access via HostFeatures
+ *
+ * 0x020  W  GuestFeatures    Features activated by the guest
+ * 0x024  W  GuestFeaturesSel Set of activated features to set via GuestFeatures
+ * 0x028  W  GuestPageSize    Size of guest's memory page in bytes
+ *
+ * 0x030  W  QueueSel         Queue selector
+ * 0x034  R  QueueNumMax      Maximum size of the currently selected queue
+ * 0x038  W  QueueNum         Queue size for the currently selected queue
+ * 0x03c  W  QueueAlign       Used Ring alignment for the current queue
+ * 0x040  RW QueuePFN         PFN for the currently selected queue
+ *
+ * 0x050  W  QueueNotify      Queue notifier
+ * 0x060  R  InterruptStatus  Interrupt status register
+ * 0x060  W  InterruptACK     Interrupt acknowledge register
+ * 0x070  RW Status           Device status register
+ *
+ * 0x100+ RW                  Device-specific configuration space
+ *
+ * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_mmio.h>
+#include <linux/virtio_ring.h>
+
+
+
+/* The alignment to use between consumer and producer parts of vring.
+ * Currently hardcoded to the page size. */
+#define VIRTIO_MMIO_VRING_ALIGN                PAGE_SIZE
+
+
+
+#define to_virtio_mmio_device(_plat_dev) \
+       container_of(_plat_dev, struct virtio_mmio_device, vdev)
+
+struct virtio_mmio_device {
+       struct virtio_device vdev;
+       struct platform_device *pdev;
+
+       void __iomem *base;
+       unsigned long version;
+
+       /* a list of queues so we can dispatch IRQs */
+       spinlock_t lock;
+       struct list_head virtqueues;
+};
+
+struct virtio_mmio_vq_info {
+       /* the actual virtqueue */
+       struct virtqueue *vq;
+
+       /* the number of entries in the queue */
+       unsigned int num;
+
+       /* the index of the queue */
+       int queue_index;
+
+       /* the virtual address of the ring queue */
+       void *queue;
+
+       /* the list node for the virtqueues list */
+       struct list_head node;
+};
+
+
+
+/* Configuration interface */
+
+static u32 vm_get_features(struct virtio_device *vdev)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+       /* TODO: Features > 32 bits */
+       writel(0, vm_dev->base + VIRTIO_MMIO_HOST_FEATURES_SEL);
+
+       return readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES);
+}
+
+static void vm_finalize_features(struct virtio_device *vdev)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+       int i;
+
+       /* Give virtio_ring a chance to accept features. */
+       vring_transport_features(vdev);
+
+       for (i = 0; i < ARRAY_SIZE(vdev->features); i++) {
+               writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SET);
+               writel(vdev->features[i],
+                               vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
+       }
+}
+
+static void vm_get(struct virtio_device *vdev, unsigned offset,
+                  void *buf, unsigned len)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+       u8 *ptr = buf;
+       int i;
+
+       for (i = 0; i < len; i++)
+               ptr[i] = readb(vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
+}
+
+static void vm_set(struct virtio_device *vdev, unsigned offset,
+                  const void *buf, unsigned len)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+       const u8 *ptr = buf;
+       int i;
+
+       for (i = 0; i < len; i++)
+               writeb(ptr[i], vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
+}
+
+static u8 vm_get_status(struct virtio_device *vdev)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+       return readl(vm_dev->base + VIRTIO_MMIO_STATUS) & 0xff;
+}
+
+static void vm_set_status(struct virtio_device *vdev, u8 status)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+       /* We should never be setting status to 0. */
+       BUG_ON(status == 0);
+
+       writel(status, vm_dev->base + VIRTIO_MMIO_STATUS);
+}
+
+static void vm_reset(struct virtio_device *vdev)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+       /* 0 status means a reset. */
+       writel(0, vm_dev->base + VIRTIO_MMIO_STATUS);
+}
+
+
+
+/* Transport interface */
+
+/* the notify function used when creating a virt queue */
+static void vm_notify(struct virtqueue *vq)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
+       struct virtio_mmio_vq_info *info = vq->priv;
+
+       /* We write the queue's selector into the notification register to
+        * signal the other end */
+       writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
+}
+
+/* Notify all virtqueues on an interrupt. */
+static irqreturn_t vm_interrupt(int irq, void *opaque)
+{
+       struct virtio_mmio_device *vm_dev = opaque;
+       struct virtio_mmio_vq_info *info;
+       struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
+                       struct virtio_driver, driver);
+       unsigned long status;
+       unsigned long flags;
+       irqreturn_t ret = IRQ_NONE;
+
+       /* Read and acknowledge interrupts */
+       status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
+       writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
+
+       if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
+                       && vdrv && vdrv->config_changed) {
+               vdrv->config_changed(&vm_dev->vdev);
+               ret = IRQ_HANDLED;
+       }
+
+       if (likely(status & VIRTIO_MMIO_INT_VRING)) {
+               spin_lock_irqsave(&vm_dev->lock, flags);
+               list_for_each_entry(info, &vm_dev->virtqueues, node)
+                       ret |= vring_interrupt(irq, info->vq);
+               spin_unlock_irqrestore(&vm_dev->lock, flags);
+       }
+
+       return ret;
+}
+
+
+
+static void vm_del_vq(struct virtqueue *vq)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
+       struct virtio_mmio_vq_info *info = vq->priv;
+       unsigned long flags, size;
+
+       spin_lock_irqsave(&vm_dev->lock, flags);
+       list_del(&info->node);
+       spin_unlock_irqrestore(&vm_dev->lock, flags);
+
+       vring_del_virtqueue(vq);
+
+       /* Select and deactivate the queue */
+       writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
+       writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
+
+       size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN));
+       free_pages_exact(info->queue, size);
+       kfree(info);
+}
+
+static void vm_del_vqs(struct virtio_device *vdev)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+       struct virtqueue *vq, *n;
+
+       list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+               vm_del_vq(vq);
+
+       free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
+}
+
+
+
+static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
+                                 void (*callback)(struct virtqueue *vq),
+                                 const char *name)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+       struct virtio_mmio_vq_info *info;
+       struct virtqueue *vq;
+       unsigned long flags, size;
+       int err;
+
+       /* Select the queue we're interested in */
+       writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
+
+       /* Queue shouldn't already be set up. */
+       if (readl(vm_dev->base + VIRTIO_MMIO_QUEUE_PFN)) {
+               err = -ENOENT;
+               goto error_available;
+       }
+
+       /* Allocate and fill out our active queue description */
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info) {
+               err = -ENOMEM;
+               goto error_kmalloc;
+       }
+       info->queue_index = index;
+
+       /* Allocate pages for the queue - start with a queue as big as
+        * possible (limited by maximum size allowed by device), drop down
+        * to a minimal size, just big enough to fit descriptor table
+        * and two rings (which makes it "alignment_size * 2")
+        */
+       info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
+       while (1) {
+               size = PAGE_ALIGN(vring_size(info->num,
+                               VIRTIO_MMIO_VRING_ALIGN));
+               /* Already smallest possible allocation? */
+               if (size <= VIRTIO_MMIO_VRING_ALIGN * 2) {
+                       err = -ENOMEM;
+                       goto error_alloc_pages;
+               }
+
+               info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+               if (info->queue)
+                       break;
+
+               info->num /= 2;
+       }
+
+       /* Activate the queue */
+       writel(info->num, vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
+       writel(VIRTIO_MMIO_VRING_ALIGN,
+                       vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN);
+       writel(virt_to_phys(info->queue) >> PAGE_SHIFT,
+                       vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
+
+       /* Create the vring */
+       vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN,
+                                vdev, info->queue, vm_notify, callback, name);
+       if (!vq) {
+               err = -ENOMEM;
+               goto error_new_virtqueue;
+       }
+
+       vq->priv = info;
+       info->vq = vq;
+
+       spin_lock_irqsave(&vm_dev->lock, flags);
+       list_add(&info->node, &vm_dev->virtqueues);
+       spin_unlock_irqrestore(&vm_dev->lock, flags);
+
+       return vq;
+
+error_new_virtqueue:
+       writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
+       free_pages_exact(info->queue, size);
+error_alloc_pages:
+       kfree(info);
+error_kmalloc:
+error_available:
+       return ERR_PTR(err);
+}
+
+static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+                      struct virtqueue *vqs[],
+                      vq_callback_t *callbacks[],
+                      const char *names[])
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+       unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
+       int i, err;
+
+       err = request_irq(irq, vm_interrupt, IRQF_SHARED,
+                       dev_name(&vdev->dev), vm_dev);
+       if (err)
+               return err;
+
+       for (i = 0; i < nvqs; ++i) {
+               vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
+               if (IS_ERR(vqs[i])) {
+                       vm_del_vqs(vdev);
+                       return PTR_ERR(vqs[i]);
+               }
+       }
+
+       return 0;
+}
+
+
+
+static struct virtio_config_ops virtio_mmio_config_ops = {
+       .get            = vm_get,
+       .set            = vm_set,
+       .get_status     = vm_get_status,
+       .set_status     = vm_set_status,
+       .reset          = vm_reset,
+       .find_vqs       = vm_find_vqs,
+       .del_vqs        = vm_del_vqs,
+       .get_features   = vm_get_features,
+       .finalize_features = vm_finalize_features,
+};
+
+
+
+/* Platform device */
+
+static int __devinit virtio_mmio_probe(struct platform_device *pdev)
+{
+       struct virtio_mmio_device *vm_dev;
+       struct resource *mem;
+       unsigned long magic;
+
+       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!mem)
+               return -EINVAL;
+
+       if (!devm_request_mem_region(&pdev->dev, mem->start,
+                       resource_size(mem), pdev->name))
+               return -EBUSY;
+
+       vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
+       if (!vm_dev)
+               return  -ENOMEM;
+
+       vm_dev->vdev.dev.parent = &pdev->dev;
+       vm_dev->vdev.config = &virtio_mmio_config_ops;
+       vm_dev->pdev = pdev;
+       INIT_LIST_HEAD(&vm_dev->virtqueues);
+       spin_lock_init(&vm_dev->lock);
+
+       vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
+       if (vm_dev->base == NULL)
+               return -EFAULT;
+
+       /* Check magic value */
+       magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE);
+       if (memcmp(&magic, "virt", 4) != 0) {
+               dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic);
+               return -ENODEV;
+       }
+
+       /* Check device version */
+       vm_dev->version = readl(vm_dev->base + VIRTIO_MMIO_VERSION);
+       if (vm_dev->version != 1) {
+               dev_err(&pdev->dev, "Version %ld not supported!\n",
+                               vm_dev->version);
+               return -ENXIO;
+       }
+
+       vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID);
+       vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID);
+
+       writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE);
+
+       platform_set_drvdata(pdev, vm_dev);
+
+       return register_virtio_device(&vm_dev->vdev);
+}
+
+static int __devexit virtio_mmio_remove(struct platform_device *pdev)
+{
+       struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev);
+
+       unregister_virtio_device(&vm_dev->vdev);
+
+       return 0;
+}
+
+
+
+/* Platform driver */
+
+static struct of_device_id virtio_mmio_match[] = {
+       { .compatible = "virtio,mmio", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, virtio_mmio_match);
+
+static struct platform_driver virtio_mmio_driver = {
+       .probe          = virtio_mmio_probe,
+       .remove         = __devexit_p(virtio_mmio_remove),
+       .driver         = {
+               .name   = "virtio-mmio",
+               .owner  = THIS_MODULE,
+               .of_match_table = virtio_mmio_match,
+       },
+};
+
+static int __init virtio_mmio_init(void)
+{
+       return platform_driver_register(&virtio_mmio_driver);
+}
+
+static void __exit virtio_mmio_exit(void)
+{
+       platform_driver_unregister(&virtio_mmio_driver);
+}
+
+module_init(virtio_mmio_init);
+module_exit(virtio_mmio_exit);
+
+MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
+MODULE_DESCRIPTION("Platform bus driver for memory mapped virtio devices");
+MODULE_LICENSE("GPL");
index 4bcc8b82640be13af193478aaa2c4699bc609d2b..79a31e5b4b68143a43a370ee422453ac723b2af6 100644 (file)
@@ -415,9 +415,13 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
                }
        }
 
-       spin_lock_irqsave(&vp_dev->lock, flags);
-       list_add(&info->node, &vp_dev->virtqueues);
-       spin_unlock_irqrestore(&vp_dev->lock, flags);
+       if (callback) {
+               spin_lock_irqsave(&vp_dev->lock, flags);
+               list_add(&info->node, &vp_dev->virtqueues);
+               spin_unlock_irqrestore(&vp_dev->lock, flags);
+       } else {
+               INIT_LIST_HEAD(&info->node);
+       }
 
        return vq;
 
index 483d45180911f5729aa7ead5cd82558ccaf9c280..5754c9a4f58b49be5237ac50b70234779f0b07bd 100644 (file)
@@ -114,43 +114,7 @@ static struct bin_attribute w1_ds2760_bin_attr = {
        .read = w1_ds2760_read_bin,
 };
 
-static DEFINE_IDR(bat_idr);
-static DEFINE_MUTEX(bat_idr_lock);
-
-static int new_bat_id(void)
-{
-       int ret;
-
-       while (1) {
-               int id;
-
-               ret = idr_pre_get(&bat_idr, GFP_KERNEL);
-               if (ret == 0)
-                       return -ENOMEM;
-
-               mutex_lock(&bat_idr_lock);
-               ret = idr_get_new(&bat_idr, NULL, &id);
-               mutex_unlock(&bat_idr_lock);
-
-               if (ret == 0) {
-                       ret = id & MAX_ID_MASK;
-                       break;
-               } else if (ret == -EAGAIN) {
-                       continue;
-               } else {
-                       break;
-               }
-       }
-
-       return ret;
-}
-
-static void release_bat_id(int id)
-{
-       mutex_lock(&bat_idr_lock);
-       idr_remove(&bat_idr, id);
-       mutex_unlock(&bat_idr_lock);
-}
+static DEFINE_IDA(bat_ida);
 
 static int w1_ds2760_add_slave(struct w1_slave *sl)
 {
@@ -158,7 +122,7 @@ static int w1_ds2760_add_slave(struct w1_slave *sl)
        int id;
        struct platform_device *pdev;
 
-       id = new_bat_id();
+       id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL);
        if (id < 0) {
                ret = id;
                goto noid;
@@ -187,7 +151,7 @@ bin_attr_failed:
 pdev_add_failed:
        platform_device_unregister(pdev);
 pdev_alloc_failed:
-       release_bat_id(id);
+       ida_simple_remove(&bat_ida, id);
 noid:
 success:
        return ret;
@@ -199,7 +163,7 @@ static void w1_ds2760_remove_slave(struct w1_slave *sl)
        int id = pdev->id;
 
        platform_device_unregister(pdev);
-       release_bat_id(id);
+       ida_simple_remove(&bat_ida, id);
        sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2760_bin_attr);
 }
 
@@ -217,14 +181,14 @@ static int __init w1_ds2760_init(void)
 {
        printk(KERN_INFO "1-Wire driver for the DS2760 battery monitor "
               " chip  - (c) 2004-2005, Szabolcs Gyurko\n");
-       idr_init(&bat_idr);
+       ida_init(&bat_ida);
        return w1_register_family(&w1_ds2760_family);
 }
 
 static void __exit w1_ds2760_exit(void)
 {
        w1_unregister_family(&w1_ds2760_family);
-       idr_destroy(&bat_idr);
+       ida_destroy(&bat_ida);
 }
 
 EXPORT_SYMBOL(w1_ds2760_read);
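
The IDR-to-IDA conversion above replaces the idr_pre_get()/idr_get_new()
retry loop and its private mutex with the ida_simple_*() helpers, which do
their own locking and retrying internally and need no data pointer since
only the id itself is wanted. The whole pattern shrinks to:

        #include <linux/idr.h>

        static DEFINE_IDA(example_ida);

        static int example_get_id(void)
        {
                /* smallest free id >= 0; an end of 0 means "no upper bound" */
                return ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
        }

        static void example_put_id(int id)
        {
                ida_simple_remove(&example_ida, id);
        }

ida_simple_get() returns a negative errno on failure, which is why the
callers above can assign the result straight to id and test id < 0.
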
index 274c8f38303f5748478c47309c6e255b5c69ead1..39f78c0b143cb495a15eff9d67f5e4b7eb6ce4be 100644 (file)
 #include "../w1_family.h"
 #include "w1_ds2780.h"
 
-int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
-                       int io)
+static int w1_ds2780_do_io(struct device *dev, char *buf, int addr,
+                       size_t count, int io)
 {
        struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
 
-       if (!dev)
-               return -ENODEV;
+       if (addr > DS2780_DATA_SIZE || addr < 0)
+               return 0;
 
-       mutex_lock(&sl->master->mutex);
-
-       if (addr > DS2780_DATA_SIZE || addr < 0) {
-               count = 0;
-               goto out;
-       }
        count = min_t(int, count, DS2780_DATA_SIZE - addr);
 
        if (w1_reset_select_slave(sl) == 0) {
@@ -47,7 +41,6 @@ int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
                        w1_write_8(sl->master, W1_DS2780_WRITE_DATA);
                        w1_write_8(sl->master, addr);
                        w1_write_block(sl->master, buf, count);
-                       /* XXX w1_write_block returns void, not n_written */
                } else {
                        w1_write_8(sl->master, W1_DS2780_READ_DATA);
                        w1_write_8(sl->master, addr);
@@ -55,13 +48,42 @@ int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
                }
        }
 
-out:
+       return count;
+}
+
+int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
+                       int io)
+{
+       struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
+       int ret;
+
+       if (!dev)
+               return -ENODEV;
+
+       mutex_lock(&sl->master->mutex);
+
+       ret = w1_ds2780_do_io(dev, buf, addr, count, io);
+
        mutex_unlock(&sl->master->mutex);
 
-       return count;
+       return ret;
 }
 EXPORT_SYMBOL(w1_ds2780_io);
 
+int w1_ds2780_io_nolock(struct device *dev, char *buf, int addr, size_t count,
+                       int io)
+{
+       int ret;
+
+       if (!dev)
+               return -ENODEV;
+
+       ret = w1_ds2780_do_io(dev, buf, addr, count, io);
+
+       return ret;
+}
+EXPORT_SYMBOL(w1_ds2780_io_nolock);
+
 int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd)
 {
        struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
@@ -99,43 +121,7 @@ static struct bin_attribute w1_ds2780_bin_attr = {
        .read = w1_ds2780_read_bin,
 };
 
-static DEFINE_IDR(bat_idr);
-static DEFINE_MUTEX(bat_idr_lock);
-
-static int new_bat_id(void)
-{
-       int ret;
-
-       while (1) {
-               int id;
-
-               ret = idr_pre_get(&bat_idr, GFP_KERNEL);
-               if (ret == 0)
-                       return -ENOMEM;
-
-               mutex_lock(&bat_idr_lock);
-               ret = idr_get_new(&bat_idr, NULL, &id);
-               mutex_unlock(&bat_idr_lock);
-
-               if (ret == 0) {
-                       ret = id & MAX_ID_MASK;
-                       break;
-               } else if (ret == -EAGAIN) {
-                       continue;
-               } else {
-                       break;
-               }
-       }
-
-       return ret;
-}
-
-static void release_bat_id(int id)
-{
-       mutex_lock(&bat_idr_lock);
-       idr_remove(&bat_idr, id);
-       mutex_unlock(&bat_idr_lock);
-}
+static DEFINE_IDA(bat_ida);
 
 static int w1_ds2780_add_slave(struct w1_slave *sl)
 {
@@ -143,7 +129,7 @@ static int w1_ds2780_add_slave(struct w1_slave *sl)
        int id;
        struct platform_device *pdev;
 
-       id = new_bat_id();
+       id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL);
        if (id < 0) {
                ret = id;
                goto noid;
@@ -172,7 +158,7 @@ bin_attr_failed:
 pdev_add_failed:
        platform_device_unregister(pdev);
 pdev_alloc_failed:
-       release_bat_id(id);
+       ida_simple_remove(&bat_ida, id);
 noid:
        return ret;
 }
@@ -183,7 +169,7 @@ static void w1_ds2780_remove_slave(struct w1_slave *sl)
        int id = pdev->id;
 
        platform_device_unregister(pdev);
-       release_bat_id(id);
+       ida_simple_remove(&bat_ida, id);
        sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2780_bin_attr);
 }
 
@@ -199,14 +185,14 @@ static struct w1_family w1_ds2780_family = {
 
 static int __init w1_ds2780_init(void)
 {
-       idr_init(&bat_idr);
+       ida_init(&bat_ida);
        return w1_register_family(&w1_ds2780_family);
 }
 
 static void __exit w1_ds2780_exit(void)
 {
        w1_unregister_family(&w1_ds2780_family);
-       idr_destroy(&bat_idr);
+       ida_destroy(&bat_ida);
 }
 
 module_init(w1_ds2780_init);
index a1fba79eb1b54ea990dc9cf6923452e482a6e3e2..7373793650216cca9eed315cf9875c40863aa726 100644 (file)
 
 extern int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count,
                        int io);
+extern int w1_ds2780_io_nolock(struct device *dev, char *buf, int addr,
+                       size_t count, int io);
 extern int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd);
 
 #endif /* !_W1_DS2780_H */
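
The new _nolock variant is for callers that already hold the bus mutex and
want several transfers to happen atomically on the wire. A hedged sketch of
such a caller; the register addresses (0x00, 0x01) are made up:

        static int example_read_two(struct device *dev, char *a, char *b)
        {
                struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
                int ret;

                /* take the bus lock once around both transfers */
                mutex_lock(&sl->master->mutex);
                ret = w1_ds2780_io_nolock(dev, a, 0x00, 1, 0);  /* io=0: read */
                if (ret == 1)
                        ret = w1_ds2780_io_nolock(dev, b, 0x01, 1, 0);
                mutex_unlock(&sl->master->mutex);

                return ret;
        }
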
index d220bce2cee4d242532cb4d6b84114b0e45ea662..f79e62e54e8d25bf21b6bba2850ae9b4b9e6c184 100644 (file)
@@ -78,6 +78,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl,
        memcpy(&dev->dev, device, sizeof(struct device));
        dev_set_name(&dev->dev, "w1_bus_master%u", dev->id);
        snprintf(dev->name, sizeof(dev->name), "w1_bus_master%u", dev->id);
+       dev->dev.init_name = dev->name;
 
        dev->driver = driver;
 
index 765b37b62a4f608ceb062630ebae1afc6c5448fa..3135b2c63998c27b57cb8e1c56a563e8bb75751c 100644 (file)
@@ -158,13 +158,18 @@ EXPORT_SYMBOL_GPL(w1_write_8);
 static u8 w1_read_bit(struct w1_master *dev)
 {
        int result;
+       unsigned long flags;
 
+       /* sample timing is critical here */
+       local_irq_save(flags);
        dev->bus_master->write_bit(dev->bus_master->data, 0);
        w1_delay(6);
        dev->bus_master->write_bit(dev->bus_master->data, 1);
        w1_delay(9);
 
        result = dev->bus_master->read_bit(dev->bus_master->data);
+       local_irq_restore(flags);
+
        w1_delay(55);
 
        return result & 0x1;
index 86b0735e6aa0fd652ace4ba8546e0a49245590f2..64c6752ea2c66bfb57a5adf59ed20465eb43ad10 100644 (file)
@@ -726,7 +726,7 @@ config SBC8360_WDT
 
 config SBC7240_WDT
        tristate "SBC Nano 7240 Watchdog Timer"
-       depends on X86_32
+       depends on X86_32 && !UML
        ---help---
          This is the driver for the hardware watchdog found on the IEI
          single board computers EPIC Nano 7240 (and likely others). This
@@ -1174,6 +1174,10 @@ config XEN_WDT
          by Xen 4.0 and newer.  The watchdog timeout period is normally one
          minute but can be changed with a boot-time parameter.
 
+config UML_WATCHDOG
+       tristate "UML watchdog"
+       depends on UML
+
 #
 # ISA-based Watchdog Cards
 #
index b5a1076aaa6c1d83b070181d901a2e0022979452..879ed88517373792797f5b29cf110b50c8b7a4b2 100644 (file)
@@ -1138,7 +1138,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
        struct v9fs_session_info *v9ses = sb->s_fs_info;
        struct v9fs_inode *v9inode = V9FS_I(inode);
 
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
 
        inode->i_atime.tv_sec = stat->atime;
        inode->i_mtime.tv_sec = stat->mtime;
@@ -1164,7 +1164,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
                        /* HARDLINKCOUNT %u */
                        sscanf(ext, "%13s %u", tag_name, &i_nlink);
                        if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
-                               inode->i_nlink = i_nlink;
+                               set_nlink(inode, i_nlink);
                }
        }
        mode = stat->mode & S_IALLUGO;
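
This and the many hunks that follow are part of a tree-wide move from
direct i_nlink assignment to the VFS nlink helpers, which routes every
link-count change through one place. The mapping used throughout the rest
of this series is:

        /* absolute count decoded from an on-disk inode or server reply */
        set_nlink(inode, le16_to_cpu(raw_nlink));

        /* relative updates */
        inc_nlink(inode);       /* i_nlink++ */
        drop_nlink(inode);      /* i_nlink-- */
        clear_nlink(inode);     /* i_nlink = 0 (last name gone) */

The le16_to_cpu(raw_nlink) here is illustrative; each filesystem decodes
its own on-disk width, as the hunks below show with le16/le32/be16
variants.
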
index aded79fcd5cfdadc359929f3b39fad1f92ff2001..0b5745e219460fcda4d686349e3ba5ce69122318 100644 (file)
@@ -606,7 +606,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
                inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
                inode->i_uid = stat->st_uid;
                inode->i_gid = stat->st_gid;
-               inode->i_nlink = stat->st_nlink;
+               set_nlink(inode, stat->st_nlink);
 
                mode = stat->st_mode & S_IALLUGO;
                mode |= inode->i_mode & ~S_IALLUGO;
@@ -632,7 +632,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
                if (stat->st_result_mask & P9_STATS_GID)
                        inode->i_gid = stat->st_gid;
                if (stat->st_result_mask & P9_STATS_NLINK)
-                       inode->i_nlink = stat->st_nlink;
+                       set_nlink(inode, stat->st_nlink);
                if (stat->st_result_mask & P9_STATS_MODE) {
                        inode->i_mode = stat->st_mode;
                        if ((S_ISBLK(inode->i_mode)) ||
index d5250c5aae21e10c560180e1042234d3a4d91f25..1dab6a174d6a8c213d6b58bed1f1869ed6189cd4 100644 (file)
@@ -247,7 +247,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
        inode->i_gid     = ADFS_SB(sb)->s_gid;
        inode->i_ino     = obj->file_id;
        inode->i_size    = obj->size;
-       inode->i_nlink   = 2;
+       set_nlink(inode, 2);
        inode->i_blocks  = (inode->i_size + sb->s_blocksize - 1) >>
                            sb->s_blocksize_bits;
 
index 3a4557e8325c1cccad9b0498fb0dd0617ed1e589..de37ec842340c3943de5c7351a1e70cc7c5e4268 100644 (file)
@@ -215,7 +215,7 @@ affs_remove_link(struct dentry *dentry)
                                break;
                        default:
                                if (!AFFS_TAIL(sb, bh)->link_chain)
-                                       inode->i_nlink = 1;
+                                       set_nlink(inode, 1);
                        }
                        affs_free_block(sb, link_ino);
                        goto done;
@@ -316,7 +316,7 @@ affs_remove_header(struct dentry *dentry)
        if (inode->i_nlink > 1)
                retval = affs_remove_link(dentry);
        else
-               inode->i_nlink = 0;
+               clear_nlink(inode);
        affs_unlock_link(inode);
        inode->i_ctime = CURRENT_TIME_SEC;
        mark_inode_dirty(inode);
index 5d828903ac69ced919eaa3d6eb2429a49922d0e2..88a4b0b50058a85857de62c958504a52baa60a4d 100644 (file)
@@ -54,7 +54,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
        prot = be32_to_cpu(tail->protect);
 
        inode->i_size = 0;
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
        inode->i_mode = 0;
        AFFS_I(inode)->i_extcnt = 1;
        AFFS_I(inode)->i_ext_last = ~1;
@@ -137,7 +137,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
                                               sbi->s_hashsize + 1;
                }
                if (tail->link_chain)
-                       inode->i_nlink = 2;
+                       set_nlink(inode, 2);
                inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
                inode->i_op = &affs_file_inode_operations;
                inode->i_fop = &affs_file_operations;
@@ -304,7 +304,7 @@ affs_new_inode(struct inode *dir)
        inode->i_uid     = current_fsuid();
        inode->i_gid     = current_fsgid();
        inode->i_ino     = block;
-       inode->i_nlink   = 1;
+       set_nlink(inode, 1);
        inode->i_mtime   = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
        atomic_set(&AFFS_I(inode)->i_opencnt, 0);
        AFFS_I(inode)->i_blkcnt = 0;
@@ -387,7 +387,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
                AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block);
                affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
                mark_buffer_dirty_inode(inode_bh, inode);
-               inode->i_nlink = 2;
+               set_nlink(inode, 2);
                ihold(inode);
        }
        affs_fix_checksum(sb, bh);
index e3e9efc1fdd8276b19eaccb02747afeb27172671..780a11dc6318b953a6160bd39cf2d631b7505cc8 100644 (file)
@@ -277,7 +277,7 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
        inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
        error = affs_add_entry(dir, inode, dentry, ST_FILE);
        if (error) {
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                iput(inode);
                return error;
        }
@@ -305,7 +305,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
        error = affs_add_entry(dir, inode, dentry, ST_USERDIR);
        if (error) {
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                mark_inode_dirty(inode);
                iput(inode);
                return error;
@@ -392,7 +392,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
        return 0;
 
 err:
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        mark_inode_dirty(inode);
        iput(inode);
        return error;
index 346e3289abd70549987ce9f490e1d8b210b66bd1..2f213d109c21c143d6a7a49fb54f131884e7e8b9 100644 (file)
@@ -90,7 +90,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
                        vnode->vfs_inode.i_uid = status->owner;
                        vnode->vfs_inode.i_gid = status->group;
                        vnode->vfs_inode.i_generation = vnode->fid.unique;
-                       vnode->vfs_inode.i_nlink = status->nlink;
+                       set_nlink(&vnode->vfs_inode, status->nlink);
 
                        mode = vnode->vfs_inode.i_mode;
                        mode &= ~S_IALLUGO;
index 0fdab6e03d8781d60ea7968c5c9886b555178dcd..d890ae3b2ce6f487c1e5483cbaca8c398b37c35b 100644 (file)
@@ -67,7 +67,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
                fscache_attr_changed(vnode->cache);
 #endif
 
-       inode->i_nlink          = vnode->status.nlink;
+       set_nlink(inode, vnode->status.nlink);
        inode->i_uid            = vnode->status.owner;
        inode->i_gid            = 0;
        inode->i_size           = vnode->status.size;
@@ -174,7 +174,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
        inode->i_size           = 0;
        inode->i_mode           = S_IFDIR | S_IRUGO | S_IXUGO;
        inode->i_op             = &afs_autocell_inode_operations;
-       inode->i_nlink          = 2;
+       set_nlink(inode, 2);
        inode->i_uid            = 0;
        inode->i_gid            = 0;
        inode->i_ctime.tv_sec   = get_seconds();
index 632b235f4fbe02c237de17fbde271c8bfe6f1240..78c514cfd212d66b8e6311d4a25435be6493c26d 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -440,8 +440,6 @@ void exit_aio(struct mm_struct *mm)
 static struct kiocb *__aio_get_req(struct kioctx *ctx)
 {
        struct kiocb *req = NULL;
-       struct aio_ring *ring;
-       int okay = 0;
 
        req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
        if (unlikely(!req))
@@ -459,39 +457,114 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
        INIT_LIST_HEAD(&req->ki_run_list);
        req->ki_eventfd = NULL;
 
-       /* Check if the completion queue has enough free space to
-        * accept an event from this io.
-        */
+       return req;
+}
+
+/*
+ * struct kiocb's are allocated in batches to reduce the number of
+ * times the ctx lock is acquired and released.
+ */
+#define KIOCB_BATCH_SIZE       32L
+struct kiocb_batch {
+       struct list_head head;
+       long count; /* number of requests left to allocate */
+};
+
+static void kiocb_batch_init(struct kiocb_batch *batch, long total)
+{
+       INIT_LIST_HEAD(&batch->head);
+       batch->count = total;
+}
+
+static void kiocb_batch_free(struct kiocb_batch *batch)
+{
+       struct kiocb *req, *n;
+
+       list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
+               list_del(&req->ki_batch);
+               kmem_cache_free(kiocb_cachep, req);
+       }
+}
+
+/*
+ * Allocate a batch of kiocbs.  This avoids taking and dropping the
+ * context lock a lot during setup.
+ */
+static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
+{
+       unsigned short allocated, to_alloc;
+       long avail;
+       bool called_fput = false;
+       struct kiocb *req, *n;
+       struct aio_ring *ring;
+
+       to_alloc = min(batch->count, KIOCB_BATCH_SIZE);
+       for (allocated = 0; allocated < to_alloc; allocated++) {
+               req = __aio_get_req(ctx);
+               if (!req)
+                       /* allocation failed, go with what we've got */
+                       break;
+               list_add(&req->ki_batch, &batch->head);
+       }
+
+       if (allocated == 0)
+               goto out;
+
+retry:
        spin_lock_irq(&ctx->ctx_lock);
-       ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
-       if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
+       ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
+
+       avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
+       BUG_ON(avail < 0);
+       if (avail == 0 && !called_fput) {
+               /*
+                * Handle a potential starvation case.  It is possible that
+                * we hold the last reference on a struct file, causing us
+                * to delay the final fput to non-irq context.  In this case,
+                * ctx->reqs_active is artificially high.  Calling the fput
+                * routine here may free up a slot in the event completion
+                * ring, allowing this allocation to succeed.
+                */
+               kunmap_atomic(ring);
+               spin_unlock_irq(&ctx->ctx_lock);
+               aio_fput_routine(NULL);
+               called_fput = true;
+               goto retry;
+       }
+
+       if (avail < allocated) {
+               /* Trim back the number of requests. */
+               list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
+                       list_del(&req->ki_batch);
+                       kmem_cache_free(kiocb_cachep, req);
+                       if (--allocated <= avail)
+                               break;
+               }
+       }
+
+       batch->count -= allocated;
+       list_for_each_entry(req, &batch->head, ki_batch) {
                list_add(&req->ki_list, &ctx->active_reqs);
                ctx->reqs_active++;
-               okay = 1;
        }
-       kunmap_atomic(ring, KM_USER0);
-       spin_unlock_irq(&ctx->ctx_lock);
 
-       if (!okay) {
-               kmem_cache_free(kiocb_cachep, req);
-               req = NULL;
-       }
+       kunmap_atomic(ring);
+       spin_unlock_irq(&ctx->ctx_lock);
 
-       return req;
+out:
+       return allocated;
 }
 
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct kiocb *aio_get_req(struct kioctx *ctx,
+                                       struct kiocb_batch *batch)
 {
        struct kiocb *req;
-       /* Handle a potential starvation case -- should be exceedingly rare as 
-        * requests will be stuck on fput_head only if the aio_fput_routine is 
-        * delayed and the requests were the last user of the struct file.
-        */
-       req = __aio_get_req(ctx);
-       if (unlikely(NULL == req)) {
-               aio_fput_routine(NULL);
-               req = __aio_get_req(ctx);
-       }
+
+       if (list_empty(&batch->head))
+               if (kiocb_batch_refill(ctx, batch) == 0)
+                       return NULL;
+       req = list_first_entry(&batch->head, struct kiocb, ki_batch);
+       list_del(&req->ki_batch);
        return req;
 }
 
@@ -1515,7 +1588,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 }
 
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-                        struct iocb *iocb, bool compat)
+                        struct iocb *iocb, struct kiocb_batch *batch,
+                        bool compat)
 {
        struct kiocb *req;
        struct file *file;
@@ -1541,7 +1615,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        if (unlikely(!file))
                return -EBADF;
 
-       req = aio_get_req(ctx);         /* returns with 2 references to req */
+       req = aio_get_req(ctx, batch);  /* returns with 2 references to req */
        if (unlikely(!req)) {
                fput(file);
                return -EAGAIN;
@@ -1621,8 +1695,9 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 {
        struct kioctx *ctx;
        long ret = 0;
-       int i;
+       int i = 0;
        struct blk_plug plug;
+       struct kiocb_batch batch;
 
        if (unlikely(nr < 0))
                return -EINVAL;
@@ -1639,6 +1714,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
                return -EINVAL;
        }
 
+       kiocb_batch_init(&batch, nr);
+
        blk_start_plug(&plug);
 
        /*
@@ -1659,12 +1736,13 @@ long do_io_submit(aio_context_t ctx_id, long nr,
                        break;
                }
 
-               ret = io_submit_one(ctx, user_iocb, &tmp, compat);
+               ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat);
                if (ret)
                        break;
        }
        blk_finish_plug(&plug);
 
+       kiocb_batch_free(&batch);
        put_ioctx(ctx);
        return i ? i : ret;
 }
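
Taken together, the batching works like this from do_io_submit()'s point of
view: the batch is initialised with the full request budget, each
io_submit_one() call draws a kiocb from it (with kiocb_batch_refill()
taking ctx_lock once per chunk of up to 32 requests instead of once per
request), and any kiocbs left over are returned at the end. A condensed
paraphrase of the loop above, with the per-iocb setup elided:

        static void example_submit(struct kioctx *ctx, long nr)
        {
                struct kiocb_batch batch;
                struct kiocb *req;
                long i;

                kiocb_batch_init(&batch, nr);           /* budget: nr requests */

                for (i = 0; i < nr; i++) {
                        req = aio_get_req(ctx, &batch); /* refills as needed */
                        if (!req)
                                break;                  /* ring full or -ENOMEM */
                        /* ... copy in the iocb and submit req ... */
                }

                kiocb_batch_free(&batch);               /* free unused kiocbs */
        }
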
index 180fa2425e49310e9674a227c13297a1718f53fa..8179f1ab8175243905025444a5d58480f7098436 100644 (file)
@@ -342,7 +342,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode)
        inode->i_ino = get_next_ino();
 
        if (S_ISDIR(mode)) {
-               inode->i_nlink = 2;
+               set_nlink(inode, 2);
                inode->i_op = &autofs4_dir_inode_operations;
                inode->i_fop = &autofs4_dir_operations;
        } else if (S_ISLNK(mode)) {
index 720d885e8dcaa87b54f61f0fa8b027109528758b..8342ca67abcdcaf7c5992dc1335e5d12f92730e4 100644 (file)
@@ -357,7 +357,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
        inode->i_gid = befs_sb->mount_opts.use_gid ?
            befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid);
 
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
 
        /*
         * BEFS's time is 64 bits, but current VFS is 32 bits...
index b14cebfd90477ead4b15f0c1583da6393c247de5..9cc074019479e8a577651ba7e2fded8831f6d3e2 100644 (file)
@@ -199,7 +199,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
                printf("unlinking non-existent file %s:%lu (nlink=%d)\n",
                                        inode->i_sb->s_id, inode->i_ino,
                                        inode->i_nlink);
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
        }
        de->ino = 0;
        mark_buffer_dirty_inode(bh, dir);
index a8e37f81d097b0934760789836e4dbc0574fb014..697af5bf70b303d588cda27f31ed680854aebddc 100644 (file)
@@ -78,7 +78,7 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
        BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino);
        inode->i_uid =  le32_to_cpu(di->i_uid);
        inode->i_gid =  le32_to_cpu(di->i_gid);
-       inode->i_nlink =  le32_to_cpu(di->i_nlink);
+       set_nlink(inode, le32_to_cpu(di->i_nlink));
        inode->i_size = BFS_FILESIZE(di);
        inode->i_blocks = BFS_FILEBLOCKS(di);
        inode->i_atime.tv_sec =  le32_to_cpu(di->i_atime);
index dd0fdfc56d38c4753091ff44da049158048a8b6d..21ac5ee4b43f3e767b56aff2f54c6018175bc6af 100644 (file)
@@ -795,7 +795,16 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
 #if defined(CONFIG_X86) || defined(CONFIG_ARM)
-                       load_bias = 0;
+                       /* Memory randomization might have been switched off
+                        * in runtime via sysctl.
+                        * If that is the case, retain the original non-zero
+                        * load_bias value in order to establish proper
+                        * non-randomized mappings.
+                        */
+                       if (current->flags & PF_RANDOMIZE)
+                               load_bias = 0;
+                       else
+                               load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 #else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 #endif
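
PF_RANDOMIZE is clear when randomization is off, either globally (runtime
sysctl kernel.randomize_va_space=0) or per process via the
ADDR_NO_RANDOMIZE personality; in that case a zero load_bias would let the
ET_DYN executable be mapped wherever mmap() chooses, clashing with the brk
that must follow the loader. The decision distilled:

        static unsigned long example_et_dyn_bias(unsigned long vaddr)
        {
                if (current->flags & PF_RANDOMIZE)
                        return 0;       /* mmap() randomizes the base */
                /* fixed, non-randomized mapping */
                return ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
        }
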
index ba1a1ae4a18a52c714ae82281e3c1f39fbfae02a..1e9edbdeda7ebcbe243b299d46859c4917f0e217 100644 (file)
@@ -521,7 +521,7 @@ static void kill_node(Node *e)
        write_unlock(&entries_lock);
 
        if (dentry) {
-               dentry->d_inode->i_nlink--;
+               drop_nlink(dentry->d_inode);
                d_drop(dentry);
                dput(dentry);
                simple_release_fs(&bm_mnt, &entry_count);
index b52c672f4c180beae28f60302071731bd343dcba..ae4d9cd10961d488dae63cc196187f95f6611ee9 100644 (file)
@@ -1641,7 +1641,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
        inode->i_gid = btrfs_stack_inode_gid(inode_item);
        btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
        inode->i_mode = btrfs_stack_inode_mode(inode_item);
-       inode->i_nlink = btrfs_stack_inode_nlink(inode_item);
+       set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
        inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
        BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
        BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
index 07b3ac662e193e6b3a19180c3c3c9bfccbeb8028..07ea91879a91a35d99f2ca6e1cbffd52bdbcae82 100644 (file)
@@ -1705,7 +1705,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        sb->s_bdi = &fs_info->bdi;
 
        fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
-       fs_info->btree_inode->i_nlink = 1;
+       set_nlink(fs_info->btree_inode, 1);
        /*
         * we set the i_size on the btree inode to the max possible int.
         * the real end of the address space is determined by all of
index b2d004ad66a0159042c38f96691c92bb0c31537e..75686a61bd4550c5adad9a9f56091ed42217de6d 100644 (file)
@@ -2534,7 +2534,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
        inode->i_mode = btrfs_inode_mode(leaf, inode_item);
-       inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
+       set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
        inode->i_uid = btrfs_inode_uid(leaf, inode_item);
        inode->i_gid = btrfs_inode_gid(leaf, inode_item);
        btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
@@ -6728,7 +6728,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
        inode->i_op = &btrfs_dir_inode_operations;
        inode->i_fop = &btrfs_dir_file_operations;
 
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
        btrfs_i_size_write(inode, 0);
 
        err = btrfs_update_inode(trans, new_root, inode);
index 786639fca067992f75f546e4fc68bc3f2ac2e4fb..0618aa39740b35cc7cb1620e5c1cf034c18a5400 100644 (file)
@@ -1030,7 +1030,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
        }
        btrfs_release_path(path);
        if (nlink != inode->i_nlink) {
-               inode->i_nlink = nlink;
+               set_nlink(inode, nlink);
                btrfs_update_inode(trans, root, inode);
        }
        BTRFS_I(inode)->index_cnt = (u64)-1;
index b8731bf3ef1f01c9e728788cc23a9aaecb76bbca..15b21e35078a2f5cb204ccc00d1959d47366c89b 100644 (file)
@@ -2363,7 +2363,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
        }
 
        if ((issued & CEPH_CAP_LINK_EXCL) == 0)
-               inode->i_nlink = le32_to_cpu(grant->nlink);
+               set_nlink(inode, le32_to_cpu(grant->nlink));
 
        if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
                int len = le32_to_cpu(grant->xattr_len);
index 5dde7d51dc1141ed04d692dddb55de431178aee8..1616a0d37cbd785d9ade8579ba108e8e4174ff15 100644 (file)
@@ -618,7 +618,7 @@ static int fill_inode(struct inode *inode,
        }
 
        if ((issued & CEPH_CAP_LINK_EXCL) == 0)
-               inode->i_nlink = le32_to_cpu(info->nlink);
+               set_nlink(inode, le32_to_cpu(info->nlink));
 
        /* be careful with mtime, atime, size */
        ceph_decode_timespec(&atime, &info->atime);
index 2cfb695d1f89effdb7355d654cc1d2986321acd5..5d9b9acc5fcebd1b9c9eff1ab3458acc19fbc72b 100644 (file)
@@ -204,7 +204,7 @@ int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov,
 }
 
 /* first calculate 24 bytes ntlm response and then 16 byte session key */
-int setup_ntlm_response(struct cifs_ses *ses)
+int setup_ntlm_response(struct cifs_ses *ses, const struct nls_table *nls_cp)
 {
        int rc = 0;
        unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
@@ -221,14 +221,14 @@ int setup_ntlm_response(struct cifs_ses *ses)
        ses->auth_key.len = temp_len;
 
        rc = SMBNTencrypt(ses->password, ses->server->cryptkey,
-                       ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+                       ses->auth_key.response + CIFS_SESS_KEY_SIZE, nls_cp);
        if (rc) {
                cFYI(1, "%s Can't generate NTLM response, error: %d",
                        __func__, rc);
                return rc;
        }
 
-       rc = E_md4hash(ses->password, temp_key);
+       rc = E_md4hash(ses->password, temp_key, nls_cp);
        if (rc) {
                cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
                return rc;
@@ -404,7 +404,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
        }
 
        /* calculate md4 hash of password */
-       E_md4hash(ses->password, nt_hash);
+       E_md4hash(ses->password, nt_hash, nls_cp);
 
        rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
                                CIFS_NTHASH_SIZE);
index d9dbaf869cd13e8e761bedadcb9c6a02aa94e551..30ff56005d8ff16161910740b5e70741da480e84 100644 (file)
@@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "1.75"
+#define CIFS_VERSION   "1.76"
 #endif                         /* _CIFSFS_H */
index ef4f631e4c017550e1083897113de48a22be765c..6f4e243e0f624759251472d611353c8a2e32241e 100644 (file)
@@ -395,8 +395,9 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
 extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov,
                                 struct TCP_Server_Info *server,
                                __u32 expected_sequence_number);
-extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
-extern int setup_ntlm_response(struct cifs_ses *);
+extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *,
+                       const struct nls_table *);
+extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
 extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
 extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
 extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
@@ -448,7 +449,8 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
                const unsigned char *path,
                struct cifs_sb_info *cifs_sb, int xid);
 extern int mdfour(unsigned char *, unsigned char *, int);
-extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
+extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
+                       const struct nls_table *codepage);
 extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
                        unsigned char *p24);
 
index d545a95c30ed81d0b27e3cd52ab707897b2743e5..c0458c543f170f1ebca913f17c1116f0fd77620d 100644 (file)
@@ -3452,7 +3452,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses,
                else
 #endif /* CIFS_WEAK_PW_HASH */
                rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
-                                       bcc_ptr);
+                                       bcc_ptr, nls_codepage);
 
                bcc_ptr += CIFS_AUTH_RESP_SIZE;
                if (ses->capabilities & CAP_UNICODE) {
index ea096ce5d4f7e86b94df26ac319cca8769c38595..c1f063cd1b0cf2cd5d8d957a4a65f75b61a4c42a 100644 (file)
@@ -778,7 +778,6 @@ try_again:
                else {
                        mutex_lock(&cinode->lock_mutex);
                        list_del_init(&lock->blist);
-                       mutex_unlock(&cinode->lock_mutex);
                }
        }
 
@@ -794,6 +793,9 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
        unsigned char saved_type = flock->fl_type;
 
+       if ((flock->fl_flags & FL_POSIX) == 0)
+               return 1;
+
        mutex_lock(&cinode->lock_mutex);
        posix_test_lock(file, flock);
 
@@ -810,12 +812,15 @@ static int
 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
 {
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
-       int rc;
+       int rc = 1;
+
+       if ((flock->fl_flags & FL_POSIX) == 0)
+               return rc;
 
        mutex_lock(&cinode->lock_mutex);
        if (!cinode->can_cache_brlcks) {
                mutex_unlock(&cinode->lock_mutex);
-               return 1;
+               return rc;
        }
        rc = posix_lock_file_wait(file, flock);
        mutex_unlock(&cinode->lock_mutex);
index 2c50bd2f65d1a2d614585a24e2aba989ba370ff8..e851d5b8931ec8535614ad948b741c52cf89f091 100644 (file)
@@ -132,7 +132,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
        inode->i_mtime = fattr->cf_mtime;
        inode->i_ctime = fattr->cf_ctime;
        inode->i_rdev = fattr->cf_rdev;
-       inode->i_nlink = fattr->cf_nlink;
+       set_nlink(inode, fattr->cf_nlink);
        inode->i_uid = fattr->cf_uid;
        inode->i_gid = fattr->cf_gid;
 
@@ -905,7 +905,7 @@ struct inode *cifs_root_iget(struct super_block *sb)
        if (rc && tcon->ipc) {
                cFYI(1, "ipc connection - fake read inode");
                inode->i_mode |= S_IFDIR;
-               inode->i_nlink = 2;
+               set_nlink(inode, 2);
                inode->i_op = &cifs_ipc_inode_ops;
                inode->i_fop = &simple_dir_operations;
                inode->i_uid = cifs_sb->mnt_uid;
@@ -1367,7 +1367,7 @@ mkdir_get_info:
                 /* setting nlink not necessary except in cases where we
                  * failed to get it from the server or was set bogus */
                if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
-                               direntry->d_inode->i_nlink = 2;
+                       set_nlink(direntry->d_inode, 2);
 
                mode &= ~current_umask();
                /* must turn on setgid bit if parent dir has it */
index 8693b5d0e180edaf11549d44a5fd9687aea3e077..6b0e064343910718c6fc1d58a5177940773b9cbf 100644 (file)
@@ -433,7 +433,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
        if (old_file->d_inode) {
                cifsInode = CIFS_I(old_file->d_inode);
                if (rc == 0) {
-                       old_file->d_inode->i_nlink++;
+                       inc_nlink(old_file->d_inode);
 /* BB should we make this contingent on superblock flag NOATIME? */
 /*                     old_file->d_inode->i_ctime = CURRENT_TIME;*/
                        /* parent dir timestamps will update from srv
index c7d80e24f24ea43eaafa7246a2658e0bb794b705..4ec3ee9d72ccc228b9fac929c23b80fc8f314fff 100644 (file)
@@ -683,7 +683,7 @@ ssetup_ntlmssp_authenticate:
                        cpu_to_le16(CIFS_AUTH_RESP_SIZE);
 
                /* calculate ntlm response and session key */
-               rc = setup_ntlm_response(ses);
+               rc = setup_ntlm_response(ses, nls_cp);
                if (rc) {
                        cERROR(1, "Error %d during NTLM authentication", rc);
                        goto ssetup_exit;
index ac1221d969d6d8faa33dffc39f54c5be4f45314a..7cacba12b8f114468ef56dab7d58fc5678d0c878 100644 (file)
@@ -199,75 +199,36 @@ SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
        return rc;
 }
 
-/* Routines for Windows NT MD4 Hash functions. */
-static int
-_my_wcslen(__u16 *str)
-{
-       int len = 0;
-       while (*str++ != 0)
-               len++;
-       return len;
-}
-
-/*
- * Convert a string into an NT UNICODE string.
- * Note that regardless of processor type
- * this must be in intel (little-endian)
- * format.
- */
-
-static int
-_my_mbstowcs(__u16 *dst, const unsigned char *src, int len)
-{      /* BB not a very good conversion routine - change/fix */
-       int i;
-       __u16 val;
-
-       for (i = 0; i < len; i++) {
-               val = *src;
-               SSVAL(dst, 0, val);
-               dst++;
-               src++;
-               if (val == 0)
-                       break;
-       }
-       return i;
-}
-
 /*
  * Creates the MD4 Hash of the users password in NT UNICODE.
  */
 
 int
-E_md4hash(const unsigned char *passwd, unsigned char *p16)
+E_md4hash(const unsigned char *passwd, unsigned char *p16,
+       const struct nls_table *codepage)
 {
        int rc;
        int len;
        __u16 wpwd[129];
 
        /* Password cannot be longer than 128 characters */
-       if (passwd) {
-               len = strlen((char *) passwd);
-               if (len > 128)
-                       len = 128;
-
-               /* Password must be converted to NT unicode */
-               _my_mbstowcs(wpwd, passwd, len);
-       } else
+       if (passwd) /* Password must be converted to NT unicode */
+               len = cifs_strtoUCS(wpwd, passwd, 128, codepage);
+       else {
                len = 0;
+               *wpwd = 0; /* Ensure string is null terminated */
+       }
 
-       wpwd[len] = 0;  /* Ensure string is null terminated */
-       /* Calculate length in bytes */
-       len = _my_wcslen(wpwd) * sizeof(__u16);
-
-       rc = mdfour(p16, (unsigned char *) wpwd, len);
-       memset(wpwd, 0, 129 * 2);
+       rc = mdfour(p16, (unsigned char *) wpwd, len * sizeof(__u16));
+       memset(wpwd, 0, 129 * sizeof(__u16));
 
        return rc;
 }
 
 /* Does the NT MD4 hash then des encryption. */
 int
-SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
+SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24,
+               const struct nls_table *codepage)
 {
        int rc;
        unsigned char p16[16], p21[21];
@@ -275,7 +236,7 @@ SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
        memset(p16, '\0', 16);
        memset(p21, '\0', 21);
 
-       rc = E_md4hash(passwd, p16);
+       rc = E_md4hash(passwd, p16, codepage);
        if (rc) {
                cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
                return rc;
index 2bdbcc11b373abf28505c30f472bd9d77ec11428..854ace71268579b9a48e0b0001f1ebfe09c094ea 100644 (file)
@@ -104,7 +104,7 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
         if (attr->va_gid != -1)
                inode->i_gid = (gid_t) attr->va_gid;
        if (attr->va_nlink != -1)
-               inode->i_nlink = attr->va_nlink;
+               set_nlink(inode, attr->va_nlink);
        if (attr->va_size != -1)
                inode->i_size = attr->va_size;
        if (attr->va_size != -1)
index 0239433f50cbe25749896439fc124131afcdf164..28e7e135cfabf9bf2bf9e613ca5904c4eeceba3c 100644 (file)
@@ -340,7 +340,7 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
        if (!error) {
                /* VFS may delete the child */
                if (de->d_inode)
-                   de->d_inode->i_nlink = 0;
+                       clear_nlink(de->d_inode);
 
                /* fix the link count of the parent */
                coda_dir_drop_nlink(dir);
index a88948b8bd17d8b16c42af20ee55bcf47f358923..274f13e2f0946ed66ac186833efa1898b84db3fa 100644 (file)
@@ -225,7 +225,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
 }
 
 /*
- * dentry_lru_(add|del|move_tail) must be called with d_lock held.
+ * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
  */
 static void dentry_lru_add(struct dentry *dentry)
 {
@@ -245,6 +245,9 @@ static void __dentry_lru_del(struct dentry *dentry)
        dentry_stat.nr_unused--;
 }
 
+/*
+ * Remove a dentry with references from the LRU.
+ */
 static void dentry_lru_del(struct dentry *dentry)
 {
        if (!list_empty(&dentry->d_lru)) {
@@ -254,6 +257,23 @@ static void dentry_lru_del(struct dentry *dentry)
        }
 }
 
+/*
+ * Remove a dentry that is unreferenced and about to be pruned
+ * (unhashed and destroyed) from the LRU, and inform the file system.
+ * This wrapper should be called _prior_ to unhashing a victim dentry.
+ */
+static void dentry_lru_prune(struct dentry *dentry)
+{
+       if (!list_empty(&dentry->d_lru)) {
+               if (dentry->d_flags & DCACHE_OP_PRUNE)
+                       dentry->d_op->d_prune(dentry);
+
+               spin_lock(&dcache_lru_lock);
+               __dentry_lru_del(dentry);
+               spin_unlock(&dcache_lru_lock);
+       }
+}
+
 static void dentry_lru_move_tail(struct dentry *dentry)
 {
        spin_lock(&dcache_lru_lock);
@@ -403,8 +423,12 @@ relock:
 
        if (ref)
                dentry->d_count--;
-       /* if dentry was on the d_lru list delete it from there */
-       dentry_lru_del(dentry);
+       /*
+        * if dentry was on the d_lru list delete it from there.
+        * inform the fs via d_prune that this dentry is about to be
+        * unhashed and destroyed.
+        */
+       dentry_lru_prune(dentry);
        /* if it was on the hash then remove it */
        __d_drop(dentry);
        return d_kill(dentry, parent);
@@ -854,8 +878,12 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
                do {
                        struct inode *inode;
 
-                       /* detach from the system */
-                       dentry_lru_del(dentry);
+                       /*
+                        * remove the dentry from the lru, and inform
+                        * the fs that this dentry is about to be
+                        * unhashed and destroyed.
+                        */
+                       dentry_lru_prune(dentry);
                        __d_shrink(dentry);
 
                        if (dentry->d_count != 0) {
@@ -1283,6 +1311,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
                dentry->d_flags |= DCACHE_OP_REVALIDATE;
        if (op->d_delete)
                dentry->d_flags |= DCACHE_OP_DELETE;
+       if (op->d_prune)
+               dentry->d_flags |= DCACHE_OP_PRUNE;
 
 }
 EXPORT_SYMBOL(d_set_d_op);
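
DCACHE_OP_PRUNE mirrors the other d_op flags: it is set once in
d_set_d_op() so the hot paths can test a bit instead of chasing d_op
pointers. A filesystem opts in by supplying d_prune in its
dentry_operations; a minimal hypothetical example:

        static void example_d_prune(struct dentry *dentry)
        {
                /*
                 * Called with d_lock held, just before the victim is
                 * unhashed and destroyed; drop per-dentry state here.
                 */
        }

        static const struct dentry_operations example_dentry_ops = {
                .d_prune        = example_d_prune,
        };
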
index 2f27e578d4660e89c1215d7a9edc2dffa6439c89..d5d5297efe97bd4115b2d3e8f37575e6ae054f56 100644 (file)
@@ -307,7 +307,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
        inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
 
        s->s_root = d_alloc_root(inode);
        if (s->s_root)
@@ -549,7 +549,7 @@ void devpts_pty_kill(struct tty_struct *tty)
 
        dentry = d_find_alias(inode);
 
-       inode->i_nlink--;
+       drop_nlink(inode);
        d_delete(dentry);
        dput(dentry);   /* d_alloc_name() in devpts_pty_new() */
        dput(dentry);           /* d_find_alias above */
index 11f8582d72186d679d2452d9ce71e85f277bdbf9..a36d327f15215e471628e2fda253e84ccd6100e9 100644 (file)
@@ -474,8 +474,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
                goto out_lock;
        fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
        fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
-       old_dentry->d_inode->i_nlink =
-               ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink;
+       set_nlink(old_dentry->d_inode,
+                 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink);
        i_size_write(new_dentry->d_inode, file_size_save);
 out_lock:
        unlock_dir(lower_dir_dentry);
@@ -499,8 +499,8 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
                goto out_unlock;
        }
        fsstack_copy_attr_times(dir, lower_dir_inode);
-       dentry->d_inode->i_nlink =
-               ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink;
+       set_nlink(dentry->d_inode,
+                 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink);
        dentry->d_inode->i_ctime = dir->i_ctime;
        d_drop(dentry);
 out_unlock:
@@ -565,7 +565,7 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                goto out;
        fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
        fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
-       dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
+       set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
 out:
        unlock_dir(lower_dir_dentry);
        if (!dentry->d_inode)
@@ -588,7 +588,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
        if (!rc && dentry->d_inode)
                clear_nlink(dentry->d_inode);
        fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
+       set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
        unlock_dir(lower_dir_dentry);
        if (!rc)
                d_drop(dentry);
index 9c13412e6c99c78d11c3a89a42e007c82fb1b58b..bc84f365d75c3a04516c0f58e57039ca3db8bbb6 100644 (file)
@@ -96,7 +96,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
        efs_inode = (struct efs_dinode *) (bh->b_data + offset);
     
        inode->i_mode  = be16_to_cpu(efs_inode->di_mode);
-       inode->i_nlink = be16_to_cpu(efs_inode->di_nlink);
+       set_nlink(inode, be16_to_cpu(efs_inode->di_nlink));
        inode->i_uid   = (uid_t)be16_to_cpu(efs_inode->di_uid);
        inode->i_gid   = (gid_t)be16_to_cpu(efs_inode->di_gid);
        inode->i_size  = be32_to_cpu(efs_inode->di_size);
index 3e5f3a6be90a05026e54b3eb3c6f2bb1f7ca0813..f6dbf7768ce6c7a2e7bc243fc56bd2bf57e5886a 100644 (file)
@@ -1165,7 +1165,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
        inode->i_mode = le16_to_cpu(fcb.i_mode);
        inode->i_uid = le32_to_cpu(fcb.i_uid);
        inode->i_gid = le32_to_cpu(fcb.i_gid);
-       inode->i_nlink = le16_to_cpu(fcb.i_links_count);
+       set_nlink(inode, le16_to_cpu(fcb.i_links_count));
        inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
        inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
        inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
index 8f44cef1b3efacb47ecf191f3ae00ea2737d0ea6..a8cbe1bc6ad4e7e1285ab131bee081972a690711 100644 (file)
@@ -421,7 +421,7 @@ static inline int rsv_is_empty(struct ext2_reserve_window *rsv)
 void ext2_init_block_alloc_info(struct inode *inode)
 {
        struct ext2_inode_info *ei = EXT2_I(inode);
-       struct ext2_block_alloc_info *block_i = ei->i_block_alloc_info;
+       struct ext2_block_alloc_info *block_i;
        struct super_block *sb = inode->i_sb;
 
        block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
index ee9ed31948e1229d123cbea2fcea5e45ff42aa3f..c4e81dfb74ba4f9a3604655d52a96f5832f75e6d 100644 (file)
@@ -601,7 +601,7 @@ fail_free_drop:
 fail_drop:
        dquot_drop(inode);
        inode->i_flags |= S_NOQUOTA;
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        unlock_new_inode(inode);
        iput(inode);
        return ERR_PTR(err);
index a8a58f63f07caa70cb1cf92cc3531aa350945800..91a6945af6d848d932a4106025da1b396ed43615 100644 (file)
@@ -1321,7 +1321,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
                inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
-       inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+       set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
        inode->i_size = le32_to_cpu(raw_inode->i_size);
        inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
        inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
index 1dd62ed35b8511efece6cd6733eb960185f9ec28..bd8ac164a3bf3db5ff8af9e6800317e683b138c2 100644 (file)
@@ -327,10 +327,10 @@ static struct inode *ext2_nfs_get_inode(struct super_block *sb,
        if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
                return ERR_PTR(-ESTALE);
 
-       /* iget isn't really right if the inode is currently unallocated!!
-        * ext2_read_inode currently does appropriate checks, but
-        * it might be "neater" to call ext2_get_inode first and check
-        * if the inode is valid.....
+       /*
+        * ext2_iget isn't quite right if the inode is currently unallocated!
+        * However ext2_iget currently does appropriate checks to handle stale
+        * inodes so everything is OK.
         */
        inode = ext2_iget(sb, ino);
        if (IS_ERR(inode))
index 6386d76f44a7d1d9939508e1e8e8be66b394033b..a2038928f9a34b060abe6534958a830f85bd372f 100644 (file)
@@ -427,7 +427,7 @@ static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
 void ext3_init_block_alloc_info(struct inode *inode)
 {
        struct ext3_inode_info *ei = EXT3_I(inode);
-       struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+       struct ext3_block_alloc_info *block_i;
        struct super_block *sb = inode->i_sb;
 
        block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
@@ -1440,14 +1440,14 @@ out:
  *
  * Check if filesystem has at least 1 free block available for allocation.
  */
-static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
 {
        ext3_fsblk_t free_blocks, root_blocks;
 
        free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
        root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
        if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
-               sbi->s_resuid != current_fsuid() &&
+               !use_reservation && sbi->s_resuid != current_fsuid() &&
                (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
                return 0;
        }
@@ -1468,7 +1468,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
  */
 int ext3_should_retry_alloc(struct super_block *sb, int *retries)
 {
-       if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
+       if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
                return 0;
 
        jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1546,7 +1546,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
        if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
                my_rsv = &block_i->rsv_window_node;
 
-       if (!ext3_has_free_blocks(sbi)) {
+       if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
                *errp = -ENOSPC;
                goto out;
        }
@@ -1924,9 +1924,10 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
  * reaches any used block. Then issue a TRIM command on this extent and free
  * the extent in the block bitmap. This is done until whole group is scanned.
  */
-ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
-                               ext3_grpblk_t start, ext3_grpblk_t max,
-                               ext3_grpblk_t minblocks)
+static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
+                                       unsigned int group,
+                                       ext3_grpblk_t start, ext3_grpblk_t max,
+                                       ext3_grpblk_t minblocks)
 {
        handle_t *handle;
        ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
index d494c554c6e69e436e4dbccee6c9e5e3d6ab8060..1860ed3563235b8e7cfefbf57b98ed5d60575b7d 100644 (file)
@@ -61,13 +61,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        if (ret)
                goto out;
 
-       /*
-        * Taking the mutex here just to keep consistent with how fsync was
-        * called previously, however it looks like we don't need to take
-        * i_mutex at all.
-        */
-       mutex_lock(&inode->i_mutex);
-
        J_ASSERT(ext3_journal_current_handle() == NULL);
 
        /*
@@ -85,7 +78,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         *  safe in-journal, which is all fsync() needs to ensure.
         */
        if (ext3_should_journal_data(inode)) {
-               mutex_unlock(&inode->i_mutex);
                ret = ext3_force_commit(inode->i_sb);
                goto out;
        }
@@ -108,8 +100,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         */
        if (needs_barrier)
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
-
-       mutex_unlock(&inode->i_mutex);
 out:
        trace_ext3_sync_file_exit(inode, ret);
        return ret;
index bf09cbf938cc155c3328b6cc6d6a8a6f1bed637c..5c866e06e7ab94cbc12406be7cf6f37e39fdddae 100644 (file)
@@ -177,42 +177,6 @@ error_return:
        ext3_std_error(sb, fatal);
 }
 
-/*
- * There are two policies for allocating an inode.  If the new inode is
- * a directory, then a forward search is made for a block group with both
- * free space and a low directory-to-inode ratio; if that fails, then of
- * the groups with above-average free space, that group with the fewest
- * directories already is chosen.
- *
- * For other inodes, search forward from the parent directory\'s block
- * group to find a free inode.
- */
-static int find_group_dir(struct super_block *sb, struct inode *parent)
-{
-       int ngroups = EXT3_SB(sb)->s_groups_count;
-       unsigned int freei, avefreei;
-       struct ext3_group_desc *desc, *best_desc = NULL;
-       int group, best_group = -1;
-
-       freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
-       avefreei = freei / ngroups;
-
-       for (group = 0; group < ngroups; group++) {
-               desc = ext3_get_group_desc (sb, group, NULL);
-               if (!desc || !desc->bg_free_inodes_count)
-                       continue;
-               if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
-                       continue;
-               if (!best_desc ||
-                   (le16_to_cpu(desc->bg_free_blocks_count) >
-                    le16_to_cpu(best_desc->bg_free_blocks_count))) {
-                       best_group = group;
-                       best_desc = desc;
-               }
-       }
-       return best_group;
-}
-
 /*
  * Orlov's allocator for directories.
  *
@@ -436,12 +400,9 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
 
        sbi = EXT3_SB(sb);
        es = sbi->s_es;
-       if (S_ISDIR(mode)) {
-               if (test_opt (sb, OLDALLOC))
-                       group = find_group_dir(sb, dir);
-               else
-                       group = find_group_orlov(sb, dir);
-       } else
+       if (S_ISDIR(mode))
+               group = find_group_orlov(sb, dir);
+       else
                group = find_group_other(sb, dir);
 
        err = -ENOSPC;
@@ -621,7 +582,7 @@ fail_free_drop:
 fail_drop:
        dquot_drop(inode);
        inode->i_flags |= S_NOQUOTA;
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        unlock_new_inode(inode);
        iput(inode);
        brelse(bitmap_bh);
index 12661e1deedd03a85e643dd6ed64946051e1a0a4..85fe655fe3e0b4de006a32df3c6dee615082bc73 100644 (file)
@@ -2899,7 +2899,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
                inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
-       inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+       set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
        inode->i_size = le32_to_cpu(raw_inode->i_size);
        inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
        inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
index c7f43944f160e080973ce449a6c97a4bc2a4d2bc..ba1b54e23cae14fd08b630db914b8f639908406d 100644 (file)
@@ -150,30 +150,6 @@ setversion_out:
                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
-#ifdef CONFIG_JBD_DEBUG
-       case EXT3_IOC_WAIT_FOR_READONLY:
-               /*
-                * This is racy - by the time we're woken up and running,
-                * the superblock could be released.  And the module could
-                * have been unloaded.  So sue me.
-                *
-                * Returns 1 if it slept, else zero.
-                */
-               {
-                       struct super_block *sb = inode->i_sb;
-                       DECLARE_WAITQUEUE(wait, current);
-                       int ret = 0;
-
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
-                       if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
-                               schedule();
-                               ret = 1;
-                       }
-                       remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
-                       return ret;
-               }
-#endif
        case EXT3_IOC_GETRSVSZ:
                if (test_opt(inode->i_sb, RESERVATION)
                        && S_ISREG(inode->i_mode)
index 0629e09f65115798575e2fb7ea2a293aa41e0a39..642dc6d66dfd0a4272c3b8c983d3155428fb821a 100644 (file)
@@ -1821,7 +1821,7 @@ retry:
        de->name_len = 2;
        strcpy (de->name, "..");
        ext3_set_de_type(dir->i_sb, de, S_IFDIR);
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
        BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
        err = ext3_journal_dirty_metadata(handle, dir_block);
        if (err)
@@ -1833,7 +1833,7 @@ retry:
 
        if (err) {
 out_clear_inode:
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                unlock_new_inode(inode);
                ext3_mark_inode_dirty(handle, inode);
                iput (inode);
@@ -2170,7 +2170,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
                ext3_warning (inode->i_sb, "ext3_unlink",
                              "Deleting nonexistent file (%lu), %d",
                              inode->i_ino, inode->i_nlink);
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
        }
        retval = ext3_delete_entry(handle, dir, de, bh);
        if (retval)
index 7beb69ae0015996140fe916712f4b1e13860faa0..922d289aeeb315ce4a5c1131371ea90f266c0df4 100644 (file)
@@ -652,8 +652,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_puts(seq, ",nouid32");
        if (test_opt(sb, DEBUG))
                seq_puts(seq, ",debug");
-       if (test_opt(sb, OLDALLOC))
-               seq_puts(seq, ",oldalloc");
 #ifdef CONFIG_EXT3_FS_XATTR
        if (test_opt(sb, XATTR_USER))
                seq_puts(seq, ",user_xattr");
@@ -1049,10 +1047,12 @@ static int parse_options (char *options, struct super_block *sb,
                        set_opt (sbi->s_mount_opt, DEBUG);
                        break;
                case Opt_oldalloc:
-                       set_opt (sbi->s_mount_opt, OLDALLOC);
+                       ext3_msg(sb, KERN_WARNING,
+                               "Ignoring deprecated oldalloc option");
                        break;
                case Opt_orlov:
-                       clear_opt (sbi->s_mount_opt, OLDALLOC);
+                       ext3_msg(sb, KERN_WARNING,
+                               "Ignoring deprecated orlov option");
                        break;
 #ifdef CONFIG_EXT3_FS_XATTR
                case Opt_user_xattr:
@@ -2669,13 +2669,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
                        /*
                         * If we have an unprocessed orphan list hanging
                         * around from a previously readonly bdev mount,
-                        * require a full umount/remount for now.
+                        * require a full umount & mount for now.
                         */
                        if (es->s_last_orphan) {
                                ext3_msg(sb, KERN_WARNING, "warning: couldn't "
                                       "remount RDWR because of unprocessed "
                                       "orphan inode list.  Please "
-                                      "umount/remount instead.");
+                                      "umount & mount instead.");
                                err = -EINVAL;
                                goto restore_opts;
                        }
index f8224adf496ed91def8db62a83ebe6fb85d890ee..f6dba4505f1cc7e4f251f28a18bbeea5b3dcb2a7 100644 (file)
@@ -28,7 +28,8 @@
  */
 
 /*
- * Calculate the block group number and offset, given a block number
+ * Calculate the block group number and offset into the block/cluster
+ * allocation bitmap, given a block number
  */
 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
                ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
@@ -37,7 +38,8 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
        ext4_grpblk_t offset;
 
        blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
-       offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
+       offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
+               EXT4_SB(sb)->s_cluster_bits;
        if (offsetp)
                *offsetp = offset;
        if (blockgrpp)
@@ -55,130 +57,169 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
        return 0;
 }
 
-static int ext4_group_used_meta_blocks(struct super_block *sb,
-                                      ext4_group_t block_group,
-                                      struct ext4_group_desc *gdp)
+/* Return the number of clusters used for file system metadata; this
+ * represents the overhead needed by the file system.
+ */
+unsigned ext4_num_overhead_clusters(struct super_block *sb,
+                                   ext4_group_t block_group,
+                                   struct ext4_group_desc *gdp)
 {
-       ext4_fsblk_t tmp;
+       unsigned num_clusters;
+       int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
+       ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
+       ext4_fsblk_t itbl_blk;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       /* block bitmap, inode bitmap, and inode table blocks */
-       int used_blocks = sbi->s_itb_per_group + 2;
 
-       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
-               if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
-                                       block_group))
-                       used_blocks--;
-
-               if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
-                                       block_group))
-                       used_blocks--;
-
-               tmp = ext4_inode_table(sb, gdp);
-               for (; tmp < ext4_inode_table(sb, gdp) +
-                               sbi->s_itb_per_group; tmp++) {
-                       if (!ext4_block_in_group(sb, tmp, block_group))
-                               used_blocks -= 1;
+       /* This is the number of clusters used by the superblock,
+        * block group descriptors, and reserved block group
+        * descriptor blocks */
+       num_clusters = ext4_num_base_meta_clusters(sb, block_group);
+
+       /*
+        * For the allocation bitmaps and inode table, we first need
+        * to check to see if the block is in the block group.  If it
+        * is, then check to see if the cluster is already accounted
+        * for in the clusters used for the base metadata cluster, or
+        * if we can increment the base metadata cluster to include
+        * that block.  Otherwise, we will have to track the cluster
+        * used for the allocation bitmap or inode table explicitly.
+        * Normally all of these blocks are contiguous, so the special
+        * case handling shouldn't be necessary except for *very*
+        * unusual file system layouts.
+        */
+       if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
+               block_cluster = EXT4_B2C(sbi, (start -
+                                              ext4_block_bitmap(sb, gdp)));
+               if (block_cluster < num_clusters)
+                       block_cluster = -1;
+               else if (block_cluster == num_clusters) {
+                       num_clusters++;
+                       block_cluster = -1;
                }
        }
-       return used_blocks;
-}
 
-/* Initializes an uninitialized block bitmap if given, and returns the
- * number of blocks free in the group. */
-unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
-                ext4_group_t block_group, struct ext4_group_desc *gdp)
-{
-       int bit, bit_max;
-       ext4_group_t ngroups = ext4_get_groups_count(sb);
-       unsigned free_blocks, group_blocks;
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-       if (bh) {
-               J_ASSERT_BH(bh, buffer_locked(bh));
-
-               /* If checksum is bad mark all blocks used to prevent allocation
-                * essentially implementing a per-group read-only flag. */
-               if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-                       ext4_error(sb, "Checksum bad for group %u",
-                                       block_group);
-                       ext4_free_blks_set(sb, gdp, 0);
-                       ext4_free_inodes_set(sb, gdp, 0);
-                       ext4_itable_unused_set(sb, gdp, 0);
-                       memset(bh->b_data, 0xff, sb->s_blocksize);
-                       return 0;
+       if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
+               inode_cluster = EXT4_B2C(sbi,
+                                        start - ext4_inode_bitmap(sb, gdp));
+               if (inode_cluster < num_clusters)
+                       inode_cluster = -1;
+               else if (inode_cluster == num_clusters) {
+                       num_clusters++;
+                       inode_cluster = -1;
                }
-               memset(bh->b_data, 0, sb->s_blocksize);
        }
 
-       /* Check for superblock and gdt backups in this group */
-       bit_max = ext4_bg_has_super(sb, block_group);
-
-       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
-           block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
-                         sbi->s_desc_per_block) {
-               if (bit_max) {
-                       bit_max += ext4_bg_num_gdb(sb, block_group);
-                       bit_max +=
-                               le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+       itbl_blk = ext4_inode_table(sb, gdp);
+       for (i = 0; i < sbi->s_itb_per_group; i++) {
+               if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
+                       c = EXT4_B2C(sbi, start - itbl_blk + i);
+                       if ((c < num_clusters) || (c == inode_cluster) ||
+                           (c == block_cluster) || (c == itbl_cluster))
+                               continue;
+                       if (c == num_clusters) {
+                               num_clusters++;
+                               continue;
+                       }
+                       num_clusters++;
+                       itbl_cluster = c;
                }
-       } else { /* For META_BG_BLOCK_GROUPS */
-               bit_max += ext4_bg_num_gdb(sb, block_group);
        }
 
-       if (block_group == ngroups - 1) {
+       if (block_cluster != -1)
+               num_clusters++;
+       if (inode_cluster != -1)
+               num_clusters++;
+
+       return num_clusters;
+}
+
+static unsigned int num_clusters_in_group(struct super_block *sb,
+                                         ext4_group_t block_group)
+{
+       unsigned int blocks;
+
+       if (block_group == ext4_get_groups_count(sb) - 1) {
                /*
-                * Even though mke2fs always initialize first and last group
-                * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
-                * to make sure we calculate the right free blocks
+                * Even though mke2fs always initializes the first and
+                * last group, just in case some other tool was used,
+                * we need to make sure we calculate the right free
+                * blocks.
                 */
-               group_blocks = ext4_blocks_count(sbi->s_es) -
-                       ext4_group_first_block_no(sb, ngroups - 1);
-       } else {
-               group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
-       }
+               blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
+                       ext4_group_first_block_no(sb, block_group);
+       } else
+               blocks = EXT4_BLOCKS_PER_GROUP(sb);
+       return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
+}
 
-       free_blocks = group_blocks - bit_max;
+/* Initializes an uninitialized block bitmap */
+void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+                           ext4_group_t block_group,
+                           struct ext4_group_desc *gdp)
+{
+       unsigned int bit, bit_max;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       ext4_fsblk_t start, tmp;
+       int flex_bg = 0;
+
+       J_ASSERT_BH(bh, buffer_locked(bh));
+
+       /* If checksum is bad mark all blocks used to prevent allocation
+        * essentially implementing a per-group read-only flag. */
+       if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+               ext4_error(sb, "Checksum bad for group %u", block_group);
+               ext4_free_group_clusters_set(sb, gdp, 0);
+               ext4_free_inodes_set(sb, gdp, 0);
+               ext4_itable_unused_set(sb, gdp, 0);
+               memset(bh->b_data, 0xff, sb->s_blocksize);
+               return;
+       }
+       memset(bh->b_data, 0, sb->s_blocksize);
 
-       if (bh) {
-               ext4_fsblk_t start, tmp;
-               int flex_bg = 0;
+       bit_max = ext4_num_base_meta_clusters(sb, block_group);
+       for (bit = 0; bit < bit_max; bit++)
+               ext4_set_bit(bit, bh->b_data);
 
-               for (bit = 0; bit < bit_max; bit++)
-                       ext4_set_bit(bit, bh->b_data);
+       start = ext4_group_first_block_no(sb, block_group);
 
-               start = ext4_group_first_block_no(sb, block_group);
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+               flex_bg = 1;
 
-               if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-                                             EXT4_FEATURE_INCOMPAT_FLEX_BG))
-                       flex_bg = 1;
+       /* Set bits for block and inode bitmaps, and inode table */
+       tmp = ext4_block_bitmap(sb, gdp);
+       if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+               ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
-               /* Set bits for block and inode bitmaps, and inode table */
-               tmp = ext4_block_bitmap(sb, gdp);
-               if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
-                       ext4_set_bit(tmp - start, bh->b_data);
+       tmp = ext4_inode_bitmap(sb, gdp);
+       if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+               ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
-               tmp = ext4_inode_bitmap(sb, gdp);
+       tmp = ext4_inode_table(sb, gdp);
+       for (; tmp < ext4_inode_table(sb, gdp) +
+                    sbi->s_itb_per_group; tmp++) {
                if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
-                       ext4_set_bit(tmp - start, bh->b_data);
-
-               tmp = ext4_inode_table(sb, gdp);
-               for (; tmp < ext4_inode_table(sb, gdp) +
-                               sbi->s_itb_per_group; tmp++) {
-                       if (!flex_bg ||
-                               ext4_block_in_group(sb, tmp, block_group))
-                               ext4_set_bit(tmp - start, bh->b_data);
-               }
-               /*
-                * Also if the number of blocks within the group is
-                * less than the blocksize * 8 ( which is the size
-                * of bitmap ), set rest of the block bitmap to 1
-                */
-               ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
-                                    bh->b_data);
+                       ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
        }
-       return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
+
+       /*
+        * Also if the number of blocks within the group is less than
+        * the blocksize * 8 ( which is the size of bitmap ), set rest
+        * of the block bitmap to 1
+        */
+       ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
+                            sb->s_blocksize * 8, bh->b_data);
 }
 
+/* Return the number of free blocks in a block group.  It is used when
+ * the block bitmap is uninitialized, so we can't just count the bits
+ * in the bitmap. */
+unsigned ext4_free_clusters_after_init(struct super_block *sb,
+                                      ext4_group_t block_group,
+                                      struct ext4_group_desc *gdp)
+{
+       return num_clusters_in_group(sb, block_group) - 
+               ext4_num_overhead_clusters(sb, block_group, gdp);
+}
 
 /*
  * The free blocks are managed by bitmaps.  A file system contains several
@@ -362,53 +403,54 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 }
 
 /**
- * ext4_has_free_blocks()
+ * ext4_has_free_clusters()
  * @sbi:       in-core super block structure.
- * @nblocks:   number of needed blocks
+ * @nclusters: number of needed blocks
+ * @flags:     flags from ext4_mb_new_blocks()
  *
- * Check if filesystem has nblocks free & available for allocation.
+ * Check if filesystem has nclusters free & available for allocation.
  * On success return 1, return 0 on failure.
  */
-static int ext4_has_free_blocks(struct ext4_sb_info *sbi,
-                               s64 nblocks, unsigned int flags)
+static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
+                                 s64 nclusters, unsigned int flags)
 {
-       s64 free_blocks, dirty_blocks, root_blocks;
-       struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
-       struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
-
-       free_blocks  = percpu_counter_read_positive(fbc);
-       dirty_blocks = percpu_counter_read_positive(dbc);
-       root_blocks = ext4_r_blocks_count(sbi->s_es);
-
-       if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
-                                               EXT4_FREEBLOCKS_WATERMARK) {
-               free_blocks  = percpu_counter_sum_positive(fbc);
-               dirty_blocks = percpu_counter_sum_positive(dbc);
+       s64 free_clusters, dirty_clusters, root_clusters;
+       struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
+       struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
+
+       free_clusters  = percpu_counter_read_positive(fcc);
+       dirty_clusters = percpu_counter_read_positive(dcc);
+       root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
+
+       if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
+                                       EXT4_FREECLUSTERS_WATERMARK) {
+               free_clusters  = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
+               dirty_clusters = percpu_counter_sum_positive(dcc);
        }
-       /* Check whether we have space after
-        * accounting for current dirty blocks & root reserved blocks.
+       /* Check whether we have space after accounting for current
+        * dirty clusters & root reserved clusters.
         */
-       if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
+       if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
                return 1;
 
-       /* Hm, nope.  Are (enough) root reserved blocks available? */
+       /* Hm, nope.  Are (enough) root reserved clusters available? */
        if (sbi->s_resuid == current_fsuid() ||
            ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
            capable(CAP_SYS_RESOURCE) ||
                (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
 
-               if (free_blocks >= (nblocks + dirty_blocks))
+               if (free_clusters >= (nclusters + dirty_clusters))
                        return 1;
        }
 
        return 0;
 }
 
-int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-                          s64 nblocks, unsigned int flags)
+int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+                            s64 nclusters, unsigned int flags)
 {
-       if (ext4_has_free_blocks(sbi, nblocks, flags)) {
-               percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
+       if (ext4_has_free_clusters(sbi, nclusters, flags)) {
+               percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
                return 0;
        } else
                return -ENOSPC;
@@ -428,7 +470,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
  */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-       if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) ||
+       if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
            (*retries)++ > 3 ||
            !EXT4_SB(sb)->s_journal)
                return 0;
@@ -444,7 +486,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
  * @handle:             handle to this transaction
  * @inode:              file inode
  * @goal:               given target block(filesystem wide)
- * @count:             pointer to total number of blocks needed
+ * @count:             pointer to total number of clusters needed
  * @errp:               error code
  *
  * Return 1st allocated block number on success, *count stores total account
@@ -476,18 +518,19 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
                EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
                spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-               dquot_alloc_block_nofail(inode, ar.len);
+               dquot_alloc_block_nofail(inode,
+                               EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
        }
        return ret;
 }
 
 /**
- * ext4_count_free_blocks() -- count filesystem free blocks
+ * ext4_count_free_clusters() -- count filesystem free clusters
  * @sb:                superblock
  *
- * Adds up the number of free blocks from each block group.
+ * Adds up the number of free clusters from each block group.
  */
-ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
+ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
 {
        ext4_fsblk_t desc_count;
        struct ext4_group_desc *gdp;
@@ -508,7 +551,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
                gdp = ext4_get_group_desc(sb, i, NULL);
                if (!gdp)
                        continue;
-               desc_count += ext4_free_blks_count(sb, gdp);
+               desc_count += ext4_free_group_clusters(sb, gdp);
                brelse(bitmap_bh);
                bitmap_bh = ext4_read_block_bitmap(sb, i);
                if (bitmap_bh == NULL)
@@ -516,12 +559,13 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 
                x = ext4_count_free(bitmap_bh, sb->s_blocksize);
                printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
-                       i, ext4_free_blks_count(sb, gdp), x);
+                       i, ext4_free_group_clusters(sb, gdp), x);
                bitmap_count += x;
        }
        brelse(bitmap_bh);
-       printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
-               ", computed = %llu, %llu\n", ext4_free_blocks_count(es),
+       printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
+              ", computed = %llu, %llu\n",
+              EXT4_B2C(sbi, ext4_free_blocks_count(es)),
               desc_count, bitmap_count);
        return bitmap_count;
 #else
@@ -530,7 +574,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
                gdp = ext4_get_group_desc(sb, i, NULL);
                if (!gdp)
                        continue;
-               desc_count += ext4_free_blks_count(sb, gdp);
+               desc_count += ext4_free_group_clusters(sb, gdp);
        }
 
        return desc_count;
@@ -620,6 +664,31 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
 
 }
 
+/*
+ * This function returns the number of file system metadata clusters at
+ * the beginning of a block group, including the reserved gdt blocks.
+ */
+unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+                                    ext4_group_t block_group)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       unsigned num;
+
+       /* Check for superblock and gdt backups in this group */
+       num = ext4_bg_has_super(sb, block_group);
+
+       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+           block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+                         sbi->s_desc_per_block) {
+               if (num) {
+                       num += ext4_bg_num_gdb(sb, block_group);
+                       num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+               }
+       } else { /* For META_BG_BLOCK_GROUPS */
+               num += ext4_bg_num_gdb(sb, block_group);
+       }
+       return EXT4_NUM_B2C(sbi, num);
+}
 /**
  *     ext4_inode_to_goal_block - return a hint for block allocation
  *     @inode: inode for block allocation
index cec3145e532ce7f1486ef6d20a46b5b267bc9ea3..5b0e26a1272dcc6c6573463767f8563ec683e6e3 100644 (file)
@@ -144,9 +144,17 @@ struct ext4_allocation_request {
 #define EXT4_MAP_UNWRITTEN     (1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY      (1 << BH_Boundary)
 #define EXT4_MAP_UNINIT                (1 << BH_Uninit)
+/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+ * ext4_map_blocks wants to know whether or not the underlying cluster has
+ * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+ * the requested mapping was from previously mapped (or delayed allocated)
+ * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+ * should never appear on buffer_head's state flags.
+ */
+#define EXT4_MAP_FROM_CLUSTER  (1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS         (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
                                 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-                                EXT4_MAP_UNINIT)
+                                EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
 
 struct ext4_map_blocks {
        ext4_fsblk_t m_pblk;
@@ -239,8 +247,11 @@ struct ext4_io_submit {
 # define EXT4_BLOCK_SIZE(s)            (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
 #endif
 #define        EXT4_ADDR_PER_BLOCK(s)          (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
+#define EXT4_CLUSTER_SIZE(s)           (EXT4_BLOCK_SIZE(s) << \
+                                        EXT4_SB(s)->s_cluster_bits)
 #ifdef __KERNEL__
 # define EXT4_BLOCK_SIZE_BITS(s)       ((s)->s_blocksize_bits)
+# define EXT4_CLUSTER_BITS(s)          (EXT4_SB(s)->s_cluster_bits)
 #else
 # define EXT4_BLOCK_SIZE_BITS(s)       ((s)->s_log_block_size + 10)
 #endif
@@ -258,6 +269,14 @@ struct ext4_io_submit {
 #endif
 #define EXT4_BLOCK_ALIGN(size, blkbits)                ALIGN((size), (1 << (blkbits)))
 
+/* Translate a block number to a cluster number */
+#define EXT4_B2C(sbi, blk)     ((blk) >> (sbi)->s_cluster_bits)
+/* Translate a cluster number to a block number */
+#define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits)
+/* Translate # of blks to # of clusters */
+#define EXT4_NUM_B2C(sbi, blks)        (((blks) + (sbi)->s_cluster_ratio - 1) >> \
+                                (sbi)->s_cluster_bits)
+
 /*
  * Structure of a blocks group descriptor
  */
@@ -289,7 +308,7 @@ struct ext4_group_desc
 
 struct flex_groups {
        atomic_t free_inodes;
-       atomic_t free_blocks;
+       atomic_t free_clusters;
        atomic_t used_dirs;
 };
 
@@ -306,6 +325,7 @@ struct flex_groups {
 #define EXT4_DESC_SIZE(s)              (EXT4_SB(s)->s_desc_size)
 #ifdef __KERNEL__
 # define EXT4_BLOCKS_PER_GROUP(s)      (EXT4_SB(s)->s_blocks_per_group)
+# define EXT4_CLUSTERS_PER_GROUP(s)    (EXT4_SB(s)->s_clusters_per_group)
 # define EXT4_DESC_PER_BLOCK(s)                (EXT4_SB(s)->s_desc_per_block)
 # define EXT4_INODES_PER_GROUP(s)      (EXT4_SB(s)->s_inodes_per_group)
 # define EXT4_DESC_PER_BLOCK_BITS(s)   (EXT4_SB(s)->s_desc_per_block_bits)
@@ -358,8 +378,7 @@ struct flex_groups {
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
-                          EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
-                          EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
+                          EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
                           EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
 
@@ -520,6 +539,8 @@ struct ext4_new_group_data {
 #define EXT4_GET_BLOCKS_PUNCH_OUT_EXT          0x0020
        /* Don't normalize allocation size (used for fallocate) */
 #define EXT4_GET_BLOCKS_NO_NORMALIZE           0x0040
+       /* Request will not result in inode size update (user for fallocate) */
+#define EXT4_GET_BLOCKS_KEEP_SIZE              0x0080
 
 /*
  * Flags used by ext4_free_blocks
@@ -528,6 +549,13 @@ struct ext4_new_group_data {
 #define EXT4_FREE_BLOCKS_FORGET                0x0002
 #define EXT4_FREE_BLOCKS_VALIDATED     0x0004
 #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE        0x0008
+#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER  0x0010
+#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER   0x0020
+
+/*
+ * Flags used by ext4_discard_partial_page_buffers
+ */
+#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED  0x0001
 
 /*
  * ioctl commands
@@ -538,9 +566,6 @@ struct ext4_new_group_data {
 #define        EXT4_IOC_SETVERSION             _IOW('f', 4, long)
 #define        EXT4_IOC_GETVERSION_OLD         FS_IOC_GETVERSION
 #define        EXT4_IOC_SETVERSION_OLD         FS_IOC_SETVERSION
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC_WAIT_FOR_READONLY     _IOR('f', 99, long)
-#endif
 #define EXT4_IOC_GETRSVSZ              _IOR('f', 5, long)
 #define EXT4_IOC_SETRSVSZ              _IOW('f', 6, long)
 #define EXT4_IOC_GROUP_EXTEND          _IOW('f', 7, unsigned long)
@@ -563,9 +588,6 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_SETRSVSZ            _IOW('f', 6, int)
 #define EXT4_IOC32_GROUP_EXTEND                _IOW('f', 7, unsigned int)
 #define EXT4_IOC32_GROUP_ADD           _IOW('f', 8, struct compat_ext4_new_group_input)
-#ifdef CONFIG_JBD2_DEBUG
-#define EXT4_IOC32_WAIT_FOR_READONLY   _IOR('f', 99, int)
-#endif
 #define EXT4_IOC32_GETVERSION_OLD      FS_IOC32_GETVERSION
 #define EXT4_IOC32_SETVERSION_OLD      FS_IOC32_SETVERSION
 #endif
@@ -837,6 +859,7 @@ struct ext4_inode_info {
        ext4_group_t    i_last_alloc_group;
 
        /* allocation reservation info for delalloc */
+       /* In case of bigalloc, these refer to clusters rather than blocks */
        unsigned int i_reserved_data_blocks;
        unsigned int i_reserved_meta_blocks;
        unsigned int i_allocated_meta_blocks;
@@ -886,7 +909,6 @@ struct ext4_inode_info {
 /*
  * Mount flags
  */
-#define EXT4_MOUNT_OLDALLOC            0x00002  /* Don't use the new Orlov allocator */
 #define EXT4_MOUNT_GRPID               0x00004 /* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG               0x00008 /* Some debugging messages */
 #define EXT4_MOUNT_ERRORS_CONT         0x00010 /* Continue on errors */
@@ -918,6 +940,9 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DISCARD             0x40000000 /* Issue DISCARD requests */
 #define EXT4_MOUNT_INIT_INODE_TABLE    0x80000000 /* Initialize uninitialized itables */
 
+#define EXT4_MOUNT2_EXPLICIT_DELALLOC  0x00000001 /* User explicitly
+                                                     specified delalloc */
+
 #define clear_opt(sb, opt)             EXT4_SB(sb)->s_mount_opt &= \
                                                ~EXT4_MOUNT_##opt
 #define set_opt(sb, opt)               EXT4_SB(sb)->s_mount_opt |= \
@@ -968,9 +993,9 @@ struct ext4_super_block {
 /*10*/ __le32  s_free_inodes_count;    /* Free inodes count */
        __le32  s_first_data_block;     /* First Data Block */
        __le32  s_log_block_size;       /* Block size */
-       __le32  s_obso_log_frag_size;   /* Obsoleted fragment size */
+       __le32  s_log_cluster_size;     /* Allocation cluster size */
 /*20*/ __le32  s_blocks_per_group;     /* # Blocks per group */
-       __le32  s_obso_frags_per_group; /* Obsoleted fragments per group */
+       __le32  s_clusters_per_group;   /* # Clusters per group */
        __le32  s_inodes_per_group;     /* # Inodes per group */
        __le32  s_mtime;                /* Mount time */
 /*30*/ __le32  s_wtime;                /* Write time */
@@ -1066,7 +1091,10 @@ struct ext4_super_block {
        __u8    s_last_error_func[32];  /* function where the error happened */
 #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
        __u8    s_mount_opts[64];
-       __le32  s_reserved[112];        /* Padding to the end of the block */
+       __le32  s_usr_quota_inum;       /* inode for tracking user quota */
+       __le32  s_grp_quota_inum;       /* inode for tracking group quota */
+       __le32  s_overhead_clusters;    /* overhead blocks/clusters in fs */
+       __le32  s_reserved[109];        /* Padding to the end of the block */
 };
 
 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -1086,6 +1114,7 @@ struct ext4_sb_info {
        unsigned long s_desc_size;      /* Size of a group descriptor in bytes */
        unsigned long s_inodes_per_block;/* Number of inodes per block */
        unsigned long s_blocks_per_group;/* Number of blocks in a group */
+       unsigned long s_clusters_per_group; /* Number of clusters in a group */
        unsigned long s_inodes_per_group;/* Number of inodes in a group */
        unsigned long s_itb_per_group;  /* Number of inode table blocks per group */
        unsigned long s_gdb_count;      /* Number of group descriptor blocks */
@@ -1094,6 +1123,8 @@ struct ext4_sb_info {
        ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
        unsigned long s_overhead_last;  /* Last calculated overhead */
        unsigned long s_blocks_last;    /* Last seen block count */
+       unsigned int s_cluster_ratio;   /* Number of blocks per cluster */
+       unsigned int s_cluster_bits;    /* log2 of s_cluster_ratio */
        loff_t s_bitmap_maxbytes;       /* max bytes for bitmap files */
        struct buffer_head * s_sbh;     /* Buffer containing the super block */
        struct ext4_super_block *s_es;  /* Pointer to the super block in the buffer */
@@ -1117,10 +1148,10 @@ struct ext4_sb_info {
        u32 s_hash_seed[4];
        int s_def_hash_version;
        int s_hash_unsigned;    /* 3 if hash should be signed, 0 if not */
-       struct percpu_counter s_freeblocks_counter;
+       struct percpu_counter s_freeclusters_counter;
        struct percpu_counter s_freeinodes_counter;
        struct percpu_counter s_dirs_counter;
-       struct percpu_counter s_dirtyblocks_counter;
+       struct percpu_counter s_dirtyclusters_counter;
        struct blockgroup_lock *s_blockgroup_lock;
        struct proc_dir_entry *s_proc;
        struct kobject s_kobj;
@@ -1136,10 +1167,6 @@ struct ext4_sb_info {
        u32 s_max_batch_time;
        u32 s_min_batch_time;
        struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-       struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
-       wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
-#endif
 #ifdef CONFIG_QUOTA
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
@@ -1248,6 +1275,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
                 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
 
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+                                             struct ext4_io_end *io_end)
+{
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               io_end->flag |= EXT4_IO_END_UNWRITTEN;
+               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+       }
+}
+
 /*
  * Inode dynamic state flags
  */
@@ -1360,6 +1396,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK       0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE     0x0040
 #define EXT4_FEATURE_RO_COMPAT_QUOTA           0x0100
+#define EXT4_FEATURE_RO_COMPAT_BIGALLOC                0x0200
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION      0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE         0x0002
@@ -1402,7 +1439,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
                                         EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
                                         EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
                                         EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
-                                        EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
+                                        EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
+                                        EXT4_FEATURE_RO_COMPAT_BIGALLOC)
 
 /*
  * Default values for user and/or group using reserved blocks
@@ -1735,9 +1773,9 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                                         unsigned int flags,
                                         unsigned long *count,
                                         int *errp);
-extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-                                 s64 nblocks, unsigned int flags);
-extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
+extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+                                   s64 nclusters, unsigned int flags);
+extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
 extern void ext4_check_blocks_bitmap(struct super_block *);
 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
                                                    ext4_group_t block_group,
@@ -1745,12 +1783,18 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
 struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
                                      ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-                                      struct buffer_head *bh,
-                                      ext4_group_t group,
-                                      struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)                   \
-               ext4_init_block_bitmap(sb, NULL, group, desc)
+extern void ext4_init_block_bitmap(struct super_block *sb,
+                                  struct buffer_head *bh,
+                                  ext4_group_t group,
+                                  struct ext4_group_desc *desc);
+extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
+                                             ext4_group_t block_group,
+                                             struct ext4_group_desc *gdp);
+extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+                                           ext4_group_t block_group);
+extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
+                                          ext4_group_t block_group,
+                                          struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
 /* dir.c */
@@ -1776,7 +1820,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 
 /* ialloc.c */
 extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
-                                   const struct qstr *qstr, __u32 goal);
+                                   const struct qstr *qstr, __u32 goal,
+                                   uid_t *owner);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1839,6 +1884,12 @@ extern int ext4_block_truncate_page(handle_t *handle,
                struct address_space *mapping, loff_t from);
 extern int ext4_block_zero_page_range(handle_t *handle,
                struct address_space *mapping, loff_t from, loff_t length);
+extern int ext4_discard_partial_page_buffers(handle_t *handle,
+               struct address_space *mapping, loff_t from,
+               loff_t length, int flags);
+extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+               struct inode *inode, struct page *page, loff_t from,
+               loff_t length, int flags);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1927,8 +1978,8 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
                                      struct ext4_group_desc *bg);
 extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                                     struct ext4_group_desc *bg);
-extern __u32 ext4_free_blks_count(struct super_block *sb,
-                               struct ext4_group_desc *bg);
+extern __u32 ext4_free_group_clusters(struct super_block *sb,
+                                     struct ext4_group_desc *bg);
 extern __u32 ext4_free_inodes_count(struct super_block *sb,
                                 struct ext4_group_desc *bg);
 extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -1941,8 +1992,9 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
                                  struct ext4_group_desc *bg, ext4_fsblk_t blk);
 extern void ext4_inode_table_set(struct super_block *sb,
                                 struct ext4_group_desc *bg, ext4_fsblk_t blk);
-extern void ext4_free_blks_set(struct super_block *sb,
-                              struct ext4_group_desc *bg, __u32 count);
+extern void ext4_free_group_clusters_set(struct super_block *sb,
+                                        struct ext4_group_desc *bg,
+                                        __u32 count);
 extern void ext4_free_inodes_set(struct super_block *sb,
                                struct ext4_group_desc *bg, __u32 count);
 extern void ext4_used_dirs_set(struct super_block *sb,
@@ -2051,13 +2103,13 @@ do {                                                            \
 } while (0)
 
 #ifdef CONFIG_SMP
-/* Each CPU can accumulate percpu_counter_batch blocks in their local
- * counters. So we need to make sure we have free blocks more
+/* Each CPU can accumulate percpu_counter_batch clusters in their local
+ * counters. So we need to make sure we have free clusters more
  * than percpu_counter_batch  * nr_cpu_ids. Also add a window of 4 times.
  */
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
+#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
 #else
-#define EXT4_FREEBLOCKS_WATERMARK 0
+#define EXT4_FREECLUSTERS_WATERMARK 0
 #endif
 
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@ -2243,10 +2295,19 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
 enum ext4_state_bits {
        BH_Uninit       /* blocks are allocated but uninitialized on disk */
          = BH_JBDPrivateStart,
+       BH_AllocFromCluster,    /* allocated blocks were part of already
+                                * allocated cluster. Note that this flag will
+                                * never, ever appear in a buffer_head's state
+                                * flag. See EXT4_MAP_FROM_CLUSTER to see where
+                                * this is used. */
+       BH_Da_Mapped,   /* Delayed allocated block that now has a mapping. This
+                        * flag is set when ext4_map_blocks is called on a
+                        * delayed allocated block to get its real mapping. */
 };
 
 BUFFER_FNS(Uninit, uninit)
 TAS_BUFFER_FNS(Uninit, uninit)
+BUFFER_FNS(Da_Mapped, da_mapped)
 
 /*
  * Add new method to test wether block and inode bitmaps are properly
@@ -2282,4 +2343,6 @@ extern void ext4_resize_end(struct super_block *sb);
 
 #endif /* __KERNEL__ */
 
+#include "ext4_extents.h"
+
 #endif /* _EXT4_H */
index 095c36f3b6129e97db937f30a969174c4facacf1..a52db3a69a30e88df3af4a5d56811985effe4dfd 100644 (file)
@@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
                                                        struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+                                     int search_hint_reverse);
 #endif /* _EXT4_EXTENTS */
 
index f5240aa15601a6b34c40b339b0153e6f8bc6867d..aca17901758249d4329d780714e328ef42851e35 100644 (file)
@@ -109,9 +109,11 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 
        if (ext4_handle_valid(handle)) {
                err = jbd2_journal_dirty_metadata(handle, bh);
-               if (err)
-                       ext4_journal_abort_handle(where, line, __func__,
-                                                 bh, handle, err);
+               if (err) {
+                       /* Errors can only happen if there is a bug */
+                       handle->h_err = err;
+                       __ext4_journal_stop(where, line, handle);
+               }
        } else {
                if (inode)
                        mark_buffer_dirty_inode(bh, inode);
index 57cf568a98ab652afcfe3581d093d5f2b5758689..61fa9e1614afd1922bae4cf5ce0d26dfdbab6b25 100644 (file)
@@ -42,7 +42,6 @@
 #include <asm/uaccess.h>
 #include <linux/fiemap.h>
 #include "ext4_jbd2.h"
-#include "ext4_extents.h"
 
 #include <trace/events/ext4.h>
 
@@ -96,13 +95,17 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
  *  - ENOMEM
  *  - EIO
  */
-static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
-                               struct ext4_ext_path *path)
+#define ext4_ext_dirty(handle, inode, path) \
+               __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
+static int __ext4_ext_dirty(const char *where, unsigned int line,
+                           handle_t *handle, struct inode *inode,
+                           struct ext4_ext_path *path)
 {
        int err;
        if (path->p_bh) {
                /* path points to block */
-               err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
+               err = __ext4_handle_dirty_metadata(where, line, handle,
+                                                  inode, path->p_bh);
        } else {
                /* path points to leaf/index in inode body */
                err = ext4_mark_inode_dirty(handle, inode);
@@ -114,11 +117,9 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
                              struct ext4_ext_path *path,
                              ext4_lblk_t block)
 {
-       int depth;
-
        if (path) {
+               int depth = path->p_depth;
                struct ext4_extent *ex;
-               depth = path->p_depth;
 
                /*
                 * Try to predict block placement assuming that we are
@@ -180,12 +181,10 @@ static inline int ext4_ext_space_block(struct inode *inode, int check)
 
        size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
                        / sizeof(struct ext4_extent);
-       if (!check) {
 #ifdef AGGRESSIVE_TEST
-               if (size > 6)
-                       size = 6;
+       if (!check && size > 6)
+               size = 6;
 #endif
-       }
        return size;
 }
 
@@ -195,12 +194,10 @@ static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
 
        size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
                        / sizeof(struct ext4_extent_idx);
-       if (!check) {
 #ifdef AGGRESSIVE_TEST
-               if (size > 5)
-                       size = 5;
+       if (!check && size > 5)
+               size = 5;
 #endif
-       }
        return size;
 }
 
@@ -211,12 +208,10 @@ static inline int ext4_ext_space_root(struct inode *inode, int check)
        size = sizeof(EXT4_I(inode)->i_data);
        size -= sizeof(struct ext4_extent_header);
        size /= sizeof(struct ext4_extent);
-       if (!check) {
 #ifdef AGGRESSIVE_TEST
-               if (size > 3)
-                       size = 3;
+       if (!check && size > 3)
+               size = 3;
 #endif
-       }
        return size;
 }
 
@@ -227,12 +222,10 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
        size = sizeof(EXT4_I(inode)->i_data);
        size -= sizeof(struct ext4_extent_header);
        size /= sizeof(struct ext4_extent_idx);
-       if (!check) {
 #ifdef AGGRESSIVE_TEST
-               if (size > 4)
-                       size = 4;
+       if (!check && size > 4)
+               size = 4;
 #endif
-       }
        return size;
 }
 
@@ -244,7 +237,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
 int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
 {
        struct ext4_inode_info *ei = EXT4_I(inode);
-       int idxs, num = 0;
+       int idxs;
 
        idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
                / sizeof(struct ext4_extent_idx));
@@ -259,6 +252,8 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
         */
        if (ei->i_da_metadata_calc_len &&
            ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+               int num = 0;
+
                if ((ei->i_da_metadata_calc_len % idxs) == 0)
                        num++;
                if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
@@ -321,8 +316,6 @@ static int ext4_valid_extent_entries(struct inode *inode,
                                struct ext4_extent_header *eh,
                                int depth)
 {
-       struct ext4_extent *ext;
-       struct ext4_extent_idx *ext_idx;
        unsigned short entries;
        if (eh->eh_entries == 0)
                return 1;
@@ -331,7 +324,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
 
        if (depth == 0) {
                /* leaf entries */
-               ext = EXT_FIRST_EXTENT(eh);
+               struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
                while (entries) {
                        if (!ext4_valid_extent(inode, ext))
                                return 0;
@@ -339,7 +332,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
                        entries--;
                }
        } else {
-               ext_idx = EXT_FIRST_INDEX(eh);
+               struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
                while (entries) {
                        if (!ext4_valid_extent_idx(inode, ext_idx))
                                return 0;
@@ -751,31 +744,30 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
                return -EIO;
        }
 
-       len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
        if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
                /* insert after */
-               if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
-                       len = (len - 1) * sizeof(struct ext4_extent_idx);
-                       len = len < 0 ? 0 : len;
-                       ext_debug("insert new index %d after: %llu. "
-                                       "move %d from 0x%p to 0x%p\n",
-                                       logical, ptr, len,
-                                       (curp->p_idx + 1), (curp->p_idx + 2));
-                       memmove(curp->p_idx + 2, curp->p_idx + 1, len);
-               }
+               ext_debug("insert new index %d after: %llu\n", logical, ptr);
                ix = curp->p_idx + 1;
        } else {
                /* insert before */
-               len = len * sizeof(struct ext4_extent_idx);
-               len = len < 0 ? 0 : len;
-               ext_debug("insert new index %d before: %llu. "
-                               "move %d from 0x%p to 0x%p\n",
-                               logical, ptr, len,
-                               curp->p_idx, (curp->p_idx + 1));
-               memmove(curp->p_idx + 1, curp->p_idx, len);
+               ext_debug("insert new index %d before: %llu\n", logical, ptr);
                ix = curp->p_idx;
        }
 
+       len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
+       BUG_ON(len < 0);
+       if (len > 0) {
+               ext_debug("insert new index %d: "
+                               "move %d indices from 0x%p to 0x%p\n",
+                               logical, len, ix, ix + 1);
+               memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
+       }
+
+       if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
+               EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
+               return -EIO;
+       }
+
        ix->ei_block = cpu_to_le32(logical);
        ext4_idx_store_pblock(ix, ptr);
        le16_add_cpu(&curp->p_hdr->eh_entries, 1);
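
The rewritten insertion above replaces byte-count arithmetic clamped at zero with a direct count of entries to shift, EXT_LAST_INDEX(hdr) - ix + 1. A self-contained sketch of that open-a-hole-at-ix move, with an illustrative struct rather than the on-disk one:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct idx { uint32_t block; };         /* stand-in for ext4_extent_idx */

/* shift entries [pos, count) one slot right to open a hole at pos */
static void make_room(struct idx *base, int count, int pos)
{
        int len = count - pos;          /* whole entries, not bytes */

        if (len > 0)
                memmove(base + pos + 1, base + pos,
                        len * sizeof(struct idx));
}

int main(void)
{
        struct idx v[4] = { {10}, {20}, {40}, {0} };

        make_room(v, 3, 2);             /* open a slot before {40} */
        v[2].block = 30;
        for (int i = 0; i < 4; i++)
                printf("%u ", v[i].block);      /* 10 20 30 40 */
        printf("\n");
        return 0;
}
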
@@ -1042,16 +1034,14 @@ cleanup:
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
                                 unsigned int flags,
-                                struct ext4_ext_path *path,
                                 struct ext4_extent *newext)
 {
-       struct ext4_ext_path *curp = path;
        struct ext4_extent_header *neh;
        struct buffer_head *bh;
        ext4_fsblk_t newblock;
        int err = 0;
 
-       newblock = ext4_ext_new_meta_block(handle, inode, path,
+       newblock = ext4_ext_new_meta_block(handle, inode, NULL,
                newext, &err, flags);
        if (newblock == 0)
                return err;
@@ -1071,7 +1061,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
        }
 
        /* move top-level index/leaf into new block */
-       memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data));
+       memmove(bh->b_data, EXT4_I(inode)->i_data,
+               sizeof(EXT4_I(inode)->i_data));
 
        /* set size of new block */
        neh = ext_block_hdr(bh);
@@ -1089,32 +1080,23 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
        if (err)
                goto out;
 
-       /* create index in new top-level index: num,max,pointer */
-       err = ext4_ext_get_access(handle, inode, curp);
-       if (err)
-               goto out;
-
-       curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
-       curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
-       curp->p_hdr->eh_entries = cpu_to_le16(1);
-       curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
-
-       if (path[0].p_hdr->eh_depth)
-               curp->p_idx->ei_block =
-                       EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
-       else
-               curp->p_idx->ei_block =
-                       EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
-       ext4_idx_store_pblock(curp->p_idx, newblock);
-
+       /* Update top-level index: num,max,pointer */
        neh = ext_inode_hdr(inode);
+       neh->eh_entries = cpu_to_le16(1);
+       ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
+       if (neh->eh_depth == 0) {
+               /* Root extent block becomes index block */
+               neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
+               EXT_FIRST_INDEX(neh)->ei_block =
+                       EXT_FIRST_EXTENT(neh)->ee_block;
+       }
        ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
                  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
                  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
                  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
-       neh->eh_depth = cpu_to_le16(path->p_depth + 1);
-       err = ext4_ext_dirty(handle, inode, curp);
+       le16_add_cpu(&neh->eh_depth, 1);
+       ext4_mark_inode_dirty(handle, inode);
 out:
        brelse(bh);
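
Condensed, the rewritten function does three things: copy the in-inode root verbatim into the new block, rewrite the in-inode root as a one-entry index pointing at that block, and bump the depth. A toy model with a made-up node type (not the kernel structures):

#include <stdint.h>
#include <stdio.h>

struct node { int depth; int entries; uint64_t child; };

static void grow_indepth(struct node *root, struct node *newblock,
                         uint64_t newblock_nr)
{
        *newblock = *root;      /* old root moves into the new block */
        root->entries = 1;      /* root becomes a one-entry index */
        root->child = newblock_nr;
        root->depth++;          /* tree is one level deeper */
}

int main(void)
{
        struct node root = { 0, 4, 0 }, blk;

        grow_indepth(&root, &blk, 1234);
        printf("root: depth=%d entries=%d child=%llu; moved node kept %d\n",
               root.depth, root.entries,
               (unsigned long long)root.child, blk.entries);
        return 0;
}
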
 
@@ -1162,8 +1144,7 @@ repeat:
                        err = PTR_ERR(path);
        } else {
                /* tree is full, time to grow in depth */
-               err = ext4_ext_grow_indepth(handle, inode, flags,
-                                           path, newext);
+               err = ext4_ext_grow_indepth(handle, inode, flags, newext);
                if (err)
                        goto out;
 
@@ -1235,9 +1216,9 @@ static int ext4_ext_search_left(struct inode *inode,
                        if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
                                EXT4_ERROR_INODE(inode,
                                  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
-                                 ix != NULL ? ix->ei_block : 0,
+                                 ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
                                  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
-                                   EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+               le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
                                  depth);
                                return -EIO;
                        }
@@ -1260,13 +1241,14 @@ static int ext4_ext_search_left(struct inode *inode,
 /*
  * search the closest allocated block to the right for *logical
 * and returns it at @logical + its physical address at @phys
- * if *logical is the smallest allocated block, the function
+ * if *logical is the largest allocated block, the function
  * returns 0 at @phys
  * return value contains 0 (success) or error code
  */
 static int ext4_ext_search_right(struct inode *inode,
                                 struct ext4_ext_path *path,
-                                ext4_lblk_t *logical, ext4_fsblk_t *phys)
+                                ext4_lblk_t *logical, ext4_fsblk_t *phys,
+                                struct ext4_extent **ret_ex)
 {
        struct buffer_head *bh = NULL;
        struct ext4_extent_header *eh;
@@ -1308,9 +1290,7 @@ static int ext4_ext_search_right(struct inode *inode,
                                return -EIO;
                        }
                }
-               *logical = le32_to_cpu(ex->ee_block);
-               *phys = ext4_ext_pblock(ex);
-               return 0;
+               goto found_extent;
        }
 
        if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
@@ -1323,9 +1303,7 @@ static int ext4_ext_search_right(struct inode *inode,
        if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
                /* next allocated block in this leaf */
                ex++;
-               *logical = le32_to_cpu(ex->ee_block);
-               *phys = ext4_ext_pblock(ex);
-               return 0;
+               goto found_extent;
        }
 
        /* go up and search for index to the right */
@@ -1368,9 +1346,12 @@ got_index:
                return -EIO;
        }
        ex = EXT_FIRST_EXTENT(eh);
+found_extent:
        *logical = le32_to_cpu(ex->ee_block);
        *phys = ext4_ext_pblock(ex);
-       put_bh(bh);
+       *ret_ex = ex;
+       if (bh)
+               put_bh(bh);
        return 0;
 }
 
@@ -1395,7 +1376,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
        while (depth >= 0) {
                if (depth == path->p_depth) {
                        /* leaf */
-                       if (path[depth].p_ext !=
+                       if (path[depth].p_ext &&
+                               path[depth].p_ext !=
                                        EXT_LAST_EXTENT(path[depth].p_hdr))
                          return le32_to_cpu(path[depth].p_ext[1].ee_block);
                } else {
@@ -1623,7 +1605,8 @@ static int ext4_ext_try_to_merge(struct inode *inode,
  * such that there will be no overlap, and then returns 1.
  * If there is no overlap found, it returns 0.
  */
-static unsigned int ext4_ext_check_overlap(struct inode *inode,
+static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
+                                          struct inode *inode,
                                           struct ext4_extent *newext,
                                           struct ext4_ext_path *path)
 {
@@ -1637,6 +1620,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
        if (!path[depth].p_ext)
                goto out;
        b2 = le32_to_cpu(path[depth].p_ext->ee_block);
+       b2 &= ~(sbi->s_cluster_ratio - 1);
 
        /*
         * get the next allocated block if the extent in the path
@@ -1646,6 +1630,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
                b2 = ext4_ext_next_allocated_block(path);
                if (b2 == EXT_MAX_BLOCKS)
                        goto out;
+               b2 &= ~(sbi->s_cluster_ratio - 1);
        }
 
        /* check for wrap through zero on extent logical start block*/
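
The two new b2 adjustments assume the bigalloc cluster ratio is a power of two, so masking with ~(ratio - 1) rounds a block number down to the first block of its cluster. A small standalone illustration:

#include <stdint.h>
#include <stdio.h>

/* round a logical block down to its cluster boundary; the cluster
 * ratio must be a power of two, as with bigalloc */
static uint32_t cluster_start(uint32_t lblk, uint32_t cluster_ratio)
{
        return lblk & ~(cluster_ratio - 1);
}

int main(void)
{
        printf("%u\n", cluster_start(70, 16));  /* prints 64 */
        return 0;
}
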
@@ -1697,7 +1682,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
        /* try to insert block into found extent and return */
        if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
                && ext4_can_extents_be_merged(inode, ex, newext)) {
-               ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
+               ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n",
                          ext4_ext_is_uninitialized(newext),
                          ext4_ext_get_actual_len(newext),
                          le32_to_cpu(ex->ee_block),
@@ -1735,7 +1720,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
        if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
                next = ext4_ext_next_leaf_block(path);
        if (next != EXT_MAX_BLOCKS) {
-               ext_debug("next leaf block - %d\n", next);
+               ext_debug("next leaf block - %u\n", next);
                BUG_ON(npath != NULL);
                npath = ext4_ext_find_extent(inode, next, NULL);
                if (IS_ERR(npath))
@@ -1773,46 +1758,51 @@ has_space:
 
        if (!nearex) {
                /* there is no extent in this leaf, create first one */
-               ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
+               ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
                                le32_to_cpu(newext->ee_block),
                                ext4_ext_pblock(newext),
                                ext4_ext_is_uninitialized(newext),
                                ext4_ext_get_actual_len(newext));
-               path[depth].p_ext = EXT_FIRST_EXTENT(eh);
-       } else if (le32_to_cpu(newext->ee_block)
+               nearex = EXT_FIRST_EXTENT(eh);
+       } else {
+               if (le32_to_cpu(newext->ee_block)
                           > le32_to_cpu(nearex->ee_block)) {
-/*             BUG_ON(newext->ee_block == nearex->ee_block); */
-               if (nearex != EXT_LAST_EXTENT(eh)) {
-                       len = EXT_MAX_EXTENT(eh) - nearex;
-                       len = (len - 1) * sizeof(struct ext4_extent);
-                       len = len < 0 ? 0 : len;
-                       ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
-                                       "move %d from 0x%p to 0x%p\n",
+                       /* Insert after */
+                       ext_debug("insert %u:%llu:[%d]%d after: "
+                                       "nearest %p\n",
                                        le32_to_cpu(newext->ee_block),
                                        ext4_ext_pblock(newext),
                                        ext4_ext_is_uninitialized(newext),
                                        ext4_ext_get_actual_len(newext),
-                                       nearex, len, nearex + 1, nearex + 2);
-                       memmove(nearex + 2, nearex + 1, len);
+                                       nearex);
+                       nearex++;
+               } else {
+                       /* Insert before */
+                       BUG_ON(newext->ee_block == nearex->ee_block);
+                       ext_debug("insert %u:%llu:[%d]%d before: "
+                                       "nearest %p\n",
+                                       le32_to_cpu(newext->ee_block),
+                                       ext4_ext_pblock(newext),
+                                       ext4_ext_is_uninitialized(newext),
+                                       ext4_ext_get_actual_len(newext),
+                                       nearex);
+               }
+               len = EXT_LAST_EXTENT(eh) - nearex + 1;
+               if (len > 0) {
+                       ext_debug("insert %u:%llu:[%d]%d: "
+                                       "move %d extents from 0x%p to 0x%p\n",
+                                       le32_to_cpu(newext->ee_block),
+                                       ext4_ext_pblock(newext),
+                                       ext4_ext_is_uninitialized(newext),
+                                       ext4_ext_get_actual_len(newext),
+                                       len, nearex, nearex + 1);
+                       memmove(nearex + 1, nearex,
+                               len * sizeof(struct ext4_extent));
                }
-               path[depth].p_ext = nearex + 1;
-       } else {
-               BUG_ON(newext->ee_block == nearex->ee_block);
-               len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
-               len = len < 0 ? 0 : len;
-               ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
-                               "move %d from 0x%p to 0x%p\n",
-                               le32_to_cpu(newext->ee_block),
-                               ext4_ext_pblock(newext),
-                               ext4_ext_is_uninitialized(newext),
-                               ext4_ext_get_actual_len(newext),
-                               nearex, len, nearex, nearex + 1);
-               memmove(nearex + 1, nearex, len);
-               path[depth].p_ext = nearex;
        }
 
        le16_add_cpu(&eh->eh_entries, 1);
-       nearex = path[depth].p_ext;
+       path[depth].p_ext = nearex;
        nearex->ee_block = newext->ee_block;
        ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
        nearex->ee_len = newext->ee_len;
@@ -1962,6 +1952,7 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
        struct ext4_ext_cache *cex;
        BUG_ON(len == 0);
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+       trace_ext4_ext_put_in_cache(inode, block, len, start);
        cex = &EXT4_I(inode)->i_cached_extent;
        cex->ec_block = block;
        cex->ec_len = len;
@@ -2063,6 +2054,7 @@ errout:
                sbi->extent_cache_misses++;
        else
                sbi->extent_cache_hits++;
+       trace_ext4_ext_in_cache(inode, block, ret);
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
        return ret;
 }
@@ -2130,6 +2122,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        if (err)
                return err;
        ext_debug("index is empty, remove it, free block %llu\n", leaf);
+       trace_ext4_ext_rm_idx(inode, leaf);
+
        ext4_free_blocks(handle, inode, NULL, leaf, 1,
                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return err;
@@ -2158,7 +2152,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
                         *  need to account for leaf block credit
                         *
                         *  bitmaps and block group descriptor blocks
-                        *  and other metadat blocks still need to be
+                        *  and other metadata blocks still need to be
                         *  accounted.
                         */
                        /* 1 bitmap, 1 block group descriptor */
@@ -2195,14 +2189,40 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 }
 
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
-                               struct ext4_extent *ex,
-                               ext4_lblk_t from, ext4_lblk_t to)
+                             struct ext4_extent *ex,
+                             ext4_fsblk_t *partial_cluster,
+                             ext4_lblk_t from, ext4_lblk_t to)
 {
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        unsigned short ee_len =  ext4_ext_get_actual_len(ex);
+       ext4_fsblk_t pblk;
        int flags = EXT4_FREE_BLOCKS_FORGET;
 
        if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
                flags |= EXT4_FREE_BLOCKS_METADATA;
+       /*
+        * For bigalloc file systems, we never free a partial cluster
+        * at the beginning of the extent.  Instead, we make a note
+        * that we tried freeing the cluster, and check to see if we
+        * need to free it on a subsequent call to ext4_remove_blocks,
+        * or at the end of the ext4_truncate() operation.
+        */
+       flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
+
+       trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
+       /*
+        * If we have a partial cluster, and it's different from the
+        * cluster of the last block, we need to explicitly free the
+        * partial cluster here.
+        */
+       pblk = ext4_ext_pblock(ex) + ee_len - 1;
+       if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
+               ext4_free_blocks(handle, inode, NULL,
+                                EXT4_C2B(sbi, *partial_cluster),
+                                sbi->s_cluster_ratio, flags);
+               *partial_cluster = 0;
+       }
+
 #ifdef EXTENTS_STATS
        {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2222,12 +2242,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
            && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
                /* tail removal */
                ext4_lblk_t num;
-               ext4_fsblk_t start;
 
                num = le32_to_cpu(ex->ee_block) + ee_len - from;
-               start = ext4_ext_pblock(ex) + ee_len - num;
-               ext_debug("free last %u blocks starting %llu\n", num, start);
-               ext4_free_blocks(handle, inode, NULL, start, num, flags);
+               pblk = ext4_ext_pblock(ex) + ee_len - num;
+               ext_debug("free last %u blocks starting %llu\n", num, pblk);
+               ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
+               /*
+                * If the block range to be freed didn't start at the
+                * beginning of a cluster, and we removed the entire
+                * extent, save the partial cluster here, since we
+                * might need to delete if we determine that the
+                * truncate operation has removed all of the blocks in
+                * the cluster.
+                */
+               if (pblk & (sbi->s_cluster_ratio - 1) &&
+                   (ee_len == num))
+                       *partial_cluster = EXT4_B2C(sbi, pblk);
+               else
+                       *partial_cluster = 0;
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
                /* head removal */
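
To make the tail-removal bookkeeping concrete: a partial cluster is remembered only when the first freed block is not cluster-aligned and the whole extent was consumed. A toy version, with EXT4_B2C() reduced to integer division and illustrative names:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t fsblk_t;

/*
 * After freeing the tail [pblk, pblk + num) of an extent that held
 * ee_len blocks, remember pblk's cluster iff the freed range did not
 * start on a cluster boundary and the whole extent went away.
 */
static fsblk_t note_partial_cluster(fsblk_t pblk, unsigned int num,
                                    unsigned int ee_len, unsigned int ratio)
{
        if ((pblk & (ratio - 1)) && ee_len == num)
                return pblk / ratio;    /* EXT4_B2C() equivalent */
        return 0;
}

int main(void)
{
        /* 8-block extent at pblk 70, 16-block clusters: cluster 4 is
         * only partially freed, so it is remembered */
        printf("%llu\n",
               (unsigned long long)note_partial_cluster(70, 8, 8, 16));
        return 0;
}
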
@@ -2238,7 +2270,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                start = ext4_ext_pblock(ex);
 
                ext_debug("free first %u blocks starting %llu\n", num, start);
-               ext4_free_blocks(handle, inode, 0, start, num, flags);
+               ext4_free_blocks(handle, inode, NULL, start, num, flags);
 
        } else {
                printk(KERN_INFO "strange request: removal(2) "
@@ -2262,19 +2294,19 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
  */
 static int
 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
-               struct ext4_ext_path *path, ext4_lblk_t start,
-               ext4_lblk_t end)
+                struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
+                ext4_lblk_t start, ext4_lblk_t end)
 {
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        int err = 0, correct_index = 0;
        int depth = ext_depth(inode), credits;
        struct ext4_extent_header *eh;
-       ext4_lblk_t a, b, block;
+       ext4_lblk_t a, b;
        unsigned num;
        ext4_lblk_t ex_ee_block;
        unsigned short ex_ee_len;
        unsigned uninitialized = 0;
        struct ext4_extent *ex;
-       struct ext4_map_blocks map;
 
        /* the header must be checked already in ext4_ext_remove_space() */
        ext_debug("truncate since %u in leaf\n", start);
@@ -2291,6 +2323,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
        ex_ee_block = le32_to_cpu(ex->ee_block);
        ex_ee_len = ext4_ext_get_actual_len(ex);
 
+       trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
+
        while (ex >= EXT_FIRST_EXTENT(eh) &&
                        ex_ee_block + ex_ee_len > start) {
 
@@ -2315,86 +2349,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                        ex_ee_block = le32_to_cpu(ex->ee_block);
                        ex_ee_len = ext4_ext_get_actual_len(ex);
                        continue;
-               } else if (a != ex_ee_block &&
-                       b != ex_ee_block + ex_ee_len - 1) {
-                       /*
-                        * If this is a truncate, then this condition should
-                        * never happen because at least one of the end points
-                        * needs to be on the edge of the extent.
-                        */
-                       if (end == EXT_MAX_BLOCKS - 1) {
-                               ext_debug("  bad truncate %u:%u\n",
-                                               start, end);
-                               block = 0;
-                               num = 0;
-                               err = -EIO;
-                               goto out;
-                       }
-                       /*
-                        * else this is a hole punch, so the extent needs to
-                        * be split since neither edge of the hole is on the
-                        * extent edge
-                        */
-                       else{
-                               map.m_pblk = ext4_ext_pblock(ex);
-                               map.m_lblk = ex_ee_block;
-                               map.m_len = b - ex_ee_block;
-
-                               err = ext4_split_extent(handle,
-                                       inode, path, &map, 0,
-                                       EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
-                                       EXT4_GET_BLOCKS_PRE_IO);
-
-                               if (err < 0)
-                                       goto out;
-
-                               ex_ee_len = ext4_ext_get_actual_len(ex);
-
-                               b = ex_ee_block+ex_ee_len - 1 < end ?
-                                       ex_ee_block+ex_ee_len - 1 : end;
-
-                               /* Then remove tail of this extent */
-                               block = ex_ee_block;
-                               num = a - block;
-                       }
+               } else if (b != ex_ee_block + ex_ee_len - 1) {
+                       EXT4_ERROR_INODE(inode, "  bad truncate %u:%u\n",
+                                        start, end);
+                       err = -EIO;
+                       goto out;
                } else if (a != ex_ee_block) {
                        /* remove tail of the extent */
-                       block = ex_ee_block;
-                       num = a - block;
-               } else if (b != ex_ee_block + ex_ee_len - 1) {
-                       /* remove head of the extent */
-                       block = b;
-                       num =  ex_ee_block + ex_ee_len - b;
-
-                       /*
-                        * If this is a truncate, this condition
-                        * should never happen
-                        */
-                       if (end == EXT_MAX_BLOCKS - 1) {
-                               ext_debug("  bad truncate %u:%u\n",
-                                       start, end);
-                               err = -EIO;
-                               goto out;
-                       }
+                       num = a - ex_ee_block;
                } else {
                        /* remove whole extent: excellent! */
-                       block = ex_ee_block;
                        num = 0;
-                       if (a != ex_ee_block) {
-                               ext_debug("  bad truncate %u:%u\n",
-                                       start, end);
-                               err = -EIO;
-                               goto out;
-                       }
-
-                       if (b != ex_ee_block + ex_ee_len - 1) {
-                               ext_debug("  bad truncate %u:%u\n",
-                                       start, end);
-                               err = -EIO;
-                               goto out;
-                       }
                }
-
                /*
                 * 3 for leaf, sb, and inode plus 2 (bmap and group
                 * descriptor) for each block group; assume two block
@@ -2416,23 +2382,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                if (err)
                        goto out;
 
-               err = ext4_remove_blocks(handle, inode, ex, a, b);
+               err = ext4_remove_blocks(handle, inode, ex, partial_cluster,
+                                        a, b);
                if (err)
                        goto out;
 
-               if (num == 0) {
+               if (num == 0)
                        /* this extent is removed; mark slot entirely unused */
                        ext4_ext_store_pblock(ex, 0);
-               } else if (block != ex_ee_block) {
-                       /*
-                        * If this was a head removal, then we need to update
-                        * the physical block since it is now at a different
-                        * location
-                        */
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
-               }
 
-               ex->ee_block = cpu_to_le32(block);
                ex->ee_len = cpu_to_le16(num);
                /*
                 * Do not mark uninitialized if all the blocks in the
@@ -2440,11 +2398,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                 */
                if (uninitialized && num)
                        ext4_ext_mark_uninitialized(ex);
-
-               err = ext4_ext_dirty(handle, inode, path + depth);
-               if (err)
-                       goto out;
-
                /*
                 * If the extent was completely released,
                 * we need to remove it from the leaf
@@ -2464,9 +2417,14 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                                        sizeof(struct ext4_extent));
                        }
                        le16_add_cpu(&eh->eh_entries, -1);
-               }
+               } else
+                       *partial_cluster = 0;
 
-               ext_debug("new extent: %u:%u:%llu\n", block, num,
+               err = ext4_ext_dirty(handle, inode, path + depth);
+               if (err)
+                       goto out;
+
+               ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num,
                                ext4_ext_pblock(ex));
                ex--;
                ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2476,6 +2434,25 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
        if (correct_index && eh->eh_entries)
                err = ext4_ext_correct_indexes(handle, inode, path);
 
+       /*
+        * If there is still an entry in the leaf node, check to see if
+        * it references the partial cluster.  This is the only place
+        * where it could; if it doesn't, we can free the cluster.
+        */
+       if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
+           (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
+            *partial_cluster)) {
+               int flags = EXT4_FREE_BLOCKS_FORGET;
+
+               if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+                       flags |= EXT4_FREE_BLOCKS_METADATA;
+
+               ext4_free_blocks(handle, inode, NULL,
+                                EXT4_C2B(sbi, *partial_cluster),
+                                sbi->s_cluster_ratio, flags);
+               *partial_cluster = 0;
+       }
+
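
Reduced to its core, the new check frees the remembered cluster only when the last extent still present in the leaf ends in a different cluster (again modeling EXT4_B2C() as a division):

#include <stdint.h>
#include <stdio.h>

static int should_free_partial(uint64_t last_pblk_in_leaf,
                               uint64_t partial_cluster, unsigned int ratio)
{
        return partial_cluster &&
               last_pblk_in_leaf / ratio != partial_cluster;
}

int main(void)
{
        /* the surviving extent ends at pblk 50 (cluster 3), while the
         * remembered partial cluster is 4: safe to free it */
        printf("%d\n", should_free_partial(50, 4, 16));
        return 0;
}
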
        /* if this leaf is free, then we should
         * remove it from index block above */
        if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
@@ -2511,6 +2488,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
        struct super_block *sb = inode->i_sb;
        int depth = ext_depth(inode);
        struct ext4_ext_path *path;
+       ext4_fsblk_t partial_cluster = 0;
        handle_t *handle;
        int i, err;
 
@@ -2524,6 +2502,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
 again:
        ext4_ext_invalidate_cache(inode);
 
+       trace_ext4_ext_remove_space(inode, start, depth);
+
        /*
         * We start scanning from right side, freeing all the blocks
         * after i_size and walking into the tree depth-wise.
@@ -2546,7 +2526,8 @@ again:
                if (i == depth) {
                        /* this is leaf block */
                        err = ext4_ext_rm_leaf(handle, inode, path,
-                                       start, EXT_MAX_BLOCKS - 1);
+                                              &partial_cluster, start,
+                                              EXT_MAX_BLOCKS - 1);
                        /* root level has p_bh == NULL, brelse() eats this */
                        brelse(path[i].p_bh);
                        path[i].p_bh = NULL;
@@ -2618,6 +2599,24 @@ again:
                }
        }
 
+       trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster,
+                       path->p_hdr->eh_entries);
+
+       /* If we still have something in the partial cluster and we have removed
+        * even the first extent, then we should free the blocks in the partial
+        * cluster as well. */
+       if (partial_cluster && path->p_hdr->eh_entries == 0) {
+               int flags = EXT4_FREE_BLOCKS_FORGET;
+
+               if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+                       flags |= EXT4_FREE_BLOCKS_METADATA;
+
+               ext4_free_blocks(handle, inode, NULL,
+                                EXT4_C2B(EXT4_SB(sb), partial_cluster),
+                                EXT4_SB(sb)->s_cluster_ratio, flags);
+               partial_cluster = 0;
+       }
+
        /* TODO: flexible tree reduction should be here */
        if (path->p_hdr->eh_entries == 0) {
                /*
@@ -2909,17 +2908,29 @@ out:
  *   a> There is no split required: Entire extent should be initialized
  *   b> Splits in two extents: Write is happening at either end of the extent
 *   c> Splits in three extents: Someone is writing in the middle of the extent
+ *
+ * Pre-conditions:
+ *  - The extent pointed to by 'path' is uninitialized.
+ *  - The extent pointed to by 'path' contains a superset
+ *    of the logical span [map->m_lblk, map->m_lblk + map->m_len).
+ *
+ * Post-conditions on success:
+ *  - the returned value is the number of blocks beyond map->m_lblk
+ *    that are allocated and initialized.
+ *    It is guaranteed to be >= map->m_len.
  */
 static int ext4_ext_convert_to_initialized(handle_t *handle,
                                           struct inode *inode,
                                           struct ext4_map_blocks *map,
                                           struct ext4_ext_path *path)
 {
+       struct ext4_extent_header *eh;
        struct ext4_map_blocks split_map;
        struct ext4_extent zero_ex;
        struct ext4_extent *ex;
        ext4_lblk_t ee_block, eof_block;
-       unsigned int allocated, ee_len, depth;
+       unsigned int ee_len, depth;
+       int allocated;
        int err = 0;
        int split_flag = 0;
 
@@ -2933,11 +2944,93 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                eof_block = map->m_lblk + map->m_len;
 
        depth = ext_depth(inode);
+       eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
        allocated = ee_len - (map->m_lblk - ee_block);
 
+       trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
+
+       /* Pre-conditions */
+       BUG_ON(!ext4_ext_is_uninitialized(ex));
+       BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
+       BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len);
+
+       /*
+        * Attempt to transfer newly initialized blocks from the currently
+        * uninitialized extent to its left neighbor. This is much cheaper
+        * than an insertion followed by a merge as those involve costly
+        * memmove() calls. This is the common case in steady state for
+        * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
+        * writes.
+        *
+        * Limitations of the current logic:
+        *  - L1: we only deal with writes at the start of the extent.
+        *    The approach could be extended to writes at the end
+        *    of the extent but this scenario was deemed less common.
+        *  - L2: we do not deal with writes covering the whole extent.
+        *    This would require removing the extent if the transfer
+        *    is possible.
+        *  - L3: we only attempt to merge with an extent stored in the
+        *    same extent tree node.
+        */
+       if ((map->m_lblk == ee_block) &&        /*L1*/
+               (map->m_len < ee_len) &&        /*L2*/
+               (ex > EXT_FIRST_EXTENT(eh))) {  /*L3*/
+               struct ext4_extent *prev_ex;
+               ext4_lblk_t prev_lblk;
+               ext4_fsblk_t prev_pblk, ee_pblk;
+               unsigned int prev_len, write_len;
+
+               prev_ex = ex - 1;
+               prev_lblk = le32_to_cpu(prev_ex->ee_block);
+               prev_len = ext4_ext_get_actual_len(prev_ex);
+               prev_pblk = ext4_ext_pblock(prev_ex);
+               ee_pblk = ext4_ext_pblock(ex);
+               write_len = map->m_len;
+
+               /*
+                * A transfer of blocks from 'ex' to 'prev_ex' is allowed
+                * upon those conditions:
+                * - C1: prev_ex is initialized,
+                * - C2: prev_ex is logically abutting ex,
+                * - C3: prev_ex is physically abutting ex,
+                * - C4: prev_ex can receive the additional blocks without
+                *   overflowing the (initialized) length limit.
+                */
+               if ((!ext4_ext_is_uninitialized(prev_ex)) &&            /*C1*/
+                       ((prev_lblk + prev_len) == ee_block) &&         /*C2*/
+                       ((prev_pblk + prev_len) == ee_pblk) &&          /*C3*/
+                       (prev_len < (EXT_INIT_MAX_LEN - write_len))) {  /*C4*/
+                       err = ext4_ext_get_access(handle, inode, path + depth);
+                       if (err)
+                               goto out;
+
+                       trace_ext4_ext_convert_to_initialized_fastpath(inode,
+                               map, ex, prev_ex);
+
+                       /* Shift the start of ex by 'write_len' blocks */
+                       ex->ee_block = cpu_to_le32(ee_block + write_len);
+                       ext4_ext_store_pblock(ex, ee_pblk + write_len);
+                       ex->ee_len = cpu_to_le16(ee_len - write_len);
+                       ext4_ext_mark_uninitialized(ex); /* Restore the flag */
+
+                       /* Extend prev_ex by 'write_len' blocks */
+                       prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
+
+                       /* Mark the block containing both extents as dirty */
+                       ext4_ext_dirty(handle, inode, path + depth);
+
+                       /* Update path to point to the right extent */
+                       path[depth].p_ext = prev_ex;
+
+                       /* Result: number of initialized blocks past m_lblk */
+                       allocated = write_len;
+                       goto out;
+               }
+       }
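
The C1-C4 conditions gate a transfer of blocks between two adjacent extents. A self-contained sketch of the transfer itself (illustrative types; the EXT_INIT_MAX_LEN stand-in is arbitrary):

#include <stdint.h>
#include <stdio.h>

#define INIT_MAX_LEN 32768              /* stand-in for EXT_INIT_MAX_LEN */

struct ext { uint32_t lblk; uint64_t pblk; uint32_t len; int uninit; };

/*
 * Move the first n blocks of 'ex' into 'prev' when prev is
 * initialized (C1), logically (C2) and physically (C3) abutting,
 * and has room for n more blocks (C4). Returns 1 on success.
 */
static int merge_left(struct ext *prev, struct ext *ex, uint32_t n)
{
        if (ex->uninit && n < ex->len &&
            !prev->uninit &&                            /* C1 */
            prev->lblk + prev->len == ex->lblk &&       /* C2 */
            prev->pblk + prev->len == ex->pblk &&       /* C3 */
            prev->len + n < INIT_MAX_LEN) {             /* C4 */
                ex->lblk += n;
                ex->pblk += n;
                ex->len -= n;
                prev->len += n;
                return 1;
        }
        return 0;
}

int main(void)
{
        struct ext prev = { 0, 1000, 8, 0 }, ex = { 8, 1008, 16, 1 };

        if (merge_left(&prev, &ex, 4))
                printf("prev len=%u; ex now %u..%u (still uninit)\n",
                       prev.len, ex.lblk, ex.lblk + ex.len - 1);
        return 0;
}
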
+
        WARN_ON(map->m_lblk < ee_block);
        /*
         * It is safe to convert extent to initialized via explicit
@@ -3165,6 +3258,192 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
        return ext4_mark_inode_dirty(handle, inode);
 }
 
+/**
+ * ext4_find_delalloc_range: find delayed allocated block in the given range.
+ *
+ * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
+ * whether there are any buffers marked for delayed allocation. It returns '1'
+ * on the first delalloc'ed buffer head found. If no buffer head in the given
+ * range is marked for delalloc, it returns 0.
+ * lblk_start should always be <= lblk_end.
+ * search_hint_reverse is to indicate that searching in reverse from lblk_end to
+ * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
+ * block sooner). This is useful when blocks are truncated sequentially from
+ * lblk_start towards lblk_end.
+ */
+static int ext4_find_delalloc_range(struct inode *inode,
+                                   ext4_lblk_t lblk_start,
+                                   ext4_lblk_t lblk_end,
+                                   int search_hint_reverse)
+{
+       struct address_space *mapping = inode->i_mapping;
+       struct buffer_head *head, *bh = NULL;
+       struct page *page;
+       ext4_lblk_t i, pg_lblk;
+       pgoff_t index;
+
+       /* reverse search won't work if fs block size is less than page size */
+       if (inode->i_blkbits < PAGE_CACHE_SHIFT)
+               search_hint_reverse = 0;
+
+       if (search_hint_reverse)
+               i = lblk_end;
+       else
+               i = lblk_start;
+
+       index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+       while ((i >= lblk_start) && (i <= lblk_end)) {
+               page = find_get_page(mapping, index);
+               if (!page)
+                       goto nextpage;
+
+               if (!page_has_buffers(page))
+                       goto nextpage;
+
+               head = page_buffers(page);
+               if (!head)
+                       goto nextpage;
+
+               bh = head;
+               pg_lblk = index << (PAGE_CACHE_SHIFT -
+                                               inode->i_blkbits);
+               do {
+                       if (unlikely(pg_lblk < lblk_start)) {
+                               /*
+                                * This is possible when fs block size is less
+                                * than page size and our cluster starts/ends in
+                                * middle of the page. So we need to skip the
+                                * initial few blocks till we reach the 'lblk'
+                                */
+                               pg_lblk++;
+                               continue;
+                       }
+
+                       /* Check if the buffer is delayed allocated and that it
+                        * is not yet mapped. (when da-buffers are mapped during
+                        * their writeout, their da_mapped bit is set.)
+                        */
+                       if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
+                               page_cache_release(page);
+                               trace_ext4_find_delalloc_range(inode,
+                                               lblk_start, lblk_end,
+                                               search_hint_reverse,
+                                               1, i);
+                               return 1;
+                       }
+                       if (search_hint_reverse)
+                               i--;
+                       else
+                               i++;
+               } while ((i >= lblk_start) && (i <= lblk_end) &&
+                               ((bh = bh->b_this_page) != head));
+nextpage:
+               if (page)
+                       page_cache_release(page);
+               /*
+                * Move to next page. 'i' will be the first lblk in the next
+                * page.
+                */
+               if (search_hint_reverse)
+                       index--;
+               else
+                       index++;
+               i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       }
+
+       trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
+                                       search_hint_reverse, 0, 0);
+       return 0;
+}
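
Behind the page-cache walk, this is a bounded scan that may start from either end. A toy stand-in where a flags array replaces the buffer-head state:

#include <stdio.h>

/* flags[i] != 0 models a delayed-allocated, not-yet-mapped block */
static int find_delalloc_range(const char *flags, int lblk_start,
                               int lblk_end, int reverse)
{
        int i = reverse ? lblk_end : lblk_start;

        while (i >= lblk_start && i <= lblk_end) {
                if (flags[i])
                        return 1;
                i += reverse ? -1 : 1;
        }
        return 0;
}

int main(void)
{
        char flags[8] = { 0, 0, 0, 0, 0, 0, 1, 0 };

        /* truncating from the right: the hit near lblk_end is found
         * on the second probe instead of the seventh */
        printf("%d\n", find_delalloc_range(flags, 0, 7, 1));
        return 0;
}
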
+
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+                              int search_hint_reverse)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       ext4_lblk_t lblk_start, lblk_end;
+       lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+       lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
+
+       return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
+                                       search_hint_reverse);
+}
+
+/**
+ * Determines how many complete clusters (out of those specified by the 'map')
+ * are under delalloc and were reserved quota for.
+ * This function is called when we are writing out the blocks that were
+ * originally written with their allocation delayed, but then the space was
+ * allocated using fallocate() before the delayed allocation could be resolved.
+ * The cases to look for are:
+ * ('=' indicates delayed allocated blocks
+ *  '-' indicates non-delayed allocated blocks)
+ * (a) partial clusters towards beginning and/or end outside of allocated range
+ *     are not delalloc'ed.
+ *     Ex:
+ *     |----c---=|====c====|====c====|===-c----|
+ *              |++++++ allocated ++++++|
+ *     ==> 4 complete clusters in above example
+ *
+ * (b) partial cluster (outside of allocated range) towards either end is
+ *     marked for delayed allocation. In this case, we will exclude that
+ *     cluster.
+ *     Ex:
+ *     |----====c========|========c========|
+ *          |++++++ allocated ++++++|
+ *     ==> 1 complete cluster in above example
+ *
+ *     Ex:
+ *     |================c================|
+ *            |++++++ allocated ++++++|
+ *     ==> 0 complete clusters in above example
+ *
+ * The ext4_da_update_reserve_space will be called only if we
+ * determine here that there were some "entire" clusters that span
+ * this 'allocated' range.
+ * In the non-bigalloc case, this function will just end up returning num_blks
+ * without ever calling ext4_find_delalloc_range.
+ */
+static unsigned int
+get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
+                          unsigned int num_blks)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
+       ext4_lblk_t lblk_from, lblk_to, c_offset;
+       unsigned int allocated_clusters = 0;
+
+       alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
+       alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
+
+       /* max possible clusters for this allocation */
+       allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
+
+       trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
+
+       /* Check towards left side */
+       c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+       if (c_offset) {
+               lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+               lblk_to = lblk_from + c_offset - 1;
+
+               if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+                       allocated_clusters--;
+       }
+
+       /* Now check towards right. */
+       c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+       if (allocated_clusters && c_offset) {
+               lblk_from = lblk_start + num_blks;
+               lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
+
+               if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+                       allocated_clusters--;
+       }
+
+       return allocated_clusters;
+}
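
A compact model of the cases described in the comment block: count the clusters spanned by the allocated range, then drop an end cluster whose out-of-range portion is delalloc'ed. The page-cache search is replaced here by caller-supplied booleans (illustrative only):

#include <stdint.h>
#include <stdio.h>

static unsigned int reserved_clusters(uint32_t lblk_start, uint32_t num,
                                      uint32_t ratio,
                                      int left_is_delalloc,
                                      int right_is_delalloc)
{
        uint32_t first = lblk_start / ratio;
        uint32_t last = (lblk_start + num - 1) / ratio;
        unsigned int clusters = last - first + 1;

        /* partial cluster on the left that is delalloc'ed: exclude it */
        if ((lblk_start & (ratio - 1)) && left_is_delalloc)
                clusters--;
        /* same on the right */
        if (((lblk_start + num) & (ratio - 1)) && clusters &&
            right_is_delalloc)
                clusters--;
        return clusters;
}

int main(void)
{
        /* blocks 6..25 with 16-block clusters span clusters 0 and 1;
         * the left partial is delalloc'ed, so only 1 is counted */
        printf("%u\n", reserved_clusters(6, 20, 16, 1, 0));
        return 0;
}
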
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map,
@@ -3181,6 +3460,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                  flags, allocated);
        ext4_ext_show_leaf(inode, path);
 
+       trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated,
+                                                   newblock);
+
        /* get_block() before submit the IO, split the extent */
        if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
                ret = ext4_split_unwritten_extents(handle, inode, map,
@@ -3190,10 +3472,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                 * that this IO needs to be converted to written when IO is
                 * completed
                 */
-               if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
-                       io->flag = EXT4_IO_END_UNWRITTEN;
-                       atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-               } else
+               if (io)
+                       ext4_set_io_unwritten_flag(inode, io);
+               else
                        ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
                if (ext4_should_dioread_nolock(inode))
                        map->m_flags |= EXT4_MAP_UNINIT;
@@ -3234,14 +3515,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 
        /* buffered write, writepage time, convert*/
        ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
-       if (ret >= 0) {
+       if (ret >= 0)
                ext4_update_inode_fsync_trans(handle, inode, 1);
-               err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
-                                        map->m_len);
-               if (err < 0)
-                       goto out2;
-       }
-
 out:
        if (ret <= 0) {
                err = ret;
@@ -3270,11 +3545,24 @@ out:
         * But fallocate would have already updated quota and block
         * count for this offset. So cancel these reservation
         */
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-               ext4_da_update_reserve_space(inode, allocated, 0);
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+               unsigned int reserved_clusters;
+               reserved_clusters = get_reserved_cluster_alloc(inode,
+                               map->m_lblk, map->m_len);
+               if (reserved_clusters)
+                       ext4_da_update_reserve_space(inode,
+                                                    reserved_clusters,
+                                                    0);
+       }
 
 map_out:
        map->m_flags |= EXT4_MAP_MAPPED;
+       if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) {
+               err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+                                        map->m_len);
+               if (err < 0)
+                       goto out2;
+       }
 out1:
        if (allocated > map->m_len)
                allocated = map->m_len;
@@ -3289,6 +3577,111 @@ out2:
        return err ? err : allocated;
 }
 
+/*
+ * get_implied_cluster_alloc - check to see if the requested
+ * allocation (in the map structure) overlaps with a cluster already
+ * allocated in an extent.
+ *     @sb     The filesystem superblock structure
+ *     @map    The requested lblk->pblk mapping
+ *     @ex     The extent structure which might contain an implied
+ *                     cluster allocation
+ *
+ * This function is called by ext4_ext_map_blocks() after we failed to
+ * find blocks that were already in the inode's extent tree.  Hence,
+ * we know that the beginning of the requested region cannot overlap
+ * the extent from the inode's extent tree.  There are three cases we
+ * want to catch.  The first is this case:
+ *
+ *              |--- cluster # N--|
+ *    |--- extent ---| |---- requested region ---|
+ *                     |==========|
+ *
+ * The second case that we need to test for is this one:
+ *
+ *   |--------- cluster # N ----------------|
+ *        |--- requested region --|   |------- extent ----|
+ *        |=======================|
+ *
+ * The third case is when the requested region lies between two extents
+ * within the same cluster:
+ *          |------------- cluster # N-------------|
+ * |----- ex -----|                  |---- ex_right ----|
+ *                  |------ requested region ------|
+ *                  |================|
+ *
+ * In each of the above cases, we need to set the map->m_pblk and
+ * map->m_len so they correspond to the extent labelled as
+ * "|====|" from cluster #N, since it is already in use for data in
+ * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
+ * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
+ * as a new "allocated" block region.  Otherwise, we will return 0 and
+ * ext4_ext_map_blocks() will then allocate one or more new clusters
+ * by calling ext4_mb_new_blocks().
+ */
+static int get_implied_cluster_alloc(struct super_block *sb,
+                                    struct ext4_map_blocks *map,
+                                    struct ext4_extent *ex,
+                                    struct ext4_ext_path *path)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+       ext4_lblk_t ex_cluster_start, ex_cluster_end;
+       ext4_lblk_t rr_cluster_start, rr_cluster_end;
+       ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
+       ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
+       unsigned short ee_len = ext4_ext_get_actual_len(ex);
+
+       /* The extent passed in that we are trying to match */
+       ex_cluster_start = EXT4_B2C(sbi, ee_block);
+       ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
+
+       /* The requested region passed into ext4_map_blocks() */
+       rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
+       rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
+
+       if ((rr_cluster_start == ex_cluster_end) ||
+           (rr_cluster_start == ex_cluster_start)) {
+               if (rr_cluster_start == ex_cluster_end)
+                       ee_start += ee_len - 1;
+               map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
+                       c_offset;
+               map->m_len = min(map->m_len,
+                                (unsigned) sbi->s_cluster_ratio - c_offset);
+               /*
+                * Check for and handle this case:
+                *
+                *   |--------- cluster # N-------------|
+                *                     |------- extent ----|
+                *         |--- requested region ---|
+                *         |===========|
+                */
+
+               if (map->m_lblk < ee_block)
+                       map->m_len = min(map->m_len, ee_block - map->m_lblk);
+
+               /*
+                * Check for the case where there is already another allocated
+                * block to the right of 'ex' but before the end of the cluster.
+                *
+                *          |------------- cluster # N-------------|
+                * |----- ex -----|                  |---- ex_right ----|
+                *                  |------ requested region ------|
+                *                  |================|
+                */
+               if (map->m_lblk > ee_block) {
+                       ext4_lblk_t next = ext4_ext_next_allocated_block(path);
+                       map->m_len = min(map->m_len, next - map->m_lblk);
+               }
+
+               trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
+               return 1;
+       }
+
+       trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
+       return 0;
+}
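
The decision logic reduces to: does the requested block share a cluster with either end of the neighboring extent? A user-space approximation (cluster macros modeled with division and masking; the map->m_len trimming of the kernel version is omitted):

#include <stdint.h>
#include <stdio.h>

/* returns 1 and the physical block to reuse if req_lblk falls in a
 * cluster already backed by the extent [ee_block, ee_block+ee_len) */
static int implied_cluster(uint32_t req_lblk, uint32_t ee_block,
                           uint32_t ee_len, uint64_t ee_start,
                           uint32_t ratio, uint64_t *pblk_out)
{
        uint32_t rr = req_lblk / ratio;
        uint32_t ex_first = ee_block / ratio;
        uint32_t ex_last = (ee_block + ee_len - 1) / ratio;
        uint32_t c_offset = req_lblk & (ratio - 1);
        uint64_t anchor = ee_start;

        if (rr != ex_first && rr != ex_last)
                return 0;
        if (rr == ex_last)
                anchor += ee_len - 1;   /* anchor on the extent's tail */
        *pblk_out = (anchor & ~(uint64_t)(ratio - 1)) + c_offset;
        return 1;
}

int main(void)
{
        uint64_t pblk;

        /* extent lblk 0..7 -> pblk 256..263, ratio 16; request lblk 10 */
        if (implied_cluster(10, 0, 8, 256, 16, &pblk))
                printf("reuse pblk %llu\n", (unsigned long long)pblk);
        return 0;
}
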
+
+
 /*
  * Block allocation/map/preallocation routine for extents based files
  *
@@ -3311,15 +3704,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map, int flags)
 {
        struct ext4_ext_path *path = NULL;
-       struct ext4_extent newex, *ex;
+       struct ext4_extent newex, *ex, *ex2;
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        ext4_fsblk_t newblock = 0;
-       int err = 0, depth, ret;
-       unsigned int allocated = 0;
+       int free_on_err = 0, err = 0, depth, ret;
+       unsigned int allocated = 0, offset = 0;
+       unsigned int allocated_clusters = 0;
        unsigned int punched_out = 0;
        unsigned int result = 0;
        struct ext4_allocation_request ar;
        ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
-       struct ext4_map_blocks punch_map;
+       ext4_lblk_t cluster_offset;
 
        ext_debug("blocks %u/%u requested for inode %lu\n",
                  map->m_lblk, map->m_len, inode->i_ino);
@@ -3329,6 +3724,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
                ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
                if (!newex.ee_start_lo && !newex.ee_start_hi) {
+                       if ((sbi->s_cluster_ratio > 1) &&
+                           ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+                               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
                        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
                                /*
                                 * block isn't allocated yet and
@@ -3339,6 +3738,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        /* we should allocate requested block */
                } else {
                        /* block is already allocated */
+                       if (sbi->s_cluster_ratio > 1)
+                               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
                        newblock = map->m_lblk
                                   - le32_to_cpu(newex.ee_block)
                                   + ext4_ext_pblock(&newex);
@@ -3384,8 +3785,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                 * we split out initialized portions during a write.
                 */
                ee_len = ext4_ext_get_actual_len(ex);
+
+               trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
+
                /* if found extent covers block, simply return it */
                if (in_range(map->m_lblk, ee_block, ee_len)) {
+                       struct ext4_map_blocks punch_map;
+                       ext4_fsblk_t partial_cluster = 0;
+
                        newblock = map->m_lblk - ee_block + ee_start;
                        /* number of remaining blocks in the extent */
                        allocated = ee_len - (map->m_lblk - ee_block);
@@ -3469,7 +3876,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        ext4_ext_invalidate_cache(inode);
 
                        err = ext4_ext_rm_leaf(handle, inode, path,
-                               map->m_lblk, map->m_lblk + punched_out);
+                                              &partial_cluster, map->m_lblk,
+                                              map->m_lblk + punched_out);
 
                        if (!err && path->p_hdr->eh_entries == 0) {
                                /*
@@ -3492,6 +3900,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                }
        }
 
+       if ((sbi->s_cluster_ratio > 1) &&
+           ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
        /*
         * requested block isn't allocated yet;
         * we couldn't try to create block if create flag is zero
@@ -3504,9 +3916,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
                goto out2;
        }
+
        /*
         * Okay, we need to do block allocation.
         */
+       map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
+       newex.ee_block = cpu_to_le32(map->m_lblk);
+       cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+
+       /*
+        * If we are doing bigalloc, check to see if the extent returned
+        * by ext4_ext_find_extent() implies a cluster we can use.
+        */
+       if (cluster_offset && ex &&
+           get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
+               ar.len = allocated = map->m_len;
+               newblock = map->m_pblk;
+               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+               goto got_allocated_blocks;
+       }
 
        /* find neighbour allocated blocks */
        ar.lleft = map->m_lblk;
@@ -3514,10 +3942,21 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        if (err)
                goto out2;
        ar.lright = map->m_lblk;
-       err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
+       ex2 = NULL;
+       err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
        if (err)
                goto out2;
 
+       /* Check if the extent after searching to the right implies a
+        * cluster we can use. */
+       if ((sbi->s_cluster_ratio > 1) && ex2 &&
+           get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
+               ar.len = allocated = map->m_len;
+               newblock = map->m_pblk;
+               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+               goto got_allocated_blocks;
+       }
+
        /*
         * See if request is beyond maximum number of blocks we can have in
         * a single extent. For an initialized extent this limit is
@@ -3532,9 +3971,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                map->m_len = EXT_UNINIT_MAX_LEN;
 
        /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
-       newex.ee_block = cpu_to_le32(map->m_lblk);
        newex.ee_len = cpu_to_le16(map->m_len);
-       err = ext4_ext_check_overlap(inode, &newex, path);
+       err = ext4_ext_check_overlap(sbi, inode, &newex, path);
        if (err)
                allocated = ext4_ext_get_actual_len(&newex);
        else
@@ -3544,7 +3982,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        ar.inode = inode;
        ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
        ar.logical = map->m_lblk;
-       ar.len = allocated;
+       /*
+        * We calculate the offset from the beginning of the cluster
+        * for the logical block number, since when we allocate a
+        * physical cluster, the physical block should start at the
+        * same offset from the beginning of the cluster.  This is
+        * needed so that future calls to get_implied_cluster_alloc()
+        * work correctly.
+        */
+       offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+       ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
+       ar.goal -= offset;
+       ar.logical -= offset;
        if (S_ISREG(inode->i_mode))
                ar.flags = EXT4_MB_HINT_DATA;
        else
@@ -3557,9 +4006,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                goto out2;
        ext_debug("allocate new block: goal %llu, found %llu/%u\n",
                  ar.goal, newblock, allocated);
+       free_on_err = 1;
+       allocated_clusters = ar.len;
+       ar.len = EXT4_C2B(sbi, ar.len) - offset;
+       if (ar.len > allocated)
+               ar.len = allocated;
 
+got_allocated_blocks:
        /* try to insert new extent into found leaf and return */
-       ext4_ext_store_pblock(&newex, newblock);
+       ext4_ext_store_pblock(&newex, newblock + offset);
        newex.ee_len = cpu_to_le16(ar.len);
        /* Mark uninitialized */
        if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
@@ -3572,10 +4027,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                 * that we need to perform conversion when IO is done.
                 */
                if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-                       if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
-                               io->flag = EXT4_IO_END_UNWRITTEN;
-                               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-                       } else
+                       if (io)
+                               ext4_set_io_unwritten_flag(inode, io);
+                       else
                                ext4_set_inode_state(inode,
                                                     EXT4_STATE_DIO_UNWRITTEN);
                }
@@ -3583,11 +4037,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        map->m_flags |= EXT4_MAP_UNINIT;
        }
 
-       err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
+       err = 0;
+       if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0)
+               err = check_eofblocks_fl(handle, inode, map->m_lblk,
+                                        path, ar.len);
        if (!err)
                err = ext4_ext_insert_extent(handle, inode, path,
                                             &newex, flags);
-       if (err) {
+       if (err && free_on_err) {
                int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
                        EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
                /* free data blocks we just allocated */
@@ -3610,8 +4067,82 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
         * Update reserved blocks/metadata blocks after successful
         * block allocation which had been deferred till now.
         */
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-               ext4_da_update_reserve_space(inode, allocated, 1);
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+               unsigned int reserved_clusters;
+               /*
+                * Check how many clusters we had reserved for this allocated range
+                */
+               reserved_clusters = get_reserved_cluster_alloc(inode,
+                                               map->m_lblk, allocated);
+               if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
+                       if (reserved_clusters) {
+                               /*
+                                * We have clusters reserved for this range.
+                                * But since we are not doing actual allocation
+                                * and are simply using blocks from a previously
+                                * allocated cluster, we should release the
+                                * reservation and not claim quota.
+                                */
+                               ext4_da_update_reserve_space(inode,
+                                               reserved_clusters, 0);
+                       }
+               } else {
+                       BUG_ON(allocated_clusters < reserved_clusters);
+                       /* We will claim quota for all newly allocated blocks.*/
+                       ext4_da_update_reserve_space(inode, allocated_clusters,
+                                                       1);
+                       if (reserved_clusters < allocated_clusters) {
+                               struct ext4_inode_info *ei = EXT4_I(inode);
+                               int reservation = allocated_clusters -
+                                                 reserved_clusters;
+                               /*
+                                * It seems we claimed some clusters outside of
+                                * the range of this allocation. We should give
+                                * them back to the reservation pool. This can
+                                * happen in the following case:
+                                *
+                                * * Suppose s_cluster_ratio is 4 (i.e., each
+                                *   cluster has 4 blocks). Thus, the clusters
+                                *   are [0-3],[4-7],[8-11]...
+                                * * First comes delayed allocation write for
+                                *   logical blocks 10 & 11. Since there were no
+                                *   previous delayed allocated blocks in the
+                                *   range [8-11], we would reserve 1 cluster
+                                *   for this write.
+                                * * Next comes a write for logical blocks 3 to
+                                *   8. In this case, we will reserve 2 clusters
+                                *   (for [0-3] and [4-7]), but not for [8-11],
+                                *   as that range already has delayed allocated
+                                *   blocks. Thus the total reserved cluster
+                                *   count now becomes 3.
+                                * * Now, during the delayed allocation writeout
+                                *   time, we will first write blocks [3-8] and
+                                *   allocate 3 clusters for writing these
+                                *   blocks. We would also claim all three of
+                                *   these clusters.
+                                * * Now when we come here to writeout the
+                                *   blocks [10-11], we would expect to claim
+                                *   the reservation of 1 cluster we had made
+                                *   (and we would claim it since there are no
+                                *   more delayed allocated blocks in the range
+                                *   [8-11]). But our reserved cluster count had
+                                *   already gone to 0.
+                                *
+                                *   Thus, at step 4 above, when we determine
+                                *   that there are still some unwritten delayed
+                                *   allocated blocks outside of our current
+                                *   block range, we should increment the
+                                *   reserved cluster count so that when the
+                                *   remaining blocks finally get written, we
+                                *   can claim them.
+                                */
+                               dquot_reserve_block(inode,
+                                               EXT4_C2B(sbi, reservation));
+                               spin_lock(&ei->i_block_reservation_lock);
+                               ei->i_reserved_data_blocks += reservation;
+                               spin_unlock(&ei->i_block_reservation_lock);
+                       }
+               }
+       }
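The scenario spelled out in the comment above reduces to simple cluster counting. A toy recount of the numbers it uses (ratio 4, a write to blocks [10-11] followed by one to [3-8]), purely illustrative:

/* Toy recount of the comment's scenario; illustrative only. */
#include <assert.h>

int main(void)
{
	unsigned reserved_clusters  = 2;	/* reserved for the [3-8] write:
						   clusters [0-3] and [4-7];
						   the [8-11] reservation belongs
						   to the earlier [10-11] write */
	unsigned allocated_clusters = 3;	/* writing blocks [3-8] touches
						   [0-3], [4-7] and [8-11] */

	/* the writeout claimed one more cluster than this range had
	 * reserved; give that one back so blocks [10-11] can claim it */
	unsigned reservation = allocated_clusters - reserved_clusters;

	assert(reservation == 1);
	return 0;
}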
 
        /*
         * Cache the extent and update transaction to commit on fdatasync only
@@ -3634,12 +4165,12 @@ out2:
                ext4_ext_drop_refs(path);
                kfree(path);
        }
-       trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
-               newblock, map->m_len, err ? err : allocated);
-
        result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
                        punched_out : allocated;
 
+       trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+               newblock, map->m_len, err ? err : result);
+
        return err ? err : result;
 }
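Several hunks above convert between blocks and clusters via EXT4_C2B() and EXT4_NUM_B2C(). A hedged sketch of what such shift-based conversions look like for a power-of-two ratio; the real macros live in fs/ext4/ext4.h and take an sbi argument:

/* Illustrative block<->cluster conversions; not the real macros. */
#define CLUSTER_BITS 2				/* ratio 4 => 2 bits */
#define C2B(c)      ((c) << CLUSTER_BITS)	/* clusters to blocks */
#define B2C(b)      ((b) >> CLUSTER_BITS)	/* blocks to clusters (floor) */
#define NUM_B2C(b)  (((b) + (1 << CLUSTER_BITS) - 1) >> CLUSTER_BITS)

/* e.g. an in-cluster offset of 2 plus 5 requested blocks spans
 * NUM_B2C(7) == 2 clusters, matching ar.len above */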
 
@@ -3649,6 +4180,7 @@ void ext4_ext_truncate(struct inode *inode)
        struct super_block *sb = inode->i_sb;
        ext4_lblk_t last_block;
        handle_t *handle;
+       loff_t page_len;
        int err = 0;
 
        /*
@@ -3665,8 +4197,16 @@ void ext4_ext_truncate(struct inode *inode)
        if (IS_ERR(handle))
                return;
 
-       if (inode->i_size & (sb->s_blocksize - 1))
-               ext4_block_truncate_page(handle, mapping, inode->i_size);
+       if (inode->i_size % PAGE_CACHE_SIZE != 0) {
+               page_len = PAGE_CACHE_SIZE -
+                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+               err = ext4_discard_partial_page_buffers(handle,
+                       mapping, inode->i_size, page_len, 0);
+
+               if (err)
+                       goto out_stop;
+       }
 
        if (ext4_orphan_add(handle, inode))
                goto out_stop;
@@ -3760,6 +4300,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        int ret = 0;
        int ret2 = 0;
        int retries = 0;
+       int flags;
        struct ext4_map_blocks map;
        unsigned int credits, blkbits = inode->i_blkbits;
 
@@ -3796,6 +4337,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
                return ret;
        }
+       flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
+       if (mode & FALLOC_FL_KEEP_SIZE)
+               flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+       /*
+        * Don't normalize the request if it can fit in one extent so
+        * that it doesn't get unnecessarily split into multiple
+        * extents.
+        */
+       if (len <= EXT_UNINIT_MAX_LEN << blkbits)
+               flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 retry:
        while (ret >= 0 && ret < max_blocks) {
                map.m_lblk = map.m_lblk + ret;
@@ -3805,9 +4356,7 @@ retry:
                        ret = PTR_ERR(handle);
                        break;
                }
-               ret = ext4_map_blocks(handle, inode, &map,
-                                     EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
-                                     EXT4_GET_BLOCKS_NO_NORMALIZE);
+               ret = ext4_map_blocks(handle, inode, &map, flags);
                if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
                        WARN_ON(ret <= 0);
@@ -4102,7 +4651,6 @@ found_delayed_extent:
                return EXT_BREAK;
        return EXT_CONTINUE;
 }
-
 /* fiemap flags we can handle specified here */
 #define EXT4_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
 
@@ -4162,17 +4710,28 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
        struct address_space *mapping = inode->i_mapping;
        struct ext4_map_blocks map;
        handle_t *handle;
-       loff_t first_block_offset, last_block_offset, block_len;
-       loff_t first_page, last_page, first_page_offset, last_page_offset;
+       loff_t first_page, last_page, page_len;
+       loff_t first_page_offset, last_page_offset;
        int ret, credits, blocks_released, err = 0;
 
+       /* No need to punch hole beyond i_size */
+       if (offset >= inode->i_size)
+               return 0;
+
+       /*
+        * If the hole extends beyond i_size, set the hole
+        * to end after the page that contains i_size
+        */
+       if (offset + length > inode->i_size) {
+               length = inode->i_size +
+                  PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
+                  offset;
+       }
+
        first_block = (offset + sb->s_blocksize - 1) >>
                EXT4_BLOCK_SIZE_BITS(sb);
        last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
 
-       first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb);
-       last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb);
-
        first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        last_page = (offset + length) >> PAGE_CACHE_SHIFT;
 
@@ -4185,11 +4744,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
         */
        if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                err = filemap_write_and_wait_range(mapping,
-                       first_page_offset == 0 ? 0 : first_page_offset-1,
-                       last_page_offset);
+                       offset, offset + length - 1);
 
-                       if (err)
-                               return err;
+               if (err)
+                       return err;
        }
 
        /* Now release the pages */
@@ -4211,24 +4769,64 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
                goto out;
 
        /*
-        * Now we need to zero out the un block aligned data.
-        * If the file is smaller than a block, just
-        * zero out the middle
+        * Now we need to zero out the non-page-aligned data in the
+        * pages at the start and tail of the hole, and unmap the buffer
+        * heads for the block-aligned regions of the page that were
+        * completely zeroed.
         */
-       if (first_block > last_block)
-               ext4_block_zero_page_range(handle, mapping, offset, length);
-       else {
-               /* zero out the head of the hole before the first block */
-               block_len  = first_block_offset - offset;
-               if (block_len > 0)
-                       ext4_block_zero_page_range(handle, mapping,
-                                                  offset, block_len);
-
-               /* zero out the tail of the hole after the last block */
-               block_len = offset + length - last_block_offset;
-               if (block_len > 0) {
-                       ext4_block_zero_page_range(handle, mapping,
-                                       last_block_offset, block_len);
+       if (first_page > last_page) {
+               /*
+                * If the file space being truncated is contained within a
+                * page, just zero out and unmap the middle of that page.
+                */
+               err = ext4_discard_partial_page_buffers(handle,
+                       mapping, offset, length, 0);
+
+               if (err)
+                       goto out;
+       } else {
+               /*
+                * zero out and unmap the partial page that contains
+                * the start of the hole
+                */
+               page_len  = first_page_offset - offset;
+               if (page_len > 0) {
+                       err = ext4_discard_partial_page_buffers(handle, mapping,
+                                                  offset, page_len, 0);
+                       if (err)
+                               goto out;
+               }
+
+               /*
+                * zero out and unmap the partial page that contains
+                * the end of the hole
+                */
+               page_len = offset + length - last_page_offset;
+               if (page_len > 0) {
+                       err = ext4_discard_partial_page_buffers(handle, mapping,
+                                       last_page_offset, page_len, 0);
+                       if (err)
+                               goto out;
+               }
+       }
+
+       /*
+        * If i_size is contained in the last page, we need to
+        * unmap and zero the partial page after i_size
+        */
+       if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
+          inode->i_size % PAGE_CACHE_SIZE != 0) {
+
+               page_len = PAGE_CACHE_SIZE -
+                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+               if (page_len > 0) {
+                       err = ext4_discard_partial_page_buffers(handle,
+                         mapping, inode->i_size, page_len, 0);
+
+                       if (err)
+                               goto out;
                }
        }
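The first_page/last_page bookkeeping in ext4_ext_punch_hole() above is plain page rounding. A small standalone sketch of the round-up/round-down involved, assuming 4K pages:

/* Sketch of the hole's page rounding; not the ext4 code itself. */
#define PAGE_SIZE_  4096UL
#define PAGE_SHIFT_ 12

static void hole_pages(unsigned long offset, unsigned long length)
{
	/* first whole page inside the hole (round up) */
	unsigned long first_page = (offset + PAGE_SIZE_ - 1) >> PAGE_SHIFT_;
	/* first page past the end of the hole (round down) */
	unsigned long last_page = (offset + length) >> PAGE_SHIFT_;
	/* bytes of the partial head page that still need zeroing */
	unsigned long head_len = (first_page << PAGE_SHIFT_) - offset;

	/* offset 1000, length 9000 => first_page 1, last_page 2,
	 * head_len 3096 */
	(void)last_page; (void)head_len;
}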
 
index b9548f477bb8882c29c894f39459e5e39b8d4f82..cb70f1812a70f5ca8452e98776cd309ad6638055 100644 (file)
@@ -181,8 +181,8 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
                path.dentry = mnt->mnt_root;
                cp = d_path(&path, buf, sizeof(buf));
                if (!IS_ERR(cp)) {
-                       memcpy(sbi->s_es->s_last_mounted, cp,
-                              sizeof(sbi->s_es->s_last_mounted));
+                       strlcpy(sbi->s_es->s_last_mounted, cp,
+                               sizeof(sbi->s_es->s_last_mounted));
                        ext4_mark_super_dirty(sb);
                }
        }
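The memcpy() to strlcpy() change matters because cp points at a NUL-terminated path that is usually much shorter than s_last_mounted; memcpy() of the full field size reads past the string, while strlcpy() stops at the NUL and guarantees termination. A userspace-flavored sketch of the same idea (strlcpy() is not in ISO C, so snprintf() stands in):

/* Sketch only: snprintf() approximates strlcpy() semantics here. */
#include <stdio.h>

int main(void)
{
	char dest[64];
	const char *cp = "/mnt";	/* like a d_path() result */

	/* memcpy(dest, cp, sizeof(dest)) would read 64 bytes from a
	 * 5-byte string; copy at most sizeof(dest)-1 and terminate: */
	snprintf(dest, sizeof(dest), "%s", cp);
	printf("%s\n", dest);
	return 0;
}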
index 036f78f7a1ef92cfc34a281665cf53c588a4a64e..00a2cb753efdeca63d6bbba4912729dc1c854182 100644 (file)
@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode)
  * to written.
  * The function return the number of pending IOs on success.
  */
-extern int ext4_flush_completed_IO(struct inode *inode)
+int ext4_flush_completed_IO(struct inode *inode)
 {
        ext4_io_end_t *io;
        struct ext4_inode_info *ei = EXT4_I(inode);
@@ -83,14 +83,12 @@ extern int ext4_flush_completed_IO(struct inode *inode)
        int ret = 0;
        int ret2 = 0;
 
-       if (list_empty(&ei->i_completed_io_list))
-               return ret;
-
        dump_completed_IO(inode);
        spin_lock_irqsave(&ei->i_completed_io_lock, flags);
        while (!list_empty(&ei->i_completed_io_list)){
                io = list_entry(ei->i_completed_io_list.next,
                                ext4_io_end_t, list);
+               list_del_init(&io->list);
                /*
                 * Calling ext4_end_io_nolock() to convert completed
                 * IO to written.
@@ -107,11 +105,9 @@ extern int ext4_flush_completed_IO(struct inode *inode)
                 */
                spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
                ret = ext4_end_io_nolock(io);
-               spin_lock_irqsave(&ei->i_completed_io_lock, flags);
                if (ret < 0)
                        ret2 = ret;
-               else
-                       list_del_init(&io->list);
+               spin_lock_irqsave(&ei->i_completed_io_lock, flags);
        }
        spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
        return (ret2 < 0) ? ret2 : 0;
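Moving list_del_init() ahead of the unlock follows the standard pattern for draining a lock-protected list: detach the entry while the lock is held, process it unlocked, then retake the lock, so no other CPU can pick up a half-processed entry. A generic sketch of the pattern, with placeholder types:

/* Generic "drain a lock-protected list" pattern, as used above;
 * struct item and process() are placeholders, not ext4 code. */
#include <linux/list.h>
#include <linux/spinlock.h>

struct item { struct list_head list; };

static LIST_HEAD(head);
static DEFINE_SPINLOCK(lock);

static void process(struct item *it) { /* may sleep, take locks */ }

static void drain(void)
{
	unsigned long flags;

	spin_lock_irqsave(&lock, flags);
	while (!list_empty(&head)) {
		struct item *it = list_entry(head.next, struct item, list);

		/* detach under the lock so nobody else sees it again */
		list_del_init(&it->list);
		spin_unlock_irqrestore(&lock, flags);
		process(it);
		spin_lock_irqsave(&lock, flags);
	}
	spin_unlock_irqrestore(&lock, flags);
}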
index 9c63f273b550497759103ad0f6b4aaafd8e2049d..00beb4f9cc4ff0501012b8bdf338541fcb5e5a69 100644 (file)
@@ -78,7 +78,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
         * allocation, essentially implementing a per-group read-only flag. */
        if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
                ext4_error(sb, "Checksum bad for group %u", block_group);
-               ext4_free_blks_set(sb, gdp, 0);
+               ext4_free_group_clusters_set(sb, gdp, 0);
                ext4_free_inodes_set(sb, gdp, 0);
                ext4_itable_unused_set(sb, gdp, 0);
                memset(bh->b_data, 0xff, sb->s_blocksize);
@@ -293,121 +293,9 @@ error_return:
        ext4_std_error(sb, fatal);
 }
 
-/*
- * There are two policies for allocating an inode.  If the new inode is
- * a directory, then a forward search is made for a block group with both
- * free space and a low directory-to-inode ratio; if that fails, then of
- * the groups with above-average free space, that group with the fewest
- * directories already is chosen.
- *
- * For other inodes, search forward from the parent directory's block
- * group to find a free inode.
- */
-static int find_group_dir(struct super_block *sb, struct inode *parent,
-                               ext4_group_t *best_group)
-{
-       ext4_group_t ngroups = ext4_get_groups_count(sb);
-       unsigned int freei, avefreei;
-       struct ext4_group_desc *desc, *best_desc = NULL;
-       ext4_group_t group;
-       int ret = -1;
-
-       freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
-       avefreei = freei / ngroups;
-
-       for (group = 0; group < ngroups; group++) {
-               desc = ext4_get_group_desc(sb, group, NULL);
-               if (!desc || !ext4_free_inodes_count(sb, desc))
-                       continue;
-               if (ext4_free_inodes_count(sb, desc) < avefreei)
-                       continue;
-               if (!best_desc ||
-                   (ext4_free_blks_count(sb, desc) >
-                    ext4_free_blks_count(sb, best_desc))) {
-                       *best_group = group;
-                       best_desc = desc;
-                       ret = 0;
-               }
-       }
-       return ret;
-}
-
-#define free_block_ratio 10
-
-static int find_group_flex(struct super_block *sb, struct inode *parent,
-                          ext4_group_t *best_group)
-{
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_group_desc *desc;
-       struct flex_groups *flex_group = sbi->s_flex_groups;
-       ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
-       ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
-       ext4_group_t ngroups = ext4_get_groups_count(sb);
-       int flex_size = ext4_flex_bg_size(sbi);
-       ext4_group_t best_flex = parent_fbg_group;
-       int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
-       int flexbg_free_blocks;
-       int flex_freeb_ratio;
-       ext4_group_t n_fbg_groups;
-       ext4_group_t i;
-
-       n_fbg_groups = (ngroups + flex_size - 1) >>
-               sbi->s_log_groups_per_flex;
-
-find_close_to_parent:
-       flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks);
-       flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
-       if (atomic_read(&flex_group[best_flex].free_inodes) &&
-           flex_freeb_ratio > free_block_ratio)
-               goto found_flexbg;
-
-       if (best_flex && best_flex == parent_fbg_group) {
-               best_flex--;
-               goto find_close_to_parent;
-       }
-
-       for (i = 0; i < n_fbg_groups; i++) {
-               if (i == parent_fbg_group || i == parent_fbg_group - 1)
-                       continue;
-
-               flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks);
-               flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
-
-               if (flex_freeb_ratio > free_block_ratio &&
-                   (atomic_read(&flex_group[i].free_inodes))) {
-                       best_flex = i;
-                       goto found_flexbg;
-               }
-
-               if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) ||
-                   ((atomic_read(&flex_group[i].free_blocks) >
-                     atomic_read(&flex_group[best_flex].free_blocks)) &&
-                    atomic_read(&flex_group[i].free_inodes)))
-                       best_flex = i;
-       }
-
-       if (!atomic_read(&flex_group[best_flex].free_inodes) ||
-           !atomic_read(&flex_group[best_flex].free_blocks))
-               return -1;
-
-found_flexbg:
-       for (i = best_flex * flex_size; i < ngroups &&
-                    i < (best_flex + 1) * flex_size; i++) {
-               desc = ext4_get_group_desc(sb, i, NULL);
-               if (ext4_free_inodes_count(sb, desc)) {
-                       *best_group = i;
-                       goto out;
-               }
-       }
-
-       return -1;
-out:
-       return 0;
-}
-
 struct orlov_stats {
        __u32 free_inodes;
-       __u32 free_blocks;
+       __u32 free_clusters;
        __u32 used_dirs;
 };
 
@@ -424,7 +312,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
 
        if (flex_size > 1) {
                stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
-               stats->free_blocks = atomic_read(&flex_group[g].free_blocks);
+               stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
                stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
                return;
        }
@@ -432,11 +320,11 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
        desc = ext4_get_group_desc(sb, g, NULL);
        if (desc) {
                stats->free_inodes = ext4_free_inodes_count(sb, desc);
-               stats->free_blocks = ext4_free_blks_count(sb, desc);
+               stats->free_clusters = ext4_free_group_clusters(sb, desc);
                stats->used_dirs = ext4_used_dirs_count(sb, desc);
        } else {
                stats->free_inodes = 0;
-               stats->free_blocks = 0;
+               stats->free_clusters = 0;
                stats->used_dirs = 0;
        }
 }
@@ -471,10 +359,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
        ext4_group_t real_ngroups = ext4_get_groups_count(sb);
        int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
        unsigned int freei, avefreei;
-       ext4_fsblk_t freeb, avefreeb;
+       ext4_fsblk_t freeb, avefreec;
        unsigned int ndirs;
        int max_dirs, min_inodes;
-       ext4_grpblk_t min_blocks;
+       ext4_grpblk_t min_clusters;
        ext4_group_t i, grp, g, ngroups;
        struct ext4_group_desc *desc;
        struct orlov_stats stats;
@@ -490,9 +378,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 
        freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
        avefreei = freei / ngroups;
-       freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-       avefreeb = freeb;
-       do_div(avefreeb, ngroups);
+       freeb = EXT4_C2B(sbi,
+               percpu_counter_read_positive(&sbi->s_freeclusters_counter));
+       avefreec = freeb;
+       do_div(avefreec, ngroups);
        ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 
        if (S_ISDIR(mode) &&
@@ -518,7 +407,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
                                continue;
                        if (stats.free_inodes < avefreei)
                                continue;
-                       if (stats.free_blocks < avefreeb)
+                       if (stats.free_clusters < avefreec)
                                continue;
                        grp = g;
                        ret = 0;
@@ -556,7 +445,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
        min_inodes = avefreei - inodes_per_group*flex_size / 4;
        if (min_inodes < 1)
                min_inodes = 1;
-       min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
+       min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
 
        /*
         * Start looking in the flex group where we last allocated an
@@ -575,7 +464,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
                        continue;
                if (stats.free_inodes < min_inodes)
                        continue;
-               if (stats.free_blocks < min_blocks)
+               if (stats.free_clusters < min_clusters)
                        continue;
                goto found_flex_bg;
        }
@@ -659,7 +548,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
        *group = parent_group;
        desc = ext4_get_group_desc(sb, *group, NULL);
        if (desc && ext4_free_inodes_count(sb, desc) &&
-                       ext4_free_blks_count(sb, desc))
+           ext4_free_group_clusters(sb, desc))
                return 0;
 
        /*
@@ -683,7 +572,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
                        *group -= ngroups;
                desc = ext4_get_group_desc(sb, *group, NULL);
                if (desc && ext4_free_inodes_count(sb, desc) &&
-                               ext4_free_blks_count(sb, desc))
+                   ext4_free_group_clusters(sb, desc))
                        return 0;
        }
 
@@ -802,7 +691,7 @@ err_ret:
  * group to find a free inode.
  */
 struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
-                            const struct qstr *qstr, __u32 goal)
+                            const struct qstr *qstr, __u32 goal, uid_t *owner)
 {
        struct super_block *sb;
        struct buffer_head *inode_bitmap_bh = NULL;
@@ -816,8 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
        int ret2, err = 0;
        struct inode *ret;
        ext4_group_t i;
-       int free = 0;
-       static int once = 1;
        ext4_group_t flex_group;
 
        /* Cannot create files in a deleted directory */
@@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
                goto got_group;
        }
 
-       if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
-               ret2 = find_group_flex(sb, dir, &group);
-               if (ret2 == -1) {
-                       ret2 = find_group_other(sb, dir, &group, mode);
-                       if (ret2 == 0 && once) {
-                               once = 0;
-                               printk(KERN_NOTICE "ext4: find_group_flex "
-                                      "failed, fallback succeeded dir %lu\n",
-                                      dir->i_ino);
-                       }
-               }
-               goto got_group;
-       }
-
-       if (S_ISDIR(mode)) {
-               if (test_opt(sb, OLDALLOC))
-                       ret2 = find_group_dir(sb, dir, &group);
-               else
-                       ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
-       } else
+       if (S_ISDIR(mode))
+               ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
+       else
                ret2 = find_group_other(sb, dir, &group, mode);
 
 got_group:
@@ -950,26 +820,21 @@ got:
                        goto fail;
                }
 
-               free = 0;
-               ext4_lock_group(sb, group);
+               BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
+               err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
+               brelse(block_bitmap_bh);
+
                /* recheck and clear flag under lock if we still need to */
+               ext4_lock_group(sb, group);
                if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-                       free = ext4_free_blocks_after_init(sb, group, gdp);
                        gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-                       ext4_free_blks_set(sb, gdp, free);
+                       ext4_free_group_clusters_set(sb, gdp,
+                               ext4_free_clusters_after_init(sb, group, gdp));
                        gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
                                                                gdp);
                }
                ext4_unlock_group(sb, group);
 
-               /* Don't need to dirty bitmap block if we didn't change it */
-               if (free) {
-                       BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
-                       err = ext4_handle_dirty_metadata(handle,
-                                                       NULL, block_bitmap_bh);
-               }
-
-               brelse(block_bitmap_bh);
                if (err)
                        goto fail;
        }
@@ -987,8 +852,11 @@ got:
                flex_group = ext4_flex_group(sbi, group);
                atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
        }
-
-       if (test_opt(sb, GRPID)) {
+       if (owner) {
+               inode->i_mode = mode;
+               inode->i_uid = owner[0];
+               inode->i_gid = owner[1];
+       } else if (test_opt(sb, GRPID)) {
                inode->i_mode = mode;
                inode->i_uid = current_fsuid();
                inode->i_gid = dir->i_gid;
@@ -1005,11 +873,7 @@ got:
        ei->i_dir_start_lookup = 0;
        ei->i_disksize = 0;
 
-       /*
-        * Don't inherit extent flag from directory, amongst others. We set
-        * extent flag on newly created directory and file only if -o extent
-        * mount option is specified
-        */
+       /* Don't inherit extent flag from directory, amongst others. */
        ei->i_flags =
                ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
        ei->i_file_acl = 0;
@@ -1084,7 +948,7 @@ fail_free_drop:
 fail_drop:
        dquot_drop(inode);
        inode->i_flags |= S_NOQUOTA;
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        unlock_new_inode(inode);
        iput(inode);
        brelse(inode_bitmap_bh);
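The new owner argument to ext4_new_inode() lets a caller pin the uid/gid pair explicitly instead of inheriting it from current_fsuid() or GRPID; passing NULL keeps the old behavior. A hypothetical call, assuming handle and dir are in scope:

	/* Hypothetical caller; owner[0] is the uid and owner[1] the
	 * gid, per the assignments in ext4_new_inode() above. */
	uid_t owner[2] = { 0, 0 };	/* root:root */
	struct inode *inode = ext4_new_inode(handle, dir, S_IFREG | 0600,
					     NULL /* qstr */, 0 /* goal */,
					     owner);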
@@ -1235,7 +1099,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
  * inode allocation from the current group, so we take alloc_sem lock, to
  * block ext4_claim_inode until we are finished.
  */
-extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
                                 int barrier)
 {
        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
index 0962642119c0475953fc70da38a5ed9bbea417a2..3cfc73fbca8efee96de0d78ab594794340b9c0e5 100644 (file)
@@ -699,6 +699,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
        /*
         * Okay, we need to do block allocation.
        */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                      EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+               EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
+                                "non-extent mapped inodes with bigalloc");
+               return -ENOSPC;
+       }
+
        goal = ext4_find_goal(inode, map->m_lblk, partial);
 
        /* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1343,7 +1350,9 @@ void ext4_ind_truncate(struct inode *inode)
        __le32 nr = 0;
        int n = 0;
        ext4_lblk_t last_block, max_block;
+       loff_t page_len;
        unsigned blocksize = inode->i_sb->s_blocksize;
+       int err;
 
        handle = start_transaction(inode);
        if (IS_ERR(handle))
@@ -1354,9 +1363,16 @@ void ext4_ind_truncate(struct inode *inode)
        max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
                                        >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
-       if (inode->i_size & (blocksize - 1))
-               if (ext4_block_truncate_page(handle, mapping, inode->i_size))
+       if (inode->i_size % PAGE_CACHE_SIZE != 0) {
+               page_len = PAGE_CACHE_SIZE -
+                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+               err = ext4_discard_partial_page_buffers(handle,
+                       mapping, inode->i_size, page_len, 0);
+
+               if (err)
                        goto out_stop;
+       }
 
        if (last_block != max_block) {
                n = ext4_block_to_path(inode, last_block, offsets, NULL);
index 0defe0bfe019a3083f0156f6473b303caf65f93b..cc5a6da030a149f31ea61cd35b9803782a4a57a7 100644 (file)
@@ -42,7 +42,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "ext4_extents.h"
 #include "truncate.h"
 
 #include <trace/events/ext4.h>
@@ -268,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
        struct ext4_inode_info *ei = EXT4_I(inode);
 
        spin_lock(&ei->i_block_reservation_lock);
-       trace_ext4_da_update_reserve_space(inode, used);
+       trace_ext4_da_update_reserve_space(inode, used, quota_claim);
        if (unlikely(used > ei->i_reserved_data_blocks)) {
                ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
                         "with only %d reserved data blocks\n",
@@ -281,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
        /* Update per-inode reservations */
        ei->i_reserved_data_blocks -= used;
        ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+       percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                           used + ei->i_allocated_meta_blocks);
        ei->i_allocated_meta_blocks = 0;
 
@@ -291,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+               percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                                   ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
@@ -300,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
 
        /* Update quota subsystem for data blocks */
        if (quota_claim)
-               dquot_claim_block(inode, used);
+               dquot_claim_block(inode, EXT4_C2B(sbi, used));
        else {
                /*
                 * We did fallocate with an offset that is already delayed
                 * allocated. So on delayed allocated writeback we should
                 * not re-claim the quota for fallocated blocks.
                 */
-               dquot_release_reservation_block(inode, used);
+               dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
        }
 
        /*
@@ -398,6 +397,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
        return num;
 }
 
+/*
+ * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
+ */
+static void set_buffers_da_mapped(struct inode *inode,
+                                  struct ext4_map_blocks *map)
+{
+       struct address_space *mapping = inode->i_mapping;
+       struct pagevec pvec;
+       int i, nr_pages;
+       pgoff_t index, end;
+
+       index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       end = (map->m_lblk + map->m_len - 1) >>
+               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+       pagevec_init(&pvec, 0);
+       while (index <= end) {
+               nr_pages = pagevec_lookup(&pvec, mapping, index,
+                                         min(end - index + 1,
+                                             (pgoff_t)PAGEVEC_SIZE));
+               if (nr_pages == 0)
+                       break;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+                       struct buffer_head *bh, *head;
+
+                       if (unlikely(page->mapping != mapping) ||
+                           !PageDirty(page))
+                               break;
+
+                       if (page_has_buffers(page)) {
+                               bh = head = page_buffers(page);
+                               do {
+                                       set_buffer_da_mapped(bh);
+                                       bh = bh->b_this_page;
+                               } while (bh != head);
+                       }
+                       index++;
+               }
+               pagevec_release(&pvec);
+       }
+}
+
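The index arithmetic at the top of set_buffers_da_mapped() converts logical block numbers into page-cache indexes. A worked instance for a 1K-block, 4K-page filesystem, where the shift is PAGE_CACHE_SHIFT - blkbits = 2:

/* Block-number to page-index conversion, as above; sketch only. */
static void page_range(void)
{
	unsigned blkbits = 10, page_shift = 12;	/* 1K blocks, 4K pages */
	unsigned m_lblk = 9, m_len = 6;		/* blocks 9..14 */

	unsigned long index = m_lblk >> (page_shift - blkbits);	/* 2 */
	unsigned long end = (m_lblk + m_len - 1) >>
				(page_shift - blkbits);		/* 3 */

	/* pages 2 and 3 together cover blocks 8..15, so the pagevec
	 * walk visits both */
	(void)index; (void)end;
}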
 /*
  * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
@@ -416,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
  * the buffer head is mapped.
  *
  * It returns 0 if plain look up failed (blocks have not been allocated), in
- * that casem, buffer head is unmapped
+ * that case, buffer head is unmapped
  *
  * It returns the error in case of allocation failure.
  */
@@ -435,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         */
        down_read((&EXT4_I(inode)->i_data_sem));
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-               retval = ext4_ext_map_blocks(handle, inode, map, 0);
+               retval = ext4_ext_map_blocks(handle, inode, map, flags &
+                                            EXT4_GET_BLOCKS_KEEP_SIZE);
        } else {
-               retval = ext4_ind_map_blocks(handle, inode, map, 0);
+               retval = ext4_ind_map_blocks(handle, inode, map, flags &
+                                            EXT4_GET_BLOCKS_KEEP_SIZE);
        }
        up_read((&EXT4_I(inode)->i_data_sem));
 
@@ -455,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * Returns if the blocks have already allocated
         *
         * Note that if blocks have been preallocated
-        * ext4_ext_get_block() returns th create = 0
+        * ext4_ext_get_block() returns the create = 0
         * with buffer head unmapped.
         */
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
@@ -517,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                        (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
                        ext4_da_update_reserve_space(inode, retval, 1);
        }
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
                ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
+               /* If we have successfully mapped the delayed allocated blocks,
+                * set the BH_Da_Mapped bit on them. It's important to do this
+                * under the protection of i_data_sem.
+                */
+               if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
+                       set_buffers_da_mapped(inode, map);
+       }
+
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                int ret = check_block_validity(inode, map);
@@ -909,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file,
                        ext4_orphan_add(handle, inode);
                if (ret2 < 0)
                        ret = ret2;
+       } else {
+               unlock_page(page);
+               page_cache_release(page);
        }
+
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
@@ -1037,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file,
 }
 
 /*
- * Reserve a single block located at lblock
+ * Reserve a single cluster located at lblock
  */
 static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 {
        int retries = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       unsigned long md_needed;
+       unsigned int md_needed;
        int ret;
 
        /*
@@ -1054,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
         */
 repeat:
        spin_lock(&ei->i_block_reservation_lock);
-       md_needed = ext4_calc_metadata_amount(inode, lblock);
+       md_needed = EXT4_NUM_B2C(sbi,
+                                ext4_calc_metadata_amount(inode, lblock));
        trace_ext4_da_reserve_space(inode, md_needed);
        spin_unlock(&ei->i_block_reservation_lock);
 
@@ -1063,15 +1120,15 @@ repeat:
         * us from metadata over-estimation, though we may go over by
         * a small amount in the end.  Here we just reserve for data.
         */
-       ret = dquot_reserve_block(inode, 1);
+       ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
        if (ret)
                return ret;
        /*
         * We do still charge estimated metadata to the sb though;
         * we cannot afford to run out of free blocks.
         */
-       if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
-               dquot_release_reservation_block(inode, 1);
+       if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
+               dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
                        goto repeat;
@@ -1118,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
                 * We can release all of the reserved metadata blocks
                 * only when we have written all of the delayed
                 * allocation blocks.
+                * Note that in case of bigalloc, i_reserved_meta_blocks,
+                * i_reserved_data_blocks, etc., refer to numbers of clusters.
                 */
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+               percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                                   ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
        }
 
        /* update fs dirty data blocks counter */
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
+       percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
 
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-       dquot_release_reservation_block(inode, to_free);
+       dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -1139,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page,
        int to_release = 0;
        struct buffer_head *head, *bh;
        unsigned int curr_off = 0;
+       struct inode *inode = page->mapping->host;
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       int num_clusters;
 
        head = page_buffers(page);
        bh = head;
@@ -1148,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page,
                if ((offset <= curr_off) && (buffer_delay(bh))) {
                        to_release++;
                        clear_buffer_delay(bh);
+                       clear_buffer_da_mapped(bh);
                }
                curr_off = next_off;
        } while ((bh = bh->b_this_page) != head);
-       ext4_da_release_space(page->mapping->host, to_release);
+
+       /* If we have released all the blocks belonging to a cluster, then we
+        * need to release the reserved space for that cluster. */
+       num_clusters = EXT4_NUM_B2C(sbi, to_release);
+       while (num_clusters > 0) {
+               ext4_fsblk_t lblk;
+               lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+                       ((num_clusters - 1) << sbi->s_cluster_bits);
+               if (sbi->s_cluster_ratio == 1 ||
+                   !ext4_find_delalloc_cluster(inode, lblk, 1))
+                       ext4_da_release_space(inode, 1);
+
+               num_clusters--;
+       }
 }
 
 /*
@@ -1253,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                                                clear_buffer_delay(bh);
                                                bh->b_blocknr = pblock;
                                        }
+                                       if (buffer_da_mapped(bh))
+                                               clear_buffer_da_mapped(bh);
                                        if (buffer_unwritten(bh) ||
                                            buffer_mapped(bh))
                                                BUG_ON(bh->b_blocknr != pblock);
@@ -1346,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        printk(KERN_CRIT "Total free blocks count %lld\n",
-              ext4_count_free_blocks(inode->i_sb));
+              EXT4_C2B(EXT4_SB(inode->i_sb),
+                       ext4_count_free_clusters(inode->i_sb)));
        printk(KERN_CRIT "Free/Dirty block details\n");
        printk(KERN_CRIT "free_blocks=%lld\n",
-              (long long) percpu_counter_sum(&sbi->s_freeblocks_counter));
+              (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+               percpu_counter_sum(&sbi->s_freeclusters_counter)));
        printk(KERN_CRIT "dirty_blocks=%lld\n",
-              (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+              (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+               percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
        printk(KERN_CRIT "Block reservation details\n");
        printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
               EXT4_I(inode)->i_reserved_data_blocks);
@@ -1430,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
                if (err == -EAGAIN)
                        goto submit_io;
 
-               if (err == -ENOSPC &&
-                   ext4_count_free_blocks(sb)) {
+               if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
                        mpd->retval = err;
                        goto submit_io;
                }
@@ -1471,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 
                for (i = 0; i < map.m_len; i++)
                        unmap_underlying_metadata(bdev, map.m_pblk + i);
-       }
 
-       if (ext4_should_order_data(mpd->inode)) {
-               err = ext4_jbd2_file_inode(handle, mpd->inode);
-               if (err)
-                       /* This only happens if the journal is aborted */
-                       return;
+               if (ext4_should_order_data(mpd->inode)) {
+                       err = ext4_jbd2_file_inode(handle, mpd->inode);
+                       if (err) {
+                               /* Only if the journal is aborted */
+                               mpd->retval = err;
+                               goto submit_io;
+                       }
+               }
        }
 
        /*
@@ -1583,6 +1665,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
        return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
 }
 
+/*
+ * This function grabs code from the very beginning of
+ * ext4_map_blocks, but assumes that the caller is calling from
+ * delayed write time. It looks up the requested blocks and sets the
+ * buffer delay bit under the protection of i_data_sem.
+ */
+static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+                             struct ext4_map_blocks *map,
+                             struct buffer_head *bh)
+{
+       int retval;
+       sector_t invalid_block = ~((sector_t) 0xffff);
+
+       if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
+               invalid_block = ~0;
+
+       map->m_flags = 0;
+       ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u, "
+                 "logical block %lu\n", inode->i_ino, map->m_len,
+                 (unsigned long) map->m_lblk);
+       /*
+        * Try to see if we can get the block without requesting a new
+        * file system block.
+        */
+       down_read((&EXT4_I(inode)->i_data_sem));
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+       else
+               retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+
+       if (retval == 0) {
+               /*
+                * XXX: __block_prepare_write() unmaps passed block,
+                * is it OK?
+                */
+               /* If the block was allocated from a previously
+                * allocated cluster, we don't need to reserve it again. */
+               if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
+                       retval = ext4_da_reserve_space(inode, iblock);
+                       if (retval)
+                               /* not enough space to reserve */
+                               goto out_unlock;
+               }
+
+               /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
+                * and it should not appear in bh->b_state.
+                */
+               map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
+
+               map_bh(bh, inode->i_sb, invalid_block);
+               set_buffer_new(bh);
+               set_buffer_delay(bh);
+       }
+
+out_unlock:
+       up_read((&EXT4_I(inode)->i_data_sem));
+
+       return retval;
+}
+
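
A note on the invalid_block sentinel above: a delayed buffer is mapped to a block number that can never be a valid on-disk block, so any stray I/O against it fails loudly. A sketch of the selection logic, pulled out for clarity:

    /* Sketch of the sentinel choice in ext4_da_map_blocks() above. */
    static sector_t pick_invalid_block(unsigned long long fs_block_count)
    {
            sector_t invalid = ~((sector_t) 0xffff);

            /* If sector_t is narrow enough that this value could fall
             * inside the filesystem, fall back to all-ones. */
            if (invalid < fs_block_count)
                    invalid = ~0;
            return invalid;
    }
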
 /*
  * This is a special get_blocks_t callback which is used by
  * ext4_da_write_begin().  It will either return mapped block or
@@ -1600,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 {
        struct ext4_map_blocks map;
        int ret = 0;
-       sector_t invalid_block = ~((sector_t) 0xffff);
-
-       if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
-               invalid_block = ~0;
 
        BUG_ON(create == 0);
        BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
@@ -1616,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
         * preallocated blocks are unmapped but should be treated
         * the same as allocated blocks.
         */
-       ret = ext4_map_blocks(NULL, inode, &map, 0);
-       if (ret < 0)
+       ret = ext4_da_map_blocks(inode, iblock, &map, bh);
+       if (ret <= 0)
                return ret;
-       if (ret == 0) {
-               if (buffer_delay(bh))
-                       return 0; /* Not sure this could or should happen */
-               /*
-                * XXX: __block_write_begin() unmaps passed block, is it OK?
-                */
-               ret = ext4_da_reserve_space(inode, iblock);
-               if (ret)
-                       /* not enough space to reserve */
-                       return ret;
-
-               map_bh(bh, inode->i_sb, invalid_block);
-               set_buffer_new(bh);
-               set_buffer_delay(bh);
-               return 0;
-       }
 
        map_bh(bh, inode->i_sb, map.m_pblk);
        bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
@@ -2050,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping,
        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
        pgoff_t done_index = 0;
        pgoff_t end;
+       struct blk_plug plug;
 
        trace_ext4_da_writepages(inode, wbc);
 
@@ -2128,6 +2251,7 @@ retry:
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag_pages_for_writeback(mapping, index, end);
 
+       blk_start_plug(&plug);
        while (!ret && wbc->nr_to_write > 0) {
 
                /*
@@ -2178,11 +2302,12 @@ retry:
                        ret = 0;
                } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                        /*
-                        * got one extent now try with
-                        * rest of the pages
+                        * Got one extent; now try with the rest of the
+                        * pages.  If mpd.retval is set to -EIO, the journal
+                        * has been aborted, so we don't need to write any more.
                         */
                        pages_written += mpd.pages_written;
-                       ret = 0;
+                       ret = mpd.retval;
                        io_done = 1;
                } else if (wbc->nr_to_write)
                        /*
@@ -2192,6 +2317,7 @@ retry:
                         */
                        break;
        }
+       blk_finish_plug(&plug);
        if (!io_done && !cycled) {
                cycled = 1;
                index = 0;
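
The blk_plug added in this hunk batches the bios issued inside the writeback loop so the block layer can merge adjacent requests before dispatching them. The canonical pattern is:

    struct blk_plug plug;

    blk_start_plug(&plug);     /* hold submitted bios on a per-task list */
    /* ... issue writepage/submit_bio calls here; they are batched ... */
    blk_finish_plug(&plug);    /* unplug: push the batch to the device queue */
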
@@ -2230,10 +2356,11 @@ static int ext4_nonda_switch(struct super_block *sb)
         * Delalloc needs accurate free block accounting, so switch
         * to non-delalloc when we are near the error range.
         */
-       free_blocks  = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-       dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
+       free_blocks  = EXT4_C2B(sbi,
+               percpu_counter_read_positive(&sbi->s_freeclusters_counter));
+       dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
        if (2 * free_blocks < 3 * dirty_blocks ||
-               free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
+               free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
                /*
                 * the free block count is less than 150% of the dirty
                 * block count, or the free block count is below the
                 * watermark
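
In plain arithmetic, the check above switches to non-delalloc when free < 1.5 * dirty (written as 2 * free < 3 * dirty to stay in integer math), or when free drops under dirty plus the watermark. A standalone sketch:

    static int near_enospc(long long free_blocks, long long dirty_blocks,
                           long long watermark)
    {
            /* free below 150% of dirty, or below dirty + watermark */
            return 2 * free_blocks < 3 * dirty_blocks ||
                   free_blocks < dirty_blocks + watermark;
    }
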
@@ -2259,6 +2386,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
        pgoff_t index;
        struct inode *inode = mapping->host;
        handle_t *handle;
+       loff_t page_len;
 
        index = pos >> PAGE_CACHE_SHIFT;
 
@@ -2305,6 +2433,13 @@ retry:
                 */
                if (pos + len > inode->i_size)
                        ext4_truncate_failed_write(inode);
+       } else {
+               page_len = pos & (PAGE_CACHE_SIZE - 1);
+               if (page_len > 0) {
+                       ret = ext4_discard_partial_page_buffers_no_lock(handle,
+                               inode, page, pos - page_len, page_len,
+                               EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+               }
        }
 
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2347,6 +2482,7 @@ static int ext4_da_write_end(struct file *file,
        loff_t new_i_size;
        unsigned long start, end;
        int write_mode = (int)(unsigned long)fsdata;
+       loff_t page_len;
 
        if (write_mode == FALL_BACK_TO_NONDELALLOC) {
                if (ext4_should_order_data(inode)) {
@@ -2395,6 +2531,16 @@ static int ext4_da_write_end(struct file *file,
        }
        ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
+
+       page_len = PAGE_CACHE_SIZE -
+                       ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
+
+       if (page_len > 0) {
+               ret = ext4_discard_partial_page_buffers_no_lock(handle,
+                       inode, page, pos + copied - 1, page_len,
+                       EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+       }
+
        copied = ret2;
        if (ret2 < 0)
                ret = ret2;
@@ -2689,10 +2835,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
         * but being more careful is always safe for future changes.
         */
        inode = io_end->inode;
-       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-               io_end->flag |= EXT4_IO_END_UNWRITTEN;
-               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-       }
+       ext4_set_io_unwritten_flag(inode, io_end);
 
        /* Add the io_end to per-inode completed io list*/
        spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -2858,6 +3001,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
 
+       /*
+        * If we are doing data journalling we don't support O_DIRECT
+        */
+       if (ext4_should_journal_data(inode))
+               return 0;
+
        trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -2927,6 +3076,7 @@ static const struct address_space_operations ext4_journalled_aops = {
        .bmap                   = ext4_bmap,
        .invalidatepage         = ext4_invalidatepage,
        .releasepage            = ext4_releasepage,
+       .direct_IO              = ext4_direct_IO,
        .is_partially_uptodate  = block_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
 };
@@ -2963,6 +3113,227 @@ void ext4_set_aops(struct inode *inode)
                inode->i_mapping->a_ops = &ext4_journalled_aops;
 }
 
+
+/*
+ * ext4_discard_partial_page_buffers()
+ * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
+ * This function finds and locks the page containing the offset
+ * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
+ * Calling functions that already have the page locked should call
+ * ext4_discard_partial_page_buffers_no_lock directly.
+ */
+int ext4_discard_partial_page_buffers(handle_t *handle,
+               struct address_space *mapping, loff_t from,
+               loff_t length, int flags)
+{
+       struct inode *inode = mapping->host;
+       struct page *page;
+       int err = 0;
+
+       page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+                                  mapping_gfp_mask(mapping) & ~__GFP_FS);
+       if (!page)
+               return -ENOMEM;
+
+       err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
+               from, length, flags);
+
+       unlock_page(page);
+       page_cache_release(page);
+       return err;
+}
+
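
A hypothetical call-site sketch for the wrapper above (handle, inode, offset and len are assumed to come from a truncate-style caller; this is not an actual ext4 call site):

    /* Zero the partial range [offset, offset + len) in the page cache;
     * 'out_stop' is an assumed error label in the caller. */
    err = ext4_discard_partial_page_buffers(handle, inode->i_mapping,
                                            offset, len, 0);
    if (err)
            goto out_stop;
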
+/*
+ * ext4_discard_partial_page_buffers_no_lock()
+ * Zeros a page range of length 'length' starting from offset 'from'.
+ * Buffer heads that correspond to the block aligned regions of the
+ * zeroed range will be unmapped.  Regions that are not block aligned
+ * will have the corresponding buffer head mapped if needed so that
+ * the region of the page can be updated with the partial zero out.
+ *
+ * This function assumes that the page has already been locked.  The
+ * range to be discarded must be contained within the given page.
+ * If the specified range exceeds the end of the page it will be shortened
+ * to the end of the page that corresponds to 'from'.  This function is
+ * appropriate for updating a page and its buffer heads to be unmapped and
+ * zeroed for blocks that have been either released, or are going to be
+ * released.
+ *
+ * handle: The journal handle
+ * inode:  The file's inode
+ * page:   A locked page that contains the offset "from"
+ * from:   The starting byte offset (from the beginning of the file)
+ *         to begin discarding
+ * length: The number of bytes to discard
+ * flags:  Optional flags that may be used:
+ *
+ *         EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
+ *         Only zero the regions of the page whose buffer heads
+ *         have already been unmapped.  This flag is appropriate
+ *         for updating the contents of a page whose blocks may
+ *         have already been released, and we only want to zero
+ *         out the regions that correspond to those released blocks.
+ *
+ * Returns zero on success or negative on failure.
+ */
+int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+               struct inode *inode, struct page *page, loff_t from,
+               loff_t length, int flags)
+{
+       ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
+       unsigned int offset = from & (PAGE_CACHE_SIZE-1);
+       unsigned int blocksize, max, pos;
+       ext4_lblk_t iblock;
+       struct buffer_head *bh;
+       int err = 0;
+
+       blocksize = inode->i_sb->s_blocksize;
+       max = PAGE_CACHE_SIZE - offset;
+
+       if (index != page->index)
+               return -EINVAL;
+
+       /*
+        * correct length if it does not fall between
+        * 'from' and the end of the page
+        */
+       if (length > max || length < 0)
+               length = max;
+
+       iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+
+       if (!page_has_buffers(page)) {
+               /*
+                * If the range to be discarded covers a partial block
+                * we need to get the page buffers.  This is because
+                * partial blocks cannot be released and the page needs
+                * to be updated with the contents of the block before
+                * we write the zeros on top of it.
+                */
+               if ((from & (blocksize - 1)) ||
+                   ((from + length) & (blocksize - 1))) {
+                       create_empty_buffers(page, blocksize, 0);
+               } else {
+                       /*
+                        * If there are no partial blocks,
+                        * there is nothing to update,
+                        * so we can return now
+                        */
+                       return 0;
+               }
+       }
+
+       /* Find the buffer that contains "offset" */
+       bh = page_buffers(page);
+       pos = blocksize;
+       while (offset >= pos) {
+               bh = bh->b_this_page;
+               iblock++;
+               pos += blocksize;
+       }
+
+       pos = offset;
+       while (pos < offset + length) {
+               unsigned int end_of_block, range_to_discard;
+
+               err = 0;
+
+               /* The length of space left to zero and unmap */
+               range_to_discard = offset + length - pos;
+
+               /* The length of space until the end of the block */
+               end_of_block = blocksize - (pos & (blocksize-1));
+
+               /*
+                * Do not unmap or zero past end of block
+                * for this buffer head
+                */
+               if (range_to_discard > end_of_block)
+                       range_to_discard = end_of_block;
+
+               /*
+                * Skip this buffer head if we are only zeroing unmapped
+                * regions of the page
+                */
+               if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
+                       buffer_mapped(bh))
+                               goto next;
+
+               /* If the range is block aligned, unmap */
+               if (range_to_discard == blocksize) {
+                       clear_buffer_dirty(bh);
+                       bh->b_bdev = NULL;
+                       clear_buffer_mapped(bh);
+                       clear_buffer_req(bh);
+                       clear_buffer_new(bh);
+                       clear_buffer_delay(bh);
+                       clear_buffer_unwritten(bh);
+                       clear_buffer_uptodate(bh);
+                       zero_user(page, pos, range_to_discard);
+                       BUFFER_TRACE(bh, "Buffer discarded");
+                       goto next;
+               }
+
+               /*
+                * If this block is not completely contained in the range
+                * to be discarded, then it is not going to be released.
+                * Because we need to keep this block, we need to make sure
+                * this part of the page is uptodate before we modify it
+                * by writing partial zeros on it.
+                */
+               if (!buffer_mapped(bh)) {
+                       /*
+                        * Buffer head must be mapped before we can read
+                        * from the block
+                        */
+                       BUFFER_TRACE(bh, "unmapped");
+                       ext4_get_block(inode, iblock, bh, 0);
+                       /* unmapped? It's a hole - nothing to do */
+                       if (!buffer_mapped(bh)) {
+                               BUFFER_TRACE(bh, "still unmapped");
+                               goto next;
+                       }
+               }
+
+               /* Ok, it's mapped. Make sure it's up-to-date */
+               if (PageUptodate(page))
+                       set_buffer_uptodate(bh);
+
+               if (!buffer_uptodate(bh)) {
+                       err = -EIO;
+                       ll_rw_block(READ, 1, &bh);
+                       wait_on_buffer(bh);
+                       /* Uhhuh. Read error. Complain and punt. */
+                       if (!buffer_uptodate(bh))
+                               goto next;
+               }
+
+               if (ext4_should_journal_data(inode)) {
+                       BUFFER_TRACE(bh, "get write access");
+                       err = ext4_journal_get_write_access(handle, bh);
+                       if (err)
+                               goto next;
+               }
+
+               zero_user(page, pos, range_to_discard);
+
+               err = 0;
+               if (ext4_should_journal_data(inode)) {
+                       err = ext4_handle_dirty_metadata(handle, inode, bh);
+               } else
+                       mark_buffer_dirty(bh);
+
+               BUFFER_TRACE(bh, "Partial buffer zeroed");
+next:
+               bh = bh->b_this_page;
+               iblock++;
+               pos += range_to_discard;
+       }
+
+       return err;
+}
+
 /*
  * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
  * up to the end of the block which corresponds to `from'.
@@ -3005,7 +3376,7 @@ int ext4_block_zero_page_range(handle_t *handle,
        page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
                                   mapping_gfp_mask(mapping) & ~__GFP_FS);
        if (!page)
-               return -EINVAL;
+               return -ENOMEM;
 
        blocksize = inode->i_sb->s_blocksize;
        max = blocksize - (offset & (blocksize - 1));
@@ -3074,11 +3445,8 @@ int ext4_block_zero_page_range(handle_t *handle,
        err = 0;
        if (ext4_should_journal_data(inode)) {
                err = ext4_handle_dirty_metadata(handle, inode, bh);
-       } else {
-               if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
-                       err = ext4_jbd2_file_inode(handle, inode);
+       } else
                mark_buffer_dirty(bh);
-       }
 
 unlock:
        unlock_page(page);
@@ -3119,6 +3487,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
                return -ENOTSUPP;
        }
 
+       if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
+               /* TODO: Add support for bigalloc file systems */
+               return -ENOTSUPP;
+       }
+
        return ext4_ext_punch_hole(file, offset, length);
 }
 
@@ -3418,7 +3791,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
-       inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+       set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 
        ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
        ei->i_dir_start_lookup = 0;
@@ -4420,6 +4793,7 @@ retry_alloc:
                          PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
                        unlock_page(page);
                        ret = VM_FAULT_SIGBUS;
+                       ext4_journal_stop(handle);
                        goto out;
                }
                ext4_set_inode_state(inode, EXT4_STATE_JDATA);
index f18bfe37aff845cedd3dd3acf6c9d0e2225926ae..a56796814d6ab1a564af494e23b57871f2121692 100644 (file)
@@ -21,6 +21,7 @@
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct inode *inode = filp->f_dentry->d_inode;
+       struct super_block *sb = inode->i_sb;
        struct ext4_inode_info *ei = EXT4_I(inode);
        unsigned int flags;
 
@@ -173,33 +174,8 @@ setversion_out:
                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
-#ifdef CONFIG_JBD2_DEBUG
-       case EXT4_IOC_WAIT_FOR_READONLY:
-               /*
-                * This is racy - by the time we're woken up and running,
-                * the superblock could be released.  And the module could
-                * have been unloaded.  So sue me.
-                *
-                * Returns 1 if it slept, else zero.
-                */
-               {
-                       struct super_block *sb = inode->i_sb;
-                       DECLARE_WAITQUEUE(wait, current);
-                       int ret = 0;
-
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
-                       if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) {
-                               schedule();
-                               ret = 1;
-                       }
-                       remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
-                       return ret;
-               }
-#endif
        case EXT4_IOC_GROUP_EXTEND: {
                ext4_fsblk_t n_blocks_count;
-               struct super_block *sb = inode->i_sb;
                int err, err2=0;
 
                err = ext4_resize_begin(sb);
@@ -209,6 +185,13 @@ setversion_out:
                if (get_user(n_blocks_count, (__u32 __user *)arg))
                        return -EFAULT;
 
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                              EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Online resizing not supported with bigalloc");
+                       return -EOPNOTSUPP;
+               }
+
                err = mnt_want_write(filp->f_path.mnt);
                if (err)
                        return err;
@@ -250,6 +233,13 @@ setversion_out:
                        goto mext_out;
                }
 
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                              EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Online defrag not supported with bigalloc");
+                       return -EOPNOTSUPP;
+               }
+
                err = mnt_want_write(filp->f_path.mnt);
                if (err)
                        goto mext_out;
@@ -270,7 +260,6 @@ mext_out:
 
        case EXT4_IOC_GROUP_ADD: {
                struct ext4_new_group_data input;
-               struct super_block *sb = inode->i_sb;
                int err, err2=0;
 
                err = ext4_resize_begin(sb);
@@ -281,6 +270,13 @@ mext_out:
                                sizeof(input)))
                        return -EFAULT;
 
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                              EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Online resizing not supported with bigalloc");
+                       return -EOPNOTSUPP;
+               }
+
                err = mnt_want_write(filp->f_path.mnt);
                if (err)
                        return err;
@@ -337,7 +333,6 @@ mext_out:
 
        case FITRIM:
        {
-               struct super_block *sb = inode->i_sb;
                struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
@@ -348,7 +343,14 @@ mext_out:
                if (!blk_queue_discard(q))
                        return -EOPNOTSUPP;
 
-               if (copy_from_user(&range, (struct fstrim_range *)arg,
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                              EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "FITRIM not supported with bigalloc");
+                       return -EOPNOTSUPP;
+               }
+
+               if (copy_from_user(&range, (struct fstrim_range __user *)arg,
                    sizeof(range)))
                        return -EFAULT;
 
@@ -358,7 +360,7 @@ mext_out:
                if (ret < 0)
                        return ret;
 
-               if (copy_to_user((struct fstrim_range *)arg, &range,
+               if (copy_to_user((struct fstrim_range __user *)arg, &range,
                    sizeof(range)))
                        return -EFAULT;
 
@@ -396,11 +398,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case EXT4_IOC32_SETVERSION_OLD:
                cmd = EXT4_IOC_SETVERSION_OLD;
                break;
-#ifdef CONFIG_JBD2_DEBUG
-       case EXT4_IOC32_WAIT_FOR_READONLY:
-               cmd = EXT4_IOC_WAIT_FOR_READONLY;
-               break;
-#endif
        case EXT4_IOC32_GETRSVSZ:
                cmd = EXT4_IOC_GETRSVSZ;
                break;
index 17a5a57c415a2dcdd36104c08473438c238f38ee..e2d8be8f28bfb8555644bef5100b2f9b8c2cbe9a 100644 (file)
@@ -70,8 +70,8 @@
  *
  * pa_lstart -> the logical start block for this prealloc space
  * pa_pstart -> the physical start block for this prealloc space
- * pa_len    -> length for this prealloc space
- * pa_free   ->  free space available in this prealloc space
+ * pa_len    -> length for this prealloc space (in clusters)
+ * pa_free   ->  free space available in this prealloc space (in clusters)
  *
  * The inode preallocation space is used looking at the _logical_ start
  * block. If only the logical file block falls within the range of prealloc
  * list. In case of inode preallocation we follow a list of heuristics
  * based on file size. This can be found in ext4_mb_normalize_request. If
  * we are doing a group prealloc we try to normalize the request to
- * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
+ * sbi->s_mb_group_prealloc.  The default value of s_mb_group_prealloc is
+ * dependent on the cluster size; for non-bigalloc file systems, it is
  * 512 blocks. This can be tuned via
  * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
  * terms of number of blocks. If we have mounted the file system with -O
@@ -459,7 +460,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
                        ext4_fsblk_t blocknr;
 
                        blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
-                       blocknr += first + i;
+                       blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
                        ext4_grp_locked_error(sb, e4b->bd_group,
                                              inode ? inode->i_ino : 0,
                                              blocknr,
@@ -580,7 +581,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
                                continue;
                        }
 
-                       /* both bits in buddy2 must be 0 */
+                       /* both bits in buddy2 must be 1 */
                        MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
                        MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
 
@@ -653,7 +654,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
        ext4_grpblk_t chunk;
        unsigned short border;
 
-       BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
+       BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
 
        border = 2 << sb->s_blocksize_bits;
 
@@ -705,7 +706,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
                                void *buddy, void *bitmap, ext4_group_t group)
 {
        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-       ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
+       ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
        ext4_grpblk_t i = 0;
        ext4_grpblk_t first;
        ext4_grpblk_t len;
@@ -734,7 +735,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 
        if (free != grp->bb_free) {
                ext4_grp_locked_error(sb, group, 0, 0,
-                                     "%u blocks in bitmap, %u in gd",
+                                     "%u clusters in bitmap, %u in gd",
                                      free, grp->bb_free);
                /*
                 * If we intend to continue, we consider group descriptor
@@ -1339,7 +1340,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                        ext4_fsblk_t blocknr;
 
                        blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
-                       blocknr += block;
+                       blocknr += EXT4_C2B(EXT4_SB(sb), block);
                        ext4_grp_locked_error(sb, e4b->bd_group,
                                              inode ? inode->i_ino : 0,
                                              blocknr,
@@ -1390,7 +1391,6 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
 {
        int next = block;
        int max;
-       int ord;
        void *buddy;
 
        assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
@@ -1432,9 +1432,8 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
                if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
                        break;
 
-               ord = mb_find_order_for_block(e4b, next);
+               order = mb_find_order_for_block(e4b, next);
 
-               order = ord;
                block = next >> order;
                ex->fe_len += 1 << order;
        }
@@ -1624,8 +1623,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
        struct ext4_free_extent *gex = &ac->ac_g_ex;
 
        BUG_ON(ex->fe_len <= 0);
-       BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
-       BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+       BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
+       BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
        BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
 
        ac->ac_found++;
@@ -1823,15 +1822,15 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 
        while (free && ac->ac_status == AC_STATUS_CONTINUE) {
                i = mb_find_next_zero_bit(bitmap,
-                                               EXT4_BLOCKS_PER_GROUP(sb), i);
-               if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
+                                               EXT4_CLUSTERS_PER_GROUP(sb), i);
+               if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
                        /*
                         * If we have a corrupt bitmap, we won't find any
                         * free blocks even though the group info says we
                         * have free blocks
                         */
                        ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
-                                       "%d free blocks as per "
+                                       "%d free clusters as per "
                                        "group info. But bitmap says 0",
                                        free);
                        break;
@@ -1841,7 +1840,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                BUG_ON(ex.fe_len <= 0);
                if (free < ex.fe_len) {
                        ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
-                                       "%d free blocks as per "
+                                       "%d free clusters as per "
                                        "group info. But got %d blocks",
                                        free, ex.fe_len);
                        /*
@@ -1887,7 +1886,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
        do_div(a, sbi->s_stripe);
        i = (a * sbi->s_stripe) - first_group_block;
 
-       while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
+       while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
                if (!mb_test_bit(i, bitmap)) {
                        max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
                        if (max >= sbi->s_stripe) {
@@ -2252,10 +2251,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
         */
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                meta_group_info[i]->bb_free =
-                       ext4_free_blocks_after_init(sb, group, desc);
+                       ext4_free_clusters_after_init(sb, group, desc);
        } else {
                meta_group_info[i]->bb_free =
-                       ext4_free_blks_count(sb, desc);
+                       ext4_free_group_clusters(sb, desc);
        }
 
        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@@ -2473,7 +2472,20 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        sbi->s_mb_stats = MB_DEFAULT_STATS;
        sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
        sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
-       sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
+       /*
+        * The default group preallocation is 512, which for 4k block
+        * sizes translates to 2 megabytes.  However for bigalloc file
+        * systems, this is probably too big (i.e., if the cluster size
+        * is 1 megabyte, then the group preallocation size becomes half
+        * a gigabyte!).  As a default, we will keep a two megabyte
+        * group prealloc size for cluster sizes up to 64k, and after
+        * that, we will force a minimum group preallocation size of
+        * 32 clusters.  This translates to 8 megs when the cluster
+        * size is 256k, and 32 megs when the cluster size is 1 meg,
+        * which seems reasonable as a default.
+        */
+       sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
+                                      sbi->s_cluster_bits, 32);
        /*
         * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
         * to the lowest multiple of s_stripe which is bigger than
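
Working out the default above (MB_DEFAULT_GROUP_PREALLOC is 512 blocks, per the comment; figures assume a 4k block size):

    /*
     *  cluster size       s_cluster_bits  512 >> bits  max(.., 32)  prealloc
     *  4k (no bigalloc)         0             512          512         2M
     *  64k                      4              32           32         2M
     *  256k                     6               8           32         8M
     *  1M                       8               2           32        32M
     */
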
@@ -2490,7 +2502,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
        if (sbi->s_locality_groups == NULL) {
                ret = -ENOMEM;
-               goto out;
+               goto out_free_groupinfo_slab;
        }
        for_each_possible_cpu(i) {
                struct ext4_locality_group *lg;
@@ -2503,9 +2515,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
        /* init file for buddy data */
        ret = ext4_mb_init_backend(sb);
-       if (ret != 0) {
-               goto out;
-       }
+       if (ret != 0)
+               goto out_free_locality_groups;
 
        if (sbi->s_proc)
                proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
@@ -2513,11 +2524,19 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
        if (sbi->s_journal)
                sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+
+       return 0;
+
+out_free_locality_groups:
+       free_percpu(sbi->s_locality_groups);
+       sbi->s_locality_groups = NULL;
+out_free_groupinfo_slab:
+       ext4_groupinfo_destroy_slabs();
 out:
-       if (ret) {
-               kfree(sbi->s_mb_offsets);
-               kfree(sbi->s_mb_maxs);
-       }
+       kfree(sbi->s_mb_offsets);
+       sbi->s_mb_offsets = NULL;
+       kfree(sbi->s_mb_maxs);
+       sbi->s_mb_maxs = NULL;
        return ret;
 }
 
@@ -2602,11 +2621,13 @@ int ext4_mb_release(struct super_block *sb)
 }
 
 static inline int ext4_issue_discard(struct super_block *sb,
-               ext4_group_t block_group, ext4_grpblk_t block, int count)
+               ext4_group_t block_group, ext4_grpblk_t cluster, int count)
 {
        ext4_fsblk_t discard_block;
 
-       discard_block = block + ext4_group_first_block_no(sb, block_group);
+       discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
+                        ext4_group_first_block_no(sb, block_group));
+       count = EXT4_C2B(EXT4_SB(sb), count);
        trace_ext4_discard_blocks(sb,
                        (unsigned long long) discard_block, count);
        return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
@@ -2633,7 +2654,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 
                if (test_opt(sb, DISCARD))
                        ext4_issue_discard(sb, entry->group,
-                                          entry->start_blk, entry->count);
+                                          entry->start_cluster, entry->count);
 
                err = ext4_mb_load_buddy(sb, entry->group, &e4b);
                /* we expect to find existing buddy because it's pinned */
@@ -2646,7 +2667,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                ext4_lock_group(sb, entry->group);
                /* Take it out of per group rb tree */
                rb_erase(&entry->node, &(db->bb_free_root));
-               mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+               mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
 
                /*
                 * Clear the trimmed flag for the group so that the next
@@ -2752,7 +2773,7 @@ void ext4_exit_mballoc(void)
  */
 static noinline_for_stack int
 ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
-                               handle_t *handle, unsigned int reserv_blks)
+                               handle_t *handle, unsigned int reserv_clstrs)
 {
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_group_desc *gdp;
@@ -2783,7 +2804,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                goto out_err;
 
        ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
-                       ext4_free_blks_count(sb, gdp));
+                       ext4_free_group_clusters(sb, gdp));
 
        err = ext4_journal_get_write_access(handle, gdp_bh);
        if (err)
@@ -2791,7 +2812,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 
        block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
 
-       len = ac->ac_b_ex.fe_len;
+       len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
        if (!ext4_data_block_valid(sbi, block, len)) {
                ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
                           "fs metadata\n", block, block+len);
@@ -2823,28 +2844,29 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                      ac->ac_b_ex.fe_len);
        if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-               ext4_free_blks_set(sb, gdp,
-                                       ext4_free_blocks_after_init(sb,
-                                       ac->ac_b_ex.fe_group, gdp));
+               ext4_free_group_clusters_set(sb, gdp,
+                                            ext4_free_clusters_after_init(sb,
+                                               ac->ac_b_ex.fe_group, gdp));
        }
-       len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
-       ext4_free_blks_set(sb, gdp, len);
+       len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
+       ext4_free_group_clusters_set(sb, gdp, len);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
 
        ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-       percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+       percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
        /*
         * Now reduce the dirty block count also. Should not go negative
         */
        if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
                /* release all the reserved blocks if non delalloc */
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
+               percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+                                  reserv_clstrs);
 
        if (sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group = ext4_flex_group(sbi,
                                                          ac->ac_b_ex.fe_group);
                atomic_sub(ac->ac_b_ex.fe_len,
-                          &sbi->s_flex_groups[flex_group].free_blocks);
+                          &sbi->s_flex_groups[flex_group].free_clusters);
        }
 
        err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -2886,6 +2908,7 @@ static noinline_for_stack void
 ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                                struct ext4_allocation_request *ar)
 {
+       struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
        int bsbits, max;
        ext4_lblk_t end;
        loff_t size, orig_size, start_off;
@@ -2916,7 +2939,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
        /* first, let's learn the actual file size
         * given that the current request is allocated */
-       size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
+       size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
        size = size << bsbits;
        if (size < i_size_read(ac->ac_inode))
                size = i_size_read(ac->ac_inode);
@@ -2988,7 +3011,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                        continue;
                }
 
-               pa_end = pa->pa_lstart + pa->pa_len;
+               pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
+                                                 pa->pa_len);
 
                /* PA must not overlap original request */
                BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
@@ -3018,9 +3042,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
        rcu_read_lock();
        list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
                ext4_lblk_t pa_end;
+
                spin_lock(&pa->pa_lock);
                if (pa->pa_deleted == 0) {
-                       pa_end = pa->pa_lstart + pa->pa_len;
+                       pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
+                                                         pa->pa_len);
                        BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
                }
                spin_unlock(&pa->pa_lock);
@@ -3036,14 +3062,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
        }
        BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
                        start > ac->ac_o_ex.fe_logical);
-       BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+       BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
 
        /* now prepare goal request */
 
        /* XXX: is it better to align blocks with respect to logical
         * placement or to satisfy the big request as is */
        ac->ac_g_ex.fe_logical = start;
-       ac->ac_g_ex.fe_len = size;
+       ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
 
        /* define goal start in order to merge */
        if (ar->pright && (ar->lright == (start + size))) {
@@ -3112,14 +3138,16 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
 static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
                                struct ext4_prealloc_space *pa)
 {
+       struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
        ext4_fsblk_t start;
        ext4_fsblk_t end;
        int len;
 
        /* found preallocated blocks, use them */
        start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
-       end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
-       len = end - start;
+       end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
+                 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
+       len = EXT4_NUM_B2C(sbi, end - start);
        ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
                                        &ac->ac_b_ex.fe_start);
        ac->ac_b_ex.fe_len = len;
@@ -3127,7 +3155,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
        ac->ac_pa = pa;
 
        BUG_ON(start < pa->pa_pstart);
-       BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
+       BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
        BUG_ON(pa->pa_free < len);
        pa->pa_free -= len;
 
@@ -3193,6 +3221,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
 static noinline_for_stack int
 ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
+       struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
        int order, i;
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
        struct ext4_locality_group *lg;
@@ -3210,12 +3239,14 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                /* all fields in this condition don't change,
                 * so we can skip locking for them */
                if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
-                       ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
+                   ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
+                                              EXT4_C2B(sbi, pa->pa_len)))
                        continue;
 
                /* non-extent files can't have physical blocks past 2^32 */
                if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
-                       pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
+                   (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
+                    EXT4_MAX_BLOCK_FILE_PHYS))
                        continue;
 
                /* found preallocated blocks, use them */
@@ -3291,7 +3322,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 
        while (n) {
                entry = rb_entry(n, struct ext4_free_data, node);
-               ext4_set_bits(bitmap, entry->start_blk, entry->count);
+               ext4_set_bits(bitmap, entry->start_cluster, entry->count);
                n = rb_next(n);
        }
        return;
@@ -3312,7 +3343,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
        ext4_group_t groupnr;
        ext4_grpblk_t start;
        int preallocated = 0;
-       int count = 0;
        int len;
 
        /* all form of preallocation discards first load group,
@@ -3335,7 +3365,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                BUG_ON(groupnr != group);
                ext4_set_bits(bitmap, start, len);
                preallocated += len;
-               count++;
        }
        mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
 }
@@ -3412,6 +3441,7 @@ static noinline_for_stack int
 ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_prealloc_space *pa;
        struct ext4_group_info *grp;
        struct ext4_inode_info *ei;
@@ -3443,16 +3473,18 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
                winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
 
                /* also, we should cover whole original request */
-               wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;
+               wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
 
                /* the smallest one defines real window */
                win = min(winl, wins);
 
-               offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
+               offs = ac->ac_o_ex.fe_logical %
+                       EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
                if (offs && offs < win)
                        win = offs;
 
-               ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
+               ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
+                       EXT4_B2C(sbi, win);
                BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
                BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
        }
@@ -3477,7 +3509,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
        trace_ext4_mb_new_inode_pa(ac, pa);
 
        ext4_mb_use_inode_pa(ac, pa);
-       atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
+       atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
 
        ei = EXT4_I(ac->ac_inode);
        grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
@@ -3592,7 +3624,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 
        BUG_ON(pa->pa_deleted == 0);
        ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
-       grp_blk_start = pa->pa_pstart - bit;
+       grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
        BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
        end = bit + pa->pa_len;
 
@@ -3607,7 +3639,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                free += next - bit;
 
                trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
-               trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
+               trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
+                                                   EXT4_C2B(sbi, bit)),
                                               next - bit);
                mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
                bit = next + 1;
@@ -3690,7 +3723,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
        }
 
        if (needed == 0)
-               needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
+               needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
 
        INIT_LIST_HEAD(&list);
 repeat:
@@ -3958,7 +3991,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
        if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
                return;
 
-       size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
+       size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
        isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
                >> bsbits;
 
@@ -3969,6 +4002,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
                return;
        }
 
+       if (sbi->s_mb_group_prealloc <= 0) {
+               ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+               return;
+       }
+
        /* don't use group allocation for large files */
        size = max(size, isize);
        if (size > sbi->s_mb_stream_request) {
@@ -4007,8 +4045,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
        len = ar->len;
 
        /* just a dirty hack to filter too big requests  */
-       if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
-               len = EXT4_BLOCKS_PER_GROUP(sb) - 10;
+       if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
+               len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
 
        /* start searching from the goal */
        goal = ar->goal;
@@ -4019,18 +4057,15 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 
        /* set up allocation goals */
        memset(ac, 0, sizeof(struct ext4_allocation_context));
-       ac->ac_b_ex.fe_logical = ar->logical;
+       ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
        ac->ac_status = AC_STATUS_CONTINUE;
        ac->ac_sb = sb;
        ac->ac_inode = ar->inode;
-       ac->ac_o_ex.fe_logical = ar->logical;
+       ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
        ac->ac_o_ex.fe_group = group;
        ac->ac_o_ex.fe_start = block;
        ac->ac_o_ex.fe_len = len;
-       ac->ac_g_ex.fe_logical = ar->logical;
-       ac->ac_g_ex.fe_group = group;
-       ac->ac_g_ex.fe_start = block;
-       ac->ac_g_ex.fe_len = len;
+       ac->ac_g_ex = ac->ac_o_ex;
        ac->ac_flags = ar->flags;
 
        /* we have to define context: we'll we work with a file or
@@ -4182,13 +4217,14 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
  */
 static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 {
+       struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
        struct ext4_prealloc_space *pa = ac->ac_pa;
        if (pa) {
                if (pa->pa_type == MB_GROUP_PA) {
                        /* see comment in ext4_mb_use_group_pa() */
                        spin_lock(&pa->pa_lock);
-                       pa->pa_pstart += ac->ac_b_ex.fe_len;
-                       pa->pa_lstart += ac->ac_b_ex.fe_len;
+                       pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+                       pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
                        pa->pa_free -= ac->ac_b_ex.fe_len;
                        pa->pa_len -= ac->ac_b_ex.fe_len;
                        spin_unlock(&pa->pa_lock);
@@ -4249,13 +4285,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        struct super_block *sb;
        ext4_fsblk_t block = 0;
        unsigned int inquota = 0;
-       unsigned int reserv_blks = 0;
+       unsigned int reserv_clstrs = 0;
 
        sb = ar->inode->i_sb;
        sbi = EXT4_SB(sb);
 
        trace_ext4_request_blocks(ar);
 
+       /* Allow to use superuser reservation for quota file */
+       /* Allow use of the superuser reservation for quota files */
+               ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
+
        /*
         * For delayed allocation, we could skip the ENOSPC and
         * EDQUOT check, as blocks and quotas have been already
@@ -4269,7 +4309,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                 * and verify allocation doesn't exceed the quota limits.
                 */
                while (ar->len &&
-                       ext4_claim_free_blocks(sbi, ar->len, ar->flags)) {
+                       ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
 
                        /* let others to free the space */
                        yield();
@@ -4279,12 +4319,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                        *errp = -ENOSPC;
                        return 0;
                }
-               reserv_blks = ar->len;
+               reserv_clstrs = ar->len;
                if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
-                       dquot_alloc_block_nofail(ar->inode, ar->len);
+                       dquot_alloc_block_nofail(ar->inode,
+                                                EXT4_C2B(sbi, ar->len));
                } else {
                        while (ar->len &&
-                               dquot_alloc_block(ar->inode, ar->len)) {
+                               dquot_alloc_block(ar->inode,
+                                                 EXT4_C2B(sbi, ar->len))) {
 
                                ar->flags |= EXT4_MB_HINT_NOPREALLOC;
                                ar->len--;
@@ -4328,7 +4370,7 @@ repeat:
                        ext4_mb_new_preallocation(ac);
        }
        if (likely(ac->ac_status == AC_STATUS_FOUND)) {
-               *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
+               *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
                if (*errp == -EAGAIN) {
                        /*
                         * drop the reference that we took
@@ -4364,13 +4406,13 @@ out:
        if (ac)
                kmem_cache_free(ext4_ac_cachep, ac);
        if (inquota && ar->len < inquota)
-               dquot_free_block(ar->inode, inquota - ar->len);
+               dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
        if (!ar->len) {
                if (!ext4_test_inode_state(ar->inode,
                                           EXT4_STATE_DELALLOC_RESERVED))
                        /* release all the reserved blocks if non delalloc */
-                       percpu_counter_sub(&sbi->s_dirtyblocks_counter,
-                                               reserv_blks);
+                       percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+                                               reserv_clstrs);
        }
 
        trace_ext4_allocate_blocks(ar, (unsigned long long)block);
@@ -4388,7 +4430,7 @@ static int can_merge(struct ext4_free_data *entry1,
 {
        if ((entry1->t_tid == entry2->t_tid) &&
            (entry1->group == entry2->group) &&
-           ((entry1->start_blk + entry1->count) == entry2->start_blk))
+           ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
                return 1;
        return 0;
 }
@@ -4398,7 +4440,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
                      struct ext4_free_data *new_entry)
 {
        ext4_group_t group = e4b->bd_group;
-       ext4_grpblk_t block;
+       ext4_grpblk_t cluster;
        struct ext4_free_data *entry;
        struct ext4_group_info *db = e4b->bd_info;
        struct super_block *sb = e4b->bd_sb;
@@ -4411,7 +4453,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
        BUG_ON(e4b->bd_buddy_page == NULL);
 
        new_node = &new_entry->node;
-       block = new_entry->start_blk;
+       cluster = new_entry->start_cluster;
 
        if (!*n) {
                /* first free block extent. We need to
@@ -4425,13 +4467,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
        while (*n) {
                parent = *n;
                entry = rb_entry(parent, struct ext4_free_data, node);
-               if (block < entry->start_blk)
+               if (cluster < entry->start_cluster)
                        n = &(*n)->rb_left;
-               else if (block >= (entry->start_blk + entry->count))
+               else if (cluster >= (entry->start_cluster + entry->count))
                        n = &(*n)->rb_right;
                else {
                        ext4_grp_locked_error(sb, group, 0,
-                               ext4_group_first_block_no(sb, group) + block,
+                               ext4_group_first_block_no(sb, group) +
+                               EXT4_C2B(sbi, cluster),
                                "Block already on to-be-freed list");
                        return 0;
                }
@@ -4445,7 +4488,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
        if (node) {
                entry = rb_entry(node, struct ext4_free_data, node);
                if (can_merge(entry, new_entry)) {
-                       new_entry->start_blk = entry->start_blk;
+                       new_entry->start_cluster = entry->start_cluster;
                        new_entry->count += entry->count;
                        rb_erase(node, &(db->bb_free_root));
                        spin_lock(&sbi->s_md_lock);
@@ -4496,6 +4539,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
        ext4_group_t block_group;
        struct ext4_sb_info *sbi;
        struct ext4_buddy e4b;
+       unsigned int count_clusters;
        int err = 0;
        int ret;
 
@@ -4544,6 +4588,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
        if (!ext4_should_writeback_data(inode))
                flags |= EXT4_FREE_BLOCKS_METADATA;
 
+       /*
+        * If the extent to be freed does not begin or end on a cluster
+        * boundary, we need to deal with partial clusters at the
+        * beginning and end of the extent.  Normally we round the
+        * range outward and free those partial clusters in full,
+        * unless we are explicitly requested to avoid doing so.
+        */
+       overflow = block & (sbi->s_cluster_ratio - 1);
+       if (overflow) {
+               if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
+                       overflow = sbi->s_cluster_ratio - overflow;
+                       block += overflow;
+                       if (count > overflow)
+                               count -= overflow;
+                       else
+                               return;
+               } else {
+                       block -= overflow;
+                       count += overflow;
+               }
+       }
+       overflow = count & (sbi->s_cluster_ratio - 1);
+       if (overflow) {
+               if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
+                       if (count > overflow)
+                               count -= overflow;
+                       else
+                               return;
+               } else
+                       count += sbi->s_cluster_ratio - overflow;
+       }
+
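
To make the rounding above concrete, here is the same arithmetic on sample numbers: a hypothetical cluster ratio of 16 and no NOFREE_* flags, so both ends round outward to cluster boundaries:

#include <stdio.h>

int main(void)
{
	unsigned long long block = 18, count = 23;	/* blocks 18..40 */
	unsigned int ratio = 16;			/* blocks per cluster */
	unsigned long long overflow;

	overflow = block & (ratio - 1);		/* 2: round the start down */
	block -= overflow;
	count += overflow;			/* now 16..40, count 25 */

	overflow = count & (ratio - 1);		/* 9: round the length up */
	if (overflow)
		count += ratio - overflow;	/* now 16..47, count 32 */

	printf("free blocks %llu..%llu\n", block, block + count - 1);
	return 0;
}
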
 do_more:
        overflow = 0;
        ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4552,10 +4628,12 @@ do_more:
         * Check to see if we are freeing blocks across a group
         * boundary.
         */
-       if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
-               overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
+       if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
+               overflow = EXT4_C2B(sbi, bit) + count -
+                       EXT4_BLOCKS_PER_GROUP(sb);
                count -= overflow;
        }
+       count_clusters = EXT4_B2C(sbi, count);
        bitmap_bh = ext4_read_block_bitmap(sb, block_group);
        if (!bitmap_bh) {
                err = -EIO;
@@ -4570,9 +4648,9 @@ do_more:
        if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
            in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
            in_range(block, ext4_inode_table(sb, gdp),
-                     EXT4_SB(sb)->s_itb_per_group) ||
+                    EXT4_SB(sb)->s_itb_per_group) ||
            in_range(block + count - 1, ext4_inode_table(sb, gdp),
-                     EXT4_SB(sb)->s_itb_per_group)) {
+                    EXT4_SB(sb)->s_itb_per_group)) {
 
                ext4_error(sb, "Freeing blocks in system zone - "
                           "Block = %llu, count = %lu", block, count);
@@ -4597,11 +4675,11 @@ do_more:
 #ifdef AGGRESSIVE_CHECK
        {
                int i;
-               for (i = 0; i < count; i++)
+               for (i = 0; i < count_clusters; i++)
                        BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
        }
 #endif
-       trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
+       trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
 
        err = ext4_mb_load_buddy(sb, block_group, &e4b);
        if (err)
@@ -4618,13 +4696,13 @@ do_more:
                        err = -ENOMEM;
                        goto error_return;
                }
-               new_entry->start_blk = bit;
+               new_entry->start_cluster = bit;
                new_entry->group  = block_group;
-               new_entry->count = count;
+               new_entry->count = count_clusters;
                new_entry->t_tid = handle->h_transaction->t_tid;
 
                ext4_lock_group(sb, block_group);
-               mb_clear_bits(bitmap_bh->b_data, bit, count);
+               mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
                ext4_mb_free_metadata(handle, &e4b, new_entry);
        } else {
                /* need to update group_info->bb_free and bitmap
@@ -4632,25 +4710,29 @@ do_more:
                 * them with group lock_held
                 */
                ext4_lock_group(sb, block_group);
-               mb_clear_bits(bitmap_bh->b_data, bit, count);
-               mb_free_blocks(inode, &e4b, bit, count);
+               mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
+               mb_free_blocks(inode, &e4b, bit, count_clusters);
        }
 
-       ret = ext4_free_blks_count(sb, gdp) + count;
-       ext4_free_blks_set(sb, gdp, ret);
+       ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
+       ext4_free_group_clusters_set(sb, gdp, ret);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
        ext4_unlock_group(sb, block_group);
-       percpu_counter_add(&sbi->s_freeblocks_counter, count);
+       percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
 
        if (sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-               atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
+               atomic_add(count_clusters,
+                          &sbi->s_flex_groups[flex_group].free_clusters);
        }
 
        ext4_mb_unload_buddy(&e4b);
 
        freed += count;
 
+       if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+               dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
+
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
        err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4669,8 +4751,6 @@ do_more:
        }
        ext4_mark_super_dirty(sb);
 error_return:
-       if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
-               dquot_free_block(inode, freed);
        brelse(bitmap_bh);
        ext4_std_error(sb, err);
        return;
@@ -4778,16 +4858,17 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
        ext4_lock_group(sb, block_group);
        mb_clear_bits(bitmap_bh->b_data, bit, count);
        mb_free_blocks(NULL, &e4b, bit, count);
-       blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
-       ext4_free_blks_set(sb, desc, blk_free_count);
+       blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
+       ext4_free_group_clusters_set(sb, desc, blk_free_count);
        desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
        ext4_unlock_group(sb, block_group);
-       percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
+       percpu_counter_add(&sbi->s_freeclusters_counter,
+                          EXT4_B2C(sbi, blocks_freed));
 
        if (sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-               atomic_add(blocks_freed,
-                          &sbi->s_flex_groups[flex_group].free_blocks);
+               atomic_add(EXT4_B2C(sbi, blocks_freed),
+                          &sbi->s_flex_groups[flex_group].free_clusters);
        }
 
        ext4_mb_unload_buddy(&e4b);
@@ -4948,7 +5029,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
        struct ext4_group_info *grp;
        ext4_group_t first_group, last_group;
        ext4_group_t group, ngroups = ext4_get_groups_count(sb);
-       ext4_grpblk_t cnt = 0, first_block, last_block;
+       ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
        uint64_t start, len, minlen, trimmed = 0;
        ext4_fsblk_t first_data_blk =
                        le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
@@ -4958,7 +5039,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
        len = range->len >> sb->s_blocksize_bits;
        minlen = range->minlen >> sb->s_blocksize_bits;
 
-       if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
+       if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
                return -EINVAL;
        if (start + len <= first_data_blk)
                goto out;
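
The fstrim_range fields arrive in bytes, and the shifts above convert them to block units before the per-group walk; minlen is then sanity-checked against EXT4_CLUSTERS_PER_GROUP(). A worked example assuming 4 KiB blocks (s_blocksize_bits == 12):

#include <stdio.h>

int main(void)
{
	unsigned int blocksize_bits = 12;		/* 4 KiB blocks */
	unsigned long long range_start = 0;
	unsigned long long range_len = 1ULL << 30;	/* trim 1 GiB */
	unsigned long long range_minlen = 1ULL << 20;	/* skip runs < 1 MiB */

	unsigned long long start = range_start >> blocksize_bits;
	unsigned long long len = range_len >> blocksize_bits;
	unsigned long long minlen = range_minlen >> blocksize_bits;

	/* 262144 blocks to scan; ignore free runs shorter than 256 blocks */
	printf("start=%llu len=%llu minlen=%llu\n", start, len, minlen);
	return 0;
}
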
@@ -4969,11 +5050,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
        /* Determine first and last group to examine based on start and len */
        ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
-                                    &first_group, &first_block);
+                                    &first_group, &first_cluster);
        ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
-                                    &last_group, &last_block);
+                                    &last_group, &last_cluster);
        last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
-       last_block = EXT4_BLOCKS_PER_GROUP(sb);
+       last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
 
        if (first_group > last_group)
                return -EINVAL;
@@ -4993,20 +5074,20 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
                 * change it for the last group in which case start +
                 * len < EXT4_CLUSTERS_PER_GROUP(sb).
                 */
-               if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
-                       last_block = first_block + len;
-               len -= last_block - first_block;
+               if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
+                       last_cluster = first_cluster + len;
+               len -= last_cluster - first_cluster;
 
                if (grp->bb_free >= minlen) {
-                       cnt = ext4_trim_all_free(sb, group, first_block,
-                                               last_block, minlen);
+                       cnt = ext4_trim_all_free(sb, group, first_cluster,
+                                               last_cluster, minlen);
                        if (cnt < 0) {
                                ret = cnt;
                                break;
                        }
                }
                trimmed += cnt;
-               first_block = 0;
+               first_cluster = 0;
        }
        range->len = trimmed * sb->s_blocksize;
 
index 9d4a636b546c529ac993b18e1fe6abb3e99ab9f5..47705f3285e3c145f171dcd2add8c6d311d8298e 100644 (file)
@@ -106,7 +106,7 @@ struct ext4_free_data {
        ext4_group_t group;
 
        /* free block extent */
-       ext4_grpblk_t start_blk;
+       ext4_grpblk_t start_cluster;
        ext4_grpblk_t count;
 
        /* transaction which freed this extent */
@@ -139,9 +139,9 @@ enum {
 
 struct ext4_free_extent {
        ext4_lblk_t fe_logical;
-       ext4_grpblk_t fe_start;
+       ext4_grpblk_t fe_start; /* In cluster units */
        ext4_group_t fe_group;
-       ext4_grpblk_t fe_len;
+       ext4_grpblk_t fe_len;   /* In cluster units */
 };
 
 /*
@@ -175,7 +175,7 @@ struct ext4_allocation_context {
        /* the best found extent */
        struct ext4_free_extent ac_b_ex;
 
-       /* copy of the bext found extent taken before preallocation efforts */
+       /* copy of the best found extent taken before preallocation efforts */
        struct ext4_free_extent ac_f_ex;
 
        /* number of iterations done. we have to track to limit searching */
@@ -216,6 +216,7 @@ struct ext4_buddy {
 static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
                                        struct ext4_free_extent *fex)
 {
-       return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start;
+       return ext4_group_first_block_no(sb, fex->fe_group) +
+               (fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
 }
 #endif
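
Since fe_start is now a cluster offset within the group, mapping a free extent back to a physical block number needs the cluster shift, as the helper above shows. A numeric sketch with illustrative geometry (first data block 0, 32768 blocks per group, 16 blocks per cluster):

#include <stdio.h>

int main(void)
{
	unsigned long long blocks_per_group = 32768;
	unsigned int cluster_bits = 4;		/* 16 blocks per cluster */
	unsigned int fe_group = 2;
	unsigned int fe_start = 10;		/* cluster offset in group */

	unsigned long long group_first_block = fe_group * blocks_per_group;
	unsigned long long block = group_first_block +
			((unsigned long long)fe_start << cluster_bits);

	printf("block %llu\n", block);		/* 65536 + 160 = 65696 */
	return 0;
}
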
index b57b98fb44d1457ec9f98e60290d6da3a90fb6c8..16ac228dbec64e424e6057fe122006f3b45310e9 100644 (file)
 #include <linux/module.h>
 #include <linux/slab.h>
 #include "ext4_jbd2.h"
-#include "ext4_extents.h"
 
 /*
  * The contiguous blocks details which can be
  * represented by a single extent
  */
-struct list_blocks_struct {
-       ext4_lblk_t first_block, last_block;
+struct migrate_struct {
+       ext4_lblk_t first_block, last_block, curr_block;
        ext4_fsblk_t first_pblock, last_pblock;
 };
 
 static int finish_range(handle_t *handle, struct inode *inode,
-                               struct list_blocks_struct *lb)
+                               struct migrate_struct *lb)
 
 {
        int retval = 0, needed;
@@ -87,8 +86,7 @@ err_out:
 }
 
 static int update_extent_range(handle_t *handle, struct inode *inode,
-                               ext4_fsblk_t pblock, ext4_lblk_t blk_num,
-                               struct list_blocks_struct *lb)
+                              ext4_fsblk_t pblock, struct migrate_struct *lb)
 {
        int retval;
        /*
@@ -96,9 +94,10 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
         */
        if (lb->first_pblock &&
                (lb->last_pblock+1 == pblock) &&
-               (lb->last_block+1 == blk_num)) {
+               (lb->last_block+1 == lb->curr_block)) {
                lb->last_pblock = pblock;
-               lb->last_block = blk_num;
+               lb->last_block = lb->curr_block;
+               lb->curr_block++;
                return 0;
        }
        /*
@@ -106,64 +105,49 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
         */
        retval = finish_range(handle, inode, lb);
        lb->first_pblock = lb->last_pblock = pblock;
-       lb->first_block = lb->last_block = blk_num;
-
+       lb->first_block = lb->last_block = lb->curr_block;
+       lb->curr_block++;
        return retval;
 }
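
update_extent_range() above is classic run coalescing: while each new physical block continues the previous run and the logical position tracked in curr_block stays contiguous, the range is extended; otherwise the finished range is flushed as a single extent. A stand-alone sketch of the same idea over an array of physical block numbers (0 marks a hole, as in the indirect-block walks below):

#include <stdio.h>

int main(void)
{
	/* physical block per logical block; 0 means a hole */
	unsigned long long pblk[] = { 100, 101, 102, 0, 200, 201 };
	unsigned long long first_p = 0, last_p = 0;
	unsigned int first_l = 0, last_l = 0, curr = 0, i;

	for (i = 0; i < sizeof(pblk) / sizeof(pblk[0]); i++, curr++) {
		if (!pblk[i])
			continue;		/* hole: only advance curr */
		if (first_p && pblk[i] == last_p + 1 && curr == last_l + 1) {
			last_p = pblk[i];	/* extend the current run */
			last_l = curr;
			continue;
		}
		if (first_p)			/* flush the finished run */
			printf("extent: logical %u..%u -> %llu..%llu\n",
			       first_l, last_l, first_p, last_p);
		first_p = last_p = pblk[i];
		first_l = last_l = curr;
	}
	if (first_p)
		printf("extent: logical %u..%u -> %llu..%llu\n",
		       first_l, last_l, first_p, last_p);
	return 0;
}
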
 
 static int update_ind_extent_range(handle_t *handle, struct inode *inode,
-                                  ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
-                                  struct list_blocks_struct *lb)
+                                  ext4_fsblk_t pblock,
+                                  struct migrate_struct *lb)
 {
        struct buffer_head *bh;
        __le32 *i_data;
        int i, retval = 0;
-       ext4_lblk_t blk_count = *blk_nump;
        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 
-       if (!pblock) {
-               /* Only update the file block number */
-               *blk_nump += max_entries;
-               return 0;
-       }
-
        bh = sb_bread(inode->i_sb, pblock);
        if (!bh)
                return -EIO;
 
        i_data = (__le32 *)bh->b_data;
-       for (i = 0; i < max_entries; i++, blk_count++) {
+       for (i = 0; i < max_entries; i++) {
                if (i_data[i]) {
                        retval = update_extent_range(handle, inode,
-                                               le32_to_cpu(i_data[i]),
-                                               blk_count, lb);
+                                               le32_to_cpu(i_data[i]), lb);
                        if (retval)
                                break;
+               } else {
+                       lb->curr_block++;
                }
        }
-
-       /* Update the file block number */
-       *blk_nump = blk_count;
        put_bh(bh);
        return retval;
 
 }
 
 static int update_dind_extent_range(handle_t *handle, struct inode *inode,
-                                   ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
-                                   struct list_blocks_struct *lb)
+                                   ext4_fsblk_t pblock,
+                                   struct migrate_struct *lb)
 {
        struct buffer_head *bh;
        __le32 *i_data;
        int i, retval = 0;
-       ext4_lblk_t blk_count = *blk_nump;
        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 
-       if (!pblock) {
-               /* Only update the file block number */
-               *blk_nump += max_entries * max_entries;
-               return 0;
-       }
        bh = sb_bread(inode->i_sb, pblock);
        if (!bh)
                return -EIO;
@@ -172,38 +156,28 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
        for (i = 0; i < max_entries; i++) {
                if (i_data[i]) {
                        retval = update_ind_extent_range(handle, inode,
-                                               le32_to_cpu(i_data[i]),
-                                               &blk_count, lb);
+                                               le32_to_cpu(i_data[i]), lb);
                        if (retval)
                                break;
                } else {
                        /* Only update the file block number */
-                       blk_count += max_entries;
+                       lb->curr_block += max_entries;
                }
        }
-
-       /* Update the file block number */
-       *blk_nump = blk_count;
        put_bh(bh);
        return retval;
 
 }
 
 static int update_tind_extent_range(handle_t *handle, struct inode *inode,
-                                    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
-                                    struct list_blocks_struct *lb)
+                                   ext4_fsblk_t pblock,
+                                   struct migrate_struct *lb)
 {
        struct buffer_head *bh;
        __le32 *i_data;
        int i, retval = 0;
-       ext4_lblk_t blk_count = *blk_nump;
        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 
-       if (!pblock) {
-               /* Only update the file block number */
-               *blk_nump += max_entries * max_entries * max_entries;
-               return 0;
-       }
        bh = sb_bread(inode->i_sb, pblock);
        if (!bh)
                return -EIO;
@@ -212,16 +186,14 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
        for (i = 0; i < max_entries; i++) {
                if (i_data[i]) {
                        retval = update_dind_extent_range(handle, inode,
-                                               le32_to_cpu(i_data[i]),
-                                               &blk_count, lb);
+                                               le32_to_cpu(i_data[i]), lb);
                        if (retval)
                                break;
-               } else
+               } else {
                        /* Only update the file block number */
-                       blk_count += max_entries * max_entries;
+                       lb->curr_block += max_entries * max_entries;
+               }
        }
-       /* Update the file block number */
-       *blk_nump = blk_count;
        put_bh(bh);
        return retval;
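
The curr_block bumps in these walks fall out of the indirect-block geometry: an indirect block holds blocksize/4 little-endian block numbers, so a missing single, double, or triple indirect block stands for max_entries, max_entries^2, or max_entries^3 logical blocks respectively. With 4 KiB blocks:

#include <stdio.h>

int main(void)
{
	unsigned long blocksize = 4096;
	unsigned long max_entries = blocksize >> 2;	/* 1024 entries */

	printf("ind:  %lu blocks\n", max_entries);		/* 1024 */
	printf("dind: %lu blocks\n", max_entries * max_entries);/* 1048576 */
	printf("tind: %llu blocks\n",				/* 1073741824 */
	       (unsigned long long)max_entries * max_entries * max_entries);
	return 0;
}
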
 
@@ -462,12 +434,12 @@ int ext4_ext_migrate(struct inode *inode)
        handle_t *handle;
        int retval = 0, i;
        __le32 *i_data;
-       ext4_lblk_t blk_count = 0;
        struct ext4_inode_info *ei;
        struct inode *tmp_inode = NULL;
-       struct list_blocks_struct lb;
+       struct migrate_struct lb;
        unsigned long max_entries;
        __u32 goal;
+       uid_t owner[2];
 
        /*
         * If the filesystem does not support extents, or the inode
@@ -495,10 +467,12 @@ int ext4_ext_migrate(struct inode *inode)
        }
        goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
                EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
+       owner[0] = inode->i_uid;
+       owner[1] = inode->i_gid;
        tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
-                                  S_IFREG, NULL, goal);
+                                  S_IFREG, NULL, goal, owner);
        if (IS_ERR(tmp_inode)) {
-               retval = -ENOMEM;
+               retval = PTR_ERR(tmp_inode);
                ext4_journal_stop(handle);
                return retval;
        }
@@ -507,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
         * Set the i_nlink to zero so it will be deleted later
         * when we drop the inode reference.
         */
-       tmp_inode->i_nlink = 0;
+       clear_nlink(tmp_inode);
 
        ext4_ext_tree_init(handle, tmp_inode);
        ext4_orphan_add(handle, tmp_inode);
@@ -551,35 +525,32 @@ int ext4_ext_migrate(struct inode *inode)
 
        /* 32 bit block address 4 bytes */
        max_entries = inode->i_sb->s_blocksize >> 2;
-       for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
+       for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
                if (i_data[i]) {
                        retval = update_extent_range(handle, tmp_inode,
-                                               le32_to_cpu(i_data[i]),
-                                               blk_count, &lb);
+                                               le32_to_cpu(i_data[i]), &lb);
                        if (retval)
                                goto err_out;
-               }
+               } else
+                       lb.curr_block++;
        }
        if (i_data[EXT4_IND_BLOCK]) {
                retval = update_ind_extent_range(handle, tmp_inode,
-                                       le32_to_cpu(i_data[EXT4_IND_BLOCK]),
-                                       &blk_count, &lb);
+                               le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
                        if (retval)
                                goto err_out;
        } else
-               blk_count +=  max_entries;
+               lb.curr_block += max_entries;
        if (i_data[EXT4_DIND_BLOCK]) {
                retval = update_dind_extent_range(handle, tmp_inode,
-                                       le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
-                                       &blk_count, &lb);
+                               le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
                        if (retval)
                                goto err_out;
        } else
-               blk_count += max_entries * max_entries;
+               lb.curr_block += max_entries * max_entries;
        if (i_data[EXT4_TIND_BLOCK]) {
                retval = update_tind_extent_range(handle, tmp_inode,
-                                       le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
-                                       &blk_count, &lb);
+                               le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
                        if (retval)
                                goto err_out;
        }
index 9bdef3f537c516999f5e84eece7335dfde42ebd6..7ea4ba4eff2ac4b9dbcae63a54ee68bb5e1a7190 100644 (file)
@@ -109,7 +109,7 @@ static int kmmpd(void *data)
        mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
        bdevname(bh->b_bdev, mmp->mmp_bdevname);
 
-       memcpy(mmp->mmp_nodename, init_utsname()->sysname,
+       memcpy(mmp->mmp_nodename, init_utsname()->nodename,
               sizeof(mmp->mmp_nodename));
 
        while (!kthread_should_stop()) {
@@ -125,8 +125,9 @@ static int kmmpd(void *data)
                 * Don't spew too many error messages. Print one every
                 * (s_mmp_update_interval * 60) seconds.
                 */
-               if (retval && (failed_writes % 60) == 0) {
-                       ext4_error(sb, "Error writing to MMP block");
+               if (retval) {
+                       if ((failed_writes % 60) == 0)
+                               ext4_error(sb, "Error writing to MMP block");
                        failed_writes++;
                }
 
@@ -295,7 +296,8 @@ skip:
        /*
         * write a new random sequence number.
         */
-       mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());
+       seq = mmp_new_seq();
+       mmp->mmp_seq = cpu_to_le32(seq);
 
        retval = write_mmp_block(bh);
        if (retval)
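
The split above matters on big-endian machines: in the old chained assignment, seq received the already byte-swapped le32 value, so any later host-order use of seq was wrong. A userspace sketch of the distinction, using the glibc <endian.h> helpers as stand-ins for cpu_to_le32()/le32_to_cpu():

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t seq = 0x12345678;		/* new sequence, CPU order */
	uint32_t on_disk = htole32(seq);	/* what mmp->mmp_seq stores */

	/* Correct pattern: keep seq in CPU order, convert only on store. */
	printf("round-trip ok: %d\n", le32toh(on_disk) == seq);

	/* Buggy pattern: seq = htole32(seq) leaves seq byte-swapped on
	 * big-endian, so later host-order comparisons against it fail. */
	return 0;
}
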
index f57455a1b1b281bdf21e12f63bc46087abe58194..c5826c623e7af8e19aa0baf5829ce5e25fcc1f71 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include "ext4_jbd2.h"
-#include "ext4_extents.h"
 #include "ext4.h"
 
 /**
index 1c924faeb6c8ed90441051f4b368b4856928d24a..aa4c782c9dd7844f74cc6b769ab762eaebef9dc2 100644 (file)
@@ -1586,7 +1586,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                        dxtrace(dx_show_index("node", frames[1].entries));
                        dxtrace(dx_show_index("node",
                               ((struct dx_node *) bh2->b_data)->entries));
-                       err = ext4_handle_dirty_metadata(handle, inode, bh2);
+                       err = ext4_handle_dirty_metadata(handle, dir, bh2);
                        if (err)
                                goto journal_error;
                        brelse (bh2);
@@ -1612,7 +1612,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                        if (err)
                                goto journal_error;
                }
-               err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+               err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
                if (err) {
                        ext4_std_error(inode->i_sb, err);
                        goto cleanup;
@@ -1694,7 +1694,7 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
        if (is_dx(inode) && inode->i_nlink > 1) {
                /* limit is 16-bit i_links_count */
                if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
-                       inode->i_nlink = 1;
+                       set_nlink(inode, 1);
                        EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
                                              EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
                }
@@ -1707,9 +1707,8 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
  */
 static void ext4_dec_count(handle_t *handle, struct inode *inode)
 {
-       drop_nlink(inode);
-       if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
-               inc_nlink(inode);
+       if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
+               drop_nlink(inode);
 }
 
 
@@ -1756,7 +1755,7 @@ retry:
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
 
-       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                inode->i_op = &ext4_file_inode_operations;
@@ -1792,7 +1791,7 @@ retry:
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
 
-       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                init_special_inode(inode, inode->i_mode, rdev);
@@ -1832,7 +1831,7 @@ retry:
                ext4_handle_sync(handle);
 
        inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
-                              &dentry->d_name, 0);
+                              &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
@@ -1861,9 +1860,9 @@ retry:
        de->name_len = 2;
        strcpy(de->name, "..");
        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
        BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-       err = ext4_handle_dirty_metadata(handle, dir, dir_block);
+       err = ext4_handle_dirty_metadata(handle, inode, dir_block);
        if (err)
                goto out_clear_inode;
        err = ext4_mark_inode_dirty(handle, inode);
@@ -2214,7 +2213,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
                ext4_warning(inode->i_sb,
                             "Deleting nonexistent file (%lu), %d",
                             inode->i_ino, inode->i_nlink);
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
        }
        retval = ext4_delete_entry(handle, dir, de, bh);
        if (retval)
@@ -2279,7 +2278,7 @@ retry:
                ext4_handle_sync(handle);
 
        inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
-                              &dentry->d_name, 0);
+                              &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
@@ -2530,7 +2529,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
                                                cpu_to_le32(new_dir->i_ino);
                BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
-               retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+               retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
                if (retval) {
                        ext4_std_error(old_dir->i_sb, retval);
                        goto end_rename;
@@ -2539,7 +2538,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                if (new_inode) {
                        /* checked empty_dir above, can't have another parent,
                         * ext4_dec_count() won't work for many-linked dirs */
-                       new_inode->i_nlink = 0;
+                       clear_nlink(new_inode);
                } else {
                        ext4_inc_count(handle, new_dir);
                        ext4_update_dx_flag(new_dir);
index 92f38ee13f8a9efacadcfac2fe8d674d5500aebc..7ce1d0b19c94576892d0d80f016f7db44110ccb7 100644 (file)
@@ -70,7 +70,6 @@ static void put_io_page(struct ext4_io_page *io_page)
 void ext4_free_io_end(ext4_io_end_t *io)
 {
        int i;
-       wait_queue_head_t *wq;
 
        BUG_ON(!io);
        if (io->page)
@@ -78,56 +77,43 @@ void ext4_free_io_end(ext4_io_end_t *io)
        for (i = 0; i < io->num_io_pages; i++)
                put_io_page(io->pages[i]);
        io->num_io_pages = 0;
-       wq = ext4_ioend_wq(io->inode);
-       if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
-           waitqueue_active(wq))
-               wake_up_all(wq);
+       if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
+               wake_up_all(ext4_ioend_wq(io->inode));
        kmem_cache_free(io_end_cachep, io);
 }
 
 /*
  * check a range of space and convert unwritten extents to written.
+ *
+ * Called with inode->i_mutex; we depend on this when we manipulate
+ * io->flag, since we could otherwise race with ext4_flush_completed_IO()
  */
 int ext4_end_io_nolock(ext4_io_end_t *io)
 {
        struct inode *inode = io->inode;
        loff_t offset = io->offset;
        ssize_t size = io->size;
-       wait_queue_head_t *wq;
        int ret = 0;
 
        ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
                   "list->prev 0x%p\n",
                   io, inode->i_ino, io->list.next, io->list.prev);
 
-       if (list_empty(&io->list))
-               return ret;
-
-       if (!(io->flag & EXT4_IO_END_UNWRITTEN))
-               return ret;
-
        ret = ext4_convert_unwritten_extents(inode, offset, size);
        if (ret < 0) {
-               printk(KERN_EMERG "%s: failed to convert unwritten "
-                       "extents to written extents, error is %d "
-                       "io is still on inode %lu aio dio list\n",
-                      __func__, ret, inode->i_ino);
-               return ret;
+               ext4_msg(inode->i_sb, KERN_EMERG,
+                        "failed to convert unwritten extents to written "
+                        "extents -- potential data loss!  "
+                        "(inode %lu, offset %llu, size %zd, error %d)",
+                        inode->i_ino, offset, size, ret);
        }
 
        if (io->iocb)
                aio_complete(io->iocb, io->result, 0);
-       /* clear the DIO AIO unwritten flag */
-       if (io->flag & EXT4_IO_END_UNWRITTEN) {
-               io->flag &= ~EXT4_IO_END_UNWRITTEN;
-               /* Wake up anyone waiting on unwritten extent conversion */
-               wq = ext4_ioend_wq(io->inode);
-               if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
-                   waitqueue_active(wq)) {
-                       wake_up_all(wq);
-               }
-       }
 
+       /* Wake up anyone waiting on unwritten extent conversion */
+       if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
+               wake_up_all(ext4_ioend_wq(io->inode));
        return ret;
 }
 
@@ -140,9 +126,15 @@ static void ext4_end_io_work(struct work_struct *work)
        struct inode            *inode = io->inode;
        struct ext4_inode_info  *ei = EXT4_I(inode);
        unsigned long           flags;
-       int                     ret;
+
+       spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+       if (list_empty(&io->list)) {
+               spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+               goto free;
+       }
 
        if (!mutex_trylock(&inode->i_mutex)) {
+               spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
                /*
                 * Requeue the work instead of waiting so that the work
                 * items queued after this can be processed.
@@ -159,17 +151,11 @@ static void ext4_end_io_work(struct work_struct *work)
                io->flag |= EXT4_IO_END_QUEUED;
                return;
        }
-       ret = ext4_end_io_nolock(io);
-       if (ret < 0) {
-               mutex_unlock(&inode->i_mutex);
-               return;
-       }
-
-       spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-       if (!list_empty(&io->list))
-               list_del_init(&io->list);
+       list_del_init(&io->list);
        spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+       (void) ext4_end_io_nolock(io);
        mutex_unlock(&inode->i_mutex);
+free:
        ext4_free_io_end(io);
 }
 
@@ -350,10 +336,8 @@ submit_and_retry:
        if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
            (io_end->pages[io_end->num_io_pages-1] != io_page))
                goto submit_and_retry;
-       if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-               io_end->flag |= EXT4_IO_END_UNWRITTEN;
-               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-       }
+       if (buffer_uninit(bh))
+               ext4_set_io_unwritten_flag(inode, io_end);
        io->io_end->size += bh->b_size;
        io->io_next_block++;
        ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
index 707d3f16f7ce63732222e78c247a8a87e668b433..996780ab4f4e83cdfc114e83ab8cad35242f4813 100644 (file)
@@ -875,7 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
        ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
        ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
-       ext4_free_blks_set(sb, gdp, input->free_blocks_count);
+       ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
        ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
        gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
@@ -937,8 +937,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
                input->reserved_blocks);
 
        /* Update the free space counts */
-       percpu_counter_add(&sbi->s_freeblocks_counter,
-                          input->free_blocks_count);
+       percpu_counter_add(&sbi->s_freeclusters_counter,
+                          EXT4_B2C(sbi, input->free_blocks_count));
        percpu_counter_add(&sbi->s_freeinodes_counter,
                           EXT4_INODES_PER_GROUP(sb));
 
@@ -946,8 +946,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
            sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group;
                flex_group = ext4_flex_group(sbi, input->group);
-               atomic_add(input->free_blocks_count,
-                          &sbi->s_flex_groups[flex_group].free_blocks);
+               atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
+                          &sbi->s_flex_groups[flex_group].free_clusters);
                atomic_add(EXT4_INODES_PER_GROUP(sb),
                           &sbi->s_flex_groups[flex_group].free_inodes);
        }
index 44d0c8db2239f958d08d4de4462cdc6ca936d1e1..9953d80145ad0f6331086053a7d80301c418f3a7 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/freezer.h>
 
 #include "ext4.h"
+#include "ext4_extents.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -163,8 +164,8 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 
-__u32 ext4_free_blks_count(struct super_block *sb,
-                             struct ext4_group_desc *bg)
+__u32 ext4_free_group_clusters(struct super_block *sb,
+                              struct ext4_group_desc *bg)
 {
        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
@@ -219,8 +220,8 @@ void ext4_inode_table_set(struct super_block *sb,
                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 }
 
-void ext4_free_blks_set(struct super_block *sb,
-                         struct ext4_group_desc *bg, __u32 count)
+void ext4_free_group_clusters_set(struct super_block *sb,
+                                 struct ext4_group_desc *bg, __u32 count)
 {
        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
@@ -414,6 +415,22 @@ static void save_error_info(struct super_block *sb, const char *func,
        ext4_commit_super(sb, 1);
 }
 
+/*
+ * The del_gendisk() function uninitializes the disk-specific data
+ * structures, including the bdi structure, without telling anyone
+ * else.  Once this happens, any attempt to call mark_buffer_dirty()
+ * (for example, by ext4_commit_super) will cause a kernel OOPS.
+ * This is a kludge to prevent these oopses until we can put a proper
+ * hook in del_gendisk() to inform the VFS and file system layers.
+ */
+static int block_device_ejected(struct super_block *sb)
+{
+       struct inode *bd_inode = sb->s_bdev->bd_inode;
+       struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
+
+       return bdi->dev == NULL;
+}
+
 
 /* Deal with the reporting of failure conditions on a filesystem such as
  * inconsistencies detected or read IO failures.
@@ -821,10 +838,10 @@ static void ext4_put_super(struct super_block *sb)
                brelse(sbi->s_group_desc[i]);
        ext4_kvfree(sbi->s_group_desc);
        ext4_kvfree(sbi->s_flex_groups);
-       percpu_counter_destroy(&sbi->s_freeblocks_counter);
+       percpu_counter_destroy(&sbi->s_freeclusters_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
-       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
+       percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
        brelse(sbi->s_sbh);
 #ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
@@ -1057,8 +1074,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_puts(seq, ",nouid32");
        if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
                seq_puts(seq, ",debug");
-       if (test_opt(sb, OLDALLOC))
-               seq_puts(seq, ",oldalloc");
 #ifdef CONFIG_EXT4_FS_XATTR
        if (test_opt(sb, XATTR_USER))
                seq_puts(seq, ",user_xattr");
@@ -1567,10 +1582,12 @@ static int parse_options(char *options, struct super_block *sb,
                        set_opt(sb, DEBUG);
                        break;
                case Opt_oldalloc:
-                       set_opt(sb, OLDALLOC);
+                       ext4_msg(sb, KERN_WARNING,
+                                "Ignoring deprecated oldalloc option");
                        break;
                case Opt_orlov:
-                       clear_opt(sb, OLDALLOC);
+                       ext4_msg(sb, KERN_WARNING,
+                                "Ignoring deprecated orlov option");
                        break;
 #ifdef CONFIG_EXT4_FS_XATTR
                case Opt_user_xattr:
@@ -1801,6 +1818,7 @@ set_qf_format:
                        break;
                case Opt_nodelalloc:
                        clear_opt(sb, DELALLOC);
+                       clear_opt2(sb, EXPLICIT_DELALLOC);
                        break;
                case Opt_mblk_io_submit:
                        set_opt(sb, MBLK_IO_SUBMIT);
@@ -1817,6 +1835,7 @@ set_qf_format:
                        break;
                case Opt_delalloc:
                        set_opt(sb, DELALLOC);
+                       set_opt2(sb, EXPLICIT_DELALLOC);
                        break;
                case Opt_block_validity:
                        set_opt(sb, BLOCK_VALIDITY);
@@ -1935,7 +1954,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
                res = MS_RDONLY;
        }
        if (read_only)
-               return res;
+               goto done;
        if (!(sbi->s_mount_state & EXT4_VALID_FS))
                ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
                         "running e2fsck is recommended");
@@ -1966,6 +1985,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 
        ext4_commit_super(sb, 1);
+done:
        if (test_opt(sb, DEBUG))
                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
                                "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
@@ -2015,8 +2035,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
                flex_group = ext4_flex_group(sbi, i);
                atomic_add(ext4_free_inodes_count(sb, gdp),
                           &sbi->s_flex_groups[flex_group].free_inodes);
-               atomic_add(ext4_free_blks_count(sb, gdp),
-                          &sbi->s_flex_groups[flex_group].free_blocks);
+               atomic_add(ext4_free_group_clusters(sb, gdp),
+                          &sbi->s_flex_groups[flex_group].free_clusters);
                atomic_add(ext4_used_dirs_count(sb, gdp),
                           &sbi->s_flex_groups[flex_group].used_dirs);
        }
@@ -2134,7 +2154,8 @@ static int ext4_check_descriptors(struct super_block *sb,
        if (NULL != first_not_zeroed)
                *first_not_zeroed = grp;
 
-       ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
+       ext4_free_blocks_count_set(sbi->s_es,
+                                  EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
        sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
        return 1;
 }
@@ -2454,7 +2475,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
                                              char *buf)
 {
        return snprintf(buf, PAGE_SIZE, "%llu\n",
-                       (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+               (s64) EXT4_C2B(sbi,
+                       percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 }
 
 static ssize_t session_write_kbytes_show(struct ext4_attr *a,
@@ -2682,6 +2704,13 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
                        return 0;
                }
        }
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
+           !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+               ext4_msg(sb, KERN_ERR,
+                        "Can't support bigalloc feature without "
+                        "extents feature");
+               return 0;
+       }
        return 1;
 }
 
@@ -3087,10 +3116,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        char *cp;
        const char *descr;
        int ret = -ENOMEM;
-       int blocksize;
+       int blocksize, clustersize;
        unsigned int db_count;
        unsigned int i;
-       int needs_recovery, has_huge_files;
+       int needs_recovery, has_huge_files, has_bigalloc;
        __u64 blocks_count;
        int err;
        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@@ -3224,6 +3253,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                           &journal_ioprio, NULL, 0))
                goto failed_mount;
 
+       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+               printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
+                           "with data=journal disables delayed "
+                           "allocation and O_DIRECT support!\n");
+               if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "both data=journal and delalloc");
+                       goto failed_mount;
+               }
+               if (test_opt(sb, DIOREAD_NOLOCK)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "both data=journal and dioread_nolock");
+                       goto failed_mount;
+               }
+               if (test_opt(sb, DELALLOC))
+                       clear_opt(sb, DELALLOC);
+       }
+
+       blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+       if (test_opt(sb, DIOREAD_NOLOCK)) {
+               if (blocksize < PAGE_SIZE) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "dioread_nolock if block size < PAGE_SIZE");
+                       goto failed_mount;
+               }
+       }
+
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
@@ -3265,8 +3321,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
                goto failed_mount;
 
-       blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
-
        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
            blocksize > EXT4_MAX_BLOCK_SIZE) {
                ext4_msg(sb, KERN_ERR,
@@ -3369,12 +3423,53 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                sb->s_dirt = 1;
        }
 
-       if (sbi->s_blocks_per_group > blocksize * 8) {
-               ext4_msg(sb, KERN_ERR,
-                      "#blocks per group too big: %lu",
-                      sbi->s_blocks_per_group);
-               goto failed_mount;
+       /* Handle clustersize */
+       clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
+       has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                               EXT4_FEATURE_RO_COMPAT_BIGALLOC);
+       if (has_bigalloc) {
+               if (clustersize < blocksize) {
+                       ext4_msg(sb, KERN_ERR,
+                                "cluster size (%d) smaller than "
+                                "block size (%d)", clustersize, blocksize);
+                       goto failed_mount;
+               }
+               sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
+                       le32_to_cpu(es->s_log_block_size);
+               sbi->s_clusters_per_group =
+                       le32_to_cpu(es->s_clusters_per_group);
+               if (sbi->s_clusters_per_group > blocksize * 8) {
+                       ext4_msg(sb, KERN_ERR,
+                                "#clusters per group too big: %lu",
+                                sbi->s_clusters_per_group);
+                       goto failed_mount;
+               }
+               if (sbi->s_blocks_per_group !=
+                   (sbi->s_clusters_per_group * (clustersize / blocksize))) {
+                       ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
+                                "clusters per group (%lu) inconsistent",
+                                sbi->s_blocks_per_group,
+                                sbi->s_clusters_per_group);
+                       goto failed_mount;
+               }
+       } else {
+               if (clustersize != blocksize) {
+                       ext4_warning(sb, "fragment/cluster size (%d) != "
+                                    "block size (%d)", clustersize,
+                                    blocksize);
+                       clustersize = blocksize;
+               }
+               if (sbi->s_blocks_per_group > blocksize * 8) {
+                       ext4_msg(sb, KERN_ERR,
+                                "#blocks per group too big: %lu",
+                                sbi->s_blocks_per_group);
+                       goto failed_mount;
+               }
+               sbi->s_clusters_per_group = sbi->s_blocks_per_group;
+               sbi->s_cluster_bits = 0;
        }
+       sbi->s_cluster_ratio = clustersize / blocksize;
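
Concretely, for a bigalloc file system with 4 KiB blocks and 64 KiB clusters the superblock carries s_log_block_size == 2 and s_log_cluster_size == 6 (both relative to the 1 KiB BLOCK_SIZE base), and the code above derives the geometry like this (illustrative numbers):

#include <stdio.h>

int main(void)
{
	unsigned int base = 1024;		/* kernel BLOCK_SIZE */
	unsigned int log_block_size = 2;	/* 4 KiB blocks */
	unsigned int log_cluster_size = 6;	/* 64 KiB clusters */

	unsigned int blocksize = base << log_block_size;
	unsigned int clustersize = base << log_cluster_size;
	unsigned int cluster_bits = log_cluster_size - log_block_size;

	/* prints: blocksize=4096 clustersize=65536 cluster_bits=4 ratio=16 */
	printf("blocksize=%u clustersize=%u cluster_bits=%u ratio=%u\n",
	       blocksize, clustersize, cluster_bits, clustersize / blocksize);
	return 0;
}
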
+
        if (sbi->s_inodes_per_group > blocksize * 8) {
                ext4_msg(sb, KERN_ERR,
                       "#inodes per group too big: %lu",
@@ -3446,10 +3541,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                goto failed_mount;
        }
 
-#ifdef CONFIG_PROC_FS
        if (ext4_proc_root)
                sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
-#endif
 
        bgl_lock_init(sbi->s_blockgroup_lock);
 
@@ -3483,8 +3576,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_err_report.function = print_daily_error_info;
        sbi->s_err_report.data = (unsigned long) sb;
 
-       err = percpu_counter_init(&sbi->s_freeblocks_counter,
-                       ext4_count_free_blocks(sb));
+       err = percpu_counter_init(&sbi->s_freeclusters_counter,
+                       ext4_count_free_clusters(sb));
        if (!err) {
                err = percpu_counter_init(&sbi->s_freeinodes_counter,
                                ext4_count_free_inodes(sb));
@@ -3494,7 +3587,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                ext4_count_dirs(sb));
        }
        if (!err) {
-               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+               err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
        }
        if (err) {
                ext4_msg(sb, KERN_ERR, "insufficient memory");
@@ -3609,13 +3702,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         * The journal may have updated the bg summary counts, so we
         * need to update the global counters.
         */
-       percpu_counter_set(&sbi->s_freeblocks_counter,
-                          ext4_count_free_blocks(sb));
+       percpu_counter_set(&sbi->s_freeclusters_counter,
+                          ext4_count_free_clusters(sb));
        percpu_counter_set(&sbi->s_freeinodes_counter,
                           ext4_count_free_inodes(sb));
        percpu_counter_set(&sbi->s_dirs_counter,
                           ext4_count_dirs(sb));
-       percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
+       percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
 
 no_journal:
        /*
@@ -3679,25 +3772,6 @@ no_journal:
                         "available");
        }
 
-       if (test_opt(sb, DELALLOC) &&
-           (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
-               ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
-                        "requested data journaling mode");
-               clear_opt(sb, DELALLOC);
-       }
-       if (test_opt(sb, DIOREAD_NOLOCK)) {
-               if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-                       ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
-                               "option - requested data journaling mode");
-                       clear_opt(sb, DIOREAD_NOLOCK);
-               }
-               if (sb->s_blocksize < PAGE_SIZE) {
-                       ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
-                               "option - block size is too small");
-                       clear_opt(sb, DIOREAD_NOLOCK);
-               }
-       }
-
        err = ext4_setup_system_zone(sb);
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to initialize system "
@@ -3710,22 +3784,19 @@ no_journal:
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
                         err);
-               goto failed_mount4;
+               goto failed_mount5;
        }
 
        err = ext4_register_li_request(sb, first_not_zeroed);
        if (err)
-               goto failed_mount4;
+               goto failed_mount6;
 
        sbi->s_kobj.kset = ext4_kset;
        init_completion(&sbi->s_kobj_unregister);
        err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
                                   "%s", sb->s_id);
-       if (err) {
-               ext4_mb_release(sb);
-               ext4_ext_release(sb);
-               goto failed_mount4;
-       };
+       if (err)
+               goto failed_mount7;
 
        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
        ext4_orphan_cleanup(sb, es);
@@ -3759,13 +3830,19 @@ cantfind_ext4:
                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
        goto failed_mount;
 
+failed_mount7:
+       ext4_unregister_li_request(sb);
+failed_mount6:
+       ext4_ext_release(sb);
+failed_mount5:
+       ext4_mb_release(sb);
+       ext4_release_system_zone(sb);
 failed_mount4:
        iput(root);
        sb->s_root = NULL;
        ext4_msg(sb, KERN_ERR, "mount failed");
        destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
 failed_mount_wq:
-       ext4_release_system_zone(sb);
        if (sbi->s_journal) {
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
@@ -3774,10 +3851,10 @@ failed_mount3:
        del_timer(&sbi->s_err_report);
        if (sbi->s_flex_groups)
                ext4_kvfree(sbi->s_flex_groups);
-       percpu_counter_destroy(&sbi->s_freeblocks_counter);
+       percpu_counter_destroy(&sbi->s_freeclusters_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
-       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
+       percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
 failed_mount2:
@@ -4064,7 +4141,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
        int error = 0;
 
-       if (!sbh)
+       if (!sbh || block_device_ejected(sb))
                return error;
        if (buffer_write_io_error(sbh)) {
                /*
@@ -4100,8 +4177,9 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        else
                es->s_kbytes_written =
                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
-       ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
-                                          &EXT4_SB(sb)->s_freeblocks_counter));
+       ext4_free_blocks_count_set(es,
+                       EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
+                               &EXT4_SB(sb)->s_freeclusters_counter)));
        es->s_free_inodes_count =
                cpu_to_le32(percpu_counter_sum_positive(
                                &EXT4_SB(sb)->s_freeinodes_counter));
@@ -4506,16 +4584,34 @@ restore_opts:
        return err;
 }
 
+/*
+ * Note: calculating the overhead so we can be compatible with
+ * historical BSD practice is quite difficult in the face of
+ * clusters/bigalloc.  This is because multiple metadata blocks from
+ * different block group can end up in the same allocation cluster.
+ * Calculating the exact overhead in the face of clustered allocation
+ * requires either O(all block bitmaps) in memory or O(number of block
+ * groups**2) in time.  We will still calculate the superblock for
+ * older file systems --- and if we come across with a bigalloc file
+ * system with zero in s_overhead_clusters the estimate will be close to
+ * correct especially for very large cluster sizes --- but for newer
+ * file systems, it's better to calculate this figure once at mkfs
+ * time, and store it in the superblock.  If the superblock value is
+ * present (even for non-bigalloc file systems), we will use it.
+ */
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct super_block *sb = dentry->d_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
+       struct ext4_group_desc *gdp;
        u64 fsid;
        s64 bfree;
 
        if (test_opt(sb, MINIX_DF)) {
                sbi->s_overhead_last = 0;
+       } else if (es->s_overhead_clusters) {
+               sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
        } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
                ext4_group_t i, ngroups = ext4_get_groups_count(sb);
                ext4_fsblk_t overhead = 0;
@@ -4530,24 +4626,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
                 * All of the blocks before first_data_block are
                 * overhead
                 */
-               overhead = le32_to_cpu(es->s_first_data_block);
+               overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
 
                /*
-                * Add the overhead attributed to the superblock and
-                * block group descriptors.  If the sparse superblocks
-                * feature is turned on, then not all groups have this.
+                * Add the overhead found in each block group
                 */
                for (i = 0; i < ngroups; i++) {
-                       overhead += ext4_bg_has_super(sb, i) +
-                               ext4_bg_num_gdb(sb, i);
+                       gdp = ext4_get_group_desc(sb, i, NULL);
+                       overhead += ext4_num_overhead_clusters(sb, i, gdp);
                        cond_resched();
                }
-
-               /*
-                * Every block group has an inode bitmap, a block
-                * bitmap, and an inode table.
-                */
-               overhead += ngroups * (2 + sbi->s_itb_per_group);
                sbi->s_overhead_last = overhead;
                smp_wmb();
                sbi->s_blocks_last = ext4_blocks_count(es);
@@ -4555,11 +4643,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 
        buf->f_type = EXT4_SUPER_MAGIC;
        buf->f_bsize = sb->s_blocksize;
-       buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
-       bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
-                      percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
+       buf->f_blocks = (ext4_blocks_count(es) -
+                        EXT4_C2B(sbi, sbi->s_overhead_last));
+       bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
+               percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
        /* prevent underflow in case that few free space is available */
-       buf->f_bfree = max_t(s64, bfree, 0);
+       buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
        buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
        if (buf->f_bfree < ext4_r_blocks_count(es))
                buf->f_bavail = 0;
@@ -4980,13 +5069,11 @@ static int __init ext4_init_fs(void)
                return err;
        err = ext4_init_system_zone();
        if (err)
-               goto out7;
+               goto out6;
        ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
        if (!ext4_kset)
-               goto out6;
-       ext4_proc_root = proc_mkdir("fs/ext4", NULL);
-       if (!ext4_proc_root)
                goto out5;
+       ext4_proc_root = proc_mkdir("fs/ext4", NULL);
 
        err = ext4_init_feat_adverts();
        if (err)
@@ -5022,12 +5109,12 @@ out2:
 out3:
        ext4_exit_feat_adverts();
 out4:
-       remove_proc_entry("fs/ext4", NULL);
-out5:
+       if (ext4_proc_root)
+               remove_proc_entry("fs/ext4", NULL);
        kset_unregister(ext4_kset);
-out6:
+out5:
        ext4_exit_system_zone();
-out7:
+out6:
        ext4_exit_pageio();
        return err;
 }
index c757adc972506d672c78b7de03e654ede7eb8b1e..93a00d89a220d4085c64e9d427c375e9ada76494 100644 (file)
@@ -820,8 +820,14 @@ inserted:
                        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
 
+                       /*
+                        * take i_data_sem because we will test
+                        * i_delalloc_reserved_flag in ext4_mb_new_blocks
+                        */
+                       down_read((&EXT4_I(inode)->i_data_sem));
                        block = ext4_new_meta_blocks(handle, inode, goal, 0,
                                                     NULL, &error);
+                       up_read((&EXT4_I(inode)->i_data_sem));
                        if (error)
                                goto cleanup;
 
@@ -985,11 +991,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
        no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
        ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
 
-       error = ext4_get_inode_loc(inode, &is.iloc);
-       if (error)
-               goto cleanup;
-
-       error = ext4_journal_get_write_access(handle, is.iloc.bh);
+       error = ext4_reserve_inode_write(handle, inode, &is.iloc);
        if (error)
                goto cleanup;
 
index 1726d7303047e1966bc4991b5264113a2dcb7a0d..808cac7edcfba8a05e9982cce77f940c530f95b0 100644 (file)
@@ -379,7 +379,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
                        return error;
                MSDOS_I(inode)->mmu_private = inode->i_size;
 
-               inode->i_nlink = fat_subdirs(inode);
+               set_nlink(inode, fat_subdirs(inode));
        } else { /* not a directory */
                inode->i_generation |= 1;
                inode->i_mode = fat_make_mode(sbi, de->attr,
@@ -1233,7 +1233,7 @@ static int fat_read_root(struct inode *inode)
        fat_save_attrs(inode, ATTR_DIR);
        inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
        inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
-       inode->i_nlink = fat_subdirs(inode)+2;
+       set_nlink(inode, fat_subdirs(inode)+2);
 
        return 0;
 }
index 66e83b84545572d86e07ee7c8a3470f2641a007f..216b419f30e26f6858ad1119c9e1b5210b0d5c00 100644 (file)
@@ -387,7 +387,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                /* the directory was completed, just return a error */
                goto out;
        }
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
        inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
        /* timestamp is already written, so mark_inode_dirty() is unneeded. */
 
index bb3f29c3557bbbb0c6b88b2bf525adb3e1dd0adb..a87a65663c2593fca80a06425e27c48397df6d96 100644 (file)
@@ -900,7 +900,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                goto out;
        }
        inode->i_version++;
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
        inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
        /* timestamp is already written, so mark_inode_dirty() is unneeded. */
 
index 1a4311437a8b6f6b100a0a946ffaaa0786c6d221..7b2af5abe2fa20d779582db7aad4ae1b167702cf 100644 (file)
@@ -227,7 +227,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
        ip->i_uid = (uid_t)vip->vii_uid;
        ip->i_gid = (gid_t)vip->vii_gid;
 
-       ip->i_nlink = vip->vii_nlink;
+       set_nlink(ip, vip->vii_nlink);
        ip->i_size = vip->vii_size;
 
        ip->i_atime.tv_sec = vip->vii_atime;
index 85542a7daf4012d2dffb7b7d4c86e0b6b917cb72..42593c587d48509604f3b9cfd8c0f9ffefe9ba0e 100644 (file)
@@ -231,7 +231,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
        if (iop)
                inode->i_op = iop;
        inode->i_fop = fop;
-       inode->i_nlink = nlink;
+       set_nlink(inode, nlink);
        inode->i_private = fc;
        d_add(dentry, inode);
        return dentry;
index add96f6ffda563738a90b7a7a1572a9a1e2f3c5b..3e6d727564792edd3b59dd6c509db3173e7a082f 100644 (file)
@@ -151,7 +151,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 
        inode->i_ino     = attr->ino;
        inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
-       inode->i_nlink   = attr->nlink;
+       set_nlink(inode, attr->nlink);
        inode->i_uid     = attr->uid;
        inode->i_gid     = attr->gid;
        inode->i_blocks  = attr->blocks;
index 78418b4fa8571a379d609812ea3c98c029d8184f..1656df7aacd21a7cfc0b7509741e258a9fc5e8f7 100644 (file)
@@ -299,7 +299,7 @@ static void gfs2_set_nlink(struct inode *inode, u32 nlink)
                if (nlink == 0)
                        clear_nlink(inode);
                else
-                       inode->i_nlink = nlink;
+                       set_nlink(inode, nlink);
        }
 }
 
index 3ebc437736febb4e0ef37a3612ea6501c7eac59a..1cbdeea1db4441b21c386458046d0b080101bafb 100644 (file)
@@ -46,11 +46,26 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
        case HFS_EXT_CNID:
                hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
                                    mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz));
+               if (HFS_I(tree->inode)->alloc_blocks >
+                                       HFS_I(tree->inode)->first_blocks) {
+                       printk(KERN_ERR "hfs: invalid btree extent records\n");
+                       unlock_new_inode(tree->inode);
+                       goto free_inode;
+               }
+
                tree->inode->i_mapping->a_ops = &hfs_btree_aops;
                break;
        case HFS_CAT_CNID:
                hfs_inode_read_fork(tree->inode, mdb->drCTExtRec, mdb->drCTFlSize,
                                    mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz));
+
+               if (!HFS_I(tree->inode)->first_blocks) {
+                       printk(KERN_ERR "hfs: invalid btree extent records "
+                                                               "(0 size).\n");
+                       unlock_new_inode(tree->inode);
+                       goto free_inode;
+               }
+
                tree->inode->i_mapping->a_ops = &hfs_btree_aops;
                break;
        default:
@@ -59,11 +74,6 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
        }
        unlock_new_inode(tree->inode);
 
-       if (!HFS_I(tree->inode)->first_blocks) {
-               printk(KERN_ERR "hfs: invalid btree extent records (0 size).\n");
-               goto free_inode;
-       }
-
        mapping = tree->inode->i_mapping;
        page = read_mapping_page(mapping, 0, NULL);
        if (IS_ERR(page))
index b4d70b13be92548c6ac2cd72ae5e34f3ab53ec21..bce4eef91a063c44f8386d49c97afa388da77f2b 100644 (file)
@@ -198,7 +198,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
        res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
        if (res) {
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                hfs_delete_inode(inode);
                iput(inode);
                return res;
@@ -227,7 +227,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
        res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
        if (res) {
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                hfs_delete_inode(inode);
                iput(inode);
                return res;
index 96a1b625fc74de04cd79ef2ba8fc2a2dba0301a1..a1a9fdcd2a00b57b4473b81c5b37e6d7a5be886b 100644 (file)
@@ -183,7 +183,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
        inode->i_mode = mode;
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
        HFS_I(inode)->flags = 0;
        HFS_I(inode)->rsrc_inode = NULL;
@@ -313,7 +313,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
        /* Initialize the inode */
        inode->i_uid = hsb->s_uid;
        inode->i_gid = hsb->s_gid;
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
 
        if (idata->key)
                HFS_I(inode)->cat_key = *idata->key;
index 25b2443a004cd070ab49fe94e167898d32271a56..4536cd3f15aea783282e1158b280127c7565aa0e 100644 (file)
@@ -415,7 +415,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
        goto out;
 
 out_err:
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        hfsplus_delete_inode(inode);
        iput(inode);
 out:
@@ -440,7 +440,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
 
        res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
        if (res) {
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                hfsplus_delete_inode(inode);
                iput(inode);
                goto out;
index 4cc1e3a36ec71f823b54ffbf72fd1fb4ecab9dba..40e1413be4cf476a2dd1e2be1f6a025935e092e5 100644 (file)
@@ -391,7 +391,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
        inode->i_mode = mode;
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
 
        hip = HFSPLUS_I(inode);
@@ -512,7 +512,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
                hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
                                        sizeof(struct hfsplus_cat_folder));
                hfsplus_get_perms(inode, &folder->permissions, 1);
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
                inode->i_size = 2 + be32_to_cpu(folder->valence);
                inode->i_atime = hfsp_mt2ut(folder->access_date);
                inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
@@ -532,11 +532,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
                hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ?
                                        &file->rsrc_fork : &file->data_fork);
                hfsplus_get_perms(inode, &file->permissions, 0);
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
                if (S_ISREG(inode->i_mode)) {
                        if (file->permissions.dev)
-                               inode->i_nlink =
-                                       be32_to_cpu(file->permissions.dev);
+                               set_nlink(inode,
+                                         be32_to_cpu(file->permissions.dev));
                        inode->i_op = &hfsplus_file_inode_operations;
                        inode->i_fop = &hfsplus_file_operations;
                        inode->i_mapping->a_ops = &hfsplus_aops;
index 0d22afdd4611d53383957b86bc8688cba62ef3d4..2f72da5ae6862bc9b387cd601de9b4e3993274e7 100644 (file)
@@ -541,7 +541,7 @@ static int read_name(struct inode *ino, char *name)
 
        ino->i_ino = st.ino;
        ino->i_mode = st.mode;
-       ino->i_nlink = st.nlink;
+       set_nlink(ino, st.nlink);
        ino->i_uid = st.uid;
        ino->i_gid = st.gid;
        ino->i_atime = st.atime;
index d51a98384bc03b0410f7ba3d5c021804511d10bf..dd7bc38a38251e9c123b08e87d42477f44f302b5 100644 (file)
@@ -16,7 +16,6 @@
 #include <sys/vfs.h>
 #include "hostfs.h"
 #include "os.h"
-#include "user.h"
 #include <utime.h>
 
 static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
index 96a8ed91ceddfe942dfe21d16529d2fb86567884..2fa0089a02a8ec2934cda55cbbae18e50c34a4ea 100644 (file)
@@ -247,7 +247,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
                        result->i_mode &= ~0111;
                        result->i_op = &hpfs_file_iops;
                        result->i_fop = &hpfs_file_ops;
-                       result->i_nlink = 1;
+                       set_nlink(result, 1);
                }
                unlock_new_inode(result);
        }
index 338cd8368451cd081a800aa77440ce237f24b592..3b2cec29972b167359359de4bd1891975ffd9cc0 100644 (file)
@@ -53,7 +53,7 @@ void hpfs_read_inode(struct inode *i)
                i->i_mode &= ~0111;
                i->i_op = &hpfs_file_iops;
                i->i_fop = &hpfs_file_ops;
-               i->i_nlink = 0;*/
+               clear_nlink(i);*/
                make_bad_inode(i);
                return;
        }
@@ -77,7 +77,7 @@ void hpfs_read_inode(struct inode *i)
                        i->i_mode = S_IFLNK | 0777;
                        i->i_op = &page_symlink_inode_operations;
                        i->i_data.a_ops = &hpfs_symlink_aops;
-                       i->i_nlink = 1;
+                       set_nlink(i, 1);
                        i->i_size = ea_size;
                        i->i_blocks = 1;
                        brelse(bh);
@@ -101,7 +101,7 @@ void hpfs_read_inode(struct inode *i)
                        }
                        if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
                                brelse(bh);
-                               i->i_nlink = 1;
+                               set_nlink(i, 1);
                                i->i_size = 0;
                                i->i_blocks = 1;
                                init_special_inode(i, mode,
@@ -125,13 +125,13 @@ void hpfs_read_inode(struct inode *i)
                hpfs_count_dnodes(i->i_sb, hpfs_inode->i_dno, &n_dnodes, &n_subdirs, NULL);
                i->i_blocks = 4 * n_dnodes;
                i->i_size = 2048 * n_dnodes;
-               i->i_nlink = 2 + n_subdirs;
+               set_nlink(i, 2 + n_subdirs);
        } else {
                i->i_mode |= S_IFREG;
                if (!hpfs_inode->i_ea_mode) i->i_mode &= ~0111;
                i->i_op = &hpfs_file_iops;
                i->i_fop = &hpfs_file_ops;
-               i->i_nlink = 1;
+               set_nlink(i, 1);
                i->i_size = le32_to_cpu(fnode->file_size);
                i->i_blocks = ((i->i_size + 511) >> 9) + 1;
                i->i_data.a_ops = &hpfs_aops;
index 2df69e2f07cf3ab68851241ae9681c177f051c32..ea91fcb0ef9b7bfc89d031fd14db4dfa7dd4d88e 100644 (file)
@@ -56,7 +56,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        result->i_fop = &hpfs_dir_ops;
        result->i_blocks = 4;
        result->i_size = 2048;
-       result->i_nlink = 2;
+       set_nlink(result, 2);
        if (dee.read_only)
                result->i_mode &= ~0222;
 
@@ -150,7 +150,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
        result->i_mode &= ~0111;
        result->i_op = &hpfs_file_iops;
        result->i_fop = &hpfs_file_ops;
-       result->i_nlink = 1;
+       set_nlink(result, 1);
        hpfs_i(result)->i_parent_dir = dir->i_ino;
        result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
        result->i_ctime.tv_nsec = 0;
@@ -242,7 +242,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
        hpfs_i(result)->i_ea_size = 0;
        result->i_uid = current_fsuid();
        result->i_gid = current_fsgid();
-       result->i_nlink = 1;
+       set_nlink(result, 1);
        result->i_size = 0;
        result->i_blocks = 1;
        init_special_inode(result, mode, rdev);
@@ -318,7 +318,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
        result->i_uid = current_fsuid();
        result->i_gid = current_fsgid();
        result->i_blocks = 1;
-       result->i_nlink = 1;
+       set_nlink(result, 1);
        result->i_size = strlen(symlink);
        result->i_op = &page_symlink_inode_operations;
        result->i_data.a_ops = &hpfs_symlink_aops;
index 970ea987b3f61b28a99016a5a8d596b429685c42..f590b1160c6c941a74253c6b0f242080377414db 100644 (file)
@@ -702,7 +702,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
        inode->i_ctime = proc_ino->i_ctime;
        inode->i_ino = proc_ino->i_ino;
        inode->i_mode = proc_ino->i_mode;
-       inode->i_nlink = proc_ino->i_nlink;
+       set_nlink(inode, proc_ino->i_nlink);
        inode->i_size = proc_ino->i_size;
        inode->i_blocks = proc_ino->i_blocks;
 
index ec889538e5a6afe4014922e9468845195ea56193..0be5a78598d02a71f627ac0acb0ec83365b41845 100644 (file)
@@ -970,7 +970,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 
        d_instantiate(path.dentry, inode);
        inode->i_size = size;
-       inode->i_nlink = 0;
+       clear_nlink(inode);
 
        error = -ENFILE;
        file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
index ecbb68dc7e2aef9bbf52d0b94c0dd401a7d50bae..ee4e66b998f40d170b1822db68875df73d445db6 100644 (file)
@@ -142,7 +142,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        atomic_set(&inode->i_count, 1);
        inode->i_op = &empty_iops;
        inode->i_fop = &empty_fops;
-       inode->i_nlink = 1;
+       inode->__i_nlink = 1;
        inode->i_opflags = 0;
        inode->i_uid = 0;
        inode->i_gid = 0;
index a5d03672d04edbd76f6014124922aea631ba9ed7..f950059525fc64574273f404c666e33bfb7b6666 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/statfs.h>
 #include <linux/cdrom.h>
 #include <linux/parser.h>
+#include <linux/mpage.h>
 
 #include "isofs.h"
 #include "zisofs.h"
@@ -1148,7 +1149,13 @@ struct buffer_head *isofs_bread(struct inode *inode, sector_t block)
 
 static int isofs_readpage(struct file *file, struct page *page)
 {
-       return block_read_full_page(page,isofs_get_block);
+       return mpage_readpage(page, isofs_get_block);
+}
+
+static int isofs_readpages(struct file *file, struct address_space *mapping,
+                       struct list_head *pages, unsigned nr_pages)
+{
+       return mpage_readpages(mapping, pages, nr_pages, isofs_get_block);
 }
 
 static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
@@ -1158,6 +1165,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
 
 static const struct address_space_operations isofs_aops = {
        .readpage = isofs_readpage,
+       .readpages = isofs_readpages,
        .bmap = _isofs_bmap
 };
 
@@ -1319,7 +1327,7 @@ static int isofs_read_inode(struct inode *inode)
                        inode->i_mode = S_IFDIR | sbi->s_dmode;
                else
                        inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
-               inode->i_nlink = 1;     /*
+               set_nlink(inode, 1);    /*
                                         * Set to 1.  We know there are 2, but
                                         * the find utility tries to optimize
                                         * if it is 2, and it screws up.  It is
@@ -1337,7 +1345,7 @@ static int isofs_read_inode(struct inode *inode)
                         */
                        inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO;
                }
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
        }
        inode->i_uid = sbi->s_uid;
        inode->i_gid = sbi->s_gid;
index 1fbc7de88f50ea6c11e3335eff02ec95e82da4b1..70e79d0c756a9418155239f18f68d55323809e08 100644 (file)
@@ -363,7 +363,7 @@ repeat:
                        break;
                case SIG('P', 'X'):
                        inode->i_mode = isonum_733(rr->u.PX.mode);
-                       inode->i_nlink = isonum_733(rr->u.PX.n_links);
+                       set_nlink(inode, isonum_733(rr->u.PX.n_links));
                        inode->i_uid = isonum_733(rr->u.PX.uid);
                        inode->i_gid = isonum_733(rr->u.PX.gid);
                        break;
@@ -496,7 +496,7 @@ repeat:
                                goto out;
                        }
                        inode->i_mode = reloc->i_mode;
-                       inode->i_nlink = reloc->i_nlink;
+                       set_nlink(inode, reloc->i_nlink);
                        inode->i_uid = reloc->i_uid;
                        inode->i_gid = reloc->i_gid;
                        inode->i_rdev = reloc->i_rdev;
index 9fe061fb8779be389155a05672b267c8071623e7..fea8dd661d2bbca7820013f10ef34baf87bb1190 100644 (file)
@@ -1135,6 +1135,14 @@ static int journal_get_superblock(journal_t *journal)
                goto out;
        }
 
+       if (be32_to_cpu(sb->s_first) == 0 ||
+           be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
+               printk(KERN_WARNING
+                       "JBD: Invalid start block of journal: %u\n",
+                       be32_to_cpu(sb->s_first));
+               goto out;
+       }
+
        return 0;
 
 out:
index eef6979821a4c8db91f48854c46f81c291ef4c27..68d704db787f108350f9cc47fd506628685399c7 100644 (file)
@@ -352,7 +352,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        J_ASSERT(commit_transaction->t_state == T_RUNNING);
 
        trace_jbd2_start_commit(journal, commit_transaction);
-       jbd_debug(1, "JBD: starting commit of transaction %d\n",
+       jbd_debug(1, "JBD2: starting commit of transaction %d\n",
                        commit_transaction->t_tid);
 
        write_lock(&journal->j_state_lock);
@@ -427,7 +427,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        __jbd2_journal_clean_checkpoint_list(journal);
        spin_unlock(&journal->j_list_lock);
 
-       jbd_debug (3, "JBD: commit phase 1\n");
+       jbd_debug(3, "JBD2: commit phase 1\n");
 
        /*
         * Switch to a new revoke table.
@@ -447,7 +447,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        wake_up(&journal->j_wait_transaction_locked);
        write_unlock(&journal->j_state_lock);
 
-       jbd_debug (3, "JBD: commit phase 2\n");
+       jbd_debug(3, "JBD2: commit phase 2\n");
 
        /*
         * Now start flushing things to disk, in the order they appear
@@ -462,7 +462,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                                          WRITE_SYNC);
        blk_finish_plug(&plug);
 
-       jbd_debug(3, "JBD: commit phase 2\n");
+       jbd_debug(3, "JBD2: commit phase 2\n");
 
        /*
         * Way to go: we have now written out all of the data for a
@@ -522,7 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
                        J_ASSERT (bufs == 0);
 
-                       jbd_debug(4, "JBD: get descriptor\n");
+                       jbd_debug(4, "JBD2: get descriptor\n");
 
                        descriptor = jbd2_journal_get_descriptor_buffer(journal);
                        if (!descriptor) {
@@ -531,7 +531,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                        }
 
                        bh = jh2bh(descriptor);
-                       jbd_debug(4, "JBD: got buffer %llu (%p)\n",
+                       jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
                                (unsigned long long)bh->b_blocknr, bh->b_data);
                        header = (journal_header_t *)&bh->b_data[0];
                        header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
@@ -625,7 +625,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                    commit_transaction->t_buffers == NULL ||
                    space_left < tag_bytes + 16) {
 
-                       jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
+                       jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
 
                        /* Write an end-of-descriptor marker before
                            submitting the IOs.  "tag" still points to
@@ -707,7 +707,7 @@ start_journal_io:
           so we incur less scheduling load.
        */
 
-       jbd_debug(3, "JBD: commit phase 3\n");
+       jbd_debug(3, "JBD2: commit phase 3\n");
 
        /*
         * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -771,7 +771,7 @@ wait_for_iobuf:
 
        J_ASSERT (commit_transaction->t_shadow_list == NULL);
 
-       jbd_debug(3, "JBD: commit phase 4\n");
+       jbd_debug(3, "JBD2: commit phase 4\n");
 
        /* Here we wait for the revoke record and descriptor record buffers */
  wait_for_ctlbuf:
@@ -801,7 +801,7 @@ wait_for_iobuf:
        if (err)
                jbd2_journal_abort(journal, err);
 
-       jbd_debug(3, "JBD: commit phase 5\n");
+       jbd_debug(3, "JBD2: commit phase 5\n");
        write_lock(&journal->j_state_lock);
        J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
        commit_transaction->t_state = T_COMMIT_JFLUSH;
@@ -830,7 +830,7 @@ wait_for_iobuf:
            transaction can be removed from any checkpoint list it was on
            before. */
 
-       jbd_debug(3, "JBD: commit phase 6\n");
+       jbd_debug(3, "JBD2: commit phase 6\n");
 
        J_ASSERT(list_empty(&commit_transaction->t_inode_list));
        J_ASSERT(commit_transaction->t_buffers == NULL);
@@ -964,7 +964,7 @@ restart_loop:
 
        /* Done with this transaction! */
 
-       jbd_debug(3, "JBD: commit phase 7\n");
+       jbd_debug(3, "JBD2: commit phase 7\n");
 
        J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
 
@@ -1039,7 +1039,7 @@ restart_loop:
                journal->j_commit_callback(journal, commit_transaction);
 
        trace_jbd2_end_commit(journal, commit_transaction);
-       jbd_debug(1, "JBD: commit %d complete, head %d\n",
+       jbd_debug(1, "JBD2: commit %d complete, head %d\n",
                  journal->j_commit_sequence, journal->j_tail_sequence);
        if (to_free)
                kfree(commit_transaction);
index f24df13adc4e9cfb5d71d851c959193e0f1e2b61..0fa0123151d3117c5d3678c1bc36bf7b5bb00cd4 100644 (file)
@@ -491,7 +491,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
                 */
 
                journal->j_commit_request = target;
-               jbd_debug(1, "JBD: requesting commit %d/%d\n",
+               jbd_debug(1, "JBD2: requesting commit %d/%d\n",
                          journal->j_commit_request,
                          journal->j_commit_sequence);
                wake_up(&journal->j_wait_commit);
@@ -500,7 +500,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
                /* This should never happen, but if it does, preserve
                   the evidence before kjournald goes into a loop and
                   increments j_commit_sequence beyond all recognition. */
-               WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
+               WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
                          journal->j_commit_request,
                          journal->j_commit_sequence,
                          target, journal->j_running_transaction ? 
@@ -645,7 +645,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
        }
 #endif
        while (tid_gt(tid, journal->j_commit_sequence)) {
-               jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
+               jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
                                  tid, journal->j_commit_sequence);
                wake_up(&journal->j_wait_commit);
                read_unlock(&journal->j_state_lock);
@@ -1093,7 +1093,7 @@ static int journal_reset(journal_t *journal)
        first = be32_to_cpu(sb->s_first);
        last = be32_to_cpu(sb->s_maxlen);
        if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
-               printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n",
+               printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
                       first, last);
                journal_fail_superblock(journal);
                return -EINVAL;
@@ -1139,7 +1139,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
         */
        if (sb->s_start == 0 && journal->j_tail_sequence ==
                                journal->j_transaction_sequence) {
-               jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
+               jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
                        "(start %ld, seq %d, errno %d)\n",
                        journal->j_tail, journal->j_tail_sequence,
                        journal->j_errno);
@@ -1163,7 +1163,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
        }
 
        read_lock(&journal->j_state_lock);
-       jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
+       jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
                  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
 
        sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -1216,8 +1216,8 @@ static int journal_get_superblock(journal_t *journal)
                ll_rw_block(READ, 1, &bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
-                       printk (KERN_ERR
-                               "JBD: IO error reading journal superblock\n");
+                       printk(KERN_ERR
+                               "JBD2: IO error reading journal superblock\n");
                        goto out;
                }
        }
@@ -1228,7 +1228,7 @@ static int journal_get_superblock(journal_t *journal)
 
        if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
            sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
-               printk(KERN_WARNING "JBD: no valid journal superblock found\n");
+               printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
                goto out;
        }
 
@@ -1240,14 +1240,22 @@ static int journal_get_superblock(journal_t *journal)
                journal->j_format_version = 2;
                break;
        default:
-               printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
+               printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
                goto out;
        }
 
        if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
                journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
        else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
-               printk (KERN_WARNING "JBD: journal file too short\n");
+               printk(KERN_WARNING "JBD2: journal file too short\n");
+               goto out;
+       }
+
+       if (be32_to_cpu(sb->s_first) == 0 ||
+           be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
+               printk(KERN_WARNING
+                       "JBD2: Invalid start block of journal: %u\n",
+                       be32_to_cpu(sb->s_first));
                goto out;
        }
 
@@ -1310,8 +1318,8 @@ int jbd2_journal_load(journal_t *journal)
                     ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
                    (sb->s_feature_incompat &
                     ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
-                       printk (KERN_WARNING
-                               "JBD: Unrecognised features on journal\n");
+                       printk(KERN_WARNING
+                               "JBD2: Unrecognised features on journal\n");
                        return -EINVAL;
                }
        }
@@ -1346,7 +1354,7 @@ int jbd2_journal_load(journal_t *journal)
        return 0;
 
 recovery_error:
-       printk (KERN_WARNING "JBD: recovery failed\n");
+       printk(KERN_WARNING "JBD2: recovery failed\n");
        return -EIO;
 }
 
@@ -1577,7 +1585,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
        struct buffer_head *bh;
 
        printk(KERN_WARNING
-               "JBD: Converting superblock from version 1 to 2.\n");
+               "JBD2: Converting superblock from version 1 to 2.\n");
 
        /* Pre-initialise new fields to zero */
        offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
@@ -1694,7 +1702,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
        if (!journal->j_tail)
                goto no_recovery;
 
-       printk (KERN_WARNING "JBD: %s recovery information on journal\n",
+       printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
                write ? "Clearing" : "Ignoring");
 
        err = jbd2_journal_skip_recovery(journal);
@@ -2020,7 +2028,7 @@ static int journal_init_jbd2_journal_head_cache(void)
        retval = 0;
        if (!jbd2_journal_head_cache) {
                retval = -ENOMEM;
-               printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
+               printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
        }
        return retval;
 }
@@ -2383,7 +2391,7 @@ static void __exit journal_exit(void)
 #ifdef CONFIG_JBD2_DEBUG
        int n = atomic_read(&nr_journal_heads);
        if (n)
-               printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+               printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
 #endif
        jbd2_remove_debugfs_entry();
        jbd2_remove_jbd_stats_proc_entry();
index 1cad869494f0ea21d294a9f15ce745c23d164080..da6d7baf1390c401cc52b1cb005cde1ff7776483 100644 (file)
@@ -89,7 +89,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
                err = jbd2_journal_bmap(journal, next, &blocknr);
 
                if (err) {
-                       printk (KERN_ERR "JBD: bad block at offset %u\n",
+                       printk(KERN_ERR "JBD2: bad block at offset %u\n",
                                next);
                        goto failed;
                }
@@ -138,14 +138,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
        *bhp = NULL;
 
        if (offset >= journal->j_maxlen) {
-               printk(KERN_ERR "JBD: corrupted journal superblock\n");
+               printk(KERN_ERR "JBD2: corrupted journal superblock\n");
                return -EIO;
        }
 
        err = jbd2_journal_bmap(journal, offset, &blocknr);
 
        if (err) {
-               printk (KERN_ERR "JBD: bad block at offset %u\n",
+               printk(KERN_ERR "JBD2: bad block at offset %u\n",
                        offset);
                return err;
        }
@@ -163,7 +163,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
        }
 
        if (!buffer_uptodate(bh)) {
-               printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
+               printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
                        offset);
                brelse(bh);
                return -EIO;
@@ -251,10 +251,10 @@ int jbd2_journal_recover(journal_t *journal)
        if (!err)
                err = do_one_pass(journal, &info, PASS_REPLAY);
 
-       jbd_debug(1, "JBD: recovery, exit status %d, "
+       jbd_debug(1, "JBD2: recovery, exit status %d, "
                  "recovered transactions %u to %u\n",
                  err, info.start_transaction, info.end_transaction);
-       jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
+       jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
                  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
 
        /* Restart the log at the next transaction ID, thus invalidating
@@ -293,14 +293,14 @@ int jbd2_journal_skip_recovery(journal_t *journal)
        err = do_one_pass(journal, &info, PASS_SCAN);
 
        if (err) {
-               printk(KERN_ERR "JBD: error %d scanning journal\n", err);
+               printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
                ++journal->j_transaction_sequence;
        } else {
 #ifdef CONFIG_JBD2_DEBUG
                int dropped = info.end_transaction - 
                        be32_to_cpu(journal->j_superblock->s_sequence);
                jbd_debug(1,
-                         "JBD: ignoring %d transaction%s from the journal.\n",
+                         "JBD2: ignoring %d transaction%s from the journal.\n",
                          dropped, (dropped == 1) ? "" : "s");
 #endif
                journal->j_transaction_sequence = ++info.end_transaction;
@@ -338,7 +338,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
                wrap(journal, *next_log_block);
                err = jread(&obh, journal, io_block);
                if (err) {
-                       printk(KERN_ERR "JBD: IO error %d recovering block "
+                       printk(KERN_ERR "JBD2: IO error %d recovering block "
                                "%lu in log\n", err, io_block);
                        return 1;
                } else {
@@ -411,7 +411,7 @@ static int do_one_pass(journal_t *journal,
                 * either the next descriptor block or the final commit
                 * record. */
 
-               jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
+               jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
                err = jread(&bh, journal, next_log_block);
                if (err)
                        goto failed;
@@ -491,8 +491,8 @@ static int do_one_pass(journal_t *journal,
                                        /* Recover what we can, but
                                         * report failure at the end. */
                                        success = err;
-                                       printk (KERN_ERR
-                                               "JBD: IO error %d recovering "
+                                       printk(KERN_ERR
+                                               "JBD2: IO error %d recovering "
                                                "block %ld in log\n",
                                                err, io_block);
                                } else {
@@ -520,7 +520,7 @@ static int do_one_pass(journal_t *journal,
                                                        journal->j_blocksize);
                                        if (nbh == NULL) {
                                                printk(KERN_ERR
-                                                      "JBD: Out of memory "
+                                                      "JBD2: Out of memory "
                                                       "during recovery.\n");
                                                err = -ENOMEM;
                                                brelse(bh);
@@ -689,7 +689,7 @@ static int do_one_pass(journal_t *journal,
                /* It's really bad news if different passes end up at
                 * different places (but possible due to IO errors). */
                if (info->end_transaction != next_commit_ID) {
-                       printk (KERN_ERR "JBD: recovery pass %d ended at "
+                       printk(KERN_ERR "JBD2: recovery pass %d ended at "
                                "transaction %u, expected %u\n",
                                pass, next_commit_ID, info->end_transaction);
                        if (!success)
index 2d7109414cdd6b7a4d21bdb2e738ff20581523a4..a0e41a4c080e9b2d3a51f5b4c6404cb8b8bc9c2b 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/highmem.h>
 #include <linux/hrtimer.h>
 #include <linux/backing-dev.h>
+#include <linux/bug.h>
 #include <linux/module.h>
 
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
@@ -115,7 +116,7 @@ static inline void update_t_max_wait(transaction_t *transaction,
  */
 
 static int start_this_handle(journal_t *journal, handle_t *handle,
-                            int gfp_mask)
+                            gfp_t gfp_mask)
 {
        transaction_t   *transaction, *new_transaction = NULL;
        tid_t           tid;
@@ -124,7 +125,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
        unsigned long ts = jiffies;
 
        if (nblocks > journal->j_max_transaction_buffers) {
-               printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+               printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
                       current->comm, nblocks,
                       journal->j_max_transaction_buffers);
                return -ENOSPC;
@@ -320,7 +321,7 @@ static handle_t *new_handle(int nblocks)
  * Return a pointer to a newly allocated handle, or an ERR_PTR() value
  * on failure.
  */
-handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
+handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
 {
        handle_t *handle = journal_current_handle();
        int err;
@@ -443,7 +444,7 @@ out:
  * transaction capabable of guaranteeing the requested number of
  * credits.
  */
-int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
+int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
 {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
@@ -563,7 +564,7 @@ static void warn_dirty_buffer(struct buffer_head *bh)
        char b[BDEVNAME_SIZE];
 
        printk(KERN_WARNING
-              "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+              "JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
               "There's a risk of filesystem corruption in case of system "
               "crash.\n",
               bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
@@ -1049,6 +1050,10 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
  * mark dirty metadata which needs to be journaled as part of the current
  * transaction.
  *
+ * The buffer must have previously had jbd2_journal_get_write_access()
+ * called so that it has a valid journal_head attached to the buffer
+ * head.
+ *
  * The buffer is placed on the transaction's metadata list and is marked
  * as belonging to the transaction.
  *
@@ -1065,11 +1070,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        struct journal_head *jh = bh2jh(bh);
+       int ret = 0;
 
        jbd_debug(5, "journal_head %p\n", jh);
        JBUFFER_TRACE(jh, "entry");
        if (is_handle_aborted(handle))
                goto out;
+       if (!buffer_jbd(bh)) {
+               ret = -EUCLEAN;
+               goto out;
+       }
 
        jbd_lock_bh_state(bh);
 
@@ -1093,8 +1103,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
         */
        if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
                JBUFFER_TRACE(jh, "fastpath");
-               J_ASSERT_JH(jh, jh->b_transaction ==
-                                       journal->j_running_transaction);
+               if (unlikely(jh->b_transaction !=
+                            journal->j_running_transaction)) {
+                       printk(KERN_EMERG "JBD: %s: "
+                              "jh->b_transaction (%llu, %p, %u) != "
+                              "journal->j_running_transaction (%p, %u)",
+                              journal->j_devname,
+                              (unsigned long long) bh->b_blocknr,
+                              jh->b_transaction,
+                              jh->b_transaction ? jh->b_transaction->t_tid : 0,
+                              journal->j_running_transaction,
+                              journal->j_running_transaction ?
+                              journal->j_running_transaction->t_tid : 0);
+                       ret = -EINVAL;
+               }
                goto out_unlock_bh;
        }
 
@@ -1108,9 +1130,32 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
         */
        if (jh->b_transaction != transaction) {
                JBUFFER_TRACE(jh, "already on other transaction");
-               J_ASSERT_JH(jh, jh->b_transaction ==
-                                       journal->j_committing_transaction);
-               J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
+               if (unlikely(jh->b_transaction !=
+                            journal->j_committing_transaction)) {
+                       printk(KERN_EMERG "JBD: %s: "
+                              "jh->b_transaction (%llu, %p, %u) != "
+                              "journal->j_committing_transaction (%p, %u)",
+                              journal->j_devname,
+                              (unsigned long long) bh->b_blocknr,
+                              jh->b_transaction,
+                              jh->b_transaction ? jh->b_transaction->t_tid : 0,
+                              journal->j_committing_transaction,
+                              journal->j_committing_transaction ?
+                              journal->j_committing_transaction->t_tid : 0);
+                       ret = -EINVAL;
+               }
+               if (unlikely(jh->b_next_transaction != transaction)) {
+                       printk(KERN_EMERG "JBD: %s: "
+                              "jh->b_next_transaction (%llu, %p, %u) != "
+                              "transaction (%p, %u)",
+                              journal->j_devname,
+                              (unsigned long long) bh->b_blocknr,
+                              jh->b_next_transaction,
+                              jh->b_next_transaction ?
+                              jh->b_next_transaction->t_tid : 0,
+                              transaction, transaction->t_tid);
+                       ret = -EINVAL;
+               }
                /* And this case is illegal: we can't reuse another
                 * transaction's data buffer, ever. */
                goto out_unlock_bh;
@@ -1127,7 +1172,8 @@ out_unlock_bh:
        jbd_unlock_bh_state(bh);
 out:
        JBUFFER_TRACE(jh, "exit");
-       return 0;
+       WARN_ON(ret);   /* All errors are bugs, so dump the stack */
+       return ret;
 }
 
 /*
index 9659b7c00468064cdc9e51be4d68d06229fc85eb..be6169bd8acdcc90fec588645f5a671c42c7deea 100644 (file)
@@ -245,7 +245,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
        ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
                              dentry->d_name.len, dead_f, now);
        if (dead_f->inocache)
-               dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink;
+               set_nlink(dentry->d_inode, dead_f->inocache->pino_nlink);
        if (!ret)
                dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
        return ret;
@@ -278,7 +278,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
 
        if (!ret) {
                mutex_lock(&f->sem);
-               old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink;
+               set_nlink(old_dentry->d_inode, ++f->inocache->pino_nlink);
                mutex_unlock(&f->sem);
                d_instantiate(dentry, old_dentry->d_inode);
                dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
@@ -497,7 +497,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
        f = JFFS2_INODE_INFO(inode);
 
        /* Directories get nlink 2 at start */
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
        /* but ic->pino_nlink is the parent ino# */
        f->inocache->pino_nlink = dir_i->i_ino;
 
index bbcb9755dd2b6c85b2df88486622dca7d817951a..7286e44ac66540822f65cd72bc4974435d8a9633 100644 (file)
@@ -278,7 +278,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
        inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
        inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
 
-       inode->i_nlink = f->inocache->pino_nlink;
+       set_nlink(inode, f->inocache->pino_nlink);
 
        inode->i_blocks = (inode->i_size + 511) >> 9;
 
@@ -291,7 +291,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
        case S_IFDIR:
        {
                struct jffs2_full_dirent *fd;
-               inode->i_nlink = 2; /* parent and '.' */
+               set_nlink(inode, 2); /* parent and '.' */
 
                for (fd=f->dents; fd; fd = fd->next) {
                        if (fd->type == DT_DIR && fd->ino)
@@ -453,7 +453,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
                iput(inode);
                return ERR_PTR(ret);
        }
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
        inode->i_ino = je32_to_cpu(ri->ino);
        inode->i_mode = jemode_to_cpu(ri->mode);
        inode->i_gid = je16_to_cpu(ri->gid);
index b78b2f978f043dae0f18c24713532218b3201582..1b6f15f191b36b133c4179bfd64fb6593f047ea7 100644 (file)
@@ -457,7 +457,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
        /* read the page of fixed disk inode (AIT) in raw mode */
        mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
        if (mp == NULL) {
-               ip->i_nlink = 1;        /* Don't want iput() deleting it */
+               set_nlink(ip, 1);       /* Don't want iput() deleting it */
                iput(ip);
                return (NULL);
        }
@@ -469,7 +469,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
        /* copy on-disk inode to in-memory inode */
        if ((copy_from_dinode(dp, ip)) != 0) {
                /* handle bad return by returning NULL for ip */
-               ip->i_nlink = 1;        /* Don't want iput() deleting it */
+               set_nlink(ip, 1);       /* Don't want iput() deleting it */
                iput(ip);
                /* release the page */
                release_metapage(mp);
@@ -3076,7 +3076,7 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
                                ip->i_mode |= 0001;
                }
        }
-       ip->i_nlink = le32_to_cpu(dip->di_nlink);
+       set_nlink(ip, le32_to_cpu(dip->di_nlink));
 
        jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
        if (sbi->uid == -1)
index 2686531e235ab0e484e50bbdca018a5a55ef4b1d..c1a3e603279c9cbe4fb141fc2b9bdcfa1d76a033 100644 (file)
@@ -157,7 +157,7 @@ fail_drop:
        dquot_drop(inode);
        inode->i_flags |= S_NOQUOTA;
 fail_unlock:
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        unlock_new_inode(inode);
 fail_put:
        iput(inode);
index e17545e15664d77cf5d3af35c2c34c99123b929d..a112ad96e4749cd7641f4830549ac65cf7da92f7 100644 (file)
@@ -172,7 +172,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
        mutex_unlock(&JFS_IP(dip)->commit_mutex);
        if (rc) {
                free_ea_wmap(ip);
-               ip->i_nlink = 0;
+               clear_nlink(ip);
                unlock_new_inode(ip);
                iput(ip);
        } else {
@@ -292,7 +292,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
                goto out3;
        }
 
-       ip->i_nlink = 2;        /* for '.' */
+       set_nlink(ip, 2);       /* for '.' */
        ip->i_op = &jfs_dir_inode_operations;
        ip->i_fop = &jfs_dir_operations;
 
@@ -311,7 +311,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
        mutex_unlock(&JFS_IP(dip)->commit_mutex);
        if (rc) {
                free_ea_wmap(ip);
-               ip->i_nlink = 0;
+               clear_nlink(ip);
                unlock_new_inode(ip);
                iput(ip);
        } else {
@@ -844,7 +844,7 @@ static int jfs_link(struct dentry *old_dentry,
        rc = txCommit(tid, 2, &iplist[0], 0);
 
        if (rc) {
-               ip->i_nlink--; /* never instantiated */
+               drop_nlink(ip); /* never instantiated */
                iput(ip);
        } else
                d_instantiate(dentry, ip);
@@ -1048,7 +1048,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
        mutex_unlock(&JFS_IP(dip)->commit_mutex);
        if (rc) {
                free_ea_wmap(ip);
-               ip->i_nlink = 0;
+               clear_nlink(ip);
                unlock_new_inode(ip);
                iput(ip);
        } else {
@@ -1433,7 +1433,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
        mutex_unlock(&JFS_IP(dir)->commit_mutex);
        if (rc) {
                free_ea_wmap(ip);
-               ip->i_nlink = 0;
+               clear_nlink(ip);
                unlock_new_inode(ip);
                iput(ip);
        } else {
index 06c8a67cbe762ed29a801f79d2ed40c2ccd59c12..a44eff076c171a4d11b9c6029138e6f272c8ee00 100644 (file)
@@ -485,7 +485,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
                goto out_unload;
        }
        inode->i_ino = 0;
-       inode->i_nlink = 1;
        inode->i_size = sb->s_bdev->bd_inode->i_size;
        inode->i_mapping->a_ops = &jfs_metapage_aops;
        insert_inode_hash(inode);
index c18e9a1235b6594373d5c18f2e615b5bf4b06596..f6d411eef1e73d52c4d62c3b65432565accdff49 100644 (file)
@@ -490,7 +490,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
        root = d_alloc_root(inode);
        if (!root) {
                iput(inode);
@@ -510,8 +510,10 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
                if (!dentry)
                        goto out;
                inode = new_inode(s);
-               if (!inode)
+               if (!inode) {
+                       dput(dentry);
                        goto out;
+               }
                inode->i_mode = S_IFREG | files->mode;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                inode->i_fop = files->ops;
index b3ff3d894165f6ae0ef2108e656f9eef904f4fa8..b7d7f67cee5acc60b254722c175bf35d5dcc021e 100644 (file)
@@ -197,7 +197,7 @@ static int logfs_remove_inode(struct inode *inode)
 {
        int ret;
 
-       inode->i_nlink--;
+       drop_nlink(inode);
        ret = write_inode(inode);
        LOGFS_BUG_ON(ret, inode->i_sb);
        return ret;
@@ -433,7 +433,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
 
        ta = kzalloc(sizeof(*ta), GFP_KERNEL);
        if (!ta) {
-               inode->i_nlink--;
+               drop_nlink(inode);
                iput(inode);
                return -ENOMEM;
        }
@@ -456,7 +456,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
                abort_transaction(inode, ta);
                li->li_flags |= LOGFS_IF_STILLBORN;
                /* FIXME: truncate symlink */
-               inode->i_nlink--;
+               drop_nlink(inode);
                iput(inode);
                goto out;
        }
@@ -563,7 +563,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
 
        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
        ihold(inode);
-       inode->i_nlink++;
+       inc_nlink(inode);
        mark_inode_dirty_sync(inode);
 
        return __logfs_create(dir, dentry, inode, NULL, 0);
index edfea7a3a747fa2c984a5817d82909fdccc7d0ea..7e441ad5f7923522f739354592e250a277d6076b 100644 (file)
@@ -93,7 +93,7 @@ static struct inode *__logfs_iget(struct super_block *sb, ino_t ino)
                /* inode->i_nlink == 0 can be true when called from
                 * block validator */
                /* set i_nlink to 0 to prevent caching */
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE;
                iget_failed(inode);
                if (!err)
@@ -199,7 +199,6 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
        inode->i_blocks = 0;
        inode->i_ctime  = CURRENT_TIME;
        inode->i_mtime  = CURRENT_TIME;
-       inode->i_nlink  = 1;
        li->li_refcount = 1;
        INIT_LIST_HEAD(&li->li_freeing_list);
 
index d8d09380c7deafd2b27e10606ef2f951fb044083..2ac4217b7901cb60b726986fc964f29b3e09168f 100644 (file)
@@ -126,7 +126,7 @@ static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
        inode->i_atime  = be64_to_timespec(di->di_atime);
        inode->i_ctime  = be64_to_timespec(di->di_ctime);
        inode->i_mtime  = be64_to_timespec(di->di_mtime);
-       inode->i_nlink  = be32_to_cpu(di->di_refcount);
+       set_nlink(inode, be32_to_cpu(di->di_refcount));
        inode->i_generation = be32_to_cpu(di->di_generation);
 
        switch (inode->i_mode & S_IFMT) {
index e7d23e25bf1d100a6706210120f98a69914a7298..64cdcd662ffccca98fecad85103f10a2348a76bc 100644 (file)
@@ -446,7 +446,7 @@ static struct inode *V1_minix_iget(struct inode *inode)
        inode->i_mode = raw_inode->i_mode;
        inode->i_uid = (uid_t)raw_inode->i_uid;
        inode->i_gid = (gid_t)raw_inode->i_gid;
-       inode->i_nlink = raw_inode->i_nlinks;
+       set_nlink(inode, raw_inode->i_nlinks);
        inode->i_size = raw_inode->i_size;
        inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
        inode->i_mtime.tv_nsec = 0;
@@ -479,7 +479,7 @@ static struct inode *V2_minix_iget(struct inode *inode)
        inode->i_mode = raw_inode->i_mode;
        inode->i_uid = (uid_t)raw_inode->i_uid;
        inode->i_gid = (gid_t)raw_inode->i_gid;
-       inode->i_nlink = raw_inode->i_nlinks;
+       set_nlink(inode, raw_inode->i_nlinks);
        inode->i_size = raw_inode->i_size;
        inode->i_mtime.tv_sec = raw_inode->i_mtime;
        inode->i_atime.tv_sec = raw_inode->i_atime;
index 7657be4352bf641aac7590ca388ec20133add227..ac6d214da82790a8f16671e59dfffaf49e4c75bd 100644 (file)
@@ -137,7 +137,7 @@ static int do_getname(const char __user *filename, char *page)
        return retval;
 }
 
-static char *getname_flags(const char __user * filename, int flags)
+static char *getname_flags(const char __user *filename, int flags, int *empty)
 {
        char *tmp, *result;
 
@@ -148,6 +148,8 @@ static char *getname_flags(const char __user * filename, int flags)
 
                result = tmp;
                if (retval < 0) {
+                       if (retval == -ENOENT && empty)
+                               *empty = 1;
                        if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
                                __putname(tmp);
                                result = ERR_PTR(retval);
@@ -160,7 +162,7 @@ static char *getname_flags(const char __user * filename, int flags)
 
 char *getname(const char __user * filename)
 {
-       return getname_flags(filename, 0);
+       return getname_flags(filename, 0, 0);
 }
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -1798,11 +1800,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
        return __lookup_hash(&this, base, NULL);
 }
 
-int user_path_at(int dfd, const char __user *name, unsigned flags,
-                struct path *path)
+int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
+                struct path *path, int *empty)
 {
        struct nameidata nd;
-       char *tmp = getname_flags(name, flags);
+       char *tmp = getname_flags(name, flags, empty);
        int err = PTR_ERR(tmp);
        if (!IS_ERR(tmp)) {
 
@@ -1816,6 +1818,12 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
        return err;
 }
 
+int user_path_at(int dfd, const char __user *name, unsigned flags,
+                struct path *path)
+{
+       return user_path_at_empty(dfd, name, flags, path, 0);
+}
+
 static int user_path_parent(int dfd, const char __user *path,
                        struct nameidata *nd, char **name)
 {
index 202f370526a724979ed3a8af6d1d6156027e5a09..5b5fa33b6b9dfd0384ca0cd7654363d8e2db560c 100644 (file)
@@ -228,7 +228,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
 
        DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode);
 
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
        inode->i_uid = server->m.uid;
        inode->i_gid = server->m.gid;
 
index 4dc6d078f10824129c88c64267cc38c21cd8d16a..c07a55aec83867ee1489ff29134862bc891dfa82 100644 (file)
@@ -320,7 +320,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
                inode->i_version = 0;
                inode->i_size = 0;
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                inode->i_uid = -2;
                inode->i_gid = -2;
                inode->i_blocks = 0;
@@ -355,7 +355,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                                | NFS_INO_INVALID_DATA
                                | NFS_INO_REVAL_PAGECACHE;
                if (fattr->valid & NFS_ATTR_FATTR_NLINK)
-                       inode->i_nlink = fattr->nlink;
+                       set_nlink(inode, fattr->nlink);
                else if (nfs_server_capable(inode, NFS_CAP_NLINK))
                        nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                if (fattr->valid & NFS_ATTR_FATTR_OWNER)
@@ -1361,7 +1361,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        invalid |= NFS_INO_INVALID_ATTR;
                        if (S_ISDIR(inode->i_mode))
                                invalid |= NFS_INO_INVALID_DATA;
-                       inode->i_nlink = fattr->nlink;
+                       set_nlink(inode, fattr->nlink);
                }
        } else if (server->caps & NFS_CAP_NLINK)
                invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
index 66d095d7955ec0a3bea156370fee9e2955afd022..b6fa792d6b858b5950c483092e7c05ca8c583446 100644 (file)
@@ -655,7 +655,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x)
        default:
                return nfserr_bad_xdr;
        }
-       w &= !NFS4_SHARE_ACCESS_MASK;
+       w &= ~NFS4_SHARE_ACCESS_MASK;
        if (!w)
                return nfs_ok;
        if (!argp->minorversion)
index 666628b395f136447f5d27482234f69e0190a90c..b50ffb72e5b32e8486582e8a0f6e043fe736546e 100644 (file)
@@ -354,7 +354,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
 
  failed_acl:
  failed_bmap:
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        iput(inode);  /* raw_inode will be deleted through
                         generic_delete_inode() */
        goto failed;
@@ -396,7 +396,7 @@ int nilfs_read_inode_common(struct inode *inode,
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid);
        inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid);
-       inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+       set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
        inode->i_size = le64_to_cpu(raw_inode->i_size);
        inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
        inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
index a3141990061e28ec19b8997b132f2d3eb4bcc63b..768982de10e45676e6c040784566de7aade07510 100644 (file)
@@ -289,7 +289,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
                nilfs_warning(inode->i_sb, __func__,
                              "deleting nonexistent file (%lu), %d\n",
                              inode->i_ino, inode->i_nlink);
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
        }
        err = nilfs_delete_entry(de, page);
        if (err)
index 1371487da955aaa17d2274d08648cb53b0f96632..97e2dacbc867ea0572cf72ebe53c2fe0cbfaa4a3 100644 (file)
@@ -612,7 +612,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
         * might be tricky due to vfs interactions. Need to think about this
         * some more when implementing the unlink command.
         */
-       vi->i_nlink = le16_to_cpu(m->link_count);
+       set_nlink(vi, le16_to_cpu(m->link_count));
        /*
         * FIXME: Reparse points can have the directory bit set even though
         * they would be S_IFLNK. Need to deal with this further below when we
@@ -634,7 +634,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
                vi->i_mode &= ~vol->dmask;
                /* Things break without this kludge! */
                if (vi->i_nlink > 1)
-                       vi->i_nlink = 1;
+                       set_nlink(vi, 1);
        } else {
                vi->i_mode |= S_IFREG;
                /* Apply the file permissions mask set in the mount options. */
@@ -1242,7 +1242,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
        vi->i_version   = base_vi->i_version;
        vi->i_uid       = base_vi->i_uid;
        vi->i_gid       = base_vi->i_gid;
-       vi->i_nlink     = base_vi->i_nlink;
+       set_nlink(vi, base_vi->i_nlink);
        vi->i_mtime     = base_vi->i_mtime;
        vi->i_ctime     = base_vi->i_ctime;
        vi->i_atime     = base_vi->i_atime;
@@ -1508,7 +1508,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
        vi->i_version   = base_vi->i_version;
        vi->i_uid       = base_vi->i_uid;
        vi->i_gid       = base_vi->i_gid;
-       vi->i_nlink     = base_vi->i_nlink;
+       set_nlink(vi, base_vi->i_nlink);
        vi->i_mtime     = base_vi->i_mtime;
        vi->i_ctime     = base_vi->i_ctime;
        vi->i_atime     = base_vi->i_atime;
index 8582e3f4f120647d05df81556639e627882b5811..e2878b5895fb543a86c11f0128b025dbb0335c38 100644 (file)
@@ -2292,7 +2292,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
        ocfs2_journal_dirty(handle, di_bh);
 
        i_size_write(inode, size);
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
        inode->i_blocks = ocfs2_inode_sector_count(inode);
 
        ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
@@ -2354,7 +2354,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
        ocfs2_journal_dirty(handle, new_bh);
 
        i_size_write(inode, inode->i_sb->s_blocksize);
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
        inode->i_blocks = ocfs2_inode_sector_count(inode);
        status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
        if (status < 0) {
index 7642d7ca73e523f3bc29b7b8a5d767e8f7c7cb83..e1ed5e502ff25dc8afe39de464949ba13c2f9892 100644 (file)
@@ -2092,7 +2092,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
        inode->i_uid     = be32_to_cpu(lvb->lvb_iuid);
        inode->i_gid     = be32_to_cpu(lvb->lvb_igid);
        inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
-       inode->i_nlink   = be16_to_cpu(lvb->lvb_inlink);
+       set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
        ocfs2_unpack_timespec(&inode->i_atime,
                              be64_to_cpu(lvb->lvb_iatime_packed));
        ocfs2_unpack_timespec(&inode->i_mtime,
index b4c8bb6b8d281f74feb175580696eb1169819677..a22d2c098890a9ca67e2056976bfc9869d74a325 100644 (file)
@@ -291,7 +291,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
                     (unsigned long long)OCFS2_I(inode)->ip_blkno,
                     (unsigned long long)le64_to_cpu(fe->i_blkno));
 
-       inode->i_nlink = ocfs2_read_links_count(fe);
+       set_nlink(inode, ocfs2_read_links_count(fe));
 
        trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno,
                                   le32_to_cpu(fe->i_flags));
@@ -1290,7 +1290,7 @@ void ocfs2_refresh_inode(struct inode *inode,
        OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
        ocfs2_set_inode_flags(inode);
        i_size_write(inode, le64_to_cpu(fe->i_size));
-       inode->i_nlink = ocfs2_read_links_count(fe);
+       set_nlink(inode, ocfs2_read_links_count(fe));
        inode->i_uid = le32_to_cpu(fe->i_uid);
        inode->i_gid = le32_to_cpu(fe->i_gid);
        inode->i_mode = le16_to_cpu(fe->i_mode);
index 53aa41ed7bf39d34d2ad9064813b3e6feeb57971..a8b2bfea574edd3cb70d4487572a0545a4a95682 100644 (file)
@@ -199,9 +199,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
         * these are used by the support functions here and in
         * callers. */
        if (S_ISDIR(mode))
-               inode->i_nlink = 2;
-       else
-               inode->i_nlink = 1;
+               set_nlink(inode, 2);
        inode_init_owner(inode, dir, mode);
        dquot_initialize(inode);
        return inode;
@@ -1379,7 +1377,7 @@ static int ocfs2_rename(struct inode *old_dir,
        }
 
        if (new_inode) {
-               new_inode->i_nlink--;
+               drop_nlink(new_inode);
                new_inode->i_ctime = CURRENT_TIME;
        }
        old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
@@ -1387,9 +1385,9 @@ static int ocfs2_rename(struct inode *old_dir,
        if (update_dot_dot) {
                status = ocfs2_update_entry(old_inode, handle,
                                            &old_inode_dot_dot_res, new_dir);
-               old_dir->i_nlink--;
+               drop_nlink(old_dir);
                if (new_inode) {
-                       new_inode->i_nlink--;
+                       drop_nlink(new_inode);
                } else {
                        inc_nlink(new_dir);
                        mark_inode_dirty(new_dir);
@@ -2018,7 +2016,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
        orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
        if (S_ISDIR(inode->i_mode))
                ocfs2_add_links_count(orphan_fe, 1);
-       orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
+       set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
        ocfs2_journal_dirty(handle, orphan_dir_bh);
 
        status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
@@ -2116,7 +2114,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
        orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
        if (S_ISDIR(inode->i_mode))
                ocfs2_add_links_count(orphan_fe, -1);
-       orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
+       set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
        ocfs2_journal_dirty(handle, orphan_dir_bh);
 
 leave:
@@ -2282,7 +2280,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
                goto leave;
        }
 
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        /* do the real work now. */
        status = __ocfs2_mknod_locked(dir, inode,
                                      0, &new_di_bh, parent_di_bh, handle,
@@ -2437,7 +2435,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
        di = (struct ocfs2_dinode *)di_bh->b_data;
        le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
        di->i_orphaned_slot = 0;
-       inode->i_nlink = 1;
+       set_nlink(inode, 1);
        ocfs2_set_links_count(di, inode->i_nlink);
        ocfs2_journal_dirty(handle, di_bh);
 
index a2a5bff774e3355a6de04af0e028dcb563dc87be..e4e0ff7962e2aa0431b2ad79c5285b77b097a049 100644 (file)
@@ -242,7 +242,7 @@ found:
                inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
                inode->i_op = &openprom_inode_operations;
                inode->i_fop = &openprom_operations;
-               inode->i_nlink = 2;
+               set_nlink(inode, 2);
                break;
        case op_inode_prop:
                if (!strcmp(dp->name, "options") && (len == 17) &&
@@ -251,7 +251,7 @@ found:
                else
                        inode->i_mode = S_IFREG | S_IRUGO;
                inode->i_fop = &openpromfs_prop_ops;
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
                inode->i_size = ent_oi->u.prop->length;
                break;
        }
index 8f0087e20e168ee04e6b14e7ef05a68f13e4b58e..2db1bd3173b2d77bb902273173a8ede14bf27f2e 100644 (file)
@@ -1652,12 +1652,46 @@ out:
        return error;
 }
 
+static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
+               struct kstat *stat)
+{
+       struct inode *inode = dentry->d_inode;
+       struct task_struct *task = get_proc_task(inode);
+       int rc;
+
+       if (task == NULL)
+               return -ESRCH;
+
+       rc = -EACCES;
+       if (lock_trace(task))
+               goto out_task;
+
+       generic_fillattr(inode, stat);
+       unlock_trace(task);
+       rc = 0;
+out_task:
+       put_task_struct(task);
+       return rc;
+}
+
 static const struct inode_operations proc_pid_link_inode_operations = {
        .readlink       = proc_pid_readlink,
        .follow_link    = proc_pid_follow_link,
        .setattr        = proc_setattr,
 };
 
+static const struct inode_operations proc_fdinfo_link_inode_operations = {
+       .setattr        = proc_setattr,
+       .getattr        = proc_pid_fd_link_getattr,
+};
+
+static const struct inode_operations proc_fd_link_inode_operations = {
+       .readlink       = proc_pid_readlink,
+       .follow_link    = proc_pid_follow_link,
+       .setattr        = proc_setattr,
+       .getattr        = proc_pid_fd_link_getattr,
+};
+
 
 /* building an inode */
 
@@ -1889,49 +1923,61 @@ out:
 
 static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 {
-       struct task_struct *task = get_proc_task(inode);
-       struct files_struct *files = NULL;
+       struct task_struct *task;
+       struct files_struct *files;
        struct file *file;
        int fd = proc_fd(inode);
+       int rc;
 
-       if (task) {
-               files = get_files_struct(task);
-               put_task_struct(task);
-       }
-       if (files) {
-               /*
-                * We are not taking a ref to the file structure, so we must
-                * hold ->file_lock.
-                */
-               spin_lock(&files->file_lock);
-               file = fcheck_files(files, fd);
-               if (file) {
-                       unsigned int f_flags;
-                       struct fdtable *fdt;
-
-                       fdt = files_fdtable(files);
-                       f_flags = file->f_flags & ~O_CLOEXEC;
-                       if (FD_ISSET(fd, fdt->close_on_exec))
-                               f_flags |= O_CLOEXEC;
-
-                       if (path) {
-                               *path = file->f_path;
-                               path_get(&file->f_path);
-                       }
-                       if (info)
-                               snprintf(info, PROC_FDINFO_MAX,
-                                        "pos:\t%lli\n"
-                                        "flags:\t0%o\n",
-                                        (long long) file->f_pos,
-                                        f_flags);
-                       spin_unlock(&files->file_lock);
-                       put_files_struct(files);
-                       return 0;
+       task = get_proc_task(inode);
+       if (!task)
+               return -ENOENT;
+
+       rc = -EACCES;
+       if (lock_trace(task))
+               goto out_task;
+
+       rc = -ENOENT;
+       files = get_files_struct(task);
+       if (files == NULL)
+               goto out_unlock;
+
+       /*
+        * We are not taking a ref to the file structure, so we must
+        * hold ->file_lock.
+        */
+       spin_lock(&files->file_lock);
+       file = fcheck_files(files, fd);
+       if (file) {
+               unsigned int f_flags;
+               struct fdtable *fdt;
+
+               fdt = files_fdtable(files);
+               f_flags = file->f_flags & ~O_CLOEXEC;
+               if (FD_ISSET(fd, fdt->close_on_exec))
+                       f_flags |= O_CLOEXEC;
+
+               if (path) {
+                       *path = file->f_path;
+                       path_get(&file->f_path);
                }
-               spin_unlock(&files->file_lock);
-               put_files_struct(files);
-       }
-       return -ENOENT;
+               if (info)
+                       snprintf(info, PROC_FDINFO_MAX,
+                                "pos:\t%lli\n"
+                                "flags:\t0%o\n",
+                                (long long) file->f_pos,
+                                f_flags);
+               rc = 0;
+       } else
+               rc = -ENOENT;
+       spin_unlock(&files->file_lock);
+       put_files_struct(files);
+
+out_unlock:
+       unlock_trace(task);
+out_task:
+       put_task_struct(task);
+       return rc;
 }
 
 static int proc_fd_link(struct inode *inode, struct path *path)
@@ -2026,7 +2072,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
        spin_unlock(&files->file_lock);
        put_files_struct(files);
 
-       inode->i_op = &proc_pid_link_inode_operations;
+       inode->i_op = &proc_fd_link_inode_operations;
        inode->i_size = 64;
        ei->op.proc_get_link = proc_fd_link;
        d_set_d_op(dentry, &tid_fd_dentry_operations);
@@ -2058,7 +2104,12 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
        if (fd == ~0U)
                goto out;
 
+       result = ERR_PTR(-EACCES);
+       if (lock_trace(task))
+               goto out;
+
        result = instantiate(dir, dentry, task, &fd);
+       unlock_trace(task);
 out:
        put_task_struct(task);
 out_no_task:
@@ -2078,23 +2129,28 @@ static int proc_readfd_common(struct file * filp, void * dirent,
        retval = -ENOENT;
        if (!p)
                goto out_no_task;
+
+       retval = -EACCES;
+       if (lock_trace(p))
+               goto out;
+
        retval = 0;
 
        fd = filp->f_pos;
        switch (fd) {
                case 0:
                        if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
-                               goto out;
+                               goto out_unlock;
                        filp->f_pos++;
                case 1:
                        ino = parent_ino(dentry);
                        if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
-                               goto out;
+                               goto out_unlock;
                        filp->f_pos++;
                default:
                        files = get_files_struct(p);
                        if (!files)
-                               goto out;
+                               goto out_unlock;
                        rcu_read_lock();
                        for (fd = filp->f_pos-2;
                             fd < files_fdtable(files)->max_fds;
@@ -2118,6 +2174,9 @@ static int proc_readfd_common(struct file * filp, void * dirent,
                        rcu_read_unlock();
                        put_files_struct(files);
        }
+
+out_unlock:
+       unlock_trace(p);
 out:
        put_task_struct(p);
 out_no_task:
@@ -2195,6 +2254,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
        ei->fd = fd;
        inode->i_mode = S_IFREG | S_IRUSR;
        inode->i_fop = &proc_fdinfo_file_operations;
+       inode->i_op = &proc_fdinfo_link_inode_operations;
        d_set_d_op(dentry, &tid_fd_dentry_operations);
        d_add(dentry, inode);
        /* Close the race of the process dying before we return the dentry */
@@ -2248,7 +2308,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
        ei = PROC_I(inode);
        inode->i_mode = p->mode;
        if (S_ISDIR(inode->i_mode))
-               inode->i_nlink = 2;     /* Use getattr to fix if necessary */
+               set_nlink(inode, 2);    /* Use getattr to fix if necessary */
        if (p->iop)
                inode->i_op = p->iop;
        if (p->fop)
@@ -2642,7 +2702,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
 
        inode->i_mode = p->mode;
        if (S_ISDIR(inode->i_mode))
-               inode->i_nlink = 2;
+               set_nlink(inode, 2);
        if (S_ISLNK(inode->i_mode))
                inode->i_size = 64;
        if (p->iop)
@@ -2981,8 +3041,8 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
        inode->i_fop = &proc_tgid_base_operations;
        inode->i_flags|=S_IMMUTABLE;
 
-       inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
-               ARRAY_SIZE(tgid_base_stuff));
+       set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
+                                                 ARRAY_SIZE(tgid_base_stuff)));
 
        d_set_d_op(dentry, &pid_dentry_operations);
 
@@ -3233,8 +3293,8 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
        inode->i_fop = &proc_tid_base_operations;
        inode->i_flags|=S_IMMUTABLE;
 
-       inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
-               ARRAY_SIZE(tid_base_stuff));
+       set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
+                                                 ARRAY_SIZE(tid_base_stuff)));
 
        d_set_d_op(dentry, &pid_dentry_operations);
 
index 9d99131d0d65455e522f2f42fb3a6f8ef3dc2c28..10090d9c7ad51fd630345bb9b2ddbd6ddd8ae2de 100644 (file)
@@ -283,7 +283,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
        struct inode *inode = dentry->d_inode;
        struct proc_dir_entry *de = PROC_I(inode)->pde;
        if (de && de->nlink)
-               inode->i_nlink = de->nlink;
+               set_nlink(inode, de->nlink);
 
        generic_fillattr(inode, stat);
        return 0;
index 7ed72d6c1c6fc4d2c528e7c5578c2836a2875914..7737c5468a4088f507165230884ba30b605713ea 100644 (file)
@@ -445,7 +445,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
                if (de->size)
                        inode->i_size = de->size;
                if (de->nlink)
-                       inode->i_nlink = de->nlink;
+                       set_nlink(inode, de->nlink);
                if (de->proc_iops)
                        inode->i_op = de->proc_iops;
                if (de->proc_fops) {
index 1a77dbef226f00a2c41a5606b6e3c95fece6f5aa..a6b62173d4c310e4fa4b131a214043667fa67726 100644 (file)
@@ -3,6 +3,7 @@
  */
 #include <linux/init.h>
 #include <linux/sysctl.h>
+#include <linux/poll.h>
 #include <linux/proc_fs.h>
 #include <linux/security.h>
 #include <linux/namei.h>
@@ -14,6 +15,15 @@ static const struct inode_operations proc_sys_inode_operations;
 static const struct file_operations proc_sys_dir_file_operations;
 static const struct inode_operations proc_sys_dir_operations;
 
+void proc_sys_poll_notify(struct ctl_table_poll *poll)
+{
+       if (!poll)
+               return;
+
+       atomic_inc(&poll->event);
+       wake_up_interruptible(&poll->wait);
+}
+
 static struct inode *proc_sys_make_inode(struct super_block *sb,
                struct ctl_table_header *head, struct ctl_table *table)
 {
@@ -39,7 +49,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
                inode->i_fop = &proc_sys_file_operations;
        } else {
                inode->i_mode |= S_IFDIR;
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                inode->i_op = &proc_sys_dir_operations;
                inode->i_fop = &proc_sys_dir_file_operations;
        }
@@ -176,6 +186,39 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
        return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
 }
 
+static int proc_sys_open(struct inode *inode, struct file *filp)
+{
+       struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+
+       if (table->poll)
+               filp->private_data = proc_sys_poll_event(table->poll);
+
+       return 0;
+}
+
+static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
+{
+       struct inode *inode = filp->f_path.dentry->d_inode;
+       struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+       unsigned long event = (unsigned long)filp->private_data;
+       unsigned int ret = DEFAULT_POLLMASK;
+
+       if (!table->proc_handler)
+               goto out;
+
+       if (!table->poll)
+               goto out;
+
+       poll_wait(filp, &table->poll->wait, wait);
+
+       if (event != atomic_read(&table->poll->event)) {
+               filp->private_data = proc_sys_poll_event(table->poll);
+               ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
+       }
+
+out:
+       return ret;
+}
 
 static int proc_sys_fill_cache(struct file *filp, void *dirent,
                                filldir_t filldir,
@@ -364,12 +407,15 @@ static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
 }
 
 static const struct file_operations proc_sys_file_operations = {
+       .open           = proc_sys_open,
+       .poll           = proc_sys_poll,
        .read           = proc_sys_read,
        .write          = proc_sys_write,
        .llseek         = default_llseek,
 };
 
 static const struct file_operations proc_sys_dir_file_operations = {
+       .read           = generic_read_dir,
        .readdir        = proc_sys_readdir,
        .llseek         = generic_file_llseek,
 };
index 2b0646613f5a1f86da0637e7c131dc36c28e9005..3bdd214184321b73b1636b1e987fb3f22c1c44f6 100644 (file)
@@ -379,7 +379,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
        inode->i_mode    = le16_to_cpu(raw_inode->di_mode);
        inode->i_uid     = (uid_t)le16_to_cpu(raw_inode->di_uid);
        inode->i_gid     = (gid_t)le16_to_cpu(raw_inode->di_gid);
-       inode->i_nlink   = le16_to_cpu(raw_inode->di_nlink);
+       set_nlink(inode, le16_to_cpu(raw_inode->di_nlink));
        inode->i_size    = le32_to_cpu(raw_inode->di_size);
        inode->i_mtime.tv_sec   = le32_to_cpu(raw_inode->di_mtime);
        inode->i_mtime.tv_nsec = 0;
index 10b6be3ca280963e90e5de2b11f5556e576cea4a..aae0edb95c6c321dac9268ab5df670607eb7e7e9 100644 (file)
@@ -363,12 +363,15 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
        }
 
        sb = quotactl_block(special);
-       if (IS_ERR(sb))
-               return PTR_ERR(sb);
+       if (IS_ERR(sb)) {
+               ret = PTR_ERR(sb);
+               goto out;
+       }
 
        ret = do_quotactl(sb, type, cmds, id, addr, pathp);
 
        drop_super(sb);
+out:
        if (pathp && !IS_ERR(pathp))
                path_put(pathp);
        return ret;
index eacb166fb25916eb1f460e99f2ad1b67f15ac4ea..462ceb38fec6e350a6d223a10f3c7442681ba6a5 100644 (file)
@@ -23,7 +23,6 @@
  * caches is sufficient.
  */
 
-#include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
@@ -288,14 +287,7 @@ static int __init init_ramfs_fs(void)
 {
        return register_filesystem(&ramfs_fs_type);
 }
-
-static void __exit exit_ramfs_fs(void)
-{
-       unregister_filesystem(&ramfs_fs_type);
-}
-
 module_init(init_ramfs_fs)
-module_exit(exit_ramfs_fs)
 
 int __init init_rootfs(void)
 {
@@ -311,5 +303,3 @@ int __init init_rootfs(void)
 
        return err;
 }
-
-MODULE_LICENSE("GPL");
index 9b0d4b78b4fbf83f8591b8910b794e4219466cda..950f13af0951605e7dd63d21151fe86e84937052 100644 (file)
@@ -1154,7 +1154,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
                set_inode_item_key_version(inode, KEY_FORMAT_3_5);
                set_inode_sd_version(inode, STAT_DATA_V1);
                inode->i_mode = sd_v1_mode(sd);
-               inode->i_nlink = sd_v1_nlink(sd);
+               set_nlink(inode, sd_v1_nlink(sd));
                inode->i_uid = sd_v1_uid(sd);
                inode->i_gid = sd_v1_gid(sd);
                inode->i_size = sd_v1_size(sd);
@@ -1199,7 +1199,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
                struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
 
                inode->i_mode = sd_v2_mode(sd);
-               inode->i_nlink = sd_v2_nlink(sd);
+               set_nlink(inode, sd_v2_nlink(sd));
                inode->i_uid = sd_v2_uid(sd);
                inode->i_size = sd_v2_size(sd);
                inode->i_gid = sd_v2_gid(sd);
@@ -1444,7 +1444,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
                /* a stale NFS handle can trigger this without it being an error */
                pathrelse(&path_to_sd);
                reiserfs_make_bad_inode(inode);
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                return;
        }
 
@@ -1832,7 +1832,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 #endif
 
        /* fill stat data */
-       inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);
+       set_nlink(inode, (S_ISDIR(mode) ? 2 : 1));
 
        /* uid and gid must already be set by the caller for quota init */
 
@@ -1987,7 +1987,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
        make_bad_inode(inode);
 
       out_inserted_sd:
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        th->t_trans_id = 0;     /* so the caller can't use this handle later */
        unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
        iput(inode);
index ef392324bbf14f7b06de26ef30cc75461f7a1f84..80058e8ce36174ce20cb2e870e64b6e843fa3036 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/reiserfs_xattr.h>
 #include <linux/quotaops.h>
 
-#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
+#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
 #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
 
 // directory item contains array of entry headers. This performs
@@ -622,7 +622,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
                               dentry->d_name.len, inode, 1 /*visible */ );
        if (retval) {
                int err;
-               inode->i_nlink--;
+               drop_nlink(inode);
                reiserfs_update_sd(&th, inode);
                err = journal_end(&th, dir->i_sb, jbegin_count);
                if (err)
@@ -702,7 +702,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
                               dentry->d_name.len, inode, 1 /*visible */ );
        if (retval) {
                int err;
-               inode->i_nlink--;
+               drop_nlink(inode);
                reiserfs_update_sd(&th, inode);
                err = journal_end(&th, dir->i_sb, jbegin_count);
                if (err)
@@ -787,7 +787,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                               dentry->d_name.len, inode, 1 /*visible */ );
        if (retval) {
                int err;
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                DEC_DIR_INODE_NLINK(dir);
                reiserfs_update_sd(&th, inode);
                err = journal_end(&th, dir->i_sb, jbegin_count);
@@ -964,7 +964,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
                reiserfs_warning(inode->i_sb, "reiserfs-7042",
                                 "deleting nonexistent file (%lu), %d",
                                 inode->i_ino, inode->i_nlink);
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
        }
 
        drop_nlink(inode);
@@ -1086,7 +1086,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
                                    dentry->d_name.len, inode, 1 /*visible */ );
        if (retval) {
                int err;
-               inode->i_nlink--;
+               drop_nlink(inode);
                reiserfs_update_sd(&th, inode);
                err = journal_end(&th, parent_dir->i_sb, jbegin_count);
                if (err)
@@ -1129,7 +1129,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
 
        retval = journal_begin(&th, dir->i_sb, jbegin_count);
        if (retval) {
-               inode->i_nlink--;
+               drop_nlink(inode);
                reiserfs_write_unlock(dir->i_sb);
                return retval;
        }
@@ -1144,7 +1144,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
 
        if (retval) {
                int err;
-               inode->i_nlink--;
+               drop_nlink(inode);
                err = journal_end(&th, dir->i_sb, jbegin_count);
                reiserfs_write_unlock(dir->i_sb);
                return err ? err : retval;
index 2305e3121cb1d3fd06ab2a3b2a6ce06a77b02355..8b4089f30408bbcbb14c687bdef16e34784f43c3 100644 (file)
@@ -337,7 +337,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
        inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK;
        inode->i_dataoffset = pos + inode->i_metasize;
 
-       i->i_nlink = 1;         /* Hard to decide.. */
+       set_nlink(i, 1);                /* Hard to decide.. */
        i->i_size = be32_to_cpu(ri.size);
        i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
        i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
index 04bebcaa237331cd3a27d252dbdcc60dd4e6d98d..fd7b3b3bda136fa720c75421b3551815417affd7 100644 (file)
@@ -159,7 +159,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
                        frag_offset = 0;
                }
 
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
                inode->i_size = le32_to_cpu(sqsh_ino->file_size);
                inode->i_fop = &generic_ro_fops;
                inode->i_mode |= S_IFREG;
@@ -203,7 +203,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
                }
 
                xattr_id = le32_to_cpu(sqsh_ino->xattr);
-               inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+               set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
                inode->i_size = le64_to_cpu(sqsh_ino->file_size);
                inode->i_op = &squashfs_inode_ops;
                inode->i_fop = &generic_ro_fops;
@@ -232,7 +232,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
                if (err < 0)
                        goto failed_read;
 
-               inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+               set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
                inode->i_size = le16_to_cpu(sqsh_ino->file_size);
                inode->i_op = &squashfs_dir_inode_ops;
                inode->i_fop = &squashfs_dir_ops;
@@ -257,7 +257,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
                        goto failed_read;
 
                xattr_id = le32_to_cpu(sqsh_ino->xattr);
-               inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+               set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
                inode->i_size = le32_to_cpu(sqsh_ino->file_size);
                inode->i_op = &squashfs_dir_inode_ops;
                inode->i_fop = &squashfs_dir_ops;
@@ -284,7 +284,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
                if (err < 0)
                        goto failed_read;
 
-               inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+               set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
                inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
                inode->i_op = &squashfs_symlink_inode_ops;
                inode->i_data.a_ops = &squashfs_symlink_aops;
@@ -325,7 +325,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
                        inode->i_mode |= S_IFCHR;
                else
                        inode->i_mode |= S_IFBLK;
-               inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+               set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
                rdev = le32_to_cpu(sqsh_ino->rdev);
                init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
 
@@ -349,7 +349,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
                        inode->i_mode |= S_IFBLK;
                xattr_id = le32_to_cpu(sqsh_ino->xattr);
                inode->i_op = &squashfs_inode_ops;
-               inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+               set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
                rdev = le32_to_cpu(sqsh_ino->rdev);
                init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
 
@@ -370,7 +370,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
                        inode->i_mode |= S_IFIFO;
                else
                        inode->i_mode |= S_IFSOCK;
-               inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+               set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
                init_special_inode(inode, inode->i_mode, 0);
                break;
        }
@@ -389,7 +389,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
                        inode->i_mode |= S_IFSOCK;
                xattr_id = le32_to_cpu(sqsh_ino->xattr);
                inode->i_op = &squashfs_inode_ops;
-               inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+               set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
                init_special_inode(inode, inode->i_mode, 0);
                break;
        }
index b4f2ab48a61f0b7c31cdfe3201d1f4bdded5ca50..9c11519245a65296c5de9e5ba3abceb92399fc47 100644 (file)
@@ -71,6 +71,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
        dest->i_ctime = src->i_ctime;
        dest->i_blkbits = src->i_blkbits;
        dest->i_flags = src->i_flags;
-       dest->i_nlink = src->i_nlink;
+       set_nlink(dest, src->i_nlink);
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
index 78a3aa83c7eac3f153cc5e370cabd754babb9aab..8806b8997d2efcfd73944df9f3695d40207fb6bf 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -294,15 +294,16 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
 {
        struct path path;
        int error;
+       int empty = 0;
 
        if (bufsiz <= 0)
                return -EINVAL;
 
-       error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
+       error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty);
        if (!error) {
                struct inode *inode = path.dentry->d_inode;
 
-               error = -EINVAL;
+               error = empty ? -ENOENT : -EINVAL;
                if (inode->i_op->readlink) {
                        error = security_inode_readlink(path.dentry);
                        if (!error) {
index 32a81f3467e06835abdf8e5a3beb91acc3a2c161..afd0f1ad45e052068499f7cc94699bef6fbc4084 100644 (file)
@@ -727,8 +727,13 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 
        if (sb->s_op->remount_fs) {
                retval = sb->s_op->remount_fs(sb, &flags, data);
-               if (retval)
-                       return retval;
+               if (retval) {
+                       if (!force)
+                               return retval;
+                       /* If forced remount, go ahead despite any errors */
+                       WARN(1, "forced remount of a %s fs returned %i\n",
+                            sb->s_type->name, retval);
+               }
        }
        sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
 
index e23f28894a3a304f028209de49f9e1ab979d3a41..c81b22f3ace162629e790ee4e06941861b97e576 100644 (file)
@@ -218,7 +218,7 @@ static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
        }
 
        if (sysfs_type(sd) == SYSFS_DIR)
-               inode->i_nlink = sd->s_dir.subdirs + 2;
+               set_nlink(inode, sd->s_dir.subdirs + 2);
 }
 
 int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
index 0630eb969a280885550f67b896bcbe759d756533..25ffb3e9a3f8ac0a73fc2d698c1c4a4b5adc4aa4 100644 (file)
@@ -219,7 +219,7 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
        inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode);
        inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid);
        inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid);
-       inode->i_nlink = fs16_to_cpu(sbi, raw_inode->i_nlink);
+       set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink));
        inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
        inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
        inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime);
index b28121278d469790e6b400830fbff6ae2e5dc5ef..20403dc5d4378da7a6e962601a8e60740cb6d5db 100644 (file)
@@ -129,7 +129,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
                goto out_ino;
 
        inode->i_flags |= (S_NOCMTIME | S_NOATIME);
-       inode->i_nlink = le32_to_cpu(ino->nlink);
+       set_nlink(inode, le32_to_cpu(ino->nlink));
        inode->i_uid   = le32_to_cpu(ino->uid);
        inode->i_gid   = le32_to_cpu(ino->gid);
        inode->i_atime.tv_sec  = (int64_t)le64_to_cpu(ino->atime_sec);
index 16f19f55e63fa53e20dcc2cf520794b27b1290a7..bf18f7a04544b28bb0fd989e33065ee17bb7a95e 100644 (file)
@@ -558,10 +558,10 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
        }
 
        ubifs_assert(inode->i_nlink == 1);
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        err = remove_xattr(c, host, inode, &nm);
        if (err)
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
 
        /* If @i_nlink is 0, 'iput()' will delete the inode */
        iput(inode);
index 95518a9f589e395fac2adb4d6bb46d335b977e11..987585bb0a1da594fef277e8b6e0374adaed6547 100644 (file)
@@ -59,8 +59,8 @@ static int __load_block_bitmap(struct super_block *sb,
        int nr_groups = bitmap->s_nr_groups;
 
        if (block_group >= nr_groups) {
-               udf_debug("block_group (%d) > nr_groups (%d)\n", block_group,
-                         nr_groups);
+               udf_debug("block_group (%d) > nr_groups (%d)\n",
+                         block_group, nr_groups);
        }
 
        if (bitmap->s_block_bitmap[block_group]) {
@@ -126,8 +126,9 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
        if (bloc->logicalBlockNum + count < count ||
            (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
                udf_debug("%d < %d || %d + %d > %d\n",
-                         bloc->logicalBlockNum, 0, bloc->logicalBlockNum,
-                         count, partmap->s_partition_len);
+                         bloc->logicalBlockNum, 0,
+                         bloc->logicalBlockNum, count,
+                         partmap->s_partition_len);
                goto error_return;
        }
 
@@ -155,7 +156,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
                        if (udf_set_bit(bit + i, bh->b_data)) {
                                udf_debug("bit %ld already set\n", bit + i);
                                udf_debug("byte=%2x\n",
-                                       ((char *)bh->b_data)[(bit + i) >> 3]);
+                                         ((char *)bh->b_data)[(bit + i) >> 3]);
                        }
                }
                udf_add_free_space(sb, sbi->s_partition, count);
@@ -369,7 +370,8 @@ static void udf_table_free_blocks(struct super_block *sb,
        if (bloc->logicalBlockNum + count < count ||
            (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
                udf_debug("%d < %d || %d + %d > %d\n",
-                         bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count,
+                         bloc->logicalBlockNum, 0,
+                         bloc->logicalBlockNum, count,
                          partmap->s_partition_len);
                goto error_return;
        }
index 2ffdb6733af181a8f9943c03a25e41f9c5471422..3e44f575fb9cbaf39d8f05bb4594807bb0a7a3be 100644 (file)
@@ -162,8 +162,8 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
        int padlen;
 
        if ((!buffer) || (!offset)) {
-               udf_debug("invalidparms\n, buffer=%p, offset=%p\n", buffer,
-                         offset);
+               udf_debug("invalidparms, buffer=%p, offset=%p\n",
+                         buffer, offset);
                return NULL;
        }
 
@@ -201,7 +201,7 @@ struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offs
        struct short_ad *sa;
 
        if ((!ptr) || (!offset)) {
-               printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n");
+               pr_err("%s: invalidparms\n", __func__);
                return NULL;
        }
 
@@ -223,7 +223,7 @@ struct long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset
        struct long_ad *la;
 
        if ((!ptr) || (!offset)) {
-               printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n");
+               pr_err("%s: invalidparms\n", __func__);
                return NULL;
        }
 
index 1d1358ed80c13e5da17849c7773bc8a6d1861e7c..4fd1d809738c519b676598f698501e7b6dc52080 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/writeback.h>
 #include <linux/slab.h>
 #include <linux/crc-itu-t.h>
+#include <linux/mpage.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
@@ -83,12 +84,10 @@ void udf_evict_inode(struct inode *inode)
        end_writeback(inode);
        if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
            inode->i_size != iinfo->i_lenExtents) {
-               printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has "
-                       "inode size %llu different from extent length %llu. "
-                       "Filesystem need not be standards compliant.\n",
-                       inode->i_sb->s_id, inode->i_ino, inode->i_mode,
-                       (unsigned long long)inode->i_size,
-                       (unsigned long long)iinfo->i_lenExtents);
+               udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
+                        inode->i_ino, inode->i_mode,
+                        (unsigned long long)inode->i_size,
+                        (unsigned long long)iinfo->i_lenExtents);
        }
        kfree(iinfo->i_ext.i_data);
        iinfo->i_ext.i_data = NULL;
@@ -104,7 +103,13 @@ static int udf_writepage(struct page *page, struct writeback_control *wbc)
 
 static int udf_readpage(struct file *file, struct page *page)
 {
-       return block_read_full_page(page, udf_get_block);
+       return mpage_readpage(page, udf_get_block);
+}
+
+static int udf_readpages(struct file *file, struct address_space *mapping,
+                       struct list_head *pages, unsigned nr_pages)
+{
+       return mpage_readpages(mapping, pages, nr_pages, udf_get_block);
 }
 
 static int udf_write_begin(struct file *file, struct address_space *mapping,
@@ -139,6 +144,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
 
 const struct address_space_operations udf_aops = {
        .readpage       = udf_readpage,
+       .readpages      = udf_readpages,
        .writepage      = udf_writepage,
        .write_begin            = udf_write_begin,
        .write_end              = generic_write_end,
@@ -1169,16 +1175,15 @@ static void __udf_read_inode(struct inode *inode)
         */
        bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident);
        if (!bh) {
-               printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n",
-                      inode->i_ino);
+               udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino);
                make_bad_inode(inode);
                return;
        }
 
        if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE &&
            ident != TAG_IDENT_USE) {
-               printk(KERN_ERR "udf: udf_read_inode(ino %ld) "
-                               "failed ident=%d\n", inode->i_ino, ident);
+               udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n",
+                       inode->i_ino, ident);
                brelse(bh);
                make_bad_inode(inode);
                return;
@@ -1218,8 +1223,8 @@ static void __udf_read_inode(struct inode *inode)
                }
                brelse(ibh);
        } else if (fe->icbTag.strategyType != cpu_to_le16(4)) {
-               printk(KERN_ERR "udf: unsupported strategy type: %d\n",
-                      le16_to_cpu(fe->icbTag.strategyType));
+               udf_err(inode->i_sb, "unsupported strategy type: %d\n",
+                       le16_to_cpu(fe->icbTag.strategyType));
                brelse(bh);
                make_bad_inode(inode);
                return;
@@ -1236,6 +1241,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
        int offset;
        struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
        struct udf_inode_info *iinfo = UDF_I(inode);
+       unsigned int link_count;
 
        fe = (struct fileEntry *)bh->b_data;
        efe = (struct extendedFileEntry *)bh->b_data;
@@ -1318,9 +1324,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
        inode->i_mode &= ~sbi->s_umask;
        read_unlock(&sbi->s_cred_lock);
 
-       inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
-       if (!inode->i_nlink)
-               inode->i_nlink = 1;
+       link_count = le16_to_cpu(fe->fileLinkCount);
+       if (!link_count)
+               link_count = 1;
+       set_nlink(inode, link_count);
 
        inode->i_size = le64_to_cpu(fe->informationLength);
        iinfo->i_lenExtents = inode->i_size;
@@ -1413,9 +1420,8 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
                udf_debug("METADATA BITMAP FILE-----\n");
                break;
        default:
-               printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown "
-                               "file type=%d\n", inode->i_ino,
-                               fe->icbTag.fileType);
+               udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n",
+                       inode->i_ino, fe->icbTag.fileType);
                make_bad_inode(inode);
                return;
        }
@@ -1438,8 +1444,8 @@ static int udf_alloc_i_data(struct inode *inode, size_t size)
        iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL);
 
        if (!iinfo->i_ext.i_data) {
-               printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) "
-                               "no free memory\n", inode->i_ino);
+               udf_err(inode->i_sb, "(ino %ld) no free memory\n",
+                       inode->i_ino);
                return -ENOMEM;
        }
 
@@ -1689,9 +1695,8 @@ out:
        if (do_sync) {
                sync_dirty_buffer(bh);
                if (buffer_write_io_error(bh)) {
-                       printk(KERN_WARNING "IO error syncing udf inode "
-                               "[%s:%08lx]\n", inode->i_sb->s_id,
-                               inode->i_ino);
+                       udf_warn(inode->i_sb, "IO error syncing udf inode [%08lx]\n",
+                                inode->i_ino);
                        err = -EIO;
                }
        }
@@ -1982,8 +1987,7 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
                *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK;
                break;
        default:
-               udf_debug("alloc_type = %d unsupported\n",
-                               iinfo->i_alloc_type);
+               udf_debug("alloc_type = %d unsupported\n", iinfo->i_alloc_type);
                return -1;
        }
 
index 43e24a3b8e10422674746888120c1ca180a9c4d8..6583fe9b0645f6f4b5c417bc33ae5fea5bf99828 100644 (file)
@@ -38,7 +38,7 @@ unsigned int udf_get_last_session(struct super_block *sb)
 
        if (i == 0) {
                udf_debug("XA disk: %s, vol_desc_start=%d\n",
-                         (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba);
+                         ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba);
                if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
                        vol_desc_start = ms_info.addr.lba;
        } else {
index 9215700c00a4448eedd1e5306cb2c3410f3f4b45..c175b4dabc14e576eea191fbb0f923f4735171e4 100644 (file)
@@ -204,6 +204,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
 {
        struct tag *tag_p;
        struct buffer_head *bh = NULL;
+       u8 checksum;
 
        /* Read the block */
        if (block == 0xFFFFFFFF)
@@ -211,8 +212,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
 
        bh = udf_tread(sb, block);
        if (!bh) {
-               udf_debug("block=%d, location=%d: read failed\n",
-                         block, location);
+               udf_err(sb, "read failed, block=%u, location=%d\n",
+                       block, location);
                return NULL;
        }
 
@@ -227,16 +228,18 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
        }
 
        /* Verify the tag checksum */
-       if (udf_tag_checksum(tag_p) != tag_p->tagChecksum) {
-               printk(KERN_ERR "udf: tag checksum failed block %d\n", block);
+       checksum = udf_tag_checksum(tag_p);
+       if (checksum != tag_p->tagChecksum) {
+               udf_err(sb, "tag checksum failed, block %u: 0x%02x != 0x%02x\n",
+                       block, checksum, tag_p->tagChecksum);
                goto error_out;
        }
 
        /* Verify the tag version */
        if (tag_p->descVersion != cpu_to_le16(0x0002U) &&
            tag_p->descVersion != cpu_to_le16(0x0003U)) {
-               udf_debug("tag version 0x%04x != 0x0002 || 0x0003 block %d\n",
-                         le16_to_cpu(tag_p->descVersion), block);
+               udf_err(sb, "tag version 0x%04x != 0x0002 || 0x0003, block %u\n",
+                       le16_to_cpu(tag_p->descVersion), block);
                goto error_out;
        }
 
@@ -248,8 +251,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
                return bh;
 
        udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block,
-           le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength));
-
+                 le16_to_cpu(tag_p->descCRC),
+                 le16_to_cpu(tag_p->descCRCLength));
 error_out:
        brelse(bh);
        return NULL;
index f1dce848ef966ea1853c9f50ce2d9cc997114a18..4639e137222fa91c37ec00e79a844b339343720d 100644 (file)
@@ -577,8 +577,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
 
        fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
        if (!fi) {
-               inode->i_nlink--;
-               mark_inode_dirty(inode);
+               inode_dec_link_count(inode);
                iput(inode);
                return err;
        }
@@ -618,8 +617,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
        init_special_inode(inode, mode, rdev);
        fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
        if (!fi) {
-               inode->i_nlink--;
-               mark_inode_dirty(inode);
+               inode_dec_link_count(inode);
                iput(inode);
                return err;
        }
@@ -665,12 +663,11 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        inode->i_fop = &udf_dir_operations;
        fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
        if (!fi) {
-               inode->i_nlink--;
-               mark_inode_dirty(inode);
+               inode_dec_link_count(inode);
                iput(inode);
                goto out;
        }
-       inode->i_nlink = 2;
+       set_nlink(inode, 2);
        cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
        cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location);
        *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
@@ -683,7 +680,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
        fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
        if (!fi) {
-               inode->i_nlink = 0;
+               clear_nlink(inode);
                mark_inode_dirty(inode);
                iput(inode);
                goto out;
@@ -799,9 +796,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
        if (retval)
                goto end_rmdir;
        if (inode->i_nlink != 2)
-               udf_warning(inode->i_sb, "udf_rmdir",
-                           "empty directory has nlink != 2 (%d)",
-                           inode->i_nlink);
+               udf_warn(inode->i_sb, "empty directory has nlink != 2 (%d)\n",
+                        inode->i_nlink);
        clear_nlink(inode);
        inode->i_size = 0;
        inode_dec_link_count(dir);
@@ -840,7 +836,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
        if (!inode->i_nlink) {
                udf_debug("Deleting nonexistent file (%lu), %d\n",
                          inode->i_ino, inode->i_nlink);
-               inode->i_nlink = 1;
+               set_nlink(inode, 1);
        }
        retval = udf_delete_entry(dir, fi, &fibh, &cfi);
        if (retval)
index a71090ea0e07034c5eb24cdcb8bbea0009ccb8a9..d6caf01a20971e2e3237e885aa55f9ccd9998569 100644 (file)
@@ -33,8 +33,8 @@ uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
        struct udf_sb_info *sbi = UDF_SB(sb);
        struct udf_part_map *map;
        if (partition >= sbi->s_partitions) {
-               udf_debug("block=%d, partition=%d, offset=%d: "
-                         "invalid partition\n", block, partition, offset);
+               udf_debug("block=%d, partition=%d, offset=%d: invalid partition\n",
+                         block, partition, offset);
                return 0xFFFFFFFF;
        }
        map = &sbi->s_partmaps[partition];
@@ -60,8 +60,8 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
        vdata = &map->s_type_specific.s_virtual;
 
        if (block > vdata->s_num_entries) {
-               udf_debug("Trying to access block beyond end of VAT "
-                         "(%d max %d)\n", block, vdata->s_num_entries);
+               udf_debug("Trying to access block beyond end of VAT (%d max %d)\n",
+                         block, vdata->s_num_entries);
                return 0xFFFFFFFF;
        }
 
@@ -321,9 +321,14 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block,
        /* We shouldn't mount such media... */
        BUG_ON(!inode);
        retblk = udf_try_read_meta(inode, block, partition, offset);
-       if (retblk == 0xFFFFFFFF) {
-               udf_warning(sb, __func__, "error reading from METADATA, "
-                       "trying to read from MIRROR");
+       if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) {
+               udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n");
+               if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) {
+                       mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
+                               mdata->s_mirror_file_loc, map->s_partition_num);
+                       mdata->s_flags |= MF_MIRROR_FE_LOADED;
+               }
+
                inode = mdata->s_mirror_fe;
                if (!inode)
                        return 0xFFFFFFFF;
index 7b27b063ff6d673423ecd0902d0076f1c0ea3ccf..e185253470dfa71725f4dc33ebc78463805908d9 100644 (file)
@@ -75,8 +75,6 @@
 
 #define UDF_DEFAULT_BLOCKSIZE 2048
 
-static char error_buf[1024];
-
 /* These are the "meat" - everything else is stuffing */
 static int udf_fill_super(struct super_block *, void *, int);
 static void udf_put_super(struct super_block *);
@@ -92,8 +90,6 @@ static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
 static int udf_statfs(struct dentry *, struct kstatfs *);
 static int udf_show_options(struct seq_file *, struct vfsmount *);
-static void udf_error(struct super_block *sb, const char *function,
-                     const char *fmt, ...);
 
 struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
 {
@@ -244,9 +240,8 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
        sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
                                  GFP_KERNEL);
        if (!sbi->s_partmaps) {
-               udf_error(sb, __func__,
-                         "Unable to allocate space for %d partition maps",
-                         count);
+               udf_err(sb, "Unable to allocate space for %d partition maps\n",
+                       count);
                sbi->s_partitions = 0;
                return -ENOMEM;
        }
@@ -550,8 +545,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
                        uopt->dmode = option & 0777;
                        break;
                default:
-                       printk(KERN_ERR "udf: bad mount option \"%s\" "
-                              "or missing value\n", p);
+                       pr_err("bad mount option \"%s\" or missing value\n", p);
                        return 0;
                }
        }
@@ -645,20 +639,16 @@ static loff_t udf_check_vsd(struct super_block *sb)
                                udf_debug("ISO9660 Boot Record found\n");
                                break;
                        case 1:
-                               udf_debug("ISO9660 Primary Volume Descriptor "
-                                         "found\n");
+                               udf_debug("ISO9660 Primary Volume Descriptor found\n");
                                break;
                        case 2:
-                               udf_debug("ISO9660 Supplementary Volume "
-                                         "Descriptor found\n");
+                               udf_debug("ISO9660 Supplementary Volume Descriptor found\n");
                                break;
                        case 3:
-                               udf_debug("ISO9660 Volume Partition Descriptor "
-                                         "found\n");
+                               udf_debug("ISO9660 Volume Partition Descriptor found\n");
                                break;
                        case 255:
-                               udf_debug("ISO9660 Volume Descriptor Set "
-                                         "Terminator found\n");
+                               udf_debug("ISO9660 Volume Descriptor Set Terminator found\n");
                                break;
                        default:
                                udf_debug("ISO9660 VRS (%u) found\n",
@@ -809,8 +799,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
                              pvoldesc->recordingDateAndTime)) {
 #ifdef UDFFS_DEBUG
                struct timestamp *ts = &pvoldesc->recordingDateAndTime;
-               udf_debug("recording time %04u/%02u/%02u"
-                         " %02u:%02u (%x)\n",
+               udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
                          le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
                          ts->minute, le16_to_cpu(ts->typeAndTimezone));
 #endif
@@ -821,7 +810,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
                        strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
                                outstr->u_len > 31 ? 31 : outstr->u_len);
                        udf_debug("volIdent[] = '%s'\n",
-                                       UDF_SB(sb)->s_volume_ident);
+                                 UDF_SB(sb)->s_volume_ident);
                }
 
        if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128))
@@ -837,64 +826,57 @@ out1:
        return ret;
 }
 
+struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
+                                       u32 meta_file_loc, u32 partition_num)
+{
+       struct kernel_lb_addr addr;
+       struct inode *metadata_fe;
+
+       addr.logicalBlockNum = meta_file_loc;
+       addr.partitionReferenceNum = partition_num;
+
+       metadata_fe = udf_iget(sb, &addr);
+
+       if (metadata_fe == NULL)
+               udf_warn(sb, "metadata inode efe not found\n");
+       else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) {
+               udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n");
+               iput(metadata_fe);
+               metadata_fe = NULL;
+       }
+
+       return metadata_fe;
+}
+
 static int udf_load_metadata_files(struct super_block *sb, int partition)
 {
        struct udf_sb_info *sbi = UDF_SB(sb);
        struct udf_part_map *map;
        struct udf_meta_data *mdata;
        struct kernel_lb_addr addr;
-       int fe_error = 0;
 
        map = &sbi->s_partmaps[partition];
        mdata = &map->s_type_specific.s_metadata;
 
        /* metadata address */
-       addr.logicalBlockNum =  mdata->s_meta_file_loc;
-       addr.partitionReferenceNum = map->s_partition_num;
-
        udf_debug("Metadata file location: block = %d part = %d\n",
-                         addr.logicalBlockNum, addr.partitionReferenceNum);
+                 mdata->s_meta_file_loc, map->s_partition_num);
 
-       mdata->s_metadata_fe = udf_iget(sb, &addr);
+       mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb,
+               mdata->s_meta_file_loc, map->s_partition_num);
 
        if (mdata->s_metadata_fe == NULL) {
-               udf_warning(sb, __func__, "metadata inode efe not found, "
-                               "will try mirror inode.");
-               fe_error = 1;
-       } else if (UDF_I(mdata->s_metadata_fe)->i_alloc_type !=
-                ICBTAG_FLAG_AD_SHORT) {
-               udf_warning(sb, __func__, "metadata inode efe does not have "
-                       "short allocation descriptors!");
-               fe_error = 1;
-               iput(mdata->s_metadata_fe);
-               mdata->s_metadata_fe = NULL;
-       }
+               /* mirror file entry */
+               udf_debug("Mirror metadata file location: block = %d part = %d\n",
+                         mdata->s_mirror_file_loc, map->s_partition_num);
 
-       /* mirror file entry */
-       addr.logicalBlockNum = mdata->s_mirror_file_loc;
-       addr.partitionReferenceNum = map->s_partition_num;
-
-       udf_debug("Mirror metadata file location: block = %d part = %d\n",
-                         addr.logicalBlockNum, addr.partitionReferenceNum);
+               mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
+                       mdata->s_mirror_file_loc, map->s_partition_num);
 
-       mdata->s_mirror_fe = udf_iget(sb, &addr);
-
-       if (mdata->s_mirror_fe == NULL) {
-               if (fe_error) {
-                       udf_error(sb, __func__, "mirror inode efe not found "
-                       "and metadata inode is missing too, exiting...");
-                       goto error_exit;
-               } else
-                       udf_warning(sb, __func__, "mirror inode efe not found,"
-                                       " but metadata inode is OK");
-       } else if (UDF_I(mdata->s_mirror_fe)->i_alloc_type !=
-                ICBTAG_FLAG_AD_SHORT) {
-               udf_warning(sb, __func__, "mirror inode efe does not have "
-                       "short allocation descriptors!");
-               iput(mdata->s_mirror_fe);
-               mdata->s_mirror_fe = NULL;
-               if (fe_error)
+               if (mdata->s_mirror_fe == NULL) {
+                       udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
                        goto error_exit;
+               }
        }
 
        /*
@@ -907,18 +889,15 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
                addr.partitionReferenceNum = map->s_partition_num;
 
                udf_debug("Bitmap file location: block = %d part = %d\n",
-                       addr.logicalBlockNum, addr.partitionReferenceNum);
+                         addr.logicalBlockNum, addr.partitionReferenceNum);
 
                mdata->s_bitmap_fe = udf_iget(sb, &addr);
 
                if (mdata->s_bitmap_fe == NULL) {
                        if (sb->s_flags & MS_RDONLY)
-                               udf_warning(sb, __func__, "bitmap inode efe "
-                                       "not found but it's ok since the disc"
-                                       " is mounted read-only");
+                               udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
                        else {
-                               udf_error(sb, __func__, "bitmap inode efe not "
-                                       "found and attempted read-write mount");
+                               udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
                                goto error_exit;
                        }
                }
@@ -971,9 +950,8 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
                bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
 
        if (bitmap == NULL) {
-               udf_error(sb, __func__,
-                         "Unable to allocate space for bitmap "
-                         "and %d buffer_head pointers", nr_groups);
+               udf_err(sb, "Unable to allocate space for bitmap and %d buffer_head pointers\n",
+                       nr_groups);
                return NULL;
        }
 
@@ -1003,10 +981,9 @@ static int udf_fill_partdesc_info(struct super_block *sb,
        if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE))
                map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE;
 
-       udf_debug("Partition (%d type %x) starts at physical %d, "
-                 "block length %d\n", p_index,
-                 map->s_partition_type, map->s_partition_root,
-                 map->s_partition_len);
+       udf_debug("Partition (%d type %x) starts at physical %d, block length %d\n",
+                 p_index, map->s_partition_type,
+                 map->s_partition_root, map->s_partition_len);
 
        if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) &&
            strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03))
@@ -1023,12 +1000,12 @@ static int udf_fill_partdesc_info(struct super_block *sb,
                map->s_uspace.s_table = udf_iget(sb, &loc);
                if (!map->s_uspace.s_table) {
                        udf_debug("cannot load unallocSpaceTable (part %d)\n",
-                                       p_index);
+                                 p_index);
                        return 1;
                }
                map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
                udf_debug("unallocSpaceTable (part %d) @ %ld\n",
-                               p_index, map->s_uspace.s_table->i_ino);
+                         p_index, map->s_uspace.s_table->i_ino);
        }
 
        if (phd->unallocSpaceBitmap.extLength) {
@@ -1041,8 +1018,8 @@ static int udf_fill_partdesc_info(struct super_block *sb,
                bitmap->s_extPosition = le32_to_cpu(
                                phd->unallocSpaceBitmap.extPosition);
                map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
-               udf_debug("unallocSpaceBitmap (part %d) @ %d\n", p_index,
-                                               bitmap->s_extPosition);
+               udf_debug("unallocSpaceBitmap (part %d) @ %d\n",
+                         p_index, bitmap->s_extPosition);
        }
 
        if (phd->partitionIntegrityTable.extLength)
@@ -1058,13 +1035,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
                map->s_fspace.s_table = udf_iget(sb, &loc);
                if (!map->s_fspace.s_table) {
                        udf_debug("cannot load freedSpaceTable (part %d)\n",
-                               p_index);
+                                 p_index);
                        return 1;
                }
 
                map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
                udf_debug("freedSpaceTable (part %d) @ %ld\n",
-                               p_index, map->s_fspace.s_table->i_ino);
+                         p_index, map->s_fspace.s_table->i_ino);
        }
 
        if (phd->freedSpaceBitmap.extLength) {
@@ -1077,8 +1054,8 @@ static int udf_fill_partdesc_info(struct super_block *sb,
                bitmap->s_extPosition = le32_to_cpu(
                                phd->freedSpaceBitmap.extPosition);
                map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
-               udf_debug("freedSpaceBitmap (part %d) @ %d\n", p_index,
-                                       bitmap->s_extPosition);
+               udf_debug("freedSpaceBitmap (part %d) @ %d\n",
+                         p_index, bitmap->s_extPosition);
        }
        return 0;
 }
@@ -1118,11 +1095,9 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
        udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block);
        if (!sbi->s_vat_inode &&
            sbi->s_last_block != blocks - 1) {
-               printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the"
-                      " last recorded block (%lu), retrying with the last "
-                      "block of the device (%lu).\n",
-                      (unsigned long)sbi->s_last_block,
-                      (unsigned long)blocks - 1);
+               pr_notice("Failed to read VAT inode from the last recorded block (%lu), retrying with the last block of the device (%lu).\n",
+                         (unsigned long)sbi->s_last_block,
+                         (unsigned long)blocks - 1);
                udf_find_vat_block(sb, p_index, type1_index, blocks - 1);
        }
        if (!sbi->s_vat_inode)
@@ -1220,8 +1195,8 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
        if (map->s_partition_type == UDF_METADATA_MAP25) {
                ret = udf_load_metadata_files(sb, i);
                if (ret) {
-                       printk(KERN_ERR "UDF-fs: error loading MetaData "
-                       "partition map %d\n", i);
+                       udf_err(sb, "error loading MetaData partition map %d\n",
+                               i);
                        goto out_bh;
                }
        } else {
@@ -1234,9 +1209,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
                 * overwrite blocks instead of relocating them).
                 */
                sb->s_flags |= MS_RDONLY;
-               printk(KERN_NOTICE "UDF-fs: Filesystem marked read-only "
-                       "because writing to pseudooverwrite partition is "
-                       "not implemented.\n");
+               pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n");
        }
 out_bh:
        /* In case loading failed, we handle cleanup in udf_fill_super */
@@ -1344,9 +1317,8 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
                                struct metadataPartitionMap *mdm =
                                                (struct metadataPartitionMap *)
                                                &(lvd->partitionMaps[offset]);
-                               udf_debug("Parsing Logical vol part %d "
-                                       "type %d  id=%s\n", i, type,
-                                       UDF_ID_METADATA);
+                               udf_debug("Parsing Logical vol part %d type %d  id=%s\n",
+                                         i, type, UDF_ID_METADATA);
 
                                map->s_partition_type = UDF_METADATA_MAP25;
                                map->s_partition_func = udf_get_pblock_meta25;
@@ -1361,25 +1333,24 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
                                        le32_to_cpu(mdm->allocUnitSize);
                                mdata->s_align_unit_size =
                                        le16_to_cpu(mdm->alignUnitSize);
-                               mdata->s_dup_md_flag     =
-                                       mdm->flags & 0x01;
+                               if (mdm->flags & 0x01)
+                                       mdata->s_flags |= MF_DUPLICATE_MD;
 
                                udf_debug("Metadata Ident suffix=0x%x\n",
-                                       (le16_to_cpu(
-                                        ((__le16 *)
-                                             mdm->partIdent.identSuffix)[0])));
+                                         le16_to_cpu(*(__le16 *)
+                                                     mdm->partIdent.identSuffix));
                                udf_debug("Metadata part num=%d\n",
-                                       le16_to_cpu(mdm->partitionNum));
+                                         le16_to_cpu(mdm->partitionNum));
                                udf_debug("Metadata part alloc unit size=%d\n",
-                                       le32_to_cpu(mdm->allocUnitSize));
+                                         le32_to_cpu(mdm->allocUnitSize));
                                udf_debug("Metadata file loc=%d\n",
-                                       le32_to_cpu(mdm->metadataFileLoc));
+                                         le32_to_cpu(mdm->metadataFileLoc));
                                udf_debug("Mirror file loc=%d\n",
-                                      le32_to_cpu(mdm->metadataMirrorFileLoc));
+                                         le32_to_cpu(mdm->metadataMirrorFileLoc));
                                udf_debug("Bitmap file loc=%d\n",
-                                      le32_to_cpu(mdm->metadataBitmapFileLoc));
-                               udf_debug("Duplicate Flag: %d %d\n",
-                                       mdata->s_dup_md_flag, mdm->flags);
+                                         le32_to_cpu(mdm->metadataBitmapFileLoc));
+                               udf_debug("Flags: %d %d\n",
+                                         mdata->s_flags, mdm->flags);
                        } else {
                                udf_debug("Unknown ident: %s\n",
                                          upm2->partIdent.ident);
@@ -1389,16 +1360,15 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
                        map->s_partition_num = le16_to_cpu(upm2->partitionNum);
                }
                udf_debug("Partition (%d:%d) type %d on volume %d\n",
-                         i, map->s_partition_num, type,
-                         map->s_volumeseqnum);
+                         i, map->s_partition_num, type, map->s_volumeseqnum);
        }
 
        if (fileset) {
                struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]);
 
                *fileset = lelb_to_cpu(la->extLocation);
-               udf_debug("FileSet found in LogicalVolDesc at block=%d, "
-                         "partition=%d\n", fileset->logicalBlockNum,
+               udf_debug("FileSet found in LogicalVolDesc at block=%d, partition=%d\n",
+                         fileset->logicalBlockNum,
                          fileset->partitionReferenceNum);
        }
        if (lvd->integritySeqExt.extLength)
@@ -1478,9 +1448,9 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
 
                bh = udf_read_tagged(sb, block, block, &ident);
                if (!bh) {
-                       printk(KERN_ERR "udf: Block %Lu of volume descriptor "
-                              "sequence is corrupted or we could not read "
-                              "it.\n", (unsigned long long)block);
+                       udf_err(sb,
+                               "Block %llu of volume descriptor sequence is corrupted or we could not read it\n",
+                               (unsigned long long)block);
                        return 1;
                }
 
@@ -1553,7 +1523,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
         * in a suitable order
         */
        if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) {
-               printk(KERN_ERR "udf: Primary Volume Descriptor not found!\n");
+               udf_err(sb, "Primary Volume Descriptor not found!\n");
                return 1;
        }
        if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block))
@@ -1740,7 +1710,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
 
        if (!sb_set_blocksize(sb, uopt->blocksize)) {
                if (!silent)
-                       printk(KERN_WARNING "UDF-fs: Bad block size\n");
+                       udf_warn(sb, "Bad block size\n");
                return 0;
        }
        sbi->s_last_block = uopt->lastblock;
@@ -1749,12 +1719,11 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
                nsr_off = udf_check_vsd(sb);
                if (!nsr_off) {
                        if (!silent)
-                               printk(KERN_WARNING "UDF-fs: No VRS found\n");
+                               udf_warn(sb, "No VRS found\n");
                        return 0;
                }
                if (nsr_off == -1)
-                       udf_debug("Failed to read byte 32768. Assuming open "
-                                 "disc. Skipping validity check\n");
+                       udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n");
                if (!sbi->s_last_block)
                        sbi->s_last_block = udf_get_last_block(sb);
        } else {
@@ -1765,7 +1734,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
        sbi->s_anchor = uopt->anchor;
        if (!udf_find_anchor(sb, fileset)) {
                if (!silent)
-                       printk(KERN_WARNING "UDF-fs: No anchor found\n");
+                       udf_warn(sb, "No anchor found\n");
                return 0;
        }
        return 1;
@@ -1937,8 +1906,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 
        if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
            uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
-               udf_error(sb, "udf_read_super",
-                         "utf8 cannot be combined with iocharset\n");
+               udf_err(sb, "utf8 cannot be combined with iocharset\n");
                goto error_out;
        }
 #ifdef CONFIG_UDF_NLS
@@ -1987,15 +1955,14 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
                ret = udf_load_vrs(sb, &uopt, silent, &fileset);
                if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
                        if (!silent)
-                               printk(KERN_NOTICE
-                                      "UDF-fs: Rescanning with blocksize "
-                                      "%d\n", UDF_DEFAULT_BLOCKSIZE);
+                               pr_notice("Rescanning with blocksize %d\n",
+                                         UDF_DEFAULT_BLOCKSIZE);
                        uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
                        ret = udf_load_vrs(sb, &uopt, silent, &fileset);
                }
        }
        if (!ret) {
-               printk(KERN_WARNING "UDF-fs: No partition found (1)\n");
+               udf_warn(sb, "No partition found (1)\n");
                goto error_out;
        }
 
@@ -2010,10 +1977,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
                                le16_to_cpu(lvidiu->maxUDFWriteRev); */
 
                if (minUDFReadRev > UDF_MAX_READ_VERSION) {
-                       printk(KERN_ERR "UDF-fs: minUDFReadRev=%x "
-                                       "(max is %x)\n",
-                              le16_to_cpu(lvidiu->minUDFReadRev),
-                              UDF_MAX_READ_VERSION);
+                       udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
+                               le16_to_cpu(lvidiu->minUDFReadRev),
+                               UDF_MAX_READ_VERSION);
                        goto error_out;
                } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION)
                        sb->s_flags |= MS_RDONLY;
@@ -2027,28 +1993,27 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
        }
 
        if (!sbi->s_partitions) {
-               printk(KERN_WARNING "UDF-fs: No partition found (2)\n");
+               udf_warn(sb, "No partition found (2)\n");
                goto error_out;
        }
 
        if (sbi->s_partmaps[sbi->s_partition].s_partition_flags &
                        UDF_PART_FLAG_READ_ONLY) {
-               printk(KERN_NOTICE "UDF-fs: Partition marked readonly; "
-                                  "forcing readonly mount\n");
+               pr_notice("Partition marked readonly; forcing readonly mount\n");
                sb->s_flags |= MS_RDONLY;
        }
 
        if (udf_find_fileset(sb, &fileset, &rootdir)) {
-               printk(KERN_WARNING "UDF-fs: No fileset found\n");
+               udf_warn(sb, "No fileset found\n");
                goto error_out;
        }
 
        if (!silent) {
                struct timestamp ts;
                udf_time_to_disk_stamp(&ts, sbi->s_record_time);
-               udf_info("UDF: Mounting volume '%s', "
-                        "timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
-                        sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day,
+               udf_info("Mounting volume '%s', timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
+                        sbi->s_volume_ident,
+                        le16_to_cpu(ts.year), ts.month, ts.day,
                         ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone));
        }
        if (!(sb->s_flags & MS_RDONLY))
@@ -2059,8 +2024,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
        /* perhaps it's not extensible enough, but for now ... */
        inode = udf_iget(sb, &rootdir);
        if (!inode) {
-               printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, "
-                               "partition=%d\n",
+               udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
                       rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
                goto error_out;
        }
@@ -2068,7 +2032,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
        /* Allocate a dentry for the root inode */
        sb->s_root = d_alloc_root(inode);
        if (!sb->s_root) {
-               printk(KERN_ERR "UDF-fs: Couldn't allocate root dentry\n");
+               udf_err(sb, "Couldn't allocate root dentry\n");
                iput(inode);
                goto error_out;
        }
@@ -2096,32 +2060,40 @@ error_out:
        return -EINVAL;
 }
 
-static void udf_error(struct super_block *sb, const char *function,
-                     const char *fmt, ...)
+void _udf_err(struct super_block *sb, const char *function,
+             const char *fmt, ...)
 {
+       struct va_format vaf;
        va_list args;
 
-       if (!(sb->s_flags & MS_RDONLY)) {
-               /* mark sb error */
+       /* mark sb error */
+       if (!(sb->s_flags & MS_RDONLY))
                sb->s_dirt = 1;
-       }
+
        va_start(args, fmt);
-       vsnprintf(error_buf, sizeof(error_buf), fmt, args);
+
+       vaf.fmt = fmt;
+       vaf.va = &args;
+
+       pr_err("error (device %s): %s: %pV", sb->s_id, function, &vaf);
+
        va_end(args);
-       printk(KERN_CRIT "UDF-fs error (device %s): %s: %s\n",
-               sb->s_id, function, error_buf);
 }
 
-void udf_warning(struct super_block *sb, const char *function,
-                const char *fmt, ...)
+void _udf_warn(struct super_block *sb, const char *function,
+              const char *fmt, ...)
 {
+       struct va_format vaf;
        va_list args;
 
        va_start(args, fmt);
-       vsnprintf(error_buf, sizeof(error_buf), fmt, args);
+
+       vaf.fmt = fmt;
+       vaf.va = &args;
+
+       pr_warn("warning (device %s): %s: %pV", sb->s_id, function, &vaf);
+
        va_end(args);
-       printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n",
-              sb->s_id, function, error_buf);
 }
 
 static void udf_put_super(struct super_block *sb)
@@ -2213,11 +2185,11 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
        bh = udf_read_ptagged(sb, &loc, 0, &ident);
 
        if (!bh) {
-               printk(KERN_ERR "udf: udf_count_free failed\n");
+               udf_err(sb, "udf_count_free failed\n");
                goto out;
        } else if (ident != TAG_IDENT_SBD) {
                brelse(bh);
-               printk(KERN_ERR "udf: udf_count_free failed\n");
+               udf_err(sb, "udf_count_free failed\n");
                goto out;
        }
 
index 8424308db4b49484151a415d68b34b37be210d0b..4b98fee8e1613131815fa6155d3e7ac81a33c0f8 100644 (file)
@@ -95,23 +95,21 @@ void udf_truncate_tail_extent(struct inode *inode)
                lbcount += elen;
                if (lbcount > inode->i_size) {
                        if (lbcount - inode->i_size >= inode->i_sb->s_blocksize)
-                               printk(KERN_WARNING
-                                      "udf_truncate_tail_extent(): Too long "
-                                      "extent after EOF in inode %u: i_size: "
-                                      "%Ld lbcount: %Ld extent %u+%u\n",
-                                      (unsigned)inode->i_ino,
-                                      (long long)inode->i_size,
-                                      (long long)lbcount,
-                                      (unsigned)eloc.logicalBlockNum,
-                                      (unsigned)elen);
+                               udf_warn(inode->i_sb,
+                                        "Too long extent after EOF in inode %u: i_size: %lld lbcount: %lld extent %u+%u\n",
+                                        (unsigned)inode->i_ino,
+                                        (long long)inode->i_size,
+                                        (long long)lbcount,
+                                        (unsigned)eloc.logicalBlockNum,
+                                        (unsigned)elen);
                        nelen = elen - (lbcount - inode->i_size);
                        epos.offset -= adsize;
                        extent_trunc(inode, &epos, &eloc, etype, elen, nelen);
                        epos.offset += adsize;
                        if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
-                               printk(KERN_ERR "udf_truncate_tail_extent(): "
-                                      "Extent after EOF in inode %u.\n",
-                                      (unsigned)inode->i_ino);
+                               udf_err(inode->i_sb,
+                                       "Extent after EOF in inode %u\n",
+                                       (unsigned)inode->i_ino);
                        break;
                }
        }
index 4858c191242b7a4180f90b5e080606685b9cebb3..5142a82e32767339cece15bcf70e12928159591e 100644 (file)
 
 #pragma pack(1) /* XXX(hch): Why?  This file just defines in-core structures */
 
+#define MF_DUPLICATE_MD                0x01
+#define MF_MIRROR_FE_LOADED    0x02
+
 struct udf_meta_data {
        __u32   s_meta_file_loc;
        __u32   s_mirror_file_loc;
        __u32   s_bitmap_file_loc;
        __u32   s_alloc_unit_size;
        __u16   s_align_unit_size;
-       __u8    s_dup_md_flag;
+       int     s_flags;
        struct inode *s_metadata_fe;
        struct inode *s_mirror_fe;
        struct inode *s_bitmap_fe;
index dc8a8dcc5ae101d70cec4b5bd05a532f45d62e39..f34e6fc0cdaa430d7f787133134ff3309bc97356 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __UDF_DECL_H
 #define __UDF_DECL_H
 
+#define pr_fmt(fmt) "UDF-fs: " fmt
+
 #include "ecma_167.h"
 #include "osta_udf.h"
 
 #define UDF_PREALLOCATE
 #define UDF_DEFAULT_PREALLOC_BLOCKS    8
 
+extern __printf(3, 4) void _udf_err(struct super_block *sb,
+               const char *function, const char *fmt, ...);
+#define udf_err(sb, fmt, ...)                                  \
+       _udf_err(sb, __func__, fmt, ##__VA_ARGS__)
+
+extern __printf(3, 4) void _udf_warn(struct super_block *sb,
+               const char *function, const char *fmt, ...);
+#define udf_warn(sb, fmt, ...)                                 \
+       _udf_warn(sb, __func__, fmt, ##__VA_ARGS__)
+
+#define udf_info(fmt, ...)                                     \
+       pr_info("INFO " fmt, ##__VA_ARGS__)
+
 #undef UDFFS_DEBUG
 
 #ifdef UDFFS_DEBUG
-#define udf_debug(f, a...) \
-do { \
-       printk(KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \
-               __FILE__, __LINE__, __func__); \
-       printk(f, ##a); \
-} while (0)
+#define udf_debug(fmt, ...)                                    \
+       printk(KERN_DEBUG pr_fmt("%s:%d:%s: " fmt),             \
+              __FILE__, __LINE__, __func__, ##__VA_ARGS__)
 #else
-#define udf_debug(f, a...) /**/
+#define udf_debug(fmt, ...)                                    \
+       no_printk(fmt, ##__VA_ARGS__)
 #endif
 
-#define udf_info(f, a...) \
-       printk(KERN_INFO "UDF-fs INFO " f, ##a);
-
-
 #define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) )
 #define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) )
 
@@ -112,8 +121,6 @@ struct extent_position {
 
 /* super.c */
 
-extern __printf(3, 4) void udf_warning(struct super_block *, const char *,
-                                       const char *, ...);
 static inline void udf_updated_lvid(struct super_block *sb)
 {
        struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh;
@@ -126,6 +133,8 @@ static inline void udf_updated_lvid(struct super_block *sb)
        UDF_SB(sb)->s_lvid_dirty = 1;
 }
 extern u64 lvid_get_unique_id(struct super_block *sb);
+struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
+                                       u32 meta_file_loc, u32 partition_num);
 
 /* namei.c */
 extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
index b8c828c4d20034fcf046d21b2946e94044b5a74e..1f11483eba6a7b9f62d611e6f0110ec80a0ce643 100644 (file)
  * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm
  */
 
+#include "udfdecl.h"
+
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include "udfdecl.h"
 
 #define EPOCH_YEAR 1970
 
index d03a90b6ad69c5850b6e6a3b5ba0b64726df0418..44b815e57f9439116199f91b67489a98272272bc 100644 (file)
@@ -114,7 +114,7 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
        cmp_id = ocu_i->u_cmpID;
        if (cmp_id != 8 && cmp_id != 16) {
                memset(utf_o, 0, sizeof(struct ustr));
-               printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n",
+               pr_err("unknown compression code (%d) stri=%s\n",
                       cmp_id, ocu_i->u_name);
                return 0;
        }
@@ -242,7 +242,7 @@ try_again:
        if (utf_cnt) {
 error_out:
                ocu[++u_len] = '?';
-               printk(KERN_DEBUG "udf: bad UTF-8 character\n");
+               printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
        }
 
        ocu[length - 1] = (uint8_t)u_len + 1;
@@ -267,7 +267,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
        cmp_id = ocu_i->u_cmpID;
        if (cmp_id != 8 && cmp_id != 16) {
                memset(utf_o, 0, sizeof(struct ustr));
-               printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n",
+               pr_err("unknown compression code (%d) stri=%s\n",
                       cmp_id, ocu_i->u_name);
                return 0;
        }
index 2eabf04af3de12e98d0fbf812879825e7247e619..78a4c70d46b5de3cd42e2181b2dadf9f7c7774a7 100644 (file)
@@ -341,7 +341,7 @@ cg_found:
 
 fail_remove_inode:
        unlock_super(sb);
-       inode->i_nlink = 0;
+       clear_nlink(inode);
        iput(inode);
        UFSD("EXIT (FAILED): err %d\n", err);
        return ERR_PTR(err);
index b4d791a83207e9869f652c804112ff442626c638..879b13436fa47b8bf84c5b731069aa03978e3dfa 100644 (file)
@@ -589,7 +589,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
         * Copy data to the in-core inode.
         */
        inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
-       inode->i_nlink = fs16_to_cpu(sb, ufs_inode->ui_nlink);
+       set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
        if (inode->i_nlink == 0) {
                ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
                return -1;
@@ -637,7 +637,7 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
         * Copy data to the in-core inode.
         */
        inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
-       inode->i_nlink = fs16_to_cpu(sb, ufs2_inode->ui_nlink);
+       set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
        if (inode->i_nlink == 0) {
                ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
                return -1;
index 9ba2a07b734375f1f07870c8179d96880ebbe5c9..23ce927973a4a739e3472b68e9f7f6ef84de202a 100644 (file)
@@ -1153,7 +1153,7 @@ xfs_setup_inode(
        hlist_add_fake(&inode->i_hash);
 
        inode->i_mode   = ip->i_d.di_mode;
-       inode->i_nlink  = ip->i_d.di_nlink;
+       set_nlink(inode, ip->i_d.di_nlink);
        inode->i_uid    = ip->i_d.di_uid;
        inode->i_gid    = ip->i_d.di_gid;
 
index 2dcb72bff4b614c44999d4c9d3bf607b3f1b19d0..2314ad8b3c9cced6a4679441d7c6b25afe500348 100644 (file)
@@ -117,6 +117,7 @@ struct kiocb {
 
        struct list_head        ki_list;        /* the aio core uses this
                                                 * for cancellation */
+       struct list_head        ki_batch;       /* batch allocation */
 
        /*
         * If the aio_resfd field of the userspace iocb is not zero,
index da7e4bc34e8cfe1339af4de1c96f55e777867161..1b7f9d5250131fe43acf75045142b544f6f1a396 100644 (file)
@@ -516,7 +516,7 @@ struct cgroup_subsys {
        struct list_head sibling;
        /* used when use_id == true */
        struct idr idr;
-       spinlock_t id_lock;
+       rwlock_t id_lock;
 
        /* should be defined only by modular subsystems */
        struct module *module;
index 62157c03caf76698367dc5341a6b56c73119375e..4df926199369622bffed05e3e60eeff8c42e4532 100644 (file)
@@ -165,6 +165,7 @@ struct dentry_operations {
                        unsigned int, const char *, const struct qstr *);
        int (*d_delete)(const struct dentry *);
        void (*d_release)(struct dentry *);
+       void (*d_prune)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
        char *(*d_dname)(struct dentry *, char *, int);
        struct vfsmount *(*d_automount)(struct path *);
@@ -184,8 +185,9 @@ struct dentry_operations {
 #define DCACHE_OP_COMPARE      0x0002
 #define DCACHE_OP_REVALIDATE   0x0004
 #define DCACHE_OP_DELETE       0x0008
+#define DCACHE_OP_PRUNE         0x0010
 
-#define        DCACHE_DISCONNECTED     0x0010
+#define        DCACHE_DISCONNECTED     0x0020
      /* This dentry is possibly not currently connected to the dcache tree, in
       * which case its parent will either be itself, or will have this flag as
       * well.  nfsd will not use a dentry with this bit set, but will first
@@ -196,8 +198,8 @@ struct dentry_operations {
       * dentry into place and return that dentry rather than the passed one,
       * typically using d_splice_alias. */
 
-#define DCACHE_REFERENCED      0x0020  /* Recently used, don't discard. */
-#define DCACHE_RCUACCESS       0x0040  /* Entry has ever been RCU-visible */
+#define DCACHE_REFERENCED      0x0040  /* Recently used, don't discard. */
+#define DCACHE_RCUACCESS       0x0080  /* Entry has ever been RCU-visible */
 
 #define DCACHE_CANT_MOUNT      0x0100
 #define DCACHE_GENOCIDE                0x0200
index 99e3e50b5c571a3cb9448b69891b95796f1f6ddf..98f34b886f955db4e9c7ecd63d892bafd30e6f1f 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/ratelimit.h>
 
 struct dm_dev;
 struct dm_target;
@@ -127,10 +128,6 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d);
  * Information about a target type
  */
 
-/*
- * Target features
- */
-
 struct target_type {
        uint64_t features;
        const char *name;
@@ -159,6 +156,30 @@ struct target_type {
        struct list_head list;
 };
 
+/*
+ * Target features
+ */
+
+/*
+ * Any table that contains an instance of this target must have only one.
+ */
+#define DM_TARGET_SINGLETON            0x00000001
+#define dm_target_needs_singleton(type)        ((type)->features & DM_TARGET_SINGLETON)
+
+/*
+ * Indicates that a target does not support read-only devices.
+ */
+#define DM_TARGET_ALWAYS_WRITEABLE     0x00000002
+#define dm_target_always_writeable(type) \
+               ((type)->features & DM_TARGET_ALWAYS_WRITEABLE)
+
+/*
+ * Any device that contains a table with an instance of this target may never
+ * have tables containing any different target type.
+ */
+#define DM_TARGET_IMMUTABLE            0x00000004
+#define dm_target_is_immutable(type)   ((type)->features & DM_TARGET_IMMUTABLE)
+
 struct dm_target {
        struct dm_table *table;
        struct target_type *type;
@@ -375,6 +396,14 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
  *---------------------------------------------------------------*/
 #define DM_NAME "device-mapper"
 
+#ifdef CONFIG_PRINTK
+extern struct ratelimit_state dm_ratelimit_state;
+
+#define dm_ratelimit() __ratelimit(&dm_ratelimit_state)
+#else
+#define dm_ratelimit() 0
+#endif
+
 #define DMCRIT(f, arg...) \
        printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
 
@@ -382,7 +411,7 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
        printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
 #define DMERR_LIMIT(f, arg...) \
        do { \
-               if (printk_ratelimit()) \
+               if (dm_ratelimit())     \
                        printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " \
                               f "\n", ## arg); \
        } while (0)
@@ -391,7 +420,7 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
        printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
 #define DMWARN_LIMIT(f, arg...) \
        do { \
-               if (printk_ratelimit()) \
+               if (dm_ratelimit())     \
                        printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " \
                               f "\n", ## arg); \
        } while (0)
@@ -400,7 +429,7 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
        printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
 #define DMINFO_LIMIT(f, arg...) \
        do { \
-               if (printk_ratelimit()) \
+               if (dm_ratelimit())     \
                        printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f \
                               "\n", ## arg); \
        } while (0)
@@ -410,7 +439,7 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
        printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg)
 #  define DMDEBUG_LIMIT(f, arg...) \
        do { \
-               if (printk_ratelimit()) \
+               if (dm_ratelimit())     \
                        printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX ": " f \
                               "\n", ## arg); \
        } while (0)
index 0cb8eff76bd6e563999df6c081251c4f8a8ef292..75fd5573516e15f04bcc864140620977405362f6 100644 (file)
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY    _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR       4
-#define DM_VERSION_MINOR       21
+#define DM_VERSION_MINOR       22
 #define DM_VERSION_PATCHLEVEL  0
-#define DM_VERSION_EXTRA       "-ioctl (2011-07-06)"
+#define DM_VERSION_EXTRA       "-ioctl (2011-10-19)"
 
 /* Status bits */
 #define DM_READONLY_FLAG       (1 << 0) /* In/Out */
index 5e54458e920f36466a29d83ccbe913a03f347a01..47d9d376e4e7a9e0cecb609065b3d735825cfdb9 100644 (file)
@@ -57,5 +57,9 @@ void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
                                 dm_kcopyd_notify_fn fn, void *context);
 void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err);
 
+int dm_kcopyd_zero(struct dm_kcopyd_client *kc,
+                  unsigned num_dests, struct dm_io_region *dests,
+                  unsigned flags, dm_kcopyd_notify_fn fn, void *context);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_DM_KCOPYD_H */
index eeace7d3ff15ccaaf7012f909c68e6bf81e1a3d7..0678c2adc42109dd6a88b823f4bc7dfa77c8fcdf 100644 (file)
  * Payload-to-userspace:
  *     A single string containing all the argv arguments separated by ' 's
  * Payload-to-kernel:
- *     None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *     A NUL-terminated string that is the name of the device that is used
+ *     as the backing store for the log data.  'dm_get_device' will be called
+ *     on this device.  ('dm_put_device' will be called on this device
+ *     automatically after calling DM_ULOG_DTR.)  If there is no device needed
+ *     for log data, 'data_size' in the dm_ulog_request struct should be 0.
  *
  * The UUID contained in the dm_ulog_request structure is the reference that
  * will be used by all request types to a specific log.  The constructor must
- * record this assotiation with instance created.
+ * record this association with the instance created.
  *
  * When the request has been processed, user-space must return the
- * dm_ulog_request to the kernel - setting the 'error' field and
- * 'data_size' appropriately.
+ * dm_ulog_request to the kernel - setting the 'error' field, filling the
+ * data field with the log device if necessary, and setting 'data_size'
+ * appropriately.
  */
 #define DM_ULOG_CTR                    1
 
  * dm_ulog_request or a change in the way requests are
  * issued/handled.  Changes are outlined here:
  *     version 1:  Initial implementation
+ *     version 2:  DM_ULOG_CTR allowed to return a string containing a
+ *                 device name that is to be registered with DM via
+ *                 'dm_get_device'.
  */
-#define DM_ULOG_REQUEST_VERSION 1
+#define DM_ULOG_REQUEST_VERSION 2
 
 struct dm_ulog_request {
        /*
index 347fdc32177a66f685f355e9ddf6c52d7bacccd5..be86ae13893fcead97e4f9546c71ea500f867e3d 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _LINUX_DMA_MAPPING_H
 #define _LINUX_DMA_MAPPING_H
 
+#include <linux/string.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/dma-attrs.h>
@@ -117,6 +118,15 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
                return -EIO;
 }
 
+static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
+                                       dma_addr_t *dma_handle, gfp_t flag)
+{
+       void *ret = dma_alloc_coherent(dev, size, dma_handle, flag);
+       if (ret)
+               memset(ret, 0, size);
+       return ret;
+}
+
 #ifdef CONFIG_HAS_DMA
 static inline int dma_get_cache_alignment(void)
 {
index 4a73257b47d0cca22e1f33cdb47a8075eb1a358c..055b248bdd5328dfbac8e59836563771eb8fcd58 100644 (file)
@@ -42,4 +42,354 @@ static inline void opstate_init(void)
        return;
 }
 
+#define EDAC_MC_LABEL_LEN      31
+#define MC_PROC_NAME_MAX_LEN   7
+
+/* memory devices */
+enum dev_type {
+       DEV_UNKNOWN = 0,
+       DEV_X1,
+       DEV_X2,
+       DEV_X4,
+       DEV_X8,
+       DEV_X16,
+       DEV_X32,                /* Do these parts exist? */
+       DEV_X64                 /* Do these parts exist? */
+};
+
+#define DEV_FLAG_UNKNOWN       BIT(DEV_UNKNOWN)
+#define DEV_FLAG_X1            BIT(DEV_X1)
+#define DEV_FLAG_X2            BIT(DEV_X2)
+#define DEV_FLAG_X4            BIT(DEV_X4)
+#define DEV_FLAG_X8            BIT(DEV_X8)
+#define DEV_FLAG_X16           BIT(DEV_X16)
+#define DEV_FLAG_X32           BIT(DEV_X32)
+#define DEV_FLAG_X64           BIT(DEV_X64)
+
+/* memory types */
+enum mem_type {
+       MEM_EMPTY = 0,          /* Empty csrow */
+       MEM_RESERVED,           /* Reserved csrow type */
+       MEM_UNKNOWN,            /* Unknown csrow type */
+       MEM_FPM,                /* Fast page mode */
+       MEM_EDO,                /* Extended data out */
+       MEM_BEDO,               /* Burst Extended data out */
+       MEM_SDR,                /* Single data rate SDRAM */
+       MEM_RDR,                /* Registered single data rate SDRAM */
+       MEM_DDR,                /* Double data rate SDRAM */
+       MEM_RDDR,               /* Registered Double data rate SDRAM */
+       MEM_RMBS,               /* Rambus DRAM */
+       MEM_DDR2,               /* DDR2 RAM */
+       MEM_FB_DDR2,            /* fully buffered DDR2 */
+       MEM_RDDR2,              /* Registered DDR2 RAM */
+       MEM_XDR,                /* Rambus XDR */
+       MEM_DDR3,               /* DDR3 RAM */
+       MEM_RDDR3,              /* Registered DDR3 RAM */
+};
+
+#define MEM_FLAG_EMPTY         BIT(MEM_EMPTY)
+#define MEM_FLAG_RESERVED      BIT(MEM_RESERVED)
+#define MEM_FLAG_UNKNOWN       BIT(MEM_UNKNOWN)
+#define MEM_FLAG_FPM           BIT(MEM_FPM)
+#define MEM_FLAG_EDO           BIT(MEM_EDO)
+#define MEM_FLAG_BEDO          BIT(MEM_BEDO)
+#define MEM_FLAG_SDR           BIT(MEM_SDR)
+#define MEM_FLAG_RDR           BIT(MEM_RDR)
+#define MEM_FLAG_DDR           BIT(MEM_DDR)
+#define MEM_FLAG_RDDR          BIT(MEM_RDDR)
+#define MEM_FLAG_RMBS          BIT(MEM_RMBS)
+#define MEM_FLAG_DDR2           BIT(MEM_DDR2)
+#define MEM_FLAG_FB_DDR2        BIT(MEM_FB_DDR2)
+#define MEM_FLAG_RDDR2          BIT(MEM_RDDR2)
+#define MEM_FLAG_XDR            BIT(MEM_XDR)
+#define MEM_FLAG_DDR3           BIT(MEM_DDR3)
+#define MEM_FLAG_RDDR3          BIT(MEM_RDDR3)
+
+/* chipset Error Detection and Correction capabilities and mode */
+enum edac_type {
+       EDAC_UNKNOWN = 0,       /* Unknown if ECC is available */
+       EDAC_NONE,              /* Doesn't support ECC */
+       EDAC_RESERVED,          /* Reserved ECC type */
+       EDAC_PARITY,            /* Detects parity errors */
+       EDAC_EC,                /* Error Checking - no correction */
+       EDAC_SECDED,            /* Single bit error correction, Double detection */
+       EDAC_S2ECD2ED,          /* Chipkill x2 devices - do these exist? */
+       EDAC_S4ECD4ED,          /* Chipkill x4 devices */
+       EDAC_S8ECD8ED,          /* Chipkill x8 devices */
+       EDAC_S16ECD16ED,        /* Chipkill x16 devices */
+};
+
+#define EDAC_FLAG_UNKNOWN      BIT(EDAC_UNKNOWN)
+#define EDAC_FLAG_NONE         BIT(EDAC_NONE)
+#define EDAC_FLAG_PARITY       BIT(EDAC_PARITY)
+#define EDAC_FLAG_EC           BIT(EDAC_EC)
+#define EDAC_FLAG_SECDED       BIT(EDAC_SECDED)
+#define EDAC_FLAG_S2ECD2ED     BIT(EDAC_S2ECD2ED)
+#define EDAC_FLAG_S4ECD4ED     BIT(EDAC_S4ECD4ED)
+#define EDAC_FLAG_S8ECD8ED     BIT(EDAC_S8ECD8ED)
+#define EDAC_FLAG_S16ECD16ED   BIT(EDAC_S16ECD16ED)
+
+/* scrubbing capabilities */
+enum scrub_type {
+       SCRUB_UNKNOWN = 0,      /* Unknown if scrubber is available */
+       SCRUB_NONE,             /* No scrubber */
+       SCRUB_SW_PROG,          /* SW progressive (sequential) scrubbing */
+       SCRUB_SW_SRC,           /* Software scrub only errors */
+       SCRUB_SW_PROG_SRC,      /* Progressive software scrub from an error */
+       SCRUB_SW_TUNABLE,       /* Software scrub frequency is tunable */
+       SCRUB_HW_PROG,          /* HW progressive (sequential) scrubbing */
+       SCRUB_HW_SRC,           /* Hardware scrub only errors */
+       SCRUB_HW_PROG_SRC,      /* Progressive hardware scrub from an error */
+       SCRUB_HW_TUNABLE        /* Hardware scrub frequency is tunable */
+};
+
+#define SCRUB_FLAG_SW_PROG     BIT(SCRUB_SW_PROG)
+#define SCRUB_FLAG_SW_SRC      BIT(SCRUB_SW_SRC)
+#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC)
+#define SCRUB_FLAG_SW_TUN      BIT(SCRUB_SW_SCRUB_TUNABLE)
+#define SCRUB_FLAG_HW_PROG     BIT(SCRUB_HW_PROG)
+#define SCRUB_FLAG_HW_SRC      BIT(SCRUB_HW_SRC)
+#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC)
+#define SCRUB_FLAG_HW_TUN      BIT(SCRUB_HW_TUNABLE)
+
+/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
+
+/* EDAC internal operation states */
+#define        OP_ALLOC                0x100
+#define OP_RUNNING_POLL                0x201
+#define OP_RUNNING_INTERRUPT   0x202
+#define OP_RUNNING_POLL_INTR   0x203
+#define OP_OFFLINE             0x300
+
+/*
+ * There are several things to be aware of that aren't at all obvious:
+ *
+ *
+ * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
+ *
+ * These are some of the many terms that are thrown about that don't always
+ * mean what people think they mean (Inconceivable!).  In the interest of
+ * creating a common ground for discussion, terms and their definitions
+ * will be established.
+ *
+ * Memory devices:     The individual chip on a memory stick.  These devices
+ *                     commonly output 4 and 8 bits each.  Grouping several
+ *                     of these in parallel provides 64 bits which is common
+ *                     for a memory stick.
+ *
+ * Memory Stick:       A printed circuit board that aggregates multiple
+ *                     memory devices in parallel.  This is the atomic
+ *                     memory component that is purchaseable by Joe consumer
+ *                     and loaded into a memory socket.
+ *
+ * Socket:             A physical connector on the motherboard that accepts
+ *                     a single memory stick.
+ *
+ * Channel:            Set of memory devices on a memory stick that must be
+ *                     grouped in parallel with one or more additional
+ *                     channels from other memory sticks.  This parallel
+ *                     grouping of the output from multiple channels are
+ *                     necessary for the smallest granularity of memory access.
+ *                     Some memory controllers are capable of single channel -
+ *                     which means that memory sticks can be loaded
+ *                     individually.  Other memory controllers are only
+ *                     capable of dual channel - which means that memory
+ *                     sticks must be loaded as pairs (see "socket set").
+ *
+ * Chip-select row:    All of the memory devices that are selected together.
+ *                     for a single, minimum grain of memory access.
+ *                     This selects all of the parallel memory devices across
+ *                     all of the parallel channels.  Common chip-select rows
+ *                     for single channel are 64 bits, for dual channel 128
+ *                     bits.
+ *
+ * Single-Ranked stick:        A Single-ranked stick has 1 chip-select row of memory.
+ *                     Motherboards commonly drive two chip-select pins to
+ *                     a memory stick. A single-ranked stick, will occupy
+ *                     only one of those rows. The other will be unused.
+ *
+ * Double-Ranked stick:        A double-ranked stick has two chip-select rows which
+ *                     access different sets of memory devices.  The two
+ *                     rows cannot be accessed concurrently.
+ *
+ * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
+ *                     A double-sided stick has two chip-select rows which
+ *                     access different sets of memory devices.  The two
+ *                     rows cannot be accessed concurrently.  "Double-sided"
+ *                     is irrespective of the memory devices being mounted
+ *                     on both sides of the memory stick.
+ *
+ * Socket set:         All of the memory sticks that are required for
+ *                     a single memory access or all of the memory sticks
+ *                     spanned by a chip-select row.  A single socket set
+ *                     has two chip-select rows and if double-sided sticks
+ *                     are used these will occupy those chip-select rows.
+ *
+ * Bank:               This term is avoided because it is unclear when
+ *                     needing to distinguish between chip-select rows and
+ *                     socket sets.
+ *
+ * Controller pages:
+ *
+ * Physical pages:
+ *
+ * Virtual pages:
+ *
+ *
+ * STRUCTURE ORGANIZATION AND CHOICES
+ *
+ *
+ *
+ * PS - I enjoyed writing all that about as much as you enjoyed reading it.
+ */
+
+struct channel_info {
+       int chan_idx;           /* channel index */
+       u32 ce_count;           /* Correctable Errors for this CHANNEL */
+       char label[EDAC_MC_LABEL_LEN + 1];      /* DIMM label on motherboard */
+       struct csrow_info *csrow;       /* the parent */
+};
+
+struct csrow_info {
+       unsigned long first_page;       /* first page number in dimm */
+       unsigned long last_page;        /* last page number in dimm */
+       unsigned long page_mask;        /* used for interleaving -
+                                        * 0UL for non intlv
+                                        */
+       u32 nr_pages;           /* number of pages in csrow */
+       u32 grain;              /* granularity of reported error in bytes */
+       int csrow_idx;          /* the chip-select row */
+       enum dev_type dtype;    /* memory device type */
+       u32 ue_count;           /* Uncorrectable Errors for this csrow */
+       u32 ce_count;           /* Correctable Errors for this csrow */
+       enum mem_type mtype;    /* memory csrow type */
+       enum edac_type edac_mode;       /* EDAC mode for this csrow */
+       struct mem_ctl_info *mci;       /* the parent */
+
+       struct kobject kobj;    /* sysfs kobject for this csrow */
+
+       /* channel information for this csrow */
+       u32 nr_channels;
+       struct channel_info *channels;
+};
+
+struct mcidev_sysfs_group {
+       const char *name;                               /* group name */
+       const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
+};
+
+struct mcidev_sysfs_group_kobj {
+       struct list_head list;          /* list for all instances within a mc */
+
+       struct kobject kobj;            /* kobj for the group */
+
+       const struct mcidev_sysfs_group *grp;   /* group description table */
+       struct mem_ctl_info *mci;       /* the parent */
+};
+
+/* mcidev_sysfs_attribute structure
+ *     used for driver sysfs attributes and in mem_ctl_info
+ *     sysfs top level entries
+ */
+struct mcidev_sysfs_attribute {
+       /* It should use either attr or grp */
+       struct attribute attr;
+       const struct mcidev_sysfs_group *grp;   /* Points to a group of attributes */
+
+       /* Ops for show/store values at the attribute - not used on group */
+        ssize_t (*show)(struct mem_ctl_info *,char *);
+        ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
+};
+
+/* MEMORY controller information structure
+ */
+struct mem_ctl_info {
+       struct list_head link;  /* for global list of mem_ctl_info structs */
+
+       struct module *owner;   /* Module owner of this control struct */
+
+       unsigned long mtype_cap;        /* memory types supported by mc */
+       unsigned long edac_ctl_cap;     /* Mem controller EDAC capabilities */
+       unsigned long edac_cap; /* configuration capabilities - this is
+                                * closely related to edac_ctl_cap.  The
+                                * difference is that the controller may be
+                                * capable of s4ecd4ed which would be listed
+                                * in edac_ctl_cap, but if channels aren't
+                                * capable of s4ecd4ed then the edac_cap would
+                                * not have that capability.
+                                */
+       unsigned long scrub_cap;        /* chipset scrub capabilities */
+       enum scrub_type scrub_mode;     /* current scrub mode */
+
+       /* Translates sdram memory scrub rate given in bytes/sec to the
+          internal representation and configures whatever else needs
+          to be configured.
+        */
+       int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
+
+       /* Get the current sdram memory scrub rate from the internal
+          representation and converts it to the closest matching
+          bandwidth in bytes/sec.
+        */
+       int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
+
+
+       /* pointer to edac checking routine */
+       void (*edac_check) (struct mem_ctl_info * mci);
+
+       /*
+        * Remaps memory pages: controller pages to physical pages.
+        * For most MC's, this will be NULL.
+        */
+       /* FIXME - why not send the phys page to begin with? */
+       unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
+                                          unsigned long page);
+       int mc_idx;
+       int nr_csrows;
+       struct csrow_info *csrows;
+       /*
+        * FIXME - what about controllers on other busses? - IDs must be
+        * unique.  dev pointer should be sufficiently unique, but
+        * BUS:SLOT.FUNC numbers may not be unique.
+        */
+       struct device *dev;
+       const char *mod_name;
+       const char *mod_ver;
+       const char *ctl_name;
+       const char *dev_name;
+       char proc_name[MC_PROC_NAME_MAX_LEN + 1];
+       void *pvt_info;
+       u32 ue_noinfo_count;    /* Uncorrectable Errors w/o info */
+       u32 ce_noinfo_count;    /* Correctable Errors w/o info */
+       u32 ue_count;           /* Total Uncorrectable Errors for this MC */
+       u32 ce_count;           /* Total Correctable Errors for this MC */
+       unsigned long start_time;       /* mci load start time (in jiffies) */
+
+       struct completion complete;
+
+       /* edac sysfs device control */
+       struct kobject edac_mci_kobj;
+
+       /* list for all grp instances within a mc */
+       struct list_head grp_kobj_list;
+
+       /* Additional top controller level attributes, but specified
+        * by the low level driver.
+        *
+        * Set by the low level driver to provide attributes at the
+        * controller level, same level as 'ue_count' and 'ce_count' above.
+        * An array of structures, NULL terminated
+        *
+        * If attributes are desired, then set to array of attributes
+        * If no attributes are desired, leave NULL
+        */
+       const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
+
+       /* work struct for this MC */
+       struct delayed_work work;
+
+       /* the internal state of this controller instance */
+       int op_state;
+};
+
 #endif
diff --git a/include/linux/edac_mce.h b/include/linux/edac_mce.h
deleted file mode 100644 (file)
index f974fc0..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Provides edac interface to mcelog events
- *
- * This file may be distributed under the terms of the
- * GNU General Public License version 2.
- *
- * Copyright (c) 2009 by:
- *      Mauro Carvalho Chehab <mchehab@redhat.com>
- *
- * Red Hat Inc. http://www.redhat.com
- */
-
-#if defined(CONFIG_EDAC_MCE) || \
-                       (defined(CONFIG_EDAC_MCE_MODULE) && defined(MODULE))
-
-#include <asm/mce.h>
-#include <linux/list.h>
-
-struct edac_mce {
-       struct list_head list;
-
-       void *priv;
-       int (*check_error)(void *priv, struct mce *mce);
-};
-
-int edac_mce_register(struct edac_mce *edac_mce);
-void edac_mce_unregister(struct edac_mce *edac_mce);
-int edac_mce_parse(struct mce *mce);
-
-#else
-#define edac_mce_parse(mce) (0)
-#endif
index 53792bf36c715d4c7f16c08a4a71a02ebda0eefd..ce1b719e8bd467f7a82ed8c7b4175665b7d8ffc5 100644 (file)
@@ -197,8 +197,8 @@ struct ext2_group_desc
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\
-                          EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\
-                          EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\
+                          EXT2_SYNC_FL | EXT2_NODUMP_FL |\
+                          EXT2_NOATIME_FL | EXT2_COMPRBLK_FL |\
                           EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\
                           EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL)
 
index 81965cce6bfafb96e146e0b0f67ad473aa0bf455..dec99116a0e441dd2becf054a65206d02587553a 100644 (file)
@@ -180,8 +180,8 @@ struct ext3_group_desc
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
-                          EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
-                          EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
+                          EXT3_SYNC_FL | EXT3_NODUMP_FL |\
+                          EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
                           EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
                           EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
 
@@ -381,7 +381,7 @@ struct ext3_inode {
  * Mount flags
  */
 #define EXT3_MOUNT_CHECK               0x00001 /* Do mount-time checks */
-#define EXT3_MOUNT_OLDALLOC            0x00002  /* Don't use the new Orlov allocator */
+/* EXT3_MOUNT_OLDALLOC was there */
 #define EXT3_MOUNT_GRPID               0x00004 /* Create files with directory's group */
 #define EXT3_MOUNT_DEBUG               0x00008 /* Some debugging messages */
 #define EXT3_MOUNT_ERRORS_CONT         0x00010 /* Continue on errors */
index 258088ab3c6bab4e3bad8ce5b5c987532dc9a90d..64365252f1b031c0f1ed9a094dcbc3468f51cc7a 100644 (file)
@@ -76,10 +76,6 @@ struct ext3_sb_info {
        struct mutex s_resize_lock;
        unsigned long s_commit_interval;
        struct block_device *journal_bdev;
-#ifdef CONFIG_JBD_DEBUG
-       struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
-       wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
-#endif
 #ifdef CONFIG_QUOTA
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
index 7a049fd2aa4cca0860f9dd51f7a317d7fe3b58d5..0c4df261af7e6a32d5b5c80633fa6a643e2d6285 100644 (file)
@@ -768,14 +768,25 @@ struct inode {
 
        /* Stat data, not accessed from path walking */
        unsigned long           i_ino;
-       unsigned int            i_nlink;
+       /*
+        * Filesystems may only read i_nlink directly.  They shall use the
+        * following functions for modification:
+        *
+        *    (set|clear|inc|drop)_nlink
+        *    inode_(inc|dec)_link_count
+        */
+       union {
+               const unsigned int i_nlink;
+               unsigned int __i_nlink;
+       };
        dev_t                   i_rdev;
-       loff_t                  i_size;
        struct timespec         i_atime;
        struct timespec         i_mtime;
        struct timespec         i_ctime;
-       unsigned int            i_blkbits;
+       spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
+       unsigned short          i_bytes;
        blkcnt_t                i_blocks;
+       loff_t                  i_size;
 
 #ifdef __NEED_I_SIZE_ORDERED
        seqcount_t              i_size_seqcount;
@@ -783,7 +794,6 @@ struct inode {
 
        /* Misc */
        unsigned long           i_state;
-       spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
        struct mutex            i_mutex;
 
        unsigned long           dirtied_when;   /* jiffies of first dirtying */
@@ -797,9 +807,10 @@ struct inode {
                struct rcu_head         i_rcu;
        };
        atomic_t                i_count;
+       unsigned int            i_blkbits;
        u64                     i_version;
-       unsigned short          i_bytes;
        atomic_t                i_dio_count;
+       atomic_t                i_writecount;
        const struct file_operations    *i_fop; /* former ->i_op->default_file_ops */
        struct file_lock        *i_flock;
        struct address_space    i_data;
@@ -823,7 +834,6 @@ struct inode {
 #ifdef CONFIG_IMA
        atomic_t                i_readcount; /* struct files open RO */
 #endif
-       atomic_t                i_writecount;
        void                    *i_private; /* fs or device private pointer */
 };
 
@@ -1754,6 +1764,19 @@ static inline void mark_inode_dirty_sync(struct inode *inode)
        __mark_inode_dirty(inode, I_DIRTY_SYNC);
 }
 
+/**
+ * set_nlink - directly set an inode's link count
+ * @inode: inode
+ * @nlink: new nlink (should be non-zero)
+ *
+ * This is a low-level filesystem helper to replace any
+ * direct filesystem manipulation of i_nlink.
+ */
+static inline void set_nlink(struct inode *inode, unsigned int nlink)
+{
+       inode->__i_nlink = nlink;
+}
+
 /**
  * inc_nlink - directly increment an inode's link count
  * @inode: inode
@@ -1764,7 +1787,7 @@ static inline void mark_inode_dirty_sync(struct inode *inode)
  */
 static inline void inc_nlink(struct inode *inode)
 {
-       inode->i_nlink++;
+       inode->__i_nlink++;
 }
 
 static inline void inode_inc_link_count(struct inode *inode)
@@ -1786,7 +1809,7 @@ static inline void inode_inc_link_count(struct inode *inode)
  */
 static inline void drop_nlink(struct inode *inode)
 {
-       inode->i_nlink--;
+       inode->__i_nlink--;
 }
 
 /**
@@ -1799,7 +1822,7 @@ static inline void drop_nlink(struct inode *inode)
  */
 static inline void clear_nlink(struct inode *inode)
 {
-       inode->i_nlink = 0;
+       inode->__i_nlink = 0;
 }
 
 static inline void inode_dec_link_count(struct inode *inode)
index 8390efc457ebd461d4568e850e8e6b82019dc4f5..08a2fee40659667819d01bc95a184e059d0fd9b1 100644 (file)
 
 #include <linux/err.h>
 #include <linux/sched.h>
+#include <linux/device.h>
 
 /* hwspinlock mode argument */
 #define HWLOCK_IRQSTATE        0x01    /* Disable interrupts, save state */
 #define HWLOCK_IRQ     0x02    /* Disable interrupts, don't save state */
 
 struct hwspinlock;
+struct hwspinlock_device;
+struct hwspinlock_ops;
+
+/**
+ * struct hwspinlock_pdata - platform data for hwspinlock drivers
+ * @base_id: base id for this hwspinlock device
+ *
+ * hwspinlock devices provide system-wide hardware locks that are used
+ * by remote processors that have no other way to achieve synchronization.
+ *
+ * To achieve that, each physical lock must have a system-wide id number
+ * that is agreed upon, otherwise remote processors can't possibly assume
+ * they're using the same hardware lock.
+ *
+ * Usually boards have a single hwspinlock device, which provides several
+ * hwspinlocks, and in this case, they can be trivially numbered 0 to
+ * (num-of-locks - 1).
+ *
+ * In case boards have several hwspinlocks devices, a different base id
+ * should be used for each hwspinlock device (they can't all use 0 as
+ * a starting id!).
+ *
+ * This platform data structure should be used to provide the base id
+ * for each device (which is trivially 0 when only a single hwspinlock
+ * device exists). It can be shared between different platforms, hence
+ * its location.
+ */
+struct hwspinlock_pdata {
+       int base_id;
+};
 
 #if defined(CONFIG_HWSPINLOCK) || defined(CONFIG_HWSPINLOCK_MODULE)
 
-int hwspin_lock_register(struct hwspinlock *lock);
-struct hwspinlock *hwspin_lock_unregister(unsigned int id);
+int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
+               const struct hwspinlock_ops *ops, int base_id, int num_locks);
+int hwspin_lock_unregister(struct hwspinlock_device *bank);
 struct hwspinlock *hwspin_lock_request(void);
 struct hwspinlock *hwspin_lock_request_specific(unsigned int id);
 int hwspin_lock_free(struct hwspinlock *hwlock);
@@ -94,16 +126,6 @@ static inline int hwspin_lock_get_id(struct hwspinlock *hwlock)
        return 0;
 }
 
-static inline int hwspin_lock_register(struct hwspinlock *hwlock)
-{
-       return -ENODEV;
-}
-
-static inline struct hwspinlock *hwspin_lock_unregister(unsigned int id)
-{
-       return NULL;
-}
-
 #endif /* !CONFIG_HWSPINLOCK */
 
 /**
index 6427d298fbfc7d319321d8b9c287fc5761f9a2a4..530e11ba07387bfa7da5c32c800e341a9f1c4d7d 100644 (file)
@@ -129,6 +129,10 @@ enum sample_type {
 #define REG_BCICTL2             0x024
 #define TWL4030_BCI_ITHSENS    0x007
 
+/* Register and bits for GPBR1 register */
+#define TWL4030_REG_GPBR1              0x0c
+#define TWL4030_GPBR1_MADC_HFCLK_EN    (1 << 7)
+
 struct twl4030_madc_user_parms {
        int channel;
        int average;
index e6a5e34bed4fe64df66592371848e9c66270c569..c7acdde3243d6886391856c6ccd62261a4f8b9cf 100644 (file)
@@ -244,6 +244,7 @@ typedef struct journal_superblock_s
 
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/jbd_common.h>
 
 #define J_ASSERT(assert)       BUG_ON(!(assert))
 
@@ -270,69 +271,6 @@ typedef struct journal_superblock_s
 #define J_EXPECT_JH(jh, expr, why...)  __journal_expect(expr, ## why)
 #endif
 
-enum jbd_state_bits {
-       BH_JBD                  /* Has an attached ext3 journal_head */
-         = BH_PrivateStart,
-       BH_JWrite,              /* Being written to log (@@@ DEBUGGING) */
-       BH_Freed,               /* Has been freed (truncated) */
-       BH_Revoked,             /* Has been revoked from the log */
-       BH_RevokeValid,         /* Revoked flag is valid */
-       BH_JBDDirty,            /* Is dirty but journaled */
-       BH_State,               /* Pins most journal_head state */
-       BH_JournalHead,         /* Pins bh->b_private and jh->b_bh */
-       BH_Unshadow,            /* Dummy bit, for BJ_Shadow wakeup filtering */
-};
-
-BUFFER_FNS(JBD, jbd)
-BUFFER_FNS(JWrite, jwrite)
-BUFFER_FNS(JBDDirty, jbddirty)
-TAS_BUFFER_FNS(JBDDirty, jbddirty)
-BUFFER_FNS(Revoked, revoked)
-TAS_BUFFER_FNS(Revoked, revoked)
-BUFFER_FNS(RevokeValid, revokevalid)
-TAS_BUFFER_FNS(RevokeValid, revokevalid)
-BUFFER_FNS(Freed, freed)
-
-static inline struct buffer_head *jh2bh(struct journal_head *jh)
-{
-       return jh->b_bh;
-}
-
-static inline struct journal_head *bh2jh(struct buffer_head *bh)
-{
-       return bh->b_private;
-}
-
-static inline void jbd_lock_bh_state(struct buffer_head *bh)
-{
-       bit_spin_lock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_trylock_bh_state(struct buffer_head *bh)
-{
-       return bit_spin_trylock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
-{
-       return bit_spin_is_locked(BH_State, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_state(struct buffer_head *bh)
-{
-       bit_spin_unlock(BH_State, &bh->b_state);
-}
-
-static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
-{
-       bit_spin_lock(BH_JournalHead, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
-{
-       bit_spin_unlock(BH_JournalHead, &bh->b_state);
-}
-
 struct jbd_revoke_table_s;
 
 /**
index 38f307b8c3342c4bef86ac3366c785696df54cce..2092ea21e469eeeaa415a885ead161fed0ac7d38 100644 (file)
@@ -275,6 +275,7 @@ typedef struct journal_superblock_s
 
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/jbd_common.h>
 
 #define J_ASSERT(assert)       BUG_ON(!(assert))
 
@@ -302,70 +303,6 @@ typedef struct journal_superblock_s
 #define J_EXPECT_JH(jh, expr, why...)  __journal_expect(expr, ## why)
 #endif
 
-enum jbd_state_bits {
-       BH_JBD                  /* Has an attached ext3 journal_head */
-         = BH_PrivateStart,
-       BH_JWrite,              /* Being written to log (@@@ DEBUGGING) */
-       BH_Freed,               /* Has been freed (truncated) */
-       BH_Revoked,             /* Has been revoked from the log */
-       BH_RevokeValid,         /* Revoked flag is valid */
-       BH_JBDDirty,            /* Is dirty but journaled */
-       BH_State,               /* Pins most journal_head state */
-       BH_JournalHead,         /* Pins bh->b_private and jh->b_bh */
-       BH_Unshadow,            /* Dummy bit, for BJ_Shadow wakeup filtering */
-       BH_JBDPrivateStart,     /* First bit available for private use by FS */
-};
-
-BUFFER_FNS(JBD, jbd)
-BUFFER_FNS(JWrite, jwrite)
-BUFFER_FNS(JBDDirty, jbddirty)
-TAS_BUFFER_FNS(JBDDirty, jbddirty)
-BUFFER_FNS(Revoked, revoked)
-TAS_BUFFER_FNS(Revoked, revoked)
-BUFFER_FNS(RevokeValid, revokevalid)
-TAS_BUFFER_FNS(RevokeValid, revokevalid)
-BUFFER_FNS(Freed, freed)
-
-static inline struct buffer_head *jh2bh(struct journal_head *jh)
-{
-       return jh->b_bh;
-}
-
-static inline struct journal_head *bh2jh(struct buffer_head *bh)
-{
-       return bh->b_private;
-}
-
-static inline void jbd_lock_bh_state(struct buffer_head *bh)
-{
-       bit_spin_lock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_trylock_bh_state(struct buffer_head *bh)
-{
-       return bit_spin_trylock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
-{
-       return bit_spin_is_locked(BH_State, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_state(struct buffer_head *bh)
-{
-       bit_spin_unlock(BH_State, &bh->b_state);
-}
-
-static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
-{
-       bit_spin_lock(BH_JournalHead, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
-{
-       bit_spin_unlock(BH_JournalHead, &bh->b_state);
-}
-
 /* Flags in jbd_inode->i_flags */
 #define __JI_COMMIT_RUNNING 0
 /* Commit of the inode data in progress. We use this flag to protect us from
@@ -1106,9 +1043,9 @@ static inline handle_t *journal_current_handle(void)
  */
 
 extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
-extern handle_t *jbd2__journal_start(journal_t *, int nblocks, int gfp_mask);
+extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask);
 extern int      jbd2_journal_restart(handle_t *, int nblocks);
-extern int      jbd2__journal_restart(handle_t *, int nblocks, int gfp_mask);
+extern int      jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
 extern int      jbd2_journal_extend (handle_t *, int nblocks);
 extern int      jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
 extern int      jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h
new file mode 100644 (file)
index 0000000..6230f85
--- /dev/null
@@ -0,0 +1,68 @@
+#ifndef _LINUX_JBD_STATE_H
+#define _LINUX_JBD_STATE_H
+
+enum jbd_state_bits {
+       BH_JBD                  /* Has an attached ext3 journal_head */
+         = BH_PrivateStart,
+       BH_JWrite,              /* Being written to log (@@@ DEBUGGING) */
+       BH_Freed,               /* Has been freed (truncated) */
+       BH_Revoked,             /* Has been revoked from the log */
+       BH_RevokeValid,         /* Revoked flag is valid */
+       BH_JBDDirty,            /* Is dirty but journaled */
+       BH_State,               /* Pins most journal_head state */
+       BH_JournalHead,         /* Pins bh->b_private and jh->b_bh */
+       BH_Unshadow,            /* Dummy bit, for BJ_Shadow wakeup filtering */
+       BH_JBDPrivateStart,     /* First bit available for private use by FS */
+};
+
+BUFFER_FNS(JBD, jbd)
+BUFFER_FNS(JWrite, jwrite)
+BUFFER_FNS(JBDDirty, jbddirty)
+TAS_BUFFER_FNS(JBDDirty, jbddirty)
+BUFFER_FNS(Revoked, revoked)
+TAS_BUFFER_FNS(Revoked, revoked)
+BUFFER_FNS(RevokeValid, revokevalid)
+TAS_BUFFER_FNS(RevokeValid, revokevalid)
+BUFFER_FNS(Freed, freed)
+
+static inline struct buffer_head *jh2bh(struct journal_head *jh)
+{
+       return jh->b_bh;
+}
+
+static inline struct journal_head *bh2jh(struct buffer_head *bh)
+{
+       return bh->b_private;
+}
+
+static inline void jbd_lock_bh_state(struct buffer_head *bh)
+{
+       bit_spin_lock(BH_State, &bh->b_state);
+}
+
+static inline int jbd_trylock_bh_state(struct buffer_head *bh)
+{
+       return bit_spin_trylock(BH_State, &bh->b_state);
+}
+
+static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
+{
+       return bit_spin_is_locked(BH_State, &bh->b_state);
+}
+
+static inline void jbd_unlock_bh_state(struct buffer_head *bh)
+{
+       bit_spin_unlock(BH_State, &bh->b_state);
+}
+
+static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
+{
+       bit_spin_lock(BH_JournalHead, &bh->b_state);
+}
+
+static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
+{
+       bit_spin_unlock(BH_JournalHead, &bh->b_state);
+}
+
+#endif
index 1e5df2af8d845c6a2ff63115245c88cba4a95cdc..2d4beab0d5b7270a96bbff233457b0cb3ae095d1 100644 (file)
 #define ANON_INODE_FS_MAGIC    0x09041934
 #define PSTOREFS_MAGIC         0x6165676C
 
-#define MINIX_SUPER_MAGIC      0x137F          /* original minix fs */
-#define MINIX_SUPER_MAGIC2     0x138F          /* minix fs, 30 char names */
-#define MINIX2_SUPER_MAGIC     0x2468          /* minix V2 fs */
-#define MINIX2_SUPER_MAGIC2    0x2478          /* minix V2 fs, 30 char names */
-#define MINIX3_SUPER_MAGIC     0x4d5a          /* minix V3 fs */
+#define MINIX_SUPER_MAGIC      0x137F          /* minix v1 fs, 14 char names */
+#define MINIX_SUPER_MAGIC2     0x138F          /* minix v1 fs, 30 char names */
+#define MINIX2_SUPER_MAGIC     0x2468          /* minix v2 fs, 14 char names */
+#define MINIX2_SUPER_MAGIC2    0x2478          /* minix v2 fs, 30 char names */
+#define MINIX3_SUPER_MAGIC     0x4d5a          /* minix v3 fs, 60 char names */
 
 #define MSDOS_SUPER_MAGIC      0x4d44          /* MD */
 #define NCP_SUPER_MAGIC                0x564c          /* Guess, what 0x564c is :-) */
index ac797fa03ef83503668c0cf56c16e18004bc50c4..b87068a1a09ef84ece916a4a722ebf70e921d1c6 100644 (file)
@@ -78,8 +78,8 @@ extern void mem_cgroup_uncharge_end(void);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
 
-extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
-int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
+extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask);
+int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg);
 
 extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -88,26 +88,28 @@ extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
 static inline
 int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 {
-       struct mem_cgroup *mem;
+       struct mem_cgroup *memcg;
        rcu_read_lock();
-       mem = mem_cgroup_from_task(rcu_dereference((mm)->owner));
+       memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner));
        rcu_read_unlock();
-       return cgroup == mem;
+       return cgroup == memcg;
 }
 
-extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem);
+extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
 
 extern int
 mem_cgroup_prepare_migration(struct page *page,
        struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask);
-extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
+extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
        struct page *oldpage, struct page *newpage, bool migration_ok);
 
 /*
  * For memory reclaim.
  */
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg);
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
+                                   struct zone *zone);
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
+                                   struct zone *zone);
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
                                        int nid, int zid, unsigned int lrumask);
@@ -148,7 +150,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                                gfp_t gfp_mask,
                                                unsigned long *total_scanned);
-u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
+u64 mem_cgroup_get_limit(struct mem_cgroup *memcg);
 
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -244,18 +246,20 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm
        return NULL;
 }
 
-static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
+static inline int mm_match_cgroup(struct mm_struct *mm,
+               struct mem_cgroup *memcg)
 {
        return 1;
 }
 
 static inline int task_in_mem_cgroup(struct task_struct *task,
-                                    const struct mem_cgroup *mem)
+                                    const struct mem_cgroup *memcg)
 {
        return 1;
 }
 
-static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
+static inline struct cgroup_subsys_state
+               *mem_cgroup_css(struct mem_cgroup *memcg)
 {
        return NULL;
 }
@@ -267,22 +271,22 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
        return 0;
 }
 
-static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
+static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
                struct page *oldpage, struct page *newpage, bool migration_ok)
 {
 }
 
-static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
+static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
 {
        return 0;
 }
 
-static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
+static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *memcg,
                                                int priority)
 {
 }
 
-static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
+static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *memcg,
                                                int priority)
 {
 }
@@ -293,13 +297,13 @@ static inline bool mem_cgroup_disabled(void)
 }
 
 static inline int
-mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
+mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
        return 1;
 }
 
 static inline int
-mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
        return 1;
 }
@@ -348,7 +352,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 }
 
 static inline
-u64 mem_cgroup_get_limit(struct mem_cgroup *mem)
+u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
        return 0;
 }
diff --git a/include/linux/mfd/ab5500/ab5500.h b/include/linux/mfd/ab5500/ab5500.h
new file mode 100644 (file)
index 0000000..a720051
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) ST-Ericsson 2011
+ *
+ * License Terms: GNU General Public License v2
+ */
+#ifndef MFD_AB5500_H
+#define MFD_AB5500_H
+
+#include <linux/device.h>
+
+enum ab5500_devid {
+       AB5500_DEVID_ADC,
+       AB5500_DEVID_LEDS,
+       AB5500_DEVID_POWER,
+       AB5500_DEVID_REGULATORS,
+       AB5500_DEVID_SIM,
+       AB5500_DEVID_RTC,
+       AB5500_DEVID_CHARGER,
+       AB5500_DEVID_FUELGAUGE,
+       AB5500_DEVID_VIBRATOR,
+       AB5500_DEVID_CODEC,
+       AB5500_DEVID_USB,
+       AB5500_DEVID_OTP,
+       AB5500_DEVID_VIDEO,
+       AB5500_DEVID_DBIECI,
+       AB5500_DEVID_ONSWA,
+       AB5500_NUM_DEVICES,
+};
+
+enum ab5500_banks {
+       AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP = 0,
+       AB5500_BANK_VDDDIG_IO_I2C_CLK_TST = 1,
+       AB5500_BANK_VDENC = 2,
+       AB5500_BANK_SIM_USBSIM  = 3,
+       AB5500_BANK_LED = 4,
+       AB5500_BANK_ADC  = 5,
+       AB5500_BANK_RTC  = 6,
+       AB5500_BANK_STARTUP  = 7,
+       AB5500_BANK_DBI_ECI  = 8,
+       AB5500_BANK_CHG  = 9,
+       AB5500_BANK_FG_BATTCOM_ACC = 10,
+       AB5500_BANK_USB = 11,
+       AB5500_BANK_IT = 12,
+       AB5500_BANK_VIBRA = 13,
+       AB5500_BANK_AUDIO_HEADSETUSB = 14,
+       AB5500_NUM_BANKS = 15,
+};
+
+enum ab5500_banks_addr {
+       AB5500_ADDR_VIT_IO_I2C_CLK_TST_OTP = 0x4A,
+       AB5500_ADDR_VDDDIG_IO_I2C_CLK_TST = 0x4B,
+       AB5500_ADDR_VDENC = 0x06,
+       AB5500_ADDR_SIM_USBSIM  = 0x04,
+       AB5500_ADDR_LED = 0x10,
+       AB5500_ADDR_ADC  = 0x0A,
+       AB5500_ADDR_RTC  = 0x0F,
+       AB5500_ADDR_STARTUP  = 0x03,
+       AB5500_ADDR_DBI_ECI  = 0x07,
+       AB5500_ADDR_CHG  = 0x0B,
+       AB5500_ADDR_FG_BATTCOM_ACC = 0x0C,
+       AB5500_ADDR_USB = 0x05,
+       AB5500_ADDR_IT = 0x0E,
+       AB5500_ADDR_VIBRA = 0x02,
+       AB5500_ADDR_AUDIO_HEADSETUSB = 0x0D,
+};
+
+/*
+ * Interrupt register offsets
+ * Bank : 0x0E
+ */
+#define AB5500_IT_SOURCE0_REG          0x20
+#define AB5500_IT_SOURCE1_REG          0x21
+#define AB5500_IT_SOURCE2_REG          0x22
+#define AB5500_IT_SOURCE3_REG          0x23
+#define AB5500_IT_SOURCE4_REG          0x24
+#define AB5500_IT_SOURCE5_REG          0x25
+#define AB5500_IT_SOURCE6_REG          0x26
+#define AB5500_IT_SOURCE7_REG          0x27
+#define AB5500_IT_SOURCE8_REG          0x28
+#define AB5500_IT_SOURCE9_REG          0x29
+#define AB5500_IT_SOURCE10_REG         0x2A
+#define AB5500_IT_SOURCE11_REG         0x2B
+#define AB5500_IT_SOURCE12_REG         0x2C
+#define AB5500_IT_SOURCE13_REG         0x2D
+#define AB5500_IT_SOURCE14_REG         0x2E
+#define AB5500_IT_SOURCE15_REG         0x2F
+#define AB5500_IT_SOURCE16_REG         0x30
+#define AB5500_IT_SOURCE17_REG         0x31
+#define AB5500_IT_SOURCE18_REG         0x32
+#define AB5500_IT_SOURCE19_REG         0x33
+#define AB5500_IT_SOURCE20_REG         0x34
+#define AB5500_IT_SOURCE21_REG         0x35
+#define AB5500_IT_SOURCE22_REG         0x36
+#define AB5500_IT_SOURCE23_REG         0x37
+
+#define AB5500_NUM_IRQ_REGS            23
+
+/**
+ * struct ab5500
+ * @access_mutex: lock out concurrent accesses to the AB registers
+ * @dev: a pointer to the device struct for this chip driver
+ * @ab5500_irq: the analog baseband irq
+ * @irq_base: the platform configuration irq base for subdevices
+ * @chip_name: name of this chip variant
+ * @chip_id: 8 bit chip ID for this chip variant
+ * @irq_lock: a lock to protect the mask
+ * @abb_events: a local bit mask of the prcmu wakeup events
+ * @event_mask: a local copy of the mask event registers
+ * @last_event_mask: a copy of the last event_mask written to hardware
+ * @startup_events: a copy of the first reading of the event registers
+ * @startup_events_read: whether the first events have been read
+ */
+struct ab5500 {
+       struct mutex access_mutex;
+       struct device *dev;
+       unsigned int ab5500_irq;
+       unsigned int irq_base;
+       char chip_name[32];
+       u8 chip_id;
+       struct mutex irq_lock;
+       u32 abb_events;
+       u8 mask[AB5500_NUM_IRQ_REGS];
+       u8 oldmask[AB5500_NUM_IRQ_REGS];
+       u8 startup_events[AB5500_NUM_IRQ_REGS];
+       bool startup_events_read;
+#ifdef CONFIG_DEBUG_FS
+       unsigned int debug_bank;
+       unsigned int debug_address;
+#endif
+};
+
+struct ab5500_platform_data {
+       struct {unsigned int base; unsigned int count; } irq;
+       void *dev_data[AB5500_NUM_DEVICES];
+       struct abx500_init_settings *init_settings;
+       unsigned int init_settings_sz;
+       bool pm_power_off;
+};
+
+#endif /* MFD_AB5500_H */
index 46b954011f1685112401d7f98332064441f333b5..252966769d939fae571aa6dc260b86e1280bfc2f 100644 (file)
@@ -27,6 +27,9 @@
 struct ab8500_gpadc;
 
 struct ab8500_gpadc *ab8500_gpadc_get(char *name);
-int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 input);
+int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel);
+int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel);
+int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc,
+    u8 channel, int ad_value);
 
 #endif /* _AB8500_GPADC_H */
index 896b5e47f16ec5a9ab0ef962de083e85ddbb0d19..9970337ff0412ca255b89a72564f3ebb96da966e 100644 (file)
@@ -6,7 +6,7 @@
  *
  * ABX500 core access functions.
  * The abx500 interface is used for the Analog Baseband chip
- * ab3100, ab3550, ab5500, and ab8500.
+ * ab3100, ab5500, and ab8500.
  *
  * Author: Mattias Wallin <mattias.wallin@stericsson.com>
  * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com>
 #define AB3100_P1G     0xc6
 #define AB3100_R2A     0xc7
 #define AB3100_R2B     0xc8
-#define AB3550_P1A     0x10
 #define AB5500_1_0     0x20
-#define AB5500_2_0     0x21
-#define AB5500_2_1     0x22
+#define AB5500_1_1     0x21
+#define AB5500_2_0     0x24
 
 /* AB8500 CIDs*/
-#define AB8500_CUTEARLY        0x00
 #define AB8500_CUT1P0  0x10
 #define AB8500_CUT1P1  0x11
 #define AB8500_CUT2P0  0x20
 #define AB8500_CUT3P0  0x30
+#define AB8500_CUT3P3  0x33
 
 /*
  * AB3100, EVENTA1, A2 and A3 event register flags
@@ -143,39 +142,6 @@ int ab3100_event_register(struct ab3100 *ab3100,
 int ab3100_event_unregister(struct ab3100 *ab3100,
                            struct notifier_block *nb);
 
-/* AB3550, STR register flags */
-#define AB3550_STR_ONSWA                               (0x01)
-#define AB3550_STR_ONSWB                               (0x02)
-#define AB3550_STR_ONSWC                               (0x04)
-#define AB3550_STR_DCIO                                        (0x08)
-#define AB3550_STR_BOOT_MODE                           (0x10)
-#define AB3550_STR_SIM_OFF                             (0x20)
-#define AB3550_STR_BATT_REMOVAL                                (0x40)
-#define AB3550_STR_VBUS                                        (0x80)
-
-/* Interrupt mask registers */
-#define AB3550_IMR1 0x29
-#define AB3550_IMR2 0x2a
-#define AB3550_IMR3 0x2b
-#define AB3550_IMR4 0x2c
-#define AB3550_IMR5 0x2d
-
-enum ab3550_devid {
-       AB3550_DEVID_ADC,
-       AB3550_DEVID_DAC,
-       AB3550_DEVID_LEDS,
-       AB3550_DEVID_POWER,
-       AB3550_DEVID_REGULATORS,
-       AB3550_DEVID_SIM,
-       AB3550_DEVID_UART,
-       AB3550_DEVID_RTC,
-       AB3550_DEVID_CHARGER,
-       AB3550_DEVID_FUELGAUGE,
-       AB3550_DEVID_VIBRATOR,
-       AB3550_DEVID_CODEC,
-       AB3550_NUM_DEVICES,
-};
-
 /**
  * struct abx500_init_setting
  * Initial value of the registers for driver to use during setup.
@@ -186,18 +152,6 @@ struct abx500_init_settings {
        u8 setting;
 };
 
-/**
- * struct ab3550_platform_data
- * Data supplied to initialize board connections to the AB3550
- */
-struct ab3550_platform_data {
-       struct {unsigned int base; unsigned int count; } irq;
-       void *dev_data[AB3550_NUM_DEVICES];
-       size_t dev_data_sz[AB3550_NUM_DEVICES];
-       struct abx500_init_settings *init_settings;
-       unsigned int init_settings_sz;
-};
-
 int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg,
        u8 value);
 int abx500_get_register_interruptible(struct device *dev, u8 bank, u8 reg,
index f0977986402c053c38bd71fdc4558452641de5e6..9890687f582de0c36cdbcc56cf252a381e82e8ee 100644 (file)
@@ -5,21 +5,35 @@
  *
  * U5500 PRCMU API.
  */
-#ifndef __MACH_PRCMU_U5500_H
-#define __MACH_PRCMU_U5500_H
+#ifndef __MFD_DB5500_PRCMU_H
+#define __MFD_DB5500_PRCMU_H
 
-#ifdef CONFIG_UX500_SOC_DB5500
+#ifdef CONFIG_MFD_DB5500_PRCMU
 
 void db5500_prcmu_early_init(void);
-
+int db5500_prcmu_set_epod(u16 epod_id, u8 epod_state);
+int db5500_prcmu_set_display_clocks(void);
+int db5500_prcmu_disable_dsipll(void);
+int db5500_prcmu_enable_dsipll(void);
 int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);
 int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size);
+void db5500_prcmu_enable_wakeups(u32 wakeups);
+int db5500_prcmu_request_clock(u8 clock, bool enable);
+void db5500_prcmu_config_abb_event_readout(u32 abb_events);
+void db5500_prcmu_get_abb_event_buffer(void __iomem **buf);
+int prcmu_resetout(u8 resoutn, u8 state);
+int db5500_prcmu_set_power_state(u8 state, bool keep_ulp_clk,
+       bool keep_ap_pll);
+int db5500_prcmu_config_esram0_deep_sleep(u8 state);
+void db5500_prcmu_system_reset(u16 reset_code);
+u16 db5500_prcmu_get_reset_code(void);
+bool db5500_prcmu_is_ac_wake_requested(void);
+int db5500_prcmu_set_arm_opp(u8 opp);
+int db5500_prcmu_get_arm_opp(void);
 
 #else /* !CONFIG_UX500_SOC_DB5500 */
 
-static inline void db5500_prcmu_early_init(void)
-{
-}
+static inline void db5500_prcmu_early_init(void) {}
 
 static inline int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
 {
@@ -31,15 +45,75 @@ static inline int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
        return -ENOSYS;
 }
 
-#endif /* CONFIG_UX500_SOC_DB5500 */
+static inline int db5500_prcmu_request_clock(u8 clock, bool enable)
+{
+       return 0;
+}
+
+static inline int db5500_prcmu_set_display_clocks(void)
+{
+       return 0;
+}
+
+static inline int db5500_prcmu_disable_dsipll(void)
+{
+       return 0;
+}
+
+static inline int db5500_prcmu_enable_dsipll(void)
+{
+       return 0;
+}
 
-static inline int db5500_prcmu_config_abb_event_readout(u32 abb_events)
+static inline int db5500_prcmu_config_esram0_deep_sleep(u8 state)
 {
-#ifdef CONFIG_MACH_U5500_SIMULATOR
        return 0;
-#else
-       return -1;
-#endif
 }
 
-#endif /* __MACH_PRCMU_U5500_H */
+static inline void db5500_prcmu_enable_wakeups(u32 wakeups) {}
+
+static inline int prcmu_resetout(u8 resoutn, u8 state)
+{
+       return 0;
+}
+
+static inline int db5500_prcmu_set_epod(u16 epod_id, u8 epod_state)
+{
+       return 0;
+}
+
+static inline void db5500_prcmu_get_abb_event_buffer(void __iomem **buf) {}
+static inline void db5500_prcmu_config_abb_event_readout(u32 abb_events) {}
+
+static inline int db5500_prcmu_set_power_state(u8 state, bool keep_ulp_clk,
+       bool keep_ap_pll)
+{
+       return 0;
+}
+
+static inline void db5500_prcmu_system_reset(u16 reset_code) {}
+
+static inline u16 db5500_prcmu_get_reset_code(void)
+{
+       return 0;
+}
+
+static inline bool db5500_prcmu_is_ac_wake_requested(void)
+{
+       return 0;
+}
+
+static inline int db5500_prcmu_set_arm_opp(u8 opp)
+{
+       return 0;
+}
+
+static inline int db5500_prcmu_get_arm_opp(void)
+{
+       return 0;
+}
+
+
+#endif /* CONFIG_MFD_DB5500_PRCMU */
+
+#endif /* __MFD_DB5500_PRCMU_H */
index 917dbcab701c1f1f7d2909a64ede8b52adf70f43..60d27f7bfc1f761d915a9635a3c275155b7577d3 100644 (file)
@@ -11,7 +11,6 @@
 #define __MFD_DB8500_PRCMU_H
 
 #include <linux/interrupt.h>
-#include <linux/notifier.h>
 
 /* This portion previously known as <mach/prcmu-fw-defs_v1.h> */
 
@@ -133,7 +132,7 @@ enum ap_pwrst {
  * @APEXECUTE_TO_APIDLE: Power state transition from ApExecute to ApIdle
  */
 enum ap_pwrst_trans {
-       NO_TRANSITION                   = 0x00,
+       PRCMU_AP_NO_CHANGE              = 0x00,
        APEXECUTE_TO_APSLEEP            = 0x01,
        APIDLE_TO_APSLEEP               = 0x02, /* To be removed */
        PRCMU_AP_SLEEP                  = 0x01,
@@ -145,54 +144,6 @@ enum ap_pwrst_trans {
        PRCMU_AP_DEEP_IDLE              = 0x07,
 };
 
-/**
- * enum ddr_pwrst - DDR power states definition
- * @DDR_PWR_STATE_UNCHANGED: SDRAM and DDR controller state is unchanged
- * @DDR_PWR_STATE_ON:
- * @DDR_PWR_STATE_OFFLOWLAT:
- * @DDR_PWR_STATE_OFFHIGHLAT:
- */
-enum ddr_pwrst {
-       DDR_PWR_STATE_UNCHANGED     = 0x00,
-       DDR_PWR_STATE_ON            = 0x01,
-       DDR_PWR_STATE_OFFLOWLAT     = 0x02,
-       DDR_PWR_STATE_OFFHIGHLAT    = 0x03
-};
-
-/**
- * enum arm_opp - ARM OPP states definition
- * @ARM_OPP_INIT:
- * @ARM_NO_CHANGE: The ARM operating point is unchanged
- * @ARM_100_OPP: The new ARM operating point is arm100opp
- * @ARM_50_OPP: The new ARM operating point is arm50opp
- * @ARM_MAX_OPP: Operating point is "max" (more than 100)
- * @ARM_MAX_FREQ100OPP: Set max opp if available, else 100
- * @ARM_EXTCLK: The new ARM operating point is armExtClk
- */
-enum arm_opp {
-       ARM_OPP_INIT = 0x00,
-       ARM_NO_CHANGE = 0x01,
-       ARM_100_OPP = 0x02,
-       ARM_50_OPP = 0x03,
-       ARM_MAX_OPP = 0x04,
-       ARM_MAX_FREQ100OPP = 0x05,
-       ARM_EXTCLK = 0x07
-};
-
-/**
- * enum ape_opp - APE OPP states definition
- * @APE_OPP_INIT:
- * @APE_NO_CHANGE: The APE operating point is unchanged
- * @APE_100_OPP: The new APE operating point is ape100opp
- * @APE_50_OPP: 50%
- */
-enum ape_opp {
-       APE_OPP_INIT = 0x00,
-       APE_NO_CHANGE = 0x01,
-       APE_100_OPP = 0x02,
-       APE_50_OPP = 0x03
-};
-
 /**
  * enum hw_acc_state - State definition for hardware accelerator
  * @HW_NO_CHANGE: The hardware accelerator state must remain unchanged
@@ -469,26 +420,6 @@ enum auto_enable {
 
 /* End of file previously known as prcmu-fw-defs_v1.h */
 
-/* PRCMU Wakeup defines */
-enum prcmu_wakeup_index {
-       PRCMU_WAKEUP_INDEX_RTC,
-       PRCMU_WAKEUP_INDEX_RTT0,
-       PRCMU_WAKEUP_INDEX_RTT1,
-       PRCMU_WAKEUP_INDEX_HSI0,
-       PRCMU_WAKEUP_INDEX_HSI1,
-       PRCMU_WAKEUP_INDEX_USB,
-       PRCMU_WAKEUP_INDEX_ABB,
-       PRCMU_WAKEUP_INDEX_ABB_FIFO,
-       PRCMU_WAKEUP_INDEX_ARM,
-       NUM_PRCMU_WAKEUP_INDICES
-};
-#define PRCMU_WAKEUP(_name) (BIT(PRCMU_WAKEUP_INDEX_##_name))
-
-/* PRCMU QoS APE OPP class */
-#define PRCMU_QOS_APE_OPP 1
-#define PRCMU_QOS_DDR_OPP 2
-#define PRCMU_QOS_DEFAULT_VALUE -1
-
 /**
  * enum hw_acc_dev - enum for hw accelerators
  * @HW_ACC_SVAMMDSP: for SVAMMDSP
@@ -526,64 +457,6 @@ enum hw_acc_dev {
        NUM_HW_ACC
 };
 
-/*
- * Ids for all EPODs (power domains)
- * - EPOD_ID_SVAMMDSP: power domain for SVA MMDSP
- * - EPOD_ID_SVAPIPE: power domain for SVA pipe
- * - EPOD_ID_SIAMMDSP: power domain for SIA MMDSP
- * - EPOD_ID_SIAPIPE: power domain for SIA pipe
- * - EPOD_ID_SGA: power domain for SGA
- * - EPOD_ID_B2R2_MCDE: power domain for B2R2 and MCDE
- * - EPOD_ID_ESRAM12: power domain for ESRAM 1 and 2
- * - EPOD_ID_ESRAM34: power domain for ESRAM 3 and 4
- * - NUM_EPOD_ID: number of power domains
- */
-#define EPOD_ID_SVAMMDSP       0
-#define EPOD_ID_SVAPIPE                1
-#define EPOD_ID_SIAMMDSP       2
-#define EPOD_ID_SIAPIPE                3
-#define EPOD_ID_SGA            4
-#define EPOD_ID_B2R2_MCDE      5
-#define EPOD_ID_ESRAM12                6
-#define EPOD_ID_ESRAM34                7
-#define NUM_EPOD_ID            8
-
-/*
- * state definition for EPOD (power domain)
- * - EPOD_STATE_NO_CHANGE: The EPOD should remain unchanged
- * - EPOD_STATE_OFF: The EPOD is switched off
- * - EPOD_STATE_RAMRET: The EPOD is switched off with its internal RAM in
- *                         retention
- * - EPOD_STATE_ON_CLK_OFF: The EPOD is switched on, clock is still off
- * - EPOD_STATE_ON: Same as above, but with clock enabled
- */
-#define EPOD_STATE_NO_CHANGE   0x00
-#define EPOD_STATE_OFF         0x01
-#define EPOD_STATE_RAMRET      0x02
-#define EPOD_STATE_ON_CLK_OFF  0x03
-#define EPOD_STATE_ON          0x04
-
-/*
- * CLKOUT sources
- */
-#define PRCMU_CLKSRC_CLK38M            0x00
-#define PRCMU_CLKSRC_ACLK              0x01
-#define PRCMU_CLKSRC_SYSCLK            0x02
-#define PRCMU_CLKSRC_LCDCLK            0x03
-#define PRCMU_CLKSRC_SDMMCCLK          0x04
-#define PRCMU_CLKSRC_TVCLK             0x05
-#define PRCMU_CLKSRC_TIMCLK            0x06
-#define PRCMU_CLKSRC_CLK009            0x07
-/* These are only valid for CLKOUT1: */
-#define PRCMU_CLKSRC_SIAMMDSPCLK       0x40
-#define PRCMU_CLKSRC_I2CCLK            0x41
-#define PRCMU_CLKSRC_MSP02CLK          0x42
-#define PRCMU_CLKSRC_ARMPLL_OBSCLK     0x43
-#define PRCMU_CLKSRC_HSIRXCLK          0x44
-#define PRCMU_CLKSRC_HSITXCLK          0x45
-#define PRCMU_CLKSRC_ARMCLKFIX         0x46
-#define PRCMU_CLKSRC_HDMICLK           0x47
-
 /*
  * Definitions for autonomous power management configuration.
  */
@@ -620,88 +493,12 @@ struct prcmu_auto_pm_config {
        u8 sva_policy;
 };
 
-/**
- * enum ddr_opp - DDR OPP states definition
- * @DDR_100_OPP: The new DDR operating point is ddr100opp
- * @DDR_50_OPP: The new DDR operating point is ddr50opp
- * @DDR_25_OPP: The new DDR operating point is ddr25opp
- */
-enum ddr_opp {
-       DDR_100_OPP = 0x00,
-       DDR_50_OPP = 0x01,
-       DDR_25_OPP = 0x02,
-};
-
-/*
- * Clock identifiers.
- */
-enum prcmu_clock {
-       PRCMU_SGACLK,
-       PRCMU_UARTCLK,
-       PRCMU_MSP02CLK,
-       PRCMU_MSP1CLK,
-       PRCMU_I2CCLK,
-       PRCMU_SDMMCCLK,
-       PRCMU_SLIMCLK,
-       PRCMU_PER1CLK,
-       PRCMU_PER2CLK,
-       PRCMU_PER3CLK,
-       PRCMU_PER5CLK,
-       PRCMU_PER6CLK,
-       PRCMU_PER7CLK,
-       PRCMU_LCDCLK,
-       PRCMU_BMLCLK,
-       PRCMU_HSITXCLK,
-       PRCMU_HSIRXCLK,
-       PRCMU_HDMICLK,
-       PRCMU_APEATCLK,
-       PRCMU_APETRACECLK,
-       PRCMU_MCDECLK,
-       PRCMU_IPI2CCLK,
-       PRCMU_DSIALTCLK,
-       PRCMU_DMACLK,
-       PRCMU_B2R2CLK,
-       PRCMU_TVCLK,
-       PRCMU_SSPCLK,
-       PRCMU_RNGCLK,
-       PRCMU_UICCCLK,
-       PRCMU_NUM_REG_CLOCKS,
-       PRCMU_SYSCLK = PRCMU_NUM_REG_CLOCKS,
-       PRCMU_TIMCLK,
-};
-
-/*
- * Definitions for controlling ESRAM0 in deep sleep.
- */
-#define ESRAM0_DEEP_SLEEP_STATE_OFF 1
-#define ESRAM0_DEEP_SLEEP_STATE_RET 2
-
-#ifdef CONFIG_MFD_DB8500_PRCMU
-void __init prcmu_early_init(void);
-int prcmu_set_display_clocks(void);
-int prcmu_disable_dsipll(void);
-int prcmu_enable_dsipll(void);
-#else
-static inline void __init prcmu_early_init(void) {}
-#endif
-
 #ifdef CONFIG_MFD_DB8500_PRCMU
 
+void db8500_prcmu_early_init(void);
 int prcmu_set_rc_a2p(enum romcode_write);
 enum romcode_read prcmu_get_rc_p2a(void);
 enum ap_pwrst prcmu_get_xp70_current_state(void);
-int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll);
-
-void prcmu_enable_wakeups(u32 wakeups);
-static inline void prcmu_disable_wakeups(void)
-{
-       prcmu_enable_wakeups(0);
-}
-
-void prcmu_config_abb_event_readout(u32 abb_events);
-void prcmu_get_abb_event_buffer(void __iomem **buf);
-int prcmu_set_arm_opp(u8 opp);
-int prcmu_get_arm_opp(void);
 bool prcmu_has_arm_maxopp(void);
 bool prcmu_is_u8400(void);
 int prcmu_set_ape_opp(u8 opp);
@@ -710,19 +507,14 @@ int prcmu_request_ape_opp_100_voltage(bool enable);
 int prcmu_release_usb_wakeup_state(void);
 int prcmu_set_ddr_opp(u8 opp);
 int prcmu_get_ddr_opp(void);
-unsigned long prcmu_qos_get_cpufreq_opp_delay(void);
-void prcmu_qos_set_cpufreq_opp_delay(unsigned long);
 /* NOTE! Use regulator framework instead */
 int prcmu_set_hwacc(u16 hw_acc_dev, u8 state);
-int prcmu_set_epod(u16 epod_id, u8 epod_state);
 void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep,
        struct prcmu_auto_pm_config *idle);
 bool prcmu_is_auto_pm_enabled(void);
 
 int prcmu_config_clkout(u8 clkout, u8 source, u8 div);
-int prcmu_request_clock(u8 clock, bool enable);
 int prcmu_set_clock_divider(u8 clock, u8 divider);
-int prcmu_config_esram0_deep_sleep(u8 state);
 int prcmu_config_hotdog(u8 threshold);
 int prcmu_config_hotmon(u8 low, u8 high);
 int prcmu_start_temp_sense(u16 cycles32k);
@@ -732,14 +524,36 @@ int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size);
 
 void prcmu_ac_wake_req(void);
 void prcmu_ac_sleep_req(void);
-void prcmu_system_reset(u16 reset_code);
 void prcmu_modem_reset(void);
-bool prcmu_is_ac_wake_requested(void);
 void prcmu_enable_spi2(void);
 void prcmu_disable_spi2(void);
 
+int prcmu_config_a9wdog(u8 num, bool sleep_auto_off);
+int prcmu_enable_a9wdog(u8 id);
+int prcmu_disable_a9wdog(u8 id);
+int prcmu_kick_a9wdog(u8 id);
+int prcmu_load_a9wdog(u8 id, u32 val);
+
+void db8500_prcmu_system_reset(u16 reset_code);
+int db8500_prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll);
+void db8500_prcmu_enable_wakeups(u32 wakeups);
+int db8500_prcmu_set_epod(u16 epod_id, u8 epod_state);
+int db8500_prcmu_request_clock(u8 clock, bool enable);
+int db8500_prcmu_set_display_clocks(void);
+int db8500_prcmu_disable_dsipll(void);
+int db8500_prcmu_enable_dsipll(void);
+void db8500_prcmu_config_abb_event_readout(u32 abb_events);
+void db8500_prcmu_get_abb_event_buffer(void __iomem **buf);
+int db8500_prcmu_config_esram0_deep_sleep(u8 state);
+u16 db8500_prcmu_get_reset_code(void);
+bool db8500_prcmu_is_ac_wake_requested(void);
+int db8500_prcmu_set_arm_opp(u8 opp);
+int db8500_prcmu_get_arm_opp(void);
+
 #else /* !CONFIG_MFD_DB8500_PRCMU */
 
+static inline void db8500_prcmu_early_init(void) {}
+
 static inline int prcmu_set_rc_a2p(enum romcode_write code)
 {
        return 0;
@@ -755,34 +569,12 @@ static inline enum ap_pwrst prcmu_get_xp70_current_state(void)
        return AP_EXECUTE;
 }
 
-static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk,
-       bool keep_ap_pll)
-{
-       return 0;
-}
-
-static inline void prcmu_enable_wakeups(u32 wakeups) {}
-
-static inline void prcmu_disable_wakeups(void) {}
-
-static inline void prcmu_config_abb_event_readout(u32 abb_events) {}
-
-static inline int prcmu_set_arm_opp(u8 opp)
-{
-       return 0;
-}
-
-static inline int prcmu_get_arm_opp(void)
-{
-       return ARM_100_OPP;
-}
-
-static bool prcmu_has_arm_maxopp(void)
+static inline bool prcmu_has_arm_maxopp(void)
 {
        return false;
 }
 
-static bool prcmu_is_u8400(void)
+static inline bool prcmu_is_u8400(void)
 {
        return false;
 }
@@ -817,13 +609,6 @@ static inline int prcmu_get_ddr_opp(void)
        return DDR_100_OPP;
 }
 
-static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void)
-{
-       return 0;
-}
-
-static inline void prcmu_qos_set_cpufreq_opp_delay(unsigned long n) {}
-
 static inline int prcmu_set_hwacc(u16 hw_acc_dev, u8 state)
 {
        return 0;
@@ -844,21 +629,11 @@ static inline int prcmu_config_clkout(u8 clkout, u8 source, u8 div)
        return 0;
 }
 
-static inline int prcmu_request_clock(u8 clock, bool enable)
-{
-       return 0;
-}
-
 static inline int prcmu_set_clock_divider(u8 clock, u8 divider)
 {
        return 0;
 }
 
-int prcmu_config_esram0_deep_sleep(u8 state)
-{
-       return 0;
-}
-
 static inline int prcmu_config_hotdog(u8 threshold)
 {
        return 0;
@@ -893,86 +668,107 @@ static inline void prcmu_ac_wake_req(void) {}
 
 static inline void prcmu_ac_sleep_req(void) {}
 
-static inline void prcmu_system_reset(u16 reset_code) {}
-
 static inline void prcmu_modem_reset(void) {}
 
-static inline bool prcmu_is_ac_wake_requested(void)
+static inline int prcmu_enable_spi2(void)
 {
-       return false;
+       return 0;
 }
 
-#ifndef CONFIG_UX500_SOC_DB5500
-static inline int prcmu_set_display_clocks(void)
+static inline int prcmu_disable_spi2(void)
 {
        return 0;
 }
 
-static inline int prcmu_disable_dsipll(void)
+static inline void db8500_prcmu_system_reset(u16 reset_code) {}
+
+static inline int db8500_prcmu_set_power_state(u8 state, bool keep_ulp_clk,
+       bool keep_ap_pll)
 {
        return 0;
 }
 
-static inline int prcmu_enable_dsipll(void)
+static inline void db8500_prcmu_enable_wakeups(u32 wakeups) {}
+
+static inline int db8500_prcmu_set_epod(u16 epod_id, u8 epod_state)
 {
        return 0;
 }
-#endif
 
-static inline int prcmu_enable_spi2(void)
+static inline int db8500_prcmu_request_clock(u8 clock, bool enable)
 {
        return 0;
 }
 
-static inline int prcmu_disable_spi2(void)
+static inline int db8500_prcmu_set_display_clocks(void)
 {
        return 0;
 }
 
-#endif /* !CONFIG_MFD_DB8500_PRCMU */
+static inline int db8500_prcmu_disable_dsipll(void)
+{
+       return 0;
+}
+
+static inline int db8500_prcmu_enable_dsipll(void)
+{
+       return 0;
+}
+
+static inline int db8500_prcmu_config_esram0_deep_sleep(u8 state)
+{
+       return 0;
+}
+
+static inline void db8500_prcmu_config_abb_event_readout(u32 abb_events) {}
 
-#ifdef CONFIG_UX500_PRCMU_QOS_POWER
-int prcmu_qos_requirement(int pm_qos_class);
-int prcmu_qos_add_requirement(int pm_qos_class, char *name, s32 value);
-int prcmu_qos_update_requirement(int pm_qos_class, char *name, s32 new_value);
-void prcmu_qos_remove_requirement(int pm_qos_class, char *name);
-int prcmu_qos_add_notifier(int prcmu_qos_class,
-                          struct notifier_block *notifier);
-int prcmu_qos_remove_notifier(int prcmu_qos_class,
-                             struct notifier_block *notifier);
-#else
-static inline int prcmu_qos_requirement(int prcmu_qos_class)
+static inline void db8500_prcmu_get_abb_event_buffer(void __iomem **buf) {}
+
+static inline u16 db8500_prcmu_get_reset_code(void)
 {
        return 0;
 }
 
-static inline int prcmu_qos_add_requirement(int prcmu_qos_class,
-                                           char *name, s32 value)
+static inline int prcmu_config_a9wdog(u8 num, bool sleep_auto_off)
 {
        return 0;
 }
 
-static inline int prcmu_qos_update_requirement(int prcmu_qos_class,
-                                              char *name, s32 new_value)
+static inline int prcmu_enable_a9wdog(u8 id)
 {
        return 0;
 }
 
-static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name)
+static inline int prcmu_disable_a9wdog(u8 id)
 {
+       return 0;
 }
 
-static inline int prcmu_qos_add_notifier(int prcmu_qos_class,
-                                        struct notifier_block *notifier)
+static inline int prcmu_kick_a9wdog(u8 id)
 {
        return 0;
 }
-static inline int prcmu_qos_remove_notifier(int prcmu_qos_class,
-                                           struct notifier_block *notifier)
+
+static inline int prcmu_load_a9wdog(u8 id, u32 val)
 {
        return 0;
 }
 
-#endif
+static inline bool db8500_prcmu_is_ac_wake_requested(void)
+{
+       return 0;
+}
+
+static inline int db8500_prcmu_set_arm_opp(u8 opp)
+{
+       return 0;
+}
+
+static inline int db8500_prcmu_get_arm_opp(void)
+{
+       return 0;
+}
+
+#endif /* !CONFIG_MFD_DB8500_PRCMU */
 
 #endif /* __MFD_DB8500_PRCMU_H */
diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h
new file mode 100644 (file)
index 0000000..bac942f
--- /dev/null
@@ -0,0 +1,549 @@
+/*
+ * Copyright (C) ST Ericsson SA 2011
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * STE Ux500 PRCMU API
+ */
+#ifndef __MACH_PRCMU_H
+#define __MACH_PRCMU_H
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <asm/mach-types.h>
+
+/* PRCMU Wakeup defines */
+enum prcmu_wakeup_index {
+       PRCMU_WAKEUP_INDEX_RTC,
+       PRCMU_WAKEUP_INDEX_RTT0,
+       PRCMU_WAKEUP_INDEX_RTT1,
+       PRCMU_WAKEUP_INDEX_HSI0,
+       PRCMU_WAKEUP_INDEX_HSI1,
+       PRCMU_WAKEUP_INDEX_USB,
+       PRCMU_WAKEUP_INDEX_ABB,
+       PRCMU_WAKEUP_INDEX_ABB_FIFO,
+       PRCMU_WAKEUP_INDEX_ARM,
+       PRCMU_WAKEUP_INDEX_CD_IRQ,
+       NUM_PRCMU_WAKEUP_INDICES
+};
+#define PRCMU_WAKEUP(_name) (BIT(PRCMU_WAKEUP_INDEX_##_name))
+
+/* EPOD (power domain) IDs */
+
+/*
+ * DB8500 EPODs
+ * - EPOD_ID_SVAMMDSP: power domain for SVA MMDSP
+ * - EPOD_ID_SVAPIPE: power domain for SVA pipe
+ * - EPOD_ID_SIAMMDSP: power domain for SIA MMDSP
+ * - EPOD_ID_SIAPIPE: power domain for SIA pipe
+ * - EPOD_ID_SGA: power domain for SGA
+ * - EPOD_ID_B2R2_MCDE: power domain for B2R2 and MCDE
+ * - EPOD_ID_ESRAM12: power domain for ESRAM 1 and 2
+ * - EPOD_ID_ESRAM34: power domain for ESRAM 3 and 4
+ * - NUM_EPOD_ID: number of power domains
+ *
+ * TODO: These should be prefixed.
+ */
+#define EPOD_ID_SVAMMDSP       0
+#define EPOD_ID_SVAPIPE                1
+#define EPOD_ID_SIAMMDSP       2
+#define EPOD_ID_SIAPIPE                3
+#define EPOD_ID_SGA            4
+#define EPOD_ID_B2R2_MCDE      5
+#define EPOD_ID_ESRAM12                6
+#define EPOD_ID_ESRAM34                7
+#define NUM_EPOD_ID            8
+
+/*
+ * DB5500 EPODs
+ */
+#define DB5500_EPOD_ID_BASE 0x0100
+#define DB5500_EPOD_ID_SGA (DB5500_EPOD_ID_BASE + 0)
+#define DB5500_EPOD_ID_HVA (DB5500_EPOD_ID_BASE + 1)
+#define DB5500_EPOD_ID_SIA (DB5500_EPOD_ID_BASE + 2)
+#define DB5500_EPOD_ID_DISP (DB5500_EPOD_ID_BASE + 3)
+#define DB5500_EPOD_ID_ESRAM12 (DB5500_EPOD_ID_BASE + 6)
+#define DB5500_NUM_EPOD_ID 7
+
+/*
+ * state definition for EPOD (power domain)
+ * - EPOD_STATE_NO_CHANGE: The EPOD should remain unchanged
+ * - EPOD_STATE_OFF: The EPOD is switched off
+ * - EPOD_STATE_RAMRET: The EPOD is switched off with its internal RAM in
+ *                         retention
+ * - EPOD_STATE_ON_CLK_OFF: The EPOD is switched on, clock is still off
+ * - EPOD_STATE_ON: Same as above, but with clock enabled
+ */
+#define EPOD_STATE_NO_CHANGE   0x00
+#define EPOD_STATE_OFF         0x01
+#define EPOD_STATE_RAMRET      0x02
+#define EPOD_STATE_ON_CLK_OFF  0x03
+#define EPOD_STATE_ON          0x04
+
+/*
+ * CLKOUT sources
+ */
+#define PRCMU_CLKSRC_CLK38M            0x00
+#define PRCMU_CLKSRC_ACLK              0x01
+#define PRCMU_CLKSRC_SYSCLK            0x02
+#define PRCMU_CLKSRC_LCDCLK            0x03
+#define PRCMU_CLKSRC_SDMMCCLK          0x04
+#define PRCMU_CLKSRC_TVCLK             0x05
+#define PRCMU_CLKSRC_TIMCLK            0x06
+#define PRCMU_CLKSRC_CLK009            0x07
+/* These are only valid for CLKOUT1: */
+#define PRCMU_CLKSRC_SIAMMDSPCLK       0x40
+#define PRCMU_CLKSRC_I2CCLK            0x41
+#define PRCMU_CLKSRC_MSP02CLK          0x42
+#define PRCMU_CLKSRC_ARMPLL_OBSCLK     0x43
+#define PRCMU_CLKSRC_HSIRXCLK          0x44
+#define PRCMU_CLKSRC_HSITXCLK          0x45
+#define PRCMU_CLKSRC_ARMCLKFIX         0x46
+#define PRCMU_CLKSRC_HDMICLK           0x47
+
+/*
+ * Clock identifiers.
+ */
+enum prcmu_clock {
+       PRCMU_SGACLK,
+       PRCMU_UARTCLK,
+       PRCMU_MSP02CLK,
+       PRCMU_MSP1CLK,
+       PRCMU_I2CCLK,
+       PRCMU_SDMMCCLK,
+       PRCMU_SLIMCLK,
+       PRCMU_PER1CLK,
+       PRCMU_PER2CLK,
+       PRCMU_PER3CLK,
+       PRCMU_PER5CLK,
+       PRCMU_PER6CLK,
+       PRCMU_PER7CLK,
+       PRCMU_LCDCLK,
+       PRCMU_BMLCLK,
+       PRCMU_HSITXCLK,
+       PRCMU_HSIRXCLK,
+       PRCMU_HDMICLK,
+       PRCMU_APEATCLK,
+       PRCMU_APETRACECLK,
+       PRCMU_MCDECLK,
+       PRCMU_IPI2CCLK,
+       PRCMU_DSIALTCLK,
+       PRCMU_DMACLK,
+       PRCMU_B2R2CLK,
+       PRCMU_TVCLK,
+       PRCMU_SSPCLK,
+       PRCMU_RNGCLK,
+       PRCMU_UICCCLK,
+       PRCMU_PWMCLK,
+       PRCMU_IRDACLK,
+       PRCMU_IRRCCLK,
+       PRCMU_SIACLK,
+       PRCMU_SVACLK,
+       PRCMU_NUM_REG_CLOCKS,
+       PRCMU_SYSCLK = PRCMU_NUM_REG_CLOCKS,
+       PRCMU_TIMCLK,
+       PRCMU_PLLSOC0,
+       PRCMU_PLLSOC1,
+       PRCMU_PLLDDR,
+};
+
+/**
+ * enum ape_opp - APE OPP states definition
+ * @APE_OPP_INIT:
+ * @APE_NO_CHANGE: The APE operating point is unchanged
+ * @APE_100_OPP: The new APE operating point is ape100opp
+ * @APE_50_OPP: 50%
+ */
+enum ape_opp {
+       APE_OPP_INIT = 0x00,
+       APE_NO_CHANGE = 0x01,
+       APE_100_OPP = 0x02,
+       APE_50_OPP = 0x03
+};
+
+/**
+ * enum arm_opp - ARM OPP states definition
+ * @ARM_OPP_INIT:
+ * @ARM_NO_CHANGE: The ARM operating point is unchanged
+ * @ARM_100_OPP: The new ARM operating point is arm100opp
+ * @ARM_50_OPP: The new ARM operating point is arm50opp
+ * @ARM_MAX_OPP: Operating point is "max" (more than 100)
+ * @ARM_MAX_FREQ100OPP: Set max opp if available, else 100
+ * @ARM_EXTCLK: The new ARM operating point is armExtClk
+ */
+enum arm_opp {
+       ARM_OPP_INIT = 0x00,
+       ARM_NO_CHANGE = 0x01,
+       ARM_100_OPP = 0x02,
+       ARM_50_OPP = 0x03,
+       ARM_MAX_OPP = 0x04,
+       ARM_MAX_FREQ100OPP = 0x05,
+       ARM_EXTCLK = 0x07
+};
+
+/**
+ * enum ddr_opp - DDR OPP states definition
+ * @DDR_100_OPP: The new DDR operating point is ddr100opp
+ * @DDR_50_OPP: The new DDR operating point is ddr50opp
+ * @DDR_25_OPP: The new DDR operating point is ddr25opp
+ */
+enum ddr_opp {
+       DDR_100_OPP = 0x00,
+       DDR_50_OPP = 0x01,
+       DDR_25_OPP = 0x02,
+};
+
+/*
+ * Definitions for controlling ESRAM0 in deep sleep.
+ */
+#define ESRAM0_DEEP_SLEEP_STATE_OFF 1
+#define ESRAM0_DEEP_SLEEP_STATE_RET 2
+
+/**
+ * enum ddr_pwrst - DDR power states definition
+ * @DDR_PWR_STATE_UNCHANGED: SDRAM and DDR controller state is unchanged
+ * @DDR_PWR_STATE_ON:
+ * @DDR_PWR_STATE_OFFLOWLAT:
+ * @DDR_PWR_STATE_OFFHIGHLAT:
+ */
+enum ddr_pwrst {
+       DDR_PWR_STATE_UNCHANGED     = 0x00,
+       DDR_PWR_STATE_ON            = 0x01,
+       DDR_PWR_STATE_OFFLOWLAT     = 0x02,
+       DDR_PWR_STATE_OFFHIGHLAT    = 0x03
+};
+
+#include <linux/mfd/db8500-prcmu.h>
+#include <linux/mfd/db5500-prcmu.h>
+
+#if defined(CONFIG_UX500_SOC_DB8500) || defined(CONFIG_UX500_SOC_DB5500)
+
+static inline void __init prcmu_early_init(void)
+{
+       if (machine_is_u5500())
+               return db5500_prcmu_early_init();
+       else
+               return db8500_prcmu_early_init();
+}
+
+static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk,
+               bool keep_ap_pll)
+{
+       if (machine_is_u5500())
+               return db5500_prcmu_set_power_state(state, keep_ulp_clk,
+                       keep_ap_pll);
+       else
+               return db8500_prcmu_set_power_state(state, keep_ulp_clk,
+                       keep_ap_pll);
+}
+
+static inline int prcmu_set_epod(u16 epod_id, u8 epod_state)
+{
+       if (machine_is_u5500())
+               return -EINVAL;
+       else
+               return db8500_prcmu_set_epod(epod_id, epod_state);
+}
+
+static inline void prcmu_enable_wakeups(u32 wakeups)
+{
+       if (machine_is_u5500())
+               db5500_prcmu_enable_wakeups(wakeups);
+       else
+               db8500_prcmu_enable_wakeups(wakeups);
+}
+
+static inline void prcmu_disable_wakeups(void)
+{
+       prcmu_enable_wakeups(0);
+}
+
+static inline void prcmu_config_abb_event_readout(u32 abb_events)
+{
+       if (machine_is_u5500())
+               db5500_prcmu_config_abb_event_readout(abb_events);
+       else
+               db8500_prcmu_config_abb_event_readout(abb_events);
+}
+
+static inline void prcmu_get_abb_event_buffer(void __iomem **buf)
+{
+       if (machine_is_u5500())
+               db5500_prcmu_get_abb_event_buffer(buf);
+       else
+               db8500_prcmu_get_abb_event_buffer(buf);
+}
+
+int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);
+int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size);
+
+int prcmu_config_clkout(u8 clkout, u8 source, u8 div);
+
+static inline int prcmu_request_clock(u8 clock, bool enable)
+{
+       if (machine_is_u5500())
+               return db5500_prcmu_request_clock(clock, enable);
+       else
+               return db8500_prcmu_request_clock(clock, enable);
+}
+
+int prcmu_set_ape_opp(u8 opp);
+int prcmu_get_ape_opp(void);
+int prcmu_set_ddr_opp(u8 opp);
+int prcmu_get_ddr_opp(void);
+
+static inline int prcmu_set_arm_opp(u8 opp)
+{
+       if (machine_is_u5500())
+               return -EINVAL;
+       else
+               return db8500_prcmu_set_arm_opp(opp);
+}
+
+static inline int prcmu_get_arm_opp(void)
+{
+       if (machine_is_u5500())
+               return -EINVAL;
+       else
+               return db8500_prcmu_get_arm_opp();
+}
+
+static inline void prcmu_system_reset(u16 reset_code)
+{
+       if (machine_is_u5500())
+               return db5500_prcmu_system_reset(reset_code);
+       else
+               return db8500_prcmu_system_reset(reset_code);
+}
+
+static inline u16 prcmu_get_reset_code(void)
+{
+       if (machine_is_u5500())
+               return db5500_prcmu_get_reset_code();
+       else
+               return db8500_prcmu_get_reset_code();
+}
+
+void prcmu_ac_wake_req(void);
+void prcmu_ac_sleep_req(void);
+void prcmu_modem_reset(void);
+static inline bool prcmu_is_ac_wake_requested(void)
+{
+       if (machine_is_u5500())
+               return db5500_prcmu_is_ac_wake_requested();
+       else
+               return db8500_prcmu_is_ac_wake_requested();
+}
+
+static inline int prcmu_set_display_clocks(void)
+{
+       if (machine_is_u5500())
+               return db5500_prcmu_set_display_clocks();
+       else
+               return db8500_prcmu_set_display_clocks();
+}
+
+static inline int prcmu_disable_dsipll(void)
+{
+       if (machine_is_u5500())
+               return db5500_prcmu_disable_dsipll();
+       else
+               return db8500_prcmu_disable_dsipll();
+}
+
+static inline int prcmu_enable_dsipll(void)
+{
+       if (machine_is_u5500())
+               return db5500_prcmu_enable_dsipll();
+       else
+               return db8500_prcmu_enable_dsipll();
+}
+
+static inline int prcmu_config_esram0_deep_sleep(u8 state)
+{
+       if (machine_is_u5500())
+               return -EINVAL;
+       else
+               return db8500_prcmu_config_esram0_deep_sleep(state);
+}
+#else
+
+static inline void __init prcmu_early_init(void) {}
+
+static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk,
+       bool keep_ap_pll)
+{
+       return 0;
+}
+
+static inline int prcmu_set_epod(u16 epod_id, u8 epod_state)
+{
+       return 0;
+}
+
+static inline void prcmu_enable_wakeups(u32 wakeups) {}
+
+static inline void prcmu_disable_wakeups(void) {}
+
+static inline int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size)
+{
+       return -ENOSYS;
+}
+
+static inline int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size)
+{
+       return -ENOSYS;
+}
+
+static inline int prcmu_config_clkout(u8 clkout, u8 source, u8 div)
+{
+       return 0;
+}
+
+static inline int prcmu_request_clock(u8 clock, bool enable)
+{
+       return 0;
+}
+
+static inline int prcmu_set_ape_opp(u8 opp)
+{
+       return 0;
+}
+
+static inline int prcmu_get_ape_opp(void)
+{
+       return APE_100_OPP;
+}
+
+static inline int prcmu_set_arm_opp(u8 opp)
+{
+       return 0;
+}
+
+static inline int prcmu_get_arm_opp(void)
+{
+       return ARM_100_OPP;
+}
+
+static inline int prcmu_set_ddr_opp(u8 opp)
+{
+       return 0;
+}
+
+static inline int prcmu_get_ddr_opp(void)
+{
+       return DDR_100_OPP;
+}
+
+static inline void prcmu_system_reset(u16 reset_code) {}
+
+static inline u16 prcmu_get_reset_code(void)
+{
+       return 0;
+}
+
+static inline void prcmu_ac_wake_req(void) {}
+
+static inline void prcmu_ac_sleep_req(void) {}
+
+static inline void prcmu_modem_reset(void) {}
+
+static inline bool prcmu_is_ac_wake_requested(void)
+{
+       return false;
+}
+
+static inline int prcmu_set_display_clocks(void)
+{
+       return 0;
+}
+
+static inline int prcmu_disable_dsipll(void)
+{
+       return 0;
+}
+
+static inline int prcmu_enable_dsipll(void)
+{
+       return 0;
+}
+
+static inline int prcmu_config_esram0_deep_sleep(u8 state)
+{
+       return 0;
+}
+
+static inline void prcmu_config_abb_event_readout(u32 abb_events) {}
+
+static inline void prcmu_get_abb_event_buffer(void __iomem **buf)
+{
+       *buf = NULL;
+}
+
+#endif
+
+/* PRCMU QoS APE OPP class */
+#define PRCMU_QOS_APE_OPP 1
+#define PRCMU_QOS_DDR_OPP 2
+#define PRCMU_QOS_DEFAULT_VALUE -1
+
+#ifdef CONFIG_UX500_PRCMU_QOS_POWER
+
+unsigned long prcmu_qos_get_cpufreq_opp_delay(void);
+void prcmu_qos_set_cpufreq_opp_delay(unsigned long);
+void prcmu_qos_force_opp(int, s32);
+int prcmu_qos_requirement(int pm_qos_class);
+int prcmu_qos_add_requirement(int pm_qos_class, char *name, s32 value);
+int prcmu_qos_update_requirement(int pm_qos_class, char *name, s32 new_value);
+void prcmu_qos_remove_requirement(int pm_qos_class, char *name);
+int prcmu_qos_add_notifier(int prcmu_qos_class,
+                          struct notifier_block *notifier);
+int prcmu_qos_remove_notifier(int prcmu_qos_class,
+                             struct notifier_block *notifier);
+
+#else
+
+static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void)
+{
+       return 0;
+}
+
+static inline void prcmu_qos_set_cpufreq_opp_delay(unsigned long n) {}
+
+static inline void prcmu_qos_force_opp(int prcmu_qos_class, s32 i) {}
+
+static inline int prcmu_qos_requirement(int prcmu_qos_class)
+{
+       return 0;
+}
+
+static inline int prcmu_qos_add_requirement(int prcmu_qos_class,
+                                           char *name, s32 value)
+{
+       return 0;
+}
+
+static inline int prcmu_qos_update_requirement(int prcmu_qos_class,
+                                              char *name, s32 new_value)
+{
+       return 0;
+}
+
+static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name)
+{
+}
+
+static inline int prcmu_qos_add_notifier(int prcmu_qos_class,
+                                        struct notifier_block *notifier)
+{
+       return 0;
+}
+static inline int prcmu_qos_remove_notifier(int prcmu_qos_class,
+                                           struct notifier_block *notifier)
+{
+       return 0;
+}
+
+#endif
+
+#endif /* __MACH_PRCMU_H */
diff --git a/include/linux/mfd/intel_msic.h b/include/linux/mfd/intel_msic.h
new file mode 100644 (file)
index 0000000..439a7a6
--- /dev/null
@@ -0,0 +1,456 @@
+/*
+ * include/linux/mfd/intel_msic.h - Core interface for Intel MSIC
+ *
+ * Copyright (C) 2011, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_MFD_INTEL_MSIC_H__
+#define __LINUX_MFD_INTEL_MSIC_H__
+
+/* ID */
+#define INTEL_MSIC_ID0                 0x000   /* RO */
+#define INTEL_MSIC_ID1                 0x001   /* RO */
+
+/* IRQ */
+#define INTEL_MSIC_IRQLVL1             0x002
+#define INTEL_MSIC_ADC1INT             0x003
+#define INTEL_MSIC_CCINT               0x004
+#define INTEL_MSIC_PWRSRCINT           0x005
+#define INTEL_MSIC_PWRSRCINT1          0x006
+#define INTEL_MSIC_CHRINT              0x007
+#define INTEL_MSIC_CHRINT1             0x008
+#define INTEL_MSIC_RTCIRQ              0x009
+#define INTEL_MSIC_GPIO0LVIRQ          0x00a
+#define INTEL_MSIC_GPIO1LVIRQ          0x00b
+#define INTEL_MSIC_GPIOHVIRQ           0x00c
+#define INTEL_MSIC_VRINT               0x00d
+#define INTEL_MSIC_OCAUDIO             0x00e
+#define INTEL_MSIC_ACCDET              0x00f
+#define INTEL_MSIC_RESETIRQ1           0x010
+#define INTEL_MSIC_RESETIRQ2           0x011
+#define INTEL_MSIC_MADC1INT            0x012
+#define INTEL_MSIC_MCCINT              0x013
+#define INTEL_MSIC_MPWRSRCINT          0x014
+#define INTEL_MSIC_MPWRSRCINT1         0x015
+#define INTEL_MSIC_MCHRINT             0x016
+#define INTEL_MSIC_MCHRINT1            0x017
+#define INTEL_MSIC_RTCIRQMASK          0x018
+#define INTEL_MSIC_GPIO0LVIRQMASK      0x019
+#define INTEL_MSIC_GPIO1LVIRQMASK      0x01a
+#define INTEL_MSIC_GPIOHVIRQMASK       0x01b
+#define INTEL_MSIC_VRINTMASK           0x01c
+#define INTEL_MSIC_OCAUDIOMASK         0x01d
+#define INTEL_MSIC_ACCDETMASK          0x01e
+#define INTEL_MSIC_RESETIRQ1MASK       0x01f
+#define INTEL_MSIC_RESETIRQ2MASK       0x020
+#define INTEL_MSIC_IRQLVL1MSK          0x021
+#define INTEL_MSIC_PBCONFIG            0x03e
+#define INTEL_MSIC_PBSTATUS            0x03f   /* RO */
+
+/* GPIO */
+#define INTEL_MSIC_GPIO0LV7CTLO                0x040
+#define INTEL_MSIC_GPIO0LV6CTLO                0x041
+#define INTEL_MSIC_GPIO0LV5CTLO                0x042
+#define INTEL_MSIC_GPIO0LV4CTLO                0x043
+#define INTEL_MSIC_GPIO0LV3CTLO                0x044
+#define INTEL_MSIC_GPIO0LV2CTLO                0x045
+#define INTEL_MSIC_GPIO0LV1CTLO                0x046
+#define INTEL_MSIC_GPIO0LV0CTLO                0x047
+#define INTEL_MSIC_GPIO1LV7CTLOS       0x048
+#define INTEL_MSIC_GPIO1LV6CTLO                0x049
+#define INTEL_MSIC_GPIO1LV5CTLO                0x04a
+#define INTEL_MSIC_GPIO1LV4CTLO                0x04b
+#define INTEL_MSIC_GPIO1LV3CTLO                0x04c
+#define INTEL_MSIC_GPIO1LV2CTLO                0x04d
+#define INTEL_MSIC_GPIO1LV1CTLO                0x04e
+#define INTEL_MSIC_GPIO1LV0CTLO                0x04f
+#define INTEL_MSIC_GPIO0LV7CTLI                0x050
+#define INTEL_MSIC_GPIO0LV6CTLI                0x051
+#define INTEL_MSIC_GPIO0LV5CTLI                0x052
+#define INTEL_MSIC_GPIO0LV4CTLI                0x053
+#define INTEL_MSIC_GPIO0LV3CTLI                0x054
+#define INTEL_MSIC_GPIO0LV2CTLI                0x055
+#define INTEL_MSIC_GPIO0LV1CTLI                0x056
+#define INTEL_MSIC_GPIO0LV0CTLI                0x057
+#define INTEL_MSIC_GPIO1LV7CTLIS       0x058
+#define INTEL_MSIC_GPIO1LV6CTLI                0x059
+#define INTEL_MSIC_GPIO1LV5CTLI                0x05a
+#define INTEL_MSIC_GPIO1LV4CTLI                0x05b
+#define INTEL_MSIC_GPIO1LV3CTLI                0x05c
+#define INTEL_MSIC_GPIO1LV2CTLI                0x05d
+#define INTEL_MSIC_GPIO1LV1CTLI                0x05e
+#define INTEL_MSIC_GPIO1LV0CTLI                0x05f
+#define INTEL_MSIC_PWM0CLKDIV1         0x061
+#define INTEL_MSIC_PWM0CLKDIV0         0x062
+#define INTEL_MSIC_PWM1CLKDIV1         0x063
+#define INTEL_MSIC_PWM1CLKDIV0         0x064
+#define INTEL_MSIC_PWM2CLKDIV1         0x065
+#define INTEL_MSIC_PWM2CLKDIV0         0x066
+#define INTEL_MSIC_PWM0DUTYCYCLE       0x067
+#define INTEL_MSIC_PWM1DUTYCYCLE       0x068
+#define INTEL_MSIC_PWM2DUTYCYCLE       0x069
+#define INTEL_MSIC_GPIO0HV3CTLO                0x06d
+#define INTEL_MSIC_GPIO0HV2CTLO                0x06e
+#define INTEL_MSIC_GPIO0HV1CTLO                0x06f
+#define INTEL_MSIC_GPIO0HV0CTLO                0x070
+#define INTEL_MSIC_GPIO1HV3CTLO                0x071
+#define INTEL_MSIC_GPIO1HV2CTLO                0x072
+#define INTEL_MSIC_GPIO1HV1CTLO                0x073
+#define INTEL_MSIC_GPIO1HV0CTLO                0x074
+#define INTEL_MSIC_GPIO0HV3CTLI                0x075
+#define INTEL_MSIC_GPIO0HV2CTLI                0x076
+#define INTEL_MSIC_GPIO0HV1CTLI                0x077
+#define INTEL_MSIC_GPIO0HV0CTLI                0x078
+#define INTEL_MSIC_GPIO1HV3CTLI                0x079
+#define INTEL_MSIC_GPIO1HV2CTLI                0x07a
+#define INTEL_MSIC_GPIO1HV1CTLI                0x07b
+#define INTEL_MSIC_GPIO1HV0CTLI                0x07c
+
+/* SVID */
+#define INTEL_MSIC_SVIDCTRL0           0x080
+#define INTEL_MSIC_SVIDCTRL1           0x081
+#define INTEL_MSIC_SVIDCTRL2           0x082
+#define INTEL_MSIC_SVIDTXLASTPKT3      0x083   /* RO */
+#define INTEL_MSIC_SVIDTXLASTPKT2      0x084   /* RO */
+#define INTEL_MSIC_SVIDTXLASTPKT1      0x085   /* RO */
+#define INTEL_MSIC_SVIDTXLASTPKT0      0x086   /* RO */
+#define INTEL_MSIC_SVIDPKTOUTBYTE3     0x087
+#define INTEL_MSIC_SVIDPKTOUTBYTE2     0x088
+#define INTEL_MSIC_SVIDPKTOUTBYTE1     0x089
+#define INTEL_MSIC_SVIDPKTOUTBYTE0     0x08a
+#define INTEL_MSIC_SVIDRXVPDEBUG1      0x08b
+#define INTEL_MSIC_SVIDRXVPDEBUG0      0x08c
+#define INTEL_MSIC_SVIDRXLASTPKT3      0x08d   /* RO */
+#define INTEL_MSIC_SVIDRXLASTPKT2      0x08e   /* RO */
+#define INTEL_MSIC_SVIDRXLASTPKT1      0x08f   /* RO */
+#define INTEL_MSIC_SVIDRXLASTPKT0      0x090   /* RO */
+#define INTEL_MSIC_SVIDRXCHKSTATUS3    0x091   /* RO */
+#define INTEL_MSIC_SVIDRXCHKSTATUS2    0x092   /* RO */
+#define INTEL_MSIC_SVIDRXCHKSTATUS1    0x093   /* RO */
+#define INTEL_MSIC_SVIDRXCHKSTATUS0    0x094   /* RO */
+
+/* VREG */
+#define INTEL_MSIC_VCCLATCH            0x0c0
+#define INTEL_MSIC_VNNLATCH            0x0c1
+#define INTEL_MSIC_VCCCNT              0x0c2
+#define INTEL_MSIC_SMPSRAMP            0x0c3
+#define INTEL_MSIC_VNNCNT              0x0c4
+#define INTEL_MSIC_VNNAONCNT           0x0c5
+#define INTEL_MSIC_VCC122AONCNT                0x0c6
+#define INTEL_MSIC_V180AONCNT          0x0c7
+#define INTEL_MSIC_V500CNT             0x0c8
+#define INTEL_MSIC_VIHFCNT             0x0c9
+#define INTEL_MSIC_LDORAMP1            0x0ca
+#define INTEL_MSIC_LDORAMP2            0x0cb
+#define INTEL_MSIC_VCC108AONCNT                0x0cc
+#define INTEL_MSIC_VCC108ASCNT         0x0cd
+#define INTEL_MSIC_VCC108CNT           0x0ce
+#define INTEL_MSIC_VCCA100ASCNT                0x0cf
+#define INTEL_MSIC_VCCA100CNT          0x0d0
+#define INTEL_MSIC_VCC180AONCNT                0x0d1
+#define INTEL_MSIC_VCC180CNT           0x0d2
+#define INTEL_MSIC_VCC330CNT           0x0d3
+#define INTEL_MSIC_VUSB330CNT          0x0d4
+#define INTEL_MSIC_VCCSDIOCNT          0x0d5
+#define INTEL_MSIC_VPROG1CNT           0x0d6
+#define INTEL_MSIC_VPROG2CNT           0x0d7
+#define INTEL_MSIC_VEMMCSCNT           0x0d8
+#define INTEL_MSIC_VEMMC1CNT           0x0d9
+#define INTEL_MSIC_VEMMC2CNT           0x0da
+#define INTEL_MSIC_VAUDACNT            0x0db
+#define INTEL_MSIC_VHSPCNT             0x0dc
+#define INTEL_MSIC_VHSNCNT             0x0dd
+#define INTEL_MSIC_VHDMICNT            0x0de
+#define INTEL_MSIC_VOTGCNT             0x0df
+#define INTEL_MSIC_V1P35CNT            0x0e0
+#define INTEL_MSIC_V330AONCNT          0x0e1
+
+/* RESET */
+#define INTEL_MSIC_CHIPCNTRL           0x100   /* WO */
+#define INTEL_MSIC_ERCONFIG            0x101
+
+/* BURST */
+#define INTEL_MSIC_BATCURRENTLIMIT12   0x102
+#define INTEL_MSIC_BATTIMELIMIT12      0x103
+#define INTEL_MSIC_BATTIMELIMIT3       0x104
+#define INTEL_MSIC_BATTIMEDB           0x105
+#define INTEL_MSIC_BRSTCONFIGOUTPUTS   0x106
+#define INTEL_MSIC_BRSTCONFIGACTIONS   0x107
+#define INTEL_MSIC_BURSTCONTROLSTATUS  0x108
+
+/* RTC */
+#define INTEL_MSIC_RTCB1               0x140   /* RO */
+#define INTEL_MSIC_RTCB2               0x141   /* RO */
+#define INTEL_MSIC_RTCB3               0x142   /* RO */
+#define INTEL_MSIC_RTCB4               0x143   /* RO */
+#define INTEL_MSIC_RTCOB1              0x144
+#define INTEL_MSIC_RTCOB2              0x145
+#define INTEL_MSIC_RTCOB3              0x146
+#define INTEL_MSIC_RTCOB4              0x147
+#define INTEL_MSIC_RTCAB1              0x148
+#define INTEL_MSIC_RTCAB2              0x149
+#define INTEL_MSIC_RTCAB3              0x14a
+#define INTEL_MSIC_RTCAB4              0x14b
+#define INTEL_MSIC_RTCWAB1             0x14c
+#define INTEL_MSIC_RTCWAB2             0x14d
+#define INTEL_MSIC_RTCWAB3             0x14e
+#define INTEL_MSIC_RTCWAB4             0x14f
+#define INTEL_MSIC_RTCSC1              0x150
+#define INTEL_MSIC_RTCSC2              0x151
+#define INTEL_MSIC_RTCSC3              0x152
+#define INTEL_MSIC_RTCSC4              0x153
+#define INTEL_MSIC_RTCSTATUS           0x154   /* RO */
+#define INTEL_MSIC_RTCCONFIG1          0x155
+#define INTEL_MSIC_RTCCONFIG2          0x156
+
+/* CHARGER */
+#define INTEL_MSIC_BDTIMER             0x180
+#define INTEL_MSIC_BATTRMV             0x181
+#define INTEL_MSIC_VBUSDET             0x182
+#define INTEL_MSIC_VBUSDET1            0x183
+#define INTEL_MSIC_ADPHVDET            0x184
+#define INTEL_MSIC_ADPLVDET            0x185
+#define INTEL_MSIC_ADPDETDBDM          0x186
+#define INTEL_MSIC_LOWBATTDET          0x187
+#define INTEL_MSIC_CHRCTRL             0x188
+#define INTEL_MSIC_CHRCVOLTAGE         0x189
+#define INTEL_MSIC_CHRCCURRENT         0x18a
+#define INTEL_MSIC_SPCHARGER           0x18b
+#define INTEL_MSIC_CHRTTIME            0x18c
+#define INTEL_MSIC_CHRCTRL1            0x18d
+#define INTEL_MSIC_PWRSRCLMT           0x18e
+#define INTEL_MSIC_CHRSTWDT            0x18f
+#define INTEL_MSIC_WDTWRITE            0x190   /* WO */
+#define INTEL_MSIC_CHRSAFELMT          0x191
+#define INTEL_MSIC_SPWRSRCINT          0x192   /* RO */
+#define INTEL_MSIC_SPWRSRCINT1         0x193   /* RO */
+#define INTEL_MSIC_CHRLEDPWM           0x194
+#define INTEL_MSIC_CHRLEDCTRL          0x195
+
+/* ADC */
+#define INTEL_MSIC_ADC1CNTL1           0x1c0
+#define INTEL_MSIC_ADC1CNTL2           0x1c1
+#define INTEL_MSIC_ADC1CNTL3           0x1c2
+#define INTEL_MSIC_ADC1OFFSETH         0x1c3   /* RO */
+#define INTEL_MSIC_ADC1OFFSETL         0x1c4   /* RO */
+#define INTEL_MSIC_ADC1ADDR0           0x1c5
+#define INTEL_MSIC_ADC1ADDR1           0x1c6
+#define INTEL_MSIC_ADC1ADDR2           0x1c7
+#define INTEL_MSIC_ADC1ADDR3           0x1c8
+#define INTEL_MSIC_ADC1ADDR4           0x1c9
+#define INTEL_MSIC_ADC1ADDR5           0x1ca
+#define INTEL_MSIC_ADC1ADDR6           0x1cb
+#define INTEL_MSIC_ADC1ADDR7           0x1cc
+#define INTEL_MSIC_ADC1ADDR8           0x1cd
+#define INTEL_MSIC_ADC1ADDR9           0x1ce
+#define INTEL_MSIC_ADC1ADDR10          0x1cf
+#define INTEL_MSIC_ADC1ADDR11          0x1d0
+#define INTEL_MSIC_ADC1ADDR12          0x1d1
+#define INTEL_MSIC_ADC1ADDR13          0x1d2
+#define INTEL_MSIC_ADC1ADDR14          0x1d3
+#define INTEL_MSIC_ADC1SNS0H           0x1d4   /* RO */
+#define INTEL_MSIC_ADC1SNS0L           0x1d5   /* RO */
+#define INTEL_MSIC_ADC1SNS1H           0x1d6   /* RO */
+#define INTEL_MSIC_ADC1SNS1L           0x1d7   /* RO */
+#define INTEL_MSIC_ADC1SNS2H           0x1d8   /* RO */
+#define INTEL_MSIC_ADC1SNS2L           0x1d9   /* RO */
+#define INTEL_MSIC_ADC1SNS3H           0x1da   /* RO */
+#define INTEL_MSIC_ADC1SNS3L           0x1db   /* RO */
+#define INTEL_MSIC_ADC1SNS4H           0x1dc   /* RO */
+#define INTEL_MSIC_ADC1SNS4L           0x1dd   /* RO */
+#define INTEL_MSIC_ADC1SNS5H           0x1de   /* RO */
+#define INTEL_MSIC_ADC1SNS5L           0x1df   /* RO */
+#define INTEL_MSIC_ADC1SNS6H           0x1e0   /* RO */
+#define INTEL_MSIC_ADC1SNS6L           0x1e1   /* RO */
+#define INTEL_MSIC_ADC1SNS7H           0x1e2   /* RO */
+#define INTEL_MSIC_ADC1SNS7L           0x1e3   /* RO */
+#define INTEL_MSIC_ADC1SNS8H           0x1e4   /* RO */
+#define INTEL_MSIC_ADC1SNS8L           0x1e5   /* RO */
+#define INTEL_MSIC_ADC1SNS9H           0x1e6   /* RO */
+#define INTEL_MSIC_ADC1SNS9L           0x1e7   /* RO */
+#define INTEL_MSIC_ADC1SNS10H          0x1e8   /* RO */
+#define INTEL_MSIC_ADC1SNS10L          0x1e9   /* RO */
+#define INTEL_MSIC_ADC1SNS11H          0x1ea   /* RO */
+#define INTEL_MSIC_ADC1SNS11L          0x1eb   /* RO */
+#define INTEL_MSIC_ADC1SNS12H          0x1ec   /* RO */
+#define INTEL_MSIC_ADC1SNS12L          0x1ed   /* RO */
+#define INTEL_MSIC_ADC1SNS13H          0x1ee   /* RO */
+#define INTEL_MSIC_ADC1SNS13L          0x1ef   /* RO */
+#define INTEL_MSIC_ADC1SNS14H          0x1f0   /* RO */
+#define INTEL_MSIC_ADC1SNS14L          0x1f1   /* RO */
+#define INTEL_MSIC_ADC1BV0H            0x1f2   /* RO */
+#define INTEL_MSIC_ADC1BV0L            0x1f3   /* RO */
+#define INTEL_MSIC_ADC1BV1H            0x1f4   /* RO */
+#define INTEL_MSIC_ADC1BV1L            0x1f5   /* RO */
+#define INTEL_MSIC_ADC1BV2H            0x1f6   /* RO */
+#define INTEL_MSIC_ADC1BV2L            0x1f7   /* RO */
+#define INTEL_MSIC_ADC1BV3H            0x1f8   /* RO */
+#define INTEL_MSIC_ADC1BV3L            0x1f9   /* RO */
+#define INTEL_MSIC_ADC1BI0H            0x1fa   /* RO */
+#define INTEL_MSIC_ADC1BI0L            0x1fb   /* RO */
+#define INTEL_MSIC_ADC1BI1H            0x1fc   /* RO */
+#define INTEL_MSIC_ADC1BI1L            0x1fd   /* RO */
+#define INTEL_MSIC_ADC1BI2H            0x1fe   /* RO */
+#define INTEL_MSIC_ADC1BI2L            0x1ff   /* RO */
+#define INTEL_MSIC_ADC1BI3H            0x200   /* RO */
+#define INTEL_MSIC_ADC1BI3L            0x201   /* RO */
+#define INTEL_MSIC_CCCNTL              0x202
+#define INTEL_MSIC_CCOFFSETH           0x203   /* RO */
+#define INTEL_MSIC_CCOFFSETL           0x204   /* RO */
+#define INTEL_MSIC_CCADCHA             0x205   /* RO */
+#define INTEL_MSIC_CCADCLA             0x206   /* RO */
+
+/* AUDIO */
+#define INTEL_MSIC_AUDPLLCTRL          0x240
+#define INTEL_MSIC_DMICBUF0123         0x241
+#define INTEL_MSIC_DMICBUF45           0x242
+#define INTEL_MSIC_DMICGPO             0x244
+#define INTEL_MSIC_DMICMUX             0x245
+#define INTEL_MSIC_DMICCLK             0x246
+#define INTEL_MSIC_MICBIAS             0x247
+#define INTEL_MSIC_ADCCONFIG           0x248
+#define INTEL_MSIC_MICAMP1             0x249
+#define INTEL_MSIC_MICAMP2             0x24a
+#define INTEL_MSIC_NOISEMUX            0x24b
+#define INTEL_MSIC_AUDIOMUX12          0x24c
+#define INTEL_MSIC_AUDIOMUX34          0x24d
+#define INTEL_MSIC_AUDIOSINC           0x24e
+#define INTEL_MSIC_AUDIOTXEN           0x24f
+#define INTEL_MSIC_HSEPRXCTRL          0x250
+#define INTEL_MSIC_IHFRXCTRL           0x251
+#define INTEL_MSIC_VOICETXVOL          0x252
+#define INTEL_MSIC_SIDETONEVOL         0x253
+#define INTEL_MSIC_MUSICSHARVOL                0x254
+#define INTEL_MSIC_VOICETXCTRL         0x255
+#define INTEL_MSIC_HSMIXER             0x256
+#define INTEL_MSIC_DACCONFIG           0x257
+#define INTEL_MSIC_SOFTMUTE            0x258
+#define INTEL_MSIC_HSLVOLCTRL          0x259
+#define INTEL_MSIC_HSRVOLCTRL          0x25a
+#define INTEL_MSIC_IHFLVOLCTRL         0x25b
+#define INTEL_MSIC_IHFRVOLCTRL         0x25c
+#define INTEL_MSIC_DRIVEREN            0x25d
+#define INTEL_MSIC_LINEOUTCTRL         0x25e
+#define INTEL_MSIC_VIB1CTRL1           0x25f
+#define INTEL_MSIC_VIB1CTRL2           0x260
+#define INTEL_MSIC_VIB1CTRL3           0x261
+#define INTEL_MSIC_VIB1SPIPCM_1                0x262
+#define INTEL_MSIC_VIB1SPIPCM_2                0x263
+#define INTEL_MSIC_VIB1CTRL5           0x264
+#define INTEL_MSIC_VIB2CTRL1           0x265
+#define INTEL_MSIC_VIB2CTRL2           0x266
+#define INTEL_MSIC_VIB2CTRL3           0x267
+#define INTEL_MSIC_VIB2SPIPCM_1                0x268
+#define INTEL_MSIC_VIB2SPIPCM_2                0x269
+#define INTEL_MSIC_VIB2CTRL5           0x26a
+#define INTEL_MSIC_BTNCTRL1            0x26b
+#define INTEL_MSIC_BTNCTRL2            0x26c
+#define INTEL_MSIC_PCM1TXSLOT01                0x26d
+#define INTEL_MSIC_PCM1TXSLOT23                0x26e
+#define INTEL_MSIC_PCM1TXSLOT45                0x26f
+#define INTEL_MSIC_PCM1RXSLOT0123      0x270
+#define INTEL_MSIC_PCM1RXSLOT045       0x271
+#define INTEL_MSIC_PCM2TXSLOT01                0x272
+#define INTEL_MSIC_PCM2TXSLOT23                0x273
+#define INTEL_MSIC_PCM2TXSLOT45                0x274
+#define INTEL_MSIC_PCM2RXSLOT01                0x275
+#define INTEL_MSIC_PCM2RXSLOT23                0x276
+#define INTEL_MSIC_PCM2RXSLOT45                0x277
+#define INTEL_MSIC_PCM1CTRL1           0x278
+#define INTEL_MSIC_PCM1CTRL2           0x279
+#define INTEL_MSIC_PCM1CTRL3           0x27a
+#define INTEL_MSIC_PCM2CTRL1           0x27b
+#define INTEL_MSIC_PCM2CTRL2           0x27c
+
+/* HDMI */
+#define INTEL_MSIC_HDMIPUEN            0x280
+#define INTEL_MSIC_HDMISTATUS          0x281   /* RO */
+
+/* Physical address of the start of the MSIC interrupt tree in SRAM */
+#define INTEL_MSIC_IRQ_PHYS_BASE       0xffff7fc0
+
+/**
+ * struct intel_msic_gpio_pdata - platform data for the MSIC GPIO driver
+ * @gpio_base: base number for the GPIOs
+ */
+struct intel_msic_gpio_pdata {
+       unsigned        gpio_base;
+};
+
+/**
+ * struct intel_msic_ocd_pdata - platform data for the MSIC OCD driver
+ * @gpio: GPIO number used for OCD interrupts
+ *
+ * The MSIC MFD driver converts @gpio into an IRQ number and passes it to
+ * the OCD driver as %IORESOURCE_IRQ.
+ */
+struct intel_msic_ocd_pdata {
+       unsigned        gpio;
+};
+
+/* MSIC embedded blocks (subdevices) */
+enum intel_msic_block {
+       INTEL_MSIC_BLOCK_TOUCH,
+       INTEL_MSIC_BLOCK_ADC,
+       INTEL_MSIC_BLOCK_BATTERY,
+       INTEL_MSIC_BLOCK_GPIO,
+       INTEL_MSIC_BLOCK_AUDIO,
+       INTEL_MSIC_BLOCK_HDMI,
+       INTEL_MSIC_BLOCK_THERMAL,
+       INTEL_MSIC_BLOCK_POWER_BTN,
+       INTEL_MSIC_BLOCK_OCD,
+
+       INTEL_MSIC_BLOCK_LAST,
+};
+
+/**
+ * struct intel_msic_platform_data - platform data for the MSIC driver
+ * @irq: array of interrupt numbers, one per device. If @irq is set to %0
+ *      for a given block, the corresponding platform device is not
+ *      created. For devices which don't have an interrupt, use %0xff
+ *      (this is same as in SFI spec).
+ * @gpio: platform data for the MSIC GPIO driver
+ * @ocd: platform data for the MSIC OCD driver
+ *
+ * Once the MSIC driver is initialized, the register interface is ready to
+ * use. All the platform devices for subdevices are created after the
+ * register interface is ready so that we can guarantee its availability to
+ * the subdevice drivers.
+ *
+ * Interrupt numbers are passed to the subdevices via %IORESOURCE_IRQ
+ * resources of the created platform device.
+ */
+struct intel_msic_platform_data {
+       int                             irq[INTEL_MSIC_BLOCK_LAST];
+       struct intel_msic_gpio_pdata    *gpio;
+       struct intel_msic_ocd_pdata     *ocd;
+};
+
+struct intel_msic;
+
+extern int intel_msic_reg_read(unsigned short reg, u8 *val);
+extern int intel_msic_reg_write(unsigned short reg, u8 val);
+extern int intel_msic_reg_update(unsigned short reg, u8 val, u8 mask);
+extern int intel_msic_bulk_read(unsigned short *reg, u8 *buf, size_t count);
+extern int intel_msic_bulk_write(unsigned short *reg, u8 *buf, size_t count);
+
+/*
+ * pdev_to_intel_msic - gets an MSIC instance from the platform device
+ * @pdev: platform device pointer
+ *
+ * The client drivers need to have pointer to the MSIC instance if they
+ * want to call intel_msic_irq_read(). This macro can be used for
+ * convenience to get the MSIC pointer from @pdev where needed. This is
+ * _only_ valid for devices which are managed by the MSIC.
+ */
+#define pdev_to_intel_msic(pdev)       (dev_get_drvdata(pdev->dev.parent))
+
+extern int intel_msic_irq_read(struct intel_msic *msic, unsigned short reg,
+                              u8 *val);
+
+#endif /* __LINUX_MFD_INTEL_MSIC_H__ */
index 5ff2400ad46cd4325b31027ff8b82554eb6bc592..3f4deb62d6b0d4c4c3475e074244860977fd259c 100644 (file)
@@ -326,7 +326,6 @@ struct max8997_dev {
        int irq;
        int ono;
        int irq_base;
-       bool wakeup;
        struct mutex irqlock;
        int irq_masks_cur[MAX8997_IRQ_GROUP_NR];
        int irq_masks_cache[MAX8997_IRQ_GROUP_NR];
index 7d0f3d6a0002c86d74b300215236f15f487c8c19..a8eeda773a7b1349d4f506b151d8a7d735b795bd 100644 (file)
 
 #include <linux/mfd/mc13xxx.h>
 
-struct mc13783;
-
-struct mc13xxx *mc13783_to_mc13xxx(struct mc13783 *mc13783);
-
-static inline void mc13783_lock(struct mc13783 *mc13783)
-{
-       mc13xxx_lock(mc13783_to_mc13xxx(mc13783));
-}
-
-static inline void mc13783_unlock(struct mc13783 *mc13783)
-{
-       mc13xxx_unlock(mc13783_to_mc13xxx(mc13783));
-}
-
-static inline int mc13783_reg_read(struct mc13783 *mc13783,
-               unsigned int offset, u32 *val)
-{
-       return mc13xxx_reg_read(mc13783_to_mc13xxx(mc13783), offset, val);
-}
-
-static inline int mc13783_reg_write(struct mc13783 *mc13783,
-               unsigned int offset, u32 val)
-{
-       return mc13xxx_reg_write(mc13783_to_mc13xxx(mc13783), offset, val);
-}
-
-static inline int mc13783_reg_rmw(struct mc13783 *mc13783,
-               unsigned int offset, u32 mask, u32 val)
-{
-       return mc13xxx_reg_rmw(mc13783_to_mc13xxx(mc13783), offset, mask, val);
-}
-
-static inline int mc13783_get_flags(struct mc13783 *mc13783)
-{
-       return mc13xxx_get_flags(mc13783_to_mc13xxx(mc13783));
-}
-
-static inline int mc13783_irq_request(struct mc13783 *mc13783, int irq,
-               irq_handler_t handler, const char *name, void *dev)
-{
-       return mc13xxx_irq_request(mc13783_to_mc13xxx(mc13783), irq,
-                       handler, name, dev);
-}
-
-static inline int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq,
-               irq_handler_t handler, const char *name, void *dev)
-{
-       return mc13xxx_irq_request_nounmask(mc13783_to_mc13xxx(mc13783), irq,
-                       handler, name, dev);
-}
-
-static inline int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev)
-{
-       return mc13xxx_irq_free(mc13783_to_mc13xxx(mc13783), irq, dev);
-}
-
-static inline int mc13783_irq_mask(struct mc13783 *mc13783, int irq)
-{
-       return mc13xxx_irq_mask(mc13783_to_mc13xxx(mc13783), irq);
-}
-
-static inline int mc13783_irq_unmask(struct mc13783 *mc13783, int irq)
-{
-       return mc13xxx_irq_unmask(mc13783_to_mc13xxx(mc13783), irq);
-}
-static inline int mc13783_irq_status(struct mc13783 *mc13783, int irq,
-               int *enabled, int *pending)
-{
-       return mc13xxx_irq_status(mc13783_to_mc13xxx(mc13783),
-                       irq, enabled, pending);
-}
-
-static inline int mc13783_irq_ack(struct mc13783 *mc13783, int irq)
-{
-       return mc13xxx_irq_ack(mc13783_to_mc13xxx(mc13783), irq);
-}
-
-#define MC13783_ADC0           43
-#define MC13783_ADC0_ADREFEN           (1 << 10)
-#define MC13783_ADC0_ADREFMODE         (1 << 11)
-#define MC13783_ADC0_TSMOD0            (1 << 12)
-#define MC13783_ADC0_TSMOD1            (1 << 13)
-#define MC13783_ADC0_TSMOD2            (1 << 14)
-#define MC13783_ADC0_ADINC1            (1 << 16)
-#define MC13783_ADC0_ADINC2            (1 << 17)
-
-#define MC13783_ADC0_TSMOD_MASK                (MC13783_ADC0_TSMOD0 | \
-                                       MC13783_ADC0_TSMOD1 | \
-                                       MC13783_ADC0_TSMOD2)
-
-#define mc13783_regulator_init_data mc13xxx_regulator_init_data
-#define mc13783_regulator_platform_data mc13xxx_regulator_platform_data
-#define mc13783_led_platform_data mc13xxx_led_platform_data
-#define mc13783_leds_platform_data mc13xxx_leds_platform_data
-
-#define mc13783_platform_data mc13xxx_platform_data
-#define MC13783_USE_TOUCHSCREEN        MC13XXX_USE_TOUCHSCREEN
-#define MC13783_USE_CODEC      MC13XXX_USE_CODEC
-#define MC13783_USE_ADC                MC13XXX_USE_ADC
-#define MC13783_USE_RTC                MC13XXX_USE_RTC
-#define MC13783_USE_REGULATOR  MC13XXX_USE_REGULATOR
-#define MC13783_USE_LED                MC13XXX_USE_LED
-
-#define MC13783_ADC_MODE_TS            1
-#define MC13783_ADC_MODE_SINGLE_CHAN   2
-#define MC13783_ADC_MODE_MULT_CHAN     3
-
-int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode,
-               unsigned int channel, unsigned int *sample);
-
-
 #define        MC13783_REG_SW1A                0
 #define        MC13783_REG_SW1B                1
 #define        MC13783_REG_SW2A                2
index c064beaaccb7f482bd969c226ef2aae0de9e8612..3816c2fac0ad66d48b42669063c4efcf2264e700 100644 (file)
@@ -37,6 +37,9 @@ int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq);
 
 int mc13xxx_get_flags(struct mc13xxx *mc13xxx);
 
+int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx,
+               unsigned int mode, unsigned int channel, unsigned int *sample);
+
 #define MC13XXX_IRQ_ADCDONE    0
 #define MC13XXX_IRQ_ADCBISDONE 1
 #define MC13XXX_IRQ_TS         2
@@ -137,17 +140,48 @@ struct mc13xxx_leds_platform_data {
        char tc3_period;
 };
 
+struct mc13xxx_buttons_platform_data {
+#define MC13783_BUTTON_DBNC_0MS                0
+#define MC13783_BUTTON_DBNC_30MS       1
+#define MC13783_BUTTON_DBNC_150MS      2
+#define MC13783_BUTTON_DBNC_750MS      3
+#define MC13783_BUTTON_ENABLE          (1 << 2)
+#define MC13783_BUTTON_POL_INVERT      (1 << 3)
+#define MC13783_BUTTON_RESET_EN                (1 << 4)
+       int b1on_flags;
+       unsigned short b1on_key;
+       int b2on_flags;
+       unsigned short b2on_key;
+       int b3on_flags;
+       unsigned short b3on_key;
+};
+
 struct mc13xxx_platform_data {
 #define MC13XXX_USE_TOUCHSCREEN (1 << 0)
 #define MC13XXX_USE_CODEC      (1 << 1)
 #define MC13XXX_USE_ADC                (1 << 2)
 #define MC13XXX_USE_RTC                (1 << 3)
-#define MC13XXX_USE_REGULATOR  (1 << 4)
-#define MC13XXX_USE_LED                (1 << 5)
        unsigned int flags;
 
        struct mc13xxx_regulator_platform_data regulators;
        struct mc13xxx_leds_platform_data *leds;
+       struct mc13xxx_buttons_platform_data *buttons;
 };
 
+#define MC13XXX_ADC_MODE_TS            1
+#define MC13XXX_ADC_MODE_SINGLE_CHAN   2
+#define MC13XXX_ADC_MODE_MULT_CHAN     3
+
+#define MC13XXX_ADC0           43
+#define MC13XXX_ADC0_ADREFEN           (1 << 10)
+#define MC13XXX_ADC0_TSMOD0            (1 << 12)
+#define MC13XXX_ADC0_TSMOD1            (1 << 13)
+#define MC13XXX_ADC0_TSMOD2            (1 << 14)
+#define MC13XXX_ADC0_ADINC1            (1 << 16)
+#define MC13XXX_ADC0_ADINC2            (1 << 17)
+
+#define MC13XXX_ADC0_TSMOD_MASK                (MC13XXX_ADC0_TSMOD0 | \
+                                       MC13XXX_ADC0_TSMOD1 | \
+                                       MC13XXX_ADC0_TSMOD2)
+
 #endif /* ifndef __LINUX_MFD_MC13XXX_H */
index 50d4a047118db3e59792d1dbfdcb405db0e25758..a80840752b4cbea3481473532499b0b65513057a 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/mfd/pcf50633/backlight.h>
 
 struct pcf50633;
+struct regmap;
 
 #define PCF50633_NUM_REGULATORS        11
 
@@ -134,7 +135,7 @@ enum {
 
 struct pcf50633 {
        struct device *dev;
-       struct i2c_client *i2c_client;
+       struct regmap *regmap;
 
        struct pcf50633_platform_data *pdata;
        int irq;
index ed8fe0d04097277e9569f78617719c64cde5ed5b..4b1211859f74a9dad29a555c758b868e29a392d1 100644 (file)
@@ -382,6 +382,7 @@ struct wm831x {
 
        /* Used by the interrupt controller code to post writes */
        int gpio_update[WM831X_NUM_GPIO_REGS];
+       bool gpio_level[WM831X_NUM_GPIO_REGS];
 
        struct mutex auxadc_lock;
        struct list_head auxadc_pending;
index 62680914762434e2652a085e35de603d3e1855ac..f44bdb7273bd819b5c30b4dba7d808a4ad46e015 100644 (file)
@@ -59,6 +59,8 @@ struct wm8994 {
        struct device *dev;
        struct regmap *regmap;
 
+       bool ldo_ena_always_driven;
+
        int gpio_base;
        int irq_base;
 
index 97cf4f27d6470120a389bb8a2e1217871b8e3411..ea32f306dca6963c312fe4f4b4e3968233f68393 100644 (file)
@@ -167,6 +167,13 @@ struct wm8994_pdata {
 
        /* WM8958 microphone bias configuration */
        int micbias[2];
+
+       /* Disable the internal pull downs on the LDOs if they are
+        * always driven (eg, connected to an always on supply or
+        * GPIO that always drives an output.  If they float power
+        * consumption will rise.
+        */
+       bool ldo_ena_always_driven;
 };
 
 #endif
index 3b3e3b8bb70652fa1d1669a9520b4b5969d6c935..3dc3a8c2c4858a1d3400aa2d5fd029d36a1177c6 100644 (file)
@@ -356,36 +356,50 @@ static inline struct page *compound_head(struct page *page)
        return page;
 }
 
+/*
+ * The atomic page->_mapcount, starts from -1: so that transitions
+ * both from it and to it can be tracked, using atomic_inc_and_test
+ * and atomic_add_negative(-1).
+ */
+static inline void reset_page_mapcount(struct page *page)
+{
+       atomic_set(&(page)->_mapcount, -1);
+}
+
+static inline int page_mapcount(struct page *page)
+{
+       return atomic_read(&(page)->_mapcount) + 1;
+}
+
 static inline int page_count(struct page *page)
 {
        return atomic_read(&compound_head(page)->_count);
 }
 
+static inline void get_huge_page_tail(struct page *page)
+{
+       /*
+        * __split_huge_page_refcount() cannot run
+        * from under us.
+        */
+       VM_BUG_ON(page_mapcount(page) < 0);
+       VM_BUG_ON(atomic_read(&page->_count) != 0);
+       atomic_inc(&page->_mapcount);
+}
+
+extern bool __get_page_tail(struct page *page);
+
 static inline void get_page(struct page *page)
 {
+       if (unlikely(PageTail(page)))
+               if (likely(__get_page_tail(page)))
+                       return;
        /*
         * Getting a normal page or the head of a compound page
-        * requires to already have an elevated page->_count. Only if
-        * we're getting a tail page, the elevated page->_count is
-        * required only in the head page, so for tail pages the
-        * bugcheck only verifies that the page->_count isn't
-        * negative.
+        * requires to already have an elevated page->_count.
         */
-       VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
+       VM_BUG_ON(atomic_read(&page->_count) <= 0);
        atomic_inc(&page->_count);
-       /*
-        * Getting a tail page will elevate both the head and tail
-        * page->_count(s).
-        */
-       if (unlikely(PageTail(page))) {
-               /*
-                * This is safe only because
-                * __split_huge_page_refcount can't run under
-                * get_page().
-                */
-               VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
-               atomic_inc(&page->first_page->_count);
-       }
 }
 
 static inline struct page *virt_to_head_page(const void *x)
@@ -803,21 +817,6 @@ static inline pgoff_t page_index(struct page *page)
        return page->index;
 }
 
-/*
- * The atomic page->_mapcount, like _count, starts from -1:
- * so that transitions both from it and to it can be tracked,
- * using atomic_inc_and_test and atomic_add_negative(-1).
- */
-static inline void reset_page_mapcount(struct page *page)
-{
-       atomic_set(&(page)->_mapcount, -1);
-}
-
-static inline int page_mapcount(struct page *page)
-{
-       return atomic_read(&(page)->_mapcount) + 1;
-}
-
 /*
  * Return true if this page is mapped into pagetables.
  */
index 3e01a19a91e8025f6f6f7499369071af53aef947..5b42f1b34eb74b59964f9a717d436cad44481c05 100644 (file)
@@ -62,10 +62,23 @@ struct page {
                        struct {
 
                                union {
-                                       atomic_t _mapcount;     /* Count of ptes mapped in mms,
-                                                        * to show when page is mapped
-                                                        * & limit reverse map searches.
-                                                        */
+                                       /*
+                                        * Count of ptes mapped in
+                                        * mms, to show when page is
+                                        * mapped & limit reverse map
+                                        * searches.
+                                        *
+                                        * Used also for tail pages
+                                        * refcounting instead of
+                                        * _count. Tail pages cannot
+                                        * be mapped and keeping the
+                                        * tail page _count zero at
+                                        * all times guarantees
+                                        * get_page_unless_zero() will
+                                        * never succeed on tail
+                                        * pages.
+                                        */
+                                       atomic_t _mapcount;
 
                                        struct {
                                                unsigned inuse:16;
index 409328d1cbbb31d5173fc197154ca48c00b0492d..ffc02135c483c2c6363eed8dc0a4b15b2def6656 100644 (file)
@@ -67,6 +67,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_EMPTY           0x4000
 
 extern int user_path_at(int, const char __user *, unsigned, struct path *);
+extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
 
 #define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
 #define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path)
index f9930c87fff3a7beb35f558a788909ee1f27c5df..c3b45480ecf75e79c41b34e078732221204b0eec 100644 (file)
@@ -12,3 +12,4 @@ header-y += ipt_ah.h
 header-y += ipt_ecn.h
 header-y += ipt_realm.h
 header-y += ipt_ttl.h
+header-y += nf_nat.h
diff --git a/include/linux/netfilter_ipv4/nf_nat.h b/include/linux/netfilter_ipv4/nf_nat.h
new file mode 100644 (file)
index 0000000..7a861d0
--- /dev/null
@@ -0,0 +1,58 @@
+#ifndef _LINUX_NF_NAT_H
+#define _LINUX_NF_NAT_H
+
+#include <linux/types.h>
+
+#define IP_NAT_RANGE_MAP_IPS 1
+#define IP_NAT_RANGE_PROTO_SPECIFIED 2
+#define IP_NAT_RANGE_PROTO_RANDOM 4
+#define IP_NAT_RANGE_PERSISTENT 8
+
+/* The protocol-specific manipulable parts of the tuple. */
+union nf_conntrack_man_proto {
+       /* Add other protocols here. */
+       __be16 all;
+
+       struct {
+               __be16 port;
+       } tcp;
+       struct {
+               __be16 port;
+       } udp;
+       struct {
+               __be16 id;
+       } icmp;
+       struct {
+               __be16 port;
+       } dccp;
+       struct {
+               __be16 port;
+       } sctp;
+       struct {
+               __be16 key;     /* GRE key is 32bit, PPtP only uses 16bit */
+       } gre;
+};
+
+/* Single range specification. */
+struct nf_nat_range {
+       /* Set to OR of flags above. */
+       unsigned int flags;
+
+       /* Inclusive: network order. */
+       __be32 min_ip, max_ip;
+
+       /* Inclusive: network order */
+       union nf_conntrack_man_proto min, max;
+};
+
+/* For backwards compat: don't use in modern code. */
+struct nf_nat_multi_range_compat {
+       unsigned int rangesize; /* Must be 1. */
+
+       /* hangs off end. */
+       struct nf_nat_range range[1];
+};
+
+#define nf_nat_multi_range nf_nat_multi_range_compat
+
+#endif
diff --git a/include/linux/pps-gpio.h b/include/linux/pps-gpio.h
new file mode 100644 (file)
index 0000000..0035abe
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * pps-gpio.h -- PPS client for GPIOs
+ *
+ *
+ * Copyright (C) 2011 James Nuss <jamesnuss@nanometrics.ca>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _PPS_GPIO_H
+#define _PPS_GPIO_H
+
+struct pps_gpio_platform_data {
+       bool assert_falling_edge;
+       bool capture_clear;
+       unsigned int gpio_pin;
+       const char *gpio_label;
+};
+
+#endif
index 0cee0152aca9a17c07658612f85ceec9f9986101..b66d13d1bdc04199a78665022f676e238960fa74 100644 (file)
@@ -39,5 +39,6 @@
 #define RIO_DID_IDTCPS1616             0x0379
 #define RIO_DID_IDTVPS1616             0x0377
 #define RIO_DID_IDTSPS1616             0x0378
+#define RIO_DID_TSI721                 0x80ab
 
 #endif                         /* LINUX_RIO_IDS_H */
index 1feb2de2ee57b430c7980ff6c2469548188c19a3..10d6b226afc5aebdda9f3a65c72550e1af54d3c5 100644 (file)
@@ -83,13 +83,6 @@ struct  seminfo {
 
 struct task_struct;
 
-/* One semaphore structure for each semaphore in the system. */
-struct sem {
-       int     semval;         /* current value */
-       int     sempid;         /* pid of last operation */
-       struct list_head sem_pending; /* pending single-sop operations */
-};
-
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
        struct kern_ipc_perm    ____cacheline_aligned_in_smp
@@ -103,51 +96,21 @@ struct sem_array {
        int                     complex_count;  /* pending complex operations */
 };
 
-/* One queue for each sleeping process in the system. */
-struct sem_queue {
-       struct list_head        simple_list; /* queue of pending operations */
-       struct list_head        list;    /* queue of pending operations */
-       struct task_struct      *sleeper; /* this process */
-       struct sem_undo         *undo;   /* undo structure */
-       int                     pid;     /* process id of requesting process */
-       int                     status;  /* completion status of operation */
-       struct sembuf           *sops;   /* array of pending operations */
-       int                     nsops;   /* number of operations */
-       int                     alter;   /* does the operation alter the array? */
-};
-
-/* Each task has a list of undo requests. They are executed automatically
- * when the process exits.
- */
-struct sem_undo {
-       struct list_head        list_proc;      /* per-process list: all undos from one process. */
-                                               /* rcu protected */
-       struct rcu_head         rcu;            /* rcu struct for sem_undo() */
-       struct sem_undo_list    *ulp;           /* sem_undo_list for the process */
-       struct list_head        list_id;        /* per semaphore array list: all undos for one array */
-       int                     semid;          /* semaphore set identifier */
-       short *                 semadj;         /* array of adjustments, one per semaphore */
-};
-
-/* sem_undo_list controls shared access to the list of sem_undo structures
- * that may be shared among all a CLONE_SYSVSEM task group.
- */ 
-struct sem_undo_list {
-       atomic_t                refcnt;
-       spinlock_t              lock;
-       struct list_head        list_proc;
-};
+#ifdef CONFIG_SYSVIPC
 
 struct sysv_sem {
        struct sem_undo_list *undo_list;
 };
 
-#ifdef CONFIG_SYSVIPC
-
 extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk);
 extern void exit_sem(struct task_struct *tsk);
 
 #else
+
+struct sysv_sem {
+       /* empty */
+};
+
 static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
 {
        return 0;
index 30cae70874f4a1f10dd564aee7f65858091a10c3..bc8c9208f7e21c455fcc4463601ec217de6ec4da 100644 (file)
@@ -61,6 +61,14 @@ struct pinmux_data_reg {
        .reg = r, .reg_width = r_width, \
        .enum_ids = (pinmux_enum_t [r_width]) \
 
+struct pinmux_irq {
+       int irq;
+       pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_IRQ(irq_nr, ids...)                        \
+       { .irq = irq_nr, .enum_ids = (pinmux_enum_t []) { ids, 0 } }    \
+
 struct pinmux_range {
        pinmux_enum_t begin;
        pinmux_enum_t end;
@@ -87,7 +95,9 @@ struct pinmux_info {
        pinmux_enum_t *gpio_data;
        unsigned int gpio_data_size;
 
-       unsigned long *gpio_in_use;
+       struct pinmux_irq *gpio_irq;
+       unsigned int gpio_irq_size;
+
        struct gpio_chip chip;
 };
 
index 6a6b352326d7002197158f7ade024fffec3b07ae..fe864885c1edd5edbebc0504544e4950ebf2962f 100644 (file)
@@ -1806,12 +1806,12 @@ static inline void skb_frag_set_page(struct sk_buff *skb, int f,
 
 /**
  * skb_frag_dma_map - maps a paged fragment via the DMA API
- * @device: the device to map the fragment to
+ * @dev: the device to map the fragment to
  * @frag: the paged fragment to map
  * @offset: the offset within the fragment (starting at the
  *          fragment's own offset)
  * @size: the number of bytes to map
- * @direction: the direction of the mapping (%PCI_DMA_*)
+ * @dir: the direction of the mapping (%PCI_DMA_*)
  *
  * Maps the page associated with @frag to @device.
  */
index 9a1ec10fd504a087961592074ef455a7456c43c1..703cfa33a3ca60c79111ba3808f228c8f1f7b505 100644 (file)
@@ -931,6 +931,7 @@ enum
 #ifdef __KERNEL__
 #include <linux/list.h>
 #include <linux/rcupdate.h>
+#include <linux/wait.h>
 
 /* For the /proc/sys support */
 struct ctl_table;
@@ -1011,6 +1012,26 @@ extern int proc_do_large_bitmap(struct ctl_table *, int,
  * cover common cases.
  */
 
+/* Support for userspace poll() to watch for changes */
+struct ctl_table_poll {
+       atomic_t event;
+       wait_queue_head_t wait;
+};
+
+static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
+{
+       return (void *)(unsigned long)atomic_read(&poll->event);
+}
+
+void proc_sys_poll_notify(struct ctl_table_poll *poll);
+
+#define __CTL_TABLE_POLL_INITIALIZER(name) {                           \
+       .event = ATOMIC_INIT(0),                                        \
+       .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
+
+#define DEFINE_CTL_TABLE_POLL(name)                                    \
+       struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
+
 /* A sysctl table is an array of struct ctl_table: */
 struct ctl_table 
 {
@@ -1021,6 +1042,7 @@ struct ctl_table
        struct ctl_table *child;
        struct ctl_table *parent;       /* Automatically set */
        proc_handler *proc_handler;     /* Callback for text formatting */
+       struct ctl_table_poll *poll;
        void *extra1;
        void *extra2;
 };
index 4e5b0213fdc16adc61dbb4d46ee5e98030c9a1cc..c714ed75eae25c523e60c1cc6df1b249d3659134 100644 (file)
@@ -37,6 +37,14 @@ struct new_utsname {
 #include <linux/nsproxy.h>
 #include <linux/err.h>
 
+enum uts_proc {
+       UTS_PROC_OSTYPE,
+       UTS_PROC_OSRELEASE,
+       UTS_PROC_VERSION,
+       UTS_PROC_HOSTNAME,
+       UTS_PROC_DOMAINNAME,
+};
+
 struct user_namespace;
 extern struct user_namespace init_user_ns;
 
@@ -80,6 +88,14 @@ static inline struct uts_namespace *copy_utsname(unsigned long flags,
 }
 #endif
 
+#ifdef CONFIG_PROC_SYSCTL
+extern void uts_proc_notify(enum uts_proc proc);
+#else
+static inline void uts_proc_notify(enum uts_proc proc)
+{
+}
+#endif
+
 static inline struct new_utsname *utsname(void)
 {
        return &current->nsproxy->uts_ns->name;
index 851ebf1a4476f557c28d605e52dab0cf8f27546a..4c069d8bd74038568acc37992c4eb111188f261b 100644 (file)
@@ -131,10 +131,10 @@ void unregister_virtio_device(struct virtio_device *dev);
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).
  * @id_table: the ids serviced by this driver.
- * @feature_table: an array of feature numbers supported by this device.
+ * @feature_table: an array of feature numbers supported by this driver.
  * @feature_table_size: number of entries in the feature table array.
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
- * @remove: the function when a device is removed.
+ * @remove: the function to call when a device is removed.
  * @config_changed: optional function to call when the device configuration
  *    changes; may be called in interrupt context.
  */
index 39c88c5ad19dfd85c5fcbed38837d514c477679f..add4790b21fe4ee18635db9456a43db71bb39997 100644 (file)
@@ -155,6 +155,9 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
 #define virtio_config_val(vdev, fbit, offset, v) \
        virtio_config_buf((vdev), (fbit), (offset), (v), sizeof(*v))
 
+#define virtio_config_val_len(vdev, fbit, offset, v, len) \
+       virtio_config_buf((vdev), (fbit), (offset), (v), (len))
+
 static inline int virtio_config_buf(struct virtio_device *vdev,
                                    unsigned int fbit,
                                    unsigned int offset,
diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h
new file mode 100644 (file)
index 0000000..27c7ede
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Virtio platform device driver
+ *
+ * Copyright 2011, ARM Ltd.
+ *
+ * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_VIRTIO_MMIO_H
+#define _LINUX_VIRTIO_MMIO_H
+
+/*
+ * Control registers
+ */
+
+/* Magic value ("virt" string) - Read Only */
+#define VIRTIO_MMIO_MAGIC_VALUE                0x000
+
+/* Virtio device version - Read Only */
+#define VIRTIO_MMIO_VERSION            0x004
+
+/* Virtio device ID - Read Only */
+#define VIRTIO_MMIO_DEVICE_ID          0x008
+
+/* Virtio vendor ID - Read Only */
+#define VIRTIO_MMIO_VENDOR_ID          0x00c
+
+/* Bitmask of the features supported by the host
+ * (32 bits per set) - Read Only */
+#define VIRTIO_MMIO_HOST_FEATURES      0x010
+
+/* Host features set selector - Write Only */
+#define VIRTIO_MMIO_HOST_FEATURES_SEL  0x014
+
+/* Bitmask of features activated by the guest
+ * (32 bits per set) - Write Only */
+#define VIRTIO_MMIO_GUEST_FEATURES     0x020
+
+/* Activated features set selector - Write Only */
+#define VIRTIO_MMIO_GUEST_FEATURES_SET 0x024
+
+/* Guest's memory page size in bytes - Write Only */
+#define VIRTIO_MMIO_GUEST_PAGE_SIZE    0x028
+
+/* Queue selector - Write Only */
+#define VIRTIO_MMIO_QUEUE_SEL          0x030
+
+/* Maximum size of the currently selected queue - Read Only */
+#define VIRTIO_MMIO_QUEUE_NUM_MAX      0x034
+
+/* Queue size for the currently selected queue - Write Only */
+#define VIRTIO_MMIO_QUEUE_NUM          0x038
+
+/* Used Ring alignment for the currently selected queue - Write Only */
+#define VIRTIO_MMIO_QUEUE_ALIGN                0x03c
+
+/* Guest's PFN for the currently selected queue - Read Write */
+#define VIRTIO_MMIO_QUEUE_PFN          0x040
+
+/* Queue notifier - Write Only */
+#define VIRTIO_MMIO_QUEUE_NOTIFY       0x050
+
+/* Interrupt status - Read Only */
+#define VIRTIO_MMIO_INTERRUPT_STATUS   0x060
+
+/* Interrupt acknowledge - Write Only */
+#define VIRTIO_MMIO_INTERRUPT_ACK      0x064
+
+/* Device status register - Read Write */
+#define VIRTIO_MMIO_STATUS             0x070
+
+/* The config space is defined by each driver as
+ * the per-driver configuration space - Read Write */
+#define VIRTIO_MMIO_CONFIG             0x100
+
+
+
+/*
+ * Interrupt flags (re: interrupt status & acknowledge registers)
+ */
+
+#define VIRTIO_MMIO_INT_VRING          (1 << 0)
+#define VIRTIO_MMIO_INT_CONFIG         (1 << 1)
+
+#endif
index 4a32cb6da425abe92c9eaa019121f4710171c77a..36be0f6e18a9cf9608c4288cb3343f618e329c87 100644 (file)
@@ -135,13 +135,13 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p,
        vr->num = num;
        vr->desc = p;
        vr->avail = p + num*sizeof(struct vring_desc);
-       vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + align-1)
-                           & ~(align - 1));
+       vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(__u16)
+               + align-1) & ~(align - 1));
 }
 
 static inline unsigned vring_size(unsigned int num, unsigned long align)
 {
-       return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
+       return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
                 + align - 1) & ~(align - 1))
                + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
 }
index 8fa4430f99c14e025ad6a189a1116f72ae831317..05b08c926aded3927f5532a63f6d5a78ee058f2c 100644 (file)
@@ -425,9 +425,9 @@ struct ip_vs_protocol {
 
        const char *(*state_name)(int state);
 
-       int (*state_transition)(struct ip_vs_conn *cp, int direction,
-                               const struct sk_buff *skb,
-                               struct ip_vs_proto_data *pd);
+       void (*state_transition)(struct ip_vs_conn *cp, int direction,
+                                const struct sk_buff *skb,
+                                struct ip_vs_proto_data *pd);
 
        int (*register_app)(struct net *net, struct ip_vs_app *inc);
 
@@ -1378,7 +1378,7 @@ static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
 
 extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
                                   int outin);
-extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp);
+extern int ip_vs_confirm_conntrack(struct sk_buff *skb);
 extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
                                      struct ip_vs_conn *cp, u_int8_t proto,
                                      const __be16 port, int from_rs);
@@ -1396,8 +1396,7 @@ static inline void ip_vs_update_conntrack(struct sk_buff *skb,
 {
 }
 
-static inline int ip_vs_confirm_conntrack(struct sk_buff *skb,
-                                         struct ip_vs_conn *cp)
+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb)
 {
        return NF_ACCEPT;
 }
index 7ca6bdd5bae64182056b20b0d782b9bb501dc63f..2f8fb77bfdd1f7d9583e42ddbee7cf0240228e17 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <linux/netfilter_ipv4/nf_nat.h>
 #include <linux/list_nulls.h>
 
 /* A `tuple' is a structure containing the information to uniquely
 
 #define NF_CT_TUPLE_L3SIZE     ARRAY_SIZE(((union nf_inet_addr *)NULL)->all)
 
-/* The protocol-specific manipulable parts of the tuple: always in
-   network order! */
-union nf_conntrack_man_proto {
-       /* Add other protocols here. */
-       __be16 all;
-
-       struct {
-               __be16 port;
-       } tcp;
-       struct {
-               __be16 port;
-       } udp;
-       struct {
-               __be16 id;
-       } icmp;
-       struct {
-               __be16 port;
-       } dccp;
-       struct {
-               __be16 port;
-       } sctp;
-       struct {
-               __be16 key;     /* GRE key is 32bit, PPtP only uses 16bit */
-       } gre;
-};
-
 /* The manipulable part of the tuple. */
 struct nf_conntrack_man {
        union nf_inet_addr u3;
index 0346b0070864c34c3bf4eaa89ca16a708ad72b88..b8872df7285f064e95679c64da141356f84de590 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _NF_NAT_H
 #define _NF_NAT_H
 #include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/nf_nat.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
 #define NF_NAT_MAPPING_TYPE_MAX_NAMELEN 16
@@ -14,11 +15,6 @@ enum nf_nat_manip_type {
 #define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \
                             (hooknum) != NF_INET_LOCAL_IN)
 
-#define IP_NAT_RANGE_MAP_IPS 1
-#define IP_NAT_RANGE_PROTO_SPECIFIED 2
-#define IP_NAT_RANGE_PROTO_RANDOM 4
-#define IP_NAT_RANGE_PERSISTENT 8
-
 /* NAT sequence number modifications */
 struct nf_nat_seq {
        /* position of the last TCP sequence number modification (if any) */
@@ -28,26 +24,6 @@ struct nf_nat_seq {
        int16_t offset_before, offset_after;
 };
 
-/* Single range specification. */
-struct nf_nat_range {
-       /* Set to OR of flags above. */
-       unsigned int flags;
-
-       /* Inclusive: network order. */
-       __be32 min_ip, max_ip;
-
-       /* Inclusive: network order */
-       union nf_conntrack_man_proto min, max;
-};
-
-/* For backwards compat: don't use in modern code. */
-struct nf_nat_multi_range_compat {
-       unsigned int rangesize; /* Must be 1. */
-
-       /* hangs off end. */
-       struct nf_nat_range range[1];
-};
-
 #include <linux/list.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 #include <net/netfilter/nf_conntrack_extend.h>
index e147f42d643d16caad50e971e57e7b879a0bffbb..bb18c4d69aba1da89488af60999f03341648ee61 100644 (file)
@@ -1403,11 +1403,13 @@ enum tcp_seq_states {
        TCP_SEQ_STATE_TIME_WAIT,
 };
 
+int tcp_seq_open(struct inode *inode, struct file *file);
+
 struct tcp_seq_afinfo {
-       char                    *name;
-       sa_family_t             family;
-       struct file_operations  seq_fops;
-       struct seq_operations   seq_ops;
+       char                            *name;
+       sa_family_t                     family;
+       const struct file_operations    *seq_fops;
+       struct seq_operations           seq_ops;
 };
 
 struct tcp_iter_state {
index 67ea6fcb3ec063165bf0e4fe69a33834b18a8e9f..3b285f402f480e76aa9b69136447df9f6d66bb65 100644 (file)
@@ -230,12 +230,14 @@ extern struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *sadd
 #endif
 
 /* /proc */
+int udp_seq_open(struct inode *inode, struct file *file);
+
 struct udp_seq_afinfo {
-       char                    *name;
-       sa_family_t             family;
-       struct udp_table        *udp_table;
-       struct file_operations  seq_fops;
-       struct seq_operations   seq_ops;
+       char                            *name;
+       sa_family_t                     family;
+       struct udp_table                *udp_table;
+       const struct file_operations    *seq_fops;
+       struct seq_operations           seq_ops;
 };
 
 struct udp_iter_state {
index b50a54736242ca21d19a814ab37b27783216397a..748ff7cbe5557989e6fd40fcacb70e5b8760956f 100644 (file)
@@ -9,9 +9,12 @@
 
 struct ext4_allocation_context;
 struct ext4_allocation_request;
+struct ext4_extent;
 struct ext4_prealloc_space;
 struct ext4_inode_info;
 struct mpage_da_data;
+struct ext4_map_blocks;
+struct ext4_extent;
 
 #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
 
@@ -1032,9 +1035,9 @@ TRACE_EVENT(ext4_forget,
 );
 
 TRACE_EVENT(ext4_da_update_reserve_space,
-       TP_PROTO(struct inode *inode, int used_blocks),
+       TP_PROTO(struct inode *inode, int used_blocks, int quota_claim),
 
-       TP_ARGS(inode, used_blocks),
+       TP_ARGS(inode, used_blocks, quota_claim),
 
        TP_STRUCT__entry(
                __field(        dev_t,  dev                     )
@@ -1045,6 +1048,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
                __field(        int,    reserved_data_blocks    )
                __field(        int,    reserved_meta_blocks    )
                __field(        int,    allocated_meta_blocks   )
+               __field(        int,    quota_claim             )
        ),
 
        TP_fast_assign(
@@ -1053,19 +1057,24 @@ TRACE_EVENT(ext4_da_update_reserve_space,
                __entry->mode   = inode->i_mode;
                __entry->i_blocks = inode->i_blocks;
                __entry->used_blocks = used_blocks;
-               __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-               __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
-               __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
+               __entry->reserved_data_blocks =
+                               EXT4_I(inode)->i_reserved_data_blocks;
+               __entry->reserved_meta_blocks =
+                               EXT4_I(inode)->i_reserved_meta_blocks;
+               __entry->allocated_meta_blocks =
+                               EXT4_I(inode)->i_allocated_meta_blocks;
+               __entry->quota_claim = quota_claim;
        ),
 
        TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
                  "reserved_data_blocks %d reserved_meta_blocks %d "
-                 "allocated_meta_blocks %d",
+                 "allocated_meta_blocks %d quota_claim %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->mode, __entry->i_blocks,
                  __entry->used_blocks, __entry->reserved_data_blocks,
-                 __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
+                 __entry->reserved_meta_blocks, __entry->allocated_meta_blocks,
+                 __entry->quota_claim)
 );
 
 TRACE_EVENT(ext4_da_reserve_space,
@@ -1386,6 +1395,87 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit,
        TP_ARGS(inode)
 );
 
+/* 'ux' is the uninitialized extent. */
+TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
+       TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
+                struct ext4_extent *ux),
+
+       TP_ARGS(inode, map, ux),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    m_lblk  )
+               __field(        unsigned,       m_len   )
+               __field(        ext4_lblk_t,    u_lblk  )
+               __field(        unsigned,       u_len   )
+               __field(        ext4_fsblk_t,   u_pblk  )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->m_lblk         = map->m_lblk;
+               __entry->m_len          = map->m_len;
+               __entry->u_lblk         = le32_to_cpu(ux->ee_block);
+               __entry->u_len          = ext4_ext_get_actual_len(ux);
+               __entry->u_pblk         = ext4_ext_pblock(ux);
+       ),
+
+       TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u "
+                 "u_pblk %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->m_lblk, __entry->m_len,
+                 __entry->u_lblk, __entry->u_len, __entry->u_pblk)
+);
+
+/*
+ * 'ux' is the uninitialized extent.
+ * 'ix' is the initialized extent to which blocks are transferred.
+ */
+TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
+       TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
+                struct ext4_extent *ux, struct ext4_extent *ix),
+
+       TP_ARGS(inode, map, ux, ix),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    m_lblk  )
+               __field(        unsigned,       m_len   )
+               __field(        ext4_lblk_t,    u_lblk  )
+               __field(        unsigned,       u_len   )
+               __field(        ext4_fsblk_t,   u_pblk  )
+               __field(        ext4_lblk_t,    i_lblk  )
+               __field(        unsigned,       i_len   )
+               __field(        ext4_fsblk_t,   i_pblk  )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->m_lblk         = map->m_lblk;
+               __entry->m_len          = map->m_len;
+               __entry->u_lblk         = le32_to_cpu(ux->ee_block);
+               __entry->u_len          = ext4_ext_get_actual_len(ux);
+               __entry->u_pblk         = ext4_ext_pblock(ux);
+               __entry->i_lblk         = le32_to_cpu(ix->ee_block);
+               __entry->i_len          = ext4_ext_get_actual_len(ix);
+               __entry->i_pblk         = ext4_ext_pblock(ix);
+       ),
+
+       TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u "
+                 "u_lblk %u u_len %u u_pblk %llu "
+                 "i_lblk %u i_len %u i_pblk %llu ",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->m_lblk, __entry->m_len,
+                 __entry->u_lblk, __entry->u_len, __entry->u_pblk,
+                 __entry->i_lblk, __entry->i_len, __entry->i_pblk)
+);
+
 DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
        TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
                 unsigned int len, unsigned int flags),
@@ -1589,6 +1679,382 @@ DEFINE_EVENT(ext4__trim, ext4_trim_all_free,
        TP_ARGS(sb, group, start, len)
 );
 
+TRACE_EVENT(ext4_ext_handle_uninitialized_extents,
+       TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
+                unsigned int allocated, ext4_fsblk_t newblock),
+
+       TP_ARGS(inode, map, allocated, newblock),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        ext4_lblk_t,    lblk            )
+               __field(        ext4_fsblk_t,   pblk            )
+               __field(        unsigned int,   len             )
+               __field(        int,            flags           )
+               __field(        unsigned int,   allocated       )
+               __field(        ext4_fsblk_t,   newblk          )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->lblk           = map->m_lblk;
+               __entry->pblk           = map->m_pblk;
+               __entry->len            = map->m_len;
+               __entry->flags          = map->m_flags;
+               __entry->allocated      = allocated;
+               __entry->newblk         = newblock;
+       ),
+
+       TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %d"
+                 "allocated %d newblock %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->lblk, (unsigned long long) __entry->pblk,
+                 __entry->len, __entry->flags,
+                 (unsigned int) __entry->allocated,
+                 (unsigned long long) __entry->newblk)
+);
+
+TRACE_EVENT(ext4_get_implied_cluster_alloc_exit,
+       TP_PROTO(struct super_block *sb, struct ext4_map_blocks *map, int ret),
+
+       TP_ARGS(sb, map, ret),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    lblk    )
+               __field(        ext4_fsblk_t,   pblk    )
+               __field(        unsigned int,   len     )
+               __field(        unsigned int,   flags   )
+               __field(        int,            ret     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->lblk   = map->m_lblk;
+               __entry->pblk   = map->m_pblk;
+               __entry->len    = map->m_len;
+               __entry->flags  = map->m_flags;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %u ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->lblk, (unsigned long long) __entry->pblk,
+                 __entry->len, __entry->flags, __entry->ret)
+);
+
+TRACE_EVENT(ext4_ext_put_in_cache,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len,
+                ext4_fsblk_t start),
+
+       TP_ARGS(inode, lblk, len, start),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    lblk    )
+               __field(        unsigned int,   len     )
+               __field(        ext4_fsblk_t,   start   )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->len    = len;
+               __entry->start  = start;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->lblk,
+                 __entry->len,
+                 (unsigned long long) __entry->start)
+);
+
+TRACE_EVENT(ext4_ext_in_cache,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret),
+
+       TP_ARGS(inode, lblk, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    lblk    )
+               __field(        int,            ret     )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %u ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->lblk,
+                 __entry->ret)
+
+);
+
+TRACE_EVENT(ext4_find_delalloc_range,
+       TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to,
+               int reverse, int found, ext4_lblk_t found_blk),
+
+       TP_ARGS(inode, from, to, reverse, found, found_blk),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        ext4_lblk_t,    from            )
+               __field(        ext4_lblk_t,    to              )
+               __field(        int,            reverse         )
+               __field(        int,            found           )
+               __field(        ext4_lblk_t,    found_blk       )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->from           = from;
+               __entry->to             = to;
+               __entry->reverse        = reverse;
+               __entry->found          = found;
+               __entry->found_blk      = found_blk;
+       ),
+
+       TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d "
+                 "(blk = %u)",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->from, (unsigned) __entry->to,
+                 __entry->reverse, __entry->found,
+                 (unsigned) __entry->found_blk)
+);
+
+TRACE_EVENT(ext4_get_reserved_cluster_alloc,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len),
+
+       TP_ARGS(inode, lblk, len),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    lblk    )
+               __field(        unsigned int,   len     )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->len    = len;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %u len %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->lblk,
+                 __entry->len)
+);
+
+TRACE_EVENT(ext4_ext_show_extent,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+                unsigned short len),
+
+       TP_ARGS(inode, lblk, pblk, len),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    lblk    )
+               __field(        ext4_fsblk_t,   pblk    )
+               __field(        unsigned short, len     )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->pblk   = pblk;
+               __entry->len    = len;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->lblk,
+                 (unsigned long long) __entry->pblk,
+                 (unsigned short) __entry->len)
+);
+
+TRACE_EVENT(ext4_remove_blocks,
+           TP_PROTO(struct inode *inode, struct ext4_extent *ex,
+               ext4_lblk_t from, ext4_fsblk_t to,
+               ext4_fsblk_t partial_cluster),
+
+       TP_ARGS(inode, ex, from, to, partial_cluster),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    ee_lblk )
+               __field(        ext4_fsblk_t,   ee_pblk )
+               __field(        unsigned short, ee_len  )
+               __field(        ext4_lblk_t,    from    )
+               __field(        ext4_lblk_t,    to      )
+               __field(        ext4_fsblk_t,   partial )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->ee_lblk        = cpu_to_le32(ex->ee_block);
+               __entry->ee_pblk        = ext4_ext_pblock(ex);
+               __entry->ee_len         = ext4_ext_get_actual_len(ex);
+               __entry->from           = from;
+               __entry->to             = to;
+               __entry->partial        = partial_cluster;
+       ),
+
+       TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
+                 "from %u to %u partial_cluster %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->ee_lblk,
+                 (unsigned long long) __entry->ee_pblk,
+                 (unsigned short) __entry->ee_len,
+                 (unsigned) __entry->from,
+                 (unsigned) __entry->to,
+                 (unsigned) __entry->partial)
+);
+
+TRACE_EVENT(ext4_ext_rm_leaf,
+       TP_PROTO(struct inode *inode, ext4_lblk_t start,
+                struct ext4_extent *ex, ext4_fsblk_t partial_cluster),
+
+       TP_ARGS(inode, start, ex, partial_cluster),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    start   )
+               __field(        ext4_lblk_t,    ee_lblk )
+               __field(        ext4_fsblk_t,   ee_pblk )
+               __field(        short,          ee_len  )
+               __field(        ext4_fsblk_t,   partial )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->start          = start;
+               __entry->ee_lblk        = le32_to_cpu(ex->ee_block);
+               __entry->ee_pblk        = ext4_ext_pblock(ex);
+               __entry->ee_len         = ext4_ext_get_actual_len(ex);
+               __entry->partial        = partial_cluster;
+       ),
+
+       TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
+                 "partial_cluster %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->start,
+                 (unsigned) __entry->ee_lblk,
+                 (unsigned long long) __entry->ee_pblk,
+                 (unsigned short) __entry->ee_len,
+                 (unsigned) __entry->partial)
+);
+
+TRACE_EVENT(ext4_ext_rm_idx,
+       TP_PROTO(struct inode *inode, ext4_fsblk_t pblk),
+
+       TP_ARGS(inode, pblk),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_fsblk_t,   pblk    )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->pblk   = pblk;
+       ),
+
+       TP_printk("dev %d,%d ino %lu index_pblk %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pblk)
+);
+
+TRACE_EVENT(ext4_ext_remove_space,
+       TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth),
+
+       TP_ARGS(inode, start, depth),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    start   )
+               __field(        int,            depth   )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->start  = start;
+               __entry->depth  = depth;
+       ),
+
+       TP_printk("dev %d,%d ino %lu since %u depth %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->start,
+                 __entry->depth)
+);
+
+TRACE_EVENT(ext4_ext_remove_space_done,
+       TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth,
+               ext4_lblk_t partial, unsigned short eh_entries),
+
+       TP_ARGS(inode, start, depth, partial, eh_entries),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        ext4_lblk_t,    start           )
+               __field(        int,            depth           )
+               __field(        ext4_lblk_t,    partial         )
+               __field(        unsigned short, eh_entries      )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->start          = start;
+               __entry->depth          = depth;
+               __entry->partial        = partial;
+               __entry->eh_entries     = eh_entries;
+       ),
+
+       TP_printk("dev %d,%d ino %lu since %u depth %d partial %u "
+                 "remaining_entries %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->start,
+                 __entry->depth,
+                 (unsigned) __entry->partial,
+                 (unsigned short) __entry->eh_entries)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
index 31ba0fd0f36b5d22c326b38fdc1bb904b8e84cbc..43298f9810fba87bdf0083ac1fa5401b0819c4cb 100644 (file)
@@ -947,7 +947,7 @@ config UID16
 config SYSCTL_SYSCALL
        bool "Sysctl syscall support" if EXPERT
        depends on PROC_SYSCTL
-       default y
+       default n
        select SYSCTL
        ---help---
          sys_sysctl uses binary paths that have been found challenging
@@ -959,7 +959,7 @@ config SYSCTL_SYSCALL
          trying to save some space it is probably safe to disable this,
          making your kernel marginally smaller.
 
-         If unsure say Y here.
+         If unsure say N here.
 
 config KALLSYMS
         bool "Load all symbols for debugging/ksymoops" if EXPERT
index c0851a8e030cbcf38701f77a5d79e253d81e0327..0f6e1d985a3b2851c662337c1671b57cc4270f0a 100644 (file)
@@ -28,7 +28,7 @@ int __initdata rd_doload;     /* 1 = load RAM disk, 0 = don't load */
 int root_mountflags = MS_RDONLY | MS_SILENT;
 static char * __initdata root_device_name;
 static char __initdata saved_root_name[64];
-static int __initdata root_wait;
+static int root_wait;
 
 dev_t ROOT_DEV;
 
@@ -85,12 +85,15 @@ no_match:
 
 /**
  * devt_from_partuuid - looks up the dev_t of a partition by its UUID
- * @uuid:      36 byte char array containing a hex ascii UUID
+ * @uuid:      min 36 byte char array containing a hex ascii UUID
  *
  * The function will return the first partition which contains a matching
  * UUID value in its partition_meta_info struct.  This does not search
  * by filesystem UUIDs.
  *
+ * If @uuid is followed by a "/PARTNROFF=%d", then the number will be
+ * extracted and used as an offset from the partition identified by the UUID.
+ *
  * Returns the matching dev_t on success or 0 on failure.
  */
 static dev_t devt_from_partuuid(char *uuid_str)
@@ -98,6 +101,28 @@ static dev_t devt_from_partuuid(char *uuid_str)
        dev_t res = 0;
        struct device *dev = NULL;
        u8 uuid[16];
+       struct gendisk *disk;
+       struct hd_struct *part;
+       int offset = 0;
+
+       if (strlen(uuid_str) < 36)
+               goto done;
+
+       /* Check for optional partition number offset attributes. */
+       if (uuid_str[36]) {
+               char c = 0;
+               /* Explicitly fail on poor PARTUUID syntax. */
+               if (sscanf(&uuid_str[36],
+                          "/PARTNROFF=%d%c", &offset, &c) != 1) {
+                       printk(KERN_ERR "VFS: PARTUUID= is invalid.\n"
+                        "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
+                       if (root_wait)
+                               printk(KERN_ERR
+                                    "Disabling rootwait; root= is invalid.\n");
+                       root_wait = 0;
+                       goto done;
+               }
+       }
 
        /* Pack the requested UUID in the expected format. */
        part_pack_uuid(uuid_str, uuid);
@@ -107,8 +132,21 @@ static dev_t devt_from_partuuid(char *uuid_str)
                goto done;
 
        res = dev->devt;
-       put_device(dev);
 
+       /* Attempt to find the partition by offset. */
+       if (!offset)
+               goto no_offset;
+
+       res = 0;
+       disk = part_to_disk(dev_to_part(dev));
+       part = disk_get_part(disk, dev_to_part(dev)->partno + offset);
+       if (part) {
+               res = part_devt(part);
+               put_device(part_to_dev(part));
+       }
+
+no_offset:
+       put_device(dev);
 done:
        return res;
 }
@@ -126,6 +164,8 @@ done:
  *        used when disk name of partitioned disk ends on a digit.
  *     6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
  *        unique id of a partition if the partition table provides it.
+ *     7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ *        a partition with a known unique id.
  *
  *     If name doesn't have fall into the categories above, we return (0,0).
  *     block_class is used to check if something is a disk name. If the disk
@@ -143,8 +183,6 @@ dev_t name_to_dev_t(char *name)
 #ifdef CONFIG_BLOCK
        if (strncmp(name, "PARTUUID=", 9) == 0) {
                name += 9;
-               if (strlen(name) != 36)
-                       goto fail;
                res = devt_from_partuuid(name);
                if (!res)
                        goto fail;
index fe9acb0ae4808bd663187d802960d788f73276c1..887629e24c54a5065aa0ee9aca16bbe5d0becc66 100644 (file)
@@ -119,6 +119,20 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
                goto done;
        }
 
+       /*
+        * Read 512 bytes further to check if cramfs is padded
+        */
+       sys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0);
+       sys_read(fd, buf, size);
+
+       if (cramfsb->magic == CRAMFS_MAGIC) {
+               printk(KERN_NOTICE
+                      "RAMDISK: cramfs filesystem found at block %d\n",
+                      start_block);
+               nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
+               goto done;
+       }
+
        /*
         * Read block 1 to test for minix and ext2 superblock
         */
index c8e00f8b4be1b79bd49bf841924d8d8ea5945b8c..5215a81420df9b1802dd9f6c40c465f1b63a2bbd 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
 #include <asm/uaccess.h>
 #include "util.h"
 
+/* One semaphore structure for each semaphore in the system. */
+struct sem {
+       int     semval;         /* current value */
+       int     sempid;         /* pid of last operation */
+       struct list_head sem_pending; /* pending single-sop operations */
+};
+
+/* One queue for each sleeping process in the system. */
+struct sem_queue {
+       struct list_head        simple_list; /* queue of pending operations */
+       struct list_head        list;    /* queue of pending operations */
+       struct task_struct      *sleeper; /* this process */
+       struct sem_undo         *undo;   /* undo structure */
+       int                     pid;     /* process id of requesting process */
+       int                     status;  /* completion status of operation */
+       struct sembuf           *sops;   /* array of pending operations */
+       int                     nsops;   /* number of operations */
+       int                     alter;   /* does *sops alter the array? */
+};
+
+/* Each task has a list of undo requests. They are executed automatically
+ * when the process exits.
+ */
+struct sem_undo {
+       struct list_head        list_proc;      /* per-process list: *
+                                                * all undos from one process
+                                                * rcu protected */
+       struct rcu_head         rcu;            /* rcu struct for sem_undo */
+       struct sem_undo_list    *ulp;           /* back ptr to sem_undo_list */
+       struct list_head        list_id;        /* per semaphore array list:
+                                                * all undos for one array */
+       int                     semid;          /* semaphore set identifier */
+       short                   *semadj;        /* array of adjustments */
+                                               /* one per semaphore */
+};
+
+/* sem_undo_list controls shared access to the list of sem_undo structures
+ * that may be shared among all a CLONE_SYSVSEM task group.
+ */
+struct sem_undo_list {
+       atomic_t                refcnt;
+       spinlock_t              lock;
+       struct list_head        list_proc;
+};
+
+
 #define sem_ids(ns)    ((ns)->ids[IPC_SEM_IDS])
 
 #define sem_unlock(sma)                ipc_unlock(&(sma)->sem_perm)
@@ -1426,6 +1472,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 
        queue.status = -EINTR;
        queue.sleeper = current;
+
+sleep_again:
        current->state = TASK_INTERRUPTIBLE;
        sem_unlock(sma);
 
@@ -1460,7 +1508,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
         * Array removed? If yes, leave without sem_unlock().
         */
        if (IS_ERR(sma)) {
-               error = -EIDRM;
                goto out_free;
        }
 
@@ -1479,6 +1526,13 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
         */
        if (timeout && jiffies_left == 0)
                error = -EAGAIN;
+
+       /*
+        * If the wakeup was spurious, just retry
+        */
+       if (error == -EINTR && !signal_pending(current))
+               goto sleep_again;
+
        unlink_queue(sma, &queue);
 
 out_unlock_free:
index 453100a4159d3eed6b82307f7fed09f12eec1937..d9d5648f3cdcc4bcde3a7a67afe845aa89dc2a1f 100644 (file)
@@ -2027,7 +2027,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
                goto out_free_group_list;
 
        /* prevent changes to the threadgroup list while we take a snapshot. */
-       rcu_read_lock();
+       read_lock(&tasklist_lock);
        if (!thread_group_leader(leader)) {
                /*
                 * a race with de_thread from another thread's exec() may strip
@@ -2036,7 +2036,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
                 * throw this task away and try again (from cgroup_procs_write);
                 * this is "double-double-toil-and-trouble-check locking".
                 */
-               rcu_read_unlock();
+               read_unlock(&tasklist_lock);
                retval = -EAGAIN;
                goto out_free_group_list;
        }
@@ -2057,7 +2057,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
        } while_each_thread(leader, tsk);
        /* remember the number of threads in the array for later. */
        group_size = i;
-       rcu_read_unlock();
+       read_unlock(&tasklist_lock);
 
        /*
         * step 1: check that we can legitimately attach to the cgroup.
@@ -2135,14 +2135,17 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
                oldcgrp = task_cgroup_from_root(tsk, root);
                if (cgrp == oldcgrp)
                        continue;
-               /* attach each task to each subsystem */
-               for_each_subsys(root, ss) {
-                       if (ss->attach_task)
-                               ss->attach_task(cgrp, tsk);
-               }
                /* if the thread is PF_EXITING, it can just get skipped. */
                retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
-               BUG_ON(retval != 0 && retval != -ESRCH);
+               if (retval == 0) {
+                       /* attach each task to each subsystem */
+                       for_each_subsys(root, ss) {
+                               if (ss->attach_task)
+                                       ss->attach_task(cgrp, tsk);
+                       }
+               } else {
+                       BUG_ON(retval != -ESRCH);
+               }
        }
        /* nothing is sensitive to fork() after this point. */
 
@@ -4880,9 +4883,9 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 
        rcu_assign_pointer(id->css, NULL);
        rcu_assign_pointer(css->id, NULL);
-       spin_lock(&ss->id_lock);
+       write_lock(&ss->id_lock);
        idr_remove(&ss->idr, id->id);
-       spin_unlock(&ss->id_lock);
+       write_unlock(&ss->id_lock);
        kfree_rcu(id, rcu_head);
 }
 EXPORT_SYMBOL_GPL(free_css_id);
@@ -4908,10 +4911,10 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
                error = -ENOMEM;
                goto err_out;
        }
-       spin_lock(&ss->id_lock);
+       write_lock(&ss->id_lock);
        /* Don't use 0. allocates an ID of 1-65535 */
        error = idr_get_new_above(&ss->idr, newid, 1, &myid);
-       spin_unlock(&ss->id_lock);
+       write_unlock(&ss->id_lock);
 
        /* Returns error when there are no free spaces for new ID.*/
        if (error) {
@@ -4926,9 +4929,9 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
        return newid;
 remove_idr:
        error = -ENOSPC;
-       spin_lock(&ss->id_lock);
+       write_lock(&ss->id_lock);
        idr_remove(&ss->idr, myid);
-       spin_unlock(&ss->id_lock);
+       write_unlock(&ss->id_lock);
 err_out:
        kfree(newid);
        return ERR_PTR(error);
@@ -4940,7 +4943,7 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
 {
        struct css_id *newid;
 
-       spin_lock_init(&ss->id_lock);
+       rwlock_init(&ss->id_lock);
        idr_init(&ss->idr);
 
        newid = get_new_cssid(ss, 0);
@@ -5035,9 +5038,9 @@ css_get_next(struct cgroup_subsys *ss, int id,
                 * scan next entry from bitmap(tree), tmpid is updated after
                 * idr_get_next().
                 */
-               spin_lock(&ss->id_lock);
+               read_lock(&ss->id_lock);
                tmp = idr_get_next(&ss->idr, &tmpid);
-               spin_unlock(&ss->id_lock);
+               read_unlock(&ss->id_lock);
 
                if (!tmp)
                        break;
index 10131fdaff70c36dc9fb181ace777015296891e3..ed0ff443f03640f16d0c3f68e95a5c644a948882 100644 (file)
@@ -949,6 +949,8 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 static void cpuset_change_task_nodemask(struct task_struct *tsk,
                                        nodemask_t *newmems)
 {
+       bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed);
+
 repeat:
        /*
         * Allow tasks that have access to memory reserves because they have
@@ -963,7 +965,6 @@ repeat:
        nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
        mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
 
-
        /*
         * ensure checking ->mems_allowed_change_disable after setting all new
         * allowed nodes.
@@ -980,9 +981,11 @@ repeat:
 
        /*
         * Allocation of memory is very fast, we needn't sleep when waiting
-        * for the read-side.
+        * for the read-side.  No wait is necessary, however, if at least one
+        * node remains unchanged.
         */
-       while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
+       while (masks_disjoint &&
+                       ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
                task_unlock(tsk);
                if (!task_curr(tsk))
                        yield();
index 12a0287e03582c6969e1268ac4e7456222b111be..e1253faa34ddcf013c6cf577b98ab85ac4dd5634 100644 (file)
@@ -29,7 +29,6 @@
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
 #include <linux/uaccess.h>
-#include <linux/suspend.h>
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
@@ -6853,7 +6852,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
        mutex_lock(&swhash->hlist_mutex);
-       if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) {
+       if (swhash->hlist_refcount > 0) {
                struct swevent_hlist *hlist;
 
                hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -6942,14 +6941,7 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
        unsigned int cpu = (long)hcpu;
 
-       /*
-        * Ignore suspend/resume action, the perf_pm_notifier will
-        * take care of that.
-        */
-       if (action & CPU_TASKS_FROZEN)
-               return NOTIFY_OK;
-
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
 
        case CPU_UP_PREPARE:
        case CPU_DOWN_FAILED:
@@ -6968,90 +6960,6 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
        return NOTIFY_OK;
 }
 
-static void perf_pm_resume_cpu(void *unused)
-{
-       struct perf_cpu_context *cpuctx;
-       struct perf_event_context *ctx;
-       struct pmu *pmu;
-       int idx;
-
-       idx = srcu_read_lock(&pmus_srcu);
-       list_for_each_entry_rcu(pmu, &pmus, entry) {
-               cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-               ctx = cpuctx->task_ctx;
-
-               perf_ctx_lock(cpuctx, ctx);
-               perf_pmu_disable(cpuctx->ctx.pmu);
-
-               cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-               if (ctx)
-                       ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-
-               perf_pmu_enable(cpuctx->ctx.pmu);
-               perf_ctx_unlock(cpuctx, ctx);
-       }
-       srcu_read_unlock(&pmus_srcu, idx);
-}
-
-static void perf_pm_suspend_cpu(void *unused)
-{
-       struct perf_cpu_context *cpuctx;
-       struct perf_event_context *ctx;
-       struct pmu *pmu;
-       int idx;
-
-       idx = srcu_read_lock(&pmus_srcu);
-       list_for_each_entry_rcu(pmu, &pmus, entry) {
-               cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-               ctx = cpuctx->task_ctx;
-
-               perf_ctx_lock(cpuctx, ctx);
-               perf_pmu_disable(cpuctx->ctx.pmu);
-
-               perf_event_sched_in(cpuctx, ctx, current);
-
-               perf_pmu_enable(cpuctx->ctx.pmu);
-               perf_ctx_unlock(cpuctx, ctx);
-       }
-       srcu_read_unlock(&pmus_srcu, idx);
-}
-
-static int perf_resume(void)
-{
-       get_online_cpus();
-       smp_call_function(perf_pm_resume_cpu, NULL, 1);
-       put_online_cpus();
-
-       return NOTIFY_OK;
-}
-
-static int perf_suspend(void)
-{
-       get_online_cpus();
-       smp_call_function(perf_pm_suspend_cpu, NULL, 1);
-       put_online_cpus();
-
-       return NOTIFY_OK;
-}
-
-static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr)
-{
-       switch (action) {
-       case PM_POST_HIBERNATION:
-       case PM_POST_SUSPEND:
-               return perf_resume();
-       case PM_HIBERNATION_PREPARE:
-       case PM_SUSPEND_PREPARE:
-               return perf_suspend();
-       default:
-               return NOTIFY_DONE;
-       }
-}
-
-static struct notifier_block perf_pm_notifier = {
-       .notifier_call = perf_pm,
-};
-
 void __init perf_event_init(void)
 {
        int ret;
@@ -7066,7 +6974,6 @@ void __init perf_event_init(void)
        perf_tp_register();
        perf_cpu_notifier(perf_cpu_notify);
        register_reboot_notifier(&perf_reboot_notifier);
-       register_pm_notifier(&perf_pm_notifier);
 
        ret = init_hw_breakpoint();
        WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
index 58459509b14c9be7cdd0fb7f151b1d1e0bd950e3..d06c091e0345d7c5fb1d0efccf4e8c9e258bdab8 100644 (file)
@@ -1286,6 +1286,7 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
                memset(u->nodename + len, 0, sizeof(u->nodename) - len);
                errno = 0;
        }
+       uts_proc_notify(UTS_PROC_HOSTNAME);
        up_write(&uts_sem);
        return errno;
 }
@@ -1336,6 +1337,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
                memset(u->domainname + len, 0, sizeof(u->domainname) - len);
                errno = 0;
        }
+       uts_proc_notify(UTS_PROC_DOMAINNAME);
        up_write(&uts_sem);
        return errno;
 }
index a2cd77e70d4d8359cf3e38b8fb61cc5388fe5cb4..3b0d48ebf81d68aa50764cb4360d43a16b84e289 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <linux/sysctl.h>
+#include <linux/wait.h>
 
 static void *get_uts(ctl_table *table, int write)
 {
@@ -51,12 +52,19 @@ static int proc_do_uts_string(ctl_table *table, int write,
        uts_table.data = get_uts(table, write);
        r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
        put_uts(table, write, uts_table.data);
+
+       if (write)
+               proc_sys_poll_notify(table->poll);
+
        return r;
 }
 #else
 #define proc_do_uts_string NULL
 #endif
 
+static DEFINE_CTL_TABLE_POLL(hostname_poll);
+static DEFINE_CTL_TABLE_POLL(domainname_poll);
+
 static struct ctl_table uts_kern_table[] = {
        {
                .procname       = "ostype",
@@ -85,6 +93,7 @@ static struct ctl_table uts_kern_table[] = {
                .maxlen         = sizeof(init_uts_ns.name.nodename),
                .mode           = 0644,
                .proc_handler   = proc_do_uts_string,
+               .poll           = &hostname_poll,
        },
        {
                .procname       = "domainname",
@@ -92,6 +101,7 @@ static struct ctl_table uts_kern_table[] = {
                .maxlen         = sizeof(init_uts_ns.name.domainname),
                .mode           = 0644,
                .proc_handler   = proc_do_uts_string,
+               .poll           = &domainname_poll,
        },
        {}
 };
@@ -105,6 +115,19 @@ static struct ctl_table uts_root_table[] = {
        {}
 };
 
+#ifdef CONFIG_PROC_SYSCTL
+/*
+ * Notify userspace about a change in a certain entry of uts_kern_table,
+ * identified by the parameter proc.
+ */
+void uts_proc_notify(enum uts_proc proc)
+{
+       struct ctl_table *table = &uts_kern_table[proc];
+
+       proc_sys_poll_notify(table->poll);
+}
+#endif
+
 static int __init utsname_sysctl_init(void)
 {
        register_sysctl_table(uts_root_table);
index bbf211aea4ebed20f0190ccdf97c91baed7ad422..ed055b297c81c127dcbef79cb821f904936245c2 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -944,6 +944,7 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
 {
        int ret, id;
        unsigned int max;
+       unsigned long flags;
 
        BUG_ON((int)start < 0);
        BUG_ON((int)end < 0);
@@ -959,7 +960,7 @@ again:
        if (!ida_pre_get(ida, gfp_mask))
                return -ENOMEM;
 
-       spin_lock(&simple_ida_lock);
+       spin_lock_irqsave(&simple_ida_lock, flags);
        ret = ida_get_new_above(ida, start, &id);
        if (!ret) {
                if (id > max) {
@@ -969,7 +970,7 @@ again:
                        ret = id;
                }
        }
-       spin_unlock(&simple_ida_lock);
+       spin_unlock_irqrestore(&simple_ida_lock, flags);
 
        if (unlikely(ret == -EAGAIN))
                goto again;
@@ -985,10 +986,12 @@ EXPORT_SYMBOL(ida_simple_get);
  */
 void ida_simple_remove(struct ida *ida, unsigned int id)
 {
+       unsigned long flags;
+
        BUG_ON((int)id < 0);
-       spin_lock(&simple_ida_lock);
+       spin_lock_irqsave(&simple_ida_lock, flags);
        ida_remove(ida, id);
-       spin_unlock(&simple_ida_lock);
+       spin_unlock_irqrestore(&simple_ida_lock, flags);
 }
 EXPORT_SYMBOL(ida_simple_remove);
 
index 860ec211ddd667175eca0abb95e82a01cdaa8e1e..4298abaae153033caafe1f8ac641fdef1dee097e 100644 (file)
@@ -990,7 +990,7 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm,
        page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
        VM_BUG_ON(!PageCompound(page));
        if (flags & FOLL_GET)
-               get_page(page);
+               get_page_foll(page);
 
 out:
        return page;
@@ -1202,6 +1202,7 @@ static void __split_huge_page_refcount(struct page *page)
        unsigned long head_index = page->index;
        struct zone *zone = page_zone(page);
        int zonestat;
+       int tail_count = 0;
 
        /* prevent PageLRU to go away from under us, and freeze lru stats */
        spin_lock_irq(&zone->lru_lock);
@@ -1210,11 +1211,27 @@ static void __split_huge_page_refcount(struct page *page)
        for (i = 1; i < HPAGE_PMD_NR; i++) {
                struct page *page_tail = page + i;
 
-               /* tail_page->_count cannot change */
-               atomic_sub(atomic_read(&page_tail->_count), &page->_count);
-               BUG_ON(page_count(page) <= 0);
-               atomic_add(page_mapcount(page) + 1, &page_tail->_count);
-               BUG_ON(atomic_read(&page_tail->_count) <= 0);
+               /* tail_page->_mapcount cannot change */
+               BUG_ON(page_mapcount(page_tail) < 0);
+               tail_count += page_mapcount(page_tail);
+               /* check for overflow */
+               BUG_ON(tail_count < 0);
+               BUG_ON(atomic_read(&page_tail->_count) != 0);
+               /*
+                * tail_page->_count is zero and not changing from
+                * under us. But get_page_unless_zero() may be running
+                * from under us on the tail_page. If we used
+                * atomic_set() below instead of atomic_add(), we
+                * would then run atomic_set() concurrently with
+                * get_page_unless_zero(), and atomic_set() is
+                * implemented in C not using locked ops. spin_unlock
+                * on x86 sometime uses locked ops because of PPro
+                * errata 66, 92, so unless somebody can guarantee
+                * atomic_set() here would be safe on all archs (and
+                * not only on x86), it's safer to use atomic_add().
+                */
+               atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1,
+                          &page_tail->_count);
 
                /* after clearing PageTail the gup refcount can be released */
                smp_mb();
@@ -1232,10 +1249,7 @@ static void __split_huge_page_refcount(struct page *page)
                                      (1L << PG_uptodate)));
                page_tail->flags |= (1L << PG_dirty);
 
-               /*
-                * 1) clear PageTail before overwriting first_page
-                * 2) clear PageTail before clearing PageHead for VM_BUG_ON
-                */
+               /* clear PageTail before overwriting first_page */
                smp_wmb();
 
                /*
@@ -1252,7 +1266,6 @@ static void __split_huge_page_refcount(struct page *page)
                 * status is achieved setting a reserved bit in the
                 * pmd, not by clearing the present bit.
                */
-               BUG_ON(page_mapcount(page_tail));
                page_tail->_mapcount = page->_mapcount;
 
                BUG_ON(page_tail->mapping);
@@ -1269,6 +1282,8 @@ static void __split_huge_page_refcount(struct page *page)
 
                lru_add_page_tail(zone, page, page_tail);
        }
+       atomic_sub(tail_count, &page->_count);
+       BUG_ON(atomic_read(&page->_count) <= 0);
 
        __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
        __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
index d071d380fb498ab36ac6700343bf86485463bdbe..2189af491783f958c1337ebf1f1bb14dc5cd8b5d 100644 (file)
@@ -37,6 +37,52 @@ static inline void __put_page(struct page *page)
        atomic_dec(&page->_count);
 }
 
+static inline void __get_page_tail_foll(struct page *page,
+                                       bool get_page_head)
+{
+       /*
+        * If we're getting a tail page, the elevated page->_count is
+        * required only in the head page and we will elevate the head
+        * page->_count and tail page->_mapcount.
+        *
+        * We elevate page_tail->_mapcount for tail pages to force
+        * page_tail->_count to be zero at all times to avoid getting
+        * false positives from get_page_unless_zero() with
+        * speculative page access (like in
+        * page_cache_get_speculative()) on tail pages.
+        */
+       VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+       VM_BUG_ON(atomic_read(&page->_count) != 0);
+       VM_BUG_ON(page_mapcount(page) < 0);
+       if (get_page_head)
+               atomic_inc(&page->first_page->_count);
+       atomic_inc(&page->_mapcount);
+}
+
+/*
+ * This is meant to be called as the FOLL_GET operation of
+ * follow_page() and it must be called while holding the proper PT
+ * lock while the pte (or pmd_trans_huge) is still mapping the page.
+ */
+static inline void get_page_foll(struct page *page)
+{
+       if (unlikely(PageTail(page)))
+               /*
+                * This is safe only because
+                * __split_huge_page_refcount() can't run under
+                * get_page_foll() because we hold the proper PT lock.
+                */
+               __get_page_tail_foll(page, true);
+       else {
+               /*
+                * Getting a normal page or the head of a compound page
+                * requires to already have an elevated page->_count.
+                */
+               VM_BUG_ON(atomic_read(&page->_count) <= 0);
+               atomic_inc(&page->_count);
+       }
+}
+
 extern unsigned long highest_memmap_pfn;
 
 /*
index 2d5755544afe5fa165e1524e2500ab56bf8ad8c8..7af1d5ee1598d485ced61f57c1b165fac4e0808d 100644 (file)
@@ -201,8 +201,8 @@ struct mem_cgroup_eventfd_list {
        struct eventfd_ctx *eventfd;
 };
 
-static void mem_cgroup_threshold(struct mem_cgroup *mem);
-static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
+static void mem_cgroup_threshold(struct mem_cgroup *memcg);
+static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 
 /*
  * The memory controller data structure. The memory controller controls both
@@ -362,29 +362,29 @@ enum charge_type {
 #define MEM_CGROUP_RECLAIM_SOFT_BIT    0x2
 #define MEM_CGROUP_RECLAIM_SOFT                (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
-static void mem_cgroup_get(struct mem_cgroup *mem);
-static void mem_cgroup_put(struct mem_cgroup *mem);
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
-static void drain_all_stock_async(struct mem_cgroup *mem);
+static void mem_cgroup_get(struct mem_cgroup *memcg);
+static void mem_cgroup_put(struct mem_cgroup *memcg);
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+static void drain_all_stock_async(struct mem_cgroup *memcg);
 
 static struct mem_cgroup_per_zone *
-mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
+mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
 {
-       return &mem->info.nodeinfo[nid]->zoneinfo[zid];
+       return &memcg->info.nodeinfo[nid]->zoneinfo[zid];
 }
 
-struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
+struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
 {
-       return &mem->css;
+       return &memcg->css;
 }
 
 static struct mem_cgroup_per_zone *
-page_cgroup_zoneinfo(struct mem_cgroup *mem, struct page *page)
+page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
 {
        int nid = page_to_nid(page);
        int zid = page_zonenum(page);
 
-       return mem_cgroup_zoneinfo(mem, nid, zid);
+       return mem_cgroup_zoneinfo(memcg, nid, zid);
 }
 
 static struct mem_cgroup_tree_per_zone *
@@ -403,7 +403,7 @@ soft_limit_tree_from_page(struct page *page)
 }
 
 static void
-__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
                                struct mem_cgroup_per_zone *mz,
                                struct mem_cgroup_tree_per_zone *mctz,
                                unsigned long long new_usage_in_excess)
@@ -437,7 +437,7 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
 }
 
 static void
-__mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
                                struct mem_cgroup_per_zone *mz,
                                struct mem_cgroup_tree_per_zone *mctz)
 {
@@ -448,17 +448,17 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 }
 
 static void
-mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
                                struct mem_cgroup_per_zone *mz,
                                struct mem_cgroup_tree_per_zone *mctz)
 {
        spin_lock(&mctz->lock);
-       __mem_cgroup_remove_exceeded(mem, mz, mctz);
+       __mem_cgroup_remove_exceeded(memcg, mz, mctz);
        spin_unlock(&mctz->lock);
 }
 
 
-static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
+static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 {
        unsigned long long excess;
        struct mem_cgroup_per_zone *mz;
@@ -471,9 +471,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
         * Necessary to update all ancestors when hierarchy is used.
         * because their event counter is not touched.
         */
-       for (; mem; mem = parent_mem_cgroup(mem)) {
-               mz = mem_cgroup_zoneinfo(mem, nid, zid);
-               excess = res_counter_soft_limit_excess(&mem->res);
+       for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+               mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+               excess = res_counter_soft_limit_excess(&memcg->res);
                /*
                 * We have to update the tree if mz is on RB-tree or
                 * mem is over its softlimit.
@@ -482,18 +482,18 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
                        spin_lock(&mctz->lock);
                        /* if on-tree, remove it */
                        if (mz->on_tree)
-                               __mem_cgroup_remove_exceeded(mem, mz, mctz);
+                               __mem_cgroup_remove_exceeded(memcg, mz, mctz);
                        /*
                         * Insert again. mz->usage_in_excess will be updated.
                         * If excess is 0, no tree ops.
                         */
-                       __mem_cgroup_insert_exceeded(mem, mz, mctz, excess);
+                       __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
                        spin_unlock(&mctz->lock);
                }
        }
 }
 
-static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
+static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
 {
        int node, zone;
        struct mem_cgroup_per_zone *mz;
@@ -501,9 +501,9 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
 
        for_each_node_state(node, N_POSSIBLE) {
                for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-                       mz = mem_cgroup_zoneinfo(mem, node, zone);
+                       mz = mem_cgroup_zoneinfo(memcg, node, zone);
                        mctz = soft_limit_tree_node_zone(node, zone);
-                       mem_cgroup_remove_exceeded(mem, mz, mctz);
+                       mem_cgroup_remove_exceeded(memcg, mz, mctz);
                }
        }
 }
@@ -564,7 +564,7 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
  * common workload, threashold and synchonization as vmstat[] should be
  * implemented.
  */
-static long mem_cgroup_read_stat(struct mem_cgroup *mem,
+static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
                                 enum mem_cgroup_stat_index idx)
 {
        long val = 0;
@@ -572,81 +572,83 @@ static long mem_cgroup_read_stat(struct mem_cgroup *mem,
 
        get_online_cpus();
        for_each_online_cpu(cpu)
-               val += per_cpu(mem->stat->count[idx], cpu);
+               val += per_cpu(memcg->stat->count[idx], cpu);
 #ifdef CONFIG_HOTPLUG_CPU
-       spin_lock(&mem->pcp_counter_lock);
-       val += mem->nocpu_base.count[idx];
-       spin_unlock(&mem->pcp_counter_lock);
+       spin_lock(&memcg->pcp_counter_lock);
+       val += memcg->nocpu_base.count[idx];
+       spin_unlock(&memcg->pcp_counter_lock);
 #endif
        put_online_cpus();
        return val;
 }
 
-static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
                                         bool charge)
 {
        int val = (charge) ? 1 : -1;
-       this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
+       this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
 }
 
-void mem_cgroup_pgfault(struct mem_cgroup *mem, int val)
+void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val)
 {
-       this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
+       this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
 }
 
-void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val)
+void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val)
 {
-       this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
+       this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
 }
 
-static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
+static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
                                            enum mem_cgroup_events_index idx)
 {
        unsigned long val = 0;
        int cpu;
 
        for_each_online_cpu(cpu)
-               val += per_cpu(mem->stat->events[idx], cpu);
+               val += per_cpu(memcg->stat->events[idx], cpu);
 #ifdef CONFIG_HOTPLUG_CPU
-       spin_lock(&mem->pcp_counter_lock);
-       val += mem->nocpu_base.events[idx];
-       spin_unlock(&mem->pcp_counter_lock);
+       spin_lock(&memcg->pcp_counter_lock);
+       val += memcg->nocpu_base.events[idx];
+       spin_unlock(&memcg->pcp_counter_lock);
 #endif
        return val;
 }
 
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
                                         bool file, int nr_pages)
 {
        preempt_disable();
 
        if (file)
-               __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
+               __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
+                               nr_pages);
        else
-               __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
+               __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
+                               nr_pages);
 
        /* pagein of a big page is an event. So, ignore page size */
        if (nr_pages > 0)
-               __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
+               __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
        else {
-               __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
+               __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
                nr_pages = -nr_pages; /* for event */
        }
 
-       __this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
+       __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
 
        preempt_enable();
 }
 
 unsigned long
-mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid,
+mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
                        unsigned int lru_mask)
 {
        struct mem_cgroup_per_zone *mz;
        enum lru_list l;
        unsigned long ret = 0;
 
-       mz = mem_cgroup_zoneinfo(mem, nid, zid);
+       mz = mem_cgroup_zoneinfo(memcg, nid, zid);
 
        for_each_lru(l) {
                if (BIT(l) & lru_mask)
@@ -656,44 +658,45 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid,
 }
 
 static unsigned long
-mem_cgroup_node_nr_lru_pages(struct mem_cgroup *mem,
+mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                        int nid, unsigned int lru_mask)
 {
        u64 total = 0;
        int zid;
 
        for (zid = 0; zid < MAX_NR_ZONES; zid++)
-               total += mem_cgroup_zone_nr_lru_pages(mem, nid, zid, lru_mask);
+               total += mem_cgroup_zone_nr_lru_pages(memcg,
+                                               nid, zid, lru_mask);
 
        return total;
 }
 
-static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *mem,
+static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
                        unsigned int lru_mask)
 {
        int nid;
        u64 total = 0;
 
        for_each_node_state(nid, N_HIGH_MEMORY)
-               total += mem_cgroup_node_nr_lru_pages(mem, nid, lru_mask);
+               total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
        return total;
 }
 
-static bool __memcg_event_check(struct mem_cgroup *mem, int target)
+static bool __memcg_event_check(struct mem_cgroup *memcg, int target)
 {
        unsigned long val, next;
 
-       val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]);
-       next = this_cpu_read(mem->stat->targets[target]);
+       val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+       next = __this_cpu_read(memcg->stat->targets[target]);
        /* from time_after() in jiffies.h */
        return ((long)next - (long)val < 0);
 }
 
-static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
+static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
 {
        unsigned long val, next;
 
-       val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+       val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
 
        switch (target) {
        case MEM_CGROUP_TARGET_THRESH:
@@ -709,34 +712,36 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
                return;
        }
 
-       this_cpu_write(mem->stat->targets[target], next);
+       __this_cpu_write(memcg->stat->targets[target], next);
 }
 
 /*
  * Check events in order.
  *
  */
-static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
+static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 {
+       preempt_disable();
        /* threshold event is triggered in finer grain than soft limit */
-       if (unlikely(__memcg_event_check(mem, MEM_CGROUP_TARGET_THRESH))) {
-               mem_cgroup_threshold(mem);
-               __mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH);
-               if (unlikely(__memcg_event_check(mem,
+       if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
+               mem_cgroup_threshold(memcg);
+               __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH);
+               if (unlikely(__memcg_event_check(memcg,
                             MEM_CGROUP_TARGET_SOFTLIMIT))) {
-                       mem_cgroup_update_tree(mem, page);
-                       __mem_cgroup_target_update(mem,
+                       mem_cgroup_update_tree(memcg, page);
+                       __mem_cgroup_target_update(memcg,
                                                   MEM_CGROUP_TARGET_SOFTLIMIT);
                }
 #if MAX_NUMNODES > 1
-               if (unlikely(__memcg_event_check(mem,
+               if (unlikely(__memcg_event_check(memcg,
                        MEM_CGROUP_TARGET_NUMAINFO))) {
-                       atomic_inc(&mem->numainfo_events);
-                       __mem_cgroup_target_update(mem,
+                       atomic_inc(&memcg->numainfo_events);
+                       __mem_cgroup_target_update(memcg,
                                MEM_CGROUP_TARGET_NUMAINFO);
                }
 #endif
        }
+       preempt_enable();
 }
 
 static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
@@ -762,7 +767,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 
 struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 {
-       struct mem_cgroup *mem = NULL;
+       struct mem_cgroup *memcg = NULL;
 
        if (!mm)
                return NULL;
@@ -773,25 +778,25 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
         */
        rcu_read_lock();
        do {
-               mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-               if (unlikely(!mem))
+               memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+               if (unlikely(!memcg))
                        break;
-       } while (!css_tryget(&mem->css));
+       } while (!css_tryget(&memcg->css));
        rcu_read_unlock();
-       return mem;
+       return memcg;
 }
 
 /* The caller has to guarantee "mem" exists before calling this */
-static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
+static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *memcg)
 {
        struct cgroup_subsys_state *css;
        int found;
 
-       if (!mem) /* ROOT cgroup has the smallest ID */
+       if (!memcg) /* ROOT cgroup has the smallest ID */
                return root_mem_cgroup; /*css_put/get against root is ignored*/
-       if (!mem->use_hierarchy) {
-               if (css_tryget(&mem->css))
-                       return mem;
+       if (!memcg->use_hierarchy) {
+               if (css_tryget(&memcg->css))
+                       return memcg;
                return NULL;
        }
        rcu_read_lock();
@@ -799,13 +804,13 @@ static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
         * searching a memory cgroup which has the smallest ID under given
         * ROOT cgroup. (ID >= 1)
         */
-       css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found);
+       css = css_get_next(&mem_cgroup_subsys, 1, &memcg->css, &found);
        if (css && css_tryget(css))
-               mem = container_of(css, struct mem_cgroup, css);
+               memcg = container_of(css, struct mem_cgroup, css);
        else
-               mem = NULL;
+               memcg = NULL;
        rcu_read_unlock();
-       return mem;
+       return memcg;
 }
 
 static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
@@ -859,29 +864,29 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
        for_each_mem_cgroup_tree_cond(iter, NULL, true)
 
 
-static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
+static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 {
-       return (mem == root_mem_cgroup);
+       return (memcg == root_mem_cgroup);
 }
 
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 {
-       struct mem_cgroup *mem;
+       struct mem_cgroup *memcg;
 
        if (!mm)
                return;
 
        rcu_read_lock();
-       mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-       if (unlikely(!mem))
+       memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+       if (unlikely(!memcg))
                goto out;
 
        switch (idx) {
        case PGMAJFAULT:
-               mem_cgroup_pgmajfault(mem, 1);
+               mem_cgroup_pgmajfault(memcg, 1);
                break;
        case PGFAULT:
-               mem_cgroup_pgfault(mem, 1);
+               mem_cgroup_pgfault(memcg, 1);
                break;
        default:
                BUG();
@@ -990,6 +995,16 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
                return;
        pc = lookup_page_cgroup(page);
        VM_BUG_ON(PageCgroupAcctLRU(pc));
+       /*
+        * putback:                             charge:
+        * SetPageLRU                           SetPageCgroupUsed
+        * smp_mb                               smp_mb
+        * PageCgroupUsed && add to memcg LRU   PageLRU && add to memcg LRU
+        *
+        * Ensure that one of the two sides adds the page to the memcg
+        * LRU during a race.
+        */
+       smp_mb();
        if (!PageCgroupUsed(pc))
                return;
        /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
@@ -1041,7 +1056,16 @@ static void mem_cgroup_lru_add_after_commit(struct page *page)
        unsigned long flags;
        struct zone *zone = page_zone(page);
        struct page_cgroup *pc = lookup_page_cgroup(page);
-
+       /*
+        * putback:                             charge:
+        * SetPageLRU                           SetPageCgroupUsed
+        * smp_mb                               smp_mb
+        * PageCgroupUsed && add to memcg LRU   PageLRU && add to memcg LRU
+        *
+        * Ensure that one of the two sides adds the page to the memcg
+        * LRU during a race.
+        */
+       smp_mb();
        /* taking care of that the page is added to LRU while we commit it */
        if (likely(!PageLRU(page)))
                return;
@@ -1063,21 +1087,21 @@ void mem_cgroup_move_lists(struct page *page,
 }
 
 /*
- * Checks whether given mem is same or in the root_mem's
+ * Checks whether given mem is same or in the root_mem_cgroup's
  * hierarchy subtree
  */
-static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_mem,
-               struct mem_cgroup *mem)
+static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
+               struct mem_cgroup *memcg)
 {
-       if (root_mem != mem) {
-               return (root_mem->use_hierarchy &&
-                       css_is_ancestor(&mem->css, &root_mem->css));
+       if (root_memcg != memcg) {
+               return (root_memcg->use_hierarchy &&
+                       css_is_ancestor(&memcg->css, &root_memcg->css));
        }
 
        return true;
 }
 
-int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
+int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
 {
        int ret;
        struct mem_cgroup *curr = NULL;
@@ -1091,25 +1115,29 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
        if (!curr)
                return 0;
        /*
-        * We should check use_hierarchy of "mem" not "curr". Because checking
+        * We should check use_hierarchy of "memcg" not "curr". Because checking
         * use_hierarchy of "curr" here make this function true if hierarchy is
-        * enabled in "curr" and "curr" is a child of "mem" in *cgroup*
-        * hierarchy(even if use_hierarchy is disabled in "mem").
+        * enabled in "curr" and "curr" is a child of "memcg" in *cgroup*
+        * hierarchy(even if use_hierarchy is disabled in "memcg").
         */
-       ret = mem_cgroup_same_or_subtree(mem, curr);
+       ret = mem_cgroup_same_or_subtree(memcg, curr);
        css_put(&curr->css);
        return ret;
 }
 
-static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
-       unsigned long active;
+       unsigned long inactive_ratio;
+       int nid = zone_to_nid(zone);
+       int zid = zone_idx(zone);
        unsigned long inactive;
+       unsigned long active;
        unsigned long gb;
-       unsigned long inactive_ratio;
 
-       inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
-       active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
+       inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                               BIT(LRU_INACTIVE_ANON));
+       active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                             BIT(LRU_ACTIVE_ANON));
 
        gb = (inactive + active) >> (30 - PAGE_SHIFT);
        if (gb)
@@ -1117,39 +1145,20 @@ static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_
        else
                inactive_ratio = 1;
 
-       if (present_pages) {
-               present_pages[0] = inactive;
-               present_pages[1] = active;
-       }
-
-       return inactive_ratio;
+       return inactive * inactive_ratio < active;
 }
 
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
-{
-       unsigned long active;
-       unsigned long inactive;
-       unsigned long present_pages[2];
-       unsigned long inactive_ratio;
-
-       inactive_ratio = calc_inactive_ratio(memcg, present_pages);
-
-       inactive = present_pages[0];
-       active = present_pages[1];
-
-       if (inactive * inactive_ratio < active)
-               return 1;
-
-       return 0;
-}
-
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
        unsigned long active;
        unsigned long inactive;
+       int zid = zone_idx(zone);
+       int nid = zone_to_nid(zone);
 
-       inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
-       active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
+       inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                               BIT(LRU_INACTIVE_FILE));
+       active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                             BIT(LRU_ACTIVE_FILE));
 
        return (active > inactive);
 }
@@ -1254,13 +1263,13 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * Returns the maximum amount of memory @mem can be charged with, in
  * pages.
  */
-static unsigned long mem_cgroup_margin(struct mem_cgroup *mem)
+static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 {
        unsigned long long margin;
 
-       margin = res_counter_margin(&mem->res);
+       margin = res_counter_margin(&memcg->res);
        if (do_swap_account)
-               margin = min(margin, res_counter_margin(&mem->memsw));
+               margin = min(margin, res_counter_margin(&memcg->memsw));
        return margin >> PAGE_SHIFT;
 }
 
@@ -1275,33 +1284,33 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
        return memcg->swappiness;
 }
 
-static void mem_cgroup_start_move(struct mem_cgroup *mem)
+static void mem_cgroup_start_move(struct mem_cgroup *memcg)
 {
        int cpu;
 
        get_online_cpus();
-       spin_lock(&mem->pcp_counter_lock);
+       spin_lock(&memcg->pcp_counter_lock);
        for_each_online_cpu(cpu)
-               per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
-       mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
-       spin_unlock(&mem->pcp_counter_lock);
+               per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
+       memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
+       spin_unlock(&memcg->pcp_counter_lock);
        put_online_cpus();
 
        synchronize_rcu();
 }
 
-static void mem_cgroup_end_move(struct mem_cgroup *mem)
+static void mem_cgroup_end_move(struct mem_cgroup *memcg)
 {
        int cpu;
 
-       if (!mem)
+       if (!memcg)
                return;
        get_online_cpus();
-       spin_lock(&mem->pcp_counter_lock);
+       spin_lock(&memcg->pcp_counter_lock);
        for_each_online_cpu(cpu)
-               per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
-       mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
-       spin_unlock(&mem->pcp_counter_lock);
+               per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
+       memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
+       spin_unlock(&memcg->pcp_counter_lock);
        put_online_cpus();
 }
 /*
@@ -1316,13 +1325,13 @@ static void mem_cgroup_end_move(struct mem_cgroup *mem)
  *                       waiting at hith-memory prressure caused by "move".
  */
 
-static bool mem_cgroup_stealed(struct mem_cgroup *mem)
+static bool mem_cgroup_stealed(struct mem_cgroup *memcg)
 {
        VM_BUG_ON(!rcu_read_lock_held());
-       return this_cpu_read(mem->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
+       return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
 }
 
-static bool mem_cgroup_under_move(struct mem_cgroup *mem)
+static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *from;
        struct mem_cgroup *to;
@@ -1337,17 +1346,17 @@ static bool mem_cgroup_under_move(struct mem_cgroup *mem)
        if (!from)
                goto unlock;
 
-       ret = mem_cgroup_same_or_subtree(mem, from)
-               || mem_cgroup_same_or_subtree(mem, to);
+       ret = mem_cgroup_same_or_subtree(memcg, from)
+               || mem_cgroup_same_or_subtree(memcg, to);
 unlock:
        spin_unlock(&mc.lock);
        return ret;
 }
 
-static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem)
+static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
 {
        if (mc.moving_task && current != mc.moving_task) {
-               if (mem_cgroup_under_move(mem)) {
+               if (mem_cgroup_under_move(memcg)) {
                        DEFINE_WAIT(wait);
                        prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE);
                        /* moving charge context might have finished. */
@@ -1431,12 +1440,12 @@ done:
  * This function returns the number of memcg under hierarchy tree. Returns
  * 1(self count) if no children.
  */
-static int mem_cgroup_count_children(struct mem_cgroup *mem)
+static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 {
        int num = 0;
        struct mem_cgroup *iter;
 
-       for_each_mem_cgroup_tree(iter, mem)
+       for_each_mem_cgroup_tree(iter, memcg)
                num++;
        return num;
 }
@@ -1466,21 +1475,21 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
  * that to reclaim free pages from.
  */
 static struct mem_cgroup *
-mem_cgroup_select_victim(struct mem_cgroup *root_mem)
+mem_cgroup_select_victim(struct mem_cgroup *root_memcg)
 {
        struct mem_cgroup *ret = NULL;
        struct cgroup_subsys_state *css;
        int nextid, found;
 
-       if (!root_mem->use_hierarchy) {
-               css_get(&root_mem->css);
-               ret = root_mem;
+       if (!root_memcg->use_hierarchy) {
+               css_get(&root_memcg->css);
+               ret = root_memcg;
        }
 
        while (!ret) {
                rcu_read_lock();
-               nextid = root_mem->last_scanned_child + 1;
-               css = css_get_next(&mem_cgroup_subsys, nextid, &root_mem->css,
+               nextid = root_memcg->last_scanned_child + 1;
+               css = css_get_next(&mem_cgroup_subsys, nextid, &root_memcg->css,
                                   &found);
                if (css && css_tryget(css))
                        ret = container_of(css, struct mem_cgroup, css);
@@ -1489,9 +1498,9 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
                /* Updates scanning parameter */
                if (!css) {
                        /* this means start scan from ID:1 */
-                       root_mem->last_scanned_child = 0;
+                       root_memcg->last_scanned_child = 0;
                } else
-                       root_mem->last_scanned_child = found;
+                       root_memcg->last_scanned_child = found;
        }
 
        return ret;
@@ -1507,14 +1516,14 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
  * reclaimable pages on a node. Returns true if there are any reclaimable
  * pages in the node.
  */
-static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
+static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
                int nid, bool noswap)
 {
-       if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_FILE))
+       if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE))
                return true;
        if (noswap || !total_swap_pages)
                return false;
-       if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_ANON))
+       if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON))
                return true;
        return false;
 
@@ -1527,29 +1536,29 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
  * nodes based on the zonelist. So update the list loosely once per 10 secs.
  *
  */
-static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
+static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
 {
        int nid;
        /*
         * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
         * pagein/pageout changes since the last update.
         */
-       if (!atomic_read(&mem->numainfo_events))
+       if (!atomic_read(&memcg->numainfo_events))
                return;
-       if (atomic_inc_return(&mem->numainfo_updating) > 1)
+       if (atomic_inc_return(&memcg->numainfo_updating) > 1)
                return;
 
        /* make a nodemask where this memcg uses memory from */
-       mem->scan_nodes = node_states[N_HIGH_MEMORY];
+       memcg->scan_nodes = node_states[N_HIGH_MEMORY];
 
        for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
 
-               if (!test_mem_cgroup_node_reclaimable(mem, nid, false))
-                       node_clear(nid, mem->scan_nodes);
+               if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
+                       node_clear(nid, memcg->scan_nodes);
        }
 
-       atomic_set(&mem->numainfo_events, 0);
-       atomic_set(&mem->numainfo_updating, 0);
+       atomic_set(&memcg->numainfo_events, 0);
+       atomic_set(&memcg->numainfo_updating, 0);
 }
 
 /*
@@ -1564,16 +1573,16 @@ static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
  *
  * Now, we use round-robin. Better algorithm is welcomed.
  */
-int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
+int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 {
        int node;
 
-       mem_cgroup_may_update_nodemask(mem);
-       node = mem->last_scanned_node;
+       mem_cgroup_may_update_nodemask(memcg);
+       node = memcg->last_scanned_node;
 
-       node = next_node(node, mem->scan_nodes);
+       node = next_node(node, memcg->scan_nodes);
        if (node == MAX_NUMNODES)
-               node = first_node(mem->scan_nodes);
+               node = first_node(memcg->scan_nodes);
        /*
         * We call this when we hit limit, not when pages are added to LRU.
         * No LRU may hold pages because all pages are UNEVICTABLE or
@@ -1583,7 +1592,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
        if (unlikely(node == MAX_NUMNODES))
                node = numa_node_id();
 
-       mem->last_scanned_node = node;
+       memcg->last_scanned_node = node;
        return node;
 }
 
@@ -1593,7 +1602,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
  * unused nodes. But scan_nodes is lazily updated and may not cotain
  * enough new information. We need to do double check.
  */
-bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 {
        int nid;
 
@@ -1601,12 +1610,12 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
         * quick check...making use of scan_node.
         * We can skip unused nodes.
         */
-       if (!nodes_empty(mem->scan_nodes)) {
-               for (nid = first_node(mem->scan_nodes);
+       if (!nodes_empty(memcg->scan_nodes)) {
+               for (nid = first_node(memcg->scan_nodes);
                     nid < MAX_NUMNODES;
-                    nid = next_node(nid, mem->scan_nodes)) {
+                    nid = next_node(nid, memcg->scan_nodes)) {
 
-                       if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+                       if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
                                return true;
                }
        }
@@ -1614,23 +1623,23 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
         * Check rest of nodes.
         */
        for_each_node_state(nid, N_HIGH_MEMORY) {
-               if (node_isset(nid, mem->scan_nodes))
+               if (node_isset(nid, memcg->scan_nodes))
                        continue;
-               if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+               if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
                        return true;
        }
        return false;
 }
 
 #else
-int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
+int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 {
        return 0;
 }
 
-bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 {
-       return test_mem_cgroup_node_reclaimable(mem, 0, noswap);
+       return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
 }
 #endif
 
@@ -1639,14 +1648,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
  * we reclaimed from, so that we don't end up penalizing one child extensively
  * based on its position in the children list.
  *
- * root_mem is the original ancestor that we've been reclaim from.
+ * root_memcg is the original ancestor that we've been reclaim from.
  *
- * We give up and return to the caller when we visit root_mem twice.
+ * We give up and return to the caller when we visit root_memcg twice.
  * (other groups can be removed while we're walking....)
  *
  * If shrink==true, this returns immediately to avoid freeing too much.
  */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
+static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
                                                struct zone *zone,
                                                gfp_t gfp_mask,
                                                unsigned long reclaim_options,
@@ -1661,15 +1670,15 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
        unsigned long excess;
        unsigned long nr_scanned;
 
-       excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
+       excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
 
        /* If memsw_is_minimum==1, swap-out is of no use. */
-       if (!check_soft && !shrink && root_mem->memsw_is_minimum)
+       if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
                noswap = true;
 
        while (1) {
-               victim = mem_cgroup_select_victim(root_mem);
-               if (victim == root_mem) {
+               victim = mem_cgroup_select_victim(root_memcg);
+               if (victim == root_memcg) {
                        loop++;
                        /*
                         * We are not draining per cpu cached charges during
@@ -1678,7 +1687,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                         * charges will not give any.
                         */
                        if (!check_soft && loop >= 1)
-                               drain_all_stock_async(root_mem);
+                               drain_all_stock_async(root_memcg);
                        if (loop >= 2) {
                                /*
                                 * If we have not been able to reclaim
@@ -1725,9 +1734,9 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                        return ret;
                total += ret;
                if (check_soft) {
-                       if (!res_counter_soft_limit_excess(&root_mem->res))
+                       if (!res_counter_soft_limit_excess(&root_memcg->res))
                                return total;
-               } else if (mem_cgroup_margin(root_mem))
+               } else if (mem_cgroup_margin(root_memcg))
                        return total;
        }
        return total;
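
The loop above has three ways out: the margin (or soft-limit excess) is
restored, the round-robin walk meets the root victim twice without enough
progress, or shrink mode returns after the first pass. A toy model of the
first two exit conditions, with made-up numbers (10 pages of excess, 3 pages
reclaimed per victim) standing in for the real reclaim work:

#include <stdio.h>

#define NGROUPS 3               /* "root" is group 0, visited round-robin */

int main(void)
{
        int next = 0, loops_at_root = 0, total = 0;
        int excess = 10;        /* pages over the limit */

        while (1) {
                int victim = next;
                next = (next + 1) % NGROUPS;

                if (victim == 0 && ++loops_at_root >= 2)
                        break;  /* met the root twice: give up */

                total += 3;     /* pretend each victim yields 3 pages */
                excess -= 3;
                printf("reclaimed from group %d, excess now %d\n",
                       victim, excess);
                if (excess <= 0)
                        break;  /* margin restored: stop early */
        }
        printf("total reclaimed: %d\n", total);
        return 0;
}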
@@ -1738,12 +1747,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
  * If someone is running, return false.
  * Has to be called with memcg_oom_lock held.
  */
-static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
+static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *iter, *failed = NULL;
        bool cond = true;
 
-       for_each_mem_cgroup_tree_cond(iter, mem, cond) {
+       for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
                if (iter->oom_lock) {
                        /*
                         * this subtree of our hierarchy is already locked
@@ -1763,7 +1772,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
         * what we set up to the failing subtree
         */
        cond = true;
-       for_each_mem_cgroup_tree_cond(iter, mem, cond) {
+       for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
                if (iter == failed) {
                        cond = false;
                        continue;
@@ -1776,24 +1785,24 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
 /*
  * Has to be called with memcg_oom_lock held.
  */
-static int mem_cgroup_oom_unlock(struct mem_cgroup *mem)
+static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *iter;
 
-       for_each_mem_cgroup_tree(iter, mem)
+       for_each_mem_cgroup_tree(iter, memcg)
                iter->oom_lock = false;
        return 0;
 }
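
Together these two helpers form an all-or-nothing hierarchical trylock:
either every group under memcg gets oom_lock, or the locks taken so far are
rolled back up to the group that was already locked. A minimal userspace
sketch with the tree flattened to an array; tree_oom_trylock() is a
hypothetical model, not the kernel function:

#include <stdbool.h>
#include <stdio.h>

#define NGROUPS 4

static bool oom_lock[NGROUPS];

static bool tree_oom_trylock(void)
{
        int failed = -1;

        for (int i = 0; i < NGROUPS; i++) {
                if (oom_lock[i]) {
                        failed = i;     /* subtree already locked by someone */
                        break;
                }
                oom_lock[i] = true;
        }
        if (failed < 0)
                return true;            /* got the whole hierarchy */

        /* roll back exactly what we set, stopping at the failing group */
        for (int i = 0; i < failed; i++)
                oom_lock[i] = false;
        return false;
}

int main(void)
{
        oom_lock[2] = true;             /* pretend a sibling holds group 2 */
        printf("locked: %s\n", tree_oom_trylock() ? "yes" : "no");
        printf("group 0 rolled back: %s\n", oom_lock[0] ? "no" : "yes");
        return 0;
}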
 
-static void mem_cgroup_mark_under_oom(struct mem_cgroup *mem)
+static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *iter;
 
-       for_each_mem_cgroup_tree(iter, mem)
+       for_each_mem_cgroup_tree(iter, memcg)
                atomic_inc(&iter->under_oom);
 }
 
-static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem)
+static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *iter;
 
@@ -1802,7 +1811,7 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem)
         * mem_cgroup_oom_lock() may not be called. We have to use
         * atomic_add_unless() here.
         */
-       for_each_mem_cgroup_tree(iter, mem)
+       for_each_mem_cgroup_tree(iter, memcg)
                atomic_add_unless(&iter->under_oom, -1, 0);
 }
 
@@ -1817,85 +1826,85 @@ struct oom_wait_info {
 static int memcg_oom_wake_function(wait_queue_t *wait,
        unsigned mode, int sync, void *arg)
 {
-       struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg,
-                         *oom_wait_mem;
+       struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg,
+                         *oom_wait_memcg;
        struct oom_wait_info *oom_wait_info;
 
        oom_wait_info = container_of(wait, struct oom_wait_info, wait);
-       oom_wait_mem = oom_wait_info->mem;
+       oom_wait_memcg = oom_wait_info->mem;
 
        /*
         * Both oom_wait_info->mem and wake_memcg are stable under us.
         * Then we can use css_is_ancestor() without worrying about RCU.
         */
-       if (!mem_cgroup_same_or_subtree(oom_wait_mem, wake_mem)
-                       && !mem_cgroup_same_or_subtree(wake_mem, oom_wait_mem))
+       if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
+               && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg))
                return 0;
        return autoremove_wake_function(wait, mode, sync, arg);
 }
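
The wake function above only lets a wakeup through when the waking memcg and
the waiting memcg sit on the same hierarchy branch, in either direction. A
minimal sketch of that filter, assuming a toy parent[] table in place of the
css ancestry checks:

#include <stdbool.h>
#include <stdio.h>

/* toy hierarchy: parent[i] is the parent of group i, -1 for the root */
static const int parent[] = { -1, 0, 0, 1 };

static bool same_or_subtree(int ancestor, int node)
{
        for (; node >= 0; node = parent[node])
                if (node == ancestor)
                        return true;
        return false;
}

/* mirrors the two same_or_subtree() tests in memcg_oom_wake_function() */
static bool should_wake(int waiter, int waker)
{
        return same_or_subtree(waiter, waker) ||
               same_or_subtree(waker, waiter);
}

int main(void)
{
        printf("%d\n", should_wake(1, 3));      /* 1: group 3 is below 1 */
        printf("%d\n", should_wake(3, 1));      /* 1: group 1 is above 3 */
        printf("%d\n", should_wake(2, 3));      /* 0: different branches */
        return 0;
}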
 
-static void memcg_wakeup_oom(struct mem_cgroup *mem)
+static void memcg_wakeup_oom(struct mem_cgroup *memcg)
 {
-       /* for filtering, pass "mem" as argument. */
-       __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, mem);
+       /* for filtering, pass "memcg" as argument. */
+       __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
 }
 
-static void memcg_oom_recover(struct mem_cgroup *mem)
+static void memcg_oom_recover(struct mem_cgroup *memcg)
 {
-       if (mem && atomic_read(&mem->under_oom))
-               memcg_wakeup_oom(mem);
+       if (memcg && atomic_read(&memcg->under_oom))
+               memcg_wakeup_oom(memcg);
 }
 
 /*
  * Try to call the OOM killer. Returns false if we should exit the memory-reclaim loop.
  */
-bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
+bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
 {
        struct oom_wait_info owait;
        bool locked, need_to_kill;
 
-       owait.mem = mem;
+       owait.mem = memcg;
        owait.wait.flags = 0;
        owait.wait.func = memcg_oom_wake_function;
        owait.wait.private = current;
        INIT_LIST_HEAD(&owait.wait.task_list);
        need_to_kill = true;
-       mem_cgroup_mark_under_oom(mem);
+       mem_cgroup_mark_under_oom(memcg);
 
-       /* At first, try to OOM lock hierarchy under mem.*/
+       /* First, try to OOM-lock the hierarchy under memcg. */
        spin_lock(&memcg_oom_lock);
-       locked = mem_cgroup_oom_lock(mem);
+       locked = mem_cgroup_oom_lock(memcg);
        /*
         * Even if signal_pending(), we can't quit charge() loop without
         * accounting, so UNINTERRUPTIBLE is appropriate. But SIGKILL
         * under OOM is always welcome, so use TASK_KILLABLE here.
         */
        prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-       if (!locked || mem->oom_kill_disable)
+       if (!locked || memcg->oom_kill_disable)
                need_to_kill = false;
        if (locked)
-               mem_cgroup_oom_notify(mem);
+               mem_cgroup_oom_notify(memcg);
        spin_unlock(&memcg_oom_lock);
 
        if (need_to_kill) {
                finish_wait(&memcg_oom_waitq, &owait.wait);
-               mem_cgroup_out_of_memory(mem, mask);
+               mem_cgroup_out_of_memory(memcg, mask);
        } else {
                schedule();
                finish_wait(&memcg_oom_waitq, &owait.wait);
        }
        spin_lock(&memcg_oom_lock);
        if (locked)
-               mem_cgroup_oom_unlock(mem);
-       memcg_wakeup_oom(mem);
+               mem_cgroup_oom_unlock(memcg);
+       memcg_wakeup_oom(memcg);
        spin_unlock(&memcg_oom_lock);
 
-       mem_cgroup_unmark_under_oom(mem);
+       mem_cgroup_unmark_under_oom(memcg);
 
        if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
                return false;
        /* Give a chance to the dying process */
-       schedule_timeout(1);
+       schedule_timeout_uninterruptible(1);
        return true;
 }
 
@@ -1926,7 +1935,7 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
 void mem_cgroup_update_page_stat(struct page *page,
                                 enum mem_cgroup_page_stat_item idx, int val)
 {
-       struct mem_cgroup *mem;
+       struct mem_cgroup *memcg;
        struct page_cgroup *pc = lookup_page_cgroup(page);
        bool need_unlock = false;
        unsigned long uninitialized_var(flags);
@@ -1935,16 +1944,16 @@ void mem_cgroup_update_page_stat(struct page *page,
                return;
 
        rcu_read_lock();
-       mem = pc->mem_cgroup;
-       if (unlikely(!mem || !PageCgroupUsed(pc)))
+       memcg = pc->mem_cgroup;
+       if (unlikely(!memcg || !PageCgroupUsed(pc)))
                goto out;
        /* pc->mem_cgroup is unstable ? */
-       if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
+       if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) {
                /* take a lock against to access pc->mem_cgroup */
                move_lock_page_cgroup(pc, &flags);
                need_unlock = true;
-               mem = pc->mem_cgroup;
-               if (!mem || !PageCgroupUsed(pc))
+               memcg = pc->mem_cgroup;
+               if (!memcg || !PageCgroupUsed(pc))
                        goto out;
        }
 
@@ -1960,7 +1969,7 @@ void mem_cgroup_update_page_stat(struct page *page,
                BUG();
        }
 
-       this_cpu_add(mem->stat->count[idx], val);
+       this_cpu_add(memcg->stat->count[idx], val);
 
 out:
        if (unlikely(need_unlock))
@@ -1991,13 +2000,13 @@ static DEFINE_MUTEX(percpu_charge_mutex);
  * cgroup which is not the current target, returns false. This stock will be
  * refilled.
  */
-static bool consume_stock(struct mem_cgroup *mem)
+static bool consume_stock(struct mem_cgroup *memcg)
 {
        struct memcg_stock_pcp *stock;
        bool ret = true;
 
        stock = &get_cpu_var(memcg_stock);
-       if (mem == stock->cached && stock->nr_pages)
+       if (memcg == stock->cached && stock->nr_pages)
                stock->nr_pages--;
        else /* need to call res_counter_charge */
                ret = false;
@@ -2038,24 +2047,24 @@ static void drain_local_stock(struct work_struct *dummy)
  * Cache charges (val), taken from the res_counter, in the local per-CPU area.
  * They will be consumed later by consume_stock().
  */
-static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages)
+static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
        struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
 
-       if (stock->cached != mem) { /* reset if necessary */
+       if (stock->cached != memcg) { /* reset if necessary */
                drain_stock(stock);
-               stock->cached = mem;
+               stock->cached = memcg;
        }
        stock->nr_pages += nr_pages;
        put_cpu_var(memcg_stock);
 }
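
consume_stock()/refill_stock() give each CPU a small cache of pre-charged
pages so single-page charges usually avoid the shared res_counter. A
single-"CPU" userspace sketch; global_charge, stock_owner and stock_pages
are hypothetical stand-ins for the res_counter and the memcg_stock_pcp
fields:

#include <stdbool.h>
#include <stdio.h>

static long global_charge;              /* stands in for the res_counter */
static int stock_owner = -1;            /* memcg id cached in the stock */
static unsigned stock_pages;

static bool consume_stock(int memcg)
{
        if (memcg == stock_owner && stock_pages) {
                stock_pages--;          /* charge satisfied locally */
                return true;
        }
        return false;                   /* caller must hit the counter */
}

static void refill_stock(int memcg, unsigned nr_pages)
{
        if (stock_owner != memcg) {     /* reset if cached for another memcg */
                global_charge -= stock_pages;   /* models drain_stock() */
                stock_pages = 0;
                stock_owner = memcg;
        }
        stock_pages += nr_pages;
}

int main(void)
{
        global_charge += 32;            /* charge one 32-page batch */
        refill_stock(7, 31);            /* cache the batch minus one page */
        for (int i = 0; i < 3; i++)
                printf("page via stock: %d\n", consume_stock(7));
        printf("stock left: %u, global: %ld\n", stock_pages, global_charge);
        return 0;
}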
 
 /*
- * Drains all per-CPU charge caches for given root_mem resp. subtree
+ * Drains all per-CPU charge caches for the given root_memcg and the subtree
  * of the hierarchy under it. The sync flag says whether we should block
  * until the work is done.
  */
-static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
+static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
 {
        int cpu, curcpu;
 
@@ -2064,12 +2073,12 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
        curcpu = get_cpu();
        for_each_online_cpu(cpu) {
                struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
-               struct mem_cgroup *mem;
+               struct mem_cgroup *memcg;
 
-               mem = stock->cached;
-               if (!mem || !stock->nr_pages)
+               memcg = stock->cached;
+               if (!memcg || !stock->nr_pages)
                        continue;
-               if (!mem_cgroup_same_or_subtree(root_mem, mem))
+               if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
                        continue;
                if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
                        if (cpu == curcpu)
@@ -2098,23 +2107,23 @@ out:
  * expects some charges will be returned to the res_counter later but cannot wait for
  * it.
  */
-static void drain_all_stock_async(struct mem_cgroup *root_mem)
+static void drain_all_stock_async(struct mem_cgroup *root_memcg)
 {
        /*
         * If someone is already draining, avoid adding more kworker runs.
         */
        if (!mutex_trylock(&percpu_charge_mutex))
                return;
-       drain_all_stock(root_mem, false);
+       drain_all_stock(root_memcg, false);
        mutex_unlock(&percpu_charge_mutex);
 }
 
 /* This is a synchronous drain interface. */
-static void drain_all_stock_sync(struct mem_cgroup *root_mem)
+static void drain_all_stock_sync(struct mem_cgroup *root_memcg)
 {
        /* called when force_empty is called */
        mutex_lock(&percpu_charge_mutex);
-       drain_all_stock(root_mem, true);
+       drain_all_stock(root_memcg, true);
        mutex_unlock(&percpu_charge_mutex);
 }
 
@@ -2122,35 +2131,35 @@ static void drain_all_stock_sync(struct mem_cgroup *root_mem)
  * This function drains the percpu counter values from a dead CPU and
  * moves them to the local CPU. Note that this function can be preempted.
  */
-static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu)
+static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
 {
        int i;
 
-       spin_lock(&mem->pcp_counter_lock);
+       spin_lock(&memcg->pcp_counter_lock);
        for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
-               long x = per_cpu(mem->stat->count[i], cpu);
+               long x = per_cpu(memcg->stat->count[i], cpu);
 
-               per_cpu(mem->stat->count[i], cpu) = 0;
-               mem->nocpu_base.count[i] += x;
+               per_cpu(memcg->stat->count[i], cpu) = 0;
+               memcg->nocpu_base.count[i] += x;
        }
        for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
-               unsigned long x = per_cpu(mem->stat->events[i], cpu);
+               unsigned long x = per_cpu(memcg->stat->events[i], cpu);
 
-               per_cpu(mem->stat->events[i], cpu) = 0;
-               mem->nocpu_base.events[i] += x;
+               per_cpu(memcg->stat->events[i], cpu) = 0;
+               memcg->nocpu_base.events[i] += x;
        }
        /* need to clear ON_MOVE value, works as a kind of lock. */
-       per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
-       spin_unlock(&mem->pcp_counter_lock);
+       per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
+       spin_unlock(&memcg->pcp_counter_lock);
 }
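
The drain above folds a dead CPU's counters into a shared nocpu_base slot so
no events are lost across hotplug. A minimal sketch with plain arrays in
place of percpu variables (and without the pcp_counter_lock the kernel
needs):

#include <stdio.h>

#define NCPUS 4
#define NSTATS 2

static long percpu[NCPUS][NSTATS] = { {5, 1}, {2, 0}, {7, 3}, {1, 1} };
static long nocpu_base[NSTATS];         /* absorbs counters of dead CPUs */

static void drain_dead_cpu(int cpu)
{
        for (int i = 0; i < NSTATS; i++) {
                nocpu_base[i] += percpu[cpu][i];
                percpu[cpu][i] = 0;     /* the dead CPU's slot is now empty */
        }
}

int main(void)
{
        drain_dead_cpu(2);              /* pretend CPU 2 went offline */
        printf("nocpu_base: %ld %ld\n", nocpu_base[0], nocpu_base[1]);
        return 0;
}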
 
-static void synchronize_mem_cgroup_on_move(struct mem_cgroup *mem, int cpu)
+static void synchronize_mem_cgroup_on_move(struct mem_cgroup *memcg, int cpu)
 {
        int idx = MEM_CGROUP_ON_MOVE;
 
-       spin_lock(&mem->pcp_counter_lock);
-       per_cpu(mem->stat->count[idx], cpu) = mem->nocpu_base.count[idx];
-       spin_unlock(&mem->pcp_counter_lock);
+       spin_lock(&memcg->pcp_counter_lock);
+       per_cpu(memcg->stat->count[idx], cpu) = memcg->nocpu_base.count[idx];
+       spin_unlock(&memcg->pcp_counter_lock);
 }
 
 static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
@@ -2188,7 +2197,7 @@ enum {
        CHARGE_OOM_DIE,         /* the current is killed because of OOM */
 };
 
-static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
+static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
                                unsigned int nr_pages, bool oom_check)
 {
        unsigned long csize = nr_pages * PAGE_SIZE;
@@ -2197,16 +2206,16 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
        unsigned long flags = 0;
        int ret;
 
-       ret = res_counter_charge(&mem->res, csize, &fail_res);
+       ret = res_counter_charge(&memcg->res, csize, &fail_res);
 
        if (likely(!ret)) {
                if (!do_swap_account)
                        return CHARGE_OK;
-               ret = res_counter_charge(&mem->memsw, csize, &fail_res);
+               ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
                if (likely(!ret))
                        return CHARGE_OK;
 
-               res_counter_uncharge(&mem->res, csize);
+               res_counter_uncharge(&memcg->res, csize);
                mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
                flags |= MEM_CGROUP_RECLAIM_NOSWAP;
        } else
@@ -2264,12 +2273,12 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
                                   gfp_t gfp_mask,
                                   unsigned int nr_pages,
-                                  struct mem_cgroup **memcg,
+                                  struct mem_cgroup **ptr,
                                   bool oom)
 {
        unsigned int batch = max(CHARGE_BATCH, nr_pages);
        int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
-       struct mem_cgroup *mem = NULL;
+       struct mem_cgroup *memcg = NULL;
        int ret;
 
        /*
@@ -2287,17 +2296,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
         * thread group leader migrates. It's possible that mm is not
         * set; if so, charge the init_mm (happens for pagecache usage).
         */
-       if (!*memcg && !mm)
+       if (!*ptr && !mm)
                goto bypass;
 again:
-       if (*memcg) { /* css should be a valid one */
-               mem = *memcg;
-               VM_BUG_ON(css_is_removed(&mem->css));
-               if (mem_cgroup_is_root(mem))
+       if (*ptr) { /* css should be a valid one */
+               memcg = *ptr;
+               VM_BUG_ON(css_is_removed(&memcg->css));
+               if (mem_cgroup_is_root(memcg))
                        goto done;
-               if (nr_pages == 1 && consume_stock(mem))
+               if (nr_pages == 1 && consume_stock(memcg))
                        goto done;
-               css_get(&mem->css);
+               css_get(&memcg->css);
        } else {
                struct task_struct *p;
 
@@ -2305,7 +2314,7 @@ again:
                p = rcu_dereference(mm->owner);
                /*
                 * Because we don't have task_lock(), "p" can exit.
-                * In that case, "mem" can point to root or p can be NULL with
+                * In that case, "memcg" can point to root or p can be NULL with
                 * race with swapoff. Then, we have a small risk of mis-accounting.
                 * But such racy mis-accounting always happens because
                 * we don't have cgroup_mutex(). It's overkill and we allow that
@@ -2313,12 +2322,12 @@ again:
                 * (*) swapoff et al. will charge against the mm_struct, not against
                 * the task_struct, so mm->owner can be NULL.
                 */
-               mem = mem_cgroup_from_task(p);
-               if (!mem || mem_cgroup_is_root(mem)) {
+               memcg = mem_cgroup_from_task(p);
+               if (!memcg || mem_cgroup_is_root(memcg)) {
                        rcu_read_unlock();
                        goto done;
                }
-               if (nr_pages == 1 && consume_stock(mem)) {
+               if (nr_pages == 1 && consume_stock(memcg)) {
                        /*
                         * It seems dangerous to access memcg without css_get().
                         * But considering how consume_stock works, it's not
@@ -2331,7 +2340,7 @@ again:
                        goto done;
                }
                /* after here, we may be blocked. we need to get refcnt */
-               if (!css_tryget(&mem->css)) {
+               if (!css_tryget(&memcg->css)) {
                        rcu_read_unlock();
                        goto again;
                }
@@ -2343,7 +2352,7 @@ again:
 
                /* If killed, bypass charge */
                if (fatal_signal_pending(current)) {
-                       css_put(&mem->css);
+                       css_put(&memcg->css);
                        goto bypass;
                }
 
@@ -2353,43 +2362,43 @@ again:
                        nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
                }
 
-               ret = mem_cgroup_do_charge(mem, gfp_mask, batch, oom_check);
+               ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, oom_check);
                switch (ret) {
                case CHARGE_OK:
                        break;
                case CHARGE_RETRY: /* not in OOM situation but retry */
                        batch = nr_pages;
-                       css_put(&mem->css);
-                       mem = NULL;
+                       css_put(&memcg->css);
+                       memcg = NULL;
                        goto again;
                case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
-                       css_put(&mem->css);
+                       css_put(&memcg->css);
                        goto nomem;
                case CHARGE_NOMEM: /* OOM routine works */
                        if (!oom) {
-                               css_put(&mem->css);
+                               css_put(&memcg->css);
                                goto nomem;
                        }
                        /* If oom, we never return -ENOMEM */
                        nr_oom_retries--;
                        break;
                case CHARGE_OOM_DIE: /* Killed by OOM Killer */
-                       css_put(&mem->css);
+                       css_put(&memcg->css);
                        goto bypass;
                }
        } while (ret != CHARGE_OK);
 
        if (batch > nr_pages)
-               refill_stock(mem, batch - nr_pages);
-       css_put(&mem->css);
+               refill_stock(memcg, batch - nr_pages);
+       css_put(&memcg->css);
 done:
-       *memcg = mem;
+       *ptr = memcg;
        return 0;
 nomem:
-       *memcg = NULL;
+       *ptr = NULL;
        return -ENOMEM;
 bypass:
-       *memcg = NULL;
+       *ptr = NULL;
        return 0;
 }
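
The retry loop above funnels into mem_cgroup_do_charge(), whose key invariant
is: charge res first, then memsw, and undo the res charge if memsw fails, so
the two counters never drift apart. A minimal sketch with a toy counter type
(hypothetical, not res_counter):

#include <stdbool.h>
#include <stdio.h>

struct counter { long usage, limit; };  /* toy res_counter */

static bool counter_charge(struct counter *c, long bytes)
{
        if (c->usage + bytes > c->limit)
                return false;           /* over limit: charge refused */
        c->usage += bytes;
        return true;
}

/* charge both counters, rolling back the first on the second's failure */
static bool do_charge(struct counter *res, struct counter *memsw, long bytes)
{
        if (!counter_charge(res, bytes))
                return false;
        if (!counter_charge(memsw, bytes)) {
                res->usage -= bytes;    /* models res_counter_uncharge() */
                return false;
        }
        return true;
}

int main(void)
{
        struct counter res = { 0, 4096 }, memsw = { 0, 2048 };

        printf("charge 1024: %d\n", do_charge(&res, &memsw, 1024)); /* 1 */
        printf("charge 2048: %d\n", do_charge(&res, &memsw, 2048)); /* 0 */
        printf("res usage after rollback: %ld\n", res.usage);       /* 1024 */
        return 0;
}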
 
@@ -2398,15 +2407,15 @@ bypass:
  * This function is for that and does the uncharge, dropping the css refcount
  * taken by try_charge().
  */
-static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem,
+static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
                                       unsigned int nr_pages)
 {
-       if (!mem_cgroup_is_root(mem)) {
+       if (!mem_cgroup_is_root(memcg)) {
                unsigned long bytes = nr_pages * PAGE_SIZE;
 
-               res_counter_uncharge(&mem->res, bytes);
+               res_counter_uncharge(&memcg->res, bytes);
                if (do_swap_account)
-                       res_counter_uncharge(&mem->memsw, bytes);
+                       res_counter_uncharge(&memcg->memsw, bytes);
        }
 }
 
@@ -2431,7 +2440,7 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 {
-       struct mem_cgroup *mem = NULL;
+       struct mem_cgroup *memcg = NULL;
        struct page_cgroup *pc;
        unsigned short id;
        swp_entry_t ent;
@@ -2441,23 +2450,23 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
        pc = lookup_page_cgroup(page);
        lock_page_cgroup(pc);
        if (PageCgroupUsed(pc)) {
-               mem = pc->mem_cgroup;
-               if (mem && !css_tryget(&mem->css))
-                       mem = NULL;
+               memcg = pc->mem_cgroup;
+               if (memcg && !css_tryget(&memcg->css))
+                       memcg = NULL;
        } else if (PageSwapCache(page)) {
                ent.val = page_private(page);
                id = lookup_swap_cgroup(ent);
                rcu_read_lock();
-               mem = mem_cgroup_lookup(id);
-               if (mem && !css_tryget(&mem->css))
-                       mem = NULL;
+               memcg = mem_cgroup_lookup(id);
+               if (memcg && !css_tryget(&memcg->css))
+                       memcg = NULL;
                rcu_read_unlock();
        }
        unlock_page_cgroup(pc);
-       return mem;
+       return memcg;
 }
 
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                                       struct page *page,
                                       unsigned int nr_pages,
                                       struct page_cgroup *pc,
@@ -2466,14 +2475,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
        lock_page_cgroup(pc);
        if (unlikely(PageCgroupUsed(pc))) {
                unlock_page_cgroup(pc);
-               __mem_cgroup_cancel_charge(mem, nr_pages);
+               __mem_cgroup_cancel_charge(memcg, nr_pages);
                return;
        }
        /*
         * we don't need page_cgroup_lock for tail pages, because they are not
         * accessed by any other context at this point.
         */
-       pc->mem_cgroup = mem;
+       pc->mem_cgroup = memcg;
        /*
         * We access a page_cgroup asynchronously without lock_page_cgroup().
         * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
@@ -2496,14 +2505,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
                break;
        }
 
-       mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
+       mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
        unlock_page_cgroup(pc);
        /*
         * "charge_statistics" updated event counter. Then, check it.
         * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
         * if they exceeds softlimit.
         */
-       memcg_check_events(mem, page);
+       memcg_check_events(memcg, page);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -2690,7 +2699,7 @@ out:
 static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask, enum charge_type ctype)
 {
-       struct mem_cgroup *mem = NULL;
+       struct mem_cgroup *memcg = NULL;
        unsigned int nr_pages = 1;
        struct page_cgroup *pc;
        bool oom = true;
@@ -2709,11 +2718,11 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
        pc = lookup_page_cgroup(page);
        BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */
 
-       ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &mem, oom);
-       if (ret || !mem)
+       ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
+       if (ret || !memcg)
                return ret;
 
-       __mem_cgroup_commit_charge(mem, page, nr_pages, pc, ctype);
+       __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
        return 0;
 }
 
@@ -2742,7 +2751,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
                                        enum charge_type ctype);
 
 static void
-__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
+__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
                                        enum charge_type ctype)
 {
        struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -2752,7 +2761,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
         * LRU. Take care of it.
         */
        mem_cgroup_lru_del_before_commit(page);
-       __mem_cgroup_commit_charge(mem, page, 1, pc, ctype);
+       __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
        mem_cgroup_lru_add_after_commit(page);
        return;
 }
@@ -2760,7 +2769,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask)
 {
-       struct mem_cgroup *mem = NULL;
+       struct mem_cgroup *memcg = NULL;
        int ret;
 
        if (mem_cgroup_disabled())
@@ -2772,8 +2781,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                mm = &init_mm;
 
        if (page_is_file_cache(page)) {
-               ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &mem, true);
-               if (ret || !mem)
+               ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
+               if (ret || !memcg)
                        return ret;
 
                /*
@@ -2781,15 +2790,15 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                 * put that would remove them from the LRU list, make
                 * sure that they get relinked properly.
                 */
-               __mem_cgroup_commit_charge_lrucare(page, mem,
+               __mem_cgroup_commit_charge_lrucare(page, memcg,
                                        MEM_CGROUP_CHARGE_TYPE_CACHE);
                return ret;
        }
        /* shmem */
        if (PageSwapCache(page)) {
-               ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
+               ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
                if (!ret)
-                       __mem_cgroup_commit_charge_swapin(page, mem,
+                       __mem_cgroup_commit_charge_swapin(page, memcg,
                                        MEM_CGROUP_CHARGE_TYPE_SHMEM);
        } else
                ret = mem_cgroup_charge_common(page, mm, gfp_mask,
@@ -2808,7 +2817,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                                 struct page *page,
                                 gfp_t mask, struct mem_cgroup **ptr)
 {
-       struct mem_cgroup *mem;
+       struct mem_cgroup *memcg;
        int ret;
 
        *ptr = NULL;
@@ -2826,12 +2835,12 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
         */
        if (!PageSwapCache(page))
                goto charge_cur_mm;
-       mem = try_get_mem_cgroup_from_page(page);
-       if (!mem)
+       memcg = try_get_mem_cgroup_from_page(page);
+       if (!memcg)
                goto charge_cur_mm;
-       *ptr = mem;
+       *ptr = memcg;
        ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true);
-       css_put(&mem->css);
+       css_put(&memcg->css);
        return ret;
 charge_cur_mm:
        if (unlikely(!mm))
@@ -2891,16 +2900,16 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
                                        MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
+void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
 {
        if (mem_cgroup_disabled())
                return;
-       if (!mem)
+       if (!memcg)
                return;
-       __mem_cgroup_cancel_charge(mem, 1);
+       __mem_cgroup_cancel_charge(memcg, 1);
 }
 
-static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
+static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
                                   unsigned int nr_pages,
                                   const enum charge_type ctype)
 {
@@ -2918,7 +2927,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
         * uncharges. Then, it's ok to ignore memcg's refcnt.
         */
        if (!batch->memcg)
-               batch->memcg = mem;
+               batch->memcg = memcg;
        /*
         * do_batch > 0 when unmapping pages or inode invalidate/truncate.
         * In those cases, all pages freed continuously can be expected to be in
@@ -2938,7 +2947,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
         * merge a series of uncharges into a single res_counter uncharge.
         * If not, we uncharge the res_counter one by one.
         */
-       if (batch->memcg != mem)
+       if (batch->memcg != memcg)
                goto direct_uncharge;
        /* remember freed charge and uncharge it later */
        batch->nr_pages++;
@@ -2946,11 +2955,11 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
                batch->memsw_nr_pages++;
        return;
 direct_uncharge:
-       res_counter_uncharge(&mem->res, nr_pages * PAGE_SIZE);
+       res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
        if (uncharge_memsw)
-               res_counter_uncharge(&mem->memsw, nr_pages * PAGE_SIZE);
-       if (unlikely(batch->memcg != mem))
-               memcg_oom_recover(mem);
+               res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
+       if (unlikely(batch->memcg != memcg))
+               memcg_oom_recover(memcg);
        return;
 }
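
The batching above exists so a long run of uncharges against one memcg costs
a single res_counter update instead of one per page. A minimal userspace
sketch; struct uncharge_batch and counter_usage are hypothetical stand-ins
for current->memcg_batch and the res_counter:

#include <stdio.h>

struct uncharge_batch { int memcg; unsigned long nr_pages; };

static long counter_usage = 100;        /* pages charged to memcg 7 */

static void do_uncharge(struct uncharge_batch *batch, int memcg,
                        unsigned long nr_pages)
{
        if (batch && batch->memcg == memcg) {
                batch->nr_pages += nr_pages;    /* remember, flush later */
                return;
        }
        counter_usage -= nr_pages;              /* direct uncharge */
}

int main(void)
{
        struct uncharge_batch batch = { .memcg = 7 };

        for (int i = 0; i < 5; i++)             /* e.g. munmap of 5 pages */
                do_uncharge(&batch, 7, 1);
        counter_usage -= batch.nr_pages;        /* one flush at the end */
        printf("usage: %ld (one counter update, not five)\n", counter_usage);
        return 0;
}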
 
@@ -2960,7 +2969,7 @@ direct_uncharge:
 static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
-       struct mem_cgroup *mem = NULL;
+       struct mem_cgroup *memcg = NULL;
        unsigned int nr_pages = 1;
        struct page_cgroup *pc;
 
@@ -2983,7 +2992,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 
        lock_page_cgroup(pc);
 
-       mem = pc->mem_cgroup;
+       memcg = pc->mem_cgroup;
 
        if (!PageCgroupUsed(pc))
                goto unlock_out;
@@ -3006,7 +3015,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
                break;
        }
 
-       mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -nr_pages);
+       mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -nr_pages);
 
        ClearPageCgroupUsed(pc);
        /*
@@ -3018,18 +3027,18 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 
        unlock_page_cgroup(pc);
        /*
-        * even after unlock, we have mem->res.usage here and this memcg
+        * even after unlock, we have memcg->res.usage here and this memcg
         * will never be freed.
         */
-       memcg_check_events(mem, page);
+       memcg_check_events(memcg, page);
        if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
-               mem_cgroup_swap_statistics(mem, true);
-               mem_cgroup_get(mem);
+               mem_cgroup_swap_statistics(memcg, true);
+               mem_cgroup_get(memcg);
        }
-       if (!mem_cgroup_is_root(mem))
-               mem_cgroup_do_uncharge(mem, nr_pages, ctype);
+       if (!mem_cgroup_is_root(memcg))
+               mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
 
-       return mem;
+       return memcg;
 
 unlock_out:
        unlock_page_cgroup(pc);
@@ -3219,7 +3228,7 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
 int mem_cgroup_prepare_migration(struct page *page,
        struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask)
 {
-       struct mem_cgroup *mem = NULL;
+       struct mem_cgroup *memcg = NULL;
        struct page_cgroup *pc;
        enum charge_type ctype;
        int ret = 0;
@@ -3233,8 +3242,8 @@ int mem_cgroup_prepare_migration(struct page *page,
        pc = lookup_page_cgroup(page);
        lock_page_cgroup(pc);
        if (PageCgroupUsed(pc)) {
-               mem = pc->mem_cgroup;
-               css_get(&mem->css);
+               memcg = pc->mem_cgroup;
+               css_get(&memcg->css);
                /*
                 * When migrating an anonymous page, its mapcount goes down
                 * to 0 and uncharge() will be called. But, even if it's fully
@@ -3272,12 +3281,12 @@ int mem_cgroup_prepare_migration(struct page *page,
         * If the page is not charged at this point,
         * we return here.
         */
-       if (!mem)
+       if (!memcg)
                return 0;
 
-       *ptr = mem;
+       *ptr = memcg;
        ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false);
-       css_put(&mem->css);/* drop extra refcnt */
+       css_put(&memcg->css);/* drop extra refcnt */
        if (ret || *ptr == NULL) {
                if (PageAnon(page)) {
                        lock_page_cgroup(pc);
@@ -3303,21 +3312,21 @@ int mem_cgroup_prepare_migration(struct page *page,
                ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
        else
                ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-       __mem_cgroup_commit_charge(mem, page, 1, pc, ctype);
+       __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
        return ret;
 }
 
 /* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct mem_cgroup *mem,
+void mem_cgroup_end_migration(struct mem_cgroup *memcg,
        struct page *oldpage, struct page *newpage, bool migration_ok)
 {
        struct page *used, *unused;
        struct page_cgroup *pc;
 
-       if (!mem)
+       if (!memcg)
                return;
        /* blocks rmdir() */
-       cgroup_exclude_rmdir(&mem->css);
+       cgroup_exclude_rmdir(&memcg->css);
        if (!migration_ok) {
                used = oldpage;
                unused = newpage;
@@ -3353,7 +3362,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
         * So, rmdir()->pre_destroy() can be called while we do this charge.
         * In that case, we need to call pre_destroy() again. Check it here.
         */
-       cgroup_release_and_wakeup_rmdir(&mem->css);
+       cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -3432,7 +3441,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                /*
                 * Rather than hiding it all in some function, I do this in an
                 * open-coded manner so you can see what it really does.
-                * We have to guarantee mem->res.limit < mem->memsw.limit.
+                * We have to guarantee memcg->res.limit < memcg->memsw.limit.
                 */
                mutex_lock(&set_limit_mutex);
                memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
@@ -3494,7 +3503,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
                /*
                 * Rather than hiding it all in some function, I do this in an
                 * open-coded manner so you can see what it really does.
-                * We have to guarantee mem->res.limit < mem->memsw.limit.
+                * We have to guarantee memcg->res.limit < memcg->memsw.limit.
                 */
                mutex_lock(&set_limit_mutex);
                memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -3632,7 +3641,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
  * This routine traverses the page_cgroups in the given list and drops them all.
  * *And* this routine doesn't reclaim the page itself, it just removes the page_cgroup.
  */
-static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
+static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
                                int node, int zid, enum lru_list lru)
 {
        struct zone *zone;
@@ -3643,7 +3652,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
        int ret = 0;
 
        zone = &NODE_DATA(node)->node_zones[zid];
-       mz = mem_cgroup_zoneinfo(mem, node, zid);
+       mz = mem_cgroup_zoneinfo(memcg, node, zid);
        list = &mz->lists[lru];
 
        loop = MEM_CGROUP_ZSTAT(mz, lru);
@@ -3670,7 +3679,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 
                page = lookup_cgroup_page(pc);
 
-               ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
+               ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
                if (ret == -ENOMEM)
                        break;
 
@@ -3691,14 +3700,14 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
  * make the mem_cgroup's charge 0 if there is no task.
  * This enables deleting this mem_cgroup.
  */
-static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
+static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all)
 {
        int ret;
        int node, zid, shrink;
        int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-       struct cgroup *cgrp = mem->css.cgroup;
+       struct cgroup *cgrp = memcg->css.cgroup;
 
-       css_get(&mem->css);
+       css_get(&memcg->css);
 
        shrink = 0;
        /* should free all ? */
@@ -3714,14 +3723,14 @@ move_account:
                        goto out;
                /* This is for making sure all *used* pages are on an LRU. */
                lru_add_drain_all();
-               drain_all_stock_sync(mem);
+               drain_all_stock_sync(memcg);
                ret = 0;
-               mem_cgroup_start_move(mem);
+               mem_cgroup_start_move(memcg);
                for_each_node_state(node, N_HIGH_MEMORY) {
                        for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
                                enum lru_list l;
                                for_each_lru(l) {
-                                       ret = mem_cgroup_force_empty_list(mem,
+                                       ret = mem_cgroup_force_empty_list(memcg,
                                                        node, zid, l);
                                        if (ret)
                                                break;
@@ -3730,16 +3739,16 @@ move_account:
                        if (ret)
                                break;
                }
-               mem_cgroup_end_move(mem);
-               memcg_oom_recover(mem);
+               mem_cgroup_end_move(memcg);
+               memcg_oom_recover(memcg);
                /* it seems the parent cgroup doesn't have enough memory */
                if (ret == -ENOMEM)
                        goto try_to_free;
                cond_resched();
        /* "ret" should also be checked to ensure all lists are empty. */
-       } while (mem->res.usage > 0 || ret);
+       } while (memcg->res.usage > 0 || ret);
 out:
-       css_put(&mem->css);
+       css_put(&memcg->css);
        return ret;
 
 try_to_free:
@@ -3752,14 +3761,14 @@ try_to_free:
        lru_add_drain_all();
        /* try to free all pages in this cgroup */
        shrink = 1;
-       while (nr_retries && mem->res.usage > 0) {
+       while (nr_retries && memcg->res.usage > 0) {
                int progress;
 
                if (signal_pending(current)) {
                        ret = -EINTR;
                        goto out;
                }
-               progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
+               progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
                                                false);
                if (!progress) {
                        nr_retries--;
@@ -3788,12 +3797,12 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
                                        u64 val)
 {
        int retval = 0;
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
        struct cgroup *parent = cont->parent;
-       struct mem_cgroup *parent_mem = NULL;
+       struct mem_cgroup *parent_memcg = NULL;
 
        if (parent)
-               parent_mem = mem_cgroup_from_cont(parent);
+               parent_memcg = mem_cgroup_from_cont(parent);
 
        cgroup_lock();
        /*
@@ -3804,10 +3813,10 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
         * For the root cgroup, parent_memcg is NULL; we allow the value to be
         * set if there are no children.
         */
-       if ((!parent_mem || !parent_mem->use_hierarchy) &&
+       if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
                                (val == 1 || val == 0)) {
                if (list_empty(&cont->children))
-                       mem->use_hierarchy = val;
+                       memcg->use_hierarchy = val;
                else
                        retval = -EBUSY;
        } else
@@ -3818,14 +3827,14 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 }
 
 
-static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem,
+static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
                                               enum mem_cgroup_stat_index idx)
 {
        struct mem_cgroup *iter;
        long val = 0;
 
        /* Per-cpu values can be negative, use a signed accumulator */
-       for_each_mem_cgroup_tree(iter, mem)
+       for_each_mem_cgroup_tree(iter, memcg)
                val += mem_cgroup_read_stat(iter, idx);
 
        if (val < 0) /* race ? */
@@ -3833,29 +3842,29 @@ static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem,
        return val;
 }
 
-static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
+static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
 {
        u64 val;
 
-       if (!mem_cgroup_is_root(mem)) {
+       if (!mem_cgroup_is_root(memcg)) {
                if (!swap)
-                       return res_counter_read_u64(&mem->res, RES_USAGE);
+                       return res_counter_read_u64(&memcg->res, RES_USAGE);
                else
-                       return res_counter_read_u64(&mem->memsw, RES_USAGE);
+                       return res_counter_read_u64(&memcg->memsw, RES_USAGE);
        }
 
-       val = mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_CACHE);
-       val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_RSS);
+       val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+       val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
 
        if (swap)
-               val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
+               val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
 
        return val << PAGE_SHIFT;
 }
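
mem_cgroup_usage() reads the res_counter directly for ordinary groups, but
for the root it must sum the recursive statistics, since root charges bypass
the counters. A minimal sketch under those assumptions (struct group and its
fields are hypothetical):

#include <stdio.h>

#define PAGE_SHIFT 12

struct group {
        int is_root;
        unsigned long counter_pages;    /* res/memsw usage, in pages */
        unsigned long cache, rss;       /* per-group stats, in pages */
};

static unsigned long long usage_bytes(const struct group *g,
                                      const struct group *tree, int n)
{
        unsigned long pages = 0;

        if (!g->is_root)                /* non-root: just read the counter */
                return (unsigned long long)g->counter_pages << PAGE_SHIFT;
        for (int i = 0; i < n; i++)     /* root: sum stats over the tree */
                pages += tree[i].cache + tree[i].rss;
        return (unsigned long long)pages << PAGE_SHIFT;
}

int main(void)
{
        struct group tree[] = {
                { .is_root = 1, .cache = 10, .rss = 20 },
                { .counter_pages = 15, .cache = 5, .rss = 10 },
        };

        printf("root:  %llu bytes\n", usage_bytes(&tree[0], tree, 2));
        printf("child: %llu bytes\n", usage_bytes(&tree[1], tree, 2));
        return 0;
}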
 
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
        u64 val;
        int type, name;
 
@@ -3864,15 +3873,15 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
        switch (type) {
        case _MEM:
                if (name == RES_USAGE)
-                       val = mem_cgroup_usage(mem, false);
+                       val = mem_cgroup_usage(memcg, false);
                else
-                       val = res_counter_read_u64(&mem->res, name);
+                       val = res_counter_read_u64(&memcg->res, name);
                break;
        case _MEMSWAP:
                if (name == RES_USAGE)
-                       val = mem_cgroup_usage(mem, true);
+                       val = mem_cgroup_usage(memcg, true);
                else
-                       val = res_counter_read_u64(&mem->memsw, name);
+                       val = res_counter_read_u64(&memcg->memsw, name);
                break;
        default:
                BUG();
@@ -3960,24 +3969,24 @@ out:
 
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 {
-       struct mem_cgroup *mem;
+       struct mem_cgroup *memcg;
        int type, name;
 
-       mem = mem_cgroup_from_cont(cont);
+       memcg = mem_cgroup_from_cont(cont);
        type = MEMFILE_TYPE(event);
        name = MEMFILE_ATTR(event);
        switch (name) {
        case RES_MAX_USAGE:
                if (type == _MEM)
-                       res_counter_reset_max(&mem->res);
+                       res_counter_reset_max(&memcg->res);
                else
-                       res_counter_reset_max(&mem->memsw);
+                       res_counter_reset_max(&memcg->memsw);
                break;
        case RES_FAILCNT:
                if (type == _MEM)
-                       res_counter_reset_failcnt(&mem->res);
+                       res_counter_reset_failcnt(&memcg->res);
                else
-                       res_counter_reset_failcnt(&mem->memsw);
+                       res_counter_reset_failcnt(&memcg->memsw);
                break;
        }
 
@@ -3994,7 +4003,7 @@ static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
 static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
                                        struct cftype *cft, u64 val)
 {
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
 
        if (val >= (1 << NR_MOVE_TYPE))
                return -EINVAL;
@@ -4004,7 +4013,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
         * inconsistent.
         */
        cgroup_lock();
-       mem->move_charge_at_immigrate = val;
+       memcg->move_charge_at_immigrate = val;
        cgroup_unlock();
 
        return 0;
@@ -4061,49 +4070,49 @@ struct {
 
 
 static void
-mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
+mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
 {
        s64 val;
 
        /* per cpu stat */
-       val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
+       val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE);
        s->stat[MCS_CACHE] += val * PAGE_SIZE;
-       val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
+       val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS);
        s->stat[MCS_RSS] += val * PAGE_SIZE;
-       val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED);
+       val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
        s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
-       val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGIN);
+       val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN);
        s->stat[MCS_PGPGIN] += val;
-       val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGOUT);
+       val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT);
        s->stat[MCS_PGPGOUT] += val;
        if (do_swap_account) {
-               val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
+               val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
                s->stat[MCS_SWAP] += val * PAGE_SIZE;
        }
-       val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT);
+       val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGFAULT);
        s->stat[MCS_PGFAULT] += val;
-       val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT);
+       val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT);
        s->stat[MCS_PGMAJFAULT] += val;
 
        /* per zone stat */
-       val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_ANON));
+       val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
        s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE;
-       val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_ANON));
+       val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
        s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE;
-       val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_FILE));
+       val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
        s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE;
-       val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_FILE));
+       val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
        s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
-       val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_UNEVICTABLE));
+       val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
        s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
 }
 
 static void
-mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
+mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
 {
        struct mem_cgroup *iter;
 
-       for_each_mem_cgroup_tree(iter, mem)
+       for_each_mem_cgroup_tree(iter, memcg)
                mem_cgroup_get_local_stat(iter, s);
 }
 
@@ -4189,8 +4198,6 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
        }
 
 #ifdef CONFIG_DEBUG_VM
-       cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
-
        {
                int nid, zid;
                struct mem_cgroup_per_zone *mz;
@@ -4327,20 +4334,20 @@ static int compare_thresholds(const void *a, const void *b)
        return _a->threshold - _b->threshold;
 }
 
-static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem)
+static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
 {
        struct mem_cgroup_eventfd_list *ev;
 
-       list_for_each_entry(ev, &mem->oom_notify, list)
+       list_for_each_entry(ev, &memcg->oom_notify, list)
                eventfd_signal(ev->eventfd, 1);
        return 0;
 }
 
-static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
+static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *iter;
 
-       for_each_mem_cgroup_tree(iter, mem)
+       for_each_mem_cgroup_tree(iter, memcg)
                mem_cgroup_oom_notify_cb(iter);
 }
 
@@ -4530,7 +4537,7 @@ static int mem_cgroup_oom_register_event(struct cgroup *cgrp,
 static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
        struct cftype *cft, struct eventfd_ctx *eventfd)
 {
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
        struct mem_cgroup_eventfd_list *ev, *tmp;
        int type = MEMFILE_TYPE(cft->private);
 
@@ -4538,7 +4545,7 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
 
        spin_lock(&memcg_oom_lock);
 
-       list_for_each_entry_safe(ev, tmp, &mem->oom_notify, list) {
+       list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) {
                if (ev->eventfd == eventfd) {
                        list_del(&ev->list);
                        kfree(ev);
@@ -4551,11 +4558,11 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
 static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
        struct cftype *cft,  struct cgroup_map_cb *cb)
 {
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
 
-       cb->fill(cb, "oom_kill_disable", mem->oom_kill_disable);
+       cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable);
 
-       if (atomic_read(&mem->under_oom))
+       if (atomic_read(&memcg->under_oom))
                cb->fill(cb, "under_oom", 1);
        else
                cb->fill(cb, "under_oom", 0);
@@ -4565,7 +4572,7 @@ static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
 static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
        struct cftype *cft, u64 val)
 {
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
        struct mem_cgroup *parent;
 
        /* cannot set to root cgroup and only 0 and 1 are allowed */
@@ -4577,13 +4584,13 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
        cgroup_lock();
        /* oom-kill-disable is a flag for subhierarchy. */
        if ((parent->use_hierarchy) ||
-           (mem->use_hierarchy && !list_empty(&cgrp->children))) {
+           (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
                cgroup_unlock();
                return -EINVAL;
        }
-       mem->oom_kill_disable = val;
+       memcg->oom_kill_disable = val;
        if (!val)
-               memcg_oom_recover(mem);
+               memcg_oom_recover(memcg);
        cgroup_unlock();
        return 0;
 }
@@ -4719,7 +4726,7 @@ static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
 }
 #endif
 
-static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
+static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 {
        struct mem_cgroup_per_node *pn;
        struct mem_cgroup_per_zone *mz;
@@ -4739,21 +4746,21 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
        if (!pn)
                return 1;
 
-       mem->info.nodeinfo[node] = pn;
        for (zone = 0; zone < MAX_NR_ZONES; zone++) {
                mz = &pn->zoneinfo[zone];
                for_each_lru(l)
                        INIT_LIST_HEAD(&mz->lists[l]);
                mz->usage_in_excess = 0;
                mz->on_tree = false;
-               mz->mem = mem;
+               mz->mem = memcg;
        }
+       memcg->info.nodeinfo[node] = pn;
        return 0;
 }
 
-static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
+static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 {
-       kfree(mem->info.nodeinfo[node]);
+       kfree(memcg->info.nodeinfo[node]);
 }
 
 static struct mem_cgroup *mem_cgroup_alloc(void)
@@ -4795,51 +4802,51 @@ out_free:
  * Removal of cgroup itself succeeds regardless of refs from swap.
  */
 
-static void __mem_cgroup_free(struct mem_cgroup *mem)
+static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
        int node;
 
-       mem_cgroup_remove_from_trees(mem);
-       free_css_id(&mem_cgroup_subsys, &mem->css);
+       mem_cgroup_remove_from_trees(memcg);
+       free_css_id(&mem_cgroup_subsys, &memcg->css);
 
        for_each_node_state(node, N_POSSIBLE)
-               free_mem_cgroup_per_zone_info(mem, node);
+               free_mem_cgroup_per_zone_info(memcg, node);
 
-       free_percpu(mem->stat);
+       free_percpu(memcg->stat);
        if (sizeof(struct mem_cgroup) < PAGE_SIZE)
-               kfree(mem);
+               kfree(memcg);
        else
-               vfree(mem);
+               vfree(memcg);
 }
 
-static void mem_cgroup_get(struct mem_cgroup *mem)
+static void mem_cgroup_get(struct mem_cgroup *memcg)
 {
-       atomic_inc(&mem->refcnt);
+       atomic_inc(&memcg->refcnt);
 }
 
-static void __mem_cgroup_put(struct mem_cgroup *mem, int count)
+static void __mem_cgroup_put(struct mem_cgroup *memcg, int count)
 {
-       if (atomic_sub_and_test(count, &mem->refcnt)) {
-               struct mem_cgroup *parent = parent_mem_cgroup(mem);
-               __mem_cgroup_free(mem);
+       if (atomic_sub_and_test(count, &memcg->refcnt)) {
+               struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+               __mem_cgroup_free(memcg);
                if (parent)
                        mem_cgroup_put(parent);
        }
 }
 
-static void mem_cgroup_put(struct mem_cgroup *mem)
+static void mem_cgroup_put(struct mem_cgroup *memcg)
 {
-       __mem_cgroup_put(mem, 1);
+       __mem_cgroup_put(memcg, 1);
 }
 
 /*
  * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
  */
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem)
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 {
-       if (!mem->res.parent)
+       if (!memcg->res.parent)
                return NULL;
-       return mem_cgroup_from_res_counter(mem->res.parent, res);
+       return mem_cgroup_from_res_counter(memcg->res.parent, res);
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -4882,16 +4889,16 @@ static int mem_cgroup_soft_limit_tree_init(void)
 static struct cgroup_subsys_state * __ref
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
-       struct mem_cgroup *mem, *parent;
+       struct mem_cgroup *memcg, *parent;
        long error = -ENOMEM;
        int node;
 
-       mem = mem_cgroup_alloc();
-       if (!mem)
+       memcg = mem_cgroup_alloc();
+       if (!memcg)
                return ERR_PTR(error);
 
        for_each_node_state(node, N_POSSIBLE)
-               if (alloc_mem_cgroup_per_zone_info(mem, node))
+               if (alloc_mem_cgroup_per_zone_info(memcg, node))
                        goto free_out;
 
        /* root ? */
@@ -4899,7 +4906,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
                int cpu;
                enable_swap_cgroup();
                parent = NULL;
-               root_mem_cgroup = mem;
+               root_mem_cgroup = memcg;
                if (mem_cgroup_soft_limit_tree_init())
                        goto free_out;
                for_each_possible_cpu(cpu) {
@@ -4910,13 +4917,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
                hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
        } else {
                parent = mem_cgroup_from_cont(cont->parent);
-               mem->use_hierarchy = parent->use_hierarchy;
-               mem->oom_kill_disable = parent->oom_kill_disable;
+               memcg->use_hierarchy = parent->use_hierarchy;
+               memcg->oom_kill_disable = parent->oom_kill_disable;
        }
 
        if (parent && parent->use_hierarchy) {
-               res_counter_init(&mem->res, &parent->res);
-               res_counter_init(&mem->memsw, &parent->memsw);
+               res_counter_init(&memcg->res, &parent->res);
+               res_counter_init(&memcg->memsw, &parent->memsw);
                /*
                 * We increment refcnt of the parent to ensure that we can
                 * safely access it on res_counter_charge/uncharge.
@@ -4925,21 +4932,21 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
                 */
                mem_cgroup_get(parent);
        } else {
-               res_counter_init(&mem->res, NULL);
-               res_counter_init(&mem->memsw, NULL);
+               res_counter_init(&memcg->res, NULL);
+               res_counter_init(&memcg->memsw, NULL);
        }
-       mem->last_scanned_child = 0;
-       mem->last_scanned_node = MAX_NUMNODES;
-       INIT_LIST_HEAD(&mem->oom_notify);
+       memcg->last_scanned_child = 0;
+       memcg->last_scanned_node = MAX_NUMNODES;
+       INIT_LIST_HEAD(&memcg->oom_notify);
 
        if (parent)
-               mem->swappiness = mem_cgroup_swappiness(parent);
-       atomic_set(&mem->refcnt, 1);
-       mem->move_charge_at_immigrate = 0;
-       mutex_init(&mem->thresholds_lock);
-       return &mem->css;
+               memcg->swappiness = mem_cgroup_swappiness(parent);
+       atomic_set(&memcg->refcnt, 1);
+       memcg->move_charge_at_immigrate = 0;
+       mutex_init(&memcg->thresholds_lock);
+       return &memcg->css;
 free_out:
-       __mem_cgroup_free(mem);
+       __mem_cgroup_free(memcg);
        root_mem_cgroup = NULL;
        return ERR_PTR(error);
 }
@@ -4947,17 +4954,17 @@ free_out:
 static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
                                        struct cgroup *cont)
 {
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
-       return mem_cgroup_force_empty(mem, false);
+       return mem_cgroup_force_empty(memcg, false);
 }
 
 static void mem_cgroup_destroy(struct cgroup_subsys *ss,
                                struct cgroup *cont)
 {
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
-       mem_cgroup_put(mem);
+       mem_cgroup_put(memcg);
 }
 
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
@@ -4980,9 +4987,9 @@ static int mem_cgroup_do_precharge(unsigned long count)
 {
        int ret = 0;
        int batch_count = PRECHARGE_COUNT_AT_ONCE;
-       struct mem_cgroup *mem = mc.to;
+       struct mem_cgroup *memcg = mc.to;
 
-       if (mem_cgroup_is_root(mem)) {
+       if (mem_cgroup_is_root(memcg)) {
                mc.precharge += count;
                /* we don't need css_get for root */
                return ret;
@@ -4991,16 +4998,16 @@ static int mem_cgroup_do_precharge(unsigned long count)
        if (count > 1) {
                struct res_counter *dummy;
                /*
-                * "mem" cannot be under rmdir() because we've already checked
+                * "memcg" cannot be under rmdir() because we've already checked
                 * by cgroup_lock_live_cgroup() that it is not removed and we
                 * are still under the same cgroup_mutex. So we can postpone
                 * css_get().
                 */
-               if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy))
+               if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
                        goto one_by_one;
-               if (do_swap_account && res_counter_charge(&mem->memsw,
+               if (do_swap_account && res_counter_charge(&memcg->memsw,
                                                PAGE_SIZE * count, &dummy)) {
-                       res_counter_uncharge(&mem->res, PAGE_SIZE * count);
+                       res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
                        goto one_by_one;
                }
                mc.precharge += count;
@@ -5017,8 +5024,9 @@ one_by_one:
                        batch_count = PRECHARGE_COUNT_AT_ONCE;
                        cond_resched();
                }
-               ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, 1, &mem, false);
-               if (ret || !mem)
+               ret = __mem_cgroup_try_charge(NULL,
+                                       GFP_KERNEL, 1, &memcg, false);
+               if (ret || !memcg)
                        /* mem_cgroup_clear_mc() will do uncharge later */
                        return -ENOMEM;
                mc.precharge++;
@@ -5292,13 +5300,13 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
                                struct task_struct *p)
 {
        int ret = 0;
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
 
-       if (mem->move_charge_at_immigrate) {
+       if (memcg->move_charge_at_immigrate) {
                struct mm_struct *mm;
                struct mem_cgroup *from = mem_cgroup_from_task(p);
 
-               VM_BUG_ON(from == mem);
+               VM_BUG_ON(from == memcg);
 
                mm = get_task_mm(p);
                if (!mm)
@@ -5313,7 +5321,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
                        mem_cgroup_start_move(from);
                        spin_lock(&mc.lock);
                        mc.from = from;
-                       mc.to = mem;
+                       mc.to = memcg;
                        spin_unlock(&mc.lock);
                        /* We set mc.moving_task later */
 
index a56e3ba816b21e52016a1a46be99479afe45ad08..b2b87315cdc638bea2f99775113bf736d9aac505 100644 (file)
@@ -1503,7 +1503,7 @@ split_fallthrough:
        }
 
        if (flags & FOLL_GET)
-               get_page(page);
+               get_page_foll(page);
        if (flags & FOLL_TOUCH) {
                if ((flags & FOLL_WRITE) &&
                    !pte_dirty(pte) && !PageDirty(page))
index 6bdc67dbbc28920c36d0511057071d7229f58e39..2d123f94a8df49addd2ee3167d31861073da9f84 100644 (file)
@@ -133,10 +133,13 @@ struct page *lookup_cgroup_page(struct page_cgroup *pc)
 static void *__meminit alloc_page_cgroup(size_t size, int nid)
 {
        void *addr = NULL;
+       gfp_t flags = GFP_KERNEL | __GFP_NOWARN;
 
-       addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN);
-       if (addr)
+       addr = alloc_pages_exact_nid(nid, size, flags);
+       if (addr) {
+               kmemleak_alloc(addr, size, 1, flags);
                return addr;
+       }
 
        if (node_state(nid, N_HIGH_MEMORY))
                addr = vmalloc_node(size, nid);
@@ -357,7 +360,7 @@ struct swap_cgroup_ctrl {
        spinlock_t      lock;
 };
 
-struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
+static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
 
 struct swap_cgroup {
        unsigned short          id;
index fa4fa6ce13bc431c65de6725d9b86f6db24bf041..45b9acb575f9c5c2ad6a869d1e7c28abba3a890e 100644 (file)
@@ -2503,7 +2503,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 
        d_instantiate(path.dentry, inode);
        inode->i_size = size;
-       inode->i_nlink = 0;     /* It is unlinked */
+       clear_nlink(inode);     /* It is unlinked */
 #ifndef CONFIG_MMU
        error = ramfs_nommu_expand_for_mapping(inode, size);
        if (error)
index 3a442f18b0b3dab5acfd99b5fc4e85389b9b5439..87627f181c3f333075cb15773bd3d45db3ec1ea9 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -78,39 +78,22 @@ static void put_compound_page(struct page *page)
 {
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
-               struct page *page_head = page->first_page;
-               smp_rmb();
-               /*
-                * If PageTail is still set after smp_rmb() we can be sure
-                * that the page->first_page we read wasn't a dangling pointer.
-                * See __split_huge_page_refcount() smp_wmb().
-                */
-               if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
+               struct page *page_head = compound_trans_head(page);
+
+               if (likely(page != page_head &&
+                          get_page_unless_zero(page_head))) {
                        unsigned long flags;
                        /*
-                        * Verify that our page_head wasn't converted
-                        * to a a regular page before we got a
-                        * reference on it.
+                        * page_head wasn't a dangling pointer but it
+                        * may not be a head page anymore by the time
+                        * we obtain the lock. That is ok as long as it
+                        * can't be freed from under us.
                         */
-                       if (unlikely(!PageHead(page_head))) {
-                               /* PageHead is cleared after PageTail */
-                               smp_rmb();
-                               VM_BUG_ON(PageTail(page));
-                               goto out_put_head;
-                       }
-                       /*
-                        * Only run compound_lock on a valid PageHead,
-                        * after having it pinned with
-                        * get_page_unless_zero() above.
-                        */
-                       smp_mb();
-                       /* page_head wasn't a dangling pointer */
                        flags = compound_lock_irqsave(page_head);
                        if (unlikely(!PageTail(page))) {
                                /* __split_huge_page_refcount run before us */
                                compound_unlock_irqrestore(page_head, flags);
                                VM_BUG_ON(PageHead(page_head));
-                       out_put_head:
                                if (put_page_testzero(page_head))
                                        __put_single_page(page_head);
                        out_put_single:
@@ -121,16 +104,17 @@ static void put_compound_page(struct page *page)
                        VM_BUG_ON(page_head != page->first_page);
                        /*
                         * We can release the refcount taken by
-                        * get_page_unless_zero now that
-                        * split_huge_page_refcount is blocked on the
-                        * compound_lock.
+                        * get_page_unless_zero() now that
+                        * __split_huge_page_refcount() is blocked on
+                        * the compound_lock.
                         */
                        if (put_page_testzero(page_head))
                                VM_BUG_ON(1);
                        /* __split_huge_page_refcount will wait now */
-                       VM_BUG_ON(atomic_read(&page->_count) <= 0);
-                       atomic_dec(&page->_count);
+                       VM_BUG_ON(page_mapcount(page) <= 0);
+                       atomic_dec(&page->_mapcount);
                        VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
+                       VM_BUG_ON(atomic_read(&page->_count) != 0);
                        compound_unlock_irqrestore(page_head, flags);
                        if (put_page_testzero(page_head)) {
                                if (PageHead(page_head))
@@ -160,6 +144,45 @@ void put_page(struct page *page)
 }
 EXPORT_SYMBOL(put_page);
 
+/*
+ * This function is exported but must not be called by anything other
+ * than get_page(). It implements the slow path of get_page().
+ */
+bool __get_page_tail(struct page *page)
+{
+       /*
+        * This takes care of get_page() if run on a tail page
+        * returned by one of the get_user_pages/follow_page variants.
+        * get_user_pages/follow_page itself doesn't need the compound
+        * lock because it runs __get_page_tail_foll() under the
+        * proper PT lock that already serializes against
+        * split_huge_page().
+        */
+       unsigned long flags;
+       bool got = false;
+       struct page *page_head = compound_trans_head(page);
+
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
+               /*
+                * page_head wasn't a dangling pointer but it
+                * may not be a head page anymore by the time
+                * we obtain the lock. That is ok as long as it
+                * can't be freed from under us.
+                */
+               flags = compound_lock_irqsave(page_head);
+               /* here __split_huge_page_refcount won't run anymore */
+               if (likely(PageTail(page))) {
+                       __get_page_tail_foll(page, false);
+                       got = true;
+               }
+               compound_unlock_irqrestore(page_head, flags);
+               if (unlikely(!got))
+                       put_page(page_head);
+       }
+       return got;
+}
+EXPORT_SYMBOL(__get_page_tail);
+
 /**
  * put_pages_list() - release a list of pages
  * @pages: list of pages threaded on page->lru
index a90c603a8d02937fd41bac6a7e72d2891f86bfaa..132d1ddb2238179466f45a599b1ccd687f9ee3f8 100644 (file)
@@ -1767,7 +1767,7 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
        if (scanning_global_lru(sc))
                low = inactive_anon_is_low_global(zone);
        else
-               low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
+               low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
        return low;
 }
 #else
@@ -1810,7 +1810,7 @@ static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
        if (scanning_global_lru(sc))
                low = inactive_file_is_low_global(zone);
        else
-               low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
+               low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
        return low;
 }
 
index c8cf9391417ec9442b47f31e3c3cde891cfeaf1d..bc252862458385ef5d37367a643fcdf9ef5ad9af 100644 (file)
@@ -470,10 +470,12 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 {
        struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 
-       if (change & IFF_ALLMULTI)
-               dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
-       if (change & IFF_PROMISC)
-               dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
+       if (dev->flags & IFF_UP) {
+               if (change & IFF_ALLMULTI)
+                       dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
+               if (change & IFF_PROMISC)
+                       dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
+       }
 }
 
 static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
index bf2a333ca7c7651812f5318d644d19d185efff61..5449294bdd5e7babc41b20d5bfcb523d97b74e1d 100644 (file)
@@ -102,16 +102,15 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
        unsigned int n;
 
        n = max(size, nlbufsiz);
-       skb = alloc_skb(n, GFP_ATOMIC);
+       skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
        if (!skb) {
-               pr_debug("cannot alloc whole buffer of size %ub!\n", n);
                if (n > size) {
                        /* try to allocate only as much as we need for
                         * current packet */
                        skb = alloc_skb(size, GFP_ATOMIC);
                        if (!skb)
-                               pr_debug("cannot even allocate "
-                                        "buffer of size %ub\n", size);
+                               pr_debug("cannot even allocate buffer of size %ub\n",
+                                        size);
                }
        }
 
index 909ecb3c2a3344077332bfc9fb66fd7f0bfa8031..039d51e6c284e7ab655319d399b9d40060357dcf 100644 (file)
@@ -872,12 +872,8 @@ static void neigh_timer_handler(unsigned long arg)
        now = jiffies;
        next = now + HZ;
 
-       if (!(state & NUD_IN_TIMER)) {
-#ifndef CONFIG_SMP
-               printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
-#endif
+       if (!(state & NUD_IN_TIMER))
                goto out;
-       }
 
        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
index ca4db40e75b84becaab9c9acef48e5677d202a6c..18a3cebb753d39d57b2962f991f32fb048d76c29 100644 (file)
@@ -189,6 +189,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
         * aligned memory blocks, unless SLUB/SLAB debug is enabled.
         * Both skb->head and skb_shared_info are cache line aligned.
         */
+       size = SKB_DATA_ALIGN(size);
        size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        data = kmalloc_node_track_caller(size, gfp_mask, node);
        if (!data)
index 332639b56f4d76e93888006aedf6832df4ef47c3..90a919afbed79ee5998f510badfaaa199a441086 100644 (file)
@@ -433,6 +433,7 @@ exit:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
        return NULL;
 put_and_exit:
+       bh_unlock_sock(newsk);
        sock_put(newsk);
        goto exit;
 }
index db8d22db425f6917b23d8d31661905ab58d3ef2d..a639967eb727284a668354374d9bc4103a7ca1d9 100644 (file)
@@ -395,7 +395,6 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
                        config = clusterip_config_init(cipinfo,
                                                        e->ip.dst.s_addr, dev);
                        if (!config) {
-                               pr_info("cannot allocate config\n");
                                dev_put(dev);
                                return -ENOMEM;
                        }
index 446e0f467a17eed968b7a9cba12b001b05de1154..b5508151e5476a23d3285b2bf919dba3132bc322 100644 (file)
@@ -135,10 +135,8 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
         * due to slab allocator restrictions */
 
        n = max(size, nlbufsiz);
-       skb = alloc_skb(n, GFP_ATOMIC);
+       skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
        if (!skb) {
-               pr_debug("cannot alloc whole buffer %ub!\n", n);
-
                if (n > size) {
                        /* try to allocate only as much as we need for
                         * current packet */
index d1cb412c18e02665db35e95021f29ecae38bf35c..2133c30a4a5f4799be74c729dac78a9b579c6101 100644 (file)
@@ -400,11 +400,8 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
        *len = 0;
 
        *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
-       if (*octets == NULL) {
-               if (net_ratelimit())
-                       pr_notice("OOM in bsalg (%d)\n", __LINE__);
+       if (*octets == NULL)
                return 0;
-       }
 
        ptr = *octets;
        while (ctx->pointer < eoc) {
@@ -451,11 +448,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
                return 0;
 
        *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
-       if (*oid == NULL) {
-               if (net_ratelimit())
-                       pr_notice("OOM in bsalg (%d)\n", __LINE__);
+       if (*oid == NULL)
                return 0;
-       }
 
        optr = *oid;
 
@@ -728,8 +722,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
                *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
                if (*obj == NULL) {
                        kfree(id);
-                       if (net_ratelimit())
-                               pr_notice("OOM in bsalg (%d)\n", __LINE__);
                        return 0;
                }
                (*obj)->syntax.l[0] = l;
@@ -744,8 +736,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
                if (*obj == NULL) {
                        kfree(p);
                        kfree(id);
-                       if (net_ratelimit())
-                               pr_notice("OOM in bsalg (%d)\n", __LINE__);
                        return 0;
                }
                memcpy((*obj)->syntax.c, p, len);
@@ -759,8 +749,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
                *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
                if (*obj == NULL) {
                        kfree(id);
-                       if (net_ratelimit())
-                               pr_notice("OOM in bsalg (%d)\n", __LINE__);
                        return 0;
                }
                if (!asn1_null_decode(ctx, end)) {
@@ -780,8 +768,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
                if (*obj == NULL) {
                        kfree(lp);
                        kfree(id);
-                       if (net_ratelimit())
-                               pr_notice("OOM in bsalg (%d)\n", __LINE__);
                        return 0;
                }
                memcpy((*obj)->syntax.ul, lp, len);
@@ -801,8 +787,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
                if (*obj == NULL) {
                        kfree(p);
                        kfree(id);
-                       if (net_ratelimit())
-                               pr_notice("OOM in bsalg (%d)\n", __LINE__);
                        return 0;
                }
                memcpy((*obj)->syntax.uc, p, len);
@@ -819,8 +803,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
                *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
                if (*obj == NULL) {
                        kfree(id);
-                       if (net_ratelimit())
-                               pr_notice("OOM in bsalg (%d)\n", __LINE__);
                        return 0;
                }
                (*obj)->syntax.ul[0] = ul;
index 0ea10eefa60f005911b0849dd14218a1eb9ccf1d..a7443159c400450e73ff9b83230fe40a48d0da39 100644 (file)
@@ -1510,6 +1510,7 @@ exit:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
        return NULL;
 put_and_exit:
+       bh_unlock_sock(newsk);
        sock_put(newsk);
        goto exit;
 }
@@ -2339,7 +2340,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
        }
 }
 
-static int tcp_seq_open(struct inode *inode, struct file *file)
+int tcp_seq_open(struct inode *inode, struct file *file)
 {
        struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
        struct tcp_iter_state *s;
@@ -2355,23 +2356,19 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
        s->last_pos             = 0;
        return 0;
 }
+EXPORT_SYMBOL(tcp_seq_open);
 
 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
 {
        int rc = 0;
        struct proc_dir_entry *p;
 
-       afinfo->seq_fops.open           = tcp_seq_open;
-       afinfo->seq_fops.read           = seq_read;
-       afinfo->seq_fops.llseek         = seq_lseek;
-       afinfo->seq_fops.release        = seq_release_net;
-
        afinfo->seq_ops.start           = tcp_seq_start;
        afinfo->seq_ops.next            = tcp_seq_next;
        afinfo->seq_ops.stop            = tcp_seq_stop;
 
        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
-                            &afinfo->seq_fops, afinfo);
+                            afinfo->seq_fops, afinfo);
        if (!p)
                rc = -ENOMEM;
        return rc;
@@ -2520,12 +2517,18 @@ out:
        return 0;
 }
 
+static const struct file_operations tcp_afinfo_seq_fops = {
+       .owner   = THIS_MODULE,
+       .open    = tcp_seq_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release_net
+};
+
 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
        .name           = "tcp",
        .family         = AF_INET,
-       .seq_fops       = {
-               .owner          = THIS_MODULE,
-       },
+       .seq_fops       = &tcp_afinfo_seq_fops,
        .seq_ops        = {
                .show           = tcp4_seq_show,
        },
index ebaa96bd346475dd550baadc9e32dd7a4e5d8ef6..ab0966df1e2a8aec9e4ecb40e77332cbebbde466 100644 (file)
@@ -1397,6 +1397,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
        nf_reset(skb);
 
        if (up->encap_type) {
+               int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
+
                /*
                 * This is an encapsulation socket so pass the skb to
                 * the socket's udp_encap_rcv() hook. Otherwise, just
@@ -1409,11 +1411,11 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
                 */
 
                /* if we're overly short, let UDP handle it */
-               if (skb->len > sizeof(struct udphdr) &&
-                   up->encap_rcv != NULL) {
+               encap_rcv = ACCESS_ONCE(up->encap_rcv);
+               if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
                        int ret;
 
-                       ret = (*up->encap_rcv)(sk, skb);
+                       ret = encap_rcv(sk, skb);
                        if (ret <= 0) {
                                UDP_INC_STATS_BH(sock_net(sk),
                                                 UDP_MIB_INDATAGRAMS,
@@ -2037,7 +2039,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
                spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
 }
 
-static int udp_seq_open(struct inode *inode, struct file *file)
+int udp_seq_open(struct inode *inode, struct file *file)
 {
        struct udp_seq_afinfo *afinfo = PDE(inode)->data;
        struct udp_iter_state *s;
@@ -2053,6 +2055,7 @@ static int udp_seq_open(struct inode *inode, struct file *file)
        s->udp_table            = afinfo->udp_table;
        return err;
 }
+EXPORT_SYMBOL(udp_seq_open);
 
 /* ------------------------------------------------------------------------ */
 int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
@@ -2060,17 +2063,12 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
        struct proc_dir_entry *p;
        int rc = 0;
 
-       afinfo->seq_fops.open           = udp_seq_open;
-       afinfo->seq_fops.read           = seq_read;
-       afinfo->seq_fops.llseek         = seq_lseek;
-       afinfo->seq_fops.release        = seq_release_net;
-
        afinfo->seq_ops.start           = udp_seq_start;
        afinfo->seq_ops.next            = udp_seq_next;
        afinfo->seq_ops.stop            = udp_seq_stop;
 
        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
-                            &afinfo->seq_fops, afinfo);
+                            afinfo->seq_fops, afinfo);
        if (!p)
                rc = -ENOMEM;
        return rc;
@@ -2120,14 +2118,20 @@ int udp4_seq_show(struct seq_file *seq, void *v)
        return 0;
 }
 
+static const struct file_operations udp_afinfo_seq_fops = {
+       .owner    = THIS_MODULE,
+       .open     = udp_seq_open,
+       .read     = seq_read,
+       .llseek   = seq_lseek,
+       .release  = seq_release_net
+};
+
 /* ------------------------------------------------------------------------ */
 static struct udp_seq_afinfo udp4_seq_afinfo = {
        .name           = "udp",
        .family         = AF_INET,
        .udp_table      = &udp_table,
-       .seq_fops       = {
-               .owner  =       THIS_MODULE,
-       },
+       .seq_fops       = &udp_afinfo_seq_fops,
        .seq_ops        = {
                .show           = udp4_seq_show,
        },
index aee9963f7f5a497efc06429d3ab730e9f3efc999..08383eb542087a9de6699bc5179291b446d37b79 100644 (file)
@@ -71,13 +71,20 @@ static struct inet_protosw udplite4_protosw = {
 };
 
 #ifdef CONFIG_PROC_FS
+
+static const struct file_operations udplite_afinfo_seq_fops = {
+       .owner    = THIS_MODULE,
+       .open     = udp_seq_open,
+       .read     = seq_read,
+       .llseek   = seq_lseek,
+       .release  = seq_release_net
+};
+
 static struct udp_seq_afinfo udplite4_seq_afinfo = {
        .name           = "udplite",
        .family         = AF_INET,
        .udp_table      = &udplite_table,
-       .seq_fops       = {
-               .owner  =       THIS_MODULE,
-       },
+       .seq_fops       = &udplite_afinfo_seq_fops,
        .seq_ops        = {
                .show           = udp4_seq_show,
        },
index 30fcee465448618b326f8cb9172deeadfc055e62..8992cf6651d47da90de5d3c76832672de50f547c 100644 (file)
@@ -100,9 +100,16 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
                .pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
        };
        const void *sk = strict ? &fake_sk : NULL;
-
-       *dst = ip6_route_output(net, sk, &fl->u.ip6);
-       return (*dst)->error;
+       struct dst_entry *result;
+       int err;
+
+       result = ip6_route_output(net, sk, &fl->u.ip6);
+       err = result->error;
+       if (err)
+               dst_release(result);
+       else
+               *dst = result;
+       return err;
 }
 
 __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
index e8762c73b170f3c73f675e7b49d6a13c5d3a6174..38f00b0298d3f53327f2e35447e8ab97a1568e83 100644 (file)
@@ -182,7 +182,6 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
        return container_of(q, struct nf_ct_frag6_queue, q);
 
 oom:
-       pr_debug("Can't alloc new queue\n");
        return NULL;
 }
 
@@ -370,10 +369,10 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
                struct sk_buff *clone;
                int i, plen = 0;
 
-               if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
-                       pr_debug("Can't alloc skb\n");
+               clone = alloc_skb(0, GFP_ATOMIC);
+               if (clone == NULL)
                        goto out_oom;
-               }
+
                clone->next = head->next;
                head->next = clone;
                skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
index 10b2b3165a1aff4ebc6cde947aae64a687b11a5b..36131d122a6f3f9007776ff343a9197bd0430b0a 100644 (file)
@@ -2161,12 +2161,18 @@ out:
        return 0;
 }
 
+static const struct file_operations tcp6_afinfo_seq_fops = {
+       .owner   = THIS_MODULE,
+       .open    = tcp_seq_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release_net
+};
+
 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
        .name           = "tcp6",
        .family         = AF_INET6,
-       .seq_fops       = {
-               .owner          = THIS_MODULE,
-       },
+       .seq_fops       = &tcp6_afinfo_seq_fops,
        .seq_ops        = {
                .show           = tcp6_seq_show,
        },
index f4ca0a5b3457d857ce810b78f2d7f890d11470f8..846f4757eb8d46394a604595be0698d485ae1ab0 100644 (file)
@@ -1424,13 +1424,19 @@ int udp6_seq_show(struct seq_file *seq, void *v)
        return 0;
 }
 
+static const struct file_operations udp6_afinfo_seq_fops = {
+       .owner    = THIS_MODULE,
+       .open     = udp_seq_open,
+       .read     = seq_read,
+       .llseek   = seq_lseek,
+       .release  = seq_release_net
+};
+
 static struct udp_seq_afinfo udp6_seq_afinfo = {
        .name           = "udp6",
        .family         = AF_INET6,
        .udp_table      = &udp_table,
-       .seq_fops       = {
-               .owner  =       THIS_MODULE,
-       },
+       .seq_fops       = &udp6_afinfo_seq_fops,
        .seq_ops        = {
                .show           = udp6_seq_show,
        },
index 986c4de5292eedf715b25b8bae98ee640917d97e..8889aa22ed47197129c9ceb6585d9c2d4a06bb5b 100644 (file)
@@ -93,13 +93,20 @@ void udplitev6_exit(void)
 }
 
 #ifdef CONFIG_PROC_FS
+
+static const struct file_operations udplite6_afinfo_seq_fops = {
+       .owner    = THIS_MODULE,
+       .open     = udp_seq_open,
+       .read     = seq_read,
+       .llseek   = seq_lseek,
+       .release  = seq_release_net
+};
+
 static struct udp_seq_afinfo udplite6_seq_afinfo = {
        .name           = "udplite6",
        .family         = AF_INET6,
        .udp_table      = &udplite_table,
-       .seq_fops       = {
-               .owner  =       THIS_MODULE,
-       },
+       .seq_fops       = &udplite6_afinfo_seq_fops,
        .seq_ops        = {
                .show           = udp6_seq_show,
        },
index 34b2ddeacb673b3b8d4f3e92bf726fc567c49a81..bf8d50c67931e8e588520b7c49ad49fca51b40b3 100644 (file)
@@ -397,6 +397,7 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
         * expect to send up next, dequeue it and any other
         * in-sequence packets behind it.
         */
+start:
        spin_lock_bh(&session->reorder_q.lock);
        skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
                if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) {
@@ -433,7 +434,7 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
                 */
                spin_unlock_bh(&session->reorder_q.lock);
                l2tp_recv_dequeue_skb(session, skb);
-               spin_lock_bh(&session->reorder_q.lock);
+               goto start;
        }
 
 out:
index 3346829ea07f09e28107c4b227d8b5d5763be30b..afca6c78948cfd77a1ad8b429f733482c4b11c1a 100644 (file)
@@ -180,17 +180,16 @@ next_hook:
                if (ret == 0)
                        ret = -EPERM;
        } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-               ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-                              verdict >> NF_VERDICT_QBITS);
-               if (ret < 0) {
-                       if (ret == -ECANCELED)
+               int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+                                               verdict >> NF_VERDICT_QBITS);
+               if (err < 0) {
+                       if (err == -ECANCELED)
                                goto next_hook;
-                       if (ret == -ESRCH &&
+                       if (err == -ESRCH &&
                           (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
                                goto next_hook;
                        kfree_skb(skb);
                }
-               ret = 0;
        }
        rcu_read_unlock();
        return ret;
index d7e86ef9d23aa0a11fbd5d80a8f64e4a01c7a8ea..86137b558f455d9e1740a977aa09ea1e294bb241 100644 (file)
@@ -1699,10 +1699,8 @@ ip_set_init(void)
 
        ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
                              GFP_KERNEL);
-       if (!ip_set_list) {
-               pr_err("ip_set: Unable to create ip_set_list\n");
+       if (!ip_set_list)
                return -ENOMEM;
-       }
 
        ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
        if (ret != 0) {
index 4f77bb16d22abea2f60ea479318329bbab769a49..093cc327020fba6f09a6b4bfcabfc9386d892d02 100644 (file)
@@ -188,14 +188,13 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 }
 
 
-static inline int
+static inline void
 ip_vs_set_state(struct ip_vs_conn *cp, int direction,
                const struct sk_buff *skb,
                struct ip_vs_proto_data *pd)
 {
-       if (unlikely(!pd->pp->state_transition))
-               return 0;
-       return pd->pp->state_transition(cp, direction, skb, pd);
+       if (likely(pd->pp->state_transition))
+               pd->pp->state_transition(cp, direction, skb, pd);
 }
 
 static inline int
@@ -530,7 +529,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
           a cache_bypass connection entry */
        ipvs = net_ipvs(net);
        if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
-               int ret, cs;
+               int ret;
                struct ip_vs_conn *cp;
                unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
                                      iph.protocol == IPPROTO_UDP)?
@@ -557,7 +556,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                ip_vs_in_stats(cp, skb);
 
                /* set state */
-               cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
+               ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
 
                /* transmit the first SYN packet */
                ret = cp->packet_xmit(skb, cp, pd->pp);
@@ -1490,7 +1489,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
        struct ip_vs_protocol *pp;
        struct ip_vs_proto_data *pd;
        struct ip_vs_conn *cp;
-       int ret, restart, pkts;
+       int ret, pkts;
        struct netns_ipvs *ipvs;
 
        /* Already marked as IPVS request or reply? */
@@ -1591,7 +1590,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
        }
 
        ip_vs_in_stats(cp, skb);
-       restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
+       ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
        if (cp->packet_xmit)
                ret = cp->packet_xmit(skb, cp, pp);
                /* do not touch skb anymore */
@@ -1878,10 +1877,9 @@ static int __net_init __ip_vs_init(struct net *net)
        struct netns_ipvs *ipvs;
 
        ipvs = net_generic(net, ip_vs_net_id);
-       if (ipvs == NULL) {
-               pr_err("%s(): no memory.\n", __func__);
+       if (ipvs == NULL)
                return -ENOMEM;
-       }
+
        /* Hold the beast until a service is registerd */
        ipvs->enable = 0;
        ipvs->net = net;
index e3be48bf4dcdb0779c0a20072e43839b8e01d9fc..008bf97cc91a58b14a0ef9fd6924eb6f71063f11 100644 (file)
@@ -856,15 +856,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
        }
 
        dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
-       if (dest == NULL) {
-               pr_err("%s(): no memory.\n", __func__);
+       if (dest == NULL)
                return -ENOMEM;
-       }
+
        dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-       if (!dest->stats.cpustats) {
-               pr_err("%s() alloc_percpu failed\n", __func__);
+       if (!dest->stats.cpustats)
                goto err_alloc;
-       }
 
        dest->af = svc->af;
        dest->protocol = svc->protocol;
@@ -1168,10 +1165,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
                goto out_err;
        }
        svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-       if (!svc->stats.cpustats) {
-               pr_err("%s() alloc_percpu failed\n", __func__);
+       if (!svc->stats.cpustats)
                goto out_err;
-       }
 
        /* I'm the first user of the service */
        atomic_set(&svc->usecnt, 0);
@@ -3326,10 +3321,8 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
        int ret = 0, cmd;
        int need_full_svc = 0, need_full_dest = 0;
        struct net *net;
-       struct netns_ipvs *ipvs;
 
        net = skb_sknet(skb);
-       ipvs = net_ipvs(net);
        cmd = info->genlhdr->cmd;
 
        mutex_lock(&__ip_vs_mutex);
@@ -3421,10 +3414,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
        void *reply;
        int ret, cmd, reply_cmd;
        struct net *net;
-       struct netns_ipvs *ipvs;
 
        net = skb_sknet(skb);
-       ipvs = net_ipvs(net);
        cmd = info->genlhdr->cmd;
 
        if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3720,10 +3711,9 @@ int __net_init ip_vs_control_net_init(struct net *net)
 
        /* procfs stats */
        ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-       if (!ipvs->tot_stats.cpustats) {
-               pr_err("%s(): alloc_percpu.\n", __func__);
+       if (!ipvs->tot_stats.cpustats)
                return -ENOMEM;
-       }
+
        spin_lock_init(&ipvs->tot_stats.lock);
 
        proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
index 95fd0d14200b338a41e0a580e890e85ea5ec5e1e..1c269e56200ad2e67b49cf97dd7a04d78aa019dc 100644 (file)
@@ -150,10 +150,9 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
        /* allocate the DH table for this service */
        tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
                      GFP_ATOMIC);
-       if (tbl == NULL) {
-               pr_err("%s(): no memory\n", __func__);
+       if (tbl == NULL)
                return -ENOMEM;
-       }
+
        svc->sched_data = tbl;
        IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
                  "current service\n",
index 4490a32ad5b2fbfb3b47f3df0b21e54b30020597..538d74ee4f68bc18e7bb379d7432388aba305425 100644 (file)
@@ -52,8 +52,9 @@
  * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
  * First port is set to the default port.
  */
+static unsigned int ports_count = 1;
 static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0};
-module_param_array(ports, ushort, NULL, 0);
+module_param_array(ports, ushort, &ports_count, 0444);
 MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands");
 
 
@@ -449,7 +450,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
        if (ret)
                goto err_exit;
 
-       for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
+       for (i = 0; i < ports_count; i++) {
                if (!ports[i])
                        continue;
                ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
index 87e40ea77a95244e6ca94c21c9475ed296e21d1b..0f16283fd05854fccc68fad349c7f29509926252 100644 (file)
@@ -202,10 +202,8 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
        en = ip_vs_lblc_get(dest->af, tbl, daddr);
        if (!en) {
                en = kmalloc(sizeof(*en), GFP_ATOMIC);
-               if (!en) {
-                       pr_err("%s(): no memory\n", __func__);
+               if (!en)
                        return NULL;
-               }
 
                en->af = dest->af;
                ip_vs_addr_copy(dest->af, &en->addr, daddr);
@@ -345,10 +343,9 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
         *    Allocate the ip_vs_lblc_table for this service
         */
        tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
-       if (tbl == NULL) {
-               pr_err("%s(): no memory\n", __func__);
+       if (tbl == NULL)
                return -ENOMEM;
-       }
+
        svc->sched_data = tbl;
        IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
                  "current service\n", sizeof(*tbl));
index 90f618ab6ddac3b04fe118c3573cf0fad779f744..eec797f8cce705a1676caeb3c3078053ddcdf523 100644 (file)
@@ -112,10 +112,8 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
        }
 
        e = kmalloc(sizeof(*e), GFP_ATOMIC);
-       if (e == NULL) {
-               pr_err("%s(): no memory\n", __func__);
+       if (e == NULL)
                return NULL;
-       }
 
        atomic_inc(&dest->refcnt);
        e->dest = dest;
@@ -373,10 +371,8 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
        en = ip_vs_lblcr_get(dest->af, tbl, daddr);
        if (!en) {
                en = kmalloc(sizeof(*en), GFP_ATOMIC);
-               if (!en) {
-                       pr_err("%s(): no memory\n", __func__);
+               if (!en)
                        return NULL;
-               }
 
                en->af = dest->af;
                ip_vs_addr_copy(dest->af, &en->addr, daddr);
@@ -516,10 +512,9 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
         *    Allocate the ip_vs_lblcr_table for this service
         */
        tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
-       if (tbl == NULL) {
-               pr_err("%s(): no memory\n", __func__);
+       if (tbl == NULL)
                return -ENOMEM;
-       }
+
        svc->sched_data = tbl;
        IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
                  "current service\n", sizeof(*tbl));
index f454c80df0a7868d29f639fed1555225462a36d6..022e77e1e766450136340710a0e6c4b063e9a212 100644 (file)
@@ -127,7 +127,7 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
        nf_conntrack_alter_reply(ct, &new_tuple);
 }
 
-int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
+int ip_vs_confirm_conntrack(struct sk_buff *skb)
 {
        return nf_conntrack_confirm(skb);
 }
index 52d073c105e9ee7af155b1a9439017e9bc74b414..85312939695f86f59357039799cb14364e1f638e 100644 (file)
@@ -74,10 +74,9 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
        struct ip_vs_proto_data *pd =
                        kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
 
-       if (!pd) {
-               pr_err("%s(): no memory.\n", __func__);
+       if (!pd)
                return -ENOMEM;
-       }
+
        pd->pp = pp;    /* For speed issues */
        pd->next = ipvs->proto_data_table[hash];
        ipvs->proto_data_table[hash] = pd;
index d12ed53ec95ff6a67950ce39b902e6816d40b249..1fbf7a2816f5ade317a747737840588d41621bf0 100644 (file)
@@ -906,7 +906,7 @@ static const char *sctp_state_name(int state)
        return "?";
 }
 
-static inline int
+static inline void
 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                int direction, const struct sk_buff *skb)
 {
@@ -924,7 +924,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
        sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t),
                                sizeof(_sctpch), &_sctpch);
        if (sch == NULL)
-               return 0;
+               return;
 
        chunk_type = sch->type;
        /*
@@ -993,21 +993,15 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                cp->timeout = pd->timeout_table[cp->state = next_state];
        else    /* What to do ? */
                cp->timeout = sctp_timeouts[cp->state = next_state];
-
-       return 1;
 }
 
-static int
+static void
 sctp_state_transition(struct ip_vs_conn *cp, int direction,
                const struct sk_buff *skb, struct ip_vs_proto_data *pd)
 {
-       int ret = 0;
-
        spin_lock(&cp->lock);
-       ret = set_sctp_state(pd, cp, direction, skb);
+       set_sctp_state(pd, cp, direction, skb);
        spin_unlock(&cp->lock);
-
-       return ret;
 }
 
 static inline __u16 sctp_app_hashkey(__be16 port)
index c0cc341b840d38180a948c51694d3e4d877e10fb..ef8641f7af8300efae329a3a74cb4325eba1761f 100644 (file)
@@ -546,7 +546,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 /*
  *     Handle state transitions
  */
-static int
+static void
 tcp_state_transition(struct ip_vs_conn *cp, int direction,
                     const struct sk_buff *skb,
                     struct ip_vs_proto_data *pd)
@@ -561,13 +561,11 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 
        th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
        if (th == NULL)
-               return 0;
+               return;
 
        spin_lock(&cp->lock);
        set_tcp_state(pd, cp, direction, th);
        spin_unlock(&cp->lock);
-
-       return 1;
 }
 
 static inline __u16 tcp_app_hashkey(__be16 port)
index f1282cbe6fe3f92d9ff74bf5fe8fbe9ed4e31176..f4b7262896bbd272fb8d3cff5298455b1deff703 100644 (file)
@@ -454,18 +454,17 @@ static const char * udp_state_name(int state)
        return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
 }
 
-static int
+static void
 udp_state_transition(struct ip_vs_conn *cp, int direction,
                     const struct sk_buff *skb,
                     struct ip_vs_proto_data *pd)
 {
        if (unlikely(!pd)) {
                pr_err("UDP no ns data\n");
-               return 0;
+               return;
        }
 
        cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
-       return 1;
 }
 
 static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
index b5e2556c581ad4c7ddabad5807d6af2d50add98f..33815f4fb451c42aeb0dad99cf157c37cc32dfbf 100644 (file)
@@ -147,10 +147,9 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
        /* allocate the SH table for this service */
        tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
                      GFP_ATOMIC);
-       if (tbl == NULL) {
-               pr_err("%s(): no memory\n", __func__);
+       if (tbl == NULL)
                return -ENOMEM;
-       }
+
        svc->sched_data = tbl;
        IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
                  "current service\n",
index 1ef41f50723c04c13cfcff46ac1ca387aa6def7a..fd0d4e09876a6177d167af378ef503f6ca64d6a8 100644 (file)
@@ -85,10 +85,9 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
         *    Allocate the mark variable for WRR scheduling
         */
        mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
-       if (mark == NULL) {
-               pr_err("%s(): no memory\n", __func__);
+       if (mark == NULL)
                return -ENOMEM;
-       }
+
        mark->cl = &svc->destinations;
        mark->cw = 0;
        mark->mw = ip_vs_wrr_max_weight(svc);
index ee319a4338b0f72cd9deb10ce2223cca2bf0b367..aa2d7206ee8a064a5fbc57e88f655fb42e7fe21f 100644 (file)
@@ -339,7 +339,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
                                                                \
        (skb)->ipvs_property = 1;                               \
        if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))          \
-               __ret = ip_vs_confirm_conntrack(skb, cp);       \
+               __ret = ip_vs_confirm_conntrack(skb);           \
        if (__ret == NF_ACCEPT) {                               \
                nf_reset(skb);                                  \
                skb_forward_csum(skb);                          \
index 5acfaf59a9c3c26547f001d453ebd9a00a912f77..7202b0631cd6eb725debc17062a0d1f4a1087f41 100644 (file)
@@ -661,7 +661,6 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
         */
        ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
        if (ct == NULL) {
-               pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
                atomic_dec(&net->ct.count);
                return ERR_PTR(-ENOMEM);
        }
@@ -749,10 +748,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 
        ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
                                  hash);
-       if (IS_ERR(ct)) {
-               pr_debug("Can't allocate conntrack.\n");
+       if (IS_ERR(ct))
                return (struct nf_conntrack_tuple_hash *)ct;
-       }
 
        if (!l4proto->new(ct, skb, dataoff)) {
                nf_conntrack_free(ct);
index 2d8158acf6faf5298c84e4fdabc6d8f9847b737b..66b2c54c544f6bbf87c20500d97452122f2b2fcf 100644 (file)
@@ -307,17 +307,14 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
        n = max(inst_size, pkt_size);
        skb = alloc_skb(n, GFP_ATOMIC);
        if (!skb) {
-               pr_notice("nfnetlink_log: can't alloc whole buffer (%u bytes)\n",
-                       inst_size);
-
                if (n > pkt_size) {
                        /* try to allocate only as much as we need for current
                         * packet */
 
                        skb = alloc_skb(pkt_size, GFP_ATOMIC);
                        if (!skb)
-                               pr_err("nfnetlink_log: can't even alloc %u "
-                                      "bytes\n", pkt_size);
+                               pr_err("nfnetlink_log: can't even alloc %u bytes\n",
+                                      pkt_size);
                }
        }
 
index 3bdd443aaf154d7946abdd802bd080df4e2c8fad..f407ebc13481ae5caa0f634db643d034c7af7e6a 100644 (file)
@@ -122,14 +122,12 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
 
        info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
        if (!info->timer) {
-               pr_debug("couldn't alloc timer\n");
                ret = -ENOMEM;
                goto out;
        }
 
        info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
        if (!info->timer->attr.attr.name) {
-               pr_debug("couldn't alloc attribute name\n");
                ret = -ENOMEM;
                goto out_free_timer;
        }
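
Both failure paths keep their goto-based unwinding; only the pr_debug() lines go, since kmalloc() and kstrdup() failures are already reported by the allocator. A sketch of the unwind ladder, with illustrative names:

    #include <linux/errno.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    struct timer_info {
            char *name;
    };

    static int timer_info_create(struct timer_info **out, const char *label)
    {
            struct timer_info *t;
            int ret;

            t = kmalloc(sizeof(*t), GFP_KERNEL);
            if (!t) {
                    ret = -ENOMEM;
                    goto out;               /* nothing to unwind yet */
            }

            t->name = kstrdup(label, GFP_KERNEL);
            if (!t->name) {
                    ret = -ENOMEM;
                    goto out_free;          /* undo the first allocation */
            }

            *out = t;
            return 0;

    out_free:
            kfree(t);
    out:
            return ret;
    }
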
index 9228ee0dc11a307a49d131e7f879c738351dcef5..dfd52bad1523f73535110e0d1b2c9f4a87c6545a 100644
@@ -176,10 +176,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
                ent = NULL;
        } else
                ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
-       if (!ent) {
-               if (net_ratelimit())
-                       pr_err("cannot allocate dsthash_ent\n");
-       } else {
+       if (ent) {
                memcpy(&ent->dst, dst, sizeof(ent->dst));
                spin_lock_init(&ent->lock);
 
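With the rate-limited error message gone, the empty failure arm disappears and the test flips to the success case. Roughly, with illustrative names:

    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct cache_entry {
            spinlock_t lock;
    };

    static void example(void)
    {
            struct cache_entry *ent = kmalloc(sizeof(*ent), GFP_ATOMIC);

            /*
             * The failure arm held nothing but a rate-limited log line;
             * once that is gone, testing the success case directly is
             * the cleaner shape.
             */
            if (ent) {
                    spin_lock_init(&ent->lock);
                    kfree(ent);     /* sketch only: free immediately */
            }
    }
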
index 03bb45adf2fcf5f76ff5540e8722fdf95726cc9b..82a6f34d39d012fb35d9a0d490503fcc2048e6e2 100644
@@ -335,7 +335,7 @@ struct packet_skb_cb {
        (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
        ((x)->kactive_blk_num+1) : 0)
 
-static inline struct packet_sock *pkt_sk(struct sock *sk)
+static struct packet_sock *pkt_sk(struct sock *sk)
 {
        return (struct packet_sock *)sk;
 }
@@ -477,7 +477,7 @@ static void *packet_lookup_frame(struct packet_sock *po,
        return h.raw;
 }
 
-static inline void *packet_current_frame(struct packet_sock *po,
+static void *packet_current_frame(struct packet_sock *po,
                struct packet_ring_buffer *rb,
                int status)
 {
@@ -715,7 +715,7 @@ out:
        spin_unlock(&po->sk.sk_receive_queue.lock);
 }
 
-static inline void prb_flush_block(struct tpacket_kbdq_core *pkc1,
+static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
                struct tpacket_block_desc *pbd1, __u32 status)
 {
        /* Flush everything minus the block header */
@@ -793,7 +793,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
        pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
 }
 
-static inline void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
+static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
 {
        pkc->reset_pending_on_curr_blk = 0;
 }
@@ -869,7 +869,7 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1,
  *         case and __packet_lookup_frame_in_block will check if block-0
  *         is free and can now be re-used.
  */
-static inline void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
+static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
                                  struct packet_sock *po)
 {
        pkc->reset_pending_on_curr_blk = 1;
@@ -940,36 +940,36 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
        BUG();
 }
 
-static inline int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
+static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
                                      struct tpacket_block_desc *pbd)
 {
        return TP_STATUS_USER & BLOCK_STATUS(pbd);
 }
 
-static inline int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
+static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
 {
        return pkc->reset_pending_on_curr_blk;
 }
 
-static inline void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
+static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
 {
        struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
        atomic_dec(&pkc->blk_fill_in_prog);
 }
 
-static inline void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
+static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
                        struct tpacket3_hdr *ppd)
 {
        ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb);
 }
 
-static inline void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
+static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
                        struct tpacket3_hdr *ppd)
 {
        ppd->hv1.tp_rxhash = 0;
 }
 
-static inline void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
+static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
                        struct tpacket3_hdr *ppd)
 {
        if (vlan_tx_tag_present(pkc->skb)) {
@@ -991,7 +991,7 @@ static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
                prb_clear_rxhash(pkc, ppd);
 }
 
-static inline void prb_fill_curr_block(char *curr,
+static void prb_fill_curr_block(char *curr,
                                struct tpacket_kbdq_core *pkc,
                                struct tpacket_block_desc *pbd,
                                unsigned int len)
@@ -1071,7 +1071,7 @@ static void *__packet_lookup_frame_in_block(struct packet_sock *po,
        return NULL;
 }
 
-static inline void *packet_current_rx_frame(struct packet_sock *po,
+static void *packet_current_rx_frame(struct packet_sock *po,
                                            struct sk_buff *skb,
                                            int status, unsigned int len)
 {
@@ -1091,7 +1091,7 @@ static inline void *packet_current_rx_frame(struct packet_sock *po,
        }
 }
 
-static inline void *prb_lookup_block(struct packet_sock *po,
+static void *prb_lookup_block(struct packet_sock *po,
                                     struct packet_ring_buffer *rb,
                                     unsigned int previous,
                                     int status)
@@ -1104,7 +1104,7 @@ static inline void *prb_lookup_block(struct packet_sock *po,
        return pbd;
 }
 
-static inline int prb_previous_blk_num(struct packet_ring_buffer *rb)
+static int prb_previous_blk_num(struct packet_ring_buffer *rb)
 {
        unsigned int prev;
        if (rb->prb_bdqc.kactive_blk_num)
@@ -1115,7 +1115,7 @@ static inline int prb_previous_blk_num(struct packet_ring_buffer *rb)
 }
 
 /* Assumes caller has held the rx_queue.lock */
-static inline void *__prb_previous_block(struct packet_sock *po,
+static void *__prb_previous_block(struct packet_sock *po,
                                         struct packet_ring_buffer *rb,
                                         int status)
 {
@@ -1123,7 +1123,7 @@ static inline void *__prb_previous_block(struct packet_sock *po,
        return prb_lookup_block(po, rb, previous, status);
 }
 
-static inline void *packet_previous_rx_frame(struct packet_sock *po,
+static void *packet_previous_rx_frame(struct packet_sock *po,
                                             struct packet_ring_buffer *rb,
                                             int status)
 {
@@ -1133,7 +1133,7 @@ static inline void *packet_previous_rx_frame(struct packet_sock *po,
        return __prb_previous_block(po, rb, status);
 }
 
-static inline void packet_increment_rx_head(struct packet_sock *po,
+static void packet_increment_rx_head(struct packet_sock *po,
                                            struct packet_ring_buffer *rb)
 {
        switch (po->tp_version) {
@@ -1148,7 +1148,7 @@ static inline void packet_increment_rx_head(struct packet_sock *po,
        }
 }
 
-static inline void *packet_previous_frame(struct packet_sock *po,
+static void *packet_previous_frame(struct packet_sock *po,
                struct packet_ring_buffer *rb,
                int status)
 {
@@ -1156,7 +1156,7 @@ static inline void *packet_previous_frame(struct packet_sock *po,
        return packet_lookup_frame(po, rb, previous, status);
 }
 
-static inline void packet_increment_head(struct packet_ring_buffer *buff)
+static void packet_increment_head(struct packet_ring_buffer *buff)
 {
        buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
 }
@@ -1558,7 +1558,7 @@ out_free:
        return err;
 }
 
-static inline unsigned int run_filter(const struct sk_buff *skb,
+static unsigned int run_filter(const struct sk_buff *skb,
                                      const struct sock *sk,
                                      unsigned int res)
 {
@@ -2167,10 +2167,10 @@ out:
        return err;
 }
 
-static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
-                                              size_t reserve, size_t len,
-                                              size_t linear, int noblock,
-                                              int *err)
+static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
+                                       size_t reserve, size_t len,
+                                       size_t linear, int noblock,
+                                       int *err)
 {
        struct sk_buff *skb;
 
@@ -3494,7 +3494,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
        kfree(pg_vec);
 }
 
-static inline char *alloc_one_pg_vec_page(unsigned long order)
+static char *alloc_one_pg_vec_page(unsigned long order)
 {
        char *buffer = NULL;
        gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
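
Every af_packet.c hunk in this series makes the same change: the inline keyword is dropped from file-local functions. For static functions the keyword is only a hint; the optimizer already inlines small helpers on its own and can also decline when inlining would bloat the code. Illustratively:

    /*
     * A small file-local helper with no inline keyword. The compiler
     * may still inline it; the annotation added nothing but noise.
     */
    static unsigned int ring_next(unsigned int head, unsigned int frame_max)
    {
            return head != frame_max ? head + 1 : 0;
    }
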
index 5f03e4ea65bff5f137aebe122f843fb7e5c6a4ca..3e16c6abde4f4bea8a800dbecfd2d98068d37cf8 100644
@@ -1261,14 +1261,19 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
        struct x25_sock *x25 = x25_sk(sk);
        struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)msg->msg_name;
        size_t copied;
-       int qbit, header_len = x25->neighbour->extended ?
-               X25_EXT_MIN_LEN : X25_STD_MIN_LEN;
-
+       int qbit, header_len;
        struct sk_buff *skb;
        unsigned char *asmptr;
        int rc = -ENOTCONN;
 
        lock_sock(sk);
+
+       if (x25->neighbour == NULL)
+               goto out;
+
+       header_len = x25->neighbour->extended ?
+               X25_EXT_MIN_LEN : X25_STD_MIN_LEN;
+
        /*
         * This works for seqpacket too. The receiver has ordered the queue for
         * us! We do one quick check first though
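
The x25 fix defers everything derived from x25->neighbour until the socket lock is held and the pointer has been checked, closing an oops when the neighbour is torn down before recvmsg() runs. A sketch of the pattern, with hypothetical types (the header length values are illustrative, not taken from the patch):

    #include <linux/errno.h>
    #include <net/sock.h>

    /* Hypothetical per-protocol socket, mirroring the x25 layout. */
    struct example_sock {
            struct sock sk;                 /* must be first */
            struct example_neigh {
                    unsigned int extended;
            } *neighbour;
    };

    static int example_recvmsg(struct example_sock *es)
    {
            int rc = -ENOTCONN;
            int header_len;

            lock_sock(&es->sk);

            /*
             * The neighbour can vanish before recvmsg() runs, so state
             * derived from it is computed only after the socket lock is
             * held and the pointer has been checked.
             */
            if (es->neighbour == NULL)
                    goto out;
            header_len = es->neighbour->extended ? 4 : 3;   /* illustrative */

            /* ... strip header_len bytes, then copy the payload ... */
            rc = 0;
    out:
            release_sock(&es->sk);
            return rc;
    }
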
index 19c053b823035a48bc2e47f735bdf125aad8384e..4f554f20dc9761b4cb55c1126f55b0c1d638062d 100644
@@ -9,7 +9,7 @@ config IMA
        select CRYPTO_HMAC
        select CRYPTO_MD5
        select CRYPTO_SHA1
-       select TCG_TPM if !S390
+       select TCG_TPM if !S390 && !UML
        select TCG_TIS if TCG_TPM
        help
          The Trusted Computing Group(TCG) runtime Integrity
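
The IMA change makes the forced selection of the TPM driver conditional on UML as well: Kconfig's select ignores the selected symbol's own dependencies, so exclusions have to be repeated on the select line itself. A hedged Kconfig sketch of the idiom (symbol names illustrative):

    config EXAMPLE_FEATURE
            bool "Example feature"
            # `select` forces the helper on without checking the helper's
            # own dependencies, so architectures that cannot build it must
            # be excluded here by hand.
            select EXAMPLE_HELPER if !S390 && !UML
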
index 150911c7ff087d4578458c3d89365fbb21ee3e7b..c47d3ce6c7333146e44eb7894ea56a5dbb427f7f 100644
@@ -966,6 +966,9 @@ static bool tomoyo_manager(void)
        return found;
 }
 
+static struct tomoyo_domain_info *tomoyo_find_domain_by_qid
+(unsigned int serial);
+
 /**
  * tomoyo_select_domain - Parse select command.
  *
@@ -999,6 +1002,8 @@ static bool tomoyo_select_domain(struct tomoyo_io_buffer *head,
        } else if (!strncmp(data, "domain=", 7)) {
                if (tomoyo_domain_def(data + 7))
                        domain = tomoyo_find_domain(data + 7);
+       } else if (sscanf(data, "Q=%u", &pid) == 1) {
+               domain = tomoyo_find_domain_by_qid(pid);
        } else
                return false;
        head->w.domain = domain;
@@ -1894,6 +1899,7 @@ static DECLARE_WAIT_QUEUE_HEAD(tomoyo_answer_wait);
 /* Structure for query. */
 struct tomoyo_query {
        struct list_head list;
+       struct tomoyo_domain_info *domain;
        char *query;
        size_t query_len;
        unsigned int serial;
@@ -2044,6 +2050,7 @@ int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
                goto out;
        }
        len = tomoyo_round2(entry.query_len);
+       entry.domain = r->domain;
        spin_lock(&tomoyo_query_list_lock);
        if (tomoyo_memory_quota[TOMOYO_MEMORY_QUERY] &&
            tomoyo_memory_used[TOMOYO_MEMORY_QUERY] + len
@@ -2090,6 +2097,29 @@ out:
        return error;
 }
 
+/**
+ * tomoyo_find_domain_by_qid - Get domain by query id.
+ *
+ * @serial: Query ID assigned by tomoyo_supervisor().
+ *
+ * Returns pointer to "struct tomoyo_domain_info" if found, NULL otherwise.
+ */
+static struct tomoyo_domain_info *tomoyo_find_domain_by_qid
+(unsigned int serial)
+{
+       struct tomoyo_query *ptr;
+       struct tomoyo_domain_info *domain = NULL;
+       spin_lock(&tomoyo_query_list_lock);
+       list_for_each_entry(ptr, &tomoyo_query_list, list) {
+               if (ptr->serial != serial || ptr->answer)
+                       continue;
+               domain = ptr->domain;
+               break;
+       }
+       spin_unlock(&tomoyo_query_list_lock);
+       return domain;
+}
+
 /**
  * tomoyo_poll_query - poll() for /sys/kernel/security/tomoyo/query.
  *
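
The TOMOYO hunks add a "Q=<serial>" selector: each pending supervisor query now records the domain it came from, and tomoyo_find_domain_by_qid() maps a query serial back to that domain by walking the query list under tomoyo_query_list_lock (the forward declaration exists only because the helper is defined below its first user). A sketch of the lookup-under-spinlock pattern, with illustrative names:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct pending_query {
            struct list_head list;
            unsigned int serial;
            void *owner;
    };

    static LIST_HEAD(query_list);
    static DEFINE_SPINLOCK(query_list_lock);

    /*
     * Resolve a serial number to the owner recorded at enqueue time.
     * The walk runs under the same lock that guards insertion and
     * removal, so entries cannot vanish mid-traversal.
     */
    static void *find_owner_by_serial(unsigned int serial)
    {
            struct pending_query *q;
            void *owner = NULL;

            spin_lock(&query_list_lock);
            list_for_each_entry(q, &query_list, list) {
                    if (q->serial != serial)
                            continue;
                    owner = q->owner;
                    break;
            }
            spin_unlock(&query_list_lock);
            return owner;
    }
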
index 1fef141ef8e720ea5f93a6849db18a282f5708d4..261a03c8a2095f36bcac187230e91e328bf11545 100644
@@ -59,7 +59,7 @@ config SOUND_OSS_CORE_PRECLAIM
 
 source "sound/oss/dmasound/Kconfig"
 
-if !M68K
+if !M68K && !UML
 
 menuconfig SND
        tristate "Advanced Linux Sound Architecture"
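
UML is excluded from the entire ALSA menu the same way M68K already was: a Kconfig "if" block applies its condition to everything inside it as an implicit dependency. A small illustrative sketch:

    # Everything between `if` and `endif` inherits the condition as an
    # implicit `depends on`, fencing a whole subsystem off in one place.
    if !UML

    menuconfig EXAMPLE_SND
            tristate "Example sound subsystem"

    endif
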
index 77dd0a13aecc40f59ac9361f241f376a61173934..d2f615ab177a7ca021d9f802d61ba2b57b10a7ca 100644
@@ -24,7 +24,7 @@ config SND_SGI_HAL2
 
 config SND_AU1X00
        tristate "Au1x00 AC97 Port Driver (DEPRECATED)"
-       depends on SOC_AU1000 || SOC_AU1100 || SOC_AU1500
+       depends on MIPS_ALCHEMY
        select SND_PCM
        select SND_AC97_CODEC
        help
index 6d592546e8fcc2bd2a0ea343baf929f0a930762f..e908a8123110d8011ffa5a3ed8a595a118dd862b 100644
@@ -3,7 +3,7 @@
 ##
 config SND_SOC_AU1XPSC
        tristate "SoC Audio for Au1200/Au1250/Au1550"
-       depends on SOC_AU1200 || SOC_AU1550
+       depends on MIPS_ALCHEMY
        help
          This option enables support for the Programmable Serial
          Controllers in AC97 and I2S mode, and the Descriptor-Based DMA
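
Both audio hunks replace an enumerated list of SoC symbols with the umbrella MIPS_ALCHEMY symbol, so future Alchemy variants need no further Kconfig edits. Roughly (symbol names illustrative):

    config SND_EXAMPLE_AUDIO
            tristate "Example Alchemy audio driver"
            # One umbrella platform symbol instead of an ever-growing
            # list of SoC variants.
            depends on MIPS_ALCHEMY
            select SND_PCM
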