From: Benjamin Herrenschmidt Date: Fri, 11 Oct 2013 07:23:53 +0000 (+1100) Subject: Merge branch 'for-kvm' into next X-Git-Tag: firefly_0821_release~176^2~4907^2~56 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=3ad26e5c4459d3793ad65bc8929037c70515df83;p=firefly-linux-kernel-4.4.55.git Merge branch 'for-kvm' into next Topic branch for commits that the KVM tree might want to pull in separately. Hand merged a few files due to conflicts with the LE stuff Signed-off-by: Benjamin Herrenschmidt --- 3ad26e5c4459d3793ad65bc8929037c70515df83 diff --cc arch/powerpc/include/asm/ppc_asm.h index ce05bba0bfc6,140f67090f0b..8deaaad3b32f --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@@ -124,110 -124,14 +124,25 @@@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPL #define REST_16VRS(n,b,base) REST_8VRS(n,b,base); REST_8VRS(n+8,b,base) #define REST_32VRS(n,b,base) REST_16VRS(n,b,base); REST_16VRS(n+16,b,base) - /* Save/restore FPRs, VRs and VSRs from their checkpointed backups in - * thread_struct: - */ - #define SAVE_FPR_TRANSACT(n, base) stfd n,THREAD_TRANSACT_FPR0+ \ - 8*TS_FPRWIDTH*(n)(base) - #define SAVE_2FPRS_TRANSACT(n, base) SAVE_FPR_TRANSACT(n, base); \ - SAVE_FPR_TRANSACT(n+1, base) - #define SAVE_4FPRS_TRANSACT(n, base) SAVE_2FPRS_TRANSACT(n, base); \ - SAVE_2FPRS_TRANSACT(n+2, base) - #define SAVE_8FPRS_TRANSACT(n, base) SAVE_4FPRS_TRANSACT(n, base); \ - SAVE_4FPRS_TRANSACT(n+4, base) - #define SAVE_16FPRS_TRANSACT(n, base) SAVE_8FPRS_TRANSACT(n, base); \ - SAVE_8FPRS_TRANSACT(n+8, base) - #define SAVE_32FPRS_TRANSACT(n, base) SAVE_16FPRS_TRANSACT(n, base); \ - SAVE_16FPRS_TRANSACT(n+16, base) - - #define REST_FPR_TRANSACT(n, base) lfd n,THREAD_TRANSACT_FPR0+ \ - 8*TS_FPRWIDTH*(n)(base) - #define REST_2FPRS_TRANSACT(n, base) REST_FPR_TRANSACT(n, base); \ - REST_FPR_TRANSACT(n+1, base) - #define REST_4FPRS_TRANSACT(n, base) REST_2FPRS_TRANSACT(n, base); \ - REST_2FPRS_TRANSACT(n+2, base) - #define REST_8FPRS_TRANSACT(n, base) 
REST_4FPRS_TRANSACT(n, base); \ - REST_4FPRS_TRANSACT(n+4, base) - #define REST_16FPRS_TRANSACT(n, base) REST_8FPRS_TRANSACT(n, base); \ - REST_8FPRS_TRANSACT(n+8, base) - #define REST_32FPRS_TRANSACT(n, base) REST_16FPRS_TRANSACT(n, base); \ - REST_16FPRS_TRANSACT(n+16, base) - - - #define SAVE_VR_TRANSACT(n,b,base) li b,THREAD_TRANSACT_VR0+(16*(n)); \ - stvx n,b,base - #define SAVE_2VRS_TRANSACT(n,b,base) SAVE_VR_TRANSACT(n,b,base); \ - SAVE_VR_TRANSACT(n+1,b,base) - #define SAVE_4VRS_TRANSACT(n,b,base) SAVE_2VRS_TRANSACT(n,b,base); \ - SAVE_2VRS_TRANSACT(n+2,b,base) - #define SAVE_8VRS_TRANSACT(n,b,base) SAVE_4VRS_TRANSACT(n,b,base); \ - SAVE_4VRS_TRANSACT(n+4,b,base) - #define SAVE_16VRS_TRANSACT(n,b,base) SAVE_8VRS_TRANSACT(n,b,base); \ - SAVE_8VRS_TRANSACT(n+8,b,base) - #define SAVE_32VRS_TRANSACT(n,b,base) SAVE_16VRS_TRANSACT(n,b,base); \ - SAVE_16VRS_TRANSACT(n+16,b,base) - - #define REST_VR_TRANSACT(n,b,base) li b,THREAD_TRANSACT_VR0+(16*(n)); \ - lvx n,b,base - #define REST_2VRS_TRANSACT(n,b,base) REST_VR_TRANSACT(n,b,base); \ - REST_VR_TRANSACT(n+1,b,base) - #define REST_4VRS_TRANSACT(n,b,base) REST_2VRS_TRANSACT(n,b,base); \ - REST_2VRS_TRANSACT(n+2,b,base) - #define REST_8VRS_TRANSACT(n,b,base) REST_4VRS_TRANSACT(n,b,base); \ - REST_4VRS_TRANSACT(n+4,b,base) - #define REST_16VRS_TRANSACT(n,b,base) REST_8VRS_TRANSACT(n,b,base); \ - REST_8VRS_TRANSACT(n+8,b,base) - #define REST_32VRS_TRANSACT(n,b,base) REST_16VRS_TRANSACT(n,b,base); \ - REST_16VRS_TRANSACT(n+16,b,base) - +#ifdef __BIG_ENDIAN__ +#define STXVD2X_ROT(n,b,base) STXVD2X(n,b,base) +#define LXVD2X_ROT(n,b,base) LXVD2X(n,b,base) +#else +#define STXVD2X_ROT(n,b,base) XXSWAPD(n,n); \ + STXVD2X(n,b,base); \ + XXSWAPD(n,n) + +#define LXVD2X_ROT(n,b,base) LXVD2X(n,b,base); \ + XXSWAPD(n,n) +#endif - - #define SAVE_VSR_TRANSACT(n,b,base) li b,THREAD_TRANSACT_VSR0+(16*(n)); \ - STXVD2X_ROT(n,R##base,R##b) - #define SAVE_2VSRS_TRANSACT(n,b,base) SAVE_VSR_TRANSACT(n,b,base); \ - 
SAVE_VSR_TRANSACT(n+1,b,base) - #define SAVE_4VSRS_TRANSACT(n,b,base) SAVE_2VSRS_TRANSACT(n,b,base); \ - SAVE_2VSRS_TRANSACT(n+2,b,base) - #define SAVE_8VSRS_TRANSACT(n,b,base) SAVE_4VSRS_TRANSACT(n,b,base); \ - SAVE_4VSRS_TRANSACT(n+4,b,base) - #define SAVE_16VSRS_TRANSACT(n,b,base) SAVE_8VSRS_TRANSACT(n,b,base); \ - SAVE_8VSRS_TRANSACT(n+8,b,base) - #define SAVE_32VSRS_TRANSACT(n,b,base) SAVE_16VSRS_TRANSACT(n,b,base); \ - SAVE_16VSRS_TRANSACT(n+16,b,base) - - #define REST_VSR_TRANSACT(n,b,base) li b,THREAD_TRANSACT_VSR0+(16*(n)); \ - LXVD2X_ROT(n,R##base,R##b) - #define REST_2VSRS_TRANSACT(n,b,base) REST_VSR_TRANSACT(n,b,base); \ - REST_VSR_TRANSACT(n+1,b,base) - #define REST_4VSRS_TRANSACT(n,b,base) REST_2VSRS_TRANSACT(n,b,base); \ - REST_2VSRS_TRANSACT(n+2,b,base) - #define REST_8VSRS_TRANSACT(n,b,base) REST_4VSRS_TRANSACT(n,b,base); \ - REST_4VSRS_TRANSACT(n+4,b,base) - #define REST_16VSRS_TRANSACT(n,b,base) REST_8VSRS_TRANSACT(n,b,base); \ - REST_8VSRS_TRANSACT(n+8,b,base) - #define REST_32VSRS_TRANSACT(n,b,base) REST_16VSRS_TRANSACT(n,b,base); \ - REST_16VSRS_TRANSACT(n+16,b,base) - /* Save the lower 32 VSRs in the thread VSR region */ - #define SAVE_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); \ - STXVD2X_ROT(n,R##base,R##b) -#define SAVE_VSR(n,b,base) li b,16*(n); STXVD2X(n,R##base,R##b) ++#define SAVE_VSR(n,b,base) li b,16*(n); STXVD2X_ROT(n,R##base,R##b) #define SAVE_2VSRS(n,b,base) SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base) #define SAVE_4VSRS(n,b,base) SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base) #define SAVE_8VSRS(n,b,base) SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base) #define SAVE_16VSRS(n,b,base) SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,base) #define SAVE_32VSRS(n,b,base) SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16,b,base) - #define REST_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); \ - LXVD2X_ROT(n,R##base,R##b) -#define REST_VSR(n,b,base) li b,16*(n); LXVD2X(n,R##base,R##b) ++#define REST_VSR(n,b,base) li b,16*(n); LXVD2X_ROT(n,R##base,R##b) #define 
REST_2VSRS(n,b,base) REST_VSR(n,b,base); REST_VSR(n+1,b,base) #define REST_4VSRS(n,b,base) REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base) #define REST_8VSRS(n,b,base) REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base) diff --cc arch/powerpc/include/asm/processor.h index 82c6ee9df9a1,ea88e7bd4a34..c1583070937d --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@@ -152,8 -142,22 +152,20 @@@ typedef struct unsigned long seg; } mm_segment_t; - #define TS_FPR(i) fpr[i][TS_FPROFFSET] - #define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET] -#define TS_FPROFFSET 0 -#define TS_VSRLOWOFFSET 1 + #define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET] + #define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET] + + /* FP and VSX 0-31 register set */ + struct thread_fp_state { + u64 fpr[32][TS_FPRWIDTH] __attribute__((aligned(16))); + u64 fpscr; /* Floating point status */ + }; + + /* Complete AltiVec register set including VSCR */ + struct thread_vr_state { + vector128 vr[32] __attribute__((aligned(16))); + vector128 vscr __attribute__((aligned(16))); + }; struct thread_struct { unsigned long ksp; /* Kernel stack pointer */ diff --cc arch/powerpc/kernel/align.c index 59f70adcbcd9,eaa16bc17e9d..6e3f9772aaba --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@@ -652,9 -658,9 +652,9 @@@ static int emulate_vsx(unsigned char __ flush_vsx_to_thread(current); if (reg < 32) - ptr = (char *) &current->thread.fpr[reg][0]; - ptr = (char *) &current->thread.TS_FPR(reg); ++ ptr = (char *) &current->thread.fp_state.fpr[reg][0]; else - ptr = (char *) &current->thread.vr[reg - 32]; + ptr = (char *) &current->thread.vr_state.vr[reg - 32]; lptr = (unsigned long *) ptr; @@@ -921,30 -878,26 +921,30 @@@ int fix_alignment(struct pt_regs *regs * get it from register values */ if (!(flags & ST)) { - data.ll = 0; - ret = 0; - p = (unsigned long) addr; + unsigned int start = 0; + switch (nb) { - case 8: - ret |= __get_user_inatomic(data.v[0], SWIZ_PTR(p++)); - ret |= 
__get_user_inatomic(data.v[1], SWIZ_PTR(p++)); - ret |= __get_user_inatomic(data.v[2], SWIZ_PTR(p++)); - ret |= __get_user_inatomic(data.v[3], SWIZ_PTR(p++)); case 4: - ret |= __get_user_inatomic(data.v[4], SWIZ_PTR(p++)); - ret |= __get_user_inatomic(data.v[5], SWIZ_PTR(p++)); + start = offsetof(union data, x32.low32); + break; case 2: - ret |= __get_user_inatomic(data.v[6], SWIZ_PTR(p++)); - ret |= __get_user_inatomic(data.v[7], SWIZ_PTR(p++)); - if (unlikely(ret)) - return -EFAULT; + start = offsetof(union data, x16.low16); + break; } + + data.ll = 0; + ret = 0; + p = (unsigned long)addr; + + for (i = 0; i < nb; i++) + ret |= __get_user_inatomic(data.v[start + i], + SWIZ_PTR(p++)); + + if (unlikely(ret)) + return -EFAULT; + } else if (flags & F) { - data.dd = current->thread.TS_FPR(reg); + data.ll = current->thread.TS_FPR(reg); if (flags & S) { /* Single-precision FP store requires conversion... */ #ifdef CONFIG_PPC_FPU diff --cc arch/powerpc/kernel/ptrace.c index 8d5d4e921a5e,238580043d85..1ca589c9ec6d --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@@ -1554,10 -1555,10 +1555,10 @@@ long arch_ptrace(struct task_struct *ch flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - tmp = ((unsigned long *)child->thread.fp_state.fpr) - [fpidx * TS_FPRWIDTH]; ++ memcpy(&tmp, &child->thread.fp_state.fpr, + sizeof(long)); else - tmp = child->thread.fpscr.val; + tmp = child->thread.fp_state.fpscr; } ret = put_user(tmp, datalp); break; @@@ -1587,10 -1588,10 +1588,10 @@@ flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&child->thread.TS_FPR(fpidx), &data, - ((unsigned long *)child->thread.fp_state.fpr) - [fpidx * TS_FPRWIDTH] = data; ++ memcpy(&child->thread.fp_state.fpr, &data, + sizeof(long)); else - child->thread.fpscr.val = data; + child->thread.fp_state.fpscr = data; ret = 0; } break; diff --cc arch/powerpc/platforms/powernv/pci-ioda.c index 
a6531d2ff6c2,307015d9cd99..c639af7d4826 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@@ -454,10 -464,13 +464,13 @@@ static void pnv_ioda_setup_bus_dma(stru } } - static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, - __be64 *startp, __be64 *endp) + static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, + struct iommu_table *tbl, - u64 *startp, u64 *endp, bool rm) ++ __be64 *startp, __be64 *endp, bool rm) { - __be64 __iomem *invalidate = (__be64 __iomem *)tbl->it_index; - u64 __iomem *invalidate = rm ? - (u64 __iomem *)pe->tce_inval_reg_phys : - (u64 __iomem *)tbl->it_index; ++ __be64 __iomem *invalidate = rm ? ++ (__be64 __iomem *)pe->tce_inval_reg_phys : ++ (__be64 __iomem *)tbl->it_index; unsigned long start, end, inc; start = __pa(startp); @@@ -484,7 -497,10 +497,10 @@@ mb(); /* Ensure above stores are visible */ while (start <= end) { - __raw_writeq(cpu_to_be64(start), invalidate); + if (rm) - __raw_rm_writeq(start, invalidate); ++ __raw_rm_writeq(cpu_to_be64(start), invalidate); + else - __raw_writeq(start, invalidate); ++ __raw_writeq(cpu_to_be64(start), invalidate); start += inc; } @@@ -496,10 -512,12 +512,12 @@@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, struct iommu_table *tbl, - __be64 *startp, __be64 *endp) - u64 *startp, u64 *endp, bool rm) ++ __be64 *startp, __be64 *endp, bool rm) { unsigned long start, end, inc; - __be64 __iomem *invalidate = (__be64 __iomem *)tbl->it_index; - u64 __iomem *invalidate = rm ? - (u64 __iomem *)pe->tce_inval_reg_phys : - (u64 __iomem *)tbl->it_index; ++ __be64 __iomem *invalidate = rm ? 
++ (__be64 __iomem *)pe->tce_inval_reg_phys : ++ (__be64 __iomem *)tbl->it_index; /* We'll invalidate DMA address in PE scope */ start = 0x2ul << 60; @@@ -515,13 -533,16 +533,16 @@@ mb(); while (start <= end) { - __raw_writeq(cpu_to_be64(start), invalidate); + if (rm) - __raw_rm_writeq(start, invalidate); ++ __raw_rm_writeq(cpu_to_be64(start), invalidate); + else - __raw_writeq(start, invalidate); ++ __raw_writeq(cpu_to_be64(start), invalidate); start += inc; } } void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, - __be64 *startp, __be64 *endp) - u64 *startp, u64 *endp, bool rm) ++ __be64 *startp, __be64 *endp, bool rm) { struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, tce32_table); diff --cc arch/powerpc/platforms/powernv/pci.c index a26956c5f38c,420abe3baab9..921ae673baf3 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@@ -401,10 -401,10 +401,10 @@@ struct pci_ops pnv_pci_ops = static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + struct dma_attrs *attrs, bool rm) { u64 proto_tce; - u64 *tcep, *tces; + __be64 *tcep, *tces; u64 rpn; proto_tce = TCE_PCI_READ; // Read allowed @@@ -428,17 -428,32 +428,32 @@@ return 0; } - static void pnv_tce_free(struct iommu_table *tbl, long index, long npages) + static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) + { + return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, + false); + } + + static void pnv_tce_free(struct iommu_table *tbl, long index, long npages, + bool rm) { - u64 *tcep, *tces; + __be64 *tcep, *tces; - tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset; + tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; while (npages--) - *(tcep++) = 0; + *(tcep++) = cpu_to_be64(0); if (tbl->it_type & 
TCE_PCI_SWINV_FREE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1); + pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); + } + + static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages) + { + pnv_tce_free(tbl, index, npages, false); } static unsigned long pnv_tce_get(struct iommu_table *tbl, long index) diff --cc arch/powerpc/platforms/powernv/pci.h index d0bb5204ece3,170dd98629d7..64d3b12e5b6d --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@@ -193,6 -194,6 +194,6 @@@ extern void pnv_pci_init_p5ioc2_hub(str extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, - __be64 *startp, __be64 *endp); - u64 *startp, u64 *endp, bool rm); ++ __be64 *startp, __be64 *endp, bool rm); #endif /* __POWERNV_PCI_H */