x86/fpu: Rename fpu__activate_fpstate() to fpu__activate_fpstate_write()
[firefly-linux-kernel-4.4.55.git] arch/x86/kernel/fpu/core.c
index ac390c69094437c0937d3ebe853e8f5a39af2924..6b0955a62d340c36d57f82410594b3259b7b408f 100644
@@ -5,7 +5,18 @@
  *  General FPU state handling cleanups
  *     Gareth Hughes <gareth@valinux.com>, May 2000
  */
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
+#include <asm/fpu/regset.h>
+#include <asm/fpu/signal.h>
+#include <asm/traps.h>
+
+#include <linux/hardirq.h>
+
+/*
+ * Represents the initial FPU state. It's mostly (but not completely) zeroes,
+ * depending on the FPU hardware format:
+ */
+union fpregs_state init_fpstate __read_mostly;
 
 /*
  * Track whether the kernel is using the FPU state
@@ -27,13 +38,13 @@ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
 static void kernel_fpu_disable(void)
 {
-       WARN_ON(this_cpu_read(in_kernel_fpu));
+       WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
        this_cpu_write(in_kernel_fpu, true);
 }
 
 static void kernel_fpu_enable(void)
 {
-       WARN_ON_ONCE(!this_cpu_read(in_kernel_fpu));
+       WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
        this_cpu_write(in_kernel_fpu, false);
 }
 
@@ -62,7 +73,7 @@ static bool interrupted_kernel_fpu_idle(void)
        if (use_eager_fpu())
                return true;
 
-       return !current->thread.fpu.has_fpu && (read_cr0() & X86_CR0_TS);
+       return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
 }
 
 /*
@@ -98,634 +109,434 @@ void __kernel_fpu_begin(void)
 {
        struct fpu *fpu = &current->thread.fpu;
 
+       WARN_ON_FPU(!irq_fpu_usable());
+
        kernel_fpu_disable();
 
-       if (fpu->has_fpu) {
-               fpu_save_init(fpu);
+       if (fpu->fpregs_active) {
+               copy_fpregs_to_fpstate(fpu);
        } else {
                this_cpu_write(fpu_fpregs_owner_ctx, NULL);
-               if (!use_eager_fpu())
-                       clts();
+               __fpregs_activate_hw();
        }
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
 
 void __kernel_fpu_end(void)
 {
-       struct task_struct *me = current;
-       struct fpu *fpu = &me->thread.fpu;
+       struct fpu *fpu = &current->thread.fpu;
 
-       if (fpu->has_fpu) {
-               if (WARN_ON(restore_fpu_checking(me)))
-                       fpu_reset_state(me);
-       } else if (!use_eager_fpu()) {
-               stts();
+       if (fpu->fpregs_active) {
+               if (WARN_ON_FPU(copy_fpstate_to_fpregs(fpu)))
+                       fpu__clear(fpu);
+       } else {
+               __fpregs_deactivate_hw();
        }
 
        kernel_fpu_enable();
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
 
-/*
- * Save the FPU state (initialize it if necessary):
- *
- * This only ever gets called for the current task.
- */
-void fpu__save(struct task_struct *tsk)
+void kernel_fpu_begin(void)
 {
-       struct fpu *fpu = &tsk->thread.fpu;
-
-       WARN_ON(tsk != current);
-
        preempt_disable();
-       if (fpu->has_fpu) {
-               if (use_eager_fpu()) {
-                       __save_fpu(tsk);
-               } else {
-                       fpu_save_init(fpu);
-                       __thread_fpu_end(tsk);
-               }
-       }
-       preempt_enable();
+       __kernel_fpu_begin();
 }
-EXPORT_SYMBOL_GPL(fpu__save);
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 
-void fpstate_init(struct fpu *fpu)
+void kernel_fpu_end(void)
 {
-       if (!cpu_has_fpu) {
-               finit_soft_fpu(&fpu->state->soft);
-               return;
-       }
-
-       memset(fpu->state, 0, xstate_size);
-
-       if (cpu_has_fxsr) {
-               fx_finit(&fpu->state->fxsave);
-       } else {
-               struct i387_fsave_struct *fp = &fpu->state->fsave;
-               fp->cwd = 0xffff037fu;
-               fp->swd = 0xffff0000u;
-               fp->twd = 0xffffffffu;
-               fp->fos = 0xffff0000u;
-       }
+       __kernel_fpu_end();
+       preempt_enable();
 }
-EXPORT_SYMBOL_GPL(fpstate_init);
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
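
A minimal usage sketch (not part of this patch; the caller and the byte-copy
fallback are hypothetical, and the header is assumed to be the asm/fpu/api.h
introduced earlier in this series): kernel code that wants to execute FPU/SIMD
instructions brackets them with kernel_fpu_begin()/kernel_fpu_end(), checking
irq_fpu_usable() first if it can be reached from interrupt context:

    #include <linux/string.h>
    #include <asm/fpu/api.h>

    static void copy_block_with_simd(void *dst, const void *src, size_t len)
    {
            if (!irq_fpu_usable()) {
                    /* FPU not usable in this context: integer-only fallback. */
                    memcpy(dst, src, len);
                    return;
            }

            kernel_fpu_begin();
            /* Stand-in for an SSE/AVX copy loop; FPU/SIMD insns are safe here: */
            memcpy(dst, src, len);
            kernel_fpu_end();
    }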
 
 /*
- * FPU state allocation:
+ * CR0::TS save/restore functions:
  */
-static struct kmem_cache *task_xstate_cachep;
-
-void fpstate_cache_init(void)
+int irq_ts_save(void)
 {
-       task_xstate_cachep =
-               kmem_cache_create("task_xstate", xstate_size,
-                                 __alignof__(union thread_xstate),
-                                 SLAB_PANIC | SLAB_NOTRACK, NULL);
-       setup_xstate_comp();
-}
-
-int fpstate_alloc(struct fpu *fpu)
-{
-       if (fpu->state)
+       /*
+        * If in process context and not atomic, we can take a spurious DNA fault.
+        * Otherwise, doing clts() in process context requires disabling preemption
+        * or some heavy lifting like kernel_fpu_begin()
+        */
+       if (!in_atomic())
                return 0;
 
-       fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
-       if (!fpu->state)
-               return -ENOMEM;
-
-       /* The CPU requires the FPU state to be aligned to 16 byte boundaries: */
-       WARN_ON((unsigned long)fpu->state & 15);
+       if (read_cr0() & X86_CR0_TS) {
+               clts();
+               return 1;
+       }
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(fpstate_alloc);
+EXPORT_SYMBOL_GPL(irq_ts_save);
 
-void fpstate_free(struct fpu *fpu)
+void irq_ts_restore(int TS_state)
 {
-       if (fpu->state) {
-               kmem_cache_free(task_xstate_cachep, fpu->state);
-               fpu->state = NULL;
-       }
+       if (TS_state)
+               stts();
 }
-EXPORT_SYMBOL_GPL(fpstate_free);
+EXPORT_SYMBOL_GPL(irq_ts_restore);
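
The typical caller pattern (a hypothetical sketch; these helpers are assumed to
be exported through asm/fpu/api.h as elsewhere in this series): save the TS
state, issue the instruction that would otherwise fault with #NM, then restore:

    #include <asm/fpu/api.h>

    static void issue_fpu_touching_insn(void)
    {
            int ts_state;

            ts_state = irq_ts_save();    /* clears CR0.TS only if in atomic context */

            /* ... instruction that the CPU treats as an FPU access ... */

            irq_ts_restore(ts_state);    /* sets CR0.TS again iff we cleared it */
    }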
 
 /*
- * Copy the current task's FPU state to a new task's FPU context.
- *
- * In the 'eager' case we just save to the destination context.
+ * Save the FPU state (mark it for reload if necessary):
  *
- * In the 'lazy' case we save to the source context, mark the FPU lazy
- * via stts() and copy the source context into the destination context.
+ * This only ever gets called for the current task.
  */
-static void fpu_copy(struct task_struct *dst, struct task_struct *src)
+void fpu__save(struct fpu *fpu)
 {
-       WARN_ON(src != current);
-
-       if (use_eager_fpu()) {
-               memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
-               __save_fpu(dst);
-       } else {
-               struct fpu *dfpu = &dst->thread.fpu;
-               struct fpu *sfpu = &src->thread.fpu;
+       WARN_ON_FPU(fpu != &current->thread.fpu);
 
-               fpu__save(src);
-               memcpy(dfpu->state, sfpu->state, xstate_size);
+       preempt_disable();
+       if (fpu->fpregs_active) {
+               if (!copy_fpregs_to_fpstate(fpu))
+                       fpregs_deactivate(fpu);
        }
+       preempt_enable();
 }
+EXPORT_SYMBOL_GPL(fpu__save);
 
-int fpu__copy(struct task_struct *dst, struct task_struct *src)
+/*
+ * Legacy x87 fpstate state init:
+ */
+static inline void fpstate_init_fstate(struct fregs_state *fp)
 {
-       dst->thread.fpu.counter = 0;
-       dst->thread.fpu.has_fpu = 0;
-       dst->thread.fpu.state = NULL;
+       fp->cwd = 0xffff037fu;
+       fp->swd = 0xffff0000u;
+       fp->twd = 0xffffffffu;
+       fp->fos = 0xffff0000u;
+}
 
-       task_disable_lazy_fpu_restore(dst);
+void fpstate_init(union fpregs_state *state)
+{
+       if (!cpu_has_fpu) {
+               fpstate_init_soft(&state->soft);
+               return;
+       }
 
-       if (tsk_used_math(src)) {
-               int err = fpstate_alloc(&dst->thread.fpu);
+       memset(state, 0, xstate_size);
 
-               if (err)
-                       return err;
-               fpu_copy(dst, src);
-       }
-       return 0;
+       if (cpu_has_fxsr)
+               fpstate_init_fxstate(&state->fxsave);
+       else
+               fpstate_init_fstate(&state->fsave);
 }
+EXPORT_SYMBOL_GPL(fpstate_init);
 
 /*
- * Allocate the backing store for the current task's FPU registers
- * and initialize the registers themselves as well.
+ * Copy the current task's FPU state to a new task's FPU context.
  *
- * Can fail.
+ * In both the 'eager' and the 'lazy' case we save hardware registers
+ * directly to the destination buffer.
  */
-int fpstate_alloc_init(struct task_struct *curr)
+static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-       int ret;
+       WARN_ON_FPU(src_fpu != &current->thread.fpu);
 
-       if (WARN_ON_ONCE(curr != current))
-               return -EINVAL;
-       if (WARN_ON_ONCE(curr->flags & PF_USED_MATH))
-               return -EINVAL;
+       /*
+        * Don't let 'init optimized' areas of the XSAVE area
+        * leak into the child task:
+        */
+       if (use_eager_fpu())
+               memset(&dst_fpu->state.xsave, 0, xstate_size);
 
        /*
-        * Memory allocation at the first usage of the FPU and other state.
+        * Save current FPU registers directly into the child
+        * FPU context, without any memory-to-memory copying.
+        *
+        * If the FPU context got destroyed in the process (FNSAVE
+        * done on old CPUs) then copy it back into the source
+        * context and mark the current task for lazy restore.
+        *
+        * We have to do all this with preemption disabled,
+        * mostly because of the FNSAVE case, because in that
+        * case we must not allow preemption in the window
+        * between the FNSAVE and us marking the context lazy.
+        *
+        * It shouldn't be an issue as even FNSAVE is plenty
+        * fast in terms of critical section length.
         */
-       ret = fpstate_alloc(&curr->thread.fpu);
-       if (ret)
-               return ret;
+       preempt_disable();
+       if (!copy_fpregs_to_fpstate(dst_fpu)) {
+               memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
+               fpregs_deactivate(src_fpu);
+       }
+       preempt_enable();
+}
 
-       fpstate_init(&curr->thread.fpu);
+int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+{
+       dst_fpu->counter = 0;
+       dst_fpu->fpregs_active = 0;
+       dst_fpu->last_cpu = -1;
 
-       /* Safe to do for the current task: */
-       curr->flags |= PF_USED_MATH;
+       if (src_fpu->fpstate_active)
+               fpu_copy(dst_fpu, src_fpu);
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(fpstate_alloc_init);
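
For context (a simplified sketch of the caller, which lives in process.c and is
not part of this hunk): fpu__copy() is the fork-time hook that duplicates the
parent's FPU context into the child:

    int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
    {
            *dst = *src;    /* plain copy of the task_struct itself */

            /* Duplicate the parent's FPU state into the child's fpstate: */
            return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
    }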
 
 /*
- * The _current_ task is using the FPU for the first time
- * so initialize it and set the mxcsr to its default
- * value at reset if we support XMM instructions and then
- * remember the current task has used the FPU.
+ * Activate the current task's in-memory FPU context,
+ * if it has not been used before:
  */
-static int fpu__unlazy_stopped(struct task_struct *child)
+void fpu__activate_curr(struct fpu *fpu)
 {
-       int ret;
+       WARN_ON_FPU(fpu != &current->thread.fpu);
 
-       if (WARN_ON_ONCE(child == current))
-               return -EINVAL;
+       if (!fpu->fpstate_active) {
+               fpstate_init(&fpu->state);
 
-       if (child->flags & PF_USED_MATH) {
-               task_disable_lazy_fpu_restore(child);
-               return 0;
+               /* Safe to do for the current task: */
+               fpu->fpstate_active = 1;
        }
-
-       /*
-        * Memory allocation at the first usage of the FPU and other state.
-        */
-       ret = fpstate_alloc(&child->thread.fpu);
-       if (ret)
-               return ret;
-
-       fpstate_init(&child->thread.fpu);
-
-       /* Safe to do for stopped child tasks: */
-       child->flags |= PF_USED_MATH;
-
-       return 0;
 }
+EXPORT_SYMBOL_GPL(fpu__activate_curr);
 
 /*
- * 'fpu__restore()' saves the current math information in the
- * old math state array, and gets the new ones from the current task
+ * This function must be called before we read a task's fpstate.
  *
- * Careful.. There are problems with IBM-designed IRQ13 behaviour.
- * Don't touch unless you *really* know how it works.
+ * If the task has not used the FPU before then initialize its
+ * fpstate.
  *
- * Must be called with kernel preemption disabled (eg with local
- * local interrupts as in the case of do_device_not_available).
+ * If the task has used the FPU before then save it.
  */
-void fpu__restore(void)
+void fpu__activate_fpstate_read(struct fpu *fpu)
 {
-       struct task_struct *tsk = current;
-
-       if (!tsk_used_math(tsk)) {
-               local_irq_enable();
-               /*
-                * does a slab alloc which can sleep
-                */
-               if (fpstate_alloc_init(tsk)) {
-                       /*
-                        * ran out of memory!
-                        */
-                       do_group_exit(SIGKILL);
-                       return;
-               }
-               local_irq_disable();
-       }
-
-       /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */
-       kernel_fpu_disable();
-       __thread_fpu_begin(tsk);
-       if (unlikely(restore_fpu_checking(tsk))) {
-               fpu_reset_state(tsk);
-               force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
+       /*
+        * If fpregs are active (in the current CPU), then
+        * copy them to the fpstate:
+        */
+       if (fpu->fpregs_active) {
+               fpu__save(fpu);
        } else {
-               tsk->thread.fpu.counter++;
-       }
-       kernel_fpu_enable();
-}
-EXPORT_SYMBOL_GPL(fpu__restore);
+               if (!fpu->fpstate_active) {
+                       fpstate_init(&fpu->state);
 
-void fpu__flush_thread(struct task_struct *tsk)
-{
-       if (!use_eager_fpu()) {
-               /* FPU state will be reallocated lazily at the first use. */
-               drop_fpu(tsk);
-               fpstate_free(&tsk->thread.fpu);
-       } else {
-               if (!tsk_used_math(tsk)) {
-                       /* kthread execs. TODO: cleanup this horror. */
-               if (WARN_ON(fpstate_alloc_init(tsk)))
-                               force_sig(SIGKILL, tsk);
-                       user_fpu_begin();
+                       /* Safe to do for current and for stopped child tasks: */
+                       fpu->fpstate_active = 1;
                }
-               restore_init_xstate();
        }
 }
 
 /*
- * The xstateregs_active() routine is the same as the fpregs_active() routine,
- * as the "regset->n" for the xstate regset will be updated based on the feature
- * capabilites supported by the xsave.
+ * This function must be called before we read or write a task's fpstate.
+ *
+ * If the task has not used the FPU before then initialize its
+ * fpstate.
+ *
+ * If the task has used the FPU before then save and unlazy it.
+ *
+ * [ If this function is used for non-current child tasks, then
+ *   after this function call, after registers in the fpstate are
+ *   modified and the child task has woken up, the child task will
+ *   restore the modified FPU state from the modified context. If we
+ *   didn't clear its lazy status here then the lazy in-registers
+ *   state pending on its former CPU could be restored, corrupting
+ *   the modifications.
+ *
+ *   This function can be used for the current task as well, but
+ *   only for reading the fpstate. Modifications to the fpstate
+ *   will be lost on eagerfpu systems. ]
+ *
+ * TODO: A future optimization would be to skip the unlazying in
+ *       the read-only case, it's not strictly necessary for
+ *       read-only access to the context.
  */
-int fpregs_active(struct task_struct *target, const struct user_regset *regset)
-{
-       return tsk_used_math(target) ? regset->n : 0;
-}
-
-int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
+void fpu__activate_fpstate_write(struct fpu *fpu)
 {
-       return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
-}
-
-int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
-               unsigned int pos, unsigned int count,
-               void *kbuf, void __user *ubuf)
-{
-       int ret;
-
-       if (!cpu_has_fxsr)
-               return -ENODEV;
-
-       ret = fpu__unlazy_stopped(target);
-       if (ret)
-               return ret;
-
-       sanitize_i387_state(target);
-
-       return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                  &target->thread.fpu.state->fxsave, 0, -1);
-}
-
-int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
-               unsigned int pos, unsigned int count,
-               const void *kbuf, const void __user *ubuf)
-{
-       int ret;
-
-       if (!cpu_has_fxsr)
-               return -ENODEV;
-
-       ret = fpu__unlazy_stopped(target);
-       if (ret)
-               return ret;
-
-       sanitize_i387_state(target);
-
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                &target->thread.fpu.state->fxsave, 0, -1);
-
-       /*
-        * mxcsr reserved bits must be masked to zero for security reasons.
-        */
-       target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
        /*
-        * update the header bits in the xsave header, indicating the
-        * presence of FP and SSE state.
+        * If fpregs are active (in the current CPU), then
+        * copy them to the fpstate:
         */
-       if (cpu_has_xsave)
-               target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
+       if (fpu->fpregs_active) {
+               fpu__save(fpu);
+       } else {
+               if (fpu->fpstate_active) {
+                       /* Invalidate any lazy state: */
+                       fpu->last_cpu = -1;
+               } else {
+                       fpstate_init(&fpu->state);
 
-       return ret;
+                       /* Safe to do for current and for stopped child tasks: */
+                       fpu->fpstate_active = 1;
+               }
+       }
 }
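
The intended users are the ptrace/regset handlers (now in fpu/regset.c); a
write-side handler is expected to follow roughly this pattern (sketch only,
with the xstate header fixups and error handling abridged):

    int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
                    unsigned int pos, unsigned int count,
                    const void *kbuf, const void __user *ubuf)
    {
            struct fpu *fpu = &target->thread.fpu;
            int ret;

            if (!cpu_has_fxsr)
                    return -ENODEV;

            /* Make the fpstate valid and invalidate any lazy register state: */
            fpu__activate_fpstate_write(fpu);

            ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                     &fpu->state.fxsave, 0, -1);

            /* mxcsr reserved bits must be masked to zero for security reasons. */
            fpu->state.fxsave.mxcsr &= mxcsr_feature_mask;

            return ret;
    }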
 
-int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
-               unsigned int pos, unsigned int count,
-               void *kbuf, void __user *ubuf)
+/*
+ * 'fpu__restore()' is called to copy FPU registers from
+ * the FPU fpstate to the live hw registers and to activate
+ * access to the hardware registers, so that FPU instructions
+ * can be used afterwards.
+ *
+ * Must be called with kernel preemption disabled (for example
+ * with local interrupts disabled, as it is in the case of
+ * do_device_not_available()).
+ */
+void fpu__restore(struct fpu *fpu)
 {
-       struct xsave_struct *xsave;
-       int ret;
+       fpu__activate_curr(fpu);
 
-       if (!cpu_has_xsave)
-               return -ENODEV;
-
-       ret = fpu__unlazy_stopped(target);
-       if (ret)
-               return ret;
-
-       xsave = &target->thread.fpu.state->xsave;
-
-       /*
-        * Copy the 48bytes defined by the software first into the xstate
-        * memory layout in the thread struct, so that we can copy the entire
-        * xstateregs to the user using one user_regset_copyout().
-        */
-       memcpy(&xsave->i387.sw_reserved,
-               xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-       /*
-        * Copy the xstate memory layout.
-        */
-       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-       return ret;
+       /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
+       kernel_fpu_disable();
+       fpregs_activate(fpu);
+       if (unlikely(copy_fpstate_to_fpregs(fpu))) {
+               fpu__clear(fpu);
+               force_sig_info(SIGSEGV, SEND_SIG_PRIV, current);
+       } else {
+               fpu->counter++;
+       }
+       kernel_fpu_enable();
 }
+EXPORT_SYMBOL_GPL(fpu__restore);
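
For reference, the main caller is the #NM (device-not-available) trap handler
mentioned above; a heavily simplified sketch of that call site (math-emulation
and context-tracking details omitted, naming assumed to match traps.c in this
series):

    dotraplinkage void
    do_device_not_available(struct pt_regs *regs, long error_code)
    {
            BUG_ON(use_eager_fpu());

            /* Interrupts are still disabled here, as fpu__restore() requires: */
            fpu__restore(&current->thread.fpu);
    }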
 
-int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
-                 unsigned int pos, unsigned int count,
-                 const void *kbuf, const void __user *ubuf)
+/*
+ * Drops current FPU state: deactivates the fpregs and
+ * the fpstate. NOTE: it still leaves previous contents
+ * in the fpregs in the eager-FPU case.
+ *
+ * This function can be used in cases where we know that
+ * a state-restore is coming: either an explicit one,
+ * or a reschedule.
+ */
+void fpu__drop(struct fpu *fpu)
 {
-       struct xsave_struct *xsave;
-       int ret;
-
-       if (!cpu_has_xsave)
-               return -ENODEV;
-
-       ret = fpu__unlazy_stopped(target);
-       if (ret)
-               return ret;
+       preempt_disable();
+       fpu->counter = 0;
+
+       if (fpu->fpregs_active) {
+               /* Ignore delayed exceptions from user space */
+               asm volatile("1: fwait\n"
+                            "2:\n"
+                            _ASM_EXTABLE(1b, 2b));
+               fpregs_deactivate(fpu);
+       }
 
-       xsave = &target->thread.fpu.state->xsave;
+       fpu->fpstate_active = 0;
 
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-       /*
-        * mxcsr reserved bits must be masked to zero for security reasons.
-        */
-       xsave->i387.mxcsr &= mxcsr_feature_mask;
-       xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
-       /*
-        * These bits must be zero.
-        */
-       memset(&xsave->xsave_hdr.reserved, 0, 48);
-       return ret;
+       preempt_enable();
 }
 
-#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
-
 /*
- * FPU tag word conversions.
+ * Clear FPU registers by setting them up from
+ * the init fpstate:
  */
-
-static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
+static inline void copy_init_fpstate_to_fpregs(void)
 {
-       unsigned int tmp; /* to avoid 16 bit prefixes in the code */
-
-       /* Transform each pair of bits into 01 (valid) or 00 (empty) */
-       tmp = ~twd;
-       tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
-       /* and move the valid bits to the lower byte. */
-       tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
-       tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
-       tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
-
-       return tmp;
+       if (use_xsave())
+               copy_kernel_to_xregs(&init_fpstate.xsave, -1);
+       else
+               copy_kernel_to_fxregs(&init_fpstate.fxsave);
 }
 
-#define FPREG_ADDR(f, n)       ((void *)&(f)->st_space + (n) * 16)
-#define FP_EXP_TAG_VALID       0
-#define FP_EXP_TAG_ZERO                1
-#define FP_EXP_TAG_SPECIAL     2
-#define FP_EXP_TAG_EMPTY       3
-
-static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
+/*
+ * Clear the FPU state back to init state.
+ *
+ * Called by sys_execve(), by the signal handler code and by various
+ * error paths.
+ */
+void fpu__clear(struct fpu *fpu)
 {
-       struct _fpxreg *st;
-       u32 tos = (fxsave->swd >> 11) & 7;
-       u32 twd = (unsigned long) fxsave->twd;
-       u32 tag;
-       u32 ret = 0xffff0000u;
-       int i;
-
-       for (i = 0; i < 8; i++, twd >>= 1) {
-               if (twd & 0x1) {
-                       st = FPREG_ADDR(fxsave, (i - tos) & 7);
-
-                       switch (st->exponent & 0x7fff) {
-                       case 0x7fff:
-                               tag = FP_EXP_TAG_SPECIAL;
-                               break;
-                       case 0x0000:
-                               if (!st->significand[0] &&
-                                   !st->significand[1] &&
-                                   !st->significand[2] &&
-                                   !st->significand[3])
-                                       tag = FP_EXP_TAG_ZERO;
-                               else
-                                       tag = FP_EXP_TAG_SPECIAL;
-                               break;
-                       default:
-                               if (st->significand[3] & 0x8000)
-                                       tag = FP_EXP_TAG_VALID;
-                               else
-                                       tag = FP_EXP_TAG_SPECIAL;
-                               break;
-                       }
-               } else {
-                       tag = FP_EXP_TAG_EMPTY;
+       WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
+
+       if (!use_eager_fpu()) {
+               /* FPU state will be reallocated lazily at the first use. */
+               fpu__drop(fpu);
+       } else {
+               if (!fpu->fpstate_active) {
+                       fpu__activate_curr(fpu);
+                       user_fpu_begin();
                }
-               ret |= tag << (2 * i);
+               copy_init_fpstate_to_fpregs();
        }
-       return ret;
 }
 
 /*
- * FXSR floating point environment conversions.
+ * x87 math exception handling:
  */
 
-void
-convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
+static inline unsigned short get_fpu_cwd(struct fpu *fpu)
 {
-       struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
-       struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
-       struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
-       int i;
-
-       env->cwd = fxsave->cwd | 0xffff0000u;
-       env->swd = fxsave->swd | 0xffff0000u;
-       env->twd = twd_fxsr_to_i387(fxsave);
-
-#ifdef CONFIG_X86_64
-       env->fip = fxsave->rip;
-       env->foo = fxsave->rdp;
-       /*
-        * should be actually ds/cs at fpu exception time, but
-        * that information is not available in 64bit mode.
-        */
-       env->fcs = task_pt_regs(tsk)->cs;
-       if (tsk == current) {
-               savesegment(ds, env->fos);
+       if (cpu_has_fxsr) {
+               return fpu->state.fxsave.cwd;
        } else {
-               env->fos = tsk->thread.ds;
+               return (unsigned short)fpu->state.fsave.cwd;
        }
-       env->fos |= 0xffff0000;
-#else
-       env->fip = fxsave->fip;
-       env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
-       env->foo = fxsave->foo;
-       env->fos = fxsave->fos;
-#endif
-
-       for (i = 0; i < 8; ++i)
-               memcpy(&to[i], &from[i], sizeof(to[0]));
 }
 
-void convert_to_fxsr(struct task_struct *tsk,
-                    const struct user_i387_ia32_struct *env)
-
+static inline unsigned short get_fpu_swd(struct fpu *fpu)
 {
-       struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
-       struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
-       struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
-       int i;
-
-       fxsave->cwd = env->cwd;
-       fxsave->swd = env->swd;
-       fxsave->twd = twd_i387_to_fxsr(env->twd);
-       fxsave->fop = (u16) ((u32) env->fcs >> 16);
-#ifdef CONFIG_X86_64
-       fxsave->rip = env->fip;
-       fxsave->rdp = env->foo;
-       /* cs and ds ignored */
-#else
-       fxsave->fip = env->fip;
-       fxsave->fcs = (env->fcs & 0xffff);
-       fxsave->foo = env->foo;
-       fxsave->fos = env->fos;
-#endif
-
-       for (i = 0; i < 8; ++i)
-               memcpy(&to[i], &from[i], sizeof(from[0]));
+       if (cpu_has_fxsr) {
+               return fpu->state.fxsave.swd;
+       } else {
+               return (unsigned short)fpu->state.fsave.swd;
+       }
 }
 
-int fpregs_get(struct task_struct *target, const struct user_regset *regset,
-              unsigned int pos, unsigned int count,
-              void *kbuf, void __user *ubuf)
+static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
 {
-       struct user_i387_ia32_struct env;
-       int ret;
-
-       ret = fpu__unlazy_stopped(target);
-       if (ret)
-               return ret;
-
-       if (!static_cpu_has(X86_FEATURE_FPU))
-               return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
-
-       if (!cpu_has_fxsr)
-               return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                          &target->thread.fpu.state->fsave, 0,
-                                          -1);
-
-       sanitize_i387_state(target);
-
-       if (kbuf && pos == 0 && count == sizeof(env)) {
-               convert_from_fxsr(kbuf, target);
-               return 0;
+       if (cpu_has_xmm) {
+               return fpu->state.fxsave.mxcsr;
+       } else {
+               return MXCSR_DEFAULT;
        }
-
-       convert_from_fxsr(&env, target);
-
-       return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
 }
 
-int fpregs_set(struct task_struct *target, const struct user_regset *regset,
-              unsigned int pos, unsigned int count,
-              const void *kbuf, const void __user *ubuf)
+int fpu__exception_code(struct fpu *fpu, int trap_nr)
 {
-       struct user_i387_ia32_struct env;
-       int ret;
-
-       ret = fpu__unlazy_stopped(target);
-       if (ret)
-               return ret;
-
-       sanitize_i387_state(target);
-
-       if (!static_cpu_has(X86_FEATURE_FPU))
-               return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
+       int err;
 
-       if (!cpu_has_fxsr)
-               return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                         &target->thread.fpu.state->fsave, 0,
-                                         -1);
+       if (trap_nr == X86_TRAP_MF) {
+               unsigned short cwd, swd;
+               /*
+                * (~cwd & swd) will mask out exceptions that are not set to unmasked
+                * status.  0x3f is the exception bits in these regs, 0x200 is the
+                * C1 reg you need in case of a stack fault, 0x040 is the stack
+                * fault bit.  We should only be taking one exception at a time,
+                * so if this combination doesn't produce any single exception,
+                * then we have a bad program that isn't synchronizing its FPU usage
+                * and it will suffer the consequences since we won't be able to
+                * fully reproduce the context of the exception
+                */
+               cwd = get_fpu_cwd(fpu);
+               swd = get_fpu_swd(fpu);
 
-       if (pos > 0 || count < sizeof(env))
-               convert_from_fxsr(&env, target);
+               err = swd & ~cwd;
+       } else {
+               /*
+                * The SIMD FPU exceptions are handled a little differently, as there
+                * is only a single status/control register.  Thus, to determine which
+                * unmasked exception was caught we must mask the exception mask bits
+                * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+                */
+               unsigned short mxcsr = get_fpu_mxcsr(fpu);
+               err = ~(mxcsr >> 7) & mxcsr;
+       }
 
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
-       if (!ret)
-               convert_to_fxsr(target, &env);
+       if (err & 0x001) {      /* Invalid op */
+               /*
+                * swd & 0x240 == 0x040: Stack Underflow
+                * swd & 0x240 == 0x240: Stack Overflow
+                * User must clear the SF bit (0x40) if set
+                */
+               return FPE_FLTINV;
+       } else if (err & 0x004) { /* Divide by Zero */
+               return FPE_FLTDIV;
+       } else if (err & 0x008) { /* Overflow */
+               return FPE_FLTOVF;
+       } else if (err & 0x012) { /* Denormal, Underflow */
+               return FPE_FLTUND;
+       } else if (err & 0x020) { /* Precision */
+               return FPE_FLTRES;
+       }
 
        /*
-        * update the header bit in the xsave header, indicating the
-        * presence of FP.
+        * If we're using IRQ 13, or supposedly even some trap
+        * X86_TRAP_MF implementations, it's possible
+        * we get a spurious trap, which is not an error.
         */
-       if (cpu_has_xsave)
-               target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
-       return ret;
-}
-
-/*
- * FPU state for core dumps.
- * This is only used for a.out dumps now.
- * It is declared generically using elf_fpregset_t (which is
- * struct user_i387_struct) but is in fact only used for 32-bit
- * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
- */
-int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
-{
-       struct task_struct *tsk = current;
-       int fpvalid;
-
-       fpvalid = !!used_math();
-       if (fpvalid)
-               fpvalid = !fpregs_get(tsk, NULL,
-                                     0, sizeof(struct user_i387_ia32_struct),
-                                     fpu, NULL);
-
-       return fpvalid;
+       return 0;
 }
-EXPORT_SYMBOL(dump_fpu);
-
-#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
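
A worked example of the SIMD branch above, with hypothetical register values:
MXCSR keeps the exception flags in bits 0-5 and the corresponding mask bits in
bits 7-12, so shifting the mask bits down by 7 lines them up with the flags:

    /*
     * Say divide-by-zero is unmasked (ZM, bit 9, cleared) and the ZE flag
     * (bit 2) got set by the faulting instruction:
     *
     *   mxcsr          = 0x1d84   (default 0x1f80 with ZM cleared, ZE set)
     *   mxcsr >> 7     = 0x003b   (mask bits moved down onto the flag bits)
     *   ~(mxcsr >> 7)  = ...ffc4  (bit 2 is set because ZM was 0)
     *   err            = ~(mxcsr >> 7) & mxcsr   ->  bit 2 (0x004) survives
     *
     * The 'err & 0x004' test then matches and fpu__exception_code() returns
     * FPE_FLTDIV.
     */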