x86/fpu: Add kernel_fpu_disabled()
[firefly-linux-kernel-4.4.55.git] / arch / x86 / kernel / fpu / core.c
1 /*
2  *  Copyright (C) 1994 Linus Torvalds
3  *
4  *  Pentium III FXSR, SSE support
5  *  General FPU state handling cleanups
6  *      Gareth Hughes <gareth@valinux.com>, May 2000
7  */
8 #include <asm/fpu-internal.h>
9
/*
 * Track whether the kernel is using the FPU state
 * currently.
 *
 * This flag is used:
 *
 *   - by IRQ context code to potentially use the FPU
 *     if it's unused.
 *
 *   - to debug kernel_fpu_begin()/end() correctness
 *
 * The flag is per-CPU: kernel_fpu_disable() sets it, kernel_fpu_enable()
 * clears it and kernel_fpu_disabled() reads it, always for the CPU the
 * caller is running on.
 */
static DEFINE_PER_CPU(bool, in_kernel_fpu);
22
23 static void kernel_fpu_disable(void)
24 {
25         WARN_ON(this_cpu_read(in_kernel_fpu));
26         this_cpu_write(in_kernel_fpu, true);
27 }
28
29 static void kernel_fpu_enable(void)
30 {
31         WARN_ON_ONCE(!this_cpu_read(in_kernel_fpu));
32         this_cpu_write(in_kernel_fpu, false);
33 }
34
35 static bool kernel_fpu_disabled(void)
36 {
37         return this_cpu_read(in_kernel_fpu);
38 }
39
40 /*
41  * Were we in an interrupt that interrupted kernel mode?
42  *
43  * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
44  * pair does nothing at all: the thread must not have fpu (so
45  * that we don't try to save the FPU state), and TS must
46  * be set (so that the clts/stts pair does nothing that is
47  * visible in the interrupted kernel thread).
48  *
49  * Except for the eagerfpu case when we return true; in the likely case
50  * the thread has FPU but we are not going to set/clear TS.
51  */
52 static bool interrupted_kernel_fpu_idle(void)
53 {
54         if (kernel_fpu_disabled())
55                 return false;
56
57         if (use_eager_fpu())
58                 return true;
59
60         return !__thread_has_fpu(current) &&
61                 (read_cr0() & X86_CR0_TS);
62 }
63
64 /*
65  * Were we in user mode (or vm86 mode) when we were
66  * interrupted?
67  *
68  * Doing kernel_fpu_begin/end() is ok if we are running
69  * in an interrupt context from user mode - we'll just
70  * save the FPU state as required.
71  */
72 static bool interrupted_user_mode(void)
73 {
74         struct pt_regs *regs = get_irq_regs();
75         return regs && user_mode(regs);
76 }
77
78 /*
79  * Can we use the FPU in kernel mode with the
80  * whole "kernel_fpu_begin/end()" sequence?
81  *
82  * It's always ok in process context (ie "not interrupt")
83  * but it is sometimes ok even from an irq.
84  */
85 bool irq_fpu_usable(void)
86 {
87         return !in_interrupt() ||
88                 interrupted_user_mode() ||
89                 interrupted_kernel_fpu_idle();
90 }
91 EXPORT_SYMBOL(irq_fpu_usable);
92
93 void __kernel_fpu_begin(void)
94 {
95         struct task_struct *me = current;
96
97         kernel_fpu_disable();
98
99         if (__thread_has_fpu(me)) {
100                 __save_init_fpu(me);
101         } else {
102                 this_cpu_write(fpu_owner_task, NULL);
103                 if (!use_eager_fpu())
104                         clts();
105         }
106 }
107 EXPORT_SYMBOL(__kernel_fpu_begin);
108
109 void __kernel_fpu_end(void)
110 {
111         struct task_struct *me = current;
112
113         if (__thread_has_fpu(me)) {
114                 if (WARN_ON(restore_fpu_checking(me)))
115                         fpu_reset_state(me);
116         } else if (!use_eager_fpu()) {
117                 stts();
118         }
119
120         kernel_fpu_enable();
121 }
122 EXPORT_SYMBOL(__kernel_fpu_end);
123
124 /*
125  * Save the FPU state (initialize it if necessary):
126  *
127  * This only ever gets called for the current task.
128  */
129 void fpu__save(struct task_struct *tsk)
130 {
131         WARN_ON(tsk != current);
132
133         preempt_disable();
134         if (__thread_has_fpu(tsk)) {
135                 if (use_eager_fpu()) {
136                         __save_fpu(tsk);
137                 } else {
138                         __save_init_fpu(tsk);
139                         __thread_fpu_end(tsk);
140                 }
141         }
142         preempt_enable();
143 }
144 EXPORT_SYMBOL_GPL(fpu__save);
145
146 void fpstate_init(struct fpu *fpu)
147 {
148         if (!cpu_has_fpu) {
149                 finit_soft_fpu(&fpu->state->soft);
150                 return;
151         }
152
153         memset(fpu->state, 0, xstate_size);
154
155         if (cpu_has_fxsr) {
156                 fx_finit(&fpu->state->fxsave);
157         } else {
158                 struct i387_fsave_struct *fp = &fpu->state->fsave;
159                 fp->cwd = 0xffff037fu;
160                 fp->swd = 0xffff0000u;
161                 fp->twd = 0xffffffffu;
162                 fp->fos = 0xffff0000u;
163         }
164 }
165 EXPORT_SYMBOL_GPL(fpstate_init);
166
167 /*
168  * FPU state allocation:
169  */
170 static struct kmem_cache *task_xstate_cachep;
171
172 void fpstate_cache_init(void)
173 {
174         task_xstate_cachep =
175                 kmem_cache_create("task_xstate", xstate_size,
176                                   __alignof__(union thread_xstate),
177                                   SLAB_PANIC | SLAB_NOTRACK, NULL);
178         setup_xstate_comp();
179 }
180
181 int fpstate_alloc(struct fpu *fpu)
182 {
183         if (fpu->state)
184                 return 0;
185
186         fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
187         if (!fpu->state)
188                 return -ENOMEM;
189
190         /* The CPU requires the FPU state to be aligned to 16 byte boundaries: */
191         WARN_ON((unsigned long)fpu->state & 15);
192
193         return 0;
194 }
195 EXPORT_SYMBOL_GPL(fpstate_alloc);
196
197 void fpstate_free(struct fpu *fpu)
198 {
199         if (fpu->state) {
200                 kmem_cache_free(task_xstate_cachep, fpu->state);
201                 fpu->state = NULL;
202         }
203 }
204 EXPORT_SYMBOL_GPL(fpstate_free);
205
206 int fpu__copy(struct task_struct *dst, struct task_struct *src)
207 {
208         dst->thread.fpu.counter = 0;
209         dst->thread.fpu.has_fpu = 0;
210         dst->thread.fpu.state = NULL;
211
212         task_disable_lazy_fpu_restore(dst);
213
214         if (tsk_used_math(src)) {
215                 int err = fpstate_alloc(&dst->thread.fpu);
216
217                 if (err)
218                         return err;
219                 fpu_copy(dst, src);
220         }
221         return 0;
222 }
223
224 /*
225  * Allocate the backing store for the current task's FPU registers
226  * and initialize the registers themselves as well.
227  *
228  * Can fail.
229  */
230 int fpstate_alloc_init(struct task_struct *curr)
231 {
232         int ret;
233
234         if (WARN_ON_ONCE(curr != current))
235                 return -EINVAL;
236         if (WARN_ON_ONCE(curr->flags & PF_USED_MATH))
237                 return -EINVAL;
238
239         /*
240          * Memory allocation at the first usage of the FPU and other state.
241          */
242         ret = fpstate_alloc(&curr->thread.fpu);
243         if (ret)
244                 return ret;
245
246         fpstate_init(&curr->thread.fpu);
247
248         /* Safe to do for the current task: */
249         curr->flags |= PF_USED_MATH;
250
251         return 0;
252 }
253 EXPORT_SYMBOL_GPL(fpstate_alloc_init);
254
255 /*
256  * The _current_ task is using the FPU for the first time
257  * so initialize it and set the mxcsr to its default
258  * value at reset if we support XMM instructions and then
259  * remember the current task has used the FPU.
260  */
261 static int fpu__unlazy_stopped(struct task_struct *child)
262 {
263         int ret;
264
265         if (WARN_ON_ONCE(child == current))
266                 return -EINVAL;
267
268         if (child->flags & PF_USED_MATH) {
269                 task_disable_lazy_fpu_restore(child);
270                 return 0;
271         }
272
273         /*
274          * Memory allocation at the first usage of the FPU and other state.
275          */
276         ret = fpstate_alloc(&child->thread.fpu);
277         if (ret)
278                 return ret;
279
280         fpstate_init(&child->thread.fpu);
281
282         /* Safe to do for stopped child tasks: */
283         child->flags |= PF_USED_MATH;
284
285         return 0;
286 }
287
288 /*
289  * 'fpu__restore()' saves the current math information in the
290  * old math state array, and gets the new ones from the current task
291  *
292  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
293  * Don't touch unless you *really* know how it works.
294  *
295  * Must be called with kernel preemption disabled (eg with local
296  * local interrupts as in the case of do_device_not_available).
297  */
298 void fpu__restore(void)
299 {
300         struct task_struct *tsk = current;
301
302         if (!tsk_used_math(tsk)) {
303                 local_irq_enable();
304                 /*
305                  * does a slab alloc which can sleep
306                  */
307                 if (fpstate_alloc_init(tsk)) {
308                         /*
309                          * ran out of memory!
310                          */
311                         do_group_exit(SIGKILL);
312                         return;
313                 }
314                 local_irq_disable();
315         }
316
317         /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */
318         kernel_fpu_disable();
319         __thread_fpu_begin(tsk);
320         if (unlikely(restore_fpu_checking(tsk))) {
321                 fpu_reset_state(tsk);
322                 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
323         } else {
324                 tsk->thread.fpu.counter++;
325         }
326         kernel_fpu_enable();
327 }
328 EXPORT_SYMBOL_GPL(fpu__restore);
329
330 void fpu__flush_thread(struct task_struct *tsk)
331 {
332         if (!use_eager_fpu()) {
333                 /* FPU state will be reallocated lazily at the first use. */
334                 drop_fpu(tsk);
335                 fpstate_free(&tsk->thread.fpu);
336         } else {
337                 if (!tsk_used_math(tsk)) {
338                         /* kthread execs. TODO: cleanup this horror. */
339                 if (WARN_ON(fpstate_alloc_init(tsk)))
340                                 force_sig(SIGKILL, tsk);
341                         user_fpu_begin();
342                 }
343                 restore_init_xstate();
344         }
345 }
346
347 /*
348  * The xstateregs_active() routine is the same as the fpregs_active() routine,
349  * as the "regset->n" for the xstate regset will be updated based on the feature
350  * capabilites supported by the xsave.
351  */
352 int fpregs_active(struct task_struct *target, const struct user_regset *regset)
353 {
354         return tsk_used_math(target) ? regset->n : 0;
355 }
356
357 int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
358 {
359         return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
360 }
361
362 int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
363                 unsigned int pos, unsigned int count,
364                 void *kbuf, void __user *ubuf)
365 {
366         int ret;
367
368         if (!cpu_has_fxsr)
369                 return -ENODEV;
370
371         ret = fpu__unlazy_stopped(target);
372         if (ret)
373                 return ret;
374
375         sanitize_i387_state(target);
376
377         return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
378                                    &target->thread.fpu.state->fxsave, 0, -1);
379 }
380
381 int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
382                 unsigned int pos, unsigned int count,
383                 const void *kbuf, const void __user *ubuf)
384 {
385         int ret;
386
387         if (!cpu_has_fxsr)
388                 return -ENODEV;
389
390         ret = fpu__unlazy_stopped(target);
391         if (ret)
392                 return ret;
393
394         sanitize_i387_state(target);
395
396         ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
397                                  &target->thread.fpu.state->fxsave, 0, -1);
398
399         /*
400          * mxcsr reserved bits must be masked to zero for security reasons.
401          */
402         target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
403
404         /*
405          * update the header bits in the xsave header, indicating the
406          * presence of FP and SSE state.
407          */
408         if (cpu_has_xsave)
409                 target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
410
411         return ret;
412 }
413
414 int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
415                 unsigned int pos, unsigned int count,
416                 void *kbuf, void __user *ubuf)
417 {
418         struct xsave_struct *xsave;
419         int ret;
420
421         if (!cpu_has_xsave)
422                 return -ENODEV;
423
424         ret = fpu__unlazy_stopped(target);
425         if (ret)
426                 return ret;
427
428         xsave = &target->thread.fpu.state->xsave;
429
430         /*
431          * Copy the 48bytes defined by the software first into the xstate
432          * memory layout in the thread struct, so that we can copy the entire
433          * xstateregs to the user using one user_regset_copyout().
434          */
435         memcpy(&xsave->i387.sw_reserved,
436                 xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
437         /*
438          * Copy the xstate memory layout.
439          */
440         ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
441         return ret;
442 }
443
444 int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
445                   unsigned int pos, unsigned int count,
446                   const void *kbuf, const void __user *ubuf)
447 {
448         struct xsave_struct *xsave;
449         int ret;
450
451         if (!cpu_has_xsave)
452                 return -ENODEV;
453
454         ret = fpu__unlazy_stopped(target);
455         if (ret)
456                 return ret;
457
458         xsave = &target->thread.fpu.state->xsave;
459
460         ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
461         /*
462          * mxcsr reserved bits must be masked to zero for security reasons.
463          */
464         xsave->i387.mxcsr &= mxcsr_feature_mask;
465         xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
466         /*
467          * These bits must be zero.
468          */
469         memset(&xsave->xsave_hdr.reserved, 0, 48);
470         return ret;
471 }
472
473 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
474
/*
 * FPU tag word conversions.
 */

/*
 * Compress a 16-bit tag word (two bits per register) into an 8-bit
 * tag byte (one bit per register).
 *
 * Bit i of the result is 0 only when the i-th bit pair of @twd is
 * 11 (empty); any other pair yields 1 (valid).
 */
static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	/* Work in a full unsigned int to avoid 16 bit prefixes in the code. */
	unsigned int bits = ~(unsigned int)twd;

	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
	bits |= bits >> 1;
	bits &= 0x5555;			/* 0V0V0V0V0V0V0V0V */

	/* and move the valid bits to the lower byte. */
	bits |= bits >> 1;
	bits &= 0x3333;			/* 00VV00VV00VV00VV */
	bits |= bits >> 2;
	bits &= 0x0f0f;			/* 0000VVVV0000VVVV */
	bits |= bits >> 4;
	bits &= 0x00ff;			/* 00000000VVVVVVVV */

	return bits;
}
493
494 #define FPREG_ADDR(f, n)        ((void *)&(f)->st_space + (n) * 16)
495 #define FP_EXP_TAG_VALID        0
496 #define FP_EXP_TAG_ZERO         1
497 #define FP_EXP_TAG_SPECIAL      2
498 #define FP_EXP_TAG_EMPTY        3
499
500 static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
501 {
502         struct _fpxreg *st;
503         u32 tos = (fxsave->swd >> 11) & 7;
504         u32 twd = (unsigned long) fxsave->twd;
505         u32 tag;
506         u32 ret = 0xffff0000u;
507         int i;
508
509         for (i = 0; i < 8; i++, twd >>= 1) {
510                 if (twd & 0x1) {
511                         st = FPREG_ADDR(fxsave, (i - tos) & 7);
512
513                         switch (st->exponent & 0x7fff) {
514                         case 0x7fff:
515                                 tag = FP_EXP_TAG_SPECIAL;
516                                 break;
517                         case 0x0000:
518                                 if (!st->significand[0] &&
519                                     !st->significand[1] &&
520                                     !st->significand[2] &&
521                                     !st->significand[3])
522                                         tag = FP_EXP_TAG_ZERO;
523                                 else
524                                         tag = FP_EXP_TAG_SPECIAL;
525                                 break;
526                         default:
527                                 if (st->significand[3] & 0x8000)
528                                         tag = FP_EXP_TAG_VALID;
529                                 else
530                                         tag = FP_EXP_TAG_SPECIAL;
531                                 break;
532                         }
533                 } else {
534                         tag = FP_EXP_TAG_EMPTY;
535                 }
536                 ret |= tag << (2 * i);
537         }
538         return ret;
539 }
540
541 /*
542  * FXSR floating point environment conversions.
543  */
544
545 void
546 convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
547 {
548         struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
549         struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
550         struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
551         int i;
552
553         env->cwd = fxsave->cwd | 0xffff0000u;
554         env->swd = fxsave->swd | 0xffff0000u;
555         env->twd = twd_fxsr_to_i387(fxsave);
556
557 #ifdef CONFIG_X86_64
558         env->fip = fxsave->rip;
559         env->foo = fxsave->rdp;
560         /*
561          * should be actually ds/cs at fpu exception time, but
562          * that information is not available in 64bit mode.
563          */
564         env->fcs = task_pt_regs(tsk)->cs;
565         if (tsk == current) {
566                 savesegment(ds, env->fos);
567         } else {
568                 env->fos = tsk->thread.ds;
569         }
570         env->fos |= 0xffff0000;
571 #else
572         env->fip = fxsave->fip;
573         env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
574         env->foo = fxsave->foo;
575         env->fos = fxsave->fos;
576 #endif
577
578         for (i = 0; i < 8; ++i)
579                 memcpy(&to[i], &from[i], sizeof(to[0]));
580 }
581
582 void convert_to_fxsr(struct task_struct *tsk,
583                      const struct user_i387_ia32_struct *env)
584
585 {
586         struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
587         struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
588         struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
589         int i;
590
591         fxsave->cwd = env->cwd;
592         fxsave->swd = env->swd;
593         fxsave->twd = twd_i387_to_fxsr(env->twd);
594         fxsave->fop = (u16) ((u32) env->fcs >> 16);
595 #ifdef CONFIG_X86_64
596         fxsave->rip = env->fip;
597         fxsave->rdp = env->foo;
598         /* cs and ds ignored */
599 #else
600         fxsave->fip = env->fip;
601         fxsave->fcs = (env->fcs & 0xffff);
602         fxsave->foo = env->foo;
603         fxsave->fos = env->fos;
604 #endif
605
606         for (i = 0; i < 8; ++i)
607                 memcpy(&to[i], &from[i], sizeof(from[0]));
608 }
609
610 int fpregs_get(struct task_struct *target, const struct user_regset *regset,
611                unsigned int pos, unsigned int count,
612                void *kbuf, void __user *ubuf)
613 {
614         struct user_i387_ia32_struct env;
615         int ret;
616
617         ret = fpu__unlazy_stopped(target);
618         if (ret)
619                 return ret;
620
621         if (!static_cpu_has(X86_FEATURE_FPU))
622                 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
623
624         if (!cpu_has_fxsr)
625                 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
626                                            &target->thread.fpu.state->fsave, 0,
627                                            -1);
628
629         sanitize_i387_state(target);
630
631         if (kbuf && pos == 0 && count == sizeof(env)) {
632                 convert_from_fxsr(kbuf, target);
633                 return 0;
634         }
635
636         convert_from_fxsr(&env, target);
637
638         return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
639 }
640
641 int fpregs_set(struct task_struct *target, const struct user_regset *regset,
642                unsigned int pos, unsigned int count,
643                const void *kbuf, const void __user *ubuf)
644 {
645         struct user_i387_ia32_struct env;
646         int ret;
647
648         ret = fpu__unlazy_stopped(target);
649         if (ret)
650                 return ret;
651
652         sanitize_i387_state(target);
653
654         if (!static_cpu_has(X86_FEATURE_FPU))
655                 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
656
657         if (!cpu_has_fxsr)
658                 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
659                                           &target->thread.fpu.state->fsave, 0,
660                                           -1);
661
662         if (pos > 0 || count < sizeof(env))
663                 convert_from_fxsr(&env, target);
664
665         ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
666         if (!ret)
667                 convert_to_fxsr(target, &env);
668
669         /*
670          * update the header bit in the xsave header, indicating the
671          * presence of FP.
672          */
673         if (cpu_has_xsave)
674                 target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
675         return ret;
676 }
677
678 /*
679  * FPU state for core dumps.
680  * This is only used for a.out dumps now.
681  * It is declared generically using elf_fpregset_t (which is
682  * struct user_i387_struct) but is in fact only used for 32-bit
683  * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
684  */
685 int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
686 {
687         struct task_struct *tsk = current;
688         int fpvalid;
689
690         fpvalid = !!used_math();
691         if (fpvalid)
692                 fpvalid = !fpregs_get(tsk, NULL,
693                                       0, sizeof(struct user_i387_ia32_struct),
694                                       fpu, NULL);
695
696         return fpvalid;
697 }
698 EXPORT_SYMBOL(dump_fpu);
699
700 #endif  /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */