xen: add m2p override mechanism
arch/x86/xen/p2m.c
/*
 * Xen leaves the responsibility for maintaining p2m mappings to the
 * guests themselves, but it must also access and update the p2m array
 * during suspend/resume when all the pages are reallocated.
 *
 * The p2m table is logically a flat array, but we implement it as a
 * three-level tree to allow the address space to be sparse.
 *
 *                               Xen
 *                                |
 *     p2m_top              p2m_top_mfn
 *       /  \                   /   \
 * p2m_mid p2m_mid      p2m_mid_mfn p2m_mid_mfn
 *    / \      / \         /           /
 *  p2m p2m p2m p2m p2m p2m p2m ...
 *
 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
 *
 * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
 * maximum representable pseudo-physical address space is:
 *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
 *
 * P2M_PER_PAGE depends on the architecture, as an mfn is always
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to
 * 512 and 1024 entries respectively.
 */
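
/*
 * Worked example (a sketch, assuming the usual 4 KiB page size on
 * x86-64, where sizeof(unsigned long) == 8):
 *
 *   P2M_PER_PAGE     = 4096 / 8 = 512 mfn entries per leaf page
 *   P2M_MID_PER_PAGE = 4096 / 8 = 512 leaf pointers per mid page
 *   P2M_TOP_PER_PAGE = 4096 / 8 = 512 mid pointers in the top page
 *
 * so MAX_P2M_PFN = 512 * 512 * 512 = 134217728 pfns, i.e.
 * 134217728 * 4 KiB = 512 GiB of pseudo-physical address space.
 */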

#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hash.h>

#include <asm/cache.h>
#include <asm/setup.h>

#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include "xen-ops.h"

static void __init m2p_override_init(void);

unsigned long xen_max_p2m_pfn __read_mostly;

#define P2M_PER_PAGE            (PAGE_SIZE / sizeof(unsigned long))
#define P2M_MID_PER_PAGE        (PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE        (PAGE_SIZE / sizeof(unsigned long **))

#define MAX_P2M_PFN             (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)

/* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);

static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);

RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));

static inline unsigned p2m_top_index(unsigned long pfn)
{
        BUG_ON(pfn >= MAX_P2M_PFN);
        return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
        return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
        return pfn % P2M_PER_PAGE;
}
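
/*
 * Index example (illustrative, again using the 64-bit sizes above, so
 * each level holds 512 entries): pfn 74565 (0x12345) decomposes as
 *
 *   topidx = 74565 / (512 * 512) = 0
 *   mididx = (74565 / 512) % 512 = 145
 *   idx    = 74565 % 512         = 325
 *
 * and 0 * 262144 + 145 * 512 + 325 == 74565, so its entry lives at
 * p2m_top[0][145][325].
 */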

static void p2m_top_init(unsigned long ***top)
{
        unsigned i;

        for (i = 0; i < P2M_TOP_PER_PAGE; i++)
                top[i] = p2m_mid_missing;
}

static void p2m_top_mfn_init(unsigned long *top)
{
        unsigned i;

        for (i = 0; i < P2M_TOP_PER_PAGE; i++)
                top[i] = virt_to_mfn(p2m_mid_missing_mfn);
}

static void p2m_top_mfn_p_init(unsigned long **top)
{
        unsigned i;

        for (i = 0; i < P2M_TOP_PER_PAGE; i++)
                top[i] = p2m_mid_missing_mfn;
}

static void p2m_mid_init(unsigned long **mid)
{
        unsigned i;

        for (i = 0; i < P2M_MID_PER_PAGE; i++)
                mid[i] = p2m_missing;
}

static void p2m_mid_mfn_init(unsigned long *mid)
{
        unsigned i;

        for (i = 0; i < P2M_MID_PER_PAGE; i++)
                mid[i] = virt_to_mfn(p2m_missing);
}

static void p2m_init(unsigned long *p2m)
{
        unsigned i;

        /* A leaf page holds P2M_PER_PAGE mfn entries, not mid pointers */
        for (i = 0; i < P2M_PER_PAGE; i++)
                p2m[i] = INVALID_P2M_ENTRY;
}

/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called very early, and must use extend_brk()
 *   to allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should already be completely allocated.
 */
void xen_build_mfn_list_list(void)
{
        unsigned long pfn;

        /* Pre-initialize p2m_top_mfn to be completely missing */
        if (p2m_top_mfn == NULL) {
                p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
                p2m_mid_mfn_init(p2m_mid_missing_mfn);

                p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
                p2m_top_mfn_p_init(p2m_top_mfn_p);

                p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
                p2m_top_mfn_init(p2m_top_mfn);
        } else {
                /* Reinitialise; the mfns all change after migration */
                p2m_mid_mfn_init(p2m_mid_missing_mfn);
        }

        for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
                unsigned topidx = p2m_top_index(pfn);
                unsigned mididx = p2m_mid_index(pfn);
                unsigned long **mid;
                unsigned long *mid_mfn_p;

                mid = p2m_top[topidx];
                mid_mfn_p = p2m_top_mfn_p[topidx];

                /* Don't bother allocating any mfn mid levels if
                 * they're just missing; just update the stored mfn,
                 * since they could all have changed over a migration.
                 */
                if (mid == p2m_mid_missing) {
                        BUG_ON(mididx);
                        BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
                        p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
                        pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
                        continue;
                }

                if (mid_mfn_p == p2m_mid_missing_mfn) {
                        /*
                         * XXX boot-time only!  We should never find
                         * missing parts of the mfn tree after
                         * runtime.  extend_brk() will BUG if we call
                         * it too late.
                         */
                        mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
                        p2m_mid_mfn_init(mid_mfn_p);

                        p2m_top_mfn_p[topidx] = mid_mfn_p;
                }

                p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
                mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
        }
}

void xen_setup_mfn_list_list(void)
{
        BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
                virt_to_mfn(p2m_top_mfn);
        HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
}

/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
        unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
        unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
        unsigned long pfn;

        xen_max_p2m_pfn = max_pfn;

        p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_init(p2m_missing);

        p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_mid_init(p2m_mid_missing);

        p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
        p2m_top_init(p2m_top);

        /*
         * The domain builder gives us a pre-constructed p2m array in
         * mfn_list for all the pages initially given to us, so we just
         * need to graft that into our tree structure.
         */
        for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
                unsigned topidx = p2m_top_index(pfn);
                unsigned mididx = p2m_mid_index(pfn);

                if (p2m_top[topidx] == p2m_mid_missing) {
                        unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
                        p2m_mid_init(mid);

                        p2m_top[topidx] = mid;
                }

                p2m_top[topidx][mididx] = &mfn_list[pfn];
        }

        m2p_override_init();
}

unsigned long get_phys_to_machine(unsigned long pfn)
{
        unsigned topidx, mididx, idx;

        if (unlikely(pfn >= MAX_P2M_PFN))
                return INVALID_P2M_ENTRY;

        topidx = p2m_top_index(pfn);
        mididx = p2m_mid_index(pfn);
        idx = p2m_index(pfn);

        return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
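
/*
 * Usage sketch (illustrative only; callers normally go through the
 * pfn_to_mfn() wrapper in asm/xen/page.h rather than calling this
 * directly):
 *
 *   unsigned long mfn = get_phys_to_machine(pfn);
 *   if (mfn == INVALID_P2M_ENTRY)
 *           ; // pfn is a hole: ballooned out or beyond nr_pages
 *
 * Holes are cheap to represent: they resolve through the shared
 * p2m_missing / p2m_mid_missing pages, not per-hole allocations.
 */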

static void *alloc_p2m_page(void)
{
        return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
}

static void free_p2m_page(void *p)
{
        free_page((unsigned long)p);
}

/*
 * Fully allocate the p2m structure for a given pfn.  We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync.  We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
static bool alloc_p2m(unsigned long pfn)
{
        unsigned topidx, mididx;
        unsigned long ***top_p, **mid;
        unsigned long *top_mfn_p, *mid_mfn;

        topidx = p2m_top_index(pfn);
        mididx = p2m_mid_index(pfn);

        top_p = &p2m_top[topidx];
        mid = *top_p;

        if (mid == p2m_mid_missing) {
                /* Mid level is missing, allocate a new one */
                mid = alloc_p2m_page();
                if (!mid)
                        return false;

                p2m_mid_init(mid);

                if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
                        free_p2m_page(mid);
        }

        top_mfn_p = &p2m_top_mfn[topidx];
        mid_mfn = p2m_top_mfn_p[topidx];

        BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

        if (mid_mfn == p2m_mid_missing_mfn) {
                /* Separately check the mid mfn level */
                unsigned long missing_mfn;
                unsigned long mid_mfn_mfn;

                mid_mfn = alloc_p2m_page();
                if (!mid_mfn)
                        return false;

                p2m_mid_mfn_init(mid_mfn);

                missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
                mid_mfn_mfn = virt_to_mfn(mid_mfn);
                if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
                        free_p2m_page(mid_mfn);
                else
                        p2m_top_mfn_p[topidx] = mid_mfn;
        }

        if (p2m_top[topidx][mididx] == p2m_missing) {
                /* p2m leaf page is missing */
                unsigned long *p2m;

                p2m = alloc_p2m_page();
                if (!p2m)
                        return false;

                p2m_init(p2m);

                if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
                        free_p2m_page(p2m);
                else
                        mid_mfn[mididx] = virt_to_mfn(p2m);
        }

        return true;
}
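
/*
 * The lock-free install idiom used above, in isolation (a sketch;
 * 'slot', 'placeholder' and 'newpage' are hypothetical names):
 *
 *   newpage = alloc_p2m_page();
 *   init(newpage);
 *   if (cmpxchg(slot, placeholder, newpage) != placeholder)
 *           free_p2m_page(newpage);   // another cpu beat us to it
 *
 * Whichever cpu's cmpxchg succeeds publishes its page; losers free
 * theirs and use the winner's, so no spinlock is needed on this path.
 */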

/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
        unsigned topidx, mididx, idx;

        if (unlikely(pfn >= MAX_P2M_PFN)) {
                BUG_ON(mfn != INVALID_P2M_ENTRY);
                return true;
        }

        topidx = p2m_top_index(pfn);
        mididx = p2m_mid_index(pfn);
        idx = p2m_index(pfn);

        if (p2m_top[topidx][mididx] == p2m_missing)
                return mfn == INVALID_P2M_ENTRY;

        p2m_top[topidx][mididx][idx] = mfn;

        return true;
}

bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
        if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
                BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
                return true;
        }

        if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
                if (!alloc_p2m(pfn))
                        return false;

                if (!__set_phys_to_machine(pfn, mfn))
                        return false;
        }

        return true;
}
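
/*
 * Typical caller (a sketch of how the balloon driver in
 * drivers/xen/balloon.c is expected to use this):
 *
 *   set_phys_to_machine(pfn, INVALID_P2M_ENTRY); // page handed back
 *   set_phys_to_machine(pfn, frame_list[i]);     // page repopulated
 *
 * The alloc_p2m() slow path only triggers when a previously-missing
 * leaf must be populated, i.e. when a hole gains a valid mfn.
 */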

#define M2P_OVERRIDE_HASH_SHIFT 10
#define M2P_OVERRIDE_HASH       (1 << M2P_OVERRIDE_HASH_SHIFT)

static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH);
static DEFINE_SPINLOCK(m2p_override_lock);

static void __init m2p_override_init(void)
{
        unsigned i;

        m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
                                   sizeof(unsigned long));

        for (i = 0; i < M2P_OVERRIDE_HASH; i++)
                INIT_LIST_HEAD(&m2p_overrides[i]);
}

static unsigned long mfn_hash(unsigned long mfn)
{
        return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
}

/* Add an MFN override for a particular page */
void m2p_add_override(unsigned long mfn, struct page *page)
{
        unsigned long flags;

        page->private = mfn;

        spin_lock_irqsave(&m2p_override_lock, flags);
        list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
        spin_unlock_irqrestore(&m2p_override_lock, flags);
}
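
/*
 * Intended use (a sketch; grant-mapping code such as the gntdev
 * driver is the expected caller): after mapping a foreign frame into
 * a local page, register an override so reverse lookups return the
 * local pfn instead of the foreign domain's:
 *
 *   m2p_add_override(foreign_mfn, page);
 *   ... use the page; m2p lookups of foreign_mfn now find it ...
 *   m2p_remove_override(page);
 *
 * This is needed because the hypervisor's m2p table still maps the
 * foreign mfn to a pfn in the granting domain.
 */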

void m2p_remove_override(struct page *page)
{
        unsigned long flags;

        spin_lock_irqsave(&m2p_override_lock, flags);
        list_del(&page->lru);
        spin_unlock_irqrestore(&m2p_override_lock, flags);
}

struct page *m2p_find_override(unsigned long mfn)
{
        unsigned long flags;
        struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)];
        struct page *p, *ret;

        ret = NULL;

        spin_lock_irqsave(&m2p_override_lock, flags);

        list_for_each_entry(p, bucket, lru) {
                if (p->private == mfn) {
                        ret = p;
                        break;
                }
        }

        spin_unlock_irqrestore(&m2p_override_lock, flags);

        return ret;
}

unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
{
        struct page *p = m2p_find_override(mfn);
        unsigned long ret = pfn;

        if (p)
                ret = page_to_pfn(p);

        return ret;
}
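
/*
 * How this plugs into reverse translation (a sketch; the mfn_to_pfn()
 * helper in asm/xen/page.h is expected to do something like):
 *
 *   pfn = machine_to_phys_mapping[mfn];     // hypervisor m2p table
 *   pfn = m2p_find_override_pfn(mfn, pfn);  // local override wins
 *
 * Passing the already-looked-up pfn as the fallback keeps the common,
 * non-overridden case to a single hash-bucket scan.
 */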