iov_iter.c: handle ITER_KVEC directly
mm/iov_iter.c (firefly-linux-kernel-4.4.55.git)
#include <linux/export.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/*
 * Walk the user-space iovec array, applying STEP to each contiguous
 * chunk.  STEP evaluates to the number of bytes it failed to process
 * (e.g. bytes left uncopied after a fault), which ends the walk early;
 * on exit n holds the number of bytes actually processed.
 */
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

/*
 * The same walk over an array of kernel-space kvecs.  Copies to or
 * from kernel memory cannot fault, so STEP's result is discarded and
 * the walk always consumes all n bytes.
 */
#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

/*
 * The bio_vec flavour: each segment is a page/offset/length triple
 * rather than a virtual address.  As with kvecs, STEP cannot fail.
 */
#define iterate_bvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->bvec;					\
	__v.bv_len = min_t(size_t, n, __p->bv_len - skip);	\
	if (likely(__v.bv_len)) {			\
		__v.bv_page = __p->bv_page;		\
		__v.bv_offset = __p->bv_offset + skip;	\
		(void)(STEP);				\
		skip += __v.bv_len;			\
		n -= __v.bv_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.bv_len = min_t(size_t, n, __p->bv_len);	\
		if (unlikely(!__v.bv_len))		\
			continue;			\
		__v.bv_page = __p->bv_page;		\
		__v.bv_offset = __p->bv_offset;		\
		(void)(STEP);				\
		skip = __v.bv_len;			\
		n -= __v.bv_len;			\
	}						\
	n = wanted;					\
}

/* Dispatch on the iterator flavour without advancing the iterator. */
#define iterate_all_kinds(i, n, v, I, B, K) {			\
	size_t skip = i->iov_offset;				\
	if (unlikely(i->type & ITER_BVEC)) {			\
		const struct bio_vec *bvec;			\
		struct bio_vec v;				\
		iterate_bvec(i, n, v, bvec, skip, (B))		\
	} else if (unlikely(i->type & ITER_KVEC)) {		\
		const struct kvec *kvec;			\
		struct kvec v;					\
		iterate_kvec(i, n, v, kvec, skip, (K))		\
	} else {						\
		const struct iovec *iov;			\
		struct iovec v;					\
		iterate_iovec(i, n, v, iov, skip, (I))		\
	}							\
}

/*
 * As iterate_all_kinds(), but also advance the iterator past the
 * bytes that were processed.
 */
#define iterate_and_advance(i, n, v, I, B, K) {			\
	size_t skip = i->iov_offset;				\
	if (unlikely(i->type & ITER_BVEC)) {			\
		const struct bio_vec *bvec;			\
		struct bio_vec v;				\
		iterate_bvec(i, n, v, bvec, skip, (B))		\
		if (skip == bvec->bv_len) {			\
			bvec++;					\
			skip = 0;				\
		}						\
		i->nr_segs -= bvec - i->bvec;			\
		i->bvec = bvec;					\
	} else if (unlikely(i->type & ITER_KVEC)) {		\
		const struct kvec *kvec;			\
		struct kvec v;					\
		iterate_kvec(i, n, v, kvec, skip, (K))		\
		if (skip == kvec->iov_len) {			\
			kvec++;					\
			skip = 0;				\
		}						\
		i->nr_segs -= kvec - i->kvec;			\
		i->kvec = kvec;					\
	} else {						\
		const struct iovec *iov;			\
		struct iovec v;					\
		iterate_iovec(i, n, v, iov, skip, (I))		\
		if (skip == iov->iov_len) {			\
			iov++;					\
			skip = 0;				\
		}						\
		i->nr_segs -= iov - i->iov;			\
		i->iov = iov;					\
	}							\
	i->count -= n;						\
	i->iov_offset = skip;					\
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (!fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_to_user_inatomic(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_to_user_inatomic(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */
	kaddr = kmap(page);
	from = kaddr + offset;
	left = __copy_to_user(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_to_user(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);
done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (!fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_from_user_inatomic(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_from_user_inatomic(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */
	kaddr = kmap(page);
	to = kaddr + offset;
	left = __copy_from_user(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_from_user(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);
done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

/*
 * Fault in the first iovec of the given iov_iter, to a maximum length
 * of bytes. Returns 0 on success, or non-zero if the memory could not be
 * accessed (i.e. because it is an invalid address).
 *
 * writev-intensive code may want this to prefault several iovecs -- that
 * would be possible (callers must not rely on the fact that _only_ the
 * first iovec will be faulted with the current implementation).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		char __user *buf = i->iov->iov_base + i->iov_offset;
		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
		return fault_in_pages_readable(buf, bytes);
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
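
For context, a hedged sketch of the prefault-then-atomic-copy pattern this helper exists for, modelled loosely on generic_perform_write() in mm/filemap.c. example_write_chunk() is illustrative only and not part of this file; page setup (->write_begin()/->write_end()) is elided.

static ssize_t example_write_chunk(struct page *page, unsigned long offset,
				   size_t bytes, struct iov_iter *i)
{
	size_t copied;

	/*
	 * Touch the user pages while sleeping is still allowed; the
	 * copy below runs under kmap_atomic() and must not fault.
	 */
	if (unlikely(iov_iter_fault_in_readable(i, bytes)))
		return -EFAULT;

	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);

	/* the atomic copy does not advance the iterator; do it here */
	iov_iter_advance(i, copied);
	return copied;	/* callers typically retry on a short copy */
}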

void iov_iter_init(struct iov_iter *i, int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	/* It will get better.  Eventually... */
	if (segment_eq(get_fs(), KERNEL_DS)) {
		direction |= ITER_KVEC;
		i->type = direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *from = addr;
	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	iterate_and_advance(i, bytes, v,
		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
			       v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_to_iter);

size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	iterate_and_advance(i, bytes, v,
		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
				 v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter);

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_to_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	iterate_and_advance(i, bytes, v,
		__clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	iterate_all_kinds(i, bytes, v,
		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
					  v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (i->nr_segs == 1)
		return i->count;
	else if (i->type & ITER_BVEC)
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (!size)
		return 0;

	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
	if (!p)
		p = vmalloc(n * sizeof(struct page *));
	return p;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;

	iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);
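
A minimal usage sketch of the exported API above, illustrative only and not part of this file: a read-style handler that fills a caller-supplied iovec array from a kernel buffer. example_read() and its arguments are hypothetical; note that because iov_iter_init() marks the iterator ITER_KVEC when running under set_fs(KERNEL_DS), the same copy_to_iter() call also serves in-kernel callers.

static ssize_t example_read(char *kbuf, size_t len,
			    const struct iovec *vec, unsigned long nr_segs)
{
	struct iov_iter iter;
	size_t copied;

	/* READ: data flows from the kernel into the iterator's buffers */
	iov_iter_init(&iter, READ, vec, nr_segs, iov_length(vec, nr_segs));

	copied = copy_to_iter(kbuf, len, &iter);
	if (copied < len)
		return -EFAULT;	/* short copy: faulting user page or short iovec */
	return copied;
}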