coredump: add a new elf note with siginfo of the signal
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38 #include <asm/exec.h>
39
40 #ifndef user_siginfo_t
41 #define user_siginfo_t siginfo_t
42 #endif
43
44 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
45 static int load_elf_library(struct file *);
46 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
47                                 int, int, unsigned long);
48
49 /*
50  * If we don't support core dumping, then supply a NULL so we
51  * don't even try.
52  */
53 #ifdef CONFIG_ELF_CORE
54 static int elf_core_dump(struct coredump_params *cprm);
55 #else
56 #define elf_core_dump   NULL
57 #endif
58
59 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
60 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
61 #else
62 #define ELF_MIN_ALIGN   PAGE_SIZE
63 #endif
64
65 #ifndef ELF_CORE_EFLAGS
66 #define ELF_CORE_EFLAGS 0
67 #endif
68
69 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
70 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
71 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
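
/*
 * Worked example (illustrative, assuming ELF_MIN_ALIGN == 4096):
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to the next boundary)
 */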
72
73 static struct linux_binfmt elf_format = {
74         .module         = THIS_MODULE,
75         .load_binary    = load_elf_binary,
76         .load_shlib     = load_elf_library,
77         .core_dump      = elf_core_dump,
78         .min_coredump   = ELF_EXEC_PAGESIZE,
79 };
80
81 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
82
83 static int set_brk(unsigned long start, unsigned long end)
84 {
85         start = ELF_PAGEALIGN(start);
86         end = ELF_PAGEALIGN(end);
87         if (end > start) {
88                 unsigned long addr;
89                 addr = vm_brk(start, end - start);
90                 if (BAD_ADDR(addr))
91                         return addr;
92         }
93         current->mm->start_brk = current->mm->brk = end;
94         return 0;
95 }
96
97 /* We need to explicitly zero any fractional pages
98    after the data section (i.e. bss).  Otherwise they
99    would contain junk from the file that should not
100    be in memory.
101  */
102 static int padzero(unsigned long elf_bss)
103 {
104         unsigned long nbyte;
105
106         nbyte = ELF_PAGEOFFSET(elf_bss);
107         if (nbyte) {
108                 nbyte = ELF_MIN_ALIGN - nbyte;
109                 if (clear_user((void __user *) elf_bss, nbyte))
110                         return -EFAULT;
111         }
112         return 0;
113 }
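
/*
 * Example (assuming ELF_MIN_ALIGN == 4096): for elf_bss == 0x404123,
 * padzero() clears the 0xedd bytes from 0x404123 up to 0x405000, so the
 * tail of the last file-backed page reads as zeros instead of file junk.
 */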
114
115 /* Let's use some macros to make this stack manipulation a little clearer */
116 #ifdef CONFIG_STACK_GROWSUP
117 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
118 #define STACK_ROUND(sp, items) \
119         ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
120 #define STACK_ALLOC(sp, len) ({ \
121         elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
122         old_sp; })
123 #else
124 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
125 #define STACK_ROUND(sp, items) \
126         (((unsigned long) (sp - items)) &~ 15UL)
127 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
128 #endif
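
/*
 * Example for the common grows-down case (a sketch, ignoring exact
 * pointer types): with p == 0x7fff1008, STACK_ALLOC(p, 5) lowers p to
 * 0x7fff1003 and returns the new value; STACK_ROUND later masks the
 * final address with ~15UL so the initial stack pointer ends up
 * 16-byte aligned.
 */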
129
130 #ifndef ELF_BASE_PLATFORM
131 /*
132  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
133  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
134  * will be copied to the user stack in the same manner as AT_PLATFORM.
135  */
136 #define ELF_BASE_PLATFORM NULL
137 #endif
138
139 static int
140 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
141                 unsigned long load_addr, unsigned long interp_load_addr)
142 {
143         unsigned long p = bprm->p;
144         int argc = bprm->argc;
145         int envc = bprm->envc;
146         elf_addr_t __user *argv;
147         elf_addr_t __user *envp;
148         elf_addr_t __user *sp;
149         elf_addr_t __user *u_platform;
150         elf_addr_t __user *u_base_platform;
151         elf_addr_t __user *u_rand_bytes;
152         const char *k_platform = ELF_PLATFORM;
153         const char *k_base_platform = ELF_BASE_PLATFORM;
154         unsigned char k_rand_bytes[16];
155         int items;
156         elf_addr_t *elf_info;
157         int ei_index = 0;
158         const struct cred *cred = current_cred();
159         struct vm_area_struct *vma;
160
161         /*
162          * In some cases (e.g. Hyper-Threading), we want to avoid L1
163          * evictions by the processes running on the same package. One
164          * thing we can do is to shuffle the initial stack for them.
165          */
166
167         p = arch_align_stack(p);
168
169         /*
170          * If this architecture has a platform capability string, copy it
171          * to userspace.  In some cases (Sparc), this info is impossible
172          * for userspace to get any other way, in others (i386) it is
173          * merely difficult.
174          */
175         u_platform = NULL;
176         if (k_platform) {
177                 size_t len = strlen(k_platform) + 1;
178
179                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
180                 if (__copy_to_user(u_platform, k_platform, len))
181                         return -EFAULT;
182         }
183
184         /*
185          * If this architecture has a "base" platform capability
186          * string, copy it to userspace.
187          */
188         u_base_platform = NULL;
189         if (k_base_platform) {
190                 size_t len = strlen(k_base_platform) + 1;
191
192                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
193                 if (__copy_to_user(u_base_platform, k_base_platform, len))
194                         return -EFAULT;
195         }
196
197         /*
198          * Generate 16 random bytes for userspace PRNG seeding.
199          */
200         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
201         u_rand_bytes = (elf_addr_t __user *)
202                        STACK_ALLOC(p, sizeof(k_rand_bytes));
203         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
204                 return -EFAULT;
205
206         /* Create the ELF interpreter info */
207         elf_info = (elf_addr_t *)current->mm->saved_auxv;
208         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
209 #define NEW_AUX_ENT(id, val) \
210         do { \
211                 elf_info[ei_index++] = id; \
212                 elf_info[ei_index++] = val; \
213         } while (0)
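
/*
 * Example: NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE) appends the pair
 * { AT_PAGESZ, 4096 } (on 4K-page systems) to saved_auxv; userspace can
 * later look these (id, value) pairs up, e.g. via glibc's getauxval().
 */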
214
215 #ifdef ARCH_DLINFO
216         /* 
217          * ARCH_DLINFO must come first so PPC can do its special alignment of
218          * AUXV.
219          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
220          * ARCH_DLINFO changes
221          */
222         ARCH_DLINFO;
223 #endif
224         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
225         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
226         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
227         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
228         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
229         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
230         NEW_AUX_ENT(AT_BASE, interp_load_addr);
231         NEW_AUX_ENT(AT_FLAGS, 0);
232         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
233         NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
234         NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
235         NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
236         NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
237         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
238         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
239         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
240         if (k_platform) {
241                 NEW_AUX_ENT(AT_PLATFORM,
242                             (elf_addr_t)(unsigned long)u_platform);
243         }
244         if (k_base_platform) {
245                 NEW_AUX_ENT(AT_BASE_PLATFORM,
246                             (elf_addr_t)(unsigned long)u_base_platform);
247         }
248         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
249                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
250         }
251 #undef NEW_AUX_ENT
252         /* AT_NULL is zero; clear the rest too */
253         memset(&elf_info[ei_index], 0,
254                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
255
256         /* And advance past the AT_NULL entry.  */
257         ei_index += 2;
258
259         sp = STACK_ADD(p, ei_index);
260
261         items = (argc + 1) + (envc + 1) + 1;
262         bprm->p = STACK_ROUND(sp, items);
263
264         /* Point sp at the lowest address on the stack */
265 #ifdef CONFIG_STACK_GROWSUP
266         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
267         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
268 #else
269         sp = (elf_addr_t __user *)bprm->p;
270 #endif
271
272
273         /*
274          * Grow the stack manually; some architectures have a limit on how
275          * far ahead a user-space access may be in order to grow the stack.
276          */
277         vma = find_extend_vma(current->mm, bprm->p);
278         if (!vma)
279                 return -EFAULT;
280
281         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
282         if (__put_user(argc, sp++))
283                 return -EFAULT;
284         argv = sp;
285         envp = argv + argc + 1;
286
287         /* Populate argv and envp */
288         p = current->mm->arg_end = current->mm->arg_start;
289         while (argc-- > 0) {
290                 size_t len;
291                 if (__put_user((elf_addr_t)p, argv++))
292                         return -EFAULT;
293                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
294                 if (!len || len > MAX_ARG_STRLEN)
295                         return -EINVAL;
296                 p += len;
297         }
298         if (__put_user(0, argv))
299                 return -EFAULT;
300         current->mm->arg_end = current->mm->env_start = p;
301         while (envc-- > 0) {
302                 size_t len;
303                 if (__put_user((elf_addr_t)p, envp++))
304                         return -EFAULT;
305                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
306                 if (!len || len > MAX_ARG_STRLEN)
307                         return -EINVAL;
308                 p += len;
309         }
310         if (__put_user(0, envp))
311                 return -EFAULT;
312         current->mm->env_end = p;
313
314         /* Put the elf_info on the stack in the right place.  */
315         sp = (elf_addr_t __user *)envp + 1;
316         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
317                 return -EFAULT;
318         return 0;
319 }
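
/*
 * Resulting initial stack layout (sketch; lowest address first on a
 * grows-down stack):
 *   argc
 *   argv[0] ... argv[argc - 1], NULL
 *   envp[0] ... envp[envc - 1], NULL
 *   auxv (id, value) pairs, terminated by an AT_NULL entry
 * with the argument/environment strings, random bytes and platform
 * strings sitting at higher addresses, where they were copied earlier.
 */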
320
321 static unsigned long elf_map(struct file *filep, unsigned long addr,
322                 struct elf_phdr *eppnt, int prot, int type,
323                 unsigned long total_size)
324 {
325         unsigned long map_addr;
326         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
327         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
328         addr = ELF_PAGESTART(addr);
329         size = ELF_PAGEALIGN(size);
330
331         /* mmap() will return -EINVAL if given a zero size, but a
332          * segment with zero filesize is perfectly valid */
333         if (!size)
334                 return addr;
335
336         /*
337          * total_size is the size of the ELF (interpreter) image.
338          * The _first_ mmap needs to know the full size, otherwise
339          * randomization might put this image into an overlapping
340          * position with the ELF binary image (since size < total_size).
341          * So we first map the 'big' image and then unmap the remainder
342          * at the end (this unmap is needed for ELF images with holes).
343          */
344         if (total_size) {
345                 total_size = ELF_PAGEALIGN(total_size);
346                 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
347                 if (!BAD_ADDR(map_addr))
348                         vm_munmap(map_addr+size, total_size-size);
349         } else
350                 map_addr = vm_mmap(filep, addr, size, prot, type, off);
351
352         return map_addr;
353 }
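
/*
 * Illustration of the total_size path (hypothetical sizes): if the
 * interpreter's first PT_LOAD maps 0x1000 bytes but the whole image
 * spans 0x5000, the first vm_mmap() reserves all 0x5000 bytes so
 * randomization cannot drop the main binary inside the image; the
 * trailing 0x4000 are then unmapped again, and later segments are
 * mapped into that hole with MAP_FIXED.
 */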
354
355 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
356 {
357         int i, first_idx = -1, last_idx = -1;
358
359         for (i = 0; i < nr; i++) {
360                 if (cmds[i].p_type == PT_LOAD) {
361                         last_idx = i;
362                         if (first_idx == -1)
363                                 first_idx = i;
364                 }
365         }
366         if (first_idx == -1)
367                 return 0;
368
369         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
370                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
371 }
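
/*
 * Example (hypothetical program headers): PT_LOAD segments at p_vaddr
 * 0x400000 (p_memsz 0x1000) and 0x600000 (p_memsz 0x2345) yield
 * 0x602345 - 0x400000 == 0x202345, the span from the first segment's
 * page start to the end of the last one.
 */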
372
373
374 /* This is much more generalized than the library routine read function,
375    so we keep this separate.  Technically the library read function
376    is only provided so that we can read a.out libraries that have
377    an ELF header. */
378
379 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
380                 struct file *interpreter, unsigned long *interp_map_addr,
381                 unsigned long no_base)
382 {
383         struct elf_phdr *elf_phdata;
384         struct elf_phdr *eppnt;
385         unsigned long load_addr = 0;
386         int load_addr_set = 0;
387         unsigned long last_bss = 0, elf_bss = 0;
388         unsigned long error = ~0UL;
389         unsigned long total_size;
390         int retval, i, size;
391
392         /* First of all, some simple consistency checks */
393         if (interp_elf_ex->e_type != ET_EXEC &&
394             interp_elf_ex->e_type != ET_DYN)
395                 goto out;
396         if (!elf_check_arch(interp_elf_ex))
397                 goto out;
398         if (!interpreter->f_op || !interpreter->f_op->mmap)
399                 goto out;
400
401         /*
402          * If the size of this structure has changed, then punt, since
403          * we will be doing the wrong thing.
404          */
405         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
406                 goto out;
407         if (interp_elf_ex->e_phnum < 1 ||
408                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
409                 goto out;
410
411         /* Now read in all of the header information */
412         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
413         if (size > ELF_MIN_ALIGN)
414                 goto out;
415         elf_phdata = kmalloc(size, GFP_KERNEL);
416         if (!elf_phdata)
417                 goto out;
418
419         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
420                              (char *)elf_phdata, size);
421         error = -EIO;
422         if (retval != size) {
423                 if (retval < 0)
424                         error = retval; 
425                 goto out_close;
426         }
427
428         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
429         if (!total_size) {
430                 error = -EINVAL;
431                 goto out_close;
432         }
433
434         eppnt = elf_phdata;
435         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
436                 if (eppnt->p_type == PT_LOAD) {
437                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
438                         int elf_prot = 0;
439                         unsigned long vaddr = 0;
440                         unsigned long k, map_addr;
441
442                         if (eppnt->p_flags & PF_R)
443                                 elf_prot = PROT_READ;
444                         if (eppnt->p_flags & PF_W)
445                                 elf_prot |= PROT_WRITE;
446                         if (eppnt->p_flags & PF_X)
447                                 elf_prot |= PROT_EXEC;
448                         vaddr = eppnt->p_vaddr;
449                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
450                                 elf_type |= MAP_FIXED;
451                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
452                                 load_addr = -vaddr;
453
454                         map_addr = elf_map(interpreter, load_addr + vaddr,
455                                         eppnt, elf_prot, elf_type, total_size);
456                         total_size = 0;
457                         if (!*interp_map_addr)
458                                 *interp_map_addr = map_addr;
459                         error = map_addr;
460                         if (BAD_ADDR(map_addr))
461                                 goto out_close;
462
463                         if (!load_addr_set &&
464                             interp_elf_ex->e_type == ET_DYN) {
465                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
466                                 load_addr_set = 1;
467                         }
468
469                         /*
470                          * Check to see if the section's size will overflow the
471                          * allowed task size. Note that p_filesz must always be
472                          * <= p_memsz so it's only necessary to check p_memsz.
473                          */
474                         k = load_addr + eppnt->p_vaddr;
475                         if (BAD_ADDR(k) ||
476                             eppnt->p_filesz > eppnt->p_memsz ||
477                             eppnt->p_memsz > TASK_SIZE ||
478                             TASK_SIZE - eppnt->p_memsz < k) {
479                                 error = -ENOMEM;
480                                 goto out_close;
481                         }
482
483                         /*
484                          * Find the end of the file mapping for this phdr, and
485                          * keep track of the largest address we see for this.
486                          */
487                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
488                         if (k > elf_bss)
489                                 elf_bss = k;
490
491                         /*
492                          * Do the same thing for the memory mapping - between
493                          * elf_bss and last_bss is the bss section.
494                          */
495                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
496                         if (k > last_bss)
497                                 last_bss = k;
498                 }
499         }
500
501         if (last_bss > elf_bss) {
502                 /*
503                  * Now fill out the bss section.  First pad the last page up
504                  * to the page boundary, and then perform a mmap to make sure
505                  * that there are zero-mapped pages up to and including the
506                  * last bss page.
507                  */
508                 if (padzero(elf_bss)) {
509                         error = -EFAULT;
510                         goto out_close;
511                 }
512
513                 /* What we have mapped so far */
514                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
515
516                 /* Map the last of the bss segment */
517                 error = vm_brk(elf_bss, last_bss - elf_bss);
518                 if (BAD_ADDR(error))
519                         goto out_close;
520         }
521
522         error = load_addr;
523
524 out_close:
525         kfree(elf_phdata);
526 out:
527         return error;
528 }
529
530 /*
531  * These are the functions used to load ELF style executables and shared
532  * libraries.  There is no binary dependent code anywhere else.
533  */
534
535 #define INTERPRETER_NONE 0
536 #define INTERPRETER_ELF 2
537
538 #ifndef STACK_RND_MASK
539 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
540 #endif
541
542 static unsigned long randomize_stack_top(unsigned long stack_top)
543 {
544         unsigned int random_variable = 0;
545
546         if ((current->flags & PF_RANDOMIZE) &&
547                 !(current->personality & ADDR_NO_RANDOMIZE)) {
548                 random_variable = get_random_int() & STACK_RND_MASK;
549                 random_variable <<= PAGE_SHIFT;
550         }
551 #ifdef CONFIG_STACK_GROWSUP
552         return PAGE_ALIGN(stack_top) + random_variable;
553 #else
554         return PAGE_ALIGN(stack_top) - random_variable;
555 #endif
556 }
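
/*
 * Worked example (assuming PAGE_SHIFT == 12, i.e. 4K pages):
 * STACK_RND_MASK is 0x7ff, so random_variable is at most
 * 0x7ff << 12 == 0x7ff000, i.e. up to 8MB - 4KB of randomization
 * below (or above, with CONFIG_STACK_GROWSUP) the page-aligned
 * stack top.
 */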
557
558 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
559 {
560         struct file *interpreter = NULL; /* to shut gcc up */
561         unsigned long load_addr = 0, load_bias = 0;
562         int load_addr_set = 0;
563         char * elf_interpreter = NULL;
564         unsigned long error;
565         struct elf_phdr *elf_ppnt, *elf_phdata;
566         unsigned long elf_bss, elf_brk;
567         int retval, i;
568         unsigned int size;
569         unsigned long elf_entry;
570         unsigned long interp_load_addr = 0;
571         unsigned long start_code, end_code, start_data, end_data;
572         unsigned long reloc_func_desc __maybe_unused = 0;
573         int executable_stack = EXSTACK_DEFAULT;
574         unsigned long def_flags = 0;
575         struct {
576                 struct elfhdr elf_ex;
577                 struct elfhdr interp_elf_ex;
578         } *loc;
579
580         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
581         if (!loc) {
582                 retval = -ENOMEM;
583                 goto out_ret;
584         }
585         
586         /* Get the exec-header */
587         loc->elf_ex = *((struct elfhdr *)bprm->buf);
588
589         retval = -ENOEXEC;
590         /* First of all, some simple consistency checks */
591         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
592                 goto out;
593
594         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
595                 goto out;
596         if (!elf_check_arch(&loc->elf_ex))
597                 goto out;
598         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
599                 goto out;
600
601         /* Now read in all of the header information */
602         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
603                 goto out;
604         if (loc->elf_ex.e_phnum < 1 ||
605                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
606                 goto out;
607         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
608         retval = -ENOMEM;
609         elf_phdata = kmalloc(size, GFP_KERNEL);
610         if (!elf_phdata)
611                 goto out;
612
613         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
614                              (char *)elf_phdata, size);
615         if (retval != size) {
616                 if (retval >= 0)
617                         retval = -EIO;
618                 goto out_free_ph;
619         }
620
621         elf_ppnt = elf_phdata;
622         elf_bss = 0;
623         elf_brk = 0;
624
625         start_code = ~0UL;
626         end_code = 0;
627         start_data = 0;
628         end_data = 0;
629
630         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
631                 if (elf_ppnt->p_type == PT_INTERP) {
632                         /* This is the program interpreter used for
633                          * shared libraries (typically the ELF dynamic
634                          * linker); read its path from the segment.
635                          */
636                         retval = -ENOEXEC;
637                         if (elf_ppnt->p_filesz > PATH_MAX || 
638                             elf_ppnt->p_filesz < 2)
639                                 goto out_free_ph;
640
641                         retval = -ENOMEM;
642                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
643                                                   GFP_KERNEL);
644                         if (!elf_interpreter)
645                                 goto out_free_ph;
646
647                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
648                                              elf_interpreter,
649                                              elf_ppnt->p_filesz);
650                         if (retval != elf_ppnt->p_filesz) {
651                                 if (retval >= 0)
652                                         retval = -EIO;
653                                 goto out_free_interp;
654                         }
655                         /* make sure the path is NUL-terminated */
656                         retval = -ENOEXEC;
657                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
658                                 goto out_free_interp;
659
660                         interpreter = open_exec(elf_interpreter);
661                         retval = PTR_ERR(interpreter);
662                         if (IS_ERR(interpreter))
663                                 goto out_free_interp;
664
665                         /*
666                          * If the binary is not readable then enforce
667                          * mm->dumpable = 0 regardless of the interpreter's
668                          * permissions.
669                          */
670                         would_dump(bprm, interpreter);
671
672                         retval = kernel_read(interpreter, 0, bprm->buf,
673                                              BINPRM_BUF_SIZE);
674                         if (retval != BINPRM_BUF_SIZE) {
675                                 if (retval >= 0)
676                                         retval = -EIO;
677                                 goto out_free_dentry;
678                         }
679
680                         /* Get the exec headers */
681                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
682                         break;
683                 }
684                 elf_ppnt++;
685         }
686
687         elf_ppnt = elf_phdata;
688         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
689                 if (elf_ppnt->p_type == PT_GNU_STACK) {
690                         if (elf_ppnt->p_flags & PF_X)
691                                 executable_stack = EXSTACK_ENABLE_X;
692                         else
693                                 executable_stack = EXSTACK_DISABLE_X;
694                         break;
695                 }
696
697         /* Some simple consistency checks for the interpreter */
698         if (elf_interpreter) {
699                 retval = -ELIBBAD;
700                 /* Not an ELF interpreter */
701                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
702                         goto out_free_dentry;
703                 /* Verify the interpreter has a valid arch */
704                 if (!elf_check_arch(&loc->interp_elf_ex))
705                         goto out_free_dentry;
706         }
707
708         /* Flush all traces of the currently running executable */
709         retval = flush_old_exec(bprm);
710         if (retval)
711                 goto out_free_dentry;
712
713         /* OK, This is the point of no return */
714         current->mm->def_flags = def_flags;
715
716         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
717            may depend on the personality.  */
718         SET_PERSONALITY(loc->elf_ex);
719         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
720                 current->personality |= READ_IMPLIES_EXEC;
721
722         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
723                 current->flags |= PF_RANDOMIZE;
724
725         setup_new_exec(bprm);
726
727         /* Do this so that we can load the interpreter, if need be.  We will
728            change some of these later */
729         current->mm->free_area_cache = current->mm->mmap_base;
730         current->mm->cached_hole_size = 0;
731         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
732                                  executable_stack);
733         if (retval < 0) {
734                 send_sig(SIGKILL, current, 0);
735                 goto out_free_dentry;
736         }
737         
738         current->mm->start_stack = bprm->p;
739
740         /* Now we do a little grungy work by mmapping the ELF image into
741            the correct location in memory. */
742         for(i = 0, elf_ppnt = elf_phdata;
743             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
744                 int elf_prot = 0, elf_flags;
745                 unsigned long k, vaddr;
746
747                 if (elf_ppnt->p_type != PT_LOAD)
748                         continue;
749
750                 if (unlikely (elf_brk > elf_bss)) {
751                         unsigned long nbyte;
752                     
753                         /* There was a PT_LOAD segment with p_memsz > p_filesz
754                            before this one. Map anonymous pages, if needed,
755                            and clear the area.  */
756                         retval = set_brk(elf_bss + load_bias,
757                                          elf_brk + load_bias);
758                         if (retval) {
759                                 send_sig(SIGKILL, current, 0);
760                                 goto out_free_dentry;
761                         }
762                         nbyte = ELF_PAGEOFFSET(elf_bss);
763                         if (nbyte) {
764                                 nbyte = ELF_MIN_ALIGN - nbyte;
765                                 if (nbyte > elf_brk - elf_bss)
766                                         nbyte = elf_brk - elf_bss;
767                                 if (clear_user((void __user *)elf_bss +
768                                                         load_bias, nbyte)) {
769                                         /*
770                                          * This bss-zeroing can fail if the ELF
771                                          * file specifies odd protections. So
772                                          * we don't check the return value
773                                          */
774                                 }
775                         }
776                 }
777
778                 if (elf_ppnt->p_flags & PF_R)
779                         elf_prot |= PROT_READ;
780                 if (elf_ppnt->p_flags & PF_W)
781                         elf_prot |= PROT_WRITE;
782                 if (elf_ppnt->p_flags & PF_X)
783                         elf_prot |= PROT_EXEC;
784
785                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
786
787                 vaddr = elf_ppnt->p_vaddr;
788                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
789                         elf_flags |= MAP_FIXED;
790                 } else if (loc->elf_ex.e_type == ET_DYN) {
791                         /* Try and get dynamic programs out of the way of the
792                          * default mmap base, as well as whatever program they
793                          * might try to exec.  This is because the brk will
794                          * follow the loader, and is not movable.  */
795 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
796                         /* Memory randomization might have been switched off
797                          * in runtime via sysctl.
798                          * If that is the case, retain the original non-zero
799                          * load_bias value in order to establish proper
800                          * non-randomized mappings.
801                          */
802                         if (current->flags & PF_RANDOMIZE)
803                                 load_bias = 0;
804                         else
805                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
806 #else
807                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
808 #endif
809                 }
810
811                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
812                                 elf_prot, elf_flags, 0);
813                 if (BAD_ADDR(error)) {
814                         send_sig(SIGKILL, current, 0);
815                         retval = IS_ERR((void *)error) ?
816                                 PTR_ERR((void*)error) : -EINVAL;
817                         goto out_free_dentry;
818                 }
819
820                 if (!load_addr_set) {
821                         load_addr_set = 1;
822                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
823                         if (loc->elf_ex.e_type == ET_DYN) {
824                                 load_bias += error -
825                                              ELF_PAGESTART(load_bias + vaddr);
826                                 load_addr += load_bias;
827                                 reloc_func_desc = load_bias;
828                         }
829                 }
830                 k = elf_ppnt->p_vaddr;
831                 if (k < start_code)
832                         start_code = k;
833                 if (start_data < k)
834                         start_data = k;
835
836                 /*
837                  * Check to see if the section's size will overflow the
838                  * allowed task size. Note that p_filesz must always be
839                  * <= p_memsz so it is only necessary to check p_memsz.
840                  */
841                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
842                     elf_ppnt->p_memsz > TASK_SIZE ||
843                     TASK_SIZE - elf_ppnt->p_memsz < k) {
844                         /* set_brk can never work. Avoid overflows. */
845                         send_sig(SIGKILL, current, 0);
846                         retval = -EINVAL;
847                         goto out_free_dentry;
848                 }
849
850                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
851
852                 if (k > elf_bss)
853                         elf_bss = k;
854                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
855                         end_code = k;
856                 if (end_data < k)
857                         end_data = k;
858                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
859                 if (k > elf_brk)
860                         elf_brk = k;
861         }
862
863         loc->elf_ex.e_entry += load_bias;
864         elf_bss += load_bias;
865         elf_brk += load_bias;
866         start_code += load_bias;
867         end_code += load_bias;
868         start_data += load_bias;
869         end_data += load_bias;
870
871         /* Calling set_brk effectively mmaps the pages that we need
872          * for the bss and break sections.  We must do this before
873          * mapping in the interpreter, to make sure it doesn't wind
874          * up getting placed where the bss needs to go.
875          */
876         retval = set_brk(elf_bss, elf_brk);
877         if (retval) {
878                 send_sig(SIGKILL, current, 0);
879                 goto out_free_dentry;
880         }
881         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
882                 send_sig(SIGSEGV, current, 0);
883                 retval = -EFAULT; /* Nobody gets to see this, but.. */
884                 goto out_free_dentry;
885         }
886
887         if (elf_interpreter) {
888                 unsigned long interp_map_addr = 0;
889
890                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
891                                             interpreter,
892                                             &interp_map_addr,
893                                             load_bias);
894                 if (!IS_ERR((void *)elf_entry)) {
895                         /*
896                          * load_elf_interp() returns relocation
897                          * adjustment
898                          */
899                         interp_load_addr = elf_entry;
900                         elf_entry += loc->interp_elf_ex.e_entry;
901                 }
902                 if (BAD_ADDR(elf_entry)) {
903                         force_sig(SIGSEGV, current);
904                         retval = IS_ERR((void *)elf_entry) ?
905                                         (int)elf_entry : -EINVAL;
906                         goto out_free_dentry;
907                 }
908                 reloc_func_desc = interp_load_addr;
909
910                 allow_write_access(interpreter);
911                 fput(interpreter);
912                 kfree(elf_interpreter);
913         } else {
914                 elf_entry = loc->elf_ex.e_entry;
915                 if (BAD_ADDR(elf_entry)) {
916                         force_sig(SIGSEGV, current);
917                         retval = -EINVAL;
918                         goto out_free_dentry;
919                 }
920         }
921
922         kfree(elf_phdata);
923
924         set_binfmt(&elf_format);
925
926 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
927         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
928         if (retval < 0) {
929                 send_sig(SIGKILL, current, 0);
930                 goto out;
931         }
932 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
933
934         install_exec_creds(bprm);
935         retval = create_elf_tables(bprm, &loc->elf_ex,
936                           load_addr, interp_load_addr);
937         if (retval < 0) {
938                 send_sig(SIGKILL, current, 0);
939                 goto out;
940         }
941         /* N.B. passed_fileno might not be initialized? */
942         current->mm->end_code = end_code;
943         current->mm->start_code = start_code;
944         current->mm->start_data = start_data;
945         current->mm->end_data = end_data;
946         current->mm->start_stack = bprm->p;
947
948 #ifdef arch_randomize_brk
949         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
950                 current->mm->brk = current->mm->start_brk =
951                         arch_randomize_brk(current->mm);
952 #ifdef CONFIG_COMPAT_BRK
953                 current->brk_randomized = 1;
954 #endif
955         }
956 #endif
957
958         if (current->personality & MMAP_PAGE_ZERO) {
959                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
960                    and some applications "depend" upon this behavior.
961                    Since we do not have the power to recompile these, we
962                    emulate the SVr4 behavior. Sigh. */
963                 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
964                                 MAP_FIXED | MAP_PRIVATE, 0);
965         }
966
967 #ifdef ELF_PLAT_INIT
968         /*
969          * The ABI may specify that certain registers be set up in special
970          * ways (on i386, %edx is the address of a DT_FINI function, for
971          * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
972          * that the e_entry field is the address of the function descriptor
973          * for the startup routine, rather than the address of the startup
974          * routine itself.  This macro performs whatever initialization to
975          * the regs structure is required as well as any relocations to the
976          * function descriptor entries when executing dynamically linked apps.
977          */
978         ELF_PLAT_INIT(regs, reloc_func_desc);
979 #endif
980
981         start_thread(regs, elf_entry, bprm->p);
982         retval = 0;
983 out:
984         kfree(loc);
985 out_ret:
986         return retval;
987
988         /* error cleanup */
989 out_free_dentry:
990         allow_write_access(interpreter);
991         if (interpreter)
992                 fput(interpreter);
993 out_free_interp:
994         kfree(elf_interpreter);
995 out_free_ph:
996         kfree(elf_phdata);
997         goto out;
998 }
999
1000 /* This is really simpleminded and specialized - we are loading an
1001    a.out library that is given an ELF header. */
1002 static int load_elf_library(struct file *file)
1003 {
1004         struct elf_phdr *elf_phdata;
1005         struct elf_phdr *eppnt;
1006         unsigned long elf_bss, bss, len;
1007         int retval, error, i, j;
1008         struct elfhdr elf_ex;
1009
1010         error = -ENOEXEC;
1011         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1012         if (retval != sizeof(elf_ex))
1013                 goto out;
1014
1015         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1016                 goto out;
1017
1018         /* First of all, some simple consistency checks */
1019         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1020             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1021                 goto out;
1022
1023         /* Now read in all of the header information */
1024
1025         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1026         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1027
1028         error = -ENOMEM;
1029         elf_phdata = kmalloc(j, GFP_KERNEL);
1030         if (!elf_phdata)
1031                 goto out;
1032
1033         eppnt = elf_phdata;
1034         error = -ENOEXEC;
1035         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1036         if (retval != j)
1037                 goto out_free_ph;
1038
1039         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1040                 if ((eppnt + i)->p_type == PT_LOAD)
1041                         j++;
1042         if (j != 1)
1043                 goto out_free_ph;
1044
1045         while (eppnt->p_type != PT_LOAD)
1046                 eppnt++;
1047
1048         /* Now use mmap to map the library into memory. */
1049         error = vm_mmap(file,
1050                         ELF_PAGESTART(eppnt->p_vaddr),
1051                         (eppnt->p_filesz +
1052                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1053                         PROT_READ | PROT_WRITE | PROT_EXEC,
1054                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1055                         (eppnt->p_offset -
1056                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1057         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1058                 goto out_free_ph;
1059
1060         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1061         if (padzero(elf_bss)) {
1062                 error = -EFAULT;
1063                 goto out_free_ph;
1064         }
1065
1066         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1067                             ELF_MIN_ALIGN - 1);
1068         bss = eppnt->p_memsz + eppnt->p_vaddr;
1069         if (bss > len)
1070                 vm_brk(len, bss - len);
1071         error = 0;
1072
1073 out_free_ph:
1074         kfree(elf_phdata);
1075 out:
1076         return error;
1077 }
1078
1079 #ifdef CONFIG_ELF_CORE
1080 /*
1081  * ELF core dumper
1082  *
1083  * Modelled on fs/exec.c:aout_core_dump()
1084  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1085  */
1086
1087 /*
1088  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1089  * that are useful for post-mortem analysis are included in every core dump.
1090  * That way we ensure that the core dump is fully interpretable later
1091  * without having to match up the same kernel and hardware config to see
1092  * what the PC values meant. These special mappings include the vDSO,
1093  * vsyscall, and other architecture-specific mappings.
1094  */
1095 static bool always_dump_vma(struct vm_area_struct *vma)
1096 {
1097         /* Any vsyscall mappings? */
1098         if (vma == get_gate_vma(vma->vm_mm))
1099                 return true;
1100         /*
1101          * arch_vma_name() returns non-NULL for special architecture mappings,
1102          * such as vDSO sections.
1103          */
1104         if (arch_vma_name(vma))
1105                 return true;
1106
1107         return false;
1108 }
1109
1110 /*
1111  * Decide what to dump of a segment, part, all or none.
1112  */
1113 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1114                                    unsigned long mm_flags)
1115 {
1116 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1117
1118         /* always dump the vdso and vsyscall sections */
1119         if (always_dump_vma(vma))
1120                 goto whole;
1121
1122         if (vma->vm_flags & VM_NODUMP)
1123                 return 0;
1124
1125         /* Hugetlb memory check */
1126         if (vma->vm_flags & VM_HUGETLB) {
1127                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1128                         goto whole;
1129                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1130                         goto whole;
1131         }
1132
1133         /* Do not dump I/O mapped devices or special mappings */
1134         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1135                 return 0;
1136
1137         /* By default, dump shared memory if mapped from an anonymous file. */
1138         if (vma->vm_flags & VM_SHARED) {
1139                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1140                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1141                         goto whole;
1142                 return 0;
1143         }
1144
1145         /* Dump segments that have been written to.  */
1146         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1147                 goto whole;
1148         if (vma->vm_file == NULL)
1149                 return 0;
1150
1151         if (FILTER(MAPPED_PRIVATE))
1152                 goto whole;
1153
1154         /*
1155          * If this looks like the beginning of a DSO or executable mapping,
1156          * check for an ELF header.  If we find one, dump the first page to
1157          * aid in determining what was mapped here.
1158          */
1159         if (FILTER(ELF_HEADERS) &&
1160             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1161                 u32 __user *header = (u32 __user *) vma->vm_start;
1162                 u32 word;
1163                 mm_segment_t fs = get_fs();
1164                 /*
1165                  * Doing it this way gets the constant folded by GCC.
1166                  */
1167                 union {
1168                         u32 cmp;
1169                         char elfmag[SELFMAG];
1170                 } magic;
1171                 BUILD_BUG_ON(SELFMAG != sizeof word);
1172                 magic.elfmag[EI_MAG0] = ELFMAG0;
1173                 magic.elfmag[EI_MAG1] = ELFMAG1;
1174                 magic.elfmag[EI_MAG2] = ELFMAG2;
1175                 magic.elfmag[EI_MAG3] = ELFMAG3;
1176                 /*
1177                  * Switch to the user "segment" for get_user(),
1178                  * then put back what elf_core_dump() had in place.
1179                  */
1180                 set_fs(USER_DS);
1181                 if (unlikely(get_user(word, header)))
1182                         word = 0;
1183                 set_fs(fs);
1184                 if (word == magic.cmp)
1185                         return PAGE_SIZE;
1186         }
1187
1188 #undef  FILTER
1189
1190         return 0;
1191
1192 whole:
1193         return vma->vm_end - vma->vm_start;
1194 }
1195
1196 /* An ELF note in memory */
1197 struct memelfnote
1198 {
1199         const char *name;
1200         int type;
1201         unsigned int datasz;
1202         void *data;
1203 };
1204
1205 static int notesize(struct memelfnote *en)
1206 {
1207         int sz;
1208
1209         sz = sizeof(struct elf_note);
1210         sz += roundup(strlen(en->name) + 1, 4);
1211         sz += roundup(en->datasz, 4);
1212
1213         return sz;
1214 }
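
/*
 * Example: a note named "CORE" (n_namesz == 5, padded up to 8) with a
 * 336-byte descriptor occupies sizeof(struct elf_note) + 8 + 336 bytes;
 * both the name and the descriptor are rounded up to 4-byte multiples.
 */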
1215
1216 #define DUMP_WRITE(addr, nr, foffset)   \
1217         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1218
1219 static int alignfile(struct file *file, loff_t *foffset)
1220 {
1221         static const char buf[4] = { 0, };
1222         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1223         return 1;
1224 }
1225
1226 static int writenote(struct memelfnote *men, struct file *file,
1227                         loff_t *foffset)
1228 {
1229         struct elf_note en;
1230         en.n_namesz = strlen(men->name) + 1;
1231         en.n_descsz = men->datasz;
1232         en.n_type = men->type;
1233
1234         DUMP_WRITE(&en, sizeof(en), foffset);
1235         DUMP_WRITE(men->name, en.n_namesz, foffset);
1236         if (!alignfile(file, foffset))
1237                 return 0;
1238         DUMP_WRITE(men->data, men->datasz, foffset);
1239         if (!alignfile(file, foffset))
1240                 return 0;
1241
1242         return 1;
1243 }
1244 #undef DUMP_WRITE
1245
1246 static void fill_elf_header(struct elfhdr *elf, int segs,
1247                             u16 machine, u32 flags, u8 osabi)
1248 {
1249         memset(elf, 0, sizeof(*elf));
1250
1251         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1252         elf->e_ident[EI_CLASS] = ELF_CLASS;
1253         elf->e_ident[EI_DATA] = ELF_DATA;
1254         elf->e_ident[EI_VERSION] = EV_CURRENT;
1255         elf->e_ident[EI_OSABI] = ELF_OSABI;
1256
1257         elf->e_type = ET_CORE;
1258         elf->e_machine = machine;
1259         elf->e_version = EV_CURRENT;
1260         elf->e_phoff = sizeof(struct elfhdr);
1261         elf->e_flags = flags;
1262         elf->e_ehsize = sizeof(struct elfhdr);
1263         elf->e_phentsize = sizeof(struct elf_phdr);
1264         elf->e_phnum = segs;
1265
1266         return;
1267 }
1268
1269 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1270 {
1271         phdr->p_type = PT_NOTE;
1272         phdr->p_offset = offset;
1273         phdr->p_vaddr = 0;
1274         phdr->p_paddr = 0;
1275         phdr->p_filesz = sz;
1276         phdr->p_memsz = 0;
1277         phdr->p_flags = 0;
1278         phdr->p_align = 0;
1279         return;
1280 }
1281
1282 static void fill_note(struct memelfnote *note, const char *name, int type, 
1283                 unsigned int sz, void *data)
1284 {
1285         note->name = name;
1286         note->type = type;
1287         note->datasz = sz;
1288         note->data = data;
1289         return;
1290 }
1291
1292 /*
1293  * fill up all the fields in prstatus from the given task struct, except
1294  * registers which need to be filled up separately.
1295  */
1296 static void fill_prstatus(struct elf_prstatus *prstatus,
1297                 struct task_struct *p, long signr)
1298 {
1299         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1300         prstatus->pr_sigpend = p->pending.signal.sig[0];
1301         prstatus->pr_sighold = p->blocked.sig[0];
1302         rcu_read_lock();
1303         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1304         rcu_read_unlock();
1305         prstatus->pr_pid = task_pid_vnr(p);
1306         prstatus->pr_pgrp = task_pgrp_vnr(p);
1307         prstatus->pr_sid = task_session_vnr(p);
1308         if (thread_group_leader(p)) {
1309                 struct task_cputime cputime;
1310
1311                 /*
1312                  * This is the record for the group leader.  It shows the
1313                  * group-wide total, not its individual thread total.
1314                  */
1315                 thread_group_cputime(p, &cputime);
1316                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1317                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1318         } else {
1319                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1320                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1321         }
1322         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1323         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1324 }
1325
1326 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1327                        struct mm_struct *mm)
1328 {
1329         const struct cred *cred;
1330         unsigned int i, len;
1331         
1332         /* first copy the parameters from user space */
1333         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1334
1335         len = mm->arg_end - mm->arg_start;
1336         if (len >= ELF_PRARGSZ)
1337                 len = ELF_PRARGSZ-1;
1338         if (copy_from_user(&psinfo->pr_psargs,
1339                            (const char __user *)mm->arg_start, len))
1340                 return -EFAULT;
1341         for(i = 0; i < len; i++)
1342                 if (psinfo->pr_psargs[i] == 0)
1343                         psinfo->pr_psargs[i] = ' ';
1344         psinfo->pr_psargs[len] = 0;
1345
1346         rcu_read_lock();
1347         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1348         rcu_read_unlock();
1349         psinfo->pr_pid = task_pid_vnr(p);
1350         psinfo->pr_pgrp = task_pgrp_vnr(p);
1351         psinfo->pr_sid = task_session_vnr(p);
1352
1353         i = p->state ? ffz(~p->state) + 1 : 0;
1354         psinfo->pr_state = i;
1355         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1356         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1357         psinfo->pr_nice = task_nice(p);
1358         psinfo->pr_flag = p->flags;
1359         rcu_read_lock();
1360         cred = __task_cred(p);
1361         SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1362         SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1363         rcu_read_unlock();
1364         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1365         
1366         return 0;
1367 }
1368
1369 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1370 {
1371         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1372         int i = 0;
1373         do
1374                 i += 2;
1375         while (auxv[i - 2] != AT_NULL);
1376         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1377 }
1378
1379 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1380                 siginfo_t *siginfo)
1381 {
1382         mm_segment_t old_fs = get_fs();
1383         set_fs(KERNEL_DS);
1384         copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1385         set_fs(old_fs);
1386         fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1387 }
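
/*
 * This implements the NT_SIGINFO note this patch adds: the siginfo of
 * the signal that caused the dump is run through copy_siginfo_to_user()
 * (under KERNEL_DS, since csigdata is a kernel buffer) so the note uses
 * the same layout userspace would see, letting debuggers read fields
 * such as si_signo, si_code and si_addr straight from the core file.
 */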
1388
1389 #ifdef CORE_DUMP_USE_REGSET
1390 #include <linux/regset.h>
1391
struct elf_thread_core_info {
        struct elf_thread_core_info *next;
        struct task_struct *task;
        struct elf_prstatus prstatus;
        struct memelfnote notes[0];
};

struct elf_note_info {
        struct elf_thread_core_info *thread;
        struct memelfnote psinfo;
        struct memelfnote signote;
        struct memelfnote auxv;
        user_siginfo_t csigdata;
        size_t size;
        int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
                                       const struct user_regset *regset)
{
        if (regset->writeback)
                regset->writeback(task, regset, 1);
}

#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif

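/*
 * The macros above default to the native prstatus layout; an
 * architecture with a different (e.g. compat) core-dump format can
 * define them before this point to override the sizes and pointers
 * used below.
 */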
static int fill_thread_core_info(struct elf_thread_core_info *t,
                                 const struct user_regset_view *view,
                                 long signr, size_t *total)
{
        unsigned int i;

        /*
         * NT_PRSTATUS is the one special case, because the regset data
         * goes into the pr_reg field inside the note contents, rather
         * than being the whole note contents.  We fill the rest in here.
         * We assume that regset 0 is NT_PRSTATUS.
         */
        fill_prstatus(&t->prstatus, t->task, signr);
        (void) view->regsets[0].get(t->task, &view->regsets[0],
                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
                                    PR_REG_PTR(&t->prstatus), NULL);

        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
        *total += notesize(&t->notes[0]);

        do_thread_regset_writeback(t->task, &view->regsets[0]);

        /*
         * Each other regset might generate a note too.  For each regset
         * that has no core_note_type or is inactive, we leave t->notes[i]
         * all zero and we'll know to skip writing it later.
         */
        for (i = 1; i < view->n; ++i) {
                const struct user_regset *regset = &view->regsets[i];
                do_thread_regset_writeback(t->task, regset);
                if (regset->core_note_type && regset->get &&
                    (!regset->active || regset->active(t->task, regset))) {
                        int ret;
                        size_t size = regset->n * regset->size;
                        void *data = kmalloc(size, GFP_KERNEL);
                        if (unlikely(!data))
                                return 0;
                        ret = regset->get(t->task, regset,
                                          0, size, data, NULL);
                        if (unlikely(ret))
                                kfree(data);
                        else {
                                if (regset->core_note_type != NT_PRFPREG)
                                        fill_note(&t->notes[i], "LINUX",
                                                  regset->core_note_type,
                                                  size, data);
                                else {
                                        SET_PR_FPVALID(&t->prstatus, 1);
                                        fill_note(&t->notes[i], "CORE",
                                                  NT_PRFPREG, size, data);
                                }
                                *total += notesize(&t->notes[i]);
                        }
                }
        }

        return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
                          struct elf_note_info *info,
                          siginfo_t *siginfo, struct pt_regs *regs)
{
        struct task_struct *dump_task = current;
        const struct user_regset_view *view = task_user_regset_view(dump_task);
        struct elf_thread_core_info *t;
        struct elf_prpsinfo *psinfo;
        struct core_thread *ct;
        unsigned int i;

        info->size = 0;
        info->thread = NULL;

        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
        if (psinfo == NULL)
                return 0;

        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

        /*
         * Figure out how many notes we're going to need for each thread.
         */
        info->thread_notes = 0;
        for (i = 0; i < view->n; ++i)
                if (view->regsets[i].core_note_type != 0)
                        ++info->thread_notes;

        /*
         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
         * since it is our one special case.
         */
        if (unlikely(info->thread_notes == 0) ||
            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
                WARN_ON(1);
                return 0;
        }

        /*
         * Initialize the ELF file header.
         */
        fill_elf_header(elf, phdrs,
                        view->e_machine, view->e_flags, view->ei_osabi);

        /*
         * Allocate a structure for each thread.
         */
        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
                t = kzalloc(offsetof(struct elf_thread_core_info,
                                     notes[info->thread_notes]),
                            GFP_KERNEL);
                if (unlikely(!t))
                        return 0;

                t->task = ct->task;
                if (ct->task == dump_task || !info->thread) {
                        t->next = info->thread;
                        info->thread = t;
                } else {
                        /*
                         * Make sure to keep the original task at
                         * the head of the list.
                         */
                        t->next = info->thread->next;
                        info->thread->next = t;
                }
        }

        /*
         * Now fill in each thread's information.
         */
        for (t = info->thread; t != NULL; t = t->next)
                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
                        return 0;

        /*
         * Fill in the process-wide notes.
         */
        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
        info->size += notesize(&info->psinfo);

        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
        info->size += notesize(&info->signote);

        fill_auxv_note(&info->auxv, current->mm);
        info->size += notesize(&info->auxv);

        return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
        return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
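/*
 * For example, with three threads and the dumping thread kept at the
 * head of the list, the notes come out roughly as:
 *
 *   T0 NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO, NT_AUXV, T0 extra regsets,
 *   T1 NT_PRSTATUS, T1 extra regsets,
 *   T2 NT_PRSTATUS, T2 extra regsets
 */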
static int write_note_info(struct elf_note_info *info,
                           struct file *file, loff_t *foffset)
{
        bool first = true;
        struct elf_thread_core_info *t = info->thread;

        do {
                int i;

                if (!writenote(&t->notes[0], file, foffset))
                        return 0;

                if (first && !writenote(&info->psinfo, file, foffset))
                        return 0;
                if (first && !writenote(&info->signote, file, foffset))
                        return 0;
                if (first && !writenote(&info->auxv, file, foffset))
                        return 0;

                for (i = 1; i < info->thread_notes; ++i)
                        if (t->notes[i].data &&
                            !writenote(&t->notes[i], file, foffset))
                                return 0;

                first = false;
                t = t->next;
        } while (t);

        return 1;
}

static void free_note_info(struct elf_note_info *info)
{
        struct elf_thread_core_info *threads = info->thread;
        while (threads) {
                unsigned int i;
                struct elf_thread_core_info *t = threads;
                threads = t->next;
                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
                for (i = 1; i < info->thread_notes; ++i)
                        kfree(t->notes[i].data);
                kfree(t);
        }
        kfree(info->psinfo.data);
}

#else

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
        struct list_head list;
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
#endif
        struct memelfnote notes[3];
        int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
        int sz = 0;
        struct task_struct *p = t->thread;
        t->num_notes = 0;

        fill_prstatus(&t->prstatus, p, signr);
        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
                  &(t->prstatus));
        t->num_notes++;
        sz += notesize(&t->notes[0]);

        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
                                                                &t->fpu))) {
                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
                          &(t->fpu));
                t->num_notes++;
                sz += notesize(&t->notes[1]);
        }

#ifdef ELF_CORE_COPY_XFPREGS
        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
                          sizeof(t->xfpu), &t->xfpu);
                t->num_notes++;
                sz += notesize(&t->notes[2]);
        }
#endif
        return sz;
}

struct elf_note_info {
        struct memelfnote *notes;
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;
        elf_fpregset_t *fpu;
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t *xfpu;
#endif
        user_siginfo_t csigdata;
        int thread_status_size;
        int numnote;
};

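/*
 * The notes array allocated below holds the fixed process-wide notes
 * (prstatus, psinfo, siginfo, auxv, FPU state and, optionally,
 * extended FPU state); at most six of its seven slots are filled by
 * fill_note_info().
 */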
static int elf_note_info_init(struct elf_note_info *info)
{
        memset(info, 0, sizeof(*info));
        INIT_LIST_HEAD(&info->thread_list);

        /* Allocate space for ELF notes */
        info->notes = kmalloc(7 * sizeof(struct memelfnote), GFP_KERNEL);
        if (!info->notes)
                return 0;
        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
        if (!info->psinfo)
                return 0;
        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
        if (!info->prstatus)
                return 0;
        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
        if (!info->fpu)
                return 0;
#ifdef ELF_CORE_COPY_XFPREGS
        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
        if (!info->xfpu)
                return 0;
#endif
        return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
                          struct elf_note_info *info,
                          siginfo_t *siginfo, struct pt_regs *regs)
{
        struct list_head *t;

        if (!elf_note_info_init(info))
                return 0;

        if (siginfo->si_signo) {
                struct core_thread *ct;
                struct elf_thread_status *ets;

                for (ct = current->mm->core_state->dumper.next;
                                                ct; ct = ct->next) {
                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
                        if (!ets)
                                return 0;

                        ets->thread = ct->task;
                        list_add(&ets->list, &info->thread_list);
                }

                list_for_each(t, &info->thread_list) {
                        int sz;

                        ets = list_entry(t, struct elf_thread_status, list);
                        sz = elf_dump_thread_status(siginfo->si_signo, ets);
                        info->thread_status_size += sz;
                }
        }
        /* now collect the dump for the current thread */
        memset(info->prstatus, 0, sizeof(*info->prstatus));
        fill_prstatus(info->prstatus, current, siginfo->si_signo);
        elf_core_copy_regs(&info->prstatus->pr_reg, regs);

        /* Set up header */
        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);

        /*
         * Set up the notes in similar form to SVR4 core dumps made
         * with info from their /proc.
         */

        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
                  sizeof(*info->prstatus), info->prstatus);
        fill_psinfo(info->psinfo, current->group_leader, current->mm);
        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
                  sizeof(*info->psinfo), info->psinfo);

        info->numnote = 2;

        fill_siginfo_note(&info->notes[info->numnote++], &info->csigdata, siginfo);
        fill_auxv_note(&info->notes[info->numnote++], current->mm);

        /* Try to dump the FPU. */
        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
                                                               info->fpu);
        if (info->prstatus->pr_fpvalid)
                fill_note(info->notes + info->numnote++,
                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
        if (elf_core_copy_task_xfpregs(current, info->xfpu))
                fill_note(info->notes + info->numnote++,
                          "LINUX", ELF_CORE_XFPREG_TYPE,
                          sizeof(*info->xfpu), info->xfpu);
#endif

        return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
        int sz = 0;
        int i;

        for (i = 0; i < info->numnote; i++)
                sz += notesize(info->notes + i);

        sz += info->thread_status_size;

        return sz;
}

static int write_note_info(struct elf_note_info *info,
                           struct file *file, loff_t *foffset)
{
        int i;
        struct list_head *t;

        for (i = 0; i < info->numnote; i++)
                if (!writenote(info->notes + i, file, foffset))
                        return 0;

        /* write out the thread status notes section */
        list_for_each(t, &info->thread_list) {
                struct elf_thread_status *tmp =
                                list_entry(t, struct elf_thread_status, list);

                for (i = 0; i < tmp->num_notes; i++)
                        if (!writenote(&tmp->notes[i], file, foffset))
                                return 0;
        }

        return 1;
}

static void free_note_info(struct elf_note_info *info)
{
        while (!list_empty(&info->thread_list)) {
                struct list_head *tmp = info->thread_list.next;
                list_del(tmp);
                kfree(list_entry(tmp, struct elf_thread_status, list));
        }

        kfree(info->prstatus);
        kfree(info->psinfo);
        kfree(info->notes);
        kfree(info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
        kfree(info->xfpu);
#endif
}

#endif

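/*
 * The first vma to dump: the head of the mm's vma list, or the gate
 * vma (e.g. the x86 vsyscall page) when the list is empty.
 */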
static struct vm_area_struct *first_vma(struct task_struct *tsk,
                                        struct vm_area_struct *gate_vma)
{
        struct vm_area_struct *ret = tsk->mm->mmap;

        if (ret)
                return ret;
        return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the caller
 * will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
                                        struct vm_area_struct *gate_vma)
{
        struct vm_area_struct *ret;

        ret = this_vma->vm_next;
        if (ret)
                return ret;
        if (this_vma == gate_vma)
                return NULL;
        return gate_vma;
}

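/*
 * ELF extended numbering: when the true segment count does not fit in
 * the 16-bit e_phnum field, e_phnum is set to PN_XNUM and the real
 * count is stored in the sh_info field of section header index 0,
 * which this helper fills in.
 */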
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
                             elf_addr_t e_shoff, int segs)
{
        elf->e_shoff = e_shoff;
        elf->e_shentsize = sizeof(*shdr4extnum);
        elf->e_shnum = 1;
        elf->e_shstrndx = SHN_UNDEF;

        memset(shdr4extnum, 0, sizeof(*shdr4extnum));

        shdr4extnum->sh_type = SHT_NULL;
        shdr4extnum->sh_size = elf->e_shnum;
        shdr4extnum->sh_link = elf->e_shstrndx;
        shdr4extnum->sh_info = segs;
}

static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
                                     unsigned long mm_flags)
{
        struct vm_area_struct *vma;
        size_t size = 0;

        for (vma = first_vma(current, gate_vma); vma != NULL;
             vma = next_vma(vma, gate_vma))
                size += vma_dump_size(vma, mm_flags);
        return size;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
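/*
 * The resulting core file, in file order:
 *
 *   ELF header | program headers (one PT_NOTE + one PT_LOAD per vma)
 *   | note data | page-aligned vma contents | arch extra data
 *   | the extra section header, if e_phnum == PN_XNUM
 */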
static int elf_core_dump(struct coredump_params *cprm)
{
        int has_dumped = 0;
        mm_segment_t fs;
        int segs;
        size_t size = 0;
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        loff_t offset = 0, dataoff, foffset;
        struct elf_note_info info;
        struct elf_phdr *phdr4note = NULL;
        struct elf_shdr *shdr4extnum = NULL;
        Elf_Half e_phnum;
        elf_addr_t e_shoff;

        /*
         * We no longer stop all VM operations.
         *
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated. So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */

        /* alloc memory for large data structures: too large to be on stack */
        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
        if (!elf)
                goto out;
        /*
         * The number of segs is recorded in the ELF header as a 16-bit value.
         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
         */
        segs = current->mm->map_count;
        segs += elf_core_extra_phdrs();

        gate_vma = get_gate_vma(current->mm);
        if (gate_vma != NULL)
                segs++;

        /* for the notes section */
        segs++;

        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
         * this, kernel supports extended numbering. Have a look at
         * include/linux/elf.h for further information. */
        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

        /*
         * Collect all the non-memory information about the process for the
         * notes.  This also sets up the file header.
         */
        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
                goto cleanup;

        has_dumped = 1;
        current->flags |= PF_DUMPCORE;

        fs = get_fs();
        set_fs(KERNEL_DS);

        offset += sizeof(*elf);                         /* Elf header */
        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
        foffset = offset;

        /* Write notes phdr entry */
        {
                size_t sz = get_note_info_size(&info);

                sz += elf_coredump_extra_notes_size();

                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
                if (!phdr4note)
                        goto end_coredump;

                fill_elf_note_phdr(phdr4note, sz, offset);
                offset += sz;
        }

        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
        offset += elf_core_extra_data_size();
        e_shoff = offset;

        if (e_phnum == PN_XNUM) {
                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
                if (!shdr4extnum)
                        goto end_coredump;
                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
        }

        offset = dataoff;

        size += sizeof(*elf);
        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
                goto end_coredump;

        size += sizeof(*phdr4note);
        if (size > cprm->limit
            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
                goto end_coredump;

        /* Write program headers for segments dump */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                struct elf_phdr phdr;

                phdr.p_type = PT_LOAD;
                phdr.p_offset = offset;
                phdr.p_vaddr = vma->vm_start;
                phdr.p_paddr = 0;
                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
                phdr.p_memsz = vma->vm_end - vma->vm_start;
                offset += phdr.p_filesz;
                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
                if (vma->vm_flags & VM_WRITE)
                        phdr.p_flags |= PF_W;
                if (vma->vm_flags & VM_EXEC)
                        phdr.p_flags |= PF_X;
                phdr.p_align = ELF_EXEC_PAGESIZE;

                size += sizeof(phdr);
                if (size > cprm->limit
                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
                        goto end_coredump;
        }

        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
                goto end_coredump;

        /* write out the notes section */
        if (!write_note_info(&info, cprm->file, &foffset))
                goto end_coredump;

        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
                goto end_coredump;

        /* Align to page */
        if (!dump_seek(cprm->file, dataoff - foffset))
                goto end_coredump;

        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                unsigned long addr;
                unsigned long end;

                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);

                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
                        struct page *page;
                        int stop;

                        page = get_dump_page(addr);
                        if (page) {
                                void *kaddr = kmap(page);
                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
                                        !dump_write(cprm->file, kaddr,
                                                    PAGE_SIZE);
                                kunmap(page);
                                page_cache_release(page);
                        } else
                                stop = !dump_seek(cprm->file, PAGE_SIZE);
                        if (stop)
                                goto end_coredump;
                }
        }

        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
                goto end_coredump;

        if (e_phnum == PN_XNUM) {
                size += sizeof(*shdr4extnum);
                if (size > cprm->limit
                    || !dump_write(cprm->file, shdr4extnum,
                                   sizeof(*shdr4extnum)))
                        goto end_coredump;
        }

end_coredump:
        set_fs(fs);

cleanup:
        free_note_info(&info);
        kfree(shdr4extnum);
        kfree(phdr4note);
        kfree(elf);
out:
        return has_dumped;
}

#endif          /* CONFIG_ELF_CORE */

static int __init init_elf_binfmt(void)
{
        register_binfmt(&elf_format);
        return 0;
}

static void __exit exit_elf_binfmt(void)
{
        /* Remove the ELF loader. */
        unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");