Merge branch 'linux-linaro-lsk-v4.4' into linux-linaro-lsk-v4.4-android
author Alex Shi <alex.shi@linaro.org>
Tue, 11 Oct 2016 21:33:37 +0000 (23:33 +0200)
committer Alex Shi <alex.shi@linaro.org>
Tue, 11 Oct 2016 21:33:37 +0000 (23:33 +0200)
Conflicts:
kernel/cpuset.c

15 files changed:
arch/x86/include/asm/uaccess.h
drivers/md/dm-crypt.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/proc/base.c
include/linux/mm.h
kernel/cpuset.c
kernel/fork.c
kernel/sched/core.c
kernel/trace/Makefile
kernel/trace/trace.c
net/ipv4/tcp_ipv4.c
net/ipv6/addrconf.c
net/ipv6/ping.c
net/ipv6/tcp_ipv6.c

index dbe64f27280e34138dc5163b587579db35d3c768,d2fc24560260c2b38cd4037128271656b98230fa..b8ff6aba09604ddf198a0f9a3f21ab80d64154a6
@@@ -405,7 -405,11 +405,11 @@@ do {                                                                     
  #define __get_user_asm_ex(x, addr, itype, rtype, ltype)                       \
        asm volatile("1:        mov"itype" %1,%"rtype"0\n"              \
                     "2:\n"                                             \
-                    _ASM_EXTABLE_EX(1b, 2b)                            \
+                    ".section .fixup,\"ax\"\n"                         \
+                      "3:xor"itype" %"rtype"0,%"rtype"0\n"             \
+                    "  jmp 2b\n"                                       \
+                    ".previous\n"                                      \
+                    _ASM_EXTABLE_EX(1b, 3b)                            \
                     : ltype(x) : "m" (__m(addr)))
  
  #define __put_user_nocheck(x, ptr, size)                      \
@@@ -706,7 -710,7 +710,7 @@@ __copy_from_user_overflow(int size, uns
  
  #endif
  
 -static inline unsigned long __must_check
 +static __always_inline unsigned long __must_check
  copy_from_user(void *to, const void __user *from, unsigned long n)
  {
        int sz = __compiletime_object_size(to);
        return n;
  }
  
 -static inline unsigned long __must_check
 +static __always_inline unsigned long __must_check
  copy_to_user(void __user *to, const void *from, unsigned long n)
  {
        int sz = __compiletime_object_size(from);
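
The __get_user_asm_ex() hunk above replaces the bare exception-table entry with a .fixup stub that zeroes the destination register before resuming at the 2: label, so a faulting get_user_ex() hands back 0 instead of whatever stale data happened to be in the register. A minimal user-space sketch of that contract (copy_int is a hypothetical stand-in, not the kernel helper):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    /* Stand-in for get_user(): on a "fault" the destination is zeroed
     * rather than left holding stale caller data, mirroring the fixup
     * added in the hunk above. */
    static int copy_int(int *dst, const int *src_maybe_bad)
    {
            if (src_maybe_bad == NULL) {    /* plays the role of a fault */
                    *dst = 0;
                    return -EFAULT;
            }
            memcpy(dst, src_maybe_bad, sizeof(*dst));
            return 0;
    }

    int main(void)
    {
            int v = 0x1234;                 /* pretend stale stack data */

            if (copy_int(&v, NULL))
                    printf("faulted, v = %d (zeroed, not leaked)\n", v);
            return 0;
    }
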
diff --combined drivers/md/dm-crypt.c
index e85bcae50f65f68c2035efcbb328006d1bf45bb7,51eda7235e32095557e968cb2517db13d91d3037..e6a0bcbe8fd9b167e2ac3e000055ba8d0f868c59
@@@ -1864,24 -1864,16 +1864,24 @@@ static int crypt_ctr(struct dm_target *
        }
  
        ret = -ENOMEM;
 -      cc->io_queue = alloc_workqueue("kcryptd_io", WQ_MEM_RECLAIM, 1);
 +      cc->io_queue = alloc_workqueue("kcryptd_io",
 +                                     WQ_HIGHPRI |
 +                                     WQ_MEM_RECLAIM,
 +                                     1);
        if (!cc->io_queue) {
                ti->error = "Couldn't create kcryptd io queue";
                goto bad;
        }
  
        if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
 -              cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
 +              cc->crypt_queue = alloc_workqueue("kcryptd",
 +                                                WQ_HIGHPRI |
 +                                                WQ_MEM_RECLAIM, 1);
        else
 -              cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
 +              cc->crypt_queue = alloc_workqueue("kcryptd",
 +                                                WQ_HIGHPRI |
 +                                                WQ_MEM_RECLAIM |
 +                                                WQ_UNBOUND,
                                                  num_online_cpus());
        if (!cc->crypt_queue) {
                ti->error = "Couldn't create kcryptd queue";
@@@ -1928,6 -1920,13 +1928,13 @@@ static int crypt_map(struct dm_target *
                return DM_MAPIO_REMAPPED;
        }
  
+       /*
+        * Check if bio is too large, split as needed.
+        */
+       if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
+           bio_data_dir(bio) == WRITE)
+               dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
        io = dm_per_bio_data(bio, cc->per_bio_data_size);
        crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
        io->ctx.req = (struct ablkcipher_request *)(io + 1);
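
crypt_map() now splits writes that exceed BIO_MAX_PAGES worth of payload before queueing them to kcryptd. With the usual constants this caps a single bio at 1 MiB (2048 sectors); a small sketch of the arithmetic, using common default values rather than anything taken from this tree:

    #include <stdio.h>

    /* Common defaults; the kernel takes these from its own headers. */
    #define BIO_MAX_PAGES 256UL
    #define PAGE_SHIFT    12UL   /* 4 KiB pages */
    #define SECTOR_SHIFT  9UL    /* 512-byte sectors */

    int main(void)
    {
            unsigned long max_bytes   = BIO_MAX_PAGES << PAGE_SHIFT;  /* 1 MiB */
            unsigned long max_sectors = max_bytes >> SECTOR_SHIFT;    /* 2048  */

            printf("split writes larger than %lu bytes (%lu sectors)\n",
                   max_bytes, max_sectors);
            return 0;
    }
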
diff --combined fs/ext4/ioctl.c
index 95315b1f4b71c58458f0a6fa2b4d5ae2059e06a5,1fb12f9c97a6b70467700c03327f442a1a273481..7e974878d9a9ca1774acf59059bf9cd88ebf890e
@@@ -587,13 -587,11 +587,13 @@@ resizefs_out
                return err;
        }
  
 +      case FIDTRIM:
        case FITRIM:
        {
                struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
 +              int flags  = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0;
  
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
                if (!blk_queue_discard(q))
                        return -EOPNOTSUPP;
  
 +              if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q))
 +                      return -EOPNOTSUPP;
                if (copy_from_user(&range, (struct fstrim_range __user *)arg,
                    sizeof(range)))
                        return -EFAULT;
  
                range.minlen = max((unsigned int)range.minlen,
                                   q->limits.discard_granularity);
 -              ret = ext4_trim_fs(sb, &range);
 +              ret = ext4_trim_fs(sb, &range, flags);
                if (ret < 0)
                        return ret;
  
                        goto encryption_policy_out;
                }
  
+               err = mnt_want_write_file(filp);
+               if (err)
+                       goto encryption_policy_out;
                err = ext4_process_policy(&policy, inode);
+               mnt_drop_write_file(filp);
  encryption_policy_out:
                return err;
  #else
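
The ioctl hunk routes the Android FIDTRIM request through the existing FITRIM path, adding BLKDEV_DISCARD_SECURE when the queue supports secure discard. From user space both requests take a struct fstrim_range; a minimal FITRIM caller is sketched below (FIDTRIM would be invoked the same way on trees that define it):

    #include <fcntl.h>
    #include <linux/fs.h>      /* FITRIM, struct fstrim_range */
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            struct fstrim_range range = {
                    .start  = 0,
                    .len    = ~0ULL,   /* whole filesystem */
                    .minlen = 0,       /* raised to the discard granularity by the kernel */
            };
            int fd = open(argc > 1 ? argv[1] : "/", O_RDONLY);

            if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
                    perror("FITRIM");
                    return 1;
            }
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
            close(fd);
            return 0;
    }
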
diff --combined fs/ext4/mballoc.c
index a0daca4b127b6e4d356d8d536a8d44c1f7afa984,3c7f0c44cfb361c85127a7c4593ba41ee3e48a3e..0b1c97875686bbe2732c0dbb9cb2a131c4d52c80
@@@ -815,7 -815,7 +815,7 @@@ static void mb_regenerate_buddy(struct 
   * for this page; do not hold this lock when calling this routine!
   */
  
- static int ext4_mb_init_cache(struct page *page, char *incore)
+ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
  {
        ext4_group_t ngroups;
        int blocksize;
        /* allocate buffer_heads to read bitmaps */
        if (groups_per_page > 1) {
                i = sizeof(struct buffer_head *) * groups_per_page;
-               bh = kzalloc(i, GFP_NOFS);
+               bh = kzalloc(i, gfp);
                if (bh == NULL) {
                        err = -ENOMEM;
                        goto out;
@@@ -983,7 -983,7 +983,7 @@@ out
   * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
   */
  static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
-               ext4_group_t group, struct ext4_buddy *e4b)
+               ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
  {
        struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
        int block, pnum, poff;
        block = group * 2;
        pnum = block / blocks_per_page;
        poff = block % blocks_per_page;
-       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       page = find_or_create_page(inode->i_mapping, pnum, gfp);
        if (!page)
                return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
  
        block++;
        pnum = block / blocks_per_page;
-       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       page = find_or_create_page(inode->i_mapping, pnum, gfp);
        if (!page)
                return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
@@@ -1042,7 -1042,7 +1042,7 @@@ static void ext4_mb_put_buddy_page_lock
   * calling this routine!
   */
  static noinline_for_stack
- int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
  {
  
        struct ext4_group_info *this_grp;
         * The call to ext4_mb_get_buddy_page_lock will mark the
         * page accessed.
         */
-       ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+       ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
        if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
                /*
                 * somebody initialized the group
        }
  
        page = e4b.bd_bitmap_page;
-       ret = ext4_mb_init_cache(page, NULL);
+       ret = ext4_mb_init_cache(page, NULL, gfp);
        if (ret)
                goto err;
        if (!PageUptodate(page)) {
        }
        /* init buddy cache */
        page = e4b.bd_buddy_page;
-       ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+       ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
        if (ret)
                goto err;
        if (!PageUptodate(page)) {
@@@ -1109,8 -1109,8 +1109,8 @@@ err
   * calling this routine!
   */
  static noinline_for_stack int
- ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-                                       struct ext4_buddy *e4b)
+ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+                      struct ext4_buddy *e4b, gfp_t gfp)
  {
        int blocks_per_page;
        int block;
                 * we need full data about the group
                 * to make a good selection
                 */
-               ret = ext4_mb_init_group(sb, group);
+               ret = ext4_mb_init_group(sb, group, gfp);
                if (ret)
                        return ret;
        }
                         * wait for it to initialize.
                         */
                        page_cache_release(page);
-               page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, pnum, gfp);
                if (page) {
                        BUG_ON(page->mapping != inode->i_mapping);
                        if (!PageUptodate(page)) {
-                               ret = ext4_mb_init_cache(page, NULL);
+                               ret = ext4_mb_init_cache(page, NULL, gfp);
                                if (ret) {
                                        unlock_page(page);
                                        goto err;
        if (page == NULL || !PageUptodate(page)) {
                if (page)
                        page_cache_release(page);
-               page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, pnum, gfp);
                if (page) {
                        BUG_ON(page->mapping != inode->i_mapping);
                        if (!PageUptodate(page)) {
-                               ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+                               ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+                                                        gfp);
                                if (ret) {
                                        unlock_page(page);
                                        goto err;
        return ret;
  }
  
+ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+                             struct ext4_buddy *e4b)
+ {
+       return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+ }
  static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
  {
        if (e4b->bd_bitmap_page)
@@@ -2047,7 -2054,7 +2054,7 @@@ static int ext4_mb_good_group(struct ex
  
        /* We only do this if the grp has never been initialized */
        if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-               int ret = ext4_mb_init_group(ac->ac_sb, group);
+               int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
                if (ret)
                        return ret;
        }
@@@ -2763,8 -2770,7 +2770,8 @@@ int ext4_mb_release(struct super_block 
  }
  
  static inline int ext4_issue_discard(struct super_block *sb,
 -              ext4_group_t block_group, ext4_grpblk_t cluster, int count)
 +              ext4_group_t block_group, ext4_grpblk_t cluster, int count,
 +              unsigned long flags)
  {
        ext4_fsblk_t discard_block;
  
        count = EXT4_C2B(EXT4_SB(sb), count);
        trace_ext4_discard_blocks(sb,
                        (unsigned long long) discard_block, count);
 -      return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
 +      return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
  }
  
  /*
@@@ -2795,7 -2801,7 +2802,7 @@@ static void ext4_free_data_callback(str
        if (test_opt(sb, DISCARD)) {
                err = ext4_issue_discard(sb, entry->efd_group,
                                         entry->efd_start_cluster,
 -                                       entry->efd_count);
 +                                       entry->efd_count, 0);
                if (err && err != -EOPNOTSUPP)
                        ext4_msg(sb, KERN_WARNING, "discard request in"
                                 " group:%d block:%d count:%d failed"
@@@ -4809,7 -4815,9 +4816,9 @@@ do_more
  #endif
        trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
  
-       err = ext4_mb_load_buddy(sb, block_group, &e4b);
+       /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+       err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+                                    GFP_NOFS|__GFP_NOFAIL);
        if (err)
                goto error_return;
  
                 * them with group lock_held
                 */
                if (test_opt(sb, DISCARD)) {
 -                      err = ext4_issue_discard(sb, block_group, bit, count);
 +                      err = ext4_issue_discard(sb, block_group, bit, count,
 +                                               0);
                        if (err && err != -EOPNOTSUPP)
                                ext4_msg(sb, KERN_WARNING, "discard request in"
                                         " group:%d block:%d count:%lu failed"
@@@ -5035,15 -5042,13 +5044,15 @@@ error_return
   * @count:    number of blocks to TRIM
   * @group:    alloc. group we are working with
   * @e4b:      ext4 buddy for the group
 + * @blkdev_flags: flags for the block device
   *
   * Trim "count" blocks starting at "start" in the "group". To assure that no
   * one will allocate those blocks, mark it as used in buddy bitmap. This must
   * be called with under the group lock.
   */
  static int ext4_trim_extent(struct super_block *sb, int start, int count,
 -                           ext4_group_t group, struct ext4_buddy *e4b)
 +                          ext4_group_t group, struct ext4_buddy *e4b,
 +                          unsigned long blkdev_flags)
  __releases(bitlock)
  __acquires(bitlock)
  {
         */
        mb_mark_used(e4b, &ex);
        ext4_unlock_group(sb, group);
 -      ret = ext4_issue_discard(sb, group, start, count);
 +      ret = ext4_issue_discard(sb, group, start, count, blkdev_flags);
        ext4_lock_group(sb, group);
        mb_free_blocks(NULL, e4b, start, ex.fe_len);
        return ret;
   * @start:            first group block to examine
   * @max:              last group block to examine
   * @minblocks:                minimum extent block count
 + * @blkdev_flags:     flags for the block device
   *
   * ext4_trim_all_free walks through group's buddy bitmap searching for free
   * extents. When the free block is found, ext4_trim_extent is called to TRIM
  static ext4_grpblk_t
  ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
                   ext4_grpblk_t start, ext4_grpblk_t max,
 -                 ext4_grpblk_t minblocks)
 +                 ext4_grpblk_t minblocks, unsigned long blkdev_flags)
  {
        void *bitmap;
        ext4_grpblk_t next, count = 0, free_count = 0;
  
                if ((next - start) >= minblocks) {
                        ret = ext4_trim_extent(sb, start,
 -                                             next - start, group, &e4b);
 +                                             next - start, group, &e4b,
 +                                             blkdev_flags);
                        if (ret && ret != -EOPNOTSUPP)
                                break;
                        ret = 0;
@@@ -5168,7 -5171,6 +5177,7 @@@ out
   * ext4_trim_fs() -- trim ioctl handle function
   * @sb:                       superblock for filesystem
   * @range:            fstrim_range structure
 + * @blkdev_flags:     flags for the block device
   *
   * start:     First Byte to trim
   * len:               number of Bytes to trim from start
   * start to start+len. For each such a group ext4_trim_all_free function
   * is invoked to trim all free space.
   */
 -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
 +                      unsigned long blkdev_flags)
  {
        struct ext4_group_info *grp;
        ext4_group_t group, first_group, last_group;
                grp = ext4_get_group_info(sb, group);
                /* We only do this if the grp has never been initialized */
                if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-                       ret = ext4_mb_init_group(sb, group);
+                       ret = ext4_mb_init_group(sb, group, GFP_NOFS);
                        if (ret)
                                break;
                }
  
                if (grp->bb_free >= minlen) {
                        cnt = ext4_trim_all_free(sb, group, first_cluster,
 -                                              end, minlen);
 +                                              end, minlen, blkdev_flags);
                        if (cnt < 0) {
                                ret = cnt;
                                break;
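
The mballoc changes follow a common refactoring pattern: the workhorse grows an explicit gfp_t parameter (ext4_mb_load_buddy_gfp) while the old name stays behind as a thin GFP_NOFS wrapper, so only the free path that needs GFP_NOFS|__GFP_NOFAIL has to change. A generic sketch of that pattern, with hypothetical names rather than ext4 code:

    #include <stdlib.h>

    typedef unsigned int gfp_t;
    #define GFP_NOFS     0x01u
    #define __GFP_NOFAIL 0x02u

    /* New workhorse: callers that care pass their own flags. */
    static void *cache_alloc_gfp(size_t size, gfp_t gfp)
    {
            (void)gfp;              /* a real allocator would honour the flags */
            return malloc(size);
    }

    /* Old entry point kept as a wrapper so existing callers stay unchanged. */
    static void *cache_alloc(size_t size)
    {
            return cache_alloc_gfp(size, GFP_NOFS);
    }

    int main(void)
    {
            void *a = cache_alloc(64);                              /* legacy path */
            void *b = cache_alloc_gfp(64, GFP_NOFS | __GFP_NOFAIL); /* new path    */

            free(a);
            free(b);
            return 0;
    }
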
diff --combined fs/proc/base.c
index df715a09532833e2897da2487c9402358e9be087,d2b8c754f627f55b7de6b4af51ef407a869c5276..0c9ea52ab3995829bd4b076bc6eba82297669ec8
@@@ -1545,18 -1545,13 +1545,13 @@@ static const struct file_operations pro
  static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
  {
        struct task_struct *task;
-       struct mm_struct *mm;
        struct file *exe_file;
  
        task = get_proc_task(d_inode(dentry));
        if (!task)
                return -ENOENT;
-       mm = get_task_mm(task);
+       exe_file = get_task_exe_file(task);
        put_task_struct(task);
-       if (!mm)
-               return -ENOENT;
-       exe_file = get_mm_exe_file(mm);
-       mmput(mm);
        if (exe_file) {
                *exe_path = exe_file->f_path;
                path_get(&exe_file->f_path);
@@@ -2245,92 -2240,6 +2240,92 @@@ static const struct file_operations pro
        .release        = seq_release_private,
  };
  
 +static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
 +                                      size_t count, loff_t *offset)
 +{
 +      struct inode *inode = file_inode(file);
 +      struct task_struct *p;
 +      u64 slack_ns;
 +      int err;
 +
 +      err = kstrtoull_from_user(buf, count, 10, &slack_ns);
 +      if (err < 0)
 +              return err;
 +
 +      p = get_proc_task(inode);
 +      if (!p)
 +              return -ESRCH;
 +
 +      if (p != current) {
 +              if (!capable(CAP_SYS_NICE)) {
 +                      count = -EPERM;
 +                      goto out;
 +              }
 +
 +              err = security_task_setscheduler(p);
 +              if (err) {
 +                      count = err;
 +                      goto out;
 +              }
 +      }
 +
 +      task_lock(p);
 +      if (slack_ns == 0)
 +              p->timer_slack_ns = p->default_timer_slack_ns;
 +      else
 +              p->timer_slack_ns = slack_ns;
 +      task_unlock(p);
 +
 +out:
 +      put_task_struct(p);
 +
 +      return count;
 +}
 +
 +static int timerslack_ns_show(struct seq_file *m, void *v)
 +{
 +      struct inode *inode = m->private;
 +      struct task_struct *p;
 +      int err = 0;
 +
 +      p = get_proc_task(inode);
 +      if (!p)
 +              return -ESRCH;
 +
 +      if (p != current) {
 +
 +              if (!capable(CAP_SYS_NICE)) {
 +                      err = -EPERM;
 +                      goto out;
 +              }
 +              err = security_task_getscheduler(p);
 +              if (err)
 +                      goto out;
 +      }
 +
 +      task_lock(p);
 +      seq_printf(m, "%llu\n", p->timer_slack_ns);
 +      task_unlock(p);
 +
 +out:
 +      put_task_struct(p);
 +
 +      return err;
 +}
 +
 +static int timerslack_ns_open(struct inode *inode, struct file *filp)
 +{
 +      return single_open(filp, timerslack_ns_show, inode);
 +}
 +
 +static const struct file_operations proc_pid_set_timerslack_ns_operations = {
 +      .open           = timerslack_ns_open,
 +      .read           = seq_read,
 +      .write          = timerslack_ns_write,
 +      .llseek         = seq_lseek,
 +      .release        = single_release,
 +};
 +
  static int proc_pident_instantiate(struct inode *dir,
        struct dentry *dentry, struct task_struct *task, const void *ptr)
  {
@@@ -2881,8 -2790,8 +2876,8 @@@ static const struct pid_entry tgid_base
        ONE("cgroup",  S_IRUGO, proc_cgroup_show),
  #endif
        ONE("oom_score",  S_IRUGO, proc_oom_score),
 -      REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 -      REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 +      REG("oom_adj",    S_IRUSR, proc_oom_adj_operations),
 +      REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
  #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
        REG("sessionid",  S_IRUGO, proc_sessionid_operations),
  #ifdef CONFIG_CHECKPOINT_RESTORE
        REG("timers",     S_IRUGO, proc_timers_operations),
  #endif
 +      REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
  };
  
  static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@@ -3165,44 -3073,6 +3160,44 @@@ int proc_pid_readdir(struct file *file
        return 0;
  }
  
 +/*
 + * proc_tid_comm_permission is a special permission function exclusively
 + * used for the node /proc/<pid>/task/<tid>/comm.
 + * It bypasses generic permission checks in the case where a task of the same
 + * task group attempts to access the node.
 + * The rationale behind this is that glibc and bionic access this node for
 + * cross thread naming (pthread_set/getname_np(!self)). However, if
 + * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
 + * which locks out the cross thread naming implementation.
 + * This function makes sure that the node is always accessible for members of
 + * same thread group.
 + */
 +static int proc_tid_comm_permission(struct inode *inode, int mask)
 +{
 +      bool is_same_tgroup;
 +      struct task_struct *task;
 +
 +      task = get_proc_task(inode);
 +      if (!task)
 +              return -ESRCH;
 +      is_same_tgroup = same_thread_group(current, task);
 +      put_task_struct(task);
 +
 +      if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
 +              /* This file (/proc/<pid>/task/<tid>/comm) can always be
 +               * read or written by the members of the corresponding
 +               * thread group.
 +               */
 +              return 0;
 +      }
 +
 +      return generic_permission(inode, mask);
 +}
 +
 +static const struct inode_operations proc_tid_comm_inode_operations = {
 +              .permission = proc_tid_comm_permission,
 +};
 +
  /*
   * Tasks
   */
@@@ -3221,9 -3091,7 +3216,9 @@@ static const struct pid_entry tid_base_
  #ifdef CONFIG_SCHED_DEBUG
        REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
  #endif
 -      REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 +      NOD("comm",      S_IFREG|S_IRUGO|S_IWUSR,
 +                       &proc_tid_comm_inode_operations,
 +                       &proc_pid_set_comm_operations, {}),
  #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
        ONE("syscall",   S_IRUSR, proc_pid_syscall),
  #endif
        ONE("cgroup",  S_IRUGO, proc_cgroup_show),
  #endif
        ONE("oom_score", S_IRUGO, proc_oom_score),
 -      REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 -      REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 +      REG("oom_adj",   S_IRUSR, proc_oom_adj_operations),
 +      REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
  #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
        REG("sessionid",  S_IRUGO, proc_sessionid_operations),
diff --combined include/linux/mm.h
index 3ea86204a32bdb51b574ed27d4954c24f2492516,cfebb742ee18e021d1c6ff3a4037f2226231c395..b009ab1a69a0023ff7569ede1376f927ce7a9cb3
@@@ -51,17 -51,6 +51,17 @@@ extern int sysctl_legacy_va_layout
  #define sysctl_legacy_va_layout 0
  #endif
  
 +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
 +extern const int mmap_rnd_bits_min;
 +extern const int mmap_rnd_bits_max;
 +extern int mmap_rnd_bits __read_mostly;
 +#endif
 +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
 +extern const int mmap_rnd_compat_bits_min;
 +extern const int mmap_rnd_compat_bits_max;
 +extern int mmap_rnd_compat_bits __read_mostly;
 +#endif
 +
  #include <asm/page.h>
  #include <asm/pgtable.h>
  #include <asm/processor.h>
@@@ -1070,7 -1059,6 +1070,7 @@@ extern void pagefault_out_of_memory(voi
  extern void show_free_areas(unsigned int flags);
  extern bool skip_free_areas_node(unsigned int flags, int nid);
  
 +void shmem_set_file(struct vm_area_struct *vma, struct file *file);
  int shmem_zero_setup(struct vm_area_struct *);
  #ifdef CONFIG_SHMEM
  bool shmem_mapping(struct address_space *mapping);
@@@ -1878,7 -1866,7 +1878,7 @@@ extern int vma_adjust(struct vm_area_st
  extern struct vm_area_struct *vma_merge(struct mm_struct *,
        struct vm_area_struct *prev, unsigned long addr, unsigned long end,
        unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
 -      struct mempolicy *, struct vm_userfaultfd_ctx);
 +      struct mempolicy *, struct vm_userfaultfd_ctx, const char __user *);
  extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
  extern int split_vma(struct mm_struct *,
        struct vm_area_struct *, unsigned long addr, int new_below);
@@@ -1910,6 -1898,7 +1910,7 @@@ extern void mm_drop_all_locks(struct mm
  
  extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
  extern struct file *get_mm_exe_file(struct mm_struct *mm);
+ extern struct file *get_task_exe_file(struct task_struct *task);
  
  extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
  extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
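
The mm.h hunk exposes the mmap_rnd_bits/mmap_rnd_compat_bits variables (and their min/max clamps) so the arch code and the sysctl handler added elsewhere in this series can make mmap ASLR entropy tunable. On kernels carrying the full series they surface as /proc/sys/vm/mmap_rnd_bits and /proc/sys/vm/mmap_rnd_compat_bits; a small reader, assuming that proc file exists:

    #include <stdio.h>

    int main(void)
    {
            /* Value is clamped to [mmap_rnd_bits_min, mmap_rnd_bits_max]
             * as declared in the hunk above. */
            FILE *f = fopen("/proc/sys/vm/mmap_rnd_bits", "r");
            int bits;

            if (!f || fscanf(f, "%d", &bits) != 1) {
                    perror("mmap_rnd_bits");
                    return 1;
            }
            printf("mmap randomization: %d bits of entropy\n", bits);
            fclose(f);
            return 0;
    }
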
diff --combined kernel/cpuset.c
index e2e294d997e0c9b8b5f26b5626649c984ffa44db,b9279a2844d81656115195235df679ff2b2291e0..e5bd044c1cbb4a412f7dedfcb8da29ac80f591b7
@@@ -98,7 -98,6 +98,7 @@@ struct cpuset 
  
        /* user-configured CPUs and Memory Nodes allow to tasks */
        cpumask_var_t cpus_allowed;
 +      cpumask_var_t cpus_requested;
        nodemask_t mems_allowed;
  
        /* effective CPUs and Memory Nodes allow to tasks */
@@@ -387,7 -386,7 +387,7 @@@ static void cpuset_update_task_spread_f
  
  static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
  {
 -      return  cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
 +      return  cpumask_subset(p->cpus_requested, q->cpus_requested) &&
                nodes_subset(p->mems_allowed, q->mems_allowed) &&
                is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
                is_mem_exclusive(p) <= is_mem_exclusive(q);
@@@ -487,7 -486,7 +487,7 @@@ static int validate_change(struct cpuse
        cpuset_for_each_child(c, css, par) {
                if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
                    c != cur &&
 -                  cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
 +                  cpumask_intersects(trial->cpus_requested, c->cpus_requested))
                        goto out;
                if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
                    c != cur &&
@@@ -946,18 -945,17 +946,18 @@@ static int update_cpumask(struct cpuse
        if (!*buf) {
                cpumask_clear(trialcs->cpus_allowed);
        } else {
 -              retval = cpulist_parse(buf, trialcs->cpus_allowed);
 +              retval = cpulist_parse(buf, trialcs->cpus_requested);
                if (retval < 0)
                        return retval;
  
 -              if (!cpumask_subset(trialcs->cpus_allowed,
 -                                  top_cpuset.cpus_allowed))
 +              if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
                        return -EINVAL;
 +
 +              cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
        }
  
        /* Nothing to do if the cpus didn't change */
 -      if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
 +      if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
                return 0;
  
        retval = validate_change(cs, trialcs);
  
        spin_lock_irq(&callback_lock);
        cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
 +      cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
        spin_unlock_irq(&callback_lock);
  
        /* use trialcs->cpus_allowed as a temp variable */
@@@ -1757,7 -1754,7 +1757,7 @@@ static int cpuset_common_seq_show(struc
  
        switch (type) {
        case FILE_CPULIST:
 -              seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
 +              seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested));
                break;
        case FILE_MEMLIST:
                seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
@@@ -1946,14 -1943,11 +1946,14 @@@ cpuset_css_alloc(struct cgroup_subsys_s
                return ERR_PTR(-ENOMEM);
        if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
                goto free_cs;
 +      if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL))
 +              goto free_allowed;
        if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
 -              goto free_cpus;
 +              goto free_requested;
  
        set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
        cpumask_clear(cs->cpus_allowed);
 +      cpumask_clear(cs->cpus_requested);
        nodes_clear(cs->mems_allowed);
        cpumask_clear(cs->effective_cpus);
        nodes_clear(cs->effective_mems);
  
        return &cs->css;
  
 -free_cpus:
 +free_requested:
 +      free_cpumask_var(cs->cpus_requested);
 +free_allowed:
        free_cpumask_var(cs->cpus_allowed);
  free_cs:
        kfree(cs);
@@@ -2027,7 -2019,6 +2027,7 @@@ static int cpuset_css_online(struct cgr
        cs->mems_allowed = parent->mems_allowed;
        cs->effective_mems = parent->mems_allowed;
        cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
 +      cpumask_copy(cs->cpus_requested, parent->cpus_requested);
        cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
        spin_unlock_irq(&callback_lock);
  out_unlock:
@@@ -2062,7 -2053,6 +2062,7 @@@ static void cpuset_css_free(struct cgro
  
        free_cpumask_var(cs->effective_cpus);
        free_cpumask_var(cs->cpus_allowed);
 +      free_cpumask_var(cs->cpus_requested);
        kfree(cs);
  }
  
@@@ -2084,34 -2074,31 +2084,49 @@@ static void cpuset_bind(struct cgroup_s
        mutex_unlock(&cpuset_mutex);
  }
  
 +static int cpuset_allow_attach(struct cgroup_taskset *tset)
 +{
 +      const struct cred *cred = current_cred(), *tcred;
 +      struct task_struct *task;
 +      struct cgroup_subsys_state *css;
 +
 +      cgroup_taskset_for_each(task, css, tset) {
 +              tcred = __task_cred(task);
 +
 +              if ((current != task) && !capable(CAP_SYS_ADMIN) &&
 +                   cred->euid.val != tcred->uid.val && cred->euid.val != tcred->suid.val)
 +                      return -EACCES;
 +      }
 +
 +      return 0;
 +}
 +
+ /*
+  * Make sure the new task conforms to the current state of its parent,
+  * which could have been changed by cpuset just after it inherits the
+  * state from the parent and before it sits on the cgroup's task list.
+  */
+ void cpuset_fork(struct task_struct *task, void *priv)
+ {
+       if (task_css_is_root(task, cpuset_cgrp_id))
+               return;
+       set_cpus_allowed_ptr(task, &current->cpus_allowed);
+       task->mems_allowed = current->mems_allowed;
+ }
  struct cgroup_subsys cpuset_cgrp_subsys = {
        .css_alloc      = cpuset_css_alloc,
        .css_online     = cpuset_css_online,
        .css_offline    = cpuset_css_offline,
        .css_free       = cpuset_css_free,
        .can_attach     = cpuset_can_attach,
 +      .allow_attach   = cpuset_allow_attach,
        .cancel_attach  = cpuset_cancel_attach,
        .attach         = cpuset_attach,
        .post_attach    = cpuset_post_attach,
        .bind           = cpuset_bind,
+       .fork           = cpuset_fork,
        .legacy_cftypes = files,
        .early_init     = 1,
  };
@@@ -2130,11 -2117,8 +2145,11 @@@ int __init cpuset_init(void
                BUG();
        if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
                BUG();
 +      if (!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL))
 +              BUG();
  
        cpumask_setall(top_cpuset.cpus_allowed);
 +      cpumask_setall(top_cpuset.cpus_requested);
        nodes_setall(top_cpuset.mems_allowed);
        cpumask_setall(top_cpuset.effective_cpus);
        nodes_setall(top_cpuset.effective_mems);
@@@ -2268,7 -2252,7 +2283,7 @@@ retry
                goto retry;
        }
  
 -      cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus);
 +      cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus);
        nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems);
  
        cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
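
With the cpuset changes, a write to cpuset.cpus is parsed into cpus_requested, checked against cpu_present_mask, and only then ANDed with cpu_active_mask to form cpus_allowed, so a request may legitimately name CPUs that are present but currently offline and hotplug can restore them later. A sketch of the user-visible side; the cgroup mount point and group name are assumptions:

    #include <stdio.h>

    int main(void)
    {
            /* Assumes the legacy cpuset hierarchy is mounted here and a
             * "mygroup" cpuset exists; adjust for the real layout. The
             * written list is kept in cpus_requested even if some of the
             * named CPUs are offline at the time of the write. */
            FILE *f = fopen("/sys/fs/cgroup/cpuset/mygroup/cpuset.cpus", "w");

            if (!f) {
                    perror("cpuset.cpus");
                    return 1;
            }
            fprintf(f, "0-3\n");
            if (fclose(f) != 0) {
                    perror("cpuset.cpus");
                    return 1;
            }
            return 0;
    }
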
diff --combined kernel/fork.c
index d6a6da547e415427603294f41827215b71c56c20,8860d1f50d24a3039dc46bc95d5289817fb2241f..a1d163250909c7c33b4316dffdacda34d06a19c6
@@@ -763,6 -763,29 +763,29 @@@ struct file *get_mm_exe_file(struct mm_
  }
  EXPORT_SYMBOL(get_mm_exe_file);
  
+ /**
+  * get_task_exe_file - acquire a reference to the task's executable file
+  *
+  * Returns %NULL if task's mm (if any) has no associated executable file or
+  * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
+  * User must release file via fput().
+  */
+ struct file *get_task_exe_file(struct task_struct *task)
+ {
+       struct file *exe_file = NULL;
+       struct mm_struct *mm;
+       task_lock(task);
+       mm = task->mm;
+       if (mm) {
+               if (!(task->flags & PF_KTHREAD))
+                       exe_file = get_mm_exe_file(mm);
+       }
+       task_unlock(task);
+       return exe_file;
+ }
+ EXPORT_SYMBOL(get_task_exe_file);
  /**
   * get_task_mm - acquire a reference to the task's mm
   *
@@@ -800,8 -823,7 +823,8 @@@ struct mm_struct *mm_access(struct task
  
        mm = get_task_mm(task);
        if (mm && mm != current->mm &&
 -                      !ptrace_may_access(task, mode)) {
 +                      !ptrace_may_access(task, mode) &&
 +                      !capable(CAP_SYS_RESOURCE)) {
                mmput(mm);
                mm = ERR_PTR(-EACCES);
        }
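
get_task_exe_file() takes task_lock(), grabs a reference to the mm's exe file directly and skips kernel threads, which lets proc_exe_link() above avoid the get_task_mm()/mmput() round trip. The user-visible consumer of that path is the /proc/<pid>/exe symlink; a small reader:

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* readlink() on /proc/<pid>/exe ends up in proc_exe_link(),
             * which now resolves the file via get_task_exe_file(). */
            char path[4096];
            ssize_t n = readlink("/proc/self/exe", path, sizeof(path) - 1);

            if (n < 0) {
                    perror("readlink");
                    return 1;
            }
            path[n] = '\0';
            printf("running binary: %s\n", path);
            return 0;
    }
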
diff --combined kernel/sched/core.c
index 778335a7140c0985de3004954287e9279a6491da,20253dbc86103a544b2ad875d18c53a2cc36ddd3..01cb249109ccaad4f9fb0b191eea0a30a72638dc
@@@ -89,7 -89,6 +89,7 @@@
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/sched.h>
 +#include "walt.h"
  
  DEFINE_MUTEX(sched_domains_mutex);
  DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@@ -288,18 -287,6 +288,18 @@@ int sysctl_sched_rt_runtime = 950000
  /* cpus with isolated domains */
  cpumask_var_t cpu_isolated_map;
  
 +struct rq *
 +lock_rq_of(struct task_struct *p, unsigned long *flags)
 +{
 +      return task_rq_lock(p, flags);
 +}
 +
 +void
 +unlock_rq_of(struct rq *rq, struct task_struct *p, unsigned long *flags)
 +{
 +      task_rq_unlock(rq, p, flags);
 +}
 +
  /*
   * this_rq_lock - lock this runqueue and disable interrupts.
   */
@@@ -1089,9 -1076,7 +1089,9 @@@ static struct rq *move_queued_task(stru
  
        dequeue_task(rq, p, 0);
        p->on_rq = TASK_ON_RQ_MIGRATING;
 +      double_lock_balance(rq, cpu_rq(new_cpu));
        set_task_cpu(p, new_cpu);
 +      double_unlock_balance(rq, cpu_rq(new_cpu));
        raw_spin_unlock(&rq->lock);
  
        rq = cpu_rq(new_cpu);
@@@ -1315,8 -1300,6 +1315,8 @@@ void set_task_cpu(struct task_struct *p
                        p->sched_class->migrate_task_rq(p);
                p->se.nr_migrations++;
                perf_event_task_migrate(p);
 +
 +              walt_fixup_busy_time(p, new_cpu);
        }
  
        __set_task_cpu(p, new_cpu);
@@@ -1945,10 -1928,6 +1945,10 @@@ try_to_wake_up(struct task_struct *p, u
  {
        unsigned long flags;
        int cpu, success = 0;
 +#ifdef CONFIG_SMP
 +      struct rq *rq;
 +      u64 wallclock;
 +#endif
  
        /*
         * If we are going to wake up a thread waiting for CONDITION we
        success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);
  
+       /*
+        * Ensure we load p->on_rq _after_ p->state, otherwise it would
+        * be possible to, falsely, observe p->on_rq == 0 and get stuck
+        * in smp_cond_load_acquire() below.
+        *
+        * sched_ttwu_pending()                 try_to_wake_up()
+        *   [S] p->on_rq = 1;                  [L] P->state
+        *       UNLOCK rq->lock  -----.
+        *                              \
+        *                               +---   RMB
+        * schedule()                   /
+        *       LOCK rq->lock    -----'
+        *       UNLOCK rq->lock
+        *
+        * [task p]
+        *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+        *
+        * Pairs with the UNLOCK+LOCK on rq->lock from the
+        * last wakeup of our task and the schedule that got our task
+        * current.
+        */
+       smp_rmb();
        if (p->on_rq && ttwu_remote(p, wake_flags))
                goto stat;
  
         */
        smp_rmb();
  
 +      rq = cpu_rq(task_cpu(p));
 +
 +      raw_spin_lock(&rq->lock);
 +      wallclock = walt_ktime_clock();
 +      walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 +      walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
 +      raw_spin_unlock(&rq->lock);
 +
        p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;
  
                p->sched_class->task_waking(p);
  
        cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 +
        if (task_cpu(p) != cpu) {
                wake_flags |= WF_MIGRATED;
                set_task_cpu(p, cpu);
        }
 +
  #endif /* CONFIG_SMP */
  
        ttwu_queue(p, cpu);
@@@ -2075,13 -2066,8 +2097,13 @@@ static void try_to_wake_up_local(struc
  
        trace_sched_waking(p);
  
 -      if (!task_on_rq_queued(p))
 +      if (!task_on_rq_queued(p)) {
 +              u64 wallclock = walt_ktime_clock();
 +
 +              walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 +              walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 +      }
  
        ttwu_do_wakeup(rq, p, 0);
        ttwu_stat(p, smp_processor_id(), 0);
@@@ -2147,7 -2133,6 +2169,7 @@@ static void __sched_fork(unsigned long 
        p->se.nr_migrations             = 0;
        p->se.vruntime                  = 0;
        INIT_LIST_HEAD(&p->se.group_node);
 +      walt_init_new_task_load(p);
  
  #ifdef CONFIG_SCHEDSTATS
        memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@@ -2415,9 -2400,6 +2437,9 @@@ void wake_up_new_task(struct task_struc
        struct rq *rq;
  
        raw_spin_lock_irqsave(&p->pi_lock, flags);
 +
 +      walt_init_new_task_load(p);
 +
        /* Initialize new task's runnable average */
        init_entity_runnable_average(&p->se);
  #ifdef CONFIG_SMP
  #endif
  
        rq = __task_rq_lock(p);
 -      activate_task(rq, p, 0);
 +      walt_mark_task_starting(p);
 +      activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
        p->on_rq = TASK_ON_RQ_QUEUED;
        trace_sched_wakeup_new(p);
        check_preempt_curr(rq, p, WF_FORK);
@@@ -2812,36 -2793,6 +2834,36 @@@ unsigned long nr_iowait_cpu(int cpu
        return atomic_read(&this->nr_iowait);
  }
  
 +#ifdef CONFIG_CPU_QUIET
 +u64 nr_running_integral(unsigned int cpu)
 +{
 +      unsigned int seqcnt;
 +      u64 integral;
 +      struct rq *q;
 +
 +      if (cpu >= nr_cpu_ids)
 +              return 0;
 +
 +      q = cpu_rq(cpu);
 +
 +      /*
 +       * Update the average to avoid reading a stale value if there were
 +       * no run-queue changes for a long time. On the other hand, if
 +       * changes are happening right now, just read the current value
 +       * directly.
 +       */
 +
 +      seqcnt = read_seqcount_begin(&q->ave_seqcnt);
 +      integral = do_nr_running_integral(q);
 +      if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
 +              read_seqcount_begin(&q->ave_seqcnt);
 +              integral = q->nr_running_integral;
 +      }
 +
 +      return integral;
 +}
 +#endif
 +
  void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
  {
        struct rq *rq = this_rq();
@@@ -2928,93 -2879,6 +2950,93 @@@ unsigned long long task_sched_runtime(s
        return ns;
  }
  
 +#ifdef CONFIG_CPU_FREQ_GOV_SCHED
 +
 +static inline
 +unsigned long add_capacity_margin(unsigned long cpu_capacity)
 +{
 +      cpu_capacity  = cpu_capacity * capacity_margin;
 +      cpu_capacity /= SCHED_CAPACITY_SCALE;
 +      return cpu_capacity;
 +}
 +
 +static inline
 +unsigned long sum_capacity_reqs(unsigned long cfs_cap,
 +                              struct sched_capacity_reqs *scr)
 +{
 +      unsigned long total = add_capacity_margin(cfs_cap + scr->rt);
 +      return total += scr->dl;
 +}
 +
 +static void sched_freq_tick_pelt(int cpu)
 +{
 +      unsigned long cpu_utilization = capacity_max;
 +      unsigned long capacity_curr = capacity_curr_of(cpu);
 +      struct sched_capacity_reqs *scr;
 +
 +      scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
 +      if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr)
 +              return;
 +
 +      /*
 +       * To make free room for a task that is building up its "real"
 +       * utilization and to harm its performance the least, request
 +       * a jump to a higher OPP as soon as the margin of free capacity
 +       * is impacted (specified by capacity_margin).
 +       */
 +      set_cfs_cpu_capacity(cpu, true, cpu_utilization);
 +}
 +
 +#ifdef CONFIG_SCHED_WALT
 +static void sched_freq_tick_walt(int cpu)
 +{
 +      unsigned long cpu_utilization = cpu_util(cpu);
 +      unsigned long capacity_curr = capacity_curr_of(cpu);
 +
 +      if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
 +              return sched_freq_tick_pelt(cpu);
 +
 +      /*
 +       * Add a margin to the WALT utilization.
 +       * NOTE: WALT tracks a single CPU signal for all the scheduling
 +       * classes, thus this margin is going to be added to the DL class as
 +       * well, which is something we do not do in sched_freq_tick_pelt case.
 +       */
 +      cpu_utilization = add_capacity_margin(cpu_utilization);
 +      if (cpu_utilization <= capacity_curr)
 +              return;
 +
 +      /*
 +       * It is likely that the load is growing so we
 +       * keep the added margin in our request as an
 +       * extra boost.
 +       */
 +      set_cfs_cpu_capacity(cpu, true, cpu_utilization);
 +
 +}
 +#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
 +#else
 +#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
 +#endif /* CONFIG_SCHED_WALT */
 +
 +static void sched_freq_tick(int cpu)
 +{
 +      unsigned long capacity_orig, capacity_curr;
 +
 +      if (!sched_freq())
 +              return;
 +
 +      capacity_orig = capacity_orig_of(cpu);
 +      capacity_curr = capacity_curr_of(cpu);
 +      if (capacity_curr == capacity_orig)
 +              return;
 +
 +      _sched_freq_tick(cpu);
 +}
 +#else
 +static inline void sched_freq_tick(int cpu) { }
 +#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
 +
  /*
   * This function gets called by the timer code, with HZ frequency.
   * We call it with interrupts disabled.
@@@ -3028,14 -2892,10 +3050,14 @@@ void scheduler_tick(void
        sched_clock_tick();
  
        raw_spin_lock(&rq->lock);
 +      walt_set_window_start(rq);
        update_rq_clock(rq);
        curr->sched_class->task_tick(rq, curr, 0);
        update_cpu_load_active(rq);
 +      walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
 +                      walt_ktime_clock(), 0);
        calc_global_load_tick(rq);
 +      sched_freq_tick(cpu);
        raw_spin_unlock(&rq->lock);
  
        perf_event_task_tick();
@@@ -3272,7 -3132,6 +3294,7 @@@ static void __sched notrace __schedule(
        unsigned long *switch_count;
        struct rq *rq;
        int cpu;
 +      u64 wallclock;
  
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
                update_rq_clock(rq);
  
        next = pick_next_task(rq, prev);
 +      wallclock = walt_ktime_clock();
 +      walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
 +      walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
        clear_tsk_need_resched(prev);
        clear_preempt_need_resched();
        rq->clock_skip_update = 0;
@@@ -5163,7 -5019,6 +5185,7 @@@ void init_idle(struct task_struct *idle
        raw_spin_lock(&rq->lock);
  
        __sched_fork(0, idle);
 +
        idle->state = TASK_RUNNING;
        idle->se.exec_start = sched_clock();
  
@@@ -5545,61 -5400,10 +5567,61 @@@ set_table_entry(struct ctl_table *entry
        }
  }
  
 +static struct ctl_table *
 +sd_alloc_ctl_energy_table(struct sched_group_energy *sge)
 +{
 +      struct ctl_table *table = sd_alloc_ctl_entry(5);
 +
 +      if (table == NULL)
 +              return NULL;
 +
 +      set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states,
 +                      sizeof(int), 0644, proc_dointvec_minmax, false);
 +      set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power,
 +                      sge->nr_idle_states*sizeof(struct idle_state), 0644,
 +                      proc_doulongvec_minmax, false);
 +      set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states,
 +                      sizeof(int), 0644, proc_dointvec_minmax, false);
 +      set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap,
 +                      sge->nr_cap_states*sizeof(struct capacity_state), 0644,
 +                      proc_doulongvec_minmax, false);
 +
 +      return table;
 +}
 +
 +static struct ctl_table *
 +sd_alloc_ctl_group_table(struct sched_group *sg)
 +{
 +      struct ctl_table *table = sd_alloc_ctl_entry(2);
 +
 +      if (table == NULL)
 +              return NULL;
 +
 +      table->procname = kstrdup("energy", GFP_KERNEL);
 +      table->mode = 0555;
 +      table->child = sd_alloc_ctl_energy_table((struct sched_group_energy *)sg->sge);
 +
 +      return table;
 +}
 +
  static struct ctl_table *
  sd_alloc_ctl_domain_table(struct sched_domain *sd)
  {
 -      struct ctl_table *table = sd_alloc_ctl_entry(14);
 +      struct ctl_table *table;
 +      unsigned int nr_entries = 14;
 +
 +      int i = 0;
 +      struct sched_group *sg = sd->groups;
 +
 +      if (sg->sge) {
 +              int nr_sgs = 0;
 +
 +              do {} while (nr_sgs++, sg = sg->next, sg != sd->groups);
 +
 +              nr_entries += nr_sgs;
 +      }
 +
 +      table = sd_alloc_ctl_entry(nr_entries);
  
        if (table == NULL)
                return NULL;
                sizeof(long), 0644, proc_doulongvec_minmax, false);
        set_table_entry(&table[12], "name", sd->name,
                CORENAME_MAX_SIZE, 0444, proc_dostring, false);
 -      /* &table[13] is terminator */
 +      sg = sd->groups;
 +      if (sg->sge) {
 +              char buf[32];
 +              struct ctl_table *entry = &table[13];
 +
 +              do {
 +                      snprintf(buf, 32, "group%d", i);
 +                      entry->procname = kstrdup(buf, GFP_KERNEL);
 +                      entry->mode = 0555;
 +                      entry->child = sd_alloc_ctl_group_table(sg);
 +              } while (entry++, i++, sg = sg->next, sg != sd->groups);
 +      }
 +      /* &table[nr_entries-1] is terminator */
  
        return table;
  }
@@@ -5760,9 -5552,6 +5782,9 @@@ migration_call(struct notifier_block *n
        switch (action & ~CPU_TASKS_FROZEN) {
  
        case CPU_UP_PREPARE:
 +              raw_spin_lock_irqsave(&rq->lock, flags);
 +              walt_set_window_start(rq);
 +              raw_spin_unlock_irqrestore(&rq->lock, flags);
                rq->calc_load_update = calc_load_update;
                account_reset_rq(rq);
                break;
                sched_ttwu_pending();
                /* Update our root-domain */
                raw_spin_lock_irqsave(&rq->lock, flags);
 +              walt_migrate_sync_cpu(cpu);
                if (rq->rd) {
                        BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
                        set_rq_offline(rq);
@@@ -5955,7 -5743,7 +5977,7 @@@ static int sched_domain_debug_one(struc
                printk(KERN_CONT " %*pbl",
                       cpumask_pr_args(sched_group_cpus(group)));
                if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
 -                      printk(KERN_CONT " (cpu_capacity = %d)",
 +                      printk(KERN_CONT " (cpu_capacity = %lu)",
                                group->sgc->capacity);
                }
  
@@@ -6016,8 -5804,7 +6038,8 @@@ static int sd_degenerate(struct sched_d
                         SD_BALANCE_EXEC |
                         SD_SHARE_CPUCAPACITY |
                         SD_SHARE_PKG_RESOURCES |
 -                       SD_SHARE_POWERDOMAIN)) {
 +                       SD_SHARE_POWERDOMAIN |
 +                       SD_SHARE_CAP_STATES)) {
                if (sd->groups != sd->groups->next)
                        return 0;
        }
@@@ -6049,8 -5836,7 +6071,8 @@@ sd_parent_degenerate(struct sched_domai
                                SD_SHARE_CPUCAPACITY |
                                SD_SHARE_PKG_RESOURCES |
                                SD_PREFER_SIBLING |
 -                              SD_SHARE_POWERDOMAIN);
 +                              SD_SHARE_POWERDOMAIN |
 +                              SD_SHARE_CAP_STATES);
                if (nr_node_ids == 1)
                        pflags &= ~SD_SERIALIZE;
        }
@@@ -6129,8 -5915,6 +6151,8 @@@ static int init_rootdomain(struct root_
  
        if (cpupri_init(&rd->cpupri) != 0)
                goto free_rto_mask;
 +
 +      init_max_cpu_capacity(&rd->max_cpu_capacity);
        return 0;
  
  free_rto_mask:
@@@ -6236,13 -6020,11 +6258,13 @@@ DEFINE_PER_CPU(int, sd_llc_id)
  DEFINE_PER_CPU(struct sched_domain *, sd_numa);
  DEFINE_PER_CPU(struct sched_domain *, sd_busy);
  DEFINE_PER_CPU(struct sched_domain *, sd_asym);
 +DEFINE_PER_CPU(struct sched_domain *, sd_ea);
 +DEFINE_PER_CPU(struct sched_domain *, sd_scs);
  
  static void update_top_cache_domain(int cpu)
  {
        struct sched_domain *sd;
 -      struct sched_domain *busy_sd = NULL;
 +      struct sched_domain *busy_sd = NULL, *ea_sd = NULL;
        int id = cpu;
        int size = 1;
  
  
        sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
        rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
 +
 +      for_each_domain(cpu, sd) {
 +              if (sd->groups->sge)
 +                      ea_sd = sd;
 +              else
 +                      break;
 +      }
 +      rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd);
 +
 +      sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES);
 +      rcu_assign_pointer(per_cpu(sd_scs, cpu), sd);
  }
  
  /*
@@@ -6434,7 -6205,6 +6456,7 @@@ build_overlap_sched_groups(struct sched
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
 +              sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
  
                /*
                 * Make sure the first group of this domain contains the
@@@ -6563,66 -6333,6 +6585,66 @@@ static void init_sched_groups_capacity(
        atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
  }
  
 +/*
 + * Check that the per-cpu provided sd energy data is consistent for all cpus
 + * within the mask.
 + */
 +static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn,
 +                                         const struct cpumask *cpumask)
 +{
 +      const struct sched_group_energy * const sge = fn(cpu);
 +      struct cpumask mask;
 +      int i;
 +
 +      if (cpumask_weight(cpumask) <= 1)
 +              return;
 +
 +      cpumask_xor(&mask, cpumask, get_cpu_mask(cpu));
 +
 +      for_each_cpu(i, &mask) {
 +              const struct sched_group_energy * const e = fn(i);
 +              int y;
 +
 +              BUG_ON(e->nr_idle_states != sge->nr_idle_states);
 +
 +              for (y = 0; y < (e->nr_idle_states); y++) {
 +                      BUG_ON(e->idle_states[y].power !=
 +                                      sge->idle_states[y].power);
 +              }
 +
 +              BUG_ON(e->nr_cap_states != sge->nr_cap_states);
 +
 +              for (y = 0; y < (e->nr_cap_states); y++) {
 +                      BUG_ON(e->cap_states[y].cap != sge->cap_states[y].cap);
 +                      BUG_ON(e->cap_states[y].power !=
 +                                      sge->cap_states[y].power);
 +              }
 +      }
 +}
 +
 +static void init_sched_energy(int cpu, struct sched_domain *sd,
 +                            sched_domain_energy_f fn)
 +{
 +      if (!(fn && fn(cpu)))
 +              return;
 +
 +      if (cpu != group_balance_cpu(sd->groups))
 +              return;
 +
 +      if (sd->child && !sd->child->groups->sge) {
 +              pr_err("BUG: EAS setup broken for CPU%d\n", cpu);
 +#ifdef CONFIG_SCHED_DEBUG
 +              pr_err("     energy data on %s but not on %s domain\n",
 +                      sd->name, sd->child->name);
 +#endif
 +              return;
 +      }
 +
 +      check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups));
 +
 +      sd->groups->sge = fn(cpu);
 +}
 +
  /*
   * Initializers for schedule domains
   * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
@@@ -6731,7 -6441,6 +6753,7 @@@ static int sched_domains_curr_level
   * SD_SHARE_PKG_RESOURCES - describes shared caches
   * SD_NUMA                - describes NUMA topologies
   * SD_SHARE_POWERDOMAIN   - describes shared power domain
 + * SD_SHARE_CAP_STATES    - describes shared capacity states
   *
   * Odd one out:
   * SD_ASYM_PACKING        - describes SMT quirks
         SD_SHARE_PKG_RESOURCES |       \
         SD_NUMA |                      \
         SD_ASYM_PACKING |              \
 -       SD_SHARE_POWERDOMAIN)
 +       SD_SHARE_POWERDOMAIN |         \
 +       SD_SHARE_CAP_STATES)
  
  static struct sched_domain *
  sd_init(struct sched_domain_topology_level *tl, int cpu)
@@@ -7292,7 -7000,6 +7314,7 @@@ static int build_sched_domains(const st
        enum s_alloc alloc_state;
        struct sched_domain *sd;
        struct s_data d;
 +      struct rq *rq = NULL;
        int i, ret = -ENOMEM;
  
        alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
  
        /* Calculate CPU capacity for physical packages and nodes */
        for (i = nr_cpumask_bits-1; i >= 0; i--) {
 +              struct sched_domain_topology_level *tl = sched_domain_topology;
 +
                if (!cpumask_test_cpu(i, cpu_map))
                        continue;
  
 -              for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 +              for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) {
 +                      init_sched_energy(i, sd, tl->energy);
                        claim_allocations(i, sd);
                        init_sched_groups_capacity(i, sd);
                }
        /* Attach the domains */
        rcu_read_lock();
        for_each_cpu(i, cpu_map) {
 +              rq = cpu_rq(i);
                sd = *per_cpu_ptr(d.sd, i);
                cpu_attach_domain(sd, d.rd, i);
        }
@@@ -7628,7 -7331,6 +7650,7 @@@ void __init sched_init_smp(void
  {
        cpumask_var_t non_isolated_cpus;
  
 +      walt_init_cpu_efficiency();
        alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
        alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
  
@@@ -7806,11 -7508,6 +7828,11 @@@ void __init sched_init(void
                rq->idle_stamp = 0;
                rq->avg_idle = 2*sysctl_sched_migration_cost;
                rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 +#ifdef CONFIG_SCHED_WALT
 +              rq->cur_irqload = 0;
 +              rq->avg_irqload = 0;
 +              rq->irqload_ts = 0;
 +#endif
  
                INIT_LIST_HEAD(&rq->cfs_tasks);
  
@@@ -7874,14 -7571,6 +7896,14 @@@ static inline int preempt_count_equals(
        return (nested == preempt_offset);
  }
  
 +static int __might_sleep_init_called;
 +int __init __might_sleep_init(void)
 +{
 +      __might_sleep_init_called = 1;
 +      return 0;
 +}
 +early_initcall(__might_sleep_init);
 +
  void __might_sleep(const char *file, int line, int preempt_offset)
  {
        /*
@@@ -7906,10 -7595,8 +7928,10 @@@ void ___might_sleep(const char *file, i
  
        rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
        if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
 -           !is_idle_task(current)) ||
 -          system_state != SYSTEM_RUNNING || oops_in_progress)
 +           !is_idle_task(current)) || oops_in_progress)
 +              return;
 +      if (system_state != SYSTEM_RUNNING &&
 +          (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
                return;
        if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                return;
@@@ -8934,7 -8621,6 +8956,7 @@@ struct cgroup_subsys cpu_cgrp_subsys = 
        .fork           = cpu_cgroup_fork,
        .can_attach     = cpu_cgroup_can_attach,
        .attach         = cpu_cgroup_attach,
 +      .allow_attach   = subsys_cgroup_allow_attach,
        .legacy_cftypes = cpu_files,
        .early_init     = 1,
  };
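
For orientation, and not part of the patch itself: the new init_sched_energy()/check_sched_energy_data() path above expects the topology layer to hand back one energy table per CPU through the tl->energy callback, and the BUG_ON()s insist that every CPU in a group reports identical idle-state and capacity-state data. A minimal, self-contained C sketch of such a table, using locally defined stand-in structures and invented numbers for a hypothetical little cluster:

	/* Illustrative stand-ins for the EAS energy-model structures accessed by
	 * check_sched_energy_data(); the field names follow the accesses above,
	 * the power/capacity numbers are invented.
	 */
	struct idle_state { unsigned long power; };
	struct capacity_state { unsigned long cap; unsigned long power; };

	struct sched_group_energy {
		unsigned int nr_idle_states;
		const struct idle_state *idle_states;
		unsigned int nr_cap_states;
		const struct capacity_state *cap_states;
	};

	static const struct idle_state little_idle[] = {
		{ .power = 10 },		/* WFI */
		{ .power =  0 },		/* cluster power-down */
	};

	static const struct capacity_state little_cap[] = {
		{ .cap = 150, .power =  60 },	/* lowest OPP */
		{ .cap = 300, .power = 140 },
		{ .cap = 447, .power = 260 },	/* highest OPP */
	};

	static const struct sched_group_energy little_energy = {
		.nr_idle_states	= 2,
		.idle_states	= little_idle,
		.nr_cap_states	= 3,
		.cap_states	= little_cap,
	};

	/* Hypothetical tl->energy callback: returning the same table for every
	 * CPU of the cluster is exactly the invariant the BUG_ON()s enforce.
	 */
	static const struct sched_group_energy *hypothetical_cluster_energy(int cpu)
	{
		return &little_energy;
	}
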
diff --combined kernel/trace/Makefile
index ba04bf0c2653bc342a8d791d5fe67a8cafb45c1f,05ea5167e6bbaf8de9285dd6ea3ca161c08ae9e1..a9bba37fab5aec746885b403e7452fb39110b861
@@@ -1,4 -1,8 +1,8 @@@
  
+ # We are fully aware of the dangers of __builtin_return_address()
+ FRAME_CFLAGS := $(call cc-disable-warning,frame-address)
+ KBUILD_CFLAGS += $(FRAME_CFLAGS)
  # Do not instrument the tracer itself:
  
  ifdef CONFIG_FUNCTION_TRACER
@@@ -64,7 -68,6 +68,7 @@@ obj-$(CONFIG_KGDB_KDB) += trace_kdb.
  endif
  obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
  obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
 +obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o
  
  obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
  
diff --combined kernel/trace/trace.c
index dd4d86ae8e213aafe5814aa00ffbab01c3cdd1c8,059233abcfcf8d532fa227a380f366d4a476b00a..bff7d4c69274b3e6755b26bc39d6717dca02517e
@@@ -1352,7 -1352,6 +1352,7 @@@ void tracing_reset_all_online_cpus(void
  
  #define SAVED_CMDLINES_DEFAULT 128
  #define NO_CMDLINE_MAP UINT_MAX
 +static unsigned saved_tgids[SAVED_CMDLINES_DEFAULT];
  static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
  struct saved_cmdlines_buffer {
        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@@ -1591,7 -1590,7 +1591,7 @@@ static int trace_save_cmdline(struct ta
        }
  
        set_cmdline(idx, tsk->comm);
 -
 +      saved_tgids[idx] = tsk->tgid;
        arch_spin_unlock(&trace_cmdline_lock);
  
        return 1;
@@@ -1634,25 -1633,6 +1634,25 @@@ void trace_find_cmdline(int pid, char c
        preempt_enable();
  }
  
 +int trace_find_tgid(int pid)
 +{
 +      unsigned map;
 +      int tgid;
 +
 +      preempt_disable();
 +      arch_spin_lock(&trace_cmdline_lock);
 +      map = savedcmd->map_pid_to_cmdline[pid];
 +      if (map != NO_CMDLINE_MAP)
 +              tgid = saved_tgids[map];
 +      else
 +              tgid = -1;
 +
 +      arch_spin_unlock(&trace_cmdline_lock);
 +      preempt_enable();
 +
 +      return tgid;
 +}
 +
  void tracing_record_cmdline(struct task_struct *tsk)
  {
        if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
@@@ -2603,13 -2583,6 +2603,13 @@@ static void print_func_help_header(stru
                    "#              | |       |          |         |\n");
  }
  
 +static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
 +{
 +      print_event_info(buf, m);
 +      seq_puts(m, "#           TASK-PID    TGID   CPU#      TIMESTAMP  FUNCTION\n");
 +      seq_puts(m, "#              | |        |      |          |         |\n");
 +}
 +
  static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
  {
        print_event_info(buf, m);
                    "#              | |       |   ||||       |         |\n");
  }
  
 +static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
 +{
 +      print_event_info(buf, m);
 +      seq_puts(m, "#                                      _-----=> irqs-off\n");
 +      seq_puts(m, "#                                     / _----=> need-resched\n");
 +      seq_puts(m, "#                                    | / _---=> hardirq/softirq\n");
 +      seq_puts(m, "#                                    || / _--=> preempt-depth\n");
 +      seq_puts(m, "#                                    ||| /     delay\n");
 +      seq_puts(m, "#           TASK-PID    TGID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
 +      seq_puts(m, "#              | |        |      |   ||||       |         |\n");
 +}
 +
  void
  print_trace_header(struct seq_file *m, struct trace_iterator *iter)
  {
@@@ -2946,15 -2907,9 +2946,15 @@@ void trace_default_header(struct seq_fi
        } else {
                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
                        if (trace_flags & TRACE_ITER_IRQ_INFO)
 -                              print_func_help_header_irq(iter->trace_buffer, m);
 +                              if (trace_flags & TRACE_ITER_TGID)
 +                                      print_func_help_header_irq_tgid(iter->trace_buffer, m);
 +                              else
 +                                      print_func_help_header_irq(iter->trace_buffer, m);
                        else
 -                              print_func_help_header(iter->trace_buffer, m);
 +                              if (trace_flags & TRACE_ITER_TGID)
 +                                      print_func_help_header_tgid(iter->trace_buffer, m);
 +                              else
 +                                      print_func_help_header(iter->trace_buffer, m);
                }
        }
  }
@@@ -4205,50 -4160,6 +4205,50 @@@ static void trace_insert_enum_map(struc
        trace_insert_enum_map_file(mod, start, len);
  }
  
 +static ssize_t
 +tracing_saved_tgids_read(struct file *file, char __user *ubuf,
 +                              size_t cnt, loff_t *ppos)
 +{
 +      char *file_buf;
 +      char *buf;
 +      int len = 0;
 +      int pid;
 +      int i;
 +
 +      file_buf = kmalloc(SAVED_CMDLINES_DEFAULT*(16+1+16), GFP_KERNEL);
 +      if (!file_buf)
 +              return -ENOMEM;
 +
 +      buf = file_buf;
 +
 +      for (i = 0; i < SAVED_CMDLINES_DEFAULT; i++) {
 +              int tgid;
 +              int r;
 +
 +              pid = savedcmd->map_cmdline_to_pid[i];
 +              if (pid == -1 || pid == NO_CMDLINE_MAP)
 +                      continue;
 +
 +              tgid = trace_find_tgid(pid);
 +              r = sprintf(buf, "%d %d\n", pid, tgid);
 +              buf += r;
 +              len += r;
 +      }
 +
 +      len = simple_read_from_buffer(ubuf, cnt, ppos,
 +                                    file_buf, len);
 +
 +      kfree(file_buf);
 +
 +      return len;
 +}
 +
 +static const struct file_operations tracing_saved_tgids_fops = {
 +      .open   = tracing_open_generic,
 +      .read   = tracing_saved_tgids_read,
 +      .llseek = generic_file_llseek,
 +};
 +
  static ssize_t
  tracing_set_trace_read(struct file *filp, char __user *ubuf,
                       size_t cnt, loff_t *ppos)
@@@ -4816,19 -4727,20 +4816,20 @@@ tracing_read_pipe(struct file *filp, ch
        struct trace_iterator *iter = filp->private_data;
        ssize_t sret;
  
-       /* return any leftover data */
-       sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
-       if (sret != -EBUSY)
-               return sret;
-       trace_seq_init(&iter->seq);
        /*
         * Avoid more than one consumer on a single file descriptor
         * This is just a matter of traces coherency, the ring buffer itself
         * is protected.
         */
        mutex_lock(&iter->mutex);
+       /* return any leftover data */
+       sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+       if (sret != -EBUSY)
+               goto out;
+       trace_seq_init(&iter->seq);
        if (iter->trace->read) {
                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
                if (sret)
@@@ -5855,9 -5767,6 +5856,6 @@@ tracing_buffers_splice_read(struct fil
                return -EBUSY;
  #endif
  
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
        if (*ppos & (PAGE_SIZE - 1))
                return -EINVAL;
  
                len &= PAGE_MASK;
        }
  
+       if (splice_grow_spd(pipe, &spd))
+               return -ENOMEM;
   again:
        trace_access_lock(iter->cpu_file);
        entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
        /* did we read anything? */
        if (!spd.nr_pages) {
                if (ret)
-                       return ret;
+                       goto out;
  
+               ret = -EAGAIN;
                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
-                       return -EAGAIN;
+                       goto out;
  
                ret = wait_on_pipe(iter, true);
                if (ret)
-                       return ret;
+                       goto out;
  
                goto again;
        }
  
        ret = splice_to_pipe(pipe, &spd);
+ out:
        splice_shrink_spd(&spd);
  
        return ret;
@@@ -6876,9 -6790,6 +6879,9 @@@ init_tracer_tracefs(struct trace_array 
        trace_create_file("trace_marker", 0220, d_tracer,
                          tr, &tracing_mark_fops);
  
 +      trace_create_file("saved_tgids", 0444, d_tracer,
 +                        tr, &tracing_saved_tgids_fops);
 +
        trace_create_file("trace_clock", 0644, d_tracer, tr,
                          &trace_clock_fops);
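
As a usage note, not part of the patch: the saved_tgids file created above emits one "<pid> <tgid>" pair per line, as produced by tracing_saved_tgids_read(). A minimal hypothetical userspace reader, assuming the usual debugfs mount point:

	/* Hypothetical reader for the new saved_tgids file; the path assumes
	 * debugfs is mounted at /sys/kernel/debug. Each line is "<pid> <tgid>".
	 */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/tracing/saved_tgids", "r");
		int pid, tgid;

		if (!f)
			return 1;
		while (fscanf(f, "%d %d", &pid, &tgid) == 2)
			printf("pid %d -> tgid %d\n", pid, tgid);
		fclose(f);
		return 0;
	}
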
  
diff --combined net/ipv4/tcp_ipv4.c
index 7decaa4393605749796bcf96e4fc3e102182f1d5,b5853cac3269ab7ef8d2612a63accdb473cdebbc..364ba22ef2eabf42aa12181d469f7796ca6a13be
@@@ -808,8 -808,14 +808,14 @@@ static void tcp_v4_reqsk_send_ack(cons
        u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
                                             tcp_sk(sk)->snd_nxt;
  
+       /* RFC 7323 2.3
+        * The window field (SEG.WND) of every outgoing segment, with the
+        * exception of <SYN> segments, MUST be right-shifted by
+        * Rcv.Wind.Shift bits:
+        */
        tcp_v4_send_ack(sock_net(sk), skb, seq,
-                       tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+                       tcp_rsk(req)->rcv_nxt,
+                       req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp,
                        req->ts_recent,
                        0,
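
A worked example of the RFC 7323 change above, with invented numbers: if the request socket's receive window (rsk_rcv_wnd) is 262140 bytes and the negotiated rcv_wscale is 2, the ACK now carries 262140 >> 2 = 65535 in its 16-bit window field, and the peer recovers the byte count by shifting left again. The same shift is applied in the tcp_v6_reqsk_send_ack() hunk further down.
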
@@@ -2349,7 -2355,6 +2355,7 @@@ struct proto tcp_prot = 
        .destroy_cgroup         = tcp_destroy_cgroup,
        .proto_cgroup           = tcp_proto_cgroup,
  #endif
 +      .diag_destroy           = tcp_abort,
  };
  EXPORT_SYMBOL(tcp_prot);
  
diff --combined net/ipv6/addrconf.c
index 3cdf59161a7efdf9d650305884a1cb572ef19385,036b39eb122007ccc82635cd522ac15ebe15cac5..563a91f15f68e706362a403364f5b13b9f3b3fc6
@@@ -205,7 -205,6 +205,7 @@@ static struct ipv6_devconf ipv6_devcon
        .accept_ra_rt_info_max_plen = 0,
  #endif
  #endif
 +      .accept_ra_rt_table     = 0,
        .proxy_ndp              = 0,
        .accept_source_route    = 0,    /* we do not accept RH0 by default. */
        .disable_ipv6           = 0,
@@@ -250,7 -249,6 +250,7 @@@ static struct ipv6_devconf ipv6_devconf
        .accept_ra_rt_info_max_plen = 0,
  #endif
  #endif
 +      .accept_ra_rt_table     = 0,
        .proxy_ndp              = 0,
        .accept_source_route    = 0,    /* we do not accept RH0 by default. */
        .disable_ipv6           = 0,
@@@ -1900,6 -1898,7 +1900,7 @@@ errdad
        spin_unlock_bh(&ifp->lock);
  
        addrconf_mod_dad_work(ifp, 0);
+       in6_ifa_put(ifp);
  }
  
  /* Join to solicited addr multicast group.
@@@ -2147,31 -2146,6 +2148,31 @@@ static void  __ipv6_try_regen_rndid(str
                __ipv6_regen_rndid(idev);
  }
  
 +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table) {
 +      /* Determines into what table to put autoconf PIO/RIO/default routes
 +       * learned on this device.
 +       *
 +       * - If 0, use the same table for every device. This puts routes into
 +       *   one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route
 +       *   (but note that these three are currently all equal to
 +       *   RT6_TABLE_MAIN).
 +       * - If > 0, use the specified table.
 +       * - If < 0, put routes into table dev->ifindex + (-rt_table).
 +       */
 +      struct inet6_dev *idev = in6_dev_get(dev);
 +      u32 table;
 +      int sysctl = idev->cnf.accept_ra_rt_table;
 +      if (sysctl == 0) {
 +              table = default_table;
 +      } else if (sysctl > 0) {
 +              table = (u32) sysctl;
 +      } else {
 +              table = (unsigned) dev->ifindex + (-sysctl);
 +      }
 +      in6_dev_put(idev);
 +      return table;
 +}
 +
  /*
   *    Add prefix route.
   */
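
A concrete reading of the new accept_ra_rt_table sysctl, following the comment in addrconf_rt_table() above (interface indexes invented for illustration):

	accept_ra_rt_table = 0      keep the current behaviour: autoconf routes go to
	                            RT_TABLE_{PREFIX,INFO,DFLT}, all currently equal
	                            to RT6_TABLE_MAIN;
	accept_ra_rt_table = 100    RA-learned routes go into table 100;
	accept_ra_rt_table = -1000  routes go into table ifindex + 1000, e.g. a
	                            device with ifindex 3 uses table 1003.
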
@@@ -2181,7 -2155,7 +2182,7 @@@ addrconf_prefix_route(struct in6_addr *
                      unsigned long expires, u32 flags)
  {
        struct fib6_config cfg = {
 -              .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
 +              .fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX),
                .fc_metric = IP6_RT_PRIO_ADDRCONF,
                .fc_ifindex = dev->ifindex,
                .fc_expires = expires,
@@@ -2214,7 -2188,7 +2215,7 @@@ static struct rt6_info *addrconf_get_pr
        struct fib6_node *fn;
        struct rt6_info *rt = NULL;
        struct fib6_table *table;
 -      u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
 +      u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX);
  
        table = fib6_get_table(dev_net(dev), tb_id);
        if (!table)
@@@ -3636,6 -3610,7 +3637,7 @@@ static void addrconf_dad_work(struct wo
                addrconf_dad_begin(ifp);
                goto out;
        } else if (action == DAD_ABORT) {
+               in6_ifa_hold(ifp);
                addrconf_dad_stop(ifp, 1);
                goto out;
        }
@@@ -4690,7 -4665,6 +4692,7 @@@ static inline void ipv6_store_devconf(s
        array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
  #endif
  #endif
 +      array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table;
        array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
        array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
  #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@@ -5657,13 -5631,6 +5659,13 @@@ static struct addrconf_sysctl_tabl
                },
  #endif
  #endif
 +              {
 +                      .procname       = "accept_ra_rt_table",
 +                      .data           = &ipv6_devconf.accept_ra_rt_table,
 +                      .maxlen         = sizeof(int),
 +                      .mode           = 0644,
 +                      .proc_handler   = proc_dointvec,
 +              },
                {
                        .procname       = "proxy_ndp",
                        .data           = &ipv6_devconf.proxy_ndp,
diff --combined net/ipv6/ping.c
index 9411c8d770a53cf6cf367a567228b7c8e832879e,3e55447b63a43943b5552ed0c9a80bcb31741c4e..fa65e92e9510b3369bddf874f0d1a9a0681e36a2
@@@ -84,7 -84,7 +84,7 @@@ int ping_v6_sendmsg(struct sock *sk, st
        struct icmp6hdr user_icmph;
        int addr_type;
        struct in6_addr *daddr;
 -      int iif = 0;
 +      int oif = 0;
        struct flowi6 fl6;
        int err;
        int hlimit;
                if (u->sin6_family != AF_INET6) {
                        return -EAFNOSUPPORT;
                }
 -              if (sk->sk_bound_dev_if &&
 -                  sk->sk_bound_dev_if != u->sin6_scope_id) {
 -                      return -EINVAL;
 -              }
                daddr = &(u->sin6_addr);
 -              iif = u->sin6_scope_id;
 +              if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
 +                      oif = u->sin6_scope_id;
        } else {
                if (sk->sk_state != TCP_ESTABLISHED)
                        return -EDESTADDRREQ;
                daddr = &sk->sk_v6_daddr;
        }
  
 -      if (!iif)
 -              iif = sk->sk_bound_dev_if;
 +      if (!oif)
 +              oif = sk->sk_bound_dev_if;
 +
 +      if (!oif)
 +              oif = np->sticky_pktinfo.ipi6_ifindex;
 +
 +      if (!oif && ipv6_addr_is_multicast(daddr))
 +              oif = np->mcast_oif;
 +      else if (!oif)
 +              oif = np->ucast_oif;
  
        addr_type = ipv6_addr_type(daddr);
 -      if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
 -              return -EINVAL;
 -      if (addr_type & IPV6_ADDR_MAPPED)
 +      if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
 +          (addr_type & IPV6_ADDR_MAPPED) ||
 +          (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
                return -EINVAL;
  
        /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
        fl6.flowi6_proto = IPPROTO_ICMPV6;
        fl6.saddr = np->saddr;
        fl6.daddr = *daddr;
 +      fl6.flowi6_oif = oif;
        fl6.flowi6_mark = sk->sk_mark;
 +      fl6.flowi6_uid = sock_i_uid(sk);
        fl6.fl6_icmp_type = user_icmph.icmp6_type;
        fl6.fl6_icmp_code = user_icmph.icmp6_code;
        security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
  
 -      if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 -              fl6.flowi6_oif = np->mcast_oif;
 -      else if (!fl6.flowi6_oif)
 -              fl6.flowi6_oif = np->ucast_oif;
 -
        dst = ip6_sk_dst_lookup_flow(sk, &fl6,  daddr);
        if (IS_ERR(dst))
                return PTR_ERR(dst);
        rt = (struct rt6_info *) dst;
  
        np = inet6_sk(sk);
-       if (!np)
-               return -EBADF;
+       if (!np) {
+               err = -EBADF;
+               goto dst_err_out;
+       }
  
 -      if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 -              fl6.flowi6_oif = np->mcast_oif;
 -      else if (!fl6.flowi6_oif)
 -              fl6.flowi6_oif = np->ucast_oif;
 -
        pfh.icmph.type = user_icmph.icmp6_type;
        pfh.icmph.code = user_icmph.icmp6_code;
        pfh.icmph.checksum = 0;
        }
        release_sock(sk);
  
+ dst_err_out:
+       dst_release(dst);
        if (err)
                return err;
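
To make the reworked interface selection above concrete (addresses and device names are illustrative): the outgoing interface for a link-local destination such as fe80::1%eth0 comes from sin6_scope_id; for a global destination such as 2001:db8::1 the scope id is ignored and the code falls back, in order, to sk_bound_dev_if, the sticky IPV6_PKTINFO interface, then the multicast or unicast oif. Pinging fe80::1%eth1 on a socket bound to eth0 fails with -EINVAL, as do v4-mapped destinations and scoped destinations with no interface at all.
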
  
diff --combined net/ipv6/tcp_ipv6.c
index 3c6acb67d8e57cc3f9e7bcb4dfa1592e09db0c88,2d81e2f33ef21d83535c51c4c3ef7bca9b1dea05..f58632cc45dc1e2367f2f1f7131a7e215f0d9dda
@@@ -234,7 -234,6 +234,7 @@@ static int tcp_v6_connect(struct sock *
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
 +      fl6.flowi6_uid = sock_i_uid(sk);
  
        opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);
@@@ -933,9 -932,15 +933,15 @@@ static void tcp_v6_reqsk_send_ack(cons
        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
+       /* RFC 7323 2.3
+        * The window field (SEG.WND) of every outgoing segment, with the
+        * exception of <SYN> segments, MUST be right-shifted by
+        * Rcv.Wind.Shift bits:
+        */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
-                       tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+                       tcp_rsk(req)->rcv_nxt,
+                       req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
                        0, 0);
@@@ -1895,7 -1900,6 +1901,7 @@@ struct proto tcpv6_prot = 
        .proto_cgroup           = tcp_proto_cgroup,
  #endif
        .clear_sk               = tcp_v6_clear_sk,
 +      .diag_destroy           = tcp_abort,
  };
  
  static const struct inet6_protocol tcpv6_protocol = {